From f1d63c19a53dbe32b616c0ae2e8dd831c9c2565d Mon Sep 17 00:00:00 2001 From: Dan Minor Date: Thu, 27 Feb 2020 18:57:02 +0000 Subject: [PATCH] Bug 1525393 - Update libvpx to 1.8.2; r=bryce Differential Revision: https://phabricator.services.mozilla.com/D63922 --HG-- rename : media/libvpx/libvpx/third_party/libwebm/LICENSE.TXT => media/libvpx/libvpx/third_party/libyuv/LICENSE rename : media/libvpx/libvpx/vp8/common/copy_c.c => media/libvpx/libvpx/vp8/encoder/copy_c.c rename : media/libvpx/libvpx/vp8/encoder/x86/encodeopt.asm => media/libvpx/libvpx/vp8/encoder/x86/block_error_sse2.asm rename : media/libvpx/libvpx/vp8/common/x86/copy_sse2.asm => media/libvpx/libvpx/vp8/encoder/x86/copy_sse2.asm rename : media/libvpx/libvpx/vp8/common/x86/copy_sse3.asm => media/libvpx/libvpx/vp8/encoder/x86/copy_sse3.asm extra : moz-landing-system : lando --- media/libvpx/README_MOZILLA | 2 +- media/libvpx/libvpx/.clang-format | 58 +- media/libvpx/libvpx/.mailmap | 15 +- media/libvpx/libvpx/AUTHORS | 33 +- media/libvpx/libvpx/CHANGELOG | 67 +- media/libvpx/libvpx/README | 54 +- media/libvpx/libvpx/args.h | 6 +- media/libvpx/libvpx/build/make/Android.mk | 37 +- media/libvpx/libvpx/build/make/Makefile | 9 +- media/libvpx/libvpx/build/make/ads2gas.pl | 28 +- .../libvpx/libvpx/build/make/ads2gas_apple.pl | 4 +- media/libvpx/libvpx/build/make/configure.sh | 369 +- .../libvpx/libvpx/build/make/gen_msvs_sln.sh | 37 +- .../libvpx/build/make/gen_msvs_vcxproj.sh | 50 +- media/libvpx/libvpx/build/make/iosbuild.sh | 7 +- media/libvpx/libvpx/build/make/msvs_common.sh | 9 + media/libvpx/libvpx/build/make/rtcd.pl | 4 +- media/libvpx/libvpx/build/make/thumb.pm | 7 - .../cur_frame_16x16.txt | 2 + .../estimation_16x16.txt | 2 + .../exhaust_16x16.txt | 2 + .../ground_truth_16x16.txt | 2 + .../localVar_16x16.txt | 2 + .../non_greedy_mv_test_files/raw_1.png | Bin 0 -> 661279 bytes .../non_greedy_mv_test_files/raw_1_12_12.png | Bin 0 -> 919025 bytes .../ref_frame_16x16.txt | 2 + media/libvpx/libvpx/codereview.settings | 5 +- media/libvpx/libvpx/configure | 101 +- media/libvpx/libvpx/examples.mk | 46 +- .../libvpx/{vpx => examples}/svc_context.h | 24 +- .../{vpx/src => examples}/svc_encodeframe.c | 118 +- .../examples/vp8_multi_resolution_encoder.c | 8 +- .../libvpx/examples/vp9_spatial_svc_encoder.c | 739 +- media/libvpx/libvpx/examples/vp9cx_set_ref.c | 122 - .../libvpx/libvpx/examples/vpx_dec_fuzzer.cc | 118 + .../examples/vpx_temporal_svc_encoder.c | 169 +- media/libvpx/libvpx/ivfdec.c | 8 +- media/libvpx/libvpx/ivfdec.h | 6 +- media/libvpx/libvpx/ivfenc.h | 6 +- media/libvpx/libvpx/libs.doxy_template | 12 - media/libvpx/libvpx/libs.mk | 57 +- media/libvpx/libvpx/mainpage.dox | 2 + media/libvpx/libvpx/md5_utils.c | 2 +- media/libvpx/libvpx/md5_utils.h | 6 +- media/libvpx/libvpx/rate_hist.h | 6 +- media/libvpx/libvpx/test/acm_random.h | 25 +- .../libvpx/test/active_map_refresh_test.cc | 2 +- media/libvpx/libvpx/test/active_map_test.cc | 2 +- media/libvpx/libvpx/test/add_noise_test.cc | 38 +- .../libvpx/test/alt_ref_aq_segment_test.cc | 2 +- media/libvpx/libvpx/test/altref_test.cc | 2 +- media/libvpx/libvpx/test/android/README | 9 +- media/libvpx/libvpx/test/aq_segment_test.cc | 2 +- media/libvpx/libvpx/test/avg_test.cc | 254 +- media/libvpx/libvpx/test/bench.cc | 38 + media/libvpx/libvpx/test/bench.h | 30 + media/libvpx/libvpx/test/blockiness_test.cc | 16 +- media/libvpx/libvpx/test/borders_test.cc | 2 +- media/libvpx/libvpx/test/buffer.h | 6 +- .../libvpx/libvpx/test/byte_alignment_test.cc | 3 +- 
media/libvpx/libvpx/test/clear_system_state.h | 16 +- media/libvpx/libvpx/test/codec_factory.h | 17 +- .../libvpx/libvpx/test/comp_avg_pred_test.cc | 4 + media/libvpx/libvpx/test/consistency_test.cc | 11 +- media/libvpx/libvpx/test/convolve_test.cc | 112 +- media/libvpx/libvpx/test/cpu_speed_test.cc | 4 +- media/libvpx/libvpx/test/cq_test.cc | 2 +- media/libvpx/libvpx/test/datarate_test.cc | 1876 ---- media/libvpx/libvpx/test/dct16x16_test.cc | 10 +- media/libvpx/libvpx/test/dct32x32_test.cc | 30 +- media/libvpx/libvpx/test/dct_partial_test.cc | 48 +- media/libvpx/libvpx/test/dct_test.cc | 917 +- media/libvpx/libvpx/test/decode_api_test.cc | 52 +- media/libvpx/libvpx/test/decode_corrupted.cc | 103 + media/libvpx/libvpx/test/decode_perf_test.cc | 8 +- media/libvpx/libvpx/test/decode_svc_test.cc | 9 +- .../libvpx/libvpx/test/decode_test_driver.cc | 5 +- media/libvpx/libvpx/test/decode_test_driver.h | 6 +- media/libvpx/libvpx/test/encode_perf_test.cc | 2 +- .../libvpx/libvpx/test/encode_test_driver.cc | 8 +- media/libvpx/libvpx/test/encode_test_driver.h | 28 +- .../libvpx/test/external_frame_buffer_test.cc | 12 +- media/libvpx/libvpx/test/fdct8x8_test.cc | 12 +- media/libvpx/libvpx/test/frame_size_tests.cc | 2 +- media/libvpx/libvpx/test/hadamard_test.cc | 373 +- media/libvpx/libvpx/test/i420_video_source.h | 6 +- media/libvpx/libvpx/test/idct_test.cc | 4 +- media/libvpx/libvpx/test/invalid_file_test.cc | 9 +- media/libvpx/libvpx/test/ivf_video_source.h | 8 +- media/libvpx/libvpx/test/keyframe_test.cc | 9 +- media/libvpx/libvpx/test/lpf_test.cc | 19 +- media/libvpx/libvpx/test/md5_helper.h | 6 +- .../libvpx/libvpx/test/non_greedy_mv_test.cc | 200 + media/libvpx/libvpx/test/partial_idct_test.cc | 8 +- media/libvpx/libvpx/test/pp_filter_test.cc | 352 +- media/libvpx/libvpx/test/predict_test.cc | 43 +- media/libvpx/libvpx/test/quantize_test.cc | 22 +- .../libvpx/libvpx/test/register_state_check.h | 14 +- media/libvpx/libvpx/test/resize_test.cc | 102 +- media/libvpx/libvpx/test/sad_test.cc | 180 +- .../libvpx/libvpx/test/simple_encode_test.cc | 154 + media/libvpx/libvpx/test/stress.sh | 36 +- media/libvpx/libvpx/test/sum_squares_test.cc | 19 +- media/libvpx/libvpx/test/superframe_test.cc | 8 +- media/libvpx/libvpx/test/svc_datarate_test.cc | 1428 +++ .../libvpx/libvpx/test/svc_end_to_end_test.cc | 481 + media/libvpx/libvpx/test/svc_test.cc | 871 +- media/libvpx/libvpx/test/svc_test.h | 67 + .../libvpx/test/temporal_filter_test.cc | 277 - media/libvpx/libvpx/test/test-data.mk | 24 +- media/libvpx/libvpx/test/test-data.sha1 | 24 +- media/libvpx/libvpx/test/test.mk | 20 +- .../libvpx/test/test_intra_pred_speed.cc | 3 + media/libvpx/libvpx/test/test_libvpx.cc | 11 +- media/libvpx/libvpx/test/test_vector_test.cc | 52 +- media/libvpx/libvpx/test/test_vectors.h | 6 +- .../libvpx/test/tile_independence_test.cc | 2 +- media/libvpx/libvpx/test/timestamp_test.cc | 101 + media/libvpx/libvpx/test/tools_common.sh | 6 +- media/libvpx/libvpx/test/user_priv_test.cc | 4 +- media/libvpx/libvpx/test/util.h | 10 +- media/libvpx/libvpx/test/variance_test.cc | 80 +- media/libvpx/libvpx/test/video_source.h | 6 +- .../libvpx/libvpx/test/vp8_boolcoder_test.cc | 3 + media/libvpx/libvpx/test/vp8_datarate_test.cc | 416 + .../test/vp8_multi_resolution_encoder.sh | 22 +- media/libvpx/libvpx/test/vp9_arf_freq_test.cc | 4 +- .../libvpx/test/vp9_block_error_test.cc | 5 +- .../libvpx/libvpx/test/vp9_boolcoder_test.cc | 3 + media/libvpx/libvpx/test/vp9_datarate_test.cc | 901 ++ media/libvpx/libvpx/test/vp9_denoiser_test.cc | 5 +- 
.../test/vp9_encoder_parms_get_to_decoder.cc | 6 +- .../libvpx/libvpx/test/vp9_end_to_end_test.cc | 175 +- media/libvpx/libvpx/test/vp9_ethread_test.cc | 4 +- .../libvpx/libvpx/test/vp9_intrapred_test.cc | 89 +- media/libvpx/libvpx/test/vp9_lossless_test.cc | 2 +- .../libvpx/test/vp9_motion_vector_test.cc | 8 +- media/libvpx/libvpx/test/vp9_quantize_test.cc | 331 +- media/libvpx/libvpx/test/vp9_scale_test.cc | 91 +- .../libvpx/test/vp9_spatial_svc_encoder.sh | 72 - media/libvpx/libvpx/test/vp9_subtract_test.cc | 110 +- media/libvpx/libvpx/test/vp9_thread_test.cc | 3 +- media/libvpx/libvpx/test/vpx_scale_test.cc | 24 +- media/libvpx/libvpx/test/vpx_scale_test.h | 9 +- .../libvpx/test/vpx_temporal_svc_encoder.sh | 55 +- media/libvpx/libvpx/test/vpxdec.sh | 37 +- media/libvpx/libvpx/test/vpxenc.sh | 143 +- media/libvpx/libvpx/test/webm_video_source.h | 6 +- media/libvpx/libvpx/test/y4m_test.cc | 24 +- media/libvpx/libvpx/test/y4m_video_source.h | 9 +- .../libvpx/test/yuv_temporal_filter_test.cc | 708 ++ media/libvpx/libvpx/test/yuv_video_source.h | 6 +- .../third_party/googletest/README.libvpx | 14 +- .../third_party/googletest/src/README.md | 401 +- .../src/include/gtest/gtest-death-test.h | 66 +- .../src/include/gtest/gtest-message.h | 13 +- .../src/include/gtest/gtest-param-test.h | 34 +- .../src/include/gtest/gtest-param-test.h.pump | 28 +- .../src/include/gtest/gtest-printers.h | 230 +- .../googletest/src/include/gtest/gtest-spi.h | 15 +- .../src/include/gtest/gtest-test-part.h | 10 +- .../src/include/gtest/gtest-typed-test.h | 115 +- .../googletest/src/include/gtest/gtest.h | 189 +- .../src/include/gtest/gtest_pred_impl.h | 15 +- .../googletest/src/include/gtest/gtest_prod.h | 17 +- .../include/gtest/internal/custom/README.md | 56 + .../gtest/internal/custom/gtest-port.h | 34 +- .../gtest/internal/custom/gtest-printers.h | 4 +- .../src/include/gtest/internal/custom/gtest.h | 6 +- .../internal/gtest-death-test-internal.h | 77 +- .../include/gtest/internal/gtest-filepath.h | 11 +- .../include/gtest/internal/gtest-internal.h | 252 +- .../include/gtest/internal/gtest-linked_ptr.h | 6 +- .../internal/gtest-param-util-generated.h | 492 +- .../gtest-param-util-generated.h.pump | 20 +- .../include/gtest/internal/gtest-param-util.h | 31 +- .../include/gtest/internal/gtest-port-arch.h | 9 +- .../src/include/gtest/internal/gtest-port.h | 395 +- .../src/include/gtest/internal/gtest-string.h | 8 +- .../src/include/gtest/internal/gtest-tuple.h | 7 +- .../include/gtest/internal/gtest-tuple.h.pump | 7 +- .../include/gtest/internal/gtest-type-util.h | 23 +- .../gtest/internal/gtest-type-util.h.pump | 23 +- .../googletest/src/src/gtest-all.cc | 5 +- .../googletest/src/src/gtest-death-test.cc | 309 +- .../googletest/src/src/gtest-filepath.cc | 16 +- .../googletest/src/src/gtest-internal-inl.h | 72 +- .../googletest/src/src/gtest-port.cc | 213 +- .../googletest/src/src/gtest-printers.cc | 108 +- .../googletest/src/src/gtest-test-part.cc | 13 +- .../googletest/src/src/gtest-typed-test.cc | 4 +- .../third_party/googletest/src/src/gtest.cc | 1084 +- .../googletest/src/src/gtest_main.cc | 3 +- .../libvpx/third_party/libwebm/AUTHORS.TXT | 4 - .../libvpx/third_party/libwebm/Android.mk | 17 - .../libvpx/third_party/libwebm/PATENTS.TXT | 23 - .../libvpx/third_party/libwebm/README.libvpx | 10 - .../third_party/libwebm/common/file_util.cc | 78 - .../third_party/libwebm/common/file_util.h | 41 - .../third_party/libwebm/common/hdr_util.cc | 220 - .../third_party/libwebm/common/hdr_util.h | 79 - 
.../third_party/libwebm/common/webmids.h | 192 - .../third_party/libwebm/mkvmuxer/mkvmuxer.cc | 4196 ------- .../third_party/libwebm/mkvmuxer/mkvmuxer.h | 1921 ---- .../libwebm/mkvmuxer/mkvmuxertypes.h | 28 - .../libwebm/mkvmuxer/mkvmuxerutil.cc | 744 -- .../libwebm/mkvmuxer/mkvmuxerutil.h | 112 - .../third_party/libwebm/mkvmuxer/mkvwriter.cc | 90 - .../third_party/libwebm/mkvmuxer/mkvwriter.h | 51 - .../libwebm/mkvparser/mkvparser.cc | 8032 ------------- .../third_party/libwebm/mkvparser/mkvparser.h | 1145 -- .../libwebm/mkvparser/mkvreader.cc | 133 - .../third_party/libwebm/mkvparser/mkvreader.h | 45 - .../{libwebm/LICENSE.TXT => libyuv/LICENSE} | 3 +- .../libvpx/third_party/libyuv/README.libvpx | 23 +- .../libyuv/include/libyuv/basic_types.h | 109 +- .../libyuv/include/libyuv/compare.h | 93 +- .../libyuv/include/libyuv/convert.h | 421 +- .../libyuv/include/libyuv/convert_argb.h | 676 +- .../libyuv/include/libyuv/convert_from.h | 377 +- .../libyuv/include/libyuv/convert_from_argb.h | 283 +- .../libyuv/include/libyuv/cpu_id.h | 75 +- .../libyuv/include/libyuv/macros_msa.h | 233 + .../libyuv/include/libyuv/mjpeg_decoder.h | 33 +- .../libyuv/include/libyuv/planar_functions.h | 1248 +- .../libyuv/include/libyuv/rotate.h | 143 +- .../libyuv/include/libyuv/rotate_argb.h | 14 +- .../libyuv/include/libyuv/rotate_row.h | 203 +- .../third_party/libyuv/include/libyuv/row.h | 4065 ++++--- .../third_party/libyuv/include/libyuv/scale.h | 110 +- .../libyuv/include/libyuv/scale_argb.h | 60 +- .../libyuv/include/libyuv/scale_row.h | 1083 +- .../libyuv/include/libyuv/version.h | 6 +- .../libyuv/include/libyuv/video_common.h | 52 +- .../third_party/libyuv/source/compare.cc | 267 +- .../libyuv/source/compare_common.cc | 70 +- .../third_party/libyuv/source/compare_gcc.cc | 427 +- .../third_party/libyuv/source/compare_msa.cc | 97 + .../third_party/libyuv/source/compare_neon.cc | 94 +- .../libyuv/source/compare_neon64.cc | 88 +- .../third_party/libyuv/source/compare_win.cc | 119 +- .../third_party/libyuv/source/convert.cc | 963 +- .../third_party/libyuv/source/convert_argb.cc | 1777 ++- .../third_party/libyuv/source/convert_from.cc | 1165 +- .../libyuv/source/convert_from_argb.cc | 839 +- .../third_party/libyuv/source/convert_jpeg.cc | 243 +- .../libyuv/source/convert_to_argb.cc | 246 +- .../libyuv/source/convert_to_i420.cc | 302 +- .../third_party/libyuv/source/cpu_id.cc | 208 +- .../libyuv/source/mjpeg_decoder.cc | 126 +- .../libyuv/source/mjpeg_validate.cc | 11 +- .../libyuv/source/planar_functions.cc | 1876 +++- .../third_party/libyuv/source/rotate.cc | 377 +- .../third_party/libyuv/source/rotate_any.cc | 57 +- .../third_party/libyuv/source/rotate_argb.cc | 163 +- .../libyuv/source/rotate_common.cc | 40 +- .../third_party/libyuv/source/rotate_gcc.cc | 660 +- .../third_party/libyuv/source/rotate_mips.cc | 484 - .../third_party/libyuv/source/rotate_msa.cc | 250 + .../third_party/libyuv/source/rotate_neon.cc | 567 +- .../libyuv/source/rotate_neon64.cc | 685 +- .../third_party/libyuv/source/rotate_win.cc | 51 +- .../third_party/libyuv/source/row_any.cc | 937 +- .../third_party/libyuv/source/row_common.cc | 2514 +++-- .../third_party/libyuv/source/row_gcc.cc | 9987 +++++++++-------- .../third_party/libyuv/source/row_mips.cc | 782 -- .../third_party/libyuv/source/row_msa.cc | 3512 ++++++ .../third_party/libyuv/source/row_neon.cc | 4374 ++++---- .../third_party/libyuv/source/row_neon64.cc | 4147 +++---- .../third_party/libyuv/source/row_win.cc | 3943 ++++--- .../libvpx/third_party/libyuv/source/scale.cc | 987 +- 
.../third_party/libyuv/source/scale_any.cc | 489 +- .../third_party/libyuv/source/scale_argb.cc | 573 +- .../third_party/libyuv/source/scale_common.cc | 808 +- .../third_party/libyuv/source/scale_gcc.cc | 2280 ++-- .../third_party/libyuv/source/scale_mips.cc | 644 -- .../third_party/libyuv/source/scale_msa.cc | 949 ++ .../third_party/libyuv/source/scale_neon.cc | 1453 ++- .../third_party/libyuv/source/scale_neon64.cc | 1582 +-- .../third_party/libyuv/source/scale_win.cc | 861 +- .../third_party/libyuv/source/video_common.cc | 51 +- .../libvpx/third_party/x86inc/README.libvpx | 1 + .../libvpx/third_party/x86inc/x86inc.asm | 24 +- media/libvpx/libvpx/tools/cpplint.py | 4756 -------- media/libvpx/libvpx/tools/diff.py | 130 - media/libvpx/libvpx/tools/gen_authors.sh | 14 - media/libvpx/libvpx/tools/intersect-diffs.py | 76 - media/libvpx/libvpx/tools/lint-hunks.py | 144 - media/libvpx/libvpx/tools/tiny_ssim.c | 802 -- media/libvpx/libvpx/tools/wrap-commit-msg.py | 70 - media/libvpx/libvpx/tools_common.c | 314 +- media/libvpx/libvpx/tools_common.h | 26 +- media/libvpx/libvpx/usage_cx.dox | 2 + media/libvpx/libvpx/usage_dx.dox | 2 + media/libvpx/libvpx/video_common.h | 6 +- media/libvpx/libvpx/video_reader.c | 32 +- media/libvpx/libvpx/video_reader.h | 6 +- media/libvpx/libvpx/video_writer.c | 14 +- media/libvpx/libvpx/video_writer.h | 6 +- media/libvpx/libvpx/vp8/common/alloccommon.h | 8 +- .../libvpx/vp8/common/arm/loopfilter_arm.c | 22 +- .../libvpx/vp8/common/arm/loopfilter_arm.h | 31 + .../common/arm/neon/bilinearpredict_neon.c | 2 + .../libvpx/vp8/common/arm/neon/copymem_neon.c | 2 + .../vp8/common/arm/neon/dequantizeb_neon.c | 1 + .../vp8/common/arm/neon/idct_blk_neon.c | 251 +- .../common/arm/neon/idct_dequant_0_2x_neon.c | 59 - .../arm/neon/idct_dequant_full_2x_neon.c | 182 - .../libvpx/vp8/common/arm/neon/iwalsh_neon.c | 2 + .../loopfiltersimplehorizontaledge_neon.c | 2 + .../neon/loopfiltersimpleverticaledge_neon.c | 2 + .../vp8/common/arm/neon/mbloopfilter_neon.c | 2 + .../vp8/common/arm/neon/sixtappredict_neon.c | 1 + .../vp8/common/arm/neon/vp8_loopfilter_neon.c | 2 + media/libvpx/libvpx/vp8/common/blockd.c | 12 +- media/libvpx/libvpx/vp8/common/blockd.h | 23 +- .../libvpx/vp8/common/coefupdateprobs.h | 6 +- media/libvpx/libvpx/vp8/common/common.h | 18 +- .../libvpx/vp8/common/default_coef_probs.h | 8 +- media/libvpx/libvpx/vp8/common/entropy.c | 18 +- media/libvpx/libvpx/vp8/common/entropy.h | 6 +- media/libvpx/libvpx/vp8/common/entropymode.c | 10 +- media/libvpx/libvpx/vp8/common/entropymode.h | 6 +- media/libvpx/libvpx/vp8/common/entropymv.h | 6 +- media/libvpx/libvpx/vp8/common/extend.c | 3 +- media/libvpx/libvpx/vp8/common/extend.h | 6 +- media/libvpx/libvpx/vp8/common/filter.h | 6 +- media/libvpx/libvpx/vp8/common/findnearmv.c | 28 +- media/libvpx/libvpx/vp8/common/findnearmv.h | 8 +- .../vp8/common/generic/systemdependent.c | 17 +- media/libvpx/libvpx/vp8/common/header.h | 6 +- media/libvpx/libvpx/vp8/common/idct_blk.c | 26 +- media/libvpx/libvpx/vp8/common/invtrans.h | 6 +- media/libvpx/libvpx/vp8/common/loopfilter.h | 12 +- .../libvpx/vp8/common/loopfilter_filters.c | 22 +- media/libvpx/libvpx/vp8/common/mfqe.c | 4 +- .../vp8/common/mips/dspr2/idct_blk_dspr2.c | 20 +- .../mips/dspr2/vp8_loopfilter_filters_dspr2.c | 12 +- .../libvpx/vp8/common/mips/mmi/idct_blk_mmi.c | 23 +- .../libvpx/vp8/common/mips/msa/idct_msa.c | 58 +- .../vp8/common/mips/msa/vp8_macros_msa.h | 6 +- media/libvpx/libvpx/vp8/common/modecont.c | 36 +- media/libvpx/libvpx/vp8/common/modecont.h | 6 +- 
media/libvpx/libvpx/vp8/common/mv.h | 6 +- media/libvpx/libvpx/vp8/common/onyx.h | 34 +- media/libvpx/libvpx/vp8/common/onyxc_int.h | 6 +- media/libvpx/libvpx/vp8/common/onyxd.h | 17 +- media/libvpx/libvpx/vp8/common/postproc.c | 140 +- media/libvpx/libvpx/vp8/common/postproc.h | 15 +- media/libvpx/libvpx/vp8/common/ppflags.h | 6 +- media/libvpx/libvpx/vp8/common/quant_common.h | 6 +- media/libvpx/libvpx/vp8/common/reconinter.c | 7 + media/libvpx/libvpx/vp8/common/reconinter.h | 29 +- media/libvpx/libvpx/vp8/common/reconintra.h | 6 +- .../libvpx/libvpx/vp8/common/reconintra4x4.h | 8 +- media/libvpx/libvpx/vp8/common/rtcd_defs.pl | 68 +- .../libvpx/vp8/common/setupintrarecon.h | 6 +- .../libvpx/libvpx/vp8/common/swapyv12buffer.h | 6 +- .../libvpx/vp8/common/systemdependent.h | 6 +- media/libvpx/libvpx/vp8/common/threading.h | 8 +- media/libvpx/libvpx/vp8/common/treecoder.c | 9 +- media/libvpx/libvpx/vp8/common/treecoder.h | 8 +- .../libvpx/vp8/common/vp8_entropymodedata.h | 8 +- .../libvpx/libvpx/vp8/common/vp8_loopfilter.c | 8 +- .../libvpx/vp8/common/vp8_skin_detection.h | 6 +- .../vp8/common/x86/bilinear_filter_sse2.c | 336 + .../libvpx/libvpx/vp8/common/x86/filter_x86.c | 29 - .../libvpx/libvpx/vp8/common/x86/filter_x86.h | 33 - .../libvpx/vp8/common/x86/idct_blk_sse2.c | 24 +- .../libvpx/vp8/common/x86/iwalsh_sse2.asm | 2 +- .../libvpx/vp8/common/x86/loopfilter_x86.c | 6 +- .../libvpx/vp8/common/x86/subpixel_mmx.asm | 276 - .../libvpx/vp8/common/x86/subpixel_sse2.asm | 414 - .../libvpx/vp8/common/x86/vp8_asm_stubs.c | 13 +- media/libvpx/libvpx/vp8/decoder/dboolhuff.h | 8 +- media/libvpx/libvpx/vp8/decoder/decodeframe.c | 28 +- media/libvpx/libvpx/vp8/decoder/decodemv.c | 7 +- media/libvpx/libvpx/vp8/decoder/decodemv.h | 6 +- .../libvpx/vp8/decoder/decoderthreading.h | 8 +- media/libvpx/libvpx/vp8/decoder/detokenize.h | 6 +- media/libvpx/libvpx/vp8/decoder/ec_types.h | 10 +- .../libvpx/vp8/decoder/error_concealment.c | 10 +- .../libvpx/vp8/decoder/error_concealment.h | 6 +- media/libvpx/libvpx/vp8/decoder/onyxd_if.c | 36 +- media/libvpx/libvpx/vp8/decoder/onyxd_int.h | 26 +- media/libvpx/libvpx/vp8/decoder/threading.c | 97 +- media/libvpx/libvpx/vp8/decoder/treereader.h | 8 +- .../vp8/encoder/arm/neon/fastquantizeb_neon.c | 12 +- .../vp8/encoder/arm/neon/shortfdct_neon.c | 2 + .../encoder/arm/neon/vp8_shortwalsh4x4_neon.c | 2 + media/libvpx/libvpx/vp8/encoder/bitstream.c | 159 +- media/libvpx/libvpx/vp8/encoder/bitstream.h | 6 +- media/libvpx/libvpx/vp8/encoder/block.h | 6 +- media/libvpx/libvpx/vp8/encoder/boolhuff.c | 26 +- media/libvpx/libvpx/vp8/encoder/boolhuff.h | 67 +- .../libvpx/vp8/{common => encoder}/copy_c.c | 0 .../libvpx/vp8/encoder/dct_value_cost.h | 6 +- .../libvpx/vp8/encoder/dct_value_tokens.h | 6 +- .../libvpx/vp8/encoder/defaultcoefcounts.h | 6 +- media/libvpx/libvpx/vp8/encoder/denoising.c | 47 +- media/libvpx/libvpx/vp8/encoder/denoising.h | 6 +- media/libvpx/libvpx/vp8/encoder/encodeframe.c | 6 +- media/libvpx/libvpx/vp8/encoder/encodeframe.h | 6 +- media/libvpx/libvpx/vp8/encoder/encodeintra.h | 6 +- media/libvpx/libvpx/vp8/encoder/encodemb.h | 6 +- media/libvpx/libvpx/vp8/encoder/encodemv.c | 11 - media/libvpx/libvpx/vp8/encoder/encodemv.h | 6 +- media/libvpx/libvpx/vp8/encoder/ethreading.h | 6 +- media/libvpx/libvpx/vp8/encoder/firstpass.c | 80 +- media/libvpx/libvpx/vp8/encoder/firstpass.h | 6 +- media/libvpx/libvpx/vp8/encoder/lookahead.h | 8 +- media/libvpx/libvpx/vp8/encoder/mcomp.c | 133 +- media/libvpx/libvpx/vp8/encoder/mcomp.h | 34 +- 
media/libvpx/libvpx/vp8/encoder/modecosts.h | 8 +- media/libvpx/libvpx/vp8/encoder/mr_dissim.h | 6 +- media/libvpx/libvpx/vp8/encoder/onyx_if.c | 163 +- media/libvpx/libvpx/vp8/encoder/onyx_int.h | 21 +- media/libvpx/libvpx/vp8/encoder/pickinter.c | 40 +- media/libvpx/libvpx/vp8/encoder/pickinter.h | 6 +- media/libvpx/libvpx/vp8/encoder/picklpf.c | 2 +- media/libvpx/libvpx/vp8/encoder/picklpf.h | 6 +- media/libvpx/libvpx/vp8/encoder/quantize.h | 6 +- media/libvpx/libvpx/vp8/encoder/ratectrl.c | 26 +- media/libvpx/libvpx/vp8/encoder/ratectrl.h | 6 +- media/libvpx/libvpx/vp8/encoder/rdopt.c | 33 +- media/libvpx/libvpx/vp8/encoder/rdopt.h | 24 +- .../libvpx/libvpx/vp8/encoder/segmentation.h | 6 +- .../libvpx/vp8/encoder/temporal_filter.c | 3 +- .../libvpx/vp8/encoder/temporal_filter.h | 6 +- media/libvpx/libvpx/vp8/encoder/tokenize.c | 70 - media/libvpx/libvpx/vp8/encoder/tokenize.h | 14 +- media/libvpx/libvpx/vp8/encoder/treewriter.h | 16 +- .../libvpx/libvpx/vp8/encoder/vp8_quantize.c | 2 - .../{encodeopt.asm => block_error_sse2.asm} | 0 .../vp8/{common => encoder}/x86/copy_sse2.asm | 0 .../vp8/{common => encoder}/x86/copy_sse3.asm | 0 .../libvpx/vp8/encoder/x86/quantize_sse4.c | 49 +- .../vp8/encoder/x86/vp8_quantize_ssse3.c | 6 +- media/libvpx/libvpx/vp8/vp8_common.mk | 15 +- media/libvpx/libvpx/vp8/vp8_cx_iface.c | 130 +- media/libvpx/libvpx/vp8/vp8_dx_iface.c | 69 +- media/libvpx/libvpx/vp8/vp8cx.mk | 5 +- .../arm/neon/vp9_highbd_iht16x16_add_neon.c | 446 + .../arm/neon/vp9_highbd_iht4x4_add_neon.c | 181 + .../arm/neon/vp9_highbd_iht8x8_add_neon.c | 345 + .../common/arm/neon/vp9_iht16x16_add_neon.c | 279 + .../vp9/common/arm/neon/vp9_iht4x4_add_neon.c | 229 +- .../vp9/common/arm/neon/vp9_iht8x8_add_neon.c | 542 +- .../libvpx/vp9/common/arm/neon/vp9_iht_neon.h | 272 + .../vp9/common/mips/msa/vp9_idct16x16_msa.c | 1 + .../vp9/common/mips/msa/vp9_idct4x4_msa.c | 1 + .../vp9/common/mips/msa/vp9_idct8x8_msa.c | 1 + .../libvpx/vp9/common/ppc/vp9_idct_vsx.c | 116 + .../libvpx/vp9/common/vp9_alloccommon.c | 23 +- .../libvpx/vp9/common/vp9_alloccommon.h | 13 +- media/libvpx/libvpx/vp9/common/vp9_blockd.h | 38 +- media/libvpx/libvpx/vp9/common/vp9_common.h | 24 +- .../libvpx/vp9/common/vp9_common_data.c | 2 +- .../libvpx/vp9/common/vp9_common_data.h | 6 +- media/libvpx/libvpx/vp9/common/vp9_entropy.c | 2 + media/libvpx/libvpx/vp9/common/vp9_entropy.h | 7 +- .../libvpx/vp9/common/vp9_entropymode.c | 65 +- .../libvpx/vp9/common/vp9_entropymode.h | 6 +- .../libvpx/libvpx/vp9/common/vp9_entropymv.c | 4 +- .../libvpx/libvpx/vp9/common/vp9_entropymv.h | 10 +- media/libvpx/libvpx/vp9/common/vp9_enums.h | 8 +- media/libvpx/libvpx/vp9/common/vp9_filter.c | 18 +- media/libvpx/libvpx/vp9/common/vp9_filter.h | 9 +- .../libvpx/vp9/common/vp9_frame_buffers.h | 6 +- media/libvpx/libvpx/vp9/common/vp9_idct.h | 6 +- .../libvpx/libvpx/vp9/common/vp9_loopfilter.c | 24 +- .../libvpx/libvpx/vp9/common/vp9_loopfilter.h | 10 +- media/libvpx/libvpx/vp9/common/vp9_mfqe.h | 6 +- media/libvpx/libvpx/vp9/common/vp9_mv.h | 8 +- .../libvpx/vp9/common/vp9_mvref_common.h | 10 +- .../libvpx/libvpx/vp9/common/vp9_onyxc_int.h | 60 +- media/libvpx/libvpx/vp9/common/vp9_postproc.c | 32 +- media/libvpx/libvpx/vp9/common/vp9_postproc.h | 16 +- media/libvpx/libvpx/vp9/common/vp9_ppflags.h | 6 +- .../libvpx/vp9/common/vp9_pred_common.c | 31 +- .../libvpx/vp9/common/vp9_pred_common.h | 16 +- .../libvpx/vp9/common/vp9_quant_common.h | 6 +- .../libvpx/libvpx/vp9/common/vp9_reconinter.c | 24 +- .../libvpx/libvpx/vp9/common/vp9_reconinter.h 
| 19 +- .../libvpx/libvpx/vp9/common/vp9_reconintra.h | 6 +- .../libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl | 45 +- media/libvpx/libvpx/vp9/common/vp9_scale.h | 10 +- media/libvpx/libvpx/vp9/common/vp9_scan.h | 6 +- .../libvpx/libvpx/vp9/common/vp9_seg_common.h | 6 +- .../libvpx/vp9/common/vp9_thread_common.c | 259 +- .../libvpx/vp9/common/vp9_thread_common.h | 32 +- .../libvpx/vp9/common/vp9_tile_common.h | 6 +- .../common/x86/vp9_highbd_iht16x16_add_sse4.c | 419 + .../common/x86/vp9_highbd_iht4x4_add_sse4.c | 131 + .../common/x86/vp9_highbd_iht8x8_add_sse4.c | 255 + .../vp9/common/x86/vp9_idct_intrin_sse2.c | 40 +- .../libvpx/vp9/decoder/vp9_decodeframe.c | 1134 +- .../libvpx/vp9/decoder/vp9_decodeframe.h | 6 +- .../libvpx/libvpx/vp9/decoder/vp9_decodemv.c | 97 +- .../libvpx/libvpx/vp9/decoder/vp9_decodemv.h | 6 +- media/libvpx/libvpx/vp9/decoder/vp9_decoder.c | 181 +- media/libvpx/libvpx/vp9/decoder/vp9_decoder.h | 65 +- .../libvpx/vp9/decoder/vp9_detokenize.c | 45 +- .../libvpx/vp9/decoder/vp9_detokenize.h | 6 +- media/libvpx/libvpx/vp9/decoder/vp9_dsubexp.h | 6 +- .../libvpx/libvpx/vp9/decoder/vp9_job_queue.c | 124 + .../libvpx/libvpx/vp9/decoder/vp9_job_queue.h | 45 + .../vp9/encoder/arm/neon/vp9_dct_neon.c | 35 - .../vp9/encoder/arm/neon/vp9_quantize_neon.c | 81 +- .../vp9/encoder/mips/msa/vp9_error_msa.c | 3 + .../vp9/encoder/mips/msa/vp9_fdct16x16_msa.c | 1 + .../vp9/encoder/mips/msa/vp9_fdct4x4_msa.c | 1 + .../vp9/encoder/mips/msa/vp9_fdct8x8_msa.c | 1 + .../vp9/encoder/mips/msa/vp9_fdct_msa.h | 6 +- .../libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c | 292 + .../libvpx/vp9/encoder/vp9_alt_ref_aq.h | 6 +- media/libvpx/libvpx/vp9/encoder/vp9_aq_360.h | 6 +- .../libvpx/vp9/encoder/vp9_aq_complexity.h | 6 +- .../libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c | 148 +- .../libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h | 14 +- .../libvpx/vp9/encoder/vp9_aq_variance.c | 56 +- .../libvpx/vp9/encoder/vp9_aq_variance.h | 10 +- .../libvpx/libvpx/vp9/encoder/vp9_bitstream.c | 83 +- .../libvpx/libvpx/vp9/encoder/vp9_bitstream.h | 14 +- media/libvpx/libvpx/vp9/encoder/vp9_block.h | 21 +- .../libvpx/vp9/encoder/vp9_blockiness.c | 1 + .../libvpx/vp9/encoder/vp9_blockiness.h | 26 + .../libvpx/vp9/encoder/vp9_context_tree.c | 28 +- .../libvpx/vp9/encoder/vp9_context_tree.h | 12 +- media/libvpx/libvpx/vp9/encoder/vp9_cost.h | 6 +- media/libvpx/libvpx/vp9/encoder/vp9_dct.c | 103 - .../libvpx/libvpx/vp9/encoder/vp9_denoiser.c | 231 +- .../libvpx/libvpx/vp9/encoder/vp9_denoiser.h | 30 +- .../libvpx/vp9/encoder/vp9_encodeframe.c | 1983 +++- .../libvpx/vp9/encoder/vp9_encodeframe.h | 11 +- .../libvpx/libvpx/vp9/encoder/vp9_encodemb.c | 174 +- .../libvpx/libvpx/vp9/encoder/vp9_encodemb.h | 14 +- .../libvpx/libvpx/vp9/encoder/vp9_encodemv.h | 8 +- media/libvpx/libvpx/vp9/encoder/vp9_encoder.c | 3084 ++++- media/libvpx/libvpx/vp9/encoder/vp9_encoder.h | 229 +- media/libvpx/libvpx/vp9/encoder/vp9_ethread.c | 69 +- media/libvpx/libvpx/vp9/encoder/vp9_ethread.h | 10 +- media/libvpx/libvpx/vp9/encoder/vp9_extend.h | 6 +- .../libvpx/libvpx/vp9/encoder/vp9_firstpass.c | 1956 ++-- .../libvpx/libvpx/vp9/encoder/vp9_firstpass.h | 87 +- .../libvpx/libvpx/vp9/encoder/vp9_job_queue.h | 6 +- .../libvpx/libvpx/vp9/encoder/vp9_lookahead.c | 21 +- .../libvpx/libvpx/vp9/encoder/vp9_lookahead.h | 43 +- media/libvpx/libvpx/vp9/encoder/vp9_mbgraph.c | 5 +- media/libvpx/libvpx/vp9/encoder/vp9_mbgraph.h | 10 +- media/libvpx/libvpx/vp9/encoder/vp9_mcomp.c | 888 +- media/libvpx/libvpx/vp9/encoder/vp9_mcomp.h | 74 +- 
.../libvpx/vp9/encoder/vp9_multi_thread.c | 50 +- .../libvpx/vp9/encoder/vp9_multi_thread.h | 9 +- .../libvpx/vp9/encoder/vp9_noise_estimate.c | 133 +- .../libvpx/vp9/encoder/vp9_noise_estimate.h | 9 +- .../libvpx/vp9/encoder/vp9_non_greedy_mv.c | 533 + .../libvpx/vp9/encoder/vp9_non_greedy_mv.h | 129 + .../libvpx/vp9/encoder/vp9_partition_models.h | 975 ++ media/libvpx/libvpx/vp9/encoder/vp9_picklpf.c | 30 +- media/libvpx/libvpx/vp9/encoder/vp9_picklpf.h | 6 +- .../libvpx/libvpx/vp9/encoder/vp9_pickmode.c | 1117 +- .../libvpx/libvpx/vp9/encoder/vp9_pickmode.h | 6 +- .../libvpx/libvpx/vp9/encoder/vp9_quantize.c | 26 +- .../libvpx/libvpx/vp9/encoder/vp9_quantize.h | 6 +- .../libvpx/libvpx/vp9/encoder/vp9_ratectrl.c | 1320 ++- .../libvpx/libvpx/vp9/encoder/vp9_ratectrl.h | 50 +- media/libvpx/libvpx/vp9/encoder/vp9_rd.c | 169 +- media/libvpx/libvpx/vp9/encoder/vp9_rd.h | 44 +- media/libvpx/libvpx/vp9/encoder/vp9_rdopt.c | 660 +- media/libvpx/libvpx/vp9/encoder/vp9_rdopt.h | 10 +- media/libvpx/libvpx/vp9/encoder/vp9_resize.c | 16 +- media/libvpx/libvpx/vp9/encoder/vp9_resize.h | 6 +- .../libvpx/vp9/encoder/vp9_segmentation.c | 54 + .../libvpx/vp9/encoder/vp9_segmentation.h | 11 +- .../libvpx/vp9/encoder/vp9_skin_detection.h | 6 +- .../libvpx/vp9/encoder/vp9_speed_features.c | 367 +- .../libvpx/vp9/encoder/vp9_speed_features.h | 152 +- media/libvpx/libvpx/vp9/encoder/vp9_subexp.c | 1 + media/libvpx/libvpx/vp9/encoder/vp9_subexp.h | 6 +- .../libvpx/vp9/encoder/vp9_svc_layercontext.c | 823 +- .../libvpx/vp9/encoder/vp9_svc_layercontext.h | 134 +- .../libvpx/vp9/encoder/vp9_temporal_filter.c | 837 +- .../libvpx/vp9/encoder/vp9_temporal_filter.h | 21 +- .../libvpx/libvpx/vp9/encoder/vp9_tokenize.h | 6 +- .../libvpx/vp9/encoder/vp9_treewriter.h | 6 +- .../encoder/x86/highbd_temporal_filter_sse4.c | 943 ++ .../encoder/x86/temporal_filter_constants.h | 410 + .../vp9/encoder/x86/temporal_filter_sse4.c | 1046 +- .../vp9/encoder/x86/vp9_dct_intrin_sse2.c | 452 +- .../libvpx/vp9/encoder/x86/vp9_dct_ssse3.c | 465 - .../encoder/x86/vp9_diamond_search_sad_avx.c | 12 +- .../libvpx/vp9/encoder/x86/vp9_error_sse2.asm | 4 +- .../x86/vp9_highbd_block_error_intrin_sse2.c | 19 +- .../vp9/encoder/x86/vp9_quantize_avx2.c | 139 + .../vp9/encoder/x86/vp9_quantize_sse2.c | 18 +- media/libvpx/libvpx/vp9/simple_encode.cc | 313 + media/libvpx/libvpx/vp9/simple_encode.h | 104 + media/libvpx/libvpx/vp9/vp9_common.mk | 45 +- media/libvpx/libvpx/vp9/vp9_cx_iface.c | 839 +- media/libvpx/libvpx/vp9/vp9_cx_iface.h | 48 + media/libvpx/libvpx/vp9/vp9_dx_iface.c | 44 +- media/libvpx/libvpx/vp9/vp9_dx_iface.h | 8 +- media/libvpx/libvpx/vp9/vp9_iface_common.c | 131 + media/libvpx/libvpx/vp9/vp9_iface_common.h | 143 +- media/libvpx/libvpx/vp9/vp9cx.mk | 30 +- media/libvpx/libvpx/vp9/vp9dx.mk | 2 + media/libvpx/libvpx/vpx/exports_spatial_svc | 6 - .../libvpx/vpx/internal/vpx_codec_internal.h | 6 +- media/libvpx/libvpx/vpx/src/vpx_encoder.c | 36 +- media/libvpx/libvpx/vpx/src/vpx_image.c | 21 +- media/libvpx/libvpx/vpx/vp8.h | 27 +- media/libvpx/libvpx/vpx/vp8cx.h | 242 +- media/libvpx/libvpx/vpx/vp8dx.h | 32 +- media/libvpx/libvpx/vpx/vpx_codec.h | 12 +- media/libvpx/libvpx/vpx/vpx_codec.mk | 4 - media/libvpx/libvpx/vpx/vpx_decoder.h | 6 +- media/libvpx/libvpx/vpx/vpx_encoder.h | 83 +- media/libvpx/libvpx/vpx/vpx_frame_buffer.h | 14 +- media/libvpx/libvpx/vpx/vpx_image.h | 43 +- media/libvpx/libvpx/vpx/vpx_integer.h | 39 +- media/libvpx/libvpx/vpx_dsp/add_noise.c | 2 + .../libvpx/libvpx/vpx_dsp/arm/avg_pred_neon.c | 46 +- 
.../libvpx/libvpx/vpx_dsp/arm/deblock_neon.c | 5 - media/libvpx/libvpx/vpx_dsp/arm/fdct_neon.c | 1 + .../libvpx/libvpx/vpx_dsp/arm/fwd_txfm_neon.c | 1 + .../vpx_dsp/arm/highbd_idct16x16_add_neon.c | 178 +- .../arm/highbd_idct32x32_1024_add_neon.c | 82 +- .../arm/highbd_idct32x32_135_add_neon.c | 1 + .../arm/highbd_idct32x32_34_add_neon.c | 1 + .../vpx_dsp/arm/highbd_idct4x4_add_neon.c | 130 +- .../vpx_dsp/arm/highbd_idct8x8_add_neon.c | 504 +- .../libvpx/vpx_dsp/arm/highbd_idct_neon.h | 474 + .../libvpx/vpx_dsp/arm/idct16x16_add_neon.c | 59 - .../vpx_dsp/arm/idct32x32_135_add_neon.c | 12 +- .../vpx_dsp/arm/idct32x32_34_add_neon.c | 12 +- .../libvpx/vpx_dsp/arm/idct4x4_add_neon.c | 45 +- .../libvpx/vpx_dsp/arm/idct8x8_add_neon.c | 112 +- media/libvpx/libvpx/vpx_dsp/arm/idct_neon.h | 769 +- .../libvpx/vpx_dsp/arm/intrapred_neon.c | 2 - .../libvpx/vpx_dsp/arm/loopfilter_8_neon.asm | 2 +- media/libvpx/libvpx/vpx_dsp/arm/mem_neon.h | 33 +- .../libvpx/libvpx/vpx_dsp/arm/quantize_neon.c | 135 +- media/libvpx/libvpx/vpx_dsp/arm/sad4d_neon.c | 478 +- media/libvpx/libvpx/vpx_dsp/arm/sad_neon.c | 273 +- .../libvpx/vpx_dsp/arm/subpel_variance_neon.c | 104 +- .../libvpx/libvpx/vpx_dsp/arm/subtract_neon.c | 84 +- media/libvpx/libvpx/vpx_dsp/arm/sum_neon.h | 15 +- .../libvpx/vpx_dsp/arm/sum_squares_neon.c | 85 + .../libvpx/vpx_dsp/arm/transpose_neon.h | 14 +- .../libvpx/libvpx/vpx_dsp/arm/variance_neon.c | 170 +- ..._convolve8_avg_horiz_filter_type1_neon.asm | 438 + ..._convolve8_avg_horiz_filter_type2_neon.asm | 439 + .../arm/vpx_convolve8_avg_neon_asm.asm | 295 - ...x_convolve8_avg_vert_filter_type1_neon.asm | 486 + ...x_convolve8_avg_vert_filter_type2_neon.asm | 487 + .../vpx_convolve8_horiz_filter_type1_neon.asm | 415 + .../vpx_convolve8_horiz_filter_type2_neon.asm | 415 + .../libvpx/vpx_dsp/arm/vpx_convolve8_neon.h | 5 + .../vpx_dsp/arm/vpx_convolve8_neon_asm.asm | 273 - .../vpx_dsp/arm/vpx_convolve8_neon_asm.c | 41 + .../vpx_dsp/arm/vpx_convolve8_neon_asm.h | 29 + .../vpx_convolve8_vert_filter_type1_neon.asm | 457 + .../vpx_convolve8_vert_filter_type2_neon.asm | 455 + .../libvpx/vpx_dsp/arm/vpx_convolve_neon.c | 5 +- media/libvpx/libvpx/vpx_dsp/avg.c | 204 + media/libvpx/libvpx/vpx_dsp/bitreader.h | 37 +- .../libvpx/libvpx/vpx_dsp/bitreader_buffer.c | 2 +- .../libvpx/libvpx/vpx_dsp/bitreader_buffer.h | 6 +- media/libvpx/libvpx/vpx_dsp/bitwriter.c | 11 + media/libvpx/libvpx/vpx_dsp/bitwriter.h | 34 +- .../libvpx/libvpx/vpx_dsp/bitwriter_buffer.h | 6 +- media/libvpx/libvpx/vpx_dsp/deblock.c | 43 +- media/libvpx/libvpx/vpx_dsp/fastssim.c | 50 +- media/libvpx/libvpx/vpx_dsp/fwd_txfm.c | 67 +- media/libvpx/libvpx/vpx_dsp/fwd_txfm.h | 6 +- media/libvpx/libvpx/vpx_dsp/inv_txfm.c | 40 +- media/libvpx/libvpx/vpx_dsp/inv_txfm.h | 7 +- media/libvpx/libvpx/vpx_dsp/loopfilter.c | 208 +- .../libvpx/vpx_dsp/mips/add_noise_msa.c | 4 +- media/libvpx/libvpx/vpx_dsp/mips/avg_msa.c | 3 + .../libvpx/libvpx/vpx_dsp/mips/common_dspr2.h | 6 +- .../libvpx/vpx_dsp/mips/convolve8_avg_dspr2.c | 3 +- .../vpx_dsp/mips/convolve8_avg_horiz_dspr2.c | 3 +- .../libvpx/vpx_dsp/mips/convolve8_dspr2.c | 4 +- .../vpx_dsp/mips/convolve8_horiz_dspr2.c | 2 +- .../vpx_dsp/mips/convolve8_vert_dspr2.c | 2 +- .../vpx_dsp/mips/convolve_common_dspr2.h | 6 +- .../libvpx/libvpx/vpx_dsp/mips/deblock_msa.c | 88 +- .../libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c | 1 + .../libvpx/libvpx/vpx_dsp/mips/fwd_txfm_msa.h | 6 +- .../libvpx/vpx_dsp/mips/idct16x16_msa.c | 1 + .../libvpx/vpx_dsp/mips/idct32x32_msa.c | 1 + .../libvpx/libvpx/vpx_dsp/mips/idct4x4_msa.c 
| 1 + .../libvpx/libvpx/vpx_dsp/mips/idct8x8_msa.c | 1 + .../libvpx/vpx_dsp/mips/inv_txfm_dspr2.h | 7 +- .../libvpx/libvpx/vpx_dsp/mips/inv_txfm_msa.h | 6 +- .../vpx_dsp/mips/loopfilter_filters_dspr2.h | 6 +- .../vpx_dsp/mips/loopfilter_macros_dspr2.h | 6 +- .../vpx_dsp/mips/loopfilter_masks_dspr2.h | 6 +- .../libvpx/vpx_dsp/mips/loopfilter_msa.h | 6 +- media/libvpx/libvpx/vpx_dsp/mips/macros_msa.h | 6 +- media/libvpx/libvpx/vpx_dsp/mips/sad_mmi.c | 2 +- .../vpx_dsp/mips/sub_pixel_variance_msa.c | 61 +- .../libvpx/vpx_dsp/mips/txfm_macros_msa.h | 6 +- .../libvpx/libvpx/vpx_dsp/mips/variance_mmi.c | 639 +- .../libvpx/libvpx/vpx_dsp/mips/variance_msa.c | 5 +- .../mips/vpx_convolve8_avg_horiz_msa.c | 2 +- .../vpx_dsp/mips/vpx_convolve8_avg_msa.c | 8 +- .../vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c | 2 +- .../vpx_dsp/mips/vpx_convolve8_horiz_msa.c | 2 +- .../libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c | 716 ++ .../libvpx/vpx_dsp/mips/vpx_convolve8_msa.c | 8 +- .../vpx_dsp/mips/vpx_convolve8_vert_msa.c | 2 +- .../libvpx/vpx_dsp/mips/vpx_convolve_msa.h | 6 +- media/libvpx/libvpx/vpx_dsp/postproc.h | 6 +- .../vpx_dsp/ppc/bitdepth_conversion_vsx.h | 6 +- media/libvpx/libvpx/vpx_dsp/ppc/deblock_vsx.c | 374 + .../libvpx/libvpx/vpx_dsp/ppc/fdct32x32_vsx.c | 553 + .../libvpx/libvpx/vpx_dsp/ppc/intrapred_vsx.c | 18 + .../libvpx/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c | 1231 +- .../libvpx/libvpx/vpx_dsp/ppc/inv_txfm_vsx.h | 48 + .../libvpx/libvpx/vpx_dsp/ppc/quantize_vsx.c | 305 + media/libvpx/libvpx/vpx_dsp/ppc/sad_vsx.c | 93 +- .../libvpx/libvpx/vpx_dsp/ppc/subtract_vsx.c | 117 + .../libvpx/libvpx/vpx_dsp/ppc/transpose_vsx.h | 38 +- .../libvpx/vpx_dsp/ppc/txfm_common_vsx.h | 90 + media/libvpx/libvpx/vpx_dsp/ppc/types_vsx.h | 50 +- .../libvpx/libvpx/vpx_dsp/ppc/variance_vsx.c | 198 +- .../libvpx/vpx_dsp/ppc/vpx_convolve_vsx.c | 96 +- media/libvpx/libvpx/vpx_dsp/prob.h | 8 +- media/libvpx/libvpx/vpx_dsp/psnr.c | 20 +- media/libvpx/libvpx/vpx_dsp/psnr.h | 43 +- media/libvpx/libvpx/vpx_dsp/psnrhvs.c | 18 +- media/libvpx/libvpx/vpx_dsp/quantize.c | 28 +- media/libvpx/libvpx/vpx_dsp/quantize.h | 23 +- media/libvpx/libvpx/vpx_dsp/sad.c | 128 +- media/libvpx/libvpx/vpx_dsp/skin_detection.h | 6 +- media/libvpx/libvpx/vpx_dsp/ssim.c | 16 +- media/libvpx/libvpx/vpx_dsp/ssim.h | 6 +- media/libvpx/libvpx/vpx_dsp/subtract.c | 28 +- media/libvpx/libvpx/vpx_dsp/sum_squares.c | 5 +- media/libvpx/libvpx/vpx_dsp/txfm_common.h | 6 +- media/libvpx/libvpx/vpx_dsp/variance.c | 563 +- media/libvpx/libvpx/vpx_dsp/variance.h | 48 +- media/libvpx/libvpx/vpx_dsp/vpx_convolve.h | 6 +- media/libvpx/libvpx/vpx_dsp/vpx_dsp.mk | 56 +- media/libvpx/libvpx/vpx_dsp/vpx_dsp_common.h | 14 +- .../libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl | 797 +- media/libvpx/libvpx/vpx_dsp/vpx_filter.h | 15 +- .../libvpx/vpx_dsp/x86/avg_intrin_avx2.c | 303 +- .../libvpx/vpx_dsp/x86/avg_intrin_sse2.c | 221 +- .../libvpx/libvpx/vpx_dsp/x86/avg_pred_sse2.c | 28 +- .../libvpx/vpx_dsp/x86/avg_ssse3_x86_64.asm | 2 +- .../vpx_dsp/x86/bitdepth_conversion_avx2.h | 6 +- .../vpx_dsp/x86/bitdepth_conversion_sse2.h | 6 +- media/libvpx/libvpx/vpx_dsp/x86/convolve.h | 150 +- .../libvpx/libvpx/vpx_dsp/x86/convolve_avx2.h | 63 +- .../libvpx/libvpx/vpx_dsp/x86/convolve_sse2.h | 88 + .../libvpx/vpx_dsp/x86/convolve_ssse3.h | 6 +- .../libvpx/vpx_dsp/x86/deblock_sse2.asm | 231 - .../vpx_dsp/x86/fwd_dct32x32_impl_avx2.h | 252 +- .../vpx_dsp/x86/fwd_dct32x32_impl_sse2.h | 258 +- .../libvpx/libvpx/vpx_dsp/x86/fwd_txfm_avx2.c | 3 + .../libvpx/vpx_dsp/x86/fwd_txfm_impl_sse2.h | 6 +- 
.../libvpx/libvpx/vpx_dsp/x86/fwd_txfm_sse2.h | 10 +- .../vpx_dsp/x86/fwd_txfm_ssse3_x86_64.asm | 2 +- .../libvpx/vpx_dsp/x86/highbd_convolve_avx2.c | 483 +- .../vpx_dsp/x86/highbd_idct16x16_add_sse4.c | 6 +- .../vpx_dsp/x86/highbd_idct4x4_add_sse2.c | 4 +- .../vpx_dsp/x86/highbd_idct4x4_add_sse4.c | 26 +- .../vpx_dsp/x86/highbd_idct8x8_add_sse2.c | 4 +- .../vpx_dsp/x86/highbd_idct8x8_add_sse4.c | 14 +- .../x86/highbd_intrapred_intrin_sse2.c | 3 +- .../x86/highbd_intrapred_intrin_ssse3.c | 6 +- .../vpx_dsp/x86/highbd_intrapred_sse2.asm | 16 +- .../libvpx/vpx_dsp/x86/highbd_inv_txfm_sse2.h | 10 +- .../libvpx/vpx_dsp/x86/highbd_inv_txfm_sse4.h | 31 +- .../vpx_dsp/x86/highbd_loopfilter_sse2.c | 366 +- .../vpx_dsp/x86/highbd_quantize_intrin_sse2.c | 1 + .../libvpx/vpx_dsp/x86/highbd_sad_sse2.asm | 4 +- .../x86/highbd_subpel_variance_impl_sse2.asm | 396 +- .../vpx_dsp/x86/highbd_variance_impl_sse2.asm | 16 +- .../libvpx/vpx_dsp/x86/highbd_variance_sse2.c | 93 +- .../libvpx/libvpx/vpx_dsp/x86/inv_txfm_sse2.c | 553 +- .../libvpx/libvpx/vpx_dsp/x86/inv_txfm_sse2.h | 9 +- .../libvpx/vpx_dsp/x86/inv_txfm_ssse3.h | 6 +- .../libvpx/vpx_dsp/x86/loopfilter_avx2.c | 198 +- .../vpx_dsp/x86/loopfilter_intrin_sse2.c | 569 +- media/libvpx/libvpx/vpx_dsp/x86/mem_sse2.h | 36 +- .../libvpx/vpx_dsp/x86/post_proc_sse2.c | 141 + .../libvpx/libvpx/vpx_dsp/x86/quantize_avx.c | 93 +- .../libvpx/libvpx/vpx_dsp/x86/quantize_sse2.c | 29 +- .../x86/{quantize_x86.h => quantize_sse2.h} | 34 +- .../libvpx/vpx_dsp/x86/quantize_ssse3.c | 90 +- .../libvpx/vpx_dsp/x86/quantize_ssse3.h | 51 + media/libvpx/libvpx/vpx_dsp/x86/sad4d_avx2.c | 299 +- .../libvpx/libvpx/vpx_dsp/x86/sad4d_avx512.c | 26 +- media/libvpx/libvpx/vpx_dsp/x86/sad_sse2.asm | 4 +- .../vpx_dsp/x86/subpel_variance_sse2.asm | 359 +- .../libvpx/vpx_dsp/x86/sum_squares_sse2.c | 190 +- .../libvpx/vpx_dsp/x86/transpose_sse2.h | 6 +- .../libvpx/vpx_dsp/x86/txfm_common_sse2.h | 6 +- .../libvpx/libvpx/vpx_dsp/x86/variance_avx2.c | 588 +- .../libvpx/libvpx/vpx_dsp/x86/variance_sse2.c | 646 +- .../libvpx/libvpx/vpx_dsp/x86/vpx_asm_stubs.c | 162 - .../vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm | 16 +- .../x86/vpx_high_subpixel_bilinear_sse2.asm | 14 +- .../vpx_dsp/x86/vpx_subpixel_4t_intrin_sse2.c | 1161 ++ .../vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c | 609 +- .../x86/vpx_subpixel_8t_intrin_ssse3.c | 532 +- .../vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm | 26 +- .../libvpx/vpx_mem/include/vpx_mem_intrnl.h | 6 +- media/libvpx/libvpx/vpx_mem/vpx_mem.c | 2 + media/libvpx/libvpx/vpx_mem/vpx_mem.h | 6 +- media/libvpx/libvpx/vpx_ports/arm.h | 6 +- media/libvpx/libvpx/vpx_ports/asmdefs_mmi.h | 6 +- media/libvpx/libvpx/vpx_ports/bitops.h | 6 +- .../libvpx/vpx_ports/emmintrin_compat.h | 6 +- media/libvpx/libvpx/vpx_ports/emms_mmx.asm | 18 + .../libvpx/vpx_ports/{config.h => emms_mmx.c} | 9 +- .../{emms.asm => float_control_word.asm} | 5 - media/libvpx/libvpx/vpx_ports/mem.h | 26 +- media/libvpx/libvpx/vpx_ports/mem_ops.h | 7 +- .../libvpx/libvpx/vpx_ports/mem_ops_aligned.h | 6 +- media/libvpx/libvpx/vpx_ports/msvc.h | 6 +- media/libvpx/libvpx/vpx_ports/ppc.h | 6 +- media/libvpx/libvpx/vpx_ports/system_state.h | 22 +- media/libvpx/libvpx/vpx_ports/vpx_once.h | 6 +- media/libvpx/libvpx/vpx_ports/vpx_ports.mk | 25 +- media/libvpx/libvpx/vpx_ports/vpx_timer.h | 6 +- media/libvpx/libvpx/vpx_ports/x86.h | 102 +- .../libvpx/vpx_scale/generic/gen_scalers.c | 4 +- .../libvpx/vpx_scale/generic/vpx_scale.c | 4 +- .../libvpx/vpx_scale/generic/yv12config.c | 57 +- 
media/libvpx/libvpx/vpx_scale/vpx_scale.h | 6 +- media/libvpx/libvpx/vpx_scale/yv12config.h | 10 +- media/libvpx/libvpx/vpx_util/endian_inl.h | 6 +- media/libvpx/libvpx/vpx_util/vpx_atomics.h | 18 +- media/libvpx/libvpx/vpx_util/vpx_debug_util.c | 282 + media/libvpx/libvpx/vpx_util/vpx_debug_util.h | 70 + media/libvpx/libvpx/vpx_util/vpx_thread.h | 29 +- media/libvpx/libvpx/vpx_util/vpx_timestamp.h | 45 + media/libvpx/libvpx/vpx_util/vpx_util.mk | 3 + .../libvpx/vpx_util/vpx_write_yuv_frame.c | 2 +- .../libvpx/vpx_util/vpx_write_yuv_frame.h | 6 +- media/libvpx/libvpx/vpxdec.c | 75 +- media/libvpx/libvpx/vpxenc.c | 370 +- media/libvpx/libvpx/vpxenc.h | 6 +- media/libvpx/libvpx/vpxstats.h | 6 +- media/libvpx/libvpx/warnings.h | 6 +- media/libvpx/libvpx/webmdec.h | 6 +- media/libvpx/libvpx/webmenc.h | 6 +- media/libvpx/libvpx/y4menc.c | 8 +- media/libvpx/libvpx/y4menc.h | 6 +- media/libvpx/libvpx/y4minput.c | 31 +- media/libvpx/libvpx/y4minput.h | 6 +- 857 files changed, 92005 insertions(+), 72710 deletions(-) create mode 100644 media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/cur_frame_16x16.txt create mode 100644 media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/estimation_16x16.txt create mode 100644 media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/exhaust_16x16.txt create mode 100644 media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/ground_truth_16x16.txt create mode 100644 media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/localVar_16x16.txt create mode 100644 media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/raw_1.png create mode 100644 media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/raw_1_12_12.png create mode 100644 media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/ref_frame_16x16.txt rename media/libvpx/libvpx/{vpx => examples}/svc_context.h (83%) rename media/libvpx/libvpx/{vpx/src => examples}/svc_encodeframe.c (85%) create mode 100644 media/libvpx/libvpx/examples/vpx_dec_fuzzer.cc create mode 100644 media/libvpx/libvpx/test/bench.cc create mode 100644 media/libvpx/libvpx/test/bench.h delete mode 100644 media/libvpx/libvpx/test/datarate_test.cc create mode 100644 media/libvpx/libvpx/test/decode_corrupted.cc create mode 100644 media/libvpx/libvpx/test/non_greedy_mv_test.cc create mode 100644 media/libvpx/libvpx/test/simple_encode_test.cc create mode 100644 media/libvpx/libvpx/test/svc_datarate_test.cc create mode 100644 media/libvpx/libvpx/test/svc_end_to_end_test.cc create mode 100644 media/libvpx/libvpx/test/svc_test.h delete mode 100644 media/libvpx/libvpx/test/temporal_filter_test.cc create mode 100644 media/libvpx/libvpx/test/timestamp_test.cc create mode 100644 media/libvpx/libvpx/test/vp8_datarate_test.cc create mode 100644 media/libvpx/libvpx/test/vp9_datarate_test.cc delete mode 100755 media/libvpx/libvpx/test/vp9_spatial_svc_encoder.sh create mode 100644 media/libvpx/libvpx/test/yuv_temporal_filter_test.cc create mode 100644 media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/README.md delete mode 100644 media/libvpx/libvpx/third_party/libwebm/AUTHORS.TXT delete mode 100644 media/libvpx/libvpx/third_party/libwebm/Android.mk delete mode 100644 media/libvpx/libvpx/third_party/libwebm/PATENTS.TXT delete mode 100644 media/libvpx/libvpx/third_party/libwebm/README.libvpx delete mode 100644 media/libvpx/libvpx/third_party/libwebm/common/file_util.cc delete mode 100644 media/libvpx/libvpx/third_party/libwebm/common/file_util.h delete mode 100644 
media/libvpx/libvpx/third_party/libwebm/common/hdr_util.cc delete mode 100644 media/libvpx/libvpx/third_party/libwebm/common/hdr_util.h delete mode 100644 media/libvpx/libvpx/third_party/libwebm/common/webmids.h delete mode 100644 media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc delete mode 100644 media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h delete mode 100644 media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxertypes.h delete mode 100644 media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc delete mode 100644 media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h delete mode 100644 media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc delete mode 100644 media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.h delete mode 100644 media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvparser.cc delete mode 100644 media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvparser.h delete mode 100644 media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvreader.cc delete mode 100644 media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvreader.h rename media/libvpx/libvpx/third_party/{libwebm/LICENSE.TXT => libyuv/LICENSE} (95%) create mode 100644 media/libvpx/libvpx/third_party/libyuv/include/libyuv/macros_msa.h create mode 100644 media/libvpx/libvpx/third_party/libyuv/source/compare_msa.cc delete mode 100644 media/libvpx/libvpx/third_party/libyuv/source/rotate_mips.cc create mode 100644 media/libvpx/libvpx/third_party/libyuv/source/rotate_msa.cc delete mode 100644 media/libvpx/libvpx/third_party/libyuv/source/row_mips.cc create mode 100644 media/libvpx/libvpx/third_party/libyuv/source/row_msa.cc delete mode 100644 media/libvpx/libvpx/third_party/libyuv/source/scale_mips.cc create mode 100644 media/libvpx/libvpx/third_party/libyuv/source/scale_msa.cc delete mode 100755 media/libvpx/libvpx/tools/cpplint.py delete mode 100644 media/libvpx/libvpx/tools/diff.py delete mode 100755 media/libvpx/libvpx/tools/gen_authors.sh delete mode 100755 media/libvpx/libvpx/tools/intersect-diffs.py delete mode 100755 media/libvpx/libvpx/tools/lint-hunks.py delete mode 100644 media/libvpx/libvpx/tools/tiny_ssim.c delete mode 100755 media/libvpx/libvpx/tools/wrap-commit-msg.py create mode 100644 media/libvpx/libvpx/vp8/common/arm/loopfilter_arm.h delete mode 100644 media/libvpx/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c delete mode 100644 media/libvpx/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c create mode 100644 media/libvpx/libvpx/vp8/common/x86/bilinear_filter_sse2.c delete mode 100644 media/libvpx/libvpx/vp8/common/x86/filter_x86.c delete mode 100644 media/libvpx/libvpx/vp8/common/x86/filter_x86.h rename media/libvpx/libvpx/vp8/{common => encoder}/copy_c.c (100%) rename media/libvpx/libvpx/vp8/encoder/x86/{encodeopt.asm => block_error_sse2.asm} (100%) rename media/libvpx/libvpx/vp8/{common => encoder}/x86/copy_sse2.asm (100%) rename media/libvpx/libvpx/vp8/{common => encoder}/x86/copy_sse3.asm (100%) create mode 100644 media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht16x16_add_neon.c create mode 100644 media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c create mode 100644 media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht8x8_add_neon.c create mode 100644 media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht16x16_add_neon.c create mode 100644 media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht_neon.h create mode 100644 media/libvpx/libvpx/vp9/common/ppc/vp9_idct_vsx.c create mode 100644 
media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c create mode 100644 media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c create mode 100644 media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c create mode 100644 media/libvpx/libvpx/vp9/decoder/vp9_job_queue.c create mode 100644 media/libvpx/libvpx/vp9/decoder/vp9_job_queue.h delete mode 100644 media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c create mode 100644 media/libvpx/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c create mode 100644 media/libvpx/libvpx/vp9/encoder/vp9_blockiness.h create mode 100644 media/libvpx/libvpx/vp9/encoder/vp9_non_greedy_mv.c create mode 100644 media/libvpx/libvpx/vp9/encoder/vp9_non_greedy_mv.h create mode 100644 media/libvpx/libvpx/vp9/encoder/vp9_partition_models.h create mode 100644 media/libvpx/libvpx/vp9/encoder/x86/highbd_temporal_filter_sse4.c create mode 100644 media/libvpx/libvpx/vp9/encoder/x86/temporal_filter_constants.h delete mode 100644 media/libvpx/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c create mode 100644 media/libvpx/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c create mode 100644 media/libvpx/libvpx/vp9/simple_encode.cc create mode 100644 media/libvpx/libvpx/vp9/simple_encode.h create mode 100644 media/libvpx/libvpx/vp9/vp9_cx_iface.h create mode 100644 media/libvpx/libvpx/vp9/vp9_iface_common.c delete mode 100644 media/libvpx/libvpx/vpx/exports_spatial_svc create mode 100644 media/libvpx/libvpx/vpx_dsp/arm/highbd_idct_neon.h create mode 100644 media/libvpx/libvpx/vpx_dsp/arm/sum_squares_neon.c create mode 100644 media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_horiz_filter_type1_neon.asm create mode 100644 media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_horiz_filter_type2_neon.asm delete mode 100644 media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm create mode 100644 media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_vert_filter_type1_neon.asm create mode 100644 media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_vert_filter_type2_neon.asm create mode 100644 media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_horiz_filter_type1_neon.asm create mode 100644 media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_horiz_filter_type2_neon.asm delete mode 100644 media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.asm create mode 100644 media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.c create mode 100644 media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.h create mode 100644 media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_vert_filter_type1_neon.asm create mode 100644 media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_vert_filter_type2_neon.asm create mode 100644 media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c create mode 100644 media/libvpx/libvpx/vpx_dsp/ppc/deblock_vsx.c create mode 100644 media/libvpx/libvpx/vpx_dsp/ppc/fdct32x32_vsx.c create mode 100644 media/libvpx/libvpx/vpx_dsp/ppc/inv_txfm_vsx.h create mode 100644 media/libvpx/libvpx/vpx_dsp/ppc/quantize_vsx.c create mode 100644 media/libvpx/libvpx/vpx_dsp/ppc/subtract_vsx.c create mode 100644 media/libvpx/libvpx/vpx_dsp/ppc/txfm_common_vsx.h create mode 100644 media/libvpx/libvpx/vpx_dsp/x86/convolve_sse2.h create mode 100644 media/libvpx/libvpx/vpx_dsp/x86/post_proc_sse2.c rename media/libvpx/libvpx/vpx_dsp/x86/{quantize_x86.h => quantize_sse2.h} (70%) create mode 100644 media/libvpx/libvpx/vpx_dsp/x86/quantize_ssse3.h delete mode 100644 media/libvpx/libvpx/vpx_dsp/x86/vpx_asm_stubs.c create mode 100644 media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_4t_intrin_sse2.c create mode 100644 
media/libvpx/libvpx/vpx_ports/emms_mmx.asm rename media/libvpx/libvpx/vpx_ports/{config.h => emms_mmx.c} (66%) rename media/libvpx/libvpx/vpx_ports/{emms.asm => float_control_word.asm} (90%) create mode 100644 media/libvpx/libvpx/vpx_util/vpx_debug_util.c create mode 100644 media/libvpx/libvpx/vpx_util/vpx_debug_util.h create mode 100644 media/libvpx/libvpx/vpx_util/vpx_timestamp.h diff --git a/media/libvpx/README_MOZILLA b/media/libvpx/README_MOZILLA index 12808985256c..7aec599a6538 100644 --- a/media/libvpx/README_MOZILLA +++ b/media/libvpx/README_MOZILLA @@ -8,4 +8,4 @@ The libvpx git repository is: https://chromium.googlesource.com/webm/libvpx -The git commit ID used was f80be22a1099b2a431c2796f529bb261064ec6b4 +The git commit ID used was 7ec7a33a081aeeb53fed1a8d87e4cbd189152527 diff --git a/media/libvpx/libvpx/.clang-format b/media/libvpx/libvpx/.clang-format index c1483199e04f..866b7e2117f0 100644 --- a/media/libvpx/libvpx/.clang-format +++ b/media/libvpx/libvpx/.clang-format @@ -1,12 +1,12 @@ --- Language: Cpp # BasedOnStyle: Google -# Generated with clang-format 4.0.1 +# Generated with clang-format 7.0.1 AccessModifierOffset: -1 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: true +AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true @@ -30,17 +30,25 @@ BraceWrapping: AfterObjCDeclaration: false AfterStruct: false AfterUnion: false + AfterExternBlock: false BeforeCatch: false BeforeElse: false IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true BreakBeforeBinaryOperators: None BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true ColumnLimit: 80 CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 @@ -48,8 +56,15 @@ Cpp11BracedListStyle: false DerivePointerAlignment: false DisableFormat: false ExperimentalAutoDetectBinPacking: false -ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve IncludeCategories: + - Regex: '^' + Priority: 2 - Regex: '^<.*\.h>' Priority: 1 - Regex: '^<.*' @@ -58,6 +73,7 @@ IncludeCategories: Priority: 3 IncludeIsMainRegex: '([-_](test|unittest))?$' IndentCaseLabels: true +IndentPPDirectives: None IndentWidth: 2 IndentWrappedFunctionNames: false JavaScriptQuotes: Leave @@ -67,22 +83,58 @@ MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None +ObjCBinPackProtocolList: Never ObjCBlockIndentWidth: 2 ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: false +PenaltyBreakAssignment: 2 PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 +PenaltyBreakTemplateDeclaration: 10 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 200 PointerAlignment: Right +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + 
EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + CanonicalDelimiter: '' + BasedOnStyle: google ReflowComments: true SortIncludes: false +SortUsingDeclarations: true SpaceAfterCStyleCast: false SpaceAfterTemplateKeyword: true SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 2 SpacesInAngles: false diff --git a/media/libvpx/libvpx/.mailmap b/media/libvpx/libvpx/.mailmap index 29af51065043..8d9750099477 100644 --- a/media/libvpx/libvpx/.mailmap +++ b/media/libvpx/libvpx/.mailmap @@ -1,12 +1,17 @@ Adrian Grange -Aℓex Converse -Aℓex Converse +Aℓex Converse +Aℓex Converse +Aℓex Converse Alexis Ballier Alpha Lam +Angie Chiang Chris Cunningham +Chi Yo Tsai Daniele Castagna Deb Mukherjee +Elliott Karpilovsky Erik Niemeyer +Fyodor Kyslov Guillaume Martres Hangyu Kuang Hui Su @@ -20,6 +25,8 @@ John Koleszar Joshua Litt Marco Paniconi Marco Paniconi +Martin Storsjö +Michael Horowitz Pascal Massimino Paul Wilkins Peter Boström @@ -28,6 +35,7 @@ Peter de Rivaz Ralph Giles Ralph Giles Ronald S. Bultje +Sai Deng Sami Pietilä Shiyou Yin Tamar Levy @@ -40,3 +48,6 @@ Urvang Joshi Yaowu Xu Yaowu Xu Yaowu Xu +Venkatarama NG. Avadhani +Vitaly Buka +xiwei gu diff --git a/media/libvpx/libvpx/AUTHORS b/media/libvpx/libvpx/AUTHORS index 04c287243296..3eb03e92335d 100644 --- a/media/libvpx/libvpx/AUTHORS +++ b/media/libvpx/libvpx/AUTHORS @@ -4,12 +4,13 @@ Aaron Watry Abo Talib Mahfoodh Adrian Grange -Aℓex Converse Ahmad Sharif +Aidan Welch Aleksey Vasenev Alexander Potapenko Alexander Voronov Alexandra Hájková +Aℓex Converse Alexis Ballier Alok Ahuja Alpha Lam @@ -19,18 +20,22 @@ Andoni Morales Alastruey Andres Mejia Andrew Lewis Andrew Russell +Angie Chen Angie Chiang Aron Rosenberg Attila Nagy +Birk Magnussen Brion Vibber changjun.yang Charles 'Buck' Krasic Cheng Chen +Chi Yo Tsai chm Chris Cunningham Christian Duvivier Daniele Castagna Daniel Kang +Dan Zhu Deb Mukherjee Deepa K G Dim Temp @@ -38,11 +43,13 @@ Dmitry Kovalev Dragan Mrdjan Ed Baker Ehsan Akhgari +Elliott Karpilovsky Erik Niemeyer Fabio Pedretti Frank Galligan Fredrik Söderquist Fritz Koenig +Fyodor Kyslov Gabriel Marin Gaute Strokkenes Geza Lore @@ -55,7 +62,9 @@ Guillermo Ballester Valor Hangyu Kuang Hanno Böck Han Shen +Harish Mahendrakar Henrik Lundin +Hien Ho Hui Su Ivan Krasin Ivan Maltz @@ -81,6 +90,7 @@ Johann Koenig John Koleszar Johnny Klonaris John Stark +Jon Kunkee Joshua Bleecher Snyder Joshua Litt Julia Robson @@ -91,15 +101,19 @@ KO Myung-Hun Kyle Siefring Lawrence Velázquez Linfeng Zhang +Liu Peng Lou Quillio Luca Barbato +Luc Trudeau Makoto Kato Mans Rullgard Marco Paniconi Mark Mentovai Martin Ettl -Martin Storsjo +Martin Storsjö Matthew Heaney +Matthias Räncker +Michael Horowitz Michael Kohler Mike Frysinger Mike Hommey @@ -107,10 +121,12 @@ Mikhal Shemer Min Chen Minghai Shang Min Ye +Mirko Bonadei Moriyoshi Koizumi Morton Jonuschat Nathan E. Egge Nico Weber +Niveditha Rau Parag Salasakar Pascal Massimino Patrik Westin @@ -129,9 +145,13 @@ Rafael de Lucena Valle Rahul Chaudhry Ralph Giles Ranjit Kumar Tulabandu +Raphael Kubo da Costa +Ravi Chaudhary +Ritu Baldwa Rob Bradford Ronald S. 
Bultje
 Rui Ueyama
+Sai Deng
 Sami Pietilä
 Sarah Parker
 Sasi Inguva
@@ -139,12 +159,15 @@ Scott Graham
 Scott LaVarnway
 Sean McGovern
 Sergey Kolomenkin
+Sergey Silkin
 Sergey Ulanov
 Shimon Doodkin
 Shiyou Yin
+Shubham Tandle
 Shunyao Li
 Stefan Holmer
 Suman Sunkara
+Supradeep T R
 Sylvestre Ledru
 Taekhyun Kim
 Takanori MATSUURA
@@ -157,11 +180,17 @@ Timothy B. Terriberry
 Tom Finegan
 Tristan Matthews
 Urvang Joshi
+Venkatarama NG. Avadhani
 Vignesh Venkatasubramanian
+Vitaly Buka
 Vlad Tsyrklevich
+Wan-Teh Chang
+xiwei gu
 Yaowu Xu
 Yi Luo
 Yongzhe Wang
+Yue Chen
+Yun Liu
 Yunqing Wang
 Yury Gitman
 Zoe Liu
diff --git a/media/libvpx/libvpx/CHANGELOG b/media/libvpx/libvpx/CHANGELOG
index 2281394c8ed8..0717e35bdb30 100644
--- a/media/libvpx/libvpx/CHANGELOG
+++ b/media/libvpx/libvpx/CHANGELOG
@@ -1,4 +1,69 @@
-2017-01-04 v1.7.0 "Mandarin Duck"
+2019-12-09 v1.8.2 "Pekin Duck"
+  This release collects incremental improvements to many aspects of the library.
+
+  - Upgrading:
+    ARCH_* defines have been removed in favor of VPX_ARCH_*.
+
+2019-07-15 v1.8.1 "Orpington Duck"
+  This release collects incremental improvements to many aspects of the library.
+
+  - Upgrading:
+    VP8E_SET_CPUUSED now accepts values up to 9 for vp9.
+    VPX_CTRL_VP9E_SET_MAX_INTER_BITRATE_PCT had a spelling fix (was VP8E).
+    The --sdk-path option has been removed. If you were using it to build for
+      Android please read build/make/Android.mk for alternatives.
+    All PPC optimizations have been disabled:
+      https://bugs.chromium.org/p/webm/issues/detail?id=1522.
+
+  - Enhancements:
+    Various changes to improve encoder rate control, quality and speed
+    for practically every use case.
+
+  - Bug fixes:
+    vp9-rtc: Fix color artifacts for speed >= 8.
+
+2019-01-31 v1.8.0 "Northern Shoveler Duck"
+  This release focused on encoding performance for realtime and VOD use cases.
+
+  - Upgrading:
+    This adds and improves several vp9 controls. Most are related to SVC:
+      VP9E_SET_SVC_FRAME_DROP_LAYER:
+        - Frame dropping in SVC.
+      VP9E_SET_SVC_INTER_LAYER_PRED:
+        - Inter-layer prediction in SVC.
+      VP9E_SET_SVC_GF_TEMPORAL_REF:
+        - Enable long term temporal reference in SVC.
+      VP9E_SET_SVC_REF_FRAME_CONFIG/VP9E_GET_SVC_REF_FRAME_CONFIG:
+        - Extend and improve this control for better flexibility in setting
+          the SVC pattern dynamically.
+      VP9E_SET_POSTENCODE_DROP:
+        - Allow for post-encode frame dropping (applies to non-SVC too).
+      VP9E_SET_SVC_SPATIAL_LAYER_SYNC:
+        - Enable spatial layer sync frames.
+      VP9E_SET_SVC_LAYER_ID:
+        - Extend the api to specify a temporal id for each spatial layer.
+      VP9E_SET_ROI_MAP:
+        - Extend Region of Interest functionality to VP9.
+
+  - Enhancements:
+    2 pass vp9 encoding has improved substantially. When using --auto-alt-ref=6,
+    we see approximately 8% for VBR and 10% for CQ. When using --auto-alt-ref=1,
+    the gains are approximately 4% for VBR and 5% for CQ.
+
+    For real-time encoding, speed 7 has improved by ~5-10%. Encodes targeted at
+    screen sharing have improved when the content changes significantly (slide
+    sharing) or scrolls. There is a new speed 9 setting for mobile devices which
+    is about 10-20% faster than speed 8.
+
+  - Bug fixes:
+    VP9 denoiser issue.
+    VP9 partition issue for 1080p.
+    VP9 rate control improvements.
+    Postprocessing Multi Frame Quality Enhancement (MFQE) issue.
+    VP8 multithread decoder issues.
+    A variety of fuzzing issues.
+
+2018-01-04 v1.7.0 "Mandarin Duck"
   This release focused on high bit depth performance (10/12 bit) and vp9
   encoding improvements.
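(Illustration, not part of the patch: the upgrading notes above mean that code consuming the generated vpx_config.h must move from the ARCH_* macros to VPX_ARCH_*, and that vp9 realtime encodes may now request speeds up to 9 via VP8E_SET_CPUUSED. A minimal sketch of a tree-internal caller; the helper name set_realtime_speed is hypothetical.)

    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"
    #include "./vpx_config.h" /* generated by configure */

    #if VPX_ARCH_ARM /* libvpx >= 1.8.2; this guard was spelled ARCH_ARM in 1.7.x */
    /* arm-specific code paths would be selected here */
    #endif

    /* hypothetical helper: per the v1.8.1 note, vp9 accepts speeds up to 9 */
    static void set_realtime_speed(vpx_codec_ctx_t *encoder) {
      vpx_codec_control(encoder, VP8E_SET_CPUUSED, 9);
    }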
diff --git a/media/libvpx/libvpx/README b/media/libvpx/libvpx/README
index 73304dd62f4a..fd3337daad1f 100644
--- a/media/libvpx/libvpx/README
+++ b/media/libvpx/libvpx/README
@@ -1,4 +1,4 @@
-README - 24 January 2018
+README - 9 December 2019

 Welcome to the WebM VP8/VP9 Codec SDK!

@@ -9,22 +9,26 @@ COMPILING THE APPLICATIONS/LIBRARIES:
   1. Prerequisites

-    * All x86 targets require the Yasm[1] assembler be installed.
-    * All Windows builds require that Cygwin[2] be installed.
-    * Building the documentation requires Doxygen[3]. If you do not
+    * All x86 targets require the Yasm[1] assembler be installed[2].
+    * All Windows builds require that Cygwin[3] be installed.
+    * Building the documentation requires Doxygen[4]. If you do not
       have this package, the install-docs option will be disabled.
-    * Downloading the data for the unit tests requires curl[4] and sha1sum.
+    * Downloading the data for the unit tests requires curl[5] and sha1sum.
       sha1sum is provided via the GNU coreutils, installed by default on
       many *nix platforms, as well as MinGW and Cygwin. If coreutils is not
       available, a compatible version of sha1sum can be built from
-      source[5]. These requirements are optional if not running the unit
+      source[6]. These requirements are optional if not running the unit
       tests.

     [1]: http://www.tortall.net/projects/yasm
-    [2]: http://www.cygwin.com
-    [3]: http://www.doxygen.org
-    [4]: http://curl.haxx.se
-    [5]: http://www.microbrew.org/tools/md5sha1sum/
+    [2]: For Visual Studio the base yasm binary (not vsyasm) should be in the
+         PATH for Visual Studio. For VS2017 it is sufficient to rename
+         yasm-<version>-<arch>.exe to yasm.exe and place it in:
+         Program Files (x86)/Microsoft Visual Studio/2017/<level>/Common7/Tools/
+    [3]: http://www.cygwin.com
+    [4]: http://www.doxygen.org
+    [5]: http://curl.haxx.se
+    [6]: http://www.microbrew.org/tools/md5sha1sum/

   2. Out-of-tree builds
      Out of tree builds are a supported method of building the application. For
@@ -41,7 +45,16 @@ COMPILING THE APPLICATIONS/LIBRARIES:
      used to get a list of supported options:
      $ ../libvpx/configure --help

-  4. Cross development
+  4. Compiler analyzers
+     Compilers have added sanitizers which instrument binaries with information
+     about address calculation, memory usage, threading, undefined behavior,
+     and other common errors. To simplify building libvpx with some of these
+     features use tools/set_analyzer_env.sh before running configure. It will
+     set the compiler and necessary flags for building as well as environment
+     variables read by the analyzer when testing the binaries.
+     $ source ../libvpx/tools/set_analyzer_env.sh address
+
+  5. Cross development
      For cross development, the most notable option is the --target option. The
      most up-to-date list of supported targets can be found at the bottom of the
      --help output of the configure script.
As of this writing, the list of @@ -50,20 +63,20 @@ COMPILING THE APPLICATIONS/LIBRARIES: arm64-android-gcc arm64-darwin-gcc arm64-linux-gcc + arm64-win64-gcc + arm64-win64-vs15 armv7-android-gcc armv7-darwin-gcc armv7-linux-rvct armv7-linux-gcc armv7-none-rvct - armv7-win32-vs11 - armv7-win32-vs12 + armv7-win32-gcc armv7-win32-vs14 armv7-win32-vs15 armv7s-darwin-gcc armv8-linux-gcc mips32-linux-gcc mips64-linux-gcc - ppc64-linux-gcc ppc64le-linux-gcc sparc-solaris-gcc x86-android-gcc @@ -78,17 +91,16 @@ COMPILING THE APPLICATIONS/LIBRARIES: x86-darwin14-gcc x86-darwin15-gcc x86-darwin16-gcc + x86-darwin17-gcc x86-iphonesimulator-gcc x86-linux-gcc x86-linux-icc x86-os2-gcc x86-solaris-gcc x86-win32-gcc - x86-win32-vs10 - x86-win32-vs11 - x86-win32-vs12 x86-win32-vs14 x86-win32-vs15 + x86-win32-vs16 x86_64-android-gcc x86_64-darwin9-gcc x86_64-darwin10-gcc @@ -98,16 +110,16 @@ COMPILING THE APPLICATIONS/LIBRARIES: x86_64-darwin14-gcc x86_64-darwin15-gcc x86_64-darwin16-gcc + x86_64-darwin17-gcc + x86_64-darwin18-gcc x86_64-iphonesimulator-gcc x86_64-linux-gcc x86_64-linux-icc x86_64-solaris-gcc x86_64-win64-gcc - x86_64-win64-vs10 - x86_64-win64-vs11 - x86_64-win64-vs12 x86_64-win64-vs14 x86_64-win64-vs15 + x86_64-win64-vs16 generic-gnu The generic-gnu target, in conjunction with the CROSS environment variable, @@ -123,7 +135,7 @@ COMPILING THE APPLICATIONS/LIBRARIES: environment variables: CC, AR, LD, AS, STRIP, NM. Additional flags can be passed to these executables with CFLAGS, LDFLAGS, and ASFLAGS. - 5. Configuration errors + 6. Configuration errors If the configuration step fails, the first step is to look in the error log. This defaults to config.log. This should give a good indication of what went wrong. If not, contact us for support. diff --git a/media/libvpx/libvpx/args.h b/media/libvpx/libvpx/args.h index 54abe04607d9..aae8ec06a539 100644 --- a/media/libvpx/libvpx/args.h +++ b/media/libvpx/libvpx/args.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef ARGS_H_ -#define ARGS_H_ +#ifndef VPX_ARGS_H_ +#define VPX_ARGS_H_ #include #ifdef __cplusplus @@ -60,4 +60,4 @@ int arg_parse_enum_or_int(const struct arg *arg); } // extern "C" #endif -#endif // ARGS_H_ +#endif // VPX_ARGS_H_ diff --git a/media/libvpx/libvpx/build/make/Android.mk b/media/libvpx/libvpx/build/make/Android.mk index a88f90056e49..6cb3af027b4e 100644 --- a/media/libvpx/libvpx/build/make/Android.mk +++ b/media/libvpx/libvpx/build/make/Android.mk @@ -14,7 +14,7 @@ # Run the configure script from the jni directory. Base libvpx # encoder/decoder configuration will look similar to: # ./libvpx/configure --target=armv7-android-gcc --disable-examples \ -# --sdk-path=/opt/android-ndk-r6b/ +# --enable-external-build # # When targeting Android, realtime-only is enabled by default. This can # be overridden by adding the command line flag: @@ -29,37 +29,20 @@ # include $(CLEAR_VARS) # include jni/libvpx/build/make/Android.mk # -# By default libvpx will detect at runtime the existance of NEON extension. -# For this we import the 'cpufeatures' module from the NDK sources. -# libvpx can also be configured without this runtime detection method. -# Configuring with --disable-runtime-cpu-detect will assume presence of NEON. -# Configuring with --disable-runtime-cpu-detect --disable-neon \ -# --disable-neon-asm -# will remove any NEON dependency. +# By default libvpx will use the 'cpufeatures' module from the NDK. 
This allows
+# the library to be built with all available optimizations (SSE2->AVX512 for
+# x86, NEON for arm, DSPr2 for mips). This can be disabled with
+#   --disable-runtime-cpu-detect
+# but the resulting library *must* be run on devices supporting all of the
+# enabled extensions. They can be disabled individually with
+#   --disable-{sse2, sse3, ssse3, sse4_1, avx, avx2, avx512}
+#   --disable-neon[-asm]
+#   --disable-{dspr2, msa}
 #
 # Running ndk-build will build libvpx and include it in your project.
 #
-# Alternatively, building the examples and unit tests can be accomplished in the
-# following way:
-#
-# Create a standalone toolchain from the NDK:
-# https://developer.android.com/ndk/guides/standalone_toolchain.html
-#
-# For example - to test on arm64 devices with clang:
-# $NDK/build/tools/make_standalone_toolchain.py \
-#   --arch arm64 --install-dir=/tmp/my-android-toolchain
-# export PATH=/tmp/my-android-toolchain/bin:$PATH
-# CROSS=aarch64-linux-android- CC=clang CXX=clang++ /path/to/libvpx/configure \
-#   --target=arm64-android-gcc
-#
-# Push the resulting binaries to a device and run them:
-# adb push test_libvpx /data/tmp/test_libvpx
-# adb shell /data/tmp/test_libvpx --gtest_filter=\*Sixtap\*
-#
-# Make sure to push the test data as well and set LIBVPX_TEST_DATA
-
 CONFIG_DIR := $(LOCAL_PATH)/
 LIBVPX_PATH := $(LOCAL_PATH)/libvpx
 ASM_CNV_PATH_LOCAL := $(TARGET_ARCH_ABI)/ads2gas
diff --git a/media/libvpx/libvpx/build/make/Makefile b/media/libvpx/libvpx/build/make/Makefile
index f6b3f0630f14..be2810229577 100644
--- a/media/libvpx/libvpx/build/make/Makefile
+++ b/media/libvpx/libvpx/build/make/Makefile
@@ -99,6 +99,7 @@ distclean: clean
             rm -f Makefile; \
             rm -f config.log config.mk; \
             rm -f vpx_config.[hc] vpx_config.asm; \
+            rm -f arm_neon.h; \
         else \
             rm -f $(target)-$(TOOLCHAIN).mk; \
         fi
@@ -429,10 +430,10 @@ ifneq ($(call enabled,DIST-SRCS),)
     DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_vcxproj.sh
     DIST-SRCS-$(CONFIG_MSVS)  += build/make/msvs_common.sh
     DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
-    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas.pl
-    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas_apple.pl
-    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2armasm_ms.pl
-    DIST-SRCS-$(ARCH_ARM)    += build/make/thumb.pm
+    DIST-SRCS-$(VPX_ARCH_ARM)    += build/make/ads2gas.pl
+    DIST-SRCS-$(VPX_ARCH_ARM)    += build/make/ads2gas_apple.pl
+    DIST-SRCS-$(VPX_ARCH_ARM)    += build/make/ads2armasm_ms.pl
+    DIST-SRCS-$(VPX_ARCH_ARM)    += build/make/thumb.pm
     DIST-SRCS-yes            += $(target:-$(TOOLCHAIN)=).mk
 endif
 INSTALL-SRCS := $(call cond_enabled,CONFIG_INSTALL_SRCS,INSTALL-SRCS)
diff --git a/media/libvpx/libvpx/build/make/ads2gas.pl b/media/libvpx/libvpx/build/make/ads2gas.pl
index 029cc4a56f21..b6a8f53eae23 100755
--- a/media/libvpx/libvpx/build/make/ads2gas.pl
+++ b/media/libvpx/libvpx/build/make/ads2gas.pl
@@ -23,16 +23,17 @@ use lib $FindBin::Bin;
 use thumb;

 my $thumb = 0;
+my $elf = 1;

 foreach my $arg (@ARGV) {
     $thumb = 1 if ($arg eq "-thumb");
+    $elf = 0 if ($arg eq "-noelf");
 }

 print "@ This file was created from a .asm file\n";
 print "@ using the ads2gas.pl script.\n";
-print "\t.equ DO1STROUNDING, 0\n";
+print "\t.syntax unified\n";
 if ($thumb) {
-    print "\t.syntax unified\n";
     print "\t.thumb\n";
 }

@@ -140,7 +141,11 @@ while (<STDIN>)
     # Make function visible to linker, and make additional symbol with
     # prepended underscore
-    s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/;
+    if ($elf) {
+        s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/;
+    } else {
+        s/EXPORT\s+\|([\$\w]*)\|/.global $1/;
+    }
     s/IMPORT\s+\|([\$\w]*)\|/.global $1/;

     s/EXPORT\s+([\$\w]*)/.global $1/;
@@ -181,11 +186,16 @@ while (<STDIN>)
     # eabi_attributes numerical equivalents can be found in the
     # "ARM IHI 0045C" document.

-    # REQUIRE8 Stack is required to be 8-byte aligned
-    s/\sREQUIRE8/.eabi_attribute 24, 1 \@Tag_ABI_align_needed/g;
+    if ($elf) {
+        # REQUIRE8 Stack is required to be 8-byte aligned
+        s/\sREQUIRE8/.eabi_attribute 24, 1 \@Tag_ABI_align_needed/g;

-    # PRESERVE8 Stack 8-byte align is preserved
-    s/\sPRESERVE8/.eabi_attribute 25, 1 \@Tag_ABI_align_preserved/g;
+        # PRESERVE8 Stack 8-byte align is preserved
+        s/\sPRESERVE8/.eabi_attribute 25, 1 \@Tag_ABI_align_preserved/g;
+    } else {
+        s/\sREQUIRE8//;
+        s/\sPRESERVE8//;
+    }

     # Use PROC and ENDP to give the symbols a .size directive.
     # This makes them show up properly in debugging tools like gdb and valgrind.
@@ -202,7 +212,7 @@ while (<STDIN>)
         my $proc;
         s/\bENDP\b/@ $&/;
         $proc = pop(@proc_stack);
-        $_ = "\t.size $proc, .-$proc".$_ if ($proc);
+        $_ = "\t.size $proc, .-$proc".$_ if ($proc and $elf);
     }

     # EQU directive
@@ -225,4 +235,4 @@ while (<STDIN>)
 }

 # Mark that this object doesn't need an executable stack.
-printf ("\t.section\t.note.GNU-stack,\"\",\%\%progbits\n");
+printf ("\t.section\t.note.GNU-stack,\"\",\%\%progbits\n") if $elf;
diff --git a/media/libvpx/libvpx/build/make/ads2gas_apple.pl b/media/libvpx/libvpx/build/make/ads2gas_apple.pl
index e1ae7b4f8711..848872fa7d1f 100755
--- a/media/libvpx/libvpx/build/make/ads2gas_apple.pl
+++ b/media/libvpx/libvpx/build/make/ads2gas_apple.pl
@@ -20,9 +20,7 @@
 print "@ This file was created from a .asm file\n";
 print "@ using the ads2gas_apple.pl script.\n\n";
-print "\t.set WIDE_REFERENCE, 0\n";
-print "\t.set ARCHITECTURE, 5\n";
-print "\t.set DO1STROUNDING, 0\n";
+print "\t.syntax unified\n";

 my %register_aliases;
 my %macro_aliases;
diff --git a/media/libvpx/libvpx/build/make/configure.sh b/media/libvpx/libvpx/build/make/configure.sh
index 683b4303749c..d05d0fa12a64 100644
--- a/media/libvpx/libvpx/build/make/configure.sh
+++ b/media/libvpx/libvpx/build/make/configure.sh
@@ -319,6 +319,12 @@ check_ld() {
     && check_cmd ${LD} ${LDFLAGS} "$@" -o ${TMP_X} ${TMP_O} ${extralibs}
 }

+check_lib() {
+  log check_lib "$@"
+  check_cc $@ \
+    && check_cmd ${LD} ${LDFLAGS} -o ${TMP_X} ${TMP_O} "$@" ${extralibs}
+}
+
 check_header(){
   log check_header "$@"
   header=$1
@@ -420,6 +426,26 @@ check_gcc_machine_options() {
   fi
 }

+check_gcc_avx512_compiles() {
+  if disabled gcc; then
+    return
+  fi
+
+  check_cc -mavx512f <<EOF
+#include <immintrin.h>
+void f(void) {
+  __m512i x = _mm512_set1_epi16(0);
+  (void)x;
+}
+EOF
+  compile_result=$?
+  if [ ${compile_result} -ne 0 ]; then
+    log_echo " disabling avx512: not supported by compiler"
+    disable_feature avx512
+    RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx512 "
+  fi
+}
+
 write_common_config_banner() {
   print_webm_license config.mk "##" ""
   echo '# This file automatically generated by configure. Do not edit!'
>> config.mk
@@ -481,6 +507,7 @@ AS_SFX          = ${AS_SFX:-.asm}
 EXE_SFX         = ${EXE_SFX}
 VCPROJ_SFX      = ${VCPROJ_SFX}
 RTCD_OPTIONS    = ${RTCD_OPTIONS}
+LIBYUV_CXXFLAGS = ${LIBYUV_CXXFLAGS}
 EOF

   if enabled rvct; then cat >> $1 << EOF
 fmt_deps = sed -e 's;^\([a-zA-Z0-9_]*\)\.o;\${@:.d=.o} \$@;'
 EOF
   fi

-  print_config_mk ARCH   "${1}" ${ARCH_LIST}
-  print_config_mk HAVE   "${1}" ${HAVE_LIST}
-  print_config_mk CONFIG "${1}" ${CONFIG_LIST}
-  print_config_mk HAVE   "${1}" gnu_strip
+  print_config_mk VPX_ARCH "${1}" ${ARCH_LIST}
+  print_config_mk HAVE     "${1}" ${HAVE_LIST}
+  print_config_mk CONFIG   "${1}" ${CONFIG_LIST}
+  print_config_mk HAVE     "${1}" gnu_strip

   enabled msvs && echo "CONFIG_VS_VERSION=${vs_version}" >> "${1}"

@@ -511,15 +538,33 @@ write_common_target_config_h() {
 #define RESTRICT    ${RESTRICT}
 #define INLINE      ${INLINE}
 EOF
-  print_config_h ARCH   "${TMP_H}" ${ARCH_LIST}
-  print_config_h HAVE   "${TMP_H}" ${HAVE_LIST}
-  print_config_h CONFIG "${TMP_H}" ${CONFIG_LIST}
-  print_config_vars_h   "${TMP_H}" ${VAR_LIST}
+  print_config_h VPX_ARCH "${TMP_H}" ${ARCH_LIST}
+  print_config_h HAVE     "${TMP_H}" ${HAVE_LIST}
+  print_config_h CONFIG   "${TMP_H}" ${CONFIG_LIST}
+  print_config_vars_h     "${TMP_H}" ${VAR_LIST}
   echo "#endif /* VPX_CONFIG_H */" >> ${TMP_H}
   mkdir -p `dirname "$1"`
   cmp "$1" ${TMP_H} >/dev/null 2>&1 || mv ${TMP_H} "$1"
 }

+write_win_arm64_neon_h_workaround() {
+  print_webm_license ${TMP_H} "/*" " */"
+  cat >> ${TMP_H} << EOF
+/* This file automatically generated by configure. Do not edit! */
+#ifndef VPX_WIN_ARM_NEON_H_WORKAROUND
+#define VPX_WIN_ARM_NEON_H_WORKAROUND
+/* The Windows SDK has arm_neon.h, but unlike on other platforms it is
+ * ARM32-only. ARM64 NEON support is provided by arm64_neon.h, a proper
+ * superset of arm_neon.h. Work around this by providing a more local
+ * arm_neon.h that simply #includes arm64_neon.h.
+ */
+#include <arm64_neon.h>
+#endif /* VPX_WIN_ARM_NEON_H_WORKAROUND */
+EOF
+  mkdir -p `dirname "$1"`
+  cmp "$1" ${TMP_H} >/dev/null 2>&1 || mv ${TMP_H} "$1"
+}
+
 process_common_cmdline() {
   for opt in "$@"; do
     optval="${opt#*=}"
@@ -602,11 +647,7 @@ process_common_cmdline() {
       --libdir=*)
         libdir="${optval}"
         ;;
-      --sdk-path=*)
-        [ -d "${optval}" ] || die "Not a directory: ${optval}"
-        sdk_path="${optval}"
-        ;;
-      --libc|--as|--prefix|--libdir|--sdk-path)
+      --libc|--as|--prefix|--libdir)
         die "Option ${opt} requires argument"
         ;;
       --help|-h)
@@ -713,11 +754,8 @@ process_common_toolchain() {
       *sparc*)
         tgt_isa=sparc
         ;;
-      power*64*-*)
-        tgt_isa=ppc64
-        ;;
-      power*)
-        tgt_isa=ppc
+      power*64le*-*)
+        tgt_isa=ppc64le
         ;;
       *mips64el*)
         tgt_isa=mips64
@@ -729,33 +767,9 @@ process_common_toolchain() {
     # detect tgt_os
     case "$gcctarget" in
-      *darwin10*)
+      *darwin1[0-8]*)
         tgt_isa=x86_64
-        tgt_os=darwin10
-        ;;
-      *darwin11*)
-        tgt_isa=x86_64
-        tgt_os=darwin11
-        ;;
-      *darwin12*)
-        tgt_isa=x86_64
-        tgt_os=darwin12
-        ;;
-      *darwin13*)
-        tgt_isa=x86_64
-        tgt_os=darwin13
-        ;;
-      *darwin14*)
-        tgt_isa=x86_64
-        tgt_os=darwin14
-        ;;
-      *darwin15*)
-        tgt_isa=x86_64
-        tgt_os=darwin15
-        ;;
-      *darwin16*)
-        tgt_isa=x86_64
-        tgt_os=darwin16
+        tgt_os=`echo $gcctarget | sed 's/.*\(darwin1[0-8]\).*/\1/'`
         ;;
       x86_64*mingw32*)
         tgt_os=win64
@@ -825,7 +839,7 @@ process_common_toolchain() {
     IOS_VERSION_MIN="8.0"
   else
     IOS_VERSION_OPTIONS=""
-    IOS_VERSION_MIN="6.0"
+    IOS_VERSION_MIN="7.0"
   fi

   # Handle darwin variants.
Newer SDKs allow targeting older @@ -885,6 +899,14 @@ process_common_toolchain() { add_cflags "-mmacosx-version-min=10.12" add_ldflags "-mmacosx-version-min=10.12" ;; + *-darwin17-*) + add_cflags "-mmacosx-version-min=10.13" + add_ldflags "-mmacosx-version-min=10.13" + ;; + *-darwin18-*) + add_cflags "-mmacosx-version-min=10.14" + add_ldflags "-mmacosx-version-min=10.14" + ;; *-iphonesimulator-*) add_cflags "-miphoneos-version-min=${IOS_VERSION_MIN}" add_ldflags "-miphoneos-version-min=${IOS_VERSION_MIN}" @@ -933,7 +955,6 @@ process_common_toolchain() { setup_gnu_toolchain arch_int=${tgt_isa##armv} arch_int=${arch_int%%te} - check_add_asflags --defsym ARCHITECTURE=${arch_int} tune_cflags="-mtune=" if [ ${tgt_isa} = "armv7" ] || [ ${tgt_isa} = "armv7s" ]; then if [ -z "${float_abi}" ]; then @@ -960,6 +981,16 @@ EOF enabled debug && add_asflags -g asm_conversion_cmd="${source_path}/build/make/ads2gas.pl" + + case ${tgt_os} in + win*) + asm_conversion_cmd="$asm_conversion_cmd -noelf" + AS="$CC -c" + EXE_SFX=.exe + enable_feature thumb + ;; + esac + if enabled thumb; then asm_conversion_cmd="$asm_conversion_cmd -thumb" check_add_cflags -mthumb @@ -967,18 +998,41 @@ EOF fi ;; vs*) - asm_conversion_cmd="${source_path}/build/make/ads2armasm_ms.pl" - AS_SFX=.S - msvs_arch_dir=arm-msvs - disable_feature multithread - disable_feature unit_tests - vs_version=${tgt_cc##vs} - if [ $vs_version -ge 12 ]; then - # MSVC 2013 doesn't allow doing plain .exe projects for ARM, - # only "AppContainerApplication" which requires an AppxManifest. - # Therefore disable the examples, just build the library. - disable_feature examples - disable_feature tools + # A number of ARM-based Windows platforms are constrained by their + # respective SDKs' limitations. Fortunately, these are all 32-bit ABIs + # and so can be selected as 'win32'. + if [ ${tgt_os} = "win32" ]; then + asm_conversion_cmd="${source_path}/build/make/ads2armasm_ms.pl" + AS_SFX=.S + msvs_arch_dir=arm-msvs + disable_feature multithread + disable_feature unit_tests + if [ ${tgt_cc##vs} -ge 12 ]; then + # MSVC 2013 doesn't allow doing plain .exe projects for ARM32, + # only "AppContainerApplication" which requires an AppxManifest. + # Therefore disable the examples, just build the library. + disable_feature examples + disable_feature tools + fi + else + # Windows 10 on ARM, on the other hand, has full Windows SDK support + # for building Win32 ARM64 applications in addition to ARM64 + # Windows Store apps. It is the only 64-bit ARM ABI that + # Windows supports, so it is the default definition of 'win64'. + # ARM64 build support officially shipped in Visual Studio 15.9.0. + + # Because the ARM64 Windows SDK's arm_neon.h is ARM32-specific + # while LLVM's is not, probe its validity. + if enabled neon; then + if [ -n "${CC}" ]; then + check_header arm_neon.h || check_header arm64_neon.h && \ + enable_feature win_arm64_neon_h_workaround + else + # If a probe is not possible, assume this is the pure Windows + # SDK and so the workaround is necessary. 
+ enable_feature win_arm64_neon_h_workaround + fi + fi fi ;; rvct) @@ -1006,7 +1060,6 @@ EOF fi arch_int=${tgt_isa##armv} arch_int=${arch_int%%te} - check_add_asflags --pd "\"ARCHITECTURE SETA ${arch_int}\"" enabled debug && add_asflags -g add_cflags --gnu add_cflags --enum_is_int @@ -1021,109 +1074,70 @@ EOF ;; android*) - if [ -n "${sdk_path}" ]; then - SDK_PATH=${sdk_path} - COMPILER_LOCATION=`find "${SDK_PATH}" \ - -name "arm-linux-androideabi-gcc*" -print -quit` - TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi- - CC=${TOOLCHAIN_PATH}gcc - CXX=${TOOLCHAIN_PATH}g++ - AR=${TOOLCHAIN_PATH}ar - LD=${TOOLCHAIN_PATH}gcc - AS=${TOOLCHAIN_PATH}as - STRIP=${TOOLCHAIN_PATH}strip - NM=${TOOLCHAIN_PATH}nm - - if [ -z "${alt_libc}" ]; then - alt_libc=`find "${SDK_PATH}" -name arch-arm -print | \ - awk '{n = split($0,a,"/"); \ - split(a[n-1],b,"-"); \ - print $0 " " b[2]}' | \ - sort -g -k 2 | \ - awk '{ print $1 }' | tail -1` - fi - - if [ -d "${alt_libc}" ]; then - add_cflags "--sysroot=${alt_libc}" - add_ldflags "--sysroot=${alt_libc}" - fi - - # linker flag that routes around a CPU bug in some - # Cortex-A8 implementations (NDK Dev Guide) - add_ldflags "-Wl,--fix-cortex-a8" - - enable_feature pic - soft_enable realtime_only - if [ ${tgt_isa} = "armv7" ]; then - soft_enable runtime_cpu_detect - fi - if enabled runtime_cpu_detect; then - add_cflags "-I${SDK_PATH}/sources/android/cpufeatures" - fi - else - echo "Assuming standalone build with NDK toolchain." - echo "See build/make/Android.mk for details." - check_add_ldflags -static - soft_enable unit_tests - fi + echo "Assuming standalone build with NDK toolchain." + echo "See build/make/Android.mk for details." + check_add_ldflags -static + soft_enable unit_tests ;; darwin*) - XCRUN_FIND="xcrun --sdk iphoneos --find" - CXX="$(${XCRUN_FIND} clang++)" - CC="$(${XCRUN_FIND} clang)" - AR="$(${XCRUN_FIND} ar)" - AS="$(${XCRUN_FIND} as)" - STRIP="$(${XCRUN_FIND} strip)" - NM="$(${XCRUN_FIND} nm)" - RANLIB="$(${XCRUN_FIND} ranlib)" - AS_SFX=.S - LD="${CXX:-$(${XCRUN_FIND} ld)}" + if ! 
enabled external_build; then + XCRUN_FIND="xcrun --sdk iphoneos --find" + CXX="$(${XCRUN_FIND} clang++)" + CC="$(${XCRUN_FIND} clang)" + AR="$(${XCRUN_FIND} ar)" + AS="$(${XCRUN_FIND} as)" + STRIP="$(${XCRUN_FIND} strip)" + NM="$(${XCRUN_FIND} nm)" + RANLIB="$(${XCRUN_FIND} ranlib)" + AS_SFX=.S + LD="${CXX:-$(${XCRUN_FIND} ld)}" - # ASFLAGS is written here instead of using check_add_asflags - # because we need to overwrite all of ASFLAGS and purge the - # options that were put in above - ASFLAGS="-arch ${tgt_isa} -g" + # ASFLAGS is written here instead of using check_add_asflags + # because we need to overwrite all of ASFLAGS and purge the + # options that were put in above + ASFLAGS="-arch ${tgt_isa} -g" - add_cflags -arch ${tgt_isa} - add_ldflags -arch ${tgt_isa} + add_cflags -arch ${tgt_isa} + add_ldflags -arch ${tgt_isa} - alt_libc="$(show_darwin_sdk_path iphoneos)" - if [ -d "${alt_libc}" ]; then - add_cflags -isysroot ${alt_libc} - fi + alt_libc="$(show_darwin_sdk_path iphoneos)" + if [ -d "${alt_libc}" ]; then + add_cflags -isysroot ${alt_libc} + fi - if [ "${LD}" = "${CXX}" ]; then - add_ldflags -miphoneos-version-min="${IOS_VERSION_MIN}" - else - add_ldflags -ios_version_min "${IOS_VERSION_MIN}" - fi + if [ "${LD}" = "${CXX}" ]; then + add_ldflags -miphoneos-version-min="${IOS_VERSION_MIN}" + else + add_ldflags -ios_version_min "${IOS_VERSION_MIN}" + fi - for d in lib usr/lib usr/lib/system; do - try_dir="${alt_libc}/${d}" - [ -d "${try_dir}" ] && add_ldflags -L"${try_dir}" - done + for d in lib usr/lib usr/lib/system; do + try_dir="${alt_libc}/${d}" + [ -d "${try_dir}" ] && add_ldflags -L"${try_dir}" + done - case ${tgt_isa} in - armv7|armv7s|armv8|arm64) - if enabled neon && ! check_xcode_minimum_version; then - soft_disable neon - log_echo " neon disabled: upgrade Xcode (need v6.3+)." - if enabled neon_asm; then - soft_disable neon_asm - log_echo " neon_asm disabled: upgrade Xcode (need v6.3+)." + case ${tgt_isa} in + armv7|armv7s|armv8|arm64) + if enabled neon && ! check_xcode_minimum_version; then + soft_disable neon + log_echo " neon disabled: upgrade Xcode (need v6.3+)." + if enabled neon_asm; then + soft_disable neon_asm + log_echo " neon_asm disabled: upgrade Xcode (need v6.3+)." + fi fi - fi - ;; - esac + ;; + esac + + if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ]; then + check_add_cflags -fembed-bitcode + check_add_asflags -fembed-bitcode + check_add_ldflags -fembed-bitcode + fi + fi asm_conversion_cmd="${source_path}/build/make/ads2gas_apple.pl" - - if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ]; then - check_add_cflags -fembed-bitcode - check_add_asflags -fembed-bitcode - check_add_ldflags -fembed-bitcode - fi ;; linux*) @@ -1180,6 +1194,11 @@ EOF esac if enabled msa; then + # TODO(libyuv:793) + # The new mips functions in libyuv do not build + # with the toolchains we currently use for testing. + soft_disable libyuv + add_cflags -mmsa add_asflags -mmsa add_ldflags -mmsa @@ -1195,13 +1214,25 @@ EOF check_add_asflags -march=${tgt_isa} check_add_asflags -KPIC ;; - ppc*) + ppc64le*) link_with_cc=gcc setup_gnu_toolchain - check_gcc_machine_option "vsx" + # Do not enable vsx by default. + # https://bugs.chromium.org/p/webm/issues/detail?id=1522 + enabled vsx || RTCD_OPTIONS="${RTCD_OPTIONS}--disable-vsx " + if [ -n "${tune_cpu}" ]; then + case ${tune_cpu} in + power?) 
+            tune_cflags="-mcpu="
+            ;;
+        esac
+      fi
       ;;
     x86*)
       case ${tgt_os} in
+        android)
+          soft_enable realtime_only
+          ;;
         win*)
           enabled gcc && add_cflags -fno-common
           ;;
@@ -1253,28 +1284,13 @@ EOF
           # Skip the check by setting AS arbitrarily
           AS=msvs
           msvs_arch_dir=x86-msvs
-          vc_version=${tgt_cc##vs}
-          case $vc_version in
-            7|8|9|10|11|12|13|14)
+          case ${tgt_cc##vs} in
+            14)
               echo "${tgt_cc} does not support avx512, disabling....."
               RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx512 "
               soft_disable avx512
               ;;
           esac
-          case $vc_version in
-            7|8|9|10)
-              echo "${tgt_cc} does not support avx/avx2, disabling....."
-              RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx --disable-avx2 "
-              soft_disable avx
-              soft_disable avx2
-              ;;
-          esac
-          case $vc_version in
-            7|8|9)
-              echo "${tgt_cc} omits stdint.h, disabling webm-io..."
-              soft_disable webm_io
-              ;;
-          esac
           ;;
       esac
@@ -1307,16 +1323,12 @@ EOF
         else
           if [ "$ext" = "avx512" ]; then
             check_gcc_machine_options $ext avx512f avx512cd avx512bw avx512dq avx512vl
+            check_gcc_avx512_compiles
           else
             # use the shortened version for the flag: sse4_1 -> sse4
             check_gcc_machine_option ${ext%_*} $ext
           fi
         fi
-
-        # https://bugs.chromium.org/p/webm/issues/detail?id=1464
-        # The assembly optimizations for vpx_sub_pixel_variance do not link with
-        # gcc 6.
-        enabled sse2 && soft_enable pic
       done

       if enabled external_build; then
@@ -1376,7 +1388,8 @@ EOF
       add_cflags  ${sim_arch}
       add_ldflags ${sim_arch}

-      if [ "$(show_darwin_sdk_major_version iphonesimulator)" -gt 8 ]; then
+      if [ "$(disabled external_build)" ] &&
+         [ "$(show_darwin_sdk_major_version iphonesimulator)" -gt 8 ]; then
         # yasm v1.3.0 doesn't know what -fembed-bitcode means, so turning it
         # on is pointless (unless building a C-only lib). Warn the user, but
         # do nothing here.
@@ -1466,7 +1479,11 @@ EOF
         # bionic includes basic pthread functionality, obviating -lpthread.
         ;;
       *)
-        check_header pthread.h && add_extralibs -lpthread
+        check_header pthread.h && check_lib -lpthread <<EOF
+#include <pthread.h>
+#include <stddef.h>
+int main(void) { return pthread_create(NULL, NULL, NULL, NULL); }
+EOF
         ;;
     esac
   fi
diff --git a/media/libvpx/libvpx/build/make/gen_msvs_sln.sh b/media/libvpx/libvpx/build/make/gen_msvs_sln.sh
index 401223a0b56c..d1adfd749c66 100755
--- a/media/libvpx/libvpx/build/make/gen_msvs_sln.sh
+++ b/media/libvpx/libvpx/build/make/gen_msvs_sln.sh
@@ -25,7 +25,7 @@ files.
Options: --help Print this message --out=outfile Redirect output to a file - --ver=version Version (7,8,9,10,11,12,14,15) of visual studio to generate for + --ver=version Version (14-16) of visual studio to generate for --target=isa-os-cc Target specifier EOF exit 1 @@ -213,13 +213,14 @@ for opt in "$@"; do ;; --dep=*) eval "${optval%%:*}_deps=\"\${${optval%%:*}_deps} ${optval##*:}\"" ;; - --ver=*) vs_ver="$optval" - case $optval in - 10|11|12|14|15) - ;; - *) die Unrecognized Visual Studio Version in $opt - ;; - esac + --ver=*) + vs_ver="$optval" + case $optval in + 14) vs_year=2015 ;; + 15) vs_year=2017 ;; + 16) vs_year=2019 ;; + *) die Unrecognized Visual Studio Version in $opt ;; + esac ;; --target=*) target="${optval}" ;; @@ -230,21 +231,11 @@ for opt in "$@"; do done outfile=${outfile:-/dev/stdout} mkoutfile=${mkoutfile:-/dev/stdout} -case "${vs_ver:-10}" in - 10) sln_vers="11.00" - sln_vers_str="Visual Studio 2010" - ;; - 11) sln_vers="12.00" - sln_vers_str="Visual Studio 2012" - ;; - 12) sln_vers="12.00" - sln_vers_str="Visual Studio 2013" - ;; - 14) sln_vers="12.00" - sln_vers_str="Visual Studio 2015" - ;; - 15) sln_vers="12.00" - sln_vers_str="Visual Studio 2017" +case "${vs_ver}" in + 1[4-6]) + # VS has used Format Version 12.00 continuously since vs11. + sln_vers="12.00" + sln_vers_str="Visual Studio ${vs_year}" ;; esac sfx=vcxproj diff --git a/media/libvpx/libvpx/build/make/gen_msvs_vcxproj.sh b/media/libvpx/libvpx/build/make/gen_msvs_vcxproj.sh index 171d0b99b6e8..bb1c31d230da 100755 --- a/media/libvpx/libvpx/build/make/gen_msvs_vcxproj.sh +++ b/media/libvpx/libvpx/build/make/gen_msvs_vcxproj.sh @@ -34,7 +34,7 @@ Options: --name=project_name Name of the project (required) --proj-guid=GUID GUID to use for the project --module-def=filename File containing export definitions (for DLLs) - --ver=version Version (10,11,12,14,15) of visual studio to generate for + --ver=version Version (14-16) of visual studio to generate for --src-path-bare=dir Path to root of source tree -Ipath/to/include Additional include directories -DFLAG[=value] Preprocessor macros to define @@ -82,7 +82,7 @@ generate_filter() { | sed -e "s,$src_path_bare,," \ -e 's/^[\./]\+//g' -e 's,[:/ ],_,g') - if ([ "$pat" == "asm" ] || [ "$pat" == "s" ] || [ "$pat" == "S" ]) && $asm_use_custom_step; then + if ([ "$pat" == "asm" ] || [ "$pat" == "s" ] || [ "$pat" == "S" ]) && $uses_asm; then # Avoid object file name collisions, i.e. vpx_config.c and # vpx_config.asm produce the same object file without # this additional suffix. @@ -168,7 +168,7 @@ for opt in "$@"; do --ver=*) vs_ver="$optval" case "$optval" in - 10|11|12|14|15) + 1[4-6]) ;; *) die Unrecognized Visual Studio Version in $opt ;; @@ -215,13 +215,7 @@ fix_file_list file_list outfile=${outfile:-/dev/stdout} guid=${guid:-`generate_uuid`} -asm_use_custom_step=false uses_asm=${uses_asm:-false} -case "${vs_ver:-11}" in - 10|11|12|14|15) - asm_use_custom_step=$uses_asm - ;; -esac [ -n "$name" ] || die "Project name (--name) must be specified!" [ -n "$target" ] || die "Target (--target) must be specified!" 
@@ -261,6 +255,11 @@ case "$target" in asm_Debug_cmdline="yasm -Xvc -g cv8 -f win32 ${yasmincs} "%(FullPath)"" asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} "%(FullPath)"" ;; + arm64*) + platforms[0]="ARM64" + asm_Debug_cmdline="armasm64 -nologo -oldit "%(FullPath)"" + asm_Release_cmdline="armasm64 -nologo -oldit "%(FullPath)"" + ;; arm*) platforms[0]="ARM" asm_Debug_cmdline="armasm -nologo -oldit "%(FullPath)"" @@ -307,6 +306,16 @@ generate_vcxproj() { tag_content ApplicationType "Windows Store" tag_content ApplicationTypeRevision 8.1 fi + if [ "${platforms[0]}" = "ARM64" ]; then + # Require the first Visual Studio version to have ARM64 support. + tag_content MinimumVisualStudioVersion 15.9 + fi + if [ $vs_ver -eq 15 ] && [ "${platforms[0]}" = "ARM64" ]; then + # Since VS 15 does not have a 'use latest SDK version' facility, + # specifically require the contemporaneous SDK with official ARM64 + # support. + tag_content WindowsTargetPlatformVersion 10.0.17763.0 + fi close_tag PropertyGroup tag Import \ @@ -324,32 +333,15 @@ generate_vcxproj() { else tag_content ConfigurationType StaticLibrary fi - if [ "$vs_ver" = "11" ]; then - if [ "$plat" = "ARM" ]; then - # Setting the wp80 toolchain automatically sets the - # WINAPI_FAMILY define, which is required for building - # code for arm with the windows headers. Alternatively, - # one could add AppContainerApplication=true in the Globals - # section and add PrecompiledHeader=NotUsing and - # CompileAsWinRT=false in ClCompile and SubSystem=Console - # in Link. - tag_content PlatformToolset v110_wp80 - else - tag_content PlatformToolset v110 - fi - fi - if [ "$vs_ver" = "12" ]; then - # Setting a PlatformToolset indicating windows phone isn't - # enough to build code for arm with MSVC 2013, one strictly - # has to enable AppContainerApplication as well. - tag_content PlatformToolset v120 - fi if [ "$vs_ver" = "14" ]; then tag_content PlatformToolset v140 fi if [ "$vs_ver" = "15" ]; then tag_content PlatformToolset v141 fi + if [ "$vs_ver" = "16" ]; then + tag_content PlatformToolset v142 + fi tag_content CharacterSet Unicode if [ "$config" = "Release" ]; then tag_content WholeProgramOptimization true diff --git a/media/libvpx/libvpx/build/make/iosbuild.sh b/media/libvpx/libvpx/build/make/iosbuild.sh index 365a8c013068..978ffbbb9887 100755 --- a/media/libvpx/libvpx/build/make/iosbuild.sh +++ b/media/libvpx/libvpx/build/make/iosbuild.sh @@ -132,7 +132,8 @@ create_vpx_framework_config_shim() { done # Consume the last line of output from the loop: We don't want it. - sed -i '' -e '$d' "${config_file}" + sed -i.bak -e '$d' "${config_file}" + rm "${config_file}.bak" printf "#endif\n\n" >> "${config_file}" printf "#endif // ${include_guard}" >> "${config_file}" @@ -244,7 +245,7 @@ build_framework() { # Trap function. Cleans up the subtree used to build all targets contained in # $TARGETS. cleanup() { - local readonly res=$? + local res=$? cd "${ORIG_PWD}" if [ $res -ne 0 ]; then @@ -350,7 +351,7 @@ if [ "$ENABLE_SHARED" = "yes" ]; then IOS_VERSION_MIN="8.0" else IOS_VERSION_OPTIONS="" - IOS_VERSION_MIN="6.0" + IOS_VERSION_MIN="7.0" fi if [ "${VERBOSE}" = "yes" ]; then diff --git a/media/libvpx/libvpx/build/make/msvs_common.sh b/media/libvpx/libvpx/build/make/msvs_common.sh index 88f1cf9b5703..27ddf7fd91d7 100644 --- a/media/libvpx/libvpx/build/make/msvs_common.sh +++ b/media/libvpx/libvpx/build/make/msvs_common.sh @@ -41,6 +41,15 @@ fix_path() { # Corrects the paths in file_list in one pass for efficiency. 
# $1 is the name of the array to be modified. fix_file_list() { + if [ "${FIXPATH}" = "echo_path" ] ; then + # When used with echo_path, fix_file_list is a no-op. Avoid warning about + # unsupported 'declare -n' when it is not important. + return 0 + elif [ "${BASH_VERSINFO}" -lt 4 ] ; then + echo "Cygwin path conversion has failed. Please use a version of bash" + echo "which supports nameref (-n), introduced in bash 4.3" + return 1 + fi declare -n array_ref=$1 files=$(fix_path "${array_ref[@]}") local IFS=$'\n' diff --git a/media/libvpx/libvpx/build/make/rtcd.pl b/media/libvpx/libvpx/build/make/rtcd.pl index 68e92b52cc57..7483200411a7 100755 --- a/media/libvpx/libvpx/build/make/rtcd.pl +++ b/media/libvpx/libvpx/build/make/rtcd.pl @@ -400,12 +400,13 @@ EOF # &require("c"); +&require(keys %required); if ($opts{arch} eq 'x86') { @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2 avx512/); x86; } elsif ($opts{arch} eq 'x86_64') { @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2 avx512/); - @REQUIRES = filter(keys %required ? keys %required : qw/mmx sse sse2/); + @REQUIRES = filter(qw/mmx sse sse2/); &require(@REQUIRES); x86; } elsif ($opts{arch} eq 'mips32' || $opts{arch} eq 'mips64') { @@ -433,6 +434,7 @@ if ($opts{arch} eq 'x86') { arm; } elsif ($opts{arch} eq 'armv8' || $opts{arch} eq 'arm64' ) { @ALL_ARCHS = filter(qw/neon/); + &require("neon"); arm; } elsif ($opts{arch} =~ /^ppc/ ) { @ALL_ARCHS = filter(qw/vsx/); diff --git a/media/libvpx/libvpx/build/make/thumb.pm b/media/libvpx/libvpx/build/make/thumb.pm index 483c2539c68a..9c49e2d8b724 100644 --- a/media/libvpx/libvpx/build/make/thumb.pm +++ b/media/libvpx/libvpx/build/make/thumb.pm @@ -54,13 +54,6 @@ sub FixThumbInstructions($$) # "addne r0, r0, r2". s/^(\s*)((ldr|str)(ne)?[bhd]?)(\s+)(\w+),(\s*\w+,)?\s*\[(\w+)\],\s*(\w+)/$1$2$5$6,$7 [$8]\n$1add$4$5$8, $8, $9/g; - # Convert a conditional addition to the pc register into a series of - # instructions. This converts "addlt pc, pc, r3, lsl #2" into - # "itttt lt", "movlt.n r12, pc", "addlt.w r12, #12", - # "addlt.w r12, r12, r3, lsl #2", "movlt.n pc, r12". - # This assumes that r12 is free at this point. - s/^(\s*)addlt(\s+)pc,\s*pc,\s*(\w+),\s*lsl\s*#(\d+)/$1itttt$2lt\n$1movlt.n$2r12, pc\n$1addlt.w$2r12, #12\n$1addlt.w$2r12, r12, $3, lsl #($4-$branch_shift_offset)\n$1movlt.n$2pc, r12/g; - # Convert "mov pc, lr" into "bx lr", since the former only works # for switching from arm to thumb (and only in armv7), but not # from thumb to arm. 
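(Illustration, not part of the patch: the rtcd.pl changes above adjust which SIMD specializations are unconditionally required rather than probed: &require(keys %required) now honors explicit --require options, arm64/armv8 always requires neon, and x86_64 keeps mmx/sse/sse2 as its baseline. The generated run-time CPU detection code follows the function-pointer pattern sketched below; every name here is a simplified stand-in, not a real generated symbol.)

    /* Sketch of the dispatch pattern rtcd.pl generates (names hypothetical). */
    typedef void (*idct_fn)(const short *input, unsigned char *dest);

    static void idct_c(const short *input, unsigned char *dest) {
      (void)input; (void)dest; /* portable C path */
    }
    static void idct_neon(const short *input, unsigned char *dest) {
      (void)input; (void)dest; /* NEON path */
    }

    #define FLAG_NEON 0x1 /* stand-in for the HAS_NEON bit from vpx_ports/arm.h */
    static idct_fn vpx_idct_ptr = idct_c;

    static void setup_rtcd(int cpu_flags) {
      /* armv7: NEON is optional, so the generated setup probes at run time. */
      if (cpu_flags & FLAG_NEON) vpx_idct_ptr = idct_neon;
      /* arm64 after this patch: &require("neon") binds the table entry to the
       * NEON variant unconditionally, so no run-time probe is emitted. */
    }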
diff --git a/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/cur_frame_16x16.txt b/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/cur_frame_16x16.txt
new file mode 100644
index 000000000000..c26463937a5a
--- /dev/null
+++ b/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/cur_frame_16x16.txt
@@ -0,0 +1,2 @@
+486,720
+[... 349,920 (486x720) comma-separated 8-bit sample values for the test frame; raw data elided ...]
,213,219,246,227,250,230,249,227,243,251,229,227,209,248,254,248,242,246,208,129,114,122,115,103,32,103,238,240,239,233,255,237,250,254,227,242,195,193,221,254,254,255,251,233,239,238,240,237,234,238,225,238,243,149,145,240,253,239,250,243,226,226,242,242,246,210,240,210,133,194,163,145,155,66,142,107,57,51,60,59,71,157,111,102,80,163,113,53,75,19,64,102,88,138,75,18,33,56,26,67,107,74,68,41,65,40,70,217,240,233,242,255,254,253,236,254,231,242,248,242,230,244,247,229,237,236,254,236,206,189,250,207,168,157,224,236,246,193,224,227,196,237,238,245,240,241,236,226,242,250,255,242,242,251,255,253,255,250,222,243,251,194,104,92,38,0,143,225,236,247,237,215,223,213,226,214,225,227,239,232,229,222,116,54,122,238,202,226,228,218,239,214,246,221,236,207,222,223,232,238,216,239,211,208,227,238,244,203,231,180,238,238,245,243,216,229,224,210,231,231,209,219,217,231,236,222,216,223,212,247,202,204,229,220,227,191,205,219,196,235,207,188,206,213,242,229,174,141,118,78,71,65,56,67,55,58,43,23,69,46,53,92,74,58,69,33,62,36,13,33,41,65,91,70,68,68,53,45,157,210,209,229,223,218,226,223,194,202,225,194,192,212,212,201,200,214,201,210,199,214,208,185,222,222,206,216,210,205,221,229,179,204,183,200,207,198,200,117,7,0,17,13,6,38,14,3,11,22,9,25,18,1,0,14,7,12,37,23,10,12,1,12,222,216,234,240,215,188,209,211,212,221,215,203,214,217,204,202,220,211,213,235,194,211,211,228,231,199,214,208,221,210,217,231,232,202,197,223,235,227,214,199,205,244,227,210,217,230,221,238,220,207,243,216,224,214,230,220,202,214,204,206,194,225,221,220,231,235,186,208,212,231,212,224,228,227,208,210,201,224,217,199,200,229,222,237,212,210,227,204,209,230,219,228,222,231,229,222,217,222,210,223,226,228,229,236,217,230,222,241,225,221,219,207,242,240,220,234,227,219,244,224,236,214,234,246,232,246,222,227,231,216,219,239,224,234,229,249,226,236,234,228,222,211,223,220,216,214,217,214,226,223,224,248,247,243,233,250,234,239,234,235,232,223,240,243,230,210,233,230,231,244,228,208,246,241,251,255,250,237,250,249,251,249,233,243,242,254,254,250,227,251,248,232,233,242,252,226,244,242,234,241,253,232,255,249,230,241,234,237,220,225,253,240,246,231,232,244,252,252,235,236,253,252,248,229,240,251,229,248,251,255,247,254,251,245,243,244,240,254,250,246,228,216,248,251,248,243,251,228,223,253,243,255,222,245,238,237,255,255,254,251,235,249,244,231,255,250,246,251,255,246,251,246,254,233,251,242,237,229,246,234,251,232,250,254,254,222,220,225,239,253,241,248,255,230,253,222,254,241,231,242,240,249,251,252,252,251,245,251,242,249,255,246,230,235,249,251,249,251,238,223,236,250,228,236,250,248,254,248,253,242,251,241,228,244,243,183,221,235,229,224,221,245,250,247,248,211,208,167,154,101,46,200,255,247,243,236,237,253,232,220,152,200,249,251,255,242,249,248,226,219,195,215,249,254,223,227,239,217,198,224,238,248,251,248,227,247,252,231,229,230,242,255,248,223,253,249,243,152,135,141,106,105,20,135,239,249,241,252,246,253,239,244,246,245,187,207,212,240,247,238,247,249,214,225,253,253,249,247,233,252,230,194,154,225,250,236,243,218,245,233,243,247,233,203,223,198,194,253,188,179,172,60,147,101,92,69,70,52,85,186,115,87,70,135,112,76,66,72,105,101,111,109,82,64,22,22,42,54,92,66,73,68,66,33,37,220,247,248,242,239,239,241,252,255,227,244,250,229,232,223,246,230,231,224,245,248,199,205,251,253,174,158,186,236,237,204,250,199,168,230,207,237,253,253,242,213,251,254,240,254,242,226,240,222,245,251,247,251,233,218,120,142,14,6,147,222,249,234,242,239,227,230,224,228,233,214,209,227,249,231,122,46,156,241,243,220,241,254,229,232,227,221,249,238,221,254,220,
203,228,236,234,233,221,219,246,211,214,225,220,207,228,231,239,216,240,213,220,227,222,206,213,222,227,212,217,225,226,200,216,224,225,196,216,232,208,209,219,193,211,240,204,219,216,217,192,186,137,148,132,99,51,81,65,92,72,80,51,78,103,56,57,86,44,54,38,41,47,41,61,56,106,70,64,54,37,46,78,184,182,203,220,183,199,212,214,205,219,187,228,207,216,204,197,223,213,222,230,203,207,210,220,196,230,200,187,204,213,202,186,196,188,215,213,216,215,117,7,1,3,27,12,3,16,10,8,33,10,17,11,6,6,20,26,8,26,9,0,7,6,21,241,213,213,199,204,228,213,211,239,226,208,210,224,224,219,234,217,214,214,228,216,204,200,199,210,210,236,224,231,211,198,223,223,239,222,220,220,220,213,236,239,231,192,233,223,241,217,237,217,227,220,233,211,219,246,225,236,210,231,226,217,207,222,220,206,231,212,230,239,211,232,198,225,215,207,189,221,206,229,219,207,239,204,223,244,214,227,214,217,205,206,232,234,231,222,235,220,215,219,241,238,228,233,213,242,232,231,195,237,218,222,219,229,227,233,220,218,221,230,204,226,209,236,231,236,232,231,202,238,243,225,238,218,219,230,237,229,227,231,224,236,219,233,245,235,231,251,212,225,243,244,237,241,224,238,230,232,242,240,227,227,229,242,243,225,225,238,225,252,204,248,254,253,234,236,241,243,249,234,252,238,247,226,255,248,253,249,236,248,240,220,227,231,229,230,227,247,223,245,247,246,243,233,247,216,239,252,255,253,252,233,253,242,244,249,253,249,237,252,245,232,241,246,245,237,240,255,245,236,234,243,243,250,236,235,236,245,228,247,255,254,240,254,235,253,234,244,199,240,248,217,234,248,240,253,241,247,252,249,247,248,246,229,239,234,237,250,244,243,236,246,249,252,244,233,243,251,245,233,247,253,249,255,243,252,253,254,228,247,240,241,248,244,255,249,241,239,238,242,236,245,248,234,238,235,235,247,242,223,230,249,247,235,224,245,249,249,234,250,241,255,252,249,251,238,230,244,242,238,236,239,250,236,241,242,226,209,251,208,194,227,254,226,247,252,194,169,164,128,86,54,168,247,247,252,223,248,243,247,248,154,198,245,245,254,255,252,240,229,231,249,243,231,255,220,173,229,230,247,241,249,247,236,246,247,223,238,250,251,234,255,255,254,252,236,227,218,159,149,116,84,95,20,138,226,252,246,255,249,245,240,251,248,255,238,244,249,243,247,253,251,226,227,253,245,251,246,254,237,250,241,180,178,228,246,253,242,249,229,230,225,246,250,216,225,221,235,251,172,225,189,102,138,99,97,47,63,53,59,195,162,84,46,155,136,58,22,59,116,102,70,43,36,14,35,38,38,50,101,104,90,62,80,34,29,221,243,245,221,240,235,244,249,240,231,230,253,241,239,255,244,237,231,240,245,244,213,181,234,250,196,169,185,220,210,230,237,217,180,192,226,249,247,241,252,249,255,240,245,243,245,251,246,228,251,236,250,251,254,222,140,108,40,3,177,247,238,255,245,240,239,206,226,206,212,218,211,229,247,215,128,72,149,230,179,239,226,217,228,241,235,232,224,228,235,214,228,222,197,228,211,218,218,208,230,204,225,212,232,219,213,209,231,213,214,205,230,209,217,234,219,226,225,216,208,227,193,209,219,203,229,222,207,229,208,210,221,217,207,227,199,213,209,232,204,158,152,95,103,71,61,43,81,71,82,96,98,127,111,65,62,73,45,48,82,81,109,79,137,113,52,58,74,31,25,40,134,171,192,212,217,209,209,186,226,226,213,207,224,224,195,205,186,218,208,193,204,182,234,216,202,232,205,217,213,220,213,224,196,200,204,213,190,217,197,119,17,12,9,19,4,7,12,13,4,29,8,7,1,14,14,17,2,6,16,14,20,11,40,14,227,224,232,199,213,228,223,233,200,234,211,189,208,223,216,225,220,206,224,194,220,223,195,210,207,214,235,205,202,222,222,231,206,220,219,211,226,227,210,221,223,203,192,223,217,242,231,207,218,212,211,215,218,208,236,215,211,225,208,209,225,225,222,21
1,201,219,213,198,244,227,209,208,201,232,226,231,211,224,221,211,221,191,216,223,221,222,207,235,238,218,236,241,222,235,227,208,217,218,230,200,216,221,242,245,210,243,226,237,220,216,221,222,246,252,215,246,230,239,208,225,251,207,248,229,242,230,232,246,238,212,242,219,223,236,247,245,216,229,215,249,230,206,240,221,232,228,233,239,236,227,239,216,215,247,224,246,215,245,229,235,234,232,238,235,245,238,224,244,231,246,241,241,236,229,235,244,249,228,232,249,242,226,245,234,252,249,248,229,251,246,255,240,246,242,215,242,252,220,242,250,247,233,247,249,237,236,255,245,255,244,247,249,241,245,237,242,243,253,238,253,245,240,215,232,236,223,255,240,245,252,251,243,228,241,255,243,246,243,246,250,247,248,255,240,247,243,255,254,230,251,248,255,254,253,250,222,251,226,229,248,250,254,234,255,212,251,236,251,254,254,231,244,234,251,232,248,246,246,238,237,253,253,254,249,247,234,255,255,239,254,240,230,240,238,241,252,234,228,248,253,251,243,251,231,253,180,159,185,222,243,240,243,240,234,247,238,243,253,232,245,255,230,240,242,233,248,241,245,252,252,247,255,233,246,246,190,208,206,236,201,245,225,243,239,253,199,151,145,122,81,44,190,248,242,224,230,232,246,254,233,210,173,224,255,252,238,246,255,231,249,194,225,250,227,221,227,251,245,252,252,232,253,236,250,241,236,248,245,236,227,204,237,241,239,251,230,142,106,109,131,108,116,57,134,238,248,243,251,244,253,243,255,244,245,211,236,243,243,235,252,251,248,250,255,246,238,244,254,244,239,247,182,135,254,253,249,242,247,247,242,244,251,243,226,255,251,237,248,211,255,194,96,68,36,41,11,34,33,48,223,159,68,43,158,117,27,51,27,108,85,76,44,38,69,51,38,34,62,74,90,59,56,91,30,24,220,253,255,249,246,253,243,252,251,241,244,254,249,248,250,254,254,221,251,253,243,250,194,205,244,231,187,181,134,203,240,220,191,199,192,237,234,254,232,233,246,242,249,236,249,247,241,246,250,254,246,246,252,234,189,104,71,26,44,185,236,205,218,200,241,232,229,234,204,176,210,238,216,227,249,116,50,132,207,216,198,212,208,242,228,249,233,234,214,220,234,226,242,241,219,205,218,202,200,198,223,215,218,213,214,225,207,222,238,215,233,214,216,216,212,206,220,207,210,200,211,215,212,200,221,220,221,201,210,192,212,168,192,235,232,212,210,226,186,181,102,119,110,138,91,90,68,85,80,129,161,162,126,100,121,95,81,57,64,128,136,133,116,131,119,103,77,74,37,53,144,223,225,224,214,210,188,200,202,211,198,227,217,212,205,207,240,193,200,223,193,226,231,200,197,214,188,218,219,220,207,217,186,199,211,181,206,200,211,216,110,0,13,5,8,17,10,4,23,9,27,1,5,2,7,4,4,4,24,8,0,27,15,11,27,210,206,210,229,216,215,188,212,226,227,192,217,213,218,219,215,237,224,229,219,211,234,189,198,226,225,217,216,223,208,246,233,234,218,204,209,232,213,215,231,221,231,226,201,232,220,211,225,203,218,226,188,241,225,221,189,210,226,234,225,201,232,200,220,224,205,217,220,225,216,203,233,212,236,223,215,227,220,209,227,225,203,198,225,215,233,218,214,205,225,237,243,229,220,199,222,209,204,241,207,203,211,245,233,213,241,213,210,224,241,228,219,210,221,238,198,211,239,236,212,237,249,222,227,225,219,227,226,252,241,235,223,203,240,223,226,241,223,243,212,234,225,227,236,230,229,226,238,236,228,231,214,229,249,231,244,226,245,231,238,240,243,242,222,243,240,221,247,240,240,243,255,242,249,221,240,244,247,242,237,243,248,251,251,246,245,243,236,229,243,238,237,238,247,255,248,247,248,251,226,252,254,239,248,238,224,235,238,242,250,244,251,255,243,234,250,251,243,242,251,242,255,237,238,230,250,251,228,245,255,245,253,255,253,253,244,248,250,238,248,246,248,251,253,236,248,224,247,251,250,245,246,253,234,
232,242,239,255,242,240,228,215,254,226,234,252,236,252,253,252,241,225,232,237,244,236,249,229,248,252,248,238,244,255,240,250,241,255,245,243,253,254,243,245,240,252,247,238,236,249,238,243,251,249,197,189,193,242,253,242,247,247,243,219,238,238,249,226,246,245,254,251,225,234,255,251,253,230,244,255,245,238,243,250,219,172,190,232,236,233,248,250,254,213,239,206,164,139,139,116,47,215,235,249,233,235,247,244,238,249,185,183,226,247,245,230,247,255,237,178,159,207,246,247,231,222,253,246,229,249,249,238,249,253,250,237,248,255,251,248,210,156,89,85,113,72,99,143,117,158,153,168,123,136,207,188,198,186,188,199,181,153,178,138,183,230,255,255,252,249,246,242,234,218,235,250,255,238,255,225,236,220,182,221,222,234,250,253,246,230,247,255,238,246,248,253,253,199,206,229,135,40,41,18,43,40,70,41,42,196,184,79,51,128,118,61,73,27,124,94,60,81,51,31,75,26,62,45,52,57,74,49,74,18,61,202,250,252,248,249,251,229,232,228,254,240,238,238,246,254,254,255,232,219,231,250,252,204,192,240,228,134,130,144,190,234,192,183,252,248,225,250,237,237,244,238,237,218,245,246,233,255,243,191,128,91,80,110,114,83,65,38,49,52,61,141,145,157,194,232,225,231,203,217,204,214,222,205,220,236,132,41,111,241,202,214,250,225,244,216,246,212,220,228,212,226,236,210,243,220,223,207,217,242,214,221,227,226,215,212,231,219,222,221,216,237,217,215,245,237,210,213,202,190,203,210,228,216,215,237,211,216,200,223,212,227,238,222,214,209,215,225,217,218,194,131,104,92,116,104,92,81,64,56,76,150,127,145,125,115,85,74,51,49,99,137,116,81,129,101,99,86,85,47,15,170,217,227,213,202,225,191,224,224,213,222,209,197,207,220,204,219,205,231,196,209,231,201,220,210,217,227,213,211,206,207,217,203,210,202,210,193,225,191,216,111,0,8,17,15,35,13,6,14,8,8,14,27,9,5,1,0,27,0,13,0,3,4,0,15,236,216,213,217,210,209,232,207,215,229,202,219,217,236,220,222,193,210,236,209,214,207,228,209,207,212,207,245,225,206,213,234,238,201,211,190,205,194,230,203,221,222,213,240,219,234,226,196,216,211,236,214,231,230,217,225,243,221,191,220,221,206,205,213,205,203,200,217,216,206,215,220,215,218,211,210,223,196,208,190,228,231,198,200,204,215,226,210,219,234,222,220,223,217,214,233,202,217,222,233,208,237,221,217,229,200,207,248,230,229,249,225,231,226,249,220,247,246,219,218,226,224,238,229,246,245,197,234,235,235,242,242,229,208,216,231,228,245,233,228,232,214,227,222,240,232,233,221,236,211,240,225,237,245,247,244,235,243,241,242,223,236,233,233,226,241,241,207,245,246,247,245,214,224,250,252,245,253,243,243,247,230,236,248,234,244,254,245,243,239,228,254,236,232,244,244,230,248,234,241,253,245,252,242,248,250,236,252,237,255,249,235,237,245,234,241,253,241,247,246,244,237,244,244,227,243,251,254,250,248,246,252,235,253,248,234,245,255,245,250,233,220,250,243,237,235,247,251,241,254,241,230,238,232,246,246,245,238,254,238,237,249,252,251,252,250,249,233,250,236,255,249,250,227,239,249,215,225,254,247,249,253,247,249,252,248,255,242,252,244,246,244,248,255,217,246,228,252,247,231,236,238,246,249,237,224,249,240,242,233,247,247,253,242,244,237,239,250,232,249,252,245,253,255,235,244,228,253,243,254,247,245,231,250,239,188,246,251,243,190,218,245,252,225,241,218,163,185,152,83,76,189,250,250,241,250,231,243,252,243,184,198,218,230,229,255,253,234,219,191,211,245,250,255,242,249,255,157,143,232,227,252,247,232,255,206,244,244,255,246,181,84,44,48,72,109,109,133,124,170,127,147,146,112,103,89,98,114,90,59,100,100,67,40,158,242,255,239,235,252,252,239,254,246,242,240,229,247,244,249,247,223,157,217,254,245,252,237,242,228,240,247,242,249,239,239,212,125,186,177,9
5,70,62,61,40,12,83,49,78,176,166,75,26,149,102,62,74,48,102,65,57,64,30,29,55,44,38,19,35,73,65,55,74,37,41,239,244,244,230,236,255,254,245,243,246,250,249,253,231,244,250,255,244,228,242,249,230,224,175,241,241,154,108,159,206,243,202,214,234,227,216,216,248,254,255,243,254,250,241,236,255,247,162,86,62,32,2,7,15,20,25,59,46,44,62,31,52,46,159,203,198,209,200,221,206,220,222,232,251,216,149,65,141,228,211,210,230,224,232,218,228,220,231,220,217,235,242,229,231,218,231,223,208,227,242,212,234,231,205,228,234,204,218,231,211,228,217,205,210,218,200,220,216,200,213,222,233,233,217,205,216,215,211,209,206,228,205,209,200,225,182,196,206,221,225,171,120,100,107,144,86,70,61,33,68,89,132,156,119,109,98,43,32,47,129,121,98,83,102,98,86,97,41,28,50,159,206,184,223,230,216,193,215,218,214,206,220,179,207,229,209,204,205,221,219,207,219,233,199,208,213,205,219,213,212,191,198,186,214,230,197,201,205,207,221,99,11,2,13,14,4,7,7,24,32,4,0,15,24,20,16,0,10,14,17,10,3,6,30,13,201,220,220,202,214,195,227,238,213,236,200,218,227,201,209,229,230,188,222,216,230,233,220,202,235,209,244,209,213,210,225,219,207,209,230,216,201,205,201,235,214,222,190,222,229,236,214,225,226,230,199,208,210,226,201,221,224,236,222,217,211,200,223,232,227,241,207,230,204,193,229,218,228,204,228,240,232,199,193,224,235,213,209,234,224,232,244,229,228,198,246,222,236,198,231,229,229,229,223,216,221,219,225,210,201,214,219,227,232,227,217,239,253,229,200,221,242,236,218,230,241,231,231,229,227,211,213,225,211,239,247,233,252,243,246,217,234,245,227,240,207,229,245,240,214,220,242,223,221,224,248,230,234,232,247,220,236,246,214,238,243,226,246,221,249,243,229,240,249,234,233,237,255,250,231,223,250,242,241,236,241,250,255,250,232,239,234,232,249,246,248,222,255,248,246,236,255,239,255,216,235,228,230,243,255,252,243,252,251,245,245,254,249,248,239,247,251,252,255,245,222,246,247,253,246,255,217,247,236,253,255,229,242,234,243,253,235,243,237,236,236,255,247,242,234,236,233,235,249,255,249,254,232,231,234,245,242,251,252,232,235,243,244,223,239,245,252,255,255,254,247,252,255,243,255,224,247,215,253,250,253,248,244,248,249,247,247,255,242,237,222,249,249,233,241,232,243,248,243,247,253,240,252,243,249,239,254,224,210,255,251,252,255,244,252,249,250,236,249,248,255,254,246,244,246,254,250,231,245,230,252,225,233,255,243,212,231,185,168,199,239,244,255,242,222,205,195,148,130,82,34,203,255,252,241,243,236,234,242,232,198,189,218,241,252,230,254,231,231,208,228,234,237,243,209,195,239,241,207,252,219,247,247,234,236,247,245,254,240,242,202,103,99,83,70,102,69,76,72,69,85,46,49,88,91,73,76,101,78,78,80,76,66,62,173,243,239,239,249,254,248,236,225,232,252,240,231,237,253,255,253,249,152,207,244,244,248,232,236,241,255,224,240,247,238,236,154,112,212,236,164,69,63,97,62,86,85,50,87,171,153,68,33,134,84,52,56,90,128,82,54,64,34,59,40,38,55,23,40,61,72,35,50,22,34,216,255,240,235,219,245,240,250,229,229,240,255,246,252,235,245,252,242,253,234,234,243,215,168,215,233,241,179,217,236,248,249,217,249,239,193,207,241,249,251,242,253,235,213,247,228,203,124,41,76,54,50,24,42,52,46,58,53,77,47,41,15,0,116,198,197,214,207,198,216,204,216,238,250,234,137,71,165,228,219,239,249,235,239,235,239,230,218,231,229,202,235,228,236,242,215,223,221,223,239,226,228,222,213,237,233,244,212,229,219,233,231,206,208,227,204,223,208,225,198,210,223,204,223,196,236,206,192,205,225,231,202,192,193,226,209,198,216,228,185,173,126,109,87,108,82,80,26,53,52,99,122,72,106,83,89,64,37,50,130,87,99,94,95,77,82,54,67,35,67,171,184,185,236,225,201,199,205,194,216,228,216,
226,211,210,223,202,201,210,210,206,211,197,221,216,223,239,241,222,206,219,206,210,204,226,192,200,196,203,221,92,2,3,2,24,0,17,3,28,18,6,21,28,9,7,4,3,10,6,20,6,17,21,26,1,209,203,237,192,221,214,214,221,211,237,223,199,220,210,229,199,216,203,226,219,220,212,202,234,198,207,221,197,223,234,237,208,209,230,191,210,220,217,238,234,215,224,221,235,224,233,197,193,217,227,217,216,215,212,212,215,221,230,216,214,208,198,231,224,214,216,226,209,193,228,221,222,203,203,209,218,191,206,226,201,236,203,212,216,219,226,222,237,219,229,197,211,243,229,215,225,210,223,236,221,242,239,242,211,238,222,218,222,227,247,232,234,244,222,235,224,207,233,230,233,227,215,217,207,247,240,227,235,240,230,237,238,220,238,236,227,222,212,241,236,246,242,233,220,228,220,225,251,223,245,225,218,246,241,216,233,222,229,244,235,228,219,224,223,235,255,243,232,234,235,226,245,240,246,247,251,237,231,245,221,241,237,238,250,215,235,235,228,232,242,232,245,223,255,240,233,254,245,253,244,240,246,243,247,253,249,241,242,245,232,243,249,243,237,250,247,229,243,252,235,251,231,227,246,247,247,250,248,227,254,236,240,252,248,250,247,243,241,252,228,246,250,224,250,253,235,234,254,245,245,245,244,255,251,235,236,246,230,238,251,251,227,246,247,255,255,229,255,236,245,251,253,235,244,229,253,233,232,241,238,230,254,232,220,241,224,240,241,251,251,224,240,250,255,223,251,247,252,238,237,253,239,240,238,232,242,251,247,245,249,247,249,243,247,233,231,241,235,238,247,254,250,223,243,244,242,236,255,239,247,248,244,245,225,210,179,202,211,230,221,255,244,241,225,245,200,160,170,136,72,33,214,254,241,253,232,252,239,245,213,154,194,229,231,231,252,251,253,232,191,193,234,226,237,205,234,238,220,235,246,230,249,245,234,255,234,238,242,247,242,190,122,104,73,57,36,72,63,49,57,66,20,46,29,33,44,34,40,81,33,64,57,46,51,66,116,231,243,223,242,254,249,243,232,240,228,253,246,237,249,231,237,130,157,253,251,255,237,238,242,226,204,226,178,144,200,172,226,251,243,166,109,69,93,54,29,83,60,66,172,127,47,36,130,63,36,60,55,120,84,55,39,37,49,60,43,42,39,29,70,60,59,41,29,64,211,249,252,243,239,250,232,252,236,252,248,255,244,242,233,242,251,255,230,244,231,239,253,161,216,251,247,164,184,231,240,254,195,217,234,163,183,187,240,255,235,254,255,253,243,247,149,82,54,76,68,63,72,72,69,58,76,64,53,70,73,32,38,160,218,239,218,181,168,176,191,228,234,255,222,150,50,155,229,202,235,246,228,248,238,231,238,236,213,249,226,221,231,225,220,214,221,250,200,218,231,216,222,220,237,227,225,210,226,217,217,230,231,215,233,201,213,198,207,222,219,207,220,204,200,215,207,205,201,212,231,222,229,209,189,200,191,204,218,211,149,120,60,67,41,42,45,71,56,71,131,119,113,90,68,31,66,39,58,123,118,101,89,71,86,51,42,49,14,88,196,220,207,205,200,225,215,215,225,206,207,209,207,207,223,201,216,214,202,215,225,210,213,216,239,205,208,199,197,202,217,202,199,196,199,213,192,196,196,220,97,12,7,6,6,8,30,19,2,8,26,27,9,4,22,3,6,11,21,16,11,18,11,28,10,213,220,198,189,220,211,214,213,201,223,217,189,205,228,234,218,199,210,227,207,206,223,228,199,228,229,188,219,219,208,212,237,218,211,219,213,224,212,226,245,227,224,235,238,237,228,199,244,216,223,230,191,241,230,221,223,214,210,217,205,201,210,232,211,224,232,230,224,236,199,223,215,212,221,235,240,244,227,216,219,211,224,218,236,222,200,234,207,228,238,235,213,219,213,213,228,240,220,238,233,192,229,209,229,229,207,205,243,218,210,226,237,218,233,238,217,245,203,219,238,222,234,231,224,252,231,238,233,232,212,223,205,246,240,223,205,226,219,232,216,207,229,223,227,244,242,222,241,215,236,212,236,240,235,223,204,232,
229,243,233,247,238,228,240,226,232,229,232,224,242,241,236,233,223,242,245,242,236,249,243,235,231,254,246,239,248,223,242,228,251,250,253,241,252,248,240,212,249,240,227,232,248,243,248,243,253,244,226,247,218,248,243,242,241,245,252,245,253,252,245,243,253,251,235,252,221,254,249,229,255,225,243,255,242,248,236,255,252,254,247,255,252,239,249,234,249,229,252,232,247,245,245,247,242,226,237,238,235,245,221,246,254,252,253,253,255,243,243,240,240,233,239,246,250,231,239,248,232,240,236,226,252,242,255,255,247,243,235,239,240,233,237,243,229,236,252,243,242,237,233,232,249,226,241,247,240,251,247,247,251,229,238,245,246,242,254,231,237,245,240,255,251,255,240,235,242,250,240,247,240,221,236,245,244,176,195,247,237,213,211,229,253,255,240,255,173,150,154,136,71,50,218,234,230,237,214,249,244,249,242,205,190,234,223,240,249,246,235,230,185,155,249,253,255,245,232,248,244,225,236,238,237,238,232,254,248,255,239,244,254,154,108,111,64,65,54,66,60,64,76,65,36,66,39,43,54,49,50,66,49,74,55,72,12,45,152,241,239,245,241,232,246,246,242,255,250,233,248,252,243,253,220,154,194,253,254,232,246,235,242,231,180,230,214,222,217,216,232,255,166,154,152,119,140,53,59,102,101,66,154,158,97,46,109,98,77,56,49,105,58,62,56,53,58,58,65,40,55,51,63,63,36,64,21,61,219,254,239,244,224,234,239,221,251,243,242,243,249,253,233,235,252,246,233,239,212,241,252,201,188,249,237,184,163,232,215,243,193,229,201,198,189,199,253,254,227,250,222,240,250,225,140,99,67,81,84,80,99,62,58,53,61,41,63,58,58,38,42,225,216,215,230,201,182,199,198,198,191,237,255,158,67,168,205,200,232,236,225,209,207,231,235,227,225,222,212,225,239,202,227,210,231,231,217,201,223,235,220,208,221,225,234,214,215,219,216,230,227,213,229,225,222,215,229,233,191,222,219,215,237,209,215,212,202,228,214,192,219,206,192,219,218,240,231,203,125,111,68,84,66,60,78,29,34,104,155,104,114,74,59,30,44,48,77,104,89,126,93,68,75,64,46,28,18,97,201,198,218,221,199,206,227,219,201,207,217,197,210,228,215,222,229,217,212,212,207,216,221,219,217,209,190,209,209,213,227,230,233,221,199,205,205,211,206,202,85,21,6,7,14,5,24,3,1,7,27,5,21,3,13,14,3,12,26,35,5,1,24,17,7,207,236,219,230,191,216,214,216,220,221,212,225,225,228,209,233,200,219,223,226,226,207,249,231,232,227,212,230,219,214,234,234,232,187,222,230,216,238,238,211,217,235,209,215,240,235,211,230,219,209,222,223,225,217,200,218,236,209,208,212,220,228,227,239,218,233,218,243,234,247,200,216,234,210,223,239,224,234,215,206,223,219,238,234,213,240,205,214,214,230,186,188,232,218,234,217,218,222,197,245,225,230,193,231,200,217,205,228,206,219,247,219,218,229,206,221,234,219,210,241,246,210,216,230,234,209,250,235,230,234,213,233,208,222,231,218,218,214,234,230,237,247,244,227,232,252,230,250,242,241,239,248,225,227,237,235,240,224,240,241,247,233,235,247,231,237,230,241,224,246,223,229,236,249,221,249,239,232,227,243,233,247,253,255,243,239,243,223,230,238,244,250,242,243,215,247,252,250,239,249,233,224,228,255,249,240,253,245,247,230,251,250,236,245,244,251,247,248,241,243,236,231,248,253,232,234,255,255,250,242,249,255,240,245,242,235,248,252,238,241,228,247,252,251,248,253,251,240,250,255,230,225,251,234,252,248,248,240,235,253,247,218,240,250,248,235,255,225,226,247,225,240,228,241,243,233,238,247,247,213,246,229,249,250,247,252,222,243,240,232,249,247,254,253,233,250,245,255,251,238,229,253,249,251,241,223,248,225,244,234,234,254,233,253,223,255,254,236,247,251,239,240,243,234,253,244,230,242,242,254,248,218,245,247,242,237,253,246,239,183,231,239,227,224,244,189,182,182,142,87,72,243,229,251,247,222,237,24
3,232,247,224,183,204,245,253,246,252,217,210,205,193,238,250,244,196,243,251,242,229,243,246,251,247,243,245,247,245,244,244,249,153,128,103,69,48,61,94,74,70,49,66,98,84,71,46,79,30,91,91,47,107,88,73,33,122,220,239,243,241,230,244,249,248,225,242,231,241,245,253,247,211,220,156,144,235,250,247,250,218,253,236,234,252,220,253,247,249,214,149,112,166,243,186,134,100,73,123,78,96,145,183,96,128,130,132,122,68,68,105,49,49,75,45,55,43,51,42,19,61,64,76,69,74,14,48,226,247,246,250,244,252,252,252,233,227,225,245,230,254,234,253,251,227,246,230,243,237,251,203,179,242,250,174,164,221,208,196,231,209,232,234,207,213,229,249,254,231,250,242,246,230,120,92,97,66,89,89,70,29,37,59,46,65,49,51,64,31,47,180,230,241,218,230,222,211,209,192,245,244,222,122,84,145,213,218,245,206,244,220,212,230,219,228,215,217,218,230,217,235,222,210,213,214,206,221,231,218,231,202,224,224,233,188,202,238,210,243,223,224,215,234,223,239,192,190,215,201,217,211,233,219,212,221,224,208,182,242,235,199,197,248,208,218,226,190,148,96,76,91,98,45,38,60,50,134,162,112,121,66,66,35,44,18,102,130,107,85,83,61,54,52,54,56,38,169,205,208,199,217,197,217,222,209,209,207,224,213,234,212,220,228,217,216,202,203,225,222,223,206,203,220,213,206,216,215,226,212,200,209,205,202,225,207,198,189,109,2,0,7,2,7,22,14,18,4,7,18,29,0,2,19,17,17,4,10,21,10,40,5,1,209,216,198,201,191,212,205,198,209,226,218,208,197,217,212,236,204,208,196,220,201,214,215,218,228,198,181,196,208,214,233,214,193,215,235,222,230,225,229,201,242,192,208,215,221,224,216,228,216,209,215,215,219,220,212,211,187,219,225,221,226,221,244,225,225,245,217,219,242,242,219,222,213,221,229,225,242,210,222,227,214,235,240,219,219,207,238,202,237,238,229,228,246,204,206,221,219,226,216,213,237,224,200,223,226,234,238,225,240,218,235,230,221,221,214,250,232,247,220,245,230,226,237,218,240,206,220,227,211,207,243,228,215,226,242,249,227,236,200,222,230,238,247,222,228,238,234,250,231,236,233,247,219,236,233,232,226,245,214,248,212,210,246,225,227,231,232,226,249,252,231,238,216,237,246,220,255,224,247,225,227,229,241,240,235,237,247,244,227,234,246,253,244,242,245,244,238,255,249,248,249,251,250,241,252,242,254,233,247,255,249,255,251,252,234,255,218,238,218,247,250,238,243,245,252,249,244,247,250,237,244,252,229,248,241,252,223,254,252,252,255,244,234,245,249,252,250,250,241,242,243,238,252,249,227,253,246,244,247,253,255,251,242,240,238,242,242,231,250,255,220,244,248,228,243,240,244,243,235,234,231,246,237,247,233,235,240,246,253,243,236,254,254,255,245,239,251,250,252,233,248,253,237,249,237,228,251,241,230,238,232,249,238,239,250,251,235,251,247,233,245,246,249,246,245,240,244,229,233,251,242,222,230,230,199,243,240,229,253,206,234,251,236,242,246,192,169,155,111,49,86,236,253,250,232,245,240,239,253,244,246,179,190,242,234,253,242,237,215,185,235,247,255,236,246,251,229,186,192,238,238,251,237,233,244,238,229,241,230,253,170,155,126,51,65,52,40,36,30,48,71,60,71,35,52,51,63,85,97,66,44,49,71,15,176,240,255,253,238,242,241,247,239,252,236,255,248,231,236,255,232,241,176,121,233,245,239,242,221,244,250,252,229,255,232,248,208,181,101,92,202,246,218,190,131,96,101,109,49,139,188,125,83,85,130,122,72,107,137,106,102,84,31,34,48,52,58,7,54,79,79,47,60,27,51,200,246,244,254,243,231,253,242,235,246,228,254,240,244,244,238,251,235,248,225,239,246,248,217,179,240,249,232,183,195,155,167,223,235,205,230,225,174,230,240,253,249,254,235,245,206,106,62,87,75,100,76,46,16,53,49,32,28,60,45,52,24,43,198,223,233,250,229,252,225,240,243,240,248,228,113,70,176,222,185,237,216,255,217,235,2
31,214,222,218,204,231,223,238,219,238,217,219,217,220,214,223,214,217,237,237,217,236,200,208,212,230,212,237,197,219,234,194,214,187,209,225,211,222,232,228,225,193,234,212,237,219,206,222,222,202,210,191,217,212,222,143,73,71,76,93,81,50,94,74,136,145,109,116,72,68,54,41,70,128,120,95,136,104,102,57,51,13,32,113,187,234,226,223,208,214,222,219,238,209,227,232,210,202,214,205,204,231,220,220,197,192,184,221,224,189,201,206,224,211,170,196,192,202,222,216,216,194,227,189,205,106,8,13,4,27,2,6,21,3,11,12,10,17,13,17,15,30,25,2,7,20,7,38,13,15,217,241,220,199,217,209,217,197,214,205,219,227,204,225,211,211,219,210,205,217,214,225,219,234,179,225,210,234,225,201,223,223,217,224,222,228,220,229,231,202,217,223,242,206,223,214,217,237,233,214,225,212,219,235,219,215,200,221,240,243,229,225,243,205,215,235,209,212,201,212,216,212,236,212,202,230,210,220,234,221,222,231,209,206,210,235,226,206,191,241,228,233,228,217,229,222,228,208,209,232,231,188,211,215,209,234,211,221,227,200,228,211,234,235,205,249,235,222,232,235,243,233,221,202,231,245,231,233,245,237,214,228,221,229,211,244,244,230,238,243,237,234,243,239,225,210,233,248,235,240,236,229,218,235,219,236,252,253,237,228,204,238,232,236,245,242,224,245,237,243,250,235,239,239,245,235,236,211,241,251,226,245,237,249,253,242,242,241,247,250,229,251,230,255,253,225,254,232,232,236,255,223,245,228,244,245,237,203,253,220,251,252,246,226,248,224,254,234,253,240,243,242,245,247,239,217,232,252,237,255,252,249,252,246,227,249,221,230,252,237,249,255,250,255,227,251,255,255,231,235,217,239,241,226,249,250,255,248,237,238,233,237,242,242,241,245,239,250,236,240,255,242,253,247,224,242,235,230,251,243,253,231,251,252,219,247,255,245,249,230,242,233,220,248,252,241,228,253,235,224,246,226,227,240,243,255,226,229,237,235,247,254,240,242,236,239,254,241,243,248,246,234,246,248,236,232,243,238,252,250,237,248,238,232,184,202,234,229,228,232,237,235,203,223,247,174,158,147,125,62,83,235,251,250,255,232,237,224,255,246,219,199,200,242,221,255,244,221,214,222,242,255,238,221,227,224,195,176,220,233,229,246,250,253,227,249,228,225,242,245,172,127,125,112,49,66,67,63,70,43,44,61,46,51,42,48,45,95,77,55,75,53,48,31,165,236,254,241,236,255,221,255,246,234,238,236,240,242,251,252,226,248,201,113,244,240,250,233,223,241,225,238,249,234,223,178,172,177,149,144,199,248,228,117,87,69,78,87,62,138,159,89,80,88,95,111,70,108,174,145,92,62,50,64,47,74,44,33,67,89,65,66,55,37,57,186,236,243,239,250,252,234,224,236,247,234,242,243,221,255,231,244,250,250,213,245,247,248,215,190,217,248,217,104,103,167,223,239,230,164,209,224,213,229,238,248,255,250,224,124,115,84,105,82,86,103,79,74,49,53,36,34,69,35,29,53,31,46,189,223,214,241,240,245,231,240,251,251,239,207,102,93,191,214,211,239,205,210,230,226,222,198,221,218,190,197,200,237,219,214,214,241,231,211,191,223,218,216,239,204,217,204,208,231,219,206,217,222,191,220,235,202,220,233,216,211,213,230,197,231,218,217,206,207,201,198,185,214,212,217,200,204,191,235,190,177,140,78,75,72,83,36,69,65,105,157,156,135,109,66,48,48,73,146,134,136,133,87,68,36,54,14,93,201,240,226,215,194,208,186,202,205,202,215,220,211,214,208,215,213,220,189,197,197,217,231,227,201,189,193,213,208,208,198,224,226,216,201,187,217,216,232,217,208,227,82,1,1,32,7,21,14,8,6,13,7,11,13,2,0,26,7,3,16,7,5,3,7,13,14,205,220,213,216,211,226,221,230,207,203,228,224,225,238,204,223,225,185,208,212,199,212,240,214,228,226,211,228,230,246,231,219,207,214,216,211,226,206,232,225,217,242,228,219,194,214,218,209,204,222,228,225,229,220,233,212,197,227,216,232,226,2
18,203,211,225,225,234,220,226,234,204,214,230,226,194,219,233,203,212,243,214,213,232,227,224,223,201,229,202,197,209,222,221,225,221,243,237,224,217,242,233,235,215,225,212,225,215,228,200,215,235,226,237,216,206,222,227,217,225,231,230,231,229,220,215,222,232,226,218,217,222,234,236,236,224,221,246,233,210,223,207,233,233,240,254,240,240,242,250,228,224,247,232,232,253,230,245,249,223,228,218,236,249,229,244,231,212,239,234,235,235,215,251,241,249,255,247,233,251,252,234,238,223,236,253,243,216,242,246,245,237,234,245,245,220,235,250,249,255,245,245,235,243,228,253,254,252,238,241,246,247,228,233,249,250,248,251,253,227,247,245,254,255,240,253,248,237,250,252,250,249,251,243,234,241,252,247,243,252,248,252,253,253,242,250,254,242,240,236,251,249,255,249,242,251,234,249,236,252,236,234,247,243,248,247,235,250,248,253,255,232,249,246,237,248,251,245,236,219,251,244,251,224,254,250,242,241,245,231,251,241,251,239,250,251,253,253,243,231,237,250,243,238,247,249,253,224,244,246,240,249,250,230,252,218,231,246,233,252,254,253,231,253,252,225,246,250,249,252,255,249,208,234,234,199,241,249,253,242,214,228,235,203,206,241,212,149,161,117,54,74,237,255,252,232,231,255,250,246,241,225,194,207,246,246,245,237,203,210,201,208,210,232,190,185,223,219,224,227,237,236,237,240,250,248,235,246,235,245,180,122,115,119,91,33,52,49,27,27,38,40,49,61,31,39,48,55,74,55,57,63,65,36,52,170,232,244,255,245,253,252,245,237,255,232,249,224,252,234,250,241,222,233,155,224,243,235,247,242,249,249,238,212,187,111,149,204,230,173,174,238,245,118,94,64,52,58,85,67,141,145,102,98,76,119,79,93,133,145,118,62,45,55,91,88,82,46,49,74,57,73,71,57,32,37,234,243,249,246,240,255,242,239,246,235,246,249,255,248,253,224,251,248,234,217,210,241,248,230,172,193,235,217,174,152,205,240,249,238,173,194,246,209,208,234,228,244,238,121,82,82,51,80,80,91,89,78,60,50,51,36,55,46,24,31,37,15,47,226,236,235,224,226,201,216,228,233,246,239,210,90,119,192,202,207,212,219,231,233,225,239,230,224,210,225,218,237,212,213,214,210,223,227,234,241,211,239,212,203,230,201,241,211,220,214,191,240,195,223,209,207,235,226,200,214,210,198,225,215,214,213,210,229,214,226,225,199,194,216,214,241,206,224,195,219,209,136,100,116,75,43,71,93,56,139,178,149,125,85,67,56,37,107,125,117,133,109,69,52,62,31,51,167,215,227,201,213,218,205,196,197,173,209,216,221,201,203,211,225,210,196,222,197,209,210,215,209,223,223,221,233,228,207,219,191,213,215,206,227,207,205,211,199,200,212,109,14,6,7,0,7,18,24,3,37,10,5,48,7,0,8,9,7,2,13,24,27,20,17,3,197,223,234,233,227,204,223,223,214,225,220,203,215,198,209,216,196,199,207,204,236,201,203,217,213,215,213,218,219,224,218,245,192,213,204,222,233,226,197,217,222,234,211,211,220,217,226,216,227,212,213,214,226,222,242,227,225,210,204,221,222,200,212,201,205,207,242,237,207,201,217,231,223,223,239,222,202,209,224,233,227,215,227,248,225,226,218,205,224,205,218,208,227,226,216,202,215,211,231,219,216,223,216,223,203,230,216,208,241,232,218,228,224,214,233,217,204,201,244,221,199,227,227,234,224,233,223,207,227,223,235,201,216,219,226,237,212,227,226,239,217,234,222,221,241,233,234,234,216,235,243,242,254,225,230,249,242,238,227,233,235,212,208,215,249,233,229,221,235,238,228,223,234,207,246,217,243,227,235,234,245,216,224,245,240,232,250,241,232,236,249,252,245,237,237,241,250,255,237,233,243,231,250,218,249,236,242,254,245,239,246,245,236,250,225,228,243,250,242,249,200,230,245,241,240,240,249,251,230,252,255,235,255,240,245,230,246,247,252,239,249,251,241,255,253,229,244,251,239,249,249,255,244,237,253,246,242,230,255,211
,242,255,225,219,228,254,239,230,250,247,239,235,250,221,248,213,242,255,247,246,246,252,253,254,227,241,233,249,245,247,253,252,214,225,239,242,237,226,255,250,245,239,240,240,248,243,239,230,241,242,238,254,242,241,229,242,226,254,248,251,231,231,236,255,226,249,244,243,229,246,233,221,248,233,217,250,219,198,217,222,252,228,219,199,243,197,179,169,137,79,100,235,242,251,251,242,252,251,233,253,235,219,208,244,226,249,244,211,193,145,205,222,226,222,234,243,236,223,233,242,231,246,236,246,231,226,249,243,190,144,98,110,78,24,16,29,27,28,40,21,19,9,29,37,18,35,51,81,67,60,72,82,40,110,244,244,227,241,224,219,253,244,247,254,250,250,247,245,235,219,246,230,211,138,215,250,255,250,208,234,241,186,160,194,193,231,200,193,198,183,243,229,169,134,126,77,63,77,81,132,196,135,81,70,130,105,107,130,138,115,63,55,66,62,88,65,50,28,64,89,48,45,61,40,34,222,245,242,246,245,243,223,231,226,239,219,243,255,244,253,242,237,222,224,251,224,230,253,252,230,187,239,249,233,150,220,217,227,245,218,205,239,201,184,217,233,252,236,124,52,57,41,105,94,94,82,80,64,36,30,35,40,31,54,28,74,48,75,237,233,236,248,226,220,217,213,238,224,221,182,86,132,208,207,202,228,233,243,238,222,225,244,216,222,225,228,227,226,220,253,222,222,237,216,228,219,193,229,210,207,210,227,201,217,231,214,199,176,227,180,193,214,216,224,206,205,201,194,200,222,210,228,204,222,216,212,202,204,215,213,182,222,207,219,243,220,148,106,116,119,73,79,66,46,123,161,117,124,89,54,38,35,124,151,94,113,99,70,64,33,24,138,212,218,213,199,214,187,234,223,227,191,215,236,207,207,212,227,226,216,230,211,195,224,200,229,207,212,210,213,238,202,212,222,227,206,213,216,213,225,189,200,236,187,198,102,18,1,26,23,31,3,14,18,7,28,22,2,18,15,5,6,5,0,20,5,15,13,5,17,215,214,209,209,212,223,230,228,211,209,197,192,206,224,240,244,203,218,230,238,221,217,212,215,215,209,202,215,194,233,228,203,245,229,227,235,208,232,229,239,208,198,243,247,214,228,217,210,216,241,221,213,238,203,207,217,219,233,210,214,220,215,213,226,233,217,225,236,223,237,206,215,207,234,249,226,233,229,222,224,246,235,222,204,228,213,223,209,236,210,201,204,236,222,227,221,201,219,229,233,217,214,215,208,224,201,225,232,220,233,231,228,216,238,219,235,223,252,233,226,209,210,238,221,246,237,247,230,226,244,237,219,216,241,225,202,244,211,216,211,218,238,224,245,243,247,227,223,228,235,245,244,230,235,239,226,224,249,243,240,236,255,224,231,223,218,232,234,230,243,228,231,229,212,251,226,231,239,249,220,244,219,201,240,216,240,240,226,223,249,244,246,245,225,235,250,251,225,243,245,249,240,240,236,242,249,238,239,249,248,241,238,255,246,237,251,235,237,255,250,239,253,233,245,234,255,227,228,250,243,240,225,253,221,238,250,238,253,239,250,243,251,238,252,246,246,243,235,248,240,242,247,253,241,235,252,252,253,255,231,233,254,245,247,252,243,255,250,255,242,237,226,233,246,234,213,251,242,248,236,241,249,255,255,253,234,235,231,245,245,253,250,237,235,232,228,236,239,248,221,252,254,254,253,246,223,242,218,241,244,251,255,239,247,252,248,227,251,249,228,249,254,255,237,248,251,215,219,224,247,219,228,253,224,203,215,203,236,230,248,224,233,211,227,255,192,168,155,126,90,85,229,247,242,230,234,244,211,222,241,226,198,173,219,216,255,247,203,149,173,251,249,247,211,241,253,206,203,225,242,247,249,237,255,244,219,222,233,188,145,104,94,57,18,23,34,56,30,26,26,14,44,56,43,23,26,51,76,75,56,83,60,91,214,254,250,254,245,244,232,213,255,220,240,255,225,253,251,236,248,215,199,173,116,181,245,237,225,201,207,210,167,220,234,242,233,205,207,219,223,254,249,199,115,107,74,71,65,58,106,174,144,93,107
,153,131,106,140,134,108,79,64,64,75,73,75,71,68,40,39,48,67,63,6,78,233,241,240,219,252,243,219,222,228,242,239,247,244,246,241,241,243,252,254,249,233,251,243,239,182,174,231,255,226,174,173,232,198,247,196,206,234,208,172,212,235,248,239,132,101,57,63,87,106,119,115,55,46,53,41,39,43,48,22,17,68,33,81,251,240,250,252,212,221,230,239,234,250,239,163,68,154,198,191,229,224,225,227,206,226,208,221,215,242,207,226,236,225,230,222,208,206,235,192,221,222,200,203,212,213,222,233,222,225,204,210,210,214,217,213,213,203,203,197,208,200,214,215,210,213,234,209,215,212,204,203,216,192,213,214,202,201,183,239,246,197,160,110,88,112,94,91,73,69,145,136,130,132,91,51,63,61,123,129,143,134,89,90,57,46,41,192,230,222,225,217,222,223,197,205,193,209,188,230,207,216,217,213,220,218,232,193,217,214,202,214,210,219,195,201,213,211,232,199,217,204,206,216,233,216,221,216,206,210,203,105,7,7,23,14,11,13,8,42,8,9,18,36,9,7,0,37,19,16,24,0,16,22,9,15,203,220,205,211,230,207,241,207,207,196,214,205,223,213,224,219,229,211,206,216,207,230,205,217,199,221,210,218,202,211,235,240,238,227,236,223,209,229,201,203,229,221,210,216,225,225,219,216,229,220,211,214,216,230,229,215,219,231,232,223,217,221,218,202,235,216,203,218,231,212,198,231,206,214,202,220,236,221,243,235,227,219,220,223,229,232,235,203,218,202,214,213,231,208,226,230,244,220,234,215,226,236,242,212,208,217,230,242,229,223,233,249,221,229,217,223,227,220,221,231,224,209,216,233,218,230,228,233,224,239,226,235,241,243,225,240,230,233,206,208,202,222,215,237,245,222,239,235,211,230,226,241,240,233,219,214,233,227,219,238,221,237,236,249,238,239,215,229,228,239,221,245,241,220,245,233,236,224,230,228,214,248,234,240,234,247,240,226,246,246,212,249,214,248,247,248,251,241,243,243,228,252,246,248,240,228,249,242,239,237,228,224,244,253,227,251,251,237,252,216,241,250,245,238,245,244,232,243,246,230,233,251,221,204,251,245,244,251,242,241,255,251,253,242,242,230,253,245,247,233,255,255,249,244,226,249,209,244,234,246,214,252,252,253,247,220,245,245,240,231,244,244,242,254,219,253,234,244,250,254,250,234,254,255,251,238,248,245,222,250,254,252,241,243,254,242,244,224,230,235,236,235,224,241,242,241,255,223,225,244,233,234,240,232,238,235,214,249,249,237,239,244,253,245,255,235,254,232,224,251,232,234,248,202,208,234,222,254,239,202,237,221,211,215,242,146,132,132,120,85,98,230,252,246,236,227,244,240,226,241,220,196,184,224,251,234,215,168,195,201,251,247,245,206,237,255,208,212,233,250,239,253,243,241,252,233,233,234,194,131,101,56,24,10,30,30,17,20,48,21,27,32,33,14,7,24,58,81,85,75,43,38,155,240,250,255,224,242,236,247,251,243,233,255,231,254,218,237,242,252,246,227,214,116,178,238,255,234,204,198,244,216,252,251,253,224,182,225,208,202,236,250,188,158,134,111,51,92,58,114,202,172,90,82,131,109,74,80,147,102,108,64,49,63,60,79,91,82,68,78,87,54,81,36,52,224,253,240,254,225,245,239,225,243,246,247,250,252,239,250,246,217,235,234,242,208,231,234,252,228,157,206,219,224,188,143,201,230,209,233,183,209,224,187,198,225,252,215,123,113,71,108,106,111,108,67,60,67,59,41,19,37,46,31,15,52,11,79,230,236,238,242,228,212,236,231,226,225,206,146,64,145,221,197,219,222,235,245,229,241,233,240,227,210,232,218,201,208,205,226,216,206,238,196,209,212,227,215,224,220,225,219,195,209,225,216,202,221,180,193,219,203,198,196,205,221,195,217,194,176,224,191,216,205,204,225,216,183,198,217,205,238,192,223,223,207,188,147,125,90,99,105,89,81,135,144,130,141,73,55,78,99,129,103,138,122,105,104,77,29,105,225,199,203,203,210,214,221,214,192,207,239,203,229,206,220,186,218,198,218,
214,197,215,201,225,213,235,238,198,210,225,201,215,200,226,226,185,203,208,215,204,198,194,222,221,116,7,2,10,19,10,11,10,4,17,23,25,19,21,2,0,5,36,28,14,8,0,2,13,4,233,231,207,200,226,193,215,229,209,218,215,195,215,221,211,205,218,220,219,212,215,202,225,227,233,224,214,236,235,208,237,211,211,213,232,210,241,193,223,201,226,216,228,218,208,225,220,204,225,214,238,233,226,242,226,219,225,231,214,220,208,231,217,210,229,218,237,206,227,231,203,229,236,208,223,215,241,241,190,208,227,235,222,238,209,227,246,234,217,222,238,229,235,199,229,226,219,235,217,225,219,215,219,217,231,221,218,189,240,221,226,224,230,232,225,216,243,238,238,239,229,229,224,206,212,215,221,236,229,215,237,246,228,233,216,222,224,228,218,236,237,225,225,224,221,245,246,230,245,244,243,221,245,246,231,246,243,219,238,251,236,226,218,230,246,239,232,246,235,249,230,253,239,237,245,249,245,253,244,254,245,239,242,249,237,245,252,246,236,252,243,242,251,246,246,245,251,246,253,219,244,245,243,254,245,236,248,247,252,253,234,255,254,248,246,255,240,240,249,252,239,250,255,240,252,228,254,241,246,237,232,234,245,250,254,248,223,228,242,223,252,254,232,251,250,235,244,216,242,235,232,245,229,228,243,250,255,239,220,243,244,246,247,214,239,237,253,250,233,239,252,255,248,229,214,246,239,244,243,226,231,248,254,239,252,216,240,246,255,242,255,241,254,238,243,252,246,242,237,216,252,243,233,254,227,244,247,238,236,247,250,243,253,249,239,245,249,254,236,234,237,243,236,244,238,237,248,241,236,245,243,219,236,220,221,236,243,244,205,216,238,252,213,207,237,178,137,153,139,81,120,217,247,230,248,230,243,244,232,253,242,210,170,235,254,245,203,180,212,234,241,254,243,200,240,249,225,177,249,249,226,249,248,255,228,246,219,248,170,99,67,32,8,26,17,37,30,20,25,31,26,48,47,54,45,49,49,67,51,42,61,33,175,235,247,232,223,241,236,251,246,228,235,239,234,236,255,245,237,231,255,244,235,175,152,252,251,201,202,249,243,245,255,249,242,163,74,151,170,220,240,254,202,118,129,91,52,89,122,149,146,112,57,47,70,72,36,97,119,146,99,68,46,80,65,62,83,86,62,89,93,35,52,3,60,217,237,238,248,251,245,254,242,213,243,250,243,253,241,242,243,247,205,233,241,247,234,241,255,220,184,198,242,242,208,169,182,219,168,219,210,218,200,217,187,210,252,245,99,123,55,66,72,71,43,55,10,42,35,34,26,37,30,18,29,47,15,87,230,235,235,254,227,242,254,238,237,236,254,147,118,179,223,188,229,212,236,241,251,229,227,218,201,206,239,230,227,231,224,232,232,216,203,202,198,226,216,186,214,219,215,208,222,234,220,219,223,212,213,235,221,215,232,199,234,219,212,206,194,233,200,196,219,203,203,203,208,186,218,224,183,194,212,222,235,213,218,119,124,106,99,92,74,75,135,124,123,107,65,69,97,101,128,128,124,129,120,122,95,22,51,160,180,228,226,209,238,199,221,228,214,206,220,219,221,196,201,201,200,201,210,217,220,201,239,235,196,189,207,196,198,194,199,202,196,220,213,200,223,229,239,221,206,195,195,108,13,5,22,3,2,7,16,3,15,3,0,0,1,19,27,2,0,14,20,3,12,9,22,11,206,232,204,235,218,218,217,184,209,227,220,217,208,218,232,232,215,204,205,206,224,207,201,220,224,218,206,214,234,235,238,213,230,220,205,217,248,206,215,232,229,212,219,215,234,211,227,217,211,204,222,212,221,231,213,205,208,234,223,233,219,220,213,202,219,221,227,232,241,213,221,210,208,214,243,239,228,209,196,237,208,231,196,215,240,215,211,221,198,232,225,248,229,230,230,218,199,233,210,227,237,218,216,218,206,231,247,213,222,224,225,227,213,225,243,229,223,215,233,223,229,217,216,225,225,238,228,214,221,223,227,241,231,223,238,241,210,225,234,224,231,238,231,221,244,245,234,235,237,216,241,238,242,238,219,226,22
6,219,250,241,227,246,219,234,234,249,227,223,242,250,228,230,230,227,244,216,245,218,237,225,230,250,244,235,241,242,235,239,250,218,254,244,232,230,231,239,240,236,246,236,235,245,230,243,238,231,241,239,244,226,240,255,255,225,246,239,223,251,235,218,234,248,240,242,241,226,255,237,247,253,220,236,252,255,239,255,243,247,227,242,255,236,255,234,243,250,223,247,239,242,255,252,255,252,236,237,251,245,240,249,254,251,246,229,245,248,241,255,233,249,242,221,247,230,226,251,253,253,232,250,246,245,239,241,253,252,254,255,255,242,221,220,247,247,236,250,252,238,238,240,238,238,243,224,246,248,235,249,233,253,222,236,247,251,222,243,253,250,231,241,255,228,241,243,252,255,251,237,237,246,204,253,244,219,235,231,179,223,220,207,250,237,224,214,249,206,150,152,134,83,88,221,230,250,241,255,255,225,240,251,227,223,209,221,249,237,215,198,206,241,252,255,229,188,231,249,238,234,255,254,243,251,247,201,236,251,217,249,167,121,74,68,66,11,3,3,40,25,14,36,41,74,71,49,27,44,45,10,23,55,100,134,239,237,254,242,250,251,232,245,221,223,237,255,235,255,249,248,245,252,243,226,255,174,185,227,228,198,230,247,255,230,252,250,233,155,96,158,184,221,253,246,147,113,57,51,21,105,145,167,147,69,31,34,28,68,66,68,120,98,59,68,72,48,75,68,91,49,82,117,102,53,91,16,56,226,240,240,230,244,250,240,252,222,251,255,214,252,246,234,241,249,246,219,253,236,204,241,251,239,184,192,235,231,201,181,187,232,184,189,252,182,212,249,234,213,246,193,93,56,11,3,13,8,12,9,5,28,63,29,46,33,42,34,30,54,28,92,237,225,254,212,244,242,255,226,247,234,235,140,115,213,197,198,239,221,223,206,243,198,223,231,224,230,227,202,233,234,208,216,227,233,233,180,223,217,219,225,224,227,222,231,227,208,197,225,186,204,190,229,215,214,230,204,199,214,192,224,199,226,216,218,241,187,204,220,219,196,205,206,207,198,199,194,204,207,137,161,99,103,79,90,78,77,121,117,142,114,75,61,86,116,144,98,116,131,121,99,77,44,40,106,163,224,220,211,209,211,232,231,212,213,210,226,204,215,222,236,226,207,219,228,226,194,231,211,238,225,199,207,193,208,232,185,206,212,232,207,220,203,222,214,205,214,203,113,10,0,12,42,7,5,17,2,15,27,16,16,10,0,8,4,6,6,13,27,18,26,21,2,237,228,231,188,236,221,242,227,217,202,232,234,217,235,223,183,204,194,218,217,223,196,201,220,199,205,232,196,230,227,200,219,217,225,231,223,218,228,214,228,221,241,214,213,216,221,242,239,213,198,189,227,212,223,226,213,225,232,225,229,195,209,220,234,209,223,210,210,233,214,216,216,211,233,212,201,225,233,204,225,215,207,227,218,196,201,222,231,227,225,236,210,218,249,230,238,195,214,230,225,216,218,220,222,217,242,214,219,230,226,220,236,205,208,216,227,216,226,231,212,232,228,222,222,212,229,214,210,195,224,216,217,228,217,232,219,216,209,242,232,228,218,249,227,243,232,234,208,230,237,230,237,236,243,231,239,236,214,214,246,217,245,219,242,225,224,236,233,240,229,245,242,233,248,230,239,247,252,252,237,233,238,240,232,241,239,246,246,250,252,235,215,249,244,247,237,242,238,253,255,234,240,233,247,248,252,248,233,237,246,235,252,222,254,248,247,245,246,228,228,244,246,236,228,253,254,249,255,242,251,248,233,255,242,227,249,246,245,241,248,253,241,231,251,251,251,253,236,238,245,242,234,239,236,244,250,249,246,238,255,251,252,236,255,235,230,231,246,240,255,253,255,248,235,229,255,230,249,252,251,252,247,251,251,251,247,249,253,254,241,246,238,243,246,231,240,235,248,235,248,254,230,244,239,237,250,228,252,229,247,231,211,255,255,251,240,255,239,234,228,225,240,252,233,233,222,237,252,233,252,239,231,241,207,213,215,241,245,251,240,255,231,231,202,243,192,176,145,146,66,108,235,246,249,255,24
[binary payload omitted: raw 8-bit sample values (0-255) spilled into the text where the patch's binary image data belongs and are not representable here]
5,228,105,91,109,138,98,36,23,83,80,147,233,237,255,251,253,238,202,112,73,72,205,233,237,238,210,197,167,130,135,118,70,67,125,68,51,68,49,31,22,41,30,44,81,58,39,33,52,80,79,78,80,151,154,171,202,144,164,64,60,40,77,237,242,232,252,242,254,218,167,138,100,29,43,71,27,57,17,180,220,222,204,238,243,245,219,242,245,249,249,212,224,246,247,238,251,251,253,229,209,198,101,18,138,90,18,46,23,26,60,66,54,60,75,32,37,29,23,24,90,103,38,51,28,152,251,248,255,249,250,240,239,251,237,244,246,229,234,215,182,253,236,255,233,229,244,205,228,247,231,217,245,242,234,192,156,183,209,217,193,222,207,170,78,80,37,16,37,114,91,191,246,143,141,58,117,228,239,237,232,246,255,228,253,232,224,222,248,249,179,173,150,137,129,118,141,123,188,114,62,25,5,64,140,183,227,222,211,250,214,226,226,211,220,208,223,212,217,204,211,221,222,206,206,240,214,216,212,223,209,226,207,206,217,222,196,228,234,206,195,232,229,198,226,213,219,194,217,224,226,228,220,218,219,202,201,212,228,199,154,212,200,217,207,218,246,249,254,233,201,120,76,37,17,87,196,224,247,227,220,221,224,197,242,226,228,227,206,221,218,221,217,220,194,232,207,239,213,213,223,238,229,219,204,205,203,212,209,218,214,202,212,224,213,225,195,224,201,214,226,202,192,214,213,211,206,221,227,205,210,208,221,182,221,131,27,6,1,8,22,1,17,3,10,21,27,4,1,1,7,18,0,11,22,0,8,24,11,7,224,211,219,231,225,229,224,230,248,219,241,216,251,228,237,212,238,236,239,219,240,243,239,245,235,236,244,214,221,239,221,233,238,242,226,233,228,226,232,237,229,228,247,242,244,242,231,231,237,240,241,231,223,214,232,240,242,227,242,217,244,238,240,238,243,239,231,244,231,238,241,216,233,231,249,247,250,223,238,218,255,225,225,199,232,224,228,238,219,237,247,235,238,224,238,248,223,235,221,230,237,232,251,236,247,213,236,251,232,229,237,239,239,236,252,247,227,244,226,233,223,237,242,205,244,240,241,230,246,241,230,223,234,226,240,237,237,247,235,248,236,221,239,245,224,252,247,230,250,242,240,246,254,225,235,243,208,235,221,250,227,253,245,242,238,229,242,246,239,231,242,240,246,236,245,227,231,248,229,250,229,235,235,240,250,238,240,216,246,215,222,226,234,252,231,231,242,224,216,238,244,234,239,253,250,222,242,227,216,247,228,234,246,242,238,239,244,212,244,218,245,215,220,235,234,232,237,251,237,247,253,225,248,234,255,241,223,247,245,233,242,243,233,248,238,250,232,248,239,208,240,237,221,220,235,224,231,231,239,229,245,252,230,246,231,230,236,208,245,255,212,245,232,236,230,252,223,225,242,247,237,246,246,227,233,249,234,239,241,247,248,221,227,217,246,240,250,245,231,230,242,238,244,250,254,231,245,222,235,236,252,249,250,242,230,234,232,249,236,253,237,197,239,235,235,239,241,201,209,206,200,233,250,206,241,198,191,243,239,233,183,189,246,240,253,155,117,138,120,47,141,243,235,254,217,229,229,250,230,241,170,76,64,30,101,133,122,44,30,0,51,188,255,247,223,237,248,227,209,177,155,173,60,44,61,61,21,40,54,18,12,29,28,15,89,43,25,9,21,28,6,10,39,47,41,15,15,40,36,23,16,69,13,19,44,37,29,38,30,56,45,47,37,154,240,230,255,255,251,203,134,137,127,68,39,83,62,104,63,162,255,197,221,239,242,254,254,244,244,238,237,192,198,236,218,254,239,228,235,251,206,163,42,32,137,68,19,50,25,28,48,57,43,44,29,24,50,41,14,18,38,91,78,28,30,71,229,232,227,252,234,248,236,234,230,233,235,255,228,217,219,216,236,234,222,225,239,217,224,244,245,205,212,204,237,219,154,155,226,227,224,226,217,219,136,104,72,25,18,26,74,196,234,149,89,68,177,245,239,238,232,255,244,249,242,219,210,226,249,238,173,168,142,156,133,137,140,35,96,83,31,33,25,181,231,234,212,195,222,212,209,192,197,195,210,227,227,207,217,225,2
43,209,206,217,207,213,205,217,219,219,216,226,207,211,217,209,203,197,230,211,219,213,215,215,210,228,214,218,230,213,227,232,207,206,249,234,214,212,232,182,151,223,222,234,234,242,254,235,199,130,99,49,6,29,163,225,232,240,250,232,239,232,215,209,239,209,190,221,228,225,209,194,225,210,202,196,230,211,227,225,209,210,221,232,224,219,211,240,209,219,213,215,231,218,224,219,214,210,208,205,224,227,202,200,231,198,234,234,200,214,206,206,204,208,196,110,13,7,4,9,21,17,7,9,27,4,25,2,3,21,0,20,6,7,9,3,26,28,32,22,237,239,237,238,228,223,246,205,244,193,244,216,207,213,242,234,238,251,223,246,242,226,243,238,252,223,233,238,223,221,231,232,241,235,228,224,240,221,243,227,240,237,227,234,213,242,226,249,252,245,228,229,223,243,233,206,237,236,230,236,249,228,226,228,235,244,215,237,249,255,228,252,210,241,242,218,229,208,247,232,239,254,230,220,215,233,227,227,246,221,230,209,222,246,239,240,240,219,196,223,252,252,208,249,237,233,217,245,224,226,245,243,225,251,216,250,235,234,235,251,208,229,232,238,235,235,245,244,241,237,213,246,249,254,243,209,243,226,243,253,234,251,248,237,255,244,235,250,229,239,223,238,245,251,240,252,243,249,251,249,233,235,234,254,241,224,245,232,229,225,244,252,214,231,255,236,218,250,242,240,236,221,238,228,232,229,236,217,238,227,253,240,249,221,255,241,248,235,229,238,225,237,251,243,244,234,248,239,238,233,228,244,252,224,232,216,228,243,223,232,241,242,251,236,242,246,233,244,241,232,238,241,227,212,235,232,245,214,205,232,243,236,250,241,245,253,251,244,244,245,252,247,237,246,242,239,244,243,244,242,249,218,228,239,235,238,230,205,233,235,253,199,222,238,241,226,211,221,247,244,229,232,244,232,250,247,225,220,223,196,219,237,250,251,220,238,247,236,220,232,211,248,216,242,238,243,234,226,248,246,234,230,251,227,226,237,245,239,226,244,244,235,247,227,252,239,236,198,236,227,193,236,212,171,225,228,215,246,229,247,188,231,255,250,239,178,145,139,119,46,152,223,248,249,246,212,234,245,222,117,72,36,44,62,101,170,182,125,71,27,25,118,182,168,113,119,52,56,52,100,135,137,96,11,22,10,34,40,75,34,12,18,23,51,48,43,18,12,5,4,16,33,26,12,7,13,23,22,27,22,25,17,0,31,28,47,35,19,44,46,18,23,42,36,25,29,62,138,52,52,78,162,132,84,164,205,155,107,91,81,135,204,205,251,228,235,236,216,242,245,233,198,207,252,227,254,242,222,245,246,181,160,51,8,114,56,21,115,31,32,40,35,60,38,35,12,23,60,36,29,45,86,22,48,30,73,235,243,252,247,230,216,213,238,239,244,218,251,245,238,207,211,245,222,235,234,252,208,232,230,224,235,206,234,242,204,176,179,203,209,211,224,186,127,101,129,189,111,32,24,32,157,236,142,85,102,228,252,246,222,244,252,233,253,250,228,236,234,253,217,171,155,165,161,152,157,118,44,129,122,83,65,78,204,229,255,217,191,209,221,201,193,221,206,207,218,188,200,199,220,223,235,205,230,227,197,242,188,236,217,193,216,233,187,245,218,195,202,193,210,184,218,217,220,223,235,226,203,180,193,234,203,231,242,218,206,219,222,226,184,176,212,230,242,237,245,223,185,107,90,30,11,94,203,246,241,246,225,216,206,228,217,206,232,218,199,220,219,199,217,211,214,212,223,216,218,229,202,204,220,214,216,207,223,219,232,201,201,222,226,220,237,220,234,219,191,211,192,206,199,211,211,217,191,210,219,208,191,212,216,211,184,211,226,214,91,4,20,0,23,11,27,6,35,21,24,26,27,0,16,5,15,16,20,20,4,33,37,50,25,219,220,217,248,228,219,224,233,228,221,211,213,236,234,221,199,235,238,230,241,234,221,238,234,232,227,231,220,219,215,216,220,236,246,255,240,234,248,255,225,239,247,250,221,248,249,232,233,236,237,229,243,251,234,227,247,213,239,230,230,233,232,243,241,230,240,230,239,233,238,235,224,22
0,211,235,249,245,224,229,243,231,236,212,249,231,233,249,223,226,240,225,228,236,243,228,244,222,224,244,243,251,234,234,243,215,244,237,229,239,240,240,209,237,235,222,233,231,242,240,231,250,231,223,253,235,235,241,244,232,249,239,247,248,250,249,243,250,246,253,239,247,233,244,227,253,246,244,226,232,247,247,252,255,231,246,247,225,231,245,250,227,237,226,229,245,243,235,229,253,253,249,234,240,248,242,243,253,254,234,246,237,242,241,250,234,239,228,211,216,242,216,227,246,246,230,251,249,239,245,231,249,228,254,243,239,229,231,238,239,245,217,235,245,244,244,240,222,241,238,229,232,245,246,232,239,229,227,242,231,236,228,247,244,244,239,237,206,224,244,234,235,239,234,249,247,251,245,250,239,243,236,249,246,238,238,232,229,240,228,211,240,220,245,207,226,253,203,227,253,241,245,204,233,246,240,239,233,237,246,237,244,236,247,235,217,244,240,233,223,238,200,240,236,241,247,244,227,241,203,221,223,245,221,244,243,245,223,243,222,251,248,229,235,232,245,239,242,197,228,245,232,216,241,222,255,246,245,193,248,202,189,199,204,180,238,248,245,250,246,247,158,216,236,255,252,153,121,98,101,60,147,231,241,252,240,223,245,245,178,101,65,52,33,78,166,234,228,190,165,113,52,52,15,46,9,12,52,24,38,55,2,79,124,88,54,29,25,28,98,31,32,45,21,16,62,69,29,27,8,42,5,28,40,11,25,29,25,34,26,11,10,12,14,7,8,23,21,14,40,18,20,29,14,8,34,24,42,15,12,34,48,105,75,92,178,243,190,96,65,38,82,152,209,229,246,243,235,239,250,252,242,156,223,230,239,246,249,241,237,250,173,153,36,2,125,54,51,120,32,60,38,34,25,60,47,26,57,37,26,41,73,104,30,46,25,45,230,248,255,233,245,243,231,220,242,243,241,234,243,229,232,224,226,225,226,237,249,228,196,241,232,227,201,215,223,226,178,168,190,213,217,218,170,103,38,11,106,144,107,77,68,92,171,140,101,192,255,255,240,242,253,249,245,236,217,245,242,245,249,196,142,144,154,137,157,181,128,70,170,183,109,77,31,101,183,214,208,221,201,217,222,213,212,225,225,213,214,211,229,231,207,217,207,206,224,228,210,189,221,202,201,212,196,215,200,217,242,205,219,218,210,227,214,216,217,214,235,191,204,188,202,202,237,202,216,198,207,207,188,170,220,230,243,241,242,189,138,105,51,14,43,167,235,230,232,240,235,222,233,229,230,231,216,201,216,230,210,233,226,240,223,238,201,231,196,212,220,235,233,217,225,209,210,198,227,238,200,196,203,214,229,220,203,214,229,225,209,230,210,218,220,212,184,230,228,217,201,225,215,213,206,203,213,217,181,132,0,14,13,5,4,14,24,7,11,1,31,0,2,7,9,15,25,4,22,14,9,34,18,5,236,226,229,240,207,208,219,231,230,234,231,217,222,247,240,221,250,225,242,242,225,247,242,241,206,235,241,247,223,228,227,250,220,228,243,215,231,222,250,232,236,242,230,219,232,220,234,227,253,231,246,225,217,242,255,252,249,248,244,237,232,239,242,253,243,234,225,243,231,240,243,240,243,242,246,251,231,250,242,247,245,223,243,249,222,242,223,225,230,235,243,194,245,226,228,237,236,211,250,211,246,235,251,216,229,232,245,246,240,234,236,243,234,225,242,244,231,242,247,218,251,239,232,244,243,249,244,231,245,229,240,244,245,227,232,251,224,244,247,236,236,252,255,238,243,252,238,247,231,245,230,244,253,242,242,231,243,246,249,247,248,224,240,231,228,218,224,234,233,223,254,241,229,247,237,249,219,251,231,218,236,238,230,234,231,249,228,214,216,218,243,253,254,239,239,242,224,242,237,236,225,231,231,251,219,245,245,247,251,240,249,252,232,226,238,246,234,200,246,236,207,223,236,245,226,241,223,204,222,219,247,242,244,252,245,225,247,232,238,253,232,244,220,230,252,251,255,231,243,253,243,223,246,246,235,236,240,250,230,214,240,243,248,240,238,228,244,226,215,236,234,244,226,238,233,242,221,234,2
39,247,246,218,250,249,251,229,230,244,241,243,249,228,239,230,242,246,249,234,222,239,226,229,230,245,224,205,218,248,233,239,207,220,254,228,199,247,224,215,232,242,238,254,231,220,236,249,204,192,234,183,204,239,245,244,251,250,229,240,243,225,186,231,239,250,228,123,106,106,69,21,124,243,229,237,217,225,247,217,194,74,59,19,5,22,89,185,167,141,159,137,62,21,37,59,60,22,60,23,16,33,11,12,40,135,120,92,54,43,87,55,24,4,19,48,67,47,26,19,36,3,6,33,25,25,34,34,2,20,16,24,35,38,16,18,12,2,23,7,8,45,16,26,26,26,18,21,44,29,26,19,78,107,85,36,40,81,80,88,92,13,67,189,242,243,249,239,222,239,249,236,217,184,253,248,247,231,249,241,253,252,124,102,50,15,144,47,46,111,36,62,7,26,24,20,63,45,49,56,58,114,134,122,50,48,16,56,206,233,247,248,220,244,250,221,239,239,242,235,239,242,229,194,205,229,244,243,236,239,210,244,242,201,221,209,255,213,162,180,173,228,226,204,227,198,148,44,36,82,155,137,139,94,121,86,148,220,254,249,252,229,251,255,241,212,227,235,227,226,239,159,159,134,109,127,125,149,96,89,176,165,116,64,11,2,67,179,193,221,233,228,198,222,213,201,217,214,215,208,236,225,231,215,205,230,193,233,213,219,203,218,230,221,219,216,233,206,238,212,217,207,217,208,243,229,208,237,202,226,210,218,217,210,229,233,203,218,234,219,204,203,213,229,253,244,149,104,94,2,30,42,146,230,229,237,235,207,233,238,196,225,218,212,228,195,215,215,219,217,230,228,200,220,208,224,231,221,199,224,212,220,222,228,209,199,228,199,204,202,238,213,223,216,214,208,209,215,236,210,229,196,230,224,210,214,214,198,207,216,209,211,200,225,228,211,221,111,21,19,13,9,4,10,8,17,18,36,19,17,7,0,24,21,18,0,28,24,10,14,11,29,231,221,225,230,215,236,219,244,229,220,233,228,229,239,239,250,235,214,248,217,242,220,226,228,243,239,247,234,247,246,245,211,233,232,236,217,210,240,252,240,237,241,233,231,243,240,245,244,237,240,253,255,246,252,233,222,230,236,242,236,223,254,246,241,243,241,255,219,234,254,234,250,233,248,248,224,230,240,224,228,245,230,246,242,223,249,252,237,230,233,216,231,238,233,233,206,198,237,240,239,246,236,223,226,246,213,244,233,224,240,242,245,248,234,252,225,226,244,221,232,250,248,236,233,248,255,246,246,231,233,230,236,244,244,244,226,239,248,255,242,244,253,225,237,251,236,244,218,234,230,240,247,223,254,246,239,228,253,230,247,245,239,226,247,225,240,250,242,240,227,237,249,244,236,244,245,207,216,240,221,225,228,212,228,227,214,230,220,230,213,219,227,235,234,233,238,239,244,248,240,240,206,238,234,230,237,242,255,218,236,232,249,244,236,250,224,233,236,222,241,245,248,250,232,218,250,242,236,252,234,240,250,230,222,242,238,246,252,214,229,252,234,232,243,242,255,252,239,243,225,253,238,236,236,203,245,249,247,243,237,233,221,233,229,246,239,220,232,246,248,252,236,237,238,237,245,211,237,247,246,215,246,219,211,226,247,235,226,216,255,248,232,245,244,235,225,242,219,239,233,242,242,244,246,234,239,240,237,238,251,242,231,232,243,253,238,223,231,246,227,255,236,224,242,249,255,216,214,224,189,252,245,246,226,227,217,231,241,255,229,164,238,236,251,227,101,113,98,83,12,95,240,238,255,248,212,254,255,208,88,27,24,36,80,74,43,32,54,136,160,107,33,20,47,36,34,79,13,33,14,39,48,16,30,88,143,117,105,113,38,15,2,34,2,34,67,36,12,19,11,37,23,35,17,8,6,18,19,14,10,30,22,24,14,13,35,7,22,36,17,13,10,35,35,24,4,28,17,21,59,58,68,57,63,53,33,35,60,97,30,22,189,244,249,253,217,243,241,233,248,193,194,252,238,238,228,243,223,247,231,93,114,35,8,140,43,68,65,11,23,24,33,40,37,43,52,62,86,147,170,164,148,98,35,32,45,188,241,250,228,232,215,245,238,241,227,217,235,229,225,239,195,218,247,222,245,213,241,251,224,229,2
48,200,221,239,219,196,158,143,234,198,237,224,216,200,132,102,154,154,149,116,52,15,50,192,239,250,254,253,242,249,242,235,227,227,244,240,253,204,140,152,105,82,134,112,147,95,97,192,148,68,61,3,45,173,203,193,223,229,215,196,218,201,196,214,226,213,199,208,219,239,232,198,210,183,232,216,194,234,229,215,213,226,229,225,213,204,203,211,212,237,215,201,234,243,200,227,219,225,216,236,207,197,224,192,244,204,225,179,226,252,242,157,127,71,13,10,3,82,174,238,239,238,213,242,204,221,244,216,218,210,223,249,237,208,238,235,223,227,230,233,215,212,228,207,243,224,221,231,238,201,206,229,233,224,204,210,219,215,214,217,236,222,213,217,190,210,217,190,205,210,237,235,233,209,228,211,218,224,222,194,199,234,206,223,133,1,14,1,19,14,6,18,4,7,12,20,13,5,0,3,15,6,7,23,12,3,7,4,15,219,218,239,228,237,211,218,251,236,253,227,227,225,210,223,239,227,237,193,230,232,243,231,245,229,249,238,246,246,245,237,220,243,236,234,238,229,236,238,224,244,232,250,244,226,226,226,228,236,234,252,236,239,240,246,251,236,239,230,229,223,246,222,254,240,238,233,239,240,228,254,231,231,239,231,245,235,251,240,255,225,255,207,236,228,239,246,228,246,228,233,234,237,235,221,227,235,225,251,225,239,210,253,224,243,228,241,236,240,228,248,237,242,243,247,239,251,224,238,210,239,238,241,221,235,247,206,239,237,241,247,238,244,233,233,248,229,243,237,242,251,247,245,248,227,224,251,233,232,249,245,249,245,233,241,220,226,243,223,249,255,255,233,223,246,238,238,246,211,221,226,232,228,254,234,243,230,239,231,233,236,248,231,243,231,235,243,196,242,251,234,238,241,233,237,255,249,241,231,214,222,249,238,217,248,217,252,227,247,239,224,209,241,225,227,218,229,244,241,233,245,241,234,228,236,230,236,246,239,249,239,230,241,252,226,232,236,233,246,238,234,238,221,244,239,254,242,245,223,238,239,246,231,237,237,229,216,236,254,239,248,232,246,247,236,243,213,238,237,222,246,231,243,246,235,208,234,251,250,233,236,238,239,247,203,229,215,247,217,245,224,224,231,255,217,226,226,228,234,233,250,230,252,220,233,239,243,249,238,240,237,250,233,232,245,232,245,208,253,227,227,226,211,250,231,246,208,213,249,210,239,206,182,181,208,222,235,246,250,255,173,239,232,255,200,80,94,76,68,10,97,237,249,247,233,229,253,252,184,84,53,81,120,99,46,49,12,42,66,153,181,128,63,76,43,64,76,56,53,31,43,46,29,31,7,33,134,169,152,48,5,21,14,12,42,81,23,21,31,29,19,21,12,25,7,7,19,14,16,24,8,12,29,21,27,39,13,8,34,20,22,39,15,34,37,13,30,18,40,63,98,85,64,40,17,32,42,51,61,27,51,210,238,255,250,205,234,236,248,254,185,223,239,219,227,243,226,246,247,179,89,129,47,35,142,8,86,76,18,15,35,16,26,24,18,53,81,119,128,133,102,95,60,50,4,28,200,225,255,214,210,250,222,255,247,238,246,233,207,252,233,202,201,255,245,254,247,250,255,245,243,233,191,214,205,210,201,151,177,176,206,208,227,245,224,134,137,184,169,144,131,61,52,138,241,245,242,249,249,234,242,230,212,238,248,237,236,228,183,155,132,129,129,129,107,147,119,139,217,125,71,64,4,114,222,242,249,204,216,217,210,217,216,221,213,214,220,224,243,217,231,232,232,223,223,239,216,239,212,232,219,220,212,226,221,209,206,224,204,220,239,206,210,208,216,236,235,211,231,220,232,226,226,222,222,212,228,217,200,209,208,135,123,39,48,55,136,164,197,227,241,240,219,212,208,206,206,221,226,224,231,223,232,239,204,220,219,241,217,225,200,221,200,213,214,216,211,208,218,232,212,237,204,202,216,213,208,201,223,216,234,214,209,223,207,222,215,194,233,217,221,214,206,222,183,211,194,206,203,217,200,240,196,211,205,117,20,3,18,13,7,2,15,0,30,20,10,25,0,5,5,2,6,24,9,0,23,10,9,17,211,229,219,228,241,220,238,238,232,241,246,242,
228,255,209,225,215,240,222,232,240,236,245,231,237,237,241,245,244,243,225,202,246,255,224,223,232,243,232,240,243,217,228,234,238,225,253,223,245,253,243,246,226,228,227,249,233,233,229,244,241,252,244,231,235,229,229,237,224,249,244,235,223,229,234,241,234,235,221,228,239,230,250,238,219,233,201,254,201,240,237,237,242,253,228,232,253,220,225,226,227,237,222,231,241,222,207,232,227,220,224,240,251,240,219,236,243,212,226,217,215,220,234,253,217,227,249,245,247,254,234,248,236,253,237,249,241,220,239,252,255,250,221,237,245,228,243,237,240,232,232,253,238,229,246,232,229,255,247,250,228,236,222,249,253,231,242,240,229,244,240,222,216,247,213,237,251,240,241,225,237,237,214,218,237,251,228,218,239,217,222,238,242,243,222,236,231,230,229,241,232,219,243,243,230,233,215,241,255,253,233,222,247,229,200,194,236,244,241,245,230,239,229,242,229,222,250,242,231,230,226,236,228,241,247,238,236,254,242,239,231,213,232,252,233,231,226,232,229,235,250,243,246,232,230,233,247,247,222,220,248,237,239,247,227,214,230,242,243,241,219,236,228,251,216,221,255,242,221,225,237,229,230,246,244,233,227,250,250,247,218,231,206,242,246,235,237,239,253,223,236,237,226,203,227,220,243,236,225,243,243,229,237,207,240,228,244,247,219,242,235,213,239,239,234,247,211,244,210,162,203,222,229,247,241,250,245,245,248,246,184,253,245,250,188,94,69,66,54,12,109,230,237,247,234,242,232,255,224,80,65,39,65,44,44,67,44,46,55,73,189,171,131,111,41,55,69,68,58,38,43,50,27,48,42,40,14,65,142,90,85,29,14,18,52,76,32,26,32,3,22,1,25,13,2,23,5,27,42,17,9,33,30,39,18,28,12,46,11,18,25,24,6,40,1,6,12,31,58,93,93,53,29,56,34,35,73,48,55,23,80,219,236,254,255,240,235,234,243,231,192,231,251,243,249,235,238,253,244,189,77,97,69,68,165,13,61,73,25,35,9,30,29,9,76,50,66,101,114,88,80,43,52,92,20,100,207,230,248,255,229,233,230,223,246,244,238,240,253,230,251,243,218,243,252,249,255,254,254,244,233,253,210,190,225,210,215,172,168,207,224,223,191,201,187,185,184,199,157,141,136,82,124,211,242,225,255,233,240,218,237,210,250,243,235,228,249,243,162,161,159,145,163,127,145,133,166,169,133,68,60,34,12,144,244,229,252,214,222,222,212,224,221,186,232,236,200,210,224,225,252,255,241,231,229,239,252,203,236,239,198,225,233,216,221,231,213,213,202,222,223,236,228,241,239,241,251,237,250,250,239,224,234,229,232,252,248,215,196,162,132,80,30,19,88,191,234,216,232,226,238,222,221,216,226,196,215,229,224,209,210,216,209,222,210,237,193,224,220,197,214,232,228,217,227,239,237,241,198,219,197,205,203,221,211,222,222,206,210,243,199,213,231,224,192,206,191,220,243,193,212,215,206,211,209,205,220,212,202,232,200,203,220,186,226,120,7,15,3,10,5,13,14,13,16,20,3,13,3,11,11,3,19,9,32,25,18,14,13,1,241,226,211,238,204,203,208,225,235,218,226,229,244,224,236,239,243,218,237,236,227,242,237,253,219,238,244,237,228,236,250,235,225,232,222,254,220,234,243,236,246,224,232,246,226,246,228,243,231,234,243,235,232,224,248,230,254,248,243,238,213,228,238,239,247,246,238,235,250,244,251,237,224,219,243,249,231,249,215,240,240,244,252,232,235,238,222,224,227,235,240,227,243,235,227,212,241,236,234,223,231,246,243,240,222,250,228,254,233,226,231,209,242,246,231,223,237,232,233,240,251,207,246,226,241,235,228,247,235,240,236,249,247,215,225,219,225,240,218,241,229,231,240,234,247,243,230,241,221,224,215,243,237,205,242,239,242,252,225,245,245,248,248,243,249,235,244,241,232,245,241,237,224,223,240,214,241,227,230,239,238,254,251,227,252,223,243,233,228,249,209,239,207,210,242,243,243,242,222,231,231,247,234,250,230,232,254,232,239,230,245,220,240,229,226,227,238,245,237,23
2,238,227,231,239,239,239,234,232,234,249,235,242,239,254,244,246,215,255,217,240,246,230,223,250,255,218,250,203,233,255,246,244,244,235,254,247,218,253,236,249,217,241,231,233,249,233,216,223,245,216,233,242,238,229,220,247,219,230,231,221,236,246,236,235,231,232,227,208,233,226,231,234,230,231,242,249,225,254,237,243,228,234,243,252,242,239,254,225,215,241,249,239,232,222,247,252,241,224,237,230,234,228,247,246,250,244,210,193,185,181,233,233,242,229,249,221,204,212,251,227,198,229,245,240,216,76,82,106,89,3,97,231,233,248,210,228,229,241,234,84,17,23,5,40,68,55,31,35,34,68,113,171,167,115,14,22,45,47,53,41,64,56,14,15,18,47,38,25,58,110,167,114,30,29,16,78,45,11,23,16,4,32,14,22,6,40,18,11,7,0,17,20,39,54,22,25,17,42,16,42,12,25,43,47,39,44,9,36,80,83,99,60,65,72,35,55,111,74,68,15,103,227,253,242,253,243,252,220,250,237,166,235,247,234,228,231,234,224,252,186,81,83,38,50,107,12,102,64,21,18,1,19,7,48,15,27,30,58,92,59,42,37,47,28,18,117,246,248,241,236,224,249,237,242,251,227,255,242,246,243,254,232,198,216,217,245,138,171,162,153,240,254,233,215,203,246,216,186,174,190,225,181,100,88,86,162,135,119,120,91,66,111,205,227,243,249,250,255,255,250,239,225,241,228,254,231,222,176,157,151,180,143,144,142,153,193,148,114,84,41,69,9,39,213,249,228,252,236,226,206,212,226,206,190,211,215,209,243,228,246,250,239,255,249,245,243,250,254,231,235,207,204,209,225,223,235,218,225,234,220,230,214,231,236,246,233,236,235,229,227,231,231,235,231,252,245,236,146,156,96,43,27,37,124,246,241,234,209,218,242,242,238,213,202,219,211,206,213,209,216,225,184,226,222,227,239,209,233,206,208,198,217,230,220,214,216,210,209,209,231,233,219,208,217,217,205,233,228,224,204,225,216,212,199,207,229,214,221,229,183,234,246,226,212,220,221,219,230,208,195,190,202,205,213,208,109,15,3,7,23,18,22,11,10,17,2,13,8,18,11,9,19,12,14,19,1,18,9,9,9,233,231,217,229,216,195,229,246,219,230,235,236,220,217,248,225,232,245,249,236,235,237,243,231,240,246,236,224,248,241,242,252,248,226,234,230,249,247,227,235,239,229,238,225,248,246,226,241,240,223,242,251,225,240,207,222,246,249,246,249,241,250,239,238,239,217,240,239,250,239,217,235,227,242,232,234,226,231,210,255,236,251,251,240,241,247,234,218,228,246,244,223,235,214,233,234,232,222,233,214,232,243,255,243,246,242,228,213,235,244,240,242,245,243,225,248,239,247,238,230,242,230,248,206,224,237,232,236,251,240,224,240,247,236,243,237,232,229,254,237,249,237,253,211,224,247,250,238,224,216,222,236,211,237,241,240,255,236,254,251,224,229,240,229,229,236,254,234,222,247,238,234,247,230,237,230,238,233,237,241,211,226,207,239,213,238,234,252,216,235,211,226,241,232,241,246,252,248,250,233,246,215,233,237,248,241,237,231,242,215,246,219,240,238,223,239,234,228,230,252,232,225,211,235,226,248,244,242,231,229,229,234,233,205,223,253,230,241,238,248,254,246,247,244,230,253,241,247,224,255,243,248,229,251,229,240,239,249,222,241,234,246,224,252,212,218,232,236,235,241,220,224,240,212,208,222,243,244,232,228,244,240,237,245,225,239,230,214,248,211,222,246,228,222,224,235,246,240,242,247,247,223,243,223,246,246,229,247,245,252,237,236,225,245,239,212,250,207,238,237,243,244,232,242,241,239,209,218,214,216,246,241,208,251,215,193,233,246,233,201,211,242,234,251,192,161,148,128,130,53,160,234,234,238,243,227,247,249,242,132,46,1,1,49,75,33,47,46,37,17,50,93,193,184,60,40,13,33,53,44,30,9,5,23,24,17,2,44,209,231,242,238,207,136,135,130,23,16,2,0,29,14,9,19,24,33,19,32,14,16,21,49,24,36,43,44,25,35,35,35,24,26,48,62,47,32,60,54,89,89,89,61,55,50,53,61,79,110,40,58,162,245,248,239,237,229,237,
228,254,190,173,234,244,213,217,249,239,248,242,164,69,68,73,83,88,37,83,47,8,38,6,20,28,10,31,25,45,46,84,102,77,50,92,42,33,140,221,236,250,251,237,247,254,253,252,250,240,235,230,239,255,192,128,61,14,85,30,31,89,81,189,244,250,247,244,252,242,175,184,181,185,225,97,18,24,17,58,44,33,38,31,107,239,232,255,249,252,248,255,246,233,234,245,255,238,247,231,168,139,169,171,183,113,152,136,157,145,49,25,35,39,17,63,233,249,227,239,221,230,226,217,211,230,212,217,243,212,218,204,154,144,155,207,231,245,252,242,255,215,219,242,228,212,214,226,205,211,235,228,223,206,214,218,248,229,189,103,97,104,199,223,214,249,248,252,236,179,119,107,41,2,70,220,241,252,208,173,222,238,243,225,244,217,217,200,214,205,215,218,230,233,221,234,206,220,230,218,236,211,229,233,235,241,220,210,207,231,219,209,231,212,202,207,215,213,229,204,194,211,202,206,226,234,203,207,218,218,248,231,202,215,220,222,189,209,212,218,204,229,214,200,203,212,214,215,130,26,9,9,0,10,25,16,20,35,37,17,22,11,5,1,26,18,7,5,8,13,23,28,9,236,238,224,239,224,243,240,224,232,239,242,234,244,213,222,246,225,229,246,246,236,208,248,242,242,238,236,237,246,234,223,237,225,231,241,245,228,233,241,249,224,253,238,247,226,226,230,229,251,234,220,236,248,232,220,255,231,228,225,215,224,242,238,224,240,247,243,214,234,235,242,217,230,224,227,229,218,233,240,241,240,237,227,240,251,237,227,209,221,238,215,245,240,239,217,233,244,238,230,238,222,244,227,218,226,224,219,226,245,245,225,225,235,217,250,244,224,253,229,242,233,249,247,255,228,252,232,253,225,242,235,245,221,243,242,250,234,243,236,237,212,241,242,209,235,215,242,233,247,232,222,219,245,240,215,238,252,250,241,255,249,252,229,254,252,245,247,236,242,243,244,241,219,237,202,233,236,229,246,202,209,239,216,233,234,229,240,213,222,237,222,236,238,215,241,229,242,248,243,237,229,254,239,253,230,235,230,253,242,241,213,240,228,213,238,237,224,241,214,239,224,211,223,219,228,215,226,241,249,240,238,218,239,240,225,245,232,238,239,230,231,233,252,251,253,240,245,234,232,219,234,231,250,233,237,245,246,230,241,242,251,230,238,239,228,242,216,227,230,243,248,245,218,204,228,237,211,237,221,243,233,241,223,241,242,225,201,227,230,246,249,232,234,246,215,240,241,242,247,253,233,235,230,244,245,233,250,248,233,243,222,219,250,223,238,245,236,230,234,229,228,223,247,247,242,210,211,226,231,223,217,213,188,212,199,197,230,253,232,231,188,252,251,247,230,149,161,138,104,50,168,240,224,249,226,229,252,250,239,122,48,4,17,70,75,66,50,50,37,22,53,51,119,185,114,62,27,45,39,44,39,16,13,27,14,38,28,187,240,246,239,150,132,151,164,184,63,14,19,5,22,21,34,8,35,13,16,5,25,28,41,47,42,63,87,68,64,66,60,61,53,48,43,45,46,15,77,60,62,93,75,80,89,75,45,58,90,53,18,67,162,248,237,255,237,250,229,229,250,219,173,228,232,250,241,231,229,255,231,170,78,58,52,41,81,25,85,35,9,24,8,46,36,20,11,36,30,89,173,182,141,75,91,88,30,128,208,233,247,244,240,236,213,233,246,254,247,222,237,242,228,185,99,25,0,4,9,73,92,6,139,253,247,238,250,250,243,198,178,153,216,230,162,66,56,47,44,45,50,30,20,72,149,229,242,251,255,223,255,226,253,234,254,249,223,228,201,153,156,156,172,123,135,146,146,146,96,27,27,9,25,22,106,224,243,234,241,229,229,220,227,209,207,219,211,208,225,246,173,86,80,43,11,122,109,144,209,229,219,230,226,219,232,247,218,205,216,236,219,220,207,204,217,234,189,93,45,22,0,75,198,238,253,239,215,171,92,78,30,16,153,236,250,252,232,207,208,226,224,240,212,215,208,206,231,212,207,205,209,225,224,193,233,199,220,215,204,193,221,220,223,216,235,205,189,216,225,226,223,230,206,201,215,230,227,219,229,230,224,211,217,221,
228,233,198,205,231,209,205,209,208,208,233,235,204,212,228,224,208,214,230,207,210,235,239,105,22,19,9,4,3,8,23,7,16,23,4,3,8,6,0,9,5,10,13,19,16,23,7,5,231,236,232,232,230,244,219,221,240,242,231,250,239,215,238,225,241,236,232,229,237,240,214,247,213,232,253,246,223,226,233,220,233,235,246,235,223,233,236,244,250,233,247,247,221,239,240,234,219,228,228,232,239,234,243,254,249,237,229,242,218,233,240,228,237,226,248,217,222,235,236,214,223,230,225,229,210,238,217,251,237,242,216,238,230,244,220,230,219,245,243,237,243,229,202,236,230,227,230,240,243,247,229,215,234,238,230,245,228,229,223,223,238,237,249,241,233,247,247,238,246,255,253,252,242,231,251,233,244,231,252,244,243,241,222,245,253,248,243,235,232,234,238,243,235,237,222,227,240,242,208,240,227,223,226,238,244,238,241,251,231,234,224,226,232,246,248,231,229,250,249,233,241,245,240,241,246,233,215,242,240,246,206,233,233,241,233,243,239,238,238,222,239,247,247,231,243,220,238,247,245,249,249,255,235,246,245,255,233,248,233,227,236,210,215,226,248,232,235,224,232,239,242,220,226,218,237,246,213,237,234,232,228,232,235,219,210,246,243,242,251,244,227,198,224,245,246,245,237,255,245,231,235,230,214,241,254,245,235,250,239,242,255,238,225,215,227,236,234,243,226,240,252,207,227,252,222,251,226,225,241,217,229,238,255,247,225,248,252,229,252,250,247,253,229,230,250,226,233,243,243,243,252,234,248,207,245,234,215,242,247,245,237,240,233,233,254,211,232,221,254,251,219,244,242,248,234,228,189,176,232,216,217,224,248,231,251,244,250,182,171,247,229,247,204,171,120,111,90,30,186,236,239,245,215,196,242,243,243,172,131,46,15,25,57,47,45,44,44,21,52,25,77,116,164,144,45,48,38,32,27,11,20,34,25,168,255,253,233,237,224,92,26,5,131,197,129,75,29,14,23,5,14,26,5,32,10,26,17,63,134,90,22,9,57,60,91,81,65,51,39,29,27,25,33,57,54,60,57,91,61,68,67,46,67,93,82,64,11,49,223,238,241,251,221,238,214,236,246,173,182,229,248,254,245,246,247,234,254,244,86,49,69,86,60,54,72,29,15,16,19,25,36,41,28,25,45,95,161,147,119,50,92,122,110,138,193,225,250,255,204,131,65,33,44,166,235,246,249,249,245,156,65,11,1,55,16,31,22,10,127,184,156,139,170,231,254,255,188,165,218,229,203,100,69,54,52,47,32,23,32,6,90,229,251,235,244,255,251,222,247,253,238,252,230,252,146,107,143,161,151,159,149,139,140,127,60,11,5,45,28,24,5,12,119,221,216,248,232,200,222,215,207,223,225,227,244,253,161,69,99,54,5,21,21,5,39,165,180,210,215,202,215,223,223,232,243,208,217,192,234,239,241,250,163,89,81,26,4,78,219,225,237,152,113,69,32,25,92,228,241,232,250,214,173,179,228,222,213,232,242,215,220,226,242,223,211,218,211,203,216,214,211,241,200,207,228,229,224,211,216,211,218,220,210,198,211,223,221,216,226,219,222,209,208,219,189,241,212,216,227,223,208,207,210,220,215,216,211,216,214,235,211,204,215,231,217,219,227,219,218,206,232,186,211,79,15,9,0,19,1,30,18,0,16,28,26,17,7,0,5,2,7,1,1,12,7,7,21,22,235,222,249,229,224,250,230,228,218,236,229,198,236,229,228,228,234,230,204,234,245,232,244,242,233,239,235,224,224,237,218,226,229,235,242,238,241,249,235,242,232,230,248,250,224,238,220,234,223,246,219,241,226,230,236,217,243,240,245,240,235,232,244,228,239,247,227,217,228,222,253,239,230,236,225,216,230,245,219,236,227,230,239,246,236,252,229,213,239,226,237,243,230,244,229,228,222,245,217,247,238,245,212,240,226,217,237,215,251,249,242,237,243,231,216,233,219,233,238,249,209,227,224,240,237,239,240,229,233,233,231,239,239,240,225,242,249,231,216,236,232,225,216,228,230,242,235,243,230,236,218,218,232,237,218,242,238,234,241,253,235,225,233,223,213,230,222,237,229,238,255,226,239,235,255,231
,216,234,206,219,222,213,249,224,229,225,242,236,208,223,228,250,218,253,241,247,236,242,234,245,238,242,240,245,249,252,227,245,221,245,235,236,239,216,220,234,232,248,238,231,228,202,231,232,246,226,242,236,226,237,225,219,214,221,253,227,226,253,253,222,252,243,238,233,246,252,227,235,247,233,242,237,228,238,249,232,230,239,232,228,244,224,249,243,235,208,242,231,246,235,225,225,245,209,236,245,227,238,234,219,241,252,225,253,235,249,240,246,253,240,236,243,242,242,235,221,234,237,227,252,241,247,242,232,211,214,226,243,229,246,236,222,239,207,240,255,244,242,234,240,248,245,247,250,243,222,223,220,162,220,226,189,224,241,233,213,244,216,233,183,173,231,240,246,215,135,154,135,114,37,182,226,219,243,236,226,246,243,237,255,221,122,45,3,4,31,18,31,26,25,34,49,49,34,112,215,122,102,47,17,2,10,8,10,95,238,248,227,209,148,97,43,34,25,25,98,141,81,129,45,41,8,29,19,27,15,17,8,29,151,249,139,36,10,68,99,95,60,51,33,21,13,27,12,47,56,64,72,90,87,68,65,29,38,19,72,57,13,94,152,185,237,240,245,246,234,235,236,242,177,244,255,245,252,250,246,243,252,246,197,119,16,58,108,15,30,72,28,22,23,102,133,64,29,4,24,88,126,166,95,73,29,33,124,133,141,234,242,238,237,174,68,23,11,23,128,241,250,254,255,163,68,24,8,24,2,10,116,156,86,56,37,1,39,2,70,186,234,218,149,199,232,212,120,72,71,23,16,34,36,108,72,125,234,247,245,252,238,252,247,252,247,237,222,239,181,152,150,158,146,171,169,137,134,121,122,94,100,36,51,46,39,28,36,10,105,218,231,255,221,230,212,207,241,229,251,255,250,160,57,29,54,225,161,56,20,27,154,242,237,234,223,212,235,252,237,244,250,232,249,222,222,222,244,225,176,137,68,62,200,254,202,174,107,71,28,38,158,232,234,250,255,221,170,170,243,217,227,206,210,212,227,212,223,212,227,234,226,217,236,220,186,220,228,211,227,216,235,208,222,202,243,231,202,207,220,209,226,213,228,214,198,203,230,215,223,244,224,209,222,222,235,233,233,211,201,223,227,216,216,209,205,203,225,209,214,228,207,239,210,222,196,233,223,206,119,10,10,14,19,12,20,0,14,15,9,8,17,0,5,4,13,11,6,22,32,0,16,29,14,234,226,232,229,250,224,247,222,211,219,239,233,223,224,204,247,232,231,245,253,240,219,227,219,237,243,245,216,222,227,221,243,252,244,235,236,225,218,232,255,239,233,247,244,233,230,244,245,230,245,223,216,245,234,243,244,245,254,244,247,223,227,231,227,242,247,237,229,237,237,223,219,229,245,247,253,239,218,249,224,239,241,232,235,236,242,232,239,227,229,231,239,243,215,222,238,226,234,235,236,215,221,225,231,241,216,223,235,227,241,216,237,238,222,227,205,218,221,236,230,236,240,241,232,235,231,244,250,254,242,236,222,230,227,241,247,253,244,225,249,233,237,240,229,250,224,235,247,241,234,229,247,231,240,217,247,223,235,222,246,228,234,201,216,220,230,241,233,241,223,241,224,213,243,245,234,253,241,233,245,237,212,228,254,245,229,252,243,215,237,239,211,247,245,231,243,242,238,244,234,243,244,233,219,230,246,227,235,239,215,217,243,224,218,220,217,219,240,222,226,217,233,246,234,218,223,233,223,237,236,220,245,234,244,207,238,230,240,255,253,251,252,235,228,246,248,250,253,228,247,243,239,234,245,236,225,237,230,231,248,242,240,236,236,231,232,245,232,230,249,236,235,229,232,255,249,242,240,225,251,238,221,245,246,255,233,247,253,227,225,237,228,231,253,228,249,243,250,234,248,224,246,230,228,225,249,253,230,243,254,244,242,224,233,237,243,249,249,238,244,242,248,249,255,255,183,194,220,224,230,244,241,231,242,206,198,225,234,251,176,174,253,230,251,207,134,140,122,118,64,188,244,245,240,245,236,240,242,237,240,253,155,143,72,15,16,18,3,12,6,9,35,52,45,28,127,192,139,60,17,11,29,15,58,20,48,74,36,76,55,36,46,25,36,1
5,61,81,64,166,136,105,41,16,10,8,7,1,17,10,131,228,108,33,7,56,103,73,30,30,0,21,40,25,32,21,36,64,88,102,88,52,49,23,20,36,53,99,119,166,121,161,220,224,236,232,225,250,254,252,237,251,240,254,226,156,142,163,232,253,189,109,43,77,97,0,43,53,41,27,14,187,191,46,25,3,39,97,167,213,136,108,84,65,105,94,116,237,240,229,245,178,67,52,6,21,190,229,253,249,192,62,25,3,38,3,41,210,245,219,80,14,5,8,13,26,29,63,169,223,173,210,243,237,163,148,73,7,23,12,86,222,174,138,253,222,251,250,229,244,234,238,233,215,231,224,161,125,135,152,182,160,151,138,121,137,180,176,237,180,63,36,31,32,30,41,11,48,201,249,236,232,234,237,236,233,237,199,122,92,53,31,160,231,233,193,43,35,159,219,246,239,250,238,234,224,237,217,249,246,240,226,241,245,247,243,223,150,104,3,104,174,144,112,49,34,84,175,228,240,250,252,235,195,165,173,206,241,212,220,231,218,231,218,202,233,246,211,215,237,218,208,240,227,215,227,250,229,228,249,197,207,217,202,199,221,215,231,213,208,218,207,216,217,217,224,230,232,214,211,197,207,220,197,220,209,225,221,211,227,212,207,214,235,199,215,214,222,223,233,209,214,228,191,198,209,108,11,30,8,8,7,3,15,25,11,23,10,23,20,0,5,5,4,10,24,3,4,24,34,15,238,249,240,211,247,226,212,238,230,229,230,208,226,228,229,228,231,233,239,219,234,220,242,229,231,241,221,230,233,226,237,241,237,198,208,236,209,252,219,218,229,229,236,228,242,250,226,242,251,232,248,230,224,230,230,246,229,226,246,232,235,243,226,246,232,226,242,241,248,225,237,223,227,224,239,230,231,202,243,222,223,216,228,235,241,234,236,241,246,219,242,223,204,247,219,223,230,240,236,221,229,217,213,227,230,209,243,242,215,241,240,224,231,217,231,210,234,239,220,238,228,229,235,224,244,239,247,233,243,230,227,249,242,223,238,222,231,242,228,223,242,227,228,238,212,221,234,232,247,218,224,218,238,219,222,254,229,235,211,242,233,220,227,233,248,226,245,219,225,246,232,213,234,228,215,214,226,238,227,239,235,246,225,221,225,222,239,204,238,233,235,242,225,214,245,249,229,223,218,236,230,254,251,233,231,236,237,246,218,218,244,238,230,240,236,219,214,237,223,215,240,206,223,229,226,216,250,229,218,246,241,210,232,230,241,240,249,245,231,222,254,239,232,226,249,244,246,243,220,252,236,240,236,255,221,227,232,243,243,249,252,254,221,244,236,254,243,237,230,236,249,218,200,216,233,235,233,229,226,239,248,238,255,244,243,249,252,255,242,238,231,226,232,238,240,241,229,240,247,233,243,222,255,255,242,242,217,247,240,230,254,255,218,245,221,250,255,245,217,250,236,224,231,245,244,225,232,237,226,221,220,190,196,205,222,208,246,223,253,215,201,234,229,222,188,155,130,128,102,70,183,252,237,250,237,233,212,224,209,254,236,192,226,155,145,93,7,9,15,11,18,47,75,81,11,73,140,185,81,18,6,3,58,49,29,33,50,42,69,60,34,33,29,52,12,67,63,23,42,111,203,130,89,60,45,37,13,11,45,35,52,46,26,36,16,78,63,26,25,10,16,23,9,17,19,44,66,72,106,34,48,19,26,49,105,213,205,168,116,136,219,228,251,238,239,226,248,255,250,214,247,238,250,128,100,97,63,131,148,154,110,17,90,94,16,64,48,7,52,11,52,115,50,34,12,64,149,204,155,105,87,83,31,59,52,124,242,252,239,219,99,20,12,8,95,219,237,241,160,80,15,1,38,69,106,234,239,251,144,43,39,79,80,74,10,4,9,133,200,184,205,225,219,155,137,53,31,25,39,188,215,237,159,198,255,239,241,253,244,254,233,228,234,222,187,156,151,138,178,165,177,146,99,133,160,213,220,219,217,98,88,139,85,51,34,15,23,91,229,238,223,221,238,249,188,116,26,13,9,14,7,35,159,227,229,107,6,59,193,228,229,242,223,139,106,90,108,199,238,237,240,221,214,232,207,197,186,118,55,48,30,34,51,21,78,180,245,249,240,241,247,208,181,164,239,224,219,216,201,209,216,209,238,235,2
18,225,223,213,224,236,249,224,203,202,208,208,212,200,226,220,235,204,228,222,216,221,191,207,218,221,219,216,219,213,217,202,195,226,219,212,207,233,204,205,215,220,237,231,215,201,221,206,236,209,210,177,234,208,199,216,215,206,229,206,209,121,9,11,3,4,1,29,17,5,14,36,9,15,5,2,9,4,1,13,9,7,7,23,27,52,230,217,236,239,225,232,222,206,224,227,213,248,223,245,224,208,207,242,221,216,240,246,219,240,203,229,215,242,236,232,241,232,203,223,221,226,240,239,208,249,231,232,239,220,230,243,248,247,214,228,255,235,232,232,243,242,218,210,248,216,223,232,240,234,245,243,238,245,242,246,246,232,243,223,244,237,224,232,244,231,215,245,238,231,231,234,210,228,211,227,240,213,240,215,232,233,234,211,241,219,216,220,217,245,226,215,237,221,228,233,237,215,222,234,241,242,215,248,249,217,241,244,224,240,248,224,228,231,219,236,227,236,239,230,210,219,215,244,234,230,239,224,231,234,254,234,224,253,252,226,244,237,243,235,212,213,224,224,234,244,247,207,230,229,208,235,230,233,233,228,229,251,235,240,239,212,216,243,220,225,229,220,240,219,237,233,230,236,242,249,224,241,234,238,232,241,246,218,227,225,223,247,237,238,249,216,235,204,227,220,228,228,242,224,252,216,245,234,232,224,233,237,247,233,237,232,234,220,245,222,229,235,248,201,253,230,231,227,241,235,253,227,219,252,232,240,240,242,240,248,251,220,240,255,224,239,245,232,252,248,237,238,250,229,250,250,255,238,212,250,234,217,231,231,240,224,207,228,244,238,247,226,226,244,247,252,248,245,253,251,232,248,236,247,222,255,247,231,237,229,230,245,252,251,222,246,247,234,237,243,248,245,244,251,242,229,224,241,220,245,236,238,243,255,220,210,219,236,204,195,204,166,213,248,244,233,254,230,252,191,189,233,244,239,160,151,86,107,91,32,194,236,246,250,234,239,231,241,247,239,237,202,228,252,243,151,74,22,0,18,19,48,65,39,49,11,17,90,129,60,19,25,64,52,31,14,48,36,114,106,23,25,13,37,29,51,95,34,12,27,93,135,155,124,72,63,63,48,21,38,39,20,64,11,19,39,24,35,21,8,16,11,38,42,32,57,40,32,69,74,142,141,162,157,153,215,176,116,109,155,210,248,216,233,246,244,208,180,104,56,133,206,145,71,139,120,15,21,9,37,38,11,84,106,69,74,48,42,24,4,65,107,44,37,29,75,171,202,122,64,39,133,131,60,39,53,224,225,199,109,29,9,15,10,44,188,192,161,91,8,14,88,252,240,231,252,253,98,60,116,240,235,226,211,101,13,28,145,194,207,211,236,178,95,36,28,1,25,103,223,220,240,124,183,254,241,252,235,251,232,230,235,216,209,133,158,156,155,153,180,153,138,135,123,188,238,179,123,112,81,226,222,207,105,54,29,5,56,158,240,251,229,243,214,116,43,22,0,9,6,32,14,52,158,211,148,54,22,91,204,245,246,185,111,51,23,14,72,188,176,244,140,75,129,115,112,127,125,123,99,52,17,26,42,145,217,255,250,246,244,219,186,154,203,212,214,209,226,210,202,228,237,215,232,184,216,229,249,245,238,245,248,231,239,210,210,237,195,230,233,226,225,213,213,214,228,216,219,229,230,210,213,217,222,215,223,226,204,206,220,237,240,223,183,226,211,234,221,214,215,206,226,209,209,211,218,212,231,212,213,217,223,193,203,203,119,34,6,4,28,4,5,15,30,15,15,28,19,19,3,6,6,13,4,17,29,12,8,0,8,240,239,234,249,238,245,235,226,226,218,229,218,231,242,208,216,239,241,230,242,231,236,215,240,251,241,232,230,238,245,228,208,233,212,233,221,220,225,239,234,238,247,227,235,223,238,229,253,253,240,231,245,239,236,232,230,227,246,220,241,225,235,236,241,232,244,242,232,228,230,218,215,236,249,232,215,239,240,216,235,226,240,224,248,237,215,215,239,230,231,221,226,237,227,224,222,243,224,223,225,226,228,227,239,228,231,223,238,245,220,239,230,225,240,221,224,240,192,252,222,243,217,214,216,206,244,220,245,226,208,242,217,227,221,236,225,24
8,230,242,233,234,238,230,236,223,242,233,237,231,231,228,230,240,222,239,239,237,216,217,246,208,250,232,218,206,252,232,245,231,236,223,228,248,233,236,247,233,236,236,244,235,229,246,227,234,248,243,237,250,248,226,226,246,227,243,247,245,238,242,235,249,249,252,231,224,243,227,231,239,200,238,235,223,226,239,227,218,235,230,200,227,247,237,248,213,204,234,212,220,227,230,211,230,231,248,252,191,248,241,255,229,214,208,240,231,230,224,232,247,248,239,245,231,252,242,252,238,254,246,209,240,254,251,219,252,230,232,225,213,249,251,222,236,239,218,222,247,250,251,244,235,226,237,234,231,251,237,228,255,241,249,247,250,235,249,224,249,248,234,223,242,248,246,239,222,234,231,237,247,251,252,238,249,221,247,219,240,247,251,236,254,237,249,222,208,208,216,175,188,202,233,216,240,252,231,212,242,237,221,185,195,247,238,254,169,164,128,109,121,70,179,239,243,247,219,218,239,245,225,223,239,221,251,247,237,216,149,150,99,25,9,13,7,8,16,0,26,13,96,126,27,20,13,31,44,20,29,34,71,105,21,21,18,23,39,66,95,45,42,29,71,18,64,155,176,112,46,53,29,37,34,26,14,35,20,40,25,21,32,16,9,24,30,18,27,33,10,29,109,124,184,252,243,238,203,246,160,122,99,173,236,224,254,243,145,86,19,2,7,18,21,15,32,15,27,46,35,47,19,27,59,28,54,99,60,69,37,27,14,0,182,217,61,20,54,97,128,110,107,20,34,183,129,51,29,5,45,69,76,33,11,111,148,54,8,10,20,41,23,30,167,240,247,253,255,253,117,24,81,243,230,239,247,195,91,32,22,165,214,220,235,174,95,63,51,44,2,123,217,200,247,171,95,112,225,235,232,243,238,255,208,232,194,190,107,128,139,156,185,168,170,135,138,146,201,245,165,44,28,55,235,234,242,171,86,24,17,3,91,221,239,245,246,161,76,32,16,23,160,210,139,80,19,12,89,123,68,27,29,128,235,224,212,136,109,43,11,22,119,206,240,187,61,27,0,32,58,49,82,97,106,138,101,134,173,248,237,243,235,210,183,173,184,222,237,216,214,220,198,230,209,223,226,252,236,239,236,250,252,250,222,242,251,241,207,234,224,212,250,201,237,237,205,210,210,221,225,227,215,216,234,208,225,211,230,203,227,206,201,210,233,199,215,217,209,189,221,232,208,231,208,221,220,226,221,203,211,203,210,213,213,224,227,210,219,104,8,14,18,22,12,9,7,6,12,11,25,12,0,11,5,4,25,2,4,7,15,2,12,8,238,225,228,237,209,244,209,215,246,231,222,218,220,242,249,209,231,241,214,237,229,219,232,240,207,231,243,244,221,211,220,217,242,218,216,203,230,239,226,234,232,239,227,230,238,213,248,229,231,242,227,243,223,248,219,246,255,243,229,231,233,203,222,234,236,222,219,231,241,241,232,236,227,246,223,249,223,204,238,238,244,240,216,222,245,239,250,248,234,231,225,246,233,232,239,214,241,217,249,226,222,248,220,232,231,234,255,211,231,245,233,224,231,250,238,252,239,216,209,237,216,226,234,223,245,240,230,239,241,251,233,228,213,246,246,244,242,226,233,234,201,241,246,219,236,229,224,237,248,221,248,234,239,247,232,234,250,246,240,221,242,205,229,245,243,226,233,233,234,203,222,230,247,240,218,219,236,227,237,221,212,227,236,227,240,209,229,242,238,227,233,242,253,203,222,241,216,224,235,241,227,252,251,240,226,243,232,232,217,217,228,229,217,215,228,231,232,226,230,229,235,227,247,252,210,224,225,247,221,215,238,218,225,242,244,236,238,250,232,255,253,244,252,234,229,253,234,237,245,224,222,251,230,254,233,227,236,246,231,249,241,255,237,245,235,252,247,249,247,254,252,243,244,238,216,249,239,242,255,251,235,232,250,249,247,249,252,247,231,228,242,220,247,255,249,233,240,254,248,235,243,232,225,246,239,253,235,239,237,244,253,229,232,237,251,234,239,245,255,219,236,247,236,221,234,190,199,208,206,237,228,211,207,222,203,218,226,243,231,187,219,255,254,247,191,162,123,120,106,47,196,239,222,231
,236,224,250,230,215,238,240,227,201,215,232,230,239,254,216,234,248,207,212,199,208,184,141,147,176,190,162,99,65,55,19,25,29,10,77,69,34,46,18,77,59,76,77,46,47,44,40,25,29,38,96,184,147,132,97,68,8,12,31,28,38,17,12,26,41,32,13,19,18,33,6,92,151,179,228,167,236,255,192,169,230,226,150,97,79,209,239,255,252,165,57,1,0,26,6,30,25,19,48,0,156,199,144,43,18,21,37,54,83,151,111,85,35,9,22,56,236,217,92,32,25,103,135,82,66,68,54,72,80,41,60,17,17,22,35,90,209,244,242,178,88,29,14,25,25,153,247,244,246,252,207,128,20,55,233,236,246,192,65,45,9,1,79,249,252,237,181,119,54,14,46,28,48,196,252,193,142,62,24,61,227,246,254,238,242,243,227,237,182,143,136,134,162,154,173,155,167,129,157,127,191,234,173,124,9,19,111,221,229,187,103,50,11,12,33,136,233,236,229,171,88,42,40,167,238,242,250,176,101,14,8,97,104,64,3,38,192,234,249,145,20,22,6,61,145,211,241,203,132,58,45,34,33,22,35,132,165,196,207,173,115,153,234,226,232,198,144,169,233,220,206,221,229,210,214,224,215,227,204,241,244,239,231,186,139,130,133,192,217,214,226,216,221,204,238,222,209,223,229,248,239,225,219,242,228,228,223,198,215,212,214,218,218,233,221,218,214,210,213,217,223,225,224,225,225,208,243,194,223,226,216,210,208,217,215,229,219,231,222,217,190,96,19,0,1,4,20,12,9,20,26,17,32,25,19,13,7,6,24,24,5,10,27,2,23,47,242,215,231,217,236,201,235,214,230,245,241,225,243,232,246,224,214,224,251,236,213,232,231,244,240,246,214,224,204,221,244,220,231,241,234,211,222,225,234,234,218,223,211,240,223,243,228,206,223,229,215,238,233,238,199,236,227,234,208,228,228,211,231,219,206,247,240,230,214,216,226,227,240,200,226,247,249,230,217,235,238,226,224,228,240,235,227,241,224,237,242,208,221,240,227,247,240,228,208,228,244,227,225,212,223,219,237,213,241,238,239,237,253,235,245,222,228,212,239,216,242,252,228,242,239,241,245,244,229,221,240,215,217,229,241,219,230,228,231,253,230,240,229,230,226,233,207,190,247,236,235,226,252,210,235,212,201,211,232,243,218,241,221,241,250,218,212,232,232,243,221,230,214,234,241,227,218,231,231,228,219,230,215,218,236,211,246,224,217,240,240,231,239,238,234,222,241,238,245,234,216,228,232,234,250,242,229,253,215,242,199,225,227,207,212,227,219,234,231,214,221,222,204,231,243,243,235,222,210,223,231,237,232,248,224,248,239,254,246,255,246,252,242,250,253,243,246,231,255,250,238,239,236,246,247,233,254,246,241,222,242,238,250,234,242,250,238,233,231,233,224,244,249,233,241,254,228,233,241,228,242,253,244,246,240,244,234,227,232,250,235,220,255,243,221,249,231,232,246,215,237,251,228,242,246,252,231,250,216,248,236,242,252,238,243,224,252,212,245,247,234,251,222,248,236,198,238,215,219,243,220,170,181,233,232,209,245,237,218,163,217,239,234,242,173,177,98,120,93,42,192,237,247,246,246,233,250,238,251,179,210,231,228,232,216,229,230,240,238,244,226,255,254,247,251,241,250,253,253,252,241,238,255,249,216,135,83,22,67,46,26,49,86,91,61,68,95,55,46,27,50,35,29,42,13,45,90,162,133,98,18,10,50,43,48,42,14,28,24,22,60,8,22,83,216,249,244,235,188,174,246,237,102,91,222,237,157,68,107,233,241,255,172,47,13,21,15,53,23,32,31,25,41,11,86,227,224,141,51,50,83,62,98,151,127,107,29,15,37,97,250,218,56,32,13,69,49,52,31,66,44,96,69,41,31,22,42,20,32,164,245,254,252,220,206,67,67,28,58,194,234,244,220,152,74,26,13,137,239,212,232,188,82,42,66,135,241,238,244,180,96,13,26,36,57,4,113,237,236,165,106,47,12,23,151,241,225,243,254,232,211,193,152,162,130,157,141,149,162,155,152,138,125,159,159,132,153,76,9,76,147,221,255,224,91,37,40,23,36,25,99,213,255,253,120,42,4,18,99,177,225,237,164,80,19,108,165,125,64,25,49,163,127,108,50,26,1
[raw pixel test data omitted: long runs of comma-separated 8-bit sample values (0-255), apparently an extraction artifact from the image test-data files (non_greedy_mv_test_files) added by this patch; not human-readable and not reconstructible here]
20,124,64,97,44,55,48,140,137,79,28,59,102,95,88,122,97,100,158,93,147,166,183,169,144,112,108,128,106,109,117,124,114,93,123,116,44,87,78,76,61,51,73,39,34,60,42,32,21,39,49,71,34,35,12,24,82,96,83,101,74,36,35,32,40,30,84,24,44,32,22,2,62,219,241,255,208,141,168,146,167,172,192,167,166,130,139,139,181,157,174,111,16,38,55,23,19,27,56,39,27,40,33,22,26,17,36,7,9,18,13,20,4,20,14,16,19,3,46,12,16,31,42,60,49,78,37,81,70,52,85,62,41,84,81,60,103,95,91,53,73,83,77,51,24,24,9,17,27,58,81,36,63,52,52,50,43,39,72,106,120,146,103,55,92,154,152,76,48,28,57,112,88,94,58,59,57,51,59,69,112,97,117,175,238,241,222,237,227,231,254,247,225,240,202,252,240,221,204,241,240,246,241,249,224,240,230,229,233,220,229,235,237,236,235,235,248,239,240,227,212,189,224,234,227,221,223,216,232,219,247,241,246,243,235,216,223,123,2,7,4,5,0,13,6,25,18,13,30,0,17,5,7,13,15,9,9,8,14,14,16,2,203,207,194,204,199,233,205,205,206,223,232,218,218,200,223,222,227,221,214,199,206,200,210,222,216,218,193,197,199,223,187,209,205,200,226,205,239,204,204,207,187,193,209,214,201,184,213,209,202,211,193,199,225,199,225,208,196,232,196,207,217,201,198,227,202,207,215,199,225,192,228,200,226,225,218,214,197,223,206,202,241,210,204,228,232,205,186,211,216,213,205,210,206,209,205,193,218,206,215,198,209,195,200,193,210,217,223,226,199,218,208,216,198,202,230,215,193,228,213,221,223,223,218,200,187,245,234,225,208,232,227,212,217,240,200,217,223,229,214,233,214,195,227,213,202,226,211,207,225,219,222,203,192,206,209,220,213,192,205,237,223,222,206,214,236,217,224,215,193,190,233,230,222,226,198,226,225,205,228,202,211,214,219,207,210,229,209,236,218,213,219,217,218,206,230,209,203,216,213,213,207,210,198,202,200,214,217,202,235,219,214,212,219,216,223,209,185,214,239,213,208,224,214,202,209,213,214,215,212,222,240,200,211,219,194,213,203,224,243,212,203,244,231,234,216,246,207,234,226,222,225,227,242,247,216,216,205,217,253,226,208,243,227,220,232,216,232,230,225,227,233,247,235,243,226,215,228,244,223,233,227,237,234,240,248,231,242,241,244,234,246,245,236,224,219,246,229,252,236,237,233,237,226,248,244,232,234,242,229,231,244,245,229,237,227,243,238,246,221,255,221,208,171,195,161,140,232,172,135,200,224,219,218,203,227,229,226,209,229,180,210,246,251,251,190,148,151,94,59,81,203,244,251,215,203,187,192,241,211,213,241,215,238,221,229,234,215,216,156,176,223,174,222,206,195,197,170,198,196,206,174,195,192,186,215,159,166,217,208,196,162,119,67,206,249,237,157,57,78,112,46,102,125,179,101,51,196,247,162,84,204,218,207,204,202,210,154,194,245,149,123,165,196,125,86,191,234,244,196,222,172,121,114,176,232,221,234,230,196,209,210,230,220,158,227,246,225,231,240,253,118,78,59,61,118,43,14,35,141,116,81,101,100,133,141,136,118,41,37,63,151,115,81,137,98,153,119,102,149,150,135,134,154,122,122,102,114,97,71,88,57,51,43,57,17,13,6,63,77,37,68,24,13,68,91,78,56,17,55,74,68,53,59,80,66,54,71,88,129,81,39,43,46,38,24,39,34,30,29,13,127,239,254,253,190,128,139,159,178,185,154,151,152,124,165,155,167,157,146,55,58,24,18,28,24,47,48,40,42,6,30,21,19,27,37,6,18,10,11,34,19,31,25,37,44,11,2,15,0,13,48,65,56,27,35,26,12,36,54,52,98,101,86,94,74,57,65,67,96,105,77,87,77,69,65,65,32,26,40,48,39,55,47,39,36,23,46,104,147,117,118,28,78,145,115,66,37,44,97,94,66,31,94,76,65,44,22,22,26,51,104,160,237,232,233,245,238,248,255,244,251,226,243,233,251,228,246,228,247,253,235,222,206,232,238,225,216,251,233,221,226,248,230,223,235,249,210,200,231,219,223,192,216,219,222,221,232,242,237,196,232,221,234,236,235,105,0,2,4,11,24,12,17,8,12,1,3,18,9,13,17,27,10,8,18,
0,7,5,20,17,182,195,192,220,199,200,214,208,199,218,198,214,202,181,214,208,208,212,204,210,207,202,206,189,208,200,192,220,177,176,213,229,234,226,203,207,231,208,202,184,208,205,209,203,215,217,210,190,192,215,180,196,197,189,190,184,214,207,201,182,195,193,222,198,206,220,216,234,189,202,226,215,187,188,230,223,214,229,234,216,207,214,209,218,202,203,203,215,202,211,205,213,235,216,222,213,211,205,209,236,208,233,195,244,219,213,215,202,203,219,224,198,218,198,216,200,216,201,218,213,218,213,203,183,214,192,224,209,225,217,219,215,208,213,222,190,210,223,221,232,191,196,209,203,224,208,200,232,233,205,202,219,213,219,175,193,221,222,194,203,204,196,209,229,210,206,230,204,197,223,233,222,227,198,212,202,213,210,219,231,225,213,192,197,210,192,197,188,217,208,193,204,210,220,227,207,193,201,224,218,224,213,215,211,217,213,248,195,208,198,219,196,238,214,214,208,210,208,205,228,205,207,216,213,220,219,243,219,200,225,217,236,220,208,231,199,200,225,230,212,210,237,240,224,223,235,238,203,233,213,235,225,250,229,239,206,206,251,236,241,229,205,231,241,207,197,228,222,237,240,248,236,231,212,240,237,215,219,226,242,250,249,235,249,228,229,227,249,234,235,241,249,248,219,240,229,252,245,247,238,246,229,220,249,238,211,235,229,242,236,240,246,215,249,255,248,233,235,249,218,102,75,77,127,177,188,236,143,90,176,236,225,239,222,229,239,218,209,225,202,219,244,239,230,151,164,115,113,61,58,221,236,225,236,206,189,227,229,196,224,186,237,226,217,224,243,230,209,178,188,230,182,193,221,237,207,128,229,202,154,138,136,185,186,211,164,206,193,234,181,135,95,85,236,253,250,150,84,122,105,98,38,70,151,166,168,206,254,175,69,187,184,227,236,204,197,169,214,233,167,144,173,196,66,44,187,240,242,178,202,161,109,110,219,219,230,247,230,208,205,220,255,186,151,224,249,220,247,243,237,133,98,111,107,115,46,18,23,105,127,82,74,95,99,90,70,32,30,47,112,121,99,115,132,116,118,115,150,171,169,122,105,87,85,63,38,20,13,24,10,27,14,39,69,56,12,13,33,80,83,82,12,43,113,108,147,93,63,95,106,97,41,98,127,73,42,13,88,120,112,52,40,33,42,40,26,30,49,15,66,220,238,251,227,149,141,152,132,152,146,153,151,119,133,144,128,128,165,118,65,18,15,31,46,28,23,48,19,11,36,20,18,24,32,30,33,37,38,20,29,0,33,15,22,26,42,33,27,14,5,56,53,51,41,32,0,5,35,12,41,36,24,67,75,71,93,73,99,106,82,79,50,68,77,60,85,65,74,47,69,39,28,10,14,33,72,63,112,109,122,102,89,80,131,94,58,50,72,98,102,107,135,147,153,98,140,62,56,29,31,61,147,221,243,238,243,228,230,230,230,242,233,222,242,239,234,234,237,237,232,220,252,199,249,255,225,237,222,235,233,217,241,232,225,212,219,236,212,239,223,231,222,245,234,203,248,243,215,213,240,226,218,234,232,220,124,7,2,10,1,3,8,15,2,51,3,4,1,19,7,4,10,2,11,16,8,13,48,5,16,196,210,192,196,228,214,205,210,195,205,212,183,229,204,211,218,209,222,197,217,185,189,217,209,206,212,185,206,206,210,229,184,207,201,214,201,178,212,205,198,223,203,227,208,204,213,203,191,207,209,195,214,200,196,195,192,198,203,214,187,213,190,197,228,203,204,188,193,199,193,208,222,162,211,218,223,197,216,209,215,197,211,195,181,179,217,207,212,215,234,188,227,225,211,220,209,210,239,224,217,203,204,216,223,216,204,203,193,219,213,213,196,197,227,215,206,226,212,197,214,230,219,210,211,221,212,212,218,213,233,219,223,215,198,201,223,217,198,236,207,223,201,210,211,221,213,203,197,239,241,212,206,208,215,199,235,222,201,194,214,208,200,220,210,224,219,207,212,220,192,177,202,229,210,209,209,213,227,214,195,225,208,200,212,242,193,236,203,223,212,212,231,225,209,239,242,207,212,192,226,206,246,217,234,206,206,217,221,205,206,231,223,202,22
1,206,219,216,217,225,226,227,210,221,190,228,238,219,245,215,228,213,197,217,210,215,202,235,182,220,225,226,221,240,226,221,234,198,212,211,219,214,235,247,234,211,207,215,232,217,212,227,232,219,220,221,228,247,199,239,246,228,229,237,237,206,243,212,224,219,226,246,216,225,236,221,230,239,237,236,243,241,227,241,239,225,237,239,238,249,234,240,236,239,227,238,233,227,228,220,234,226,236,232,234,241,239,215,215,166,167,99,58,44,150,236,198,223,132,111,235,219,228,239,205,213,214,205,207,218,193,206,234,244,251,173,130,135,95,50,84,232,225,246,225,217,230,243,255,239,200,229,230,234,214,234,250,180,191,184,194,223,184,193,189,236,195,104,191,141,153,175,160,160,209,227,175,209,192,231,180,187,102,83,229,209,253,169,59,82,113,81,47,4,79,117,196,241,222,190,67,204,203,236,209,178,189,183,189,183,150,141,240,186,66,119,154,234,223,210,214,159,93,167,224,244,254,231,245,214,202,202,228,175,189,243,240,219,234,246,252,172,78,83,94,140,63,41,22,69,106,113,61,18,47,62,39,47,71,127,150,133,73,69,117,97,116,90,86,63,19,42,12,18,22,24,63,96,53,37,15,13,20,72,95,82,99,87,56,122,110,93,9,73,103,123,109,118,159,112,105,128,36,45,140,90,63,36,98,120,66,36,19,18,25,42,29,20,47,39,82,239,255,247,252,191,155,149,168,176,148,154,155,126,133,171,175,173,175,105,44,51,25,24,31,36,33,5,30,13,51,35,17,27,25,40,31,22,12,15,10,30,30,37,39,13,8,29,20,16,9,45,61,61,38,23,43,41,73,82,48,67,64,51,38,14,15,24,58,60,77,96,105,83,76,82,75,79,80,70,69,96,57,43,72,49,75,41,91,120,130,101,89,110,92,76,58,48,102,98,115,113,83,71,48,107,145,86,61,54,37,68,107,217,235,244,249,247,244,213,237,239,229,235,227,229,210,241,240,235,249,251,230,223,251,242,233,230,220,236,226,239,253,215,226,235,225,226,243,215,230,200,200,226,226,221,237,233,233,243,231,227,232,231,209,214,112,8,25,6,19,12,22,13,17,22,3,34,12,16,7,15,10,4,12,2,9,3,30,19,2,218,206,211,181,234,194,211,206,209,207,202,208,199,209,196,197,210,212,198,210,192,193,199,206,194,205,204,221,213,230,227,191,205,172,223,210,190,184,207,197,213,219,181,184,180,221,209,211,228,215,180,195,182,218,223,193,189,209,222,200,198,192,218,189,201,203,186,197,226,227,199,213,213,206,192,228,209,214,178,207,205,213,215,193,217,202,214,216,178,203,220,212,222,197,183,203,227,218,209,214,193,210,215,241,200,204,191,215,233,218,228,219,201,207,211,207,227,203,216,205,216,195,212,207,210,212,209,221,213,212,223,203,200,225,210,223,214,206,214,228,212,212,208,241,221,215,244,223,218,186,207,194,208,189,213,207,191,210,216,237,214,230,209,220,205,207,213,208,215,223,221,234,215,205,215,231,233,207,229,234,230,197,241,221,219,210,236,208,208,215,210,197,203,201,200,235,214,231,212,211,214,221,194,220,204,220,231,230,222,216,231,215,202,222,241,216,228,216,223,212,249,231,214,217,218,221,216,214,214,216,236,223,197,239,237,213,246,236,226,219,209,204,204,225,236,243,208,203,211,213,249,247,231,224,229,174,222,246,209,208,217,228,229,191,228,213,225,230,238,250,214,224,232,220,228,211,238,213,220,241,221,235,245,226,242,236,235,247,232,241,215,246,236,239,209,236,250,237,235,237,236,242,222,246,227,223,251,236,234,255,242,240,244,235,213,193,176,182,205,219,161,98,118,224,203,187,223,129,125,212,249,223,205,204,228,223,193,181,236,164,206,251,230,240,202,162,137,104,49,79,218,232,226,228,178,222,255,243,233,214,202,216,193,223,218,228,204,190,132,148,214,191,176,177,189,149,149,177,190,185,228,182,187,192,240,185,182,200,222,181,174,107,70,242,227,255,188,51,87,132,127,38,24,63,48,144,223,236,143,84,199,205,238,217,142,182,205,160,118,158,156,226,156,78,112,151,239,217,206,249,174,88,151,254,2
25,219,229,214,207,216,236,216,185,185,224,215,253,238,250,255,205,97,99,96,136,118,84,13,43,92,132,80,65,34,42,64,92,108,83,141,56,28,125,123,109,114,51,31,19,24,23,21,34,4,49,96,84,65,55,20,27,16,74,126,114,112,124,101,105,99,32,35,70,117,99,82,89,122,135,129,104,49,29,63,91,58,103,108,85,75,47,47,17,51,47,42,52,44,25,164,250,245,248,233,180,149,173,152,163,185,188,150,169,157,153,158,170,154,92,15,36,37,17,36,30,17,13,45,31,27,32,30,19,17,9,27,32,2,8,16,36,21,41,34,43,30,26,24,19,25,67,81,41,56,27,26,74,73,48,54,54,54,37,4,55,29,22,45,36,42,20,45,41,48,62,92,64,83,79,57,41,62,93,87,99,89,85,123,132,121,113,45,85,134,89,62,42,74,149,135,90,56,55,25,46,113,128,98,43,76,52,79,219,230,226,240,205,229,252,245,242,241,248,236,227,233,215,219,223,239,214,240,229,226,225,236,224,233,241,233,245,233,234,220,218,229,217,211,226,234,220,222,229,248,248,223,207,227,218,228,234,233,234,231,203,100,1,3,0,10,22,16,38,18,7,12,23,17,4,1,0,16,10,14,7,2,13,24,3,11,201,209,179,211,199,183,197,177,209,233,200,204,182,195,207,217,212,198,179,201,201,198,199,216,217,202,204,204,212,216,200,208,211,214,184,189,203,214,227,191,205,206,188,207,174,200,195,219,202,227,215,233,195,197,212,198,215,227,207,221,207,210,178,185,219,201,209,219,212,185,208,203,187,214,220,196,208,230,191,175,212,204,216,200,211,207,208,189,212,209,203,194,209,203,224,194,206,209,204,195,218,176,214,213,200,205,208,232,209,220,199,217,222,229,206,215,196,212,201,196,209,206,223,210,205,232,223,226,230,220,247,208,212,217,204,222,211,224,211,215,212,221,199,221,242,193,196,215,203,212,177,200,213,207,206,222,205,196,215,181,216,212,213,226,224,229,205,217,227,201,180,202,189,216,211,213,222,225,228,214,218,209,232,196,233,238,216,220,213,204,214,205,211,231,224,204,218,205,220,193,233,177,212,236,199,191,230,214,192,221,212,207,236,233,211,237,221,222,224,237,217,222,213,228,199,197,225,221,218,180,227,217,226,224,245,231,212,223,238,249,216,213,223,207,198,230,225,220,218,230,218,222,214,203,193,218,225,216,215,234,193,226,244,215,219,231,238,222,242,223,226,237,226,231,244,230,240,228,232,225,247,250,232,210,245,234,247,234,238,248,224,224,238,233,239,239,235,230,234,241,222,250,212,245,239,245,239,244,240,241,234,196,176,170,176,204,206,211,244,249,204,108,175,232,207,247,221,116,107,212,228,238,193,187,236,253,225,186,228,193,222,237,198,249,208,187,145,112,57,57,200,228,247,207,147,165,248,238,231,203,219,229,210,214,222,233,220,218,173,131,172,174,190,179,151,156,195,193,182,207,209,194,178,229,219,177,190,181,197,163,183,83,89,234,220,242,163,49,71,126,99,53,41,39,11,102,196,245,137,54,191,201,243,175,111,176,184,84,133,187,209,192,100,93,122,127,213,179,215,230,167,128,168,228,241,239,241,239,234,219,209,240,169,221,222,239,229,242,254,238,188,74,46,108,135,132,84,46,24,51,114,148,144,152,119,120,152,161,152,119,34,31,111,122,105,131,51,9,15,19,6,18,49,35,106,133,121,137,67,41,25,22,49,106,101,102,89,126,88,108,78,37,63,122,101,36,44,59,87,102,80,74,36,18,89,129,115,98,75,64,34,48,44,77,68,48,94,49,88,250,249,231,235,247,202,153,139,154,171,220,199,151,127,149,166,160,152,119,84,36,44,43,23,38,36,26,18,22,36,38,11,11,35,14,46,46,29,15,16,4,21,26,14,19,35,23,16,14,9,37,84,80,46,6,30,38,27,60,87,45,73,49,36,23,44,19,53,79,107,65,70,54,46,29,17,47,47,60,60,76,82,73,58,91,91,81,108,101,105,147,96,95,91,122,80,45,57,48,111,123,115,72,64,113,95,102,140,90,52,52,40,77,200,235,223,239,233,237,234,213,239,246,249,241,219,237,230,240,233,208,227,219,249,225,245,234,217,224,218,206,230,232,227,208,239,206,229,210,226,224,228,227,199,220,222,
244,232,212,234,203,222,226,238,236,204,118,5,18,26,22,16,11,6,11,36,7,19,21,1,6,31,1,8,21,1,6,7,1,27,26,198,195,214,187,223,206,192,201,201,202,198,207,210,207,210,214,209,195,217,231,225,221,195,194,189,185,209,205,224,213,205,217,190,185,216,199,223,217,205,200,200,201,220,198,222,204,203,216,201,206,211,208,205,203,212,212,194,190,192,207,184,197,201,209,203,184,206,199,229,214,224,207,198,204,190,214,222,227,194,217,194,199,194,203,236,215,204,214,215,232,200,212,207,215,214,207,207,210,194,209,203,211,215,220,206,208,211,188,202,201,221,210,195,211,214,215,196,214,223,209,212,187,219,185,204,218,225,217,209,202,229,214,205,199,221,199,229,192,215,208,201,222,210,187,233,220,195,211,214,198,188,209,196,190,198,196,200,215,218,206,186,222,208,221,203,222,198,226,203,208,204,228,241,221,222,213,211,221,224,214,184,206,194,189,212,222,229,196,209,205,227,242,224,210,225,205,190,227,208,175,205,214,209,213,222,199,192,206,200,245,215,226,218,222,230,226,222,226,195,200,242,230,220,226,229,207,225,236,204,211,212,217,239,196,216,218,208,239,238,200,221,242,211,225,210,200,207,199,224,202,196,197,215,220,195,210,239,211,215,228,235,224,219,208,245,224,216,243,215,225,221,225,224,222,225,244,225,236,230,241,245,225,229,231,209,225,221,235,233,232,228,209,232,232,247,226,246,240,238,237,230,231,239,248,243,247,230,246,224,200,196,196,187,197,193,218,237,251,230,247,177,85,159,237,217,233,183,89,115,235,222,240,208,205,245,246,206,205,243,186,227,248,253,254,182,142,122,102,62,73,216,237,251,122,34,59,135,243,229,213,235,224,206,232,227,219,226,184,153,148,230,175,176,225,162,167,210,180,192,211,195,150,110,219,186,190,168,211,226,166,196,94,81,240,243,254,174,48,79,106,111,51,57,39,17,47,164,229,153,117,201,205,243,160,146,236,175,171,179,206,220,138,98,177,191,129,147,177,230,217,137,99,167,210,238,245,229,216,218,217,240,213,187,222,227,221,247,253,251,254,116,55,35,47,127,153,144,77,41,36,47,109,137,148,138,119,122,143,120,76,5,37,118,92,136,86,33,38,18,20,24,27,43,78,114,101,100,120,102,89,32,35,75,100,73,38,1,53,90,95,64,49,35,106,85,73,30,15,55,120,129,112,62,59,122,140,129,95,105,96,104,96,153,156,136,145,138,66,150,224,241,253,251,228,187,138,151,203,212,245,205,155,135,132,160,190,170,116,96,77,78,93,63,84,49,59,47,66,66,63,45,54,76,70,47,23,40,43,26,4,29,49,17,33,27,37,47,17,18,29,56,66,31,30,33,54,75,43,28,74,58,53,49,34,35,49,78,152,103,98,95,69,46,32,12,34,99,55,61,55,42,37,77,77,102,76,98,81,108,150,101,83,98,79,101,74,49,24,86,126,180,159,148,92,78,132,121,104,60,34,53,91,205,215,220,236,233,233,242,226,245,221,235,242,216,237,231,219,211,239,244,239,229,244,218,236,237,203,226,219,228,243,221,235,218,226,225,243,234,228,236,238,215,211,248,205,212,227,227,202,212,228,220,223,228,122,26,12,0,17,6,9,16,17,7,30,12,14,12,0,0,18,21,0,7,12,15,30,17,3,192,189,210,199,209,176,211,210,215,194,213,230,203,212,216,180,196,194,205,185,187,229,200,200,220,197,223,204,198,192,181,215,218,206,198,206,197,191,205,208,220,210,196,193,204,203,164,205,209,207,192,214,217,213,236,209,213,194,211,187,219,184,214,224,210,209,187,214,213,183,189,217,190,185,214,189,211,190,194,191,208,196,183,195,194,185,195,189,211,214,231,198,201,206,185,211,193,227,227,191,203,207,190,207,228,218,201,231,223,181,224,185,225,217,197,201,227,194,217,213,194,210,214,234,217,188,226,209,212,222,190,218,208,221,180,197,202,224,205,200,213,203,214,206,214,216,195,198,207,212,212,197,226,207,207,208,226,211,217,189,230,205,204,194,194,190,188,219,203,221,211,209,239,219,211,199,203,218,197,218,206,224,215,211,188,199,233
,221,216,182,189,213,212,211,209,225,240,220,199,236,214,224,180,231,198,193,204,216,214,229,203,215,204,232,233,213,205,185,201,205,214,206,207,221,242,205,216,190,222,239,244,220,199,227,210,215,217,225,218,225,237,220,215,213,209,240,227,217,231,215,209,232,214,214,177,217,228,219,219,202,238,209,223,235,225,227,244,231,212,208,217,242,230,234,218,213,221,231,228,211,224,223,223,217,227,249,205,221,237,207,252,246,223,238,235,225,244,235,211,237,216,249,233,255,247,191,185,165,155,185,183,207,225,243,238,240,228,241,224,250,159,73,175,227,176,167,146,87,151,239,244,244,248,218,248,251,204,225,246,203,213,204,196,189,142,155,154,128,85,115,205,237,197,44,5,4,132,224,225,210,223,209,218,228,238,230,192,167,155,177,182,203,190,197,198,187,216,156,167,193,186,158,140,228,183,172,204,238,238,194,188,88,125,244,239,235,187,67,70,124,132,41,47,84,22,65,206,237,176,157,223,215,240,189,160,246,229,173,141,147,209,147,92,228,235,147,126,181,253,197,83,110,197,240,219,253,226,248,238,228,243,195,164,224,243,250,244,249,239,128,41,1,9,38,79,113,166,139,69,65,35,33,80,140,100,53,54,136,138,86,27,70,120,110,140,100,35,24,15,23,57,33,87,113,99,49,75,105,128,97,43,23,64,103,64,59,0,30,64,83,69,84,110,138,156,120,134,123,152,143,177,138,133,165,132,126,102,126,131,113,125,117,127,140,116,88,112,78,182,249,245,252,251,221,179,150,163,200,253,226,177,104,110,131,171,224,152,128,92,96,65,31,58,55,59,41,52,58,68,57,52,64,67,58,67,46,34,67,47,42,72,69,34,64,59,35,38,29,47,71,61,69,37,11,40,82,67,65,80,50,58,52,6,53,67,54,72,104,92,77,40,20,24,41,35,79,105,84,43,20,17,15,34,20,59,67,64,92,104,112,93,84,51,109,146,120,69,58,21,69,70,84,85,51,95,139,129,67,74,49,47,102,208,227,218,248,238,238,235,247,237,245,219,219,248,201,238,231,238,218,233,221,221,229,222,235,247,232,243,226,244,220,244,214,223,241,202,217,248,231,208,211,223,227,217,211,227,214,213,218,205,215,225,232,223,131,4,9,26,0,1,11,19,16,0,10,16,11,9,7,14,30,8,30,0,2,4,36,0,15,209,185,200,185,219,193,222,198,191,195,195,182,205,197,188,195,202,221,222,201,230,213,183,191,224,200,207,201,197,206,179,199,208,216,204,221,199,185,197,198,218,180,214,202,215,230,234,170,193,200,222,213,204,206,208,199,215,197,223,197,228,193,213,182,195,198,216,211,191,229,189,212,211,185,218,221,205,200,193,193,226,182,211,190,173,224,192,202,189,214,219,198,198,188,206,183,191,229,208,189,221,204,200,169,211,199,194,227,210,232,183,205,189,201,199,200,201,214,220,224,217,193,223,211,185,235,209,207,197,176,205,207,212,195,218,214,205,209,206,206,218,207,172,232,212,204,213,213,210,215,197,205,200,186,170,186,229,198,200,215,207,207,205,231,190,198,206,197,215,194,218,215,186,203,211,197,193,199,208,213,219,230,213,196,229,203,194,210,191,201,212,208,201,216,205,218,210,235,180,226,208,200,211,207,225,224,215,205,204,194,196,220,219,183,210,220,217,197,225,201,214,213,233,214,233,211,192,201,217,234,221,204,213,204,208,210,217,221,210,220,217,220,202,198,209,198,224,196,212,204,204,212,202,194,207,217,228,210,203,232,232,209,219,211,224,207,230,230,224,228,221,230,233,214,213,228,231,213,226,228,233,223,226,211,230,198,239,235,245,227,211,243,228,228,233,245,228,238,231,227,234,219,197,207,183,178,182,199,194,216,221,238,248,242,230,238,208,250,243,213,183,84,159,225,177,160,98,90,131,207,200,239,182,172,223,208,193,207,226,191,132,97,141,121,139,154,161,187,147,148,170,160,139,65,12,14,106,232,254,213,239,228,244,232,230,205,224,197,181,178,182,202,214,204,147,147,162,108,188,197,208,190,188,251,210,228,243,248,240,202,183,59,94,235,247,253,176,59,105,122,108,67,41,43,
43,100,200,252,201,156,226,231,254,192,202,247,231,179,85,170,255,120,49,198,255,188,128,159,248,198,53,137,214,241,243,241,210,229,239,246,229,171,190,247,240,246,252,242,160,44,5,28,13,35,32,70,116,109,151,104,87,22,13,76,107,46,9,41,92,80,18,80,131,127,160,75,20,41,24,49,43,55,103,112,120,73,51,83,38,115,88,92,115,97,73,57,91,128,133,145,154,157,172,180,150,141,148,145,153,123,108,141,115,131,112,96,137,117,114,102,104,106,101,95,87,93,63,80,213,241,244,245,250,213,144,188,210,237,244,201,164,113,133,120,184,233,147,81,81,75,65,77,54,79,46,77,61,78,73,85,54,78,69,59,68,73,61,61,48,51,48,65,69,60,53,53,77,59,67,76,80,75,62,41,31,45,85,68,109,71,53,24,7,49,69,79,56,98,55,31,7,27,15,31,28,65,70,72,63,25,37,1,12,45,40,10,25,34,43,96,109,71,74,48,106,140,115,71,61,37,50,60,49,110,107,147,117,67,45,68,78,119,234,234,244,223,241,238,238,232,211,247,240,220,187,223,220,241,234,224,228,216,234,236,233,244,208,245,223,233,217,212,216,240,223,238,222,229,228,240,225,230,230,204,207,225,216,217,225,229,247,221,227,229,244,112,11,14,41,2,13,7,17,6,6,10,24,22,14,7,6,0,7,8,9,5,14,0,8,1,209,192,199,194,199,213,224,207,179,183,211,216,191,212,226,185,222,198,207,190,197,205,177,194,192,209,190,210,202,212,204,194,220,223,205,209,225,197,217,204,199,186,177,197,203,196,205,203,220,229,206,211,201,186,184,215,207,201,205,195,195,193,193,212,194,214,189,212,205,183,222,194,204,188,189,198,225,185,225,212,213,209,211,229,211,202,190,203,205,214,198,180,204,234,191,186,200,211,219,197,219,204,229,208,201,210,206,205,186,205,209,215,208,190,194,227,198,217,198,201,211,198,225,207,231,207,203,207,190,203,209,215,185,196,193,227,209,184,222,225,196,209,201,228,207,212,203,227,211,204,184,196,196,215,231,177,189,203,199,204,210,187,200,191,200,184,195,194,214,203,210,188,202,210,206,188,212,199,210,211,220,180,201,200,224,214,227,203,206,201,217,211,215,220,207,196,210,223,228,224,223,223,210,194,208,212,211,239,203,204,233,231,217,204,222,215,215,220,217,205,238,211,223,234,213,222,207,208,227,202,235,213,222,213,210,200,204,231,216,204,196,218,213,215,182,233,194,225,234,195,217,218,219,205,197,223,205,201,224,215,212,208,202,223,236,225,238,221,241,234,220,222,228,219,214,223,235,205,237,227,231,237,228,251,233,200,240,218,248,232,242,209,218,246,207,227,234,227,232,214,208,164,170,186,204,234,213,229,255,236,248,215,223,229,238,240,209,220,222,230,174,93,148,235,202,170,127,95,111,146,129,149,129,125,164,143,151,196,200,128,89,97,131,124,154,172,158,172,169,164,147,175,149,93,34,46,159,241,255,220,255,210,234,221,238,222,225,211,178,218,176,177,149,157,156,134,194,153,206,253,225,247,199,235,194,197,167,154,127,90,107,19,53,194,255,234,146,69,70,128,89,72,42,74,10,77,238,241,240,161,181,253,226,185,191,248,216,131,142,232,238,129,91,236,235,233,136,97,153,172,91,140,233,225,238,249,233,230,232,248,218,136,182,228,243,239,254,187,61,0,9,6,19,68,57,20,40,105,118,158,94,41,45,55,104,102,34,27,41,54,45,112,134,109,127,49,41,48,19,27,58,57,108,97,91,52,38,49,77,134,124,157,120,120,165,159,162,152,139,148,132,102,164,140,122,119,111,103,104,53,83,33,56,81,70,66,53,46,62,71,33,19,62,55,46,47,20,50,240,248,247,248,251,223,168,192,243,255,247,165,138,140,145,163,219,211,109,68,15,23,19,52,20,24,37,21,45,54,64,58,48,55,56,58,82,74,95,42,78,67,62,50,67,31,63,76,50,53,83,72,66,82,52,95,67,65,82,80,67,58,30,26,31,46,93,70,108,98,78,53,51,21,27,35,46,92,83,62,41,28,28,35,16,15,5,17,38,4,61,88,105,88,49,34,68,127,136,122,110,108,76,72,116,140,104,98,56,32,48,62,92,207,230,241,240,225,207,226,229,213,223,236,236,248,209,239,
228,249,229,247,247,235,222,211,226,225,226,224,230,239,208,238,240,221,231,239,229,219,204,209,225,224,226,214,233,226,215,211,220,235,213,199,208,231,228,123,18,18,1,11,28,3,5,32,17,8,24,9,15,3,2,0,0,18,5,11,10,27,8,16,177,208,201,164,186,172,195,202,196,211,203,214,171,173,186,176,200,190,202,208,196,218,194,190,205,204,187,204,213,157,201,182,192,207,196,202,179,192,207,211,205,199,197,208,221,198,180,223,189,176,196,185,218,204,197,170,202,202,192,198,197,180,206,205,196,210,214,183,208,205,199,195,188,178,210,210,192,202,216,198,204,204,190,207,196,192,226,204,193,188,203,160,216,215,216,185,186,196,199,229,208,199,194,189,219,215,201,194,196,219,207,187,179,215,181,193,199,204,214,204,198,196,233,232,200,212,237,212,221,216,211,203,199,216,236,197,225,201,205,220,226,205,215,211,215,209,208,192,204,202,198,217,220,207,193,227,207,199,232,217,187,200,214,191,202,199,183,206,221,164,199,221,213,219,194,213,231,213,213,197,199,211,203,193,192,210,238,215,205,197,200,205,226,231,193,224,201,207,214,223,210,209,242,215,184,229,208,223,203,215,207,239,231,221,214,221,193,198,210,229,198,213,198,203,225,210,222,208,197,218,228,220,214,235,208,198,198,224,248,213,214,219,210,198,239,224,205,215,197,202,217,219,189,214,215,237,193,204,210,234,225,215,197,219,218,207,229,209,226,228,222,228,213,233,219,226,239,229,235,210,202,229,249,221,236,237,235,231,208,215,232,237,248,253,239,214,210,207,194,171,183,196,217,205,231,231,243,243,226,249,227,237,216,215,195,231,230,221,234,223,180,69,160,233,220,199,63,91,115,176,155,170,134,120,133,156,119,137,164,86,118,103,99,92,131,127,134,162,124,154,161,121,145,88,95,94,131,197,223,214,246,246,229,218,212,191,232,153,137,212,158,218,156,219,226,180,222,234,238,252,225,238,152,147,98,84,41,17,3,18,39,36,24,89,128,124,75,52,36,105,97,79,47,44,25,73,157,223,136,67,127,206,178,173,142,134,110,68,125,224,197,70,94,195,225,192,156,43,83,143,123,231,241,219,241,220,225,226,241,231,243,158,229,246,241,252,219,124,1,0,19,9,40,46,33,19,23,49,95,101,137,104,40,18,94,99,89,38,55,42,11,123,125,108,142,53,23,9,24,26,53,109,138,125,108,112,116,117,153,134,139,128,151,118,159,143,114,147,118,105,121,121,109,114,76,79,53,64,53,28,35,26,21,19,56,49,42,31,43,12,16,53,29,49,48,55,8,92,212,250,255,253,252,165,144,211,238,202,192,137,114,120,122,156,223,168,74,6,11,22,44,11,51,38,17,42,47,37,34,16,4,38,19,22,14,40,45,15,43,59,42,56,39,52,44,50,46,66,36,57,55,54,71,53,82,83,61,80,69,74,67,122,44,83,121,104,112,77,38,44,12,37,16,37,64,108,96,55,8,43,33,29,41,19,50,18,40,17,33,74,102,89,73,65,41,41,112,122,180,167,166,152,147,116,96,82,37,36,72,67,162,234,243,247,234,228,235,239,237,233,214,205,229,213,242,233,230,225,215,244,221,223,221,239,237,231,232,238,217,226,233,231,212,221,248,236,245,230,238,222,239,212,224,234,212,235,239,215,192,244,224,222,238,202,216,132,0,16,6,0,7,15,16,15,5,41,29,19,5,0,8,34,1,20,38,13,22,0,6,12,197,192,189,183,208,166,202,189,180,187,192,201,184,190,194,196,202,204,194,212,192,204,198,190,193,190,188,182,207,188,200,191,209,199,196,209,212,202,185,191,217,195,170,207,231,204,205,191,155,189,198,226,187,210,212,202,191,199,199,237,201,195,168,204,180,201,211,209,200,189,184,217,188,233,204,214,203,210,201,200,223,205,209,183,188,230,174,220,206,214,206,202,209,197,195,171,217,208,208,189,220,194,209,216,217,212,204,216,208,224,219,210,192,195,206,186,186,193,188,220,180,194,212,189,199,217,213,191,194,198,221,195,188,203,216,205,214,187,223,212,208,204,216,214,199,193,225,222,228,227,206,213,206,206,205,215,202,180,205,215,210,220,193,208,203,19
0,218,235,211,205,217,194,218,202,217,204,178,214,187,226,181,205,213,229,196,226,197,226,212,203,220,192,222,238,217,184,230,216,207,221,216,203,192,224,226,201,226,207,213,198,211,198,194,218,219,206,182,209,224,224,204,200,220,226,212,198,192,200,212,224,212,209,210,195,226,208,209,211,226,196,231,216,204,233,219,227,208,228,206,221,206,214,202,200,196,205,200,219,240,225,177,216,186,231,237,222,208,181,224,215,228,242,209,222,235,210,213,233,227,231,226,206,219,217,205,212,222,234,230,217,231,225,234,217,195,188,156,199,199,167,223,229,246,228,222,224,226,220,232,233,213,225,230,206,217,220,214,227,222,235,200,96,109,224,190,172,36,65,101,162,181,194,162,162,163,146,125,161,147,104,94,96,64,70,54,85,75,93,90,92,101,93,78,81,84,71,97,188,160,200,220,213,251,240,226,187,237,163,163,196,144,212,200,248,247,203,191,150,122,116,93,93,47,25,26,52,108,131,109,147,119,86,143,162,169,183,103,57,52,91,97,62,70,50,22,21,110,155,146,83,82,148,151,126,123,30,76,62,130,159,115,93,126,137,179,183,110,48,39,69,126,228,246,216,240,223,243,226,239,247,206,153,237,252,234,240,140,32,5,25,26,32,54,34,24,13,35,65,62,91,73,114,34,40,36,91,152,120,103,72,83,170,118,117,124,30,59,27,85,127,147,159,124,118,98,112,128,129,119,116,135,153,129,155,143,124,96,88,83,40,64,36,22,8,55,35,37,69,14,46,24,22,58,43,62,53,37,40,15,38,20,63,44,60,92,89,7,182,245,237,241,225,154,104,156,233,249,229,134,130,104,109,122,205,209,125,39,21,29,15,54,34,13,17,39,47,51,59,49,26,22,51,41,35,32,51,67,25,29,24,34,25,30,24,26,44,25,39,26,43,60,63,36,84,62,79,63,56,62,80,94,122,104,107,104,102,90,72,56,48,52,32,41,36,77,84,84,76,27,38,34,45,44,46,31,32,18,24,64,97,93,127,89,31,32,120,141,101,95,81,137,142,62,75,55,64,37,73,56,156,234,237,247,247,220,234,238,209,221,228,223,246,235,234,231,221,236,214,219,245,220,234,216,234,244,246,234,240,219,217,247,241,229,222,215,200,233,246,221,225,217,208,229,196,225,230,212,231,212,209,237,231,223,217,228,115,15,8,26,41,3,25,37,28,2,22,23,6,9,0,3,5,27,21,21,12,29,16,8,40,193,177,172,192,178,210,185,170,191,206,192,182,217,210,198,216,204,198,200,194,189,198,221,203,197,192,195,178,202,192,196,187,204,185,221,182,200,189,190,204,191,202,209,187,213,190,202,205,191,193,202,193,194,197,181,209,198,185,212,188,182,198,199,193,233,197,198,212,202,192,185,198,194,210,192,222,199,199,184,205,204,210,209,212,191,195,187,207,188,190,221,202,206,202,206,212,188,198,210,185,212,191,208,194,203,197,182,187,223,207,197,187,222,217,190,201,218,189,196,201,237,208,186,207,204,197,190,177,197,232,200,202,188,225,195,206,184,221,205,196,209,190,218,172,169,215,173,216,197,219,206,193,191,219,188,198,202,213,211,193,214,169,215,193,214,199,210,192,189,203,228,204,200,197,213,191,216,235,207,208,215,211,201,204,190,232,197,214,203,206,209,206,191,190,228,186,210,197,196,202,216,176,214,209,198,223,187,201,199,206,231,217,206,189,216,190,212,203,211,210,224,186,223,225,205,200,207,201,211,200,225,204,207,241,202,232,202,218,210,196,221,219,188,224,196,243,224,204,204,230,187,213,188,210,225,206,193,224,205,222,211,228,229,223,212,214,218,198,222,221,215,224,229,233,220,234,207,243,227,215,211,210,218,238,237,226,213,207,224,209,192,184,182,184,172,193,214,201,220,227,242,236,224,229,229,238,237,209,222,232,224,206,216,237,222,224,214,215,226,233,200,58,123,184,233,154,54,102,109,190,163,174,166,156,201,180,144,168,172,91,120,96,84,86,71,55,34,63,69,48,59,55,89,76,75,40,72,151,176,137,185,196,226,219,218,219,242,172,189,235,143,151,168,209,175,57,40,30,28,20,4,38,44,55,67,76,118,135,190,170,143,156,151,179,223,211,11
1,58,62,107,98,40,28,34,28,80,204,245,181,123,188,206,174,197,177,149,170,162,218,185,125,192,211,204,202,192,129,74,67,53,99,231,248,213,247,242,228,235,230,251,198,147,243,248,241,171,22,12,37,25,16,54,35,36,33,25,53,34,94,97,97,109,98,32,41,33,86,114,156,158,157,160,107,113,135,121,171,178,171,194,137,130,112,134,140,146,137,120,151,108,90,91,67,59,38,30,26,42,17,51,35,47,35,30,17,45,62,92,66,65,74,29,41,59,89,27,28,49,24,41,41,55,60,33,69,70,99,252,246,240,242,194,149,156,208,250,247,201,141,136,130,116,149,230,206,98,13,9,30,30,37,44,35,30,17,31,43,14,42,34,25,70,15,30,47,75,22,28,33,31,52,17,3,19,21,10,12,20,24,35,29,26,31,14,53,34,63,61,80,69,69,84,64,83,97,92,93,93,103,105,81,78,90,142,110,107,87,62,42,15,20,42,37,35,25,32,29,27,69,76,99,107,70,24,82,135,77,63,18,44,76,99,77,42,47,58,11,53,148,235,227,251,231,233,203,239,239,237,238,223,242,228,203,221,216,238,232,230,245,255,218,223,237,226,229,219,222,223,229,212,215,220,218,209,240,215,229,232,197,238,211,237,239,240,178,253,240,229,211,223,212,213,213,209,208,131,4,3,17,7,12,23,5,7,28,19,5,7,2,0,14,15,32,37,21,0,23,0,21,28,204,210,166,195,207,192,188,185,200,193,202,190,194,193,211,221,227,199,188,180,200,192,201,212,201,193,205,220,173,211,220,205,199,208,215,192,228,189,228,201,201,226,203,177,211,180,198,197,191,190,212,194,184,201,211,187,216,183,205,218,191,205,220,213,222,204,200,214,181,192,184,187,205,213,192,200,208,197,207,194,203,166,196,213,195,224,213,196,224,191,191,187,197,177,217,201,208,201,215,201,217,210,187,186,208,199,216,175,217,227,222,206,185,186,202,189,187,224,207,215,199,226,216,225,214,188,217,202,174,201,224,194,207,216,199,236,206,201,231,197,207,179,195,214,182,198,205,196,203,201,221,196,218,209,213,196,211,207,203,197,196,190,204,173,210,208,208,180,202,227,205,228,221,206,203,203,201,229,213,205,202,197,217,211,203,203,210,216,210,218,226,221,206,199,208,202,232,207,190,209,209,202,192,196,193,202,218,216,221,195,220,200,201,217,193,188,208,211,199,195,208,221,204,201,191,206,223,224,207,198,201,201,207,199,221,199,242,200,197,214,222,194,220,214,220,200,206,212,215,205,234,203,218,207,203,209,199,214,220,195,206,238,224,193,184,226,218,209,218,232,217,206,232,221,216,209,221,211,234,229,218,238,219,228,218,247,224,176,189,204,153,180,203,186,210,206,230,235,234,228,225,210,224,218,224,225,213,208,226,236,206,231,220,224,195,222,221,244,210,220,209,40,109,228,225,218,122,156,149,160,160,150,132,192,199,153,145,122,128,116,122,96,73,63,59,66,52,63,69,66,63,62,75,97,85,51,85,161,141,83,82,121,182,224,215,229,243,166,204,241,125,69,14,31,55,0,24,55,118,124,146,161,212,203,203,170,159,118,130,133,113,110,87,118,150,169,117,58,49,131,147,61,47,61,41,67,138,194,111,50,134,148,133,168,128,160,170,116,126,123,89,133,102,117,150,117,153,115,66,66,146,234,239,251,247,253,214,243,245,254,189,178,236,246,209,39,34,1,9,16,59,40,5,25,20,45,59,45,81,122,99,106,93,57,65,34,27,45,53,73,110,154,87,143,153,119,161,145,142,139,116,158,118,120,97,73,49,46,64,37,18,37,57,48,25,17,17,12,24,53,34,57,83,63,54,42,74,113,46,78,79,83,58,72,66,49,25,33,41,64,66,51,63,58,64,48,154,237,247,254,241,188,163,149,241,246,254,133,123,96,131,129,169,235,151,86,91,67,82,64,48,56,27,44,39,14,59,68,55,39,68,57,30,42,42,42,32,50,69,62,45,13,18,27,15,17,10,46,36,55,62,22,28,9,26,26,37,23,42,18,33,54,54,57,97,97,91,58,58,103,86,93,100,111,116,93,84,68,43,59,57,26,10,41,51,33,31,30,63,81,78,105,59,55,41,71,60,41,50,96,91,64,66,79,60,21,112,234,251,245,250,247,238,230,227,230,224,226,216,235,235,246,229,236,225,237,221,239,245,233,218,234,
224,234,228,216,230,224,229,201,237,209,209,251,201,209,229,229,216,233,233,220,233,219,234,232,234,238,222,240,226,222,217,221,222,129,6,1,8,3,12,7,0,4,27,5,19,11,9,0,25,34,2,19,15,22,14,5,19,23,199,182,207,200,180,177,155,197,185,187,186,208,216,205,203,188,199,222,189,198,189,216,206,179,192,197,208,215,199,201,177,204,193,189,205,206,207,189,205,220,179,191,176,226,195,216,183,198,215,199,198,208,231,199,184,205,196,198,216,200,178,217,199,227,205,208,198,200,186,197,189,201,196,219,203,195,205,193,183,207,208,209,204,204,215,218,217,217,205,205,204,198,193,214,178,178,219,198,195,176,211,195,189,194,211,176,199,218,198,214,213,204,198,197,204,201,215,172,199,207,198,201,182,197,204,204,201,206,195,207,193,202,205,194,209,167,176,194,194,182,188,208,219,187,205,229,187,217,196,215,222,207,217,208,226,206,210,207,217,198,236,203,203,213,212,177,218,188,215,174,202,186,192,193,211,215,206,213,192,231,201,214,214,199,198,190,214,199,216,200,231,209,229,220,202,202,189,215,189,211,209,227,204,194,193,175,217,191,186,222,206,209,208,208,197,187,208,187,216,230,225,227,212,197,215,209,212,204,207,217,207,187,202,211,230,199,205,184,241,186,212,178,198,223,200,227,217,219,220,221,228,205,182,246,202,203,196,230,214,209,197,208,235,214,210,208,208,213,220,223,200,206,222,215,224,214,201,207,218,232,237,246,235,245,222,222,187,203,152,196,221,213,239,224,226,240,233,228,223,210,217,199,212,224,238,231,233,226,229,227,226,228,220,223,211,221,219,227,226,231,180,50,95,200,238,224,151,148,152,170,166,183,165,161,169,113,102,126,151,95,138,94,90,84,53,61,63,62,49,73,67,69,62,45,72,51,56,107,104,123,143,199,216,212,215,207,238,146,207,243,160,87,118,108,67,96,202,219,216,253,226,255,228,233,240,197,233,226,201,98,99,148,117,184,185,200,165,26,48,72,117,53,32,50,34,58,104,153,132,84,70,67,125,171,146,112,99,93,104,61,53,130,111,115,133,121,167,165,84,30,118,238,223,231,218,239,234,218,226,229,190,234,253,174,78,5,10,18,3,45,41,40,5,13,44,57,50,34,34,62,130,98,88,95,100,63,56,67,62,22,50,94,90,148,111,126,135,116,111,103,66,41,48,53,46,46,17,50,21,25,18,55,48,52,32,39,28,31,52,40,40,46,102,71,54,54,88,87,58,40,54,112,151,151,90,79,107,96,123,130,129,97,115,109,144,122,84,190,248,252,241,212,173,116,188,233,209,112,123,130,135,145,226,235,137,57,126,131,142,50,101,74,64,72,62,60,54,46,64,78,67,51,23,38,79,60,63,46,48,24,18,33,12,25,37,24,28,26,44,50,62,37,11,24,57,47,52,57,56,39,17,2,15,37,48,44,43,75,71,96,82,90,74,50,71,70,107,103,94,104,68,66,45,55,36,14,21,20,47,92,121,102,71,49,39,33,46,85,97,92,79,64,49,52,70,194,228,240,255,232,238,243,224,223,213,218,234,221,225,214,225,240,245,234,214,238,240,227,222,226,235,220,230,242,243,220,224,216,225,224,209,233,220,231,235,230,232,220,207,254,233,230,223,211,228,240,226,238,217,216,240,236,216,207,228,101,19,3,4,25,0,9,20,11,9,1,21,15,5,1,14,2,10,14,7,17,3,28,14,17,200,226,205,198,194,161,187,180,197,192,179,198,194,187,181,173,186,193,194,200,194,179,186,200,197,204,189,208,181,194,208,193,166,197,209,207,200,188,198,202,189,211,188,216,197,200,214,214,206,218,228,201,195,208,226,174,206,219,180,182,192,182,184,201,193,217,204,199,193,179,214,187,186,204,182,164,198,208,195,176,195,204,179,179,188,179,205,199,202,187,215,194,208,204,219,227,207,207,202,200,194,195,203,221,238,197,163,203,192,177,201,205,223,207,201,188,184,198,218,198,191,192,187,201,235,185,208,203,193,192,201,201,209,210,204,199,215,197,206,186,199,223,225,224,204,213,187,191,188,197,234,199,196,202,205,210,199,193,231,201,211,200,205,225,204,200,192,198,209,192,219,220,207,195,217,20
4,207,203,212,194,200,188,218,197,182,214,187,219,220,204,200,194,200,220,179,214,191,190,211,183,195,191,214,214,188,208,214,213,207,206,208,227,198,192,197,215,188,241,222,200,216,211,184,204,219,205,230,202,203,200,188,179,208,215,222,215,241,178,215,169,195,212,212,188,217,184,193,183,222,232,227,218,217,249,211,199,231,218,213,201,211,211,222,221,227,199,215,216,221,226,219,208,208,202,220,233,233,222,224,234,248,233,182,171,153,178,187,209,192,224,222,244,223,237,219,231,218,224,248,222,217,209,205,206,225,231,241,223,209,232,210,219,223,213,219,236,222,226,203,222,171,53,89,172,248,213,156,164,143,190,176,165,157,147,162,130,136,169,134,116,82,9,24,31,56,26,11,42,48,78,27,50,82,87,62,28,26,54,106,191,221,247,244,234,210,216,226,144,206,230,196,193,229,234,213,186,243,239,239,255,243,238,240,226,215,213,180,216,189,102,115,170,182,240,242,252,198,83,63,90,113,41,29,47,42,91,213,244,228,147,112,199,252,254,200,160,139,148,221,101,88,230,227,239,217,203,237,190,154,75,102,227,225,235,230,214,229,237,221,225,196,228,228,106,42,5,21,5,29,58,18,23,36,65,48,50,54,21,78,113,119,110,102,120,123,124,98,75,63,26,83,128,87,152,121,70,53,44,6,57,40,23,11,48,67,63,49,75,46,40,55,43,69,59,29,30,37,55,49,60,36,46,68,119,154,197,168,147,135,146,176,145,196,164,131,150,121,141,141,120,104,61,99,45,59,39,66,20,133,210,255,219,155,88,172,249,188,142,147,134,122,165,226,196,123,35,11,35,29,31,60,16,52,61,18,27,53,34,42,70,68,68,85,71,82,67,58,67,68,47,48,46,20,46,65,11,34,34,34,74,44,38,28,59,54,42,52,67,35,19,4,35,62,33,25,28,36,9,23,37,27,39,69,49,87,72,100,76,86,101,105,101,107,94,63,75,68,38,89,65,105,106,102,44,49,83,117,125,115,80,78,21,42,83,208,251,231,255,241,221,240,219,217,220,219,230,234,215,238,221,249,229,224,243,238,219,216,226,207,207,251,238,207,219,225,226,235,224,227,213,214,244,210,220,234,241,226,232,216,235,232,244,225,223,211,235,233,222,231,215,241,220,209,236,231,139,2,14,2,2,20,16,5,15,25,17,19,10,11,5,0,8,14,7,18,0,6,14,27,5,207,186,179,189,225,221,172,223,223,208,186,208,161,193,187,185,185,162,179,191,187,199,205,213,161,188,209,186,201,206,203,220,197,195,186,192,185,215,196,200,187,201,183,212,186,201,181,213,184,204,188,194,192,180,203,214,224,189,202,199,186,208,216,219,213,187,189,202,187,208,206,189,195,207,175,180,203,189,197,206,199,186,196,197,197,191,205,220,200,197,214,211,203,195,194,199,192,194,208,200,196,202,206,216,202,196,212,192,193,194,212,188,194,181,197,193,208,208,190,207,175,200,212,187,171,186,214,213,206,205,190,214,190,202,169,203,199,187,180,184,195,222,199,202,188,200,201,200,215,215,195,198,221,194,203,225,231,235,213,194,217,200,189,213,214,209,197,216,219,228,227,219,203,213,197,206,226,207,197,206,206,176,199,224,192,220,188,196,189,194,206,185,211,206,213,207,201,198,197,231,191,210,185,209,188,207,194,201,206,182,200,205,211,218,215,189,188,194,187,197,177,215,202,212,207,218,211,204,201,200,202,192,203,203,214,215,203,195,187,208,206,201,211,211,190,185,190,215,218,184,203,198,142,191,175,191,238,214,205,196,212,212,213,211,205,204,223,214,207,224,208,190,232,215,227,224,242,235,226,218,188,173,164,160,185,220,205,215,225,233,201,228,207,240,212,219,233,226,246,234,210,203,219,231,218,210,229,201,214,214,238,209,230,238,202,218,200,230,212,231,141,51,68,178,238,217,174,131,168,195,193,177,153,152,187,167,154,177,137,85,5,5,10,19,17,16,10,34,34,19,6,45,52,59,12,15,110,117,179,178,234,233,227,205,191,219,239,148,218,214,172,215,250,203,196,215,249,232,227,225,218,185,190,195,140,143,153,214,155,112,158,160,178,206,232,247,156,73,66,92,122,48,2
8,26,18,78,232,246,246,165,133,225,254,233,172,176,199,224,235,143,148,242,233,222,188,197,241,224,189,96,63,199,199,238,233,227,244,206,244,213,192,237,123,12,34,1,28,27,56,19,30,28,26,40,48,69,22,83,202,166,137,81,51,89,105,109,107,130,110,77,116,125,102,151,85,36,28,43,15,32,54,49,24,58,72,76,56,60,58,63,45,75,74,62,30,38,58,95,140,158,162,196,131,197,194,208,198,174,149,128,126,106,69,61,20,52,46,55,23,41,17,12,48,17,28,35,35,31,7,94,146,183,139,109,176,241,142,130,150,133,139,181,241,171,60,20,1,22,24,19,19,32,7,17,21,26,22,23,5,38,28,27,42,15,37,61,49,45,57,74,67,50,67,66,78,60,69,64,41,60,53,75,67,29,43,28,60,73,29,39,55,98,38,37,49,40,15,34,26,7,18,25,50,56,41,48,61,84,69,63,73,96,90,88,101,91,86,103,92,90,103,94,91,133,145,178,151,125,45,26,54,34,89,220,241,252,242,251,236,228,233,209,240,205,219,240,218,234,230,237,241,233,234,240,241,236,243,222,231,241,198,209,203,226,222,213,239,246,215,216,217,214,227,239,214,223,231,233,222,233,227,230,219,249,220,228,207,230,223,238,234,219,213,241,243,131,3,0,8,14,2,15,17,11,2,5,7,14,7,5,10,0,12,8,13,9,14,23,15,27,219,205,167,196,200,171,195,181,201,187,202,191,214,200,212,187,190,199,189,191,206,198,210,178,192,209,191,177,197,175,194,196,193,192,177,197,191,182,190,211,194,208,206,210,192,186,174,171,201,168,172,209,205,201,193,198,210,205,218,177,197,206,185,186,192,198,209,190,191,200,204,189,201,188,187,173,192,203,175,201,209,193,173,190,194,212,172,201,199,188,212,218,197,197,197,199,180,203,217,207,196,231,190,210,188,195,196,208,205,190,196,196,211,199,212,202,217,208,225,191,206,202,224,194,194,210,193,209,169,218,210,212,180,196,196,211,187,207,219,203,217,194,188,180,199,220,194,208,207,193,189,214,209,201,180,208,215,207,209,204,209,202,177,209,199,217,229,209,211,196,189,225,214,193,210,208,206,174,216,186,199,209,180,184,169,212,216,197,220,193,181,206,225,170,190,199,203,202,201,195,209,187,221,213,192,183,202,195,229,236,196,203,203,186,195,205,183,192,182,203,199,200,196,212,186,183,204,194,190,194,199,206,198,190,196,198,183,226,205,212,208,225,234,232,186,211,226,211,218,198,208,128,41,71,151,212,197,218,184,224,212,222,204,205,204,199,197,226,219,231,243,207,229,226,237,205,193,203,173,173,205,189,216,210,216,241,217,240,239,247,221,215,215,195,229,225,236,200,221,199,206,211,222,227,241,220,224,199,215,212,211,215,238,214,230,237,216,225,220,215,136,49,141,227,249,230,166,148,160,178,162,177,172,174,205,175,174,173,141,48,15,8,28,33,47,18,20,29,57,27,27,63,76,55,28,53,130,224,221,208,228,250,237,216,209,234,243,148,197,208,206,218,191,156,138,203,214,197,169,166,127,151,174,162,128,153,211,247,114,120,165,159,174,192,229,247,171,68,42,83,111,53,25,47,7,72,199,234,210,152,118,228,244,183,99,187,200,174,162,164,204,242,225,240,161,198,228,211,199,129,88,210,214,221,251,239,230,228,244,212,196,124,35,31,24,18,42,62,18,11,23,38,48,60,54,21,66,196,254,233,169,83,57,66,52,80,72,81,81,74,122,130,141,94,54,2,10,3,12,50,75,76,16,51,87,97,21,22,58,87,133,161,128,160,180,185,191,195,203,210,174,186,140,120,121,61,39,49,35,24,6,19,33,36,14,17,15,35,21,24,19,20,30,9,29,20,7,21,35,0,43,143,143,126,191,209,127,133,140,123,150,200,232,129,28,19,2,29,18,4,32,27,10,24,44,12,33,24,28,32,7,3,17,11,21,1,39,4,12,44,21,44,60,47,34,52,44,39,51,92,78,76,62,66,68,53,68,92,57,57,61,64,25,44,26,51,15,43,49,33,43,66,79,66,36,16,21,28,41,44,47,86,70,85,93,85,97,97,108,99,84,105,105,96,114,84,67,58,65,29,59,77,191,246,252,235,221,229,217,243,232,239,214,221,236,244,235,231,238,246,239,234,233,231,232,207,235,208,216,232,199,224,239,240,223,228,239,2
29,222,235,250,239,247,240,245,219,226,229,204,218,233,219,222,233,234,235,223,217,217,215,223,235,239,220,233,119,3,9,5,22,9,4,0,2,5,0,0,19,18,2,3,6,0,7,2,26,15,5,5,0,195,181,194,186,207,195,214,180,190,192,195,189,178,189,201,176,232,199,207,193,184,171,185,207,196,175,195,209,198,216,199,196,190,195,195,192,218,209,184,151,217,182,190,202,187,207,198,195,169,215,192,195,190,203,199,187,197,191,196,204,203,198,181,194,181,199,199,195,211,175,194,217,198,188,197,162,216,203,174,190,196,184,195,194,190,229,196,186,205,198,225,218,187,222,200,199,177,212,196,193,200,185,188,229,202,184,216,195,198,209,172,200,183,178,185,218,181,202,193,189,203,186,226,182,198,212,200,180,184,182,213,181,213,184,181,242,184,222,199,178,210,195,207,203,199,195,196,198,203,189,193,210,220,184,191,212,216,203,196,215,210,201,216,193,189,185,193,191,190,200,186,193,191,191,214,196,218,203,186,186,215,193,199,201,199,189,200,216,209,207,190,210,206,200,191,177,207,230,203,210,198,183,172,191,191,199,188,193,204,199,197,199,208,214,190,190,205,204,207,201,182,223,199,196,198,228,209,179,200,213,195,214,204,210,213,196,204,191,229,208,209,196,179,191,201,196,162,205,195,213,183,111,30,74,182,210,204,199,179,223,217,226,189,217,211,219,224,216,216,218,217,217,191,212,174,202,182,187,200,202,216,202,240,213,202,228,223,208,214,245,234,196,245,245,196,206,228,212,246,222,199,210,215,235,217,212,234,221,208,198,211,236,236,216,227,207,226,198,215,215,112,60,141,225,254,248,155,157,158,172,161,149,184,184,181,129,139,156,165,159,98,127,154,99,29,20,52,43,46,58,32,87,51,60,83,79,189,236,234,215,227,233,193,203,199,209,228,132,182,180,176,211,178,96,101,191,157,166,171,167,199,195,225,220,242,214,245,227,39,178,182,180,172,198,223,242,199,65,37,76,113,80,28,48,35,78,234,228,212,163,90,212,202,129,117,179,187,153,154,152,227,249,229,235,174,189,210,219,197,186,66,117,222,215,209,220,219,214,251,233,120,80,0,9,34,35,66,36,21,13,38,46,44,72,20,23,188,243,255,177,132,93,59,91,72,76,95,70,30,57,126,136,137,106,31,24,1,27,15,29,66,99,94,119,100,66,57,99,149,171,196,183,204,197,205,169,127,103,81,57,23,24,64,25,4,25,4,0,34,9,9,9,20,30,55,14,16,24,27,34,15,27,23,33,36,2,12,29,17,20,18,62,119,168,234,171,125,151,145,144,144,208,222,76,77,14,18,13,18,32,7,10,22,32,26,42,13,19,11,1,27,23,20,29,9,24,12,10,41,5,9,5,29,5,24,17,8,25,44,52,37,35,58,60,44,89,79,69,100,73,78,39,26,16,53,36,36,33,41,21,78,78,87,51,32,26,82,67,72,53,82,52,34,47,66,72,110,107,92,113,92,92,64,54,63,37,51,52,52,96,71,117,199,247,241,233,243,235,235,234,252,210,228,217,216,242,225,225,226,228,248,225,218,229,233,222,206,242,242,242,227,243,233,222,223,205,213,231,220,213,250,233,238,233,225,240,250,237,240,205,233,229,220,244,239,235,230,218,201,231,238,207,243,205,225,113,4,17,3,6,2,21,6,22,7,28,8,1,11,2,12,6,9,19,16,19,20,3,29,16,179,182,171,174,182,193,189,182,207,194,195,184,202,210,195,192,199,194,199,200,189,165,180,173,220,190,195,197,195,201,185,204,179,177,175,204,201,194,204,212,171,182,188,189,216,204,228,185,197,177,186,192,184,193,196,171,177,205,203,172,178,162,186,186,185,187,184,211,180,183,181,190,193,191,199,184,173,188,190,185,187,192,175,194,202,200,188,178,195,188,199,212,186,189,189,211,205,204,199,201,179,205,182,187,208,198,208,206,217,227,189,186,179,206,173,206,192,222,198,218,199,167,189,204,200,208,198,224,207,206,204,232,197,189,205,184,209,189,176,224,196,190,185,202,179,183,211,181,199,225,188,214,221,218,194,202,195,205,206,198,191,185,212,196,231,196,216,202,184,203,208,203,199,199,186,207,200,220,206,192,200,197,204,192,208,198,
[raw test data omitted: an unbroken run of comma-separated 8-bit sample values (0-255), apparently the frame/pixel contents of the non_greedy_mv test input files added by this update; machine-generated data, not human-readable patch text]
,124,67,63,65,32,34,153,243,241,244,227,221,208,234,235,221,233,228,235,220,235,248,210,223,210,242,241,244,233,227,219,245,219,245,213,236,234,225,216,220,242,214,232,222,236,233,214,231,240,222,241,213,228,227,236,218,238,231,236,225,237,210,234,233,235,239,229,210,247,125,2,5,10,12,27,1,3,4,24,4,22,7,5,10,14,25,20,39,19,4,16,9,20,1,164,198,187,185,173,180,188,180,173,194,167,193,176,173,195,187,200,191,184,181,195,190,189,183,192,168,200,193,202,186,189,187,184,211,202,188,208,192,189,189,181,173,178,180,168,183,188,197,197,196,205,174,183,195,191,192,203,197,198,184,183,199,191,181,193,186,177,188,173,185,194,173,194,184,195,202,214,179,196,187,205,209,176,183,211,201,187,185,194,192,194,182,196,195,192,189,181,188,205,209,182,192,209,209,178,189,200,213,185,210,184,195,205,207,207,180,201,190,203,207,206,201,184,177,219,180,192,198,205,190,197,197,211,202,184,209,205,193,193,187,205,213,177,199,203,212,193,178,187,181,185,187,218,199,174,189,198,205,171,202,196,211,197,185,182,177,219,194,195,198,200,200,192,210,184,219,196,193,164,203,192,211,195,203,181,205,188,212,184,200,210,189,178,174,185,224,199,169,201,178,182,201,185,214,219,185,206,205,202,205,186,214,182,196,175,195,182,209,197,194,199,204,182,211,183,192,203,204,214,203,174,188,219,195,204,192,220,197,192,230,213,196,205,210,191,184,207,189,197,201,195,203,179,228,213,218,191,195,215,218,229,239,200,166,229,177,200,201,223,196,208,243,191,215,204,204,197,200,195,199,222,211,216,211,219,221,218,201,237,236,216,200,217,196,208,196,241,232,221,202,232,233,232,230,218,233,200,203,206,214,200,189,210,209,212,235,192,136,119,78,3,2,12,10,18,54,42,30,30,17,49,71,59,51,46,32,53,45,39,55,48,39,31,29,24,44,8,22,14,45,34,46,29,14,52,34,38,36,35,45,92,95,98,85,95,74,81,84,57,45,36,61,27,6,45,61,83,201,217,213,238,199,129,225,232,190,234,235,219,216,233,208,178,179,205,254,126,51,87,37,71,63,63,117,43,62,171,211,249,171,59,164,245,211,195,179,245,202,182,209,120,73,15,7,7,9,37,40,39,8,45,71,53,20,82,236,237,238,195,154,198,244,242,229,192,197,140,197,251,233,252,233,245,243,118,6,72,88,50,44,75,72,84,44,38,34,128,152,129,30,33,35,33,40,39,49,33,6,22,12,39,137,112,159,118,107,116,51,47,13,19,48,49,65,14,32,19,28,20,21,26,14,54,75,77,56,50,35,29,48,38,50,23,9,71,65,61,50,46,35,42,43,31,34,22,19,18,20,18,24,27,9,22,22,11,17,32,32,33,28,67,76,84,63,82,79,58,69,87,78,69,61,36,18,17,12,37,25,33,40,31,23,24,22,25,23,35,8,13,15,40,41,4,21,45,26,22,44,26,24,28,8,28,31,21,16,11,23,49,29,42,55,71,65,36,30,30,28,25,18,11,18,22,28,12,14,9,29,25,20,60,78,79,80,57,70,67,41,44,60,106,108,114,86,137,109,111,75,111,115,73,24,108,213,245,244,242,220,231,223,220,234,217,240,234,249,228,240,227,225,210,236,229,235,245,231,251,225,241,211,235,234,231,234,255,225,231,235,211,235,251,235,246,215,215,231,214,235,240,237,221,246,236,226,208,240,249,222,235,230,237,241,240,240,215,140,3,0,18,17,27,2,10,13,26,5,2,21,0,9,5,13,12,9,12,14,23,32,34,18,185,172,179,185,178,207,186,171,184,204,173,218,183,195,209,185,185,182,200,187,169,199,189,207,200,182,197,193,193,176,174,187,180,178,174,201,183,196,211,192,175,185,187,189,181,192,182,201,194,186,175,181,208,195,175,198,201,179,202,188,183,186,191,205,186,219,205,197,227,201,192,207,182,217,190,196,198,193,191,171,192,191,189,213,194,174,194,177,201,179,190,189,185,217,198,189,163,197,187,203,199,186,205,186,203,178,203,189,225,193,200,200,179,218,210,183,193,199,174,194,197,230,203,179,187,207,212,211,186,202,190,207,195,183,209,196,202,193,189,208,173,197,157,207,215,175,192,194,202,190,212,199,195,192,201,208,184,
197,187,176,185,208,170,212,178,160,197,179,184,197,208,194,181,186,183,184,193,179,188,195,204,198,190,220,186,170,162,192,179,184,207,189,193,217,218,205,188,206,172,185,182,215,211,211,219,207,188,197,199,178,192,195,198,197,192,207,188,200,203,166,180,191,203,207,195,184,212,190,196,204,193,212,224,211,204,199,171,193,208,216,210,202,216,212,216,195,194,202,216,215,218,206,207,225,224,228,222,195,240,203,208,215,210,211,226,218,214,210,209,206,216,219,181,217,215,222,216,223,193,220,199,210,208,198,216,203,195,228,229,201,206,221,216,194,201,236,218,215,233,234,206,211,210,194,215,198,222,222,218,192,218,206,224,204,207,190,217,130,195,187,74,15,20,28,12,12,56,35,31,19,13,66,45,79,60,45,37,37,34,92,27,39,35,13,24,23,31,28,28,22,30,24,39,32,29,22,20,19,58,91,48,71,71,79,50,13,87,89,56,22,27,75,84,108,182,173,121,219,203,162,224,180,144,230,201,226,247,208,221,201,214,197,186,194,207,229,175,47,121,58,63,91,101,124,61,55,173,247,238,144,37,169,227,231,227,169,236,185,201,220,53,21,21,12,23,26,33,13,16,53,27,56,35,77,220,240,225,185,142,218,240,231,234,230,170,135,190,227,235,249,237,235,237,144,57,43,96,55,50,100,82,92,107,101,49,107,143,129,119,25,15,7,26,42,18,36,41,6,31,38,106,135,54,74,28,19,34,38,14,41,17,7,25,17,8,31,31,15,35,78,55,78,79,107,124,75,69,27,16,45,32,33,31,37,39,36,54,42,27,30,38,38,50,33,13,11,8,21,17,30,13,19,31,23,41,18,32,23,24,44,58,22,33,83,56,75,63,61,35,45,78,53,35,15,1,2,20,10,53,48,31,21,33,19,56,28,37,32,21,30,33,21,56,52,26,21,26,40,13,24,22,26,25,31,33,42,35,44,97,73,49,61,72,62,65,77,51,22,52,8,33,55,36,63,41,31,42,35,39,20,45,83,74,54,60,65,79,71,41,52,102,140,135,105,109,110,100,76,74,89,99,71,71,187,235,229,252,242,238,245,238,223,224,255,244,242,235,224,196,239,223,249,231,249,236,245,255,235,225,253,239,244,239,250,225,243,211,211,243,249,237,241,212,223,248,219,222,238,237,247,229,230,223,246,232,232,226,209,241,241,237,240,227,239,215,122,15,9,4,4,50,3,14,10,2,45,14,1,8,0,13,10,8,24,13,2,8,2,15,0,156,185,198,187,207,190,176,183,190,201,175,202,178,168,198,196,204,194,164,177,155,198,217,184,215,188,190,200,197,182,207,187,180,192,197,168,204,183,197,181,178,177,198,188,190,160,186,214,187,192,202,221,180,178,202,180,209,193,215,188,180,191,194,195,189,177,178,176,188,204,168,175,159,224,187,221,200,179,206,190,194,202,210,204,219,188,164,179,185,205,191,169,218,199,212,180,195,185,195,207,200,220,218,187,209,187,181,190,189,215,201,214,208,203,189,189,211,203,201,196,191,222,176,200,212,194,200,180,184,180,217,184,199,187,195,190,208,199,179,178,199,198,167,184,185,196,168,204,185,192,204,194,206,193,191,185,184,207,206,189,190,216,215,158,197,194,211,193,192,205,206,196,197,210,195,198,188,192,229,192,204,199,202,223,213,193,206,185,220,199,182,178,179,216,201,196,179,222,211,200,205,189,202,210,194,203,225,199,176,191,179,197,166,176,200,184,210,172,218,202,198,200,203,212,174,186,203,203,201,192,218,241,199,191,220,224,224,205,179,222,213,210,209,192,216,202,229,229,204,206,205,208,214,203,221,208,197,191,202,203,227,217,198,207,206,194,188,178,198,202,190,198,208,216,227,192,231,204,206,223,210,196,203,199,231,234,210,223,208,208,208,208,197,215,222,213,211,224,198,223,216,214,222,206,220,210,238,226,217,218,204,209,207,194,199,193,181,137,212,254,127,76,33,11,18,11,50,37,47,40,5,36,36,31,50,19,41,12,7,11,49,41,24,32,39,32,28,7,15,46,6,43,18,36,48,18,41,19,48,96,46,42,62,52,40,51,62,51,35,59,133,244,222,210,232,212,111,135,148,172,235,150,156,245,214,214,229,216,226,233,229,204,194,195,231,240,185,77,157,92,75,56,49,98,72,66,209,239,224,64,40,19
8,207,243,223,213,240,196,160,116,13,38,0,7,37,48,44,55,45,71,77,47,65,211,251,246,179,142,187,237,247,239,236,170,98,152,244,236,234,219,241,198,134,97,42,82,97,38,69,96,67,39,42,97,115,112,120,159,92,19,24,10,7,33,53,50,51,16,35,96,115,76,33,40,16,38,13,39,0,8,1,7,12,5,32,27,0,34,79,76,119,93,83,92,114,89,85,35,56,62,65,25,40,13,44,31,37,16,7,7,60,14,21,36,45,31,44,31,45,38,12,14,40,14,13,22,4,13,23,27,44,44,31,29,61,67,64,67,59,62,26,56,7,14,27,13,24,33,40,33,30,23,33,39,42,32,23,29,32,31,4,25,46,26,36,31,25,32,44,15,54,33,43,60,44,35,39,57,69,59,45,25,30,51,51,68,54,62,64,34,44,17,35,32,26,22,31,8,32,36,48,86,76,90,53,53,68,74,13,71,93,114,131,129,126,81,90,111,97,79,50,60,56,159,228,238,247,233,233,239,246,244,245,234,239,247,231,226,239,221,221,246,241,234,249,233,220,221,249,223,222,241,233,241,237,233,231,212,241,238,234,230,226,214,248,237,246,224,247,222,224,222,232,241,249,249,247,221,222,237,235,237,236,249,221,105,0,2,13,3,25,40,2,0,8,20,22,11,5,12,5,10,5,15,8,2,13,26,5,17,187,188,149,187,164,181,181,192,195,178,168,192,183,176,173,187,180,191,185,204,166,182,201,181,185,178,188,203,197,181,206,158,218,194,172,221,171,176,214,159,179,175,201,217,203,179,188,194,175,157,196,194,217,186,181,204,181,188,172,200,210,203,204,205,196,213,195,185,199,211,198,198,192,205,219,174,186,205,169,219,224,211,205,201,177,201,187,207,191,194,194,206,198,164,193,207,199,192,194,195,171,204,176,189,154,202,191,185,202,190,189,195,186,211,174,200,200,194,178,204,172,189,184,206,197,208,182,221,177,210,198,193,207,183,190,188,192,180,189,172,191,187,191,199,189,198,212,179,178,188,191,201,189,174,192,176,215,196,193,189,188,178,209,180,189,228,188,176,186,183,211,235,174,184,207,196,200,196,207,182,219,199,221,198,201,215,184,186,199,206,204,166,219,195,205,217,232,184,201,202,206,187,201,196,197,195,178,203,216,208,209,198,183,193,174,203,184,199,198,211,232,190,204,193,206,203,209,185,203,197,203,205,211,228,206,224,223,195,228,179,205,227,199,216,205,202,199,213,210,205,200,213,224,211,226,205,209,213,227,212,213,202,186,225,232,209,231,200,209,200,211,195,211,207,217,232,223,194,203,215,220,223,195,211,224,213,235,227,235,194,212,228,224,221,190,215,213,239,209,206,233,197,216,184,223,221,217,219,210,203,229,226,214,207,201,204,220,174,177,232,132,114,11,31,95,22,49,28,49,46,65,58,43,51,31,10,29,46,34,40,41,39,23,47,22,17,36,25,37,25,25,27,43,27,25,20,32,14,46,81,49,60,33,36,53,15,23,38,133,150,221,244,232,209,235,125,76,116,163,233,220,135,210,244,205,205,224,229,218,203,230,199,164,185,202,249,157,76,158,86,73,13,21,87,55,44,208,250,155,40,95,241,128,134,202,169,235,135,115,26,5,19,9,52,47,40,16,30,47,62,56,55,204,249,247,216,124,193,217,246,216,218,182,106,116,213,220,242,241,239,187,149,218,126,52,69,100,70,76,107,73,90,32,72,95,101,150,130,77,1,53,28,30,43,41,32,49,36,7,58,60,30,16,30,32,15,40,26,5,12,17,34,19,55,1,23,17,52,71,57,90,83,50,107,117,117,105,83,68,37,49,29,50,42,21,47,25,27,19,23,26,41,32,49,24,29,39,19,15,29,24,21,25,15,18,21,25,44,35,49,23,52,57,5,15,31,65,77,70,60,21,51,11,18,20,4,42,73,63,73,40,16,24,13,36,25,22,15,16,39,28,21,41,25,14,37,42,53,44,8,30,118,140,114,116,110,63,70,62,57,91,52,50,9,60,86,86,88,92,56,89,74,48,43,35,59,37,41,20,36,42,67,46,70,64,81,57,66,56,64,89,111,111,96,112,90,66,64,79,95,36,41,36,120,244,221,246,233,223,209,247,248,218,235,240,229,241,249,240,241,247,229,221,229,244,224,223,228,229,229,234,237,252,242,249,245,230,243,239,252,230,213,224,233,226,245,242,240,233,223,225,245,242,238,233,249,217,234,218,244,223,230,231,231,240,108,3,18,0,18,15
,15,45,22,8,29,36,7,15,4,13,1,18,1,2,7,31,9,24,26,168,186,207,195,191,200,155,189,194,185,199,165,192,168,195,174,189,190,187,172,197,184,189,191,194,166,206,173,180,180,179,193,201,190,172,194,179,196,202,167,204,175,196,211,186,195,185,184,197,213,182,189,207,186,197,215,195,192,182,184,205,179,213,218,199,181,176,186,216,223,186,195,196,193,208,192,181,185,196,220,169,179,186,191,214,206,196,202,182,189,206,202,189,196,186,201,188,181,207,194,208,196,208,205,205,180,213,199,187,173,215,181,198,205,193,190,173,201,207,209,226,191,193,180,204,190,205,189,189,188,181,190,183,200,210,185,213,197,178,189,201,170,189,195,188,178,189,189,185,178,196,196,174,194,197,195,195,189,179,204,217,196,180,199,204,187,184,192,171,202,198,219,193,183,212,177,211,203,182,191,213,183,187,194,209,218,188,207,206,167,198,208,204,210,196,175,215,209,227,178,213,211,188,200,191,200,178,216,167,208,184,196,179,165,193,198,206,190,201,207,214,196,218,195,219,190,211,202,220,195,222,219,209,221,208,210,208,208,226,223,204,203,211,230,205,201,204,197,229,221,195,215,206,189,206,221,227,229,212,179,213,233,198,213,194,203,191,175,209,203,199,198,220,230,219,222,229,211,213,218,221,221,219,198,196,218,208,233,224,225,213,228,220,224,218,214,212,219,230,204,215,185,185,197,228,242,198,218,199,223,232,213,211,213,166,174,236,174,220,198,107,91,59,171,220,183,212,251,250,255,222,229,202,179,171,159,228,137,63,33,12,48,33,39,17,20,26,35,29,24,26,37,12,32,59,76,92,48,18,42,38,50,35,42,49,73,162,175,239,217,242,239,155,135,188,113,121,224,184,246,231,135,224,242,206,206,219,214,213,171,233,228,181,181,204,246,154,61,159,73,58,48,16,81,50,66,220,238,115,40,86,118,58,160,215,207,206,91,50,13,11,22,38,35,33,24,36,74,54,49,26,142,255,239,253,176,202,222,234,242,201,185,142,154,187,247,249,220,189,175,124,189,249,132,36,32,70,80,69,161,132,143,31,63,77,87,143,130,56,25,26,15,35,61,56,24,26,14,17,32,38,18,17,1,17,32,43,18,16,41,20,13,36,34,7,35,33,93,94,96,122,91,81,83,132,134,43,57,72,51,81,24,35,29,31,33,40,25,38,32,30,41,45,21,57,39,11,50,3,12,18,16,39,41,34,10,25,18,48,29,28,47,28,53,40,40,46,59,69,53,53,26,28,35,5,18,40,24,32,48,31,42,2,33,38,31,19,12,33,42,40,31,2,27,8,28,26,23,19,29,71,154,177,179,177,165,147,115,56,56,89,44,7,11,61,52,62,38,42,71,65,86,123,117,88,66,17,13,42,27,64,92,83,49,77,60,47,74,47,48,88,111,109,103,98,107,59,26,45,86,74,79,14,61,219,220,240,247,227,222,239,249,249,209,243,239,232,250,246,232,230,211,228,233,228,232,230,228,227,240,228,230,204,217,228,239,224,235,213,237,189,238,195,238,251,236,240,223,237,236,240,205,236,228,237,248,246,214,224,245,240,236,242,224,229,112,7,11,11,6,14,2,13,11,32,11,29,20,3,0,35,13,3,2,14,0,33,14,7,0,179,182,167,174,191,191,201,186,169,181,184,193,223,169,201,195,192,203,179,177,199,173,177,187,207,177,195,182,197,165,199,191,194,218,212,191,197,195,200,195,169,169,190,201,172,194,211,167,207,192,209,197,197,186,192,204,204,196,172,225,218,212,185,184,174,202,205,214,199,217,185,180,205,185,179,181,192,211,181,228,218,205,192,166,182,208,222,187,200,196,194,212,191,179,192,200,204,171,178,184,216,196,216,188,194,185,184,189,191,165,199,191,212,194,204,198,191,197,195,195,198,199,192,201,182,195,183,191,203,186,183,182,199,203,184,203,198,218,216,178,216,200,188,184,188,192,212,195,198,201,204,210,184,217,223,213,195,198,174,219,195,183,197,214,185,196,181,186,209,212,191,197,201,221,219,192,183,184,218,211,203,195,206,214,201,206,198,212,195,181,214,199,219,182,212,209,195,196,219,180,206,205,191,191,199,205,233,195,203,187,211,205,207,217,195,206,193,192,209,223,2
06,219,209,220,203,219,210,189,224,222,202,205,224,190,222,208,192,202,218,199,225,230,193,204,184,212,220,205,218,240,218,227,211,212,204,228,212,222,220,215,198,206,214,185,225,209,208,218,199,207,211,207,188,214,233,193,209,209,228,220,218,221,212,195,220,202,210,208,195,234,204,220,194,209,236,201,212,234,209,214,212,197,212,188,236,216,212,224,224,228,198,217,221,188,198,204,215,189,195,182,117,114,41,161,242,250,246,241,242,242,251,244,243,214,249,247,244,178,51,42,128,133,80,24,26,17,20,55,34,34,21,40,25,98,197,253,250,87,131,170,146,159,177,171,176,241,253,248,237,228,231,181,89,135,201,150,189,177,208,255,186,149,221,204,236,201,230,222,219,207,229,238,144,208,208,252,142,66,101,72,47,37,46,111,42,52,201,201,104,60,34,3,57,214,245,211,192,37,21,28,9,31,51,54,25,24,70,43,64,46,110,238,240,244,177,206,251,248,253,206,176,97,144,197,225,230,245,225,151,143,184,210,240,153,6,58,87,88,37,78,120,104,48,72,72,115,137,125,62,23,42,33,34,22,33,36,36,0,26,8,26,23,20,20,12,31,37,22,34,38,41,31,27,46,46,25,39,96,113,119,116,89,112,128,85,65,64,89,66,52,55,64,36,26,38,30,45,31,49,32,28,52,50,35,17,29,29,20,35,24,3,16,30,11,39,4,3,23,18,50,39,20,37,47,46,64,35,70,54,45,57,32,36,34,29,59,39,10,13,37,10,32,28,8,20,15,23,5,36,19,67,12,30,12,31,12,11,37,28,20,105,151,203,156,145,172,132,156,102,69,89,49,33,25,33,13,37,33,38,40,53,60,86,78,123,72,31,18,65,21,61,70,59,70,85,85,60,74,41,37,88,98,125,99,126,92,49,81,112,129,124,114,63,26,204,229,242,226,247,246,244,238,230,232,222,234,215,225,240,222,244,234,224,241,226,236,245,213,232,228,244,248,246,219,237,231,224,237,231,242,238,242,226,227,225,226,232,238,234,231,243,243,244,249,232,228,238,248,214,233,228,244,247,242,220,114,3,0,5,27,31,8,26,9,10,15,12,31,37,0,16,6,13,13,10,18,9,10,15,40,182,204,187,208,188,193,199,188,184,155,186,188,184,188,201,185,224,161,222,175,183,162,193,187,201,187,197,169,204,174,184,183,172,197,197,183,168,186,207,172,180,207,192,205,194,206,193,182,220,192,211,194,186,206,180,182,211,198,210,202,199,207,209,201,186,205,182,208,196,200,184,187,211,190,197,206,210,205,173,200,214,187,192,201,201,173,182,196,183,180,175,186,187,191,186,209,177,191,196,168,189,189,202,188,204,182,183,192,207,189,192,175,209,192,207,208,199,212,167,172,172,205,192,196,190,184,194,183,179,176,198,196,180,203,189,207,221,171,202,177,209,204,204,205,190,203,203,185,197,191,188,196,190,197,180,199,189,185,176,217,219,194,213,226,199,206,192,217,197,189,197,187,201,185,212,212,203,189,193,203,190,221,192,211,200,206,207,224,210,208,210,221,213,236,199,190,186,206,207,190,215,196,211,207,215,177,234,224,209,198,204,213,191,209,202,200,194,205,211,205,235,201,191,181,204,195,215,235,211,210,209,222,228,203,210,209,215,195,215,206,228,223,216,199,188,221,224,213,237,203,209,204,216,204,187,205,194,236,214,224,215,199,223,202,229,210,230,198,234,214,208,208,180,224,218,206,227,212,206,207,180,216,233,196,188,227,209,215,228,211,216,207,210,215,222,213,218,207,223,203,197,216,220,217,208,232,212,196,196,216,213,193,218,182,191,182,255,221,207,201,107,105,34,180,248,230,244,243,248,240,228,226,220,216,208,234,235,98,56,50,159,155,85,40,26,41,31,41,23,60,26,39,53,29,139,241,255,148,85,185,196,251,237,239,221,255,248,237,200,147,225,208,150,193,185,118,126,134,187,253,146,168,235,242,196,194,210,219,213,202,231,227,154,200,211,255,149,54,117,70,68,44,27,91,76,44,209,204,74,65,8,5,130,244,246,232,72,14,12,8,9,48,44,45,28,39,40,76,34,80,225,249,249,202,224,231,217,250,254,228,126,102,208,245,241,223,242,169,96,178,232,253,253,159,41,47,78,87,48,23,36,57,32
,110,108,93,135,79,19,49,32,36,47,68,10,43,54,57,18,29,18,17,49,32,38,37,36,42,45,44,20,29,27,21,29,33,29,68,100,51,86,103,97,82,71,35,65,86,56,48,66,36,34,34,50,41,26,41,42,42,32,42,64,34,37,13,40,31,32,27,7,23,41,34,39,35,16,30,31,27,32,60,25,55,58,69,52,50,54,68,41,65,27,31,39,65,5,17,2,20,8,21,21,45,3,24,28,30,30,35,26,32,32,23,35,18,36,51,74,77,128,153,142,129,125,153,166,145,101,74,89,51,30,44,19,16,20,15,12,33,19,27,64,84,93,61,11,45,34,17,60,44,50,71,89,93,36,60,38,48,71,93,120,101,106,95,93,137,99,71,40,88,96,55,176,231,227,226,239,244,242,234,208,236,218,218,239,233,240,234,234,242,231,230,243,242,221,242,233,233,230,242,221,224,196,221,235,235,243,226,239,241,230,233,230,248,226,246,247,214,232,241,215,207,244,250,231,229,221,250,244,222,249,227,223,123,17,2,5,7,23,19,36,6,16,24,11,16,5,0,4,1,22,28,20,13,8,3,17,6,170,198,184,212,189,164,191,199,200,179,169,188,197,197,189,183,206,216,197,213,193,206,192,186,185,168,195,200,202,203,196,186,199,193,207,162,185,183,205,200,200,183,177,192,187,182,167,185,192,176,192,210,190,191,179,192,185,187,191,215,202,196,171,182,200,181,207,233,191,212,193,189,182,188,213,191,205,180,201,199,180,204,191,195,191,209,183,203,200,195,203,188,190,184,182,209,223,198,184,182,221,206,167,204,194,185,207,194,199,193,199,197,220,169,205,181,217,199,210,206,204,201,183,192,205,166,183,206,191,189,195,211,183,200,195,166,216,181,202,194,210,180,186,206,192,194,186,188,201,211,193,203,194,190,192,218,210,206,179,201,210,199,193,186,198,206,209,187,202,205,216,235,190,204,206,202,206,224,225,230,201,205,215,203,215,209,234,193,191,202,215,220,196,179,218,210,209,216,224,197,206,221,217,221,172,204,216,200,197,209,196,207,215,192,220,184,231,224,196,195,202,220,210,209,225,207,219,208,195,222,227,215,218,217,220,234,215,230,214,209,229,229,226,205,224,223,205,223,196,207,211,213,223,213,224,223,216,211,213,199,199,202,210,222,217,196,212,214,214,186,225,230,182,235,235,221,200,225,231,179,233,218,196,208,223,203,208,209,221,193,217,220,190,215,212,198,209,214,207,206,205,184,224,200,216,209,223,199,222,234,227,211,222,196,197,198,241,232,205,124,65,85,35,121,176,203,220,233,217,211,215,183,151,173,131,184,188,86,48,101,179,153,107,57,14,10,47,36,43,54,10,30,57,31,99,214,211,117,27,60,178,248,232,204,166,212,165,184,170,138,227,218,201,225,187,111,132,166,237,247,161,189,245,206,228,209,224,249,232,189,218,215,191,201,188,246,165,17,86,88,88,67,73,73,85,61,198,188,81,35,9,36,207,231,255,143,45,13,7,7,43,52,34,4,48,60,87,6,79,207,242,234,203,215,237,254,244,241,240,143,153,238,243,240,211,230,225,113,169,226,239,249,240,213,93,39,18,65,89,72,52,67,130,87,107,126,117,85,9,18,33,28,68,41,31,35,24,21,21,18,19,39,23,45,33,67,29,16,38,47,60,19,51,10,18,30,43,73,57,81,92,114,77,69,56,46,70,36,36,59,39,75,23,3,38,23,24,54,40,24,44,33,9,47,33,21,26,29,34,35,28,31,47,34,28,32,34,42,32,12,40,60,37,53,59,33,68,62,70,45,59,48,23,25,30,30,25,20,18,30,33,40,17,14,47,59,27,26,13,34,13,23,17,35,42,39,62,121,142,156,164,153,139,147,135,112,151,147,78,61,76,30,28,19,27,41,35,16,6,25,31,35,53,58,80,100,43,28,36,25,44,81,87,77,99,95,33,66,59,29,70,94,109,104,106,124,121,126,77,74,46,100,127,15,133,233,229,243,242,251,232,227,225,217,234,224,239,246,221,241,230,240,233,235,236,240,226,237,240,247,242,218,213,225,240,253,237,238,237,236,216,227,209,243,231,255,223,232,241,237,241,227,250,242,236,232,252,245,255,249,239,234,222,225,249,117,6,0,13,12,8,23,12,19,2,9,9,20,17,0,3,18,26,19,37,19,14,17,26,29,178,196,198,188,192,180,209,165,185,176,181,196,196,186,198,192,176,181,202,218,
189,206,195,216,187,163,176,177,189,204,181,190,173,183,186,196,197,201,188,175,218,167,175,183,221,198,197,209,204,179,196,215,184,214,201,197,207,177,209,198,185,203,205,203,208,206,180,169,206,185,213,207,189,195,224,183,191,194,175,198,198,176,210,194,218,169,198,202,189,195,189,201,202,191,165,200,201,189,198,200,175,195,186,193,182,193,180,218,187,200,200,190,199,189,211,193,187,201,201,183,192,189,175,200,192,194,194,190,219,199,179,189,211,192,214,167,198,205,205,200,197,179,178,208,203,187,184,213,193,191,221,204,216,185,198,220,176,189,192,192,209,208,197,191,240,239,209,193,180,190,205,198,216,215,191,184,195,208,206,196,206,212,172,191,201,199,232,214,204,206,223,215,207,216,184,194,221,224,213,196,217,194,236,212,210,194,190,194,203,201,185,211,223,230,221,229,211,217,202,229,200,195,182,217,220,214,190,194,183,206,216,209,241,181,239,217,231,213,204,204,209,224,192,164,199,219,204,221,223,215,196,222,223,201,216,200,205,218,209,221,212,231,195,221,208,220,201,206,198,198,195,224,229,194,209,187,219,234,219,234,220,192,238,213,213,213,221,197,229,215,218,233,216,237,223,233,217,204,221,221,216,207,215,213,218,226,193,201,218,229,219,214,207,181,194,214,209,147,113,50,20,39,8,44,46,130,203,172,187,184,173,151,115,176,188,204,205,127,113,120,162,174,119,61,13,24,56,50,49,43,33,38,81,146,203,250,230,163,52,7,84,209,236,146,116,195,158,221,204,188,195,183,182,148,171,155,190,228,249,215,128,228,227,246,212,211,226,225,217,197,205,221,145,186,202,255,154,77,65,63,58,50,72,77,81,71,141,91,23,53,5,66,243,250,211,79,8,2,1,39,67,52,6,34,22,61,39,68,190,234,252,183,197,240,239,249,253,251,194,146,220,226,230,215,193,201,155,180,240,227,249,253,240,208,97,27,1,31,77,116,58,111,60,60,134,106,119,74,21,46,27,25,27,31,23,42,45,41,16,26,26,16,20,32,67,32,40,38,35,60,35,36,43,24,41,20,23,82,48,96,127,109,74,92,119,75,59,56,33,33,41,21,15,41,25,12,27,62,37,20,27,31,30,34,50,21,27,50,18,38,44,22,34,23,58,23,25,38,44,49,54,44,63,49,66,32,69,32,42,44,46,27,19,19,27,14,19,2,11,27,31,18,37,33,5,40,28,19,37,18,27,18,18,32,31,99,141,154,165,162,147,149,113,118,110,122,157,137,55,37,51,43,15,41,32,15,14,41,16,40,3,39,27,53,77,91,59,21,44,22,30,89,74,72,95,67,81,52,38,13,97,108,111,76,85,84,108,107,64,96,143,169,142,31,152,242,229,237,242,245,215,211,239,231,250,223,241,222,239,239,242,236,230,248,235,216,236,231,239,239,238,243,234,245,228,237,235,248,231,209,235,251,223,237,219,239,240,251,233,237,249,237,244,245,240,251,208,253,245,220,244,215,237,234,233,130,22,9,7,7,18,7,21,9,14,17,12,11,7,5,9,3,13,16,3,11,25,13,17,6,190,169,171,184,203,185,196,203,173,188,189,179,189,196,181,169,189,182,166,181,190,191,184,184,168,190,204,174,183,173,174,179,195,205,186,214,178,182,193,186,210,192,187,204,180,197,175,181,193,202,188,207,180,186,214,192,195,209,200,207,187,172,198,176,207,196,183,188,210,188,193,193,196,165,173,189,194,201,195,198,191,201,201,198,212,193,183,187,190,213,198,200,175,187,199,206,198,181,199,164,191,188,193,193,194,181,178,183,191,181,191,188,205,198,189,182,178,191,181,203,201,204,222,200,183,205,207,198,190,184,211,196,196,189,178,212,180,186,191,189,168,189,198,182,179,193,195,179,199,193,193,212,222,205,210,171,200,175,185,204,203,216,208,178,202,205,203,191,195,219,197,209,194,207,213,209,191,213,204,219,210,219,195,233,221,227,219,206,200,181,186,213,217,210,213,211,205,225,184,218,210,217,208,200,203,208,203,194,206,206,199,189,215,228,201,212,196,207,220,212,220,206,217,216,218,185,216,225,221,226,211,211,218,213,220,219,235,221,197,211,216,212,205,201,211,209,206,233,202,213,
209,229,215,224,246,224,214,241,221,214,213,211,212,219,211,211,200,210,209,226,228,221,199,238,205,210,219,234,227,203,197,202,210,215,222,218,208,216,216,209,230,206,211,216,242,222,236,209,221,214,229,230,213,204,232,198,231,216,226,206,220,224,205,201,182,204,188,70,18,8,19,33,45,50,27,82,105,160,192,176,213,166,149,207,242,248,199,88,152,107,194,194,154,85,15,32,38,59,35,37,41,30,100,182,230,244,239,207,149,30,57,128,146,111,139,181,191,222,186,193,180,134,156,165,180,168,208,196,253,195,149,240,237,235,199,209,235,212,220,203,233,229,169,207,202,255,146,26,106,89,27,73,55,81,106,42,78,39,88,48,26,129,254,217,90,20,32,7,12,31,19,0,24,49,89,30,55,144,249,239,193,163,228,249,247,250,252,193,155,213,251,231,207,239,159,94,157,241,253,224,250,247,224,136,160,112,51,58,48,71,54,66,62,51,153,143,111,83,19,27,5,46,33,55,26,24,47,20,13,10,51,15,15,47,23,34,13,31,52,47,23,29,36,39,35,17,66,99,58,71,88,74,49,41,62,35,53,58,36,78,53,8,31,10,25,39,26,11,37,28,42,52,52,42,30,18,21,27,28,29,26,36,53,27,31,29,27,67,85,65,105,78,44,80,35,52,27,34,46,51,20,17,25,25,39,17,32,18,35,48,12,17,37,37,11,9,24,30,29,13,20,16,49,10,80,139,125,134,108,126,122,123,91,106,89,124,144,103,83,39,31,21,34,38,34,40,39,51,30,45,12,27,38,31,66,55,37,9,13,16,39,71,72,77,76,42,62,57,35,34,68,98,99,102,77,91,122,98,29,78,115,90,90,11,141,219,243,250,250,246,237,249,231,239,247,240,247,241,248,229,239,238,248,242,233,247,216,246,243,253,249,240,225,236,251,248,235,213,215,239,240,236,240,234,239,215,235,237,253,249,245,245,248,233,238,234,242,254,228,240,240,247,246,242,241,113,19,7,10,31,22,8,7,24,16,14,2,15,16,8,3,5,10,36,17,12,6,29,5,20,205,163,162,173,148,189,184,192,206,182,201,207,212,194,189,185,210,187,184,194,191,197,214,184,212,188,181,189,209,171,190,201,184,187,205,206,208,215,190,166,200,202,195,186,175,194,197,198,196,193,207,194,188,187,191,191,175,181,213,200,200,191,160,205,194,211,178,211,195,173,209,186,203,195,192,209,224,210,180,207,186,194,198,190,195,174,186,206,208,205,200,185,171,182,199,201,168,185,195,204,206,191,192,196,188,182,188,179,197,208,201,181,184,196,174,217,208,191,189,162,203,174,196,207,201,173,182,190,179,188,186,185,204,200,186,213,215,185,172,206,209,181,194,157,189,226,199,203,211,204,209,196,205,199,189,212,187,185,218,186,201,191,211,212,192,197,213,181,179,188,232,233,192,201,220,217,191,227,212,199,225,207,218,207,243,212,219,203,203,198,208,209,237,198,199,225,217,208,202,234,195,223,218,189,222,216,205,228,202,239,203,228,235,209,210,247,197,219,220,216,200,221,203,218,190,207,211,221,205,202,200,210,238,214,195,188,217,220,222,216,215,223,198,223,197,192,226,197,212,204,244,198,227,220,214,196,215,189,241,196,238,247,215,182,194,214,202,213,242,231,190,213,213,220,213,215,205,210,218,229,218,223,230,230,213,200,226,213,206,228,214,206,218,207,188,212,242,193,211,219,227,206,194,212,198,192,209,222,191,232,207,217,211,162,213,211,150,100,19,24,27,43,77,82,40,59,164,212,209,231,225,191,142,206,218,229,125,108,122,100,181,176,132,69,5,31,29,45,50,40,52,30,63,140,200,234,164,205,164,19,56,145,241,168,192,181,182,160,157,116,139,195,137,190,163,184,172,193,253,146,169,236,230,224,218,200,222,206,217,176,234,183,156,149,213,252,119,33,116,85,53,75,60,62,92,53,66,130,218,79,13,187,231,140,35,24,12,15,52,22,0,10,43,48,56,50,147,214,243,180,130,205,241,254,229,228,185,139,191,253,238,215,200,222,72,83,210,228,252,246,254,220,103,68,133,158,85,32,43,45,25,71,26,83,161,134,124,41,10,27,24,22,43,30,17,50,21,32,32,44,15,16,49,33,49,28,20,15,10,25,55,36,11,40,16,61,46,85,67,65,57,60,37,18
,46,38,24,12,75,60,14,26,20,23,4,15,32,19,23,9,28,15,18,50,27,25,37,18,30,32,35,42,26,28,8,39,14,159,159,156,91,134,87,42,32,5,27,49,42,33,42,25,29,12,13,38,25,38,70,28,4,30,27,25,31,50,59,27,8,33,30,17,28,3,108,135,128,107,112,112,91,90,102,115,107,108,118,96,26,31,35,23,25,29,29,41,18,51,54,27,45,14,37,29,45,32,34,24,59,40,39,70,72,44,68,47,80,18,63,10,34,115,117,91,104,65,82,102,35,60,56,31,44,11,139,213,236,252,221,254,230,244,238,235,222,238,247,254,255,240,239,240,248,238,235,237,244,242,229,255,224,235,247,253,231,251,238,246,242,234,243,243,250,240,243,245,246,252,253,245,241,238,248,222,253,239,241,241,242,237,206,247,250,240,227,100,7,18,0,1,2,5,33,6,20,17,9,14,2,0,20,22,20,7,9,7,30,10,19,5,190,191,206,208,186,177,165,201,181,202,188,164,206,198,189,172,189,179,199,189,191,196,185,186,204,185,189,185,196,188,188,208,167,203,191,194,196,186,184,197,176,171,194,198,199,209,194,192,193,211,221,170,193,201,191,190,224,210,194,187,186,192,202,192,189,187,205,191,185,191,185,174,195,202,196,191,174,193,174,211,212,198,190,200,202,190,189,212,205,168,202,210,194,165,172,197,181,198,194,195,188,192,184,200,191,182,202,177,197,184,211,183,188,203,179,200,204,226,191,198,177,193,185,192,174,187,177,207,207,181,192,179,198,178,200,187,203,204,169,175,210,200,176,185,202,205,220,200,204,210,211,190,153,200,180,188,189,198,229,207,208,222,174,199,171,205,202,221,203,183,219,195,183,208,206,207,200,228,200,187,200,211,210,208,216,194,218,201,220,200,224,194,206,205,209,211,203,212,212,190,234,211,218,216,202,216,237,195,199,216,210,205,229,206,219,215,226,222,225,213,232,236,246,225,220,226,214,200,220,227,224,220,207,221,203,197,213,231,206,223,195,187,197,211,216,209,202,230,232,232,214,211,192,196,204,222,202,214,207,196,231,200,216,210,201,198,238,210,207,202,219,209,230,230,194,222,207,170,183,211,219,202,212,239,188,185,232,227,213,204,226,231,214,220,226,241,231,229,209,214,202,202,183,202,205,206,216,212,227,216,208,223,206,192,220,222,209,120,96,95,95,134,99,79,59,101,222,189,193,211,242,177,154,226,224,182,110,109,174,108,172,163,42,52,39,31,43,31,65,39,60,13,66,131,199,225,150,211,193,78,51,211,237,222,230,197,162,136,147,124,164,186,131,124,131,138,214,224,250,147,202,238,225,239,217,201,227,212,229,193,231,229,181,170,203,242,149,69,80,98,75,59,38,68,97,94,132,178,221,152,84,209,119,47,11,2,26,24,39,31,45,39,67,67,21,120,233,223,220,135,196,249,253,246,206,177,157,211,217,253,242,192,172,111,87,198,232,229,246,253,233,66,8,64,106,95,93,23,40,51,46,57,38,86,172,139,111,14,32,34,33,38,36,22,26,27,39,30,10,26,22,19,28,43,42,43,40,39,47,37,53,39,21,68,23,54,43,57,71,40,54,74,39,46,28,33,31,37,26,53,11,37,12,26,29,41,43,44,32,49,41,40,38,38,16,27,33,18,38,7,37,48,61,35,51,42,66,140,130,103,86,91,76,44,40,10,37,15,28,22,18,40,30,45,55,50,61,62,16,30,30,18,27,3,17,18,26,42,24,38,29,28,7,34,74,149,126,113,100,98,115,118,124,147,114,78,120,84,52,29,19,42,30,58,40,14,25,45,48,28,19,29,7,15,52,18,19,21,15,14,40,52,78,50,75,70,86,47,58,36,52,150,108,120,106,105,74,85,114,84,43,40,22,108,248,253,242,251,248,251,233,223,223,252,238,242,233,252,243,242,245,242,243,227,253,234,244,237,254,243,248,241,224,237,254,244,250,249,255,251,227,247,214,240,255,240,226,242,236,255,241,232,247,251,243,255,252,236,232,248,248,248,243,236,249,123,11,1,2,18,13,15,0,10,3,8,8,1,11,0,5,8,16,18,30,18,25,4,28,28,150,188,209,172,159,180,187,186,204,171,209,176,180,212,195,228,198,187,177,185,174,164,197,183,185,200,192,199,178,183,202,198,196,185,216,193,191,195,218,170,179,175,209,196,203,167,169,179,199,186,198,215
,185,197,192,211,205,200,218,210,212,221,186,165,190,179,165,190,201,199,175,198,198,201,191,218,183,176,193,218,193,187,188,205,193,187,197,190,210,202,204,176,198,199,182,208,176,206,202,177,206,164,208,185,197,204,191,196,192,221,215,186,191,177,174,192,193,193,183,187,204,191,176,193,197,164,203,182,195,192,174,191,207,202,204,211,208,185,214,183,198,203,216,205,206,212,225,202,210,208,183,228,182,193,178,195,187,219,199,222,220,211,203,192,198,194,182,200,211,176,185,204,201,205,211,198,205,237,210,206,212,227,196,226,205,213,212,223,181,216,214,206,192,207,203,202,234,203,216,184,216,209,227,236,197,211,228,240,212,209,201,218,227,224,224,208,220,204,225,219,221,231,221,199,217,206,218,214,234,234,206,219,211,222,179,210,213,210,221,212,223,196,199,213,217,210,217,219,203,224,199,227,222,211,234,201,220,201,199,201,211,211,210,195,199,205,229,222,209,218,223,198,221,232,215,199,227,207,198,210,221,237,240,213,221,236,206,221,218,225,232,222,217,205,238,208,230,190,235,230,237,210,192,187,172,219,229,218,235,232,206,241,198,239,178,223,236,178,193,179,141,132,111,120,118,115,213,211,227,223,243,184,162,226,201,130,103,110,158,125,187,211,40,16,8,19,62,36,79,27,36,41,119,162,197,217,176,221,190,103,90,196,219,167,152,153,167,211,217,199,207,168,64,117,109,149,156,247,183,149,216,249,222,247,200,225,220,217,229,191,228,220,191,190,209,254,157,59,135,88,53,45,36,45,52,104,131,175,253,191,182,139,44,32,5,7,18,28,43,20,58,45,59,24,103,203,241,214,149,194,189,236,252,226,197,163,210,252,251,220,232,166,89,139,161,237,243,239,242,221,95,26,57,89,108,120,141,91,30,45,35,30,73,142,124,127,103,19,44,38,17,6,34,58,38,39,10,29,39,26,45,37,31,36,39,27,19,24,59,50,15,30,30,14,36,44,38,40,28,11,50,48,40,43,54,32,9,4,51,41,11,24,6,22,25,46,53,40,15,33,13,28,46,27,65,19,18,43,30,38,29,27,27,45,17,38,68,111,127,74,98,65,78,55,51,24,35,3,5,6,33,11,31,36,45,36,24,52,51,28,45,15,13,6,30,18,39,47,30,35,43,13,22,26,76,138,113,117,96,93,100,97,87,72,102,106,99,107,43,24,19,42,41,34,62,15,55,39,19,52,45,36,51,20,14,36,53,21,43,6,34,91,89,84,69,50,71,38,56,55,56,129,102,90,90,98,90,49,90,92,113,104,152,235,236,248,232,241,255,241,235,234,243,225,224,254,254,241,241,245,241,247,234,223,234,225,255,248,247,244,236,235,241,255,242,252,251,238,255,247,253,238,247,251,249,253,249,240,245,245,217,230,232,247,249,234,242,250,247,241,247,239,243,247,238,105,10,0,2,11,26,15,26,18,20,12,13,12,12,27,13,12,17,7,5,14,13,14,1,13,188,189,191,175,184,184,192,172,182,188,199,225,210,178,214,186,185,173,195,212,192,187,178,155,205,190,193,212,182,194,215,211,178,179,195,164,181,197,191,190,209,184,183,209,183,176,197,170,190,188,199,191,196,213,187,194,174,209,198,197,204,196,203,185,209,193,207,190,219,214,185,187,206,179,185,205,185,211,204,194,205,186,199,194,198,209,199,198,193,203,217,188,193,186,189,183,188,204,216,175,184,200,191,172,206,182,183,215,215,186,199,182,185,200,198,224,195,192,203,202,192,185,216,186,202,190,210,218,203,190,219,195,219,186,191,220,186,218,185,205,188,199,217,213,223,197,224,165,198,205,209,209,198,201,203,232,196,203,202,195,201,190,163,217,188,206,225,194,212,196,208,213,229,216,194,225,213,200,204,217,204,232,214,201,194,218,232,197,198,218,203,207,218,191,198,214,221,224,212,222,206,230,213,217,209,190,209,198,232,221,208,217,186,206,199,207,241,207,192,223,206,223,203,220,205,217,199,190,202,219,222,239,239,205,217,202,228,200,216,202,220,205,220,223,213,228,212,212,238,213,210,197,221,230,225,216,195,217,208,202,216,206,221,210,210,214,205,213,217,202,230,230,228,217,213,223,224,214,235,2
04,208,241,249,252,243,223,229,213,213,207,231,215,231,208,225,213,221,210,208,227,243,206,209,216,195,223,216,236,238,232,225,210,171,141,168,157,145,190,225,185,165,116,105,187,186,127,204,188,238,229,244,153,159,219,179,103,96,117,173,117,198,209,15,13,16,97,82,100,49,15,30,89,216,200,220,204,210,235,210,148,64,89,95,117,192,203,194,245,225,208,219,127,108,146,147,163,176,238,145,134,237,213,246,208,220,205,234,215,212,199,222,205,159,188,196,252,141,79,105,84,83,90,57,67,55,29,91,186,247,254,195,91,28,32,34,8,37,32,7,45,39,61,47,63,177,251,232,152,166,229,252,229,250,198,150,212,244,250,233,244,224,105,76,209,190,238,243,242,218,65,25,38,87,103,123,162,211,144,74,87,89,79,130,129,135,140,39,32,17,54,53,61,64,23,26,31,79,11,15,22,30,36,30,46,37,51,51,29,46,52,24,31,18,36,31,31,29,28,35,28,20,41,24,58,41,61,38,25,7,25,22,20,32,19,43,28,42,23,39,40,75,39,32,17,27,26,25,12,15,40,38,24,45,37,24,28,28,94,143,100,58,99,52,28,22,31,21,32,20,15,39,14,28,38,54,46,26,8,36,41,19,3,35,19,11,30,34,53,10,57,25,22,28,29,66,96,116,119,69,74,62,78,38,18,60,104,108,95,61,47,14,29,29,7,34,16,11,27,38,39,42,16,53,31,35,41,59,15,26,25,22,95,86,64,52,53,74,41,57,68,52,118,83,116,108,133,80,87,77,98,95,142,196,219,226,248,255,224,250,234,239,245,249,236,248,244,250,246,248,252,232,252,249,242,248,223,243,255,253,254,250,254,238,243,238,236,240,255,251,253,237,234,251,241,255,238,239,255,250,255,240,249,255,242,237,255,255,239,243,248,244,250,233,246,250,125,0,10,16,21,37,17,7,0,1,17,6,20,12,12,0,9,0,17,5,12,21,16,30,11,172,178,191,176,196,190,197,200,221,176,206,226,195,210,194,190,180,174,207,191,199,199,199,188,181,178,199,196,201,192,195,189,220,164,196,195,182,212,186,177,166,193,187,204,189,175,165,212,192,179,180,205,176,225,221,208,206,220,196,232,203,177,194,200,181,184,205,197,204,174,187,208,188,200,183,188,187,197,184,198,211,200,213,191,196,191,183,182,174,180,200,173,195,185,212,194,187,182,196,181,192,188,214,217,163,195,170,210,192,195,215,208,186,193,201,166,178,196,195,207,194,195,194,190,214,205,217,197,213,201,209,188,212,182,173,176,214,200,224,214,182,188,190,210,205,210,211,193,210,211,200,212,214,215,201,200,205,196,205,221,208,205,218,213,201,211,195,217,198,205,215,194,205,199,200,204,211,201,225,203,213,220,204,205,200,235,198,198,216,230,210,200,222,211,222,193,228,224,224,230,217,187,210,219,201,222,205,219,196,195,207,218,213,211,190,215,197,223,209,215,203,212,207,211,243,219,236,228,218,204,219,224,213,233,219,227,205,203,235,183,204,216,205,206,209,198,205,231,217,205,204,191,203,204,216,208,221,219,201,207,228,223,204,187,191,202,237,209,210,193,230,205,214,212,233,209,213,225,244,226,198,198,167,223,214,246,204,228,235,226,213,201,223,198,237,241,234,208,213,213,239,222,201,214,200,187,212,195,185,187,187,150,177,159,151,110,112,178,225,185,175,111,74,237,227,82,149,191,180,201,191,162,141,186,172,136,129,137,194,150,193,222,50,27,64,142,140,125,83,40,28,132,229,214,185,226,220,178,173,151,48,22,58,154,225,198,224,229,164,153,191,150,92,162,194,224,230,241,132,130,200,151,204,219,181,200,239,228,223,201,239,237,200,171,200,242,106,78,137,116,78,72,40,38,28,86,133,234,247,231,156,38,24,36,47,38,40,54,16,51,65,48,56,178,251,248,150,163,219,241,248,246,225,142,215,249,242,242,244,240,183,135,184,253,208,250,240,249,112,6,42,82,99,97,120,155,188,129,33,22,83,81,92,138,121,109,48,5,23,42,55,47,42,25,20,27,21,33,25,24,38,24,28,26,34,40,38,41,30,19,39,14,60,39,41,15,22,46,47,25,25,1,54,50,43,71,36,10,28,53,29,25,39,17,42,18,34,20,22,24,15,51,12,17,37,34,23,24,26,18,31,49,28,41,33,51,
35,72,107,98,75,32,70,29,6,37,35,32,29,13,8,41,58,47,54,46,22,24,24,11,13,36,16,33,34,54,22,7,27,42,27,31,18,30,34,52,118,119,89,56,75,74,68,23,15,43,98,121,110,63,29,32,30,41,51,34,38,9,61,28,51,36,63,30,27,35,51,55,8,40,40,95,73,68,63,70,83,54,49,53,28,91,110,114,123,125,84,79,62,95,70,25,11,80,216,242,232,253,255,250,255,245,248,244,243,247,239,239,239,235,242,238,251,255,231,239,236,243,236,247,242,254,236,251,249,254,249,249,235,253,245,248,253,248,248,238,245,224,250,250,228,226,246,250,244,251,231,254,239,228,245,249,243,224,220,114,25,31,10,34,7,23,28,6,14,3,5,20,2,3,26,21,31,27,13,12,22,15,5,19,202,180,196,185,168,203,212,188,228,179,174,184,197,176,204,183,213,185,195,178,199,215,176,199,196,195,198,184,211,208,209,200,196,225,202,170,172,188,205,198,183,187,190,188,196,189,175,218,220,181,208,182,204,202,185,172,204,171,222,179,193,188,205,194,203,227,204,200,180,195,211,209,199,197,197,190,197,200,202,201,202,204,180,189,209,187,207,213,191,219,204,211,207,194,202,207,180,203,214,198,209,182,197,181,198,168,183,201,195,221,204,193,184,188,203,187,176,198,212,183,212,191,194,174,205,203,187,192,205,205,206,197,196,199,193,195,204,211,212,195,203,207,225,224,220,205,186,226,213,205,200,193,194,220,208,202,196,195,209,190,185,221,215,196,204,228,209,205,212,204,227,194,233,210,231,200,200,225,230,203,216,218,209,203,218,207,207,202,235,219,239,217,202,216,204,229,205,203,205,235,199,200,208,211,217,223,215,218,209,201,201,226,180,224,219,203,219,201,215,190,217,209,227,217,228,210,206,219,208,211,209,235,227,206,198,205,208,211,205,200,208,206,221,201,215,210,217,235,232,199,197,206,205,214,219,206,212,199,226,202,222,216,223,211,214,221,214,220,209,215,230,219,227,202,207,216,199,217,213,241,191,89,85,128,202,218,223,224,243,215,220,242,236,212,226,194,227,220,219,227,247,221,197,192,201,208,171,171,178,180,176,182,170,200,218,194,205,197,212,127,173,102,52,208,198,110,176,146,146,200,221,148,166,162,129,138,152,166,220,142,191,237,123,56,95,170,127,137,140,52,14,173,244,186,136,212,216,152,155,158,91,69,113,175,231,192,209,193,142,136,208,144,116,122,142,175,244,234,138,213,202,213,202,188,198,196,220,199,209,190,239,230,184,163,223,213,79,44,113,111,76,44,50,36,64,126,205,245,241,162,65,1,35,42,40,28,23,24,22,43,74,48,127,231,243,165,175,213,236,233,250,228,159,159,243,221,247,245,250,176,165,196,238,242,221,233,245,101,38,33,76,102,120,111,93,69,128,103,15,47,36,66,103,119,98,115,27,18,48,45,38,53,50,24,48,29,29,42,10,9,20,40,52,38,28,30,28,53,7,28,56,33,38,54,25,16,40,24,19,26,51,19,42,62,46,65,41,20,14,28,20,30,34,33,32,48,39,43,36,41,28,26,33,41,38,38,47,42,3,51,46,31,33,27,26,84,51,56,126,137,95,62,58,18,43,18,34,44,28,28,28,29,59,42,78,53,17,11,23,17,35,16,28,20,43,16,8,25,33,35,36,44,36,14,38,23,70,113,102,82,107,110,45,79,49,61,94,139,144,108,59,24,36,24,34,40,36,48,41,20,45,58,34,53,53,42,34,43,50,32,35,91,53,42,68,78,72,49,62,70,35,92,114,126,101,115,87,88,45,53,49,18,50,200,255,246,253,215,250,244,224,219,254,233,252,245,249,249,240,248,235,251,243,234,253,242,255,252,253,230,254,251,228,229,246,255,223,243,252,255,215,239,254,216,226,225,242,243,242,228,243,241,238,237,246,252,239,255,236,239,249,244,250,248,201,97,14,0,0,4,9,2,18,18,24,28,11,7,4,0,1,9,21,26,15,5,10,14,37,13,176,200,206,181,182,199,183,192,215,197,203,175,197,225,191,181,174,182,213,206,191,185,191,191,193,203,197,220,194,219,193,180,210,180,219,215,195,209,184,194,177,214,195,194,220,179,202,210,197,193,188,200,207,206,188,205,181,206,217,181,192,172,179,172,204,208,199,174,192,174,197,203,228,208,1
92,184,201,184,201,196,205,202,197,195,177,171,181,187,187,170,196,223,183,194,195,209,184,197,201,217,203,189,189,180,190,204,196,201,182,214,203,195,192,197,194,198,184,184,209,200,178,221,182,217,189,196,204,202,201,203,201,196,192,199,200,214,201,199,204,200,195,221,197,197,234,170,193,214,203,200,189,200,198,213,206,182,185,215,203,198,192,211,193,199,208,212,181,228,189,215,218,227,180,174,203,216,222,235,222,209,197,214,212,189,213,190,202,214,222,219,200,206,201,236,189,195,217,212,191,188,206,235,181,218,237,196,209,222,197,219,204,192,212,185,208,216,203,203,221,218,212,217,209,210,228,207,240,199,191,188,212,222,223,214,199,177,219,211,195,222,202,218,206,210,211,215,198,217,214,219,225,222,208,219,224,194,200,211,211,196,210,212,212,219,199,211,201,230,202,214,202,225,217,216,228,205,211,215,221,244,183,133,117,124,181,216,245,222,221,223,225,224,220,220,224,191,228,209,209,223,236,199,173,175,149,170,180,205,211,205,214,230,188,220,222,252,210,177,214,135,172,122,57,180,169,85,214,206,171,217,229,172,111,152,142,136,160,179,232,113,191,241,159,50,88,180,152,144,134,81,29,195,246,157,141,233,170,138,199,215,222,77,69,182,187,159,192,228,189,179,160,138,120,164,181,215,246,193,160,214,236,245,236,233,203,178,210,195,210,182,236,206,174,194,218,208,71,74,94,71,79,23,10,16,40,115,139,246,191,56,33,29,25,15,70,24,2,43,32,57,21,53,173,219,181,183,232,241,234,251,203,168,132,222,241,226,223,243,164,158,219,221,245,222,207,254,62,33,58,95,128,123,110,76,41,49,138,79,21,36,124,175,143,121,114,113,18,23,54,38,32,57,58,20,32,41,36,23,42,33,18,24,30,46,43,30,66,41,36,16,56,42,25,16,20,5,25,50,47,31,46,44,29,50,93,82,77,34,15,39,29,46,30,60,34,27,15,18,26,33,45,32,49,33,49,38,13,36,8,33,34,58,15,37,84,90,55,48,59,101,92,69,54,46,40,31,23,14,7,28,30,42,60,81,14,41,34,17,22,26,24,36,19,32,17,68,32,27,35,30,24,28,47,44,20,26,34,51,110,135,107,139,68,62,34,41,94,117,149,158,104,52,18,23,36,44,42,64,43,42,38,51,47,56,53,34,59,33,18,16,42,85,61,79,70,84,58,51,74,45,26,78,122,83,98,113,83,42,81,62,31,106,249,240,254,230,241,255,238,240,247,241,249,249,246,230,253,254,249,244,253,235,240,250,251,239,255,250,251,245,253,244,249,224,254,254,251,239,250,255,248,248,238,255,247,241,246,243,240,232,254,250,244,248,229,255,229,241,253,244,235,229,254,243,227,101,6,0,5,0,8,21,31,16,10,14,11,8,20,14,30,12,9,14,8,5,19,1,13,35,175,197,180,193,199,202,192,206,182,189,203,188,213,226,204,195,198,175,198,195,225,194,201,176,187,205,221,204,205,184,225,185,189,226,196,206,194,179,172,200,175,187,209,201,176,212,190,228,196,210,195,208,176,201,214,180,169,194,204,192,201,190,176,175,216,195,209,203,201,202,204,193,196,204,208,191,226,193,197,188,209,215,185,207,212,220,204,191,197,193,193,197,198,194,205,171,190,196,179,174,200,205,186,194,182,201,201,205,195,207,206,201,192,173,191,202,181,214,171,210,217,195,194,201,196,196,196,215,208,205,207,235,199,202,177,206,213,202,176,211,210,208,188,177,210,215,191,211,208,203,203,194,192,205,222,204,187,212,176,184,214,194,205,190,212,222,202,201,207,222,210,196,197,193,218,234,218,207,217,205,219,225,209,219,199,198,207,224,193,226,193,202,201,213,204,226,203,216,207,219,205,196,230,204,212,209,206,219,206,210,221,202,208,204,221,207,205,202,207,232,215,197,236,188,216,221,198,235,223,216,213,214,191,212,223,198,201,200,210,195,183,231,220,202,225,244,238,226,219,184,213,188,201,236,235,205,222,212,212,223,205,212,191,230,186,215,218,245,237,198,200,225,212,220,218,214,229,220,208,195,225,221,187,164,205,187,215,208,234,209,214,198,237,221,237,235,203,221,202,193,174,
194,136,202,173,186,219,225,244,217,243,240,160,208,211,231,195,150,205,172,185,98,98,229,183,118,195,234,197,224,220,142,139,154,130,155,119,214,200,90,177,248,176,34,63,147,150,131,76,42,10,172,243,119,175,244,188,201,225,237,225,112,66,156,200,198,224,233,219,202,145,143,132,177,226,235,246,151,144,221,247,246,240,254,230,210,238,220,220,191,234,198,162,173,172,187,65,73,45,51,40,19,30,22,84,88,114,164,65,14,13,25,11,37,64,47,40,37,78,52,66,84,213,198,149,193,219,251,238,234,161,157,216,237,238,213,209,156,155,227,245,192,241,214,166,134,31,48,80,86,106,125,70,57,45,64,141,92,26,43,146,175,134,135,137,107,15,22,22,57,50,17,58,30,40,38,10,25,32,29,18,45,22,26,34,25,22,47,20,15,36,30,19,48,22,66,19,39,3,36,27,21,30,21,72,86,62,80,20,31,27,33,53,35,43,50,33,40,41,40,3,21,33,36,10,38,38,13,37,33,46,56,47,71,134,147,87,59,74,64,89,89,71,51,41,42,30,5,13,16,17,45,40,53,33,28,23,13,24,22,13,45,29,22,18,15,37,16,41,42,32,18,41,33,32,17,38,30,50,102,80,94,74,45,61,42,69,95,157,198,146,63,24,14,42,46,33,33,43,32,33,45,50,29,26,42,30,23,37,43,62,84,76,36,57,58,73,47,65,71,10,103,107,104,112,92,131,83,92,47,75,234,255,251,237,236,244,254,246,242,255,220,248,248,235,248,235,243,252,253,240,241,246,251,224,246,249,251,248,229,242,235,245,242,239,240,233,252,238,255,244,241,254,245,243,253,237,239,237,255,247,240,229,238,247,246,247,254,231,237,250,248,251,248,212,106,8,18,0,15,15,27,9,10,11,20,22,5,22,14,1,0,22,34,0,10,15,13,34,8,201,202,193,197,195,229,169,188,205,177,191,199,195,180,198,203,219,180,189,186,204,208,190,174,176,226,183,189,178,229,207,182,207,198,184,160,217,180,218,220,182,194,219,212,203,208,178,210,195,183,198,214,195,205,209,238,192,178,191,208,194,208,201,212,209,190,199,196,191,185,205,201,180,193,183,187,206,196,230,180,203,193,215,191,203,197,213,187,201,212,197,196,160,207,188,204,201,197,192,182,204,205,205,200,199,208,179,196,200,189,202,194,196,188,194,209,197,201,200,192,208,196,201,191,166,180,198,204,205,166,211,201,203,208,219,212,211,178,234,201,219,216,212,207,230,206,217,218,209,199,187,213,226,179,222,197,193,214,188,209,207,222,200,188,204,227,208,216,232,218,204,182,217,177,230,213,216,190,229,212,201,215,184,217,202,207,210,213,191,222,205,219,237,197,209,214,218,203,226,226,210,209,217,193,196,188,202,226,198,213,206,207,217,210,213,204,222,197,196,206,210,221,189,194,193,176,222,223,214,191,210,215,202,197,233,203,226,193,183,194,211,205,187,198,216,220,206,227,204,201,226,222,210,214,202,209,212,225,206,238,195,214,220,205,205,216,207,197,211,196,216,215,222,226,210,213,223,226,220,209,220,244,242,231,204,183,202,242,228,214,216,215,202,223,220,213,208,204,185,206,204,227,194,215,220,212,241,234,226,228,227,202,179,226,214,225,140,164,228,201,186,95,121,246,201,96,202,217,198,184,226,133,133,175,160,174,137,223,193,108,164,220,221,85,98,155,144,108,87,47,40,200,246,98,202,242,181,177,203,238,132,94,43,98,171,209,245,205,201,143,164,162,183,203,221,252,229,144,161,248,250,225,231,238,229,204,227,237,208,207,239,232,164,149,176,174,107,35,59,42,74,83,103,165,146,157,111,117,42,7,12,9,46,40,28,14,20,58,49,61,129,195,165,172,190,201,237,240,241,169,154,225,243,239,240,226,199,148,201,246,237,213,244,212,26,31,34,96,128,101,123,83,27,86,52,72,127,77,51,35,22,65,111,139,134,76,31,32,48,29,63,49,27,43,39,20,15,15,31,21,20,27,37,29,57,60,23,35,36,34,28,36,28,20,42,30,19,24,38,50,45,30,25,31,55,48,87,61,24,25,26,48,30,6,36,33,28,40,28,28,60,30,6,26,29,41,23,36,31,52,50,99,89,85,125,187,75,49,38,71,79,110,75,38,53,20,29,47,24,54,20,28,28,39,39,3,19,27,3,34,33,20,12,30,
[Elided: several thousand comma-separated 8-bit sample values of raw test data, apparently the contents of the new test/non_greedy_mv_test_files inputs added by this patch.]
,240,212,213,205,219,211,214,219,231,213,195,195,218,216,227,227,223,225,209,220,203,212,232,190,218,196,235,183,215,215,199,218,202,202,198,207,222,241,238,206,207,240,211,185,223,181,195,220,206,226,226,174,202,213,211,227,230,165,204,213,216,215,234,218,205,190,223,217,198,223,224,209,217,205,240,182,171,214,245,183,218,219,212,203,208,221,203,172,220,199,210,221,216,199,228,210,211,209,198,213,200,197,194,205,189,165,185,232,228,221,157,153,181,217,190,214,182,192,170,90,160,181,172,183,179,132,103,141,208,217,222,212,207,217,224,217,183,216,202,226,227,121,70,77,38,75,71,103,69,177,224,179,209,169,212,172,153,174,133,239,226,184,143,202,244,198,239,244,172,64,112,134,149,205,233,220,194,192,222,236,220,216,205,213,155,81,195,104,37,135,188,124,101,140,61,78,85,128,142,92,147,184,141,191,241,230,170,167,171,151,131,108,87,38,0,121,180,195,207,78,59,53,122,105,119,79,31,96,116,125,95,56,64,20,25,8,6,2,33,87,113,120,122,106,134,80,85,95,106,118,133,158,56,28,128,130,105,101,58,17,18,11,17,38,16,8,22,6,36,19,28,14,25,42,62,63,37,55,54,7,33,42,28,39,14,20,86,85,81,150,133,87,108,55,39,13,28,6,40,54,66,66,82,12,50,48,50,74,70,30,16,0,9,16,34,81,106,49,35,21,32,41,51,51,31,40,47,37,50,37,45,20,135,115,94,186,251,217,133,61,41,27,43,24,34,32,15,51,25,35,32,15,35,53,36,31,55,16,19,53,46,47,49,117,108,34,18,33,18,64,45,64,98,37,85,115,86,44,85,141,146,128,78,105,83,57,50,73,60,50,80,66,27,49,24,30,35,29,15,30,31,19,11,33,34,31,7,33,60,49,46,29,33,29,39,55,55,50,36,48,15,44,141,147,118,104,123,117,109,30,50,236,253,246,222,231,236,231,255,249,251,246,218,237,224,250,233,236,240,230,237,232,245,244,249,245,247,252,230,241,247,252,242,236,235,245,236,233,249,214,234,217,254,231,222,224,226,249,228,250,232,234,236,236,243,237,251,235,253,234,221,248,234,114,4,11,24,11,15,27,12,14,7,16,23,12,3,8,16,5,0,20,10,22,35,8,19,7,196,205,206,183,165,209,205,198,204,191,203,192,203,188,199,213,198,182,206,188,232,172,183,189,215,197,198,185,181,192,213,185,227,201,200,205,209,196,178,192,204,193,195,181,175,188,192,173,174,195,201,203,172,211,195,182,209,179,187,193,197,192,204,150,196,208,171,206,165,177,198,208,210,183,187,196,199,173,219,206,207,196,216,204,180,211,206,187,194,210,190,188,209,212,204,194,212,213,198,194,194,184,189,194,198,219,208,222,202,196,200,214,197,212,177,211,211,218,204,213,220,147,199,225,204,214,203,180,218,194,209,193,188,172,205,203,212,196,217,214,192,229,204,202,205,197,217,202,206,231,198,182,216,235,211,196,180,195,202,199,191,206,198,207,239,227,219,184,221,206,212,194,204,211,207,228,212,224,206,211,196,197,201,222,211,215,196,228,216,224,191,210,214,221,205,234,208,224,223,230,220,210,229,215,194,232,212,215,209,195,223,240,213,209,231,198,210,215,201,213,197,236,217,211,219,211,209,202,202,208,211,205,211,222,223,216,218,197,216,212,193,217,224,222,202,210,192,218,212,208,209,198,207,217,204,207,220,203,205,217,218,203,200,220,217,231,221,208,213,211,196,232,242,182,217,198,207,204,174,242,237,225,197,200,219,210,206,200,206,204,187,226,216,192,184,214,206,210,203,208,225,224,217,185,195,230,191,169,155,219,197,205,138,142,213,204,198,229,173,186,120,110,157,165,140,166,131,128,91,124,195,210,218,213,214,211,226,217,178,209,206,234,230,83,85,54,85,84,76,89,76,203,229,190,222,172,170,172,214,182,203,228,147,119,153,194,238,215,211,234,118,67,113,122,168,210,225,205,213,243,210,229,225,229,225,184,97,103,159,68,66,202,162,110,166,75,32,48,74,135,213,108,202,217,173,233,215,170,102,44,64,60,18,29,21,15,2,114,185,135,73,1,67,103,110,134,79,41,70,113,144,81,54,79,46,15,35,15,
21,34,64,114,131,83,99,88,72,93,102,82,107,99,130,130,16,27,127,125,100,123,59,28,8,24,23,25,7,33,29,26,34,19,40,52,74,82,79,64,51,42,54,44,28,34,35,29,0,36,156,139,117,158,157,95,103,42,37,36,32,48,52,49,74,70,62,18,43,23,52,92,44,18,34,37,14,35,19,69,102,68,49,14,49,24,30,13,26,21,30,15,54,77,29,58,203,139,64,192,224,185,158,124,88,63,69,57,51,40,22,88,21,33,47,20,14,33,21,14,78,30,12,38,37,32,35,115,155,58,32,17,18,27,63,78,53,32,154,138,67,82,125,115,178,147,125,141,62,85,64,66,46,24,49,36,10,31,13,29,16,64,23,25,39,28,29,38,7,37,24,43,37,28,36,18,30,40,45,29,38,35,21,36,43,57,133,132,135,108,95,121,84,23,67,236,232,254,249,234,229,227,242,254,224,241,217,244,240,212,242,233,242,253,247,240,239,255,243,236,241,234,225,234,238,250,244,230,243,239,208,242,234,250,239,244,242,227,221,216,238,230,228,220,231,238,251,222,235,248,250,238,231,245,239,250,230,116,2,15,9,9,7,29,10,11,24,34,18,9,3,6,9,4,8,18,1,24,37,19,4,23,186,191,212,221,192,177,183,194,201,204,191,203,211,182,221,195,188,193,198,190,216,210,212,206,180,205,179,194,183,205,208,197,199,185,208,200,207,205,195,204,184,201,187,195,170,201,182,207,192,179,208,178,190,206,178,214,206,203,174,209,208,202,190,196,145,171,200,193,183,189,175,203,191,186,213,201,178,188,221,180,197,177,202,195,213,192,206,172,207,195,192,186,196,200,194,171,194,186,188,195,199,192,191,191,219,194,193,220,205,204,199,220,197,194,200,207,211,214,192,186,193,199,210,209,203,205,196,223,202,201,208,224,188,230,190,198,193,190,223,202,194,196,198,178,197,192,169,214,218,212,196,217,191,226,204,211,226,222,228,223,215,215,220,198,186,197,217,210,210,207,200,197,177,200,215,208,208,192,190,216,228,178,202,231,193,194,177,222,208,206,218,196,217,205,206,235,222,198,219,212,206,219,226,215,176,216,230,210,224,224,193,215,190,206,214,221,202,206,214,227,208,191,205,202,224,199,212,212,225,218,227,229,203,203,221,200,207,204,212,210,188,193,202,234,190,206,197,196,215,176,204,196,196,203,230,212,188,226,215,225,214,220,163,190,190,208,210,204,220,224,200,218,246,198,203,189,238,186,145,167,220,207,207,217,214,193,204,195,205,207,191,202,207,210,206,195,182,188,207,223,193,220,226,180,187,195,189,165,170,209,213,209,152,173,192,220,190,224,226,156,146,119,161,169,137,184,151,172,147,172,208,183,214,229,196,203,212,206,156,194,211,217,210,128,89,80,102,68,70,77,49,167,187,183,184,169,195,165,223,170,210,177,132,157,189,220,239,206,234,249,116,76,140,112,182,141,158,205,206,227,215,233,210,241,161,135,111,146,143,49,139,179,109,157,111,51,9,35,46,116,171,106,156,144,149,186,128,56,28,30,28,58,53,73,66,37,41,199,188,43,37,36,115,99,106,96,13,30,121,131,115,33,51,33,2,11,48,38,44,96,73,122,94,117,114,105,71,118,109,103,91,91,110,121,11,14,136,93,100,99,66,28,30,37,21,38,11,18,32,18,49,48,101,121,128,116,120,84,75,95,69,48,48,46,55,55,39,57,124,143,87,124,130,94,100,46,50,56,61,40,68,66,60,112,20,25,48,50,80,119,73,47,16,16,5,12,3,41,82,98,57,50,58,59,65,64,52,39,60,44,56,66,44,95,197,105,53,42,92,74,44,137,147,145,92,57,41,37,31,66,42,43,13,14,34,28,22,29,36,26,14,20,41,29,50,116,112,57,30,32,23,51,58,90,68,106,180,121,96,113,123,100,123,151,137,162,74,91,70,47,38,5,42,28,22,41,49,15,36,42,20,44,25,27,20,21,35,73,17,42,24,52,54,35,32,34,31,20,35,32,27,49,37,29,144,140,127,109,107,97,96,22,58,222,244,243,236,238,248,254,247,227,255,245,248,255,242,242,242,213,246,238,231,231,255,239,234,230,222,232,236,244,204,241,252,227,244,230,244,222,244,244,242,238,249,231,222,233,245,238,249,253,237,234,224,250,211,233,233,249,227,241,247,220,221,139,5,5,0,36,16,23,1,14,34,19,11,2
0,12,7,5,8,25,39,16,15,4,24,4,7,188,218,189,168,203,196,192,190,190,192,199,190,202,198,227,195,192,211,205,189,189,208,179,211,206,207,207,204,167,188,202,205,180,176,188,189,207,190,187,193,190,180,200,195,206,186,202,206,198,160,198,209,211,193,195,188,198,201,204,169,183,193,198,181,180,184,201,178,201,199,208,187,191,210,188,180,181,182,186,190,209,199,197,170,201,190,203,164,192,195,214,205,223,201,197,190,181,201,217,195,187,186,191,200,201,208,221,209,208,206,192,174,195,204,204,217,231,230,205,196,214,215,222,205,217,190,204,190,203,197,184,176,206,195,204,193,197,209,213,189,206,197,191,191,215,199,195,202,205,201,216,197,217,191,190,208,198,212,192,197,194,182,198,212,220,201,205,206,182,196,202,187,216,214,204,215,184,206,221,220,206,230,194,207,209,212,205,221,200,224,220,199,220,211,227,195,210,179,204,209,233,205,216,207,199,213,219,220,203,221,207,238,191,225,225,247,241,221,194,211,201,205,218,222,216,205,219,213,216,221,202,227,230,203,209,225,209,207,219,206,189,184,201,192,223,205,220,209,214,192,193,195,227,191,201,216,202,211,192,210,215,196,207,174,228,198,204,193,202,224,187,163,210,205,206,207,201,167,56,83,190,215,215,193,192,194,204,239,198,198,206,197,213,197,209,198,205,213,220,187,191,211,200,187,199,203,162,153,205,209,207,199,142,178,194,213,217,214,224,148,118,95,185,147,200,200,189,185,159,176,208,207,208,217,207,203,232,205,169,205,231,238,189,132,95,49,103,77,90,91,8,117,175,147,164,218,212,194,193,128,209,206,186,178,184,232,205,205,228,255,131,109,170,140,173,118,147,187,202,241,229,246,227,194,107,118,134,169,86,85,208,127,1,149,156,59,71,69,22,58,110,68,76,96,64,86,40,24,43,24,69,88,87,55,70,90,67,198,117,26,76,84,98,111,71,42,55,131,139,113,59,42,64,27,8,22,34,52,95,92,116,106,102,125,103,97,94,88,105,67,74,130,145,93,48,82,140,126,86,137,22,17,9,6,21,18,3,35,26,26,59,109,139,162,114,107,99,81,106,100,100,102,107,83,59,80,92,114,123,147,58,132,148,180,131,45,122,50,55,70,64,80,61,87,18,46,47,46,47,56,62,53,31,12,12,24,40,14,41,31,55,87,98,109,81,88,111,90,74,119,74,78,94,85,52,34,12,20,58,65,33,86,103,128,120,69,46,11,11,44,31,41,35,42,33,51,22,83,56,34,45,57,84,16,80,97,69,14,37,19,6,39,35,69,67,103,203,88,71,110,100,69,60,69,123,110,57,59,85,68,56,23,63,44,28,47,48,38,9,21,23,41,19,17,25,68,32,59,75,39,63,23,52,42,26,19,39,24,32,20,48,56,58,9,107,136,140,113,104,128,112,39,23,195,255,251,254,242,252,241,226,253,230,234,251,227,244,235,237,244,220,239,238,238,217,249,246,236,228,244,229,222,253,248,219,239,238,250,237,238,238,241,236,251,228,255,239,234,226,240,231,233,241,253,227,245,241,244,233,238,247,227,249,255,233,119,2,0,10,2,9,17,1,3,17,18,4,25,10,13,1,6,1,30,14,16,4,39,6,8,173,215,181,205,199,198,201,200,196,185,183,196,183,222,199,197,176,188,205,202,199,208,177,191,176,174,181,178,201,198,175,194,192,201,185,209,199,200,201,201,202,207,183,169,176,207,198,202,185,172,185,199,205,200,181,196,211,182,208,185,178,180,196,187,203,197,187,194,170,180,208,178,189,212,198,189,219,197,194,208,211,199,188,195,172,209,196,177,191,186,182,172,177,207,170,203,200,200,218,191,180,232,182,213,209,178,215,187,187,208,195,201,188,229,209,218,218,206,211,210,204,200,225,224,207,206,197,185,195,192,213,201,218,162,210,209,204,227,213,199,206,203,215,168,214,211,184,185,189,196,197,196,196,181,204,207,220,223,233,171,211,205,215,213,210,204,190,193,206,182,197,207,232,204,236,198,203,196,206,196,193,224,217,183,212,190,206,214,208,202,216,206,199,199,207,218,229,215,217,202,227,211,206,233,196,223,231,196,189,206,188,219,214,207,198,198,234,204,187,183,211,
235,223,223,203,201,192,210,218,239,234,221,224,206,217,180,190,182,203,198,221,202,231,238,171,211,201,211,231,180,212,210,188,181,227,206,225,188,186,219,183,189,195,215,208,218,185,181,161,201,186,134,222,224,217,227,192,195,85,98,151,195,217,180,213,221,203,203,212,197,211,215,188,181,216,198,193,225,217,177,199,190,225,203,173,214,165,167,189,218,224,170,148,190,204,216,197,202,186,136,119,114,173,188,177,147,141,182,172,149,186,196,209,208,216,240,206,206,164,224,225,228,166,135,84,77,124,91,130,77,25,119,154,173,231,226,192,199,151,117,216,215,199,180,209,225,238,192,222,237,136,200,203,185,166,129,149,167,184,221,240,252,214,76,80,142,183,172,64,121,221,84,88,195,115,57,44,77,81,54,58,70,65,50,43,50,45,55,84,43,31,50,24,63,41,32,26,64,54,50,84,82,108,67,45,48,100,147,119,57,80,39,1,26,22,23,71,64,95,101,116,119,90,127,98,109,108,95,95,34,150,168,158,123,90,128,173,125,124,101,44,12,6,17,36,31,41,56,16,50,88,121,132,111,117,114,88,87,127,109,149,134,122,83,94,123,103,99,117,91,120,135,98,152,83,50,79,18,45,79,76,114,50,64,39,26,30,18,39,40,20,35,16,55,22,30,36,15,14,14,85,100,87,74,78,133,176,127,96,125,146,138,119,50,7,16,31,37,47,60,64,147,146,142,120,58,44,25,26,35,28,41,51,63,63,61,53,66,60,61,55,81,69,55,73,83,37,53,30,33,40,41,12,31,25,67,116,58,51,74,98,76,53,80,75,104,69,119,116,66,40,85,50,67,67,65,54,10,16,9,23,27,52,50,40,62,73,97,113,90,89,75,30,36,52,30,40,53,49,27,13,37,58,19,109,162,131,144,86,86,118,40,30,186,239,232,243,243,246,237,224,235,222,230,240,249,242,230,238,235,213,235,223,246,245,241,249,236,231,243,231,251,226,239,207,216,232,246,247,235,231,235,233,225,229,240,242,241,247,216,236,228,219,246,235,245,222,249,204,240,209,244,244,249,239,135,5,10,1,0,21,4,16,18,0,5,14,16,2,13,10,19,2,8,17,1,0,2,9,20,191,189,184,217,195,184,206,215,205,213,180,196,200,192,221,195,192,233,191,198,198,173,205,191,199,190,215,176,189,198,196,206,194,193,198,215,175,192,192,193,190,176,206,198,180,199,196,196,193,174,199,192,194,182,202,192,213,196,197,211,204,195,224,189,183,176,185,204,221,188,189,182,178,193,195,182,194,203,210,181,202,188,210,196,190,187,198,202,176,185,183,208,202,201,202,196,211,190,201,179,212,194,210,189,204,202,212,188,209,176,187,222,196,205,202,200,201,172,205,182,199,212,211,203,207,197,183,185,209,210,215,193,209,202,191,203,192,203,184,217,199,198,206,194,216,193,205,207,198,195,179,213,193,199,209,207,210,200,192,208,204,190,207,198,200,197,209,195,223,188,191,193,206,213,200,224,202,227,213,231,208,212,207,217,217,203,219,218,207,217,187,217,210,205,222,228,221,200,199,193,239,193,205,197,202,192,221,212,193,215,219,219,212,189,210,241,203,228,194,212,199,223,179,212,218,213,194,198,230,206,238,201,227,210,193,211,205,171,219,212,195,191,183,191,218,222,201,181,182,218,193,211,203,202,211,220,214,199,218,170,188,198,197,220,210,217,196,196,194,204,207,163,200,214,185,197,214,216,204,214,197,192,209,196,206,183,177,220,216,190,196,224,214,195,210,222,192,205,229,203,187,183,173,207,188,178,196,170,201,214,233,176,147,191,186,212,199,229,181,167,111,44,118,180,159,174,162,206,171,155,203,195,198,189,211,228,241,208,181,201,213,228,127,131,110,68,100,80,99,74,60,164,220,193,241,173,150,211,153,175,222,192,150,172,218,219,236,198,247,221,154,239,239,162,165,185,157,129,145,210,223,233,146,14,95,162,152,123,142,177,169,68,93,162,79,64,67,65,57,53,55,66,63,73,69,48,82,25,44,24,5,13,21,0,28,2,4,43,41,99,80,71,75,31,43,85,116,107,58,92,33,18,1,8,17,39,112,113,103,135,99,121,93,109,101,105,115,82,91,116,216,215,170,128,145,117,157,84,111,99,24,33,5,31,7,3
6,49,12,59,99,99,140,124,89,84,101,88,90,76,80,76,99,74,40,42,61,95,85,74,102,82,72,45,72,41,37,78,35,49,77,99,84,54,51,25,16,36,28,20,39,18,1,21,21,31,36,19,14,41,57,69,77,37,58,17,25,41,47,35,32,19,41,49,19,14,27,12,61,77,49,119,202,183,110,74,33,28,40,45,35,25,33,58,40,54,40,21,43,43,68,75,83,124,119,56,66,50,33,20,19,28,36,17,42,21,58,88,55,17,46,121,79,90,94,95,112,116,125,127,61,28,86,74,72,16,14,79,33,46,46,46,40,66,40,78,73,89,92,107,113,81,93,69,56,13,52,36,23,45,38,38,25,25,28,73,113,129,130,96,85,120,78,21,154,239,235,255,227,254,254,228,215,227,226,228,222,225,242,227,229,237,229,235,205,236,225,242,235,222,249,251,245,224,217,250,220,238,220,243,226,237,234,207,233,247,239,248,249,248,244,243,239,228,244,242,224,234,233,226,241,212,241,240,245,222,117,16,23,11,26,8,14,24,6,4,23,13,26,25,7,15,0,5,6,9,14,10,27,15,16,199,207,206,171,208,209,183,189,211,201,187,202,211,182,205,201,202,195,201,167,202,181,214,199,196,197,185,188,198,181,203,190,202,214,180,185,187,165,204,201,194,197,196,214,191,202,173,211,187,193,187,166,217,208,190,196,211,200,187,188,204,188,181,199,192,191,184,187,192,200,213,187,212,184,177,194,186,194,181,212,197,201,211,202,182,186,176,197,189,179,201,199,181,195,185,172,204,191,186,175,200,187,189,222,210,204,202,182,220,191,176,234,194,196,188,195,221,205,210,202,227,199,199,207,205,205,194,193,195,203,202,202,204,201,217,182,220,212,193,180,192,203,174,183,200,202,199,206,218,208,200,187,217,198,211,186,223,191,204,206,210,206,220,195,210,213,199,222,193,209,188,211,191,178,205,214,209,205,178,218,192,216,228,210,210,214,205,195,194,222,239,207,199,199,191,215,191,227,196,212,235,206,205,219,210,221,216,221,198,211,201,191,204,215,210,215,209,192,192,208,221,210,200,223,216,237,215,189,212,207,202,223,230,192,202,225,194,190,202,200,206,200,206,225,227,206,188,205,225,213,193,201,208,219,210,207,191,180,201,192,206,225,206,212,208,218,178,179,171,195,186,202,240,212,199,188,196,205,173,217,218,183,225,201,192,209,191,207,207,179,232,216,190,181,202,209,197,201,195,204,217,218,192,212,175,215,170,173,201,206,199,171,127,211,191,199,201,224,172,121,84,55,155,180,164,158,156,196,165,134,195,185,207,195,196,205,203,195,164,212,179,188,135,167,100,55,108,77,92,52,68,185,243,213,200,110,178,204,198,179,144,154,153,201,233,221,220,237,232,185,160,238,238,186,141,181,174,157,133,182,234,230,102,28,134,143,154,186,224,195,79,75,75,88,41,6,25,17,33,20,32,32,45,50,50,33,39,26,1,15,13,12,3,50,10,11,17,62,88,109,100,51,61,33,77,109,114,65,46,52,4,11,10,34,25,119,106,123,94,86,87,56,120,99,84,76,77,74,112,176,229,192,113,113,141,124,130,97,128,70,14,37,6,33,28,31,31,56,95,192,148,117,92,65,92,108,93,87,33,43,50,63,42,80,45,43,78,77,91,64,79,55,29,48,12,34,42,10,48,70,40,43,51,50,41,26,27,38,9,9,5,24,30,26,28,29,60,49,68,97,56,84,74,83,32,37,41,17,27,20,58,28,39,49,40,33,20,72,89,91,163,189,138,70,75,56,36,52,36,32,25,38,27,37,21,12,28,45,30,23,24,70,104,140,114,87,53,69,28,38,29,38,30,49,26,108,80,51,31,73,118,100,106,81,113,168,101,60,57,35,59,49,49,42,39,60,53,62,81,55,70,66,72,74,83,100,115,114,97,92,80,99,63,42,34,18,24,41,71,20,52,41,40,43,64,126,154,138,101,114,76,56,37,130,245,233,254,230,228,237,229,216,236,232,241,229,242,211,230,235,216,238,248,250,224,221,254,240,251,231,238,236,242,249,243,249,243,226,246,245,255,253,238,243,247,234,214,254,245,228,239,250,243,247,228,242,239,219,233,231,238,235,241,246,220,116,5,15,12,34,5,13,16,9,7,17,22,2,34,4,9,2,33,3,6,4,15,2,8,37,193,198,179,208,201,203,173,191,197,213,167,198,195,211,206,174,190,214,175,223,213,195,1
96,187,200,209,213,203,192,174,233,217,179,196,188,210,193,205,176,176,170,201,182,194,197,185,202,193,197,198,184,193,204,186,196,199,183,173,192,198,189,185,191,193,202,194,215,220,181,180,208,194,209,217,214,200,199,187,190,216,178,202,204,185,212,202,197,197,245,195,213,212,200,208,206,218,189,220,199,176,183,183,172,194,193,215,201,204,195,193,182,201,213,200,215,209,218,202,205,212,214,198,211,222,184,208,216,212,216,210,240,205,204,196,200,205,206,219,195,189,205,192,203,190,194,207,191,213,191,210,204,198,183,236,216,204,202,231,218,207,214,229,197,200,189,209,182,185,219,202,213,211,212,206,226,193,211,213,191,212,184,186,209,207,175,210,226,208,207,232,232,209,204,214,208,211,208,202,220,175,208,223,188,197,201,210,201,184,231,196,191,193,201,199,215,203,208,211,226,226,233,207,210,215,222,217,238,199,204,189,207,227,212,219,204,192,210,208,215,214,211,222,194,196,235,199,208,223,213,190,199,207,205,223,189,207,219,203,223,197,179,207,190,214,217,208,199,206,186,186,222,212,201,227,215,195,186,202,181,194,213,167,200,197,226,211,209,208,217,202,209,206,215,218,187,208,206,188,216,212,189,207,181,200,190,197,188,170,210,210,206,143,166,225,215,205,199,215,170,119,103,118,190,205,173,164,190,200,150,128,186,173,202,240,189,190,216,195,186,212,225,199,159,189,118,77,80,74,69,17,59,184,169,170,213,163,207,246,176,172,145,171,188,216,224,223,224,237,226,118,170,233,238,184,159,160,200,165,138,191,208,215,87,101,143,123,154,200,244,146,44,49,77,72,19,35,23,25,28,21,20,25,33,16,16,1,13,6,20,5,12,12,12,11,14,20,32,48,85,106,101,35,51,75,98,111,63,53,23,14,5,12,23,54,52,103,106,116,108,123,91,94,116,107,86,110,90,89,162,197,210,125,71,115,106,98,138,97,109,75,33,21,19,11,26,20,22,51,156,205,159,72,120,40,30,39,66,45,34,52,18,56,43,31,47,53,56,44,61,87,67,53,41,57,33,30,5,53,46,81,65,38,64,66,62,78,79,37,24,25,19,0,38,69,87,74,83,113,121,106,69,88,85,46,46,31,14,40,32,44,49,58,80,72,93,105,140,129,162,141,121,142,113,146,113,81,96,92,101,79,75,81,48,45,16,14,17,7,26,17,64,76,80,104,78,119,94,54,67,30,49,38,37,10,73,131,84,52,13,94,133,98,94,80,126,139,60,22,83,57,2,33,69,10,41,33,64,62,80,65,60,88,60,86,102,89,78,121,92,79,65,80,82,82,69,22,45,29,23,20,19,35,38,17,71,143,141,117,97,82,119,56,17,159,239,254,246,229,232,212,232,224,241,241,234,209,236,239,231,238,238,231,244,207,242,229,249,218,236,244,229,249,245,255,231,232,245,237,246,225,222,237,226,243,248,235,231,250,223,239,241,243,239,213,234,245,242,223,241,240,248,233,237,250,241,110,11,8,0,23,18,1,27,9,27,20,12,9,16,5,1,12,19,13,15,22,18,29,28,35,208,207,218,217,204,188,188,197,175,213,180,183,203,199,195,194,206,215,191,206,194,201,200,176,172,189,189,199,180,192,199,188,187,169,197,192,203,182,196,213,176,204,203,195,205,184,207,195,190,190,192,207,197,189,193,179,181,214,192,198,164,213,184,226,179,190,221,214,202,195,205,208,203,196,184,193,204,197,183,210,176,174,182,187,186,192,174,163,192,160,160,206,225,197,182,199,198,187,208,186,185,211,185,222,191,202,198,209,197,197,214,193,184,167,196,215,185,193,217,204,208,190,193,199,205,204,209,196,195,199,218,203,191,194,171,216,219,181,184,200,213,196,199,193,185,178,232,182,180,224,193,219,195,197,206,197,217,179,201,194,190,204,207,196,199,207,217,222,205,204,201,206,191,193,189,198,221,183,202,208,197,206,192,209,218,206,198,233,192,208,206,216,206,202,225,226,212,237,222,213,202,188,217,233,202,222,224,213,216,224,234,209,186,212,214,222,244,217,199,201,212,207,207,210,199,198,215,193,214,205,194,227,215,200,208,211,209,227,207,215,198,232,202,230,210,198,197,188,202,168,198,
196,223,224,182,201,194,215,195,204,210,224,201,214,222,199,200,206,210,205,210,199,183,198,214,182,199,211,182,204,198,194,187,200,194,196,211,188,216,212,203,194,195,187,215,184,207,204,179,193,192,219,210,219,210,189,138,157,205,196,189,164,167,191,215,229,197,216,155,120,95,109,171,170,135,174,197,238,159,108,183,226,173,215,204,197,207,164,167,205,184,212,157,172,117,71,103,104,84,60,68,64,153,236,193,176,199,181,169,163,190,197,199,218,223,247,227,232,214,86,169,244,245,210,169,181,180,116,126,190,225,220,159,106,157,173,176,227,226,93,33,69,33,30,5,21,13,32,23,22,39,23,12,16,9,24,11,19,16,4,19,28,46,22,34,5,87,81,84,89,56,49,68,102,66,77,61,57,27,38,24,44,54,87,101,142,126,94,90,96,89,130,75,99,84,78,120,181,202,136,154,114,71,105,72,92,118,79,107,62,24,38,26,14,28,48,23,70,156,166,99,85,50,50,42,28,29,44,66,46,37,49,41,16,35,21,61,56,53,52,77,42,43,51,56,58,38,38,50,52,56,69,66,58,73,77,73,60,48,33,37,27,76,87,103,75,96,80,77,47,23,43,34,49,45,34,56,54,100,118,151,190,213,197,220,173,197,168,148,142,67,54,63,76,103,116,119,131,140,120,134,141,134,131,98,87,69,31,53,25,45,47,66,75,38,42,67,48,53,82,63,54,68,55,97,115,73,50,66,156,111,112,108,63,95,102,52,45,64,33,54,43,33,67,78,47,25,42,59,58,95,121,96,22,36,79,63,73,51,64,69,95,85,43,48,76,32,18,39,33,38,31,42,39,53,154,145,134,124,86,103,86,69,150,247,222,234,252,234,251,248,255,242,250,220,223,238,242,238,207,225,228,223,230,247,243,243,248,236,245,252,238,239,243,251,205,250,238,248,222,247,246,249,223,237,251,250,251,236,236,218,234,251,243,237,247,242,226,238,224,236,241,234,249,226,106,12,21,16,5,2,5,3,23,14,15,18,32,20,3,10,12,15,19,15,4,3,7,10,27,200,186,199,194,211,197,188,207,203,203,212,171,193,170,206,207,199,186,192,191,211,216,170,192,187,188,181,193,192,181,189,180,212,174,202,196,215,204,198,186,184,204,207,200,186,210,210,205,217,188,214,196,193,187,196,200,210,166,194,188,183,181,198,196,194,188,215,193,193,193,208,181,203,179,189,210,181,183,205,212,201,207,202,204,213,189,201,201,200,197,212,200,211,218,226,195,183,202,201,181,220,192,171,204,182,188,198,231,179,191,188,195,209,198,191,213,196,201,202,173,206,218,217,196,197,210,210,208,205,199,203,195,193,232,188,197,197,187,205,214,188,221,197,186,201,212,186,212,196,197,196,210,186,214,193,197,172,177,176,220,191,205,221,195,201,207,192,197,207,197,193,202,178,192,229,199,193,213,208,195,201,201,197,209,204,213,199,211,205,210,212,197,203,197,227,219,199,209,214,211,200,205,205,210,227,210,201,202,191,180,201,191,200,187,219,178,199,218,224,214,215,228,211,214,214,192,193,185,230,219,221,199,219,211,222,211,201,204,203,202,195,218,216,186,197,182,206,207,193,208,184,192,218,210,207,174,175,172,204,196,186,200,185,204,225,221,213,224,190,181,224,177,196,200,205,189,204,208,202,191,200,202,185,189,240,213,181,217,195,189,210,205,205,175,165,185,210,207,207,200,186,193,205,214,212,199,164,176,212,232,199,148,171,209,198,212,207,198,169,95,98,83,154,151,176,205,217,237,205,115,181,192,180,223,212,193,191,178,170,190,165,197,138,87,80,63,137,116,84,88,70,137,207,227,207,181,131,184,189,153,198,181,218,215,201,237,213,238,194,100,184,225,229,225,202,168,118,152,163,189,225,239,197,146,183,244,192,221,189,39,59,49,4,30,22,31,10,32,27,11,30,13,8,18,39,39,19,44,22,18,15,10,14,3,27,45,85,90,51,47,53,64,124,101,67,83,38,32,11,91,69,52,66,96,115,143,118,125,91,108,82,72,107,57,98,93,161,190,119,72,92,62,101,108,89,89,142,121,108,54,10,16,26,46,46,29,21,87,153,135,81,70,40,49,46,49,52,27,53,57,40,55,50,28,54,47,44,42,52,50,63,50,75,41,38,41,16,52,37,27,40,58,51,66,69,56,66
,47,80,57,66,102,121,93,62,54,50,79,84,35,25,25,10,42,49,79,122,204,205,221,223,214,155,140,133,53,68,60,74,83,54,23,16,21,56,44,80,91,39,36,51,59,75,110,122,180,153,120,120,100,46,50,31,38,32,39,38,3,33,55,78,55,79,59,68,87,69,118,93,154,104,103,97,37,72,112,33,13,22,23,48,26,35,59,58,57,47,78,64,58,56,90,43,53,28,28,60,73,52,51,45,55,79,54,72,42,43,45,48,44,36,2,28,23,55,139,160,125,121,74,97,89,41,167,204,220,250,254,252,253,219,215,214,227,218,230,243,220,206,221,216,243,228,255,242,254,248,252,236,249,253,219,251,240,242,251,229,244,248,253,236,234,238,205,244,233,243,252,237,253,241,222,255,255,236,214,250,234,222,246,227,224,244,231,231,122,29,6,38,6,12,7,30,7,11,30,8,6,1,1,0,18,8,6,29,1,8,53,23,25,204,194,189,235,188,207,213,196,197,208,175,188,172,182,206,175,194,188,196,210,179,216,200,202,182,190,191,220,209,196,183,218,198,198,207,216,216,185,213,176,188,185,203,199,180,203,177,213,188,198,204,198,189,227,193,186,222,168,188,190,187,189,179,185,208,195,200,208,190,190,180,189,202,191,199,189,162,173,203,195,204,188,188,187,184,203,211,208,223,178,200,178,193,193,194,206,204,178,197,183,212,214,199,214,189,185,188,210,208,190,197,200,204,191,215,182,203,189,177,177,189,191,196,198,204,201,216,206,214,194,191,197,212,209,191,169,209,183,227,214,194,201,196,182,189,227,190,178,194,226,218,192,206,196,175,196,212,173,223,184,226,220,199,219,210,197,200,217,202,194,178,200,192,206,202,206,204,199,199,216,234,229,197,213,217,196,197,215,221,209,193,202,211,196,209,217,204,229,213,235,218,229,194,213,201,205,225,199,208,213,212,232,203,207,178,188,211,215,229,193,193,221,230,193,206,195,217,198,195,193,221,203,205,186,209,190,181,219,197,214,193,194,222,192,208,208,219,233,204,210,209,218,202,218,201,196,203,204,204,183,235,203,200,217,200,187,179,228,182,208,212,210,179,213,200,204,193,207,188,228,223,202,199,181,217,179,214,194,202,184,205,186,231,202,211,194,216,198,194,180,211,187,198,208,223,160,161,183,204,212,182,161,175,192,203,226,192,179,134,93,96,77,136,168,192,198,230,214,165,102,158,203,215,185,195,234,233,182,217,178,161,170,120,99,67,58,115,81,73,72,68,154,243,212,177,159,132,210,176,115,173,162,201,220,230,220,212,242,190,88,210,237,236,251,212,146,133,152,161,178,175,204,183,165,197,233,198,214,170,59,71,41,9,29,35,18,28,39,10,25,9,19,22,28,8,10,20,37,33,2,23,7,56,40,26,43,104,65,23,36,49,88,70,71,78,49,20,37,74,139,177,141,121,129,104,113,79,96,106,118,93,98,95,115,105,88,105,114,72,61,90,107,98,114,98,99,91,139,86,45,28,26,36,52,21,34,2,23,116,69,59,50,48,33,57,21,50,59,62,52,50,35,29,20,19,63,25,37,70,21,38,26,14,28,38,22,26,43,67,62,51,67,62,48,67,50,47,63,59,78,73,104,74,49,38,28,21,52,38,43,25,31,53,128,159,195,162,165,156,94,75,24,46,66,48,77,19,76,68,52,55,66,54,44,66,58,39,56,59,33,49,48,36,30,51,75,85,128,154,130,119,104,75,69,29,46,16,23,22,8,34,55,35,82,68,63,93,95,102,125,99,101,63,52,85,68,61,71,54,27,49,63,85,56,50,60,61,58,27,36,42,46,31,30,16,61,33,33,57,50,52,28,44,60,61,91,45,84,45,35,41,50,26,29,72,147,133,149,89,99,116,111,72,84,119,185,245,244,252,217,234,235,224,224,232,228,228,224,218,236,224,232,236,249,235,240,245,240,220,246,233,255,246,252,227,221,244,242,228,251,255,227,248,244,220,242,249,228,240,217,241,253,224,243,246,230,220,243,239,239,218,241,232,231,241,117,6,29,5,8,10,1,14,25,2,26,35,16,17,3,14,0,20,3,9,15,36,8,6,11,210,189,173,187,191,181,185,213,202,186,178,202,193,192,182,192,183,195,210,212,186,194,190,190,184,209,190,174,192,191,186,197,175,195,211,197,188,203,195,205,211,209,207,213,197,202,215,220,191,181,177,195,174,178,189,
219,192,203,214,183,188,182,188,186,192,194,205,198,179,184,183,190,174,190,211,205,186,175,196,204,184,190,210,193,198,197,187,193,178,222,212,190,210,187,211,207,202,185,194,213,198,202,201,207,195,204,214,214,208,208,196,207,203,198,184,195,202,203,199,195,206,194,170,208,216,181,197,206,179,204,214,229,190,184,185,176,169,195,194,219,188,203,207,196,217,222,196,206,186,200,204,199,190,202,210,216,210,191,199,206,211,194,191,174,208,204,193,196,195,205,211,184,212,203,207,191,188,205,211,188,190,213,239,197,215,215,217,222,212,207,184,231,217,188,192,193,211,198,205,176,213,208,203,238,187,226,212,211,226,229,209,218,198,230,208,210,212,207,193,208,207,210,195,190,215,199,213,221,208,226,221,192,198,190,172,222,198,229,203,217,196,218,214,168,180,196,218,211,215,180,194,196,222,210,197,209,197,202,208,184,213,208,218,176,199,210,210,195,182,188,194,205,189,216,217,217,209,204,196,223,208,215,191,201,189,226,199,169,200,190,185,196,216,220,203,172,198,196,216,200,194,198,221,215,213,144,179,187,193,215,205,152,208,208,215,216,212,182,138,105,83,96,180,167,191,220,217,206,154,107,137,225,239,190,210,179,210,178,160,206,188,206,160,162,113,60,108,80,90,92,53,190,240,147,154,149,167,243,147,145,150,181,242,216,233,227,212,215,141,124,240,213,252,239,205,171,189,179,204,187,160,178,167,159,197,193,209,191,157,87,73,7,17,8,17,25,16,12,19,16,15,19,4,16,5,14,18,25,36,29,17,21,7,11,76,82,71,60,33,61,85,104,70,60,32,44,1,43,145,203,216,108,76,85,113,115,122,90,115,84,98,93,99,88,108,90,72,72,83,128,135,80,88,84,77,134,93,163,90,21,30,15,43,46,20,22,26,56,35,65,52,23,20,49,51,59,47,75,56,50,26,13,67,48,35,64,49,60,37,50,36,25,7,28,43,32,43,49,62,29,42,53,68,59,64,51,56,68,67,64,71,58,26,42,42,8,26,32,24,53,93,129,213,211,158,143,70,41,49,49,32,57,35,53,56,55,53,51,40,34,66,60,59,53,50,57,44,67,42,71,38,14,39,26,62,60,59,53,47,81,100,128,146,75,91,50,56,31,43,8,7,17,52,80,64,68,57,69,90,74,68,56,49,41,63,63,53,31,35,36,68,68,58,66,52,47,46,57,39,21,46,45,23,47,24,53,63,57,59,55,35,46,62,56,71,69,42,33,70,11,25,39,59,35,30,107,140,129,99,84,107,91,77,149,68,100,185,218,223,243,233,241,245,228,235,230,229,234,218,244,219,254,247,237,246,236,227,232,237,240,231,228,241,231,238,248,249,241,231,231,222,238,242,230,223,254,233,250,246,240,211,239,249,247,228,227,246,228,227,215,224,234,231,251,224,91,19,0,12,21,10,14,19,6,7,13,22,2,24,1,12,2,3,9,1,22,14,1,30,29,198,189,194,215,215,175,204,197,182,208,197,192,209,207,205,186,197,217,214,215,186,209,216,189,204,215,178,188,166,192,200,196,197,189,195,212,197,184,190,202,188,185,190,190,207,157,189,206,189,185,190,199,215,195,201,202,202,195,208,202,205,207,219,187,193,175,185,174,210,207,206,165,206,201,187,183,189,183,212,193,188,197,225,205,192,204,181,172,204,205,194,194,181,194,212,227,171,230,197,176,181,194,234,199,217,181,206,195,227,221,189,191,166,211,178,213,193,207,190,218,209,196,196,197,213,174,231,194,206,212,198,187,179,199,177,188,185,209,218,199,237,203,206,197,203,191,223,203,221,177,209,186,216,198,200,208,220,194,198,202,188,207,195,180,205,211,215,195,210,213,220,189,213,207,216,203,209,229,212,225,213,217,195,211,175,192,198,218,203,198,222,230,219,194,238,196,195,190,190,208,216,214,206,216,215,206,209,196,209,209,197,250,215,228,201,204,179,221,203,188,207,199,202,182,207,221,207,204,195,179,204,203,203,211,220,196,210,218,206,207,191,207,196,203,189,205,208,216,204,195,187,190,196,191,172,197,199,207,199,209,207,209,198,219,193,196,219,201,188,212,209,210,197,194,208,187,198,220,196,188,197,206,191,173,200,208,195,205,189,205,206,18
0,193,215,208,205,197,211,203,188,221,217,176,191,214,144,158,179,212,221,160,160,193,201,209,207,195,189,133,117,71,66,141,169,188,180,227,223,160,167,175,206,203,224,201,188,194,152,176,174,184,212,201,181,119,150,171,99,80,56,31,147,205,190,226,166,193,207,148,199,195,240,228,201,232,215,233,209,151,169,214,213,236,229,242,198,172,197,185,171,183,192,184,166,209,238,162,140,87,32,24,9,6,27,14,15,8,3,57,38,25,32,14,14,6,37,10,34,27,24,16,18,21,47,60,76,53,55,62,84,104,64,56,71,44,11,33,44,142,140,173,75,46,105,90,95,102,114,121,108,88,116,102,78,128,91,72,125,195,152,125,102,111,121,104,99,127,120,86,40,31,35,6,25,5,25,10,28,26,27,41,52,51,47,103,44,70,65,25,37,20,49,79,65,66,68,66,34,27,51,29,22,27,62,46,34,44,45,30,22,34,66,39,38,44,39,27,46,56,55,49,28,43,14,17,35,44,57,64,165,216,208,142,115,47,61,30,38,44,32,44,33,54,36,41,26,17,20,8,40,44,79,71,28,63,70,22,43,65,47,52,27,37,26,37,38,47,52,74,49,96,57,82,97,109,89,83,61,28,24,10,28,19,29,45,52,61,57,30,50,55,61,58,55,26,42,48,29,28,47,46,30,52,55,58,77,36,29,55,47,43,28,40,51,47,47,32,63,37,55,27,49,45,36,36,37,47,44,48,29,24,39,29,30,35,110,169,123,103,108,118,82,107,111,107,50,85,116,132,177,195,216,228,243,249,224,244,229,212,203,255,240,240,237,236,248,249,230,251,227,233,243,239,241,250,239,230,250,238,245,233,240,229,229,225,246,225,236,245,222,235,230,252,215,230,225,227,230,237,214,215,230,237,219,224,128,12,0,1,31,7,4,12,7,6,15,7,27,15,13,4,8,9,9,22,3,3,5,12,22,178,191,198,205,199,202,191,172,175,170,198,177,192,208,188,224,180,218,174,195,193,189,194,174,184,181,204,210,195,175,195,194,179,205,215,193,207,197,188,184,207,187,189,208,197,178,187,188,203,190,185,194,201,184,181,199,183,174,193,190,216,191,187,195,203,181,177,210,198,196,219,157,187,195,212,187,199,190,210,192,176,200,195,199,189,191,191,211,202,187,179,197,197,197,180,205,183,213,197,185,195,200,203,206,187,177,193,191,212,216,183,192,192,225,192,217,204,210,185,225,209,217,182,206,212,193,207,208,223,208,195,203,231,218,225,191,214,197,195,198,200,206,212,203,196,218,202,207,197,215,214,181,194,181,209,199,201,207,204,195,215,200,199,185,193,223,216,209,195,198,201,215,188,215,180,210,211,195,192,204,168,207,203,212,227,193,221,213,183,215,196,203,183,208,230,200,221,193,214,224,224,186,206,208,202,219,188,198,206,196,223,206,216,211,205,205,207,210,197,203,199,204,187,217,201,212,196,204,182,199,208,193,231,215,223,210,196,200,178,195,202,223,183,199,216,195,202,186,198,208,217,186,201,204,177,186,188,196,191,199,210,202,199,209,216,204,199,186,206,194,193,223,180,205,197,195,223,188,186,193,211,199,189,186,203,201,211,207,201,198,235,199,184,193,206,205,194,190,202,193,207,206,209,179,214,171,174,214,196,197,159,180,197,215,216,205,187,188,87,125,95,71,160,187,225,217,218,204,173,163,126,197,239,218,219,196,220,177,157,168,240,219,178,126,100,145,164,103,102,71,26,130,198,207,236,156,207,210,156,190,191,209,222,209,222,184,254,188,154,185,202,225,205,239,222,207,217,158,159,153,167,175,164,204,208,232,177,75,46,5,0,11,8,26,22,17,15,56,19,27,18,31,16,29,5,14,13,2,9,27,11,13,39,28,93,56,58,59,80,98,61,51,41,33,35,21,46,99,69,47,107,63,80,125,108,127,149,102,107,95,91,93,79,92,85,65,86,150,181,126,128,105,105,97,89,101,117,134,55,3,13,7,13,38,11,26,15,39,21,43,53,98,58,91,64,58,41,32,28,33,59,117,107,102,82,88,88,29,32,24,41,41,14,30,15,31,15,11,29,12,54,39,48,34,26,24,31,51,35,36,36,29,16,26,17,50,97,215,234,192,132,55,48,43,93,50,47,27,35,4,17,5,12,14,2,22,5,28,22,47,61,45,79,75,103,78,95,47,66,38,40,25,35,16,22,36,26,40,48,55,55,58,107,71,49,69,116,106,129
,71,53,32,30,19,26,33,48,43,19,23,34,39,58,29,24,37,31,28,29,39,40,34,58,38,76,40,39,76,45,27,21,31,31,39,51,35,30,64,70,40,51,39,36,59,54,56,28,46,43,54,37,48,53,25,33,100,163,140,116,112,109,75,89,117,112,83,85,56,84,169,214,220,230,231,212,213,219,238,237,235,245,240,223,244,237,215,235,245,232,224,237,241,222,233,237,218,235,227,251,239,251,253,214,227,222,223,234,212,247,235,223,237,211,241,222,235,240,212,241,215,237,221,217,232,222,121,20,11,0,21,4,5,23,16,25,12,6,10,15,7,23,11,5,13,15,25,33,12,28,3,180,177,188,224,196,211,188,214,217,207,193,196,213,196,206,216,220,213,181,176,203,228,202,178,203,193,190,230,209,202,183,198,197,186,215,179,186,178,180,195,204,190,198,189,172,186,184,184,185,196,174,181,189,197,204,198,196,203,173,190,207,198,198,186,181,194,210,179,216,189,179,165,201,180,190,189,194,200,220,208,210,211,208,190,199,187,171,201,200,198,205,197,217,207,214,179,201,222,189,196,194,206,197,219,200,214,223,216,194,208,194,185,187,237,175,211,210,201,199,216,181,225,171,196,214,200,213,198,190,202,217,193,191,209,206,180,201,193,202,208,217,183,188,205,200,186,199,202,207,200,217,213,191,171,187,221,201,204,222,200,226,197,194,204,213,207,212,210,196,188,197,221,189,179,198,212,216,197,206,194,219,205,201,207,202,202,203,207,208,192,221,208,212,205,227,198,213,208,223,200,201,199,224,201,202,213,209,210,198,201,198,215,207,199,220,206,225,203,197,227,207,211,219,199,223,198,159,212,192,188,200,224,202,202,196,200,207,215,206,204,195,222,190,211,199,190,204,187,192,207,193,190,196,195,223,198,206,200,217,191,211,203,200,200,194,205,175,197,214,179,182,206,192,181,200,214,204,193,215,193,200,210,215,187,180,213,194,193,156,185,220,183,240,192,192,191,229,216,204,209,190,197,216,189,209,148,192,205,220,198,156,193,181,190,195,188,139,132,103,128,118,88,147,194,237,172,242,194,162,146,133,191,191,220,218,190,230,181,150,211,226,136,103,126,96,93,142,134,130,76,42,132,212,219,168,158,225,214,120,135,173,221,216,209,199,226,234,157,140,199,178,243,222,221,240,209,221,136,122,108,168,193,185,148,143,169,155,42,27,14,15,5,13,10,21,17,22,39,22,7,18,25,10,8,8,18,36,29,29,13,39,50,53,52,67,54,79,113,76,61,55,48,29,25,20,1,75,119,42,60,99,100,138,145,153,134,128,88,111,99,99,111,114,102,127,118,135,143,127,106,101,89,98,83,79,117,114,105,43,15,37,3,21,22,5,40,27,36,10,38,115,106,78,72,72,53,46,46,70,88,105,135,135,104,48,49,44,26,34,14,45,64,34,27,22,19,9,4,16,24,15,43,33,18,21,55,50,39,48,38,40,49,83,53,118,198,238,197,111,65,40,57,41,31,39,14,29,5,5,5,3,9,21,24,8,35,11,15,28,26,57,42,92,95,120,76,62,55,29,8,6,14,4,18,30,22,13,14,39,39,51,39,74,77,77,108,58,73,92,135,95,65,44,34,40,53,26,42,30,34,27,55,53,26,24,18,33,15,8,43,41,45,49,67,49,56,64,55,55,83,105,79,105,61,36,40,37,37,4,38,80,35,29,30,36,40,21,74,26,38,32,77,36,41,18,76,153,132,126,130,84,91,103,114,118,97,84,197,214,243,255,239,209,236,227,227,202,236,229,245,226,246,241,251,232,239,242,247,250,231,231,232,226,225,224,221,221,238,231,241,249,242,233,238,250,238,209,231,220,207,253,223,225,222,216,238,208,238,208,206,224,218,242,214,219,124,20,10,5,0,10,6,22,0,25,17,17,21,13,15,22,5,22,0,10,25,11,12,11,28,185,211,188,211,199,197,211,202,200,185,187,182,187,202,200,217,182,183,200,194,201,188,178,200,188,178,175,207,186,180,206,189,201,171,194,192,185,191,200,208,202,204,185,203,174,175,192,211,195,206,194,187,197,193,180,183,191,171,205,181,161,168,182,206,187,177,201,202,219,190,164,200,201,189,192,207,210,167,220,195,184,200,194,176,190,188,191,194,190,200,209,217,190,201,208,202,212,187,205,221,205,175,184,194,201,190
,213,193,175,189,177,195,191,185,209,208,196,200,212,197,202,194,191,199,196,184,194,219,192,207,196,194,200,193,212,188,199,200,181,167,199,203,205,222,195,208,205,185,188,206,183,190,207,198,197,215,232,203,208,205,220,210,190,217,216,224,213,212,207,207,178,220,205,182,213,187,215,208,219,246,205,185,223,213,213,239,213,207,207,200,226,225,226,193,226,191,211,209,189,211,201,182,196,207,196,189,222,213,216,215,194,201,196,228,179,228,209,213,201,226,214,218,186,209,220,212,210,205,191,182,217,203,212,199,223,205,206,200,209,205,215,199,189,192,183,204,209,224,194,196,207,206,221,189,204,182,204,212,221,201,198,189,197,217,177,176,218,202,202,197,198,177,187,187,202,197,209,197,207,186,224,210,202,187,202,195,205,199,206,204,172,192,207,198,209,217,193,217,216,208,212,212,182,227,197,118,167,211,204,187,151,194,178,232,196,184,168,156,136,144,131,96,188,242,227,210,237,199,168,167,142,195,212,203,192,187,212,200,161,192,172,123,138,124,80,125,129,110,159,108,71,131,207,193,177,189,237,128,83,156,202,224,190,202,218,230,237,141,186,189,224,227,209,206,192,225,224,179,112,136,196,169,121,100,43,62,64,28,24,32,15,19,38,17,22,42,23,21,10,29,24,26,7,11,18,6,36,21,13,25,17,23,51,83,64,81,127,116,76,60,53,32,28,9,12,27,114,104,21,77,108,99,101,108,111,94,74,73,98,134,116,98,85,100,176,185,148,101,98,98,114,111,95,98,83,120,167,112,23,5,11,22,22,27,44,52,47,31,16,33,81,112,85,52,61,51,56,46,60,76,93,65,35,63,44,43,4,19,43,17,53,22,20,54,32,38,20,42,33,74,68,58,71,84,94,120,163,162,154,198,203,161,151,168,175,208,130,121,30,54,56,46,11,18,28,20,18,11,10,17,6,1,19,14,15,18,26,43,10,42,39,61,103,59,93,51,50,28,17,20,21,23,2,13,8,12,6,18,27,28,9,28,21,53,52,44,69,62,63,76,140,117,65,79,75,59,96,31,69,54,53,38,27,68,37,41,10,7,15,26,26,35,48,79,51,44,29,45,86,123,177,135,112,99,49,102,30,25,29,42,38,61,30,34,45,49,44,40,17,7,29,32,63,40,75,68,165,156,148,118,105,76,93,116,105,109,132,240,240,255,249,208,233,219,224,243,233,240,224,245,233,233,201,221,241,225,241,225,239,227,221,231,241,222,223,219,217,225,226,215,214,230,238,227,232,232,247,240,216,230,226,220,214,200,217,234,226,223,221,225,244,243,233,217,211,100,12,2,0,4,3,29,12,0,6,28,10,17,0,6,12,11,11,44,19,13,24,3,10,9,195,215,195,194,207,191,198,201,199,201,214,182,173,210,205,185,199,190,184,194,187,201,219,195,202,184,196,185,173,174,206,201,185,208,186,212,161,185,192,209,186,203,191,183,225,182,220,202,174,185,175,198,200,200,232,199,168,186,211,188,180,192,169,208,182,160,191,175,202,198,217,229,187,175,193,171,180,227,179,192,191,202,195,202,208,198,181,173,190,203,210,181,199,194,201,219,208,211,206,205,201,189,202,182,194,201,180,217,230,201,210,193,187,205,216,206,213,194,215,211,226,211,209,219,197,220,198,229,219,204,191,209,187,195,207,190,213,198,214,211,220,193,174,196,227,198,231,179,171,183,200,210,211,191,197,206,205,205,217,207,213,191,220,186,204,197,202,208,218,188,187,199,215,218,210,205,223,224,215,202,198,204,209,231,230,198,214,220,208,207,209,181,205,228,209,230,197,211,208,206,217,201,221,212,214,215,202,199,196,206,185,212,213,201,219,205,188,219,192,219,199,189,210,210,181,191,203,189,219,208,210,210,226,204,194,188,197,221,197,204,171,204,194,211,188,192,195,187,203,200,201,202,194,215,174,190,196,226,166,178,169,180,228,174,205,193,209,181,201,199,180,214,198,173,178,196,189,188,187,169,171,171,201,190,201,184,209,220,199,195,211,204,205,198,197,218,213,213,201,197,183,188,195,204,167,141,208,197,194,195,163,158,179,208,217,198,164,143,130,139,112,118,233,223,237,210,237,186,186,190,153,186,197,200,221,232,231,177
,164,203,239,160,149,165,104,80,98,121,123,73,43,128,181,215,183,221,178,98,103,199,223,227,203,222,238,226,186,140,206,163,223,208,218,219,241,239,226,189,180,161,218,194,81,48,55,19,30,11,20,3,37,23,32,2,21,22,21,27,21,35,39,17,20,14,8,12,21,20,22,19,41,36,60,57,81,78,89,69,79,41,10,33,19,20,26,61,114,70,55,121,113,43,42,34,34,23,36,42,74,102,138,102,73,86,148,163,90,88,80,67,59,91,74,77,99,126,111,90,23,29,3,25,20,51,31,37,22,15,30,10,52,36,15,22,8,39,32,33,47,42,37,54,43,59,53,81,41,54,59,59,74,89,95,146,154,161,193,171,192,224,193,154,167,194,172,197,198,167,168,176,126,123,114,77,63,56,39,40,40,49,21,34,7,14,7,22,2,11,20,2,31,14,33,37,21,31,10,34,13,48,43,75,32,20,62,53,37,23,12,14,22,27,7,15,41,30,18,31,2,28,14,6,21,18,12,35,46,55,93,94,63,117,128,142,145,143,156,136,120,145,113,126,90,76,82,59,50,29,27,27,41,30,22,32,53,23,41,25,62,69,105,116,102,62,59,110,66,67,69,67,45,75,41,34,45,18,9,42,14,8,24,37,42,33,40,66,122,154,103,123,100,94,92,116,103,85,115,142,215,239,223,246,244,225,209,207,242,216,227,226,235,236,250,255,225,250,239,227,229,231,238,245,242,226,244,212,239,254,229,204,221,230,224,216,227,215,226,229,242,216,225,243,223,231,233,209,235,227,221,229,239,232,234,231,220,125,13,29,15,0,24,15,7,21,19,5,22,14,12,7,5,2,8,28,16,15,8,15,3,25,180,209,191,183,223,189,182,213,199,185,202,178,201,200,182,196,193,218,192,197,210,217,191,193,197,171,182,172,200,189,190,199,202,182,184,204,164,217,197,198,216,192,194,216,189,190,181,194,192,174,208,181,221,170,204,187,199,201,189,177,206,198,207,201,199,205,181,165,187,207,218,188,189,186,187,178,181,192,198,182,192,207,176,181,186,213,190,192,195,206,199,177,188,178,200,199,202,223,193,205,199,191,203,208,189,203,222,201,206,201,208,206,219,218,199,193,182,220,199,214,189,198,199,201,195,200,214,196,197,200,192,219,207,213,214,209,215,209,178,208,229,201,185,224,198,196,209,202,183,223,194,182,209,207,207,203,198,186,203,208,193,197,182,205,228,189,203,221,194,211,199,205,187,217,203,227,196,198,208,203,205,219,194,216,191,220,206,214,222,230,207,203,205,229,210,227,217,197,202,225,195,211,211,197,195,223,204,214,215,215,202,207,197,197,202,217,191,211,210,203,201,199,210,202,208,221,209,231,202,203,219,210,222,178,211,190,201,207,224,188,200,191,216,200,208,202,192,197,215,183,216,170,188,199,211,178,176,209,189,200,196,198,206,199,201,181,205,201,203,182,170,172,203,212,194,183,192,212,178,180,191,209,206,193,180,161,217,202,206,219,176,202,177,222,203,207,204,193,170,172,187,217,178,220,195,162,207,191,201,178,148,171,169,181,181,160,122,107,105,131,124,148,190,203,201,213,243,204,204,181,125,189,196,228,208,179,217,167,211,217,210,182,180,178,134,141,106,121,87,75,84,135,198,204,192,188,185,121,164,227,221,218,222,221,216,243,161,148,224,146,207,215,214,207,216,213,219,184,187,194,209,161,52,41,103,42,35,24,19,8,35,11,33,22,16,8,6,15,42,19,9,10,51,17,49,22,40,33,3,40,39,28,63,83,100,102,67,73,49,26,14,0,24,22,50,109,129,71,92,139,66,61,80,61,67,60,40,35,31,36,75,112,97,107,121,109,96,108,73,68,78,75,93,66,70,147,138,60,45,39,33,39,23,39,16,24,47,26,40,60,57,53,61,65,65,64,54,85,92,84,110,118,147,143,135,173,175,184,183,168,98,162,179,181,163,166,100,117,140,130,84,50,66,58,83,39,36,35,81,62,57,97,27,44,46,49,38,46,36,2,20,27,11,12,16,15,19,18,5,12,14,24,17,17,23,4,8,23,14,36,66,54,48,30,46,48,29,3,25,33,10,26,11,13,1,50,45,16,15,18,28,39,10,38,24,33,27,53,72,68,78,61,51,81,56,94,126,92,114,158,116,157,168,158,171,157,148,146,110,102,82,69,68,39,38,37,65,33,26,38,22,88,63,9,41,64,37,41,70,71,53,48,57,83,59,37,27,43,22,28,13,34,37,
36,31,43,141,155,146,109,101,97,87,111,79,78,85,97,180,233,249,237,234,235,232,219,251,250,242,249,255,247,252,248,241,248,247,240,244,232,220,242,247,230,199,219,224,231,230,242,242,235,237,223,233,240,246,233,245,237,247,206,221,243,212,218,231,199,241,238,228,218,235,242,217,126,9,7,8,15,14,0,18,20,4,30,42,7,10,5,3,23,29,0,21,17,22,3,23,14,191,196,188,202,203,190,193,202,197,200,218,196,218,184,197,181,204,200,154,199,180,211,195,195,181,209,181,185,196,210,197,195,196,164,177,196,185,200,198,188,190,186,195,212,184,187,188,174,218,182,193,188,210,202,193,199,209,198,199,195,174,185,215,203,189,179,184,186,200,199,205,190,208,192,198,195,207,183,187,204,187,186,191,191,206,180,189,203,200,161,171,181,179,184,213,218,193,198,187,190,205,213,207,222,203,216,219,191,189,187,176,198,183,200,212,214,208,204,221,194,212,195,229,204,215,211,192,190,214,178,191,191,200,200,195,175,212,206,193,190,217,201,213,202,219,208,213,185,200,194,210,217,197,193,183,209,193,207,189,201,211,204,214,221,215,213,207,227,210,211,237,201,214,185,177,230,229,215,214,212,190,219,219,215,226,239,216,201,217,213,226,206,201,215,205,213,207,211,216,212,220,210,213,209,241,203,219,213,211,215,225,206,216,209,190,216,219,206,207,206,179,193,211,184,203,226,244,209,199,183,204,209,182,207,213,208,188,205,214,205,199,212,192,202,199,186,203,207,199,205,200,203,214,186,179,211,205,183,191,182,200,209,205,187,192,209,220,190,197,169,186,209,214,172,211,199,204,199,205,191,201,189,183,206,167,180,220,205,203,196,202,175,209,195,220,176,181,178,216,185,209,204,228,208,185,124,193,205,202,158,190,186,192,189,178,142,101,133,149,118,136,111,137,117,126,139,245,198,175,179,141,183,201,200,219,193,192,149,202,206,174,114,173,214,112,125,129,143,94,36,87,167,228,197,191,169,168,176,191,237,213,245,203,212,198,232,147,170,214,131,209,245,210,222,192,239,221,215,220,166,149,103,49,57,65,15,23,9,28,25,10,28,14,22,20,20,17,29,10,8,40,27,33,29,14,30,14,60,7,50,60,41,46,103,98,71,41,74,38,9,34,13,27,49,97,119,152,87,119,113,49,99,90,40,53,21,41,46,37,47,83,116,154,147,143,141,144,141,118,154,114,125,135,115,97,119,111,66,101,126,120,168,131,167,134,147,168,176,185,151,169,166,171,165,144,163,166,150,149,141,153,151,127,139,136,125,145,134,112,86,87,77,52,77,67,67,54,68,46,67,60,64,70,60,37,61,53,70,60,58,58,50,25,59,45,39,54,29,26,20,12,22,8,1,28,15,16,1,49,16,14,9,17,26,18,21,44,2,27,40,87,87,75,51,54,72,47,13,31,31,27,19,10,16,28,34,31,31,13,39,17,21,9,10,14,18,10,13,38,55,89,67,69,78,85,72,57,69,69,61,61,44,75,53,66,104,131,143,157,134,173,136,159,141,97,102,84,80,71,49,65,52,49,49,27,52,30,39,35,34,60,55,52,74,90,67,33,20,29,39,18,43,44,50,54,16,97,141,123,119,92,120,107,93,79,117,100,120,159,235,244,248,221,248,244,233,251,254,250,255,253,244,251,226,253,255,245,246,218,244,224,219,217,207,215,229,217,208,213,246,214,223,213,212,201,218,244,229,211,210,208,217,209,242,232,234,217,234,212,232,210,220,226,242,249,139,0,2,5,8,19,11,0,11,28,29,14,12,2,16,23,23,7,4,31,0,5,20,11,32,203,195,212,200,180,198,189,193,190,182,207,196,192,197,193,181,190,203,174,187,175,193,199,170,182,179,160,214,189,196,211,199,168,199,201,178,208,195,199,192,195,190,210,182,191,169,185,192,199,214,213,196,211,210,170,169,196,188,181,190,202,200,193,204,172,198,202,190,176,195,197,187,196,182,185,211,207,196,215,199,195,186,202,208,193,214,171,187,213,188,184,196,197,196,201,201,166,194,223,216,211,196,198,208,218,209,181,204,194,198,201,200,206,204,190,216,191,206,235,198,206,194,198,236,204,203,210,209,191,199,195,209,203,190,226,202,211,211,198,203,194,
[data hunk omitted: tens of thousands of comma-separated pixel values in the
range 0-255, the raw frame dumps carried by the new non-greedy MV test data
files this patch adds; the numeric payload is reproduced verbatim in the
patch itself and is not meaningful to read inline]
,199,211,214,204,176,212,196,193,202,198,174,205,202,179,227,217,209,190,206,196,208,216,204,211,188,194,200,207,202,198,200,196,205,187,206,173,200,202,197,199,216,211,197,192,214,206,227,227,187,203,218,204,214,210,194,215,201,223,209,199,202,222,204,202,189,188,224,210,196,225,205,214,205,206,218,198,193,215,214,203,199,227,200,193,185,189,206,203,228,225,199,191,199,218,194,207,210,213,183,202,192,192,204,193,184,208,204,194,205,187,184,193,234,181,199,199,207,225,200,211,217,214,181,200,163,218,198,193,180,218,202,201,207,175,212,190,208,199,209,193,197,197,207,198,185,204,201,214,210,207,203,210,207,200,234,202,207,188,188,179,201,172,171,208,197,193,192,180,187,202,184,191,193,205,199,199,204,192,177,203,173,194,168,200,201,209,197,205,214,207,216,200,181,194,204,194,196,182,193,188,212,217,201,194,206,200,195,166,212,185,194,195,206,202,201,212,192,230,199,201,195,197,200,198,195,183,185,184,183,191,185,203,210,208,183,202,220,165,206,212,195,156,209,208,208,208,174,210,198,212,200,194,217,188,178,209,202,200,177,200,202,222,213,203,174,176,203,205,197,201,197,229,221,193,196,192,206,193,187,220,203,211,195,200,200,211,189,191,201,179,199,191,173,144,117,173,131,152,126,143,95,53,83,93,126,95,66,76,51,68,59,18,63,152,138,100,136,82,40,67,124,175,171,246,190,185,132,155,146,139,196,220,137,86,137,98,98,69,42,9,164,188,214,184,149,199,184,247,137,63,187,85,17,8,85,177,112,66,40,38,125,204,233,218,156,161,208,249,223,214,155,75,125,132,113,128,102,44,88,124,67,18,58,115,105,102,101,104,119,82,100,64,73,46,46,48,67,32,36,29,22,9,11,12,18,24,104,157,153,123,89,26,9,15,6,10,19,17,45,47,67,65,52,68,61,60,75,75,70,32,22,15,33,20,13,38,2,95,147,105,126,128,59,24,28,5,38,36,16,30,23,14,27,12,16,7,48,24,24,19,30,19,22,16,13,23,55,22,62,100,79,82,112,67,54,27,34,34,26,25,4,32,22,20,17,42,8,20,10,30,61,40,48,32,41,46,10,41,74,57,86,110,158,77,15,49,27,32,14,36,29,33,42,3,10,24,19,34,38,22,26,1,27,15,60,50,10,49,59,50,91,80,86,61,44,33,28,24,20,26,50,36,11,33,12,14,22,21,30,48,19,18,12,68,52,31,99,160,136,142,124,114,73,65,41,39,28,17,14,31,4,23,31,42,32,18,9,22,36,11,15,50,41,32,5,23,67,42,48,41,28,16,12,31,9,20,24,32,29,27,33,35,18,34,32,23,13,24,29,22,39,50,3,38,49,12,5,32,21,54,56,120,121,104,115,67,33,14,41,28,36,72,65,74,71,48,37,9,24,28,36,33,22,20,30,24,41,65,64,55,41,51,31,39,38,34,79,19,73,26,50,48,49,63,55,64,3,49,47,33,177,196,236,227,233,225,238,225,231,220,232,209,217,224,126,13,20,24,5,6,1,31,0,19,40,6,17,2,8,24,13,7,2,22,19,0,5,21,3,199,184,174,206,204,198,203,206,193,223,223,192,201,186,179,211,177,212,197,203,185,179,207,179,194,197,203,205,228,180,171,182,187,206,186,212,195,166,195,215,199,179,223,189,222,209,201,211,227,195,198,208,222,202,222,186,221,183,207,209,184,198,209,215,199,210,198,208,207,195,191,198,210,230,197,207,192,215,200,220,193,187,208,203,205,190,200,215,202,207,195,195,214,196,180,206,193,216,199,190,213,212,199,191,217,193,194,193,215,203,200,192,200,204,191,195,212,223,205,201,206,201,204,216,196,199,191,196,204,178,208,208,187,193,193,202,191,210,202,213,198,207,221,197,187,209,213,192,199,199,193,200,181,210,200,196,201,198,188,202,190,211,208,197,211,204,188,183,194,187,206,212,193,201,199,180,187,211,213,211,186,200,206,195,181,198,201,186,202,173,188,187,190,204,196,200,196,191,197,192,205,189,205,196,217,200,195,192,180,183,194,203,221,214,200,203,205,171,205,165,225,196,180,195,197,199,219,199,184,203,199,187,209,197,198,186,195,192,202,192,203,198,188,221,190,191,204,203,193,192,177,211,200,205,196,222,203,189,193,178,217,223,204,198,212,186,198,
217,168,190,201,194,214,197,206,184,204,186,212,167,170,195,186,200,204,199,190,198,198,195,171,190,192,186,177,187,192,189,224,198,181,218,199,157,110,156,180,197,216,186,165,205,137,107,186,186,173,93,119,36,144,92,5,114,172,138,173,243,163,67,57,185,236,234,211,187,140,136,188,155,128,177,179,100,95,121,99,86,60,23,22,136,236,214,186,132,233,223,239,127,61,138,24,20,29,125,230,155,106,39,97,214,242,246,165,179,218,245,252,175,120,47,72,146,89,119,84,61,72,134,88,47,49,83,90,72,70,63,70,100,85,83,95,75,72,77,70,48,34,35,23,25,21,34,11,18,60,135,154,159,107,25,25,7,33,15,41,38,34,49,60,58,80,72,49,63,73,63,86,89,80,71,29,47,23,31,18,42,142,167,105,103,139,63,34,37,40,64,20,16,55,9,19,21,30,30,45,19,13,15,24,6,31,23,51,39,39,36,33,131,135,142,108,65,58,54,56,20,29,0,10,19,28,33,40,23,53,24,35,12,29,37,32,30,5,59,61,57,37,66,80,83,170,108,34,41,48,7,4,37,28,19,35,31,35,6,15,31,54,28,49,4,26,19,33,92,52,40,59,59,53,42,68,52,13,47,14,19,33,1,3,51,58,45,4,24,15,42,29,32,1,20,11,31,59,81,37,91,141,116,107,93,81,96,72,23,7,12,23,48,15,31,2,5,40,34,44,29,23,22,33,17,25,36,69,28,15,38,70,50,75,68,49,66,68,36,47,44,16,29,24,29,25,17,27,24,20,39,12,30,39,33,7,20,22,7,29,30,11,27,31,84,125,112,106,118,49,28,9,16,15,49,60,68,83,59,65,65,24,41,22,31,36,28,0,18,6,38,46,53,47,79,75,46,23,27,28,66,70,49,54,64,50,50,22,40,47,44,73,49,78,151,159,224,223,221,228,226,224,236,226,228,233,210,200,111,16,0,19,18,20,29,23,15,19,10,15,12,15,5,0,18,23,26,17,8,24,21,2,5,214,214,202,218,211,205,227,210,211,210,207,194,192,194,208,186,197,213,215,195,193,214,193,195,195,201,206,224,226,208,205,181,222,195,182,197,216,187,192,219,207,194,180,208,212,216,205,204,208,221,209,206,201,219,208,202,205,227,196,213,200,213,227,214,214,201,190,225,201,198,206,194,181,186,192,205,198,194,181,197,216,224,199,224,234,199,203,212,185,224,217,204,193,178,208,187,223,192,208,207,192,191,206,218,180,192,213,191,199,217,204,221,218,207,210,222,189,191,207,180,203,205,172,202,201,185,205,190,188,207,215,196,221,204,186,182,207,209,205,198,204,193,215,198,194,179,197,178,200,215,185,211,198,199,205,194,213,208,234,187,205,189,188,187,193,182,204,194,223,199,198,199,185,211,208,200,197,208,192,187,208,179,192,222,189,197,208,192,202,182,221,203,203,198,229,202,206,203,195,202,205,190,198,196,215,178,222,202,212,187,174,179,199,209,196,194,219,203,185,217,194,218,199,190,181,194,202,185,163,194,209,206,189,197,216,215,189,203,209,208,165,174,204,203,205,204,202,202,198,193,220,209,202,204,193,196,198,188,192,192,203,165,196,170,202,193,183,175,189,175,196,187,195,210,202,192,171,178,195,201,188,186,187,187,214,185,214,175,196,196,187,195,176,193,212,207,186,199,212,159,188,193,170,160,155,176,187,180,175,147,191,208,149,85,141,241,210,131,134,36,115,136,43,170,172,159,180,211,119,107,123,158,166,182,150,123,174,171,195,170,107,189,172,126,101,101,73,110,58,64,3,143,217,215,176,179,253,190,196,157,50,37,16,8,47,157,132,77,76,62,191,253,246,207,160,218,254,249,246,116,29,60,118,131,138,68,35,71,121,153,77,88,95,99,98,34,117,124,101,76,94,87,117,104,96,78,68,58,52,39,51,28,23,11,13,50,107,157,134,86,46,45,8,35,22,13,9,37,65,77,79,53,62,52,64,69,7,26,66,60,77,93,75,30,30,6,22,39,160,139,84,129,102,35,33,34,97,71,47,43,18,43,25,11,33,5,40,22,53,23,5,11,5,19,43,35,44,38,47,96,146,125,133,82,100,59,36,40,1,18,15,26,31,16,30,19,12,22,26,36,22,16,6,17,28,52,48,32,30,61,75,74,56,62,53,36,39,41,28,25,48,45,7,45,31,43,20,41,17,37,13,16,20,47,48,84,74,46,60,57,35,34,33,30,36,40,35,50,25,12,42,10,34,20,12,21,30,66,26,8,24,21,14,18,64,122,37,92,102,116,7
3,60,147,121,43,36,29,26,28,7,8,20,44,36,22,22,18,12,0,53,49,15,35,53,53,42,37,46,89,105,122,103,166,91,34,45,16,42,38,8,36,11,19,17,41,10,44,33,38,17,24,52,21,19,46,27,10,44,18,48,9,90,104,99,110,108,65,42,33,22,33,53,96,94,95,81,79,83,61,45,28,20,43,25,36,29,15,22,51,27,26,53,83,61,73,18,27,44,57,31,46,55,33,40,58,56,67,75,45,43,91,195,218,201,205,228,238,213,224,230,227,231,232,201,223,95,16,7,8,1,1,20,0,27,2,11,12,26,8,2,14,2,2,1,12,5,26,7,5,16,195,201,196,197,170,182,193,197,225,216,208,205,210,202,199,201,220,187,181,200,181,203,223,210,195,180,195,191,192,185,213,201,196,195,200,201,194,192,199,211,199,193,189,199,216,194,196,209,196,215,217,190,181,190,209,199,219,204,208,215,226,199,217,222,219,212,187,206,215,208,208,196,220,201,203,196,219,224,219,210,193,189,207,214,208,209,208,210,218,197,201,228,189,176,202,218,216,211,192,210,189,164,216,196,202,210,184,199,208,223,183,195,202,203,209,207,199,186,201,203,203,206,180,202,182,216,191,208,200,203,204,197,216,207,206,205,207,172,209,188,187,215,210,196,184,192,184,202,169,202,203,217,211,211,184,212,203,187,190,183,203,198,184,235,192,179,206,193,209,204,210,199,200,223,201,209,167,184,187,184,196,183,185,205,199,188,193,199,202,173,225,195,201,183,212,222,234,197,193,185,193,189,192,193,198,198,198,169,175,175,187,197,202,185,204,217,203,191,194,179,200,184,199,200,209,223,212,173,196,183,216,211,188,212,194,185,214,207,184,195,207,197,201,185,179,212,170,202,191,176,187,180,202,183,232,186,179,177,206,186,204,220,212,198,220,192,236,221,181,199,186,190,195,191,209,209,169,182,212,170,177,172,188,189,174,179,192,188,207,203,189,195,210,189,210,177,180,196,193,176,188,186,181,209,187,192,164,149,165,151,195,228,212,122,73,168,166,136,94,67,139,87,10,103,187,130,137,126,92,148,201,190,140,131,143,163,176,165,210,106,168,208,194,132,72,91,80,95,56,45,36,160,224,200,188,138,183,99,133,104,56,11,4,21,36,47,37,52,92,176,239,212,218,180,205,248,241,228,189,42,41,75,149,165,90,13,67,115,128,92,46,70,70,157,77,122,165,170,128,112,104,109,108,99,107,95,102,89,63,56,43,49,14,34,30,92,159,134,67,21,20,47,16,23,20,22,27,44,75,96,66,86,58,74,86,93,54,26,30,38,39,40,111,78,28,45,5,60,164,127,113,141,79,52,10,99,107,40,59,38,41,12,25,24,40,5,23,15,32,29,30,32,31,32,40,45,41,36,49,151,137,123,123,94,107,49,49,39,32,25,13,7,46,39,25,8,17,28,16,22,55,38,12,18,23,67,95,36,35,70,31,44,68,26,12,29,47,46,22,27,25,35,8,16,40,28,29,23,11,4,19,36,45,34,58,102,54,17,73,36,45,63,8,28,36,22,53,9,42,26,26,24,30,9,11,39,12,22,44,18,30,24,29,38,83,99,29,58,78,104,89,137,187,91,29,58,26,9,14,20,35,31,18,51,43,17,17,18,47,21,31,38,44,49,57,46,43,74,100,158,130,102,119,85,39,29,38,13,21,21,38,27,20,21,47,53,1,18,48,8,22,46,18,24,34,24,17,11,38,61,21,93,123,112,125,108,92,32,12,10,21,29,51,74,111,87,88,80,81,82,80,42,38,47,19,34,11,14,20,38,51,44,84,83,67,49,37,42,31,22,30,42,62,61,81,93,44,49,62,49,156,232,211,233,219,214,212,223,238,232,227,232,231,198,232,105,12,0,7,2,9,11,28,18,10,31,21,41,36,4,5,15,13,14,3,20,13,24,9,12,212,189,218,195,218,212,177,207,205,199,221,179,211,207,177,218,206,200,216,205,184,193,213,213,201,196,208,217,197,206,220,204,190,210,191,209,205,197,175,206,190,193,223,188,212,205,207,197,209,210,198,195,204,185,231,181,199,187,217,173,180,208,220,208,225,230,208,197,233,195,216,197,215,182,184,209,192,219,163,221,195,211,199,191,205,204,215,199,197,183,191,185,217,201,206,186,204,215,202,202,194,201,194,201,187,197,210,198,182,210,209,201,207,212,193,193,198,197,185,199,199,203,187,208,214,203,200,195,190,202,190,194,181,185,219,210,203,1
82,189,213,198,197,187,182,200,210,173,195,196,205,192,198,188,202,200,211,200,207,211,189,235,195,192,215,199,219,212,195,218,212,201,190,210,214,213,207,205,197,221,189,221,183,171,211,209,210,181,180,199,178,181,200,200,186,195,188,208,178,204,196,162,206,218,194,177,200,199,181,192,188,204,201,199,185,185,204,191,185,185,186,189,198,206,184,213,243,196,195,212,208,186,207,191,172,211,185,204,185,187,207,190,208,206,199,210,200,198,180,184,196,194,197,191,204,212,196,199,214,185,183,197,224,190,184,170,217,199,204,201,177,182,201,193,214,201,205,189,194,195,199,177,158,199,197,189,165,194,177,195,200,211,209,198,194,193,197,198,202,206,178,192,181,127,114,145,150,147,147,155,150,212,220,195,190,132,34,100,118,102,73,99,82,8,86,162,138,80,80,159,170,215,174,128,174,169,182,157,138,191,144,161,213,193,162,81,103,76,78,57,23,27,145,209,197,178,87,140,115,55,29,8,30,34,62,72,57,35,54,206,243,227,189,179,196,253,238,226,230,65,19,44,123,128,118,54,49,88,109,89,51,49,25,71,117,150,171,222,158,116,119,113,111,105,82,64,82,83,94,56,35,34,16,52,27,73,145,142,101,44,5,34,6,1,45,54,4,35,39,52,90,64,55,58,81,106,56,39,19,30,25,52,59,64,119,54,33,30,98,164,111,96,119,45,42,45,90,87,65,64,32,22,12,7,26,34,37,52,2,10,0,28,21,43,41,14,12,67,63,58,110,108,78,122,95,75,30,55,26,30,16,7,27,20,28,23,24,18,19,35,20,11,46,49,15,11,60,82,55,30,53,18,41,14,44,19,48,46,48,41,33,44,27,17,13,40,66,53,46,30,8,25,47,50,54,83,115,82,19,39,46,41,20,18,28,22,43,34,26,37,39,15,37,21,17,31,29,30,13,4,31,22,78,61,39,115,114,38,57,96,89,89,68,82,43,43,42,31,10,7,20,37,51,23,25,22,20,10,40,29,19,16,7,35,40,85,60,39,94,141,162,169,85,108,104,51,27,39,20,14,26,32,6,31,27,26,20,32,32,31,17,34,41,19,12,27,58,39,25,50,73,26,116,127,101,108,108,96,48,16,10,37,37,25,64,91,111,107,103,65,78,82,73,77,51,12,17,42,17,3,15,25,30,25,65,61,72,62,41,66,19,62,56,58,64,57,69,84,39,52,139,229,242,218,243,247,228,208,228,223,223,234,208,233,226,226,97,4,7,13,19,5,17,24,31,19,20,12,14,5,3,10,11,27,4,3,7,2,10,24,5,208,213,197,197,200,184,208,222,184,204,211,212,224,189,166,195,224,204,197,183,201,207,224,201,226,207,197,210,205,193,198,173,191,196,208,205,201,207,216,190,198,215,202,185,221,224,212,210,220,208,192,228,210,199,212,206,205,222,200,202,227,199,204,205,191,204,227,229,240,201,228,224,233,192,203,209,208,202,208,223,206,222,208,177,191,191,206,177,221,223,188,197,195,191,211,198,201,199,221,192,201,200,191,175,201,197,201,192,187,206,194,184,226,187,184,175,217,199,197,165,216,200,191,177,216,202,191,219,195,199,197,203,203,191,212,195,174,207,185,197,213,179,200,188,211,189,214,218,210,200,231,188,187,184,183,193,204,202,206,219,196,208,201,183,236,190,195,195,192,195,185,192,197,187,191,184,192,191,197,191,195,204,193,174,200,188,201,183,204,207,213,184,219,198,217,190,196,209,191,175,191,200,205,186,198,185,198,210,201,167,187,186,172,199,188,164,206,200,183,202,188,194,185,211,184,192,199,206,196,204,191,196,198,215,204,198,195,194,215,176,188,200,188,205,218,235,173,192,194,211,204,202,179,185,183,187,215,207,192,171,210,194,188,211,200,184,201,190,200,194,195,192,177,197,192,185,196,186,183,185,192,160,188,225,193,215,202,211,228,196,185,206,202,200,189,207,205,176,185,163,195,169,115,103,130,131,127,152,143,195,200,197,233,220,159,99,5,79,100,77,77,79,9,76,102,88,63,99,205,205,196,136,156,202,161,122,170,171,149,114,161,161,191,149,62,119,90,76,46,22,16,164,222,210,185,95,120,119,39,35,0,104,192,72,75,90,61,105,241,233,203,162,212,250,245,246,220,116,42,50,111,86,90,52,84,106,131,73,66,60,50,8,42,97,152,217,216,123,118,108,92
,104,73,76,33,72,79,53,66,59,31,23,14,36,73,162,130,85,6,17,32,0,17,47,78,31,39,57,71,67,62,101,101,109,66,25,19,29,11,3,37,34,87,124,59,37,5,123,143,93,115,91,38,37,96,125,101,106,68,27,36,34,17,5,12,10,6,9,5,12,2,26,10,1,14,15,53,68,36,78,71,65,143,111,59,25,35,14,33,3,18,18,22,44,20,7,42,36,15,24,16,39,52,37,21,67,72,39,47,42,22,54,19,16,18,33,53,44,37,18,37,35,15,31,37,35,37,70,43,0,64,46,44,37,76,141,46,30,60,30,34,53,40,40,36,45,28,16,14,4,22,26,14,23,49,20,29,46,31,31,42,51,61,26,94,93,56,65,84,44,23,19,3,28,39,39,17,9,28,19,38,47,29,17,2,17,36,51,13,25,15,27,32,55,80,68,84,124,136,130,128,100,109,114,51,32,31,15,33,11,32,26,63,32,20,37,6,19,32,37,15,38,42,33,44,54,75,81,131,120,93,144,149,107,98,116,111,42,57,48,28,20,43,87,117,121,135,120,81,90,104,80,75,47,69,38,1,27,11,35,42,33,21,32,46,77,61,67,70,41,77,58,77,67,32,43,66,43,70,212,238,234,220,231,233,219,222,225,200,215,218,207,215,217,222,117,22,4,8,32,18,12,41,17,12,18,9,38,3,8,0,16,13,1,24,4,26,17,15,17,200,215,184,191,180,188,205,199,198,177,185,192,218,201,186,218,202,187,204,200,188,207,222,220,202,185,197,216,193,194,198,195,211,215,223,186,212,208,190,182,190,222,230,210,190,193,215,214,209,210,198,234,199,213,212,215,224,201,208,201,194,189,194,207,208,188,208,212,218,193,212,190,195,198,222,209,191,200,223,202,205,201,197,208,219,227,183,178,199,206,191,215,229,200,182,199,187,212,212,206,189,197,214,208,208,198,216,214,192,188,196,223,220,189,221,206,175,200,183,207,214,201,220,200,197,229,207,209,200,196,192,198,169,205,207,200,203,208,207,226,190,175,198,187,210,207,189,201,193,188,217,197,194,212,201,209,184,173,201,191,203,198,202,179,214,180,203,200,210,194,199,194,213,197,196,208,214,196,213,200,207,183,207,202,181,199,184,201,198,212,187,208,182,185,202,202,191,205,186,193,180,198,172,196,186,200,196,182,180,185,170,191,194,217,215,197,188,201,207,180,194,184,196,201,204,180,184,184,193,191,193,198,207,199,208,187,177,200,212,189,206,217,188,200,181,181,220,165,193,202,206,191,218,202,186,197,209,202,200,192,195,182,200,181,205,210,182,188,172,196,205,217,194,186,195,184,165,166,193,193,188,180,193,212,187,191,193,172,196,181,198,194,187,184,165,198,217,202,187,164,168,150,159,160,157,188,169,156,176,197,212,174,216,234,239,135,34,15,90,77,101,82,60,59,74,117,126,148,148,169,208,146,192,184,124,126,164,216,152,106,144,111,191,149,80,108,85,72,54,44,48,167,182,176,146,94,105,88,42,48,8,183,137,84,72,45,150,136,246,205,126,200,241,248,250,219,106,49,91,86,107,47,53,88,129,154,114,80,79,26,36,19,43,101,131,140,141,70,90,109,82,92,88,68,62,67,54,69,68,53,45,37,42,34,119,158,121,45,10,20,4,33,21,67,78,34,28,72,80,108,106,144,147,135,54,31,21,21,12,40,26,70,99,103,48,19,43,121,146,103,100,75,45,45,80,124,76,49,55,40,29,28,24,21,2,44,38,15,7,38,40,9,18,34,18,10,80,75,18,68,42,66,155,105,68,50,51,21,19,38,24,16,39,11,34,51,44,47,12,22,24,28,57,35,22,77,101,29,31,51,34,54,36,26,19,34,43,26,13,52,6,44,52,23,26,5,10,42,29,75,48,23,19,44,100,118,52,38,88,83,83,61,79,61,85,78,63,22,23,5,49,57,47,34,10,6,24,44,22,48,26,30,53,50,120,94,35,49,63,31,19,44,29,29,46,32,34,14,33,60,25,31,20,16,33,35,34,20,29,48,15,43,60,86,111,48,94,144,119,100,78,100,118,78,42,33,16,15,27,22,10,37,33,24,25,14,20,33,36,57,34,34,27,59,77,88,102,129,186,154,130,135,112,131,120,132,121,52,12,14,39,50,50,71,71,113,92,86,95,105,73,90,107,69,69,41,13,39,34,38,31,44,36,19,47,37,69,79,40,71,79,57,47,46,47,23,42,23,100,198,245,235,238,214,243,226,237,224,222,229,245,220,232,221,243,108,7,11,10,21,7,2,12,6,11,10,7,13,6,3,15,19,2,0,16,12,0,8,10,0,190,221,201
,216,215,202,195,229,194,207,186,200,195,190,203,201,195,221,189,177,190,197,189,228,196,213,209,217,203,197,182,190,201,211,208,198,204,229,209,192,216,203,204,192,207,193,196,204,194,202,199,202,215,214,195,203,204,201,217,197,212,166,207,222,227,215,218,207,229,203,207,220,196,201,197,194,213,202,230,219,192,181,206,198,175,188,225,200,198,176,212,231,206,221,208,204,188,168,211,181,195,190,211,194,200,223,199,201,219,201,195,208,181,174,194,212,199,198,205,193,193,206,192,193,201,203,189,174,193,178,179,195,181,197,187,201,197,193,184,192,178,180,211,224,193,188,195,209,170,202,192,219,216,200,210,201,209,211,183,175,208,189,216,204,205,201,204,190,189,201,194,190,196,213,210,213,189,192,203,204,188,202,210,191,200,200,177,196,201,209,190,205,196,176,169,185,186,192,194,186,181,192,178,211,204,202,198,204,180,180,200,212,184,187,190,177,193,196,185,206,196,190,167,212,171,190,201,208,164,208,180,175,196,190,190,204,195,217,190,204,189,207,198,182,189,184,179,203,182,209,206,217,193,196,199,174,186,199,192,201,171,193,198,191,182,212,181,207,196,210,199,197,186,198,206,197,196,181,184,204,197,174,176,197,179,191,194,181,211,195,201,183,167,179,194,201,196,210,187,146,159,158,167,161,168,146,177,151,206,187,176,215,182,219,207,212,119,34,46,60,86,112,60,100,116,153,130,138,143,189,165,158,146,147,131,151,184,196,130,97,168,161,214,128,100,131,74,91,70,16,29,133,117,72,149,91,69,50,13,23,20,83,81,79,62,138,217,185,201,202,167,248,242,248,219,90,57,71,117,116,48,49,57,117,147,124,96,97,22,9,3,40,63,89,72,43,35,34,27,78,111,129,121,121,108,66,63,70,83,40,26,22,35,85,129,119,64,23,24,9,25,38,69,41,66,76,56,59,99,117,125,114,84,76,45,14,43,44,41,26,11,75,147,115,16,14,23,142,144,84,121,60,23,28,61,112,82,39,15,29,16,23,15,27,8,57,26,34,41,35,19,10,40,31,26,46,77,86,21,63,50,52,72,25,13,32,32,47,27,6,21,25,28,28,31,14,26,45,35,10,21,55,30,28,38,61,105,38,77,83,40,63,65,12,70,74,75,11,22,25,33,35,35,37,47,28,17,49,51,61,51,27,24,29,113,188,44,36,125,118,167,178,120,190,130,76,38,25,21,45,19,50,60,44,24,34,7,64,32,47,34,14,36,70,131,95,66,51,67,12,22,26,31,13,28,36,30,26,58,8,19,8,20,14,41,46,31,12,21,47,17,64,27,52,91,59,40,76,82,89,82,108,86,66,46,19,9,16,48,53,31,31,24,40,34,14,26,42,43,11,27,39,75,54,99,148,157,144,125,107,65,59,97,110,105,104,109,75,18,24,33,56,38,60,12,67,70,125,123,122,110,118,98,109,70,47,16,13,16,32,17,18,17,28,21,56,56,87,116,84,61,51,17,20,17,33,48,3,141,218,252,253,246,250,235,218,238,248,213,227,221,218,229,215,235,117,11,1,6,18,24,9,45,20,42,30,36,14,18,0,13,10,17,11,24,19,25,3,0,12,217,171,190,202,186,220,215,219,206,208,191,204,189,224,202,225,217,190,212,189,234,200,182,195,191,203,222,184,206,209,198,206,192,200,176,185,213,206,184,186,198,192,207,192,202,171,209,226,187,238,182,187,204,224,211,206,213,209,223,208,203,205,201,232,189,234,226,217,215,221,222,219,227,190,193,219,192,226,197,207,183,206,216,217,218,212,206,203,191,186,188,208,187,218,210,213,208,215,181,192,204,204,218,191,204,192,199,201,194,170,195,212,202,226,210,202,192,178,197,220,200,194,195,213,187,207,175,182,207,165,173,194,177,206,211,203,197,201,196,186,212,206,175,180,185,206,210,183,159,182,182,185,199,174,197,187,210,186,196,197,194,187,186,162,195,194,207,200,191,201,204,190,193,220,199,192,182,208,193,196,204,196,202,179,188,179,206,195,193,196,189,204,186,199,217,224,177,206,188,190,207,198,187,223,189,213,191,195,194,197,192,190,198,170,174,197,195,196,183,197,176,189,187,187,200,217,206,206,172,194,210,185,196,210,207,217,196,208,184,200,211,203,185,225,179,195,186,178,158,206,18
7,192,194,205,199,198,204,192,186,215,191,203,198,202,187,179,156,181,196,180,181,211,202,184,175,189,194,168,196,196,199,187,162,197,183,188,194,183,184,216,195,193,176,215,192,198,194,204,164,154,159,124,109,125,123,149,126,156,208,214,173,184,181,173,198,215,179,116,119,66,68,75,34,128,139,106,134,125,198,201,186,146,142,179,173,169,211,154,109,121,174,202,224,89,82,122,64,89,55,51,41,117,119,78,103,69,38,30,4,50,73,69,44,24,110,212,255,168,161,179,218,244,251,225,114,65,82,111,117,70,47,82,77,131,111,62,58,52,31,15,18,41,88,122,58,59,60,31,21,59,97,129,115,129,128,102,97,83,46,21,24,29,48,115,146,117,56,10,16,16,35,72,39,48,47,68,127,131,127,129,67,47,33,37,11,21,2,15,34,45,38,121,148,63,27,18,61,170,135,101,115,79,44,27,28,105,107,45,23,43,39,32,45,27,1,30,14,47,64,26,5,26,18,29,81,24,86,80,5,43,22,40,18,25,52,23,44,18,22,12,25,22,16,43,11,38,16,25,38,51,66,37,27,50,29,121,126,31,91,134,121,186,140,114,179,132,50,48,43,11,35,32,34,31,39,25,12,63,42,40,48,47,36,51,170,159,39,26,71,83,155,130,104,148,98,60,33,20,7,46,40,54,28,43,36,23,30,19,49,42,39,3,19,52,139,82,33,68,66,39,39,29,9,48,37,19,29,51,41,20,22,23,36,49,56,19,3,54,14,39,73,35,40,50,82,67,29,59,90,61,161,170,85,30,44,7,13,43,17,64,56,51,6,18,35,46,33,38,35,48,16,64,115,64,132,154,154,139,107,112,59,57,26,112,111,103,92,84,26,52,49,43,27,34,38,37,61,117,131,126,99,93,95,112,78,90,29,45,28,25,27,10,11,20,20,16,43,64,81,87,66,16,10,8,50,60,28,36,180,218,247,252,237,240,243,232,239,212,234,243,206,230,218,235,219,135,12,13,5,18,33,7,14,4,10,11,18,16,2,7,1,0,7,13,15,20,27,5,13,17,207,228,203,216,203,203,211,194,221,212,201,224,204,219,192,201,194,193,198,214,185,175,189,201,207,219,205,172,207,200,209,206,226,208,193,188,183,190,224,202,201,210,209,171,187,221,235,181,194,184,227,206,207,193,191,203,207,218,204,206,204,188,202,213,198,203,216,203,205,218,212,221,201,197,202,216,203,195,198,209,227,211,202,195,208,227,189,202,188,200,215,211,184,204,203,201,189,206,212,210,196,207,183,192,197,226,211,208,198,200,206,186,197,199,205,183,162,197,200,196,184,195,200,207,195,194,209,189,189,191,190,195,201,191,177,205,203,196,204,210,207,193,183,203,190,171,210,202,178,204,192,200,178,188,189,190,216,196,217,199,187,208,217,197,204,203,206,193,202,192,207,162,193,192,182,203,182,178,224,204,201,189,196,189,179,196,187,200,181,177,199,202,198,190,218,179,204,202,177,198,203,191,197,187,192,203,210,187,206,214,188,177,204,194,201,173,186,188,203,187,212,200,192,204,184,193,197,195,180,204,193,195,192,182,205,192,170,209,170,186,197,185,199,195,201,182,203,202,201,175,191,189,186,186,188,183,188,203,195,195,192,214,196,220,200,185,204,190,195,159,188,190,199,175,197,190,184,164,204,190,183,200,184,192,185,200,203,197,178,197,175,193,201,191,171,202,184,179,136,137,148,145,142,164,178,149,153,154,197,191,171,170,193,176,214,203,184,168,159,80,78,102,84,171,173,137,143,122,178,173,141,136,167,184,142,164,155,154,135,130,159,191,203,95,94,105,61,99,66,30,74,140,93,66,94,34,16,7,22,41,54,88,40,103,200,237,243,133,185,249,246,247,197,103,53,60,104,109,66,54,69,125,105,81,104,64,59,13,4,38,58,96,112,157,130,75,54,59,16,46,39,61,94,120,114,121,123,91,101,68,42,29,56,131,176,58,28,11,0,52,78,63,33,20,22,10,68,141,138,95,53,19,20,27,32,37,25,27,19,51,40,76,61,26,56,21,83,179,129,103,132,64,37,20,28,59,39,12,13,34,36,14,6,16,16,4,47,38,35,45,12,16,19,64,32,36,118,85,41,67,28,21,28,19,35,24,52,28,18,39,7,12,69,43,48,14,27,4,34,57,39,15,21,24,33,127,105,48,70,117,109,176,113,134,127,90,18,9,24,55,50,42,21,52,9,30,42,23,33,7,38,61,97,29,104,143,52,41,
43,42,17,34,44,43,26,51,41,32,24,40,39,42,0,10,36,29,51,50,39,33,26,26,27,66,146,75,17,83,72,39,63,51,53,38,58,44,27,26,17,31,17,28,27,34,37,40,43,27,60,17,15,45,30,73,116,47,15,46,50,102,105,92,43,51,48,45,23,31,34,14,31,61,44,33,39,42,21,49,30,20,60,118,76,109,123,88,118,92,114,107,92,50,53,117,108,102,90,38,34,42,29,32,31,20,46,35,32,78,122,106,79,90,100,127,121,74,29,49,75,67,40,18,21,15,6,42,38,47,65,81,37,71,41,28,11,29,30,53,202,222,254,210,182,218,231,242,240,221,196,231,222,237,222,214,206,110,34,13,28,30,7,17,6,7,32,1,3,30,5,5,0,49,9,10,25,14,36,31,19,21,183,200,196,184,188,200,179,207,193,201,205,194,196,183,199,187,196,204,207,213,217,204,209,179,203,217,198,173,206,206,212,180,212,220,218,180,206,208,196,208,205,199,171,197,197,217,188,209,195,224,209,234,214,218,189,194,209,200,231,216,225,225,234,194,204,219,216,201,213,216,212,217,177,202,218,216,196,229,188,187,213,222,190,199,195,198,213,207,193,187,194,203,195,216,187,211,198,190,211,202,209,207,196,204,184,189,177,186,191,174,211,216,206,209,213,206,204,192,175,204,194,207,203,200,210,155,192,203,189,191,230,190,198,188,184,207,196,212,195,202,185,207,205,188,199,225,221,175,197,180,171,213,196,204,182,180,206,207,195,214,168,192,188,209,176,197,214,209,211,204,195,202,199,178,199,201,188,179,226,193,161,200,203,207,181,202,199,206,166,211,197,202,197,200,186,221,190,181,197,211,182,183,194,194,206,165,196,184,195,203,200,203,175,219,170,187,206,197,190,226,214,198,190,209,182,195,205,229,197,205,197,183,201,201,193,193,199,193,214,195,175,179,199,198,173,215,189,203,191,193,214,209,205,190,194,209,190,206,196,176,205,193,218,195,196,216,183,208,226,197,191,184,198,205,191,214,199,169,198,215,213,192,167,194,187,180,166,202,195,188,183,203,202,208,180,191,178,210,134,159,173,132,185,199,223,161,197,161,185,183,169,170,196,187,189,199,131,176,164,104,87,109,130,245,150,128,184,161,175,152,120,195,146,151,136,167,152,174,156,118,163,189,161,110,83,107,81,85,52,40,134,152,117,78,69,42,22,41,33,70,49,67,103,196,245,232,182,147,252,255,240,195,77,72,58,88,171,119,52,56,101,128,112,82,72,41,34,12,53,58,101,135,152,160,121,110,103,66,45,35,54,22,48,106,131,142,137,139,154,114,55,62,62,132,122,43,19,4,9,50,59,45,40,23,17,27,38,72,125,108,82,30,31,89,109,54,22,35,28,30,27,11,27,16,30,14,93,181,111,103,126,53,11,36,15,47,42,19,34,34,43,7,24,25,19,19,29,3,39,27,30,54,68,31,17,51,138,72,19,51,28,37,25,24,36,43,37,34,37,11,9,13,17,33,17,20,26,25,35,50,26,22,27,33,69,98,88,53,59,43,35,39,34,12,48,29,45,29,69,58,23,34,31,12,44,62,58,3,24,40,89,76,106,48,53,114,48,48,41,53,57,52,22,29,34,44,41,25,38,16,29,42,29,35,25,69,22,26,4,28,47,63,43,51,127,74,45,164,132,136,175,91,119,156,82,46,14,34,14,33,7,4,10,24,7,25,29,56,56,35,28,19,43,114,89,58,36,74,59,38,28,26,39,38,39,41,54,29,12,22,25,47,68,54,27,25,13,45,29,63,81,119,71,55,129,104,90,129,119,128,84,45,33,65,102,108,128,72,28,61,37,33,29,13,26,51,46,60,98,146,111,95,97,120,105,96,19,71,140,64,45,51,26,8,11,12,43,22,52,80,62,66,22,48,46,48,32,16,117,143,138,123,58,142,164,147,157,142,152,183,228,228,213,214,212,128,0,18,14,20,17,15,19,21,16,8,35,13,17,1,3,27,1,15,3,20,23,14,18,12,213,214,202,205,200,171,206,191,200,218,222,190,208,191,200,194,206,219,202,229,185,203,198,210,200,214,199,218,206,224,196,193,196,205,203,224,197,221,204,207,188,204,218,195,196,195,177,199,218,191,195,189,172,205,189,205,214,208,202,219,242,186,193,204,198,211,199,202,172,208,184,197,192,215,194,204,189,182,205,203,193,209,208,219,199,191,213,198,214,204,209,198,208,190,189,190,183,212,202,193,204,192,178,21
0,205,198,210,185,200,202,212,197,208,158,195,181,193,203,182,215,186,182,180,185,187,195,211,196,179,204,193,222,194,189,187,172,204,211,187,192,196,182,180,194,204,184,207,227,200,190,202,192,194,205,185,189,194,191,207,195,209,198,201,187,214,184,195,206,186,193,192,215,192,215,183,200,204,189,205,200,197,204,194,197,223,190,195,202,174,220,172,216,187,159,219,187,189,182,179,168,188,184,209,196,200,182,227,204,177,213,204,167,186,210,191,231,193,202,170,195,189,203,196,212,198,179,179,194,211,188,174,198,212,198,193,184,186,206,179,195,202,180,214,216,196,170,172,209,206,192,189,218,196,191,178,208,198,186,179,185,194,182,194,162,181,185,189,182,184,195,196,175,183,192,188,186,182,193,191,214,189,173,189,201,193,203,190,208,195,207,200,172,192,175,180,197,200,204,137,174,187,166,176,180,200,129,215,188,180,207,189,214,195,174,199,177,149,165,177,92,72,73,146,237,137,122,143,93,160,196,176,169,138,138,138,176,166,169,135,116,193,186,214,124,117,107,110,99,82,68,139,83,64,105,37,18,44,71,67,78,56,77,204,255,255,164,199,162,244,240,210,98,42,85,106,113,129,93,67,48,63,118,91,61,27,25,34,15,36,91,106,129,106,125,119,123,124,101,90,66,22,30,43,19,59,139,114,157,162,155,166,113,56,20,26,46,29,15,29,32,56,38,35,40,14,31,6,29,76,128,77,31,118,195,132,64,28,15,38,34,54,18,41,32,30,42,127,148,79,110,130,49,21,22,3,3,8,22,48,49,13,3,6,9,31,36,12,10,16,23,45,59,27,22,12,26,97,81,43,82,28,57,69,42,41,31,39,51,26,41,48,24,10,7,30,47,26,23,28,24,24,63,65,63,77,75,123,23,13,12,65,37,14,33,50,56,85,58,31,23,39,52,32,76,81,50,79,75,75,75,133,122,116,103,112,128,43,104,68,54,61,47,70,77,55,66,81,30,54,78,50,53,35,36,46,46,31,26,35,17,50,44,12,29,129,63,10,135,93,141,164,135,154,159,78,57,7,10,31,0,47,24,25,13,12,1,40,51,49,18,39,36,51,94,91,40,55,67,26,38,37,29,29,30,29,61,23,26,35,19,30,36,68,58,38,34,30,26,44,89,54,117,76,49,101,73,122,136,151,149,53,54,44,93,129,83,114,74,29,54,65,46,18,5,17,42,29,36,126,160,145,136,114,113,109,79,12,117,199,84,49,19,29,23,34,34,22,11,57,56,48,74,41,27,23,62,35,52,41,40,34,49,15,76,133,138,143,113,94,88,155,196,200,186,222,110,6,12,4,10,22,5,17,2,21,5,6,30,12,38,2,24,12,7,34,22,11,15,24,0,229,186,199,180,200,208,213,205,225,194,212,205,237,212,207,209,206,196,192,195,200,201,177,186,210,169,198,214,183,212,201,216,211,181,182,194,195,201,230,201,193,191,196,208,204,211,201,194,198,182,194,201,203,202,208,194,205,207,232,213,204,190,214,200,223,182,195,196,205,200,202,184,175,205,189,171,205,198,191,188,211,222,200,200,195,188,213,208,203,205,204,177,188,193,195,194,183,215,200,178,206,196,171,195,175,207,208,180,194,199,168,202,211,201,189,183,178,193,174,227,217,181,186,200,213,188,169,183,184,214,177,184,218,193,192,186,220,224,193,220,174,199,205,193,187,177,212,206,182,166,192,194,212,153,200,195,202,189,184,221,199,202,206,191,188,195,199,193,205,189,200,206,206,197,209,176,185,200,204,208,188,184,197,196,168,186,193,196,193,166,191,181,217,200,194,193,185,191,201,200,190,197,205,207,159,197,218,206,200,181,187,195,207,199,180,177,214,202,212,171,191,183,193,171,200,175,196,190,215,194,195,195,189,190,186,182,199,179,190,196,177,195,203,205,184,196,176,214,221,187,205,195,184,204,188,171,213,180,198,194,189,201,211,182,192,211,187,188,182,183,176,199,201,165,187,212,193,197,163,154,217,172,189,186,199,188,197,210,174,160,154,162,180,160,209,203,181,195,115,143,129,124,143,152,142,129,206,208,199,187,179,184,182,196,214,195,156,181,182,46,45,65,115,157,79,62,117,147,148,200,170,146,156,142,207,184,145,190,139,171,232,182,178,131,93,145,95,116,57,88,104,40,82,78,59,
19,57,76,78,90,63,184,247,242,206,189,246,199,255,228,108,36,63,110,127,107,74,54,60,88,68,74,70,37,25,25,12,40,89,97,115,116,107,106,113,101,124,116,140,102,74,50,48,44,19,51,94,157,182,133,145,149,131,72,43,7,11,36,34,48,47,30,46,21,12,19,6,7,28,121,120,119,161,153,46,31,25,61,88,71,39,34,30,56,48,47,152,136,116,129,127,60,12,7,32,35,7,35,22,39,36,23,17,32,33,38,31,26,16,40,48,24,27,21,9,32,140,84,66,93,57,77,72,64,112,74,60,38,34,54,35,44,10,13,19,59,51,47,24,11,12,37,56,144,189,141,113,55,20,23,37,62,47,35,37,56,65,26,56,52,68,128,99,119,104,97,118,149,182,186,172,199,166,178,198,169,166,207,182,175,175,163,146,155,152,133,132,130,150,162,158,135,112,97,81,47,67,60,40,58,51,41,38,68,140,40,64,42,58,43,38,54,47,55,60,100,37,35,25,21,61,23,30,21,14,49,32,28,36,54,19,28,58,87,99,61,67,50,14,31,59,16,18,14,28,36,20,11,25,18,36,14,23,48,33,4,30,37,56,46,77,141,81,36,59,55,59,116,111,89,39,22,32,83,122,88,120,87,30,43,54,32,32,24,38,17,48,59,70,60,55,96,102,92,88,44,83,155,166,115,71,50,53,22,41,17,55,51,79,70,77,45,73,18,53,44,44,65,83,53,17,28,65,95,101,79,134,79,81,20,73,171,206,220,221,125,1,16,17,46,13,19,12,31,15,29,8,23,15,5,15,21,12,23,12,17,10,0,25,1,190,207,221,211,216,202,225,206,186,203,203,214,193,224,213,197,197,179,191,206,211,185,219,185,187,212,197,196,212,225,217,193,175,195,209,198,194,183,197,188,212,193,198,218,160,189,209,205,194,189,206,223,210,196,187,190,226,202,197,209,188,202,201,190,214,208,197,223,194,204,181,186,198,202,204,203,214,181,214,165,209,209,182,195,175,203,232,201,209,197,214,203,209,192,203,199,201,225,190,233,196,174,186,197,210,174,218,205,179,205,215,194,181,172,187,202,210,201,183,182,176,199,191,182,188,196,200,205,189,201,207,188,164,169,201,200,196,195,177,184,193,173,199,195,215,187,204,188,209,214,197,201,200,195,192,189,200,206,181,210,203,189,209,207,208,191,198,185,185,196,170,205,189,186,213,189,194,177,220,196,214,192,188,164,186,180,213,198,199,226,209,186,187,202,212,206,189,173,170,186,188,201,198,224,154,184,197,192,194,170,203,206,189,208,196,174,216,205,177,192,211,192,205,179,204,182,225,179,202,198,190,204,188,221,181,205,215,208,170,204,196,186,185,195,189,188,185,191,184,183,188,193,201,187,199,178,173,212,177,202,197,191,182,210,197,181,217,188,218,186,184,210,173,188,170,201,157,197,169,195,209,221,170,187,216,172,196,182,177,182,181,182,209,185,189,200,218,149,93,109,153,140,178,149,144,172,207,175,188,178,191,186,213,191,180,216,195,206,183,135,92,126,80,97,17,74,195,180,168,155,184,164,154,185,185,126,176,191,125,155,190,115,175,127,101,137,58,73,66,61,81,6,37,58,42,33,65,56,51,83,152,250,250,215,137,220,252,224,213,117,41,84,96,134,130,77,82,54,110,66,60,65,67,21,6,29,42,83,92,137,131,95,96,122,113,98,94,95,95,125,118,139,65,35,61,57,33,89,115,159,137,142,128,136,106,56,59,23,25,10,18,26,23,37,17,13,41,20,27,65,120,117,178,77,23,6,41,76,115,102,65,40,44,26,22,52,178,131,110,132,98,43,30,15,26,34,29,12,23,61,25,4,38,6,14,10,38,25,43,32,27,10,51,22,57,99,175,53,104,156,131,232,116,161,185,99,68,22,22,27,35,20,6,8,27,42,23,41,14,8,23,56,46,167,105,84,120,47,31,48,35,36,43,52,63,58,128,113,115,129,173,185,217,218,168,206,210,211,206,201,200,204,193,192,187,180,160,197,195,167,181,208,234,223,158,235,211,199,201,209,197,202,152,158,171,147,165,164,145,132,93,43,28,117,139,66,32,54,60,38,37,54,27,40,63,34,15,47,22,14,11,20,26,40,61,36,40,8,13,49,42,70,40,83,112,35,27,61,18,31,44,23,34,30,40,52,32,20,51,35,31,39,23,9,46,68,54,43,36,4,84,137,74,29,55,36,56,87,86,41,44,49,23,95,120,100,124,125,55,13,25,52,33,16,35,12,11,16,19,59,4
6,78,87,99,102,83,143,167,174,112,101,90,62,32,11,27,42,22,64,58,94,71,63,38,22,43,27,51,66,53,60,84,97,120,119,99,64,74,87,119,169,187,213,201,213,120,15,9,25,26,13,11,2,30,10,8,24,0,19,8,3,1,19,29,25,17,2,3,22,12,165,191,198,212,206,184,203,193,198,222,213,199,188,195,207,211,190,200,192,180,207,197,198,186,197,183,203,207,197,203,225,222,224,209,208,206,187,189,219,207,197,185,207,186,174,188,222,219,183,193,212,183,208,199,208,217,218,196,213,210,194,179,202,204,208,217,207,196,177,215,198,196,182,211,210,189,189,201,178,196,207,198,194,213,195,202,198,208,190,207,202,202,198,209,187,201,184,211,212,220,202,181,211,194,183,205,168,194,201,196,199,213,197,188,195,212,177,199,187,199,189,198,190,187,180,202,206,176,183,214,201,199,195,174,175,177,191,197,200,192,177,202,206,188,197,201,177,184,200,200,197,218,206,219,215,210,171,185,172,191,176,222,190,199,211,180,191,203,213,208,184,205,200,204,231,191,194,196,186,183,186,208,233,209,191,189,208,186,194,185,194,195,197,203,190,174,179,175,204,211,211,210,206,188,183,189,189,214,205,179,189,183,204,206,177,200,182,226,201,191,182,178,174,185,160,181,199,182,191,184,187,190,181,192,194,205,210,175,216,206,193,185,182,181,177,180,170,213,186,188,193,184,177,221,199,199,194,189,196,207,182,181,177,204,176,176,212,192,222,194,180,206,186,191,196,166,188,167,183,178,188,222,177,189,159,177,166,186,177,182,181,169,204,177,198,219,203,144,144,167,171,185,175,177,177,179,214,193,197,166,185,174,190,188,182,215,184,199,213,131,125,146,134,73,38,141,233,199,145,165,167,185,185,187,170,108,183,189,62,167,168,134,199,114,101,108,62,85,61,71,37,15,21,71,63,41,71,56,56,146,233,252,209,173,195,225,239,186,99,57,50,95,119,107,48,34,114,102,80,71,85,33,42,5,33,47,63,101,109,126,118,83,99,132,109,98,110,87,87,84,107,154,60,27,40,36,54,45,53,108,130,179,169,142,130,134,156,91,52,37,12,10,1,23,33,31,1,28,51,42,98,122,142,78,31,7,73,165,139,151,115,64,62,36,43,75,157,103,116,135,111,21,15,20,24,7,29,27,28,38,39,17,24,6,11,8,25,21,59,27,25,6,31,63,65,64,122,55,59,75,55,152,75,62,95,46,35,40,8,27,9,35,21,52,29,27,6,36,18,54,64,51,41,78,60,127,93,29,49,38,47,56,77,134,142,180,208,188,233,215,239,207,199,185,182,190,181,164,173,147,147,124,110,100,131,120,120,118,135,113,150,164,157,154,168,166,174,171,181,150,148,174,186,159,166,176,176,181,197,180,175,161,151,162,152,71,78,51,38,52,36,57,53,15,43,48,13,51,29,35,47,24,28,48,51,29,21,13,18,44,78,65,39,84,100,43,96,74,39,109,56,51,70,52,49,21,14,13,28,43,58,19,18,2,37,49,76,20,15,22,29,131,62,43,50,55,27,39,41,53,17,15,32,84,147,127,117,115,53,23,27,32,18,25,39,25,57,64,47,68,26,64,119,143,132,96,69,46,52,66,68,94,81,45,28,21,18,78,62,66,85,95,60,54,15,43,85,58,41,89,154,134,125,109,108,103,63,115,161,187,217,239,219,225,211,154,8,2,25,14,31,37,23,14,3,12,12,10,17,2,4,10,14,0,7,5,17,6,19,21,199,189,224,203,168,184,209,203,202,206,224,197,205,177,219,205,181,177,203,190,185,216,164,180,197,191,207,213,201,217,203,215,182,197,218,188,200,222,193,203,212,210,214,179,200,198,213,189,190,203,201,214,215,201,212,191,210,178,184,181,208,218,205,209,205,199,207,199,218,192,206,215,199,175,182,205,210,208,189,216,186,210,177,207,203,200,199,189,214,215,186,177,210,177,182,196,194,186,200,220,219,205,206,208,203,183,169,200,205,207,219,202,181,197,206,175,193,192,167,187,190,197,189,188,180,181,189,196,176,175,189,202,215,179,203,209,188,185,194,191,187,176,172,174,204,182,182,197,201,205,187,191,182,186,195,192,185,204,173,171,188,183,173,183,192,220,199,211,196,204,187,185,175,210,176,200,165,205,188,192,196,174,198,200,2
16,197,177,169,188,182,189,193,191,206,192,192,194,190,205,207,197,195,195,184,206,183,199,199,178,208,183,183,177,191,183,215,188,214,193,216,196,190,200,208,197,195,192,189,201,202,167,178,207,185,180,202,187,193,203,170,204,192,175,189,183,169,191,218,188,178,197,187,191,201,195,205,195,204,194,187,183,185,170,183,190,202,177,209,194,163,185,190,190,183,196,171,177,186,214,192,172,172,167,164,153,184,172,187,197,180,161,179,190,188,187,203,138,96,159,165,158,168,171,157,176,216,207,185,178,197,164,177,189,192,195,163,170,203,198,132,138,213,153,123,94,165,226,170,145,160,176,169,123,191,173,160,215,146,66,154,171,190,226,71,121,123,107,107,87,73,49,8,28,93,88,58,70,38,154,240,251,253,155,161,241,241,239,136,29,47,86,145,109,62,64,86,125,101,53,69,47,41,17,52,39,82,107,102,135,106,109,83,119,148,113,117,96,84,114,102,110,125,70,100,66,29,53,43,25,35,40,96,149,164,129,121,149,143,140,101,54,20,32,17,30,0,23,57,44,20,54,102,147,71,44,22,14,68,2,31,104,64,31,17,29,118,164,114,126,147,82,31,29,17,23,40,10,42,41,70,38,15,55,41,36,18,28,36,38,31,19,28,52,62,66,102,115,65,23,24,26,44,39,51,46,22,87,48,15,20,30,32,74,49,19,10,34,64,71,95,116,93,88,160,87,128,128,146,181,146,195,202,196,209,208,212,224,168,201,170,153,159,141,128,117,139,127,147,135,124,122,152,144,122,148,149,157,171,169,155,146,156,147,163,190,162,204,193,168,164,174,178,148,136,153,123,120,112,158,153,174,160,169,176,174,150,198,164,134,137,76,62,36,31,49,39,41,33,17,40,29,21,37,25,32,29,17,8,33,61,18,58,59,96,102,14,93,108,125,242,108,131,155,62,50,29,38,30,8,36,8,32,25,10,37,60,55,32,32,26,81,131,88,36,64,51,20,38,36,22,25,22,55,88,122,95,117,124,59,49,16,39,40,34,13,47,77,105,61,55,46,112,159,145,81,55,36,34,32,25,36,120,99,63,47,11,41,35,53,23,56,42,34,106,171,144,126,127,121,152,149,98,113,83,72,89,129,211,231,237,224,242,237,211,233,132,11,4,23,8,17,13,17,33,19,16,22,15,22,0,20,29,12,10,10,7,18,25,7,2,221,206,204,211,197,185,200,180,183,214,210,224,219,209,201,205,180,222,199,218,214,185,194,205,198,205,218,198,226,205,171,174,194,214,192,205,207,185,191,181,217,196,208,199,184,198,164,224,183,220,205,210,200,205,195,198,188,184,196,165,205,208,203,199,217,208,221,189,211,223,188,209,216,194,199,209,203,222,194,178,203,214,174,194,218,202,192,196,194,221,203,206,200,188,173,190,175,209,193,205,195,176,194,195,218,222,190,187,218,186,201,179,210,186,180,184,186,202,196,191,209,188,180,193,195,195,192,186,198,178,191,194,186,204,200,189,183,175,218,204,202,218,198,187,160,195,199,187,209,188,214,185,184,180,214,190,166,214,196,213,202,178,202,205,218,193,196,191,191,211,194,203,187,215,193,200,191,201,189,178,204,176,180,162,190,182,187,187,192,178,192,216,195,204,197,189,204,188,179,187,193,209,180,188,188,194,181,180,203,200,189,178,184,169,202,193,177,202,198,194,182,217,205,193,183,207,176,177,191,187,179,200,195,193,196,168,190,204,206,188,191,189,188,183,187,197,200,190,183,187,220,200,190,195,193,175,221,185,183,191,206,187,178,197,195,195,198,164,196,204,187,185,188,186,181,197,190,218,185,179,188,156,162,182,163,193,168,217,190,180,214,192,153,172,212,218,176,107,126,123,136,148,154,150,179,176,198,188,184,197,183,181,197,195,189,179,207,189,221,132,161,148,121,91,85,147,205,197,171,155,173,151,174,177,181,170,172,120,47,164,155,179,169,80,149,150,110,114,101,68,19,35,140,210,129,88,75,83,239,231,236,188,131,231,249,238,170,46,62,88,109,109,67,83,107,122,117,64,73,71,24,0,9,30,81,105,111,123,137,86,103,84,113,156,170,101,99,75,67,89,112,115,86,103,84,60,56,17,39,40,36,22,40,113,110,156,129,171,169,179,158,99,92,48,
29,16,12,31,18,4,30,38,89,111,78,49,15,47,21,73,126,74,46,17,43,139,153,92,128,107,64,33,41,73,42,49,92,88,50,53,50,19,30,31,36,18,20,7,36,37,22,70,50,20,29,87,134,49,38,50,54,41,30,58,51,29,40,51,33,13,19,58,39,92,67,82,104,97,131,139,160,162,185,201,182,189,182,216,224,187,192,176,152,180,136,132,140,158,152,140,131,159,148,147,159,144,157,148,141,133,145,139,122,135,109,129,98,124,107,103,89,73,87,95,118,111,114,160,127,150,142,153,110,176,142,134,181,168,171,154,151,144,133,149,146,172,177,185,178,194,156,167,173,120,114,102,70,86,21,32,68,81,60,18,10,41,33,45,27,26,21,35,62,133,93,66,76,87,144,195,102,145,131,65,58,45,28,27,6,6,14,32,22,57,17,8,32,45,70,52,107,175,79,77,70,34,25,27,27,42,26,29,37,86,141,95,133,153,84,18,23,58,58,45,60,49,71,79,140,89,84,164,148,84,33,8,42,15,35,35,79,111,131,85,48,15,37,11,49,60,63,113,156,207,194,179,151,156,141,126,87,80,51,99,134,188,236,236,210,236,219,208,217,223,217,126,11,11,5,4,24,24,8,35,21,9,21,26,7,4,5,15,23,10,17,18,12,3,27,7,206,217,226,201,185,207,213,195,220,217,201,197,244,235,207,206,214,199,208,224,226,220,199,220,190,202,211,217,216,209,179,198,184,214,219,187,186,210,192,204,206,180,195,212,172,208,218,200,188,196,216,222,199,211,186,218,179,186,196,219,198,201,215,207,191,196,183,226,210,202,220,190,203,192,184,201,224,191,190,217,187,188,205,171,203,198,209,182,199,175,190,198,188,217,198,198,202,198,192,206,196,186,203,201,195,218,193,198,207,154,188,178,198,216,208,180,204,193,187,206,193,216,202,210,204,195,207,197,181,192,190,197,177,200,210,180,206,202,196,195,199,189,177,177,191,189,188,202,217,184,200,198,197,176,200,193,179,197,201,200,193,183,192,204,189,194,202,199,198,179,212,190,198,185,186,184,176,211,220,180,192,200,177,181,187,201,197,208,203,202,218,191,192,181,197,193,205,187,199,197,204,214,178,187,195,194,176,192,152,195,195,200,199,176,195,182,195,173,197,185,188,187,203,175,198,209,203,198,195,199,168,179,173,196,199,191,194,181,200,196,190,189,210,193,180,183,189,173,194,192,176,176,180,207,204,181,187,200,182,203,209,201,170,189,190,186,178,214,199,199,192,192,191,201,200,209,187,179,168,156,148,188,186,196,184,191,201,190,205,187,180,186,180,205,191,194,146,102,139,157,152,163,138,138,211,197,224,203,169,202,175,182,193,207,175,178,167,182,198,147,110,86,62,46,45,69,124,162,126,128,137,182,203,216,137,192,165,140,64,99,163,173,143,102,128,104,114,124,99,21,14,61,193,188,131,59,118,200,228,193,138,174,217,240,234,145,52,29,132,115,106,88,58,138,122,131,65,73,40,12,13,33,22,91,119,112,115,81,107,87,99,98,75,136,111,104,109,66,128,131,127,157,79,76,72,39,46,21,34,12,37,45,43,37,61,107,150,171,145,171,154,165,157,120,75,32,17,37,8,14,15,0,28,73,119,79,63,81,98,188,145,69,29,21,52,129,154,95,127,113,59,4,37,60,42,30,47,32,49,47,20,21,21,13,47,38,40,15,10,12,28,55,25,46,1,45,130,24,48,45,80,68,23,57,48,51,52,54,36,55,74,83,119,122,142,157,187,202,190,200,185,211,199,185,185,185,148,152,167,124,120,115,119,132,131,144,129,129,110,140,147,132,144,97,95,74,51,58,36,43,33,30,55,48,34,54,15,36,29,18,36,33,29,28,22,25,52,20,49,24,41,33,61,86,88,110,90,138,113,173,167,145,135,143,146,131,123,136,149,161,147,172,179,190,163,143,162,163,170,148,132,113,104,71,65,21,52,69,31,39,23,27,61,125,109,61,58,35,58,50,48,41,37,9,43,59,51,32,25,16,20,36,70,19,29,11,25,14,91,73,101,155,80,36,95,38,62,69,22,17,37,33,55,110,120,87,95,101,69,39,17,33,56,81,59,48,21,57,115,98,173,188,114,64,21,36,42,24,25,52,144,187,96,47,42,32,17,87,238,200,212,189,160,152,150,147,131,111,101,75,58,94,133,202,237,218,244,234,228,228,235,234,237,223,221,
117,0,0,1,24,14,14,10,16,7,18,30,36,8,4,11,8,9,13,24,13,21,10,25,29,211,186,203,188,192,210,200,197,206,171,207,223,203,202,216,221,186,199,221,202,207,189,207,221,202,205,202,189,217,202,192,188,171,201,182,214,213,201,203,205,194,216,206,199,203,204,217,209,189,191,208,201,209,222,194,219,228,222,182,211,188,216,207,190,199,191,219,203,176,210,216,202,203,195,234,200,200,206,202,204,194,191,169,196,201,201,182,187,208,184,223,190,216,195,187,199,215,239,197,178,199,217,206,195,176,217,196,214,179,195,192,204,227,204,204,195,173,196,199,188,209,202,191,213,185,179,196,210,177,181,211,200,195,174,178,197,199,208,194,202,183,189,193,176,168,199,194,197,200,198,174,209,185,204,213,194,187,192,201,189,197,169,195,197,201,192,182,181,173,197,215,198,179,197,199,194,175,191,187,186,212,208,195,189,202,191,195,196,205,218,187,201,185,195,204,176,173,196,172,190,182,221,194,209,209,194,198,202,193,210,183,172,161,193,184,196,202,163,188,220,168,208,197,190,194,186,169,200,193,208,207,206,186,178,177,183,191,210,171,174,189,195,196,168,202,194,189,177,199,188,182,179,188,204,171,191,185,197,211,209,207,202,182,205,186,198,200,187,195,202,190,187,221,202,196,187,143,155,173,159,185,197,184,203,213,185,208,199,188,212,176,180,182,216,189,178,143,152,182,174,191,166,150,209,205,214,203,218,170,223,191,181,185,186,207,206,183,161,175,120,87,67,97,70,61,61,99,127,135,121,159,165,204,146,119,185,185,167,58,71,140,161,198,119,132,112,90,111,99,39,24,21,92,103,50,114,240,220,220,141,177,229,228,245,150,60,62,49,120,133,58,64,88,124,110,95,79,51,4,21,46,39,57,105,128,100,116,85,82,88,135,127,88,71,103,81,86,95,141,137,151,118,81,21,35,64,57,65,44,12,11,13,42,39,37,44,58,102,133,143,166,154,144,160,177,160,116,49,19,34,28,13,24,47,70,90,113,132,176,156,59,40,38,27,66,162,140,110,122,92,35,19,23,23,21,20,11,25,49,37,49,38,27,8,29,31,32,14,20,54,44,34,34,28,43,54,121,78,28,20,24,23,33,27,35,13,78,72,93,106,152,205,203,191,208,210,227,207,213,188,164,156,144,133,134,106,103,90,138,129,133,137,129,146,124,131,116,70,58,39,29,41,59,5,25,29,20,1,2,22,15,29,34,18,12,40,15,27,18,11,13,22,31,32,30,34,32,39,40,12,5,22,44,2,43,24,50,25,56,27,62,81,103,87,126,125,113,130,135,164,160,175,149,144,152,161,161,172,166,151,200,168,179,139,150,120,123,99,71,24,17,42,66,125,144,37,41,39,76,49,24,35,28,32,53,50,35,43,37,13,50,50,7,22,21,32,14,32,61,63,133,157,57,67,94,65,110,91,25,69,76,27,75,119,134,123,104,111,98,22,56,55,60,62,34,60,10,56,154,134,158,114,74,77,11,20,25,38,41,69,131,104,87,16,32,42,27,140,251,242,220,173,130,114,139,106,110,88,75,73,118,195,251,249,227,235,240,237,200,201,234,241,242,226,217,114,10,0,2,9,38,18,26,0,26,11,0,0,20,4,13,14,11,31,17,23,2,15,13,3,203,204,199,197,185,219,207,172,204,184,223,217,196,207,198,233,225,222,218,199,196,227,195,204,219,195,211,205,206,215,214,210,191,197,198,210,218,193,216,187,227,168,183,195,222,184,194,198,207,202,208,212,192,218,215,215,200,203,212,205,208,231,221,192,198,185,221,205,191,199,205,198,210,201,192,195,202,203,202,202,206,222,222,222,210,196,209,188,209,186,200,190,217,204,194,200,183,208,192,197,210,197,184,197,200,200,206,188,202,199,213,204,189,193,191,224,180,184,193,174,207,193,181,177,209,182,189,197,181,194,206,214,183,216,172,173,177,195,191,181,188,207,215,190,172,216,221,187,172,226,150,209,199,192,181,187,193,171,191,200,214,177,176,176,183,182,184,215,199,184,222,198,182,185,205,179,173,194,193,193,200,187,194,214,205,213,202,193,178,197,213,188,202,207,193,196,181,229,181,210,199,196,184,182,193,220,188,196,195,175,189,201,197,211,171,210,177,
[test data payload: long lines of comma-separated 8-bit sample values (0-255) belonging to the plain-text test input files added by this patch; the raw numeric content, which was split mid-number during extraction, is not reproduced here]
193,204,197,185,202,210,185,201,207,185,206,188,192,204,180,226,189,179,208,222,189,187,216,165,182,221,210,235,191,198,182,186,207,203,180,211,181,186,199,184,198,195,165,193,189,202,201,209,189,206,210,201,184,181,172,175,159,196,162,105,110,151,192,188,205,215,215,190,212,212,189,175,155,214,215,203,193,162,223,201,116,56,17,12,115,180,183,224,148,202,223,163,164,95,33,19,7,156,239,84,146,157,103,63,144,242,252,220,89,60,91,103,61,57,77,135,144,101,66,51,20,27,36,59,79,141,110,125,110,89,105,89,109,114,94,104,104,112,98,80,90,86,93,99,87,129,129,113,73,35,8,66,169,103,89,105,35,103,147,96,110,120,61,63,39,26,37,13,4,48,49,20,12,30,54,37,56,28,41,58,40,41,28,13,4,10,32,22,10,17,60,43,55,72,19,7,31,27,50,15,37,48,39,38,34,9,28,34,23,47,33,41,34,60,27,51,33,53,14,53,46,59,45,31,14,26,42,27,40,12,15,51,46,66,24,50,71,51,38,20,3,22,39,22,27,44,41,24,16,49,9,42,18,37,10,50,16,47,31,35,35,59,34,13,45,10,26,35,48,38,46,46,25,40,18,106,150,169,138,162,179,122,153,179,144,178,103,47,2,13,18,48,7,158,203,123,87,58,43,67,47,46,68,27,25,53,97,181,168,177,189,118,24,38,31,38,99,103,84,65,89,131,161,135,144,70,38,54,23,39,25,45,71,106,89,110,102,78,69,59,74,65,75,46,10,9,20,37,58,46,53,9,34,20,37,25,54,18,43,3,25,25,36,22,15,16,38,32,14,20,9,30,25,3,17,19,32,27,20,7,70,47,58,26,45,42,8,14,38,32,23,12,20,25,31,41,53,26,43,29,59,35,50,40,27,35,36,15,37,48,31,28,59,14,34,34,43,32,42,25,40,28,23,66,56,62,104,129,44,59,49,85,100,108,119,148,173,168,162,82,40,5,51,195,219,245,252,236,243,225,251,239,218,223,217,217,240,223,188,201,217,223,209,234,132,12,6,23,34,11,1,6,25,22,17,2,24,5,2,17,0,12,13,1,5,6,19,19,9,200,189,200,182,195,178,169,171,209,204,214,203,215,207,212,206,233,254,138,187,208,198,150,207,216,216,206,199,199,216,195,199,215,197,205,204,204,181,211,184,199,192,214,186,210,209,226,204,212,198,204,222,206,222,190,191,199,205,203,201,233,193,219,215,205,212,199,216,207,196,205,186,200,208,206,196,192,211,215,219,195,182,219,201,197,209,188,204,207,213,193,182,222,196,218,214,185,207,195,188,193,213,217,205,213,191,206,219,220,211,213,205,191,170,197,213,200,221,195,202,190,217,215,182,198,191,188,190,182,188,212,184,176,202,179,179,184,183,225,173,201,203,205,191,182,192,190,176,208,181,189,183,166,208,180,173,202,191,194,211,203,168,187,233,178,175,175,170,171,195,203,175,215,201,210,204,184,191,204,178,158,202,201,225,166,193,202,185,202,202,185,197,208,207,192,181,182,184,213,188,176,192,179,208,189,176,179,182,187,207,186,185,184,228,185,201,163,197,155,199,182,191,211,196,170,172,192,177,194,180,200,189,171,207,188,217,199,187,192,187,210,186,185,175,193,187,221,186,168,196,201,203,212,222,196,185,212,196,208,213,212,210,201,182,212,190,176,207,189,184,168,196,163,173,187,199,178,183,173,210,210,212,207,184,131,141,172,184,194,120,104,143,153,225,178,184,207,216,203,216,183,177,189,188,204,208,223,189,163,199,193,77,83,88,39,103,139,111,149,148,219,195,117,121,44,8,3,27,136,103,63,78,81,52,158,250,244,188,73,119,93,98,57,85,98,111,148,83,131,86,23,11,25,51,90,121,132,122,130,105,113,78,114,137,112,129,102,104,130,103,87,65,98,93,127,124,96,114,56,26,40,29,154,203,51,44,36,80,93,101,101,98,71,49,45,61,44,23,31,38,62,28,40,32,32,39,24,26,24,48,27,48,45,41,14,10,14,27,35,35,40,43,60,74,70,52,48,23,50,60,51,54,56,62,89,40,53,43,56,64,62,58,83,63,77,55,68,60,62,64,41,65,58,29,58,45,73,51,33,48,46,44,70,87,106,64,140,97,60,59,7,3,12,36,48,18,10,32,26,27,27,66,46,28,35,30,41,9,48,48,18,43,36,44,18,15,20,24,45,21,48,42,37,34,38,20,78,158,187,129,162,158,172,151,159,200,145,107,27,29,5,25,46,29,138,1
68,89,58,49,26,35,73,58,83,69,51,21,134,140,84,129,123,137,37,42,32,58,102,97,107,41,39,47,48,91,127,126,28,32,19,46,26,55,161,157,158,188,185,167,168,181,186,185,114,44,22,10,16,25,44,17,36,70,29,25,35,35,52,18,48,49,23,40,18,12,12,28,27,41,38,25,41,13,24,38,30,11,24,41,25,16,40,64,40,45,46,22,25,6,30,4,44,38,41,34,26,55,74,7,9,31,30,30,57,49,18,44,12,41,33,60,28,27,40,47,25,38,24,30,14,37,23,20,40,33,33,68,194,223,146,107,69,66,86,57,96,128,172,143,127,62,112,136,137,209,209,218,236,213,222,235,232,231,240,221,223,239,188,219,211,215,222,215,222,227,120,15,10,0,11,24,1,19,22,16,19,19,29,2,3,4,0,13,4,14,21,9,17,20,29,202,194,207,173,190,198,210,197,207,216,212,188,194,197,203,187,234,230,110,154,194,168,206,195,195,216,169,182,203,184,185,213,197,209,203,158,213,192,187,191,204,212,200,196,200,178,183,193,197,201,183,219,211,195,228,194,201,190,190,196,221,187,205,216,227,175,169,210,219,178,222,190,199,194,199,189,231,220,206,207,188,206,206,222,206,204,191,217,192,194,231,205,183,197,211,201,193,200,217,195,198,204,209,191,173,219,188,192,183,202,211,201,192,198,199,189,186,186,183,198,202,185,185,167,211,187,194,198,181,208,197,212,180,199,192,183,211,187,199,189,216,182,211,202,191,209,201,191,187,177,223,212,177,195,198,190,170,190,178,205,202,207,194,177,204,207,176,201,182,205,186,200,201,200,210,198,166,233,189,206,213,181,183,202,185,199,186,176,183,197,191,202,190,169,214,194,202,194,184,199,215,184,190,199,180,191,211,183,211,198,200,218,174,191,188,181,181,184,204,196,192,189,229,217,194,191,160,185,190,197,180,177,197,210,198,212,190,191,177,202,194,215,161,186,171,182,188,192,178,174,173,187,186,197,217,169,236,179,187,193,201,202,181,193,201,191,182,192,192,196,183,185,187,190,201,209,189,201,220,174,195,208,201,166,118,116,132,161,150,142,81,94,219,225,171,139,153,222,193,186,198,193,178,190,196,209,191,159,119,148,77,63,171,107,30,61,53,24,64,158,223,160,9,10,14,45,9,67,110,77,83,153,121,195,248,243,177,75,80,83,85,42,62,124,152,145,82,70,67,82,6,19,74,102,124,139,114,120,135,141,122,118,108,106,89,110,103,122,113,83,80,72,128,136,107,109,79,44,38,77,136,94,128,124,34,89,65,92,90,121,140,56,23,31,123,145,40,18,12,50,44,41,40,14,9,30,30,18,16,24,24,28,8,36,8,31,20,20,15,19,29,53,58,72,61,40,83,58,63,154,162,196,210,161,180,195,174,180,144,167,164,176,189,152,161,171,169,172,180,196,187,207,176,165,160,139,108,134,160,142,123,94,135,179,188,186,140,160,139,79,44,28,60,37,46,93,78,60,67,84,84,88,91,86,65,79,75,74,55,77,95,53,72,59,80,39,68,50,65,58,60,64,64,44,85,66,99,170,151,42,39,55,35,55,67,35,44,40,22,9,28,6,20,34,73,91,64,69,70,106,93,103,90,62,65,38,57,117,60,32,14,44,110,100,23,71,72,120,117,97,91,64,54,73,29,96,150,43,17,11,33,26,55,194,190,174,167,166,215,177,163,166,187,137,16,29,47,12,31,10,13,14,41,47,20,25,30,27,28,41,20,33,68,23,41,32,44,26,19,30,23,36,26,10,35,23,29,52,27,24,41,36,50,57,44,26,41,50,21,32,9,46,12,37,23,43,10,33,32,24,67,30,37,53,15,38,10,16,59,21,30,36,41,14,37,11,50,20,27,35,62,28,16,22,50,24,40,163,184,150,155,110,72,94,110,65,50,88,44,40,47,117,195,193,199,152,141,163,171,183,142,170,209,205,217,246,238,222,231,232,202,198,214,225,214,96,8,8,19,0,1,1,40,35,17,34,10,23,33,34,6,24,22,41,12,22,13,31,28,12,186,186,215,198,181,177,206,180,211,196,192,201,218,177,209,202,240,217,93,161,165,199,189,183,201,221,210,196,169,197,217,198,191,188,194,197,176,212,199,205,187,187,192,204,187,208,219,222,194,197,180,198,187,176,211,199,213,197,186,170,208,207,198,218,200,193,184,198,183,191,185,191,203,183,191,194,188,201,228,198,181,186,185,213,215,207,20
1,190,206,207,186,201,184,182,206,188,199,210,214,183,208,194,221,180,209,208,195,192,193,194,195,206,204,204,210,171,211,211,181,200,204,180,210,197,179,193,210,185,191,217,200,201,207,209,201,187,188,193,223,212,212,171,199,181,188,183,189,201,182,191,174,193,201,180,197,209,210,203,186,186,188,196,204,196,198,194,192,203,209,206,204,214,191,181,202,187,200,205,168,195,169,204,204,208,196,192,211,167,185,181,193,182,182,175,190,196,196,213,214,188,188,176,195,189,193,189,175,199,181,186,190,191,153,204,195,201,182,197,189,181,205,184,198,185,221,182,214,190,184,200,194,190,176,190,211,197,202,185,192,186,188,202,178,214,209,190,201,172,167,200,187,204,195,211,201,185,199,198,206,197,198,190,203,177,183,203,202,194,199,201,191,173,201,197,208,210,232,221,213,198,202,229,181,144,134,87,99,140,136,116,78,90,147,233,203,135,109,194,162,224,182,204,172,190,181,210,192,94,46,42,51,184,212,127,29,75,107,66,97,146,184,82,21,10,100,139,64,189,122,97,180,219,249,252,242,141,99,82,85,102,55,84,150,140,103,69,56,47,48,22,18,59,110,130,119,123,98,107,100,119,105,84,100,103,119,124,120,90,98,112,115,111,125,65,91,76,12,88,140,142,185,95,63,61,44,102,92,47,109,60,87,46,45,78,161,148,84,15,14,7,15,8,27,12,26,5,41,63,44,36,48,30,16,31,30,34,40,18,36,37,27,60,53,92,99,98,62,81,69,124,135,150,145,157,131,155,173,175,150,178,164,222,145,156,159,128,162,142,173,179,174,168,123,172,130,153,119,176,153,158,163,104,148,168,185,169,168,142,118,111,90,95,85,89,82,102,107,105,185,138,164,196,161,174,150,182,200,187,175,165,199,198,174,157,181,171,155,144,157,152,179,184,172,167,195,196,217,166,63,32,37,48,55,61,27,24,58,34,33,14,11,24,35,9,52,82,112,118,137,133,95,109,118,117,110,78,70,36,25,35,18,30,72,74,62,62,92,139,174,163,152,120,99,90,84,163,134,44,15,11,36,6,93,187,157,107,52,60,55,39,55,69,61,46,52,30,25,25,35,19,59,28,39,43,28,50,35,39,43,40,50,39,32,48,33,60,33,61,40,39,59,55,37,32,35,30,43,40,11,11,19,44,62,76,99,57,89,34,51,32,32,29,13,8,21,22,8,26,35,35,33,50,38,53,43,37,17,30,32,38,46,20,22,36,41,54,47,31,32,32,32,42,49,31,11,52,34,155,111,114,90,49,52,112,103,50,23,38,44,32,27,154,137,146,141,114,129,132,132,146,119,126,125,144,176,198,199,226,243,234,236,244,236,207,205,102,7,10,32,1,8,5,29,19,16,37,3,26,11,4,8,36,13,17,1,15,14,3,41,5,163,194,201,214,198,199,208,188,188,202,230,209,209,195,199,194,253,225,122,169,210,200,197,199,200,216,210,188,202,211,191,204,194,194,181,204,201,187,190,217,190,175,179,186,207,194,215,215,195,205,198,201,200,188,169,227,193,197,184,193,206,181,200,205,192,196,201,211,209,206,208,189,190,197,196,189,189,216,194,212,195,192,183,192,200,207,187,190,207,219,211,199,169,187,204,173,190,197,172,202,207,206,200,232,186,207,206,206,185,221,182,203,215,209,195,189,195,190,187,172,208,205,219,217,200,199,192,179,183,210,193,190,176,176,205,195,199,205,199,181,185,200,190,203,211,204,180,187,202,187,205,188,204,165,203,193,199,206,208,197,200,197,197,161,212,216,177,183,206,189,217,196,206,179,196,186,179,207,194,205,188,200,212,183,207,195,222,203,216,187,183,211,170,202,193,183,189,176,177,192,186,199,190,173,195,188,161,175,197,158,208,198,190,183,180,187,193,164,186,200,195,193,189,174,182,197,207,198,180,185,203,212,218,174,227,177,205,223,198,166,191,195,205,188,180,186,170,173,199,179,188,168,180,195,211,181,179,204,170,192,225,213,193,215,190,204,185,199,192,184,202,196,207,235,237,242,160,170,200,192,227,166,149,130,115,113,114,146,146,97,101,62,109,194,230,148,121,213,201,209,207,209,180,195,220,210,110,89,34,29,110,212,220,121,55,102,163,151,89,151,159,153,158,108,
234,125,111,191,128,97,104,146,234,193,105,117,81,89,60,96,103,117,141,94,67,48,27,18,22,57,81,101,126,140,111,108,120,117,119,100,124,94,85,97,110,92,103,96,97,85,107,101,93,90,64,4,75,185,173,133,122,94,65,41,36,41,48,71,79,51,22,53,130,159,168,107,33,44,3,26,14,9,28,19,19,20,28,60,51,38,22,16,39,13,33,24,16,1,41,36,32,27,56,69,63,97,46,86,57,43,29,32,35,33,33,68,68,39,39,54,52,61,62,40,60,23,38,45,37,44,35,16,38,28,35,48,33,25,77,53,25,29,93,110,145,140,101,116,87,41,73,32,33,47,24,47,81,64,62,95,76,63,72,82,75,101,122,111,113,104,116,126,92,130,101,135,111,107,101,138,153,141,147,150,156,158,169,97,87,113,64,60,51,70,63,70,57,46,36,21,18,22,17,5,18,101,112,120,93,102,152,165,187,181,168,59,79,104,46,55,49,60,84,42,77,139,157,196,185,167,150,87,121,156,146,149,41,5,40,22,11,38,45,94,87,65,58,27,47,40,42,51,34,61,64,60,45,67,53,66,118,116,137,133,135,142,139,131,114,118,112,89,97,89,105,109,78,87,78,100,75,91,65,75,58,53,70,60,69,72,61,88,146,145,162,171,178,159,49,48,21,33,16,22,30,24,28,37,32,80,98,51,67,50,27,33,43,58,41,48,40,46,46,30,40,55,43,48,22,65,45,49,30,24,52,97,141,155,152,75,53,13,20,89,107,51,17,22,71,66,63,139,140,127,113,131,111,125,120,118,102,119,111,132,150,147,177,171,202,223,184,247,226,187,204,120,0,2,22,12,0,15,32,4,28,17,38,11,8,0,19,3,4,0,10,6,38,39,12,0,204,194,203,180,213,230,203,194,196,185,209,219,195,210,195,218,243,216,123,204,192,195,204,181,191,210,186,211,189,184,199,209,203,191,179,212,201,198,199,186,167,215,198,211,215,188,205,177,192,192,191,204,173,191,217,235,201,182,199,195,211,179,191,217,193,212,197,219,180,217,169,226,183,191,201,166,173,172,204,206,190,215,181,202,218,197,203,192,198,219,202,190,194,201,189,201,186,208,178,197,204,213,197,209,200,202,182,213,192,198,198,186,180,200,179,208,197,190,187,189,198,184,192,197,200,176,190,169,206,201,172,216,194,187,187,193,221,194,187,208,208,200,195,203,181,233,193,202,190,194,174,177,182,196,202,210,197,190,168,171,179,190,193,213,183,209,199,192,193,170,187,173,199,204,172,194,195,190,190,177,209,205,180,180,217,185,190,188,195,199,204,190,224,213,200,176,189,179,200,201,175,171,199,184,205,187,172,167,195,203,180,193,225,164,191,185,186,186,189,201,205,194,209,203,208,185,211,196,195,161,190,187,186,183,205,203,207,219,196,174,182,197,187,191,202,189,202,186,205,213,198,178,210,207,187,178,200,182,179,196,207,194,187,207,192,196,235,196,208,196,206,187,199,223,235,200,109,182,207,171,218,162,122,93,74,102,182,137,97,58,93,44,76,115,134,129,147,211,247,224,224,224,219,247,225,154,149,163,80,103,93,188,221,90,61,26,73,87,75,110,212,238,243,161,125,111,89,112,68,89,68,66,160,114,83,85,93,70,67,100,121,131,120,89,46,42,43,36,55,79,93,176,135,136,107,103,109,107,95,104,108,96,108,80,95,113,78,65,81,88,102,127,106,73,22,6,78,109,129,117,59,37,50,47,56,62,64,36,62,51,56,102,146,138,124,58,32,23,34,21,0,10,21,26,6,34,30,59,38,21,57,30,42,33,34,41,34,13,21,23,45,34,45,50,38,84,77,83,86,59,22,33,19,50,27,50,60,60,39,52,39,59,33,69,77,116,86,82,18,12,33,73,62,43,65,36,73,10,31,37,15,32,88,113,123,64,82,55,73,67,28,23,12,38,67,55,76,79,59,74,58,89,63,75,32,14,65,90,55,9,54,50,76,62,29,28,74,99,42,32,44,55,61,58,38,32,58,37,64,135,109,107,110,122,123,90,53,13,13,19,30,37,37,26,85,143,113,83,127,178,165,125,86,102,134,143,86,72,73,94,87,104,117,151,130,196,237,211,167,100,85,74,70,82,122,130,51,19,12,3,8,23,30,34,92,75,73,70,49,40,54,62,41,48,77,51,48,92,122,126,93,140,126,154,156,154,165,149,134,120,132,152,156,173,121,125,162,164,138,130,153,103,137,146,135,140,172,79,148,175,159,156,168,213,158,167,160,18
7,108,70,49,55,41,58,49,55,59,88,89,161,129,137,128,126,107,123,124,111,122,123,122,89,104,84,78,133,84,99,90,102,110,60,49,52,116,169,194,146,159,119,72,67,95,136,132,110,106,127,155,157,160,153,140,131,138,144,131,119,114,115,121,124,109,129,120,123,136,124,159,168,191,210,234,235,231,100,12,3,26,6,2,3,22,0,12,8,17,17,0,1,15,5,4,2,29,24,24,25,14,18,200,190,189,183,213,210,215,217,186,202,179,210,179,209,184,197,240,212,159,200,196,205,170,231,200,207,200,176,184,225,196,208,180,183,185,211,200,203,188,198,212,186,202,211,194,189,188,164,197,207,187,189,181,215,179,195,184,190,203,180,200,218,204,200,185,180,195,193,167,182,197,189,216,214,179,175,203,190,201,211,180,212,204,196,193,217,201,204,194,189,200,188,181,194,192,204,192,179,209,198,194,207,193,192,216,219,166,202,213,204,213,195,191,180,194,209,202,189,199,167,191,220,216,188,205,164,188,187,189,206,196,189,176,194,183,202,212,194,203,185,188,189,214,181,209,184,203,176,161,172,198,179,204,177,188,193,188,178,208,165,199,183,216,184,180,174,206,186,212,229,193,196,200,189,190,218,210,195,192,185,176,201,213,212,193,185,188,173,182,194,195,165,201,182,190,208,186,205,178,173,182,182,198,189,170,191,198,190,171,171,159,188,194,197,203,221,197,215,183,170,210,191,201,181,182,184,192,198,173,201,179,214,191,180,192,211,203,200,184,212,198,186,217,189,214,185,185,198,211,178,202,167,184,201,180,202,217,179,178,189,201,170,206,185,198,185,190,186,190,201,195,162,132,193,164,109,137,241,243,193,217,183,113,98,68,120,197,173,77,42,64,55,28,24,55,78,136,203,206,193,159,218,214,206,188,104,169,217,126,94,85,138,131,86,68,36,31,13,6,82,215,244,237,144,72,81,202,157,72,60,7,49,83,93,115,112,37,69,142,149,136,82,58,62,26,31,15,36,84,88,137,133,126,95,136,115,122,101,126,116,118,86,113,114,94,77,88,80,84,127,137,132,73,32,35,22,82,101,124,92,82,106,66,29,25,51,39,38,46,27,41,103,114,94,107,56,37,62,51,23,23,34,6,8,19,23,25,34,47,20,31,30,8,26,16,17,25,47,28,14,38,16,25,42,49,47,84,100,86,90,61,62,22,27,54,16,38,47,64,56,46,43,35,67,122,149,114,72,33,67,118,112,111,77,42,69,53,81,47,50,24,42,88,73,104,100,76,80,46,62,50,17,7,43,54,97,112,141,60,62,117,104,112,139,60,38,91,122,96,13,29,103,109,105,10,46,127,149,57,79,110,89,129,96,44,14,54,45,106,145,114,159,141,125,165,110,20,34,35,9,43,4,22,57,87,145,86,102,156,93,62,43,74,24,26,110,80,74,103,124,169,174,209,154,144,200,117,42,15,3,60,53,97,46,86,156,70,18,21,37,26,19,43,43,103,116,138,90,119,93,116,111,59,26,19,72,31,27,23,32,75,72,61,58,99,45,43,73,50,45,37,65,60,51,57,91,75,75,76,54,74,60,67,56,62,64,88,87,123,124,117,120,143,154,174,159,152,128,87,92,75,79,50,55,56,52,74,112,126,106,166,117,130,146,136,142,173,120,138,143,150,147,157,172,174,171,189,173,156,176,145,86,43,113,139,155,154,128,145,157,159,174,180,187,167,155,160,185,193,163,153,138,126,142,137,138,127,125,132,111,147,130,120,133,139,118,112,118,128,136,145,178,217,228,216,101,19,10,26,10,0,3,0,4,16,50,15,5,16,13,2,7,6,17,7,15,27,11,16,6,214,172,188,190,210,191,198,196,204,193,174,179,183,189,204,211,253,205,191,201,192,219,202,191,184,186,212,201,184,189,201,196,203,215,200,193,207,195,201,191,203,211,220,179,202,190,180,199,174,198,185,205,193,195,199,184,194,178,194,201,178,198,205,214,194,234,196,195,205,202,194,192,192,186,192,211,184,193,208,179,233,190,193,179,189,186,206,224,181,203,166,191,167,193,198,207,181,197,207,187,205,178,194,225,195,187,183,197,190,194,187,205,195,192,219,183,188,200,184,189,174,181,225,198,168,176,196,208,191,185,205,191,172,202,188,186,206,198,201,211,178,206,200,202,184,165,207,206,207,180,
210,196,205,202,206,187,208,223,189,187,180,165,175,168,185,193,183,180,192,206,201,196,206,189,170,210,201,180,214,179,188,176,185,180,178,185,175,198,199,170,181,188,196,182,181,176,193,180,185,184,198,159,185,189,211,178,187,188,204,193,198,203,207,207,187,211,167,223,189,188,211,197,186,174,202,181,196,202,207,198,187,169,190,189,210,168,191,194,188,208,214,190,199,194,188,180,186,181,181,217,184,179,194,180,192,210,204,213,177,185,208,186,207,210,196,206,200,178,214,188,191,58,53,95,21,26,68,147,160,120,133,131,123,103,86,111,145,146,98,89,91,109,67,68,59,18,62,84,172,179,168,173,131,150,133,75,119,122,53,66,50,85,123,100,88,25,58,33,26,98,182,244,247,183,115,151,248,157,96,51,0,57,110,101,76,62,79,141,108,119,57,55,18,23,17,12,42,75,108,112,157,103,116,89,90,148,80,105,145,122,107,102,86,102,85,83,99,107,132,112,124,96,23,3,19,12,118,85,71,83,90,133,82,33,54,43,54,55,39,46,48,89,93,62,56,50,38,42,72,34,20,15,19,13,15,29,44,29,26,12,39,24,4,38,36,16,58,11,34,32,30,38,32,56,69,87,103,76,132,90,88,51,63,56,43,28,47,59,46,55,47,40,62,77,98,104,108,33,21,117,106,128,103,85,35,36,47,42,38,25,26,18,44,92,93,73,45,35,64,18,51,33,15,1,54,100,95,128,45,67,125,109,117,152,87,2,95,143,108,49,37,106,100,125,57,62,113,165,61,128,173,134,156,95,48,35,34,14,119,142,127,113,94,128,129,81,12,25,6,19,19,47,14,6,68,155,98,152,158,68,55,50,107,48,26,94,75,67,54,111,172,191,163,79,104,185,63,13,76,58,92,86,100,46,52,145,91,37,40,8,1,19,12,34,127,150,141,132,144,129,126,98,29,9,57,42,82,27,70,68,58,78,100,83,64,60,26,82,98,70,20,45,62,62,94,25,45,69,81,47,67,73,70,72,48,54,85,38,29,32,24,84,97,132,89,118,141,93,86,54,42,36,41,45,45,48,14,66,39,63,53,52,49,78,55,62,70,67,66,103,97,63,66,108,102,93,82,119,106,95,91,52,113,161,142,121,132,124,127,150,198,167,190,180,179,166,128,145,131,101,126,136,121,147,130,126,120,106,106,119,119,126,124,126,133,143,130,127,117,108,110,132,179,218,213,123,12,0,17,27,4,13,19,22,21,16,15,4,12,2,16,4,24,6,15,23,11,12,13,8,180,180,201,189,198,200,212,202,201,185,175,189,208,203,161,179,227,235,208,206,184,192,200,200,198,178,218,201,191,187,172,202,168,181,195,204,195,185,195,215,204,182,195,191,163,211,200,199,168,212,197,195,185,189,204,189,170,188,187,181,197,179,203,222,173,204,196,205,195,184,182,196,177,161,210,199,195,191,201,214,181,190,188,215,201,189,211,197,202,221,205,192,213,183,205,207,186,203,176,208,203,185,193,192,196,217,193,188,178,196,206,209,181,189,181,193,198,211,197,198,192,199,201,190,204,182,176,208,179,185,189,184,172,176,200,174,187,173,173,182,207,192,198,197,203,187,199,219,186,194,194,196,189,196,196,174,199,214,203,177,202,186,174,173,162,171,215,195,179,231,207,195,208,166,198,192,181,168,183,197,187,193,184,180,174,194,190,180,204,195,186,197,184,200,202,199,181,178,180,202,184,195,196,190,172,181,183,211,197,189,170,188,165,212,194,193,182,210,182,192,188,179,219,197,183,185,194,199,214,177,172,176,185,193,190,206,195,200,202,187,201,185,191,183,202,192,192,205,206,195,190,201,189,187,200,176,216,206,197,194,182,187,180,186,211,179,198,215,180,210,191,83,32,86,60,4,5,9,16,42,86,68,104,80,80,94,76,121,116,89,123,92,70,71,35,0,11,73,107,74,97,121,132,158,146,94,90,84,33,33,44,64,118,117,54,19,59,10,91,212,181,137,138,81,86,164,230,71,60,35,47,132,108,53,67,102,129,137,116,77,51,74,62,28,38,53,120,126,104,107,100,122,115,111,86,115,93,118,88,107,121,122,73,89,78,93,107,92,140,109,62,34,11,45,45,93,143,102,81,101,122,103,82,44,49,38,41,64,51,64,38,50,55,66,79,52,41,26,13,49,11,7,13,31,13,33,37,10,33,25,13,24,37,48,12,45,20,29,27,2,3,17,25,30,64,10
2,85,113,113,79,69,87,68,54,32,28,52,52,44,40,15,60,5,59,102,118,92,30,43,129,144,141,147,132,60,35,48,23,62,24,29,31,120,36,59,78,48,51,38,49,74,27,55,33,39,101,108,90,45,34,118,84,135,133,38,2,88,147,93,41,44,122,126,165,114,119,111,101,5,61,81,84,119,33,37,49,18,29,174,156,82,119,65,118,111,72,9,15,21,15,35,34,36,30,39,137,46,146,148,67,55,63,159,81,23,121,110,73,31,48,62,56,10,63,163,187,24,51,116,42,120,104,107,74,51,130,144,65,19,30,36,24,25,7,106,160,130,117,109,136,132,81,33,61,59,22,71,71,73,84,81,130,140,123,118,39,69,154,131,81,36,34,84,107,60,42,51,126,128,73,64,107,124,73,90,112,113,55,18,41,48,68,101,89,135,93,106,66,84,73,44,44,20,34,35,65,70,71,102,75,46,66,76,60,76,52,78,78,84,74,43,60,37,48,59,43,24,34,42,47,38,100,149,155,137,156,118,144,162,174,188,160,180,178,146,136,154,120,125,130,119,131,125,144,117,119,109,121,127,125,128,133,113,105,140,119,119,123,131,134,121,130,151,172,192,121,7,33,8,1,3,13,15,11,24,20,6,0,26,2,4,5,23,8,17,7,30,27,33,30,209,175,210,180,196,199,210,181,181,206,193,201,224,207,184,176,251,206,192,219,194,218,201,203,202,187,168,164,178,196,203,191,202,211,191,195,198,199,206,211,192,210,189,186,180,182,212,184,209,192,180,213,184,215,198,192,218,185,210,214,180,188,179,203,212,179,193,194,197,179,193,185,188,220,202,192,194,176,201,184,194,187,204,177,207,197,208,202,204,201,214,194,186,196,186,193,194,193,207,204,205,189,190,196,193,221,172,206,193,214,203,183,175,197,205,200,199,208,194,199,202,191,178,201,172,174,189,181,196,180,199,203,204,203,203,187,187,203,189,196,191,203,196,193,195,179,187,199,188,207,181,208,185,200,186,180,178,226,176,179,176,194,185,197,197,224,205,181,182,187,165,197,177,219,174,190,208,203,180,178,191,191,182,188,174,210,204,185,170,186,193,219,200,177,186,193,170,211,187,202,168,177,185,186,188,185,196,187,186,184,201,213,207,186,192,184,200,191,213,204,201,200,212,191,180,205,190,189,190,179,189,189,197,194,185,189,177,178,214,194,201,182,207,190,188,213,196,155,190,193,187,197,192,205,166,188,185,206,194,212,207,190,195,199,222,204,200,199,200,216,232,137,59,119,118,60,27,26,33,17,100,70,84,58,87,112,65,82,157,153,104,53,50,71,53,26,65,66,94,136,119,150,154,147,110,165,147,124,79,89,92,80,139,103,43,56,35,13,72,118,68,109,82,50,30,161,211,32,42,16,31,101,72,117,127,128,133,83,46,79,62,163,83,50,77,121,148,133,101,112,100,117,103,116,75,101,92,113,107,138,121,80,67,81,111,92,113,62,129,54,24,13,16,62,112,112,166,173,113,89,61,123,94,45,14,7,42,46,24,35,36,46,73,44,57,49,45,43,31,51,12,45,28,13,32,16,23,34,10,26,27,26,7,19,40,46,40,32,10,22,38,45,36,38,42,61,90,82,88,71,40,57,58,54,35,20,19,46,24,38,25,51,46,63,105,116,81,36,17,117,97,88,110,136,54,26,47,39,50,47,52,111,145,157,182,140,122,132,138,145,150,115,34,26,21,93,75,44,9,68,132,105,141,93,57,8,95,147,97,29,45,115,143,121,119,131,131,143,15,25,60,85,137,67,7,36,13,42,169,168,91,82,84,87,93,49,14,15,2,26,41,21,12,21,75,112,107,130,107,43,56,156,232,92,2,95,155,75,45,34,86,46,67,192,242,154,0,62,114,41,142,120,98,80,130,184,106,46,34,34,7,39,8,25,126,152,116,76,86,117,101,72,48,39,48,59,74,74,109,48,72,91,88,91,99,47,66,145,97,47,32,24,97,103,80,28,23,116,126,40,63,116,124,123,170,156,94,54,15,42,74,82,59,87,67,40,52,18,44,59,14,31,14,47,31,53,27,65,96,130,111,118,132,59,50,82,105,116,117,48,71,37,51,72,48,30,29,39,15,32,108,131,168,148,118,125,138,159,156,153,156,160,150,151,138,142,152,143,166,160,136,153,152,124,143,124,136,152,147,146,133,127,124,113,136,102,126,125,140,124,130,127,135,151,149,105,7,2,16,22,36,14,15,20,21,18,0,39,3,4,2,29,7,3,17,21,0,17,6,2
0,195,176,170,184,210,213,179,216,212,189,202,177,206,193,188,151,207,160,182,206,203,201,172,209,206,194,175,172,198,199,192,169,182,182,192,189,190,185,199,226,199,183,191,177,190,185,197,193,207,193,229,200,204,187,185,204,193,204,208,205,216,212,203,210,176,180,196,196,193,208,192,197,211,188,184,191,212,205,206,191,193,202,208,195,169,184,187,197,187,199,176,200,201,188,208,177,182,178,191,196,178,180,181,205,188,189,189,174,191,178,206,188,176,188,204,206,193,202,201,180,187,191,195,190,208,209,218,203,183,192,180,209,210,204,189,188,178,194,202,185,185,173,194,187,192,188,177,178,181,193,179,187,192,191,188,199,192,194,212,145,206,204,177,197,202,221,207,175,189,193,177,185,188,186,198,203,197,203,190,155,193,192,169,179,208,173,206,184,204,181,186,202,208,178,198,188,191,168,192,208,209,185,188,198,188,199,180,211,180,201,193,196,195,196,203,198,196,194,194,184,200,161,203,215,205,210,190,178,160,189,179,190,184,185,190,205,184,210,196,191,185,191,185,207,182,196,192,168,189,197,199,204,187,206,173,169,184,188,203,194,181,198,195,205,184,192,205,195,183,223,252,157,149,205,215,193,162,105,64,34,85,69,95,59,81,154,112,103,113,139,109,28,40,29,21,19,17,50,109,106,97,101,96,86,162,224,196,175,134,148,167,134,167,88,106,214,81,37,79,46,68,150,132,24,69,214,168,4,14,6,102,134,107,128,132,105,96,99,67,132,153,166,84,54,80,98,133,109,97,102,121,102,98,80,69,76,80,111,121,113,84,90,106,85,95,77,102,122,40,8,23,34,71,131,130,129,140,139,116,80,112,105,44,53,14,17,19,32,47,29,47,36,45,33,35,56,61,43,35,69,39,39,17,20,9,8,17,16,22,30,32,28,30,29,38,21,39,29,18,45,44,19,31,51,39,55,54,55,60,51,57,53,28,23,27,51,39,41,28,47,38,30,50,65,95,98,107,34,19,31,59,30,84,49,38,54,39,36,46,67,65,118,232,175,188,224,182,227,223,226,204,150,43,15,16,80,99,40,21,58,111,100,105,119,85,20,79,132,87,16,57,99,147,68,91,140,160,147,15,57,103,93,136,62,10,34,36,65,189,149,92,84,75,88,74,94,4,15,21,30,11,24,30,13,46,90,69,118,86,85,116,230,209,42,13,103,160,153,111,99,139,143,218,244,187,112,7,52,112,113,111,63,54,134,182,201,98,16,36,29,5,33,26,62,155,145,88,90,79,52,82,63,31,14,42,63,80,94,92,69,90,62,31,91,104,51,57,140,62,52,10,24,124,90,69,32,37,102,80,31,75,117,65,105,102,117,57,32,8,17,42,80,103,55,73,72,54,37,44,35,39,25,26,19,22,40,44,78,37,119,113,93,66,24,17,89,108,112,85,33,36,40,55,53,47,9,35,52,68,139,167,158,174,156,137,151,139,124,142,148,129,130,149,144,165,134,165,133,151,142,140,155,149,150,156,144,149,158,175,158,123,130,128,122,139,101,122,130,148,138,116,113,118,122,149,111,37,4,18,16,12,40,11,30,12,13,19,17,22,3,27,14,10,12,29,8,28,4,11,15,187,211,179,213,200,214,200,200,214,208,207,204,199,226,240,170,163,150,152,185,167,194,209,206,197,213,204,191,211,203,178,198,220,208,206,196,199,211,188,197,194,194,200,201,198,191,216,197,192,190,216,205,187,195,198,197,192,168,207,186,196,178,205,186,206,208,194,173,179,184,172,198,205,202,230,191,186,208,202,189,217,189,202,206,209,196,194,196,204,177,207,186,176,210,185,164,178,201,180,171,174,169,193,171,200,221,201,200,207,187,206,200,207,174,178,210,186,214,193,183,200,197,197,206,196,200,186,188,194,174,216,186,168,178,207,178,194,217,213,196,186,194,190,181,209,203,196,191,200,187,213,203,203,201,183,189,189,190,174,195,198,180,200,184,172,187,180,217,191,187,177,177,179,193,206,194,194,183,198,181,203,205,186,193,204,188,178,206,214,195,203,187,188,213,215,216,195,181,197,198,174,195,217,187,191,201,160,187,188,186,203,192,192,216,207,201,183,202,220,178,174,193,178,191,166,173,167,200,205,193,197,205,206,184,179,199,199,176,210,197,209,208,18
4,199,205,163,177,208,168,190,199,192,192,179,192,183,192,192,189,194,200,191,168,193,161,188,203,180,192,253,235,149,171,218,222,243,251,250,199,147,107,77,103,40,57,133,124,77,48,98,114,72,51,66,27,35,18,23,104,25,22,11,1,66,131,186,129,113,96,106,106,67,74,16,106,180,97,175,185,53,125,209,141,64,144,219,106,27,17,119,217,172,156,148,127,84,84,43,93,124,173,154,68,58,82,78,134,87,99,93,113,99,90,107,57,82,106,95,113,61,85,94,110,82,103,117,66,38,30,14,20,72,118,120,131,134,129,108,80,92,110,72,45,62,34,30,34,42,52,39,23,3,13,35,35,59,39,47,46,85,78,59,63,28,51,28,13,13,12,3,27,31,43,33,16,16,30,24,28,34,33,14,31,72,83,90,80,102,80,106,117,97,81,60,65,88,128,53,76,56,78,87,62,104,80,104,90,58,51,34,76,95,87,58,73,40,29,84,66,63,92,150,165,110,111,65,98,128,93,120,106,81,75,55,50,69,120,57,24,114,121,81,108,99,73,85,101,131,106,25,81,116,115,49,24,109,133,148,58,22,102,99,84,20,24,54,54,119,186,62,57,55,52,95,66,68,42,39,36,19,28,25,15,19,12,47,45,77,94,116,107,176,103,63,20,16,78,166,154,191,127,199,171,168,126,46,0,31,44,87,104,48,101,109,169,126,40,24,8,23,54,26,47,90,165,118,51,56,34,52,57,57,44,27,52,47,117,94,127,101,93,23,13,91,117,11,71,162,68,67,60,78,131,95,65,34,60,130,96,9,99,120,41,31,49,82,65,28,26,22,73,127,128,123,126,105,64,62,92,89,91,56,29,37,49,82,85,84,67,104,80,53,63,33,56,112,101,107,34,28,39,45,80,73,50,25,7,71,157,167,148,101,75,135,139,159,115,168,131,136,143,137,145,180,155,210,173,142,138,138,113,162,136,156,170,141,158,153,152,157,127,135,152,159,129,110,123,128,142,126,129,97,107,134,128,99,16,3,4,9,28,16,18,12,15,4,6,16,2,2,16,22,7,39,10,25,26,8,15,8,183,211,221,223,180,201,205,167,222,193,197,182,222,220,211,149,136,121,143,204,187,230,186,184,194,185,192,191,171,202,204,185,180,191,194,205,171,208,185,196,201,203,176,204,199,197,210,216,201,195,185,214,185,184,193,226,191,195,204,217,177,196,201,214,196,197,194,181,202,190,204,203,199,192,175,207,201,207,214,209,207,188,186,192,185,192,200,179,203,187,181,185,184,191,189,203,193,190,172,204,216,191,205,189,201,201,207,190,196,189,189,186,208,181,190,194,184,178,189,203,193,178,201,186,191,178,219,190,183,186,188,190,208,197,176,194,167,202,184,165,202,200,194,198,187,199,192,210,210,202,207,184,204,181,189,181,186,179,202,219,184,172,175,171,196,191,191,195,198,193,186,159,178,218,175,186,183,205,200,215,195,188,219,181,204,201,190,194,192,199,189,225,174,173,204,208,205,181,190,206,174,165,191,191,200,192,186,191,190,216,207,187,202,208,196,187,186,182,212,199,183,213,170,187,196,202,197,207,193,185,170,192,191,192,176,220,208,213,176,205,178,173,194,186,174,186,177,193,186,200,196,177,198,188,202,201,179,156,194,194,209,202,214,180,199,184,202,215,214,249,217,124,157,182,227,232,243,251,240,117,94,67,81,94,95,115,142,128,59,39,104,130,153,105,34,56,74,113,145,107,101,85,75,103,149,116,105,49,44,93,83,60,90,67,64,110,102,207,117,52,146,171,62,78,137,195,131,66,96,203,201,142,123,84,87,68,18,4,49,51,135,171,91,70,90,82,99,103,91,101,94,118,104,89,90,101,100,83,108,85,109,126,104,104,101,79,28,22,9,31,71,123,121,107,95,118,124,98,130,85,58,19,10,107,95,46,26,9,19,26,39,22,36,40,41,40,27,36,63,77,58,68,40,38,45,51,4,10,7,15,5,56,18,23,20,30,40,55,14,14,43,30,54,116,175,173,173,149,136,165,180,184,150,168,173,202,203,162,166,180,163,170,180,163,146,153,147,140,181,159,172,160,194,196,129,164,167,144,174,165,193,184,143,86,64,67,82,95,91,89,93,131,130,141,172,183,171,158,182,158,149,145,160,162,161,162,169,150,132,141,158,113,135,128,90,115,106,156,114,132,153,123,121,102,128,113,128,196,166,133,130,111,105,108,
109,130,62,65,81,64,80,67,86,60,64,70,52,108,137,120,113,69,42,43,66,41,47,97,130,142,140,131,107,114,45,12,53,26,39,43,97,86,84,109,77,53,51,28,35,47,46,49,50,143,176,60,77,43,61,72,75,87,44,29,19,95,70,41,126,96,28,23,54,125,112,66,121,116,105,105,91,91,129,97,112,73,84,124,92,27,82,106,43,39,57,98,62,56,20,34,122,155,181,134,174,158,159,152,206,184,161,88,45,32,52,99,140,94,103,106,75,125,140,101,71,95,96,85,14,35,33,42,99,51,51,61,35,128,182,169,79,38,62,129,149,174,149,138,135,150,113,115,135,144,133,130,154,141,139,123,142,148,130,147,150,140,136,140,141,129,126,139,153,123,142,157,154,141,146,125,120,114,117,151,164,128,21,22,0,4,6,4,16,42,25,7,25,22,6,5,14,18,3,5,6,11,8,14,16,23,209,193,180,191,186,195,193,193,204,206,200,191,183,201,214,175,194,189,171,207,199,171,180,188,196,206,203,217,198,188,223,211,189,195,197,188,190,206,186,193,193,181,191,194,214,217,197,180,184,197,202,196,200,182,204,200,190,194,182,197,221,176,218,176,194,186,188,214,180,183,195,184,192,205,178,185,202,192,186,190,186,205,194,182,182,214,160,183,216,186,175,194,190,200,207,189,202,199,194,181,177,155,203,208,187,181,188,176,164,208,201,224,192,190,180,164,192,193,172,197,193,206,192,171,172,203,174,206,185,188,194,205,210,178,189,197,196,190,188,192,208,168,199,188,186,213,203,174,200,201,180,217,197,179,193,168,205,187,193,171,195,194,179,190,196,172,188,183,189,182,178,189,191,184,218,198,178,198,183,221,192,224,230,219,184,193,215,213,204,228,234,221,146,123,158,196,215,181,217,172,170,200,195,185,184,205,176,206,190,169,190,192,177,190,190,222,164,212,181,194,190,172,183,171,189,181,186,196,194,210,206,155,164,189,196,168,217,184,193,205,174,193,192,174,197,182,202,201,188,205,200,165,203,199,220,198,205,183,208,189,211,208,217,197,213,210,194,199,218,240,184,135,150,180,221,217,251,200,162,112,99,49,74,92,99,121,140,147,121,103,96,164,173,151,51,35,130,163,209,224,219,218,185,155,141,114,105,65,120,106,105,177,147,153,140,79,30,79,49,70,187,100,43,93,188,147,59,75,191,238,176,123,117,72,60,27,13,48,61,64,125,145,37,65,75,97,76,77,92,115,115,113,105,109,79,88,98,82,111,91,120,108,133,113,81,37,14,20,73,77,112,137,151,112,112,112,106,64,85,44,69,10,103,200,130,36,2,19,1,43,9,45,32,14,62,54,33,42,61,75,84,64,61,54,50,31,38,37,37,41,11,30,27,17,13,23,30,48,10,18,48,29,59,126,155,98,74,56,87,89,83,93,82,67,66,69,120,94,98,139,100,89,108,113,62,84,93,119,116,136,117,144,136,103,130,132,113,122,149,152,128,108,83,103,104,109,107,117,96,107,145,162,169,159,162,151,122,168,168,177,136,167,123,132,128,135,131,143,107,150,157,145,178,154,131,152,110,139,143,174,149,167,139,174,200,196,191,172,167,125,146,188,168,175,142,177,178,172,174,168,172,188,178,172,199,162,162,125,144,154,140,142,142,152,175,126,152,158,132,138,151,142,135,116,122,141,124,134,121,133,131,132,129,157,83,79,115,141,129,142,154,149,170,231,182,117,110,102,148,148,150,149,103,81,123,106,57,112,89,86,95,55,113,130,97,95,118,129,101,144,90,111,104,93,101,83,100,130,86,54,107,77,17,32,78,87,62,54,22,62,146,173,140,101,114,100,121,130,124,120,158,78,19,28,48,52,106,110,88,80,54,121,128,133,75,87,81,49,11,57,24,68,80,44,76,42,13,46,145,142,101,91,109,157,138,148,167,126,154,129,127,135,138,114,131,118,100,152,139,160,138,120,142,125,132,106,115,148,150,127,134,147,136,149,132,153,166,145,131,142,146,156,144,147,136,106,18,4,10,4,35,9,3,9,10,16,13,43,8,0,30,6,16,25,2,15,19,4,34,23,202,200,187,215,198,198,187,189,202,193,209,190,187,182,199,212,243,221,200,190,208,205,204,195,205,189,185,191,212,206,210,200,190,190,213,202,184,203,193,206,175,204,
190,183,194,187,175,202,203,201,204,192,187,193,204,210,193,175,222,210,178,226,176,209,219,193,220,179,217,207,205,209,200,188,193,183,202,182,182,207,184,175,182,201,188,189,190,187,171,195,198,191,203,196,166,193,188,174,188,203,175,183,189,176,181,193,194,169,196,188,192,200,209,204,225,208,171,196,183,178,179,183,176,184,185,207,176,182,170,197,177,210,213,169,175,184,188,196,188,196,203,205,189,180,191,216,213,218,215,199,177,176,205,162,186,201,192,190,197,171,177,196,208,177,209,184,200,191,218,181,200,187,190,185,206,226,192,208,218,187,194,200,197,211,208,216,228,199,207,208,214,198,127,111,162,195,202,190,190,183,184,192,201,205,185,189,187,173,199,192,190,190,187,175,203,200,183,181,183,183,190,204,181,188,198,185,213,198,220,182,191,198,219,184,200,204,184,160,194,192,194,182,190,187,195,210,186,180,180,180,213,202,216,217,198,176,203,193,203,179,189,217,195,201,190,214,197,174,241,229,134,117,129,140,183,199,199,187,111,77,71,65,80,113,124,98,108,103,137,112,156,161,139,149,69,10,8,12,68,142,177,190,170,174,151,150,121,74,159,147,134,195,151,243,160,66,105,101,48,100,217,84,20,148,176,138,51,145,219,189,110,61,76,37,14,42,19,94,76,70,160,91,56,56,58,109,128,80,120,109,97,112,87,101,82,77,73,54,74,103,140,80,148,91,62,18,21,62,102,110,127,126,139,116,109,97,62,49,82,68,99,150,216,203,80,28,49,9,23,19,25,19,24,30,51,69,34,65,96,102,83,76,42,43,29,48,24,23,10,54,33,3,57,8,59,40,21,16,27,9,14,36,52,112,111,112,91,46,87,55,76,61,86,59,63,84,55,53,81,68,65,61,59,64,39,63,84,59,56,49,53,67,55,75,74,79,70,88,31,62,37,75,43,55,63,43,80,78,63,63,36,61,72,60,58,69,56,43,36,57,59,63,51,39,49,21,73,17,55,52,55,63,37,46,55,69,80,57,39,84,49,78,63,82,66,47,67,68,100,76,59,120,96,94,80,119,92,109,78,99,87,116,118,103,118,107,121,97,92,98,119,119,100,152,155,133,133,163,112,117,107,102,104,113,115,137,149,144,156,133,135,108,98,130,113,105,176,162,159,182,181,162,161,154,168,137,142,151,150,175,172,151,171,182,173,176,149,126,147,113,145,166,167,172,118,167,154,132,158,119,128,127,142,121,153,103,142,168,113,128,177,162,143,138,195,180,118,150,138,161,197,163,123,74,85,65,73,111,77,80,106,102,65,44,58,105,122,143,72,57,78,105,129,96,97,80,27,51,80,62,75,82,84,77,64,58,55,23,70,134,124,128,141,134,128,159,141,148,139,147,128,132,141,148,128,116,124,121,147,132,131,111,134,126,95,138,146,157,157,150,148,154,145,166,136,138,136,128,123,147,188,199,159,143,155,112,30,11,17,6,8,7,35,11,6,8,4,7,3,5,14,11,8,5,5,20,19,28,7,7,195,184,198,203,218,182,191,186,189,212,174,181,190,188,213,207,245,231,206,203,197,212,197,205,202,191,206,205,185,216,209,211,203,214,215,210,200,212,221,210,206,195,208,194,183,191,180,187,203,187,201,167,200,183,197,204,208,187,209,171,217,194,169,208,202,208,202,196,161,189,192,188,204,171,197,202,199,196,184,172,194,207,195,202,207,199,198,201,210,200,194,203,182,179,206,196,195,191,218,210,201,159,218,155,211,186,217,203,200,184,199,209,195,201,185,217,198,181,191,198,200,197,191,180,185,185,181,189,203,180,178,178,194,197,181,214,174,194,198,213,203,184,235,224,214,193,171,203,189,172,193,183,181,204,194,180,188,186,166,186,233,212,202,188,208,209,199,196,221,199,193,222,231,214,196,184,226,216,213,215,184,127,116,161,175,190,195,167,129,105,142,144,112,94,143,183,228,211,204,174,209,217,197,208,182,186,180,201,196,195,188,174,198,200,187,181,204,181,200,189,194,193,199,205,192,187,209,206,182,200,181,182,221,191,187,191,180,182,204,219,203,177,202,190,202,203,210,220,209,217,189,167,188,201,167,183,153,140,156,152,165,152,140,127,139,130,136,197,237,162,131,166,99,107,90,
142,141,165,137,59,87,55,74,113,131,131,130,142,84,106,182,132,143,157,106,21,20,48,43,4,2,59,100,107,109,117,103,107,223,155,92,93,98,158,74,114,229,155,90,152,186,94,94,159,190,184,152,153,146,90,85,61,50,15,18,23,73,119,87,87,120,56,54,85,64,111,140,112,101,82,85,94,94,65,95,60,59,113,100,119,135,94,41,24,3,46,30,84,126,134,141,143,121,103,109,60,100,78,126,128,123,155,172,131,53,45,44,14,30,14,18,38,27,24,20,75,69,113,97,98,92,44,21,27,23,18,21,26,25,17,23,22,43,29,0,34,38,32,31,40,40,33,25,117,89,190,168,176,160,159,141,127,124,154,149,150,123,128,146,100,135,116,126,88,92,121,109,111,121,132,128,116,137,135,76,155,136,155,96,150,150,123,135,129,144,149,150,170,131,107,139,118,133,90,92,125,90,95,94,107,91,118,106,98,69,127,83,98,106,77,103,76,98,81,72,80,92,104,75,78,77,74,74,51,113,85,65,82,94,55,43,36,57,55,63,52,73,43,57,63,74,54,72,78,86,70,73,60,54,57,42,42,89,47,76,47,60,48,47,52,42,61,47,44,63,68,95,69,81,54,62,33,50,59,65,56,57,43,27,81,64,53,34,53,23,40,58,64,42,69,75,65,78,111,97,115,113,118,78,96,98,102,77,102,118,103,114,117,80,109,104,103,110,113,110,114,125,126,92,105,137,139,138,172,160,131,134,161,162,167,137,132,118,77,88,77,92,89,113,98,102,98,147,137,106,131,127,123,128,99,130,138,115,113,106,116,108,139,127,131,114,153,103,136,169,113,141,107,115,136,154,124,124,148,145,157,176,189,168,157,126,124,129,109,137,116,117,106,113,119,132,140,117,127,125,117,129,126,142,137,150,149,151,146,145,145,135,156,144,159,188,195,175,157,168,125,0,23,7,42,4,32,29,3,13,10,26,13,8,0,3,25,44,23,2,30,19,14,8,4,189,195,206,184,193,208,198,223,179,188,193,196,181,193,185,231,252,221,193,219,203,224,209,229,219,231,233,233,221,236,211,212,227,193,186,212,206,179,189,185,186,195,200,189,220,201,204,209,216,186,194,216,215,221,183,188,176,182,197,176,199,198,209,195,197,209,216,211,204,199,191,223,183,176,194,200,180,179,198,196,192,210,190,194,192,210,185,203,212,208,195,193,203,212,175,193,187,196,219,173,198,195,176,198,155,205,190,195,200,185,179,182,184,162,166,143,199,195,193,204,214,201,205,208,205,221,195,191,192,195,209,214,212,182,231,192,182,228,214,195,202,195,171,145,147,168,159,172,189,187,203,219,190,174,213,183,217,229,217,213,223,192,220,198,226,232,227,223,227,224,225,197,231,222,196,156,163,182,206,211,158,122,84,81,134,124,113,93,81,66,61,90,94,133,123,175,192,208,239,202,250,216,215,215,198,201,186,187,211,197,182,187,192,211,207,191,203,208,181,219,202,212,182,201,198,211,207,190,180,194,204,193,170,185,200,177,199,187,177,198,192,203,217,181,207,180,184,201,198,200,130,102,79,161,152,139,129,109,123,113,104,108,63,85,54,88,88,176,209,89,127,139,93,108,105,125,131,116,165,104,100,50,74,111,146,158,121,157,99,46,116,142,126,130,118,59,12,31,37,25,34,14,32,4,36,52,125,129,218,169,80,55,62,43,25,183,244,101,133,164,125,153,169,201,151,194,202,140,71,57,56,38,8,15,65,96,140,137,66,83,140,39,54,41,102,128,55,77,128,70,100,78,58,61,73,86,75,103,104,135,89,39,43,18,23,55,89,103,98,127,104,107,119,97,54,119,106,119,118,133,113,96,96,79,42,30,63,40,55,14,21,18,28,26,33,47,67,89,58,54,55,31,31,38,33,27,21,36,41,25,4,51,35,23,17,20,26,22,52,37,33,85,35,57,74,85,53,48,60,68,83,87,66,77,78,45,49,82,88,76,64,80,84,72,77,65,79,84,130,91,96,107,102,133,109,93,103,141,117,117,134,137,108,101,122,135,141,99,102,86,114,112,120,134,131,97,121,128,114,121,93,122,135,156,120,109,114,141,148,126,136,131,90,124,130,127,119,121,126,130,129,177,140,137,109,116,140,110,133,122,111,122,108,104,98,109,122,130,122,125,105,138,137,135,115,127,112,143,147,126,111,101,110,91,103,115,100,72,115,108,1
10,115,106,106,90,126,99,133,117,87,102,114,115,94,92,114,76,75,59,92,66,66,83,91,47,92,83,56,58,66,65,64,70,66,53,53,81,71,71,64,55,74,78,84,68,49,67,73,52,70,47,57,58,52,56,69,70,56,43,52,59,61,69,80,38,49,54,72,58,54,69,63,69,83,78,50,86,36,52,76,83,83,70,84,65,87,76,84,68,92,85,92,74,92,90,84,99,121,128,117,103,119,124,109,142,133,132,140,134,108,99,107,103,97,113,104,118,141,136,129,102,109,98,100,113,107,102,108,124,104,162,123,127,142,123,139,121,128,137,134,130,138,145,142,164,165,170,168,158,139,163,177,173,151,142,106,14,4,27,25,3,22,10,1,14,24,17,20,0,2,16,20,26,8,16,5,6,12,11,3,186,192,181,210,208,174,204,183,212,189,189,204,213,183,175,215,255,223,153,189,183,181,237,219,228,237,225,231,237,229,227,221,231,199,174,180,183,223,191,197,201,195,213,218,193,218,202,220,162,186,191,203,191,199,193,195,210,180,216,199,192,182,201,206,197,189,195,183,178,198,212,208,201,203,212,208,187,169,190,200,190,174,204,208,207,195,211,219,194,201,198,214,182,201,178,198,162,193,201,209,186,191,159,209,211,189,205,188,182,205,183,137,91,104,87,128,183,208,217,218,213,241,239,213,198,182,208,242,206,206,208,205,190,212,200,203,230,220,205,185,220,203,127,152,112,127,98,149,210,214,211,179,193,191,234,190,170,199,149,166,173,184,172,185,208,194,176,155,173,179,166,169,134,171,144,76,97,114,128,104,124,122,104,91,101,86,92,114,98,97,95,111,144,101,98,141,92,180,184,193,183,186,193,180,193,223,191,204,195,207,191,179,193,174,218,199,200,193,198,191,217,227,203,204,216,183,190,218,213,209,178,201,194,187,206,228,199,193,218,179,189,194,180,197,198,143,137,149,126,133,129,96,85,129,146,132,142,120,106,97,116,89,76,60,102,62,76,194,201,90,89,100,113,112,77,163,153,119,168,152,106,55,85,114,122,146,117,91,95,57,106,113,74,105,142,111,112,63,111,48,67,30,35,19,37,75,66,103,161,194,125,102,164,73,50,161,166,60,110,130,136,160,187,187,164,170,202,138,97,57,49,35,63,83,70,97,129,128,45,76,100,25,64,91,100,104,57,60,86,66,25,68,51,33,57,65,79,103,99,122,57,4,27,41,80,109,123,122,114,116,114,96,97,103,102,132,119,94,114,98,116,88,53,52,69,56,31,49,14,20,14,20,0,36,44,24,21,53,38,41,29,33,30,20,38,20,65,31,14,13,12,49,33,46,43,33,32,34,14,52,52,29,19,51,19,50,53,47,34,61,55,45,24,29,41,23,22,40,40,31,41,25,62,48,37,40,51,45,67,44,44,34,60,43,59,27,15,33,63,67,29,35,35,18,16,29,64,41,55,63,45,58,69,44,29,40,41,27,29,58,72,28,66,56,53,46,33,51,81,37,46,33,47,55,33,42,39,43,58,55,40,37,52,42,60,43,77,54,49,57,36,71,59,65,53,73,54,62,103,67,56,81,98,76,83,100,83,88,72,96,106,89,90,80,96,62,102,80,119,108,108,111,108,129,117,117,111,118,117,127,128,135,151,114,78,137,136,114,133,111,119,132,138,139,133,113,117,103,119,86,136,127,111,90,114,115,135,119,129,130,123,127,117,110,125,103,111,112,67,123,122,122,119,131,90,108,104,116,55,77,113,113,98,114,95,128,118,90,75,114,80,113,88,92,94,102,78,112,102,88,90,67,74,53,110,57,69,56,66,75,60,76,74,96,73,100,107,88,72,88,80,84,78,68,121,82,75,74,42,34,75,85,52,53,47,48,58,59,71,64,68,114,121,105,97,88,76,84,85,98,92,119,140,149,155,143,139,135,119,111,148,136,120,135,132,150,177,171,155,158,171,143,173,153,113,117,26,17,13,2,27,9,7,14,13,5,9,7,0,0,11,7,13,21,10,29,0,25,4,5,172,192,197,193,204,161,207,186,218,191,214,196,219,204,197,244,251,223,160,155,139,95,112,131,119,127,144,124,129,131,153,192,182,211,202,206,201,178,183,211,206,184,205,176,210,207,184,178,195,191,176,183,201,203,204,192,171,210,212,182,185,184,204,189,201,206,184,191,188,211,195,191,186,186,193,180,186,184,194,177,195,184,211,182,177,206,196,198,200,178,207,190,204,190,189,192,198,179,212,197,199,197
,209,185,198,196,209,186,206,232,203,121,96,100,93,109,149,197,200,201,221,192,160,166,190,195,193,199,153,151,232,218,169,131,135,140,152,218,199,227,212,211,138,111,130,136,133,149,198,226,205,201,190,216,215,120,82,85,61,123,169,146,112,114,122,103,120,80,78,75,103,95,66,92,99,60,53,68,51,73,103,122,108,99,97,100,105,104,141,134,128,120,118,70,77,84,68,84,101,81,100,114,134,159,170,191,191,154,175,183,178,180,183,191,177,142,170,138,125,151,211,183,158,191,198,165,139,154,179,180,204,195,231,192,214,202,197,237,175,187,139,158,146,146,126,121,95,109,99,117,98,82,109,113,124,144,125,137,100,140,127,104,117,95,78,67,111,236,122,80,93,92,108,140,108,212,175,103,190,141,65,52,62,110,49,76,128,121,108,94,89,89,77,89,119,185,156,130,127,115,85,92,102,97,223,148,135,145,123,123,68,149,195,93,80,164,142,69,137,90,91,172,148,84,111,186,198,193,87,84,69,64,86,54,78,67,65,60,67,87,44,61,76,90,103,87,96,54,72,46,46,46,37,45,71,64,51,61,54,40,27,15,50,102,113,116,126,128,103,95,78,102,84,100,95,111,107,84,104,58,72,56,66,62,73,35,1,17,25,33,24,47,17,36,20,36,36,36,28,33,4,24,46,8,49,26,32,45,29,21,27,29,18,49,48,29,47,21,45,2,37,26,25,29,38,44,71,78,71,53,16,23,6,26,35,40,31,30,48,48,65,45,43,28,36,32,20,47,32,34,42,52,35,36,29,69,49,39,28,47,31,25,46,40,72,29,38,48,19,41,36,36,59,52,60,31,41,37,33,29,47,48,37,58,44,48,38,40,37,53,48,53,59,71,36,45,20,35,32,60,26,24,43,26,31,33,47,28,52,72,46,36,44,41,46,26,64,59,42,35,50,50,43,60,36,18,61,44,22,33,48,65,55,20,52,61,48,41,52,28,38,33,52,59,43,20,26,64,38,35,31,42,29,46,71,52,28,48,37,30,34,23,49,47,70,38,39,50,65,33,57,39,46,57,41,79,76,90,82,82,83,74,92,93,86,93,96,106,98,118,99,106,111,107,91,106,110,111,96,104,98,122,141,106,134,122,139,126,140,112,118,155,113,116,106,104,133,114,133,135,143,102,133,94,101,85,94,98,107,104,124,122,124,130,128,138,121,118,123,114,104,103,115,100,123,128,102,120,87,110,95,70,78,68,70,98,47,57,67,82,97,160,163,165,166,101,95,86,107,70,78,105,143,137,158,129,165,128,121,111,117,100,90,108,124,111,124,144,158,116,149,166,164,94,117,97,16,7,6,18,16,37,27,33,10,16,34,6,0,10,1,0,15,10,25,25,18,32,26,29,183,200,182,182,222,198,198,205,202,190,198,211,196,202,209,226,249,204,168,202,140,1,15,6,4,14,43,36,22,11,43,146,202,176,220,223,206,187,208,200,207,215,166,201,193,203,203,195,189,196,180,171,212,197,206,181,175,202,175,218,185,181,193,193,206,198,210,214,208,213,198,197,184,171,210,168,224,196,203,204,205,187,197,185,184,217,193,199,221,197,204,212,188,194,176,182,183,182,208,199,189,190,191,193,185,221,207,238,205,188,217,140,118,187,137,143,147,173,146,154,155,115,108,133,120,99,117,158,84,110,179,212,126,59,68,38,97,192,212,172,191,166,120,122,94,126,127,172,195,202,195,199,222,208,196,95,59,59,88,163,170,148,105,94,105,101,64,85,78,45,92,111,93,98,121,89,101,104,94,84,102,103,118,113,71,109,88,139,123,114,117,79,74,64,75,68,85,84,63,47,82,74,115,139,157,155,127,121,149,162,174,176,145,163,147,119,155,134,128,142,133,124,154,152,127,136,134,141,164,195,191,184,175,177,170,175,182,211,170,147,149,129,140,92,115,119,109,89,124,144,110,91,84,100,98,97,111,100,114,151,185,165,192,154,126,102,158,254,121,66,108,107,119,172,190,231,186,101,147,176,107,50,59,83,44,56,164,233,174,73,94,120,94,93,79,154,204,174,138,145,136,170,173,155,198,151,146,128,68,40,60,159,142,71,103,194,135,100,147,85,99,166,96,57,25,132,160,163,85,68,49,35,52,59,56,69,60,79,55,78,54,44,54,55,80,82,38,37,98,63,60,76,45,78,71,71,32,45,25,18,47,89,111,101,142,133,137,121,98,111,86,79,83,125,84,102,116,84,32,17,26,56,43,63,38,52,11,31,45,51,42,16,20,11,2
[Extraction residue: several thousand comma-separated 8-bit integer sample values, apparently the raw contents of the numeric test-data files added by this patch (test/non_greedy_mv_test_files/*.txt, e.g. cur_frame_16x16.txt, ref_frame_16x16.txt). The surrounding diff headers and hunk markers were lost in extraction, so the values are not reproduced here.]
122,111,106,84,102,82,74,107,85,67,104,76,98,123,104,105,104,84,72,75,100,104,107,75,101,107,83,105,76,74,51,62,63,93,98,56,82,87,95,77,59,67,36,69,60,40,73,88,130,125,85,50,105,132,119,98,129,96,82,110,151,142,113,79,94,117,167,156,141,142,125,100,121,111,105,52,71,75,107,70,76,58,66,57,66,53,67,56,70,108,84,46,84,64,100,133,170,150,139,157,140,129,179,180,130,88,73,35,44,99,53,59,46,76,54,106,45,98,126,58,86,102,127,130,100,117,107,84,86,92,66,96,78,97,81,90,80,66,89,104,87,67,88,90,80,85,64,55,71,102,79,41,89,81,81,127,88,60,65,155,204,86,120,163,196,169,109,185,205,163,185,187,164,120,71,88,98,114,153,140,152,175,181,167,177,179,191,235,130,101,110,64,22,41,8,55,42,56,65,43,37,39,19,38,59,42,48,27,30,27,61,32,43,45,55,48,12,32,22,36,52,48,46,32,29,24,68,18,25,29,41,57,83,92,43,33,70,74,101,111,77,52,36,75,83,70,73,50,38,15,15,39,66,96,113,119,104,91,133,105,93,51,64,89,86,89,84,89,119,63,109,107,110,89,110,123,110,84,112,97,115,66,99,155,207,108,51,70,83,64,106,92,100,127,104,55,30,29,7,23,12,26,69,82,124,113,95,84,81,63,94,83,58,83,74,104,96,78,88,55,41,32,3,11,46,31,48,18,37,31,35,18,21,26,34,37,51,47,45,51,64,59,42,85,55,52,56,49,38,42,19,44,48,47,48,31,34,43,39,53,27,17,39,34,48,44,36,42,6,55,44,32,38,45,36,50,77,58,18,35,52,38,29,42,58,21,45,45,45,50,39,34,43,13,17,26,19,43,61,7,17,24,41,5,34,43,46,28,45,37,33,37,33,24,38,35,26,33,27,60,51,56,44,56,7,11,51,35,47,39,29,38,43,40,64,66,54,41,44,60,40,22,32,29,48,24,26,25,77,13,16,31,17,31,34,29,18,9,14,6,18,20,15,37,21,17,7,44,6,22,42,31,46,31,39,41,24,20,21,14,43,59,62,45,10,24,31,28,43,35,30,23,27,51,50,51,45,63,31,17,7,26,22,5,18,19,33,12,36,19,32,34,58,59,48,20,67,36,47,47,29,25,42,26,44,36,16,56,52,21,51,53,48,44,46,15,26,45,25,61,64,82,151,155,117,113,142,120,125,105,82,93,108,129,154,131,135,123,118,96,87,110,91,86,82,96,132,113,81,73,78,86,98,88,71,98,105,85,61,91,60,47,94,104,106,121,60,89,103,137,129,101,82,88,73,136,169,165,119,62,9,15,16,19,26,7,43,2,33,14,31,18,22,22,19,12,27,51,28,9,21,18,9,24,86,75,76,50,73,88,74,57,75,113,112,136,120,182,245,239,157,65,76,41,41,27,37,13,6,24,33,52,85,88,105,105,113,110,98,96,105,83,109,95,73,66,65,66,45,76,93,93,90,98,89,91,107,105,80,118,108,77,115,95,90,99,113,113,123,130,130,135,129,82,87,105,63,90,74,66,91,62,80,103,112,85,107,92,97,79,84,48,72,64,71,76,66,87,73,86,102,105,117,78,68,81,91,82,89,100,88,138,99,102,55,50,86,84,82,113,102,85,76,87,62,73,79,109,84,82,69,74,67,90,82,84,101,106,103,90,83,66,61,50,72,101,90,113,68,96,90,85,117,126,137,105,81,91,115,106,122,137,134,123,120,121,152,149,172,115,116,88,91,118,117,85,84,93,90,59,80,62,65,51,77,76,37,54,73,66,65,55,94,113,77,80,106,67,95,65,87,108,147,131,93,36,40,15,44,46,4,15,46,77,84,161,162,53,66,122,124,136,158,135,121,103,57,94,84,100,126,109,81,83,102,81,74,79,50,78,87,86,76,79,98,50,81,70,58,91,51,65,65,92,119,116,57,56,74,148,179,102,144,222,207,135,132,209,196,218,133,81,156,75,17,29,29,101,140,114,138,154,141,168,167,155,178,187,82,64,29,24,26,24,52,76,30,36,52,32,46,41,41,36,38,49,38,28,39,37,41,34,43,50,38,28,24,32,50,13,34,37,34,39,31,32,49,28,39,40,40,91,112,78,70,53,9,55,124,132,124,105,87,80,110,53,22,60,33,42,56,94,99,100,91,99,107,127,134,109,115,87,104,99,88,114,93,82,105,98,110,102,84,107,108,113,103,111,70,100,89,78,79,120,215,144,72,90,87,90,108,116,126,77,59,42,27,12,10,24,43,76,103,119,86,105,123,129,127,71,78,92,134,96,94,77,80,82,115,57,45,10,1,29,34,52,16,47,27,76,34,36,48,36,30,46,39,50,51,79,40,68,55,47,50,62,46,42,48,52,53,48,39,29,54,31,41,29,9,25,25,8,18,33,58,48,36,59,41,59,54,90,54,47,50,52,53,
40,56,45,49,60,29,34,45,29,40,42,39,22,41,44,33,20,19,42,30,32,25,15,11,23,44,44,19,33,45,23,5,24,48,47,45,62,40,60,13,38,58,50,51,49,35,46,45,18,30,37,32,28,25,18,47,42,26,60,69,46,20,10,21,53,33,54,27,40,51,36,58,46,10,15,5,25,18,32,4,7,31,17,28,11,33,38,35,30,43,39,45,47,39,26,5,35,36,25,51,17,13,48,44,61,31,14,32,44,40,29,47,40,34,26,28,52,59,30,20,51,64,40,19,20,1,10,20,21,29,38,29,43,30,18,31,75,38,41,63,57,53,44,40,42,44,27,52,30,32,36,16,62,24,57,44,38,34,34,51,19,12,37,76,134,122,123,127,123,147,115,129,122,107,106,114,139,155,119,107,81,141,112,87,95,125,63,91,90,117,108,103,89,99,113,116,126,136,100,56,49,70,79,46,44,52,74,63,71,44,66,107,120,122,87,76,96,74,154,165,166,161,110,25,1,9,18,24,24,12,14,2,11,13,34,45,16,19,21,20,13,26,42,11,6,16,9,86,84,35,40,70,55,88,80,81,94,101,93,73,179,239,247,119,56,100,37,36,24,19,39,35,38,32,41,65,83,92,101,78,61,66,81,105,83,85,92,90,89,84,63,48,59,48,47,73,81,89,88,94,90,70,113,97,113,84,107,100,104,80,99,70,100,69,91,93,103,83,109,75,92,107,78,79,82,74,94,111,91,108,85,51,88,56,111,73,64,115,100,97,103,106,117,80,89,60,68,70,77,98,100,99,97,113,132,119,129,102,88,98,79,87,109,97,103,91,87,100,66,74,91,98,111,75,41,58,94,91,72,72,97,76,115,66,86,81,79,81,107,123,62,67,102,102,134,138,125,173,137,106,93,107,94,82,104,142,135,145,166,159,116,159,157,144,106,76,79,127,94,79,97,63,62,56,76,59,80,73,63,89,59,45,70,81,82,98,93,69,81,53,31,49,55,60,88,113,95,77,81,44,20,35,28,45,31,71,97,90,187,119,66,93,91,152,121,102,125,83,80,109,102,127,93,120,99,93,83,90,63,78,72,66,71,113,55,46,60,99,84,53,39,42,47,54,25,35,70,105,96,52,88,62,88,122,142,214,238,158,94,167,246,226,169,16,78,130,61,24,23,23,83,110,29,75,59,93,91,81,103,111,34,39,35,23,27,38,72,63,66,51,56,34,56,67,43,60,54,80,43,28,17,57,57,37,20,32,44,36,27,15,26,40,48,43,31,26,35,20,31,34,21,49,36,34,69,114,78,69,36,42,31,86,128,140,121,146,140,72,29,21,34,55,50,97,103,102,134,98,86,108,98,108,110,99,97,87,109,86,60,97,101,98,88,77,86,89,94,108,87,94,121,99,88,81,83,67,89,187,179,92,86,92,107,150,123,75,15,37,2,17,12,53,44,91,108,106,91,115,120,124,146,84,74,99,74,81,75,101,104,88,79,83,27,22,9,16,17,40,55,46,43,57,65,56,38,30,47,31,44,53,40,35,30,45,43,54,36,38,51,30,44,41,36,48,9,68,49,45,40,30,30,33,25,36,13,37,54,47,43,62,19,29,27,55,11,34,37,48,22,25,23,28,40,37,60,52,67,30,36,65,32,40,39,20,12,20,21,41,53,53,40,28,9,25,21,13,42,22,31,22,38,24,26,34,47,50,45,48,31,36,51,40,14,57,39,28,22,26,65,49,36,42,41,53,44,22,21,30,26,81,105,37,17,30,28,38,23,47,31,47,31,52,30,20,20,28,23,25,27,10,23,15,31,30,9,7,8,36,17,33,43,25,29,40,28,22,12,60,43,26,34,38,26,42,44,40,38,35,26,9,27,35,23,23,23,24,59,50,46,8,17,21,18,36,13,39,34,24,24,34,65,25,20,34,26,40,74,43,70,69,73,30,46,42,24,49,58,33,53,51,28,17,63,21,12,27,59,15,10,21,12,31,20,33,83,88,98,106,110,88,81,89,99,109,120,95,105,110,95,101,88,82,105,70,68,114,100,65,119,108,104,94,72,115,102,108,118,113,62,71,45,73,73,55,44,27,64,55,75,86,85,103,123,128,118,122,106,98,134,187,161,139,71,19,4,15,2,45,30,23,7,30,27,0,14,13,1,23,0,54,16,7,12,10,28,2,27,78,59,84,70,94,93,96,94,102,116,77,112,98,189,241,251,117,67,71,13,34,33,50,28,48,44,26,75,51,81,100,107,101,60,55,82,91,90,55,71,64,81,113,73,95,67,57,52,69,65,68,90,65,87,107,106,116,98,93,114,121,92,103,79,57,69,67,70,118,96,131,155,85,69,94,74,102,106,86,90,117,104,75,77,93,118,105,85,108,104,102,101,85,99,100,99,95,75,77,74,86,81,41,86,107,112,135,150,131,135,132,140,143,93,90,102,75,82,71,84,117,81,52,70,95,111,59,71,53,72,81,115,90,97,128,103,67,59,68,101,109,102,102,75,99,102,122,112,90,101,
111,124,91,74,91,63,54,65,93,136,139,129,136,117,156,166,144,107,77,76,102,125,78,89,75,63,44,75,76,70,76,65,51,47,104,113,70,56,85,75,51,45,100,75,68,34,50,94,112,109,138,92,114,63,82,91,68,113,113,112,73,135,118,71,58,112,124,102,107,78,72,85,92,97,103,96,87,77,66,80,95,57,63,69,44,91,53,92,73,60,94,72,53,49,68,72,66,44,41,19,64,54,45,62,71,90,158,229,245,130,80,102,171,226,190,76,20,25,136,91,96,101,125,121,54,14,6,23,4,34,22,17,12,29,34,29,35,47,74,39,69,45,27,15,31,47,49,81,46,17,83,32,41,30,54,33,26,55,32,6,40,58,11,41,45,51,36,57,42,17,17,40,60,40,58,46,49,69,96,90,70,41,36,44,51,102,172,149,96,62,15,23,50,57,78,96,110,94,93,82,89,78,103,125,103,101,80,93,70,118,98,105,108,92,95,112,90,105,95,92,104,75,105,139,94,91,113,119,95,107,133,207,128,86,114,97,91,54,22,19,23,21,43,36,83,97,88,99,93,86,126,91,112,98,115,92,84,79,76,79,90,66,52,69,50,31,21,8,28,40,29,36,43,61,68,38,30,57,56,63,45,48,42,23,82,29,47,51,26,59,59,20,63,48,22,38,26,50,55,60,52,37,18,39,24,31,13,10,19,40,40,55,45,56,22,36,20,60,18,25,38,33,29,45,13,51,54,57,22,34,37,39,2,62,32,15,10,30,28,16,36,28,20,56,16,9,25,25,13,8,23,6,0,19,25,38,20,35,7,14,28,50,41,52,12,55,58,32,53,28,22,27,32,28,13,29,19,26,47,14,27,11,68,78,69,62,41,40,6,32,26,40,55,42,35,35,23,24,26,46,14,6,15,8,21,26,39,22,33,60,35,48,20,34,38,14,46,11,23,29,31,24,13,12,60,54,54,38,40,26,17,23,15,3,25,6,37,67,45,51,36,37,50,29,0,32,29,20,30,29,21,30,15,37,65,34,54,14,38,33,18,35,57,68,69,50,27,43,57,41,29,44,38,24,16,22,31,44,24,40,16,20,27,10,50,30,70,76,72,93,58,92,74,67,80,82,79,104,90,111,131,68,90,64,95,83,83,91,120,126,98,100,116,77,113,85,58,74,57,104,71,85,74,26,71,35,38,30,9,96,82,98,74,128,98,121,134,106,124,119,108,113,151,157,129,64,29,0,3,21,21,10,19,10,7,28,6,4,6,6,18,1,1,3,11,2,31,15,11,22,71,54,85,71,97,98,87,127,117,125,121,115,107,187,230,243,123,45,67,5,40,14,45,34,23,35,28,81,70,106,82,102,94,44,71,61,90,88,79,42,59,62,61,69,69,71,65,81,62,76,93,92,112,97,93,110,115,125,112,129,94,75,81,72,79,79,78,98,86,121,158,133,110,98,122,100,118,141,108,125,129,67,74,59,124,120,110,100,110,94,137,120,98,76,81,88,89,66,108,105,72,114,71,80,89,121,147,142,124,128,155,138,146,150,132,123,152,117,135,112,87,149,94,78,110,99,96,107,102,85,124,95,90,114,93,163,74,67,87,84,104,75,81,78,65,103,84,100,113,86,113,102,81,88,96,78,100,82,94,93,121,153,133,107,120,123,109,88,79,81,129,140,136,93,66,74,60,52,43,61,88,77,61,80,116,113,122,79,92,109,80,117,129,127,99,83,65,107,127,113,126,135,145,131,93,134,127,130,134,123,85,102,89,105,133,117,132,138,105,89,49,87,74,53,60,54,88,71,84,85,98,79,90,71,78,127,95,92,82,70,53,67,82,123,99,90,54,82,58,54,62,27,77,49,58,131,249,229,144,34,31,43,89,143,124,45,1,38,78,115,120,107,100,117,85,45,78,89,48,55,27,65,94,48,58,63,78,77,48,39,67,44,36,51,42,36,36,43,41,47,58,65,47,47,54,45,49,34,44,27,46,24,43,36,43,45,42,51,43,20,72,55,55,69,39,52,70,84,93,88,78,63,65,31,38,38,97,74,49,29,48,88,74,105,97,83,122,97,101,110,94,88,109,114,97,105,121,76,73,70,98,101,93,74,111,108,106,88,102,86,106,104,124,148,97,90,105,137,142,48,58,176,159,113,62,16,8,19,24,21,18,36,70,81,106,90,125,106,129,85,105,119,107,103,108,49,67,74,78,60,79,111,107,77,68,15,3,12,10,30,43,41,43,37,27,55,28,19,37,21,60,53,2,48,38,95,49,57,35,22,34,18,16,15,48,47,35,35,47,30,15,44,58,66,2,18,14,37,16,46,21,29,27,25,34,32,38,41,51,32,29,41,45,32,53,62,36,68,31,49,29,39,27,13,53,19,28,24,36,37,35,27,42,41,23,1,31,20,15,14,39,16,27,17,27,52,26,33,32,21,32,13,41,60,12,18,14,37,12,38,28,47,18,34,28,32,44,43,25,49,45,49,25,86,80,66,26,31,43,14,6,22,23,48,26,45,19,4
8,32,36,21,26,30,25,36,33,22,10,26,19,19,17,42,46,13,15,21,36,44,38,51,41,36,49,56,66,48,52,24,30,5,18,46,27,18,13,14,54,33,46,37,63,43,24,24,28,5,9,11,15,23,13,21,46,43,41,33,42,53,43,25,34,44,28,42,59,15,55,50,45,43,63,45,29,52,40,37,57,36,38,31,5,29,21,51,63,52,52,64,58,58,41,56,89,83,49,81,112,129,101,93,58,62,96,92,89,91,125,118,144,110,98,111,99,99,68,39,45,59,89,62,43,38,53,48,46,33,38,101,122,99,108,92,117,110,102,72,106,113,95,82,90,87,85,75,61,23,10,14,18,12,10,17,24,15,15,37,17,21,7,7,16,2,22,8,20,18,26,5,10,59,35,78,84,83,98,96,108,136,126,133,137,159,209,216,225,122,42,81,33,46,36,31,22,35,41,52,68,66,85,89,77,77,59,66,72,82,85,85,82,25,39,53,50,79,56,75,82,77,78,64,94,162,104,87,83,128,100,112,102,94,90,96,82,118,148,118,149,115,135,144,96,96,115,110,107,110,159,123,91,120,94,72,40,108,107,79,98,101,110,144,197,67,54,86,130,118,89,109,114,113,119,77,107,140,153,135,107,125,135,92,109,136,147,137,132,145,124,147,96,152,179,139,114,116,91,87,121,105,121,102,104,106,97,150,181,71,53,92,89,126,93,101,96,63,84,88,133,175,160,114,129,99,165,153,137,138,152,137,150,165,153,157,148,145,146,139,97,94,74,125,162,114,130,98,72,47,56,52,68,54,66,65,118,113,114,81,87,64,107,119,129,136,119,142,139,105,88,116,128,147,107,147,124,114,157,127,121,151,110,77,100,140,102,151,118,91,120,100,113,101,84,114,90,97,96,84,80,64,112,109,79,56,71,92,92,81,66,62,58,75,49,41,110,149,84,53,83,55,102,128,97,77,82,5,100,192,206,170,35,0,70,44,70,43,20,15,45,88,88,105,93,98,85,119,153,96,103,97,88,69,161,142,105,97,81,83,35,27,34,44,43,31,45,45,48,60,48,66,34,31,38,32,41,44,26,27,27,62,34,39,53,43,36,42,26,26,42,12,37,25,43,60,47,36,62,70,86,105,83,58,26,39,35,68,51,52,50,33,53,53,84,64,104,124,80,119,75,102,77,96,83,117,77,96,91,100,87,101,57,87,101,88,109,97,116,107,95,122,105,112,134,117,122,55,39,92,122,65,27,28,111,184,104,30,10,18,16,22,10,27,45,95,101,140,116,106,93,105,116,114,89,95,106,54,83,64,67,75,61,69,67,78,83,73,38,25,2,21,24,41,38,33,52,24,43,29,45,33,29,53,32,53,19,16,36,21,62,30,31,28,54,20,30,53,47,30,32,24,43,8,39,51,40,32,12,13,25,22,13,38,39,21,49,25,46,29,87,17,20,33,35,29,29,32,77,52,41,46,37,44,69,32,40,41,56,40,47,33,25,31,29,54,21,24,46,21,22,37,43,18,29,23,29,61,30,50,42,22,30,32,19,44,29,24,45,11,46,13,9,22,40,17,19,22,31,19,30,19,15,20,43,7,19,59,65,62,20,55,32,34,28,31,56,12,13,25,41,33,27,45,6,7,0,20,10,34,19,26,26,15,38,52,27,33,16,30,44,45,49,46,56,75,41,38,54,44,54,37,3,42,30,22,34,22,47,26,41,55,48,44,63,26,38,16,22,23,14,12,4,36,19,38,52,43,42,47,40,8,64,17,34,31,10,48,38,33,57,44,45,85,57,50,45,53,42,43,46,30,49,51,15,7,25,27,22,38,52,63,58,85,44,55,78,90,87,52,108,118,111,110,59,70,72,108,134,96,98,108,131,118,96,115,83,99,108,125,109,84,126,119,82,89,67,105,87,85,82,108,152,121,146,102,133,99,119,103,81,91,106,91,105,76,66,93,86,29,3,5,27,9,18,24,7,0,24,13,27,24,25,37,3,12,4,20,34,16,5,9,0,65,27,65,45,95,79,93,114,137,124,124,137,129,203,227,243,89,51,59,21,40,26,27,26,12,36,27,53,83,119,85,59,30,48,51,38,81,91,111,121,70,36,37,57,82,81,84,51,39,71,53,79,185,99,66,98,130,111,94,114,94,120,86,99,110,135,133,128,103,103,152,89,85,77,88,100,102,174,105,78,86,84,76,55,83,91,80,67,94,80,180,213,66,63,82,142,129,88,62,72,92,110,104,115,136,110,115,86,69,127,53,76,124,147,146,125,111,112,106,110,95,212,147,94,118,96,93,83,83,84,106,77,69,49,155,182,69,47,100,110,118,88,134,180,160,92,107,171,232,170,145,140,156,207,181,155,158,164,173,185,183,174,199,202,221,153,178,129,119,121,122,115,95,88,81,74,80,74,80,80,39,56,81,99,111,82,56,56,108,86,103,132,106,126,145,149,141,116,107
,120,92,130,119,122,112,140,106,125,123,119,92,124,132,100,100,92,104,101,115,121,131,166,143,132,115,125,95,66,48,76,104,107,79,107,79,95,91,113,59,75,95,90,62,67,123,96,70,26,16,53,101,109,110,91,33,15,35,141,187,28,9,68,92,123,47,22,93,93,114,125,85,68,101,73,97,98,110,116,91,109,87,120,92,116,97,57,65,54,68,27,55,50,48,47,44,47,57,73,51,33,23,46,30,44,37,57,37,29,46,44,56,41,31,29,50,42,37,19,48,32,53,55,57,82,59,77,86,98,116,87,67,49,45,56,35,54,51,46,89,98,99,112,110,107,128,92,107,123,100,91,92,87,112,100,93,87,88,73,101,99,99,99,89,105,97,126,78,61,101,115,131,120,79,61,27,58,53,83,41,11,6,55,140,105,58,44,18,24,35,35,66,82,108,111,74,98,131,114,88,81,99,99,73,47,64,54,51,73,73,86,64,72,83,69,53,37,27,15,24,35,59,56,30,36,48,29,35,46,46,25,35,21,8,34,29,43,20,63,27,39,19,49,18,52,62,21,53,12,56,34,34,42,59,47,13,40,4,24,33,39,49,30,44,58,46,29,42,33,17,37,44,23,67,51,36,24,19,47,65,59,61,46,41,49,48,54,43,46,32,45,53,37,31,32,16,13,57,26,30,27,31,24,43,21,22,49,11,10,26,22,36,34,39,7,10,40,44,8,43,23,23,38,8,28,40,8,38,14,33,7,12,45,46,17,54,84,67,49,29,39,28,37,13,28,30,30,25,17,22,11,8,34,25,31,6,43,21,10,18,27,16,18,21,23,17,25,55,52,36,24,42,25,33,38,36,20,60,30,26,25,17,28,57,35,38,34,18,79,40,13,51,45,3,15,28,12,6,22,14,20,10,30,44,55,46,31,38,53,38,31,27,3,3,16,38,35,32,39,14,60,43,44,54,54,44,49,45,23,29,39,0,40,29,32,37,45,43,87,32,40,61,67,76,103,104,82,78,94,94,80,90,56,63,53,91,97,86,71,62,87,97,116,127,160,148,165,183,176,158,141,132,145,164,163,138,149,142,142,146,163,131,129,145,169,124,153,137,149,144,113,131,127,114,128,146,130,5,5,39,18,33,11,8,0,7,15,32,3,28,0,11,1,9,6,42,18,1,32,26,17,94,72,33,34,58,80,81,139,140,120,97,122,139,226,245,238,83,43,44,4,47,16,29,39,27,32,27,63,84,127,130,93,56,41,64,53,70,95,148,142,92,68,35,69,75,88,74,47,34,33,89,99,182,133,84,125,129,140,86,83,75,106,92,91,125,82,74,76,70,118,100,62,57,60,64,63,117,162,113,59,111,102,91,115,111,108,89,82,96,92,157,198,63,50,89,161,127,72,85,71,84,106,92,97,119,100,117,92,102,174,58,64,99,107,99,95,109,98,98,102,115,170,81,98,90,92,98,102,71,107,105,83,114,93,176,193,72,78,89,105,109,73,181,226,215,112,95,174,165,141,146,135,184,156,102,140,130,157,148,166,137,168,169,141,107,88,103,111,81,101,75,82,68,51,34,58,59,68,54,54,52,43,33,54,90,75,66,81,73,64,45,68,90,94,119,91,125,119,116,101,106,116,108,112,125,132,135,101,130,94,98,112,140,147,149,150,163,140,140,136,144,135,129,124,92,125,121,105,97,79,77,84,62,91,92,69,81,73,80,61,71,83,52,122,144,118,116,79,56,81,134,144,95,75,78,88,47,43,122,47,21,99,155,152,93,115,129,118,114,117,104,114,119,120,113,130,125,126,132,105,123,113,90,74,35,44,30,59,73,48,66,48,52,59,45,42,70,46,44,57,65,68,43,32,32,35,58,47,44,19,38,41,37,23,49,37,56,49,54,51,47,56,92,57,64,69,74,77,83,121,114,73,33,33,58,50,41,49,68,124,139,131,119,94,61,69,84,113,67,78,100,123,89,110,99,89,96,117,102,93,99,91,82,95,115,93,83,106,109,92,81,58,33,20,18,68,25,41,51,30,25,78,153,121,109,36,28,44,27,73,78,107,119,88,85,94,69,79,110,109,92,73,87,68,66,69,60,63,71,45,94,93,83,85,37,36,15,31,40,52,38,47,37,25,29,38,25,16,51,48,44,22,23,44,48,43,32,30,31,26,35,36,26,42,19,33,21,5,13,40,35,44,41,23,12,18,21,33,37,55,43,28,70,49,44,14,29,46,50,37,31,53,30,46,27,41,39,46,47,37,59,50,45,43,60,37,52,45,23,31,43,50,39,10,24,20,15,21,8,11,9,27,43,43,5,33,24,41,14,18,41,31,62,8,53,41,27,51,43,52,19,34,19,51,31,15,39,12,30,29,18,25,31,26,6,44,107,60,39,31,33,11,26,30,48,37,12,10,47,23,27,26,4,22,38,21,20,33,31,31,43,16,37,30,28,15,38,32,65,37,32,33,41,44,59,47,31,30,35,18,31,28,17,38,32,37,41,32,47,
23,33,40,21,27,23,44,2,12,10,22,14,34,29,36,47,27,34,29,58,56,41,38,37,10,17,40,15,25,41,29,65,46,49,11,71,53,36,48,52,21,46,17,28,24,51,59,52,50,50,44,48,61,62,73,53,70,69,76,55,80,84,60,51,33,52,34,98,13,36,36,53,116,151,162,168,160,199,176,178,208,201,222,173,199,199,180,186,188,180,174,161,183,168,171,168,174,185,177,156,147,168,145,180,156,127,98,14,22,20,33,2,8,24,18,24,33,13,33,22,20,6,14,39,6,21,26,22,31,46,18,172,147,112,62,39,67,91,102,145,139,108,94,108,198,243,224,96,46,41,0,51,47,20,21,22,29,33,63,93,144,229,219,160,115,88,104,146,173,224,231,152,147,116,98,79,103,114,103,62,64,92,105,206,110,101,87,111,100,72,97,94,93,86,95,77,71,77,95,96,129,157,103,73,67,99,78,105,178,154,138,144,192,163,171,160,181,185,173,149,160,212,214,123,84,101,166,149,136,138,161,158,162,166,134,154,146,142,139,212,250,142,68,70,81,91,68,102,74,101,74,126,166,126,169,165,132,183,149,161,168,168,162,206,183,247,251,148,121,112,103,137,100,190,237,182,131,118,124,113,111,92,115,123,87,70,52,90,88,74,61,63,47,49,29,5,1,6,14,13,11,40,13,17,16,27,13,12,29,20,32,18,15,34,19,35,9,28,24,26,34,17,36,22,54,69,111,117,77,114,106,83,93,127,131,127,120,70,97,120,106,141,118,104,171,154,151,169,140,146,118,120,131,154,103,118,110,142,106,111,69,67,66,71,64,78,85,52,68,9,44,74,83,58,123,125,98,103,92,125,139,143,110,78,101,128,122,60,82,154,98,39,102,141,109,120,124,126,110,147,115,104,129,122,113,132,120,104,124,96,129,102,118,49,37,111,72,73,75,90,45,69,33,50,52,33,59,52,59,51,72,54,70,37,35,63,38,30,59,36,44,39,47,32,24,48,40,42,33,67,54,68,100,101,88,94,88,67,86,123,167,148,62,49,58,56,64,48,31,20,77,132,115,95,93,104,81,88,98,104,82,100,120,92,90,78,75,109,77,85,100,84,87,124,95,102,103,104,104,79,29,33,35,32,41,34,17,11,13,45,46,15,142,242,130,111,54,15,74,75,101,119,91,83,67,68,39,65,75,69,62,68,79,76,73,71,70,67,74,91,70,97,93,73,55,22,14,13,26,43,38,66,51,28,52,59,57,41,24,74,43,31,39,22,59,39,47,14,51,20,57,22,40,24,28,48,55,17,39,36,35,60,40,25,25,36,12,11,15,31,46,18,81,39,30,28,69,43,43,59,25,18,45,39,59,47,26,14,30,36,46,60,72,45,42,25,94,38,49,37,52,33,66,54,14,52,24,13,45,11,6,32,16,26,22,29,29,51,51,30,40,55,9,50,26,48,36,21,22,40,6,36,47,37,11,16,16,19,30,16,6,4,26,34,24,21,22,69,89,66,27,18,27,45,23,27,47,34,59,41,26,25,46,15,21,19,24,22,39,33,22,56,14,40,35,7,14,30,43,41,38,38,14,50,33,49,37,41,26,57,39,32,31,18,45,23,21,40,27,51,33,37,9,31,41,45,30,9,21,37,21,23,35,35,33,45,50,47,50,66,51,33,62,48,30,31,19,24,38,36,38,34,37,42,34,41,40,38,51,32,35,19,20,39,36,58,83,88,64,53,32,70,30,68,71,56,9,7,43,65,55,60,56,46,55,53,106,113,107,35,10,34,145,205,169,215,199,213,217,239,226,240,207,219,215,203,214,216,179,179,193,206,203,230,203,194,185,193,196,201,178,202,196,191,195,220,119,9,3,14,31,13,24,35,16,11,21,18,2,18,0,15,35,26,30,1,24,0,21,23,25,232,228,223,110,95,95,109,92,144,134,112,110,105,202,244,236,94,108,84,24,40,36,61,55,15,51,27,45,75,165,229,237,213,127,125,135,206,242,252,254,183,169,145,150,149,138,176,155,133,146,130,179,245,183,123,99,137,137,136,129,145,125,145,155,155,155,148,155,157,203,222,159,104,173,201,158,157,208,174,209,195,193,195,186,199,179,212,204,208,200,204,193,159,157,139,179,190,211,210,210,185,203,213,194,217,175,211,190,183,195,153,129,81,80,70,73,79,45,89,81,119,164,204,206,213,182,186,200,180,211,189,185,217,204,192,199,173,150,107,135,100,62,111,131,155,127,79,83,78,8,17,7,58,4,0,14,15,22,48,11,61,145,106,64,35,24,25,7,29,46,20,14,17,10,35,2,9,16,0,18,7,26,26,2,5,3,61,18,15,32,26,2,0,25,44,41,83,95,94,106,137,135,141,124,134,98,95,101,127,119,128,156,113,147,146,
146,139,123,100,76,125,96,108,137,148,142,124,107,89,119,90,71,62,113,69,91,72,42,38,31,80,70,39,84,120,112,100,98,98,161,168,116,53,75,98,89,80,47,143,149,69,98,103,67,58,66,94,121,84,94,79,71,80,132,89,82,87,63,104,104,104,75,71,41,101,100,131,83,101,77,28,41,44,51,53,26,64,48,33,59,54,45,42,59,54,43,44,30,60,55,39,39,43,22,33,41,27,36,61,91,138,166,119,123,81,127,116,119,135,96,77,13,30,46,46,54,83,40,52,70,44,107,110,90,69,75,85,79,108,75,91,113,95,107,100,93,97,81,101,92,101,117,89,90,70,109,116,65,98,19,31,24,15,34,39,19,25,36,33,61,44,150,185,122,158,52,60,75,92,132,99,53,76,80,70,69,65,77,79,56,65,84,72,89,71,50,110,107,94,89,120,65,55,23,6,20,11,5,51,16,52,31,36,49,35,52,41,37,42,31,19,28,30,57,14,24,57,31,26,12,16,29,23,31,26,19,32,30,34,27,35,39,24,3,3,10,9,18,36,30,34,20,40,27,28,31,42,28,35,29,47,54,32,26,59,28,38,30,51,50,48,36,59,48,33,46,46,67,24,60,56,4,26,27,9,27,8,23,27,27,25,44,52,54,38,23,40,25,55,50,61,17,37,33,34,50,51,32,32,19,31,23,37,41,30,37,20,10,29,44,13,41,7,8,14,21,54,105,68,56,15,14,7,39,32,45,34,29,29,2,34,41,31,20,31,14,43,34,33,50,30,20,8,48,27,28,29,20,50,43,41,54,40,34,31,46,53,26,50,16,20,33,35,60,29,49,16,41,48,38,42,37,20,20,41,24,32,39,30,21,37,27,44,32,37,48,30,54,23,50,45,12,22,32,49,17,3,37,52,34,41,30,51,73,29,45,60,67,55,44,9,24,31,47,81,91,74,80,36,74,35,61,47,30,22,39,45,21,45,52,50,62,104,91,107,80,143,122,47,60,46,186,213,193,210,197,215,225,205,240,213,173,215,200,238,228,219,192,201,207,205,201,201,202,213,225,193,197,252,219,212,208,205,217,218,118,32,9,12,16,3,12,10,25,26,29,26,16,14,0,13,26,26,6,5,9,27,25,22,11,234,239,237,156,113,150,103,120,146,156,163,166,136,188,250,218,89,155,152,47,32,22,22,42,52,30,47,30,89,122,141,249,219,142,169,156,229,236,225,223,174,204,179,190,187,198,190,193,192,186,181,211,217,160,160,140,170,200,186,196,196,194,180,214,202,193,197,187,208,224,240,195,219,232,243,231,220,213,189,186,203,189,200,239,219,201,204,210,177,129,84,104,117,199,184,224,179,210,223,207,180,181,204,184,182,189,151,158,116,77,133,176,107,79,76,93,71,50,89,95,165,181,209,212,203,224,194,178,189,179,209,202,204,154,72,97,138,172,111,107,71,71,106,122,132,104,82,83,32,24,40,30,34,98,98,146,172,248,243,229,236,249,244,240,232,238,212,247,215,226,224,199,238,212,192,213,235,192,224,206,195,193,187,175,169,148,151,142,174,176,171,176,120,134,104,51,86,71,109,140,126,117,128,137,99,115,77,101,76,81,133,147,145,122,132,113,127,95,105,86,103,67,122,151,109,115,133,126,116,115,106,112,93,85,70,73,54,70,81,80,90,76,68,52,70,122,95,46,72,101,110,78,104,81,66,52,56,61,133,160,73,61,64,61,42,39,49,57,70,81,91,78,65,75,91,61,36,71,50,60,35,58,17,58,33,67,100,74,89,126,103,97,103,93,90,109,78,72,70,69,52,60,49,36,29,55,45,16,56,24,45,21,53,52,49,56,55,67,55,97,140,144,103,104,104,129,131,102,44,21,22,23,65,73,66,66,55,78,45,40,54,60,73,73,76,102,91,82,98,88,74,90,68,111,101,83,96,110,85,105,133,88,105,91,97,87,68,93,116,87,42,39,27,46,36,18,33,34,27,30,34,79,80,57,133,91,45,66,61,89,62,53,50,24,61,68,90,67,82,51,86,86,76,81,78,75,94,87,94,97,39,14,41,24,1,32,25,20,42,41,22,44,22,50,27,39,33,10,29,46,44,22,52,36,25,31,28,16,40,58,78,53,31,12,44,45,39,46,21,49,25,24,13,44,31,4,8,20,38,24,40,46,31,24,19,22,32,23,36,29,32,37,40,63,39,34,32,41,54,35,53,40,64,53,53,16,23,31,59,31,43,63,26,43,1,7,1,28,24,24,43,27,35,60,40,39,41,39,59,34,22,41,45,26,32,47,55,29,43,50,20,23,42,54,17,27,28,24,44,23,26,39,11,5,22,46,40,44,65,36,44,20,45,38,37,18,49,46,8,4,21,13,44,26,6,26,33,24,47,33,25,33,9,19,10,26,33,49,32,35,34,30,40,72,27,32,29,25,44,56,46,40,34,34,42,41
,20,58,43,30,35,43,16,11,21,40,38,20,30,34,19,49,35,47,42,30,36,41,23,35,35,43,45,37,52,41,4,12,13,34,26,7,10,30,9,49,43,51,65,35,41,17,21,60,123,93,95,99,70,113,96,92,73,76,89,108,88,60,36,50,26,49,64,66,103,119,132,104,99,65,131,239,204,215,229,206,224,227,191,206,198,193,219,205,192,205,215,192,200,181,215,213,225,208,211,229,188,227,229,205,223,176,216,202,226,97,23,21,11,0,29,5,14,6,0,31,17,12,29,18,12,19,3,2,8,14,35,34,40,12,234,245,246,150,96,156,150,161,172,191,250,249,209,231,241,233,106,180,119,24,63,5,34,10,13,39,23,55,93,144,136,156,149,129,149,138,195,224,193,162,145,159,193,189,197,225,205,195,193,198,168,115,108,139,172,180,192,196,205,213,198,200,183,203,195,196,193,190,184,73,98,138,227,247,255,235,178,180,186,192,173,183,227,250,188,174,176,181,130,39,9,41,92,170,218,201,194,187,201,223,176,180,176,205,172,177,182,82,57,30,63,147,162,134,27,85,77,118,133,172,180,195,190,193,187,188,183,186,169,196,195,183,177,78,44,69,103,174,150,135,69,81,111,129,143,115,122,162,229,229,240,228,249,245,254,250,255,230,247,254,231,236,252,254,255,255,249,232,254,247,252,239,255,248,255,252,233,243,234,237,242,253,246,251,253,249,246,234,234,242,250,229,255,249,233,172,135,88,98,117,99,59,93,95,106,86,108,102,72,67,148,100,124,121,118,97,126,130,94,140,134,107,98,134,136,131,147,139,101,82,117,82,92,74,45,44,51,75,85,53,69,46,25,77,64,75,82,54,79,107,88,107,77,85,118,81,70,52,101,147,65,27,46,9,29,26,39,76,76,62,61,58,54,67,81,77,74,51,47,81,46,69,57,19,54,24,44,74,74,105,127,124,113,124,156,146,136,126,71,25,44,43,68,11,67,45,33,47,50,17,44,28,40,40,60,61,78,94,72,35,118,141,113,139,122,116,69,27,38,27,49,65,131,119,72,44,47,31,57,28,28,51,50,58,76,83,88,108,108,95,116,99,99,82,69,85,119,115,94,115,102,107,122,69,63,72,78,112,117,103,72,60,35,45,19,52,27,37,25,40,29,48,46,14,97,51,58,34,27,49,77,51,63,70,76,71,78,52,77,73,82,49,71,77,97,69,76,77,82,29,26,41,10,29,23,33,51,62,36,43,50,28,28,19,61,26,28,16,29,38,39,37,55,34,30,23,29,18,40,40,32,31,57,51,27,34,26,44,30,42,39,24,6,2,2,54,9,37,22,44,25,51,29,45,29,61,50,32,44,22,37,34,49,40,14,46,38,43,35,43,44,36,18,55,48,20,54,46,57,40,41,31,37,5,35,12,20,24,17,35,25,38,34,46,30,55,25,39,54,38,38,52,35,48,31,48,46,36,28,21,20,36,56,71,20,34,54,29,42,44,34,27,39,66,21,38,32,27,54,69,86,38,39,44,24,44,27,24,32,7,19,25,2,17,24,10,17,19,7,44,16,12,14,17,39,43,48,54,21,40,46,61,43,31,41,60,38,43,42,28,65,9,46,45,26,45,44,50,55,38,28,19,17,34,23,3,7,28,3,22,27,34,54,24,30,18,35,23,38,44,49,34,27,23,29,41,16,39,25,23,32,35,24,58,48,42,35,43,44,56,29,77,150,172,152,135,114,114,149,161,136,149,129,149,151,159,128,166,105,93,85,36,58,64,53,84,104,110,127,103,192,238,219,223,211,207,227,202,179,187,183,221,211,172,180,214,205,193,205,178,201,215,193,219,199,181,207,209,197,207,185,213,214,209,183,129,10,4,16,5,9,18,0,17,5,3,1,10,19,15,10,12,22,16,22,14,16,28,37,1,249,233,233,130,113,140,164,180,205,224,249,241,242,240,253,225,100,161,86,20,37,14,34,21,25,44,34,54,64,110,101,72,73,93,102,115,126,130,150,152,201,181,144,214,177,203,168,169,194,158,132,17,3,78,146,203,219,196,168,185,192,178,163,157,178,179,157,178,69,4,28,60,196,226,227,232,157,145,154,163,162,193,212,211,186,169,156,150,61,1,12,23,22,134,179,202,219,159,176,175,180,183,217,164,180,172,117,52,38,38,45,118,171,148,77,68,81,119,171,203,199,170,193,181,168,179,186,147,174,200,168,168,103,27,54,19,44,86,137,129,83,31,47,81,101,107,171,247,244,255,236,254,244,248,252,232,255,254,250,230,253,247,245,244,243,240,240,247,255,251,252,250,228,230,242,243,243,255,254,239,239,249,248,255,254,255,240,
246,249,255,224,254,243,238,229,233,200,121,93,107,120,122,108,149,134,85,115,126,104,114,119,115,110,129,121,128,124,106,109,130,149,88,114,142,139,145,90,73,53,94,116,123,96,87,44,61,77,84,53,61,62,35,27,56,57,82,74,49,39,45,95,62,68,74,101,78,80,74,65,150,79,21,63,66,95,46,28,53,76,72,100,89,65,33,44,103,90,69,80,55,46,76,48,55,56,56,53,63,102,111,131,135,136,134,128,148,118,87,37,55,24,78,95,79,68,50,55,43,42,66,75,64,23,57,86,116,114,108,121,51,91,149,121,143,81,15,30,28,38,64,110,94,108,129,137,47,27,75,35,22,29,62,119,67,109,89,111,96,97,100,119,116,100,99,97,107,77,106,108,113,72,89,87,61,82,97,82,104,120,112,127,82,47,20,47,35,40,27,57,32,35,55,44,35,99,84,30,15,12,66,67,55,53,81,67,85,63,89,78,72,76,45,64,79,87,96,71,72,45,16,3,21,24,28,24,31,63,62,25,33,36,22,46,38,54,49,43,51,54,44,22,53,11,34,25,46,50,53,57,19,25,50,40,39,32,45,43,51,39,64,43,9,34,45,35,10,17,24,2,29,22,34,27,51,39,42,31,37,35,37,49,37,43,31,18,52,38,26,45,61,51,45,61,43,60,35,46,31,21,39,63,39,53,13,30,23,33,37,24,27,32,1,14,28,39,31,24,68,38,34,39,45,59,48,80,33,30,16,14,8,57,38,23,36,37,42,38,36,29,47,34,37,49,33,47,9,3,40,64,61,77,58,24,29,44,24,38,25,9,9,11,23,8,16,24,28,47,22,38,22,39,19,14,25,38,28,35,28,54,37,28,29,34,36,44,54,26,38,35,18,21,34,29,43,20,43,36,52,29,53,29,56,23,27,23,4,11,19,10,41,32,56,25,23,39,15,16,24,51,22,39,3,13,31,16,36,29,23,33,22,36,2,33,41,21,23,57,45,56,27,35,199,252,241,222,177,141,144,163,155,157,161,159,151,177,175,170,159,170,149,119,80,88,57,67,102,114,145,71,140,223,219,196,222,191,209,187,174,179,189,206,199,212,167,199,184,206,210,180,176,177,191,198,186,194,191,211,194,184,186,200,212,202,210,190,124,22,0,10,9,27,15,21,14,18,24,8,25,8,13,12,45,21,8,14,10,14,8,29,14,246,238,208,137,122,146,116,145,187,249,247,240,253,253,232,243,157,186,66,21,56,12,50,18,18,46,48,57,75,125,85,32,57,73,117,122,149,148,179,193,153,161,188,178,172,180,175,160,141,137,55,9,30,19,88,191,220,197,174,157,160,167,169,150,176,179,171,116,77,30,46,42,68,164,208,210,151,149,152,197,190,195,167,174,165,150,162,83,29,39,40,29,39,55,125,214,204,202,169,188,165,198,193,184,164,131,82,47,69,77,91,84,126,193,152,107,113,157,145,214,179,177,194,177,186,168,156,164,181,172,185,96,28,12,25,37,40,35,74,96,73,59,11,18,24,11,72,124,231,243,237,230,215,206,184,186,174,168,129,117,119,228,245,245,250,229,254,252,247,251,253,255,247,246,255,238,251,246,255,253,241,237,231,246,250,231,255,253,250,244,253,255,249,249,239,243,235,179,75,82,67,94,86,130,108,126,98,113,145,150,148,136,128,135,97,132,124,73,110,111,94,76,127,135,130,117,80,68,85,92,104,103,104,78,67,80,77,108,84,49,57,51,39,56,66,57,60,63,58,29,76,75,97,78,69,96,105,106,88,139,125,21,70,148,141,89,74,60,108,162,161,126,60,33,100,104,91,89,66,85,71,84,90,77,85,59,79,79,87,90,143,115,91,85,78,45,63,64,68,76,86,98,101,81,95,83,78,66,69,57,40,55,27,43,123,133,134,109,128,112,133,140,78,70,15,6,9,72,115,131,130,118,127,111,123,127,112,85,29,9,17,87,131,115,93,100,120,113,109,138,86,102,89,91,140,119,106,79,111,112,98,94,71,56,86,79,90,122,99,107,115,109,49,23,22,34,28,35,39,44,56,57,52,96,185,108,19,30,12,65,54,78,89,71,94,72,87,57,61,102,71,85,69,88,104,87,52,41,14,4,9,43,32,44,41,65,57,81,47,33,27,42,45,56,26,46,23,48,53,50,41,26,51,32,29,36,11,52,29,43,40,36,43,43,57,54,64,35,39,48,55,60,54,55,29,32,50,33,39,14,30,45,34,17,38,43,25,45,35,6,46,28,57,51,18,30,29,54,45,67,54,9,19,13,60,58,12,47,45,45,59,22,31,2,32,38,53,26,27,22,35,45,34,42,27,30,38,35,47,50,39,64,19,54,24,33,16,22,16,7,34,29,22,43,47,18,16,9,4,51,30,39,14,25,21,33,36,49,71,41,78,82,45,25
,33,12,36,21,14,30,16,38,18,36,21,34,38,12,40,6,30,19,9,32,20,46,21,52,17,20,31,31,45,62,62,22,45,41,41,15,37,44,19,33,33,20,47,32,33,26,22,22,36,19,33,23,27,15,16,33,33,17,38,22,33,37,8,22,19,25,20,25,52,60,57,31,31,20,34,18,29,30,33,10,9,35,6,39,36,8,66,199,248,251,227,220,187,147,165,135,146,148,140,150,153,185,161,164,191,177,140,139,121,99,103,127,90,81,104,187,243,231,224,213,203,214,189,181,182,192,188,207,212,202,204,209,217,218,192,219,191,218,178,183,189,212,199,194,201,221,205,202,216,217,197,124,7,0,5,23,33,1,23,7,2,23,1,27,28,2,14,26,22,18,9,23,6,20,23,9,248,231,234,212,140,152,103,127,160,166,194,244,244,238,216,219,136,184,71,7,31,3,54,15,54,49,60,66,88,145,134,101,103,105,141,158,176,192,166,187,160,205,167,168,155,157,180,177,133,32,71,60,58,65,49,113,205,199,180,184,197,178,157,160,183,183,145,84,113,135,132,57,31,119,198,177,173,184,209,208,153,160,125,169,174,174,63,38,60,79,125,88,44,18,79,139,191,201,179,192,211,187,174,163,150,81,86,103,134,116,102,91,68,128,168,135,157,157,160,189,182,178,186,202,191,180,171,183,184,171,109,128,119,106,109,129,107,107,165,237,248,243,217,136,72,38,16,6,96,151,138,161,181,241,242,246,241,154,126,102,178,254,232,239,222,252,250,242,236,246,255,242,236,254,249,237,251,226,252,233,255,252,243,254,242,254,252,241,247,237,202,123,164,238,240,236,229,169,84,72,91,89,82,138,122,95,67,104,124,148,155,118,112,115,102,134,137,122,94,82,97,91,135,160,139,95,100,111,105,90,78,90,124,120,101,117,134,97,113,103,93,91,51,56,68,53,54,53,39,16,58,87,106,82,83,112,106,123,105,176,139,62,109,148,126,122,87,111,168,170,151,107,47,44,70,87,68,95,104,106,71,144,128,144,93,126,76,82,104,95,71,52,56,68,72,84,85,131,134,160,122,134,82,65,86,111,114,91,89,87,118,108,73,71,108,111,106,125,117,141,109,90,11,21,12,45,68,109,132,143,136,124,127,131,137,115,132,92,11,17,42,147,95,91,146,112,79,100,97,118,98,100,99,120,89,76,100,110,118,107,83,73,55,74,87,84,108,116,112,114,84,24,36,36,10,21,30,26,34,53,22,43,74,96,230,148,34,25,47,72,75,90,56,80,63,65,71,54,77,68,67,101,72,93,51,41,21,4,31,25,8,29,61,60,75,69,72,46,48,11,22,48,68,29,48,59,25,51,41,34,33,51,27,38,50,50,39,48,21,80,36,59,65,33,45,42,38,32,33,86,66,72,48,58,74,59,18,14,28,36,13,23,25,22,43,52,46,56,32,15,73,53,43,30,49,34,24,51,27,55,55,45,49,32,56,40,43,55,28,74,39,15,29,33,19,44,36,51,26,25,34,35,31,43,19,38,43,37,47,36,39,35,72,46,44,31,32,4,4,50,10,19,21,14,23,13,46,34,5,30,23,27,19,32,3,0,40,16,19,49,66,85,83,58,29,7,36,21,45,30,41,22,33,36,31,40,26,34,5,27,16,25,34,23,56,22,27,31,52,22,47,14,54,34,39,18,37,21,40,43,15,31,23,33,42,38,43,12,18,33,54,33,20,29,15,25,19,43,39,47,36,53,40,28,19,35,43,11,53,31,34,70,22,44,46,39,36,36,34,15,15,29,24,26,29,14,29,18,26,131,190,228,188,165,205,220,218,207,204,161,140,133,103,108,103,127,145,140,156,139,145,159,150,140,132,147,103,113,158,229,235,240,245,240,230,199,233,212,218,245,218,212,232,217,228,242,200,228,219,237,237,218,235,195,229,247,185,231,205,215,225,218,202,223,195,113,14,4,1,21,31,10,1,7,16,20,35,15,15,2,0,8,15,1,7,25,29,3,18,4,252,242,222,215,93,83,80,102,70,48,47,99,139,175,227,181,156,162,62,49,27,6,35,22,32,66,55,74,130,210,149,132,143,113,155,185,158,207,193,187,192,197,172,185,177,142,171,141,49,54,122,120,125,108,68,70,102,204,225,182,170,164,168,186,171,112,51,110,140,161,181,141,74,116,181,155,170,212,178,181,135,137,143,123,146,90,27,30,85,187,168,172,118,32,53,84,165,198,201,170,185,189,180,135,114,61,73,106,71,119,145,91,53,101,124,138,160,198,204,165,195,170,182,158,190,200,193,192,179,116,95,138,154,140,183,218,246,254,243,233,
235,239,227,238,243,220,175,118,172,208,252,248,248,229,253,234,250,242,230,241,230,224,250,245,255,244,243,230,247,247,255,238,253,252,226,251,235,235,255,254,243,250,251,244,221,244,238,244,231,103,40,25,93,145,169,161,166,92,82,67,100,105,117,144,145,128,91,112,114,97,116,100,102,67,83,83,128,93,80,99,130,121,142,101,107,108,140,130,95,100,112,99,103,105,122,124,127,126,102,120,49,87,79,111,92,60,85,63,46,16,71,100,93,88,96,101,104,119,110,155,177,70,66,81,76,85,91,170,193,138,157,95,33,20,67,90,59,94,64,73,104,144,163,156,157,113,117,77,77,85,115,66,76,100,118,119,118,190,180,123,124,122,93,71,86,93,132,136,100,131,137,124,91,83,58,73,126,148,117,111,65,58,80,79,110,123,142,144,161,138,144,133,130,118,136,101,111,100,60,27,76,115,147,89,114,113,102,129,68,103,79,117,117,93,63,92,92,79,97,75,88,87,87,86,95,91,108,107,97,42,70,19,23,17,20,65,27,28,35,41,37,44,42,66,131,166,77,32,64,109,86,66,96,86,78,73,73,71,90,82,73,93,78,35,40,25,4,29,13,0,32,37,52,68,64,63,52,49,13,25,49,14,48,25,56,36,42,21,34,15,5,36,7,49,26,12,34,57,25,40,75,51,33,41,48,40,46,45,71,78,97,88,73,72,78,66,54,22,22,32,13,17,35,32,25,26,31,42,32,30,38,42,36,36,13,47,56,47,73,48,30,47,54,20,54,43,48,21,41,36,34,21,60,50,39,32,2,6,21,30,25,51,38,48,55,27,59,42,33,35,37,54,30,47,21,46,21,6,40,30,16,16,35,15,23,38,17,26,46,25,22,26,46,12,25,28,20,44,43,25,26,53,70,77,56,22,14,14,36,51,29,50,41,38,25,26,33,20,18,30,14,24,26,16,23,37,22,28,31,47,39,38,21,38,38,24,54,49,40,24,43,32,17,30,38,42,26,25,62,15,18,11,29,41,42,62,44,17,38,14,29,35,30,56,33,63,28,47,28,35,42,46,21,41,40,46,26,25,40,22,8,14,23,31,14,30,34,40,175,248,247,251,204,207,181,182,178,199,216,202,188,167,134,115,110,115,120,134,148,124,130,161,159,156,169,135,115,148,202,210,203,214,232,231,228,231,230,223,251,222,233,247,239,238,234,252,247,216,226,235,229,223,228,232,223,221,216,220,206,191,223,217,212,211,217,107,6,2,4,18,7,15,15,9,32,48,6,0,26,6,10,2,18,12,9,5,31,12,13,22,173,201,211,136,23,47,65,51,79,60,51,44,52,135,208,134,118,150,63,36,20,32,19,36,53,28,29,51,106,181,181,149,147,165,176,208,195,202,184,182,185,192,164,151,169,167,155,51,31,47,138,167,146,118,59,61,55,173,187,152,164,154,163,129,145,75,15,97,168,180,177,202,178,157,161,135,147,173,166,145,104,147,166,117,98,34,13,28,118,177,153,183,119,61,60,68,75,187,180,124,122,155,145,123,82,32,65,121,103,82,110,77,78,85,85,106,123,172,220,177,211,181,205,199,182,228,175,176,117,66,48,89,179,238,241,246,249,255,245,232,243,246,242,255,243,239,254,247,250,235,254,235,250,251,249,239,255,253,234,237,247,234,234,244,253,249,247,255,234,243,249,245,247,234,255,252,255,247,240,235,243,246,249,242,255,251,249,248,90,39,32,19,93,85,87,127,151,65,93,115,129,177,120,120,126,115,105,104,85,69,84,84,68,37,61,76,96,103,111,136,124,98,146,130,94,136,140,134,91,82,66,117,97,88,133,137,89,94,101,86,82,39,67,85,73,61,63,87,137,127,126,95,98,102,135,138,144,146,143,148,183,124,68,107,144,123,145,173,147,105,138,84,19,44,66,108,64,71,76,78,116,147,155,158,146,132,164,147,104,109,103,67,86,108,106,116,131,154,138,95,118,119,93,43,79,139,131,137,80,79,84,99,70,101,63,83,129,104,60,8,12,67,139,128,117,144,154,151,134,136,120,111,133,95,89,76,91,78,106,95,106,124,90,109,87,114,91,97,114,106,100,81,105,85,59,97,104,96,99,111,94,105,105,106,87,94,103,70,50,41,29,26,13,34,52,82,41,47,14,14,22,49,42,18,68,172,150,75,47,77,87,83,70,87,60,60,57,107,84,82,65,57,16,22,4,10,27,12,25,77,62,76,68,66,53,81,50,57,34,15,37,16,34,11,26,19,28,24,41,52,27,26,23,56,36,24,49,46,43,37,21,54,27,54,24,39,27,52,30,67,71,86,61,71,94,72,67,55,35,
9,25,19,7,31,21,36,20,39,30,7,57,43,37,39,27,44,40,45,40,45,27,49,37,50,46,28,50,32,38,44,27,42,14,36,28,50,50,33,47,42,44,41,15,48,36,36,50,48,60,35,30,52,17,27,62,18,34,10,18,12,4,15,7,11,13,7,25,27,31,47,48,46,27,23,12,36,26,18,32,23,41,50,54,88,55,28,18,21,16,33,50,64,36,41,44,21,46,13,20,31,17,5,28,22,20,50,42,32,45,28,13,26,59,45,28,8,25,33,21,28,34,38,29,12,20,29,42,31,39,17,18,18,38,36,30,43,13,67,42,36,25,32,28,32,24,11,25,56,43,63,39,40,28,44,29,40,48,29,27,37,11,43,37,49,38,9,42,66,249,236,246,245,239,255,221,200,176,173,182,187,188,210,207,152,124,144,121,158,159,182,127,169,158,157,168,155,138,160,175,170,138,160,141,150,126,165,158,139,159,166,172,175,187,188,157,178,197,190,167,172,192,194,210,207,203,223,201,198,194,180,215,208,231,226,232,134,11,1,6,11,11,27,12,9,6,20,19,10,15,13,13,6,15,37,26,8,8,5,30,28,103,102,64,46,28,72,60,86,97,74,78,109,90,124,170,73,74,117,21,38,37,36,21,27,29,7,45,74,87,123,133,125,173,169,187,188,185,177,174,164,169,161,170,162,149,128,102,29,37,67,126,161,119,149,138,136,71,120,149,157,103,86,155,131,65,35,26,80,135,177,177,223,223,185,188,123,102,177,173,128,116,180,186,142,122,39,31,58,96,182,177,206,132,23,62,28,56,125,126,105,77,134,107,61,62,70,76,127,60,71,99,66,83,119,179,215,183,188,215,181,163,174,196,166,172,175,167,147,125,77,166,249,246,244,242,234,236,239,249,252,224,245,255,255,237,247,249,243,255,250,245,254,255,231,241,253,237,254,255,245,250,231,243,245,222,238,236,249,245,255,249,255,240,248,242,239,252,250,251,222,252,251,252,243,244,236,230,114,86,63,74,100,70,74,38,98,114,61,124,51,132,116,96,118,107,65,88,70,69,73,100,114,90,77,79,102,117,125,172,155,102,83,138,130,128,98,105,97,73,106,99,93,80,83,137,135,132,102,103,72,70,80,23,29,67,62,105,110,114,133,156,148,115,140,132,144,123,116,135,127,135,121,49,53,103,127,66,92,69,46,63,60,60,16,77,90,22,29,88,113,129,144,128,119,145,158,211,170,144,115,63,59,36,68,72,106,123,112,133,97,124,90,89,26,46,145,147,129,102,49,77,111,107,128,86,80,54,8,27,9,68,112,117,98,117,71,79,91,95,105,104,85,64,61,95,90,65,77,89,91,102,114,79,88,113,88,98,96,103,73,103,135,125,123,70,80,127,83,62,84,103,92,96,77,101,77,53,8,17,28,32,17,62,56,88,108,98,59,36,18,11,35,12,15,53,144,179,91,57,73,69,65,78,69,64,54,66,77,71,47,28,40,16,42,10,25,24,55,64,62,75,92,73,55,79,72,66,47,21,47,27,25,32,32,23,23,21,37,45,32,38,56,50,38,30,42,33,44,45,34,48,32,28,37,29,45,27,47,48,110,63,57,52,67,74,81,79,29,2,26,1,14,38,39,42,17,32,18,8,27,12,35,27,30,22,29,34,33,22,24,51,41,44,71,35,53,32,41,27,35,36,30,20,25,26,58,43,59,35,7,19,39,24,42,30,12,48,52,23,75,50,27,36,32,27,32,36,7,36,14,14,4,18,42,24,16,26,17,52,38,40,22,26,24,37,27,17,35,11,62,25,21,39,72,96,46,0,14,5,6,36,33,27,70,49,26,40,24,16,29,41,18,31,18,41,17,4,28,32,41,36,51,39,30,27,27,21,36,44,45,31,14,45,29,27,39,35,37,18,26,47,6,61,49,27,11,74,51,44,49,14,40,47,24,46,32,8,31,8,43,33,35,38,36,38,37,16,61,8,31,16,13,38,34,8,33,19,100,247,228,251,248,246,244,223,245,227,178,181,169,182,175,226,221,238,221,198,188,192,173,189,195,164,175,173,162,162,185,166,141,107,37,18,5,27,32,60,39,49,35,27,53,45,50,66,63,79,112,79,67,96,114,97,113,117,134,141,157,127,128,147,166,166,169,190,106,8,0,25,27,26,19,14,15,11,12,13,16,10,4,7,35,37,17,22,14,2,16,27,8,112,76,44,33,61,110,148,160,133,125,124,134,92,182,183,82,68,62,30,45,43,35,35,23,28,20,60,78,113,113,154,134,156,163,192,193,161,156,155,180,183,165,159,159,99,101,16,25,30,78,76,82,110,147,228,172,55,75,108,131,133,121,151,58,30,30,41,87,161,156,176,231,182,188,182,108,92,162,218,135,121,217,254,180,164,125,62,5
3,100,197,178,167,134,32,41,39,35,99,164,115,164,166,79,38,79,88,87,102,80,90,106,96,87,159,240,239,210,148,183,171,156,136,109,125,85,112,133,115,218,250,248,249,223,252,255,255,244,249,236,251,232,248,255,253,250,244,230,252,249,241,255,246,251,255,239,254,246,224,255,242,238,248,237,246,223,244,251,242,220,255,254,240,249,252,225,252,252,252,249,255,255,243,243,228,160,158,84,59,71,76,71,70,78,69,56,81,106,119,120,110,110,103,86,65,91,67,80,94,87,85,129,142,132,95,83,102,129,100,128,147,133,131,130,141,115,66,84,67,77,107,110,85,82,103,92,108,124,111,107,77,88,74,75,41,37,105,104,88,97,98,88,64,41,81,81,29,56,36,31,58,90,146,60,10,33,34,36,48,19,46,57,64,51,36,62,56,39,42,106,137,151,145,133,103,133,136,186,194,156,150,139,115,105,87,62,70,88,101,86,116,159,105,95,19,38,54,92,123,107,61,49,94,144,115,58,36,32,8,69,95,115,82,103,76,63,42,45,56,69,78,59,65,49,54,83,50,75,58,63,74,86,97,101,108,93,117,107,87,68,124,129,150,133,97,53,46,61,67,21,39,49,77,116,75,39,38,31,21,25,16,18,47,71,107,118,121,146,117,51,23,30,27,51,82,82,111,143,138,50,30,53,56,69,91,78,45,91,76,70,39,39,25,10,21,9,39,34,64,52,64,60,47,96,56,79,69,69,38,31,78,25,41,57,25,38,16,50,37,65,46,32,32,23,27,31,31,20,30,53,49,42,46,66,31,16,44,45,23,65,64,52,49,76,52,15,23,48,21,6,33,30,39,31,42,36,6,31,25,51,34,61,19,26,15,8,28,22,19,9,57,26,11,36,27,41,30,29,23,33,40,31,34,18,57,40,41,62,43,28,58,37,40,15,47,11,33,28,25,30,21,24,18,20,22,23,33,8,39,55,23,27,35,4,21,23,20,2,30,13,31,16,25,36,52,6,26,17,25,17,36,28,42,28,65,87,97,31,21,26,23,32,35,30,37,38,43,22,47,27,19,35,20,18,20,15,32,28,40,33,38,7,26,26,17,36,4,40,21,42,40,49,18,38,38,18,7,17,51,20,13,20,23,26,54,26,39,48,57,11,37,25,8,54,39,33,28,3,19,23,41,35,37,43,23,21,33,50,46,23,19,16,29,26,26,31,49,4,134,234,246,252,246,234,253,239,239,214,209,225,203,190,168,186,220,253,248,174,149,182,175,191,161,148,152,162,153,166,163,130,122,79,27,7,10,6,34,30,0,9,22,31,16,18,31,10,12,36,16,21,41,25,10,28,7,11,3,33,42,41,51,57,76,89,90,119,91,30,4,31,8,9,17,25,17,11,22,21,36,38,15,27,11,7,14,32,23,23,11,16,14,139,99,53,89,95,101,151,160,155,137,155,170,124,193,218,76,50,59,27,55,21,53,34,26,37,43,31,80,125,146,134,139,202,186,190,165,172,162,163,158,166,162,147,151,96,57,21,31,67,56,53,50,55,88,205,126,42,81,96,138,156,106,106,31,15,28,88,149,189,193,197,217,159,161,178,129,94,174,219,160,112,218,247,202,172,180,106,67,115,193,123,142,60,43,76,41,86,63,143,174,167,133,57,54,91,96,80,132,109,116,111,122,124,142,202,193,140,61,74,88,87,75,58,64,81,104,142,171,244,244,254,248,254,254,247,236,240,254,249,246,244,253,237,239,255,246,253,245,240,255,240,246,250,245,252,243,252,252,245,249,245,249,254,226,238,243,241,251,242,245,254,250,244,247,248,247,252,245,230,243,252,244,253,241,155,85,34,48,54,34,41,71,74,65,73,99,109,135,169,141,115,102,103,101,106,120,116,99,100,117,159,103,94,93,76,79,127,97,113,122,115,119,129,125,112,60,23,49,81,98,120,78,107,61,101,112,107,110,82,73,106,117,89,61,34,57,74,56,40,35,28,35,19,52,49,19,35,15,25,7,42,153,79,27,25,40,51,57,57,55,54,73,46,63,76,64,40,60,112,158,178,148,158,168,163,161,165,155,181,134,138,124,104,113,80,84,123,123,104,122,91,114,90,37,15,45,19,51,83,79,85,130,97,44,4,6,22,88,125,134,142,129,88,27,22,35,57,47,42,12,44,35,45,14,28,41,50,42,34,43,68,108,86,88,81,122,108,105,104,140,139,128,70,44,62,43,46,41,63,48,39,42,47,63,7,26,18,18,19,51,65,99,135,120,123,114,135,115,107,93,114,76,31,68,31,116,100,154,116,60,57,61,61,70,61,56,55,15,42,28,18,16,11,26,26,73,88,65,61,73,44,76,93,59,68,46,56,37,22,61,42,36,31,49,28,21,42,30,32,47,4
4,39,46,41,52,49,41,20,42,52,24,47,34,47,14,66,26,42,48,21,19,14,46,48,15,33,34,11,19,20,9,31,45,9,18,29,26,50,28,40,26,26,20,46,48,17,8,34,33,33,36,31,37,29,30,24,37,35,44,41,2,9,24,10,37,44,31,10,38,5,19,16,24,2,32,34,29,0,29,26,39,32,28,41,25,22,7,34,40,37,7,31,37,23,22,23,25,20,28,20,18,18,40,38,13,35,22,50,25,14,26,24,31,39,50,102,84,47,6,18,31,9,10,43,34,35,40,2,11,38,23,20,21,33,14,24,19,16,27,10,38,25,24,20,41,45,22,31,32,21,35,29,50,10,23,12,29,23,38,38,31,9,38,10,64,49,59,45,35,37,8,35,41,25,60,56,31,29,42,33,41,31,40,52,23,23,32,34,24,28,40,36,36,23,9,50,34,171,234,251,238,252,236,249,246,246,246,223,248,209,250,227,219,163,220,188,79,57,84,86,74,98,83,74,83,51,85,69,88,116,99,144,143,137,142,124,79,105,131,90,69,56,47,75,61,66,56,42,36,18,17,49,62,15,24,25,10,25,16,28,20,25,33,32,23,55,11,15,7,20,13,17,21,12,3,23,2,27,14,29,2,13,26,5,13,27,18,8,26,23,82,100,92,157,115,115,85,89,106,75,117,175,130,215,199,101,97,53,23,25,38,18,30,23,26,54,69,68,126,208,222,215,220,221,225,216,220,203,208,205,205,216,231,145,78,118,125,95,102,97,121,97,67,70,109,99,74,126,151,103,89,82,89,45,54,58,118,196,210,185,225,236,156,154,204,152,133,174,228,171,98,187,244,211,160,173,159,114,142,176,63,53,44,74,94,78,60,63,63,103,85,77,53,54,126,117,73,107,104,117,90,119,115,99,105,110,51,26,26,58,52,75,104,145,231,237,245,255,237,254,232,249,255,255,243,240,255,249,233,251,243,255,250,254,248,245,253,248,229,238,236,255,243,246,235,225,248,255,250,250,252,231,255,252,254,254,234,230,246,254,234,236,254,246,251,252,242,253,231,252,254,243,243,245,128,93,56,44,110,103,83,81,86,46,38,105,119,134,184,148,133,128,119,120,101,70,70,104,101,109,102,88,97,86,90,62,80,105,95,109,93,106,104,149,81,72,33,30,44,39,76,59,44,47,101,143,127,102,99,78,118,92,111,54,39,54,45,44,46,59,29,32,43,56,79,68,45,33,82,44,71,163,114,44,95,89,91,59,44,58,70,109,69,51,64,118,161,164,171,175,116,109,162,153,166,126,150,96,57,64,4,11,20,66,102,58,81,65,36,38,39,57,80,43,12,15,11,50,116,82,86,54,10,4,15,78,113,135,132,138,161,113,55,23,22,19,27,34,27,34,37,45,32,24,13,43,40,43,13,46,43,75,74,94,81,110,93,107,116,94,55,10,43,61,42,22,43,38,26,38,24,28,12,25,25,38,12,20,64,77,104,110,110,156,99,110,122,107,159,182,171,72,27,61,74,73,79,129,159,70,48,39,77,53,95,53,17,31,16,17,29,19,41,70,67,84,86,85,71,72,92,29,62,47,43,70,60,35,39,34,46,73,15,56,23,25,47,62,35,33,68,39,35,49,52,71,15,40,32,30,58,33,21,52,45,25,34,38,53,67,40,23,27,9,36,35,9,39,18,23,20,5,26,42,2,11,38,30,23,16,41,12,34,34,20,20,16,21,39,50,21,15,22,33,34,16,15,21,28,28,30,33,17,19,9,37,28,13,21,16,21,32,18,36,24,54,19,42,17,16,27,12,7,5,33,4,22,41,29,19,37,38,6,30,22,23,13,14,23,29,24,35,11,17,16,24,31,27,50,33,20,25,12,28,23,59,94,79,35,16,17,30,19,41,21,27,24,55,13,27,10,26,42,44,7,22,19,37,41,47,14,38,33,18,34,17,16,31,38,36,20,39,30,18,51,18,45,28,18,33,15,32,23,39,39,31,9,40,44,21,26,17,44,32,59,23,12,30,11,20,5,36,32,17,38,39,42,34,26,35,21,44,49,13,17,31,46,201,251,255,243,247,243,233,233,232,246,230,240,230,246,218,216,118,70,54,29,0,15,3,12,18,25,25,32,18,22,23,71,154,192,228,235,233,236,241,224,238,223,214,232,207,175,194,189,205,170,169,174,195,162,168,198,166,151,115,134,154,120,117,100,88,71,52,39,54,32,10,6,4,8,6,18,3,26,17,8,25,0,7,13,5,9,13,19,5,18,26,11,0,154,133,161,173,164,134,100,111,96,88,93,121,132,149,183,67,67,38,50,41,23,31,48,35,42,29,26,60,159,220,253,238,243,251,243,242,231,188,244,222,227,233,234,129,111,168,130,134,148,135,131,109,157,122,136,125,133,167,249,130,49,64,105,122,117,126,187,205,240,211,218,220,164,137,131,117,129,186,127,85,72
[omitted: several thousand raw comma-separated sample values in the 0-255 range, the payload of the new plain-text test-input data files added by this patch; the full numeric data is preserved in the actual changeset and is not meaningful to read inline]
6,24,33,31,61,31,16,29,37,45,50,23,30,32,20,26,57,26,42,39,47,29,25,34,31,17,28,24,34,55,60,92,99,82,83,62,62,41,66,96,61,86,58,89,88,91,49,67,46,65,64,73,72,67,74,83,63,69,23,26,37,42,8,25,21,29,68,59,76,92,78,88,90,92,91,84,81,78,77,80,84,102,50,38,94,62,62,32,71,58,58,66,66,68,71,76,68,57,46,38,18,11,13,28,26,34,20,25,34,53,76,48,22,31,51,30,65,37,35,7,31,44,25,23,25,39,132,99,31,18,12,14,45,33,33,12,34,31,16,34,28,33,38,13,65,10,31,12,28,43,20,13,21,10,27,26,25,13,23,11,17,38,25,1,52,13,39,5,40,21,34,37,27,32,31,37,43,30,42,30,57,19,29,45,11,22,20,22,24,22,59,30,46,37,102,81,36,36,0,38,28,27,20,25,32,33,35,34,5,25,20,32,31,29,34,40,42,14,8,44,40,28,37,25,11,32,27,7,34,12,21,22,29,35,20,19,49,14,34,42,32,39,31,37,41,31,12,35,40,48,12,20,15,16,27,21,9,48,16,24,13,15,20,7,22,17,22,36,11,9,39,9,25,10,45,16,33,19,15,31,28,40,34,68,124,67,37,26,13,32,11,16,24,25,36,33,46,11,23,36,24,11,24,39,46,35,40,21,26,22,20,32,40,17,18,14,18,10,31,23,33,21,28,20,19,16,10,29,16,24,15,19,24,44,45,31,38,2,7,32,30,4,40,51,58,54,137,161,129,108,68,40,45,33,42,44,52,54,41,76,63,58,68,78,55,79,66,79,75,60,39,70,104,96,58,60,83,142,129,83,83,103,91,106,63,50,71,65,56,63,68,16,47,56,63,41,51,55,45,66,40,34,39,50,43,70,37,54,56,62,31,21,5,19,17,1,15,19,24,12,16,13,1,2,17,27,11,0,4,30,25,16,6,2,250,249,246,255,246,245,223,243,236,168,135,138,151,114,48,61,128,130,98,130,128,105,74,117,127,111,120,115,83,119,55,35,46,57,88,122,41,51,43,73,71,73,98,106,100,110,103,85,136,81,24,48,93,122,74,81,170,226,236,112,81,186,124,80,108,141,138,135,170,197,177,103,73,77,102,112,94,67,51,85,50,44,60,44,60,102,98,122,133,124,67,70,78,78,54,35,52,39,40,46,66,52,72,101,79,38,25,44,47,62,25,47,66,59,61,82,88,61,93,75,38,53,74,103,111,106,141,86,30,25,58,101,90,67,65,70,76,21,55,39,75,110,109,140,146,144,161,154,162,187,170,169,137,161,139,130,107,95,96,48,38,56,77,67,6,38,39,41,65,29,59,47,64,95,86,107,102,134,131,133,132,98,113,134,145,125,113,139,123,91,37,43,47,69,28,44,44,41,19,26,18,13,40,66,93,83,90,121,119,87,99,119,112,130,103,82,112,102,49,50,70,55,42,59,61,40,54,53,40,48,54,68,92,131,138,142,144,159,150,144,178,182,173,178,153,156,121,112,121,51,27,39,65,28,10,24,37,16,18,15,6,37,16,27,33,7,16,19,9,22,2,11,25,19,19,41,18,11,9,51,41,53,29,36,31,37,51,66,67,68,51,37,37,28,40,42,45,52,52,74,83,114,99,97,44,6,43,33,46,54,39,51,63,61,45,58,52,98,104,67,75,68,28,25,65,77,76,74,59,94,35,80,65,53,57,63,62,56,43,63,104,68,43,46,22,1,25,16,27,44,68,84,97,86,122,106,104,80,78,80,68,73,72,85,82,77,76,81,48,73,63,49,62,61,59,46,62,70,54,95,65,82,56,23,26,0,19,19,6,7,18,57,13,57,60,41,27,57,58,31,44,54,62,47,48,26,38,25,41,14,28,29,77,94,81,38,39,29,32,38,4,26,50,31,24,55,32,24,41,36,24,44,19,40,24,28,23,10,16,35,40,28,41,13,44,16,14,17,33,36,24,35,20,15,27,46,28,19,22,25,22,43,19,17,28,27,20,23,35,26,27,20,37,10,37,24,26,55,2,32,70,88,51,63,26,19,29,15,28,23,34,39,23,31,28,42,42,16,41,44,24,9,5,41,23,27,15,29,33,48,19,30,21,36,31,0,37,1,22,40,42,34,42,34,17,43,39,26,29,29,28,44,34,28,30,14,17,48,17,2,27,6,28,15,23,20,27,26,16,30,8,25,13,8,47,28,23,36,34,7,34,22,40,10,17,33,5,36,36,46,100,124,92,49,22,17,12,21,10,22,20,36,8,48,26,29,27,14,23,5,22,38,17,11,50,28,11,22,24,19,16,27,17,35,8,36,11,41,31,24,10,19,19,18,23,22,33,42,13,23,20,15,32,16,32,26,39,30,69,61,64,151,212,208,208,184,154,93,60,24,28,45,19,35,63,48,46,48,71,83,59,40,64,80,59,38,33,86,132,99,93,54,87,91,85,97,94,90,69,82,76,57,59,85,79,72,76,33,76,65,62,49,36,58,56,82,56,41,29,44,41,74,84,44,45,58,41,3,21,6,31,15,29,46,10,23,26,3,6,7,1,24,3,5,17,19,28,9,11,33,235,255,253,2
37,246,253,244,219,213,139,132,135,134,183,176,119,151,189,120,101,91,47,62,85,133,84,76,60,75,95,68,44,25,42,97,109,46,64,68,81,74,65,99,89,47,90,73,97,99,71,45,22,71,104,53,82,156,202,174,86,55,171,106,63,104,134,73,66,123,191,142,57,57,33,68,80,45,66,103,88,68,77,37,30,26,29,26,104,118,63,67,69,54,64,88,55,55,73,51,67,56,53,92,61,74,82,119,122,104,101,92,125,94,62,81,100,76,88,111,68,24,36,23,32,21,32,24,9,64,213,245,253,215,158,173,184,192,130,101,46,117,191,193,179,177,172,166,134,180,157,107,94,86,37,32,12,14,15,48,35,23,43,87,98,33,65,152,159,152,179,186,173,178,178,187,157,178,189,167,170,150,89,116,73,66,29,32,31,53,62,43,63,34,66,109,146,134,166,139,181,139,143,192,196,198,178,189,165,182,150,121,89,62,43,43,7,63,59,18,78,28,44,40,81,114,116,154,120,120,122,143,178,160,183,208,196,185,136,146,124,117,92,66,51,44,12,45,21,37,14,50,26,37,16,20,4,37,8,19,14,13,9,6,38,29,8,19,38,15,4,40,8,38,28,85,76,24,41,32,70,60,68,87,105,158,139,141,147,99,91,59,61,62,87,108,115,98,117,112,126,145,142,146,148,78,5,23,36,52,54,50,57,39,32,38,43,45,13,38,66,64,64,36,45,72,51,68,55,66,35,71,25,55,62,56,27,76,83,64,61,66,28,21,26,4,2,30,62,72,71,70,101,98,96,88,80,85,87,93,50,71,64,97,77,76,76,51,62,38,52,48,53,58,77,65,54,59,63,49,54,64,8,7,7,7,9,51,23,13,19,37,54,28,25,48,41,57,61,39,54,41,45,60,37,42,66,44,33,49,56,45,37,35,76,109,46,27,18,39,31,57,40,43,23,40,29,34,42,29,41,32,30,16,15,18,10,38,40,47,12,21,22,17,11,29,23,54,10,19,30,21,20,26,38,20,15,22,50,1,32,26,40,38,30,13,21,14,24,33,20,38,30,28,25,36,29,40,15,14,16,57,93,58,22,2,24,21,30,43,28,23,32,37,19,30,49,25,53,31,19,17,26,25,33,13,15,15,21,16,42,41,49,49,32,28,30,19,40,41,53,22,37,47,15,17,34,21,49,6,6,45,12,20,31,33,35,16,33,31,15,20,31,43,25,12,21,15,30,26,19,13,24,14,54,24,37,43,45,26,35,19,28,16,34,25,19,42,62,34,26,51,94,125,87,28,17,36,27,37,24,18,20,29,34,20,14,48,16,38,25,3,44,21,42,37,21,30,20,35,31,20,15,28,35,16,40,29,12,28,19,23,11,5,4,12,24,26,34,13,24,23,32,17,35,24,37,15,25,34,67,138,223,229,175,185,201,213,204,102,44,59,31,43,43,37,28,51,39,39,52,45,74,54,41,73,82,30,42,96,116,142,80,30,38,35,86,42,36,41,32,63,51,53,53,76,65,48,61,82,27,69,74,54,47,66,60,102,75,52,59,51,73,50,70,48,20,19,9,31,32,18,11,4,18,11,15,14,11,8,9,42,3,27,2,9,10,21,16,12,3,228,254,246,236,224,223,221,209,193,118,115,141,162,192,233,161,156,149,78,102,73,33,13,45,55,79,95,79,106,109,94,66,46,45,78,71,56,65,74,54,91,79,65,75,52,69,52,86,97,78,24,8,62,86,73,69,69,84,81,31,36,92,50,56,51,81,45,64,79,103,74,51,54,76,35,45,37,62,69,58,89,110,71,80,72,47,40,95,75,109,127,155,130,88,85,80,40,36,48,74,69,79,71,82,88,87,105,107,55,76,80,82,67,50,36,29,16,33,57,27,25,39,42,111,111,71,98,118,192,222,227,228,206,145,141,187,238,129,62,18,41,98,87,80,83,74,55,22,22,17,23,23,23,12,33,31,20,50,116,52,16,72,132,171,83,58,137,118,152,156,116,127,92,108,94,91,120,68,76,53,71,28,40,43,28,26,32,57,98,104,58,25,44,182,163,174,170,178,172,175,111,93,128,147,112,131,114,86,90,96,51,62,42,42,46,65,92,137,137,121,104,90,72,210,198,209,170,142,146,94,106,109,96,86,42,63,26,17,16,11,14,19,17,22,60,16,40,19,21,22,27,52,39,14,9,15,21,16,50,30,16,25,36,39,41,35,15,53,42,30,33,43,79,84,107,136,96,45,83,28,142,142,109,125,114,94,73,79,62,74,81,105,93,111,97,131,107,87,143,122,112,105,60,47,45,13,13,56,16,36,54,40,22,29,56,32,31,22,39,64,82,63,55,52,54,77,53,64,63,66,80,46,55,56,71,70,64,80,37,55,15,29,35,34,13,43,59,65,83,108,73,87,110,84,89,93,80,84,90,81,77,72,62,69,49,43,67,53,25,43,52,71,30,71,55,58,74,53,35,44,18,33,29,19,14,28,28,21,38,23,33,42,44,58,72,39,37,46,38,38,38,
50,49,17,38,27,50,59,45,45,56,44,52,35,107,102,49,29,41,57,65,18,32,34,22,26,45,40,27,14,40,55,47,30,36,45,26,28,18,34,36,8,20,22,36,13,32,37,40,28,16,24,30,26,37,23,38,43,18,13,27,21,21,6,37,45,27,38,28,13,31,17,40,41,49,43,22,28,46,35,11,82,74,45,14,22,57,35,15,35,20,36,32,36,30,23,14,38,31,19,21,13,11,20,34,26,25,26,14,18,14,23,52,23,28,28,46,12,17,27,20,20,27,47,35,28,29,43,18,14,16,5,19,38,25,31,31,13,17,14,34,37,30,8,7,13,31,24,14,3,34,33,33,14,41,4,25,18,30,3,28,46,15,23,24,19,52,59,31,14,22,53,125,87,63,22,7,36,26,19,19,14,12,33,6,18,34,34,31,28,21,37,40,46,17,28,27,13,4,8,52,8,54,6,37,28,25,33,32,27,22,24,38,30,21,18,31,10,47,31,24,37,52,41,52,57,53,91,138,146,162,160,95,152,127,115,147,201,162,137,113,89,67,39,44,25,18,46,51,43,38,54,67,58,40,51,66,44,59,44,68,62,61,53,57,48,46,57,41,33,50,69,44,39,50,37,52,59,42,62,82,54,60,42,60,52,68,82,48,51,70,62,40,42,29,34,22,29,7,21,12,22,26,36,5,31,7,15,2,5,25,33,1,9,5,9,23,32,13,12,247,240,224,235,244,228,216,208,153,125,132,123,127,167,218,156,116,118,57,101,104,38,66,70,84,134,111,152,158,166,131,156,152,121,140,157,112,116,94,140,137,138,113,139,118,130,101,128,118,123,128,35,89,141,126,103,86,114,100,97,71,101,111,128,144,110,111,105,96,110,88,55,60,34,74,64,45,57,55,69,63,72,95,134,110,127,93,150,147,185,246,245,202,120,82,43,39,35,28,44,61,80,58,54,53,80,84,56,53,58,40,34,42,27,20,52,25,51,100,106,153,151,191,222,241,238,220,198,225,197,179,101,61,26,35,76,111,74,53,5,47,40,75,105,80,121,88,113,114,154,146,137,180,167,182,138,121,146,150,86,3,33,102,69,42,49,75,57,24,26,29,48,40,30,38,57,62,85,91,75,85,122,118,134,130,134,120,143,125,130,87,17,31,90,105,86,80,35,27,11,11,36,30,57,87,49,71,59,89,78,149,162,172,148,137,154,148,140,136,136,105,42,38,102,113,83,59,23,55,44,43,65,21,29,23,2,18,26,38,23,42,36,13,9,26,25,32,33,47,31,49,72,41,32,47,29,49,72,62,59,70,96,114,84,47,59,91,115,107,76,50,10,103,169,160,114,91,67,63,77,64,83,63,52,40,59,76,70,88,81,93,101,106,105,110,101,107,112,79,82,52,49,21,18,28,43,70,116,54,32,46,33,34,61,59,50,16,35,49,65,51,82,70,62,68,57,84,100,77,102,61,83,47,82,45,48,36,16,40,20,38,14,47,60,82,84,93,85,94,94,115,85,83,87,67,66,70,83,75,103,72,61,38,59,75,81,48,37,57,45,58,51,48,77,65,60,42,63,23,20,6,16,22,25,34,30,50,63,36,57,61,39,29,53,51,62,19,53,37,38,67,56,46,59,30,69,23,42,45,27,36,58,19,49,31,78,99,53,31,22,37,20,25,2,22,29,12,18,18,25,29,21,39,30,25,32,43,29,22,16,17,28,32,25,10,53,3,33,6,23,51,27,54,35,32,30,25,18,25,48,20,32,21,19,19,20,37,15,30,31,9,18,45,33,33,19,30,37,15,24,24,55,74,35,19,26,30,39,8,20,30,37,20,42,15,40,19,5,12,14,20,27,53,38,17,10,27,28,10,40,28,21,34,29,39,42,24,13,43,42,17,31,18,54,30,29,16,40,33,24,41,51,35,28,40,34,34,40,23,4,33,35,24,4,29,16,16,27,16,5,27,14,16,3,14,4,29,31,41,27,13,46,5,15,24,19,71,70,23,11,42,28,65,83,114,67,28,30,0,25,26,20,17,5,29,25,14,37,23,5,29,44,32,3,38,14,29,17,33,18,14,10,4,5,33,33,31,19,26,30,14,39,14,12,26,35,20,36,28,17,31,60,89,105,126,119,114,156,168,135,186,112,45,85,37,55,94,140,167,168,164,120,111,42,28,55,28,39,52,62,62,38,66,70,68,52,53,52,43,57,57,56,41,62,36,45,64,68,50,39,44,33,34,24,37,47,55,52,27,27,50,34,54,49,54,46,72,43,44,25,57,38,54,51,45,36,16,32,9,25,6,13,42,31,22,12,14,21,6,6,0,10,5,14,17,9,18,3,13,28,249,242,244,242,250,251,233,197,205,129,119,112,145,110,94,76,126,133,73,143,106,100,79,108,128,153,174,175,185,179,176,168,190,174,176,165,184,166,191,184,164,165,182,147,180,185,173,159,132,136,127,95,116,160,169,133,145,129,169,123,154,160,151,169,142,173,165,143,151,106,94,53,86,60,108,78,68,80,56,72,81,42,53,82,89,
83,74,83,86,137,157,148,112,81,66,71,78,2,35,17,47,40,60,93,54,36,58,14,34,65,31,57,78,117,97,121,171,202,252,247,236,229,228,218,225,208,176,111,78,52,28,3,6,34,28,29,62,93,98,96,133,156,172,199,208,226,183,222,195,188,236,219,134,174,175,145,119,99,109,51,32,13,47,38,31,34,76,108,112,118,148,140,181,159,179,115,145,208,223,230,139,220,182,187,152,138,112,118,83,81,38,38,59,48,46,74,90,93,111,98,132,162,150,184,163,146,114,152,144,141,156,142,156,90,77,86,46,38,47,33,24,15,13,19,42,21,6,22,18,35,41,39,30,10,39,19,21,14,12,8,33,19,0,29,20,20,37,55,52,73,117,136,150,180,165,150,178,185,217,167,154,194,198,183,92,84,169,209,185,158,94,75,204,203,134,117,81,42,39,52,34,42,34,91,69,73,98,115,84,118,83,120,104,119,105,105,72,70,67,15,41,20,65,48,97,132,155,175,192,117,50,25,42,63,30,38,41,57,49,82,78,53,76,52,61,84,75,85,69,81,70,53,84,82,42,68,25,25,17,3,24,30,44,43,66,81,96,104,68,86,98,93,90,83,78,64,96,98,75,48,60,56,52,54,47,49,77,78,48,59,84,47,77,57,50,36,29,20,23,13,17,38,33,43,48,60,53,74,64,52,36,18,56,45,28,47,53,56,39,48,58,28,52,77,42,41,30,26,50,63,53,17,18,42,35,32,106,106,53,19,15,11,39,36,8,27,30,47,11,33,26,25,25,40,26,14,10,42,47,28,8,58,24,8,28,39,34,35,9,7,11,10,22,37,0,17,32,22,33,18,15,15,45,20,10,18,32,24,38,22,53,30,10,30,31,21,47,22,35,30,37,44,140,36,22,14,13,42,31,16,40,12,27,37,28,22,54,21,17,20,29,7,15,47,27,33,22,17,17,21,44,36,15,20,24,31,21,29,42,6,55,15,30,5,30,24,24,7,35,61,21,43,28,31,32,10,32,24,26,20,24,15,2,1,14,36,29,20,5,28,9,6,22,24,23,50,13,20,22,38,14,19,29,33,13,39,65,65,45,35,46,17,25,48,95,99,93,20,18,28,30,12,23,32,31,6,15,17,15,16,18,23,50,5,30,20,5,12,36,45,25,17,24,12,17,18,48,29,28,33,20,31,8,20,21,29,27,21,28,37,46,102,130,115,97,102,120,139,129,107,119,99,106,92,77,56,81,95,146,133,103,126,84,51,41,21,29,32,41,45,71,51,52,34,33,79,45,64,55,56,65,50,44,36,38,47,60,73,60,41,59,50,44,32,46,59,47,68,51,33,53,67,29,47,44,40,41,25,84,41,42,70,26,56,57,48,8,6,31,19,33,31,22,10,53,6,4,19,2,35,22,11,5,18,6,21,5,22,14,45,255,224,242,254,251,245,209,230,189,123,103,107,94,86,55,77,143,180,133,162,175,131,90,130,160,163,169,174,182,183,181,176,180,183,189,173,199,188,193,210,208,174,182,210,203,154,184,162,124,174,119,41,86,123,204,179,126,178,171,157,171,188,153,154,139,153,148,166,171,133,122,141,100,86,110,73,92,46,42,58,54,57,32,6,27,38,41,56,18,14,41,50,91,84,83,115,44,32,2,30,53,39,94,97,77,89,119,81,128,111,134,186,178,211,213,230,242,214,233,201,176,136,111,98,68,52,65,23,18,33,11,32,86,112,71,59,67,91,158,155,184,200,193,171,167,116,126,127,123,134,156,106,105,117,122,87,131,140,127,104,59,19,79,127,77,116,177,210,190,192,200,190,158,182,171,148,149,121,114,122,125,88,101,102,94,71,102,110,110,64,81,25,36,117,148,193,203,185,190,151,177,150,147,140,110,120,112,73,60,34,34,53,31,31,21,15,17,33,18,26,27,46,42,43,18,13,43,16,31,22,26,39,20,51,38,38,57,66,57,49,69,60,61,72,65,72,141,163,168,191,209,191,207,220,184,199,214,214,234,164,162,217,212,175,108,126,165,188,202,170,94,75,184,146,70,73,52,44,41,37,115,118,110,125,137,133,153,123,139,79,116,122,106,74,75,64,59,10,38,20,35,86,105,131,148,129,132,172,173,120,33,42,87,115,69,50,13,64,71,77,40,72,84,55,72,88,83,107,102,119,92,61,59,55,14,0,8,29,36,22,38,79,65,87,62,92,77,83,62,76,86,55,43,44,92,73,55,66,51,68,50,69,83,35,86,79,60,71,47,88,94,67,39,33,42,26,14,46,21,10,27,7,23,53,25,41,36,39,45,23,51,59,68,70,42,49,22,53,65,55,58,62,52,39,46,41,36,40,41,35,48,25,66,18,13,32,53,82,112,43,12,30,16,4,43,27,45,43,22,33,13,19,44,31,13,45,43,42,24,35,21,40,1,23,49,25,34,19,39,38,29,34,22,14,49,41,34,54,3
8,21,24,22,31,26,38,28,23,5,39,44,34,44,37,18,32,30,43,33,31,30,24,65,95,79,9,18,5,27,24,30,20,26,36,18,19,6,23,36,5,42,18,29,26,13,8,20,11,23,76,45,29,11,39,20,49,26,17,49,6,44,30,20,36,22,32,35,29,21,28,41,29,11,10,61,57,20,33,38,6,33,32,21,27,29,19,2,14,51,29,22,29,37,26,36,21,10,27,29,6,20,31,8,13,21,22,25,63,90,22,42,2,25,8,46,88,104,87,78,44,22,1,14,19,23,34,16,15,26,21,36,32,33,43,50,55,7,18,2,15,7,19,20,24,51,7,28,76,11,46,8,65,35,17,21,14,22,30,28,36,24,27,88,92,107,56,83,90,99,86,50,74,104,120,95,77,81,87,83,91,57,77,94,88,98,60,70,81,70,123,97,110,56,110,113,105,82,88,59,82,105,98,58,81,71,83,74,104,88,61,62,73,74,52,21,59,79,94,90,96,105,96,73,64,65,84,89,84,119,89,83,86,62,47,74,66,65,48,16,14,2,47,5,18,30,8,6,8,18,14,24,7,4,6,10,14,30,11,20,15,43,241,252,250,244,244,244,221,212,170,108,126,132,145,104,86,102,138,128,110,114,132,70,73,71,115,127,137,120,123,97,135,113,127,115,145,153,139,140,134,131,125,101,148,122,157,132,160,138,43,72,28,0,3,74,96,101,65,88,106,123,125,114,148,116,92,121,120,131,102,112,105,118,97,87,81,89,61,59,28,37,47,41,54,51,46,82,104,105,75,80,94,60,136,102,110,144,92,19,39,54,132,129,170,161,160,174,221,195,189,207,205,210,220,213,141,212,129,109,65,41,42,32,37,34,7,8,49,65,55,41,36,59,162,187,95,41,19,54,85,155,147,126,128,81,25,32,52,24,43,77,66,75,118,122,127,121,181,159,199,171,67,63,167,171,123,140,190,169,148,107,119,85,63,56,22,2,0,7,13,28,125,72,90,135,145,175,200,217,192,163,103,55,70,114,111,140,84,97,49,67,47,27,25,25,13,14,57,10,31,23,12,14,28,34,13,3,48,1,23,31,11,43,45,46,32,19,14,26,25,57,28,38,35,56,112,152,162,198,245,228,227,248,205,217,123,150,231,232,240,222,209,179,207,220,197,192,215,198,206,133,121,188,164,187,176,183,186,205,153,136,71,23,90,117,98,91,97,90,95,108,111,128,134,143,120,135,134,125,111,95,100,53,25,35,36,8,52,49,62,78,130,130,140,119,126,134,127,127,83,32,5,9,94,94,71,72,53,54,78,52,81,78,64,89,87,84,58,90,122,126,58,36,34,32,26,24,26,30,33,61,59,69,69,43,76,78,54,54,79,42,74,78,79,72,45,45,58,52,47,48,67,43,50,69,60,54,46,45,56,55,59,23,15,13,32,21,2,47,5,30,47,36,51,33,58,24,43,32,51,66,51,34,52,58,58,49,57,43,18,59,32,76,54,50,22,46,27,47,50,48,57,54,31,27,45,43,38,45,91,95,31,43,46,11,5,43,34,33,28,26,21,33,27,39,10,30,25,26,10,37,33,46,10,26,27,12,18,56,20,30,37,27,19,28,12,44,45,40,39,23,44,21,32,38,14,29,12,31,55,16,31,6,31,42,19,26,23,18,38,36,62,32,75,76,35,49,39,16,31,18,54,44,58,28,30,8,19,17,18,17,35,20,23,41,18,18,30,29,46,35,17,35,7,21,33,32,5,38,28,50,40,20,40,38,18,19,35,39,26,38,38,29,28,8,28,7,26,36,0,34,25,39,31,27,36,36,24,27,34,26,15,25,27,45,17,20,26,39,8,53,35,16,21,45,25,47,97,82,13,17,4,20,17,28,25,60,112,122,72,41,0,13,5,14,29,12,10,27,44,15,17,8,30,11,27,13,20,40,12,15,22,30,58,35,44,42,24,10,37,22,16,42,19,24,24,8,13,34,21,27,38,55,139,132,110,104,123,141,73,91,105,102,118,142,101,99,79,101,90,73,84,83,83,109,127,122,130,138,132,141,151,148,150,149,129,169,134,103,117,129,115,104,100,95,117,108,120,105,95,56,94,66,81,103,84,112,127,139,113,169,134,133,146,137,140,161,144,159,153,155,163,120,110,125,137,100,12,3,10,2,19,19,19,20,5,32,12,9,7,17,27,7,13,32,20,3,9,20,22,1,250,249,247,230,238,238,223,221,170,120,144,118,162,146,162,206,132,50,15,4,22,51,17,20,31,20,25,37,58,36,23,23,55,51,21,18,58,37,52,55,41,40,28,57,26,76,123,106,48,41,41,42,45,33,23,23,37,35,21,31,55,33,44,42,54,52,32,44,67,47,51,44,55,53,68,44,25,24,13,34,118,158,175,168,160,195,195,191,175,160,198,171,169,155,151,157,174,168,207,233,220,239,230,220,209,236,210,188,149,120,106,114,82,66,103,67,36,48,25,7,10,43,74,57,29,25
,54,62,53,48,42,110,229,251,212,120,25,10,28,51,32,25,40,36,27,34,42,46,60,40,14,64,74,90,75,80,69,63,80,74,18,31,85,83,49,53,49,52,48,31,30,21,38,46,85,82,119,129,107,152,134,151,159,185,139,184,153,126,119,64,66,60,29,34,12,28,8,30,40,11,22,24,28,55,18,43,28,29,29,15,45,10,37,32,11,24,13,62,28,23,53,43,28,50,9,7,17,32,36,65,69,67,109,132,150,174,165,199,195,196,191,175,203,146,106,123,165,156,197,143,159,158,169,149,121,164,168,153,154,101,92,136,140,154,160,156,160,139,96,62,54,38,108,122,150,65,65,68,67,75,115,87,84,98,70,61,99,82,62,40,46,30,30,54,54,48,68,86,115,133,149,141,139,132,144,134,132,79,19,22,62,92,63,64,74,89,81,77,50,54,68,44,40,69,106,108,86,121,69,60,36,30,18,39,28,36,67,80,79,66,67,62,57,71,57,82,67,83,58,62,81,66,72,74,50,43,74,35,56,37,51,42,56,94,79,63,67,25,66,20,11,43,15,11,15,9,47,31,65,34,58,32,41,53,64,15,57,50,73,57,46,51,41,40,61,43,30,47,44,32,46,59,55,65,65,36,30,45,44,27,82,42,55,42,50,37,57,38,113,110,66,31,21,35,36,38,40,22,18,14,25,34,14,23,23,35,37,37,22,20,29,13,22,24,26,47,18,36,26,13,22,12,47,25,20,50,17,12,23,11,37,16,57,43,10,8,12,18,16,37,52,11,3,39,51,31,18,51,47,23,21,31,56,82,88,34,2,33,34,25,26,44,39,51,7,12,42,51,27,6,21,37,33,32,19,48,32,6,29,23,30,17,16,33,20,15,26,61,38,33,48,20,24,41,17,22,23,25,23,28,48,23,31,22,23,23,31,14,36,12,12,23,38,29,24,32,13,31,25,20,24,36,13,6,30,11,51,2,22,18,28,41,11,10,44,32,101,24,27,23,12,20,40,15,21,14,40,112,105,90,40,32,14,15,42,25,22,11,33,23,5,9,36,15,20,0,13,49,24,27,42,38,17,14,41,31,17,18,14,28,50,15,20,16,40,34,30,14,18,24,48,59,85,126,110,142,151,162,170,142,162,130,146,120,141,131,129,139,110,124,148,118,154,155,128,115,137,150,177,150,136,120,146,167,183,152,141,124,111,98,109,116,109,124,98,134,112,120,113,114,104,138,134,122,109,140,156,174,176,162,192,164,150,177,191,160,166,208,193,190,165,150,148,172,191,93,28,3,22,12,14,3,0,5,9,3,23,30,5,0,6,18,23,22,12,8,20,30,7,3,253,248,248,225,230,249,226,215,159,98,168,122,147,112,156,167,91,27,15,41,34,61,32,23,29,13,21,32,43,20,22,22,27,44,21,29,28,17,19,27,42,35,19,32,80,80,77,45,35,59,33,45,34,29,41,46,20,16,8,25,25,41,51,20,32,68,22,33,45,44,24,45,52,69,88,58,69,70,105,109,175,253,223,242,224,224,203,195,177,187,210,207,203,185,191,229,219,218,228,201,211,175,180,168,92,111,78,92,122,117,85,41,12,25,32,20,7,88,64,45,74,52,55,44,46,43,23,45,44,24,63,148,242,252,233,197,40,4,32,2,47,17,34,19,19,36,35,50,47,42,32,28,51,34,39,19,32,51,47,53,47,48,27,57,54,101,63,88,89,102,128,101,144,123,144,135,68,141,155,129,95,72,43,26,18,45,45,25,25,13,27,44,53,26,28,59,32,38,40,23,16,39,30,17,23,33,25,27,48,21,23,26,18,40,19,27,9,50,34,29,19,18,31,38,27,16,21,28,39,25,28,38,23,19,40,27,26,30,52,24,20,43,27,50,27,57,10,41,46,43,17,55,55,33,36,64,32,50,34,41,54,63,52,84,46,30,19,41,23,38,47,57,21,47,44,46,41,49,35,47,24,56,26,43,17,71,24,49,33,60,66,35,61,77,103,131,133,157,140,110,108,124,111,141,136,138,119,108,81,79,131,92,67,78,59,69,61,82,59,69,73,71,89,93,86,112,97,23,64,36,27,14,24,35,76,101,73,86,83,70,96,49,64,44,39,51,81,79,56,49,52,72,59,65,59,47,50,69,43,63,82,66,52,77,55,88,55,32,17,22,7,24,36,19,30,31,36,31,33,42,61,65,33,52,58,33,47,51,38,40,47,51,27,65,35,71,43,27,46,36,41,29,27,68,43,77,51,31,58,45,33,17,63,36,46,32,36,10,23,72,86,89,60,5,19,26,31,21,37,32,18,32,26,15,26,10,10,47,53,19,46,26,14,24,24,42,11,34,32,39,23,36,5,32,6,33,6,4,25,13,52,21,32,19,37,38,44,43,40,8,30,44,10,21,37,28,62,35,34,17,41,55,46,102,93,25,5,43,37,21,29,31,28,2,32,43,38,17,21,28,20,17,26,46,46,43,23,20,29,53,30,35,10,18,35,26,37,22,46,43,26,35,17,55,20,33,35,35,57,25,2
8,36,31,35,24,49,28,28,24,36,16,11,27,9,21,44,29,19,29,20,7,48,20,13,30,25,14,30,28,10,28,41,34,33,22,56,70,20,31,16,29,34,13,27,20,9,39,52,104,101,59,54,18,10,10,6,27,33,53,27,27,26,3,13,25,24,18,25,15,18,27,16,22,27,25,31,35,7,18,41,30,28,8,16,37,18,9,25,45,34,27,85,82,70,62,94,162,179,178,200,197,189,150,198,170,155,175,150,161,158,178,198,172,162,194,157,167,172,154,166,153,142,171,160,140,132,132,134,107,124,124,119,106,129,129,127,128,156,142,123,130,136,138,130,153,128,152,167,158,172,158,174,180,158,170,191,187,207,156,177,201,166,169,169,190,125,7,10,0,10,20,3,12,2,20,0,16,10,20,12,8,8,5,10,4,0,37,8,11,34,250,249,248,249,241,235,225,213,146,115,175,140,151,76,56,56,24,21,22,22,48,39,46,31,33,40,56,45,23,14,33,12,31,18,25,38,32,43,23,48,41,9,41,39,69,85,40,45,23,27,54,43,46,53,41,66,59,60,38,39,46,57,69,57,49,46,32,58,103,110,86,102,165,131,147,157,162,175,222,218,229,237,230,217,177,156,150,131,147,160,167,142,138,153,156,145,129,129,106,77,64,47,46,25,25,12,40,74,144,124,77,79,35,30,24,41,34,56,41,46,30,30,48,54,36,40,32,66,68,25,83,153,243,233,236,225,92,16,2,24,40,31,44,46,67,61,48,64,56,49,65,60,63,53,64,75,110,85,116,136,92,64,99,123,123,142,140,151,166,147,142,106,89,63,74,19,58,19,31,26,27,20,11,27,29,23,29,5,40,28,42,5,48,43,52,24,46,55,36,30,29,29,23,53,59,50,38,29,31,20,42,31,27,21,51,13,18,23,53,29,23,18,21,26,18,29,23,30,18,17,29,31,45,4,14,24,36,56,31,28,23,24,19,24,50,60,22,28,14,3,19,12,21,27,24,37,24,32,32,28,55,45,58,34,29,27,7,31,43,41,62,46,29,33,14,27,29,47,40,31,29,45,32,54,56,68,68,68,71,89,114,106,106,132,113,146,111,122,137,126,109,98,117,114,119,131,136,146,121,92,78,52,58,95,84,91,82,62,85,70,112,112,93,107,62,46,31,9,17,10,12,27,60,75,99,86,108,118,102,96,93,78,63,85,50,55,80,67,63,83,73,51,56,58,47,70,61,58,48,44,69,70,58,55,37,25,37,22,30,42,27,19,54,15,51,33,43,50,61,51,65,52,69,54,57,54,65,50,47,50,29,36,32,58,40,28,31,55,49,53,42,37,35,66,28,33,30,54,36,33,48,66,63,14,31,47,67,58,45,49,83,140,75,25,23,5,21,25,8,27,19,4,18,27,27,13,15,41,67,15,36,29,11,34,27,26,43,31,26,25,30,45,47,24,2,29,39,19,42,34,29,31,26,25,52,39,27,24,20,9,9,19,45,14,14,15,25,16,23,26,27,44,32,73,63,54,34,29,19,20,63,18,21,44,35,37,33,6,14,17,41,24,14,29,23,10,2,43,26,36,48,25,19,26,9,36,14,38,21,15,31,13,33,31,55,19,36,29,47,41,49,28,52,14,56,15,33,32,8,30,24,13,22,33,19,26,36,45,18,11,14,12,40,38,30,9,6,35,43,17,12,37,15,7,11,71,61,7,36,19,10,16,38,46,25,7,32,27,69,82,93,74,39,17,34,40,30,23,18,17,23,22,8,21,13,32,39,47,17,29,33,41,16,13,42,11,35,31,13,14,37,22,37,12,42,14,29,26,13,44,71,74,92,124,96,69,128,202,178,209,191,196,203,181,177,197,170,173,149,176,198,197,199,195,180,179,173,180,179,170,168,176,164,175,160,180,169,169,171,146,146,136,168,157,152,155,163,170,168,152,176,170,163,160,177,141,152,170,166,184,170,202,182,183,169,161,164,167,189,170,183,154,186,185,188,105,12,1,13,3,21,6,7,13,5,2,15,15,2,17,27,13,44,32,22,7,18,10,11,40,250,255,250,229,224,238,224,214,141,147,154,194,205,84,21,24,33,32,44,31,54,32,50,37,21,41,23,28,50,30,24,41,26,25,24,44,46,64,51,49,36,50,27,57,60,72,85,51,51,41,38,37,38,48,34,82,85,112,105,166,135,142,162,147,196,182,209,196,219,191,226,181,205,227,204,221,222,229,203,198,163,169,168,164,145,148,171,138,99,137,118,120,116,84,13,85,11,30,8,33,26,24,32,41,34,19,61,63,75,52,60,47,42,86,141,129,91,65,30,18,35,39,63,61,56,30,55,80,28,62,132,152,178,162,182,159,99,64,31,35,52,52,55,47,50,112,87,120,97,120,164,170,163,144,143,124,146,149,139,119,65,32,48,71,89,50,37,59,39,32,34,44,15,11,38,17,27,22,15,53,3,33,17,24,38,20,32,37,45,28,41,29,7,33
,10,30,7,50,45,30,15,23,4,23,14,28,12,17,16,19,17,36,36,19,14,36,31,19,59,36,24,28,37,40,18,19,14,22,40,6,24,29,31,24,6,27,0,30,7,37,27,24,35,27,45,51,14,14,0,17,5,56,38,26,30,52,32,47,46,37,24,13,37,47,51,12,8,13,31,7,58,35,15,31,44,36,40,56,33,58,60,43,97,98,108,115,77,115,137,126,82,110,123,148,118,95,98,102,120,127,102,121,100,133,123,119,123,99,59,67,45,48,72,55,62,71,70,107,89,116,103,97,57,50,29,32,17,31,19,58,94,104,97,75,109,131,82,97,86,78,79,70,92,89,79,81,77,71,64,70,89,61,46,40,67,58,31,94,35,69,83,44,66,26,18,18,12,30,11,23,30,22,46,60,48,34,63,69,56,41,42,75,63,41,59,70,49,38,30,35,43,45,31,29,51,63,35,40,29,49,55,42,41,22,57,42,58,43,37,30,62,39,35,42,44,35,39,31,54,37,36,72,99,73,28,18,28,25,26,30,40,9,38,26,19,32,21,15,37,54,37,31,34,21,23,40,36,32,33,15,58,8,19,26,23,26,18,62,36,20,39,16,47,44,34,18,39,31,36,26,34,10,57,37,23,47,20,40,31,22,28,36,18,61,98,42,32,21,25,36,27,31,18,38,26,45,24,12,36,22,40,27,25,21,30,13,26,36,40,12,20,32,23,42,35,38,44,34,14,45,44,36,35,23,28,23,23,41,9,31,41,27,26,28,35,46,12,9,31,21,54,53,5,39,33,20,25,11,20,31,15,15,36,27,33,21,12,7,37,15,23,23,23,15,48,83,35,11,43,28,18,19,25,17,18,38,28,20,33,51,97,84,76,27,44,31,6,22,10,15,29,33,31,6,31,11,24,38,8,25,15,47,30,46,17,26,26,24,29,22,18,34,18,25,23,21,24,28,36,95,83,105,135,186,154,87,64,108,163,176,207,186,195,190,179,167,205,210,205,183,187,172,183,168,182,184,198,183,186,172,202,169,178,209,187,182,176,157,175,199,194,188,185,185,197,204,146,170,169,194,188,190,186,186,181,190,176,195,165,170,178,179,173,175,180,210,181,187,163,160,166,145,166,180,166,114,17,7,6,31,13,13,25,2,18,0,22,4,8,7,16,9,24,22,7,41,18,35,16,37,244,254,248,244,237,229,215,191,98,130,156,212,210,61,24,33,29,55,29,46,31,32,52,51,10,46,33,48,29,59,53,42,34,25,44,33,59,40,75,58,70,56,66,39,51,128,109,85,82,100,140,131,156,156,180,215,235,206,200,216,234,244,235,203,231,234,229,221,229,221,200,170,167,154,128,135,103,109,71,38,79,33,41,74,144,169,196,169,106,95,93,124,80,45,0,25,31,34,58,38,90,39,45,37,36,62,60,59,57,39,37,71,84,188,255,246,204,129,26,34,63,99,34,37,39,34,53,58,79,168,191,141,86,61,71,91,57,92,90,120,162,112,145,135,140,171,163,130,153,138,112,111,126,100,72,45,26,53,22,48,18,34,48,6,69,19,19,50,29,34,44,40,47,43,41,34,49,19,39,35,25,40,62,51,46,38,45,84,36,51,50,45,31,43,51,50,43,57,48,36,45,66,52,38,42,19,46,61,53,52,61,68,50,51,50,53,22,37,58,42,32,49,43,58,47,33,31,47,37,27,62,44,41,34,66,45,54,32,55,42,58,52,38,66,61,55,8,35,19,56,35,39,40,70,50,38,36,42,33,42,47,25,40,16,14,33,20,45,16,53,75,36,36,25,50,47,77,50,33,53,42,68,98,96,109,89,86,99,83,106,102,106,111,111,108,108,108,90,73,113,97,98,136,115,110,103,63,69,46,50,37,52,76,84,57,58,78,91,70,55,58,26,34,24,26,27,7,73,74,88,118,103,111,115,112,84,91,67,75,101,94,62,77,95,71,68,78,63,53,68,69,83,79,67,58,66,73,49,63,54,57,35,21,15,26,24,26,22,51,47,41,34,51,37,56,62,60,41,57,55,45,44,73,39,62,72,43,36,56,53,59,38,35,35,76,25,51,41,19,37,18,48,39,50,42,23,23,47,32,70,34,45,33,60,25,48,58,24,39,44,31,30,87,100,66,33,8,34,39,53,68,31,19,5,39,21,29,35,7,0,26,21,43,44,28,29,38,42,31,54,46,16,35,21,19,46,18,26,8,46,38,30,21,35,31,47,20,5,28,60,49,22,26,20,8,53,18,41,51,52,7,35,11,42,91,63,57,7,20,27,12,33,37,38,25,54,11,35,29,34,38,42,26,34,16,27,38,27,36,30,20,10,37,47,42,16,37,9,31,44,23,18,55,30,23,33,14,46,20,42,32,30,44,20,63,21,33,29,17,14,41,25,18,31,32,33,49,24,13,19,6,17,46,38,14,8,32,2,40,33,35,4,34,12,57,102,37,26,52,17,35,31,22,34,16,1,34,22,15,12,88,98,116,34,9,15,26,25,23,17,30,49,9,53,25,48,30,20,7,0,36,10,6,5,34,32,20,29,31,20,11,56,
27,19,22,31,25,27,142,159,98,72,59,123,138,62,20,44,82,146,178,197,200,197,155,148,220,249,229,203,177,179,179,190,168,174,169,178,167,170,170,173,173,194,193,177,188,211,207,188,204,195,200,189,202,200,197,175,186,190,199,181,167,194,213,171,184,179,174,187,188,175,183,173,211,204,188,177,207,197,212,171,175,189,184,96,10,3,14,13,9,0,7,4,1,11,9,21,27,15,28,19,23,23,13,15,9,13,12,10,252,251,229,244,230,221,210,166,41,49,133,197,196,63,6,11,25,44,36,54,30,56,31,50,48,30,45,51,61,52,60,75,61,106,69,93,83,98,113,123,147,166,165,203,175,205,222,227,221,233,215,235,233,244,225,235,225,235,219,221,212,211,209,177,162,175,127,98,119,107,117,108,100,80,52,71,59,46,20,22,9,7,8,62,101,147,137,124,109,77,98,92,97,77,8,11,54,34,16,41,58,45,37,42,22,73,101,115,134,91,86,86,148,250,252,246,241,148,30,30,94,100,93,58,33,46,70,79,112,157,169,88,55,49,42,41,47,130,135,84,145,93,130,82,89,66,49,46,32,38,69,23,42,16,34,5,13,38,29,38,4,48,37,46,15,27,22,17,19,31,39,63,19,22,48,47,62,31,30,57,54,92,86,137,146,175,198,181,224,184,191,192,171,176,193,192,193,191,174,163,200,199,194,188,206,194,184,159,190,188,202,217,201,208,212,200,193,138,180,210,209,190,204,120,78,73,88,128,176,175,190,184,176,185,199,190,158,177,178,214,212,213,195,187,176,86,36,14,44,88,139,160,134,160,156,157,141,149,152,159,153,155,163,146,130,97,108,111,121,133,132,100,81,92,89,126,131,63,51,37,32,53,92,101,93,78,61,72,76,121,58,73,110,95,81,72,101,96,117,114,107,56,76,47,55,49,35,37,31,43,45,49,74,63,66,45,65,56,55,52,38,11,48,21,33,32,60,93,92,84,77,108,89,120,91,102,82,97,87,66,91,87,91,78,61,63,67,90,103,57,86,78,61,74,63,66,62,52,49,39,22,11,44,5,14,18,37,73,59,31,32,42,61,55,48,59,44,64,12,58,67,70,25,66,31,39,45,50,37,47,58,17,40,30,41,18,39,38,54,34,63,50,45,49,29,37,55,47,29,53,60,19,46,27,17,48,38,82,22,7,38,26,12,86,73,74,9,13,45,45,61,13,16,23,7,22,19,12,36,42,19,7,48,23,48,25,29,39,27,18,19,61,10,15,22,34,9,29,29,32,1,18,36,23,83,37,11,31,23,52,43,28,20,38,41,28,31,28,25,24,14,41,23,15,97,82,45,30,15,30,20,47,22,25,22,10,23,7,32,36,30,19,27,9,25,6,22,19,37,40,21,13,23,48,19,33,10,13,24,34,42,8,19,40,34,55,37,39,53,40,52,63,35,34,42,21,31,40,22,11,20,28,50,26,24,50,5,24,33,37,28,10,6,18,23,0,21,38,31,22,16,28,23,31,82,82,36,24,14,19,20,30,12,4,26,15,14,10,22,29,30,59,90,94,56,47,19,28,8,31,26,14,30,16,28,37,39,5,14,31,41,31,13,13,53,41,26,12,34,33,4,19,10,12,8,36,24,69,169,172,112,106,47,38,38,8,42,4,37,117,124,148,156,208,192,209,244,255,204,186,208,171,207,186,181,173,182,179,162,186,186,157,162,188,183,174,159,179,191,193,190,188,167,146,172,165,175,186,155,190,174,171,195,181,200,177,189,203,191,183,174,196,165,180,180,176,193,189,184,183,193,157,196,192,168,108,32,5,19,32,7,20,21,10,2,19,15,0,21,8,13,8,19,29,20,0,8,9,9,19,251,255,243,221,223,236,220,186,43,81,138,210,212,38,20,17,21,39,57,73,52,35,49,45,45,60,61,91,111,129,132,149,160,162,182,164,179,222,218,226,217,231,219,217,233,233,218,226,207,211,211,184,201,182,139,150,97,89,71,73,50,63,43,39,26,33,12,32,64,66,118,159,163,87,63,84,52,80,50,70,44,11,31,28,87,155,131,116,82,116,112,144,106,44,22,47,73,68,24,45,59,67,60,43,31,81,110,103,110,82,85,135,191,238,238,238,231,160,43,14,53,148,139,123,48,64,149,145,121,63,48,81,68,66,43,23,37,54,30,22,25,28,41,37,37,14,14,15,21,7,16,12,4,52,35,30,38,33,46,28,35,34,46,52,17,34,68,45,41,38,71,44,56,101,108,117,167,178,197,187,203,222,209,223,245,240,239,249,242,218,221,230,202,237,255,237,215,214,217,236,247,250,229,247,239,231,237,225,223,243,250,244,246,230,237,231,255,249,242,221,207,204,167,78,119,139,219,227,238,247,209,23
8,240,237,242,242,249,248,238,227,228,199,173,122,78,94,131,196,206,219,233,240,225,239,226,245,233,242,231,216,237,202,212,229,227,178,173,177,194,223,233,194,143,141,140,99,119,145,60,75,55,77,101,74,79,72,73,50,65,61,67,56,76,98,88,81,86,100,117,87,50,49,53,39,64,19,36,39,13,40,59,52,79,43,67,57,58,49,19,26,16,18,27,54,74,91,90,107,98,107,75,84,102,74,95,72,98,72,75,88,70,67,60,45,45,68,94,80,76,69,46,66,61,72,77,49,102,46,32,31,38,8,9,44,39,40,47,54,67,55,57,47,57,54,55,42,54,61,50,56,64,48,71,55,45,42,48,53,30,36,52,59,32,58,41,28,17,49,50,46,55,40,55,40,45,43,34,58,44,38,58,50,52,48,29,37,52,33,34,54,75,13,46,15,82,122,63,53,25,25,63,22,13,9,11,4,13,2,13,20,19,9,26,15,0,22,37,13,25,23,8,17,22,30,45,9,45,26,6,45,18,23,12,26,59,26,27,18,48,30,20,16,32,35,12,15,43,47,36,42,18,46,20,12,69,95,24,25,34,29,17,19,33,26,20,33,27,26,39,44,27,22,25,41,22,14,19,47,34,5,15,50,19,32,32,38,32,42,54,45,32,37,18,37,55,36,33,30,30,23,35,40,12,46,32,25,53,11,26,33,39,14,32,28,18,38,39,16,26,49,31,16,31,18,2,22,23,28,15,23,9,22,9,26,56,83,15,37,28,19,40,9,31,25,31,64,24,20,17,8,31,15,77,102,68,78,23,20,25,39,31,33,9,17,19,30,34,24,33,23,28,13,14,10,49,7,23,22,20,22,23,32,21,11,28,28,15,91,184,146,157,176,144,120,79,63,41,43,82,147,125,119,161,208,156,177,207,180,113,145,146,165,164,137,165,151,167,160,182,181,152,146,138,169,160,139,173,169,164,167,148,163,131,142,149,171,143,153,189,160,180,183,179,188,171,185,174,189,162,171,175,182,172,181,180,178,166,194,160,198,162,153,194,183,167,113,13,19,2,2,33,13,3,42,12,9,4,6,21,3,21,19,17,16,15,14,14,7,34,14,235,255,237,248,233,248,223,169,77,86,136,197,247,76,82,77,82,130,121,161,151,147,184,171,175,212,217,202,230,244,240,209,215,162,226,216,210,194,203,180,165,139,141,144,158,93,38,108,110,103,66,38,26,11,23,6,23,31,9,60,18,33,43,22,34,50,33,35,25,58,104,137,102,57,62,83,63,95,108,133,158,127,113,63,106,157,145,122,97,122,107,124,114,32,0,61,133,157,146,137,173,151,96,36,9,23,48,45,25,43,117,135,153,181,219,196,193,126,15,5,70,144,119,108,35,70,121,113,53,37,25,44,80,18,38,20,4,41,4,38,40,33,16,19,25,19,26,40,20,44,11,55,47,56,30,49,74,64,50,44,67,68,64,53,48,90,108,134,162,171,181,208,221,212,225,221,231,235,233,228,198,209,204,175,150,158,163,155,186,193,179,192,184,185,180,164,165,158,185,205,195,169,165,183,198,201,205,197,203,171,190,187,201,213,215,214,202,203,197,75,68,98,120,157,179,224,212,193,195,216,209,215,237,250,227,199,181,157,114,74,58,26,90,110,174,168,208,226,216,246,213,211,193,211,180,193,205,197,171,190,196,217,182,205,175,166,120,119,147,181,169,185,190,142,123,84,93,106,82,78,97,121,119,78,81,74,59,74,72,71,81,91,104,78,98,91,115,89,81,84,54,26,45,53,37,69,54,67,68,113,52,42,47,84,47,48,46,17,30,10,31,70,68,74,110,102,109,133,108,74,101,76,77,60,69,88,84,63,91,59,101,49,61,59,86,81,84,94,55,61,86,43,76,69,53,39,48,21,20,24,57,10,31,53,57,29,61,51,42,60,50,47,58,37,45,38,66,50,55,40,56,53,58,61,51,65,52,75,47,56,36,43,64,29,37,40,35,48,32,32,40,21,44,36,55,44,31,37,56,26,28,38,48,43,44,33,46,58,40,49,13,33,32,10,48,85,120,91,36,9,16,29,9,15,27,26,9,32,27,22,25,31,23,35,27,33,23,40,7,35,31,37,27,36,26,28,4,9,30,16,35,41,35,13,27,28,14,30,14,16,29,13,40,35,30,31,32,23,10,24,32,42,31,29,75,72,55,32,25,14,26,33,17,3,20,32,52,49,30,23,25,28,24,14,34,24,30,9,16,28,27,23,38,25,13,7,47,8,58,31,30,90,17,22,55,19,52,47,41,37,38,45,45,26,23,38,30,41,44,43,36,31,13,10,8,11,11,31,48,18,19,29,27,7,19,28,36,27,18,22,37,15,1,10,95,60,23,27,25,27,14,30,21,24,20,24,19,35,38,20,22,20,42,39,109,98,26,41,26,16,21,8,18,14,39,27,21,27,13,38,30,22,36,36,14,20
,19,34,36,14,22,29,31,21,45,1,43,179,188,192,201,214,203,225,204,161,131,138,96,100,82,53,93,142,150,142,177,161,118,157,136,156,148,157,169,159,146,145,143,136,128,115,121,154,132,131,127,122,137,119,166,131,160,127,116,143,146,143,157,142,125,154,137,134,143,152,150,163,172,193,169,163,200,189,178,158,160,171,168,161,176,187,206,175,174,117,2,1,4,17,23,13,22,17,13,15,28,28,33,9,22,8,24,15,36,18,8,33,9,26,244,240,252,251,241,246,217,206,118,129,182,247,243,150,181,188,184,224,195,214,235,242,233,212,225,237,197,226,194,217,199,192,170,133,109,120,113,92,95,64,79,70,99,65,58,43,2,31,10,7,39,24,58,25,47,60,31,45,33,50,81,55,55,34,24,32,33,32,56,74,30,29,26,53,21,68,42,61,97,143,198,240,219,130,187,175,200,220,187,169,204,226,230,145,18,86,178,223,240,216,203,205,152,83,31,68,74,85,53,87,133,142,93,81,122,136,109,63,29,19,24,60,61,54,33,41,75,47,70,37,61,75,33,16,5,14,20,26,31,28,25,37,41,49,59,48,61,54,76,80,49,62,131,136,92,90,75,114,147,166,180,180,204,228,203,222,213,240,245,236,251,239,233,212,212,207,213,192,199,183,184,175,175,175,170,150,146,163,181,144,155,156,148,136,167,151,168,177,147,146,150,131,137,146,177,162,168,171,164,153,161,165,194,198,171,107,111,104,57,38,68,145,186,198,190,184,181,173,203,199,181,190,172,136,100,64,23,38,76,99,131,199,167,197,182,209,217,201,178,191,161,153,170,131,170,173,150,157,180,180,179,158,156,201,169,141,104,103,82,107,99,115,102,103,80,92,110,105,79,99,101,173,115,109,97,102,116,103,89,72,77,70,76,89,79,96,64,85,44,42,52,53,76,53,40,26,32,64,133,93,55,52,44,46,3,18,10,26,48,47,68,94,108,80,103,108,97,98,78,91,80,74,86,70,50,70,69,65,58,85,73,58,59,106,69,104,55,52,93,92,59,98,81,57,21,12,19,23,35,36,19,38,35,53,36,58,59,54,53,24,69,38,42,62,26,43,43,71,64,46,58,41,45,35,45,42,44,40,51,67,25,58,66,50,23,57,74,47,36,28,25,48,27,41,37,32,39,37,42,38,43,38,51,34,16,36,34,51,46,35,53,38,48,38,13,42,83,102,74,65,19,12,15,28,21,20,34,40,19,26,42,34,6,31,51,11,31,11,45,29,23,25,25,29,23,37,2,11,36,32,18,20,30,38,34,29,21,32,48,61,36,35,25,37,41,39,13,41,21,5,18,38,7,12,55,83,81,33,27,27,21,11,25,29,30,37,35,32,22,39,5,20,36,15,29,18,32,62,35,8,29,21,20,49,39,18,32,32,35,35,54,73,58,28,25,41,44,19,21,28,28,19,36,26,23,17,17,5,16,10,21,32,22,31,5,54,15,12,31,30,15,18,9,12,11,19,16,20,42,27,37,7,34,30,87,42,25,41,15,17,26,5,23,16,49,35,13,7,30,30,14,32,28,40,90,117,86,39,18,12,22,20,49,10,18,48,26,10,27,26,41,17,23,20,22,46,33,45,14,33,45,30,28,31,61,27,139,192,241,250,212,187,174,202,190,185,197,164,122,98,65,26,12,45,69,94,170,192,213,228,226,175,159,179,162,182,149,162,170,170,143,178,148,148,165,156,159,155,145,151,145,169,144,135,159,134,102,117,123,129,154,134,122,114,114,129,152,121,130,135,133,150,156,142,155,153,175,159,149,166,192,167,175,159,145,98,23,4,0,13,7,8,25,1,10,13,8,1,21,12,37,6,33,9,60,31,5,12,22,27,233,230,247,235,238,242,242,205,161,196,180,198,208,148,180,205,187,198,184,160,151,122,163,125,131,131,97,90,69,97,77,85,124,112,111,105,109,120,117,105,144,128,127,139,159,122,62,25,1,31,24,83,91,101,90,97,102,78,96,75,65,59,70,55,39,31,28,43,97,73,94,77,45,77,26,35,19,36,50,152,217,224,242,250,233,244,243,228,238,249,248,244,241,185,20,25,149,177,161,119,103,94,69,34,29,50,156,182,168,153,119,84,50,20,13,10,3,13,27,18,34,48,32,56,24,38,45,53,53,37,80,37,32,49,36,46,62,47,79,68,89,112,139,127,163,176,202,218,245,208,95,107,229,209,216,234,240,219,242,225,238,225,245,225,221,203,227,198,199,194,175,171,173,192,193,174,172,179,187,178,202,197,204,172,152,154,169,174,156,152,186,189,168,164,164,151,170,200,166,178,153,165,150,170,157,161,18
0,143,193,175,196,187,181,152,78,31,50,92,165,141,180,202,234,226,197,190,178,156,101,124,112,64,46,36,70,90,135,165,160,178,205,208,226,190,175,183,175,177,167,169,163,177,180,188,156,151,144,160,166,172,170,154,163,179,161,145,103,121,92,79,62,33,5,56,68,36,55,57,47,106,124,141,162,130,126,135,121,122,101,105,100,101,66,101,97,101,103,88,112,78,74,76,70,84,64,78,49,54,64,59,37,30,47,38,26,19,42,47,71,80,105,119,101,67,95,91,110,79,74,85,80,56,86,94,77,91,79,72,59,61,65,66,94,66,63,60,60,80,60,105,62,42,18,34,19,6,5,25,26,60,53,49,55,37,50,41,84,57,37,50,69,47,33,74,53,51,38,44,50,63,49,29,68,54,50,25,26,49,60,25,33,37,45,20,51,30,36,71,31,53,36,34,38,43,26,33,43,61,70,37,34,29,44,62,56,55,66,32,25,43,46,29,44,24,30,34,37,95,99,71,14,46,18,16,34,34,19,24,57,40,23,25,28,5,35,36,16,27,22,57,35,20,31,16,33,18,23,26,26,13,38,38,31,37,29,22,18,46,52,20,24,31,27,48,28,31,38,44,29,30,3,20,20,8,2,85,94,44,28,17,40,20,24,24,33,58,24,26,30,11,12,34,57,52,27,37,29,19,30,28,27,25,21,42,35,46,16,13,38,56,21,41,68,48,15,15,37,23,14,45,40,35,26,39,18,6,35,8,10,38,20,22,24,6,22,39,35,9,13,16,28,21,18,20,33,13,45,39,24,28,30,27,44,72,79,51,57,2,29,30,7,12,45,31,21,29,25,37,13,22,51,22,30,32,49,92,104,88,58,43,21,28,26,26,2,20,14,39,15,27,35,24,27,3,24,33,20,32,10,39,32,22,15,17,22,62,184,222,208,161,145,88,104,152,191,175,202,200,146,149,87,7,20,14,10,44,97,207,253,233,245,227,212,250,244,217,225,209,216,213,224,233,250,220,220,232,233,212,204,234,195,217,234,208,204,206,142,201,185,154,184,174,169,155,172,168,139,162,155,136,147,141,170,142,148,160,160,151,147,161,150,149,169,142,111,103,11,3,4,13,24,38,20,8,21,6,23,27,11,0,25,1,16,17,26,30,21,20,20,10,197,183,201,188,195,177,161,144,137,113,118,120,114,103,138,123,120,133,129,119,129,91,111,95,113,83,120,38,3,22,64,102,153,128,165,151,136,173,197,187,190,192,203,192,202,190,118,129,60,19,44,115,142,160,151,151,114,145,133,139,112,111,124,129,108,56,44,89,110,202,187,186,158,109,81,68,58,78,100,171,241,247,224,229,215,235,255,255,246,232,212,224,171,100,3,13,68,70,43,35,52,9,38,55,20,102,169,236,184,117,54,61,30,23,19,13,5,21,28,30,92,101,74,44,47,61,69,67,75,66,78,69,117,120,163,142,178,192,204,215,216,230,225,223,217,220,238,235,227,156,86,180,218,240,214,209,226,253,234,212,202,183,181,190,182,169,167,180,180,165,172,184,189,170,166,153,162,176,159,167,203,193,157,190,178,170,145,159,158,169,177,167,173,171,157,166,159,163,167,152,174,149,187,170,166,156,175,168,182,177,123,79,102,57,101,113,137,189,210,231,221,203,205,177,163,108,62,53,39,9,78,79,100,136,137,156,165,198,197,197,181,188,212,149,171,200,187,182,172,143,158,190,185,164,166,181,150,176,141,157,164,171,164,170,170,131,112,117,88,84,72,8,7,22,48,18,37,29,46,81,149,168,151,110,142,156,141,131,119,97,93,93,101,106,107,78,112,108,101,111,109,95,53,82,96,73,60,60,33,6,32,35,6,34,52,75,61,95,93,81,92,59,112,92,97,91,96,82,68,88,78,71,100,67,98,75,88,26,36,58,67,56,81,93,63,59,94,102,78,98,45,28,31,24,8,21,39,31,53,59,77,69,28,66,52,58,60,53,65,39,70,51,49,41,79,71,51,19,65,23,45,39,26,50,37,52,50,48,46,56,57,39,39,27,42,32,41,35,17,21,34,28,54,35,41,37,37,33,65,36,39,67,43,42,28,50,57,46,43,53,27,42,19,30,12,37,27,38,94,111,82,27,22,28,19,17,26,29,11,23,25,25,41,25,28,39,50,44,20,41,53,41,12,11,17,39,42,36,19,29,21,20,24,21,10,8,29,23,47,26,22,52,32,39,15,15,26,26,36,27,11,41,28,24,37,34,118,64,41,42,3,29,52,28,31,50,23,27,8,49,18,9,41,45,34,22,23,50,30,9,23,37,36,40,16,44,24,14,44,40,29,33,42,41,29,46,12,47,16,13,40,39,41,39,29,26,14,19,20,16,44,40,28,7,22,32,25,33,5,34,18,44,25,22,23,19,22,12,
24,17,34,10,22,66,94,22,19,19,7,8,25,22,17,42,22,41,20,49,16,17,4,31,28,17,30,29,49,116,93,56,43,23,2,16,26,30,21,23,18,12,17,17,7,14,24,27,38,20,13,19,26,26,21,35,33,67,186,213,218,180,154,124,130,167,169,161,173,195,177,200,146,119,111,115,56,42,109,195,252,244,244,252,252,226,249,249,247,255,250,255,250,246,235,251,254,226,222,254,252,255,252,252,239,253,247,252,253,248,224,254,219,235,237,226,252,234,222,255,216,211,214,190,188,187,171,185,181,164,175,138,156,154,161,140,135,114,15,8,22,0,12,21,12,6,3,9,16,17,4,20,21,19,38,5,16,8,3,32,1,23,80,84,44,59,58,55,72,92,107,138,138,155,154,130,135,134,138,146,171,130,157,156,166,148,161,142,152,67,13,34,102,140,193,187,199,166,166,168,167,173,130,144,151,161,127,106,122,117,25,41,46,75,131,147,132,131,155,133,166,134,151,165,161,204,198,70,23,60,157,205,229,211,193,140,103,89,112,131,151,171,215,208,195,144,139,150,180,166,142,106,68,94,49,21,29,60,113,138,103,109,111,127,75,20,15,64,150,131,74,75,33,88,99,92,79,76,105,134,49,19,130,186,124,89,62,97,101,127,56,21,38,110,151,246,227,229,255,230,228,220,203,215,177,181,192,192,186,199,184,74,73,154,187,208,181,176,160,159,174,138,144,163,161,159,138,178,162,138,160,192,186,183,155,188,148,155,175,160,166,160,158,155,177,150,176,164,170,158,178,174,159,139,135,141,140,141,142,129,156,165,148,189,167,159,198,159,155,108,109,60,38,75,55,157,188,184,211,204,203,189,161,119,87,48,9,53,32,100,113,145,122,160,149,136,161,151,160,164,173,161,191,193,158,148,153,171,155,128,137,129,151,157,139,170,170,186,156,153,160,158,147,162,164,166,166,159,129,104,64,60,112,51,23,39,44,60,59,46,81,94,113,129,115,132,107,118,117,100,66,83,87,104,82,105,102,93,127,89,110,115,102,93,68,83,80,85,51,23,9,42,39,47,51,51,73,108,99,75,76,85,88,63,94,81,102,75,91,48,79,76,90,91,51,86,89,67,65,51,32,88,87,80,88,87,79,73,64,77,41,32,32,17,16,21,21,37,42,50,76,80,59,48,37,22,44,66,34,44,34,52,16,46,45,41,52,28,30,32,53,74,29,55,41,52,53,64,47,24,43,58,37,35,41,26,52,28,67,20,43,52,51,55,45,54,62,29,37,43,53,47,23,20,38,47,48,48,38,30,33,21,18,47,17,30,26,34,6,15,51,49,96,77,34,28,22,32,23,24,6,32,42,9,24,14,17,18,25,52,52,0,36,31,19,44,37,15,40,16,43,33,19,13,31,8,47,26,45,35,38,37,44,31,48,17,49,16,44,38,37,27,33,28,34,18,20,36,74,100,27,42,19,21,34,37,19,31,36,30,27,16,21,49,17,39,14,22,20,5,37,38,20,36,25,19,31,14,26,11,29,38,29,17,21,42,31,33,7,42,17,47,37,31,32,33,36,39,39,12,3,9,10,33,38,28,16,38,58,18,23,28,21,19,14,32,10,6,30,13,13,30,8,31,39,87,67,30,23,11,0,29,29,26,30,17,11,52,22,34,27,7,40,16,5,24,38,20,49,85,149,115,61,30,0,36,13,25,37,30,16,20,32,10,34,23,23,27,44,33,26,25,26,19,20,54,27,130,202,230,174,207,232,175,174,171,175,148,199,182,164,200,196,175,189,150,126,143,169,176,155,147,166,189,225,252,250,247,246,255,250,250,239,243,231,252,248,252,242,242,251,244,253,252,253,243,248,255,247,251,238,254,253,237,249,247,255,255,253,255,255,253,255,252,231,229,228,228,232,246,228,200,201,187,199,138,135,77,6,0,0,9,29,21,8,30,3,22,12,12,11,6,11,5,20,1,29,36,11,12,15,4,74,44,49,28,21,16,80,112,143,157,215,245,200,182,163,169,160,184,171,176,171,185,165,161,179,160,163,65,24,18,51,116,126,137,99,103,62,90,45,80,87,66,85,94,45,78,83,61,17,19,79,120,180,150,173,181,193,214,205,186,201,222,223,210,236,132,20,82,143,237,234,237,220,114,81,84,92,161,140,114,108,122,100,85,21,63,82,104,135,137,165,161,177,49,28,108,193,201,200,172,164,151,115,45,30,63,93,81,48,66,47,107,83,116,179,238,229,244,111,36,114,165,109,42,73,161,135,136,108,22,25,117,215,188,244,211,208,215,190,170,168,168,155,162,154,177,162,157,143,32,80,165,142,1
73,140,113,112,121,122,95,172,142,133,167,174,188,146,150,142,141,160,151,134,157,150,162,151,133,152,159,133,172,136,130,156,167,175,212,197,187,189,157,164,155,133,144,166,162,119,188,187,172,203,174,190,128,27,14,59,103,139,181,207,233,224,155,131,61,53,11,22,25,44,89,102,148,150,183,169,200,165,207,170,146,188,207,185,193,179,191,133,155,161,145,165,156,159,152,159,160,176,168,166,160,148,156,156,172,173,149,150,141,173,160,167,142,152,98,79,107,102,100,51,65,72,69,80,124,94,95,103,65,77,63,91,59,92,96,74,91,104,103,122,120,100,108,114,122,116,110,87,89,59,42,28,7,47,40,37,50,45,40,75,89,98,97,66,73,63,67,83,87,51,89,90,36,54,74,92,97,55,94,56,60,72,59,70,73,91,90,73,54,81,86,80,51,60,35,14,25,3,31,16,54,55,36,66,69,30,54,60,40,54,48,66,41,49,56,28,48,28,45,36,40,44,50,44,44,37,61,44,38,32,48,63,43,36,51,53,57,16,48,46,49,45,55,36,42,76,55,52,53,41,61,45,9,51,47,22,48,31,13,50,46,28,41,41,34,38,23,52,55,57,36,35,34,42,45,47,32,91,117,74,19,34,5,43,15,19,46,28,12,29,18,35,18,16,54,36,13,39,27,25,25,43,23,24,31,21,12,30,29,25,21,30,37,7,13,44,26,20,30,29,35,10,28,29,16,22,21,11,20,26,45,27,34,94,105,50,27,15,27,54,20,29,20,11,20,22,21,46,39,27,29,13,17,9,20,30,24,32,40,24,15,28,34,38,26,36,18,46,37,29,41,45,43,31,26,20,22,60,24,35,36,44,29,35,14,49,13,28,8,23,29,42,8,10,19,24,5,24,11,32,29,9,14,21,16,32,21,21,4,62,67,42,47,39,8,32,26,32,19,23,27,8,24,14,36,26,24,41,26,38,10,40,33,38,22,88,120,107,73,35,30,28,34,23,28,35,15,22,30,16,12,34,11,42,26,10,42,31,37,37,33,101,124,139,132,99,189,187,176,164,143,173,136,171,186,184,201,183,174,197,208,213,242,243,168,107,0,51,83,134,172,200,205,182,172,227,247,254,253,236,255,248,251,255,249,236,255,249,250,255,237,246,255,250,255,255,249,245,248,244,244,250,255,251,247,241,254,254,247,250,245,240,242,253,227,236,242,232,240,219,202,194,105,3,6,5,17,3,15,10,17,14,8,12,12,10,0,23,12,22,27,18,8,12,5,15,8,142,144,70,29,40,33,80,176,185,202,241,234,234,159,167,166,144,181,146,149,117,176,137,107,99,112,74,27,13,31,62,119,112,89,101,81,67,70,92,96,87,120,113,112,112,133,153,135,21,19,84,174,207,197,243,215,217,230,227,224,227,209,189,187,203,67,1,46,95,153,133,114,75,50,58,43,77,114,80,50,31,19,34,26,14,56,132,212,255,251,247,240,229,144,24,101,187,185,145,120,115,104,51,25,20,77,147,160,137,122,150,147,73,125,223,252,248,217,75,28,108,80,68,33,116,207,179,181,129,50,12,122,200,205,218,187,194,185,171,175,189,179,164,171,196,179,185,155,113,45,124,149,156,163,174,163,154,145,181,170,175,160,138,163,171,172,142,168,161,156,143,141,179,182,160,158,162,164,178,151,144,176,183,177,172,189,196,246,230,186,156,155,182,245,197,218,189,237,236,203,185,206,146,105,93,124,160,148,179,201,215,247,174,123,101,65,49,33,61,121,138,142,173,217,249,232,197,215,211,197,177,197,247,208,192,183,231,218,178,155,138,187,221,214,194,174,214,218,201,167,204,206,170,189,178,215,199,188,189,218,216,201,170,159,161,166,122,130,92,100,96,85,54,98,97,97,100,112,95,95,88,91,93,79,52,91,117,71,82,77,92,82,116,106,117,113,103,129,114,85,52,33,25,13,44,34,36,37,37,85,57,122,83,66,97,83,71,83,88,91,68,61,89,113,82,87,96,110,103,86,72,44,57,83,51,80,46,67,102,56,88,80,95,81,35,29,31,23,47,2,30,34,41,50,40,69,33,32,69,40,36,45,43,33,70,81,40,50,34,56,38,58,58,76,56,43,66,49,90,39,52,26,40,46,40,47,51,35,29,72,27,35,45,53,37,38,31,51,32,34,34,41,48,42,37,48,49,36,44,66,47,79,34,19,41,23,47,43,24,20,24,38,37,37,18,9,34,11,10,41,59,64,116,120,48,11,21,16,19,27,0,21,18,24,25,24,27,67,21,27,29,2,24,26,34,5,14,20,2,37,7,3,48,30,47,17,44,20,15,31,33,25,33,12,50,40,26,41,0,18,48,38,33,29,22,51,6
[Raw sample data omitted: this span contained only flattened decimal byte values (0-255) from the patch's binary/test-vector payload, with no diff markers, hunk headers, prose, or code recoverable.]
2,125,149,184,196,212,217,217,195,177,173,185,206,157,177,165,142,142,149,150,149,140,153,139,118,130,125,122,163,145,121,138,137,127,115,104,119,106,126,125,140,116,116,120,137,130,140,140,147,174,150,141,155,134,127,139,143,130,153,133,111,132,134,124,122,129,120,145,114,128,105,143,160,125,152,136,121,121,126,132,121,135,117,153,104,132,134,138,127,115,81,118,136,149,140,143,128,121,121,137,136,132,130,128,139,138,153,132,140,142,138,142,137,129,124,125,106,123,136,118,143,120,131,108,115,129,101,117,76,110,116,156,126,141,112,126,132,130,105,150,147,140,153,126,122,120,88,81,84,68,54,76,76,101,80,93,76,72,82,68,93,99,67,70,54,75,67,19,101,115,90,97,114,175,254,250,246,252,220,244,177,86,41,0,10,17,56,44,83,66,49,43,54,39,51,51,52,78,42,67,42,73,60,41,46,61,69,62,71,62,32,53,42,49,73,55,60,45,36,52,58,51,40,47,24,61,58,77,51,26,46,46,29,62,48,47,55,34,34,50,26,48,65,37,43,51,46,43,42,44,49,26,23,17,46,68,41,30,49,35,36,44,29,31,23,40,47,27,19,40,34,47,54,30,49,43,16,21,43,49,38,35,32,45,45,45,31,28,36,30,25,31,23,21,22,39,30,24,20,11,26,44,50,45,13,40,35,25,55,16,23,34,43,30,26,32,6,31,15,28,32,22,6,47,36,33,17,21,36,40,31,31,60,33,17,29,12,18,31,25,58,27,15,13,23,26,28,31,37,29,26,54,45,50,44,20,29,24,35,43,5,38,27,41,34,64,127,136,158,163,133,91,68,45,17,14,38,18,41,1,25,44,41,47,54,3,27,23,11,8,40,7,31,84,78,74,39,16,22,13,17,18,43,49,38,40,24,26,30,11,33,28,20,40,20,29,36,27,32,25,15,7,19,32,39,14,41,16,17,7,9,14,18,32,20,23,13,43,36,17,32,33,28,32,94,50,9,6,22,25,15,11,20,42,7,25,22,21,14,20,8,17,32,12,18,17,36,18,48,99,129,129,109,61,25,37,40,24,14,25,48,40,79,101,107,125,118,182,189,173,169,155,177,198,162,202,172,144,173,167,174,172,172,153,159,181,167,161,164,161,161,96,92,108,140,141,175,174,190,195,156,179,181,185,162,151,149,141,150,141,145,150,165,164,170,197,162,158,137,138,136,131,146,144,147,177,183,172,174,155,152,165,166,172,163,140,160,140,165,163,171,155,169,156,166,148,143,173,158,160,185,154,202,195,179,188,174,165,155,109,75,50,57,61,94,123,138,108,18,8,19,5,18,29,33,11,9,13,5,31,4,21,6,7,16,17,19,4,35,22,9,12,51,31,73,94,156,176,168,168,135,148,118,116,104,95,108,86,52,63,82,58,52,66,63,82,94,42,27,38,36,64,75,44,42,20,15,47,21,12,14,24,18,37,17,34,29,30,21,5,39,18,17,45,21,29,16,33,50,34,18,42,35,15,24,33,29,44,61,126,137,122,137,121,115,137,123,77,44,19,20,31,36,28,41,32,34,38,54,29,30,40,11,32,18,11,43,25,29,27,15,24,36,16,68,61,57,137,137,125,200,217,189,199,170,161,159,152,168,181,168,168,167,123,149,165,129,154,137,160,146,154,136,115,138,159,107,153,147,122,139,130,117,142,132,121,127,114,138,137,124,132,112,122,132,138,108,132,161,150,146,128,111,99,81,138,103,133,117,113,140,132,140,138,104,131,142,154,143,148,164,134,167,141,138,124,116,146,142,127,141,131,120,124,112,147,131,139,130,106,105,123,122,145,134,95,122,97,128,130,156,126,120,129,136,103,129,133,120,112,126,121,132,120,115,123,141,126,143,139,122,135,104,111,129,118,110,117,100,95,109,94,83,57,108,88,56,94,90,113,102,84,109,125,132,120,102,107,109,109,108,98,89,86,114,67,84,92,78,75,56,68,69,59,86,79,61,51,68,109,102,99,124,123,151,206,255,212,254,252,254,247,147,70,15,0,1,28,28,56,34,52,56,68,49,51,76,53,42,31,78,67,46,65,55,39,65,37,52,70,25,34,41,31,63,39,43,48,62,44,73,61,67,45,65,56,47,41,73,57,33,66,35,66,47,60,50,60,35,51,43,57,50,37,53,58,57,63,31,41,55,65,44,12,35,50,43,29,27,33,11,45,18,30,31,53,25,29,49,41,31,43,47,48,49,42,47,31,26,30,28,32,60,34,46,16,35,41,26,21,22,25,36,62,22,43,37,46,27,34,60,35,49,28,10,42,8,25,45,50,27,15,39,21,38,32,18,14,46,36,17,35,39,29,20,6,39,15,34,24,2
3,36,30,52,26,21,18,23,14,23,10,16,15,32,22,6,44,46,31,44,35,47,31,26,38,19,34,38,19,31,59,17,34,34,51,53,48,55,66,134,112,168,168,150,100,75,44,43,29,26,25,21,31,22,39,58,27,13,27,22,20,27,35,32,105,82,68,21,25,6,30,25,44,47,56,27,27,28,61,26,41,71,43,22,37,16,31,33,27,39,22,24,33,39,28,29,28,11,11,23,23,13,29,25,31,24,21,15,21,9,29,32,8,40,29,49,26,33,18,7,8,24,21,30,25,6,28,37,5,23,11,16,21,27,25,34,64,79,129,113,63,58,34,38,45,45,29,32,23,34,74,118,105,113,97,107,179,178,175,138,129,188,175,160,180,169,124,134,160,181,172,129,176,166,168,167,162,165,171,171,150,158,115,77,96,119,173,197,215,156,167,177,185,167,141,169,159,158,180,148,161,155,142,163,164,170,165,153,144,154,147,142,169,160,160,177,141,171,155,167,194,167,180,173,165,162,167,180,174,143,163,149,188,188,151,166,145,157,191,161,187,182,176,213,206,201,189,199,188,154,118,51,72,44,50,57,112,111,29,2,20,16,27,23,27,26,31,5,12,8,35,4,27,5,19,32,11,18,57,22,21,14,46,41,66,63,116,111,109,110,82,88,60,71,75,73,71,91,87,94,92,85,80,44,42,62,28,49,23,14,20,35,19,30,21,13,17,27,17,9,17,24,5,25,29,26,29,20,29,28,25,11,18,18,35,6,21,11,22,40,20,26,20,28,43,19,38,38,57,85,74,102,115,163,144,135,122,96,20,22,23,31,43,38,61,23,41,29,0,26,8,31,26,7,38,33,40,12,20,47,30,66,41,70,63,137,166,168,208,209,208,184,167,160,163,185,152,166,126,158,188,176,174,161,147,169,140,129,145,124,155,144,146,139,144,130,124,125,153,107,130,121,118,146,168,135,133,97,113,147,114,119,118,135,136,121,112,116,108,97,74,67,123,109,102,131,97,101,107,105,122,98,89,96,97,106,81,108,109,118,89,97,112,123,106,96,121,114,138,113,113,121,128,149,133,130,139,116,81,130,108,136,122,143,136,104,128,89,115,99,98,119,103,104,104,122,93,93,132,97,94,102,135,128,110,116,127,118,149,136,137,101,90,134,139,110,96,129,118,89,68,72,82,133,66,74,105,109,91,93,94,124,118,133,145,105,148,128,108,105,144,122,121,120,99,106,93,92,73,86,49,93,74,61,72,81,78,38,68,91,123,155,159,151,137,137,184,227,254,247,252,250,246,246,182,111,38,4,12,28,30,47,32,48,38,35,55,43,61,64,60,71,81,89,61,55,71,69,37,44,45,43,67,54,67,42,64,49,20,45,61,72,37,60,60,69,48,48,57,60,52,52,5,48,34,51,24,39,64,36,48,55,32,36,58,58,75,54,31,52,24,55,16,21,48,51,30,25,14,40,11,44,42,27,31,48,29,43,41,46,20,52,37,30,20,20,23,26,20,47,28,34,46,32,31,32,51,40,21,33,32,28,12,31,29,41,21,34,27,34,38,31,67,32,40,18,64,16,16,38,16,59,52,23,26,28,35,31,51,33,10,16,8,26,32,2,12,24,1,15,30,53,27,13,22,14,34,36,28,31,29,39,23,51,22,47,23,33,40,28,24,24,46,37,28,54,5,22,25,19,20,27,19,46,57,47,25,28,32,48,42,67,116,186,174,153,148,143,109,68,33,36,45,41,32,41,19,24,35,23,48,19,28,25,75,58,68,31,41,20,15,8,31,29,37,40,19,33,14,35,54,53,55,41,49,46,19,30,28,47,17,19,24,24,33,48,24,53,30,17,28,12,29,29,21,17,21,9,15,40,12,20,39,54,19,3,34,15,21,21,48,8,19,45,40,31,44,31,25,30,24,53,36,84,96,126,153,110,62,46,48,28,3,48,26,16,35,50,36,67,93,123,130,109,92,150,199,182,165,159,149,155,171,182,170,180,164,158,190,186,136,155,183,162,158,160,173,159,168,200,180,192,159,135,128,88,103,134,182,163,161,170,153,130,159,173,185,156,161,178,160,153,163,168,177,172,199,160,151,152,145,129,125,134,162,160,184,145,145,177,151,150,166,171,140,165,158,150,133,141,157,166,159,156,135,167,137,162,175,163,149,173,169,194,188,213,195,191,183,195,170,161,106,52,39,61,74,79,3,11,10,9,18,18,19,27,21,4,5,38,27,11,3,7,23,23,11,4,34,18,13,23,39,28,39,69,89,102,91,92,97,99,104,112,100,106,105,63,62,44,47,21,39,2,26,17,38,32,12,35,53,31,17,22,30,51,3,21,26,38,31,32,27,18,19,47,42,41,45,19,16,37,20,34,45,22,11,27,5,22,56,13,16,7,21,11,26,60,85,117,127,143,
132,159,136,113,110,77,46,32,30,11,28,19,19,51,52,48,13,20,29,18,33,18,26,19,30,16,34,94,106,143,166,178,177,217,196,204,181,195,174,184,173,173,160,176,177,163,157,139,166,161,151,143,142,126,127,138,148,171,139,140,116,115,113,144,110,103,139,120,152,127,151,138,148,149,139,104,151,111,120,146,93,93,114,125,138,98,131,95,107,135,107,114,147,111,128,113,124,115,115,100,122,103,112,91,92,119,81,73,65,62,119,70,78,111,96,137,97,122,118,134,119,139,126,86,117,73,98,93,122,120,110,105,124,107,146,97,90,85,62,90,92,97,103,88,86,66,91,101,93,121,129,118,101,116,111,90,77,120,112,119,98,93,121,97,64,100,111,70,82,99,93,113,96,108,146,125,130,147,154,151,126,124,129,110,89,91,108,92,102,118,101,86,102,114,101,106,71,103,116,79,79,56,78,72,54,45,53,86,61,116,162,139,129,110,106,131,165,186,249,252,255,250,234,230,198,132,69,3,15,53,23,35,49,63,66,56,56,66,69,62,74,53,90,46,59,62,89,75,30,51,70,77,34,57,90,60,57,47,40,47,67,41,37,38,86,48,66,44,33,71,57,70,46,55,60,47,51,30,55,16,28,76,49,13,55,48,53,27,35,41,68,37,50,58,24,38,18,41,39,35,33,40,39,18,51,39,17,33,31,33,14,55,43,30,15,58,28,41,11,28,11,47,15,56,39,13,23,58,29,42,33,25,59,43,55,18,26,28,46,19,46,41,14,41,37,33,24,51,31,27,15,0,12,43,30,45,6,4,34,24,29,27,19,2,29,17,16,32,48,34,20,41,5,36,33,6,7,21,20,26,28,37,35,5,63,52,49,43,44,7,18,8,27,26,39,48,42,31,5,26,21,32,50,36,27,20,43,45,35,21,44,69,60,102,103,151,171,162,165,118,131,113,109,58,35,47,26,29,24,28,24,17,26,41,74,61,58,32,15,33,32,52,50,14,40,71,113,120,86,48,47,50,63,25,22,34,60,29,45,48,42,33,27,38,18,59,34,21,25,6,9,5,39,50,22,17,8,12,31,34,46,44,20,14,20,40,17,15,7,8,17,31,27,28,11,37,40,30,50,108,148,191,129,104,56,60,33,15,29,14,39,27,19,49,18,60,62,106,110,110,100,126,106,176,177,187,182,158,137,170,154,147,176,159,146,165,147,144,166,157,160,163,156,182,171,166,138,172,146,187,181,159,143,87,76,126,118,175,177,176,169,173,159,161,181,153,170,154,201,168,176,158,174,174,162,166,150,126,123,152,146,162,164,144,178,155,144,177,168,159,186,179,180,184,148,152,135,152,168,175,192,154,150,158,170,147,187,180,147,161,163,184,233,224,216,213,172,177,222,145,149,115,120,90,72,106,11,10,18,29,4,7,22,19,14,9,14,16,3,3,17,17,1,28,17,5,3,9,26,3,41,39,41,50,97,135,100,93,103,76,37,63,27,57,25,5,25,28,19,31,44,27,2,4,18,28,16,33,31,18,13,19,30,14,40,29,36,22,13,41,38,21,9,23,33,30,36,36,21,8,40,38,12,11,44,15,11,17,25,21,30,17,33,38,42,43,59,99,112,97,82,57,40,54,18,34,43,11,7,15,56,43,25,29,52,41,7,42,17,12,6,17,26,5,41,50,87,146,199,232,199,197,206,165,159,157,136,162,146,161,138,150,136,127,129,141,133,146,141,148,129,132,113,80,107,99,130,129,139,144,151,114,127,111,109,108,97,143,151,144,137,136,121,127,117,112,93,127,136,140,121,144,140,149,155,152,132,136,117,134,136,147,170,162,162,123,142,151,144,141,154,152,132,119,134,130,120,141,121,128,119,135,174,150,120,131,149,136,175,126,121,130,119,122,113,117,137,127,110,130,104,87,94,111,108,105,92,103,131,130,111,98,109,130,110,118,144,103,152,134,140,159,143,151,163,131,115,148,116,108,100,120,108,105,97,114,121,112,127,137,120,122,133,112,75,84,106,109,88,114,101,108,62,76,84,77,92,98,94,80,86,61,117,91,90,110,63,81,71,69,63,50,76,47,54,66,64,53,30,27,65,52,76,109,160,116,72,106,142,181,231,242,255,253,246,255,235,170,87,42,20,4,21,43,61,58,57,61,71,54,50,64,75,51,59,54,73,77,66,57,46,60,73,24,86,65,31,59,36,39,61,61,48,54,38,45,63,54,45,68,61,50,55,26,57,58,66,51,37,27,44,65,59,60,35,44,51,51,63,69,6,27,72,23,72,26,46,43,18,45,12,19,36,43,25,23,27,40,33,12,40,28,28,35,39,54,29,64,37,30,15,47,45,3,56,5,35,22,27,36,21,32,7,47,21,
24,23,38,65,43,35,37,20,14,51,37,28,29,11,14,34,33,40,37,12,21,31,30,5,7,39,30,41,10,47,21,41,24,32,16,44,48,45,7,41,18,24,24,36,1,27,24,12,11,26,35,15,30,19,26,17,39,21,41,30,45,19,29,10,2,50,29,51,39,23,19,24,30,17,38,38,31,16,46,28,24,63,68,131,151,155,161,176,144,153,127,163,131,121,105,50,68,61,58,105,106,109,84,55,76,114,134,147,169,186,215,243,196,127,59,33,47,73,23,25,37,22,3,11,42,21,21,15,53,26,27,61,38,50,20,20,19,24,43,17,29,38,0,20,71,44,18,3,15,19,8,33,43,8,22,16,41,47,45,63,79,145,116,172,125,100,72,28,56,64,11,34,9,32,12,42,32,32,62,59,84,113,116,89,83,117,99,143,181,166,143,166,161,159,161,133,184,169,190,160,170,180,161,184,177,169,141,152,145,156,161,157,150,186,158,172,196,160,147,130,110,135,128,145,172,200,177,186,175,196,170,185,158,173,184,160,166,173,177,160,177,156,143,145,181,156,141,165,178,166,184,184,169,177,169,187,192,153,160,167,172,159,159,176,160,175,152,173,169,172,147,168,151,145,175,180,208,193,219,208,206,173,206,206,165,156,156,160,154,134,115,16,0,29,20,6,7,9,2,13,12,2,15,7,12,17,11,24,15,17,2,31,3,1,14,11,18,47,46,29,34,57,42,21,41,6,24,15,30,7,21,23,10,30,22,33,29,0,16,21,32,15,22,35,24,32,12,28,30,42,11,19,31,44,14,35,25,25,9,11,24,25,10,19,18,10,11,30,25,25,32,29,29,61,33,29,53,29,33,66,37,63,48,57,29,28,13,35,36,20,25,47,23,11,22,13,35,33,30,40,26,8,9,47,29,7,40,44,30,50,22,30,137,177,203,150,137,138,113,107,87,102,126,131,131,124,116,111,106,101,122,105,90,139,124,107,134,125,129,131,130,147,127,164,136,125,121,126,125,141,114,98,134,133,114,127,105,110,135,96,86,114,113,125,129,91,130,121,101,80,76,106,112,89,120,114,109,99,99,54,86,97,106,102,106,97,92,91,99,102,103,123,104,116,123,124,123,107,112,116,79,144,116,99,82,100,69,74,71,129,117,112,135,141,121,107,96,89,108,71,71,139,145,148,115,98,115,119,86,118,115,140,124,148,154,136,150,124,127,133,141,141,134,118,111,103,116,119,96,151,143,134,133,139,134,136,128,142,93,99,111,94,88,72,74,89,87,66,91,96,72,120,108,88,94,103,110,67,81,88,86,66,88,71,36,54,63,56,42,59,66,50,64,25,44,41,36,76,120,154,187,125,127,104,141,151,195,241,234,227,237,248,251,229,174,114,39,1,15,13,32,45,62,39,58,76,46,66,44,28,48,72,55,45,56,59,60,64,44,43,69,52,57,62,55,47,45,56,42,44,73,69,42,34,49,60,56,55,52,53,53,38,71,20,33,46,60,52,60,26,25,46,30,38,57,43,62,28,57,56,51,19,38,42,53,49,8,42,40,9,22,33,12,60,27,48,57,24,52,29,35,57,38,34,26,33,16,32,18,28,28,42,2,8,32,28,37,41,33,27,16,28,36,26,20,31,32,33,34,39,41,36,23,22,11,28,24,36,25,23,6,14,19,15,31,23,25,23,18,34,18,40,31,19,41,15,52,8,43,44,16,26,41,18,26,26,16,25,17,17,31,12,1,44,19,19,17,5,41,37,22,28,37,46,39,24,36,38,17,28,26,34,29,32,24,17,14,17,17,3,25,57,30,29,40,47,57,86,96,134,169,172,181,195,165,185,174,174,152,151,177,222,169,179,221,196,193,193,193,196,139,156,112,69,14,32,40,45,54,60,25,14,51,30,31,33,16,20,32,33,25,51,29,61,57,34,48,36,29,41,17,33,55,55,27,47,59,66,31,31,23,40,65,84,75,95,69,144,113,167,119,94,97,46,109,29,22,33,14,19,7,31,21,7,25,25,53,61,85,85,84,102,95,106,120,102,159,169,171,158,170,174,164,172,172,144,178,171,170,153,176,184,147,169,182,160,155,154,164,157,150,168,166,145,167,146,157,171,180,165,134,85,92,117,161,194,191,166,195,178,155,171,158,138,173,164,159,189,167,179,167,181,182,165,151,165,195,176,158,177,178,169,195,173,159,188,165,170,134,155,172,178,184,196,162,146,157,158,147,155,174,162,133,150,162,185,208,221,212,201,189,179,182,199,172,153,167,165,161,151,103,3,10,9,30,11,17,25,7,11,31,41,14,15,11,29,10,11,23,57,23,17,11,22,36,42,15,26,15,45,36,44,38,38,34,19,17,20,38,25,34,33,33,21,11,27,25,24,29,43,25,9,20,
22,3,20,24,18,13,18,44,25,24,18,20,21,24,20,16,37,37,22,31,43,36,21,33,19,27,30,21,68,39,46,42,37,35,7,53,56,32,32,34,43,20,27,13,41,15,28,26,38,28,25,18,40,42,30,35,57,28,36,45,27,42,21,27,49,22,25,27,38,63,90,118,145,118,118,94,71,97,99,116,115,125,96,89,108,88,110,107,132,84,103,120,137,135,161,131,134,149,155,165,179,136,154,129,127,140,116,137,97,105,85,66,65,64,67,97,105,86,95,101,113,123,107,69,91,86,77,82,85,69,78,83,67,63,68,39,44,64,75,35,47,73,63,59,65,81,86,68,68,79,87,70,83,101,75,69,97,39,33,27,50,57,60,47,81,97,88,92,81,72,113,95,80,63,85,88,81,107,83,113,121,91,90,95,48,100,87,124,135,88,113,75,86,70,104,82,69,36,79,112,89,55,95,120,109,114,66,102,116,101,101,112,104,118,108,114,130,114,106,90,75,65,58,60,30,67,74,70,60,57,93,81,75,75,67,106,68,53,76,88,62,51,65,64,76,47,66,41,61,62,66,67,58,20,66,69,142,172,161,194,178,137,102,113,133,140,203,245,252,250,250,254,207,134,74,68,33,32,26,26,38,27,62,39,41,60,69,51,40,38,78,56,44,52,33,56,37,53,51,34,62,53,46,58,37,50,42,67,62,19,55,60,41,50,71,44,32,54,49,43,25,54,27,40,36,48,48,24,58,30,50,43,43,23,56,31,59,35,40,33,35,33,15,24,26,36,32,38,38,44,47,38,19,38,45,29,35,32,35,28,29,23,17,46,59,45,39,45,31,37,28,33,32,30,27,25,22,29,16,28,37,19,37,37,38,16,38,51,41,16,39,45,31,30,8,33,8,20,40,36,52,39,20,6,16,4,18,10,30,2,9,14,15,12,28,19,14,31,16,25,35,13,43,22,26,35,8,27,22,32,39,11,7,18,12,56,22,15,0,47,27,22,44,11,15,39,40,26,59,34,10,30,6,32,19,40,41,22,40,13,19,19,29,44,30,29,35,49,60,57,56,90,41,83,92,113,116,135,129,139,161,143,160,91,64,51,19,42,35,47,60,39,49,10,28,53,8,43,32,45,28,22,20,37,11,16,40,37,28,18,47,91,38,61,55,84,80,72,90,135,120,64,88,76,93,111,128,155,129,113,183,145,165,114,63,55,47,39,63,22,32,46,37,14,30,29,7,13,25,34,15,71,45,80,77,87,94,66,70,96,115,106,133,167,193,162,159,191,166,165,194,155,186,132,169,173,167,181,135,155,141,145,159,178,166,161,170,143,177,193,137,146,160,173,188,188,167,151,119,91,117,125,150,182,183,199,175,193,147,159,162,173,174,179,154,154,164,172,174,169,177,192,173,174,147,185,189,196,172,180,165,187,189,145,157,139,151,187,169,168,173,180,162,182,171,178,160,164,174,169,159,172,171,202,199,204,200,191,176,169,173,191,157,170,158,172,156,100,12,9,43,15,16,22,12,7,8,2,30,13,10,3,5,34,10,17,16,35,14,13,7,21,20,18,10,15,8,36,42,17,23,43,21,33,19,26,51,17,33,12,27,15,12,21,18,51,8,23,28,27,13,30,26,44,44,47,6,43,14,23,37,33,26,18,33,8,41,16,45,41,44,43,22,65,41,25,23,56,9,27,33,44,20,12,41,11,18,18,32,14,45,34,32,27,26,23,33,20,53,20,9,23,64,54,66,51,127,79,66,71,54,73,54,64,55,122,84,97,103,97,130,145,147,102,133,73,99,77,106,93,101,89,62,69,106,93,95,96,84,90,74,97,104,121,95,101,117,124,110,117,145,139,139,151,102,96,137,107,91,111,131,101,112,96,87,97,111,112,104,99,80,107,108,139,131,144,136,113,136,121,124,126,75,102,132,130,153,133,102,57,71,62,67,83,92,100,97,95,102,89,87,92,111,81,134,56,75,75,49,88,96,100,100,102,116,108,81,113,84,75,76,94,81,108,110,108,127,109,97,81,118,84,84,77,90,118,83,121,111,96,110,90,84,91,80,87,81,75,87,80,86,69,93,123,102,95,75,76,102,107,95,111,77,89,101,91,83,105,98,92,74,72,70,46,74,50,71,78,69,101,97,72,79,64,60,103,88,53,40,76,78,64,60,80,58,70,66,81,86,68,81,38,63,71,62,38,84,90,133,173,211,164,172,103,97,104,121,169,213,244,254,250,234,204,180,148,83,76,45,24,6,15,27,7,37,48,42,32,53,69,63,26,41,66,43,40,50,57,42,60,51,42,53,58,37,51,53,36,58,48,52,40,59,27,50,66,40,61,61,44,46,61,48,51,53,43,35,27,26,48,31,35,47,44,37,20,29,52,49,11,31,38,64,21,19,55,51,33,24,58,45,46,58,24,53,17,6,48,50,24,46,36,32,36,48,37,22,36,25,35,55,17,48,41,21
,38,26,36,23,30,31,15,20,25,15,37,41,51,27,17,9,27,24,17,26,25,36,22,22,55,29,13,10,24,42,17,22,15,35,18,33,12,28,18,21,15,15,24,59,21,41,26,33,30,12,0,28,20,32,28,24,21,23,22,12,27,11,9,36,16,35,19,34,19,22,41,31,64,58,60,43,18,42,19,14,12,50,46,15,27,30,70,37,24,21,37,16,38,16,55,22,22,21,19,23,32,37,35,33,50,43,73,86,71,53,50,52,3,42,151,138,61,28,27,30,20,28,36,34,52,34,38,27,64,41,19,43,23,16,27,31,15,43,37,52,71,58,70,72,78,95,92,95,79,89,85,106,63,79,78,61,45,25,70,39,61,44,28,3,15,38,16,4,24,36,48,24,30,25,18,48,81,67,69,73,89,105,95,109,89,128,143,181,206,175,151,150,171,159,173,150,145,155,174,151,131,130,160,150,156,146,147,150,188,169,159,127,157,174,176,150,172,150,155,175,160,182,169,126,105,96,78,132,158,144,175,182,154,145,180,190,173,168,133,164,163,170,197,175,174,159,170,154,158,144,166,169,165,139,151,172,174,153,173,178,172,152,189,184,152,179,183,178,166,182,166,171,150,177,163,158,163,173,182,186,193,205,180,184,174,191,166,164,163,144,159,156,116,21,1,24,4,17,28,12,16,8,34,11,25,0,0,4,14,15,7,20,9,11,14,8,11,33,45,38,17,36,23,31,31,24,29,17,29,34,27,50,43,27,43,26,14,39,53,25,30,20,25,23,12,28,47,20,26,9,30,35,18,23,35,38,10,13,14,37,42,36,49,42,40,49,13,32,24,45,63,23,43,31,1,28,3,2,12,25,25,33,41,32,56,40,23,6,21,11,39,23,34,54,24,42,48,78,121,156,215,193,216,214,206,215,171,202,199,194,196,187,153,134,149,106,136,105,94,107,121,124,108,138,130,160,127,129,105,98,92,95,79,80,103,92,105,99,90,56,102,113,123,80,47,75,85,82,73,73,71,75,111,101,92,127,119,125,129,123,118,149,138,134,109,119,121,110,124,166,139,183,134,143,156,138,136,124,106,147,127,179,152,93,108,88,103,114,125,107,170,137,110,115,144,164,150,129,136,119,94,94,110,141,156,154,144,163,147,128,149,157,114,108,106,96,84,108,111,117,84,105,103,110,72,103,78,75,84,85,94,97,84,104,135,130,117,94,100,91,107,111,97,99,99,131,121,107,107,114,104,87,60,93,62,81,102,110,103,99,67,92,94,59,109,115,79,114,81,81,90,107,95,98,94,118,111,91,97,66,89,59,68,95,76,67,61,52,78,64,73,40,64,77,89,47,65,71,60,42,60,40,62,67,78,111,142,159,146,182,156,129,113,130,121,192,191,172,178,215,193,188,162,141,152,151,100,59,39,21,9,11,19,31,30,52,70,53,63,75,53,70,46,42,43,48,58,37,43,71,65,44,74,69,33,64,51,35,65,59,61,46,39,71,37,38,22,57,42,40,26,40,51,61,26,38,52,26,17,30,61,55,38,49,39,18,39,29,18,36,27,33,58,55,22,35,55,33,47,44,51,11,27,27,34,33,33,45,31,41,61,38,41,45,51,30,52,41,31,41,14,68,37,30,11,42,28,31,38,27,39,33,6,46,37,26,33,25,13,33,32,38,33,37,36,35,36,3,15,13,30,13,21,14,28,18,24,33,21,25,0,0,33,46,27,41,16,48,17,14,20,11,10,17,20,28,38,9,51,37,20,36,28,11,57,4,42,32,46,38,10,15,11,33,21,19,5,16,44,34,38,36,13,48,31,0,35,20,34,29,32,16,40,33,31,27,41,23,27,23,34,11,7,42,22,21,46,77,67,102,86,112,118,164,243,154,71,11,54,38,31,47,46,58,44,39,51,52,41,41,50,58,40,32,3,16,36,18,18,32,37,32,35,14,36,38,56,20,30,27,8,46,41,56,32,44,17,16,11,46,1,32,18,2,25,16,24,36,34,15,37,25,53,71,57,55,72,83,94,74,79,77,68,96,96,121,161,184,168,175,142,140,150,154,125,149,147,137,153,140,138,163,169,169,171,155,159,187,152,154,137,171,178,158,162,156,167,158,192,171,153,182,138,178,140,127,96,115,107,137,148,192,179,164,155,158,145,141,144,157,196,146,158,179,193,176,145,138,133,131,144,139,158,147,153,153,177,177,148,160,162,166,158,161,129,177,161,169,178,144,142,168,147,158,150,144,159,176,187,195,190,204,196,195,182,184,183,180,171,160,150,183,112,31,1,31,5,0,22,0,35,24,28,19,13,5,15,14,20,11,28,24,10,15,10,4,10,40,24,20,27,20,28,35,21,32,35,34,51,35,62,72,69,62,21,50,28,6,4,20,35,25,16,8,23,35,27,30,18,22,26,31,23,24,39,37,2
7,30,28,64,48,37,55,38,47,22,19,38,21,2,33,32,27,40,30,21,23,38,14,35,16,31,45,24,32,29,25,14,18,14,24,28,39,30,57,61,142,102,158,108,157,148,165,154,168,217,141,176,153,153,147,121,111,111,104,91,75,105,94,126,151,145,145,90,112,126,101,125,128,110,90,122,127,104,92,86,93,114,99,77,99,103,115,94,91,66,88,106,92,97,119,68,110,118,141,113,90,149,129,143,116,138,127,141,119,119,135,120,129,132,120,113,93,90,85,79,89,109,79,67,101,111,125,112,121,128,143,132,141,116,92,129,120,109,130,142,116,121,134,122,115,105,115,121,118,116,111,101,115,115,111,95,95,108,86,87,98,84,65,119,67,68,93,97,69,91,99,94,144,93,80,106,94,104,113,123,83,103,82,112,89,123,124,108,102,121,108,102,101,99,105,97,66,102,93,96,107,103,76,85,80,95,108,77,96,98,72,99,81,72,98,122,78,96,77,89,78,87,59,90,78,74,78,82,84,64,79,81,66,51,61,55,37,58,77,78,43,82,62,51,63,71,63,36,63,52,88,144,176,190,168,193,139,102,94,135,107,104,133,152,179,202,182,187,249,250,243,227,175,140,74,41,29,6,16,14,15,43,55,50,48,40,70,59,52,53,56,42,67,44,70,30,55,46,42,51,56,34,40,61,21,28,64,28,51,61,49,64,49,42,48,44,44,41,30,64,41,32,39,29,33,33,40,43,19,49,42,15,52,25,34,36,59,47,26,33,22,26,16,34,29,28,23,39,32,22,28,52,32,40,13,17,20,33,46,34,44,28,26,32,37,32,19,27,37,26,10,38,27,9,28,38,34,27,9,9,6,30,39,23,7,17,20,20,48,27,30,18,25,35,47,33,20,19,7,31,13,20,20,27,44,4,44,30,34,20,29,24,24,18,26,32,28,24,26,32,12,22,17,8,31,9,33,2,20,39,24,24,46,31,20,52,36,16,17,42,25,31,43,16,19,38,17,18,19,30,31,18,14,40,23,43,25,31,46,47,37,16,37,34,26,30,18,21,31,56,39,28,62,58,74,67,73,109,126,71,39,43,42,32,90,104,88,86,115,117,83,67,95,100,86,61,30,20,37,48,29,67,55,51,23,34,26,7,13,29,27,31,20,23,13,12,3,22,28,20,28,9,15,28,56,31,38,26,18,22,22,25,44,23,27,68,68,75,71,82,79,97,83,72,75,101,65,99,97,179,168,183,161,176,179,138,162,137,168,170,132,131,150,166,148,160,159,164,149,190,175,147,130,149,150,160,139,162,118,158,160,148,149,162,166,152,158,185,164,153,156,100,104,102,152,168,159,178,144,153,154,161,160,166,148,169,166,154,139,127,143,141,138,131,117,132,146,123,158,159,150,146,157,116,147,166,152,183,142,159,154,166,179,150,125,149,162,165,189,154,180,163,185,159,199,194,208,213,194,214,190,154,149,196,157,144,123,11,0,15,44,6,8,8,31,1,16,18,13,3,0,11,10,1,3,14,16,43,12,20,11,40,21,27,14,27,6,42,12,25,35,33,60,65,40,62,79,62,31,28,19,7,11,33,38,7,52,28,58,25,7,40,45,32,19,78,11,34,51,44,50,18,34,48,15,47,25,0,40,28,37,25,19,40,32,31,22,19,31,26,20,11,31,26,30,21,24,7,34,10,14,25,23,44,39,38,34,56,95,47,67,70,62,51,81,37,39,72,76,54,71,62,29,70,72,97,101,102,108,88,108,92,94,94,84,94,81,81,76,62,96,109,104,92,112,118,120,105,105,64,83,139,112,125,77,111,116,140,113,109,123,125,95,119,93,86,105,97,77,56,60,91,111,91,60,81,91,96,101,98,88,52,80,78,82,76,72,71,83,92,75,81,59,81,86,115,105,92,96,118,107,105,129,87,61,89,76,74,69,68,102,111,104,78,74,80,58,69,70,63,86,54,58,84,67,81,80,90,76,100,73,87,106,74,69,85,97,67,112,87,109,109,130,116,129,114,119,90,85,100,77,100,112,87,94,113,123,106,88,114,118,108,97,118,97,131,125,128,89,86,98,119,79,87,80,89,112,82,75,92,86,112,92,74,122,107,91,96,109,88,101,109,72,93,73,83,82,74,95,69,67,77,82,65,56,85,45,75,67,56,58,82,62,48,84,60,65,53,54,54,67,71,118,128,135,128,98,122,154,157,145,97,49,89,116,163,149,163,227,249,255,254,254,243,247,210,160,122,51,16,1,19,24,10,31,43,56,53,43,54,62,86,68,48,53,53,41,38,57,49,69,18,57,38,39,48,42,32,50,54,52,50,60,27,42,34,20,26,41,49,38,27,22,38,41,27,38,54,27,21,29,45,30,35,44,35,54,33,38,32,23,37,11,27,20,24,51,44,19,9,38,31,21,24,39,18,18,33,51,28,37,45,42,37,3
8,33,10,21,51,25,4,34,31,23,37,5,8,19,53,43,33,38,30,27,48,47,22,47,2,21,21,17,40,25,21,24,19,21,19,11,39,21,43,34,46,9,14,28,14,32,10,24,21,35,18,37,33,33,21,20,19,30,29,15,19,10,36,25,33,4,39,5,18,42,25,5,0,27,28,26,14,2,1,26,53,39,57,39,51,22,31,40,30,51,31,26,26,37,22,20,42,13,33,3,32,1,25,39,15,47,25,44,51,25,54,10,48,41,57,42,12,27,26,74,139,132,146,146,166,156,141,108,126,167,144,138,87,47,15,40,54,39,35,14,21,18,23,32,46,13,33,32,19,30,16,6,12,33,13,8,15,15,19,10,18,19,15,19,13,6,69,27,61,30,41,57,24,77,52,74,99,63,77,113,112,70,97,110,184,199,191,160,158,148,157,148,171,183,165,167,172,172,177,158,160,171,165,166,151,164,154,170,133,153,165,169,158,145,166,146,147,168,156,189,156,161,154,169,148,148,144,122,124,68,88,147,182,177,180,152,155,166,158,148,182,164,152,159,161,133,153,153,149,147,147,129,158,169,151,156,147,150,164,156,156,164,152,170,157,177,160,153,151,151,138,149,156,167,178,162,178,196,168,168,195,186,180,209,200,191,181,193,151,172,183,192,105,16,7,5,13,16,2,32,20,2,2,10,21,25,16,18,2,25,17,15,7,6,14,18,22,18,28,20,33,28,23,4,45,18,30,31,41,52,87,59,58,59,30,57,49,25,13,12,15,19,31,45,44,38,42,25,58,54,29,32,33,14,20,28,28,35,10,13,44,21,19,32,28,20,27,25,30,35,20,24,10,32,12,27,10,17,23,17,15,55,12,33,40,25,24,35,22,49,51,37,40,41,34,44,52,68,62,72,37,41,46,27,68,85,68,74,106,98,124,105,128,111,91,138,115,93,101,94,61,87,61,70,67,110,87,122,128,152,165,148,151,152,139,125,84,97,116,154,111,112,140,110,98,134,99,82,62,75,79,85,54,66,80,80,59,63,63,51,52,75,57,75,112,76,54,42,78,66,92,81,86,74,105,92,85,57,81,87,80,88,109,85,68,81,98,75,97,127,97,85,67,68,57,91,86,65,76,85,93,64,82,88,88,71,106,100,82,77,73,98,102,117,112,105,95,87,99,95,116,116,127,102,79,108,97,119,130,109,98,116,113,91,77,90,126,94,74,105,101,95,130,138,151,135,99,90,98,94,104,90,90,87,97,120,110,131,96,117,114,113,116,104,106,104,100,97,88,88,102,106,123,97,88,106,100,88,81,76,85,77,74,101,75,87,99,57,49,53,82,81,45,65,68,87,76,87,77,89,53,54,66,57,61,42,52,24,39,39,28,34,83,99,155,199,115,75,62,93,84,84,95,95,142,236,252,254,250,243,226,253,251,245,214,168,134,116,43,3,18,27,11,11,36,59,53,48,44,55,44,71,38,56,50,51,58,54,54,46,56,26,57,32,61,44,60,24,24,45,20,50,30,53,36,32,55,29,33,45,43,32,45,10,52,41,32,69,5,33,48,20,36,31,56,10,34,53,37,42,26,48,11,15,24,56,30,38,50,52,33,22,49,22,17,49,11,41,20,39,48,26,27,53,43,49,20,15,7,7,47,37,58,27,19,43,19,33,11,2,34,13,40,41,48,23,22,43,24,32,33,16,15,6,24,26,34,5,22,31,18,51,17,14,49,11,2,43,14,23,7,39,6,24,17,8,37,31,15,37,29,16,34,28,31,28,16,11,24,25,22,45,32,42,24,29,14,26,26,21,27,12,47,36,30,32,42,23,41,25,28,42,17,22,39,40,41,47,8,23,40,14,6,34,28,17,14,53,26,32,40,37,15,38,9,46,22,24,56,61,88,159,135,133,163,129,146,137,125,147,151,148,109,17,16,33,7,39,11,53,24,1,13,15,3,6,15,25,18,12,36,33,26,30,15,6,64,28,9,27,41,46,26,39,27,42,50,41,34,47,39,52,60,67,95,41,70,81,86,70,77,79,120,177,151,181,173,171,168,161,138,173,171,161,176,161,164,156,163,158,171,178,151,141,165,161,161,159,159,166,162,165,161,169,196,166,177,172,170,165,159,171,170,183,138,154,154,144,117,94,75,116,125,180,184,190,175,143,161,158,166,133,143,184,135,134,176,157,170,172,162,179,159,158,172,159,132,141,162,161,159,163,170,146,189,168,155,180,181,182,163,131,184,151,170,182,152,180,180,188,182,162,199,202,177,184,182,174,154,189,157,177,107,2,1,18,13,13,3,31,11,35,28,5,11,11,14,1,12,10,38,9,40,0,28,3,22,32,2,11,27,29,23,31,10,11,32,56,34,37,50,45,65,31,35,29,16,26,27,44,11,31,51,32,34,34,22,39,43,34,6,34,39,16,22,23,36,30,15,34,24,41,30,26,47,36,36,39,33,54,32,17,8,24,
29,21,21,20,9,22,9,17,35,18,8,32,26,22,30,16,45,52,38,42,46,37,41,78,35,53,49,52,34,77,96,72,79,93,111,122,123,80,98,101,98,98,103,102,69,75,66,74,90,102,92,90,99,96,101,110,97,101,68,63,83,86,53,70,104,98,119,93,120,112,113,82,71,81,79,95,75,64,78,77,96,109,79,82,102,81,77,84,96,67,81,84,95,113,117,107,100,112,104,112,106,119,81,88,82,104,100,94,117,125,78,115,108,92,92,105,117,107,92,118,106,106,103,84,113,82,104,106,99,117,84,90,105,132,87,102,116,118,103,142,104,94,95,109,100,110,111,108,110,118,111,132,86,121,116,121,108,94,122,126,115,118,97,97,131,106,127,141,125,117,135,123,142,132,95,127,111,80,106,127,108,112,135,116,116,122,122,99,136,95,120,102,112,91,105,104,119,96,99,95,95,102,86,116,115,100,112,100,92,96,115,86,79,84,86,77,51,54,60,95,77,66,69,80,71,68,79,50,38,35,29,59,64,50,43,40,35,41,18,52,68,93,107,85,108,87,70,46,51,38,39,114,150,176,220,239,246,250,251,236,237,218,249,244,210,167,127,76,79,37,23,3,8,42,59,55,36,63,29,50,46,73,55,67,39,33,51,55,50,39,45,9,44,60,49,15,33,59,57,31,35,40,52,27,44,47,39,42,48,47,37,38,48,44,30,31,47,46,31,11,52,37,39,21,35,1,48,43,51,53,20,49,41,14,45,36,42,18,14,44,36,43,34,27,32,25,40,13,37,7,35,18,20,28,54,27,20,16,26,15,8,25,27,37,33,25,30,33,22,21,24,25,19,19,41,13,12,31,10,40,11,32,18,18,16,9,37,30,21,13,17,29,35,17,27,32,17,9,33,31,25,34,18,7,17,22,28,13,60,8,22,49,30,35,39,1,49,55,62,28,26,20,42,42,48,40,36,39,34,28,27,9,19,36,25,40,21,28,31,27,59,16,27,23,24,13,23,13,28,30,32,37,23,30,26,36,48,45,26,11,67,51,35,23,33,51,68,127,108,134,131,127,129,117,101,117,149,167,120,30,9,20,28,20,40,37,30,21,38,19,13,1,17,42,8,26,16,35,9,7,15,7,18,3,7,21,14,27,12,18,22,26,58,35,24,52,50,61,71,72,43,73,94,70,91,85,73,116,142,183,179,162,142,162,143,136,140,139,168,177,165,216,179,162,179,164,162,176,155,173,134,138,153,150,140,176,178,166,198,167,147,157,146,174,171,153,168,163,141,151,172,151,175,194,154,140,97,88,85,131,152,183,180,202,178,163,171,170,144,169,164,148,159,143,141,151,155,145,131,159,171,186,156,172,163,139,158,163,160,152,155,178,154,153,157,157,163,163,143,166,125,162,163,165,195,147,199,160,209,169,169,165,152,156,180,157,150,167,122,20,12,25,20,13,8,40,23,11,16,18,15,29,35,0,32,43,13,7,23,9,5,26,5,39,14,25,39,14,18,19,39,17,25,44,41,27,62,55,46,45,75,43,23,16,26,29,17,34,25,4,10,23,25,37,25,31,52,34,4,10,20,26,21,45,38,31,22,11,47,38,26,43,31,9,42,25,21,26,35,10,21,6,14,24,10,15,9,22,9,19,24,19,50,11,41,36,28,41,35,45,41,19,49,23,37,39,35,45,46,29,30,55,44,45,36,51,41,48,37,26,32,34,56,52,25,26,15,44,27,43,67,62,59,37,45,21,27,31,34,54,29,70,39,68,62,54,81,93,95,78,80,95,86,78,102,105,78,102,110,99,121,140,137,114,117,122,72,118,94,108,112,131,101,118,117,118,129,103,108,123,101,131,91,92,127,119,112,85,113,104,116,98,87,87,105,119,119,113,102,124,123,120,134,117,122,111,112,119,126,103,104,122,125,97,107,97,124,95,124,120,92,103,93,101,101,83,113,124,132,98,143,111,126,120,104,153,126,101,111,127,107,130,123,111,111,118,140,104,107,107,132,123,117,126,117,111,114,118,105,127,104,109,113,117,102,122,110,111,109,115,96,120,97,91,115,103,104,117,95,109,112,96,119,87,110,132,116,109,94,102,82,75,99,66,65,89,67,54,65,65,65,58,89,87,64,81,61,59,65,62,52,59,51,34,45,63,49,70,40,52,54,76,80,87,107,105,73,63,80,44,62,77,110,115,121,147,190,207,233,244,250,249,247,255,243,255,255,238,190,145,135,83,45,11,4,57,3,18,26,24,39,33,29,55,21,40,39,28,29,53,64,43,60,48,47,34,37,28,25,39,40,51,58,30,47,55,37,46,37,47,39,37,31,51,42,36,30,38,28,30,36,22,17,53,23,19,17,22,64,52,20,18,35,25,22,14,36,21,33,23,25,25,15,14,19,25,29,44,16,23,35,20,1
7,30,33,32,10,15,33,33,24,31,9,15,27,39,40,19,18,36,40,33,19,6,18,0,19,31,23,48,18,35,30,6,5,14,30,5,59,26,25,35,11,32,17,26,18,25,25,33,11,30,31,14,16,4,12,40,32,12,33,9,24,21,28,5,29,69,34,25,21,22,8,18,46,26,27,11,10,29,27,52,22,46,32,14,23,28,13,26,30,18,32,10,43,27,29,38,36,10,32,24,40,56,54,20,42,10,25,26,47,49,25,45,30,47,78,77,82,106,104,122,127,116,101,117,135,154,119,24,11,31,18,33,37,17,24,14,13,9,15,19,39,24,11,28,13,13,35,34,33,7,24,26,51,28,39,15,32,30,50,61,33,35,58,54,66,66,52,84,49,75,83,101,99,103,121,151,168,172,177,157,137,146,136,134,168,154,162,177,150,192,197,157,175,167,167,134,152,139,157,171,159,169,158,156,168,168,153,155,150,157,178,137,179,176,162,188,157,165,145,172,162,176,161,150,138,110,64,75,117,178,190,164,188,164,157,164,163,160,161,135,156,134,144,153,184,162,162,173,133,164,114,145,159,168,140,140,152,169,151,159,133,151,150,187,187,153,196,155,171,123,169,140,164,167,149,170,173,143,165,164,157,169,162,167,149,195,138,7,19,4,11,39,20,10,3,0,5,10,22,41,0,11,22,18,27,0,35,19,8,37,19,24,32,13,28,33,33,30,50,44,21,39,30,41,27,30,40,31,35,24,20,22,19,24,26,22,15,23,24,11,12,5,18,31,21,26,22,36,23,45,15,24,14,15,16,30,21,22,14,15,42,13,18,8,27,10,7,11,26,6,9,16,18,15,17,26,3,16,3,20,37,42,58,49,44,54,74,58,45,50,81,72,39,58,29,60,51,29,22,45,54,33,20,44,54,50,20,43,37,30,62,43,43,32,40,34,55,78,51,67,61,117,73,80,99,81,79,73,80,92,64,81,104,87,90,94,101,98,110,81,121,101,94,76,96,119,91,119,112,116,131,114,122,125,129,102,132,135,104,107,144,131,111,115,90,84,96,132,136,126,126,116,109,115,122,86,148,130,110,82,115,114,111,110,106,138,124,110,107,114,124,120,147,94,131,103,139,126,101,115,126,131,105,116,115,124,121,123,117,114,142,135,105,109,114,100,112,130,98,126,110,115,122,132,138,122,130,99,80,104,106,93,113,132,110,120,109,117,110,96,144,141,122,131,144,106,126,107,104,109,103,112,142,125,115,115,114,140,108,132,116,78,100,88,103,99,106,104,102,125,71,101,107,90,89,78,88,118,82,70,55,90,94,94,104,100,57,97,87,92,58,68,57,41,81,57,63,60,60,67,58,58,50,83,96,89,58,68,77,76,66,47,30,91,86,117,105,114,102,142,99,112,102,96,81,117,110,160,162,214,234,254,248,243,252,254,252,254,255,251,214,156,146,115,83,51,35,14,20,35,10,42,32,53,44,48,64,27,60,38,33,62,27,53,43,19,14,15,37,22,45,18,53,82,42,29,15,25,41,36,14,31,26,37,30,34,22,40,53,34,43,32,30,25,56,48,19,27,57,21,11,25,37,33,22,53,44,18,17,29,34,36,42,4,10,8,5,27,7,43,29,12,29,43,11,24,24,7,49,4,11,20,32,22,28,33,55,27,17,17,39,52,29,7,12,30,42,39,16,28,11,32,11,34,11,23,18,11,15,18,23,29,13,30,29,13,10,7,23,25,27,30,32,22,6,9,20,10,16,8,24,29,16,19,23,15,31,37,35,32,20,34,49,31,25,9,22,18,38,26,12,47,32,31,13,31,39,38,34,20,31,50,10,14,27,63,8,33,14,56,30,62,44,29,28,37,25,26,35,46,40,23,41,40,61,78,82,98,67,103,87,92,99,110,101,107,95,30,27,13,10,51,23,19,19,28,19,43,27,21,16,22,25,26,35,23,1,17,10,34,10,0,17,30,9,29,45,33,34,21,53,48,45,39,47,82,73,45,53,93,88,82,119,99,165,161,161,161,162,166,177,159,152,149,160,146,169,149,170,176,167,149,163,170,154,131,156,154,148,156,183,154,141,169,183,171,147,163,139,150,162,154,156,142,153,160,151,143,155,149,157,173,192,191,188,137,129,82,100,118,158,161,172,175,196,160,166,175,179,136,153,165,152,171,182,174,162,153,162,163,178,185,175,158,169,146,144,145,141,159,176,153,146,153,169,161,151,140,154,141,178,149,145,159,163,137,167,150,158,167,172,161,142,170,178,176,109,29,0,37,2,13,27,9,9,4,6,12,8,14,9,14,46,25,9,9,25,10,28,19,4,35,25,32,32,37,45,14,69,25,41,29,51,49,46,21,47,20,58,31,39,42,40,32,29,29,28,19,19,11,42,25,13,38,33,48,25,41,15,31,40,32,18,33,2
8,25,14,27,12,31,15,13,39,28,25,10,0,14,15,12,8,27,11,15,15,25,9,23,14,9,21,36,44,69,72,59,104,46,62,72,84,60,51,70,60,67,64,58,64,44,60,56,47,32,16,39,39,38,32,64,40,76,58,69,47,56,42,45,25,29,88,82,132,96,103,123,96,131,127,151,121,132,115,129,95,107,112,128,123,104,100,108,94,137,117,104,108,78,109,109,108,106,124,115,109,105,109,97,119,109,113,115,155,123,127,125,125,134,125,105,103,112,121,114,117,125,112,125,93,85,121,119,96,110,115,124,146,119,117,107,111,138,136,126,142,143,135,104,127,108,103,127,128,141,98,137,124,111,122,103,116,150,136,121,108,110,138,121,125,95,132,99,119,149,144,144,115,121,127,108,100,94,125,132,131,129,123,111,116,109,121,98,131,102,114,104,113,126,133,107,112,125,105,127,128,121,131,107,86,71,112,95,125,96,96,96,122,113,83,71,101,86,86,97,73,71,77,83,111,97,107,97,87,119,65,60,41,50,99,57,67,85,93,90,78,71,58,46,55,51,45,72,43,66,18,73,61,81,78,96,43,30,77,105,100,81,97,100,118,116,106,138,106,114,103,82,79,75,82,131,138,163,210,211,234,251,255,248,240,249,254,254,239,205,222,143,128,104,56,38,16,1,26,13,28,18,32,34,51,53,49,47,34,55,46,53,28,50,33,26,59,15,49,54,38,47,48,3,47,47,15,30,20,34,32,16,48,29,26,38,13,42,15,13,33,7,40,40,61,33,15,4,35,26,44,42,47,19,37,31,11,48,18,28,32,49,8,20,12,35,16,22,18,40,28,42,21,40,41,27,15,4,12,30,14,9,15,15,35,35,46,29,46,2,26,29,8,20,15,39,34,30,14,14,16,20,6,19,34,17,14,9,18,24,29,1,20,37,24,44,28,15,4,41,39,28,38,31,15,27,26,11,34,37,49,6,30,16,16,56,40,24,10,22,13,24,35,50,27,45,27,22,25,17,16,45,63,52,43,21,44,11,36,23,31,16,25,13,20,22,62,48,26,40,21,30,32,42,63,51,38,45,12,43,47,66,118,105,96,105,115,101,98,121,84,138,139,39,36,30,31,36,27,21,1,25,52,37,29,29,32,20,14,21,18,31,31,30,23,10,35,31,6,21,40,44,37,23,34,49,55,64,57,42,97,81,86,76,82,100,81,95,103,155,170,163,174,153,142,168,169,192,160,181,178,135,156,154,177,145,150,144,165,143,186,176,159,160,179,162,161,166,154,180,156,158,162,173,154,168,164,131,164,168,162,155,148,151,146,153,150,163,165,188,200,167,157,138,99,113,109,138,154,184,185,175,164,175,160,141,186,146,180,168,167,168,178,168,143,188,183,189,175,197,168,163,175,129,160,132,158,175,168,151,157,144,154,148,157,149,139,160,148,164,164,160,147,169,165,149,168,162,163,175,155,164,102,4,23,26,21,13,11,27,24,15,8,28,25,26,0,2,11,9,2,4,26,9,10,22,7,70,39,26,34,14,19,11,19,31,28,13,13,5,14,6,21,24,40,15,36,8,28,6,14,13,41,31,39,38,25,53,19,27,27,31,16,25,13,37,36,18,22,14,3,18,5,38,27,20,30,20,21,11,23,12,14,23,10,11,26,19,25,7,22,25,17,13,18,4,38,46,48,44,72,45,54,43,49,42,59,48,40,56,57,70,51,64,58,46,67,62,53,56,36,40,54,35,56,61,85,89,81,75,60,69,78,47,60,43,57,46,95,103,113,81,78,87,116,116,118,128,131,135,117,116,111,126,106,103,114,111,106,98,136,103,107,108,107,118,120,94,115,103,98,102,103,124,128,111,133,150,121,139,136,101,99,132,117,145,137,118,108,126,121,84,118,116,125,146,145,122,128,125,115,120,97,99,124,92,113,101,132,142,105,108,102,110,96,122,93,112,112,124,105,122,107,112,125,128,123,142,148,132,100,133,139,121,116,122,100,133,125,156,134,125,121,139,104,129,143,122,139,135,148,123,104,119,104,144,94,85,106,125,142,101,129,133,127,111,115,140,106,116,132,101,145,132,138,119,116,134,128,124,121,118,105,106,80,76,61,81,101,85,76,82,77,73,100,91,121,100,85,81,72,68,60,62,55,62,68,74,55,57,59,56,87,97,67,57,62,71,54,44,59,86,49,75,83,75,72,57,41,76,61,65,82,62,84,96,73,74,129,161,113,132,113,105,37,55,42,64,81,132,138,172,162,194,234,243,245,253,249,248,243,215,239,252,225,188,150,173,112,51,31,9,8,7,42,22,45,35,46,49,45,46,59,42,23,32,36,39,36,37,40,50,52,41,23,24,47,30,39,38,41,
45,21,33,56,48,41,14,28,24,37,28,27,43,31,39,29,28,29,18,17,24,34,44,34,14,10,19,6,26,36,20,13,60,34,20,29,31,17,39,22,19,30,47,19,21,14,51,8,19,26,11,35,26,13,12,15,32,34,37,40,22,9,9,17,31,7,7,18,27,7,30,21,29,18,11,22,37,22,15,35,29,10,1,9,11,22,27,17,21,2,17,9,44,22,25,5,20,8,13,45,12,5,9,16,22,25,49,25,41,33,55,21,25,35,33,22,36,43,26,30,66,76,59,33,18,28,26,40,41,28,34,54,48,41,27,29,38,35,53,40,49,25,43,33,49,28,31,32,39,38,78,111,157,129,158,138,100,102,140,144,154,143,70,41,6,35,20,23,15,10,8,40,21,8,37,44,12,27,19,20,38,34,31,10,9,32,30,26,20,16,36,25,38,47,49,54,59,75,60,62,102,86,93,74,95,95,80,149,150,176,154,143,156,155,182,164,153,153,155,174,185,167,161,158,182,132,160,155,167,179,155,170,152,200,155,151,177,150,196,162,123,161,175,185,171,146,152,155,152,132,142,146,162,160,154,150,173,147,175,179,181,181,192,140,112,78,109,108,160,190,184,185,178,142,186,174,178,146,157,167,170,166,161,191,132,168,155,145,176,175,161,154,159,145,166,140,152,156,163,157,159,150,164,158,152,178,160,155,158,161,152,179,158,170,169,171,182,170,141,159,142,111,8,6,10,10,31,17,31,4,24,4,29,30,14,1,22,28,17,15,2,9,10,22,10,34,31,47,17,24,51,21,7,32,18,21,33,45,22,9,18,17,6,29,22,27,8,3,33,21,0,12,9,28,44,20,18,6,16,35,28,12,35,28,12,32,30,8,4,29,11,10,23,12,6,12,36,31,13,30,20,23,28,34,48,21,14,16,20,16,13,38,28,19,27,45,6,65,62,57,66,76,49,59,61,55,78,66,83,70,55,53,85,95,104,84,77,65,82,61,58,44,69,58,53,99,93,74,121,81,106,106,89,75,51,89,71,70,90,70,87,65,73,110,69,99,103,87,129,109,128,140,140,114,125,111,126,140,123,115,116,116,119,84,128,130,119,124,101,151,111,130,120,129,145,116,150,110,113,127,119,107,121,164,120,105,117,108,124,126,118,132,104,146,112,110,108,136,117,103,134,118,135,104,119,118,123,124,64,95,105,127,136,135,122,106,122,124,102,102,131,136,123,148,130,122,103,131,137,123,117,148,128,112,117,136,141,109,92,114,123,107,139,119,133,111,117,135,119,103,116,113,111,114,86,94,105,119,123,112,122,94,117,116,116,110,105,121,115,119,127,149,128,118,131,121,134,148,124,159,145,139,133,85,100,82,84,112,87,84,82,74,76,58,79,120,93,101,78,64,94,69,111,85,62,68,89,96,106,88,74,99,74,74,62,68,39,48,69,55,80,68,62,52,74,56,50,54,46,49,58,53,64,79,60,65,100,125,150,150,179,168,154,150,124,87,97,111,82,115,99,89,125,124,177,166,195,196,215,201,228,248,242,251,247,238,252,241,243,194,166,102,67,114,5,19,24,25,30,33,35,29,15,41,18,31,11,48,35,49,13,39,50,23,31,33,39,36,30,35,31,24,13,24,42,34,9,17,50,49,24,50,30,30,33,30,33,25,16,16,12,28,33,33,42,33,10,51,9,27,8,47,32,23,43,45,12,25,22,23,20,49,19,14,41,44,23,25,14,12,23,23,23,22,14,11,19,12,14,33,23,29,38,28,8,5,26,31,24,10,22,15,23,3,18,20,5,22,28,17,11,25,15,28,10,13,23,17,14,42,33,0,43,31,30,9,33,14,26,7,29,8,22,33,31,14,21,23,33,43,14,14,28,20,23,48,61,31,43,35,23,47,29,17,41,14,22,30,10,51,35,22,29,33,28,42,39,61,45,44,42,52,24,36,49,44,44,35,34,42,80,118,140,134,122,95,85,112,115,122,146,139,63,36,3,12,26,27,8,17,29,31,40,11,45,41,15,31,26,25,28,12,11,11,15,23,39,20,39,31,60,49,29,50,56,42,81,60,51,52,67,78,76,109,104,107,144,183,152,167,166,156,154,180,174,179,163,164,183,167,159,170,147,172,148,166,147,153,159,145,149,154,155,163,157,160,164,155,167,151,148,135,153,151,176,161,146,168,150,165,162,174,194,164,151,149,167,143,152,146,181,162,160,146,138,102,87,91,127,164,185,206,147,167,155,168,150,156,145,175,147,157,156,166,146,150,138,150,150,153,148,155,179,161,161,174,180,171,167,160,185,135,167,180,140,173,174,167,187,200,199,146,178,145,156,141,178,171,185,174,170,124,1,8,6,3,13,15,13,21,25,28,12,16,9,8,3,18,5,4,10,13,7,6,3,
7,27,14,15,18,25,32,17,9,9,30,16,24,26,45,10,33,1,7,18,27,15,33,32,31,25,20,15,16,30,8,46,21,11,0,25,27,38,40,7,11,31,31,4,35,19,21,24,23,3,26,15,26,24,22,29,9,15,1,27,29,18,10,36,12,46,26,35,10,2,37,35,49,23,55,72,71,68,87,63,102,71,66,80,102,109,150,100,79,110,102,107,70,74,38,67,39,48,64,65,117,113,105,97,106,117,121,104,97,106,84,101,95,85,131,133,94,91,95,87,88,130,113,100,111,134,137,137,126,135,143,132,116,129,120,132,136,153,121,128,114,117,149,132,140,156,131,122,139,128,135,130,144,146,140,144,134,128,97,141,126,132,128,121,109,91,102,117,118,134,121,132,128,117,135,133,128,95,97,129,133,126,118,124,139,138,111,128,104,130,121,147,157,104,130,132,127,112,125,118,102,112,129,149,129,137,114,104,132,120,119,145,111,132,121,131,129,126,114,140,144,133,127,107,108,99,118,144,133,130,132,154,145,131,132,128,135,171,130,148,141,119,112,140,118,104,104,105,123,116,140,144,153,126,140,150,109,119,121,110,133,122,122,58,82,89,86,95,92,102,116,98,100,126,96,89,110,107,100,92,107,84,98,101,111,102,70,65,63,73,78,46,39,68,66,74,60,47,34,65,48,34,40,60,26,37,53,55,102,64,81,97,122,137,154,172,162,173,194,195,184,164,135,125,129,92,101,73,73,92,63,105,132,154,157,179,226,225,244,235,243,252,255,246,237,233,234,241,233,210,144,116,70,66,19,5,5,41,34,45,35,34,33,33,30,47,28,28,33,27,35,38,31,36,57,32,38,52,26,22,30,15,40,35,31,11,7,42,36,19,25,22,27,7,44,35,37,16,27,20,32,12,47,29,34,32,38,39,22,28,7,27,33,39,13,21,23,10,15,15,18,33,26,13,23,18,26,39,29,12,41,32,34,11,4,18,37,14,20,13,17,12,29,30,52,8,6,29,15,27,45,44,12,31,34,7,36,26,15,41,37,2,24,6,11,29,45,41,9,21,31,24,25,18,17,3,11,30,42,32,4,24,24,25,41,22,22,23,30,13,40,37,27,13,23,37,39,23,26,33,17,20,50,18,21,43,51,10,31,6,50,24,17,57,47,40,33,36,44,32,17,50,16,10,56,57,71,61,59,55,37,36,41,55,66,55,76,28,40,9,33,21,15,12,8,26,34,21,10,41,29,20,7,17,45,6,47,24,9,20,25,49,21,36,26,42,17,50,37,63,40,51,70,62,72,80,96,94,122,108,141,173,149,162,151,174,171,165,155,163,176,163,141,157,153,148,165,147,140,149,126,112,165,175,166,182,161,154,148,157,168,173,169,158,145,160,138,164,154,160,168,162,175,179,167,198,173,169,155,136,117,117,151,150,158,158,174,154,179,163,134,132,92,85,102,169,178,174,162,172,160,159,154,159,168,118,163,153,152,157,154,139,165,157,175,176,167,162,173,169,145,147,173,183,154,153,165,139,152,181,184,140,158,164,166,167,141,148,171,163,157,149,171,170,168,159,102,22,10,3,20,3,11,31,10,7,3,13,7,20,11,14,10,10,12,4,5,19,5,1,3,10,43,14,15,12,4,38,24,19,16,42,5,24,22,24,2,22,49,8,18,26,10,17,10,24,22,20,33,8,40,11,15,15,9,13,11,22,34,16,13,42,14,24,16,9,10,27,12,8,9,17,19,26,12,18,10,18,46,36,11,20,22,25,11,28,19,10,20,32,41,24,25,37,31,50,36,33,36,31,45,53,65,55,76,57,82,85,68,81,99,58,40,70,12,43,45,35,66,52,47,82,60,47,75,42,63,61,80,53,65,72,51,57,42,76,63,56,32,27,45,82,85,90,50,64,78,73,113,93,97,98,115,83,112,125,118,100,123,116,128,125,138,125,136,113,125,152,125,121,112,118,121,161,123,125,119,122,98,98,135,97,111,137,139,124,123,101,137,137,134,138,138,128,134,136,113,126,133,135,118,122,116,129,144,140,147,127,135,139,128,123,136,140,131,144,128,119,150,128,128,128,141,137,157,128,117,120,128,145,131,127,114,126,96,117,132,128,126,141,115,121,146,129,148,145,146,147,125,128,146,103,132,131,135,135,121,136,126,127,124,124,136,112,101,115,125,123,127,108,129,121,121,115,128,113,73,108,87,132,117,91,96,104,120,109,127,125,117,99,116,140,88,114,128,91,85,96,74,61,98,88,76,61,77,73,49,35,57,78,101,77,75,79,66,92,45,67,58,26,51,30,45,62,72,60,36,29,60,113,88,95,109,112,70,102,116,69,115,91,80,71,68,132,111,92,103,92,
91,116,43,51,69,89,58,123,132,131,153,149,180,194,228,237,238,247,239,252,249,249,244,240,216,179,190,131,98,113,48,13,20,10,20,24,29,20,24,16,40,49,33,18,27,16,38,40,40,21,51,16,20,43,9,37,15,29,9,11,7,21,35,33,29,18,6,43,16,47,38,38,15,31,27,53,39,18,25,35,19,11,23,29,17,22,20,32,31,8,44,22,11,18,24,29,26,44,32,17,20,6,30,20,23,15,40,30,23,13,25,27,31,23,38,19,19,16,30,14,16,40,18,12,26,29,22,15,16,37,55,17,13,65,17,41,52,35,9,30,25,19,29,3,27,46,16,27,33,18,42,31,22,22,7,20,23,2,2,26,25,25,30,32,44,36,17,49,47,51,19,46,34,23,37,42,33,35,36,34,31,24,20,22,20,48,46,39,24,21,41,30,32,59,36,21,45,31,54,41,38,18,47,34,32,28,38,72,69,30,41,32,30,17,12,16,29,28,16,40,21,22,11,44,31,26,34,10,23,7,12,34,22,37,24,14,26,54,25,25,57,31,41,71,76,50,79,79,69,112,93,155,194,169,173,151,163,180,163,148,144,162,161,168,138,158,156,149,141,178,150,172,163,173,144,180,166,153,158,175,166,149,147,156,152,173,153,157,137,178,173,151,169,149,163,142,186,168,178,158,166,162,146,151,152,162,165,197,144,167,166,142,171,150,106,66,66,131,145,152,152,178,159,152,138,172,158,164,144,152,172,165,180,168,165,160,157,158,183,154,169,148,167,133,162,167,164,166,149,174,154,187,170,166,172,160,166,173,138,155,164,170,165,183,152,153,184,190,116,3,7,11,3,18,6,18,18,2,26,19,2,27,1,17,4,28,22,14,24,27,23,4,4,30,31,16,43,14,27,43,15,19,14,20,20,10,29,9,16,45,14,25,12,23,13,20,12,11,16,8,45,45,36,19,17,13,45,10,10,31,28,35,13,31,20,2,32,32,36,11,12,2,37,4,4,43,23,17,21,7,38,30,19,14,27,35,29,20,19,31,8,33,28,22,16,52,3,4,22,20,44,15,30,22,17,47,53,33,36,21,33,35,22,35,32,32,47,40,69,36,35,42,48,67,53,57,66,27,36,61,48,47,63,31,60,61,65,34,23,36,46,45,58,56,75,68,42,75,45,67,78,46,76,68,112,103,125,130,129,112,111,114,103,107,119,127,138,114,129,124,128,128,119,134,118,116,108,105,126,100,92,109,118,139,108,147,132,117,136,144,124,151,127,151,129,128,119,132,129,123,130,121,122,138,166,154,116,129,135,132,102,126,133,124,129,145,145,136,129,139,138,132,128,123,133,148,116,144,125,121,122,141,152,118,134,129,134,125,132,119,136,144,130,135,146,126,159,149,130,145,130,147,112,134,133,127,118,128,110,137,111,129,130,117,94,136,108,133,139,140,118,124,123,109,102,115,103,118,100,114,113,116,94,107,125,115,126,115,102,128,118,99,109,66,107,86,88,102,82,68,75,67,90,113,68,77,91,74,64,56,59,43,87,40,93,60,89,62,64,54,67,83,84,63,52,56,42,20,24,53,70,77,73,61,36,52,58,56,48,40,49,62,53,55,61,86,89,108,87,128,158,129,112,88,66,46,22,48,14,30,72,81,85,85,129,122,169,200,185,239,253,254,244,251,234,255,242,250,252,243,195,175,148,110,88,65,44,25,19,28,66,30,39,26,57,23,28,47,26,34,11,21,34,33,42,44,44,32,53,32,32,27,33,29,47,13,34,28,27,31,54,22,2,35,20,31,24,19,50,13,9,7,30,30,24,14,29,41,17,5,15,7,38,4,38,16,1,24,19,14,28,11,1,8,6,27,19,27,23,37,7,43,12,21,31,5,45,51,24,17,32,28,5,11,24,29,6,5,2,7,5,9,35,39,31,38,8,21,27,9,10,9,8,24,13,13,3,26,22,26,9,24,1,11,8,16,30,22,10,24,9,30,20,18,25,29,45,42,67,67,18,39,53,20,7,41,25,49,21,22,46,45,42,27,49,17,57,30,24,49,43,62,25,45,43,39,56,44,59,53,68,21,19,22,60,17,44,28,26,42,37,46,7,29,31,14,52,20,41,8,16,14,21,12,30,33,3,14,35,3,23,34,12,44,46,21,18,34,30,66,55,48,62,72,82,95,85,87,90,115,171,183,186,163,170,150,182,162,152,161,143,174,154,163,159,174,176,158,168,144,146,177,177,174,165,147,159,159,178,148,186,158,163,168,157,177,173,163,178,163,162,158,164,152,163,141,152,142,177,159,165,137,155,153,137,153,187,157,180,172,162,181,210,173,163,106,90,77,106,130,168,162,171,168,161,173,148,165,187,163,174,151,174,164,167,162,183,167,159,172,165,160,177,155,165,166,158,155,158,163,151,161,
[raw test-data payload: comma-separated 8-bit grayscale pixel values (0-255), spanning several thousand entries]
82,72,59,73,86,88,89,96,95,121,86,119,81,65,80,68,68,76,56,69,107,79,36,81,24,56,57,44,50,33,66,31,78,55,51,44,75,44,71,64,52,54,44,72,70,47,54,49,59,59,52,42,36,54,70,37,56,54,50,59,50,18,70,63,62,54,29,43,43,54,30,36,64,45,25,37,37,71,47,39,47,59,62,28,32,61,28,63,28,64,26,40,34,45,21,41,45,44,56,36,44,27,50,9,57,33,20,29,58,32,43,43,61,43,47,41,23,29,27,71,24,31,51,35,50,50,26,15,16,50,29,36,53,42,40,50,40,25,34,49,48,34,15,53,48,37,44,10,36,61,41,32,22,41,35,62,19,38,52,41,52,43,29,17,32,53,73,22,39,37,62,51,56,35,29,27,12,48,37,46,16,54,45,39,38,48,26,26,42,31,50,36,53,48,84,42,54,55,95,44,71,51,52,70,59,55,66,80,96,100,116,84,144,170,142,110,143,190,195,166,128,143,94,137,157,98,140,146,145,157,101,99,94,117,85,107,120,183,99,25,34,16,29,19,26,39,20,13,21,31,23,31,20,27,39,26,17,30,6,30,2,18,10,24,4,44,32,16,31,47,33,24,27,25,45,41,48,76,30,58,51,58,45,46,77,75,67,63,104,107,90,77,130,90,102,131,136,137,147,130,175,180,160,151,160,143,147,180,163,160,156,137,133,165,159,154,143,153,157,141,136,175,146,167,155,167,170,160,161,137,170,172,151,138,171,142,148,145,170,191,162,145,150,144,135,115,131,178,155,149,179,169,180,151,137,139,166,147,176,165,152,148,145,182,160,169,162,195,129,162,160,161,152,165,143,166,160,108,99,81,125,120,142,166,177,186,161,176,147,142,149,128,158,177,146,150,187,108,17,1,0,6,29,23,20,11,38,19,8,2,3,0,19,24,4,19,14,31,17,2,3,17,28,8,26,32,28,49,27,35,17,10,14,20,25,30,24,20,17,21,20,32,48,15,12,29,30,13,21,9,5,9,19,12,37,25,11,20,23,26,27,9,51,37,29,25,17,23,15,33,24,12,31,32,21,18,33,41,27,25,39,24,36,48,29,36,22,13,6,21,42,25,23,19,30,24,24,11,15,28,39,8,44,16,21,34,27,31,10,57,25,7,42,31,43,20,21,48,15,19,31,15,36,33,31,34,27,32,25,25,6,31,32,44,18,36,45,44,78,51,35,39,61,63,54,44,45,70,52,71,85,100,100,111,109,134,130,142,119,131,136,126,131,123,113,123,113,123,115,144,153,115,126,129,120,108,137,129,114,121,147,150,131,149,139,112,144,136,129,120,128,128,137,140,125,151,113,141,144,137,118,99,117,121,122,113,117,140,149,107,107,114,93,95,122,124,142,122,94,139,142,123,128,129,107,95,149,149,142,146,152,118,124,119,135,136,136,127,131,131,127,124,111,110,137,153,139,107,150,147,149,158,120,139,108,87,120,124,138,149,124,126,147,141,128,129,167,155,136,132,132,128,112,101,130,116,130,118,110,119,111,117,115,112,109,119,125,111,121,130,109,138,115,138,143,138,113,120,114,115,115,110,124,126,118,107,102,96,71,88,100,63,89,84,96,107,88,89,85,58,43,35,52,62,60,62,72,61,66,59,55,79,75,33,57,60,85,54,106,69,84,77,60,66,55,82,85,63,62,62,53,30,63,57,59,38,52,74,77,74,54,52,47,72,55,61,51,51,38,62,46,68,46,73,56,14,61,78,49,40,52,49,51,52,47,41,58,40,34,67,27,32,50,52,37,33,28,51,43,47,54,37,53,34,69,47,26,45,71,40,47,50,58,46,31,40,22,58,35,41,44,26,58,35,38,22,34,45,39,58,26,38,38,58,37,58,43,56,46,29,22,54,20,20,42,33,49,46,50,66,49,48,39,10,38,56,31,45,17,47,76,28,18,57,7,57,51,49,19,24,61,18,42,34,71,22,51,48,38,43,29,31,24,54,38,23,32,57,44,25,39,43,30,55,26,36,34,48,33,52,33,33,25,49,51,42,40,24,37,56,66,45,53,47,52,62,44,87,58,24,64,58,37,53,52,50,56,58,35,91,97,100,82,135,118,111,105,144,191,223,196,209,189,188,213,214,190,147,128,76,84,71,85,70,74,26,39,79,40,26,30,15,13,32,33,41,8,7,3,22,24,36,41,14,42,10,38,50,25,38,25,4,16,30,35,30,23,35,34,34,48,51,56,33,46,56,38,67,40,35,44,51,70,77,46,55,74,80,51,94,100,79,85,121,98,103,95,108,101,149,170,187,178,173,149,158,142,157,179,166,179,161,154,156,193,150,141,144,142,141,131,155,158,144,144,136,161,165,161,151,157,147,141,163,177,153,148,181,161,145,169,141,170,171,148,168,143,140,173,177,165,138,157,131,
146,148,180,183,163,156,144,156,129,176,165,173,166,171,161,155,160,152,154,134,149,176,151,153,111,118,86,65,108,154,158,170,192,182,163,160,138,133,144,141,151,124,148,111,12,0,15,55,30,13,37,16,13,27,11,3,3,1,1,12,39,2,18,7,10,26,18,28,30,13,22,16,28,27,12,12,61,8,26,9,24,22,44,22,19,28,30,24,23,26,19,2,17,16,28,20,30,28,27,23,16,8,37,3,11,9,5,34,5,25,23,28,32,12,31,20,27,23,19,28,20,21,32,23,35,32,14,25,15,25,10,41,37,21,39,14,42,21,50,55,37,25,31,31,35,20,35,7,15,20,17,23,26,31,4,42,17,43,34,43,35,25,14,30,22,34,23,14,41,18,24,31,5,39,44,42,16,28,26,45,34,57,46,44,43,41,59,56,63,57,76,87,70,57,13,38,72,58,95,92,122,120,106,121,101,100,130,101,85,102,144,140,141,126,132,129,127,105,149,125,149,133,163,133,104,152,147,120,101,119,135,94,118,128,109,124,134,136,139,117,122,114,129,108,134,126,110,121,133,113,139,116,126,118,117,146,133,119,115,124,116,124,85,151,124,127,121,109,136,117,115,147,113,127,133,128,131,137,137,163,152,138,139,134,103,101,94,152,125,122,136,134,123,150,121,129,99,108,139,105,123,121,104,84,136,132,122,120,114,139,160,118,100,132,115,107,99,108,124,95,117,127,96,125,131,113,100,126,124,112,107,129,105,109,113,131,126,124,126,142,123,93,138,105,106,121,136,110,124,104,83,119,124,121,93,103,79,83,98,121,80,101,91,47,63,76,90,78,34,77,82,64,69,25,57,43,38,39,78,37,16,42,66,73,65,72,14,59,67,26,68,42,43,62,58,49,57,60,67,81,66,60,68,60,52,72,60,71,64,55,45,61,55,18,45,71,60,57,49,63,61,44,23,83,81,57,56,56,58,47,46,60,54,52,50,47,65,65,56,55,57,24,37,46,56,50,40,53,52,73,37,31,47,70,31,62,58,69,24,40,38,54,36,48,61,29,38,35,25,40,63,50,34,55,60,49,37,38,57,25,57,64,64,47,42,44,36,49,15,50,44,34,39,55,71,23,55,49,69,48,26,35,23,46,11,26,61,51,29,37,51,37,31,36,43,41,42,7,22,52,45,32,33,8,38,67,27,24,35,48,32,54,43,51,28,32,34,15,56,29,11,33,29,37,35,42,44,29,37,22,40,50,28,35,23,14,46,42,40,57,47,51,58,53,49,73,32,50,72,42,65,66,57,59,49,68,89,101,85,126,115,110,107,120,195,190,200,170,176,164,188,171,162,123,107,114,140,140,164,140,138,126,130,101,54,4,32,3,17,15,30,41,23,14,18,38,22,37,37,7,8,15,12,35,2,17,30,18,20,11,18,35,56,29,52,53,61,29,33,42,54,45,33,49,27,76,55,79,83,63,71,63,78,50,102,78,100,104,158,119,133,148,135,123,141,133,161,141,158,137,160,151,128,164,159,154,137,160,157,167,133,129,156,142,197,110,145,180,168,169,166,137,155,160,172,168,142,165,158,178,147,170,162,127,161,180,159,139,166,160,156,173,139,156,170,189,146,166,163,171,142,141,164,139,144,143,151,172,168,148,156,146,143,157,139,159,154,144,150,159,125,159,169,159,149,119,118,112,97,92,133,169,190,166,144,186,176,140,124,142,149,160,166,113,25,4,1,19,5,2,16,5,5,35,7,18,4,7,25,20,14,37,20,3,6,2,15,16,13,29,13,15,15,16,20,18,31,33,17,20,14,23,40,19,11,25,15,32,16,12,31,10,1,14,34,42,6,8,14,24,26,23,31,14,39,15,10,30,7,18,24,15,29,37,10,16,30,21,17,31,19,29,8,16,26,2,32,3,36,18,20,36,33,36,1,5,48,21,51,17,18,33,20,40,18,41,43,56,42,29,37,21,24,22,19,44,4,42,32,29,29,33,7,32,28,27,20,42,10,17,55,34,32,38,48,37,28,30,43,39,47,37,29,37,53,46,53,38,68,66,48,75,45,65,60,60,25,31,62,106,57,91,51,59,87,104,116,110,105,141,127,139,135,133,119,130,149,125,144,142,123,102,101,119,114,105,138,127,133,107,120,123,108,131,128,150,135,136,130,119,132,114,102,125,133,102,152,119,151,139,128,114,123,119,132,147,113,155,127,132,120,135,131,121,109,119,138,134,134,142,118,135,131,149,127,135,117,137,133,139,151,140,137,113,111,146,115,128,128,140,150,105,129,117,129,119,120,140,131,154,157,153,162,129,113,118,129,128,120,146,126,143,154,136,128,111,115,137,140,116,115,132,115,133,122,149,143,105,111,117,101,10
8,106,115,99,111,112,83,110,99,97,118,115,115,148,144,137,133,130,97,90,119,137,121,128,106,77,100,114,128,101,121,103,111,125,115,118,102,115,89,83,72,92,64,57,77,87,92,111,82,76,88,98,71,80,49,73,50,57,61,60,60,70,48,44,65,45,42,57,52,58,52,97,83,70,37,49,27,47,46,36,19,78,47,43,47,48,67,82,49,35,45,64,32,33,66,40,57,47,41,57,44,43,67,11,52,51,41,48,46,65,41,44,60,45,42,42,59,26,37,54,34,66,31,42,22,73,49,61,59,31,22,42,43,48,45,59,55,56,33,46,62,41,48,49,59,29,49,34,44,34,45,60,24,47,55,40,43,48,41,33,34,51,51,72,42,30,20,41,43,36,23,36,26,44,40,59,46,61,17,30,41,27,25,56,69,21,54,45,14,34,49,33,51,27,25,33,39,63,34,43,40,26,26,35,42,29,30,44,41,39,27,32,63,44,58,46,29,60,64,45,36,30,47,39,78,70,26,47,40,46,81,63,46,66,58,68,40,54,73,67,64,74,63,61,81,108,107,111,68,63,79,88,92,129,162,132,155,127,141,113,87,112,114,129,178,165,182,166,198,203,210,193,145,74,39,24,8,23,3,27,13,38,34,29,33,1,26,23,14,32,12,25,32,42,33,41,59,31,30,22,22,32,43,17,39,45,33,55,32,59,46,67,61,60,83,58,57,53,80,74,63,50,71,98,125,127,121,137,140,151,135,168,152,166,149,151,144,114,121,124,127,111,121,131,148,119,143,107,137,147,148,174,156,164,159,141,159,172,166,132,167,160,167,165,186,158,171,167,147,139,153,156,136,176,152,169,135,166,154,145,144,160,170,150,132,159,158,167,162,158,174,176,158,171,165,183,159,157,147,147,150,147,125,145,152,134,177,134,144,144,142,161,129,183,162,133,122,97,85,102,119,152,180,167,150,173,133,151,135,156,153,147,102,15,3,18,14,13,4,10,25,31,26,12,7,11,22,11,4,9,8,7,5,14,3,16,33,34,7,19,24,36,24,27,31,9,12,32,23,45,12,0,30,21,32,9,12,18,38,30,28,7,22,24,9,15,43,12,15,16,18,9,25,31,9,13,16,11,12,29,24,22,44,13,5,14,28,20,2,19,18,14,13,39,4,10,18,32,34,4,27,28,20,5,34,12,13,43,11,49,54,34,55,38,48,23,44,38,18,48,20,49,29,27,44,30,48,23,49,20,35,25,41,55,8,42,38,41,26,38,45,18,40,51,51,38,25,29,28,69,74,72,52,32,57,60,65,87,65,119,121,72,111,69,70,72,85,104,106,108,110,99,98,106,124,125,107,115,129,117,124,111,102,94,132,146,144,129,126,102,119,136,120,135,129,125,128,121,133,112,122,149,141,137,134,106,140,112,139,152,122,151,131,132,106,142,127,113,141,141,124,139,115,104,116,168,137,145,147,137,165,132,130,158,143,129,141,143,141,171,113,141,110,118,129,114,126,139,134,110,109,143,145,117,121,140,124,131,133,131,125,143,119,136,134,146,147,141,135,150,122,132,115,144,120,120,141,141,152,142,149,122,141,120,145,145,127,173,146,141,150,148,128,171,126,132,127,131,111,129,122,127,123,127,107,84,102,113,85,74,89,92,92,99,109,137,97,105,86,80,85,95,101,102,128,111,93,128,126,81,82,91,132,115,110,127,92,86,98,106,106,85,79,101,81,99,66,95,104,109,78,82,55,107,66,105,72,68,54,83,65,53,37,29,60,55,61,75,80,68,39,44,44,62,70,57,47,37,29,61,45,50,50,61,51,54,39,60,35,66,49,67,57,59,54,29,59,59,23,44,50,76,53,50,49,44,43,46,58,49,42,49,27,40,49,77,47,44,62,40,74,68,30,60,40,54,47,43,35,59,50,71,50,31,57,45,48,50,55,52,43,41,38,32,52,47,78,42,40,52,60,28,46,58,40,56,73,49,58,36,21,44,46,33,39,34,43,68,31,33,27,50,28,36,46,30,43,48,38,33,51,43,36,30,40,16,67,27,48,20,47,58,58,35,56,25,33,30,49,46,50,20,39,62,45,48,60,18,55,37,53,24,34,39,23,39,32,32,36,56,33,47,27,37,28,18,41,42,48,66,34,40,70,65,44,37,63,42,63,43,55,50,47,73,56,49,63,63,80,64,51,66,61,95,78,103,92,84,65,98,102,121,93,117,109,139,149,124,132,148,183,178,164,110,50,40,31,27,42,38,38,23,31,10,46,14,38,34,45,51,25,12,35,26,33,47,18,29,43,55,41,45,17,53,37,32,42,41,62,42,50,57,45,69,61,73,46,60,64,55,67,54,70,120,104,117,126,125,112,119,145,140,105,143,127,152,121,166,142,153,144,168,159,138,155,153,113,141,135,176,148,165,161
,135,175,174,172,152,179,164,127,149,171,160,160,155,157,157,193,140,126,161,143,145,180,140,132,161,159,136,138,150,145,168,153,116,131,151,178,169,170,189,137,149,164,154,148,153,150,143,154,157,148,145,125,154,105,141,139,146,128,121,141,156,140,153,128,119,83,103,73,97,139,146,149,156,169,158,148,163,144,147,116,26,4,17,18,23,15,19,5,21,17,8,21,0,16,28,0,19,6,3,7,42,18,12,9,21,12,32,10,39,27,15,31,11,36,27,21,37,28,39,17,30,34,28,38,19,11,43,22,34,22,8,1,21,6,50,6,18,25,20,22,14,24,32,5,20,14,18,22,34,25,17,16,28,40,6,1,33,5,37,44,24,16,23,32,26,18,19,28,14,6,30,53,48,35,27,31,32,48,28,49,49,52,29,40,45,62,29,49,18,25,33,34,36,37,36,44,16,55,44,52,58,34,37,50,35,29,33,15,42,28,25,48,48,67,48,45,46,61,61,28,59,58,42,68,75,80,91,98,86,82,89,105,131,110,115,136,143,152,126,130,116,118,118,115,113,136,108,95,92,93,106,131,128,101,121,107,95,106,104,113,115,130,130,121,139,117,157,146,137,142,155,183,162,116,125,123,125,125,129,107,134,116,135,158,114,112,146,135,123,146,155,133,131,118,126,120,132,149,130,131,137,146,133,136,138,135,121,133,129,144,152,134,125,139,130,143,129,145,153,122,142,127,117,143,151,140,127,152,139,127,153,146,179,127,151,134,126,127,127,115,170,150,151,141,144,136,120,135,134,128,129,124,117,133,114,136,150,159,162,116,161,173,149,120,147,146,127,122,116,141,123,92,93,100,91,115,108,114,83,111,96,75,90,99,83,68,89,86,92,86,93,124,118,129,120,101,82,55,95,22,56,78,83,85,81,99,100,117,84,103,101,89,95,51,63,84,74,54,75,58,55,25,27,61,32,57,22,64,32,52,69,34,85,80,56,63,52,57,66,55,81,83,70,40,71,97,73,48,64,52,61,54,69,63,63,59,52,59,39,51,56,46,49,54,50,52,37,37,69,47,34,50,40,59,50,45,44,35,49,45,66,46,22,49,73,51,70,55,59,62,36,58,55,48,56,40,22,50,50,55,41,50,46,20,34,55,57,37,28,26,53,34,47,50,29,54,49,48,50,50,45,45,39,48,68,36,16,52,26,54,23,25,40,48,41,52,57,45,48,59,44,56,28,57,25,44,33,47,47,29,44,61,53,34,44,57,32,53,54,53,39,55,36,48,61,32,22,40,46,39,46,39,40,60,40,48,29,30,36,24,33,40,28,63,27,49,74,32,50,51,44,46,64,33,62,56,58,34,62,53,31,45,49,43,68,62,70,56,64,54,57,57,56,57,57,61,61,64,61,29,35,78,77,48,49,80,94,115,95,72,70,74,106,130,71,78,65,113,142,152,116,39,13,4,2,25,37,32,26,26,16,24,29,51,9,38,56,49,41,3,9,37,14,25,47,26,48,48,31,64,33,45,31,48,28,59,55,37,64,53,38,56,89,51,58,63,40,45,64,66,82,104,106,105,119,83,92,123,110,128,124,132,124,139,156,192,144,199,173,118,172,162,155,157,148,157,203,166,151,183,173,158,147,145,161,166,156,133,157,156,150,163,143,143,135,154,133,169,147,170,146,164,167,165,136,155,154,138,149,133,137,134,123,141,141,146,152,148,140,156,162,151,152,159,145,134,164,146,146,150,145,138,155,170,155,156,155,138,116,146,146,143,139,146,143,166,107,93,119,119,104,125,152,162,161,145,164,132,156,94,7,11,30,0,5,18,14,9,15,23,8,0,20,28,28,15,23,13,42,19,6,14,21,21,38,28,17,35,28,13,34,19,52,22,38,14,4,22,21,18,23,3,16,11,18,53,33,27,38,22,5,13,36,18,34,23,34,32,25,9,24,22,12,20,35,27,30,13,0,13,40,6,8,29,39,20,3,2,24,36,19,38,23,39,10,22,16,11,21,31,13,14,25,36,62,24,35,35,6,20,18,34,3,29,22,30,34,16,22,28,51,17,39,24,42,8,39,32,65,44,31,47,45,62,31,18,14,32,44,44,27,53,30,61,15,32,51,41,32,48,50,37,51,54,73,78,74,90,65,85,58,69,123,115,124,98,109,118,135,129,159,114,107,136,135,157,129,121,111,127,128,150,135,133,132,117,111,126,138,122,115,124,100,97,131,111,115,133,129,102,119,136,135,119,126,124,118,137,144,130,112,141,126,100,124,125,124,138,137,135,119,119,114,114,112,137,137,116,137,122,113,138,151,125,120,144,136,140,120,126,122,123,130,144,142,148,136,142,127,127,112,140,114,115,142,130,125,134,120,127,141,139,167
,152,154,159,141,149,124,158,138,140,142,141,153,118,136,141,127,132,134,139,133,164,146,134,148,158,139,138,155,121,133,95,121,124,137,130,143,107,121,91,92,103,118,123,112,88,86,91,113,99,131,143,103,81,108,112,92,110,106,101,106,127,108,98,119,114,94,94,106,62,105,88,69,91,110,108,77,101,67,86,40,66,82,81,41,31,58,84,71,30,53,56,89,64,71,93,54,48,85,87,76,87,65,90,69,53,71,96,91,66,94,86,68,75,80,83,102,80,59,59,51,47,53,55,70,50,65,78,69,39,41,61,32,47,99,58,62,62,31,48,68,72,41,57,54,50,37,75,57,53,52,43,49,67,69,46,68,54,52,55,32,35,40,68,66,35,29,49,44,55,53,44,83,51,49,47,55,49,8,28,82,33,48,42,47,34,32,52,58,49,50,41,38,35,31,62,22,54,35,40,20,41,42,30,24,40,59,47,8,59,54,41,49,24,12,36,37,36,28,36,42,15,27,42,52,54,41,32,54,25,44,31,58,24,22,36,39,46,16,66,38,46,54,54,22,32,32,11,42,17,20,50,11,34,18,40,36,37,29,30,28,33,49,30,57,68,69,57,54,50,61,76,61,53,54,69,55,39,58,25,54,57,74,45,72,61,74,73,63,69,84,75,61,60,101,98,104,103,105,102,103,87,92,71,58,77,115,116,68,45,30,30,35,33,51,30,13,23,49,29,29,41,16,19,37,41,24,23,22,39,66,22,34,46,24,28,17,15,19,53,47,47,67,41,38,58,29,35,59,45,31,67,35,67,36,57,67,71,82,90,113,124,133,121,126,147,121,129,96,107,130,114,164,158,168,172,162,168,149,130,138,138,142,168,149,144,169,173,170,150,162,159,147,139,160,151,157,160,163,180,185,154,147,150,152,145,155,145,146,183,161,157,152,152,136,159,126,136,158,154,159,147,148,148,156,127,147,139,145,159,146,131,139,141,156,160,154,158,166,144,139,137,147,136,169,128,140,143,151,157,132,114,157,151,148,124,122,104,98,136,127,162,150,148,164,148,152,115,0,3,17,10,35,20,35,1,29,23,3,6,16,1,27,11,19,13,37,28,0,4,36,22,33,31,30,33,23,9,18,36,27,14,33,25,23,25,54,48,7,26,43,23,31,28,12,18,13,13,12,29,15,17,13,25,18,27,22,41,22,17,24,41,4,32,35,19,32,18,10,4,15,26,46,29,18,6,49,25,36,12,11,23,28,33,42,18,19,33,36,19,29,30,9,29,21,11,33,30,32,9,11,32,21,34,19,32,40,52,52,51,33,50,32,52,53,33,23,17,29,63,26,37,46,64,56,35,64,52,46,22,35,30,48,70,30,43,55,65,54,48,41,79,62,54,58,69,73,102,91,82,95,83,94,84,85,87,99,102,153,159,148,128,102,120,152,148,121,159,136,139,114,142,127,116,145,132,138,142,153,129,141,147,137,123,122,137,129,118,127,121,124,130,141,114,168,131,133,140,162,122,137,147,148,133,129,117,134,104,141,115,130,117,133,144,131,116,122,111,125,132,120,125,110,150,146,146,148,98,130,142,131,144,134,121,142,140,144,148,142,137,143,131,130,153,144,137,130,148,149,143,161,141,133,149,148,158,143,160,151,127,113,114,118,132,155,130,114,135,144,124,137,147,152,122,124,117,124,136,136,127,129,137,116,140,107,137,106,143,171,147,159,136,122,150,114,159,123,130,138,139,143,150,129,144,131,113,133,127,104,116,105,103,74,91,116,83,100,108,117,106,99,111,85,119,82,65,109,97,95,108,96,106,79,86,83,101,80,65,52,61,62,55,94,96,116,89,90,111,95,67,81,86,88,49,69,109,78,80,75,72,78,86,94,67,65,104,98,83,101,76,48,71,68,67,64,56,80,37,63,28,56,42,83,48,69,49,62,46,46,32,65,59,58,48,45,48,34,58,75,42,60,48,72,38,63,46,60,56,30,26,37,15,52,44,44,23,40,36,52,29,54,38,58,53,61,46,48,43,52,44,31,52,64,26,39,62,31,25,49,53,39,55,50,63,51,46,23,59,15,39,52,37,51,42,45,25,50,44,47,32,35,41,53,47,66,50,32,47,36,24,28,50,42,48,45,26,40,26,47,42,38,57,29,33,23,33,38,28,43,44,61,50,45,31,66,35,29,41,24,43,47,48,40,55,60,37,24,42,45,67,20,70,57,41,36,48,44,39,64,51,54,62,55,73,48,54,60,58,65,49,58,44,37,104,50,94,99,94,86,52,105,127,105,85,88,89,97,99,100,68,87,79,88,83,66,65,52,72,70,55,19,2,51,21,20,27,36,4,15,31,27,47,51,40,38,30,27,20,31,30,25,41,42,55,15,38,22,61,17,20,36,31,48,43,29,50,36,35,57,62,65,70,39,56,52,
37,60,80,85,73,91,120,125,121,139,147,123,137,150,143,105,135,135,173,148,144,140,171,139,158,175,166,148,164,154,151,140,171,155,152,174,163,173,160,179,147,141,167,165,167,167,143,168,140,160,183,168,153,164,166,157,179,164,170,161,164,178,165,162,165,163,136,140,156,136,158,138,185,188,164,159,143,129,155,143,126,170,128,137,148,149,154,165,168,160,174,138,147,123,155,159,150,129,153,140,148,153,73,68,91,116,120,158,163,189,193,157,114,12,10,11,13,0,14,7,19,31,11,11,5,4,0,15,5,9,0,33,10,12,22,14,24,30,23,15,14,31,33,27,16,22,24,12,20,8,25,9,22,30,39,22,16,19,22,4,24,3,7,30,28,24,50,31,14,36,22,37,17,15,14,36,15,13,30,22,13,30,26,25,35,23,18,30,35,14,14,37,34,9,5,35,15,43,10,12,28,26,48,26,23,4,20,8,45,7,18,46,37,21,20,47,29,10,42,25,51,42,23,23,30,27,10,43,53,47,45,61,31,23,36,30,26,32,43,47,63,58,55,53,41,29,63,59,67,64,37,36,64,30,56,70,49,84,71,84,83,69,66,99,139,102,92,100,77,80,81,81,107,90,121,117,137,126,146,125,107,158,95,124,128,129,115,130,140,125,141,152,175,123,143,138,125,122,95,112,156,96,134,117,140,131,116,107,123,137,138,141,128,157,148,142,158,124,147,134,150,134,127,130,95,136,133,168,169,142,140,143,165,130,126,111,111,143,143,134,148,119,136,139,132,145,145,140,155,139,129,132,160,160,113,141,146,156,137,115,135,134,118,125,135,131,109,119,139,150,137,130,126,115,90,92,82,124,115,122,134,125,114,121,125,116,126,116,116,100,111,165,123,129,114,126,126,145,126,98,105,129,113,140,152,141,151,117,108,137,111,125,116,116,124,155,110,120,131,110,150,93,77,90,114,81,72,80,73,48,87,94,93,72,115,73,100,75,76,115,74,123,90,103,115,135,89,50,73,90,58,106,74,72,57,48,77,61,102,90,80,56,87,94,28,69,44,57,62,42,61,63,50,61,56,68,66,84,69,70,80,75,82,39,66,73,61,76,72,81,58,45,65,93,75,48,55,31,47,33,50,43,45,73,68,46,30,74,62,49,48,33,45,53,45,42,67,22,60,46,31,49,56,34,55,51,35,50,58,80,39,42,58,62,45,40,46,13,29,41,36,37,14,54,24,68,49,55,50,39,53,32,45,46,40,36,42,66,41,59,20,31,37,50,59,31,51,32,34,53,36,40,33,38,45,19,37,23,45,21,25,33,22,36,40,53,16,34,48,27,44,29,46,43,47,26,61,22,45,40,40,60,50,26,58,65,62,15,34,31,31,36,47,32,57,29,32,63,43,32,30,40,58,32,42,52,37,58,33,48,31,62,51,30,34,64,53,55,34,46,85,73,48,62,30,58,81,71,61,80,69,87,76,59,55,80,67,77,69,50,78,56,82,71,69,57,66,62,71,51,47,54,69,68,64,51,30,35,30,12,23,8,16,31,52,32,30,40,18,20,41,20,59,29,37,22,44,41,24,59,24,55,32,45,56,46,33,38,57,48,47,52,46,45,54,64,63,82,36,75,57,47,91,70,62,88,84,113,113,84,95,121,143,144,124,133,139,145,123,146,165,135,117,134,138,166,175,150,136,150,144,156,159,130,164,150,151,178,167,136,156,149,116,154,160,139,154,165,136,145,159,147,158,144,154,134,178,153,156,150,170,165,144,128,147,121,134,160,140,171,149,172,143,155,143,169,165,153,195,171,157,132,145,158,148,139,162,135,151,159,158,137,170,153,160,130,148,152,171,165,132,123,105,93,67,77,111,141,170,154,143,160,110,11,15,34,7,21,11,26,5,19,0,21,24,12,5,19,5,13,4,10,23,3,19,9,8,9,39,50,38,23,30,15,16,21,26,27,33,24,28,24,27,14,9,26,43,4,11,22,28,23,17,5,9,23,38,24,33,23,15,16,22,36,0,11,31,11,31,48,10,12,13,13,18,17,42,26,16,14,47,26,4,28,4,34,39,14,31,19,13,19,19,31,22,26,39,48,3,20,20,43,46,46,48,11,9,39,35,27,12,28,38,14,12,22,40,49,58,35,18,62,45,27,42,80,44,60,55,72,73,24,61,46,67,64,38,41,43,37,57,97,57,75,90,95,100,106,100,99,124,64,82,74,120,79,89,98,72,83,71,87,61,121,111,136,120,142,141,143,120,133,124,127,124,140,113,127,135,128,113,140,131,123,100,126,115,124,113,106,128,124,148,122,154,123,114,136,141,122,125,118,100,140,119,131,152,134,129,152,134,136,156,132,117,146,134,146,124,139,133,121,131,121,123,117
,150,132,119,114,120,146,113,168,132,139,132,157,142,155,142,144,144,122,124,134,135,117,125,131,134,122,130,112,122,121,137,114,102,133,125,138,118,134,105,137,108,123,128,109,109,121,110,132,140,152,147,137,135,129,98,112,103,111,102,114,89,136,109,105,112,125,124,147,125,123,85,120,103,125,134,105,127,147,136,125,129,132,96,85,95,121,106,83,99,116,80,79,82,97,90,104,72,78,89,86,81,74,77,74,36,65,32,54,78,75,104,65,95,62,60,53,46,52,61,62,63,56,63,69,71,90,55,51,87,70,43,81,65,59,54,52,71,65,49,80,39,51,50,62,45,72,68,63,50,69,49,53,43,48,41,64,59,82,61,50,69,37,66,46,58,71,32,51,43,59,34,67,55,68,65,59,20,35,46,50,48,47,34,29,57,57,55,64,57,53,49,48,61,57,66,32,49,40,62,46,60,47,63,42,49,51,76,56,54,51,41,44,75,37,42,57,64,34,50,32,43,59,35,54,51,59,38,40,60,44,32,34,57,35,48,51,41,64,60,51,54,52,1,58,44,66,38,30,43,36,74,73,48,56,30,38,25,49,37,47,57,38,57,50,58,52,74,44,47,14,10,29,29,33,64,36,37,47,44,42,53,48,30,34,41,34,35,47,32,54,68,34,36,34,32,45,51,65,45,72,71,64,70,68,60,70,54,59,48,40,48,72,74,70,44,64,43,72,69,63,71,89,75,81,55,66,63,56,53,62,42,69,74,62,66,53,62,32,27,37,38,44,28,33,51,44,29,40,26,54,29,24,5,31,34,26,56,52,34,29,22,28,39,7,69,31,32,47,52,60,54,48,54,68,30,58,58,57,38,52,70,66,68,59,52,77,72,95,101,65,51,71,84,90,89,95,116,137,106,125,149,132,149,134,144,153,144,199,155,133,181,151,144,143,150,128,127,129,121,153,132,145,144,126,166,152,153,139,160,161,146,161,167,156,162,170,154,158,145,147,178,150,179,158,153,143,159,157,133,145,138,173,180,141,140,143,155,147,168,164,137,160,170,170,148,153,137,151,153,142,165,149,180,141,145,135,132,144,145,137,140,127,141,121,155,175,137,136,103,89,76,95,129,145,162,163,106,26,0,9,37,16,10,2,12,16,2,14,32,19,5,7,15,13,12,42,12,20,0,3,9,17,30,48,1,28,26,18,7,11,52,36,22,31,9,29,27,7,32,5,11,10,28,13,36,23,23,11,34,30,18,23,11,39,26,2,11,12,17,38,5,15,17,11,11,4,10,9,8,15,27,24,35,35,37,5,37,18,26,2,19,23,19,40,32,12,26,33,39,46,30,24,45,13,49,26,18,25,42,35,27,36,11,33,36,36,35,27,28,5,35,19,57,53,35,28,52,46,39,29,35,65,59,58,72,56,35,54,33,77,49,53,49,35,72,68,74,79,94,66,77,88,96,71,79,80,56,99,80,67,88,64,88,94,110,117,105,108,131,123,118,152,134,126,148,128,131,132,107,126,138,140,101,120,128,119,117,132,125,155,136,104,153,133,130,119,136,164,126,160,144,143,124,101,125,144,113,120,144,143,143,101,111,105,129,124,162,127,133,133,127,134,126,125,87,132,127,131,126,97,134,132,133,133,162,127,134,142,142,135,143,119,131,135,124,144,139,126,130,131,143,129,118,112,152,153,139,112,132,165,135,155,139,115,135,151,150,167,134,134,130,155,121,132,120,127,128,117,125,133,129,156,115,123,134,120,116,121,107,147,134,121,119,130,137,130,130,156,117,128,115,104,129,141,141,122,99,137,115,139,138,144,107,89,125,129,118,123,105,130,153,115,123,154,126,114,84,101,80,97,103,87,59,79,89,63,75,63,95,86,86,83,64,73,83,74,100,100,102,118,97,81,86,96,67,56,102,85,96,54,31,73,61,67,65,81,73,84,56,51,76,52,79,40,53,46,54,56,59,41,78,57,63,29,59,47,66,60,59,51,61,46,58,69,55,45,66,46,49,56,56,77,43,42,74,48,24,24,55,49,46,54,27,55,59,56,32,62,41,52,58,58,40,52,61,42,35,48,61,39,49,26,44,36,45,32,25,69,47,57,16,52,42,44,41,66,46,46,44,34,28,59,52,63,39,42,48,58,51,36,53,49,58,55,66,50,57,26,26,43,58,25,24,52,30,73,21,54,30,52,48,46,57,31,16,48,66,18,48,40,32,41,29,49,56,48,30,26,59,29,44,36,33,39,47,38,58,23,47,54,37,40,24,38,41,78,43,61,15,49,26,43,37,52,40,37,39,33,53,57,62,50,45,47,81,70,48,37,56,52,39,49,51,69,86,46,43,64,79,88,45,86,88,55,95,97,107,108,93,79,58,45,75,59,54,24,75,28,23,45,47,9,24,12,39,31,26,48,20,21,61,46,52,31,35,60,53
,52,43,29,19,54,41,36,40,32,42,21,34,33,60,40,52,45,63,38,37,68,59,61,49,62,22,46,53,77,69,88,77,52,56,51,92,82,92,97,125,136,144,132,162,121,132,162,171,151,157,148,171,146,181,173,177,133,156,166,170,186,140,151,149,157,148,150,126,157,115,167,183,162,116,125,148,171,162,157,151,165,148,158,163,131,154,159,134,109,147,169,168,150,166,166,137,144,142,149,174,145,149,139,142,145,138,158,160,158,156,159,139,173,165,161,154,142,146,131,155,144,130,137,160,146,133,136,142,142,148,144,120,78,84,78,106,123,127,164,102,16,18,15,11,12,11,11,18,12,35,20,21,13,4,20,33,15,9,10,20,3,25,11,25,38,32,34,5,48,16,40,18,11,36,11,30,7,41,18,41,30,11,23,15,31,32,25,33,9,29,36,6,10,28,32,38,35,14,6,27,10,19,7,11,6,17,20,14,11,9,14,27,10,18,38,36,12,9,29,17,16,17,44,23,34,44,38,14,41,22,19,36,30,22,64,33,17,33,20,49,34,35,25,26,28,28,12,55,15,9,39,61,3,39,44,29,16,33,51,40,58,24,24,51,45,46,77,34,76,84,71,71,82,84,73,91,73,87,90,76,90,79,95,103,66,90,57,70,55,95,71,82,92,107,135,104,134,126,111,113,132,137,132,132,122,129,118,98,125,132,134,127,110,135,122,117,148,153,167,152,144,170,152,139,152,153,147,147,125,107,156,129,125,133,127,136,141,129,145,134,143,136,162,142,151,134,118,123,147,142,126,141,149,115,124,144,104,137,124,166,149,128,158,159,145,149,153,170,127,120,154,131,127,114,103,142,101,129,134,160,124,146,120,109,147,118,126,87,110,124,137,134,145,108,118,123,137,128,150,149,164,163,133,130,137,146,123,143,146,146,133,128,138,122,144,138,121,104,128,134,124,120,138,125,123,136,143,181,118,116,137,138,123,149,138,130,160,101,110,107,105,113,140,143,138,140,144,170,148,165,147,142,151,129,125,143,151,128,134,112,117,83,132,111,133,105,119,110,98,64,70,104,86,113,86,69,76,70,96,95,121,103,70,93,76,67,53,88,47,86,37,61,75,69,55,85,83,64,35,60,74,80,69,78,44,59,95,51,50,58,65,39,53,42,55,62,56,31,62,38,63,34,61,55,56,64,43,34,64,46,26,60,53,43,59,49,48,23,40,30,36,38,53,43,29,74,48,61,67,35,35,44,44,48,49,29,53,42,52,55,59,53,73,57,42,71,59,45,53,36,43,46,42,67,51,55,47,36,13,54,26,28,46,53,30,30,43,35,37,37,37,43,70,49,42,56,38,31,50,47,28,29,33,36,37,13,35,37,58,46,25,54,56,53,44,36,79,45,36,56,35,39,66,46,75,46,62,40,33,45,34,33,37,40,49,30,59,51,25,31,40,60,55,50,29,41,14,17,50,62,46,65,66,35,37,33,49,43,82,67,38,45,50,29,42,33,46,38,41,38,54,44,43,60,38,46,42,69,52,65,75,72,55,72,87,76,86,93,72,106,113,89,63,58,30,62,41,49,46,74,60,53,52,25,32,39,44,44,25,38,43,38,37,49,30,37,13,29,32,23,35,4,35,26,36,56,40,52,38,32,44,30,40,59,50,47,42,57,47,51,70,35,62,51,44,66,59,86,67,50,52,60,50,55,55,107,105,104,118,126,151,141,155,184,168,121,181,139,181,168,165,144,170,171,168,159,163,169,160,180,152,191,163,178,148,138,166,147,164,191,157,176,168,131,132,155,170,169,182,156,150,156,178,173,160,189,153,176,162,153,148,172,152,159,148,142,145,165,171,150,156,113,123,141,139,157,173,163,149,148,139,181,119,176,140,168,138,181,161,155,166,170,138,140,139,134,120,177,150,162,166,172,159,121,75,81,114,82,126,119,38,19,14,16,7,23,23,2,25,44,13,20,27,12,4,31,13,14,33,7,16,15,41,25,4,9,46,29,33,14,14,18,49,34,24,34,19,22,32,18,5,16,21,14,28,20,9,30,5,11,31,14,22,35,17,31,11,16,17,4,8,26,18,22,47,24,16,15,20,9,42,25,30,46,13,7,18,19,34,33,17,10,33,29,31,11,39,43,25,34,25,29,17,30,20,5,22,36,30,38,45,64,45,33,33,28,33,33,48,30,51,66,27,35,40,29,44,54,55,48,50,29,50,24,56,45,73,67,94,103,93,99,107,122,88,112,71,101,76,81,71,57,115,68,133,94,96,84,104,123,115,118,137,138,109,116,121,132,138,131,109,138,127,102,130,107,118,128,93,134,128,137,121,114,126,117,157,167,126,154,137,143,123,137,116,117,129,125,136,140,132,13
2,119,146,131,164,128,133,141,137,137,125,161,161,159,162,167,148,155,139,141,138,144,140,140,141,142,158,157,123,162,152,155,157,169,151,137,132,145,137,129,157,122,146,114,113,109,96,123,115,122,131,140,157,129,113,137,103,158,110,138,150,149,118,142,150,142,147,137,155,140,155,128,141,153,148,129,148,141,126,152,165,147,159,133,114,148,131,148,159,131,112,118,127,99,156,134,126,131,112,105,101,140,130,131,116,148,127,125,114,154,155,137,115,106,139,153,159,114,118,132,113,107,140,135,85,88,118,125,79,88,98,66,62,79,114,93,69,112,99,59,56,92,59,74,94,82,89,79,76,49,72,61,28,50,43,46,60,51,65,49,63,44,51,84,68,60,66,58,57,60,71,64,71,65,72,73,44,65,74,80,71,57,55,66,61,19,64,70,87,50,51,54,65,22,50,28,64,65,44,49,56,32,39,57,43,20,55,33,45,50,47,46,91,50,28,62,67,68,67,39,32,49,47,56,34,38,52,87,22,50,57,47,29,50,33,38,51,46,38,63,43,55,42,41,48,42,58,32,43,52,57,36,57,52,67,50,36,34,38,24,49,41,56,42,49,45,35,46,34,46,46,31,26,46,55,53,47,43,20,33,52,60,55,29,51,21,41,74,52,59,39,33,56,63,54,59,56,13,36,44,19,37,31,22,39,43,12,62,12,16,51,46,28,29,29,37,40,47,48,52,41,52,46,43,40,44,44,39,38,57,34,46,34,33,33,49,50,68,46,37,69,39,62,58,67,57,71,60,60,52,67,70,69,75,76,59,100,32,76,42,78,58,67,81,48,52,63,35,54,39,57,34,51,49,18,25,42,55,45,31,45,33,24,14,7,60,50,28,32,51,31,39,45,17,46,32,24,23,82,40,19,25,41,61,8,29,32,62,32,53,68,51,48,67,60,43,63,40,41,77,32,51,44,53,69,83,70,88,124,149,135,154,173,150,145,140,144,173,168,162,140,165,185,187,164,159,167,170,190,185,176,166,155,153,153,160,148,143,177,165,156,168,144,149,140,157,158,157,152,161,154,135,145,186,141,176,145,140,153,130,172,164,156,160,160,164,163,143,153,166,122,151,152,130,137,147,156,148,140,143,159,141,129,137,157,152,163,158,163,164,154,153,159,126,158,122,124,160,111,149,168,168,132,117,79,99,102,119,103,29,14,17,13,35,12,1,35,18,15,13,2,20,20,19,0,1,17,9,7,19,13,3,22,26,36,33,22,35,15,28,19,24,26,32,29,51,24,23,37,12,28,6,13,15,6,31,32,35,15,9,15,23,8,21,20,26,45,20,18,56,48,19,18,13,24,20,36,25,2,12,20,46,46,34,20,23,37,7,27,21,33,33,39,32,39,5,28,45,26,16,21,12,19,36,44,41,38,30,33,7,42,25,51,36,38,31,13,58,19,11,23,23,44,43,53,50,52,50,31,39,75,39,52,50,58,55,80,71,72,71,79,86,80,65,97,73,74,84,48,62,34,62,98,91,94,98,143,105,95,120,157,138,141,115,143,128,125,124,124,138,128,116,141,148,123,134,145,151,136,108,154,126,149,156,164,130,112,135,130,127,105,104,142,109,113,122,128,141,126,130,135,119,141,114,111,132,141,140,139,127,133,162,134,138,122,152,132,157,147,150,138,122,136,109,156,137,138,122,154,154,148,152,155,168,153,125,130,142,94,109,126,134,135,133,133,114,147,120,150,130,160,136,110,163,161,139,159,159,132,135,131,155,166,158,150,125,107,137,134,137,154,165,110,123,140,131,119,144,147,137,140,159,117,142,137,168,190,161,121,131,114,137,140,136,137,156,157,160,168,130,160,133,101,107,102,91,98,140,125,115,108,118,74,94,132,120,104,67,94,122,96,111,99,89,89,74,81,86,93,83,91,70,44,78,91,120,100,114,76,80,89,68,61,85,56,74,80,94,92,74,63,81,60,61,40,53,31,60,59,45,53,47,38,64,90,54,50,45,86,26,72,70,58,43,41,37,71,58,38,85,58,49,79,56,71,51,75,64,56,48,58,54,47,56,49,58,63,66,60,65,65,40,66,43,74,54,46,56,59,47,53,50,33,59,56,47,74,46,56,59,49,64,18,65,55,52,28,59,42,56,54,64,68,48,71,47,24,47,67,44,62,26,57,60,23,69,28,32,43,46,51,31,26,48,16,42,49,34,69,51,47,48,22,26,42,64,29,40,41,47,53,42,47,30,15,38,19,27,54,48,28,26,37,29,45,54,27,41,27,36,36,40,35,13,32,48,24,40,49,39,40,25,68,41,51,27,46,22,51,53,28,55,34,46,43,62,36,54,40,44,45,55,52,37,43,55,46,53,58,61,39,56,31,37,47,39,61,73,62,58,60,43,54
,46,54,59,70,73,51,92,70,76,102,106,74,58,42,48,56,55,64,38,68,68,75,40,61,42,37,56,52,65,46,49,29,30,50,56,37,27,50,29,44,44,21,11,40,51,50,26,38,25,50,40,14,53,53,28,52,48,41,42,65,70,47,38,45,54,22,63,70,60,47,27,62,52,46,61,50,89,52,83,77,64,63,62,49,64,91,112,82,108,122,145,137,136,119,161,160,129,128,145,148,175,147,123,160,134,188,166,161,167,169,164,191,151,148,158,141,157,172,145,159,142,124,150,157,160,159,148,145,166,168,136,140,143,114,139,130,168,160,149,181,175,156,168,160,145,163,171,155,164,163,140,160,141,178,160,165,154,170,156,163,138,130,147,153,144,126,163,148,155,147,136,137,143,143,155,156,153,143,173,144,130,122,116,78,93,81,16,21,32,0,0,9,7,26,6,12,17,18,22,6,18,18,7,8,19,6,14,17,23,14,27,31,11,39,48,25,20,15,38,29,23,31,11,51,31,32,13,30,20,18,11,21,13,12,6,24,34,9,43,13,27,18,43,3,29,36,35,20,29,16,26,34,25,29,30,12,28,29,21,25,40,31,20,26,25,39,23,25,11,34,30,22,20,40,49,49,32,11,38,40,17,40,24,30,16,38,52,24,29,40,45,35,35,48,40,48,32,40,22,25,29,34,47,57,47,67,41,32,63,24,40,52,57,47,46,54,48,28,36,40,79,53,85,47,28,55,15,48,54,71,58,90,104,86,109,137,130,118,138,126,139,144,167,164,148,162,141,169,154,149,129,124,138,141,126,124,152,145,158,154,141,138,149,126,134,131,122,98,113,112,141,142,121,135,131,160,161,150,164,167,140,149,140,144,114,141,128,108,124,125,128,109,159,142,123,131,150,131,143,125,151,126,113,130,101,140,141,108,141,141,113,144,141,134,145,171,134,142,143,140,157,134,114,158,147,154,141,141,122,134,132,143,141,167,158,143,138,130,102,124,119,132,139,136,146,151,145,161,134,134,107,115,110,135,135,129,178,120,142,125,114,134,143,163,148,151,162,164,151,144,146,112,156,132,138,118,149,158,130,129,126,101,117,110,113,112,83,112,126,115,92,92,84,84,60,73,104,69,118,129,99,103,89,92,95,59,107,99,121,97,92,90,90,86,111,95,101,97,93,13,56,49,68,53,86,80,69,65,73,82,120,45,92,78,94,77,73,65,73,61,46,69,39,58,68,54,68,57,53,41,43,31,66,51,76,37,60,57,41,46,57,55,47,70,59,60,55,56,52,56,32,39,39,56,46,54,70,52,39,62,46,40,65,54,25,57,48,65,44,62,69,42,28,49,67,44,66,61,49,47,38,75,66,57,62,68,64,54,40,47,38,56,36,30,43,19,50,39,41,36,46,43,60,52,51,31,40,53,45,55,51,17,53,18,35,47,53,43,54,54,45,27,29,18,19,49,50,13,36,29,55,46,34,59,47,49,45,47,45,47,41,58,28,58,44,23,55,33,45,64,53,37,23,45,28,54,56,20,58,52,54,25,42,51,66,31,62,53,32,34,29,38,67,33,52,23,44,37,50,43,48,44,50,45,57,52,58,42,52,44,50,49,58,58,58,67,52,21,37,46,60,46,43,44,49,51,38,69,53,59,68,60,59,47,73,41,73,70,88,69,71,53,55,44,44,39,33,53,47,29,29,25,17,56,46,20,34,31,30,61,51,56,25,62,39,31,45,44,42,31,25,31,31,55,29,41,37,42,50,46,47,51,53,57,47,55,48,28,58,39,71,52,39,61,60,54,84,60,90,62,75,74,52,73,92,107,140,106,125,105,126,130,121,130,159,178,150,138,127,138,169,216,132,150,177,167,145,151,164,161,155,156,138,151,168,161,161,144,147,133,143,156,167,140,129,156,132,140,156,157,122,121,134,156,149,172,193,179,158,150,153,132,173,161,154,178,170,173,130,136,138,163,121,165,132,147,152,186,164,182,126,156,144,154,172,157,156,166,158,127,161,131,135,141,153,157,156,133,163,145,146,143,118,100,76,65,25,11,40,0,11,37,46,12,2,24,7,27,22,31,20,12,24,5,17,8,27,26,11,9,17,16,15,20,43,8,17,11,30,31,28,43,33,37,15,21,28,21,29,48,19,7,9,14,29,30,10,22,25,21,18,31,43,19,31,11,38,38,32,43,22,32,17,43,22,23,37,27,32,8,18,25,23,32,28,10,21,20,36,23,22,33,7,50,24,45,30,47,42,31,34,45,38,30,27,34,22,36,51,24,48,50,35,33,49,69,16,18,31,25,50,42,54,55,58,65,35,34,75,41,50,47,60,44,62,58,44,27,42,40,59,76,70,55,57,41,75,65,41,80,45,85,94,130,136,108,102,99,135,95,120,137,156,147,191,155,148,113
,144,117,98,133,142,143,124,143,143,134,125,143,156,117,121,123,139,150,127,155,159,121,145,152,134,136,103,132,131,127,133,154,156,145,138,143,150,139,142,120,131,130,135,146,127,123,128,131,131,135,138,132,111,134,153,111,118,126,130,121,133,131,129,158,151,128,171,152,135,135,148,157,149,153,151,157,139,133,128,140,134,126,145,122,158,165,144,133,113,132,142,143,151,180,140,111,127,131,137,137,123,117,135,120,126,118,115,117,126,168,126,137,138,146,103,81,129,105,115,120,100,115,102,92,100,94,94,103,80,101,100,142,131,129,117,129,137,124,131,149,141,143,122,139,113,106,106,92,120,121,137,99,101,94,122,146,111,100,110,136,129,130,114,80,74,75,49,44,76,91,71,57,93,60,65,74,62,54,69,62,67,57,98,104,88,89,93,58,72,75,63,76,72,84,77,78,76,44,28,68,76,65,57,72,68,72,52,55,74,40,37,71,60,56,45,44,51,21,38,42,36,41,72,35,44,55,30,42,43,65,29,42,53,38,54,81,56,65,61,48,36,67,56,45,62,37,70,49,52,65,54,58,48,44,57,34,22,47,45,54,44,63,33,34,59,56,59,51,25,56,29,55,53,62,57,42,50,18,51,40,23,35,31,48,48,26,53,47,45,35,30,39,22,44,36,40,40,64,40,73,48,62,36,43,27,33,39,56,50,66,40,41,47,44,38,50,35,49,47,48,36,45,28,27,12,46,38,38,29,35,38,62,55,43,5,45,44,47,74,41,42,40,47,47,42,21,42,66,35,72,28,32,54,60,65,51,66,58,38,41,52,67,72,41,63,50,41,32,34,35,43,56,23,56,48,51,57,54,40,49,66,44,43,43,59,58,54,36,87,57,49,77,78,64,67,59,30,55,66,21,28,37,26,53,32,37,30,30,25,34,37,30,27,36,40,30,52,36,64,26,47,38,44,32,50,46,11,21,57,43,43,40,31,43,64,53,37,33,54,19,58,69,50,55,66,51,57,77,87,86,70,94,95,89,102,104,109,142,133,119,132,128,164,135,158,156,160,172,182,167,153,163,161,195,151,171,155,150,155,162,138,144,150,163,150,130,167,155,164,129,165,127,171,138,173,142,164,133,180,165,162,164,161,167,164,173,168,155,166,162,176,169,163,163,138,166,166,167,144,153,160,167,141,134,164,150,145,153,164,154,121,136,141,129,139,153,140,155,149,137,136,120,153,134,147,134,143,134,149,151,123,134,152,136,146,116,82,80,30,5,27,6,26,17,10,1,11,33,14,9,33,0,22,13,1,14,39,5,33,32,22,21,34,19,22,21,45,37,8,18,31,19,8,34,32,6,31,28,22,21,17,40,41,15,16,14,32,14,22,14,13,19,29,26,22,9,17,26,33,26,40,18,43,24,3,15,33,40,29,15,48,21,32,30,50,31,41,23,43,31,23,21,29,29,9,20,29,9,31,11,22,42,51,36,47,41,47,57,26,26,18,54,58,15,32,78,57,54,65,55,87,57,52,54,67,91,99,83,74,85,102,118,94,111,157,133,138,126,135,140,113,107,113,107,82,63,91,63,66,97,110,95,98,87,88,97,74,86,79,90,99,108,93,83,75,97,110,97,108,113,127,143,123,119,127,146,131,125,138,151,159,155,156,161,122,113,145,150,137,139,130,137,141,173,156,132,146,145,141,153,120,155,142,160,159,158,154,161,121,126,138,126,125,142,143,138,136,169,166,134,152,126,146,163,170,158,146,164,154,140,155,154,143,152,192,152,142,156,170,132,130,128,119,120,89,123,111,125,141,137,122,144,130,132,162,150,136,116,111,145,165,150,142,163,158,163,147,141,117,96,107,156,138,191,156,155,175,165,115,108,122,129,147,163,165,129,161,179,154,170,131,151,145,176,149,145,139,149,148,148,135,107,132,122,133,128,132,149,127,120,120,112,130,136,143,98,93,127,158,123,117,145,151,114,101,151,144,143,122,98,97,126,70,97,118,115,170,130,94,58,57,91,105,68,98,112,113,90,85,104,100,88,82,61,78,50,44,61,46,65,78,80,72,76,73,64,71,47,92,93,85,90,82,52,96,75,85,51,98,64,58,63,55,79,56,84,42,27,73,51,44,46,56,39,54,62,58,26,65,62,22,42,38,30,45,70,18,32,17,37,78,32,61,58,48,60,45,25,54,37,41,55,46,75,57,27,54,45,42,64,43,43,59,69,46,28,60,77,30,40,33,59,32,36,32,54,50,50,41,32,49,34,32,65,50,53,29,41,39,37,64,49,21,39,47,49,55,53,58,25,64,44,51,46,50,53,42,38,37,41,45,14,64,70,43,40,27,30,46,52,66,29,46,
37,40,59,44,36,28,46,45,32,36,25,10,51,70,31,36,55,46,36,20,36,40,29,52,41,65,29,44,59,57,57,37,35,76,32,25,54,62,52,59,62,39,42,45,58,54,44,31,52,56,30,37,29,29,49,42,32,50,52,44,32,75,45,48,44,46,68,47,53,66,58,48,41,78,43,85,50,32,67,47,40,66,42,43,36,27,39,57,25,51,31,20,23,49,50,53,48,35,39,40,29,76,41,66,18,33,28,58,47,56,11,29,42,48,42,52,40,16,55,57,46,53,72,67,75,70,56,49,60,92,75,97,124,116,109,100,132,135,134,135,147,141,166,176,172,175,156,157,149,134,152,175,173,177,170,158,166,156,163,181,161,162,151,164,178,166,156,178,155,163,153,161,163,142,153,134,170,150,138,151,144,153,148,160,129,158,141,166,163,139,182,145,157,173,151,124,154,151,142,156,143,134,161,144,182,143,161,153,144,158,146,149,144,150,155,148,125,157,142,148,149,129,148,121,129,113,135,141,176,159,160,159,161,130,132,29,2,28,21,8,33,7,21,25,31,12,11,19,8,10,9,3,9,31,15,23,13,46,13,7,23,31,26,14,15,33,33,23,13,32,20,33,20,23,33,25,21,19,28,5,14,24,31,28,36,22,14,20,32,28,42,16,7,20,24,16,32,53,36,11,29,15,10,11,42,16,26,40,11,26,4,12,24,38,6,26,44,54,12,20,22,24,24,31,52,45,36,47,24,48,48,41,41,27,24,61,38,14,45,57,61,51,46,31,43,72,62,58,82,29,64,61,60,75,73,81,57,91,104,99,121,123,129,134,114,122,115,120,116,102,115,85,80,63,78,75,109,96,118,94,90,96,83,106,89,101,105,106,100,107,70,101,90,91,113,119,108,117,162,154,135,149,136,140,130,159,121,141,157,126,150,142,117,115,148,138,139,156,142,134,136,164,121,142,102,118,130,165,132,149,137,143,149,143,134,126,117,135,114,144,144,150,119,128,152,167,136,146,156,155,158,156,141,139,155,151,142,150,141,146,155,178,161,171,157,152,139,146,122,138,128,113,137,126,144,130,151,125,148,128,146,145,149,104,104,146,140,137,182,147,140,170,174,140,152,122,117,133,142,145,151,141,159,123,174,126,122,139,147,144,166,135,130,137,177,163,133,148,121,146,158,136,139,159,137,158,153,128,115,123,123,138,145,148,113,105,100,92,118,121,121,124,119,111,131,112,142,138,135,94,129,110,104,120,134,142,127,112,94,95,99,127,124,136,163,111,77,84,80,96,87,132,106,103,94,102,113,93,77,88,84,79,36,54,65,74,72,65,55,51,65,48,47,59,72,77,86,86,62,93,55,82,83,95,79,64,89,73,48,70,78,20,47,37,37,55,54,38,72,46,42,48,45,48,44,59,47,62,53,42,34,58,50,62,58,40,42,78,67,54,39,35,55,47,35,42,36,37,43,45,57,45,67,67,60,30,40,61,47,65,48,55,46,51,77,49,58,48,70,28,49,52,34,48,54,45,44,46,31,46,56,28,35,56,54,42,39,57,66,38,45,32,32,60,69,23,45,50,47,36,57,47,40,67,30,39,38,41,53,52,49,40,46,17,26,41,49,50,42,84,47,26,48,44,34,46,44,70,38,39,56,44,54,61,62,40,72,44,35,42,36,53,32,19,26,13,34,65,79,37,45,37,20,40,38,49,55,46,19,50,41,36,30,46,43,49,57,41,54,24,62,48,27,52,32,65,49,53,46,81,46,40,39,46,53,53,56,69,31,66,51,44,27,52,53,67,49,30,42,46,42,36,20,39,29,50,38,50,21,29,44,20,46,35,24,38,54,34,51,43,38,27,15,37,19,51,57,52,48,57,30,48,81,42,35,70,41,43,62,25,42,58,76,52,77,79,58,84,82,73,84,88,128,82,123,105,130,164,131,149,161,161,158,170,153,166,156,190,149,162,158,168,148,164,163,140,160,151,156,168,158,133,152,171,180,169,169,154,159,157,149,143,175,141,149,134,182,156,155,154,148,146,173,172,160,141,125,129,147,139,160,150,160,174,161,153,151,137,125,151,169,158,130,160,142,127,136,136,145,130,154,137,144,154,154,143,125,111,121,136,156,145,145,115,137,138,174,137,138,132,144,138,146,165,100,25,5,1,19,8,1,5,16,17,17,39,18,25,31,19,27,13,19,21,31,17,25,19,10,8,32,27,1,26,29,29,21,31,24,16,31,17,41,27,15,22,37,23,43,23,13,26,33,16,18,14,30,13,10,4,30,16,42,21,42,21,4,4,24,14,11,31,18,43,27,21,50,38,27,58,23,43,39,67,48,39,35,25,48,22,34,27,22,40,35,19,18,21,16,14,50,36,46,34,42,22,35,43,34,73
,56,49,57,57,65,57,71,70,72,57,56,76,100,89,89,91,92,118,128,98,109,121,92,135,115,109,107,123,99,114,94,158,104,124,102,150,106,110,121,124,135,131,143,149,132,139,151,139,108,126,114,125,133,120,163,169,139,125,151,152,148,158,151,162,149,165,165,151,139,132,137,139,132,158,136,123,138,153,151,145,137,143,144,126,142,138,151,128,144,130,132,154,113,141,139,147,148,130,143,160,157,118,143,142,144,140,131,127,127,134,152,142,101,148,125,136,139,137,153,153,130,144,127,134,152,135,126,151,161,158,121,139,140,149,168,173,159,144,150,172,176,132,149,131,146,157,146,148,150,150,128,139,151,152,155,146,159,143,119,143,158,141,113,139,135,134,133,126,108,85,116,119,124,121,112,122,131,124,132,126,112,124,130,127,135,124,27,4,20,12,23,20,7,5,17,4,28,8,19,4,0,31,20,16,5,5,25,28,18,1,15,35,40,25,31,8,33,4,1,22,10,16,33,11,3,3,9,7,28,14,1,11,14,15,12,16,28,23,6,11,21,7,22,24,20,5,14,5,9,6,11,10,24,15,37,21,21,26,35,18,33,12,0,25,21,3,26,26,34,11,22,4,29,27,13,11,17,29,7,8,15,20,11,19,36,21,26,15,41,38,27,46,5,14,23,18,14,11,7,24,12,13,26,18,40,34,11,24,22,20,4,31,10,23,15,18,9,9,25,18,14,31,26,11,14,16,11,17,17,9,14,10,10,1,16,11,2,16,4,29,3,3,16,29,45,14,15,36,13,27,26,29,11,3,15,13,22,11,39,1,14,3,16,6,17,7,43,18,25,0,20,24,17,7,18,5,25,2,4,11,14,30,15,8,38,28,23,7,16,42,42,7,10,23,25,6,11,16,30,14,47,36,17,28,18,15,41,21,26,16,24,34,23,23,14,15,4,15,37,13,10,23,10,0,9,25,7,17,5,7,27,15,5,15,6,4,27,6,13,36,8,12,18,2,1,14,15,13,46,8,31,35,21,23,19,10,1,32,23,17,3,18,42,28,11,3,23,18,10,45,0,15,12,37,11,3,5,31,34,26,12,30,3,4,13,4,11,10,27,1,15,10,10,8,13,4,10,10,4,24,16,5,22,16,10,45,46,45,26,11,25,49,19,19,3,10,12,13,16,24,25,6,4,17,19,3,14,15,45,40,10,16,28,12,11,10,19,14,6,51,16,29,42,13,29,1,25,26,13,10,31,5,19,6,22,30,20,18,37,11,18,25,12,20,14,12,2,15,26,11,1,24,24,21,15,9,38,38,46,8,7,23,12,15,19,18,24,2,28,5,2,16,25,30,3,16,58,19,15,31,15,26,20,11,43,17,31,26,21,3,12,8,29,8,2,24,38,24,18,10,1,20,16,32,15,7,19,1,0,7,0,26,4,2,34,8,9,32,32,26,42,26,39,37,14,51,23,25,8,43,14,14,33,33,18,6,21,11,20,29,23,13,21,18,21,26,11,35,17,20,21,48,25,16,16,33,21,28,19,32,33,37,26,48,39,27,19,12,20,45,19,21,35,49,25,53,56,20,40,6,10,21,28,56,24,31,48,36,28,21,22,70,25,24,34,36,47,29,82,58,49,63,58,64,71,40,68,53,42,46,59,74,94,99,79,97,103,128,109,78,100,112,117,100,129,104,115,92,106,91,82,144,140,131,126,127,131,128,148,141,115,138,138,137,143,124,139,114,151,124,150,147,126,159,145,155,170,139,158,136,158,126,146,157,134,164,174,137,143,98,149,144,132,129,157,149,123,110,134,126,112,145,139,161,139,137,138,145,146,131,109,127,108,125,138,148,121,130,112,131,154,123,170,155,150,137,130,126,153,142,122,131,129,117,134,148,126,162,117,136,157,143,151,103,143,156,120,150,154,157,159,122,159,130,140,144,145,152,117,164,162,143,151,160,129,147,142,151,116,144,140,142,148,130,159,152,156,124,156,120,160,118,118,99,148,186,132,125,111,113,122,122,125,123,132,124,129,134,149,132,130,118,123,128,145,125,41,0,24,35,37,7,18,31,15,20,13,18,12,23,8,39,16,23,16,30,2,23,21,17,16,21,8,12,21,7,22,17,28,27,4,3,35,4,20,30,8,3,15,19,42,29,12,31,18,18,13,11,19,24,14,25,51,11,41,8,11,10,1,19,14,2,11,16,25,19,11,11,16,12,52,19,8,18,12,18,42,23,8,12,24,19,12,17,27,28,13,42,5,24,19,12,35,7,15,15,17,12,19,21,12,35,13,35,11,7,0,22,21,28,18,13,31,21,1,5,15,14,41,8,21,41,30,0,26,16,6,17,22,13,28,8,20,30,9,26,30,6,12,34,45,42,58,23,4,4,14,3,41,7,25,1,7,10,2,20,37,24,22,31,4,29,6,5,29,23,11,13,22,31,30,33,13,13,24,1,4,6,10,21,3,13,4,30,22,17,26,19,13,30,47,32,17,18,0,18,12,18,12,10,9,5,8,18,3,35,12,41,17,13,7,10,24,28,34,24,6,39,9,25,30
,13,27,10,4,18,10,31,32,8,23,39,26,14,13,35,28,18,10,25,23,4,20,6,25,40,18,38,15,11,25,22,30,12,32,20,25,2,20,16,23,9,31,20,5,22,13,15,14,23,23,21,18,12,11,16,5,1,15,4,40,18,20,18,7,21,8,22,1,15,28,30,17,38,20,8,33,17,23,14,10,1,22,45,20,19,19,12,10,25,3,3,13,12,29,7,0,23,13,1,10,18,29,54,5,10,22,12,13,20,16,21,6,12,3,5,23,19,17,10,13,16,12,24,31,20,36,27,23,1,32,30,18,8,24,27,12,24,50,28,5,6,5,1,24,30,13,33,27,18,23,5,2,31,15,1,8,32,17,21,13,8,15,34,30,7,20,22,15,4,7,10,35,14,14,41,14,42,13,34,16,20,14,22,29,26,26,8,12,16,30,24,22,10,4,0,9,21,13,8,11,20,10,17,29,12,15,14,17,20,6,18,11,19,6,11,24,22,3,26,31,0,3,12,32,8,21,5,18,20,26,9,5,33,19,25,31,38,20,16,14,5,17,10,34,12,26,8,12,19,38,28,30,17,33,32,6,21,38,19,15,3,31,22,35,7,21,3,30,43,17,14,10,40,28,34,16,30,26,40,18,6,29,9,18,2,36,25,34,16,30,3,30,14,13,28,36,38,11,38,38,51,25,8,36,21,22,17,37,17,33,38,24,50,31,28,25,20,18,30,38,40,9,31,22,14,42,33,35,40,36,53,10,47,44,34,60,57,35,49,45,70,56,39,33,31,48,53,25,40,53,35,52,49,32,60,55,38,50,65,65,56,71,101,91,91,77,98,116,112,110,84,118,110,104,102,117,128,141,112,114,152,125,142,130,128,147,142,144,134,112,156,130,139,119,147,130,131,140,131,130,119,145,106,112,103,128,112,129,123,107,135,133,110,131,154,148,130,149,148,138,129,162,120,112,148,138,126,141,124,122,139,134,126,137,121,140,103,134,147,154,115,117,147,104,110,127,153,157,147,163,172,152,137,159,152,144,128,123,144,155,136,134,131,142,115,136,126,128,123,101,120,107,113,136,138,120,113,97,85,111,111,133,111,105,143,102,89,141,106,118,149,135,122,115,106,96,149,119,123,153,90,136,138,115,123,130,148,150,122,124,144,135,124,127,116,126,118,151,138,138,144,96,136,121,138,99,98,84,79,103,94,62,71,85,66,78,81,97,101,84,88,85,121,59,102,107,100,100,132,89,64,81,82,74,94,59,63,43,62,53,50,71,93,56,98,73,68,54,53,46,36,61,70,70,50,51,54,86,42,63,62,46,89,67,67,45,77,54,68,74,72,69,71,47,67,71,51,63,58,56,70,63,52,83,57,39,40,21,23,61,69,49,46,35,39,23,47,44,62,43,36,40,49,52,46,39,37,54,62,25,14,39,45,26,67,43,25,52,65,40,35,54,51,39,49,38,37,43,34,56,43,35,30,53,51,35,47,32,28,40,30,48,40,32,23,18,44,48,48,30,30,37,34,29,36,46,44,45,37,52,42,58,45,29,30,34,33,51,42,49,35,50,46,22,44,41,50,66,57,36,15,31,40,53,52,27,43,62,33,40,37,36,33,36,22,54,46,22,42,52,40,32,58,44,22,42,36,32,55,31,42,60,43,59,73,54,41,60,24,51,51,35,64,65,46,54,68,60,55,62,69,81,68,80,90,62,48,78,71,82,68,74,42,82,95,84,82,28,48,61,86,73,42,70,60,56,67,31,27,30,33,64,29,57,43,20,48,13,28,29,10,40,32,51,31,46,29,32,31,28,64,28,11,39,56,27,39,32,42,20,22,54,47,45,74,79,46,63,43,59,52,34,51,44,85,82,77,66,68,84,70,119,126,94,111,131,114,145,148,146,141,140,128,134,156,128,147,184,156,139,158,142,122,146,153,152,150,156,149,107,166,136,172,149,171,146,164,162,150,146,166,163,163,142,141,158,181,149,143,121,162,149,147,133,168,147,150,157,127,159,161,169,160,166,149,168,141,169,146,164,168,147,152,177,174,156,144,148,133,171,138,154,153,146,152,153,118,162,146,127,137,142,174,137,160,102,85,85,86,117,152,141,174,135,168,102,9,17,25,0,15,1,9,20,13,12,23,0,23,10,30,24,18,18,14,35,6,43,28,16,11,8,22,3,30,41,21,12,9,10,20,33,35,17,33,8,34,24,46,23,49,38,30,7,27,11,26,13,12,34,11,13,49,24,9,44,10,10,30,35,27,14,20,37,37,7,20,14,31,29,26,37,16,19,21,6,43,45,30,33,28,37,35,11,31,41,8,16,32,30,24,38,13,61,40,27,50,40,11,38,16,51,33,22,12,30,28,28,12,8,36,56,63,64,47,49,35,24,40,48,56,50,71,61,61,68,58,31,64,51,48,76,82,57,59,47,62,89,111,89,89,89,97,75,81,61,57,70,91,97,101,71,100,72,84,89,110,110,115,151,132,123,121,133,137,125,161,157,99,119,117,126,134,108,121,112,120
,126,146,121,134,94,144,128,122,136,134,142,149,107,141,134,151,131,130,142,124,146,134,153,136,113,107,117,123,142,165,138,128,129,156,135,143,117,116,127,123,118,129,130,86,138,95,122,129,145,123,121,147,123,140,145,176,136,114,140,110,129,115,117,108,159,118,122,130,127,126,123,116,114,139,122,152,123,136,135,94,120,133,115,138,131,113,124,106,117,97,125,153,147,130,116,112,122,113,79,130,131,99,124,97,93,124,116,104,125,115,114,106,79,99,118,104,129,117,116,123,133,147,127,102,72,90,108,77,98,105,109,94,65,41,79,92,99,98,49,83,78,72,96,60,63,77,58,58,48,81,67,111,80,23,51,48,87,65,56,51,68,85,52,49,102,55,50,69,50,38,69,64,57,33,68,52,71,49,57,50,50,54,63,46,62,52,77,77,52,57,76,39,37,71,48,63,60,64,67,46,62,59,68,40,82,32,64,72,50,58,70,40,58,39,63,51,51,38,53,66,50,59,42,42,38,43,28,35,14,28,49,60,63,50,45,32,27,65,47,60,39,52,40,46,46,24,74,53,57,37,60,50,37,49,43,43,45,49,50,48,37,43,52,29,57,52,49,39,23,67,43,49,55,36,33,25,55,45,35,59,34,36,43,40,48,39,58,26,31,40,50,40,32,26,32,28,51,60,45,36,62,70,56,26,71,50,54,49,35,54,31,50,19,54,41,37,23,35,54,29,11,40,34,37,34,47,56,29,40,35,33,54,65,34,61,65,37,63,42,75,48,52,64,52,41,23,55,53,46,68,60,68,51,23,58,45,49,49,47,90,73,69,67,75,91,56,62,29,66,77,48,44,32,64,63,42,68,70,63,53,54,40,49,23,40,63,44,40,38,39,50,39,7,40,38,31,27,32,37,22,36,4,31,42,33,16,32,30,22,49,50,20,54,30,30,38,44,47,43,46,57,60,46,76,84,40,100,62,98,97,56,49,72,77,94,65,90,74,154,139,122,137,124,124,114,153,134,152,152,152,158,179,181,138,169,119,148,162,124,145,142,156,161,138,139,156,146,153,140,168,174,160,152,178,147,161,170,166,162,152,156,154,160,130,157,120,138,166,149,149,136,145,122,160,161,132,156,155,144,161,163,173,157,169,166,150,170,143,151,127,154,166,146,153,181,161,134,138,128,182,153,143,146,141,123,170,139,128,126,113,88,77,68,78,104,154,170,171,114,14,5,7,6,23,19,18,10,13,38,33,15,28,1,15,25,44,13,19,29,44,10,21,13,24,13,28,51,16,31,28,22,15,17,44,9,14,28,9,37,9,38,24,5,10,20,4,28,18,32,13,19,15,14,28,32,25,28,21,17,5,26,15,41,27,16,12,23,19,27,14,10,28,49,3,42,60,14,4,41,26,29,24,11,28,40,38,21,17,12,32,42,20,20,33,45,30,23,29,30,39,48,26,36,12,18,23,39,24,41,10,28,11,32,30,52,25,11,82,41,48,40,51,34,79,31,54,28,37,31,36,20,69,47,39,67,49,66,67,93,77,89,83,89,72,90,84,70,68,76,79,100,112,106,80,107,95,108,111,91,131,123,131,123,132,137,146,126,135,112,108,155,119,106,146,126,138,127,127,103,96,145,138,134,121,109,126,150,126,149,136,124,147,159,128,128,113,123,100,102,113,135,104,129,144,93,81,96,127,126,124,124,123,114,126,128,148,132,146,138,143,111,141,151,145,153,145,133,142,146,139,130,122,111,110,136,131,99,134,111,121,137,121,132,122,134,112,133,128,143,100,125,142,137,122,135,136,130,117,153,133,137,131,127,99,115,101,125,133,106,119,143,147,135,112,143,129,114,105,122,132,121,134,117,90,114,148,129,140,151,133,139,99,112,134,113,130,132,144,110,116,130,134,146,124,98,120,105,120,121,135,92,135,118,131,142,114,144,121,83,79,106,112,121,86,73,70,86,74,85,76,102,123,76,118,92,58,70,71,81,71,111,117,109,100,77,67,75,78,86,59,96,47,38,97,65,62,100,67,71,40,34,66,42,59,26,68,64,27,32,31,52,45,50,47,60,66,73,62,65,42,77,47,56,37,21,70,67,70,47,55,38,56,47,55,73,44,55,41,65,48,44,40,39,46,54,62,43,14,68,53,78,70,53,63,46,50,51,79,62,31,42,47,68,60,46,73,25,78,38,53,34,42,40,24,48,23,47,21,48,52,55,44,48,66,13,41,46,53,30,61,36,30,47,37,36,79,68,59,49,41,42,47,46,55,44,46,18,8,57,52,28,26,33,39,76,64,26,30,48,35,41,56,33,37,29,51,42,30,54,38,22,44,49,47,25,40,38,23,56,37,43,42,18,46,32,33,37,37,59,20,25,49,18,21,47,40,47,65,46,41,43,50,6
4,55,66,57,55,63,50,34,37,34,67,63,60,31,65,58,75,71,88,61,77,92,88,84,90,75,84,54,87,106,48,58,58,57,71,41,30,52,36,31,36,43,19,40,13,50,35,26,35,25,41,18,58,19,20,56,36,41,22,21,24,55,40,28,51,25,23,27,22,21,44,47,33,42,41,25,52,68,51,64,70,53,62,37,77,59,51,75,61,51,48,72,99,78,78,127,134,141,127,141,142,124,146,154,158,134,166,156,170,171,175,158,173,158,149,160,164,159,159,175,164,126,161,145,146,158,133,169,174,135,152,138,158,167,172,186,156,165,162,160,160,172,165,149,143,152,156,136,149,163,148,154,158,138,145,161,151,153,154,138,134,147,144,133,150,152,165,189,158,156,152,167,150,159,122,143,166,137,136,131,141,143,142,165,134,147,146,157,143,94,86,93,87,114,122,167,123,20,3,20,23,12,27,1,22,15,20,9,16,1,26,6,17,10,34,32,12,23,19,53,33,27,37,12,19,38,3,41,8,14,9,14,26,6,32,6,25,45,26,7,6,14,15,13,40,31,8,21,7,27,51,20,29,4,35,3,15,16,38,13,18,10,8,41,29,32,9,2,19,16,13,46,14,18,16,19,14,44,7,16,8,20,27,4,10,2,17,28,30,62,25,10,18,35,14,33,17,31,50,39,11,30,7,9,14,33,44,34,54,34,41,13,26,21,36,39,28,52,48,45,54,70,53,65,61,69,42,52,64,83,75,84,71,103,83,91,79,66,82,84,75,68,78,87,71,75,84,69,109,108,116,133,139,109,120,126,129,141,131,123,121,137,112,96,137,120,117,132,139,137,136,103,142,151,138,162,142,166,128,165,137,154,145,141,156,149,133,125,139,119,106,131,133,138,151,133,138,135,107,144,145,142,125,126,113,121,110,106,125,128,115,130,140,106,125,143,146,150,166,149,151,148,140,136,145,137,147,143,153,145,106,122,132,103,132,132,138,128,153,121,120,119,128,113,116,95,119,147,126,121,141,133,129,122,142,162,161,158,159,114,142,131,162,146,150,126,119,117,148,124,133,110,127,131,115,122,140,171,139,156,103,117,124,148,155,126,128,105,118,137,145,137,125,132,116,129,128,123,133,138,110,140,123,147,148,168,146,139,132,134,158,152,144,156,134,138,113,87,100,113,106,117,80,91,95,96,108,97,97,84,80,94,52,95,79,101,112,98,110,88,80,59,88,46,68,102,77,69,71,53,47,44,77,70,33,56,86,45,85,66,55,48,54,65,74,69,43,54,61,43,59,80,59,70,38,62,61,64,67,72,66,43,76,53,43,64,51,48,46,71,63,40,57,67,54,67,61,55,27,28,34,60,60,54,36,53,53,74,53,23,51,54,47,59,43,22,44,38,49,58,70,47,37,47,65,61,15,30,26,52,47,55,62,58,65,45,26,53,30,55,64,34,56,38,59,39,34,28,27,52,36,51,28,22,36,59,51,42,49,45,45,51,45,40,32,27,53,42,31,47,46,3,46,39,44,40,42,55,58,42,42,20,29,42,42,35,66,31,59,53,52,47,39,28,38,22,55,22,48,38,20,41,35,31,32,20,39,37,44,54,34,24,47,40,36,62,32,35,55,57,23,72,30,67,42,58,42,23,21,28,58,49,77,44,73,83,58,92,62,63,91,72,79,80,112,92,86,104,68,65,38,43,66,55,47,50,43,59,33,27,33,14,6,66,52,46,31,37,31,44,26,31,66,28,36,44,14,38,19,45,33,48,24,44,51,31,40,38,30,35,16,54,42,37,29,44,77,53,67,64,58,46,61,63,46,69,35,62,33,94,69,38,107,69,76,94,120,137,141,151,162,148,128,121,148,167,195,150,163,158,180,171,156,159,150,173,154,182,171,163,164,197,165,154,179,182,156,182,153,147,135,118,154,134,187,171,164,153,153,172,171,143,159,165,151,149,158,145,148,167,160,160,150,140,160,156,145,154,151,141,160,135,129,131,161,157,143,184,150,159,149,159,156,158,163,147,171,158,171,135,147,131,146,112,156,146,135,174,150,132,112,103,81,98,113,139,128,5,5,0,15,1,21,7,22,17,11,15,45,17,14,53,19,15,30,18,6,39,14,16,39,34,29,5,19,47,33,19,5,14,29,27,8,25,22,34,27,29,25,12,14,8,15,31,20,29,7,18,15,2,33,36,19,21,24,9,20,8,13,18,11,8,11,30,14,20,36,44,42,11,11,12,36,28,3,29,4,36,39,36,22,42,25,30,15,17,8,28,36,27,40,31,41,18,44,28,28,26,35,41,20,23,64,40,36,10,36,40,43,26,19,33,40,29,40,45,61,48,33,41,37,76,57,75,83,122,102,70,102,113,91,100,120,56,91,78,96,59,54,104,67,131,67,92,108,96,112,91,123,128,1
15,149,129,124,127,130,129,106,121,117,119,98,130,134,105,117,117,132,140,143,131,130,146,151,149,139,147,156,138,97,109,133,132,141,129,133,119,131,127,130,97,121,121,116,133,134,123,136,150,159,156,116,178,166,159,130,150,118,148,157,161,130,144,151,152,154,169,153,161,130,137,124,154,132,146,137,163,166,142,155,145,165,144,108,122,126,132,120,109,134,143,162,96,134,122,125,102,151,147,120,162,131,137,142,160,165,122,153,147,119,136,117,148,163,139,126,146,140,156,141,148,118,134,126,121,126,159,120,102,123,147,101,108,150,140,139,138,107,109,142,125,118,121,112,107,122,122,136,118,108,122,124,142,128,125,111,137,132,93,116,119,108,122,108,90,114,89,112,86,89,106,92,86,97,93,92,87,79,80,62,78,60,65,69,82,72,72,79,75,67,49,53,39,54,29,46,66,49,57,41,63,34,22,66,34,54,51,50,64,66,47,64,61,62,54,65,90,52,76,67,50,39,57,57,37,53,57,50,69,67,42,67,53,40,45,63,64,55,40,45,34,25,55,50,35,46,50,27,47,50,43,54,37,42,50,69,60,46,49,48,44,39,28,47,42,53,32,29,31,23,56,47,15,71,28,42,44,46,46,30,49,36,45,39,62,29,48,34,55,58,20,41,37,51,52,46,61,40,20,47,49,39,38,37,32,53,45,39,27,51,41,37,47,49,31,42,31,39,20,65,45,47,46,51,49,52,46,21,39,44,38,54,38,34,44,26,35,46,27,49,36,32,25,27,37,35,25,57,56,35,42,33,48,44,21,49,19,24,33,59,47,51,62,70,57,45,23,37,45,67,36,27,46,20,36,44,57,43,43,43,61,47,79,49,64,49,65,56,55,78,72,88,98,78,74,57,59,79,53,60,53,52,64,81,84,29,43,44,28,48,32,18,46,50,40,22,43,48,44,9,38,43,33,24,50,21,60,35,54,34,56,30,31,40,56,37,29,35,46,49,58,48,50,51,43,29,46,55,72,65,49,56,70,58,46,73,39,56,38,78,56,72,112,97,65,66,101,128,112,132,157,153,113,135,141,175,149,137,137,132,161,177,198,188,184,175,191,158,171,166,166,212,156,139,150,141,168,161,150,162,147,157,159,169,166,158,121,156,174,151,171,162,166,150,152,133,163,146,142,142,154,125,152,145,159,157,154,137,161,148,150,162,143,142,148,141,140,134,138,142,143,152,132,148,153,138,146,156,149,153,148,141,131,142,138,124,177,158,165,145,129,119,97,80,77,111,100,24,0,14,9,26,6,23,16,0,21,28,17,9,14,54,41,22,3,18,21,32,30,18,30,11,42,20,43,16,15,8,24,14,25,41,9,5,27,25,45,28,32,16,0,35,12,12,22,10,19,41,10,37,0,9,18,21,29,37,23,16,13,32,44,17,6,24,25,34,19,37,7,38,33,11,6,5,30,17,19,7,37,21,13,54,12,4,25,26,0,18,30,31,24,38,25,0,27,38,20,21,51,30,40,40,34,34,7,41,17,20,12,28,46,45,29,52,55,40,42,55,70,24,42,49,82,72,61,70,51,112,50,62,64,71,67,82,66,62,53,33,59,70,111,95,81,99,102,111,146,157,143,130,133,145,144,148,132,138,123,126,154,129,127,110,110,123,147,158,132,125,128,147,132,124,163,129,104,113,138,120,88,134,117,108,123,140,117,130,142,120,121,141,136,136,88,154,131,126,127,140,120,111,120,110,151,137,117,153,166,134,143,140,111,106,131,130,150,147,141,142,160,169,151,142,155,136,127,137,130,123,136,126,150,98,148,148,137,127,136,125,135,154,117,148,124,136,138,163,129,125,139,137,137,147,139,140,141,115,143,117,148,151,146,136,147,143,134,156,156,167,155,161,131,169,134,157,169,145,131,136,125,152,125,156,140,138,138,147,148,160,131,121,87,92,85,115,94,117,126,112,93,75,92,115,101,100,96,81,68,114,142,75,94,105,91,105,77,78,70,89,70,88,70,73,79,85,94,95,104,75,80,42,40,78,82,93,71,78,78,63,73,66,60,64,59,32,64,62,58,53,46,69,53,66,52,44,70,36,41,47,80,64,71,64,62,58,57,66,68,67,56,38,61,63,55,51,63,69,42,63,53,58,67,72,42,56,32,46,61,50,40,55,92,41,55,48,54,43,42,50,53,73,45,63,61,26,45,48,70,46,66,48,34,53,59,66,44,50,55,69,26,40,45,36,43,51,61,59,23,36,52,67,28,53,42,51,46,59,40,31,37,60,49,24,54,49,41,63,37,46,39,58,27,58,30,55,44,57,48,40,21,36,67,33,40,45,43,68,50,32,59,42,41,40,45,53,28,16,25,61,54,19,37,41,28,3
2,40,39,58,31,32,60,68,59,42,29,45,42,40,59,75,42,65,29,36,35,43,51,57,31,36,61,46,44,51,79,52,29,37,79,68,56,65,39,72,56,77,67,73,40,54,64,45,69,67,60,55,43,24,54,80,96,78,95,72,45,63,41,60,57,40,41,76,55,65,69,57,61,39,76,57,29,46,56,19,43,28,42,37,28,43,30,56,23,33,13,56,26,41,59,39,37,46,54,16,47,41,25,36,36,49,28,53,60,29,51,43,44,52,60,44,49,50,32,56,67,72,68,42,61,42,67,44,71,52,60,48,72,113,121,104,109,141,131,150,135,137,170,129,143,141,120,154,167,164,162,165,127,153,138,151,166,141,151,146,158,156,188,155,169,170,178,166,142,169,164,163,129,180,152,165,150,136,154,141,139,153,170,175,176,156,166,174,162,157,178,138,158,172,135,167,170,174,169,149,171,158,163,170,139,165,143,140,162,160,136,122,157,154,162,135,150,142,163,151,153,120,113,116,135,137,159,146,140,126,91,59,88,80,8,7,41,9,31,13,5,4,15,31,10,8,18,2,23,10,45,12,19,26,4,7,34,52,12,14,59,30,22,8,22,13,19,37,25,32,11,18,12,28,19,23,33,24,26,18,24,16,28,14,13,2,19,22,52,24,28,19,8,40,25,11,40,17,24,21,16,44,19,27,6,22,35,43,16,3,18,11,20,32,47,20,10,21,4,40,7,28,4,52,44,22,19,19,23,15,33,52,35,62,44,25,18,18,39,31,9,14,39,30,27,39,48,34,24,45,28,56,41,61,56,43,54,63,72,68,61,54,74,73,29,39,57,46,52,31,74,73,45,28,24,64,28,49,48,89,79,95,125,128,146,135,136,136,140,152,181,165,142,151,153,165,150,159,149,126,148,152,132,120,151,131,128,143,120,134,125,114,140,153,119,117,126,133,138,143,143,119,130,132,151,152,130,149,128,144,156,147,133,124,110,118,121,140,140,122,122,134,123,140,137,154,133,100,125,119,121,98,95,139,136,131,150,148,143,147,152,173,148,136,144,138,137,140,145,151,152,155,143,122,150,124,156,139,151,128,121,141,139,142,123,148,115,107,126,114,123,150,139,150,135,169,146,138,129,120,117,149,135,122,151,145,139,111,132,132,114,153,142,139,146,139,140,151,127,112,157,164,143,136,112,127,121,119,142,118,115,100,121,137,105,106,119,113,122,97,120,107,94,75,92,97,89,94,107,110,86,104,97,83,110,107,108,118,63,85,89,72,101,75,117,112,80,63,50,56,65,63,70,67,82,66,70,90,89,72,37,83,42,82,70,86,70,79,70,48,64,48,65,85,53,54,38,43,80,54,68,58,34,40,53,48,55,53,55,49,69,42,61,49,49,52,53,48,61,39,57,71,69,52,74,56,23,51,68,41,61,72,69,50,30,40,33,65,38,42,51,56,40,39,53,58,46,36,51,23,63,50,39,52,60,48,47,44,34,36,54,31,22,37,55,60,56,44,63,46,56,30,50,36,34,11,59,40,68,32,45,58,23,26,41,36,51,30,27,46,44,47,43,37,34,53,63,11,42,34,43,61,34,34,43,36,55,43,39,63,38,57,47,72,39,36,53,43,28,45,47,43,22,33,15,45,48,41,41,32,32,57,36,34,62,42,48,42,24,61,58,31,42,45,57,32,25,54,36,49,63,63,66,41,29,39,38,40,36,63,47,67,57,38,57,27,61,52,62,33,12,50,53,69,59,41,57,79,86,69,66,29,52,37,83,74,79,48,47,45,35,43,73,48,38,43,47,47,53,39,20,14,26,29,46,41,20,49,40,20,21,12,55,62,53,47,38,21,26,55,27,44,53,52,36,53,44,33,61,52,51,48,48,44,37,74,39,41,86,36,47,63,43,57,92,47,94,64,90,98,77,66,94,119,139,111,126,139,128,120,140,144,162,182,159,139,120,137,169,149,141,129,146,137,136,139,146,176,161,129,129,148,164,157,127,171,140,152,152,146,123,148,148,137,152,131,161,161,134,155,138,131,174,172,144,155,159,152,163,141,154,162,159,157,155,147,122,143,167,120,135,160,158,162,150,143,128,172,156,130,139,155,167,141,153,159,162,137,118,164,146,170,137,157,161,181,134,126,162,136,131,76,70,74,33,18,20,8,9,29,2,17,12,5,15,26,6,32,25,8,18,18,16,25,31,43,26,5,24,15,39,8,8,43,21,40,28,16,30,26,39,46,26,2,24,7,9,16,8,47,23,22,22,32,17,53,20,9,5,6,39,20,30,30,13,46,28,23,12,20,21,15,3,34,17,32,26,27,21,19,31,12,34,22,33,27,16,4,6,36,43,14,40,21,4,30,38,35,12,49,56,42,48,40,22,35,10,46,49,23,32,21,61,24,41,41,45,35,42,52,54,33,38,47,56,47,42,5
3,63,40,27,32,29,64,26,31,43,31,52,53,71,68,52,74,45,65,45,54,79,83,104,110,116,77,123,119,107,122,149,137,157,149,144,145,120,137,119,120,138,122,124,148,125,134,142,141,124,120,123,102,114,141,123,125,115,124,163,160,122,131,99,134,140,120,152,140,144,132,131,152,141,148,133,130,157,152,129,121,109,131,137,133,130,126,123,124,120,128,103,130,124,109,120,109,121,115,148,150,159,131,143,142,163,160,163,127,140,154,142,148,120,151,133,149,156,131,144,127,139,122,152,138,136,136,119,135,124,140,150,137,144,127,135,152,127,127,149,125,122,134,112,114,107,124,145,145,139,118,112,113,119,88,117,136,105,120,99,80,102,91,84,80,103,94,55,76,107,118,140,134,151,94,120,125,124,150,166,147,134,117,100,101,100,90,104,119,103,98,83,97,108,120,106,110,121,133,130,109,121,108,88,44,46,60,89,93,94,89,76,52,62,69,69,66,88,63,80,64,104,95,84,71,64,76,67,28,52,75,58,63,84,110,80,65,44,63,46,72,53,55,65,81,54,47,60,50,45,54,64,67,62,74,36,54,60,49,43,37,52,47,81,58,43,51,73,56,41,76,48,51,55,67,44,65,58,56,46,59,71,43,72,61,77,67,79,64,39,34,40,69,57,63,59,48,27,47,65,28,41,23,83,44,54,44,71,63,51,47,58,14,60,38,41,43,33,36,45,39,48,22,47,16,28,40,40,43,41,31,37,60,50,11,23,35,55,52,52,57,51,39,51,16,49,56,32,40,52,49,43,48,72,36,56,63,29,25,48,64,52,44,43,42,63,42,31,63,50,59,41,55,49,18,33,35,29,55,57,66,77,40,44,23,62,52,51,52,66,59,60,44,55,64,42,63,31,53,30,44,29,46,62,24,69,68,60,40,45,17,29,57,61,33,42,30,57,59,41,61,37,43,68,43,53,74,53,61,40,74,63,54,57,63,68,47,26,29,56,29,41,59,57,35,29,59,46,41,42,57,36,23,20,50,40,29,27,13,37,19,35,47,54,52,42,37,30,29,51,51,15,63,63,18,37,43,33,60,67,22,54,80,36,52,49,70,87,75,81,46,92,78,108,111,130,137,129,143,135,125,157,141,154,134,167,168,191,157,153,158,161,172,184,157,155,133,139,149,141,142,146,162,147,125,162,172,175,163,161,156,142,154,141,145,165,144,147,135,191,160,182,191,161,152,168,160,163,199,177,163,142,195,153,153,144,167,146,150,166,184,150,170,157,170,155,147,177,163,145,164,151,138,158,145,159,140,167,166,127,145,162,133,146,134,154,146,122,154,139,158,143,145,143,130,117,69,23,16,12,21,26,12,15,14,8,25,12,29,36,26,32,26,35,18,27,25,8,28,3,14,38,50,9,9,29,37,31,34,19,8,22,17,46,40,5,37,17,55,31,21,40,27,20,16,30,35,4,16,37,18,14,27,16,22,40,35,20,36,8,20,20,13,31,20,16,17,44,40,47,12,16,22,11,33,21,34,25,34,19,35,33,11,23,26,38,29,24,23,23,59,29,27,24,62,22,55,42,38,58,55,31,41,54,68,66,37,56,63,29,46,38,64,80,55,82,77,80,70,109,118,119,130,122,124,139,122,99,123,104,118,103,98,85,62,86,91,85,105,114,114,100,109,103,87,56,67,101,106,117,99,92,123,114,101,87,109,77,108,132,128,154,120,157,163,123,141,146,133,164,147,156,116,136,135,140,164,151,145,131,154,145,150,135,128,119,140,136,112,113,151,137,151,155,158,176,135,126,136,135,136,150,157,122,127,144,153,143,155,140,151,175,177,167,151,162,165,166,153,138,141,147,161,164,157,145,155,141,116,131,103,128,131,113,128,133,117,128,134,138,133,159,122,121,123,140,148,143,154,149,136,140,156,143,162,152,133,128,104,113,159,144,165,128,143,142,141,139,121,136,131,156,138,153,150,155,175,163,136,125,133,149,174,130,118,152,145,123,131,132,129,119,130,125,138,127,137,127,92,116,129,106,140,126,112,118,151,124,123,130,142,103,140,131,120,142,125,134,116,110,96,90,116,106,151,152,113,105,92,57,96,99,69,93,102,97,108,98,120,116,93,84,67,77,62,80,71,72,72,85,62,35,81,62,58,69,74,99,99,58,71,62,59,65,56,102,76,72,67,73,42,76,78,45,43,56,51,42,60,70,63,38,51,53,60,56,56,41,35,66,49,49,77,47,31,61,61,31,59,43,75,57,38,58,72,50,50,51,83,44,56,64,47,56,49,64,68,45,72,47,58,56,19,34,59,79,72,25,18,34,40,42,38,
52,51,52,56,37,56,55,27,53,51,29,56,15,54,20,30,49,45,37,51,41,51,34,49,50,45,51,61,45,48,22,64,15,49,27,27,56,62,42,59,42,55,52,58,40,36,23,52,36,50,71,43,52,69,55,44,68,41,46,44,53,73,55,29,54,35,26,69,37,45,39,38,50,58,34,61,39,48,30,44,40,30,50,54,52,53,45,39,22,50,41,54,49,61,60,62,33,49,25,33,52,46,44,48,51,71,61,51,53,40,33,35,49,49,38,45,59,67,54,58,72,53,63,39,62,52,30,64,63,50,48,44,54,45,47,41,61,49,45,51,41,43,68,21,26,16,39,37,34,53,39,61,48,34,56,61,70,44,66,55,56,50,49,50,61,31,43,66,52,57,72,65,48,74,49,40,74,80,89,97,100,122,113,105,81,134,126,142,144,151,163,174,187,169,150,179,166,160,133,154,168,152,173,172,183,146,153,161,163,141,157,160,175,165,178,156,160,150,147,130,132,152,168,138,140,146,155,142,158,143,172,172,135,148,158,133,171,146,169,138,136,161,152,132,146,153,140,140,133,154,157,147,138,151,132,135,149,160,147,165,143,142,138,171,128,118,119,175,133,127,125,105,132,136,130,123,163,145,162,151,132,125,133,101,21,9,19,3,16,17,26,1,24,6,34,17,32,14,41,21,30,14,51,13,28,4,12,18,24,25,34,22,16,19,22,28,41,27,39,42,30,24,4,24,14,15,23,10,6,47,21,19,38,37,38,6,19,35,31,35,11,6,37,23,27,38,13,36,10,14,21,38,38,9,9,11,28,22,30,49,15,52,53,28,22,9,66,27,4,27,21,25,16,32,22,9,40,34,47,38,27,31,46,37,36,39,34,68,41,35,60,49,55,58,25,57,47,61,46,62,99,98,115,73,83,61,84,116,116,113,127,129,113,139,133,128,90,128,86,72,93,66,101,93,98,99,111,113,105,91,100,98,98,72,94,92,100,114,100,103,97,90,99,109,141,115,155,144,146,144,142,117,103,118,151,157,145,125,146,187,117,143,135,145,137,118,137,146,154,161,141,146,142,120,127,130,138,126,134,138,121,134,161,125,128,131,143,123,120,110,146,142,136,153,170,134,142,166,165,157,133,130,160,169,154,136,148,136,148,160,165,148,142,160,160,143,130,143,110,121,110,136,157,109,116,115,142,137,144,137,115,121,140,135,121,123,141,131,142,152,152,163,149,130,131,140,137,147,143,175,169,155,141,144,145,106,113,131,141,153,150,147,153,147,137,148,122,122,147,169,147,163,131,151,158,126,126,138,113,120,124,130,121,129,137,112,133,127,121,113,132,102,126,120,121,138,125,120,127,117,115,122,123,133,130,113,128,99,106,117,115,111,147,131,113,73,62,80,91,100,102,92,112,110,67,95,105,81,104,55,61,65,67,75,50,89,72,79,64,58,87,56,59,76,52,92,102,104,95,61,79,72,83,74,94,80,79,74,92,84,35,69,47,54,42,66,53,39,57,57,38,51,60,27,41,51,51,52,65,38,42,35,29,51,40,48,51,21,64,54,43,33,38,48,52,31,59,67,30,47,49,69,58,57,35,49,62,29,77,44,64,72,42,32,44,51,56,53,46,67,37,48,48,55,56,57,60,36,32,55,48,22,39,39,57,40,28,11,32,38,29,57,64,30,30,30,51,60,62,57,43,67,45,63,40,23,26,49,40,55,31,27,44,50,51,54,47,33,54,47,58,42,37,25,30,45,52,53,39,52,53,60,53,31,40,72,34,52,68,53,24,49,34,47,45,52,55,40,35,32,51,54,57,21,48,53,69,64,26,48,53,64,17,41,28,42,32,50,54,47,55,47,31,33,38,19,58,64,44,71,55,68,56,41,66,50,39,65,56,61,49,32,55,44,68,52,43,40,53,54,24,37,72,51,23,28,74,50,34,64,39,39,42,27,56,47,48,40,38,45,73,14,53,23,61,76,58,37,42,43,45,45,40,42,67,35,47,87,53,44,52,78,36,71,67,82,55,67,104,86,85,122,118,87,75,103,124,142,131,145,153,168,160,171,168,179,193,167,166,163,163,169,170,150,163,172,182,157,161,168,163,164,174,183,160,172,143,155,142,140,149,165,162,160,174,156,180,147,169,143,164,145,159,138,131,147,158,166,147,155,147,131,164,152,136,142,125,119,126,160,146,160,167,125,139,133,145,146,155,166,143,130,142,155,165,123,127,130,130,139,149,130,122,121,126,139,147,114,169,114,125,137,138,109,15,4,21,15,0,15,13,30,7,35,19,0,47,27,16,12,17,42,33,19,4,37,20,26,16,26,39,30,22,30,19,27,25,23,28,32,23,21,30,21,27,54,8,29,12,27,7,41,17,37,31,37,18,1
4,38,23,32,42,1,28,48,20,20,39,54,24,23,29,41,23,25,12,45,41,51,28,27,27,18,37,24,14,18,23,38,37,21,11,36,40,18,28,33,24,17,17,46,42,25,26,37,29,21,54,53,48,67,42,57,67,55,60,62,83,62,83,56,86,111,70,108,108,110,135,77,122,118,87,120,100,121,136,108,106,109,103,118,103,132,116,115,130,114,143,112,121,123,138,149,142,124,128,136,119,137,123,152,124,148,169,156,135,139,153,158,150,149,139,180,137,144,152,154,130,146,131,137,143,133,141,124,140,157,129,132,138,163,136,159,109,132,163,137,143,171,136,116,115,134,155,140,152,115,122,138,154,157,164,144,151,143,123,144,125,131,148,114,129,113,141,135,102,149,154,159,162,152,86,135,134,149,135,162,139,136,156,141,124,163,141,150,108,157,152,162,162,134,144,144,131,151,149,170,151,144,149,142,162,149,145,155,140,139,152,145,117,126,131,131,141,139,151,147,105,145,122,125,114,119,113,125,143,155,150,146,142,111,105,137,152,97,13,13,5,23,10,25,16,2,0,7,24,49,31,16,54,45,0,30,12,13,10,14,7,30,24,19,24,38,22,12,2,16,39,20,22,24,7,27,7,32,28,20,29,13,16,22,28,5,2,43,17,8,28,17,13,7,5,12,15,30,15,32,30,5,23,24,40,5,40,0,7,5,5,21,34,7,18,25,20,28,41,53,18,14,30,13,12,14,2,11,9,31,15,1,26,21,11,24,20,23,21,20,5,10,8,12,21,18,20,33,50,23,7,15,28,26,18,32,16,33,8,21,25,13,5,25,33,27,5,3,10,34,8,37,13,26,14,19,23,12,4,14,20,10,21,4,19,1,29,8,26,30,26,28,36,29,26,9,9,15,21,17,18,25,24,44,2,27,17,26,9,36,23,33,4,7,11,6,8,21,28,27,11,27,15,17,31,41,9,38,20,24,31,25,14,16,12,9,16,16,16,23,21,30,22,8,28,29,10,1,17,11,20,16,6,21,18,11,35,3,11,5,9,20,19,16,25,7,34,24,9,32,22,17,13,16,27,12,19,24,34,18,0,13,30,5,8,10,26,10,26,19,17,12,12,38,20,2,13,12,35,16,29,3,42,35,15,40,15,11,5,19,17,8,45,39,20,10,40,12,17,27,6,13,42,9,17,7,18,22,14,14,20,18,22,1,5,15,24,18,33,40,33,12,11,11,11,36,8,45,19,29,40,2,20,9,6,28,9,15,14,25,29,13,25,30,32,19,16,8,24,21,45,30,27,45,15,28,29,5,29,18,11,20,9,32,14,18,47,2,10,17,15,19,29,5,13,14,17,35,21,18,5,16,12,11,5,6,29,30,21,28,40,10,31,3,13,38,14,2,21,22,15,29,4,9,6,3,16,23,13,33,13,28,22,17,22,19,20,29,43,8,3,20,17,15,21,21,21,13,33,6,2,12,2,21,16,19,31,21,14,19,21,31,26,25,37,3,15,17,14,20,27,29,25,2,16,8,14,22,12,36,50,33,38,11,26,28,20,23,23,15,34,21,26,39,29,50,34,24,18,34,25,30,15,16,39,14,17,3,24,21,41,19,55,41,9,13,28,22,19,28,24,5,29,51,19,14,35,16,27,28,0,39,35,36,34,39,35,35,31,41,35,40,46,47,13,25,26,13,50,32,23,48,37,34,34,34,25,11,38,28,27,41,27,36,47,25,43,16,9,30,49,41,54,74,47,58,64,52,66,41,57,52,39,72,106,88,87,97,118,119,129,82,102,112,89,103,129,97,132,118,108,126,103,111,91,100,128,123,135,115,136,112,130,99,139,157,120,144,125,131,117,153,138,101,155,129,147,162,160,136,139,149,160,134,132,169,142,160,112,164,133,120,121,139,137,146,160,136,110,157,147,160,143,119,140,157,157,132,151,155,146,134,120,132,114,134,144,166,166,174,129,139,144,165,125,119,159,134,120,116,132,140,144,116,123,116,127,128,105,144,143,153,155,144,149,127,139,129,150,123,169,138,151,137,128,126,169,136,152,136,158,146,164,149,148,147,140,139,165,128,131,132,154,125,115,166,137,143,141,120,145,147,145,136,118,108,130,171,132,126,135,93,107,115,157,147,132,112,121,124,123,128,118,118,108,110,147,103,17,0,3,9,15,36,5,37,5,26,35,17,37,15,1,27,21,33,1,16,7,1,22,21,10,7,12,6,0,22,9,12,13,18,6,19,27,17,19,24,16,22,9,32,18,7,29,27,18,18,1,17,47,9,25,13,22,21,13,10,13,15,23,21,23,10,24,18,34,8,14,24,33,6,10,0,33,1,13,37,2,12,37,9,35,34,5,21,37,10,28,13,12,43,15,23,5,31,10,18,13,18,25,26,45,4,20,10,20,11,16,7,14,10,25,16,17,3,9,9,10,9,12,0,8,6,11,20,20,17,17,3,23,12,22,11,7,17,10,7,16,22,14,34,25,16,14,18,6,23,3,16,31,16,11,21,37,12,12,2,19,28,30,
19,12,25,18,35,14,29,26,23,8,39,16,20,2,31,14,42,16,8,29,22,18,5,5,15,26,16,1,18,20,27,23,5,16,17,21,17,24,28,33,10,25,16,9,19,23,27,19,28,33,18,30,21,12,16,12,19,7,29,20,12,10,4,32,19,21,43,18,32,28,22,31,29,13,12,16,36,19,13,30,32,28,22,24,14,22,38,26,2,31,16,13,16,11,24,15,12,34,8,26,10,33,13,5,8,8,6,27,19,12,7,27,17,48,24,10,1,5,17,25,11,6,38,13,15,20,14,21,14,23,26,22,0,23,36,20,32,16,8,12,19,8,11,24,1,13,12,26,29,23,2,21,8,31,20,25,3,19,35,6,13,5,16,26,19,3,14,20,16,3,20,10,42,15,18,13,27,19,3,48,4,8,19,14,22,26,11,27,18,12,9,13,28,37,1,14,23,16,23,21,17,30,13,26,20,15,23,21,9,29,37,16,17,24,21,25,4,8,6,20,34,12,17,10,19,0,23,26,0,14,24,4,15,31,4,3,8,35,30,22,1,13,29,21,15,8,42,6,14,13,41,2,17,43,17,7,7,30,4,17,38,20,8,8,15,35,19,11,13,24,31,16,23,36,26,8,29,27, \ No newline at end of file diff --git a/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/estimation_16x16.txt b/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/estimation_16x16.txt new file mode 100644 index 000000000000..7216dbcb1758 --- /dev/null +++ b/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/estimation_16x16.txt @@ -0,0 +1,2 @@ +30,45 +12,8;12,8;12,8;12,8;12,8;12,9;12,9;12,9;12,9;12,9;12,9;11,10;11,10;11,10;11,10;11,11;11,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,11;9,4;10,12;11,12;11,12;11,12;11,5;10,11;10,-1;12,8;12,8;12,8;12,8;12,8;12,9;12,9;12,9;12,9;12,9;12,10;11,10;11,10;11,10;11,10;11,11;11,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,11;11,13;11,11;11,12;11,12;11,12;10,9;10,11;10,-1;11,8;12,8;12,8;12,8;12,8;12,9;12,9;12,9;12,9;11,9;11,10;11,10;11,10;11,10;11,11;11,11;11,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;12,12;11,12;11,12;11,12;11,12;11,11;10,11;10,11;10,-1;12,8;12,8;12,8;12,8;12,9;11,9;11,9;11,9;11,9;11,9;11,10;11,10;11,10;11,11;11,11;11,11;11,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,14;10,13;10,8;10,11;10,0;12,8;12,8;12,8;12,8;11,9;11,9;11,9;11,9;11,9;11,10;11,10;11,10;12,11;11,11;11,11;11,11;12,11;12,12;12,12;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,13;9,13;10,24;10,12;10,11;10,0;11,8;11,8;12,8;12,9;11,9;10,9;11,9;11,9;11,10;11,10;11,10;11,10;11,11;11,11;9,10;11,11;12,12;13,12;12,12;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,13;10,9;10,7;9,11;9,0;11,8;11,9;10,9;11,9;11,9;10,9;10,9;10,9;11,10;11,10;11,10;11,11;11,11;11,11;11,11;11,12;11,12;11,12;11,12;11,12;11,12;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,10;9,11;9,11;9,0;11,9;11,9;10,9;10,9;10,9;10,9;9,9;10,10;10,10;11,10;11,11;11,11;11,11;11,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,11;9,9;9,10;9,11;9,0;11,9;11,9;11,9;10,9;10,9;9,9;7,9;9,10;10,10;11,10;11,11;11,11;12,12;12,12;11,12;11,12;12,12;19,14;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,12;10,12;8,-2;8,12;8,12;8,0;11,9;11,9;11,9;11,9;10,9;10,9;10,10;10,10;10,10;11,11;11,11;11,13;12,12;13,12;10,12;
3,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,12;9,11;9,25;8,12;7,11;7,0;11,9;11,9;11,9;11,9;11,9;10,9;10,10;10,10;11,10;11,11;11,11;11,11;12,11;11,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,12;9,12;8,15;7,13;6,11;5,0;11,9;11,9;11,9;11,9;11,9;11,10;11,10;11,10;11,10;11,11;11,11;12,11;12,10;12,11;12,11;12,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;9,13;9,14;8,14;6,11;4,11;3,0;12,9;12,9;12,9;12,9;12,10;11,10;11,10;11,10;11,10;11,10;12,11;12,11;12,11;12,11;12,11;12,11;12,12;12,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,12;9,12;9,14;9,15;8,16;5,15;1,12;0,0;12,9;12,9;12,9;12,9;12,10;12,10;12,10;11,10;11,10;12,10;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,13;9,13;9,14;8,16;7,17;3,16;-2,12;-5,7;12,10;12,10;13,9;14,10;13,10;12,10;12,10;12,10;12,10;12,10;12,11;13,11;13,11;13,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,13;10,13;9,14;9,15;7,16;6,18;0,18;-8,12;-19,3;12,10;12,10;13,10;13,10;13,10;12,10;12,10;12,10;12,10;12,10;13,11;13,11;14,11;13,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,13;10,13;9,14;9,15;7,16;4,17;-1,17;-13,12;-30,-2;12,11;12,10;11,10;16,10;14,10;12,10;12,10;12,10;12,10;12,10;13,10;14,10;18,10;13,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,14;10,14;10,16;9,17;5,18;0,19;-12,14;-39,-11;12,11;12,11;12,10;12,10;12,10;12,10;12,11;12,11;12,11;12,11;12,10;12,10;12,10;12,10;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,13;11,13;10,14;9,15;8,16;7,18;3,19;-4,17;-13,6;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,13;11,13;10,13;9,14;8,15;7,17;5,17;1,15;-3,2;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,13;10,13;10,13;11,14;9,14;9,16;8,16;5,13;0,-2;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,12;10,13;9,13;9,14;9,14;8,13;3,-5;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,12;10,13;10,13;9,13;9,13;9,12;4,-7;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;10,1
2;10,12;10,13;10,13;10,13;10,13;10,13;10,12;6,3;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,13;10,13;10,13;10,12;10,12;10,13;10,13;11,14;10,14;8,12;2,-1;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,13;10,13;10,13;11,13;10,13;10,13;10,13;10,13;10,13;9,13;8,13;6,12;0,-6;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,13;10,13;10,13;10,13;10,13;10,13;10,13;10,14;11,14;10,14;10,14;8,14;5,13;-1,-10;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,13;10,13;11,13;11,13;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,13;11,13;11,14;10,14;9,13;9,13;9,13;9,14;9,14;9,14;9,14;9,14;9,14;7,13;5,12;2,0;10,13;10,12;9,12;9,12;10,12;11,12;11,13;10,13;10,12;10,12;10,12;11,12;11,12;11,13;10,13;10,13;10,13;10,13;10,13;11,13;11,12;11,13;11,13;11,13;11,12;11,12;11,12;11,13;11,13;10,14;10,14;9,15;9,15;10,15;8,13;8,13;8,15;7,14;6,13;7,13;8,14;8,14;5,13;4,12;2,-1;10,13;7,12;7,12;7,12;8,12;8,11;8,12;8,13;8,13;9,12;9,11;9,12;9,12;9,12;9,12;9,12;7,13;7,13;8,13;8,12;8,12;8,13;9,13;10,13;11,13;11,13;10,13;10,13;10,14;10,15;10,16;9,17;7,18;6,16;5,12;8,15;8,15;1,13;1,10;4,10;6,12;7,15;4,14;3,12;2,1;-1,13;5,12;6,12;4,12;6,11;5,9;2,5;3,6;0,1;6,9;6,10;3,10;6,11;4,11;3,10;3,10;1,8;-7,3;-6,-1;3,8;4,10;2,12;-11,13;1,13;-4,13;-6,13;-6,13;0,13;-3,14;0,15;0,15;1,16;-11,13;1,14;3,13;-2,5;-8,2;-17,-8;-3,0;0,6;1,10;2,11;-1,10;2,11;1,0; \ No newline at end of file diff --git a/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/exhaust_16x16.txt b/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/exhaust_16x16.txt new file mode 100644 index 000000000000..719c3f04b3c3 --- /dev/null +++ b/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/exhaust_16x16.txt @@ -0,0 +1,2 @@ +30,45 
+6,4;12,14;6,16;24,9;30,2;7,11;11,12;13,10;12,12;30,11;9,13;10,11;4,11;1,-7;7,-13;9,-32;1,-12;22,9;29,5;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;28,-9;12,12;12,12;12,12;12,12;10,5;12,12;0,-1;9,29;14,3;4,31;11,24;-10,7;5,23;-15,-32;13,6;13,6;0,6;27,3;10,9;12,11;14,3;2,-19;-4,14;16,-13;12,12;17,10;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;6,18;10,17;9,12;12,12;12,12;12,12;3,9;26,12;-16,-1;8,31;11,-10;17,-17;16,-22;13,14;10,18;12,12;12,11;20,-23;3,9;7,4;12,13;10,13;12,12;15,24;11,-6;12,12;12,12;11,11;12,11;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;14,9;12,12;12,12;12,12;12,12;24,17;4,11;17,12;-32,-1;12,8;13,11;13,-4;13,25;14,-26;12,12;20,-8;12,12;12,12;13,12;13,12;12,12;12,12;14,11;11,13;3,8;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;14,16;5,6;5,12;-32,-1;12,11;13,14;12,13;12,12;8,-2;12,20;11,13;12,14;12,12;12,12;10,7;11,-10;13,14;11,13;12,12;-16,-10;12,12;12,13;9,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;16,25;12,12;18,12;-32,-1;12,13;12,12;16,14;16,30;11,7;10,-4;11,15;12,12;9,22;13,15;11,-12;12,12;12,12;12,6;4,-22;12,12;12,12;15,24;12,9;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;15,9;19,6;27,12;-32,-1;11,5;13,26;6,-20;13,8;13,-2;8,-14;12,22;10,1;11,-14;13,24;15,10;14,17;12,-8;12,12;11,-9;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;5,12;-32,-1;12,12;9,-8;12,12;30,1;25,-3;12,12;12,17;8,14;8,24;8,-2;7,20;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;8,16;8,10;12,12;-32,-1;9,11;5,6;14,1;-4,27;12,12;19,7;-18,-32;22,-20;11,20;11,16;10,20;12,12;12,12;12,12;12,12;12,12;12,12;31,-22;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;7,-4;12,12;13,12;-32,-1;12,12;12,12;11,4;12,12;12,20;12,7;12,12;10,20;13,8;19,-1;12,11;24,19;13,8;14,6;11,9;2,16;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;11,11;14,26;17,12;12,12;-32,-1;20,18;10,10;15,-17;14,-2;12,12;11,26;13,27;10,12;13,14;23,9;8,20;17,0;12,12;12,11;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;10,13;12,1;13,12;-32,-1;16,11;12,12;9,13;22,0;-32,-5;9,8;15,6;-8,-2;12,12;10,-21;12,3;12,12;8,3;12,12;12,12;12,23;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;11,11;7,12;31,-1;8,16;11,7;11,18;11,9;12,3;13,-4;5,24;8,17;12,12;12,14;12,12;-9,14;7,-2;12,16;15,6;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,14;11,12;21,-1;13,20;27,-9;11,6;14,8;7,22;19,7;4,1;-24,21;12,19;11,18;9,-24;14,-20;12,7;13,-28;10,15;12,12;12,12;12,12;12,12;
12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;13,12;24,-32;14,5;9,18;25,-26;24,-27;12,11;10,16;7,26;12,12;17,17;13,4;6,18;12,20;30,3;16,17;12,16;12,12;12,-21;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;6,-32;12,12;12,12;-16,11;12,12;10,11;25,-9;27,5;22,28;8,13;13,3;12,30;-5,-8;18,-31;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-9,-32;13,12;12,12;12,3;18,29;15,-9;1,19;-9,14;-3,-3;-21,8;13,6;-12,12;-2,-13;20,-32;-19,-31;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-24,-30;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;27,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;11,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-5,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-21,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,15;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-29,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;17,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;1,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-15,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-31,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,1
2;12,12;12,12;12,9;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;3,-1;12,12;12,12;17,20;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-13,-1;12,12;12,12;-8,25;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;21,-1;-2,5;-3,25;-18,27;1,-31;0,17;-9,26;-23,27;-23,20;-27,18;1,30;3,15;0,0;3,22;3,5;0,0;0,0;0,0;-17,-18;-15,-16;-1,15;-4,9;2,11;-13,-2;2,13;-5,16;-7,-2;-7,7;0,18;-5,-11;0,-10;0,0;2,-5;-5,-32;4,-27;2,21;-8,7;-8,1;-17,-10;-4,-1;-13,0;-8,5;1,10;0,-1;4,12;5,-1; \ No newline at end of file diff --git a/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/ground_truth_16x16.txt b/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/ground_truth_16x16.txt new file mode 100644 index 000000000000..850b7eda8ff1 --- /dev/null +++ b/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/ground_truth_16x16.txt @@ -0,0 +1,2 @@ +30,45 +12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,1
2;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;1
2,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12; \ No newline at end of file diff --git a/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/localVar_16x16.txt b/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/localVar_16x16.txt new file mode 100644 index 000000000000..5e4ea8eed984 --- /dev/null +++ b/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/localVar_16x16.txt @@ -0,0 +1,2 @@ +30,45 
+0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,1;0,0,0,0;0,0,0,0;0,0,0,1;0,0,0,0;1,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,2,2,4;25,115,115,529;196,644,644,2116;225,420,420,784;576,696,696,841;4900,3920,3920,3136;9604,6664,6664,4624;9,120,120,1600;1024,2208,2208,4761;3249,5016,5016,7744;484,1870,1870,7225;9,288,288,9216;64,384,384,2304;324,882,882,2401;10404,8058,8058,6241;1936,2332,2332,2809;196,322,322,529;1,10,10,100;4,16,16,64;4,100,100,2500;1156,2482,2482,5329;3600,5160,5160,7396;81,594,594,4356;0,0,0,1521;0,0,0,10000;1,134,134,17956;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;1,0,0,0;1,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,4;25,110,110,484;196,560,560,1600;529,713,713,961;1296,2196,2196,3721;4096,6784,6784,11236;6889,9960,9960,14400;441,1659,1659,6241;1764,2520,2520,3600;4225,4875,4875,5625;529,2208,2208,9216;36,558,558,8649;36,276,276,2116;529,1564,1564,4624;9801,11088,11088,12544;3600,4620,4620,5929;225,360,360,576;4,2,2,1;1,9,9,81;1,44,44,1936;2025,3060,3060,4624;4624,4760,4760,4900;16,168,168,1764;0,0,0,1681;0,0,0,10609;1,133,133,17689;9,0,0,0;1,0,0,0;4,0,0,0;9,0,0,0;9,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;1,1,1,1;0,0,0,0;0,0,0,0;0,0,0,0;1,-1,-1,1;0,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;0,0,0,0;0,0,0,9;81,198,198,484;225,540,540,1296;2209,2162,2162,2116;6400,5760,5760,5184;6241,7347,7347,8649;7396,7998,7998,8649;8464,7084,7084,5929;2916,4104,4104,5776;3249,6498,6498,12996;784,3724,3724,17689;400,1820,1820,8281;784,1288,1288,2116;1849,1935,1935,2025;8649,4650,4650,2500;4624,3060,3060,2025;1089,891,891,729;625,175,175,49;81,0,0,0;441,693,693,1089;3969,4347,4347,4761;2809,2862,2862,2916;0,0,0,0;0,0,0,2116;0,0,0,10816;1,134,134,17956;1,1,1,1;16,0,0,0;25,0,0,0;9,0,0,0;25,0,0,0;1,0,0,0;1,0,0,0;4,0,0,0;9,0,0,0;9,0,0,0;9,3,3,1;9,3,3,1;1,0,0,0;4,2,2,1;1,0,0,0;0,0,0,0;4,2,2,1;4,4,4,4;1,0,0,0;1,4,4,16;196,378,378,729;256,784,784,2401;4624,4692,4692,4761;16384,8960,8960,4900;6889,4067,4067,2401;8649,3534,3534,1444;17689,8246,8246,3844;6889,8798,8798,11236;4761,8832,8832,16384;1024,4032,4032,15876;3136,5656,5656,10201;6400,5920,5920,5476;5776,5700,5700,5625;6724,7626,7626,8649;2916,5130,5130,9025;7744,6688,6688,5776;5184,3528,3528,2401;4225,2665,2665,1681;4356,3564,3564,2916;7056,3528,3528,1764;2500,600,600,144;1,0,0,0;0,0,0,225;0,0,0,10404;1,-135,-135,18225;16,0,0,0;25,5,5,1;25,0,0,0;36,0,0,0;16,0,0,0;9,0,0,0;9,0,0,0;9,0,0,0;9,3,3,1;16,0,0,0;25,0,0,0;25,0,0,0;25,10,10,4;4,4,4,4;4,6,6,9;1,0,0,0;4,2,2,1;1,0,0,0;0,0,0,0;1,9,9,81;576,720,720,900;100,450,450,2025;1849,1763,1763,1681;8464,5796,5796,3969;3969,5733,5733,8281;5041,6177,6177,7569;7921,7476,7476,7056;8100,7740,7740,7396;6561,6075,6075,5625;1296,2088,2088,3364;4489,3618,3618,2916;7056,4368,4368,2704;5041,4331,4331,3721;4900,5530,5530,6241;1849,3311,3311,5929;7225,4930,4930,3364;6084,3744,3744,2304;7396,5934,5934,4761;6241,7110,7110,8100;8100,8730,8730,9409;5184,5688,5688,6241;1,35,35,1225;1,54,54,2916;0,0,0,11881;1,-137,-137,18769;64,0,0,0;49,0,0,0;16,-4,-4,1;25,0,0,0;25,0,0,0;36,0,0,0;25,0,0,0;36,0,0,0;16,0,0,0;9,0,0,0;16,0,0,0;9,0,0,0;25,0,0,0;25,0,0,0;16,4,4,1;100,10,10,1;49,14,14,4;25,10,10,4;36,0,0,0;36,96,96,256;1764,1680,1680,1600;1600,1880,1880,2209;1024,1184,1184,1369;1681,1517,1517,1369;2704,3484,3484,4489;2025,3330,3330,5476;2809,2915,2915,3025;4489,5293,5293,6241;5329,6132,6132,7056;1764,2100,2100,2500;3025,2090,2090,1444;3249,1596,1596,784;1764,2310,2310,3025;4489,6164,6164,8464;1521,2808,2808,5184;729,810,810,900;900,330,330,121;1764,8
82,882,441;2025,1530,1530,1156;5041,5467,5467,5929;3136,4144,4144,5476;0,0,0,676;0,0,0,289;0,0,0,12321;1,-139,-139,19321;9,0,0,0;9,0,0,0;36,0,0,0;9,0,0,0;4,0,0,0;9,0,0,0;9,0,0,0;9,0,0,0;9,0,0,0;9,0,0,0;4,0,0,0;9,0,0,0;9,0,0,0;121,0,0,0;36,0,0,0;1089,231,231,49;1225,210,210,36;1369,185,185,25;1225,245,245,49;1764,1218,1218,841;3364,3654,3654,3969;3969,5292,5292,7056;4761,5313,5313,5929;2704,2964,2964,3249;2209,3290,3290,4900;1681,2952,2952,5184;3721,4453,4453,5329;12321,12432,12432,12544;9025,9405,9405,9801;2116,2392,2392,2704;2704,2704,2704,2704;2304,2256,2256,2209;1089,2244,2244,4624;4761,7176,7176,10816;900,1890,1890,3969;100,150,150,225;324,342,342,361;576,600,600,625;441,945,945,2025;3600,4860,4860,6561;441,1092,1092,2704;1,2,2,4;0,0,0,3025;0,0,0,12100;0,0,0,19881;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;4,0,0,0;9,0,0,0;9,18,18,36;256,320,320,400;1600,720,720,324;2209,611,611,169;3249,798,798,196;3364,522,522,81;3600,600,600,100;4761,1725,1725,625;2916,2376,2376,1936;2809,2703,2703,2601;6084,3900,3900,2500;3844,2914,2914,2209;3249,2850,2850,2500;3600,3480,3480,3364;9025,7505,7505,6241;19321,15846,15846,12996;6889,9545,9545,13225;1225,2485,2485,5041;1600,1640,1640,1681;961,1147,1147,1369;729,1431,1431,2809;4489,5025,5025,5625;400,920,920,2116;81,108,108,144;576,336,336,196;576,552,552,529;289,1139,1139,4489;2304,3984,3984,6889;4,58,58,841;0,0,0,0;0,0,0,1849;0,0,0,12544;1,-144,-144,20736;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;9,0,0,0;0,0,0,0;4,0,0,0;4,0,0,0;4,0,0,0;16,0,0,0;9,0,0,0;121,11,11,1;484,22,22,1;576,24,24,1;1024,32,32,1;529,138,138,36;1681,451,451,121;2304,2928,2928,3721;3969,5355,5355,7225;4096,3776,3776,3481;6889,4980,4980,3600;3249,4161,4161,5329;1296,3276,3276,8281;4900,7280,7280,10816;17424,15972,15972,14641;8100,11070,11070,15129;3600,5100,5100,7225;576,1272,1272,2809;841,1102,1102,1444;400,620,620,961;144,228,228,361;1936,924,924,441;225,315,315,441;25,125,125,625;1444,1292,1292,1156;256,480,480,900;169,728,728,3136;1156,2754,2754,6561;9,117,117,1521;4,68,68,1156;9,174,174,3364;1,114,114,12996;1,-149,-149,22201;4,0,0,0;1,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;4,0,0,0;4,0,0,0;1,0,0,0;1,0,0,0;0,0,0,25;4,0,0,0;196,0,0,0;400,0,0,0;400,0,0,0;1225,315,315,81;2116,414,414,81;1936,616,616,196;2601,1530,1530,900;5929,4774,4774,3844;4624,4964,4964,5329;7225,4760,4760,3136;3969,3276,3276,2704;1024,2560,2560,6400;9216,9600,9600,10000;12321,10989,10989,9801;5476,7104,7104,9216;3364,4292,4292,5476;289,578,578,1156;676,286,286,121;256,96,96,36;49,105,105,225;784,672,672,576;144,180,180,225;64,136,136,289;1444,988,988,676;144,252,252,441;144,696,696,3364;1296,3132,3132,7569;1,46,46,2116;4,94,94,2209;1,61,61,3721;0,0,0,14400;1,-150,-150,22500;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;9,48,48,256;900,600,600,400;1521,507,507,169;1936,440,440,100;3364,580,580,100;4225,585,585,81;3969,693,693,121;4225,1755,1755,729;4356,3234,3234,2401;3136,2912,2912,2704;5776,3192,3192,1764;5041,3550,3550,2500;4624,5236,5236,5929;13456,10672,10672,8464;7744,8096,8096,8464;4225,4290,4290,4356;2601,2499,2499,2401;324,720,720,1600;676,728,728,784;169,286,286,484;256,288,288,324;1369,851,851,529;289,374,374,484;225,300,300,400;961,651,651,441;196,280,280,400;256,768,768,2304;1681,3772,3772,8464;1,53,53,2809;1,0,0,0;0,0,0,0;0,0,0,14161;0,0,0,22801;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,9;256,0,0,0;441,0,0,0;625,50,50,4;784,56,56,4;1089,132,132,
16;841,232,232,64;2025,945,945,441;2116,2162,2162,2209;1296,2268,2268,3969;3025,2585,2585,2209;6084,4992,4992,4096;5476,5550,5550,5625;6084,3900,3900,2500;5041,2911,2911,1681;1225,1365,1365,1521;2025,1980,1980,1936;784,980,980,1225;961,682,682,484;529,552,552,576;1156,1258,1258,1369;2209,2021,2021,1849;576,744,744,961;625,650,650,676;961,930,930,900;576,432,432,324;289,884,884,2704;3025,4895,4895,7921;2209,1974,1974,1764;16,0,0,0;16,204,204,2601;1,118,118,13924;1,141,141,19881;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;0,0,0,9;0,0,0,0;1,0,0,0;1,0,0,0;9,3,3,1;9,6,6,4;16,8,8,4;16,68,68,289;1681,1681,1681,1681;2401,2842,2842,3364;576,1560,1560,4225;1296,2556,2556,5041;8836,8272,8272,7744;2116,2668,2668,3364;1600,1120,1120,784;2025,1530,1530,1156;1444,1330,1330,1225;1849,1677,1677,1521;400,740,740,1369;1444,1596,1596,1764;676,1222,1222,2209;1849,2107,2107,2401;1521,1677,1677,1849;676,1066,1066,1681;1521,1716,1716,1936;441,882,882,1764;900,1230,1230,1681;256,784,784,2401;3136,3472,3472,3844;7396,6450,6450,5625;3136,2352,2352,1764;256,1056,1056,4356;1,107,107,11449;4,278,278,19321;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;4,0,0,0;4,0,0,0;4,0,0,0;4,0,0,0;4,0,0,0;9,6,6,4;100,390,390,1521;3136,3304,3304,3481;3844,3100,3100,2500;484,1254,1254,3249;4624,5916,5916,7569;10201,8585,8585,7225;1024,1536,1536,2304;1369,1591,1591,1849;2025,1710,1710,1444;1600,1400,1400,1225;1369,1369,1369,1369;400,680,680,1156;1369,1147,1147,961;1521,1131,1131,841;2025,1080,1080,576;1521,897,897,529;1521,1014,1014,676;2209,1598,1598,1156;441,609,609,841;1024,832,832,676;361,627,627,1089;1521,1326,1326,1156;3136,2632,2632,2209;8100,5940,5940,4356;3136,5488,5488,9604;9,393,393,17161;7056,5376,5376,4096;0,0,0,0;0,0,0,0;0,0,0,1;9,0,0,0;0,0,0,0;0,0,0,0;0,0,0,1;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;1,0,0,0;0,0,0,0;9,0,0,0;144,0,0,0;256,0,0,0;100,0,0,0;256,112,112,49;576,408,408,289;3025,1925,1925,1225;2704,2808,2808,2916;1764,2814,2814,4489;7744,6864,6864,6084;5329,5256,5256,5184;1369,1850,1850,2500;961,1302,1302,1764;1521,1560,1560,1600;1936,1628,1628,1369;1600,1400,1400,1225;1600,1280,1280,1024;1681,1517,1517,1369;3249,2451,2451,1849;2401,1960,1960,1600;2209,1504,1504,1024;2209,1598,1598,1156;2209,2021,2021,1849;1089,1221,1221,1369;1369,1258,1258,1156;1156,1190,1190,1225;1521,1287,1287,1089;1156,1088,1088,1024;3969,3528,3528,3136;4096,6720,6720,11025;25,670,670,17956;8836,6298,6298,4489;0,0,0,1;9,3,3,1;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,81;400,280,280,196;900,210,210,49;784,140,140,25;961,589,589,361;1369,1221,1221,1089;2401,2107,2107,1849;2704,3068,3068,3481;4624,4556,4556,4489;4489,3819,3819,3249;2025,1845,1845,1681;1521,1287,1287,1089;961,1116,1116,1296;1225,1365,1365,1521;3136,2072,2072,1369;2704,1976,1976,1444;2401,1960,1960,1600;2704,1820,1820,1225;2601,1530,1530,900;1225,1015,1015,841;1764,1218,1218,841;2209,1645,1645,1225;1444,1406,1406,1369;1444,1406,1406,1369;1681,1722,1722,1764;2025,1980,1980,1936;2809,1802,1802,1156;1849,1849,1849,1849;2601,3672,3672,5184;2916,6912,6912,16384;16,660,660,27225;7921,5785,5785,4225;1,6,6,36;400,120,120,36;144,36,36,9;225,0,0,0;64,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;64,0,0,0;0,0,0,0;0,0,0,0;361,0,0,0;1,0,0,0;256,16,16,1;576,24,24,1;961,217,217,49;2304,1248,1248,676;3136,2856,2856,2601;2916,3402,3402,3969;4225,3835,3835,3481;3025,2970,2970,2916;1681,1804,1804,1936;1156,1088,1088,1024;1600,1320,1320,1089;625,825,825,1089;1024,768,768,576;
2401,686,686,196;3025,935,935,289;2209,1081,1081,529;3136,1344,1344,576;2401,1519,1519,961;729,999,999,1369;1600,1600,1600,1600;2025,1845,1845,1681;1225,1400,1400,1600;1849,946,946,484;2209,658,658,196;2025,1170,1170,676;2601,765,765,225;2601,1224,1224,576;3136,3360,3360,3600;3025,5500,5500,10000;225,1905,1905,16129;7569,6438,6438,5476;676,910,910,1225;4225,2210,2210,1156;1444,684,684,324;1521,39,39,1;1444,38,38,1;1444,152,152,16;1296,468,468,169;1296,576,576,256;1024,480,480,225;961,496,496,256;1156,544,544,256;1369,666,666,324;1600,680,680,289;625,275,275,121;841,348,348,144;1600,920,920,529;2209,2021,2021,1849;3481,3304,3304,3136;5041,3550,3550,2500;3844,3162,3162,2601;4225,3120,3120,2304;1369,1295,1295,1225;625,800,800,1024;841,1218,1218,1764;1521,1092,1092,784;225,255,255,289;784,588,588,441;961,558,558,324;1849,817,817,361;1521,741,741,361;1764,756,756,324;2116,828,828,324;900,660,660,484;1444,950,950,625;1444,874,874,529;1296,792,792,484;1369,703,703,361;1936,968,968,484;1089,660,660,400;1156,714,714,441;2209,1504,1504,1024;1444,1064,1064,784;1444,1558,1558,1681;529,1426,1426,3844;144,756,756,3969;1521,3315,3315,7225;3600,4320,4320,5184;2401,1666,1666,1156;2116,460,460,100;2025,405,405,81;2401,490,490,100;2025,540,540,144;1296,612,612,289;1936,1364,1364,961;1936,1672,1672,1444;1444,1064,1064,784;1089,528,528,256;1681,1066,1066,676;1936,1496,1496,1156;1849,1419,1419,1089;2116,2070,2070,2025;4096,4096,4096,4096;2809,3445,3445,4225;5776,5624,5624,5476;2916,3348,3348,3844;1521,1053,1053,729;1156,1020,1020,900;289,578,578,1156;676,702,702,729;1089,1089,1089,1089;400,500,500,625;400,220,220,121;169,156,156,144;400,240,240,144;361,133,133,49;289,187,187,121;529,322,322,196;324,180,180,100;576,216,216,81;441,252,252,144;400,240,240,144;256,144,144,81;529,276,276,144;400,320,320,256;484,594,594,729;1156,1088,1088,1024;625,525,525,441;729,945,945,1225;225,750,750,2500;144,732,732,3721;1225,2450,2450,4900;576,1296,1296,2916;2209,1175,1175,625;676,364,364,196;1296,576,576,256;1296,756,756,441;900,630,630,441;1600,840,840,441;5041,1562,1562,484;5041,1917,1917,729;5041,2059,2059,841;3721,1952,1952,1024;676,962,962,1369;1444,1292,1292,1156;784,840,840,900;1024,1440,1440,2025;4624,3876,3876,3249;3136,2912,2912,2704;5476,2738,2738,1369;1849,731,731,289;961,620,620,400;676,572,572,484;441,399,399,361;1369,999,999,729;1024,960,960,900;729,810,810,900;324,324,324,324;144,72,72,36;36,30,30,25;25,15,15,9;36,24,24,16;9,12,12,16;36,24,24,16;81,54,54,36;49,42,42,36;25,20,20,16;25,15,15,9;49,42,42,36;81,81,81,81;3969,1512,1512,576;2025,1530,1530,1156;2209,1269,1269,729;2401,2107,2107,1849;400,1220,1220,3721;144,720,720,3600;2025,3330,3330,5476;3249,4788,4788,7056;3844,4402,4402,5041;2500,2350,2350,2209;3721,3477,3477,3249;2601,2703,2703,2809;1369,1554,1554,1764;3136,3472,3472,3844;8100,5490,5490,3721;9604,4410,4410,2025;14884,5002,5002,1681;10609,5768,5768,3136;2401,2450,2450,2500;2401,1176,1176,576;2025,1125,1125,625;2601,1683,1683,1089;5776,3192,3192,1764;3249,2964,2964,2704;3481,3186,3186,2916;1849,1935,1935,2025;1296,1224,1224,1156;676,702,702,729;729,837,837,961;1296,1260,1260,1225;900,900,900,900;729,702,702,676;441,252,252,144;289,170,170,100;64,128,128,256;81,72,72,64;25,20,20,16;16,12,12,9;9,9,9,9;49,49,49,49;81,72,72,64;25,20,20,16;9,3,3,1;9,3,3,1;25,295,295,3481;8281,9737,9737,11449;5329,6935,6935,9025;4225,4355,4355,4489;3969,4032,4032,4096;729,2430,2430,8100;144,744,744,3844;2916,3618,3618,4489;5476,4366,4366,3481;2809,2385,2385,2025;4624,3400,3400,2500;4489,4489,4489,4489;2401,2352,2352,2304;
2209,1410,1410,900;2500,2100,2100,1764;6889,4399,4399,2809;8464,5060,5060,3025;11881,5886,5886,2916;7569,5220,5220,3600;5184,5040,5040,4900;5184,5112,5112,5041;5625,5625,5625,5625;6400,6800,6800,7225;8281,8190,8190,8100;2916,3564,3564,4356;2209,1974,1974,1764;1936,1584,1584,1296;625,675,675,729;841,870,870,900;484,660,660,900;625,550,550,484;729,702,702,676;676,728,728,784;169,169,169,169;100,0,0,0;25,20,20,16;144,96,96,64;9,9,9,9;4,0,0,0;1,1,1,1;9,15,15,25;81,81,81,81;81,63,63,49;4,4,4,4;1,0,0,0;16,104,104,676;4356,4356,4356,4356;5476,5328,5328,5184;2401,3332,3332,4624;3844,5332,5332,7396;900,3060,3060,10404;121,638,638,3364;3721,5063,5063,6889;4225,3575,3575,3025;4096,2176,2176,1156;5041,2982,2982,1764;4096,2688,2688,1764;4096,2816,2816,1936;3249,3477,3477,3721;3136,4032,4032,5184;5041,4899,4899,4761;5184,4680,4680,4225;5184,4464,4464,3844;3969,4032,4032,4096;4900,4410,4410,3969;5625,4500,4500,3600;6400,5120,5120,4096;5776,4560,4560,3600;5476,3774,3774,2601;1681,1722,1722,1764;1156,1088,1088,1024;1156,986,986,841;289,374,374,484;576,480,480,400;400,420,420,441;289,289,289,289;289,204,204,144;225,150,150,100;49,42,42,36;1,1,1,1;1,1,1,1;25,20,20,16;81,45,45,25;1,3,3,9;1,1,1,1;0,0,0,1;9,21,21,49;225,195,195,169;64,64,64,64;1,3,3,9;784,784,784,784;4624,3468,3468,2601;4761,2691,2691,1521;3969,2205,2205,1225;6084,4056,4056,2704;1296,2412,2412,4489;529,1426,1426,3844;4225,5980,5980,8464;4225,4875,4875,5625;5041,4544,4544,4096;4356,4422,4422,4489;3364,4292,4292,5476;5776,5700,5700,5625;4761,4278,4278,3844;3844,3534,3534,3249;4225,3770,3770,3364;4225,3315,3315,2601;4356,2838,2838,1849;3136,1904,1904,1156;3136,1792,1792,1024;4489,2345,2345,1225;4900,2170,2170,961;3364,1682,1682,841;2209,1739,1739,1369;784,1064,1064,1444;529,667,667,841;441,462,462,484;361,323,323,289;256,256,256,256;196,182,182,169;49,28,28,16;9,3,3,1;9,15,15,25;100,90,90,81;49,56,56,64;9,6,6,4;4,6,6,9;144,84,84,49;1,4,4,16;1,2,2,4;0,0,0,1;4,6,6,9;121,99,99,81;121,132,132,144;361,646,646,1156;2304,3504,3504,5329;5184,5112,5112,5041;4489,2613,2613,1521;5776,1672,1672,484;5041,4402,4402,3844;576,2496,2496,10816;529,1403,1403,3721;2916,4320,4320,6400;3136,4480,4480,6400;5184,5544,5544,5929;5929,5467,5467,5041;3481,3717,3717,3969;4489,4355,4355,4225;5929,5313,5313,4761;3969,4725,4725,5625;3136,3584,3584,4096;2401,2156,2156,1936;2601,1836,1836,1296;1936,1452,1452,1089;1681,861,861,441;1936,308,308,49;1936,176,176,16;1600,560,560,196;1849,1290,1290,900;1156,918,918,729;1369,777,777,441;324,306,306,289;225,150,150,100;64,40,40,25;9,3,3,1;1,0,0,0;1,0,0,0;1,0,0,0;16,16,16,16;81,99,99,121;100,110,110,121;49,42,42,36;196,84,84,36;169,52,52,16;36,12,12,4;1,3,3,9;49,35,35,25;121,44,44,16;361,361,361,361;3364,1914,1914,1089;3481,2301,2301,1521;4356,4686,4686,5041;2704,4524,4524,7569;3364,4698,4698,6561;2304,4992,4992,10816;324,2376,2376,17424;529,1449,1449,3969;2401,2450,2450,2500;2209,2115,2115,2025;3249,1881,1881,1089;3136,1848,1848,1089;2304,2016,2016,1764;1936,2332,2332,2809;3969,3843,3843,3721;2916,2700,2700,2500;1156,816,816,576;676,234,234,81;841,174,174,36;961,248,248,64;784,168,168,36;484,132,132,36;529,184,184,64;841,464,464,256;961,1054,1054,1156;1764,2772,2772,4356;3025,3795,3795,4761;1024,896,896,784;1024,64,64,4;625,0,0,0;400,20,20,1;121,11,11,1;1,0,0,0;0,0,0,0;1,0,0,0;16,8,8,4;64,80,80,100;225,330,330,484;225,405,405,729;529,598,598,676;324,324,324,324;169,156,156,144;121,143,143,169;289,391,391,529;2500,2750,2750,3025;2304,2448,2448,2601;1444,608,608,256;1681,615,615,225;1296,612,612,289;961,589,589,361;1444,2166,2166,3249;
324,1674,1674,8649;484,1386,1386,3969;1521,78,78,4;441,21,21,1;324,18,18,1;121,55,55,25;400,300,300,225;841,464,464,256;1369,481,481,169;1369,777,777,441;361,285,285,225;225,75,75,25;225,45,45,9;256,48,48,9;256,48,48,9;225,30,30,4;196,42,42,9;324,162,162,81;225,210,210,196;841,435,435,225;1156,1020,1020,900;2704,3172,3172,3721;3136,4256,4256,5776;2704,3692,3692,5041;2209,2538,2538,2916;1156,1020,1020,900;484,242,242,121;361,19,19,1;256,0,0,0;196,-14,-14,1;196,0,0,0;484,22,22,1;900,90,90,9;1600,280,280,49;841,725,725,625;576,744,744,961;64,184,184,529;441,1029,1029,2401;2500,2500,2500,2500;81,99,99,121;25,20,20,16;121,132,132,144;196,168,168,144;81,54,54,36;121,440,440,1600;64,608,608,5776;49,784,784,12544;324,306,306,289;1,8,8,64;0,0,0,0;0,0,0,4;9,12,12,16;169,52,52,16;196,84,84,36;400,480,480,576;196,364,364,676;81,72,72,64;49,28,28,16;100,60,60,36;81,45,45,25;64,40,40,25;81,45,45,25;81,54,54,36;100,90,90,81;169,182,182,196;196,168,168,144;841,145,145,25;784,84,84,9;1089,297,297,81;1156,544,544,256;1225,910,910,676;1369,1295,1295,1225;1156,1122,1122,1089;676,572,572,484;529,368,368,256;625,375,375,225;841,638,638,484;2401,1617,1617,1089;5929,3619,3619,2209;4489,2881,2881,1849;1600,680,680,289;16,56,56,196;441,966,966,2116;1600,1560,1560,1521;36,48,48,64;1,2,2,4;16,12,12,9;64,64,64,64;81,99,99,121;64,328,328,1681;1,71,71,5041;49,777,777,12321;289,17,17,1;0,0,0,0;0,0,0,1;0,0,0,1;4,4,4,4;9,3,3,1;25,45,45,81;121,297,297,729;144,288,288,576;36,36,36,36;25,10,10,4;64,16,16,4;49,21,21,9;64,16,16,4;49,14,14,4;81,36,36,16;64,64,64,64;121,132,132,144;196,210,210,225;64,80,80,100;9,12,12,16;64,16,16,4;144,12,12,1;441,21,21,1;961,0,0,0;729,0,0,0;441,0,0,0;361,0,0,0;400,0,0,0;900,150,150,25;3249,969,969,289;8281,3458,3458,1444;10000,6700,6700,4489;3025,3795,3795,4761;16,120,120,900;400,340,340,289;2500,2100,2100,1764;64,248,248,961;4,12,12,36;4,4,4,4;36,12,12,4;121,99,99,81;144,480,480,1600;4,138,138,4761;64,592,592,5476;576,0,0,0;0,0,0,0;0,0,0,1;49,0,0,0;1,2,2,4;9,6,6,4;169,182,182,196;64,120,120,225;81,108,108,144;16,4,4,1;25,10,10,4;49,14,14,4;49,14,14,4;64,8,8,1;25,10,10,4;16,8,8,4;16,12,12,9;36,18,18,9;25,30,30,36;64,40,40,25;9,9,9,9;4,4,4,4;729,0,0,0;9,0,0,0;961,0,0,0;961,0,0,0;676,0,0,0;289,0,0,0;529,0,0,0;289,0,0,0;784,56,56,4;1936,88,88,4;784,140,140,25;3721,244,244,16;16,12,12,9;100,100,100,100;1849,1247,1247,841;1024,992,992,961;1,20,20,400;4,6,6,9;16,4,4,1;25,15,15,9;81,36,36,16;1,15,15,225;9,213,213,5041;
\ No newline at end of file
diff --git a/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/raw_1.png b/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/raw_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..ebf23e3c66a3f09479dcf1d0ba6dfa80f90f877f
GIT binary patch
literal 661279
[661279 bytes of base85-encoded binary image data elided]
z7=%j5xn{H7DttBSkNTr#vl##p9lv(&iLZU_>#HXp{Ei2IV*h)-@|Ca5{reM_US52* z+5g(bqYq8oH$6Rl63u%ac;o&%ZYzOy0DwvrfQtm6EX@E;h{B4M#ij9hOhj;zopWP1 zF)^`r?V`r&;GJ(0k-LUdCr+F=`MPlHy<0M1`y9eKxS?_@3$e`LB#Lb~If#j?5X?xY zsuGou0>z10#74&6&io-(up_K$1dCXI+KHy4rY!`jaL!XoOjR+uf%X0^cFdU0;v8W- z>h`)T`NWiKPUIa-O<~#t!1%T9i!Z)-_3Pg}`pPTMoPOriu;>k)8TM1URmhk2iGOH=6}k(Af1cKu1vJw4*7MiEE@N zi>MeWGo{ztdCLq%X?qm1%oJjgki<0%5Sx)xAe>oe++x-#i4E&tf}Cy48Gwii1XG#< zP!Usd0I@C_R7Ab^qJbR`dEh)(asb72}SuN}42)mn`E1 z2zMM4i%rc=W#hR}2~nzvz>(OLA0i6&98A9J5G8o;iL6c|HpF9O)f(Q#0YoB6QJ}CO zawSbyMr&6KOuazDoUW!SM9tk$=eVZCI0Q}3?CbYI-CMKSUb2kZ&6lS8*ko5E_jao4 z?32Qr%w5JrVa-%EB5Wqj)*B&}RRCZRcR-1#wi&Y!RD~tIFG)@!s?M|-g!;uY;62FV z?1kz~GL9NIH(Ij76^2QLa|zA^Q0?du)LDfJiWYES3J<$Da{Ur*T6Uc?iIUYH;=mFW zo>h~KC3P=}2pFjnQNa|Wjz%NrTu~I8MMg}%CmN5-5T$4|t&FIm+GDLo#jv-MQyFAs z$L1SnZ-Xebe&L&a4!grQlV2r;z)_Hxq~`TlGe$=yKDD+Ofry+-eHZW0-B?||vOHKH z^o94_dgm<%=c)<-oCC(BF|yC{M$;QKHjb_>Us=7fJRA;JSF_bY*yWXwI=VqYzU6&c z6#3D!S5Ke3G#(Bz#}|9Db9!ROBlq6*JN8D8fMFc;urGqtI~fQ~%+QN7ZDr**xFuO-xLjUR#-(nL4{|^!nG| zcJS!*$;;O|p}VrOax8!K;NF`i_S_DiGLy$uy2m_YbuEm`FwtyTzuLOllny26g>tF4 zf#tPX#kkV-?$CAI*o~KCKeip#sMqaZF0OVuok^vx6Bo$~w=`pkEvGG~J(~-t56c_f zakT}RJ)D(9+c|7jtjVm75Kt6(?s+`y%j6^x$bpndsVEv4j$+UdB+qjlbjxZa`DEUn zXt`{?HyD*wvpq2$mW@UO8&h$0V)0u?{@b5_`Pe6p-ShT)e(2gqmY0`jaV-Cvr(b>d z{Y%A4Hv78QKm7gsx6aj7GnC1DP{eRJ9`$1*>-ctF6hs&=EnXc*5|RFRR20QHR8x6p zScRfJaqyA*nK_(UYX<)0W8eCNfA-JM{nOv=bUJUvd~{72^*j(FvMkG8P*djB(l=Jy zM-+h8>U-weTn8|-hZB(s>ip)E34l7y)be17q@HY=S&+z5kO&^C;0O^!=KvG{E4qU< zxY(MWotT}NnS%#rdwAlB7Z;v5^xA8$z4F{&9X|ES(Ffl6v;XP;{Pw@|{_O?!1HB$D zMvMcvf>(?Kh>$ZxsFK7FRZ621Z*&cTrMG!+F4AP3wUykN9^R6HZBTN9o`VFAs1k*V zIya*$)qq=^j;8hQ87q*$(A@Tw4z-4{-qSZ%DqC~x0;|F+lQOxai>GL)(0clz5()6O zF(-F`rJ)FsdY_4iMrC4UsESMSQU$Q=Om~dVc9ukswzI|D%|G&v?*y+RZGkz z$F7#*h&bld8)B%$Djg(AGw)TSa}4;po-)x%8nx3VH$qeA1faD`a3Jj^o z7Bvf`J}R3yscye(f;|vj7lOKkY@r)BElT{Gl)7$BbyshA#6V0FL5UhgV~l3$v8k9f z(3DhEn88E}GldgGqa^LIs>WDtHX98%%0A81RMkqc&CZS8AH>8OiA>~e-e)k||69C! 
zlAgpELtU(F_Al$g$nINr<6Ul}S0Vx_5xmvT(Q03ig{oka-JguCsMQiy;#6jU3Y$V^ z1(7JykGDCF5JXFf?447v*tQH(hC(z})mTKlXA(#yl~(N23ruSqrS~OGsWzJzRu83- zR=rAZmRw1(p_+8VY3P&gSQVE!F3V~>a=9b2NI?owoHK_X#^bT7u3uR`d-V9!t^-?k z@9)T9Zm!)@bcf~DVcBZ6rU=Yj6h)rrHp=OBhv%*=U0v#yWqEZ)k6c@tS?pE-LT&e; z$Vv{#JRD0I<@uLhmQizb_Vn~Y|H4aC-#C4CJRZMkV*b`U4!(5y^tlt~yDgfV1@Sp>DTF#LXs+$LKI1s0hr+r3xWD|I%|rB<%(|jaEJ}?pH-8 z>-YQP-oQCWfg3(A2mR$ifA78nWi_3Rxs&I6=BExGe8bGa?cHwo8B3VtFfMyK5|5hT z5<$d?eyeFUu(4^8QLD%|Ha5z#^4@1c8leQ|vfMKy#Hd+Tc%?WhV~et)IoX=%uXlTc z-o)etu?PFxMF64A)aQX%8rAkNaP z-@&X8%1YuKqYeQ{Q)7`V^CB9eopPh|E=Fa#F5x1DxRlA@7_j_zRp%(0Ydu3%>9~sR z&Qxd5?CcD-3NByneepAoz542_XJ2^yrI%j%CM|5g`@oO<`mev`9dCOG0B~Uy0LCR$ zAr(SU&S{grlRJpO1E7Q!Oy#4kCu#98V1X?f^-fU-J9u01A_|XSa=i>u;FVw$VN5-Xqk|aW9BqA0P5U~|8Rsyi9YMy%$ zF>HoORMZhsGQqmci-@&iM5MN0q(6v&IrVG=YPb#qMLQ|knnC?7kO%WprC4&Y3E!jmmR$GPObQJ)`HTywv$>yq*|l9k(K}p z$YG{vcC#c?W#heC%Y=x6)i)%dTFC*bODoF>f{0QbCYR9WH5gPCv98rrt#PtRfrF|D zn3!U)4yJg=tWu6g<0!47k*;lBt~rPCNCsuqXtnJdhFFSFo)z`;m);8?-7=d+4{Cv% zsCz4d=*yKwXX~80Bchz$njl-+9Mfw;srIEHBfQ zvkRlFdHM2X%+0o^ChXoRvOM=5QCVp;7>6=sJAZC*qCK~3-}cwN?sanJ@_+sKPd^!k^YimR{{9c_y63xLWmCHue zXi}*f_SbecCkyAqp+aW39qq;tIpn@_B`XL~g&m`esye|r5%r7cJ+&f*5P?cW$I@c< zQjVN+QS%7o&1^gxHS;WtMibLhQ`4#I|f6MlMf*REaPb^q3_TeoI?T3>ISpPO7*SO_BPqfvj@ z6BKC*47Kvw`kErJNVym^I_k9A>np2c2|mj-BKL@u<~bM5HW6ioqu_F%g|eztDD#a* zBg?Y>XoLJVA6&y1)aHHLZlCDPJ#*^ld~fE?ncGL%pyb$CuZ9cjfBDU)Z@u-__x;p; z?Hz3l0iaUAffONXzdNc*X>_JIC*o?$mWfwhJ-xKjo2sVD)zQXaW3oNzy&p-~7*{(F z+CANj~X+1tJ~kBX=j!DYTGLzZO~B#Jm;W>#-A`jiF1 zb^q2|avm|W1rk|v)Vvcmr6qRC42{lC$NIN}P;FB;Rzzet2bX+sAVNV1Ras`bW5@R0 zGc*o(^)H@&?z!iVAAb7jr=LE&dTMTV?w)`6DeOlVJ!n&e_YKA$XwlZgI>>RHOiwmO&dV9 z{w=@(!pfQR#%+s|zR}My+X{3_00;w|*0TFUUFla_45S1eJA=r=K4 zR8{A)sA9}4$7(tOW=?@>CY87IA}f)}GmMgH)A8E2OJGx65?p3l4Nd1jf|xc~ubV`u z9-!2hRmic3XiStatG2AqtZEFku=%qTpww>^Q+g7T9T&zM)nJ1i1=t~k%Da5?tQmqZ zyOgTN3XLFVhg#HfKb6XPHcw9?$&hT4K@)S{QQc)H;>_wP=U5Y-g6dh4ZNP{z)Wuc} zMNK(}Y45T^y>9nGYNaF*La3de3ZLYVq#DgI>62dJx^@AzQxMr4zik5aq8IvtSRgH{7OcBZo6|wnNP^mMS z*hU8FXoBmTajF=sp0(F&zGu6cO;lGGXQ}s7kDMT8E=^t)Q7}PbOf#`M5@s|S1*x*a zvqA)B17?pHXVzdlVgYYEjF4BoX>sM+`A*yRp zO^uuM;W8H_*t;xAg$<-99geVV%Uo-2vfo=T*EUvqy;o0NYPZ`{cg|1m&Dza;A_D>g zXf_MyknH7AVp$E=M={12D(^UN<~7g~(G({T29JheeXYNH@4l^Dx8Av9ZewHPrPYn) zVL4e*d4Bm*Pd@!U-}621d*_=6v9CwZw574!>NK0p$$WC}{GP3@9joqmsul8l>yYx1 zf9-Q$U4H5K`ggwj&2N6>!$+5gbm;2j#vqJ_B|up@MpY8E3av)591@cw4`>xf0`aj> zRdA3f6AnbQQFR4BMBWA?2|s%tB&+qP}n>XB1iS-P~k ze0hD~mX?-A)fkRwght7lIYY$&sH&J{8P$#hi?)ar6dVKv`*z-_V$2)p6abJ50cdtw zB0vdoXywgrw_61$e1oz~czrbPcY8DKjxgn!!1fImz6C zo9_P3_kD2x{+m$$KB=cDsH)0hvzi5|LPK2b_AZ~h6h~p~xV2^OmWn-j4|0k({;0dUfhHb!-@8Bbf!C{I>MHtu9p)e+H)aZmwyHQN1QD6{){7ZKRFHQz~0H$wE-36;Asn&MbvgS}#U`_y!REX3S2GxmFLLi4sCVec4S-M(%f2Ay@Uu+a@Rw(jFD~$a+wJb$aI&{ z467=_F0*Y_L>Mj&!U!=&8`)eRz9jvKt))&xlpM1YgQUhZg&`txR8^H_s#zaKMY$eT z*GoKUQtEGxw5^|Eva7a}O^(p$4Y1SuLP8Ey@SDt2=Kl;9)wl7v9D*H{UXtd*(ZwAnaO z5IGoMOH&aOliOZY)#ols%s6?eRkgGZirAGPKwawA*f}0VF1Jnzn>7v!icGbJMv0^z ze?X$$f}YH(M8xDm2r2YAf)Yy&X$w?GaOOfHD>R7dG1wFN)#qr_bnnW)V)1s0wX3}HksVm3ML1( z+df7~yf5>G)Afhh>JKB`m@mnSR0iA7#c)q5LXMayC(a0?5SyK1xVn1$@JpiucemT^ zoj2{-x^?UJDeh&h)&z}4u(V7tWd*bn=Xo#~E-$YvTw5Is26;9)kSZ8_C;%B$h%CH< z;FSx`sDZWN>I+AgtE%E=XKGu~-dNA`{MhNmr=EEEE#LjNJ3jDmtC)QL`RBi}w!V9! 
zGo7(7e6Qik$!2py7%Y>~wTt~r6U}SoU;d9jIePf$+y{POc;8(ge7U+@;xXdEV1VJM z%?=u37>~Kd%-Lv^5e04q4eS##C@*}dN+`(Z%se~MA)-o08ZijNEX$Yzp#n`f$1zq} zmN7|)RX$Ouj@_;~-Z?vW_S*V-|5{Jm?e?%5OwMlKH!&GUYIQ1XBVG&HpAH~ z;R<+q|Ni@PM!%1&fb=87OB8Y(p(KupVkLQ=R|w=?-h^}LOrep%MGS{1%f!No5fUMU z5a8{& zZ$}P^F{-r`OgE+R5t)Z7G{RYs0^r@|A_vtJlE5yh%19tVqwQ_(hHkp$f~Bqr*)3+% z2h~kEpIyB4q1GX&=iOex#6HB7TnbT;lSqz8qnaGg0{0v_2?~-;u)IO$RRkoa#>nb_ z@;(K}*E0ym%o0r8XEA$F5z8JkuBD1vwNanH6~Ly!Bq>M;nS?2_Z1M z5P~z=GelLZ%CPal?iSG$5NO4R179DLncj&LsrUN%#tQt}s}h4opQv8Wk!e06lpP`| zdy<&ar~w#WuMFd;AuF2f8BvH;$cacs6ha89d6tntRqZh6tlBfBvgtfXp()YVMp_1X zq~z~1dx<(5YHs$!4D<_tU`aqkX%YvJRT}mzR8?g)i*rnXMo^C77-cnBTaPO%gW0XS zckkchoXflvg>#DautC`p;s@jbv5s73I z6ZytyJuh<3TL7|H6a@$8TCJ9j#~Vva&8c=7M(r_W%x=PWrYGm;=c}r^aQftMIKF-V z{`o!A6O0(pix?w9okYN)H|&kdve9bAfKX|bkswxTnwo08zcLt?s~cSqwc72C{)UiD z=8fg$<&{z2Pqsa|jC~opBw-jj#QBvAS56iu-4?yjzqrwF^?KvE*0#Y$r8Cnz@4j`$ zQxi!|)o5kR)My~f5CNAVizrJBK*I0^8tv-H>Wk&oa(>tA@;qPo?5BFY-s6Efoz9Pb z``?|O!fc)mV~|Yq3G%IpcDn-%yJh#LYIgDDg(pA%r!T$qlE0O2z4g{TYn{Phu)4T# z`SRsMI`fvdyyZRbeMiwC3&2xvF;Ni3sH!T7&1Mdd2yFBQ81#;xInggik&!!!Ay%j= zR602|y>DlD?c$41z4YvJpFMl+WM}Kt$!?!-e*HsleR$HgtOA;mrUW$sfWnJvmS;gC zGA=rB1v(QDUy>l4Po?pYu#=) z7Sr>yotarIErC3W9K|#mgy#9t;-SL}FTZ^J`0?Y1o<4N^(1~03|G@wHfByK7{p{8@ z=oFThH@aIUn?Nyylo$oXh)_vUq?8jVR3JI33YoRoP3ei0#1=IHUBn7phI6RIcEjsE zj2N$L*Gw*I>OCuuBq|$+BdGOs$ysw301P~pgz8GFB8IzkBqG)?2^w`%!(qp2zTV_W zGgosGWVurnvNSLR=Lwc85eW+NF2T7$5t3z@m^@H8hUg25AsAskNaek^A%)o!1nROn z8MOvwrUNzrQDcK_4o5a?2&pF)HX886su~$EFg4L;;5#<^59zpQ)rqf5D-cAq`vx<1b zcCjeV|tjb{5tqOpJ zep%JWj@UV8PNaNulwfTmMWhs7Y_A#sISwI^vWf~t@4dy~Sz*M8jO4mja-wS1Z_lbJ z)Rv62V!>TxiM&<1ok!S0!9b{Kt{RE>a^Wd&1gyiPGVIRQjDq)6wPLn8Lnagr3CN2 z#5foXG;^+55G2M>#gONDV)>cXC}r_oKP!+$2c=1Kbk138lpLAEUcgeTb^y(ov`(~_ z04t~@3R{*C!!ZSG|1uG!{1+#XDufZHCpr_ed531^xA@Trqmg)aYs=+@vzO;?zqKMv zu3l}bQSMz4GIS?Ji@zTa1 z&vQa-fk$iI*6i%5p}XhxZ~ZsF@Xv3#<(4hmR`0y{!1&_gV~;)7TU~B68ozbum2Qkh zP$C*}A3%cxGfQYji7guDjIm4wgo&B9bFLZ(0C{6e-hdzVoO6LP?|pkRuc}I#84=|% z6A{iE&1Q3^ol(_a=#2uhnaPP5<1F*GxoOatI9!R?|KLOKd*t1_8pujW32}(TnII#u z$MWhLd*ALf%Q5ndVubandFPuMl>~}u-+0TCVnbw8-@4x%>W4#?acFf3dI=MH>YNrSF&S&^U9@X)(>w#(rme2ueUYMudc2>zq{DkzIXDrz1+#r z1Ar<=1ez7fVHitjblMY{%QD0aQ(IcY<^ICi^D$;cG2w`d_Qdj1FOn;QtX#Mx=T1KL z)vq4D@S3W2S7ov1ZhPm$yY}C-C1Ohl%Nr;%hZcsz%qv$_6&cLVjivC}utnTPzPj<$ zH(q>c{TXK7^3b~;*mvJu00<4Bkt1thZ4H$NGqf@$KssEg!ru^S#178GUzKlu^PAmg zK6T>6i6f6p|L_n0@NLEW{3NzIG@Hl?>pgklbC19L+;itoe)HL9pM6qB@BYCb`ltW) zH}2gz`<5|IzS4i<%C(zsK2QKMMA=7KBIE$0$QhajuWS)fl)NX`%LoE-4fslkWDun- zC`+SrI0l;=#Yk`g%q9x9g*PQhIa}ozQPeqh&gvMZo+NSVYK^sCb_!=+2QKIOoju3JY=}^6cvG8%+pd0UbnCnVgec zS()IJpn;i%yr?GTygd?*ZIm8DNYcMxb|$i5_ZUM==&)2VL6w7TZxA&ax!MUYhRDoO zD?5o>Zzk3pMlc15-nu4~B2G}#(@grKQUYMlszTO>xWOHe`Z^ff5CQ43M@m9PnA$@` z$gv_ZIgCgV5y@a)s?oHMP{%A90XQR&rlLViSrSByh$>{mc&~ur%e}RDoLZ&~5nI2Z z9-2U+1_@+c`WVR}RU6KkNz8)`A6e^z(u6fq>ZM!Zg&VY-n}3!lsl5+Vn}`LZIm8fx ziJUAm)(G`Vs4sR3A4(QeYxzbnCE}-fK)iP`MhF;u9aB+zUhEHbKB4u`ED=?ixk{1H z7VtWk#w(W1Btg48NJffjQ*)rm>$uFlHL{)|-Cr4_Qmvm~eMwm^V(*qH~1d9n(q}jaV z>%f&pR*Y30rGnCVs3OZS8pT#;VtQ_-Tw7~4o1-q4mR9D~Q_nBwJ3Rw;b z@Q#_`87PSYcHeUF;>C+QXLb?Mv17-Y&1Rl8`@K<4nG>#5OA#JRJ1gR-6c@`3_dj^= z4}QnDO-@dpc=g0%k3B|ho!K{ceo4a0T6beS*(r?G*z$vo;l|6WtFyDSQ;m5C%r7Et zl(ILc45+HOy7JobBa6!mqjNZP>g3glQRCKwd-K){&wt~tFMsWQx8B=t0>GpKfZvv{ zueY`gCdY$upF?MU=GNP9yYtSwCdwB&&1^WJ+1C7tHMbhwj>YW6AAkK*w|?#izw7(< zdjQBvP{3GGDH<*_R)h%FR#vO3I`#6Qez)iSY>YAILP`mta?WJ~8n5;)965aH#Iey# zXJcbyXC)IZzdC#EBo=qLtpIj4z-w4globl+VvMfL0F1pt~Usn zt>htq7!t25GZbKknDT=l5gJn2q%jTkHd`QtXCa6nQ&g3>jAPJ9>OGSY3MdV50p#pr z5FtVYIpsQ7ipde!Y=QN%X5(_Fs$OFB-h-lZYyoj0l5-viwOBWe0F3g$H8eE2_!L}E 
zxlE$~YqqG8n3KiK3l?*ds-T*Xnof8UBiCaTU+Wb_gqWF`OaPeHdNJpu^-D|~N<<0) z$UZ~0R#fV&ztmxl@SG)Mtxa3(e=U3rA}KSC4Nj_R;}xP!)nxk441mOi5ZIH63IG@g zO9jl{1R9)qRb^!&f|wPS01{X{UKTqzZObC7vvx5A2u~j#8hpZUm){B{M35s9FY%v$KNK#fS06fXtEBPL>Re zs8OZSYBq`-NC#F#CUAdTR*H!tCy@|BRaNW@)ZSh5rdvg0&rTiI1~ob;k>$RcQUtPT zJONBWR!?muDsc^5w`O&H|2xynNOYXE=$!P&NrLP>x~r?_PoF-pd++M%>b2hBz<~qB zbhE)a*}~dd*;E*LQm>|mzI`$PF7-!D;se%8WocD2cgqjpKCV1{`^sGZ~dWnzS+%BRiFFT`uch= z4hbbw7GsP$Znav2Z0}Eh;uHVN$KMTLz(WAx_@%FX<_rJhuRpiGJbd53_?eG=>|-DQ z);pgV6rWtWw6wI;=Vpwt5yp;aP;s-_uF7?&ux_8sp0X0WuRWt}dhbYDvl-iBvV6Z+e$AD~XY&53#IGX77uWWQz zuU%f;bJJ~m_Uy5{s~jU(D_a6Hhd|x#pwXK2If&8fAXfmGAbV%?`P~QJaN8YMPMn%= zw#u@6>dc`|rvsAVs2?-wR=v5nV^3#KH@{kz<(hVfQ71byyLaExoW+GJ$4_6ma`N(} zytj1w?YF;m>)vf1Ya8S3I}4MFFCKpWum0+<4uAD8j~+dC=)Sl8(l7ne_wD~Xt+{;4 zPQ6sU_|h}l>rOh3z|^)mS+CB0`sov$F9EpPSoqW{pRQh7zIN?e_SQGP^NsKPHu8IJ znm>2>+*)_A5v$qh#?5!$x%c3~&amI>^{!>X<+<DM2B|95=|$N`wn zi9ETCbH@P2Ign##-g^G|=TE+NsT{d%#*4@_D2PTOb}ox?b!Fw~$+MR?RwKokJm2WY z>A79q-+Syw{=?2Y|Kl(J(qpaO%R;@{d<^9vUckbFh3v@>K{O6vy{FN^sI&|o($G*9+bnUL6|J9%W`JaF5 zjwS(Lh#=>5k+dG)P`yCeaKudQc5E|UnzO-51ZwH1 zEpY5mO0v;bRr<2Ob-uD<1=n#MLf3Qc0|6OF0~wps37Rn73Ij|8Qp%XcG=D}S_*)$f zBI3mEOx$WB__RrrS3yLKF)}f!?L?IPF-C|MNtZ(t0*suMC|sX7Sy|aeOt6G@VI@aw z&PCL74n-P{L=m%1Zu%#qNh9mOs;ZI2lcNbFL6SSOMzyPIw5Tw{Q;3?C!=~Dz0_7-5 ztU(ePOw4ebCI{w3sA;4m5SExSOT`fEB&zDjgqb$mi%My$Q){9)`*OF*!(RjW{mL0SOv>Od@P zS1-*Zh+>FDAScc_i6Ec^V$c8^7MaMysjAq|Qv*qvGG?PTNnVvC<)Kaq^dbc zMU}3bXqb{D$9hR3s7((B__|&N#3W53Nf=pTgQa;FR5NfiV=Fl!AtG0QSvriUMCX9 zXM!-O2Jpma+K7ZoGC=@L! zyTdL`>==yoo)5hK9Up)9N-E78e)SND5~5oK@q{&|$;Txy=9g&;HEMefZA*`s=^`jl=zwE34Tp zZ`!l}j`5kNuU))+CX9Q%o~&)C>Ima3%Z9-<8jUts$xSp{j<~Jkac|}9I#hKsUn#5Z z)s1R(wfgRl|HJpZ=RHLkLLZB3$nzR76u8ykN&u(ZckgOUq>{HJ0PyLpDp{)OsV&1n zML|%-<%Ry%sXgOzeO#`ivFqCU`j33i#~yz8;lKF9pFeTpTrafRMSIVVUE5~2;8Jz& z>65QqI@23%bc=~M-F9$tysLn36%7(N0iwS4{E?&2z5K*W3zwJB#p$EpIJB|noio#o z91+0R{@}mA{M9c%b^OwW^)gH}POU7?sTcp}=RW<b=x z^N)@lJC<+X6X3+|a6v{i_K;|lNr14l+-peh@}(;Xq2c{V13Pv(4XZJ-tW+&F`deF* z_dM|Jci#K1Z$A5+s$R{`j;eCYMr#XNhkoIgf9s`JM!)p$?wHu|?OSpAG_p2SQ5MKM zG~Ys}i4g!`-*Pd3Nq_CH|LER(@BOo{EnmBK?TKIf>8DOUwLHAITrKU`fvQ4SL8A$7 zq1nVFrH3BCpu{+0ltTnllbD`_JaE%Hh99JT{_)2jzkI)b>}Njm_N`MdA6xqBV^1G? z`spLDe)*-BUV7!0gTMM;e&;rHzyKucr<5XOoqC_G|J z6-Fu!2vh?b*;~69F{a~3S_cU1EkO;ol9=PBV2C7tiYVA27>Cp+m(X=Jo3-VTR+!A1 zB~dvew&rs^2sUUj-Jqu=koPyt9{|zlh;2Ir8&$yxU~8}!h|hdwVKWLM@*b&@fu=A9 zJ6n6IU7eO=!nN^VLI_5gb_9{6I|)$d9g7gBBdwacR%4XNNEsYaRMUh-Y9Ao6=Om<7 z6^hO~2t*_r8wwijssrSRm?)WUYN=^rW26gVkWMp6RN_5Ue38~yOkEoi@RSc*Zsbc1+2r)R99F?lDp!t*#CIJ9}M1|~-&W;=@ zDI8bvJ$~L=L>J#PrEK63I=gHVOMb! 
z!67CX`R2gFgsx_2vErhNrE``{r$WxUP*KtpDM%zHHKx5l0kPo`TS-SmX2FumFdoSv z;)oy)QV%kca#J;gP*xISj8?oFv`vMGDvgxzuV(WR?-(v=ITdi;IYg1*vdns500feG z&-LU;wf3YaHph`xXLuk+M1VO7uhMU8gPSZp#&$A*MrLH0(+CK_aGn7jkH@8kiAKX` z2qBI_rH)PO?o+G`yrZ0x@*Fj0C7llxo@r>IJ`P0CUAjpb&q zD+S;Y+jmai5Q12Z`=v}xP3^vU&z=K2I|Wu&Sb5xP;p{6{x>r}W?Yn6t(q^j4aaOc_ z4hjHaT$NdoHzx|AP;7YbeWUR8Q37Ccna>C+SQ`g!7o9!3?|Ne}D24P0s)DBX88H{P39*{grDA`5fnybDZtkwd=x@ zfANn$^wcmb7Zw)Yd&ldK9XqzxTWvHNqs)uTcdtA5!nNO6{M}}=`N0o=_kG{~`U9W; z!f|)}i^GMoEPZT*xSB+`%U-Vck8XU-g)O8$6r3u z9?$LU+|_Hui3!JJv|4CpZ0qVhcipjd`ve3zuU@!x;d9@5Vr69o2jBR?cYoX6VaVei zjk<&7D;(~botTV-tl3zSx9n*E@Z+_= z`LjR!>>qvh$tRzDc~DMGwyK^!_?EZ*yWjaAciy&r=Ir3&6EFPvSB|%U@9Q;jW-}TzOr=p!w>)TPkiFV z7hgOl=WiLhbwRU?zNeTdH)YQ}=HZF+GXj0s{?WRmo#K0_3qFAasQLG{} zi^`^g)d~%JMK-*UXy4OIP=P0MPDNDAOJd!0vkfRYOE9amRa2XW5D5gyHYKsltV&Ew zL}IaUj$uS?z^<64sfa7fzzJqYBB>)55y>1PfOV?G=%#?Grm$agyO0$t3cwK{QQ@l~ z#x)^T&z5tX`Z{K1i9uD!Q)a0#oS3>OtH5>7#`0ag^_eIMAqns{kh2UIvj{<%QhgQG zyh7<2NFr)QRZ0kA?~ds8Sz9Cl;E9x5eqi`g&fHicPAT9-{kX2~Ki!C6NW6l%#% zRNAs*i*rsnHgfbfs>Q39E?r&PzI}VM-Pv|v-_EVs#)g)?3QE4=BhV!81aTB7;L^pb z%v{EXvKEboBKbTuigkcv#Au_Z?e?YaDBrRFqi=rami>FbduijZ{_3y3@c3r|xW<5) zN7V#plbI*)@@zdGdhCSA|}?6#%^Q$w=7R?hR`>1y#3w#+*Zvj+fAU-Z<-TS7G^d}oz8Z8->U6i9tbC+FN+d$u~4mNQAwjciWPwZ|tXCAuu(_eb6 zI(H%5zGn^C9)K}Q0@yRKzCP-ez43T_<@^EyH)xbgb)lf0nxhJsH+eKyhB-n87@oC ze;cOwL;+62>ng(8v9@k9kw+E4TE}Yxh{)Ii)FIYARg4E|39APSFq0+KpTYHgZF7ub zM;&7|5dg0u#2oAPb&OFQX(AjX@uq1S$fa(H)~pUdoug1XQ3b^qjWASONZ44P3Joz@ zW<&yh+b@xJKc+YiYBPHkV+E2UQ8DwXS^KPpurp#MQJK8=F-FVn78UamNR-X_g%yq= zM9TrO%Mokoh-o#5$kJ;;;e&CugUp*8rJ zc%PZP+mj=LJqu>Mg(Q$SHif7l0HsNQMPrOrj3HV+9#q8it#(OGL<~|7u0lmwE~bcd zE|yj1Tuk>iVvJdCx(fB)ZH_k6J*R0l5 zqM8?lb8wO*-yoYVQD>svZh2Qh2`=|}vuHSq5rqSQoDy(M6Q!oEy6WI*09h?)l?dyG zS;!&;K6NZo3)BSaraZR~0hkl07|vH!rIZnaEAh;u7|T!ige@=C)M7cXAo z$=Uh6yIGMpGpayU0J|WXdk;y>o6)d89Q4*keeZpDM8gohh)+KfAOQ)JGj3bIKO7Ci z)Z|>gYwkOL^!pp@)xn+Hkd@#1*0)xN8(D_16c%FHJ}}wUeyfbnefjfWd;Uuw``E`G zJUF@i{Pd19l)-O|`XW*ddd|7Fr%vXg3YbO|#vJpa<$SR%Yc_>P({4QD4_w62|M2PW zdF!F?_>S-R!H<6L?h_}bpMK`lsZ&qRmwivOqZq4lb!k9EljwNY$*Yy|>git98&`qK z&~jPnbR5by)Ol_SS~-o^wom;pML9izISD1 z<(AKfuRZl&{$nxhbeh|D%`eQMd-dv`J$tt8m~FKE+SM+{Y8;!xs(7S*+uq^awK&e4 zmPN7krUT8L&Bd_z%GX{z^v!3VE6eZuzVG|4y+8KYV~;&{@~BAotKqqW&3*mPeDYJD z`qZbNfBM|{t6WUBNS=7&kH32Akt@rc!-o$q{n2N?{_OKh`OQD{v48aI|L*^ooJ4d0 zK#TqV^lQKNe|_q&-~VI(;OBqymvh3_6`cI)>s>y7@!XYbBY_rDO4a7cobn*UAeFV&ifB7H& z-If`A?-z!@8-Di>zjd+K>tTRHpL+E7zwNU>^2i5h8??YJj2E9-c=9j*>WRl6fBMCz ziQtMor8M52z47Ylt6%!*zd!Y>m+vb6{%db=-}~h2p8eY4|MowA{pMeoS!><{V0Kv- zpLp%)bKg3D{`^Z{xhT)Qw){_b?%TKTLl<^D_o=5J8~*X`-MiP9u8GLory7lW^EWo{ zU0hu3J%40%b@kHrwW*tmcfJ1|Z@l$^mOun6C6o}Cv2flv1BOiALuf3yDA=Ho#RN0X1&eaZ>SNAcE9k6(aJ^=OA(# zp|G5Lg-7CY+L6dSpd=FN#y&$SR!YuvjxNTi6+u}oFa;!0l`O+7xl+<^UspqE$|CiO z3s4jXNKn+nW^2n+?J{NNP(wbds!|iBrt!REX4Oau2rVf~)(MeG4#*l&NxkYU4~11U ziaO3zHCEy%mncfa%$kTDN(yjls}M-k$3Pi-jS8{52BsQu&LIdv5eQKuTc5^Q7b$Pe zM3f?8^sl;A005M_H4#7vA~6~Tl|Z4aEb}r{<~=d=^MM6{|xG#Uj=YU)S`K<@LzZXqH;Oo)u&m_=AAm7beEc~2aw z$~mXZWf^kk?T&J;R%7T5t;oNfb(ds(gq&0PUrVp;9!)8)sv2X=UB=gs8tOfbD|J~u z*({vG1ESPuH1nJq5uBxyiedMx86cZKRBNP9T{YO~mJ*IyhzVr>0|=NB>R2NrL2wdy zm<%MKwLT^yL^ub93SlrR`@^vWU07Hc4rThld~|fv^z3B8b1f`f?4CMtqIlq5c;|yU zXA4@oI3ZM%lO4(%Aq0&M;)SzVUo}xP=s^UzFGhp$IO^=S?c)kr2DJE=`|kMuiy!{( zcYN0~S1v#L=%W`eT^u4icJ|8p`g*6L+DNblU?OLl1-|O|}Pp?1zYrp$fcW?XQAO7JV z-uolZKmYv1@#mM9mrr)aAxN{|7>~zm7&O|GEt1i=8!C00xpo*1htNsyJX7f%SB(ZY zn+q4N?%cWap@-jFRn^1q{m?rfxpSt%`E%E%7e}3Ld}X-0D1%`SySHsOO~$}S0Dtj^ z|NimEAAjmg&z?SesVZ{s{nclVJkkFb#m?Q`Zg=r_9=++No6fUN%uemjTfJUydp;Q^ zXQlAdJGO1xvZLLc#yZuCuKA-s`tiMc_dfUJA4t$0Tjx5>;u~LoaxfV5msVb0 
zxpuyYQ@8INzI65K+QM79)0e*R@;`X+p4ppj+S4m9T)1%i>aD-@V?XEH*<+7A_T16u zf9duQPi?39`T4WQ`up#G-S7PGzw?0)JlFsLcmaUl{_WrV{ZIYTo4@}b{nKClxjDP- zCvc(Zo58gugW!&v5AMI^AT#f1w_bkf@Uvg|>`RB9J$m)rFw54;_7D8%PrUKN-{w|( zi>>h&zxc(^KK?iBJl^{7BeTs6C6EIQc34?n1+a2u;nKPDORKA@sv%j&nW+jvL`uh5 zquA!Bv7*~9Wxw;UKl$nL=MTL7&4=RJZJ2)G-sS@j-TC6>*FXRK^E+H~aC-fZ|MK_l zd+VL|`derYlYQu=^@X!1`YTt5dETCx`T76#kKXythxNaI<4a%q(!YKA$Y+1}-<(XT^ys%NP8B~D? z5wHh)L;+BBVqgAph`=fm4Z&$5fE2c_9LmU^#E?zUFb>}P z5KCre?*LecmnlEI_hk$Y#BA~gwN}6VIZJ6NHj$6lms(s$8+rCB8ZN)3{EB*vIp>g1GQ;!{bS4s&ymh(xJ~hyivSj0R!SQL5lTa7vOoMGT~18WZ#- zsbtXLoC_jCLU48j)L4ryQ4`kFsoUc2%=CEuJxHA!W~lirlBG7rXnLCD-4!C{sDarP zMOIZ6Gl-b1n-N*67$`XJ6k-*aHE-CWQwgduglHxfi(s}QG$w`xi-?DT$KLp z7K^G8JIR~3MNOwnBvC>Mb

}*Jgs7Z8_(hF)iSroIEmA)A}|aBm?~>AIIMgAmGh` zOB&EX1fy6XONo-E05>gn70ayf4G&dRqHx^K$TRYovK)=#3M2v7W&vbB!ZICD>sqM( zBpcft^@l0dDOFCJXPxwqoG9rrA~M9)X*I+FaIr)=?7L=X+pay6t^NC_`hBddtoJvD z7l-Agg{6tfSn;ZB+D`Y-jPH&(#(<18hOE37qAx1Tv2mAd~b3%~SJKfQI=_G8D+_sdbc-OdK%<>loIz3!DO zSDF*m-o1Mty7l32ed}AVURb|!<;sa`uZhSOndq;sugtZ5>Iv~|r`WT7c5O6r5h5HSt9Cgn;YKSH?|AsVfA8bBJ@n8+aGfJZj=bl=I~szozIgcj`Sb68 z%lrEM{;A`qPM^2p&&{@-8y>Q}#b?nwY04%<7$h7RWT&%bWvzO}Wr-r8!;bbMoQ;DI-N z@Z?x54@XKeL+t%l%_O7(6 ztBZ#Q^1%=N@F)Jae|7tG|Y;qv)iljG*@$q)a7@B6@qA51$N zP#!=0hky8opZJ|WzWw&w|M9>4xmcjFhKtV~{^$Sk-yS-2=v4nw-><&s!~e_2KJxwv z0KjXHe)Fr3KKkU#FP~XB-=3RW?w+52;GQ4-#LwQnHxnbaRK@Ayp`&aIQKS3|23%uU=jscKcbDRbZbLndcbFaj1k)u##5QOtHkHom;nj z@P~fz+5i6R%IBVc^r^=i-}U|nGR)p|z;`ClY&6$sGMid|>~nwf2e;h%@qaxH-af$A z5f!2EV~;K6cmM03{Em-3oBrGzhg^m(#f+= z|I&Z`&HLW`uK9P}v7^{Ix1;>riESIh(-XkW^AFzf;MV5WMibi(?7r)!o5ySGtK(}9 zZsQUymEG)h>$g07@9lTrwsq^)9hYu8di3ZY|NH+mxc~kGKlU31;{n0Bi#WA>erET~ z?wwnU477oQF!YE(VLm}dYXXfHT6v0LYj5DW=T0m?|M<$v$`iNr-t?w7-Ln7hXa{?S zP(_~+W9rLRx_K;tci6;Q3B!)LX@jeErc9}xP=N(9T&IT#lwqz*X1E&4taZvN-HciYmAozPM3g+ZiAfNMh%+yM z%NT%n9AgCMwwyu{V^GxqhbRu$Pq$SSMWjT@%oZ7JS)h!R>tWpp8+nKbXSsq@)aDmJ zVt9M+MMTvB2%&U!{(2w=h$S$y!bL%>g7;p?svD( zL{=yfhZqqn00PsWauuR8=&02dWbPF!Wt>DY@|J<&JrfiW(MmUfHgOdY6_AQj0b9Z&6t{hL^d9StR9An%+ z=Gvzlzw_zSS5OT z&hpF=5P`>93cCM4tP!<(1o=yR)ayPX-Y6i>$O1h4@R8Z} z0|0k`z{bX8alHWm_C1e(+xP!q+(OPh^U5Fp@gINr^j9uixUkk(cBO@W=fL7(bI~rW zt`u9l%}%$GC!nTLs&g5lu?Es2wQ6sC{K?<=)JxBN%U9p~{(FD$mE&jT&-{l^{O;!V zW`B+voq00}UW;rotE#Hx`R3|@DHi>HpC(I>zWJSh=imIJH{XBf!4k&1gWvzszVR!+@>_^_&!g{JKi;{2ZuR-izIn9OUZ@La{_MAZ`;UM9zrA?=;yA$S+PrhF zUA3Wvsv8jTSTe_xosZo-`^|6r?jO11UmR)w;?3XLs;bH^IAB~!J2b3xuEA1tdvdU~ zw!NIa>5-FD>o4w}GP}c0DX)%;(`R43VERPV9AWAfnoFLH@Am+t(J?t=v1G9~n!sHr z=HB&|`%caS0AzaKU;V(}Yb`Cc)-Lb-MZe$QICS#{o1pC{O6}n zpYHpuhrjhljyCG|d^t6Dj@NeguFc)`;CKJu-~F~9{2B&m_XmIWv5)=kC;s9KU-&{f z*y(gSWnn_)9{HO0%&lOn4-3QcW1oBJ%(EA5mfd>g!e?Ika=!Y(JE{y4@RNdwM$=+v zcW-BBOC*prE=^pk>5>Ej8Zwm>0jhkOmSuVIsXu-4e{6NTN56RGi(9+&p0|DDn-@s~cQI^kZeo@WtL&A_w+Ant zJ8^CMbD#U%f8YGVpMB;l-~P4V{I0`qHx+DO+YJ_1j~>b`Xa&)6hyoUY;%zktHqhOL zQ>U(MU;g4`G8y)_Z{50e`=zS~4<3B({JF`+UH9R)ted6PHUL1cf)HRF0S^x$Ks3z9 zoSt||Y6ZqXnC*r#v)9&6z?n@if*-+A* zjd4t>DKnF@h@u6UA^6d0f{4JPq|984Nu$W2ZjwQWph(@?Itx1|oE6}zK|~^sMgm|& zbS@s57V2pWkP=mu;E9N=i-?cPj8BL-M2H!b-WU^~FGCOuSP-$!Ou}FcqN3<5a}y0A z3I_zxn3;h}#zfJaCCAKaVE-4PSqcau8drC&5|M%!W9r=PnL7pmjB)!q7^?dLfrz~4 z=sghCXpV>i7es_vJ81l?u^2)iAQ18KfLCD_Km%xEN*UCmT2jzxH)0lM0k6iGSgEbG zIW_=*AR$IGn?~=CjT%w_5e9mV_KNSi5Ng{C07P+dO?jADRS3GDJgX4vY3o{80bmx9 zh)Z!@(Tfyi*#JJ~qM6z*f`llLBuNZdi-cipb%~6O0kQ}tF5e6^O9iGGT~f1cGYli% z0?^jSc}=puHg#waX67VGtb&F*TqP#QSm%rd@PI)@B}rTzAU~7G0zgbK+)vJ!U8OT^ zdHfdx(YIVbo%@edUwfR34kwKi0GK&VQ)^ue&EwTq^gU@7tWqN`-{K(Yrp!dzxu$} ze$x;B;19kT7V|tmTlCk~)*gKGD$HQ*b6nDNX)}^awo_%h4dHM9! 
zSEqE0(ll$g=hqKrEl53II(zmHKl<_Ko}Nk);keuGicFZtOLMKuub$bn`mPUr^|HS_^`BuER}$ z;S?r0te%ARzxMvdb4$%n^iQ2Sb#Ct~Cc(|q`q6V=Xdc;~Os0?D`{V7mzd!%|tD)z< zaOz8ohnE*FpSV8UJ#hDGyPYH!R3T5m7${<}SOv`BETjpHzC618#LL&8`P}o*Kfl?y zP!z?q+kW`q!xO7C*F0#lPW>{%N&=8TFN4ttrudqxE-p@FUQ6$w8TuH}Qx%|@aE-Y} zM)yGx1h5G(gH@6i$f#=0r*OJj^hBe4K8${kRIIVOvXh!P6cD2fUrqE)O9E~<5rC`MHo zIHr6N604dRgrrI;WQc`@#adf?CYkF@s=B0`5fM$o%)qg9m{jA*j=-g=1`q;@J28tv zG9ncKXcRmmAZijNkP@I~`i)m_i~$B;RkdwLK~a%oj93srG12fQqDlk;9HFG3LN=yk ztD+GAFalIZZN{_@grK0tB#HpYED|LJ8c;+yk~I;As?R!6R%T>op0NqK&(^$dsEl6{ zf-tjj77?jFkrfE*OjQ&SiIiFVk^r^kGQwBPj98cV8e>9?=|FGDD4Mzje%~80i_Wmt zs!E8#B&tNf5-2u6zecZUOgs&&L{xGGv!>#BbXIYK;rLnvRaIjYK(a1`{`iQPo`o0;GwzYWh(^$%r|T~2rs>KYJPrx;lN@Q zB;G6#kqBbuuW_c`ZCB0q?(S{?>(y6PQQxQ#1e7d58cYpuC{Ib9E?#Ca0{Pytnv112t3VWM3r}rF4Og#cl4G@Cey)oIk z`s&p)S2wpOKF_;z554t)ix)43fg4+(~2#@|jPrt*s&Z!v``b!WW)??oDrcWM^l`Zw@+#-3$FaobMi6U(X4Et8-^weBsoo zQ2~^6)#O%9W>PX4B1)q}L^6EUCJF;{>MKugfAFhd#l7j_Bd1@+i(AF!?G3!MWgCg@ zS69!UAFt&rZ@aJh#El!9gWvh^PhGxzdFkXM8<)4p&Q~E{Y-r2K4IHj5r>3W zS}@;2-)J;e+}x#0mzIwoee3au;&mF#_?hRP`PBdX%vY|Ty+MAj=r7%BUHPL|uJAO^ z^Q@)EjvQFKsMgwZ>FC_4=Pm^{&?|0 z8TNOE{h**^SV|W3+yKC`tU?G?RUu+XljZe8NALd7fd@bE)(^gaWd+8WuB~7SkS&53 zSUCv!`@ZVT?#*{sS1(__e7<$7r{x3FT}mB(?@1H62Cdgz{mH{XBPw}1P$f4lwWpZmF=`;CwM(&FW- z=UQKL-+lMJ{lLL)4xB<}U=9EPDu4jc+X4V+Hz7@+k!F5c`0Xt-8s6@2=6Sw(d+V94 zXKx=a?^w-0|HSW$$hq~04j(>zu(1LFM*!wqaA6lj!8oX@xMtLivI1aZu*XcL!CcQD zfothToJG6>86XA@Af{G!riT4zx;Us2 z4pjiKNVS;+0P76ITK`34B?OE@17@()NX3#D7}fsT2zt1WG&CKZwX@58wd-+EI&TFXox`7IftSuwe*EU?Rf?k zFcg1roEfvVW*o>`W5Dba5KN#{M;AvUu9?X~J)pkiW@rQv2x1U1*4f%w zR69z1U}Idv7^@KDNGv6&fC9$UqA-FPFY`J~B93dy^~Mm#i`pO;|7(m#gV)5N@H!b6 zyk2NB`}{h&7C^ApyNZZ0z%0bZs%v-WTAjILhnE`-m|QNdU%x(T&#$emEv+4Bx7$qv z066xXrn$`=nfT=`_wu(y2QI%k7XxEUlHY>|A7%;{-X|r+4 zn!6PGlg&+j@%hg$EiJwGjrVVDjqYf=qZwVic)5T&5h0e#CIPdG#-5GBlGj~M-Ck>_jYZ2{=|cCy!-g!1_1DM^ZLb|{s0|hQ@_yc z4DH3w5;nu6w#?>eZqs#+ybTE423s3yk&wk{|C!ajMd23;*GY50N*_aD9udHw7 zh`3l_RZVy2mqQ5mChb$d{u>`SfIs}hKMd`+qyTHf+c$6C{LJtF`Jey!pPzZ@`OV$E zY;|sJZSB}@m0@)A{&eRg= z#v5AS>^_L)_9v@PUhdywu5Pyb0C4~Cz>9zXx4-A?OaK07Kk~qlM*-mI7+?(JX8bZp zpfE@Ba!HqNHk;Z!yR$bxKhGu~mb%rw{Y~Hdy~`)>zYcJ3c|RZEtUnybmF0b7ZYOKy$;Fw{9NaDjYxb6A%2;5B*H%Bmb=YSh@AXklwX;tPLSR zWdHz%00=Ohg0)aESO;wf5@57*-SS98rqijmaBnib>s!9&*s)`yKik^-=-z+(?SHbn zyX!k*jA{MQ8~?`N_#5xI<9n9DJ=}$@E!d3q9|Z3~ai&O%u{|?dHrCunBkL@l2N1zw zjb0dA-=jdm5qJSaun5fcdPWwQBk>K&1W~?Km)7oYbNja;r8rV+s__HnL>%^)HY4<|25gmJYL!4QMJK!6aG zX7pA(ne0sW6OkuXtq6@V2qpwEp!kaS4$;Pd?azAzM)pg1TSued`V1;1C%Ja zWd$G(iU?#;m<^GrfEA2E?6XUvK7sXhDhjR5vGr8JGn1IsTxRs7#6uk*7DhrPsz|v@ zEyQZ!+O`@*jI|B`0x&B8Ia9NwwJi-nxDF;K14e4Dbbs=wZDsnpKjTbET5Dxy`NZ}$ z>LgyyHNp|;jM~En8k-yICry!{Dp-J;?QWG@@4XcM;?>Rbmv82IKEJ-&UP?qd-EPAf zsPp@5O%#e~yZ{jqlzbTglPu43lUjgi)KY{nZs>B#Ye`QlO*E9*GtltUv+RnVG5IbmNaOsvTS5Q#;Dh{r;2Ubuec#cR9W zLDopmojdnPieL3tKJeMW1FH2 zc)QZ>;X95UJvd_?e@}004ge z!~gm>fAcq=ym|={?^-@wmZcyPInzu%0&B?9Y+7lOH?1!kiOEBA<>1NfU;6Yf{_EfV zjc-~%e*F0A<>r+uSMg@y6!G=T7ze z{iKoY?d|QIxv;yt`=viUc;Ui@=XBUUj9>M(?*f3MWA1JAHaBlir{h4jf|$3VWtgCa z9IY=VMZ%Ks9NX#j&5wNgzuvla>l?r2d)uyEyl{3j8l{iiy_7B;0t*0#GMGDg;E|Oh z=OH^fvT54fG2p!~xVduh-mQ#a_W$wmk9^CCZ~5-L2>{Ii0C3~%t><5P`N|6~ zh3(yGBP&qW%TCsFy_P4UJBszEf97M`5B}|UyyG3)t-rAf<}CmKkdEQ{7hX8~+}YEw zoW9Y&prjOzA6s9azw55u-CetL=2q|43xEDwf7YrFyzwVD-#YnF|GQQ7_Q?ZBv!(!m z1^@E&lb`$~;&bEi`24X$-|!9JkY`644OlPX?Ah|lOHbXpcxq5>v71~i_1=4rthWwM zZcno;%kwk=ihTb|x<+A`^d?n4%T)D-mUuL>Y776@bv}^H6bUKtxmn0P0>N9YBeQW@?`p z%3dqythEwYwT^x@01^TkvmZw$3NTYP_m)>)B6YfK6pqo{vvh} z=`3-aXn!10B(lyR*6b!3D2fUQZDJ!j5>V07K9drEZmeb6znCSy^JeLy`ycyj$NrE} 
z3%=qPQoXXo4^`NoQgoK%Ta$OSn+l8}LSPQSAifG(Du;4VT)ldAI=|9rG_s|I1MBNY zmQw(@IUSECqaz1bf{5o}6Gzq}LRFRRcH0nes4S{T2%1bQ&+GvJ2*7|LNE{#|0I*8l zY&05BRR9SrA3U%)8n)7W{^VU*ZyAQ8)m91s%gf8Q(;$FY<#_4k(=UGM%bWC~bMCH# z2O6#9(XaX1lP3>NHgAl_<7-#0wi|P5bZ4~f25vB#Hjl0*nd>e$opV@)hG~)pN9vSQ z)Tx?D#a+#mgQBX0dmnz|JKlD5_1w9ts;Y@aZeFX3QL~%K&Z+ZHA3E{Cp#ugC6kJV4 z{YJX7@XDpL8~xFPM~){!QlLV$ zG{%)a@jA(sAbj1of9JP+?+2R<0I)qC{@(BX-Y5RzV=r!9g{s{wI4Jm1CgpT69u62Z zZLA(Ww7k501*MiY;UAmlkiLs?QP;_kT9 zY@Vg!ul~s2`LPfGRKq~ByY)|h?8koo5B{(}7`4_~tyT+1N!v9c9j9s93YukUZW8BQ z-t8J=Tn5G%n^cp@~}fOb4-ebrA{ZYzSvX93m$n&Z(qO7SF20U zo&VIYesXJT>*B-_QImbE)ylg4R;$&2qA{ILKOc4%4;*^y=Veh`yF*9$i?WTr^CRl_pd2D{jn4E?{fa2q9Dnqm`_1&bd*upe)(=-p~I|_{_g{f9u}I?|T3H z<>lh*-}6uZuYdYMXbz{48@O@>p8dU#Kl{ZmPOtvO%V$qL_nzF&ZNKep|L8>0Gy}Nx z%(>^Dd}8y~t%ZC;6;r=v4B`tKL!C~02M^*1TdWCjYeCnBA}b;MrZKqe)9tg zq_eGt8Q1_3gb*9kLE^oLAeM}p*`|bu(F8l2JYwK{7_XeuBs|)EdZ8fX|1huJZBV%s&Z_)gD5Hl5g|}R!~#?&8^q&e z)>f|&S5%R@HWU?w!%}5xxc>+OIQRJXdaeTk2su0ECd1e-@?VY62 zi$p;f01XmzU1|dW)>;)75oIMrFjhojOe)W5!RuKVrNJ~BgJ%|NrBg9i`U4V8Dg=I!PNg88K2jEZT;XQIs!Gyf{1lHvB16qnb1_hvqDk% zFfGSrIS|{KTfXa_yN1Kza3IcMHHNLNor@Q*+_uYy&`=rlZOD%;_u5 z#N>H?_4yZfH@5uIdyk$t@rF0uKR=HR4+xOw+3kxLZr<9oF1vo?IuW&(mv(x?#rdT? zZ8U9G7UhVmBu%Tp#wAQ5iI!v2Xf#*~b=>o4I!kxleJ2B~-FI?sZf@a77l*?`($$0h zxBuYhC)3g5-H&kh;rXdtJbPiIxAX9$C#Zr%^vPE)T)wvX*WdlsubjI4m8V}ga`eR2 z&CSihpd9YqzIt&t?=BT1vtSYBIg zHkMM@SK3{m{ysLyWLhd%ZLc8hWFmSJkKv}T=>J^wAMZwlZZLv zij{4<-Od_Q5pkVnRaKAApBxT{qwo0bKcD`}FLQ5gZLRBEwwxXCi-@=twwbx(GGF;g zlczFGdd@jFZ1h41caKltC&Dj3{59|Whu?Adn}7Mn+sf}a_Z|P~`Tx{+n^jsp`^<0u z?(exrzVR2^*+bvI_1FIBkN)U%`0`xWw1&(5>;28<=U(}mXHPtQf1X2U6ZW3DeS3Rn zFqsxXg>vXmyK^fK%ryaEdb)V!b59ILXV0F0cI*C=-}rsscaOY%V`JmuRPdi0D|m&9;}c6j=?~)!%vMx4-sRfAAZB@N%}AcA?WlM3_2Ig+u@mN&+8;27nMr z4giR2r>W`Sc&D=Tn&2Y}+l>O6dRan-!~qa^FBwu)D8$M)K&bn}XDT;UrTQ$5b(JwR zoB_ZF4bD1|0H8{GMM+4|^<`Aj(tJ;|;N}&#rh=)N$$N`W6$WRnu#)Kp1oQNox z*p!Bd0uY#E5jkTWnn{RcL{(922qB(GUP>Y&vdr~?3L=VHSG^;t2!lrp49tNIm=Hq5 zFaif6ViiMmp@uPrl@XK-37~+I0nrd0w$Ymk2$6^e0AT?DBvVgC2!Me_6f7Bvyb79_ zCr^l|B4q1RT~H$+pm9n>@Ztf0Lmg8VV=BM^ut!AC4rek>V=!ol%D^C=h+;}g#XhPv z1r-Tn05|}!Fd}*eRWPe z#2{|g4?N42in3fALNKOw`iKaUs&MU`;#$AVu~kPvkW{qxJQRRuyblXe=J$usNW zxYl`S)K4fdYsq4*jkF~)06Ai z9wus!d;Qma;SjuzhykkSUBnqaGb4fkkaa%~4MOlDLCK_z=3KKmH#axmf&T8Ox3MwU z+LA-Z8ja@qk;ARIZf>COxl&FOYm7mJQi57?s)pGk6@e#}pH!Yi&1;Vb0YC`_0g%%y zk82YMBo5fw?a}bzmAUSG`}|j)zj@)+WZXS__Uy&yFWkI&bN#W$9(w4ZrF-u>`Hr`4 zjAd?aZh0=EvC-hK=Pq5?xUqBW*s+F9mzS5Djbv%DTVZ*1<4UhAy!T6~t}J(zi>eql z@_uWyJtB2OV?$nci@+W`txjP?iw7ThJ zc;)JqVy?YYgyFdh7hiq#=Jl=PM~~fesQv8b&0ep^%occBjLWLp+TNa;l}=+W0@1FW zFR!mG&#&Fy*bO?J7Uk!kd-B`9>;2%d+zSqg04SRuO^P4-;UE3*kAAlUzUx5ydYTl% zX`YP7D=6$&6-J)SO?BIleDJOX{VDIV+PY{mSretj2R94!{N|F zDk2j<6p?CM+nuEcs8FGH2h z>5-Gm879V;+^KxYT9orb9w*t;L|_w-#_`}3*Yxc zKXmsSK6L*-{GQIw6`Q@yXU}bZ>RD+X9v#{3o5|d?F3#;igKvsG0Pqq$T+PQS$sn6f|>aV&E zt4l37+=r?Hb`TiS7%do^oa604z|3+uXhxmTtlC}!7yv;vgycaL7?lXEsr6*h7nvB6 z*<7LuwoV|7@sA(~RPSdo0~s{7aL=$;<*g~(LkeQ3P?oRE?yOh!`*^HL8RNh{z#ok~J_VCLtwNHh?|^ng#3&gSA$HRh3Mlsi-;$xGVBHh;~#hSOk zF`mtcVth8dXAwyAERJY5u3b&~^P9J~7_y_s?l`pG002eFEO7u|u_z!J1E>MQv~mF8 zOmnWY+?w;*Zfenh!HDB1fCmXIp)R~04Th~o(+h++F(_5CK;fLb^ra`xojW&o z_k#-y3oFNuEgwJj=C6MD+DbOL(ay3Ahts*aE+{b9{PF3RPe1+S)9LCR2eZzxV_CP` zZKQcO$#$>bxO(R7^}RApQ`2_M?i>d~NB~QhNTpD4F_)yO+M5mo%3tj@mt+{X2%pE|X&vNAn-?8N)ui3-m@_4x-L ze#_nWy)yM9-ce~%)8&&td`{C}&ax(|^0GORPasPp}BQKpfgMPpQUwG<^r)~@$ zUhUlZjt73>|NO`&KJkg>I^Mg<{hPZY|M(JES}8EvM#?O6exn_rK1 zmg>!qw~zcg|GWNN!s%hPtupk)ldKv&PFRY*47s(#p=P^$rV$#p@Ode#w`LiMYJUXK8vW?jAuYw)0{pdy)<3`$gxz#U#-S{yYLb|L(qwZ 
z46Ao|hq27)+i3_Iz~Mq#8LVne-~sarcl$(UmwnASRyD4V{I>;|>Lx-N(aNtYINTQp zmUeeA5iO|BU?q4zm3^yh2*5TcPruVs0@16|o~OJPb72h$*<|6~B>E_QyP^F6{X6lw zP)UlchW=AeE9TAlcXIgVg6{J3{efl?-zVH=LwbfwbIyURiJk8OvONv2&+69Re}KBQ zp<|Wlx6G$(PWMygu804;+5dXxF$CAh#7aB2k z>|s^fJ)Ujm3nhScG-2X0Q!k7NDohW2pMZSN64&1tT>v6klr>~^u%T`ekH#swIETwC zB8wEuN1*T&^)Ref`)@S-6qbrK(Wr7+E;P90vS^uN$bmM^WQ<+Gz84>k=r_D>Pc-kT zhw6YKtl$VICRnn~st}nM2+Pk3tCGC1l-8n0i^?P+mp2fmXP<(5KWU$$*gb%a4>Wiw zNjG<*Fa_XWMJ?_H{zRiqnGiWkJ#HYIUeFh~2or#I58?i(4+c05(oDgvK5@rh}e z2>^amOUOxRX7FBl7GC7|?^Xd9qVfF>Ci^1pljH2Sd~@VC1A-Cjv~_QN98q9G)b#g) zW=VT69vf%WJ?vT0$FVM@H1$Q(Q;=eWx-xG%?&{?;fP9{sAq)AHF>%J1re-;Q5tnr;WY{JAR*_AFr?VS#JtE`2(Tp)F5Snom= zsmdV294DqrS1tpQdHC`7{KzpiLkvG}P_=HoV^pPyOycT_i0TH1x0a%z2lovj&pw3(jcR0pjan@Q-9h>c%c zYdnST>9fF+eLO=BSGo>&|CllSj_Yc8UYwKp4$jI1-0BLdK6_n@@6~_$hxI&iH%Iehv1RlOGQmIgojz`EteTq1HFTZWsz>5WzP{ON z?>%qCtRy;`f@ zYJ;?pYx`C5HD>$r?n2BFToB!za(){A+qk-Gnxa=g1eRPf;;b&Uye`wK(=MRxPKpT%QCnzn(Oe$Q z6Bis|VucH92O$tBE%@0`{(R<=?>yt#uLxxEQuDH?(4J|XjS$?NzLxphr?G|OAAz6h zc_vc-G0MHZ`93BmH!mU5bu;+y!K&-uoTs=z+!B3{^ioapfZ2?Kp=fp1^MTKY|8~Z$ z!XE@fK>&shkk!Lw_|4)}s%LDiyKMgH^yY2UW4Gsj_3h37MkWg>Io^%+-}F#aljZq% z{rBJPNUE4v-DYU`jpyH~nXf)SPO)~bNdkADGdxUIN>yqV2FJ;@cXvPKkKi=eQxWoJ zJ38xuq|l09yz?Wa21inXu?Vao}N-+a~v>qihvVfeVYHI_+HkV)PM$?DJ|gxIa5iW_eXsQvlYHBO9}ZG zYBI;SehvZ+=BL7XV4HLR)7*(#gfp>FH1k1@fG#(=fyfEeISZeyIk72k4rxZaBAdyX zpXz80dHXvf#~KSBxz_Kte}+UxFfl+{ZDEAy3Osgk(beGseb@|7R8i5Squj325}li^ zMy>)eBEj#LeI>Qa!=>JbCsWL{2JJBqX<8pBK#YYBQ^qn?OvOrMua<#~jQMaY`0(=1ukgjfuaNezk3sN%2dYcfl+d+Sf z3=au4n#Fqh4_lT5~Yu1MiCy}{TAc1fLn>YGi=|m2>O?fnVS0=8Ijf1 z^Hwx83UIzuC5I9RH1lkH`Ydopa{K2Zp{GO5q@jFY>9o|^0bR>Nly7E6lyu(5miuG# zGH5$CsWPB$>uk;4;kfTiKO=LUdttWq`6@2H*TZIiz13|dTP$_cJASj^t*5)j&!35J!mmfu z&!1`h2|52cHzCFs%$c7eito&nslV!?clayP^S|}_8JvfcO3b#Rd8W-jxODs96l3qN zW6wWro>F&hWIQ}P^nnh62a~xA+J9#-i56`EcGas#-G7eOzkRcn%n{9>`oMfx5Mk=p ztn>`Vo>kKtvn+8)tENk-5s4&}%hgaZYD06IZRlZEE*(3CK0f{5gM{*}Tv|-KluzNeu6vElqVFF<=~_FQM74}GyvsI*Y;M<>!Et< z5hGxHDrx&flfQjRT+@Oa36GN&8EKi9xD1nENT?^?UuG50N%r)-NEsO{p+8)DSbuPJ zcK7sj+bQ8nWObnYF`YY!CSz80scGe}m2dsH3TB@M$umBcJR~=2F|9R-hcA9;4*C%A zdc|E-s_UpbyDHPcBw5KM)5bKGe=}VGrZ<8))J@vgT+}5_I=+WRmkt`Ww`Yqo8n-8% z5@AV4YvWD6QlceTe|BjqU6|Kt-k*Ze=&!dM%VyWiE>Dj9~!hB2R_3rIn?B%e@0}T0r%RTVMMzzA2QN>xE#A6V=(j0FFoel_mmc_OuLtq3!c z8JqeV%XTUt(#M`yDqM_^Z{Ip$7K%vG zrEm3NHD%Bth>^f+N@R|9&0T9I&;4XnssQm`Mxzxtc;q9Hut<$sEha_9L_zHt+vJ2$ z3SFK;Cx?W^z8(`KGHSFZ%s^*WFZsVFYDG{ZQ*4pH6a4U{MunlF4m(g+F?Q5QJenD3 z*tG15DrX)6KYLbPepJ(sLIIdS$uQ?r;?ze00lauRJ4dV{^${cps5y!Yk*7=Yis5$iCn<*9lAp{v)=la=uj=mu4_y)Hx-*Y@dA@`q|o8F8t9%0MdR zlZWT1LATN*4(uk{?pX*UlVF*AgtOyYx;T^mYF%6cMM@e;RPGSehU?lLAD+f}10v)R z)vRe{k6^Z*9WZjH2NwexqB5A9w;BJ?O58|>@WRPC^eT18*t~a;O@_I{1gAA~3_q#p zgciTIrv6$~*}XWEg_TfhZld6D)e%}gEU#g;ofx%nKsA8UB~E!XGW@PA|0tCHRizq$jSnmb=w_67xufAI9{ ztg`Z8(RLS`a)^Q-1o6JDt9kRd68_j2ZBVeaxv_qNkoF|E1kf;27@*3_mil=ZsVq4%ezK9DSI<(bGR)6Xg|-s#@XC(dBCIlz3%g$lZe_eLbl zUbyf@v^1y`Cs7$?1C_5th>#HVYuo>BWZy=}AAcLK`(!WmhH8V9e)NQWOim zQ&MIWO1tE2-In#L`3lZP@oI=Rr4S4Xwqm8Q8Gl8JOvd*LgFW5&^Y|v*4hoQ56&cY$ zC@hu?^l0 zO`)^6xhRPfaM69wII)CSjkhITi=Qn0yZBb9d8&y=5H(_l z>S3XYb}m8>84uP_gRx9z!9b-5Xd#A?kR|LxHwpfhcZ-Wam?5x=vVyp1*@y`_^Wg7Y z_|XwVBYA7+r}z~n6Tlmmr9^?D^f%(l2(KQ zdr~){2)l97QVHeqQ%@Qa#5h&g*u(uHFak*nM?AIBWXBQDiLLtLKt3mA<|e>i1i^#6 z9IBzT({m$x89F{>%F@pQV_{ZKV8MYjFAC!$1@M^B+A$_Tq=`Ea8|b>rX=<3jU`vIC z=T<|0!*hZ@2*AdP;2?&z%ut?9IE2bbT#yl}K%i(Y9m=_ehy;$Z7d5#)JCZaQ>y-Co zGQGSA5ZOWhWa15C4;c~FbEON=^;U)xa+*t})2|_GnDQh?wupw(CUbp6$k{}H``Ugt z5{`5&p!g;}3@C~r99IDJrRxIT8bC&vQ&R*Y0m{6bLO6MeL|*9GZYbwAhU}kEg#R9U{xD3zaS?LA^3>AoL{T?q^A%qO^UGHV 
z#E2=?Y#*)o2xtwsv8iI`S;rr?>y5yZC)mHMFnv z`ah0ao40Ndm3G@y=hEf_@`T;6*OxrLL|!0R^7H3X-c&A`yZ99x{(N}2uujvgbjdIr z7M^(*>wyNC(P3&;v*N-85xhQm&OTqWuL4hHbW4!baIr~Z5UVbn3y4rE|{{Hv> zo-dsy>8;Jy>1>AtJ{GrpLEOGZ(d3DI>^IWXI>MRJYS7=0yIYoWF}j`SY3?uer?}q@ zU=?hyqHbK4Jj%2nBJQtgm{4jJmcNamb{(@C$rjXf;=nx%+q~jf7PeoYgFgL1?4jJPwQ#E6FsPc_=nKTwf7l zt;@Q%r(R)Qdq=F$$JF2R_w26l5VC;`XmW-Zx~yaMMUmq>;K2l`;W3La+ewKQBq0-R zzaN42Fk5cx9Q8+bP4NXL$gLlm?k;#>TL`e%dNV08*5Ih%2(Td7M}v-C}5( z9xRYH=lmFIbFPhbN)8by0??*(>Mg7|2Fxf1e)pv0qsq^;rL5iHz2P=X0K-ByT{380Ru z5lF;?Y)<~-f_Z}zrfvvnWu(S!M>)Asv+f=aOOh)ZOGPPDf=KAbs{JyG16J&ack-G7 zv6TCj3d^>>5TV!)of&SPmxT+XN(B0qzd?th2=OR$lnF*N5=wER1v!h58M-jV1WPv^ zFtx}CXn0f{jw`5Y@9M`7KxH%o0V3GnWDY{GdnQ@y@5V9}B|X61kwfZXk9H3{fNS&L?#S)Jf1oT^JVn6B(# z0ZMeKoW51s5*V=K$k5=g6wIF9${L{kx}p@O;WPk zgF)2U&Zt0OTJp>{|GXeRq|>&+qeKuK8Wf_>*Y#_Bt=UP;eaphC{`@(@JtGr-s73WWS;PsxG@$DB=L6vhUDMekWx+JfWE^AeE_HdCmz zSUqdLmtSI5RZ+PWczQ|>RHJ3{h2aYuy{@ed}>dgSU3(# zIg1ec_&JE@9i-ez>69(-JJ(uR9%0L~KNF(M7W6shfRQWW0kX!WXWrNNkXrvPChMwh z&ZJVF^O~A-ZlS2y`ZHDdcX;Zy6Vce(x^(|c=(G+iLQercFOOV{GD^1>!_rUZpj7aj zHD&oWXU$D*gpboF!%K3jqX)QkbhGGdE(3%XL7I4^bOa%H?^-bTc#`#v>-@Pt8EteT zp9r8K*~L?^u(_`DQwu}aeUIuKHhrMikL>@^{<<~V}w%upy zEFzKwFxv$$lWxAtfZKgfnT(vMNPo(7(CD|MCZc(35i4Y@}`!5`>?(na^&0k}Pl+qJEDsJn@Jvu3%3{o=zD%Z9)x6-i;F*LeC6cUctQCu+R zAnMoShw06_Db&6QM^ z=K4IDs`@aA#i1Q5u6IeLDw$71u(3*E-ckZ1@JKM21}Ar}(9%=i0t1sjjQq3mwvHf| zCE#n&r%nTR_2*s(w42IqdheK2k1aS9qIRMaF0o=>nk?D1zG_3+hYzZ3uw}JRI$<)Q{{fHEW+}AGafO>Zx zM#(P!AFIGXhaek|>7T(=%*63acWz=5&ed!5DJq63I2+Xi#hn+H#^WOk3Mlkz$@9t9 z{PPj7O6Vm*i7`#NkS0stD4~@a8>(7~+d`M-Z}vBDhTqS&8j#3t@ImHBSnSkDW9Ma_ zmDyM|Q^+89w&zw?V{1dxO~nhweNxtH3@h){&QsFch@-zm?3Ij{r~X}|TlSX!cG%A6 zcVcGeBbYDq$TU1TFeK>BGoC`#KhL{<+_cVw9b=+%A2(+T@(di_m&3msBRLCu=P*OQ z^#u$c)T5_HrFKerNIsV}!MbNpW>pW~CiY^@+UY@$8P+q8Mw{oNPc|=p&d+BY?oX^d z*ZhN_%LYrbmni5!#`s0z1Q}Y*YU?`shqhbds0dq&vAGA$n?jjh=gsYKPPUqnn z-*q%~L=17ek!eg|E;_uZdNe<+W|kBYGf>z*nOV?e9rUgT7ojBTZIP{~ySdeAp}1!5 z90OjCqH!nVi#<7-`)PiV?-SHqh8YCJ=)WEdz395HJNq@B$}8fYflZJIXuqv7`p@;- z;nP5Jr(s%NLjPqR<3p;Nih+48&KNJ{DCGB5(Z0RciO4thNdz~q&X1>KNzFRdp5}b~ zXr=lBgLo}p`@DA*Rd|C@4UvZFQtru;w#89kL`^5eP5y0@1k(=WwCLKVD`j1cd>38H z3Uk^IJ$O!@P&*HYcW89h2Z^gMdxvj29>cr-)XV&>3;$8S)pAwQJ4OlsNdx3}`DFcW zZ(_6S&AYDNSN5!aB>sJVyi2zA4~OyBR`jeXEP@;lFS*|l0I|kLMiBN{)5D_TP#lAg zDwTbA;?*G{P@vQ_ySG{zpD457xta0$zFe#cEREcZF3>42wPog>-g3-%4;WaKEKvDp>Gd?WEI;fn%-FN_4gVk)@c(v!aWfPyIS6}lsG5tl)A!R=j2?upl@|w7}7Vl zoP}Nac!Y$60?BH05G6EhK}kWSORn`ZQ=Y7DFJyiU=7AHwO`YN&3uR~M@}+{7 zWy5!)1{?3+c#31r6*He(K2M&h=a~9YhQZNi&E8XRon{lNxkAeLj^EtG(%iXKw9bOg z#>;Tbh$hP0ws3%z3I9hQ6{V^9MwjSR)8K1Xpgaa_2m(5Z#y?6OwyYO}dc<}&gfKlN`)}!lOK5{o z5yb9NeG*eSlEeS3iAR44z24FM9=lK2#gGab@Hj1X8G0XmkBM0SJ6suOLQtHZwba6| zVj_cVbXN3qZ^3Mf0{+B@%4F4Sp4n#F(p-Y*5}|~C6XV- zg?N=;j`nGYvwfW`MvqWr8!fAQHeixbT}_wBC74Sz?eL-%QWg7VX%Xz6Y795h+cYUs zg<3u?d`NoP(13v5+1gE?Si^_8wF8<-woUn)!G3gl zS_`wzmTs-4w?vr^YyG(G{Ow()l8y%uZFUN|S=W&Dmt2r%&HvXmwPZ|(F4qnnwoJc* z96!Nv9I<}|qp+x-9O`uo{`bAt>OZI6KdUR@U+_I1N_#_U!p_b+@2z@!HbQLCKuOCwfHMXozX2g-&jQq|7!ruEzcZwN_zESkr>&2=0c)4=J^ z;KW7FAl)&r5K2e*9?lyJz@5n|`6?KttqNy`hl81HzZcP)Aoh=3b#am2e1w$e1a}cM zbGS%a2pmfRXh3z~9HZj|Wm@;Hs?*YUdsWbA+nGqKhf7q3EPvwmsw^7GOlxB-{&TOv z2>ye1QviS_Z`KbZPEY!Toq#Gg;p;>*8#rL`FE0bWXzUW{1e}i?54|wc4zp&Fq zh*qG4@cXZFHKJ*?ksxZYU1(g?5RX=&+Dn$SB1l3cs#Z4{Km5l(jY@|xX6EGo1U@(j z=gLniBdiblm^Bxiy*4i+lM=5VC!mv)xlw-MzC#?gmu{4aw!);`sO6jd6(w49V$7A~ zAd!*<8Jz$xl|AZKMVa;O&3E_V6O92&CS2{m<5fie-&BhcaC-)y{S~X*Cn&5sw9ntR4>YQbL{BJ?KptJS=h?vd! 
zQ`6s@joWK$1Ov5Nof2|l=<%8bRob;*1N$UdkYgaK^HYNuy>R!}fJ>ZgzPt6jv;PDG z;xy!MK5I(wBcF?864nCCtj^3kTvE5IaudX)t5m3&lB@Hx>lXArOa~;MuDm&H);{|n zz7}}Z6Hs5rw*Ra5@A^mdEBep(TJ8$hCmxr-Q|PLFjOxrkcXx|bLA^2@XoMFkEOJ)t z1#>ec0wfEK_&l7rIkx|8USgX6(=QiCzUGxN!njueLOWiJ%3k|1)#~Bb7dhFr&rn5Z zg>5Z~(q;F3#*=pgl82etF=b2U#-&X>?ZIOs2U`z=zgDF#B!YyMhFlwLa6S!XAp|rD z08LyirfYU48)=_+2Qcx|d}Cj~e%)O1eN|UzD1JsVGtHgszvW=cu9i{NmT{mX@k(@y zb~1*;=rg6&CcwGXG^uUBpwyLoWa&Mj!ChndXXyZyV$N|l;8YjE9--o$>#Sk5pXb|J z%3}4d9pIp~=Yspx{~!GXcff^5cIi1rTBd(9E#ZdI29;m|%xx(HC3(A@7Bp_9wTC>n zdKw!WTiaJYipITs`7(=Q3qe$|;9R*s^h`?+>XsfS4HT%#*Q!(;o!qdGV{3mzVdP=z zR8&}#DVr1pq#PMK?S=C{`PImqis{Ml<7iVxI8{NmW4P8j1<92YtW$=!X^nBComwVY zI*7PNazzd1Og15{SB?5_>!pUSQ`%U^ICDQvPfgW|%ukyh7MbWFjxmKI@d6s1Xk!J( zb5B6Nnt?93iee?7IM>;@&wukTg#sf~+Jer0Xovs)oEl01IOwd{I5+Hhl^itdRR2LP z{QCN5@2@Al*Q-j~k$@NKM|akx1b|s+B#06;0v7>8?@cEZ43eAh?Obc|;L04!MeYV$ zUk)-K8SwC17g-V)QgCemcx`?W#UNuQ@|_MMY%Fl#oIRw7jfK2`iGUE{#Abl8iu+Uc(wK z5VjQ907fHjW8(-Qr%K?YGo}RPqQ|#ub5-AElL5mOy`IzV&_^Qb|M#u?Zq4Z}`E+t7(fk&hCwN za~;9arFhXu$|ciksu$P}jMPa5$dZv{U4#HN6?AEoXJ2N3H8}!~mzGOnf}pa7HL1d| zoo%npuVihZJa0aFLE={Np1AL;*fA+aiuo8sLshZ6)k)t72Sq}2JPRZB_hNhYmYV*z%kxsRWOiFNQ zQn8(zw~P-|2bZFj?awk5w)_qax}Va5L8P19jkH9TD?=f%uaQ+*K$Q8*zo8N&$WO)O z{G|{Yt>{|JaBqhIlBQVyHI$ef0H3Jl3l-1lAz|UnDA3l5`>>ngSsW z74|t+mL}_)h3+2ao*m~~(+{yn|S;NFo7QY^ohc>g%Xa`a?Yap zgC&nA)%9^>{9;#EGG97qH2iUdE=hkM)!f;3f2y!?#Ft)~%p1|DTo?=r8QOiVdzKR1 zO;2||^2qYkE!Ywa32-P|=B;q)Z&cD;&ulZFr#|iWOyWBzH2iQE+}&0?&n{bp*%BX0 zv*x)cB#(Q^5-+%Ba7;a&N#c3A%qOZ3yE4|Q7_@W<@DG086Z&^`&#JQ_4P#q9xp1~l zw#o7rsvj3X1;3pB>~H764<%0O83W%=R;Z1PSy#8Wd%HC>`)s`J)?5*})%&U3miWz? zn+PT)wfCl%wM8vs(x}IdVyftpV{fN|KF?4FpN}Mc%EFagYBzz3!CUbB$wVA56NkS; zA&1|LyNz?AYC>b{P4s<={})<}Rpm)223m+OS&L`LP3g885)OUE;;Ca|Vi$6jA`s1h zlldvz;(oCCKaXaKqdJ6-j}H-)dYPJ((cH?y!U8}9*h|42iBch@m@1v&J?5cflPiJ? z5<$?x$wCPeQwig$U6Muz zUHc1LUQ|>aTXJ@l;i>0~=7x_s1EBm>K9ly_irkmu^b5tLD8t zhll(#Ed@k{jJl1k1=k5?r$?zWu~jkqRo4sfTc1QzBw!?ff}kuMmZdOvW}t}mXc3yL z8k}xJD(NQFZ-V-aqD?|$TQ~4A7Vf|Da?niPWG#Hk0Kgao-$vSr0d>toAtZo=rp6IC zim?Vw3s7V|g#&d-mI?gF(+2gFv2!SgJ44Vj*G z6DV3I;hhwDCpvguXQLnMQ4#!Z;|NGXYp8;q9}%4$RGz@i?ZTaq7=gTXpcm`nT-1)r z(92BCU`Z4(C&mbs@t8}TnFKYL*MC9fp8}Yem>X60f#9{AT$@?E=OecC&ZAHQRp-)H zal*JoA(}xyLUWmhlQ%UzHa&bH(Mccxx|=|Pz^n~ z4>3$2JTNtT0Cr6*!g{Q=LE84vs&CNb{^ZPS$*K2b) zsXTmqvOkVkp7%by_VM1`am~Zau{yG(b9Xec%QG-p{@T%9?11E&-H*k+lrdQ;v=W~k z{}^(C>Bj$_b@rTxP-oY>s2>Mj{=Pe{Hz=0ll^P(0FcJ0tuF{GB8Ej@$Z()8U_eqA2 zl!JssBvEL^<=n$FtK}gpp!@!{h-}Ek}3?N|A4c!vM(O|R9DpLo&z%PN8l_rbSn+lozon*$fn2mq^mx%EpOvPPB-8svG2En#Rb`K0sNo8zy z_K6Hsj0Y=f=m8X{TKguSb$r@WSye(fIm_TaPki4hy12wkjHja(l_64+SjMxp3P;#B z*jelrN5u%d802Jutx(Hi0h&bl&PoVJolhj|BxgAnZ48)!ftq9EBWvQb#xI$ND&>`N z_Xts~h_bg>OPntOg;Vr^oe5WVwcST3?~vqVBCHPvkv6i^IA4?p^{1zaS&j2wPM;%4vqb71LV znrEu)?LRa@E2n2J$rMT;m6|CkMPP`SDw&zW&kjh{ta;P5re@fw7M*DGKApN#N|M2% zw?#${v+c`xJ1!Dn{Y+cF)=fCi_0#rt=bOKY0u^h{jM9NuN9j1FbD$R??HmgNMrR-@ zX?bf@7e@&?%EhGC{xcrGb)-UcqF@GIJ%~NRfSlBSLywBO$rV=nemE9qO6hCpKHK)k zKrCyV4-fXjj*e6s=5f^7Xud;7dPuSRr;0lhRf$|wa+~*Lhr_$X1*7;jvf6T zJ$}63lwr8*+(>yTHCU^XbU^Q=x;U~jKsHiZyLp|G|E0YxN$G{Kk8viDNri|n-=r=0 zF!X-B@p)VGj9ftQ-^PpLb{?6#;h7KXn{V7*`pGldkgCmVXKUZ5#biPcf7{UDBqn-I zY6-e*S!!+Q{0Nq@q5{p|g2~$p%pj=1!+~<5L%H8X&!zu89K3guEd0tRwt6)@Wro2T zY*ur|q_z141??49V+<_;cTpkN!%t>}6UpApN}IME7?IXk_QGj+>-dP-A3fU5NUGhI z?1^pGHp-`?W*fwgQ~1w?CDjI8xVnWmsTh6(fXqyK*b)~c@s*R5SLK>? 
z4xsOF#7o4=q?B?xL_KL<7}pN5BqE5x&N9v1M!QT;sx3PU(?-T2S0|;A#siBI*%`8->@4jq7%5w>FboF8tbt&@M`6zzw&Gp$&+w=LU*=`1QY%pHz zkhi@P4IXbw=J)OVkFd3J!9{}Jbm=qh#z~FNGvsK6UacvDVy^x-klx<;OZ0ULkoWZk z2j+B4SNy=Lq6c#wgko#Y$FX(l1-;wHZ=BQGcxmb^%#dbi9k%mf&IVz5FQw7Nv~FSS z{!+dVqJHIo&Mfz^)aWhvG~`N^xTqc03ZF=Hm3ruVJPq(`T{-3LN!yeRI3==*oEZ~Y z$in>2nxI;SW1jdStwig_`FoEA=sW+svb?~FuOBadHljQ{F#mf4bziuBD<=Cx zNiK{y%f&QAqYv9)2apLM8A2YcmXOEVLG*@K+c9dk$gsMB&yP~XP_zg=7gNc!4J8LE zg3Y>CGt?VDHB(@P_dQC88qexbBZ1>JFdH&gk|78XB7YA++DoVDJ(Trf0UEM?6*qac zEEXjq6%tvAgkA)876iM!GCvc~H2x?+4)K&va0~u^$HtHYaHgO9Ei#m(u8{?dPDDWv zPE?Kw-yp1p0OlmsVl-Pknp_`dpjL9tWXb^7UCc-TB)}3tAnfRkVC~rzyu#A+h=Dt}oG95rlS4CMjNyg$E^$Bybq6CxS~N2oy?%2QnzA z>)!LJ!@>V3m;s-L_=S|Eqa|n~2%PFl);Ljw>YubJRF(pP3C%TBcigVlE?1x zuQg#bjTx|{8f2}}Kthb13;7nNfmTMO7&2jIC!7-*xfD;H$#G5S!JBwq5(m~Ju1405 zVn$4hDecav9J2#n#eYblZ2f8Ln3952?TWa6I+|`{^Hy+;)2PaAEye(m3jx?o%8dl- zmVskz%ZMQ`O`$?+rw>6{wCB-z)XQ~@7#s1ZUNtUhBynm;>14}D^m)qE z>6E3NlatfrapT~Zw~Pf=CNBNMt(a@}>>O|sjJP;EgF4~%g@6=PZ3KVxfM1xHQ0OsD z?g&v)UNZ?61h)LQTH>E~8MsGxBaO**I{^SI9wV*3t6R4$bo&0@ulyLa+ZTxwpUPn5 zMEEnsu=GZGhfrm|Hfg0vYlYjx)ARD;Leh@ht-bo&$-#N&^2*AEbnquJSt{$rOWvs& ztdy#LdtEHW{rn(12ZztEP`lRHi`jj^>7>h4_f804UL{Xd?&(;#Blu3qa_S7aFkFCg8}GqE#7G!-b(A zQ0(7Lta;5GxcGRamk0fhbrbWruX$Baq+7W_6W0>ZaUW}W`S*|ZG~3$mC>cKjzEGFb zEwP_c_{Fzq%u>KPLroxt^zz&zTls{WK-l#;pR7SyKkwT{O?ajzA)gpWgP9txAv~uq z`YUO%Q5I%sUxKg|a$^oK$;#((bJ;iu+A_)YnRbzK5zVKJ5>TnY>~Dr?tBvxSasY7!Q6S!|nI1iMp>x6|9IG0Lt*VuFc&e1c21}NbXpicdV zM%_k(a&-G?RjcUb>s%?Jg7p$kf7Ix0=-k29AGTsax70o(L@QAQT$3|lR5)YMr}hHd zKu}m zL;%@8?3q4K*&?FFj#3*s2IwNVi{f->l>5_AXcVL> zU$rnrEEp(XgCk6J`cvlzIaoIuPy~y@HW}n!(e7FWK1 z0V01%lj+C!8;`oIgGPnGBnAo*pnfO$Vp8Efph%B#<&Lu^+JqdBo0}6B3wC$U2Iv6v zO3{`$kmB`UvUoad?1}phx<5;h@-dSjRoRN_oU)tuLe0X+b-Jj?moPnk1=kn01$VHr z$mO9*C`3({3avBj!J9ICtuSU%SSZ|!XE>KaP}d0kxiK(gq1#6g~LRm`IBCMF)=XLU@3H%0La%!*jVF|ATm|>6(1Z- z5}307GM$`_a+IyjKGpZe!3HUWMk_y8D@%!BZu<*2s(TGW9KU~+^<6CwI?tu?!cIQF zjAV#2ig~X_ppQ5a=%*t&v|=Qp#|i+5ARz&7ODGh4cZ%okwK^)UO zpg)1Cymv@oVnS0mYoZ%rGZ;dflcjH_5#X^+`(*vRj@}~wrT*cK`{U?tEFZFOYvkym zYT@Tq@urPyygrUgfcIM2XFE%&F3A-&FxDdByD33m(pUR}FJdSe?LqK19vSvDVGD^G z`ivJ2(N%cVAD|-B^yj`9vl(}GJ^$@^{~iKSK2DH^2gTTRr_AzK$TlVY{Qn0nLDIf! 
zvzK-{bE~T>OG~~iC;ee&+-9#&WIB!J{QSaFBO6G#vN^oEwfC0$-f-udJNM~pFF*NY z_ST1253W8Y4_$lp>{nj8aC>72el#*bCKtCnN7Z;d?(b~QB?q4U^5-s`dU4&9BBSNGb|+7seyKMe(;AHr ztR6UeU$fnAcVY3$l`Chrd-Ig09ov{o?yxCc-}Y}lGB-EZUA=kW!2iS7pT=66W#^gL z8utE%GoL%hjp@di5mA|0nOQ?-jVw~4IFpiEKnvY+TMY=7ZMX-(4aja8g8aiU48v$x zL#xq0G_;12o6RPhq$H9>aT04_6{{+1$c)U4A!EGraOQ8=d#~jm`#X_kx8VSoMP>zW zoO|wxZ?FBX_kG@ewWwU_LP+=a#zkFmVt+86&1?VhECsPEM9#Sd@w_OHbr#z4+Ek6P z7rMCF#X3)}UB2*{eyiPXCxf)zZhQ85p68u3gdh&evdomKstU77v6w9?Ns?qSD-aR= ztSX9tmKmcMO~a0JF6$;CgmLAQB;B8#5z%;mu6p|Q=MP_b<&|sKZz(`N%!<*d48UYf z&;(HkZJwu;REz55jLVkE&3WNt)-jj~N0w3l1PEF`4w4 zxt@(gWU+|V%#4T$Rdrpvl#MYtg)Fn|5+VhxTB4@v*L9r)sq5Nm?wm86F|)_2AZHwd z8c)GmD~Umtyr@k|loB(pFLLYbW1Wx7$)W`cCf6im3?^N;Dpgj+#9^AI9mqhzFhNjfNFTT^tY*~rO%~g|S-E>%apFm>DvRq7BB2s3~THgZGYPClAVXxPF>%aWZe(4Lp zbP;X>KsVAq0|0)YL#WkDp~3ecYS87(T$@vaS+i z0Hp?9KC>bqAojOvl2H7xyPyncmf@7xzSKOFaCt9<=84$EVxxGDBmm$ALl7c}ylzB{ zkkcN&j**N`-0 zmVtu?@r|Pb0stCB#Gs834Jsr=6q;Tui6=)0O?fFQh-gq@<`}tDQ8FYACBd`W`SS0LObaL;Pby2#DMib^!RQV#HcvmL&d!oBT~s0Pt})FCqX!L;*-aBTG~< z5K(l^B@Vs}PX}K%!VrgT&5De9zs|WGcgb-ixpd$HsBQ$KRnCH>nU8v_Tbt2%`%pzPC{#R3ufE z_~{Z=(|)k4 zgRPKGY*lTYTixBQCUXD)U;8XEXZ2z#wVzLp&W>j@3;}~vQS=@}Jf*g%3zMWaC7A|f zXjrt7i)WAsrDe`txV(S7`}py$Z0;N%o;-a1xU+hFvAegw`+Q^bVkUT)nVnC3T%SK< zW`i)BjBP6m1imcV?KbxNTQ_gs+*%zydHh@VpCtYDAOGoJ3;?skqo?nG;5NE941ek3 z&KG|6r<3CMPEJlBxTg>9z5C$F@RK_=OFC#Lf{WAfS!+;|W{4@k?)Z2r6&%fe_qTtW z%kj>QJ0C0-_a8jq?ztM|;`Zf>+n1}dln`>u2=#lVK9_qXS!)?HB0)2i^`Ps(R6Y!9@TZuOd~7B=`N({#>U3E zYGE_WSv~0F=lkm+gv+0O>Hhuuzwo7>m`oHTT_z&)$~ossX?gE+qu%?@>Nz5MdiQy! z(|LD)^72og$b)Fo#F$|1|bXbxksfs#D5=je0B;h1U6g(n!THw7)vQ%4H#|wa#3kU+@YXAsj zR3YTkq??#ZGm{f(U55Zkl4ZSlrmA5EN<=}tstTW-PWMw#LO45Wx7%CUTIpO_dquA6 zI!)0~X|0`k^gf6oG^T}%e%>BZS`5U&rwq6BuOjS z)6>(*ct%9YTH0>6EoMaICwIoe(=Ar^HiVdU=stP24G2jK*0x38LF{u}g z^NP*IM&zUZlqhLLGbt>)7vkPGE`Z~sH9|>A;@Q8vAI8)IVze?cMtC=fEKAw~pnxtB zVDX-+0NV6fP#km!1YB^&7y<><_&@P9V?^hip;#Vo&W#>c{5Cb7foUe#r2?_&b5fKzL7;P$wibS9hKSj-#mWHWS zYcR%WSOz*&nb|qV#u7#+V`C#_hUVQ6i8MD2N{9jhn^Ie3rWa>6H7gT91VM<)h&$s)uz5TfG#bf%|L@JqH(EG}#h!tuP@?Y7o0Tuizfm(C{%`=mRWD3Nh4xT0i*I(R1rs>08wld_(a z^{f;T5g#;|*s%lvbzO79zyLmg4_*Mss0P&##6VCV>ha`k|Llxgt+h0N=h2hXYC4_H zFWB_P8=oFtxt!+7+Mqw^wC7cAj6qH-H_ek&eFz)?paf{IuC`LhtQ;I3Z=E~;*-yX8 z09Y1oGCsP0z%$G~R=@P=+kZv6zxkWL`IQIzZZv+p_xw%Rsa#!DWxL-!8J}rUBqCF$ zxkA3E6O*CIuHLwP_44qYciwsQm9JDEeDLtqKJT3W^o@^i5Aea0SzRyA4TsEJEvmKQ zT0rmssilG-1PBTVKw@oG`Ppo-b$-nN)Uh`wF?pZnM+Y*SXiph^-dcP1{A_%*cj3bJ z)!SDzk*D9^ef;?GA3msiy@Nmdd3W>X&15Pfi_Eq|2o_y6KeLAVz0SOtwY#k_@$vN1Ha2i8xp3hE<%Wpb?aVpny^2W7;On<;Jh*>u_L`*!O0_AmG z?~eDFxgE3^Pa$tzz4Piu#{fWCSJU~dv(~E?RlBu9Np4fq>2wwcr>Dm!rTAW_mn4ZX z#v<)Mdpch z-TB1LmFaZ){QlYV=g&J=S65e8FK&0d_w!ln;9z#(9Z}`XorcH!ZUtDsaN%Ijl?NOi~#qXPx zYk{||I1iTZdoh!;fNX8hXx+v$fnkhrY;DW2lpptMRf~+b5 zA|j6fBVwPKLjw(I6gcPOch`K>R0A^yAEKHS_pkATFRD@9QHr}q(q)d;N55%Y#{fqs zl!)MR#E#dI5Y$?eWl#k?-yamGMQwS1_uzwf-)&u5dHbF3Vb;5N{mzvum)dQpYPWmv zq+G1E^VWD?6h-B#x?B`Vma(-Ao)i!@&(oYcMMywqskNqU&;SO=Z8nwSM6#ogWA!^5M8$7f&s`Cq!deXix8*IQ||dNc=Gu+V z{lZWG_CK6dXH~0}0RU5CI89UA>9iJQZH(y!27y!qh}XdWtuKA*+_f7-WSNaIWwn@2r>p`Xi|LuQ))zP&4y)iFK7802 ztd6F|(R6H#DJX;x{N!Y@Se(LOJRWyFb-Ug6Slq%r1|Z^dEAwu*JAmGkCr?m{iyP-J zu2&+`%X-Xwagy)txuYoqz{0deB$EkY$gvZA(7bnYa`MK{eD;%XzLC`c0Q)CLh)QIb z!2;zbA;e-bn@)@QyyC=iVu=z&>racb$A{C?nNgdFTWNKu*RJdOYP+qflhlgHZtXd1 z<&M4gwv(4-Su0tx#&K{A3mnZus7;bs(Mq*AMPlYUEmd`M07;VMl%;8E(t?Q6I!C~~ zCJv!+Cy2OUwAQ8Hq921y}_gk;+ri@94KCJIGV8{ z-ru7H5LYT1!P%mS=xQH(u;Vov0HTvWEUP*&J|jaQLqsG#Fp;1TApwCxY(2HjsemDf zh$AA9$Ua@B7GF45K@bUqMKlnqfA&rkdQcp00_XODnu9} 
zuU=4sIBIU~J_K+Q{iEdqFq?pqI3?T?0vuQLBA5=9>)A)KC|F6V1k7S83iQRMvwoR`4XAQ>M~Z_Cj>NQm~)5WI;u07BJ9YYd@jcmYrbQiNte z2Y@E~u7JYKBq$5VI`){6E|X2#T2&6FG-oP-(!V-X+(R}^y(n;ytmVuPU$02Y9ZNnE>=uC8@Y z!8r#(3=l9Vsb7@Ec$%#CB}AW)C;~Ep4+g1XqBL0_4)Q$D(srxW;?1pdue|!n^?~=k zpSQbd{_XF6|FfU{Y`4=nI6V>6(R7S^`{F}tle(;umH>p5(G~L#A3puy{fA|e-+uG6 zE2{tqo9i3H{$MlDHnQ{&-~aIK!*Ti<|An9a!e1WAg9i_GRxe}#HkGsS{?n(=LkPiw zt7~Jz>G6rlTVNA9KAPOW_eT$Q@7%fbxy!4+@f*MK_RL?s{ZhID1p(Dni~0EA_~h`U z)5>gYx&lxLNou$T0!<9JbJ*LP6Y*fsQw6dV0IW>`z$&=;;;S zXctA{4GlIo&yMFsQS6?b6-7~Jw%C8XJ1!@a$)I}Vy+504Q4|MN*6a1Y&|3_$OeSXh zcs5yqi&rnaI-JhuE*!3$ot^b>!`4>2I#@Y9cz$gof93Y7DJLI(_~F5Yz(cd22UBxP9| zn~=qlO8~HB040-znJ0_61uAQ+s@JSfEb2^84i0w%rfEv;DKif`(Cv0Ftx=NL{+Nl# zpVqAE%~{$aT}!RCtl%8C`@O2_Tn<#0B~=qY9o6}&h%ibZotNr;IOU^|g}R0iIJmib zXRQxV*c>>OCx5zs`egRJoOM<@FRfkFv+{Ir>StAF+YD`ocBLqa@+2RP&c2z$*4Eba z?*02;`{Nrw_181Z*raU00HA_zc+`N{5IB~W^9G&)01P0JI?iC??CN%z=#*0~)8Bm=h2$0slSxI4z3t9>URJrNwNZq4 zF&P1v;2Wz#e6R-JELlPjqBtu>wnqSsOKnAM1fS*U7!x233>!4|-$p-W)6jv?j7mui zf&wULU_vBTT!v9ed_%AhQ3i>PXsU|RT)UasqX$M*B0`AY2Z=(63{Z+ zHwATZ?Zuz~1gHw&Jcz_j+U9-_6cH1lcDQtOMnG9)yvCkhRbb*+c27h;Ugf>SqSh$Z z05qn>_?{DDa2}CW5rV!TXCPt>;9y+gs7SM~35bMB!ATQrj$bFrXR!{p+2X+qaU%Xi z07)dg$ZE$v9RP^Xxd;)7|5a3aSO^uoL<1~D>jnTIM~e-JS7eC1G6{lA-g|<^k4!{M zVr*oZ#ncr*^tfoV=+OWG7$6W4ih5r=6{li}6E-5DEDJU!%`%k`ysN#WY*oC3pxmr) z;#ZAJ97K$tD-IkDq49ALf)gO-hRz?~oQ9`D7@a+#F=RxL-SQ40A|gPjxK!E*188%H zKqTTM{s1xv0dnVd@9+PUfB64+=bd+sv#pn3e))~tpZVBlU%P%|^_yS){px6f)4E!R zP7eKH&xN|1rU}4eu|Pyct$l4Y4_>2Mz=!K-Z@ATNHaq6p_N)|U!QM$`y1c>RX>rH^vdfuH*TK4 z)N1tvD(4Q5j~BD~>guWzR<*07oKC0LE?-KSo}HepT)21#+i$(`iH+I*?%nskaW?&_ zzwnFg7EC9gMOllCi}~4nD*d(1bLUeFBEX4p&ShEVy(a=|VNp!`gHE@XdJl=g#DF3S z!piD~v6&BKleX0==aA++FJ_aI(b~!i=+nFJ-cK*~)=}vGY%-ZVdu|U$w6m?Reda=U zkWJ<_@w_OfEek1yl9Sa-JKSG6I~grbM$_qZ*PqVk^P^{r>2z9oTb5CsttlkfKWi0fC=`WADkVYj@^`q99Y9<3eL?Jbz4QG;0zIy zBrD4@d^_3h^!6U?;z4;6Z*E@Ztwl! zz0J*+zW0s$V>X9-ZCUv73haOIWcy3^CCh8NgE^=I5=afp3_&cEf&joa)}sP5AVTez z;{|-g9`qh4E?fwfx<4R-k7y-Ap}5ILve*FyMV)Pb>E>C`D za3Znu6Oj-^6^UbxQw0qmiaKp>$?!r37gAN^SRl?4(~0fC{Pz zQnQH{RWcB7=gqR57}NuRgg6;4trA4dl{+%jqC&HrQJ7gIdQMpp(E}pJ`z1@Hs4ko5 zQR2vjOSvB7q71+zArVsii5f&?4WLcm_q@g>Ey*iM3IBY3pP9v0W34ONKMqUSpY`& zD94pP5h=1rQ*IX%smqGlWb zAPI;F%osmiB8+H^DeKx8qu9{U;na{jiOWl1%;>~p#Vg841y{W8UWA`fJDT^!op}+ez9)? 
zXfXBypjty<=9p_)j_(W#6rIC_)>3K#5m=zAU_PBad-kk+@O(TT&-NFG50Bro^%s8P zXI^^cs%SOdJD5ynYnOMnZ`?dLNU{uy!f=*^5Jae|s;a68v@Q$V86ctnA%maST81EX zS1m>uI}_b-LY7rznaf>y))@Zf_=eVhfvD zm=-X|K)pcF8eO~F*-B`q*ZS2zc>7|yzWO=+)TcgmY1qv@6hR@$1{)h`t2Gop9FHGO z4~N6ywf@>-v6z88*xfUo!L5&<+yAY5tt5Ht^-pBT>R~ahy@ZX8jmHPioO3UI{Ee-h z&AbK?2u_GOOLST-tZ5sD>2%ub^>PFA3Xs7IB+R66>CTWstr9M-5j znZUxCQp-XLXSyO1s&STOcII2oPq2RL^*f)u^%6kS8C{lTyWN&FV`iPZ>TJ&Af&i?w zS(de1Z6Zo3spr+f-lN0)$0`iSi4r(*9V#&#I-BR6%+-ic`;ZuGF{!FbTBNERSXFbf z8nkYV2HqHxssKo8YzRSu?5doK_3K%as+SFu;2Sjko zCrNBb5fNmTCVn9TO2l46bY}v96Gg)a;60>`c@bXx(igqf%bjWKvb{;-kEd!CFtJaI4vy&ulT)0dubCT4KL&nOV-s6O+1U^-wz;c+DcSWLfQd+ghsGDH@sq%S09u0@W9UW88e-IVnm0Bs9EE~N#P^~o2)tmVA|gQ`A|Xx^ z%q##vtuUFEvfnG^_=sz zB2*RBH9URv`1JHNz1d~9bzLK3RZ8%bWf>wEWeU*hw@q$4C+j+?NcIe4AVDCTKwZ=) zvy;JKFzfq zox<7X#^$+=i+p(4BHkFRxS(mPn_H?E9@x}D`u)`;X~8^zzJlWp%8&}MGcfb4O*?y8zhCJiPb#p|{imJbvpFMkKrgdNJ5AN?4kbiDHzd`-Wy0!&4sjH*Y)5&~Dw&ip|Oc}J@ z+;#JW7*;N9{kfm}so9Ym3@i}LPR5}O;6SXDW!V+JxVkaC-hcM&+2Pr~ZDn;O%xJBS zk55le_9o+_p#fA;wjweS@|1LuITm0cq9yNr2(>koN*O{ZsDp&1cxG;SgrGHbYAQGDZtrlfj`{~peO*N0RRafY27b=KgCvC6^+(O2!P75pG{Cii4_pk2N40o@p6xd z+$ajop`tHt%uV?VAs{J85Fe91z%Py;uRb2$vdoY8+ELR+OQ;y9JphOYn~gaU0fmM| zzf3cM_nwjWpHEYP&{u9ThWnT{wA%Jr(em;>SguWS) zR2vtFN@x}s01Avms!k*X5jMu4aiyzj2S@}6qRt0VU^E&a77jHNk{|-sszioWBsfA2 zAw&bR)PaZuW6L1C_=zxJgRw`fgEz5pM-|1RssgJ5F)M%sPef7eGbvdMF?~H>RQvn; z&p&*6dU|TO-Rt$v-`H4LSvfyQhr?uk{rvFq#kJ2jESyY7JFeHlzEB%$lVsUC26T@ts3nBtQ5ZwLme(m2ryL<8C z#r4k4#u|_WlHfoL8;A~sg#5pa-$8LOfAV8da?9;E8A@ptc zTz@mLR_b>5_s?J4UKCXoyz>x*>QK+iIoWJFo*=C_4=9e*j4NLPFUopR)`hQKYIBp+ z^Wxx}-~Q7}SKhhvlgZXj2QaB!QI)C7R@%KR=%XKg@1^rw&mQi6_4mJi`K?d(&z*bo z){T3A^21;G&407DwsvuIbN|Vc_ntm6#;o=FW$pc9)=5lI7%!mbAWcEM0|ZMJM1gcV zpF3X-hP{BG45H9#6Xj3?lvViN_upMSJH4cx?OXQz&8z82Jvn>+!_n;B?>{lU_LVDF zzVyGiSQN#R-LqD!)!F1;uUDKTU;FCUzV)rqV!^`>8Y*hl5a4VyRp2&fsv3*>&UMUS z(7Ad&-Pu|18D8_7B=ft|vuT#qb*-Le<6=5HBqD0{w|2HVr_lT*-1X{ zeek{S{l*bpyjXnh=Hu<{?enLdc{_V|1o!Tau6%KR`IDDd&j0{U_LAb5E;BN7r=N;Q zlDAZKX!Y>$aOd`G|M5@#L?^XX1;EhhbP5qo5?NH>y>#^A)k_=aH=dvE=LgEOYLVdT zU^VlhI62nY@y>deun>_N^AN(O2?2pjf{3L7L@cE6=+(3VK)C}bEC33LuML<58&yRQ zgsh<=)xvP8o>`f-pP=|+;_~Stq}|E6i~Y4aS&SAB(z-Y3?Syo9clUACLrhB!kn^io zlAWEk^HsLLzu!7sBo-e}dXF9)-Ltma?QV8Y@;vXhR;*3)y7GRO9OQ`Q-e7ldZjhw? 
[base85 "GIT binary patch" literal data omitted — encoded contents of the binary files added by this change; not human-readable]
zu#d^M-s|gkS}LekWWZPJWJ#GDU z^=EyOdc5ym78H6s(}S{Gk~om-0|j9wRRA)s%Sm1G>EMU&=e0ZLK>ZUw zrPNfQ4K3-y)rL5w3ks|Sh%c$ZWf2q2uKkx9uYRqc`d^aZ6987@#fk;zbbzwBZGL2C zs{UoQ9$VJ^_*duF%Z6+3#lcA&i~jh4pEZlsW259lbm8j?us&Pcl4xu4@Cr5sF*Edc zc0Tmdgy1R`K0nXPX|4y~ee*m6VC^`l5%nRbR|@LX#`bq03oN(op{*BJ_~O2xnPh8I zz|nL0Llg>_E$&p%EbtFlNz(fwb}@xl);ahjfyCFxXH~kr?d%jgI>O@@tSu@soCw{s zX|`Rm$G7=?3pCu-Ta+Y`s-gsXOpb?(`{hsAe^tbC1-6!0Pds`l?v@;8JR!-k*8-nif}(__DAx zS~~Z~yJZwh)o+3{f=e_TnI31gViWQ=T{+M}Z}IhyV9-1wY( zyA|C{7;-AC`9SnG;)_%gle7rv!@+Ow-`T>(>da|p>z0c-N9GitSd=gk``dc-5|r)I zHcc$ct|l-^R-aBqb;Sd9^!4;s7gDPA>)U)_pYhs$eXWO@iO|T9`r}245A!KutLJ;} zp@A6D0*Om4y*{{%21=^5IIV7EUTiJ}ALg%VQ>GkQV=`Rh3;izS^{!1f-hZ{YPYb;` z4CviF>VF*_7HYFU$f3=&=H23qXfQmyV3( zd5b_A4paX=QD}y{XCp`<)2)iOr$|__P6#1w}Lh7+?4oZ5V3fpv`c#7Wx+{T6W5Ke){l6MJlE`Bpo8@6~Xnw zgZ{FxFh?P@a0DjCmPK|22{t+liyJypWp>2(nI`#e!-(#Tn&^s|cu2h9M#mZxZB1ky zO-8O!DUA;Zm{3QK5-n#X8{H4IF$=eh^n_PDbFH+~(wN%o8a2`=@qO|PW~__9!ANLY zNQHMXyXVV*3H9|%}-^Z?RbI|Ql<|N%ZgCC7W^;|JP_fK zHTp2r9$8T(p}Lp@nU$k4B%VUZe;tK?o`W7gJiFzKWqHm?OvhiUSgY_OxU(}0`y#6* zW4_=%&!~*b&*7fNm}G&IzW-tw5p|gkhCBa`eJqre*yh7685kw^Vpg5!a3ZsE_9`ch zK(o&4-ubjMY8&DQr|l0lM*tDNeaCL_&k~g^jD$3k0I0;;@VGtLp3*wGDkwd@iWAO# z2U77*S#1FXElyR9Cf3y$*Uy!588P^>79iep8}Zvw;r|91EL8;}x4C@a?}sY`VgIGk zt7_}9{kWV7ge`!g{{sYQ0FJT$(An9kwx5%evJ=K08CEuBvXtRD^Q57x+j7nb1-u8R zLRRrrjuTDJP2GZ|SkT|W_aUf}z@z=zQk`iqEW?Q$Qbu9z>+4(Pb>&&TWbXf(6&KR2 z{1OuCX#w`Stsfc}b2bMg@G8=*VEynuAm9^)rzUx^ zcMF{2r&Qtr*PELgG!j?USB?OI`@P3a?-Ht7dt2M)>kdny{{b1)zdQ70^rm|%sF@Uq zP4TPr-dQ$m!R|7=vw3S}|_}%I-n!VIlqDQ!DOowyk zt;_ndP``&W`dTP#>y??Mfy>i6FtFR>`5aJ_R*K;M^{XBaLx-H2zivT8L%pGQ1uz`{ z%WHrYV`oHT(m&tt=@b&uH(&5HW!HbcENe%6K!93_z!L`OVvn4m=l^{56W^J2T~|d# zp~!Re_Ft^?^6K8?r~`c4D($7K=P2K;mF50Jo_0^CP&Xl;KA&P(;L^#>Ou}f)IR68= z{745ztKfPrtcJ?)nC6UWhHl`@=WD)YtWHXpEWbY#`u!M>!>}qMdRUik8I)*Q)Sx8$ zas1xH_M~+1>TDapDl4e9y))lz!fZ|o`QyoZr3`$*p8je|JfSsIZTkaBV;{vWgs=Z- z+$C@;@n`uTX%~E}a!K&n?49%NH9f<{^5itevbv$WPi(YB93pUHnF%>qZgpB39pWMn zaL+wxI~6s0!?s->KPC%1U`}Z*TRG7Q`SnXC4yIEl<4pa$Au14K*y5LD%%IJL{dPNE=_L zw^POFL+*+6jxK+#mhNt)qH|NR?MEN?4}CVDOE>1Mr@jn%1Unp!{{vP72-Ny2DGj3?=9d%A*0<9KX5vC?T9)tMp>*ObvECX^$T zDzkSQsr*LDaDkcbwz^7TEQ!Y^LRCUXh$2UXr9p@n=@`bpJpP#e_sQBxC1(1ftb`7+ zZBPUq!Kyz`z##??k=4NIYn8^KgG5QAqdbku6;}X+UP_bbFtw}hlhvR1kpY?(-;S0s zz=l@Jik|uok(W}G+luL*CpA0xlgL5I;YU z8MfPap5>H$@}D5cpjvO}B#2Now;1Y}kf6x`*2FAu852F5o^3+|k#BFpHTkxR7B8+* z`O!d;Q0Pej>y0%}PQ8<+|5I(dESf`Vx{ZVLuT!7q8WDI}(bL{P75arA^&wlNR*sT6 zDxbe2|UU2p4Rs zRLnQ7{6p?t>A5>NooX)*z>knX^z+f&1}{es@Dfc>J)-ay}*l&=L*Zbsx!L#UbGRah|-H!u=yxa}~!Jjx^lO*pK%!>+g z{Qv$Nu~Jx)MO(0kFJzf@aZyWN3=_n$%zAD<4ra99$p@OoQMA|L;-G%T`N&Vk#H_yR z*}DTDS_wrp&a}BJSmp^V@~jKo+*dZv$jPwj?$-GDAr~awae#-2%4Z4uh%sWdk}V@8 zl)rvZ1oLuZB=8a1S;~Cn+>6=Mpk<9grJZZokZSH;&GgMNJi6))x_u0s+H`swynJXw zBvgv7#K{ijVTW}etb5Lw1=hmhcNI2=>%;t6btW?ci=SyWD=uEojx$Pjagq++RG*+~ z=noR2=dBx#hDm=PLlF*}WTJFNnsbRWjaya~ntvwslJ1V-(mZW4zEgW6iM_vS zOtU%q8nNPhD+7OE7Jy0kxp{ecHHG5IB+K#UW3{F|h|PZaR;7$|WB3QJ`hoIO?w^() z9X)M*l7kOH)YD@im0vo|yDQGb3~W9@-~}w+Ed_dy7Lz6ZCwBQn2Jb{YRvLp8&h8rP z>Gg(u_)O?cl9{<78?IVuT<{4hj?4Srp8v}BOLUg@BFeL!b{QN$*lIFyULuBnIfx{- z>XJ8TpPqVDh$z^>-`gFE&OrA=1qdy30J)iXJnHI%%FJix#)Nw#ZxqT=k@l+A>Yd}G zCwS7;C+M0uYIE`L7ckcxi#zKrT}`}i91r4kq}OauQ?HosDY|=lp>1GAib|B%W@dB| zS?5bgT0Ob>f^yOla$?I}kon_{cl{D(631vqyAk)5?$40^qPMhaBtKbbF3ZW5cB8!6 zqFok+ux{%ljH0VKU*$sO%meEc z^pe=ta%7FkEhx9AuqX7Xgi1}~Fny~au+3Z+I>i5@IUCNcOQh}b{(T!Vq>R`_74I#x zob_=*rJSWB@^l!PReb!%cp+X{43d3b6CW((ff!3a z(u|2J`_eh#v2XFTbj+8EgclQZx$E!Vnh#fzLBXk?!VpqRn>9Q}T9VpXk8@%aji;kM zMy->_v+;>?_{b3uq!_rVj1hHEZ1p~))e7)Wyw!G!G~&}s{b_*Ft)QYc)HZJc4B=AN zu;K(K6w=iEtk(gQ%38k%B~p3ZWo8kMbl 
z%Pa_^wsS5jfTd&mXdn$4KBlE_h440eu+tAdbk)6-#B&!8r{;vdM$P(kZ%97hgm(7# zo5ODNJ(c&GINoSpiJ3R|aQyI(u`%@ccJ*I*~*Lf=3qcY?;(jx*Yo_(`*1z!b+3E#DeZfmd4Gue%lqN+ zg&?CDwZi;=Fqi@~rji{SWX~CAXz$S3iO)6CVBN`(cVy0v#@k& zcsfPQjEUtJi)CS7+wj`}Y1(pf{FSz2}bMDl*qvXRwP3@X2J4)n{W75N>#8QZrG z^6I#exEuQTbnj;art!yvtPL3s7^Kr`!mzUm*ScAB2E_x0wNXig*2%eaZA3G#vs zzRU)lHFJi({*XhBQ`kRW>=oS|(+8LZR(50vJ1Z_Qn%76kr!n|BO%2D5ng2d7aLGuY z&um05^a}pB|9r*S7qGvcF|@hbz7+`jvnvYm3gVLxF0_m&4yi&iHl`q{ljv@29Lxr4 zQ6{W>&oGOOt9H-lo?9Yk&zyNi=Z*$l?k?SgClBnX7PG@ zdh}1RfCcxZ+@)D+QivqI>0M`_f9ohg^Blmk6a!K}zGfQzEOl z1&n%s?=Pxz6F&?bzRDE{U$Ge0CVHe7)V9{JswtH2(&eOxQ>zxL#9@(zIU2}dh%;7v zYZhx}4iorv{4PCm$~KkZKr!6EX`(vD5x;2xkOr}Qp2W?vzF|7A1WCZKV?101lIGgp zNs1a6&%e*o&pMtT`R67=uD;;*(P`x7dC*NHZyHB^rw79|^ssmLX-M)4xORIwb`qal zU$zS(gbJpZqsV;Lf0em;E-x8YHKyU}v}S^c<437r#b{$$LV&Sqck13w`iElNW&XUTJ2qvB2>g39YzK_Fi zZ+0f<8cxoojKN1p;ls#o^rg3yLXA|z)-nZ!e=nT>8#7Kz%mtmpaXo5@rkXYf*Z6S;W;h;)e#($zm7Eqln!Rnu7-&y~3jH^$JC zf-A6ed3xR8Kf|+}OO+{peS@Z|LDubab?q*Yp(AE3GK6GTX4-SXN=;?5glw5s;R( z^LuPpPa`i9K!+FDNjZCdOjgo`_4wz-94p1f*!o=`$bMj;f5#d*p!*n-=5mv^^|8^a zDBvc2-`Hw>OVoW%hksq1FomMN5BSrCtE)A0JbefX4Gw-OkJCTf>hI_KD6j6a1+&Ps zS()_E)pbJZN;NlxR{iyUlOb>U;s?LJPA|U$+)roIXZ5wU`$iTKC94ri(5JPn zKKNg2@7l?Vvp(qchxq{(*7oht z*vRFv=ZSK}42B*Z#XUMA=o#>b;lek!Ub=Dqj8M#ebdg{{oErrik#4m3;Ya%B8^Ul( z!Vy7<=fS}X1bJ~02}<0GuN?ZOb4NFNf+7Gh;p~|=>q_h}+s1G)horaFB^U-gUp*kc zGuiwOO`KmD-}fz?lGWiST=?E8|HcR10h?DB>F=I2M2Z(?P|PD+rWTKZ=3l7a5N0Ch z5$4h->=$&%Iu(p2aS=b~!an{Mq?>_*w15z& zBAef}9gcEtsWtkU^2W=Ck9;H8&+X_J6HGnkI9rtIpRE3So%k41)7Y3Cj#$OoIg?I< z?cK3bJpG=#PJ@1Jm4n8$Q<-)`-2;^t_g_HMi$Km|d5v$a@=Clxa6~!7*6)K}T!|Qg zHzqo|{1n&kKeqYkcPaNi(a^+4R5yIb+EB^qJnAZ84an|BWvKyN(a5QQvf!@Ncynzo6#jQ#ifjHj6Z{)bm%a4Ql zJ@08kL+1dAHew4<^C;e7$6By-_aLqv;b@?YHwip)bjsnsDi`5*y|x$=%`P7%H+`!; zbsI2T?~&C&D9Ic}K&9V4N_z(kcQIvRS&@JJj6|{#JfE=Cs}n*G57lqR!P8R+E%BN@ zGb&S&%T-WWEIK?cy2VwY!)!3N&Vb6NNXl3?VYT46^dhCbUtHufK6M8JXd0_7lDtC| z=?F^SzNk#ql#Iud|E6zD;^slXhaza5luD~Kx}%|DSXRd4VEs1QOE-**7gRne#&2UP z&IS_@lx51r@9?04$^{bi2J!y>YiMKO{8;<7<`?r`nMY4ZlaOi5gDRPP~hdJrKi~K)rvaD8iOW9&rZDJj=?4Sjh6c+b$odWIXy>} zfwqzkl;1d)js`Lqj!90Q$zQ3`cn@ZihZI}d?g=Sg=d;O>Kl0^%V+)J193TD|INoZb zAX1>y>MhHnkam}SPnoDif%HjL6+fP`hRQgn{D!!zX0XszAhajcisOTksx~xHiGf}d z3B*_HEKF-zTnr^za7ZCZQM5 zh-o0QGzKXu(jX~V9$~@!oZv6 z6-|H?ZUYVz77`Hhedu%l>q_Wr=(9RsB!FtaJzl78o|HlJ1q$P@ z$U=`+on}q^4knH#jcG!I?#{XK3*#q&Uj~QKV8Q;oy81iL4L=vByCA)5VwWU%=?%gvE1 zBTheq`g`NAb=*4F+IS#cWQ^7bG*+0G^Rk#~hT;ff1QM80=0d3I8l1{^188apvmB%Y`En&p_F%8xjDhl>j+Q+b^p-4R3s_O#R#RaCx; z+%mM$pp=|&y(`DddoQ1+nDjRY_I-Q>v(5Q&GamJMpHN0~10;B_uwYhX)ZxJfn$eewDA8rD3LG^?4 z*81MZX33j&gfJqA6dk$9yq+4^Do|UA*sw)ip8AxSNt3VgEl;X`#!L*dqML2FVpG_4 zs5EG18&~Ri!b0rd=Wbtf>dG-OP%I8z8)-Xb#h8kw73 z3M40m3Q=W)p`j^87U!yD(~p;Bdf`^$&v-oo$xLGH^_<)sAxN~6yn;O%yX_txAtQvF zsU*`83ZDCwJ!BNfh?nw(*5b=-Mx?;Dy{$Dwv8WQP3!IQ6(%)sl_+6x>uEuKCwyN`f zur(BkP7gjPyN#JH6@K|Cwg z4F<@%O2~>!TgKuWE8@pt9vNglDIVJ|+<2vpPL|g{jnc(LRUZVb*%1fSv!1PMP@Gpz z(JWNZK-{E^)}lO2>lkCC!rsiRbDD$TIDQqC;gaJ|X@eQ)2vh#gzlE){_eTG2r%md| zRHBqr+~fc)#N;X{&4!m4NU2bdR8Cq5D#CUViZs+(gCuCPT*KojRk0xESge7 zsuyp5>$v-2K7FO5T4D@&d<+_N^U2y)erC#9^DUmcO(%g>lD#fH&F!C{P_f zE~h&qaUvMpqY7gRPz-0=w$8S>Bd?%nK%Q4Ai190mLlYivQaQ6pq6L7LJ`RkWfLL(~%r?u@JY_m{>IZyNu zkc9EVIXvrM}NG)%rf~OuP6b4QuM^B>H^s z0C2rAr@7Be@csiQH5Jv%--9*7%(vc;MKAyCg30vsEHAAG3)rRcf0|5otzAQZ6MmOkCVM7>CSId21l z;DfKmk^xS@p&z)v?m64v(b>oWnoqmj5Zrtu>Azb0|GRvGlKA*E3}FwnzwX_^ci}Ig zF{OV13R%DB!-IPik8u!y10(rX0p#G8`!2q#+yG=~PcJWPy2AXz#9Q1dhvb%VQKxF< zS(DBV|38*|X>%O!h&_Lmur6kG3!kJ79b02Wn3|YRRF^i5nW7gAv0x(b@>C?22&Bp` zUe$deFQBJK{oYk)vWVnNP>>uRBjm>FzPLB_Qj^PIyTa}mRdAD`V%l_ 
z?G1|cKeGH&kn!`#Yx%=GpYld49_j6wcVKU!Gc450ZSqsh;~OgPpBvE<-Y~=1y(ad( zn8T$mWiv+WMu%bwS+v!L9DXeDzyd%Lutgkb4Qs3sI!3B_aF!?%1RbxB#_I>X(AZ_! ztW-Ahy(NL#Fs=||wZ#=+CZmuciUgWnc zz#-i$NNg5OIk*W+~mu?S*YNL82h(;*IL3C zO3kEhG!?%&8ZO_+c&Va+2=Lm~BAWmF%8SK5;rD%SdP=(cNf2RVK;>u&KMZq~k&wYs zBc86!fjmc+2km3pAY&sl`Z7Nq7n@((XiScj4xwVo&#M1Gb084#9#$s=c@a~KrDizT z2>CwMO8sihQX&1AC8yh~Jn|_3CXIlgx}Yt3c9GL0Th5XD!9HykG7J{=;YPLFPqAqr0D+xDm zYXN!%RG16QUj+e44PQ%3YzQmQQW9kg589IE=a6Zlfg6dcq(mOT$p*TgxTg&V2H1pD zBB-^vxc8NRP@HUh2Z5U4^*f1h$>*R)XSgfu_T@<7)mQQWzO?kxLZbJ<>lfikj_0<- zOYAE=Q10n>9BcOXO;#+_5H=*xy~m5T68S>qbey!RikH2gCmdZ)Z?+>-)rB#t+V|j-Xv@jh?5o z?)IYfL~cC>(IBt)A2RJi2)fps)=28imxV(Mn>j{Fp@!*N*(`AvYAH%T=PxQzmrY^b zf$L(q0TpTf`TFuk!=wmE+`PI{mAP@GLSHl;i+h?a-OmaHA-v!jLcYjxEVO2NfHqfR z-r=H|b#*f98aF^+4pYA>dcOy}9`|B?@p({&I z?v2@!nSq9xDCRL}zq7kbzDEGIiGSrk;zqs~yKmZ`VO*=F-m#n`W|tJp1A52QRq5tMnPJO^g=g0U{J?emMC{ApX&N}UCM zIyFdA;7_Mq+gYHQ9pWaZN$A4$wxU!Bs0Ylc z%+bY=s>2|nuk%8s|CJ1Bt=lEFGW~jI8ihi@$xgR}g;r`yO_F9WQUCEEn-^A&wc*k( zX-~YL_D{d?i3VN)pUXIpy2VJp4Io6c(LjKp*-bC1du;3x9Lwh^dS@zd+I2lvWVIR^ zJ9g-3kyaMcb?qJXuP!k;ww4@4X&K5Q;~O*kSbCJ{`-_;lnzGw%B=ULsbt#B9^OR`M zja&cYTHyh#5UpTVL$eG%Bl3bWO&Ld&X4&RF2(2uSZh8!8kBa_%%oO7RIHe>UF(wub zKUR*w!UyAfzKkwEN*j0TCnG09tfSuc1Vpf$#Tc^Uu;x5@0yJ~D0%I~h`8EoIZwd0A z(ex-TZmMAYtjM-OnJtG5P+%Q6!sUxu8UGsLSF+;?BkAhm(U$PxSn%^PaDij<>FA3( zP4T!59a7`}E9KU7{*R_A1&14>2}$vm#IgM{wbB-y`|K_DPY^ghRs}rcAwpT2L*H4k zN&HOslYW5`Ex*zj`E7l8M{7^A^q(H6$d(>z?9vSKXGT!=c2j1V@af;k7^!O&yJY;0 z_)iGMZ||pg4&>=UAb=xX=&-i&C(2Zsb9yTG%``TcDs?;ILF zQ;=bxi*~r>g`qpP1(t0x6MKH$yZ_rw1C@|WnO&8Pc= z$;rvfvop|zM>Jq%kLX0x5C(;jOCbj!)4Lu1xi~wM*93vQsCodHdq<&B;{O0@L6yGK zW9;|)CbliQZ_v`=UAAR(}yF0Jms5bHXtLGm* zdZbo9-k%>-^R3GlZan);!Bp1s4?cQt_u|g4{Ms+?uI~as0N6V?`8PM;`NhBXmsj@Y z$H&Lna(~d{@4WZc-}&GDoiLhRIe+=evp2|jl=#ujkKDukpZxsiFFbRjcJ6z(ZoPB! zCiL?0Xf}NKh(vl~OTA)!>)e%V*EV*yILnwZM&)dPff|WKS(Y_rS)PoK6yV~{dchhd zb&opxi*huZ3lsvtOaPWdTC*l*SzdE3s9onoN(N*=q3E5f%!x_IBO+o2SV9wdQQ40U z@5UHkx^Znd9O}_zF<(498Y1FyldI}8#qzA4-Lg5Ll+!VH@=-l+*pQ(P)d{#xr&G|v z75<7YHSvp#DUa2Ae{=NYbb2~}Fr7YGl$*l*VSPT>kHFvclQuL=~RW4FFmyC}7Ia0JC-1l|^?cG&X5R{NDHSIxF z9i$v^4s|4lfNd#rRvD$~1%rV?N=Rwp*{CKEiO45wrIKTpn45&CqUKmsF{<}o)igwM z#Ecrr1d_Rzh}zB&wcZ?XYM3~pDY;&VKvfyR%nX&8OvuE71m~PuB%}yr#17MNfdCOA zsHl+C(=)1tfbb!8i)uv zZ)ONlRhy>ncDqr?ITx#WRaO0!8JhL=b#h(ry;w{ob&Syg zt7=(JiO4uhowgXG&s_+CAkF(C006OwbeQY9o-L;RVPDGn^z?KXN+k`8wLH)7?CtFj zhZknEvMf(4omcC<{YNJ!CpY(Qk7fGg(Ve5CBPX~qSfyT3*L4jLna9I2)SV(oS(d?J zjBzP*Wm(>Ra2FBPck(=cZom(p?9ItSQI`n3&R18@J$kVJ{qKMOCtki$ zh55j(%x7?Xe01~X%|HIe*T43)uhpP|z=0l~9=naLrpg+KwA9%=e_ov{DjMEBINqNx z_d2fJRn^68RxK8pFJg?5eU@eA_{hzN4(o?|cmM6T7gbgL=+3*>zwpv)&%OA2zxR6^ zYwM@SClBs?yxmLk#tQrB|;(9L{Ccs!d4z{SliU6k`^gTY|`a5{|^fai5p4Dtqmg+*{cF(Xn554#IHcm#PQGIalx#ym{(90h`evC_-olYla zopP_%i7OkizAGn@y3@=`Fv&dU$Z=8SUM^fbx6Nyh9zBZC#2D2p7U&kRxyofI?miry zHhrU_7li>Lfk>z-_TJ|z+z26oiMsBGcTPXNu-UnG^=BTluBkc&2{(imv{waLaiv)Zx?)O)G&{fA zSslzydyNaqM&Mj*nr@S23*rnogH>~vFJE49D{sB^*7|dUFaPuxuWVjiEEb2iAMYO> z5yh}SiJ2{}b+>xkj~+dmEoRJofRQ@1?gv=~CymU|bsW!|N`tZEN?h`6np&P2L;=eH zJ)@XJ@XSQhvT@Ea=7Q`X05YMUP8PKln0tt!}xp@#zEvR9o61B1C|+g2`Dc zMF5z9&ZMU4{}8uuFwbztMYR7WhvQTC=e7cI=vcrQGgAW+rcd5lx}l3GkpoaNAW35R z*8iv`QPq2A_7tT~s;y%J3`{gt8xjXJ~i; zU}t86Xkc0DNf?`=B#IWRgn0Ie34mfFL_`@!Gf?Ez&u?X+X3+>)f&rLwkd!F^V4^V! 
zBRa=T9hgWGSA$dhsWU@`2nbBc2;)qRl3Pa!00y@-OST$QQ9vNZ7$Z6#&3qz;AZA$>HDRJsQ^Zsi$p8TaA!YEI88H%z)S{4}?@zm4 zNk@`;1}W606MVJ8+iJ?r%(wKko=wresb6KFX96Mu z6GRN!I#!6t3_vsuT-w4kbz;&XR0FUOm56{*MCt5}A|i+pbF`S=PdZy-j7i%cLyd?r z+3A4Os@(($%zj+T!Nv;c}o&(*P1+C6WODGMdgT6>K7T zo=e-4FaTI$4{BApAY6AYo6i@qSsqT(~^X0Z3KrkRquA0AN%zqdcjs zT+=l3`P_RS>-lJW5<5jS4do(M<)cRrLI~3)_Ii22aXuQ#d~tL-Tuc^6`$zW=_Bx%8 zU+FgT)#+%|93C%X zoaD5+x*A+ZvRs#M4fnWLMUsY{JqvQVa%E9R5{r!i_%gZv_i=)YgFM73gIyrzod*ivw*RHPg zad@=%?){IdmExPnAHDwVdmnuG_LC=1!u`|t-h1!lNAGOhx)uK7|KQxYbJI6(|7ZWF z|0q6wyt%n~{o2c%cfs_dn;+QikAD1>SI%F3fjgbI_V+$`aK9oP4@c$xC0ODOx&C?zc5fS@!aL!#q%!@)<%xsS_##m)V(=;6?88%~MRduJ>>1OWH@W%Rj z{nHm-dg-O!@#yWh-+p)y9zVW+H|wbBS_UGzobgKG=QAAiww_%ZjmKlPK~;y1&ho`` zmz#&vSZe8lsI0J~dBquY`@PfS#lijzXiY^pi&^2Zgs$(-@;N#M@re@9$f8HfdmkO@ zE9>hH_rIL|um9$Mc;WJ|%=Y)c{`Y_L&ENXX@1A`BW zs7+b8wJelnUUU^OkSSS#WP}zxF$bhzvNwwtYCN~GbhO9a1xArZ4ZT}(bZ&LE0-GN# zuD$yF^Upv3=tqxNSGHdGa&NGkpX=|AX}sk3H?Ca#=-!9cagV&Lo*$rfMxnaCwp*6v zy>M^P8<@+-&3rxATu2 zBP@E|rpjg!niz;E>tsc*m}S&(B<-`oWHOneyXdH<}8>J!afm`!dcHF6(V3~LiHs1B(OwSrn5fEnHEG9sxzGj zob^9|45?Mu13V>-LfQ?xwB9HPAfUMzf?*&cL`R0R=Z1^|fCc0>`$QJTSksj^E=0J1Z!Mq02{gqhJyL=4%IHrLb)R8<6$ z&6t_RKw8rZASePt1W+>)iIFqLGr?@@(9wX104a(^o=P~|4pth6A-3LKX!A|dsbgfo zX^vq@bB=19I(niJ+N|BQ#?x8tWNp4o8dQ;*fds_#wct}YDV&i-&Z-=MH0=VA5G_IP zu;rpASQ%nlY7n9&@>n{X5mbc?Qi$=>vLK)lQClrx>Yb!v7MYr8M8ve%N71B5jxm~m zw%Q;-0Wb(nOiu`EF+C@M1~ax0#0<@pR7owVT+6cbS(dr1u4^D@ng*Bh{?cI7jJwO7 z9P(Qn)MB*f`>vb4N{$SMF} z)8VMs>mm56jFfrry~hk9jz%L9QKZRyQB7tZ5l*Lr0Dhs>!FXIPxLb6A%rG_q09@9M zaS`eU)KtuKR!2w{8c;_>mZ^$ao{YTeu2>Wod0a2OpcJ!5|?!I-QjbG?V$(*4DZ0oylk%9M&Km z!Tz8>54D-)-5e2e&o^&=^xk*hqDrUJ@tbe{Xnps>XTSL6`ybuxx@>2#{`&V|Bd(FeE9GoKAudc)9J~Wh{EEe zu4~8#9V+_BZ+-jqcmCCX{qX%CymI-)>(4x!^_G2iu>a<__da<4XO=f!-MKhgTK>_k zTR%L0EKc}%+B`W75W!=oQ>?A6J^ReF>uWn|(V}Ttv&iV^(9E*IwwZ<5$a^1aI+={7 zv&v`G_Zns=8K{}e7mXTs5rb&p+|2x<>g0K22Ix8xoO55vSJu|n@{OK|G)A-8Y`L5^ z&>U1!M?5I70WFBd?AXQr*3xHx=_fWeHtxOuV32i3e%=fQn*kp`c!-PBPL@H@>vuZ+ z!Xf+hUOgCehqESdU0@_@HrIEH%mpYM_ZVjeYBl6VHY{Kt;%lM2xYm1ekS&_Uq?bi> z$W0SV0LPqnWrm0qcgP5u;R{!h5Yu!-vCLeLKc_opqu$HksG%s0^ycMIj<=Gn3fKdt_Knr=G|wgF?)DBGS>)Iai8D zmsesz*cTE_sFY4oU}lqYlsVs&b!E$*yg=PwE`|%S5IY7dStim1n78|D1#2K?0LT_V zBSbbtOx+9OR#d?ZXdv+nhGBpP@vI1-v)PIRU^7oMwu0?AnB`7LjG~#ILOqdxMKS_Zj z12RYvZWcTfu&Sz&3n4HPIY%ifHoXNlH6v`Z$I|*_@N~g5vuNPN&K43j0t6*Q1XVpt z%>a!ADK<F7vRop-8gVu)y%Vou?#T0;UbmBbP^i$vs-9-aas zBu_+a+ww@vqA|7GV{C(3*ppb~1O`o|p++J+17Vp+(ws^#JENnZXbJ(2&bxGe5P*sT zgS6~$P)!kQ?JxxaFgZjJV~kH5AtKTiE9qbog5zXYMs7oVQ^^Hwp&1R3IEC>VCyOyM z5DI`p@C?a7@>~pG5>MK{SOzdQTe}8>zeYTip0E>h1 zd_I2o@b1#ZO=ElO!*^!0*~Vb)`ZHH|uU@%*`}Rsvy!XR5bTYYe@uICQZ|!VL-H;mQ zTm_h(o@}qJ_6I91r ziBE2S_%HwQH^2GK2Or!6fURzSb9wpl)@IW*ak0}h&4Xq4^Z%p&{!-)qZ~x%Gnw{J} zx3&J6&wR!?Cycjm-+cV-KYnIs=h>Hj%ndfa|KQHuy$2m=hQs0G{aIb>iY>a`?$*Zk z<;$0sw>O~3bE`;otU8^~*E&vBg($}8wdwneLNz@;)J0fb>8$qi!^6W)zUH#5sfM1Z ziP#8Gx>*${EQ-ECyXyV<^XG4DZS3sqEV803%et)mD34XQuIuGG&gb*hFwB^vvd@Ym zSYB!PNG2Nn&3o^cGR=W{%lXRE_In?`_v+e3MEqcQ;GIcPWgT?99KDyS@jwU}Sl|ru zu3L1ehH4li04!s}NTqha^P~ICD;siU=i-x~IoB>n!zMFk#{m(f&T{X)uRSOnEXNma zY(4JwfA{YF``?}Z?Z5PQmY!J;A>{iHzWUX#zWCy6qtR%6dwTo!?W1tSh0S{zuxt!M z-|Gx=jn*eM72>S$nJOW2LDEFYR{VTEf968|`PZI#e0w<2Zo5teSX7INo0zjjclOHX zUi$pYKk@aif1S3Zzn%Ls93CEibMsrPtE+>{8^z{kc%94G^o#CMHHl*#EO0-Ym$C8a zV-*XkLkJ2zNy%P$u~5sL>#3SruZv~|WAZ)^J!_gKQ0blT7&yj-3K11Nkj6k8W@huT z(#34Dy1I%%oob^7O<3*cuIopq&3IPFz5xIOnn6T}C^J#MraVM)uly+V4w;a zf+|G&q+Np z_rUbDU7ui6X?%{<${x+kTH|iZ&j7R3%1$m1WCCtkmP$#5Vs^&pR4{g|QACs+#~3L= z$EDpm0{}S`iJ<%x;GLFg#RO$&gYOYhRDsZvxdas`X+xz2fg_1XQezl`b1oG(ffXI8 z86db8^#O2J)|j5h3O%f++?)3Nr_3m|0Z-z-5^v 
z1StS&Dv3ly93z;Bp_xI{1mIFLlXMawGpj|BWHB~uJ-sPxj{zX*lodh2T1jK-E<82= zm_Z6*VN_L#38dSm2U42`r$}f3nsR+HI0jWRv!n}2hsBhdAkKlA#nd7~N#hPUJBX4D z}@`lBfWnsPk%GEFmIFq%-l2iHMnn5SY2vm~m!? zlCC$iPTtjM=tGEeAaYrjQimE;hYCT$qS~JvtLlZ7jXckb+~?iyliAVvwbk=iFO_8} zAqGGxggm7zNynJ5mdOTh2K&>Y_VT>Hc09h*4NcbVtYyXN>FMF4Cm+83em|~`Mx&jj z<;#~Y?Hvy^#`@;F_ z&pmqd$Ye1**^310%LBt=9oIX>z_I5}{G)Gt<4=C?U!OmJ{>u5wolfVszWt4@YZw0P z&-}UH{_WrX{onlcpZe^Jej~5)=1b4L-0%0ldl-K3_F?7i*2lx)JO6rRWo5G5SJmdj zyL)?ktIM0+Zui`!3;lk-B71!M_U`%BT=o39)v;CI_?&w@#U3RPe?ztVDg|aCDW0un(-^lZPCDtNxfN?w?_X9fTs^Vf% z6vMZteydYKT-sVY{P^+bKmYmn?jB5*`PH9%`SsUdUmYxUIvwJ2spypr)^(_;)I!R- zu0uEmmdU)VPlr@?qE}VTC2+Gg>B~^wUhMVz{g+>GgTWvl7kQo!W#YYG&kYezL-gJc z8WfSy>gUG~j{esF`8W6W_Ih#f@X`FoXQZlCUDtPpM;G^)x$IKaV|WBImz(u;S!}bL zH4dJuuq-Pmn20U282%arw-EucWfEUyY^8iz%J zQC26D$zstEQQDlZddwJ&q2C{DC>2Gqy2(w`EE=6OlaHZh1?6mB)&0EF)Xm(7EX$w_ z#E@|osxYd#OP#zZ0iBKmI>)3-gN}^m^P>al#|D}ldLn5pqNsob5`mdgvI!(#>scx+ z5ribVO8{(%@RT9M!pww#PZMPc1i;i{#8#Y6fbfa7gSj2Pq-RF@q&i8)+GLlFElCRi zzzFqeoq?7hAMg}5fyq!y0N@;xsQ|DkfQ2O7G>MGvtm%!(Xsc$NGS&edv6%^`xG#r@ zDPbgS){~WlnGrO(BoqbAQ~(T+&7S6+ntA6erZ9I>l|+CvNI9tKL$);mBDP6nXo&3w zR6rD!6Pl4sRUKv_1|W0H4H}S=rd1D9!36;9l4aDVVFGi@#8d<%N~()=qgO}~FeD<3 z0BQ;Pj^HFp5;I1N>G7D--bI*^TBkg!p&4YZ1zrJQNX-&xML;n$$R3aziQapNVy47C z<WD#2 z6_~w1D0*3^(*Xb`Sj?wQwdfXEBI~FMqQyEiWld_+@#G-PH&?oDG5`4Cz2R(f?%ajN zVlkagcQh^*bwMsl%<_(z0g#9RAvw;FrLNz7|E+mBiHmu!*IixeO-Cmit6K+qM-FLx zI_z4$zH?>w{8pA_^6=qebn>0={7GTd-&mbYCjCx-bTZn#d@-V^Ml;Mj&SvB8=IT%X z?9VzOXqw^vNi|w5ZFC;p{;1fhu3Wj;?e;qz_x{_j|JVQJ-{V2|%8eT@UU=crqetUM zkAC%6e|7oX`Wqj;^TO57zV^yjzW)1PA8c>v_Qw8ver4yC$;ssPAD)OTuV7bCWcI<^ zold6;;rzJ^wbJQqcJSc+7^7zwLU{1xUZ>M(KA!%kcRs3TVK5k6*?cyH0495nAIe*= zU(NipFMj6SGuOW{8NUDMK3H7LX9oudv)ODJvtF;aw!VJ#+Ka23%S7ZH#<(c6mANXT zbvn6aJe^MCtZXLp&tK^N>}%KI_||CH`-Pvmwzak8wO5TbQjLgZxyUjfgHxg0Kn;4# z9&xkN-B@0^eB=71YuApZ@zMUQYEDI^25TJmH@bu6<@0V$RbhH42rdiT+uJ^7!{Klu z_2Yxl^7e&;@!}ZEW2)YL=l0&?(|2#ZGb)F>{neEfURvJlcDwK0J{gThX#J*I5XDed z_a5ARa=2g4wSmY0j#&i-q4MH7ZfPnsZw{VwVa?C`UGM#SliM%8_~OR1*Y@}KPxeMv zuU>6@eR_H-xAV6?zCGV>mI1k^{k&LSUY^hAJ6o4t``oK@8UR2sn++#rT?H`OSnO1% z)x*u8`d3Vg3j46>@}0|>LnZu{N)QTV#w|u?5(b> zbS#@rrww(pEIU4$o8c-H1Y|r%#O_khIk%qooO5}Xx3;!6xL=lKd@>vvHBA_g$NNR} zd0`p}ErXB&2IeRN+MmuQ<1$<*`owc$1#LpD64i-^0E@)}Koc7xE%_k?Z#h7^2O9_? zLJVMLsS6??YDh*;2i(chM`yvAgy7sKMMD~+B>t2cv`hm4U_bz;U`a1xpMW|5Kt2`H zW&~fgP5^_M z#du~c0bpR4^rUJ8?KZjvS{P=e$t?_UrpahqFR5@5B5ER_nP&h152T_BqzVX1gv2g{ z;D{MPL6}&T4ah*8Pf7PgQB#6ldbi9>XF>1?h@eCyK>Ju|8q zG7+Y^OzJJAP6HUI8fwZ{VO2Delu1hjh|a)L4jU7Qh&7Rk(#ovJ0FVKi7zlWBDQ2v# z2MD6Y*4k=H#ApTvY3H3<3T9?1i2w%zsrW-gNvur#qy$kjIwPLpS$Lb7rM3bwOB5yo zfY7Q;+EbH=q$R1aBvr6;n2H90C}L^OMTm%E27oEPj#B>&5s?^7R4mce5D8IJT2R|a zG(^;B&?X`xLNv3K1QbGgKM+Dl#23)uoNEjKzyU}TG+B-fRZU41G!u5lbJYg%((dJ) ziwxNrf~ZBAGZdR^Klc~@Qc)CPsZR?FE7qW9?ArdHb9Jg&sD;r%o0(*($7E>4Bn<*U zfV$uBJHk*`O&#khkV7*yb>q=gox?nr5Q;3T)EHeOiq3oUPaZs}KKghzEiZfaNL}A4 zIOn_zWh2$1X_^W%GpkZz$2~IxBNbo|!;|BqhlhpBpP-Zu4AEs-xw#37hfj{m!_$7h?|XS9bwn}7Jm>rOkNR7i z&%XRp{pe`FKe+qk!RGq$ThgIf>pfBamCFI>Cczwy$W zN5c;v9@KT+oSsaMkE^kH?Du_nZoPN)rRR&~K{O_#Se4AY7|#&Vvm%7af=1K0GCzLx znM;5E=dWG6cIihy+|PH<4d&#RmLxWzsrrMZ)6>(@Ok!jNaZDjr>}aVYO;c{Ht-SV` ztNpc0iwYOz_~76GoDxy2<76^fXgC;-?uUEMxxLZJ(a{kV#o9FNLT5goKcds6rKQzl zeDd(*dk@~-J38IFcTg;`R!v+~r)#wzKqFN+oemd^fZ`;B8o_ag2lGia9o6D;A`(yp z9Jt7A3>h0=EVAwGZ9n#h`-{`zUR6~`jsET*{_tOI-&R$g)QBJPN)bZHv}4v$7dxF! 
z9j2?Bt8pbem^^v)Cx80cKl6nr_weS;o3|g|J6Vk95C^M+D1kGbj%32+4_BI|X)gKo z)A1yBVYSn}RgYQt%GPqH)48~EWwj`}->QyIj}GQ@Gdt~ISvHyLHcbQOC*xA7%9?m` ze~-&JT8zRXTzhc?v<_>_-Q{IE*k9?dE}p;S27|2k;)nnC!)ZBRIe$(pmQ}4&ld8mw zyURtt@Mb1H>vTF9l$}n8Iz2?(Sn00xdupu4xW1`CmlliDCd{;^sBLlq@l@kg4W`$QVi838l~fRxfiMU!L_&5RtHJzY=<5!2=o84!|_xRn_)BN4>d zDg;yfM-&lA5GqK5)>H|kgGfY%#GX@ecou!0iHJucB2fiEX)|98+kGd-D300&iUV(I zj9>r^WMYO421-aooO=CdAkm~)OqxhFYk}XuCV(z|76O2@UsnM_XDUbp3P|Y4OB5nA zjhcKGpF+Z?`WMl~rj5P>Omwd{GJ^+615YCRvVg7CFzf2t~w2 zA|TbMqQHa*pY)30Y-H9RgfyxnIMc(nZwWC$Iva+GUm)X%9^Gjr*%0e z1r)Q4l+eUfwX|j~)zNGoeRiq8^W4t0+&AO#c(u1$beC1`oSq!4SC^KSmb~g>u@K0D z*}^ha_3Rw6TAa-%&u6pQQ8v80_i**8@&`8`9o+o* zrO&-o42rGwT}1rD-~D=w@!a{{%`3Yb{ngoQ#z+d7dD!23Sgfx29+A&i2g^VFcyjsj z<=vIlrjDl}JbdtA?P8yy_u#=DW_xMpnPz`7r?xTneeN4^9_R$PsxDH4{hZ zuCDEFt#_7sE7a{Zqp^PQ?nYHT|NQe?TU+l>%9|fQh%%C(7HjdUT4(n>Rxbec$&-~1**S~!I(fjZJ@Qv>lMNxIp zNMSrCBFj7DI@G0jpLs842HXXrB^~Cy{PSP@@n8JKU%d0+_|~mk(=e;GIi1gnqS&VH zsA}Hd-xHCHkeNHx#3LTp%T;4(c{`7VmGy9mCzC}n?;U-3Iu7joa`iZL7JWFLqgxzQ zV^t*?kidnEtA3|6p4am+WD1((fBhZ;!0N>H`~B6tURzsR+uc>wvJO$g=K7AA)%TBU ztQV(K5#em0s(c)Bvq49ys#;U(qt9WmT4$?AkG7ZcjVl{do6a81hj$;?{r9fW;-V|c z)A5YJ%tpKxkHdHv3*^W7ysm+dob1;nW~v(R?OnfqeLu&6%Q`P^S2ydI@SLjd$FlM6 zfY~`$qc%-*`QqU6)ou52SSs~?HMy|6wzai&|Ni|Q?No=8Ojt2(Z* zQwR{w53{Ceg6B}z$Q=j>nM4uft|``Kz_Lg92xKT}Lu>zzh9fOtY36+vJ5Gpb($1Bgb4VZ} z3Fie&s%`%}A)%s*AtZ?(B1M(tsvw6dlEjJ0i(w80Gy%BC`xql;P5_cF1xtxZ);_;5gAz^IZsD+q{$qJbE}D8{r!W3i};N$uYf*wVBBfteA&RK?UV)ltB}S+LDk44gun>`$f`Ky@5v72L1O#emNwJwi-H@1* zZ`YFuJ9Eh1kKqA&4@YIik zsAr8nNSuj;a<`U?3rT@tMnuGHV5*wFix#vM1&M$#iConTuvOY7$6zWT+9KU3jl(Ea zpE2TD3q+xfUi_uW4Qot@;zVyPYpWWEl*n4ej~Ah4b7ga^(H}{iVV$ZLIgY8|PNfr#*my_KMs@ zKKkIDwHwzWK<0h7+wBzn%y+2YD-rf?KD@ksp()2rIWEic#TQ@f`%WcQmgnmm8=Jd3 z|LS-C_02mEyWMW5li$DlVE*{AXD(-ol7*ww0+BSvNma0qG0v$Fk)9HZZ*Y+@2SGsY z7=UQ$of7S=Wjh;L-rGbTRMX+!{f|G-wt8X5UwH0G)4cQk$4w&$VQ+6=r@%3Y#d-?oJU)Wkbzx?RkyWjZ6cW*tu zySlnMIe^vGRq*C8D%io%A^RSCs;XJP-|q#Rd@2`s;Xc#Wo8XR&S9}!*Yz=<4sdW(xWQo130VlC9Lw~y?l#`cwksjB=%JTo zS>`(Ey1@}qfd0Ihcly2F(jc$(D1>=a1qsLO4D7|=E?l^foo7UB2L3aj`3&a02M-=R zI6cJmWpM8J_?QYajfTA}bL%R;$Zsqby?$@2tJ!)F)>qfo*1G51^78WetztHteQE=7s9zS%6TOim-I z4aW~RFRsY?;N;}w*@kDc*#)hq&GbnJM)hKF{+wG|(T}z0F2e{m27`XF1@Q>iuZGEF zeSQ7lpt=9x_M#j!V?W?%HoZO#wJR@0a2Kza{^lSJc&)nv#cshV(Sk&egs(= z1u%ym0b~vWxTbCtfEh3%0;aHq#HDDBgPKxoEN$2*!6A|apOU5jOy(JZ2=ok`__T~j z+gCzR1C5_R!<@skfSZ9SI0tnF&~7#X(u5j81nA6u2mom&(?*a&YdM6oHizS6%1wLV z^jByAMh?u3i9t;i02x>jQ4|b_84^ee48VDb5k-+mH8|(mQHTJxtE~}XjL}*l1~7A! zhM3Yy6;qvsrYgkXn7~{J0T~bxG?6yIG^X{jT~|`>S&B?!HUm{*$I1Rm$fjl~i33}4 zD7E^`0GLdqhL}VSPraLMuDhyo0&|3vO9H89b4E|cqz`b;0RRA*p`sX>Ge!hS$)84N z(*z}IW7twR0ufWd8%jDt001Uxs)!6mieg~C?SCKu7^Qe#5Ruk;hH8kQOhJ_`O;6g& z=MzD+VuGHjDN$n7@mY`UlPe*O2~3sHh^PTF1w=F;Q$r+R6H|{~L_tW*l%n_kOpB_b zL6Lx+q+yUEnwl{HnqpH&@sxWXV~jD1k%$y|CK6P&X__>ZlF%@bYAQGsxuue&e4orw zqY*IjS(lSc5Kz&p0g^*o~oLHt!%HmJcEXqS*PUcf~ob|g0!;}37 z4?Mv6t@ZhQKAq;<7cMuI{or5x&gVY==YRg^e|~(p*I!)=b^Yd#eiRl{*Uh%BUd;2n z&r97-XI{>-EQ1)AdxPol_~Va0>R*3mCa}iY^4jM5`nq%O;Nj!((MdP6UJlIMG|kWd z^e^APf4`%Ae&=Gov0e0+hO?u8`@L_LqtmVB!QS5fY&PQxye9{yNa|5lvw*5t2SD`P zLxAZjbbb(`=eQQ*`Kb9U+Q$SfixJDKnb;G$g^Gt34EC?XUkGo`wt(C=d;HL zPwIn1oKCd{ijes!i@0Kmh-#j9`u$l5<1mel)mTSps@j5;bzPg14;51{gupOkfd0CeGGBz_hnk_=O z`R0?GH*Zd^ZhG%`z3p7zF*n%Q*jSyGqtWQOUe`IdQg%ZK_4|*<)yf1X|aie&B1QHI*`qcqsil8Js7NE#F@llRbDx__L&xB>%$b*PM3chdooL~kaM*Y)@4K#o=jOp2b2ml-~0g(d? 
zA%G>tNh^0iBnxSnlFB6n07g>NDC%5>t)D{~+Lkh*Gi{Mg(o&O*5D^@hq;a#fVQfxC z(Ik~V>3yqeI+|#KKwDv7s+Hudt$wy>0TLofXm8DChG}=`SXDhBBBmr;qhxewX=sQD z2HJ9~fYOHAv6(qUa&4}PC@?c4NfZSj;ur%Wde5qf!C)<|I^o7+l4=4nIZd7pQ&j^> z?+^i7Z7Z;fG9kn^O0KP)7??>@Zml8{8`D|olZ@Avs+_b0q8I_H2&T6{M0V!pNIub2 z+nO;tiAjH>z~r1u?VENbZXz*TO6dF3KdPEgY|>hA6#y&(GqXe0)+!?Hc*`6k7=eKU zAfzO8Me=5r+`LTQ)C^+kNjT@wSfUXcCLRcYDMnFlgWuYdp2?-|S1Q`tGEI}(L_bwh z077zAKg9&67DzG@2X=&tBEp7X7BLbLE1Ig9muSRE$w4h2rIB=Hg;t5grYH)|aZ)a; z05fII*Hx|k4*SCD8bo3=M?U$02!IhCyI_@wc<%|gX&M4WX_+Wy=$tn(RS313Ye!** zno27@L#%jq)A1x!HHg-A!_2d~%zB+((Xl8@UPQdwd_FrERr{xl(?#>}Xt;Xo_S)Lo zpue)YvD52!kgSt;GnY?ilm4Jrk?(Xmu4+UzFS@3L5*DM;>Cy4!%a@Pt-}~P8zDKa{ zJAP$pHPmssm}i~hNtnNO?b6Tw_|L9&I*rdP#BcuYA6&h9)iaMqqj?Mu4-Z~__EiIJ znyS;u1)$%DcRzSEJ{fJUu7$D*nEBrFm94F#qoeme`tbbu^J_f@(4mOS^Xt#wsAJgK z*|>P|;{92(vbJ^qN8cKKe1Gf0v!g|QvcKOnP2oELAVPVbOEY0abTD`HyDK7d`pZmHt3{g9|?-k3LQJ>Fc%UfGp{gumyhlj)QWImstOdj`oJ-^_| zVm6;NP1E#3-RpM$j4OB6m!J77uYC2ZU(K#w7Ln8W$*3&9bMM`QgM-eH-hA`T@i>gf z<5zcZV`JmV8;3fQ#gnm_4KAIy@TM^>bw<<%qraLnw)Oxr1hw6$4Y~QOL54@-7jbR8p3U z_xpQ9)IU0Lh`m~HHl2430Kivf7AhZSwK^z@q80#@_wd3D%7>%(hyQwOb?dzU3vLCv0UBim2mvH2XD)5ay^*R$5<^3UJkLA> zGqhdcw4Ms6o@J;qsJ6HJ*tDgFLF+(yYIQj)Xy7bv9byzdYafV!ga81I5fM4306+?$ zz|IjqEh{k1a{vJeRrS*$5)RRru~7o&y$Yg9a*6^0piv|uOlz~z(_x2WLNq`iLMKjC z6qJ)G)=UW1taY-On3#j09p*(WiTMfYzIna&ftJO}C#k^)r}RBr=~tz044iIYra8h?KM_5|M&3aT~u5 znt6rRkz(d6z}N%#58uE5Ai0RTT$@NNFaQR4JBhv|dwu z2s~{{kfjwiF;qp60T5FU#u`Ixf%kBhNavi>D5}~%ei9pPfrZZ7vjCm(lRR;)Bj0uTQg^+U3651OIHI>8$sDng6E=_x) z#oXsE^D&Bqh#9(!P1sUupmSmxr0IAcV{Bpr0PG7yl+c!I$=M_#o@hRwo1t@=Msq~v zd{Ql@J=_Q?Oi0AD$)stT$-(jX$&ql`$$9hQg)GaKIA2~`GoZSO({izB>ZYu7m$5)O zok<3ic|diZdcHS29+tCdIh%H8)r%WDH}5^EV|9P`OKUfVj?9M2CA_NS+_UauG8 z)O+tcKF9(csA+`=h=WLoxUfc1K@o|Z1T#bHX=5doMNzC4ytUDbzBe2W`w!k;X1=t3 zetoe1f$71MgBgcLoAJFTHe3AMSDyKmfA8~`mexQ1_+vO6(c)lMtPy2#CbP*r8WlyM zp=RXiqT;ORdX=*L*3JEYd+W*RAK#ozcz1ctO2bt^=lnEq$A@OJywpE9wbL1NTm*~# zUdJ(>SZ!u7mj@r*{jKVsK6&y)jwf|pheb34Es&Xgggnm%gMMAtM{!zOQ_Tf2UqcMbxWAk(C^Os+~^uago-@0|{ z^!*112M1h5GwbKhIfpf@BHo_jgZgAM)9y-f$-1-IY<_b5%%DHN{lNn9A{X_noH~yL z@^4Sxef?V>=S@)*cD-}$+OFFsnSeE5S=Dhcl$u!2; zot^gked%{LHZ~@ELqyCr2d>{+>Msu`;bb%(&*qPZd*t};=;XMkJi+c9)gQjV!uI72(n|2ToYOU+KSt0;%qRu&D13}COndh#H zo@5$b9ct>EX>_Vx=Nf}qZJ1FdHQI{Ca>3(JES}X%`+u^3v-rP${8tU<62H(Nvvx&Ox5FD>|J~l?;Iy61yPDLQOx(oN6P?&!nSFpr2-s z3M4r_A}b`bLK;S=xR2CLMgUL-V1NexTYu78g9$Qe7(AHU?>b*CJ?3j>32xps2wWLe|AixBaOzHtKO_4N6 zhG3!yKwtu(Nmi*Xv4B+7Br-FQsVS&KWQl>9Glyn^MrqYnFf?$k)kTs@I#{Xth*46~ ztR}ifT3nN0Gc8@M|01bxQInoREjVSz6jfuAt++%PQ6w=)qQ-m!|Q9@jxI_J``CPP94QFF`&qN)kd?tC6&By9Ozm=I~!)(wD0%&EvgU_@k$ zU;+sy0jOr4&^b!Svz2Cm03fjk0JVsOU~P;X5eY;!b<}!i0KqJQH%ufFrE97!EQs1+ z6QYshmB$>eAsi(H9nCU781!o2G|ebX0i-wR3UOJ@&1`*reJ~jGFKl0Y<@wd`7Vp3R{?TxB zGM;R#EK`ib+1zz|v4n5_-tU}W8${=NyW3Y@efEv-d}n)kxxam`-|ug4t{fc=F{7%k zyRK8$uy=U4bKydV3R%RELy*|pTwkk8?Df3&rc(qMKRM1eH;;yg%sj4VlV%GAX@7{?qUfXate&Q>L_io*~rLkxtsMqdl zw*w)keB3NbfOLhHjLl*;na`Yaj`Al@=H2pr5sAU7y21r`@4I!`>2$C+$g*q<_0jC4 zmZGZg*}Pa^UmwqF?|tbs064)cFHROy=Nxz_o07Q~9ZI9<+!NIz+B!GRxgPYYs#ZUzHd-38Ab89}yEn8Yk2Dx8^h02i!7 za8*^ElH}|*nF7=HJEP&PcaOF1_xs=b^6t`4Eq&(YFFknhVB<$8^Z9&xGMUe3H@Q=m zq2%6p63gnOQCoDoi0JFQUsksdo`ev1Bz0X2Mg*OFc*3=!4@5AWFNkQh07uY}5K*_Y zvIwD^%$T``8FWGBO{l86)2Uqe$Cc389G?q=Yby<iJTfTl2sR;7W-L1O8t z5rB}8RRED7soc^d5uc7!KCR(Mi-nyv(-F~G6P@>pib%+ugbp#r1_RKUP+l=P6p(>| zQ)660YYJ8)I-^iH?^Tt_nOLN@hXDXF2_=}G-eeO9D~(hTyhKxLy!Vho-H0hgwg5Dm zEDwU91Z*ZMNW_YYi0CshBNZl66sEIAw24UIj2xr2RctDo+J_;bK}h}(P&09k08G)yp-GDq1prWcI+TfI zfRJ(a6e7}ktW(n^$%oTROW>#JEu5s$XJZ#LG}6|LgD4RZTZ}HXf?q^50zf1L19P0k z7?Zgg5`-;PW*|V2iX>ztqQrUS#)9c^GL&j^|MvTLFMMHZ_x$yX 
zJBNpdxVrTC!7+BT7p}diz3$4&3L8K)*WtRZv+nZ4hX?bfJb&dPs)1O<-CFh2mMaJS2RsG&+|3kjWK?7@bH~?-ucnZ58rz4!}9*A5@%6ej3q-v0Y}ON zH6l?(BxV_zs^+E|V`bpLd7q_&NJMK~ZEp5mwg$8`e{_?l)8{W;ytuvf_R7+|d-pU> zLkK4i4i<~Wy2IypcOS>*`0Y3Mis3ikdTZ}+fBXDSC>IS_2$<)Cyw|Jink(pJy_m%c zEGMe#x^L4jf9dmIyn@5w@WK5>o?*uqbzNr!D51Z!mihki>1k!eh=2@g}G1%PPlw~g>hxO!WdNL{N5TrLh-C0?A<(aEQgo}x)W;MhZXQE9M zVT>9%vT3FcfXP)cP8YRPB~uCAZnvAsG*oqPBEo_IO_nJLjO%IBG_k6x$>iAe&G47U z)7{5=gib76$vewVnB}gXc z{@{TE{K2`m*4NkhYZvQj@ssau4Tr2%78fXhNA zS;kKLbzK)Wr)D9gQ^Z9#0)Y2iXR((nBAVtBC7>fBnZx|#*tQu-3zmM4)A`Iov$Vck z$#{FZb$s{uyZ@K}Y;@zs|8w_uKKHrLeR1V4cRC$XXqtvSnJKC|=Zu7%^O;Z4Yasxb zIfrRt(x$E$Xv%4|Hk}g5z$gJE3Bk@{+WPCZVfdtrk(jTB_ zJ?WVcgoXft5I_-n5Gk;l)lGE`WOdEiIcMb*kuiFB_-OOBJKuB8-fJyA>~lS{3CJG; z5$+yt=Jy`6z4o`h@AuKxBicDRwuxB)aL5Fd^n;G3rP0v3>UlIYfz%|DBzc5vm5qjNMDXszxbO)ni>!!e+?qWR*4*1%4#Q+oHDa)^_z4^hY-?Y$i#>}w2_$= zQkIqM)WJHzcU42I`zMBp{AWggn`w6@xkF z>JXfB0v08fjy0MiA642Gv&?}g5hGaoOb{{gQMyH;NYgY3KE<|~S)(#LAd05L;n)n@ z*dn89AhOW3{i??bS67pTG-vbc$;q>lAO{_d2K}A=ox{U%UOm5>`n}O~HfJDD?!n3P zmtT8zml&8MvM>7Mg0HVG+SzJ69%uV|j~+cbefNVb%bqMRUp{zw*N<-O2@ zhsz*M(;OWg^^W&`{Ozy*+U_sQY_&pdGp;xF!Ew1>hvcq_F&3FSeg5144-Sts8UZ&= zgNWok=@W$cp#l?^R_WSQVd;jF=(_-58oQuAQ z0L~18Ko*=MZ$(|#C84U-+IsKX)|edz(rAr$YHGHu@`|ecHtXNmJG#8Qym2{ubAMlt zj!v0AeDttx>e%4P$yK|YIDlU`y!+K#_y6>d|8_E&{BM8#mnYN5$3=N>G``e~$kqot zB7%@D7d7TU1!-hNtWVE3S6BP5V>9wN#edD9TlAp;1pYRV=eis-zHAywifUM-ku)bs~LA_7#k$mnhBM*%Y;5{t~3&Sqv(l{AW>37QEI z7^(mf#ttpowaBWd^B5D<3qfMC8!I4)ftq?|5g{f}OOQ8END*r@+ZOp!GJqqZf*_-z zYgJ3nj!-~?1~D)|vS6w%b1AS(h;_RG0LLCMgtqpMkkC*e4NOX+$}W4c{flkz>=Ljx zy-LJL3~h|SG_1;;38DzJi{)%M92Qjxgwi&v`Rw8I_m<1$jUpcm1~+CG^ZC3HJ-T(f zta2nHSFLM>AQ*LVb**(93X3MrQ;ZSw!aNhYs_eIu%kTW z^7!e_!LciPMAYl`rWfZ$Q6Txe=($!zEY2ram+k1)HxRLL^`HFBAHUD|na_M?{oS8D zTuc_vCNK{yC8KkFCn5lbB-Wx4G-Pg2HLE!}qR6p{j&@>@3Kv8)H*?vRGmLRZ5B&M;GBE?YrnY6;Og1g{{DV#&Ar!Oy0L$B zZh z3`ObcdUbiR@Gjf5YeO|x)e5^-S5-AGs}RB?H|>}7>u%Y4xjj?U$;|0los2Y>Jf%`3M*IX(UEfBk2F{nvkecJ@i0 z=Uk1}b$#*_mdg;FZZ^Re-VBjj1E3fx=R)h4nYGEXOj;kKTiCo97ul<~wJZzV?~fYDM+>i?6?WaL1`COb;iM$$WFskTk1&wbbh{84ibe?qU-i zQC-(pUPPpdr3hTrYwx|Q>OpUCur7(H&(us;+}p_|M#p8Zax={Yqfpb@pkKw%$Zzic z=l}SBPe=d9n>TL;$|e)D!i+7mOi`6%p64d$2nC>RQx|HA9driI&OC`^1^@}e*!93C zj<6Y+$#%zX$u|+ZOMa>m1yz-Rsvszcn81~4Ktw4Hism5s4ney4hD=fMKa2;ySj0O9 z(2zi?nC^+K8Xz$!U^-(IfJ0P59#14sizdO4U`LqtSl6xFPQU8bz9xXi_y&9opXD3P!Y2{LAC zifGbxk0~lKV3gFp`?FL9p+hFH1dNRYkah_O(k-XDNE0BWj<1MFDQC0YRv5c2vYn9# zOsoq&h`NlW3rd5j5)%@~7}0=;)gl-ukbxlxaaZGRQKLm9GZiqREm?~J5eXfGnNK)4 zG6N$fAY?I6b%ux-yHrjZcWm1|2nqPE0xx{KjsTH@7ywdw)5t{?Mq=^=X&^vCj1~wT z6DJt}5kq&hL$Z^Y0i!Y^pdlppr!si}Fl1yBY0L~j2oZrbD7w_ymK+`_JWK$AvRU7SNNO^d*gAbx--8l$ZfObrmC&l<#ZN;dS9Hp z|0&FxdsMk;{UOFYr);vAG?#pIxY(y#$M-U>R*#-k2M1jA_ix-OiN61hZ+!a6$9Hf4 z#(uRo;9h`hmlHHlRn@XtP8O4w-hRz{Pez`gmFP?{Xulf7c9Hj7KdXx2?(U7FS-t+z zkAL{}zxlfl-+k}&l~*QZ^WgHqUht*lc?JMc(F`JM#mEXXBP1gXmm7z-E zR&||eNlN)34#v(8dyClYU7o&u`*yy2SerebE$5A*HP`3o)8(=a;dYL@qfxJ)UtXQg zYy0fo*Kp9AU0l@br_Y|8H|WpL&(F_YmSwwp_ix_3DeYi0pWE_T4)CSl`@&~G``N$$ zyZ`=!@BjM+Ohu#^?FU%H+C!)UU-&-t`-fLok4~1W6|q+-ydRgjnPr2ab8fm?ZFFOj88ADv#dZTq;k<#IV&%>ZCiOVe7x-lBRYQ+4J6ps`3qy2`c2;Gii~XVmNQsy7>r zMz8iV5ox~tQ86BeUwUb%N5A%i{q=f1Z`UD&qZ_Zk_uhLL8z8uzxftWRY-7|jSgURt z$qBQy&LU*o%j!+Ik>{b6d1!m1;beL}olakVyLaR5{Z|g&tn2#u<5|Ctb|p5SA1bfc z>qUh1WCN8E5iG5Od=V@(aoU@P5PVb0I-J(hZ==J-a4;;^S)8>SFsw@N8?%}y_ukcQ zG-L}kX~fSS9{%c^CA7kx zFtb9~Iy$YhXQopoHPI_T6kTC@P_-dC$-d=xk=5 z$jsP$s&Vwh<=Mix(3C z6$!hAl$xr7;|OL(B1(yl+T9G$%!s>PUfZ^XOOXADL@G`Vga9)KDyfo)DFO-h?(oacm7)J|6g{??BeG z*|dVGUqRu+q*DA;@Nha+ZoPb(u&4n?Db%=-*Q7uVA~&j-~Y9QmC) 
zH+S-v4-O8RKYG6Y=;7r-F&qy0qP}|X@w(icEKWX~<^7%A!E*BI%Xk0&Ti<^F$3N-a zI6Qvs{!1_2OP42Rjxi1fy_1hVYTNec_RSAI_+W2*WCS3JY8!2;s>=JLUVlE^AMft& z-aI^b`t<2{|N84sKYXyick9XH^CeDr(?syS(OB9w5h*%>P-QX3TC}oA>S}f@Av*7{ zaUcK;8daOn%L))9vX(2&hd#u(sb4O~x6APZ2-nl=8OEBM>B+^_WRkZy%JTkraB$;h z?W%Vl{d8w{bm#7^XAhq}etbFKTr^EnjLWudq3Hv_#}|)3>ppw0|FzIG>&>(*%jbT2 z_33xdKYZ}~$@2|0&biI}R784ZF%28xJX#iGGn-B7RnUMda+NDvjKp-cS%wg7UNduP zv^cx`z=gJLg|x2g*g#cPd8?{QSvgrO`t>G@@ess0H>k}F2A+Doo>bmBN9B(9zBC=np zbg@`)4Mf!7psAbLm0K*h)fF=XWeVW9i81!`K?p$ry!U1i$*p4aL{0;NnS)xI3|v?> z&D_^@Q}mXbB$Z$iGc{3ZycszdF zRi@43@4xfvtFK=D{%%nmebL`rtyVX(^5x&YO^fKgU!PrHUS6(dVLF{|=51ZO_4Ovk z^~M*Q^=1O>9A7Rj%d*^HBN{eBF~*BFd*{jNY~JR19-IF2i%DBZ|D_{;)$nF>|GIA5 zcFrpRDF-4agi~#alOLE z`;a~T(K~-r{1113@3XgeZV&L4tb`2AB({LgIYuOc0ASeF%~MX~yJi!VEnou@qu^&a z^JM0TQMSLF2uW$bLI9wkgak2N;UXYx+rbzS8*K>*lvGJ55)%^=0V#lW3J0JiPlx@7 zE0tvMbIf2C6MNkfoB=zghhyq244h)69mA^)A>A4kgoqp`U>BzYCLMA}PDKz&C=d|< z8Gun1Q~@HO0IzbUTZz(E}DS?<# z=Q-$L^ND8b7%jOX5*pkR&kaR2`7@H`mJ%vuODO>WATTs$CZ^7N+)W9IkJf=U)r>p> zDt2+wh})JL_}($rVdb}02^@<@j6yVqV<3gGsiXnK!URwa0tOAp1A}f z3MvBT5~>!I7_jSzNxC4a8ewpr+SstCtlHUZHWtN5bi3G!F(&WPwtN>u2rl#FDUB@Y ziAuZ@r3?`j5ZOtPl(tT(86sM37VCN`NR6nAZPPT1_5AqwnAxw_b*L9lpFVx}z3;>r zcY=8DeZR~G)s z8IOmUW%+P?^X2;&C!aK@lc!_d?B)A){?^fJpMUH1<#L(ztCOn>U9aDM^;J0By?^(< z1JfA9tOnYA(f0D>Y%{%drbEt$y>kD?%^P=LBIR*^=l1O{l0ROZzB}gL%YB*a z{9JZ)vz)BgS(aU$v_IN>6yF^8dcBv6!E&*D_?@44@2^ij<$|-g7u#574=DooP0BnEL@Ao5t znelLZ@7}%sPTueL&GUFXhN>hY&2Y6^{n4NO>2Lq`uhfHnRaFaFPC|J9osXEA$G!D> zz54Lc_4W1UDy&v3zybg&SbOia#TaAmGgX~Mi!qW~1~4-%ww! zwU%v+9GlGiV$&ev^TqYv-d;cBdb1=y25T*@9Ks@+x_tMX-~CtLc)s`l_=Dg10~x;J z22h`V>2Bi9nE zB3%dp4b7>lnjoS^H2@+^NQzYCPDE&l3`U8|0Ld}=bH7b zE`)S7wB(IQM;!oo?~|P164p8bqEn69(K|ppxxviL8EeoNP%@WjtCsrc47#p01Ta*B zq|P&o2vXWxZ?BSF6&I$19KbqiXZp;|Vr)6{(4nBYE745_wn)5<$f`y}gf0BQ%$y?t zkdV;qV3KGYmdujrXJZf|>IfOoK}*ey6a5+zC?YuLx(C28g^x5!0Iv$1vZ^Lv0LUf= zq|B6T3+WF`8z+}WkTh!O&p}wW%P^7D!UG(Y0R_8dQJMx4K-$b0JC6$!NwTaapAQ-$ zfC3>Jag32X8W?Gph#_P&)Tn7ha6aoyXGp0RH(~F%Gd&|BB<6~OF(X5l*98^n3U3n> zg~Y&-A|@|%7sg5;e85CdWmQEo+-6?D(7JetrJ*>zYpTl5Z4Z9x=rmx;3MqI5n8IC5 z33R5+tR41E&5Xf`2@y$@%%$EOG&LsWZHicszyKq#k#zvyv=K8%ab-kw047YCh=3xa z0|AFFTEMQ8#yt68K`kO612U5%4k1WTm;01K0VGvp1yTg=jM!#I5YYmV19H?xjJlpL z*gzR6_gZga8_K~5T;?5!H0{N7^6=^9gAYfeQC;@g<-_7=wGQ9^H~;-&v6y-H)?07g zJ9?w8TMTVc6phraHozcJyfK(wUY{aE*&m!9>CHt$+NL-yQ7_r_K4um*cyy z=Xw6%d*8kI-j6PR`Q=}GW9a(BXOEtR=_vsC-NBu=Umxx4?+?e#1{o>ob2&K@`1HxA zH*Vc1d@;YC_xt@2LhigniY7s0!BkXbR`pL$Pe1(b>B-4SvuQ_z@#SoN`Q$WXHscUi z>^Mg>i|C!ih%M#p>#S`gSV-TX23eMwOjXsuiz;YvM8;NBS*~?#<6DEl{o~_jy}gTz zi$>PL+S%D@Gn=|bn0Ywv-MV#r>)yTNAsI-a`y#)`RTg;$rQngmT%o``OMH z|H-X;_wMzajYgxDVT{pnGZ+kdJHw(Vnp&B8jVlqMBHL^>;)iuz$1KDco4UNXxM&6| z9gUuZXl8BOhWeD2ak*TElXYF!^=18s|Lq@wIcAnvn^{<)i1b*QIrzM7+elCltJVOp za_o@8z+wb1u5H_mpn(e*L?kr%X0vH$^FcmvBr(QKS+s2%vGXDV7=kdd_;Mr4>Z-ig z$|WKK4aBT(G%k8yx>>JQ&435%^?I(JklqPu0?~j09j*|OFf&ULNiuE_(FD|l)f^%b zquQ-?&djGxTQ8=oJ8_`_sL6_)(5*S_X{@Kq*shiOV19hb_?Bz-S7<_`IYTMvk;5;{UmCGvd`p^VHg{)(J0suyJT{j$PXD@{inluw|^5Tj)a)6+!8tet$GpI0gcOWq$aT-35 zAWG{g#S+XR8DfkvLzXCxlaEroBTa!xVB1#5z351Hjv<<tac9G3o> zUQq8ieRWux6cCfk#iBYyKm#+RE=hripvl{tD%}yxFoKy$Do9fROInEnIYjJms@6F= zRXh9_10Z56iU_G36^Wqp2zPT|(tfZTP3*<(TtLyGOYRdQ5p_rs1$-J;nc1iL2Dblg zW^T)9Nv{=bJ;~{z*Yv0XtExk9YRTC{9p)U&K-q{WwVZU$R$~N2R$@%6B{nd%G+%=e zA^|ET7qBWQn<|hHA`zyIV|oOMh%NODixH;JnY(E$GczVNvk0&q(Od$ur8g~obWBXf zL$Ep^c41D2qOa#XtN zdO797EnB&|x`Jo*Y&P3u`R$#9b=w5flA-7oy{y-+S{;K5PzKV5#r5^5*SkJFdGEdV z^1?43oz=eqO@-M;uReM903W~h+G{sndiC{2Xx1KF7eD#Y_s`GIU%hoao6W%6+2pi0 z8U|@rZ7j<&IaJDCxn3=o^ZD&BeJNdRs-lz-4H?2UAh;NW1-d;ak0N0asB4iEA? 
zcjEKOdQbYhUiNx>bMcP`7tH+1A`_93#@=G~BU&NgG8XfCUN$kasO=(yETLk{vYZW8 zSzE}aK3^_uq|u336N#w!KQJpup@B}3uQ$2yeTf%t4ZO4#2z~XtJNDhC(|;^ zR!9q}E6bR;K_KP<)o=E84T!w2Gpk0PmfT=wplyK(<$3 za0rMHy#eguVsCHnjZ9T_Q;(+8>8H=jP2s*(&7F&TRx%e)ix#~v*_ff1mjEzug_*fE z0f2lsnZ@(fcTYZTzv&AkqB+6A!GY}C!NEawxBqc_`O9DWg%Cn-#ckVq;n{2^XXhf- zblPq!>S3@a7=NYyNr350GzL8 zAy%#o`v3s|07*naR0J5vfCu|n0l@RcwfFv(>(7=`f<7X)8qCaMqK8{z2NKZ+3;;5Z zQN;r>W4cCa6flsEFs_IQF&$cIUty{61T}&x_)f-nF>e5-R7+Tm=P>-EJP)9NB|Kb>Gv^;BM$}yLI#Fuw~>?rWoEJj zXiBwSiijXUM8+(tfJ)wph;wWj5CND)C5hooPE?~A5oPQ|BpoDadXjW^7xi0KGJPnb75K%zLlL3itBT|sA z;|OVMXvRPQ7PK?jm>IK^Bw{#5QP8bRQ4kR|Z4+Nq4gvrnJfG zWHH7hR4}6%s5nA03y9b`h_-V!GmB}0#u@;p?nLk*1ae4@(^H-lls0`8Fql~sbnH+; z)l%_2IVI`}4%19V+A@ImxrhcaA~p%CrYNaYS-^Z(XoN%*G-OrT)Z!S)k(xQ596VkA ztNRK@O#3^P)_d<=(KPKQw7t>LJ7=m_S64B{QLk4OJ~r)Uy{>V66&B7>2+d;C;%vEG zY^KxcW;Ks7hT&e@n8paIS?OKIJJo=KnpvLb0C0VMeLk7=`~7x0+v)dq_70BskB|Dr z*{2_WaPe$38ga;;{qTd27LV@VzyC)6jsEWLk>5GGvlDyqqj!Eh8kF;^YkY-QtId2m zGa0lJz4v8VK7IOh(}df1Us}u-Xh;Ngz3$~zG$;$&tfyJ-o3Q>jfBa|ffBdjrG{fPr z<>u__ghd!2tif67^B7|cErBN=8!#(q7*{bCi0n1ag*7X2BF{&|-5YoAj_%wpio&(>=+h6s^YPyk(eP`d z(w8sYIehK4*S>r5)7!gm{O+r7fAgE)tY$Y34-Xg5o*W(?4i83WXJ;3G|NP05C-rK* z*=$0JyZfdninuZ&s#S?-H*>1mN};Nc-+TH5e>~s~BChJzIY$kvs>>q)n5){hb|%7zk)u^siRuEh->>SL0H6k1uh$ZCRRtDn*4LUtgir?n*sNMr<+9jp zLMr`g8#6Oig5(4=<+A1+uIJ zX%V0t=1tQWm66yJX3mEY%+YWS-g7aA*FJaf#us0G^yrc7jzb7Lc|XSZ;h%qWWfrY< z&WRSP7_(MD3h@jagor4IH3FK;M5GurH(%;~z8^vei?dV| zh&@g?#EtjfdCs!TLsOPz1sY@Q`CN&e*5#m#)*{+Y;COO{r*u~@W?WiefT)_f31Uzp z7cDR|Y)fB|iC@&ciX_5Es^I|aI2?&l%?>S6Qlu@KC0a%h0*IKR5P*m*fC^&rqTq^= zLI{}9>V!ch5kIYs4pG?zBx4ss@SL(9;ut~GQ5TuhxrwRjy$y4Hwwg{57(yK@4B8#A+rnMq8maY)^PNs}-_MOHO{?kJ71MPS~>P*K55%{w#jDRZRg zoQFi35<^0ugy+JvNdTg-_0gy*rCwh`5dky+NSV&`1T_+g$OEaU7!r|6WahLwO9AFC z5RT}OqQ!&=Q&lB(&XJgDrnL(oTKCq#R^R~uBr}jCknna*Lx@1& zJ);>)TFnref|*A!Q(_7sFp?wIv=cKSLN!q7qG+H-fT~F?tRHnCMtM=!b$q;f=fU?cKmYjcfApn2$nM;^ z{oY^vxxd=zP0Z&1f{*1fwh~>8QN-d7ZKYjlE=1n>H`~j~P zo73l47W~m3Y_Oh&VbxRBjK_O>dkO9+%Y}1p*SpnfwSxW0WTIm_K0emftZiEfwWmsXkCb?`vNh{9$e2FTPnnR78?5xJR%FTZ~9b~zpnhjFws7!3L( z-uuIM-Qw{lYT8Ph8)R7~F`z`>_aTH-r(Thnv|v3js&W`qShowHkX1R08&rjv-N^So ziCMDZ37bgFB9vL#wlx4`RC({Eo)eL*%JVOW*sTZc&VFNP{J0*>P`dN+ zKj3iS&yz+C0Kj#2QDXoE6z4ETbB>*W!S={U=yv30-pI@#tp+Gn!>IFJw>Ue9F%UTb zWLA-w08mNbNZ*=+3Nq#1tEz!P0z&~LkXQ0n3L9c-J%z-(WD`&$1XB|NL?G*|Cq!fr zou{N;F!HwCRk_07#h_6^PQEh9I#n0AA!r zpaWh3fHjD;6`0h z1VAHDlYmSv^D(3m+lvTV*W3#R$dGWEd7eAu&05+ReV$Y10e~Pi7DC&^GEZX|AY$9J z&bb&VM&&Z4Uy=%CW%Fi4={I(MWnZ(V zW_j~VpNpC9Rf9x`p;6C>=(GAlJKui%{Q2{X1-J9Mt~UaR*fh>(<^1IO{r5lk>5Mj;%|YJw z`~CZ)-h4j)yD%-=1}HDfayUQ#+~+=b;7WIWy;Iz+>-O)iFXP3Bu1BlYDzkChwk@oy zO6pYvfZSr1W%Ix>Mwdz3w*7#rx^APHfpQ4ZJ3_>;u`H9kt&5`Ah4qe7?l=S;24K~; z=uM~7k1aAYwc?r8MYYJ}#X4(ht$^OMm~$?$*6q4q7Ij_sAQO+8&rfCjtpnJ?TpfNVOQW`jFD zyWOwX05JS!SbzF-2#tlh<)V}B$#J9XoS1}GGgD~SOALtUxSTDT%p*?f=#_{@&^YI= zG6jI5>>;9$t6r~%5V52}^{3~b_A^y&$Lm~VQ>-Sz>{(kDMX|6}L}cVfqfzh1cz=IC zKhFG}qdu2=c~12P0QR!Mt+(&mvER6IF^7vOoJ_9F4948==R1QvX2#7PX9rZy4&2_a z|Fd6w`PJXJgd6}q_{+uj{`5~inEn0dKmVG$JGP8Vu?t>fVvK80b^!$Ox`8vZ)#>qk7DMiX&PyR_x=Uo zOGFG)9GU=9<&tfGRo3fe=8LNAx2sjJ%GlH+46ELHzQDpC9`A>BxrZDtsF}CByLftf z8Z#ao?SDbEg7#Gao~KZ^z`wQ`)_=)Ec4A~H69PggVFi<`E0eE zE*EiK=T-3wU-(kL->)anmh<{zF|mw#C07MiGq-wt?!T5Fjc=Uu$CkOvQB{`hM0-S2+aik=&c zuRi`@y;>EKyi&-?%&3r+EvitFfH(&LGZpfVR+}u#z*^OqVX09$&+5=-hW(-d<*L^k z#|^}Gw>KV_!)N=)r>CdFwwX6qXXny}9K!B!FdPr=-8{N+>$dk^$cf0Jo-gY8@t}8i zZzq;Kx_$UL)Q_HhbTx0j_6x7S`_T`cJ$atLa{uh?EQI{{8276ixcOxK{>eZ7jo-MS zk2jmmy}^x>B@91YH_i1Y!*Q?2ueTd_%b(N3wrx!hpP!$Hr%_cmj=dj1v&sOL=+<$K zu4iTeVvK9zQdI>5BF#Ml5G_OI{YnE76^bE*GAm;fe9)UmdtZ$g&SlgW=d!*zL=3%~ 
zAAY!ecJ0t`jTjB{ja1IZ%&96O2i4LFi2@kO&?#kEB^C`qxoJYZcLd}8!Ey?Sc)c)0 zJd15n6#W6!mGyJdxPgUL{XVj)Xz)1zG!#^027-hDEFwO*`RVG}aB^^PfNPNu<^?;C zue|m7!S{dMJH2Y#HdsCA52WP~0%c6ZP1BZTxm>IXoq32+c6%V})!H>h--kT&-pkOc zs`}M<^zeh9eD{N=P1Dq4Q;@7R?|thVVD&Y6@1qM%Xvs=dg}ikDu>n=dnVoaHpdzx# z7yu^g6#%GUwXqEfGZ%!2QVblO1zS|@8b$}CL~0PEYv_;;=^9EvBu1o5x;v#~XpkIQ z+Mxslq`O;6TIoi*;q34H1LnH+-aPO7+-u#a+*&J^I!98q92A&XpUeYEE0#8#tM$eF z4SjRls1(>>FxTxY-Rklh;V}x3!DH|oo?n=vVY@|hU%}YvKwM!keu&CG zY$OKdH9`UeVW^PJI>vSpFF@M(s##5 zh`KJH>&EMEb|W^EQk2{9fr-&2cA`^9hZEsB1KlB_sb znBFuHOKN4){-v1}rPU=@?}n0 zj^|cxv%{!1HjiTU(#{6wFJ{mxMwUqYPn6qF#~*i>Ph-0u_I59LLjz%z*b&XmW%C@0 zb~4S@LI5rXnP!fs@xPq{Bs$HW*IR!_GllSvRs;V1>D;}U(ra)a-Zn`lIdjfUEu-jO zyYhdcx!C9N~I6eX5_@_^QF}z6<@n2nwQi9$sJ0nqsJMe$BQfff3Y2vufY2NMw4u0-CuDP^1bM?YE@8wzY3CTdZ*jaq_ zsGuTYf}}1XLNEpxG9QkmBnF8Jk!osarjYsFY6xtT%Z@DP$_TN?w`+jYl=Ry+sWk{G^mP{i3kk7y3e(7wPzto_Dh(E9J<&PgM7h zv7Kk{LpzT$BNT+hUuv3@1_{p@?92mf@x3%n{lk{EtO6UI)J@gLh4%Zjr_*>fN#_+zK&bUBf#rKW z6RMyT-``i1pXAZL(-B3A+w&#Kq>uQ1EwkKSecCZQewerL`utfOnUA=c>0}<|a}%Z2 z__$9$LN<8QMh=?()U#o_mF2p?|Lb7Hv%A0Azq_&XXQTe6d7G8CPQrR->q;!rPw=FV znKhZw&IYuXHLMSo8CK`>2v0EgS9}lna@7$!@poscArIXDZnyM=@Yv47%GP z=7lly%BgWj8@dP}BmQopoQe1zOE0 zB>G+zKOTf!<)F#-Fgap|ppXEfj*kyO2vaPJtAE|u;h`}%-hd`CiB&-Fncxp-VZ{~#7rLrVl;GWdI}$Rq*0Urv7#(piLGo0p<($O3x@i!s6n_mRzJW#(dNmJzH9MrNT|N_Qmog%f$zf)D{+a(P~3K zipYcbCp2*2moAYTHI((WexB?DgQVn3<~L}lJ*2=UcVI*XU4xR5$_EPXBZTn0vEU{G ze^P*31d%ovikK-N6C|R{PLm@DI_R1mQk|3OzF0={peBCvc<)L8+Nq2i6kjxF|FT}a z=v(N^?ez9}h_1#_FV64Ae=?DTI_T0KGAS^KV! zA-Z#h>=@)`3cVl@uM{7;lA?-5+@&i=5|IoF-%Qs{z5P@@W8Y3zlGP_FbM}X=WF|RK zVx`p>i@5mPp43w=s73bX6l0GlHm%i_&3yD%HuU$R?@9}Yz7Peg9tOXIJ;xzvMinGB z?F#*c`T_Rm$NQJ^F+AL7(S>SX#Gw3P{*1yD_58wz30tf~1?7P@G6pa`?AfkVF4HKR zzIWLr$rg+sUeXrlv!ws9&InDELxa6Ufno6!;zo3RpyHuZAWuQWfI;8UUtZ{>j^rS& z%Id-3xV>yXl>}o{Q2ziL!V^lGLO`Yz#k&F0 zG$qr#A_X)CXQg(_=tPa!;@B5-cOEq#`|ocYoL~1X&lv{zy6szad)%CaT(we#+Mw zyH9^?^k1xhpODT20*|__w~o(r{0PJK4(@wq{DB)ZO)TY`x9#3) zH)l^#IX=F(e^b@&{jZnH-(+T*E|lAgahTU0C=D!`Dl7dv^VfL$XnA+Jo1jZ{Jm+=V zu%;v_4u-wv>n$IB{H#T?77w9NC;K$X$3ca5FVE}wx%TUwn~3zzzfP8xOYK=>N9`wD zT>iU30w$m~$Kwvq{Op|#M79KmOtW)t$FJ7n5vj{El=~(Vrg|2#CMMm^i zN1m1Lp!hd75xRWimFXTb@a9@kTxEGO$3a)sf5XmcX2jI)5AwD~-HJV%S2CpzEzWl@ zuKw=hE2S%=PmFj&^+OV_lC@g-Q$=kvRNJ*P)Ia*BkX7a3Nyp!>ujq(Ej=W{}VR(Zxn=6>s+*$+6 z3cKB68OMBx`utLWZ=>`ehH8ThDfqT%_|(02k61h?Bk~Hf^}Ah(Nj<@^^+|>T?$| zQKV)sYj%G~Q~?B}`&XVQZHK)Hr$%Xp2DyX@*;$97~7*2gYa0 z^M|9O!0(1!bM$+_^eJf|j@bslew&i3LCY;c`G1AUyilB9_0r16hW3Hjuvs+`{LT>E z*I@MhKvc+=qn%xgca?l{>15;tUuYv0Ck+;B>g{Dw_5aqF6F1F==0*1ss$&m{Sm4ny zH0pnORSxhqsysZ|(O__-fygqLCkhqhUIKw=;Fwx~pMk*+)tJZubdx9 z3wgY$e~Dw_AOgGEu993j`89?(23twSDN|h-!_Q#ZG(0gEoi)Frcx7W1+Ttu8S^~y4 zB$H$D+b@=1u8&7>R0-Z|j%O~ebJO$rQ{esO=VVmnV}k$UzP7BL0F>9yD#ME29*b$t+BDb#R}m@B~1VL#8mggg%`T~ z%K$6~P0*Sa7T_+y0|ndf-+p&>XS%UzCg$D#lKY!QE+{>=^u@oz6evZyFfJnw2ivUb zsFLFTtwHq**&u7e3vTfxkg@fMI}QfVO{nQ7i;QIbiOOAyUp^70pcP!b>j9irqnrRWFdRaiejQ9)V9Gw>cefl*oh|X%^|*eiWuD>~ zxHYVGnhuum{%5#Z)7UU?krZMm9$<6wh5|?M;tSPz&D!4i{p#m4E|O^U0H2b%{bi2J zs&P)vv3K$K`0@p!Rc)7-a{=cKPmiLj*S{oh#^doF)_O}steKLOw?g)#`Rr}W*>wOj z;>o;c`(5j!)A#0fZAxm6SJD0gQ59jM`3R0VyjyO6fA6OD_V}qf)zsYQqb}YTd4m!@ zPZeCp%KTs^>kM{dv%de(EEI5Mq=}! 
z_aB2bW|2C){4_WbMOQa&BAmnKD+Q1L-pbf^aRvZP%;D%@Rol&8z`a`AtzP?uUchY{ zFtHxYOKe&6wRWCGM|TZ7v@hK)#d7(dL}z+(J^dFASq-=Ye(5dpOu=&d%vZm`eo?Zm z5wKTWPKGp9+S4+VkVx(wF1H6fNj6;+E&CTee6H}|Dxtw1>TJ#QWWIVzi_bZ@zTX;6 z18lISwYyh$!`qs#nuW)*Tq7_afi%%YtKN97JfWNQ0%XKVz8y-hEsHd|Oth|QEd+9Y!Km(r8DNwL0Y zkJOY4n)KtzF4i~0Qo; zeX)I(CoT)EmW1PCs&ce$ztqaMu>DARbs`e=ogd)QrD?k#Vz07Wp@|ys6Zp2DQSyJ6 z?!o#laNEN#3&lwVgJs{V6EfuoqJat-AQi!`VhD_5aYOlh_p0(@$duiBmYEI@pSL#A za9wMFm)D!?gW-@)W6KFa%*}nHYlY_K=JCt6aCnK@)8X#!x6|g<^_HJVvzV|3;83(I z))E2FoUz8*TwYH5P&TsydwK*+^OJ4CIfk+?JBfcjPpqA`-!qFL6t$!Vg-&>UkOXhn z5XgLa3hThqi5Wrl;FIzcRp1{71%fG^Lg9-dSWGoP0)FFBE}MHPlP^1rbt(ZBIHkL# z9z10|F^bvaz4g50D+Nk>(@qFI%5@!`RVaFKW==D-RP zI=UWHen6)fnr@egCItsb*uAjeAH0ezggfuGcE_bHm}L2)nSKZ`h{)o8kr~cTafwcl z=^+xn!~+hh+sI&>5u(M85!Pz{Coq{ zp~JvEnTmKhp9F_8pohyshF%h?+YAw-Nx9+4L1R=k^Flf_lq(3p%$9o}x`OPJMC~(G z;Vj>XX30T{7Lc_-uoNSCps*$A9RyVBUJwK4)r=u55C-z-nyRA=gQ{@_HceY4X)HHU zzX}pKEpMf@2Pl<;?z*Bd*Fgh-}&HICt zo;Fjb4-HhOn6q8~fC92AoFsyP5rqdSt_SnAh-zs(30v_}^Au%VAIMg^crBY(9+6@< ze1tVg@l`E>jPxgXad~N95F%NT7)=c9JHy76MtksG`?9P?S4|=#M)|w?7DR+ZrNW3SE&@puXP$hZkgTI;ePglj>)eJn5R zeSRJ`9#&xT@9M3M*5g{;+Lk2TvKAkWMx~Eo9PxSEYPqj1&(`W5x$Yme@r(lQrgry4 z@g;OCKFlm@GxS}sO;bq20gw2|YnthjDp>c(-NVP*ve`NSqB-20XFHBJqKdPuonL7M zW>j&#+2LM{m50BWLYK#(RR>yJH0=Q15>G8@c!Fs1a9S^l2K3|}w|;`;AD61M)7JV0 zHoE;Tw}$)MZ*CeqJUu-P$zDzZvZKZA?QY;0-8=8SIJ@E-RFl%@%l)!g5&D`TN`n4V zqs!LYB!U8y$};Y!C9Lm;_r1QW0oVWj5(aqr`%4U%8O%xEj?6dYJWluNg#1a>8DqQ^ zylSzIMRXP*h?6sQUNVfkA`7i&_Xfqz8My+L9{g&$E?VMEN1N~k`FjK zA!h$RgfpF=wd@dgQ9#^(#q&61FlOAy&uu4_z~+-YFp|(Wuu+<{ z`!o+at&e%TUp6z?eOsGrYilzNRNrxp*}KS{smOJ?y=uMEj;H2Vt;$}-Kvj)Izm*Zqm5#NM3a!NI|wr64C-T$V5}ADiYx zAQc_al=*Y8p7lT?R=V{m4?JW84HQa>4w~==bxfd2HCdEIO^lF0AV|ef>|Jc^MgI$a zzx$TR?xLh;%`Uof{IaIo{7{eszw&rShZ7WBQMYQs_;ATy(+a>AyB1f3ipd@tU(k% zQbmD9^*ONICip>r#(tYTFGUA^oAWHYWoR+59P5qyAoJV%4FULB2Fe>J5CJ2v>mISn z`q+ogt_>I3{N#-t5erTBs%p8~&`L(ocQ}s9EE*(g1)*LC&UDDYm#mn19dEc#+^WGR zhxLu~9V&=aliQpQ@P6XX4d8QPLAS*MfxgZZT6r-|14)4dZ{(Dk1U zK%>&mmz82p)^#y|R+0(BjdBK8_pw#Eq}3KKxgT7y8(r5JJ(d{VGHx#c&~(!1deUit z>oq|0N(*@1|2z5K?tORI(BH4X6_nD9nRF$YF|LLg5@k)*8M2wWO(txeEcBRR_nVH^2X=cXKTV zFou4A<6WP@h0$6VBurdjJk}U!3zBj)r7OY4$zn;@lQA|;=@X9l7^Ntm;oYxq&be3y zR9qd!^}#>NO1>>u&&kPcnn7K-@%neVVfy%Vb0!Js5>_9NJPkizz>Fe*JPO`zhdoX0 ztqtJ$6}vm%o%y#pzVgs@$R+7rV4V>?9}^{mX@~cAj-6`%#=X?s-Gk|@apJn_=}>ev z!RFslL}oy{&lTYAS_Zz0aZg9xnD`|nc_QUFr++VWeYz>Zo<<7+7c<(Pj>|h&cfG&W zn38lp7^u&(2uQsfDe?yqDCEFhY6*hP$+&B$2XwV`PU#{Q@xO-s)6g+!4>%pI3ix+0 z&$rv@CL(?+4IRtzJHD-9^-_Qh$B-)K(dA30l-ZblBi2bEaIxWu1G!?>qNh|kRa4!r ze03e(Jgqzqibw_uhz3UPGVgV;9F;Pi`v`|e%u;p!@^t;&V{Ie|j+T}xKcHJPY2%LG zydG1?^}pRw`aIvQQ#){WZ?+1Mfho@C)tH>jAJaF-SVfJa%TlfkG5x_^UON>EpTGX* z_o^qG;}mSOp2m!+UWxHJyRn%0S`?FWC!kmW2Zm|&|=T-*I-wqw9uv*-)P{IkaeONlbGMDyL`3vV(R z0^UnbX%Zb_IFfI>W$uD@dzH#I)jX*eTV3`8-B5QCEYrPHl)$cyeVuDXSN1t*c>AAr zz}W?jxLEr~X+GwmA6-NYM|tx;Z#=8&!S4JewO+>xx*t z-U>JwfAZ|ku9m{SOj;+E;U64T$tKMx(TCAPJ?!3E1Y)xVQxVqE>+rc^B_im)k?RGt zBgm}Rgn6Du@)KK}x%#3>B=Pm6JP3KR=s;h|GZ4?fxPdDS#i9_qMINmbDKhm0oRRkl z8kO2fDe+Y4-&hKh#KW;-pgl+k`4`AD3@o1dF=4tL4WMF{F-C+iQ5^*O&Uu4eW-ZE2 z!+V(^;35yP$&e~#`2-4=ujzH9W5z6HWg!G7!pTS(e{19PP&TLXViP95mkJ>#tclWL zOGVj#Cy@NQkKy@g_$!7&q>rf-=ukxWSs+SMhD+3dk3w8wB$UDUSIRU6Z5ZiPxJpQ5 zX(_7I#47?Er(ybr*-%6#sUb%SZD~&+o@(fG)tLkoX+m;hm{$7dA0$|;0#kU><)J|k_xsUz#L>lppaFII0`fpwRx)ysTiC^Omt1Zmi^VRJaH1MU zC@tXxiAJWW={7TWh7+>Bwjx-Ll;J(3Fc9UVi-ncUI0C`eeE!qZ-X@T=2``zSX79uI zsSSg=-?XNh#0`>Hmy&?|U)}hFR04*c@s1BB$SejB1`id}+F=>11Eaa2a#>*9$L#G7 zJb}fzmTIQzbeL~rC>h6s2KYe;LN*iK1|63lAnt;d zJXF*3T%Nx5XDL@@tvq(2y84*q zzI!e8(E%``DvUdy?9Q(XIryFYy_w<`Z~uI^;#n$VeFYB`{hpl1 
zEvL%u7xmOtENo3W^^d%$B;pq4oA&E1t5oOOWiU=3F-p}4VNiKyAlbXIlCwbRd|WoVxmS?%zgqRX zTiew~F&3llpRw@qn5>F3gOmL1+}}Pfr%p59HIT?0HKaQ7zuhfK%Z#?EDyKjV(Wo2X zeQjVc*YTbhX7w6lm-sp8=eN@(LDe%Bdev9kv2G9Zy4_ZeK_AG;<~Q5V-0h#FT=3TR-N|rSV)KtHt$J zp6VNJS|@SXxa^^R8=V^Zh3WbT>fOhNGE^C?>Y=pZBIz7+MC5sTQDP0ts#xB($t+3G zQg19b37=Eba|UrR7M8>r3x&K}0)759JXEW3#)c6kdz=hb$RsyDLPpfuQhIZ`oA@CXsaCyZwaUt7765`_GV)nbazu&D$`?vbIyXt?}?!Qd|bW{F#Do|~^ zi4VBgTzyzJGLXnteK!{(oY15&b>n0??&mF#o7?J{Vm=S09$DI}17MN8b zS33K=&q3IbFEXTGi9vZWqVypMI$AIm!Fxgyi1D0*BL<2Henj!}X9Ul2K}xV^8B(Uu zlW4N;`jUU+Pl{BWoQeEj$e_I{%|}0v*!n8jw^5(Dux{leSy)UJ|1{D&DEDkU14(r% zO1S?h1JQcMGQ0{y806I1@3X!H%es##so+xOBL}7rLq{0MA*^UnelswmrNtH&3_Vfv z%-1D~AhGYKGPE8dRTZKH1DR0!B;LH5?1;i}cNKADr>=Ur%Y? zx>~(o+WxTv%svh6m+Sy*7vTA{w})J#SUXKpqc%@v9k0o`xLu$03h;KZP{m>MhB-hX zlTHCk=?AcG^$)FQQ=Y)%DH-5o%TFGQcQv}PcAjr^dA!_oa95FwU zT#ieOtIDxTf1kSRUBKBPlez6pl?pvis%bZ@qS&bmSJ5I0BkIHul~hE+Vv&LQub zXRd>Mr8-3`H{wi6zP+SeU;x+~ENX=)4a4i*+&aP^R>E``W%Pai8J)Ygv-P12*BqE9 zY&TK>wz}>#d?1eP<+weTEo#48!@(Z$12{&fSIJ;qa5uNKa2)BysHc6hIJ>k_l5c;~ zNHwRc@`Gj5ZdLi6DAB7q3+0^O0?hXLtw;f!41#Q#v zkP@7GP5y54Pe*6Ns=t%XS!aJ>$Q}u~7nmQm;(xsu+if}wIxASQsbkXub?O7Eth~AX z-KI9fcPmewTle>V>V8_V8K>WI{p;Bmox(TsoBbiVUa8oVu}u8n&^nV-OT) z6bdJ)3Z=#JK5qZRKU>Gk^UKR&XNz=b;}7-rlCBr;VKi;E^ZNYzkt%9JH2!B1PsdOD zPn#ip(atMm z9$Q?}A=l1j|I-lvZM()B4b-k6WGVI+gor|T-~t)>7v6h5Z>YvAE}n7?m5yJzMGM5< z486UK0T>BE&r5+4L?Ar_L1rp$NG0P0a)&20$r>osXe?biGo1f~quAMMXevqv2Bx48 zv^dA8qr>PvyA}7aOp)wG+thf1ASmu_uV6ffF%clRe4arRnnu*uvN0KffFM(p7*eUGTowk{veUwIZwm`IxT}Max=&zWFjp(`T5X&K0kQEIT|$sqc$E{GkjjeGB3d94W^JWxpU-O){*fl8C|K zhoZ|cC?5y2(IF9~zfx}=&K3-Wx*0jy@@4r_jvUd>O(VNIsv{F;eB0IS7iX)PvRQGz z;boiRvUC`B=%;3qdHC`R$_bhUI@B>>y+r{$$4zmy?xlK>fl}5KMA%r+lwTA_=H~AAL)9DJ!aJ)wFp4DW z))jp*JJl&5xXE8ns+I<0-DI;-oON*oq!XgT!S|phCmw(Xt)b8~tm) znUnO|-VK+L3S7bPsB1`9C7mg^BTrSL&DThov#0q8oei?&%bw9qeKt!*3gy7H>nJ8f zu!Kr=G+JVn1`m8?X-qG3jT?2z7v+it{YhK(0h}DZPmkHhG4j6akJOOxGmo`@|3=pV zQ%=W)cKa=Cl^u!1h5=z?cF-SkZMsn6h0xN-V&sd;xiZY!=(1i@=Z8)Y0G7 z>&g{7uwkor=53xkfu&Lo8nT`IrUo6;1ZT zp9jf+e+7s$x%BpKJDPUN`L= zm?g3Uu4Z!sZfN{wM~=9T1yx;JM9;$p$O%|(o{R=sBdc)Xg=oJFe&aB>!;x$}ARFDE zvU5W#u>`w?f_(j9ikV}03;8L3(9O=bAH^S@bApvLtK{>2ixSufCE4LsvR1SJD!KGQjK?^I2rDb|}HfsH=C-)_tku9Q~L{eHkNm zzB{PEAvO<4BSw>$7(|y2(L{wOn(SRb^3NL-6C4>-wP^Fg}FpJtMeF4d5~T1iyEP0z>k zd1?#4VszAxyhlf=dH0%DPzpY{{T0d}{VN2tSi(kp{q2zh-`*}TPX;+u$R#nJYqp>} zw=gRg5@X#=ymSBR`QKF3d;rRyK5~#XhO0GvIcVfTDwDDBWN~R^P*h@4oV`Y{g9e|}X6Wslne)rHjcyW=WKw~5Z zY(v(WEi*^MeM4GXTcra&MU{Ch>w@fEk25S}(WLnGIQAyAV8o_LB#)*(^Sal+svg$P zw#S8L$^cC8<#q`^jgh2iqx=5s&BorP@|9q5swMUP;QV06wbhP<`wr$z?DB)rW6J7J z&#M$tJ5SGm>W{@j?Xf|W{UXUBYu%P==zsfG)Wsu|F!oKHF^?p#2a~Fvw4bglx!Uiw zY5YFd>6+Vz$9`*2EjL)TX8)Ce#&?VsnUZd0_8L{B?)MM8FpwaY&Q<{k3=3`U&PE#c zlj2`qMde?Io)%j(tVa*d{Bhwm_P0LXIy$PPycKFYPf1dfd>mOcYJHk-kj3iHy&D1i zjJaYTQXvJc*^fR=c~;R~dqV;OV%~r$J2!$rqN2NCrct6zBwEd4-bFAo``FRqVq~qD zJoO>#^6IMne*W}g&gkh3c2Dz^8*sj{%nl}#A)Yl!@1Epd<#>nW7{_dVb)H^UY(3(N zNE7|eyS{B-oiQGhZwjiaEJO)qIhj!ZV9`z7R4z1KcjfPy;CuYl%{ynR0taMubzPKZ z=yQ{HRJzwX{s~wWAKP95aGdT;K`_o)>UvjMP*jIX2 zy3k)3P$F3~i{d@r;w9Qz|GW`b3{KK@73XSe`m(*+_?6EX1T$rO5%Y6rVvg7maEAc^ z`P_U|2iS&Z!zLFIlK0Y6{Jk0=V@ld_SM%l7)z#*|S8YDmMvp_8fOgI>`yEr#RMJn~ zieeyY{C<@r@zqBqu8K+@hgW)&OkZBkvc0xUOPsr9>ZWvW-epC8 zrKJ%gEr{(bG2cVS&#TA7^TbTA3Tt_XN!GLkhHbk08mU20{H@X|VK?rEI4VbqpFS6V z@g34OlVn9>d0w>?A#GH3QcqaMvNlWdFfSgH91S{V#B3KVZ17GWf9n~j;rH=lF^@K^ zwgc^*S3Xk(PAt>5&`gBSk-@*PhFjB6%z)F~{?&UxAZ6s`v3|3i`xH6%`0Mi0CM(DoTV2ENul^#;cnwY6RBEcO6Y^Ooo{~`>7c1~U4)^zy#*@e36QMD;*2Jg||tGw9s-aWIKo$qL@(wlZ`$qL`_|gq7b}Z&@^L0 zL)Y`{E23UFTl_2%z$zXLkx4Jc4RI&TX26nJAefvY!yQYZhG5e%z!7FzFoyawOxY$l 
[GIT binary patch data omitted: base85-encoded binary payload, not human-editable]
zY@)YBrzje8-+6N4TGw)ay)}y$bNJ75V-E9D8cvcumY$!_e)n0kB$n? zev2K{<-2H=MEe@+KZQ;DvC9)5N$Mz9K{wqH>-3_=!G-;f@Gyqd@979veNrvea6&y^ zXyP;P(2m;wz63}tHi~AA(QIc?Z*{dsT|}v8M z<$7R_>vvoTb6K6CqgrEdqL|+w%gS7!m%ecoKWI%u!qCvrw!BA6lZ6PPS`8(d)z=f6 zncW5?Wi_A~*CL7XbcybrgLpT?sIsM_C8vnKPqfk^iDgbfv;!S|jDu)vaP97(^38eW z!#d5wK*0Uv!$})K&&@Mc#puFGL)xfRfXj^zJ=mM2H@TCzjeofI5*oXm@VY-}^LXJt zVVOYB={Q&Hced7^qPJNyUinbe=5gD&{pEV*6c7Cro2|PNjPi%Bkiv!gU*Xy0p zJb&G~e6u0#h|FgnPt83NYxhA1%|^E!E7>(e(oqf;6C!oDqJ9GEyIsJGnxm2x-3fn- zL*K|vl%qbVgJXUTv$XsgH)`F!17 zGRkptbAyzqnQ=Y(p-Eb*Aut>%NfMfVsYu3(wi8F+JPqK-8AG%66JXMn@O9Go4j^#i9Qh**BS{rr3F zIsoSOsxY$sJMb&j9IX#tNnv$G19Ec z_@H39gDZy4_NUF8kk{1TAnU|`e%Zj9W#ro1qSI#FwKg*o%FIkjpY)$|%Yx4EzXnRY zdAISj%HWw(pD%J<0tC_mYnoEwWFzGL|B^%PUt;GeF_@C$8gx}rViV?0$r7*||JTbJ zT$QXU3;q@iWh9QZcnvqupy+GFxiA*s#m0rmP&@{Ge>diknPctf@^=PKVonj#%_xym z4^&235u2?o7GF|G)7nLYzY#;8;Mo(@Qj*BPxXVD&R4pHiA4%sMLj6a&o=gAD-jiX* z(QkNx_XN>j9863_62>^y|C3nOu@U9xJ3pPX7qUx*#FmkU`Reu7Pxg9CjBe=!e~;Ea z!r%)&nI(xednHdrW<&fer@5Zsv4*4sva-}(H#N45s%i;}FCP#FLA9o6 z>JKNq&lmkR>^|zX+X#toOhl_D-HA|-oX($^^0Yx3n)a59{AgotzpIO3$r5jaZhT08 z7Q=ykc54Gg=0V}dN!Z_M&QyRrdr{B@9LWFU`7Q&2y3q8q}r2D=(d;Qlk~(WhAME zmvs3mim@K&#!)-Z{$jK*n6W5K z({o?J$Y11g_Wa0|>R%eg$#nAciTnI<@9@u`KfX6Em9s-c*G=q5m2x!ZsyJ`W=<@ew z1Gh6LeYJG+d!lI8NxDZ`0@{)gFB8_^JvEtF!9_n)Ru9^|s&1xqG`R%VjyL))8!YY1 zvb?sK_}`Mf&x}4g=wD%`7X)4a%BcK~I5+@#Q=LX-m%s0Jw(Y)gayzk>bC2k!dlcWu zdbF7Yf94ed^Vrk;()&zu5(^$DC>G;3!>?F_c&&UM&zXf^Tv}aJFc?xx*)Va#Ff6&S z^*=ZgmqC@Mlr+#V2Q8H7KftruGTb*V&2DmYah6Ma;M{HgppX^*T6XK#=l|f3u!z40 z22vaNZ~h@n=((&e71zUN%grWvhrpT#+kH39sT~(zU7Aaae)D;6-IV9oQ;v-Ps%<*3 z*hz6v2$JqvZ2WGlpF;2%!#Mc?>!{<3J7Ppruj%w11PUHz5R9s;SOG@*^N{7ZZ|zTc zJk_6|=%XlRW_`!BG?@{soM;r53)V8wr2b71AN0*cn(IjzWj4=akeMaL6JvYKU)32l zf*iPA35F0X9iD86%{sN>hh6-Zl>r~rLZrgRe33H&pB*EO{j02SHo2^MmkdED?Y{*P z&s2HNsPN)!)3S46Q%f7zk>nhHN3qZ-oP2#YY+=`m#rp+VR&U2^1U51lT7j z7#;sKDEy@8#38{IoN^+7UMMMzCPQ#~bZ=Pxw5?L{d$o=$F(HSy-Ex0nC_}J8Z)|cw z_&8uHtROu)9Ia{|NIbTgJX9%y+5oFWM?~=Zn<)=>-G}3BEjg~M={$;AH#J8&J%mBh z16Li_YvxFDUVgSb;taR;asvkMDdv6`+KB%A;ZvU+)*2z~5>R^>Aw=uW#(c%d3A+x02eS#4K z9mcF{Bd#*sIN}ljVyUKvJCUZ7vwo%V&#sM>#Y~;Q$k_>Ox5AYp>#RH6mqee11I5V>QPax z^upacyaw(MO?3@us}%kC-k~wfr`ui1JXSu6ux_d0N~1=|6>@rd`ewevUov3nsL0kA zazHMBjQEe=Rrfn_ox9QI)LcC7G=4u%sccEVyF(qho8CO32mAZqa)bHLt6r(t<;5JX5t7eN@_V)iMnKjm~Bdfe)-X1++ zn3xh@#!V*`aEw{DpRHIDssq2Hq2K}O+3wG*hu#Xg(k{+GveMPV1 zDTRDbv02=Dp|N~LnY(8~niVgNQB+NdaQx`>KM?W_u6a!C!!J8)2ai7vQg+af@UqRB zm0#0(m9x$Mwnx|KjzTAa#vQ@62xp0;k$9q&HWT%&(Z=pVgLls_7Hk)EI7xbv6Hm3d zZLE$vbL2r3^U1lUlx3CKYbI>C>m~L2`dN zj?n!4%Gkd0L#x#O+jCu?-O=~-%m2pvnpdxOy%_9Q{@GkSEHppdo_^f^T~zz0AAb4k z@bk{a%H3hZ{RsVsS`NjJr$GWW^2kqg6I9W(dH}&UK0=s&DTN7g#n5`fMOH9Q3QDjN zdSo*1qJ8Fzk>lV1B*hyzK(F~G;7${j4}BZtREu0-eL)yab7#~iNPRIpF1NV2Xs>N5 z&KJ5rfZQ5o{YYg0GOg6^fqLb7{$S_ym!1Wn z@uWyf0GnXV`f~KfBe%=Lf1^KfpF>f{C0%>mX%`2JH#slu+2m$Ky>)-Kq$GFc63!&* zML3Ai#T@Mqv#gv~$vl^TOJh-&m6Q)7p-X>zy5!sjROGA`9N6!UeYtqzC;?Crc=`fI z3`(ME25WZfN=tu;S*YzJXHw1CUwMy#5}r#22fEmFKO2dC>Ty?uKqwNlo~>^vfr1%G zNxCJ%q3Cs1K4#tL1!+yY?s@_p@b^g#)6HRRqQ(p|%h^w|;uJoQY%82G^28%{a49on z!75k~5o;1&EaLGzonmlZ6?nQ8Uq^JOkDo?g(anl(UxO~T16J8%&s&@z{WTjr?#30t zsdqU{NG|cjaYl;_CLC8aQA~wJuJxfpZ}KHMZ?5sXca%v}AvjQjZ6jJdZDxqaE6xbc za(eCf9O5TP)(hIILQe=kcR4mIT#C&c_SEGGiHlYoA&9u2DJSqE5UvBpnk!?e0L#dt zS?f-p_9J=t**{!KpXbVB@pBOWc;_bLox@mFHRZu`F0%QSIJOh?6LGE6kc= z99V`PhTNjqBj9rKWqX$4! 
z{5-!P1NX^zFA0cMM&aW36J@rVSVEOp^z*T;!(Vmt!Ac+97?}Hjn6%*mrgFUW+y=KJ zRvY=TK=(uF_jPar9w9A(f}}U3BlY3CLDyl+=k;4+vu(Sb7YxDh`m#%brox=r6=TK; zO;8JGRk5L#$MIWE2W1Z`ai@A>-Z8hn5Jr_$z^=RjpOKM=?tk7b^(?ENh;TMKlA?XK zpz8aMw!ErgWUI;wLXVa13_Multj+0mjwkM z1Fk4DA6)ki8djZJJ@P2I?s_M$uCG(nGd>?s8mC)2@!7U1Mp@VT#3yktXUKO20p`3k z&LrE76aPOE+A;O5o+oE$j`t7u57!UL^y1g^N%~H=9YnAqkL&I$P06O~uB`j2ahsOg zlWe(mV4oc+6P>N$+5mu_S8PHJ4k!%^ zc?Ms0?<-wTYB^g(DMftm@2xJY3m*@)=C>ADxv4o5QeIb>a1dg0WQh-}A3u2SCDnS~ zVaj9RS-AKCNEFTUk4fZ-jSN_t<12K((*~HOwzPzjMoTXO{&sB2*=+zTJ2CaOE+VNG zDS(X0@ZFGeth}@F604_4K(AgOS)enbk;~*Rnkz(k_U{Zn8Db}v(W-w&TpjlO6N3PS zdhaU;pE+?=K8S&g2$cU#sFUV)LD=Nxq?!rBSFTJTbh1K!ILA5kI?oi9(Px`lI5Xtq zivsAI=p>1;?a>h8i_7nf!H1_Oo%$woJ6yIXmQ!9(xu-rM@J2UQH#1=iI5otWOA-Nl z;&*zJ0q16gSc9Uw*14&lJbMCZ%5s4DzhHDoaYMw(OAl~x1c-gPA2PW`4f57Oa)XGj`}ZZGUPZZFxr&-lD6+vf_-5qV!vgpw4bzD&hrL#lRHueGQ4&i&tuZs%IK zob_ZKaTP2(2}@3XSqq`Os9c#d5MP|{E-j6$Rw-$hr|sh?r6!yVbJm3+>(d~z!}3}T zTrqtcIYP_m!yWf|J9O@c8-1zU!x7=imI)<7v=E(ychy7ejYu=yX!o*HgO*SQ<|;mC z_*CLyH|yZ1%SBZ0crNQ9A;VIpk>9cL7bAh4xFM-~;Q*Jr>vKw%g8mu-jdZt{xKCUg zaZGOZPs5gf`q>l1auc*mG;|i(gLp5p*^ULWqCM~8~qT0GcAeGb(!w@)JgH|_$v2`#G9e-!0m>4VF{F&dNbhRaS*TAEob3(2${ z$%|8tt{4P?et(9MzuNPS=Yj}4UGcMmkYIVj_fo0HcV64FSfj%F)eQ#+-E8Yv%O<8? z4;C|Uc0N4(91r2b`eW&!PTD8d`7VY9(&=$5=EfkO`H26$J?7Gw$`VWE@*QI?K3Ikp zw?Nn8anD`QE-`n0=q9D+vrsvNs_TM9L721_Cd49^WrXP|6E44aMS`}+a|$^S`Mcji zEX*W~x}bFlCWtQ{x;p+HcRkbjTHx14Nps4~$DLwz9i2*4f2hyI%9P1PKv<6hSri46 zeM7T>N*?6QLg?RJOqRWCL5c7)?W7Vgf{YyJH@w~qdZwKwXm?V=!~Yc|^9%nugh7s2 z213rjjLn^O&tc@=Bt~MU4*J&F(dbK!4VJ|=TQe??=R}7-1|_^EiJ@M5@{$1C-a#&1>Ze zW?l+VU^S;|@!J841nvB8zkr){7N5*X5};)cNJR|Q_)Yq}h=hLT8Rt>B}f3iQw)Ov3<&W3ia zRc|7>Kkuq~t@`Z&BtXiG9l#2yxzc*EJkZq_FYe{n=a=Sk^B!m^=pkCUiCX;^#ojA* zwbmeY-H}y^@@TtS47i+|CkwdP4*0S;ez$z@@9bO!+Xo!?3NgKVp@dvzPF+2w=NQFB zyqjh%a#$?+Ys-yB-N?=k7+!75vQzDv53eGl=~7k{0$9AfkbTF?9))!$kagxo=(N%# z`uQ)A)=D>$^1uCejMPbMO8LQ^pNu3A>j%~N-CSz8`@Sb?Shrm&qi#F9Y`Yi<_#2gV zyZats{5OPs?=oXkFG#plhQZ{8r5*ksxt?C2lOoQWl-7F1GF7ELZKM2iy~ROyYCw&#iqCiZ^^71OTuZwhcrlDgi>8#;XnDvlKFW!(f|(M3c6 zS66L;t-yDC^tn6S&XsY*-GWt{^>)^n^eq`#Xl#9#7d9#7;mOZlje}#?qe`dOjf*04Q^KjE-09scZZ)NRE&(clLK!qa zUDK%Rku_u1mX&n3O&fD=vln-!RKSY6P+6(=%wLE4$BOGBE35NXG>!T(g)Exi-%4eT zO_OGRIrwj2B+tt5yWRaKu?4*;@8b}(Mo`KIwR}PGvJ?GPOTH~UQTf9F4(M4y%cY?t z+mLo4z$h44G>l`;?kd{@@~*f0!TP?WqvNSmms9Jyiu=_(3!lkoQHZv>?`}gK@6KXhIxx_I@}->G`Xw1$%{YS6ya2JiK&Dy}$=*0enr* z?IvSm&ck>Q(Sdkg93=`yijqa0#!-EC{b`m-?9mUG8Q+P22tdC7l5UR^w5j41yR*lxCBMy~E)oV1O?!2uVX3it9)VHSICd^LcX^h=tt>j0* z44=AUXs7wCfBnKF20H@!gxK`MZZz|4z9XR$m<0A2#|{t-Z6d=G&R2A#k1)D)vHqqZ z2!*HoxYCa`vm?#uaimq3m*IFe9-ndXKs^y1s`g|&0;%fdFKAdnrub>wqP03I(@Y82 zBhk;hV|@M` zlDU%}Y3|z27>wbG%%du@YFoW*xFI@P-uu#uuAUix?TDr^#2OVX_|MNd!Mr_?Iq-3c z2o8Y?6G8!cyz&E|0U|4l)~BQbD@z-lv%v^r$CK!yNI*DxD!3+3&A+AyGT@Z(i&yY8wY7atu5>HI0;EUegX6LJJd8ce^>)KR=&Sdq4bLle2DZ{{ODCt;Q9=j()h_iC+#C54uy9bW^VJI{Vv*F76wqxD8UB zeIKgn1RS{Qe5nF%Y2`@UYQ#JutSiGC!Y!GfyXx>hLy^Q&pz*-ye>f9?#5e z%gM#dN)5T^1&;U=j&;$VL>)^yq<{y0O2H?S$XW;GAu&UCotwesO3{>I%UGLEI+P9*|bZirRhtj}!4Qu>_zIyJeS z*bcZGB38`WU#3bHP0J6q)y4iXS0~HnGTQCxjH>yvz~Lz@EG$*KpQhfx|KH^CFGhva z(xpn-FqtHkCke0l+0B%PCa|y|1;uh;Mxb-~zZ@CI;?h#{(Vq0R@u;WIQC1v59oacP zeDmBTYfH<8CquB!w|y=WttS<(Z^E?cWnn|*rc5LUPYV(nWf_B8wSn>#GwTWoxc$I$ z7h3&-1S>;BdU+Dx|9f-zdZC4+v5Ag=U5i)zoeJ9qCQEDv-UobJ>2Jk=i_s+{G#^2t zm0nRA9tZpmx+yiB5lXA~Yh36!j)Jo(v2E87xV=fZcIh{NE31M*V0Y2k%NVRt#G9f9 z=OrBv@>$t+p8)^iv^Gqf$w%pxv5JO6ggHpYsWvf36&^!1-O_OTuk67*^O8!FQ`~~U znWEmz0`nncz`q3k;qdL3TZ8&#cQKTM3^b+7sli^%+QVZy{UAEBx>PhnEaUY+ zq|n4CgUWIv8_&cbZCk)zmKJ3RxclnH=VUWO-NQu|Hq(QxXL`*Q4+@JEgqq! 
zi)2s8iCm9+`<;PNT5E@uTJvV*rb$uX{}D8uD@xBLPGtL!N2Tw#WzPb|L?EHd2a=vA z1UiENRYyRb=F)+`*i5?=gcV#}Z#gpp`ztKGc^>naT-(MVgqWgNivV4z!v<3{RQntR zm0|vCZ2K%<7R(HSux9@taLEXD5T?NeagfMkF%siozf@FVix?gT!*}KNaoe#ebI6+( zEMx>HzuD0hAVka{_-{;6;3rcdCmz$^wHTOX?8-t}pzpqcrAzTMl_U!gGc0H34_MeB z6{cDyT{wre`4%;A1wxu4<_R&mSq7v+b&ax6w*Ye){vxZ}4EpG4S|I2b*4~5=3Dy&Y zE|D-Tn-zfpA`VBDvS&e$eJn_6A0w>rD~uA0X#*p?E|9B(KsMgn`_~&q^WG4Mch_>aBr+>o7)SPP+-$turX5zb z=Q?jd|Gv9&Umxtk1S6)ObKEreMYc-^IUtINru%y%X>p3>H%4Sv^Uwtso?aYo z&fI5VRWPw&1{GTr#>5@`NSbRNK`C!H?zJv&KN23MG^M@j-OQO|}>Wf21BUSf{g z)>gtRVCgv$zUCmLqX7(cmMv+_1NWwtoT^&BR#v#D4#0J!zrEsf+P7gow|D+8{$L)! zlyw0bbk&tK+cepocRKD!p{>4{>aqgIhZl80n`XDerAf-sx9FSuw^37BhpRr`@7W3xCbXvh zc$RHi^0mXt*1O>a8*|q#rX2K5sIm3k_?#bOhya8^HkCk~v6Sy`pt8ro zORubS5HL_~m>F7jgvNc};QRnjyz)QTvCA88FPJ{J=-bL0=sODV-ka&O65UUIEab@h zs(}18G@JCDLGZwGA5e(NO|MQwui>Dr9v(#GZ(*-DjIWcJe~OPw8wjq7Fzh2#0piZ@ z;2*zAWAQ!$#o>WErugL`Q|&~Br`)#_lxlkJ;hi{bho7;YKq}DYp#P!=YnEO{$$|-9 zL7KE=pbU0GY4RgDBp!7(f2(I+OL&ahrrDvS?stM_jr~yH*)XRac-)w?IISO6hQ`GF z4U69pdhUjaJ>B{+m=;6scnc|sY6n$NX1ZH3`?gUqiZy($nRAGN5M=6ai9&Ujm$G{@ zKt`BGIEQKs3lat+RP0d<&H~hO<0IC!$>wdVChVsL1!yf8@ZkmM?=kb3szv0Fx($&^ zy2mIcrR4b=BesI|?&tC$)n0E{LlFjgF0{f%rdy(JIa3nf9JlMDGu(u8MaJC7>JU7B z76etRSO`afsq2DSliQuVn3l@6k=!ePx&-sBpCnk)pB{s(dF+MKdN_GbjoSUV3mt^g zl!4n>ZoPcB!h)4`*e$TdulXw1ef$Y&?%w|)Q;r)jZGYi5n&GE$lp zs6LPwTh!I4u?0?CsWC^ZehJJ2TTe0yZ724y12g3^w`*pSy>!;y#~M#OB{NC~EPtTR z%kw10ZW6eK#dn(Bpf-PYx_bwbW-*tq>f@Nrl7*PXJMZ!1*C8GJ{Pt7B z=I9U>1yyci8ZO9+pKj@yG1E8Gk?e9Ds$he#NBW8Fv>?8uBmGk+JN%9I>uY|Q*85!H z;dC5wl7V?Y6C~rxi1>gtRlCn;EX=$0B;dKD~pp3HH9wm!NVU+dhTa=)*kfy>fsAHJw`AIwXYAg%)o+ z1^rLSKI5j7DsLdcYJkM=NPkvus0N2DOi3s$LJ14PAly(9N~{F<`#paX0X*k?FE|q~ zr|$~ZcwsCe8J*}Tuw=T9!8`CvJJ>ZgRv)s?+fCbnaXrb(XK$_#NFlMrpzdz%#}3sMOH(A0=o(!% z75H-LTQ2*g7H)n6+@q|c((wlFyDCci4_24FzMHcFnArd3Vi6mE`P$T4dVbZjrt`gLaQoeJ9hX6n;$X1WhC#$^X9ZO;VGZ7 z99BF21c6FhZj|kUv`UGCkRuv?+75{m(#_mf4{Ny;4e@M#Sr9Oz`B$L-oD^mjz;k|M z&xy4*`3k0{qB6f+wGj6D(c7~QO#t6I^->rX$T`$qwBl#1XrU&00@b|=Lko$#r5>>v zh6Z1gfWVXYn_tNFS8ia-#3V;U-)CSy9Z@|#IUC+v`K9ayJheqw>o23h;W~?~4+AyqO*}H}(Cjw6tGH7EH|#ty}suD#Iy$1H*-p%Nq+O zXgB-uLm*{gHmUC|)L_xjd^5;)I-Yd5LQFZDH#u5;%WZ5w4-v&%HGTjbeB71`l@k)4 zb*U10TCW`ok-YeCR5a}?@9U4dp3TY~@>&irX{jvu(7WkOFcw8ZsAV+Sv-H$y=@sn0 zRImq9y~hT49>|%~+$lH*$B06$McSxn{*>r;+0rmW+Nel0i!2$N16wyJ*kfP|yM_^4 z`;h>NX$Q@8FRMXNcYwl655T>^_LX6mv45Yunxr9}Z8iZzGJ$R3FLcUQLl9m+V>jEW~`1aCdc;y^3 zl*f-up^W9{ex%ZmWq5k0zG`kl(*w*q|3aUR6;Hel25H81Ky}IZ>|&Ler%1z2HS8y# zbnl3JaOF~wJhfKxf!LWTs3?0G1!yuTj5$~nPY?@8d$w|bMsI4UO`6&{R^yPr@QV=b#*wi7c1AtG4o9adK|PGei-9xkRuaZ zszte1Jm<2toZV5$@Fja8`CtpD^`v!e3UjPmbjB%hf}nh+{=AX> z@(*h}6a$%7IECr>#nuPZpvYExk10GcIUkX%-TkC* zj5ZEFA)`<;>@iE#_sJx%te2v1U{L!E;Qz>q4p#&I?ToKfk93JIHrv2PdjLBgGV8%k zYBc~byxSE`D#z8UUnopg>spjJ9cscu)avOeW+d@Tn`;CbJp;l;WCD&?M)c!Gxj$c_ zXD;&Y?mjOpEv>Ew{I$6p?^3MxQg@W1bgW25YN_d07!Tt60FsValC#vT|Gj?Ozp{sG zlZQPG#94R1(fd`*?Rdd#>+MyjpSaO}>&1dS3ccxfI(|ReDw&U(9C}?HCD{KXf>ZlI z?xP<{&%Qo zU8ktNU<(?KmVG{@C)IXtb91pgL`3iCxHUasd~}xLR-~EbbVkT8-OrN!+mr`@UGQG6 z2+!(a1ecZcYeoC4d!6pR`Dh6%r50*t=(fI(ZV6ACRZb7-4}a=My`ahK26*??yy@SG z4!|U#&4^_8rNu z_APA`&uApCBmGNzl$SNv(Vaouy?ayA0Yd3nw0*&A^~tZ}5J@ayWSE@$XhG91M%enK zg#N0Yo22>&7euVZ_g{R*V&-S?T6YS+*HNqU4_Pq$>m}U_^h~ya{Rb?vSPDYM-@Odx zaZgFE60ksTeUB`{NycTgLYD_?)FOfFsWiFqkWPn`zc$Deg5#78%)A7&7Xc6501Z@Q@ z{;@u*vT4$U#x;R)_}&j7+7IO=$zib|Ag4hX1e@iJgUb`NU1*V3^Y%Reod)28XWgxT zL=%Z*P$PvRV@p-y^3xA#EF%8^9T)5zpCo4~P9%i0QRrz-9hH=NVA6ee1xP&XpGJF1 z$mPu#OWo|W$bB+Jwhxzl{qKJdD8Z}@A1jCL9A*pwL%`Qgq?OOZy>Z^S$P0Qsm3bfs zZ#5~?u&5qq!$b9(-vSi{(No#Ha{JM=^X_R*dTLa8FTK+4U6E 
zWBaoU$t+7Uk=F5Ms})-`!KZ(`fd;WMY%D67qOQWoDt0G?+@x>%EG(7&!TL~(d3WtSqmgIzueYl z8DlOg+}<>1Vg9X0)W48w@j4nD*&A&f{WIY&Id&Ne7^E0Qpbq(Ox6`vm0043U`QJB@ z-ao}VZ$8@+UQqi3_ec?2u@6G7*x5k!LT+&$YQ{I)ALN}r5Ohylk2kY$aNBWNA@wUC zClI;iO50Z!s^7GH*tqEi91!LYyV8W6dk+8_d7WCi)r%)R_Vz?_-oTIC)es)4>5HMS zsHk{zo;upNw(@g!ZypS27~%7C@I|qU^{uVKaj|fB@*+TeO&8Y}oJ_nl9=uvvF%EcW zx|GxjLHxY;7D{^=>AG!{(7SST>@5q;D421VtQVUQv)(Kl^x@p)t*5oN#``o*ba3Mji?mS+rETEM;aYB^1EWpy)qd|#Nkb1-ZwbS7x$mG zJi$>B)LyoOFT`H);2osqmz00=%0?kcy82rIK$M6gefj! z;XmxGqU0YkDXjFr%LVug!jYX24mH~C);Eh3sJvk%LsvVMBTSqw8)-kH-wYc3`>pLA z4*{rMJpIKNo7d=Z$zWnr1OkT;WL`g9{)|V8^HH~pgKK31ySo+)sZL7L0smR)Puu6F z2+DS39g54d1~U>`k;_{k13sNgF&J2DM4DMgNke4|(Dk2!5lynzp1(s7$3%Jo@-XIg0fqRf?$S&3+H7HlPR1?1$(@ z;D;h65FiFIFNbI<%63(nVw*qd`BB8O(=_s00`ABexCf*l;e|J(lRch<{aLk#L6 z9Elz|{D1LI{EDv2_UDU%?V+5uLu;?vG)q+`Uhkb8>u%otuQ>BRp51iv5OmEx_TLm= z1^=C^>J4=C<{TqtiV;}<#9TcfG>_6SDdH}Q9|Q7xx>xF&My+$|%7h;0E9Cf^u$&c` z{oU-*_J6UTG)#_aIVb-|(Rqin`So!)R0;JXMp3l(2x{*wjTKa-cCFfKwf5d>@7Q~U zqNqJXRjI8gYVTEhZ{Fw4-?>~al5?K(J)h5gyM%{Z2u1T1;4p*`#-<0G!D;gzgF+uO zx<#Gu54b=3k-Fe1Fh1{p*5 zA~Bw8Kl}f8e&GK4Kfy1F3O|`6m^kw*Wy@gT>*RrGB!VPR%tjfCKGcz-vQ)U%a@_c5 zf+#I7{J=sodJ_%jaRg>;96bSric@5@#(Dxme$tt#|c8fC$nqGj*#ZGAF=EZj@uhZ$( zT4(6`_b={?zNr*`-rf=vYgsgOcARMG%T1Yfzq{+L!x&~ z-RQu@KGzM}i!=w2Bue-$xX#zuUzoF75Q%Y3QiAC5^rl?N;0!olZ~sM=%wNRTa!K6& zQ@L31!Mq08s8H#u)`>_So}2b69;tWUXAYlzlC7R?cgm*xG@?Z3uy1BzRn(uGlv9}g z)7*KocI06f6Q9^pdb&_D?&9Y5naHG8Svaz$ZB*-6kUM2lUPJTM=|a@ziK*kk?FMZo zW+iL+NFY5aM^*S4c0iP5706#YIi(fuAQ#LcOnsD=1F}&b;Pb z&vuVpc&Up~b((xVBC$pQ5pfR`3h_=Fjgb&(zW{>ssRND=8^)Y9$DM2!idVnEeTa_p z$yq>IrnS2uDz019wQhQHd1<4iHRZUMYxsgwXI-ApgBv%S%Gd0M_|~eXins3wPa?%x$Koy&TGs@BEidE znWp9p=}L;JG}*tISj#`#&$dKoJNL6e?e0PFq-*Ta}<*P zXBG{!g8zhse^Cjle*9m9)) z`ZXyE+=smld22!48m@Cprs?r-B7gRI7Pr}xd5j@TwwUk#@mP>5Akx3HB(xQx3i_cT zCIGY6_DfYzE|jG;8rh%m0R)K%Z=ou@kRDO7RcEM1jXmK+!_}c^RWvG*GPGWpQVFj7 z7d5tD?~QQfdEplrZWo?z+;Y=TY3#$0j28gDnz$ne)wuu*Tv&M1$=H4rB~P!4WeTdH zv5{wk*ese90Sb?-pOimEKs6@yZKE}yPwkwB*Sk1IoK-EN`KpRgHOi&zQE|qS(vp(! 
zK?MmWJb_`&(EPRMQp1xw^_D|}7Kg}wu19E1k>@0_ymgF^-~Q~>4R|iT*%YnU!1Bw> z-@l_M5S56AAjND2m_oQy2GDz6v3>B&42S8{9xBc(z6~!byv0)imnbzR*nsW7mwECK zaEMMB5$e)$;+f9ge{huRseYc?yeu)HI7o>- z3(CP9N%0Hb=yk1UpOO_UELkj~M+NYq&;OQ{g_t}lOGMYFNP!s=PZ&bBvd$LdH?yK( zO3-;->PNDeDd=Mgn9unDo&V{m;#Aa;o$x@Z_3gi_J}Bel&cM2pXL}BlX{VQP=6j{J z0`5Q4(|RXYQQ6tq$@VsGeqNr(D=Sx4x&B`T*Ll9#WYn&lTsYb1`d$MVYtM7vB!a4I=4MOjNciuAsF_Q4@a?K^C+T9$lXLO8i&WGf*$sbO~>FgIVtz;($_ebvBp3a`0R|~tr03~Jd;opIu z)zR}LwE2T^vmsy4TsA(2JqQgOokS$1?vlvlMbGo5L)yhECv zjhX7?pyls|k-6z$JmTx9B>s@u?#iqZwz184QDs_t7`b)9`S*itTSu= znb~=cNYlQ#9=XP$2y!uK845#bihw+ zdFUuwSB3cp12~);oQ-Uee8*SYq}nG|rith~<7a^sQiwg~h05n?+rGqsB;?VfPZ=FC zJj`GQG6S3cAZbb-4By`c7BQ3YD;$G9TG+nfl1oe$4*_xEC-^!>r%Mn8zF&ob$HGyG zubzMs=u^$VK7v1fOj6z95gWe?|QvnIAEE}ho)TMh;SQVn|6kNnMl6g8pDPf1B8 zeA&>NX?%Pfl%nf=)X+x>vZu<7j7&X0U$9s|yJ6S;n8TsAFnxr+KFL+=)BRI8sy?+o;f?^UNV8s zoxSvMbL#|B{-+kt3ga|h7%b{lO|2JL&$^x8x(HG~HyaxK1Wnf~bE}<>W_y<-8A#uB zT(@kjMt=X_<<36_v-Byx>%R&#YStut%4QYG$U6Jhr-oWu)IG?4su<_}e|=c#>1cY( z{q*o&vAFP5y$7ch((iVj(*>>GJJe4f^_KT#d-DNpkHpi@N?`mN{%OEeDdlr_mgH+w zAyi2c`i5iTEMxF57FQ_Cu{bUxe>~@__;n}?F}HvMbRDzV&c>(^8a^xx3%4eK6aJ<* zVR1#2?u!kOfBY>5{hBr%QXa$^#n&)KHjqC#IH;inrJ3bKvmZ_TA_mp|Eg9sD+}zx3 z4Ys$h1MaS#a#{vvw#2DrcWV+<(*j9-mfWe-vpZt{Wego z)BJiyc~U!m^^N`Zy{@5=B}vu6f&Y3v4Wcyc+#_h(u70_t-GkS-YKoM8fIr)jE**B; zbB4iSXcVVRryTHIZ6&2TfP6&W5Ssh^Gw?e#*|${MLh7}Xi`uup2Gnq|6ia{3a|ZR= z$xsoNMHZp5fy*$j>Z21if}-R^Cb8+~^B${4vklK8shFgsM`{R(?}mqCGbl!Kq$IlB zm^JlX2iCpXMAD*JX+{+oAkfmFpuy<(B-QqfCxH-HnQn?jYlf@glp2aQ%_}9Rk+KqC z`SA^(03&Y$KQFJN-EP6P)iP4j_WKQ$n?}X1<@g$#*#_A5#$4PVQD}-U|JWrUv@KlC zr%_*7KFVU=#( z!fkV5?|j_MELuo*b?$1&D%W?CE7P`h>N<@(rF_2c^y9;llbGX=>#2^j9ROY++#jy*8-GYjwu$C6551ch_3%a- z9pBnG-`-w2J}fLsceqMR1MVNc&5p*Zl9H%#01JVVlk~Y2`&V#JPrl2ZEpyxaQydt@&c&u7y& zMJQ$t0jlunX4Dvie2**_NU-{eHa?U@KZNQ>o}iIzRdYt{al8Al+FMOTC0`r`xj35K z0v*Hm=5zSF#1mEZ{%sFQ2b9soP~Udpz<|ocBY79DbWcHQ%ffss&JZg4?uCTkIJAsp zk<8+MtnFcN!*Hs?B^a2*i|4WSUozzP=y1{!wefh10}KjA|LJ-BGjuBH4F$t{BYXxN zn4?laZ#X!hZ${H#4F%kr!!06sbaJmF>M$=x?Xc8O}&scU|7Nx`xb=t%?Ai}_U zTc$x$G`T5H1ac7wGMQT@)YiY=c+B`={1>F~4^r8Uqep4KfZxn8B|HraP(CPF;*_F3 z_(W#mPUIbUhX#Zbr85VK1iAL+s~DZqqnWKuqou*53I_j02R((kB*-DRMMHFnYT6H& zkkO;Yg0S#_*l_k_h9;C;9{CA+QXf|y{!E-mS|AvO#EpSq8!TfM!^>qReiHT)qy<)t zGG5b%GB@hKJ85=Tq}rFxOG`q%r^1=j;Xdgv{uxvon!a%9V2cmnIYeg0j5Q zSNRVB5OC5n%O-3idyEjBS$wtpp5RUrnhFMMz`IXbNc0`b2fDM9c# zPle@cR~*BBPtQzL8={?Rk6;&%#5nNA8+MRPBAG)`1I>^h+p9oUU(HgxeBvxqv1{xL zs{X&KO%JFWMDK0D(PvCwL<_oNNvh-4 z>7A=bPHxkk=q_r3qw@kF3-oze-m_Yra|407of0rgV{T6YLc~;!oR$_|;XJS^#;XIN zz`-i^-(Vs7fSq(b^@b40lhy#RC!fO4s3mK(wKnH^vsM+25 zcQ~9aF;)DiT3|tD?y09A<1OIQJl(zAy;*$lJLwvUD)8V78MP)C{^ZR&|1t~~KI)>I zvi=^J$CYPlO}YO2*rdz#h5*W9sgFq4&)sa+P6+>l1mbLV!gMb)E81FoZ`>T$hKFCr zb?S4nrdjA^FOA-Q`qAXPQ%~c6_q;;LR}v1d)S(@(WK}LI;~F`cpT`$)ukqL$5_tu+ z+HsX2_*ffs^~0@FuO5nCW>@L#)-xWV)QB3jUef|M?iJsLXP@UrU zE*}A77en`&Z8XaJ9olwYya+e|Rm@3?ippC5Sl_+06UrUITa_F+8Tz%UHqNdd+(|#X zsPW%^iBF5O?P<>fP}Elp{s54U&93Fgai(5ousp7xsT)X+gHEBH29t-8amHVBXUs0I z99?++GrHz)`}Evo1q5m*0f{g2WCHul{R!_)=g5Kcg?_#W(Kz?Wf%%{cEK3>c?+@$V# zVK#!7&E0QyfO^69vNzjxWxLMI^%m0qfX)|SyS(5bT~+4?oP>;s9>p}R(6<0-|) z^NWdH6R(pcp8Em1821Y!ug!^z1#g}qVUfkT`>o5+M|^*w54(VNO`YzxhOHr2`ld6? 
z-_5UjsU73%aY=U{_F%NU0-SpU$~|vpm>2CcimhfF_MRLT3kvcg5rONsH67o$T`?{Q z+vmK0hl;tG8ILW|7)qhytKuVVsD_;7!JVD-r3Vq<>P+FV+xe4@-qq+8*F$#9nt z@o{p-UU~Yr&%_So#|W1F6o5|7OKadK{N;zBA69-XzsQE})v^7gQ5Yon$rH7ZKL&#hgrVz&>0c*e&2BCf;< z$38b@efLNA4(00J^DL#p`nolY`e%}A(#Z4}Y=)mXgrLK7p9jm#hw1>z%J6NMl-D{| z7g)m6hm+ge+i%wS`|G!^tt}AzET0l`9u7}(vvYh8JMmgGfZI*v65#u}nElXfkn?am z-QA~bYhr7=v%3Hw?H;+;z15lsw#qdyGK$p#ULUTvUCKVAQO7I0MLR9a!y)M+fIS8v zNY_tQ&9`};EZ-2P>uHVYmlNRvzD(h6s>aIa0G+XAbsU)MHzF;?v#$YX62IsDnjFF= z4*>+Y0O>q&`_1afiSx@fKxQ=Bb++2#@NIT+v9W58XmM@^(`xm-E&y}~GG};ubN}h; zhvn4&8`HAJT3Rxe6=!FC6eQ^tjA;1IBvqF)8Z+|bvG~VC&m6}|H7B`9c6nwdkcp$@ zO8YF$e+LWgp5F}To1Lxf2`t;wM!0!+lHGtr=B8nLao(uVt;_u%vBv#ZDHi1oY)PAM zh5F#OO)=H(d;>K*KxZABmd?L=+U}ngX4;N>M0---IPA>prJ-PtOJ||Njz_g!w8cGdqJ4x?2uiR%ay{mQ*2|}ppAGgVu-Mj zL=p+s51);CKV_=j0a$q`kHmjhH8N6C5*=Sij_j29dhA{STl4k;X%=QTqrsl?*8r87 zwhhD2N&c8)A3<&)3a`hewTF@3YT5T_)HOeBG9t!LAyx-cOk{ofnJlYWOvJG8P<{k7 zy!yaf^lkaCxG3YStpEJ3UJ0-jTtW!?K0e(?RB*6G{+;P2xj z?7Kbb=zCR88J8pJal3S-mnu@ z+8>rna_`r|9xi=s%69(@;XTQ@HodzZUP&%Rh0enMdVRVW&er%;2mI4cfa$(w4}kI} z5bRl-`Ff)*WL{B~RKwmij00#6owx8n%V>H0`NUuRf-$k7#uO~->2F-EviTk$OoQAOw%8^MQ8LG{n zFY~{wk<9F&O=}c%;j5Ib=hhBNM=t*`=bxrtYRi9!p2}H|>A$mm3&VQ(=v41%n?OV;{qHg7-9ktU zs!!!*g(9vp5{+s)#YTP05fjaHOkHdSKXlBrKYc9qr0L+@d&y z0P}Gp;`!O&@B;FnsED+{5THoG{5Xh3_H+)9NSv27u2Tg{69#7%826kqi_CD2k4RdT zg(In;MbUEUUzrpM%*hR>uC`30p;!qa?L5g&KW^4sssF~%5i|1&l*aQCC?Fz%zJwA( zlQbC3U+NQ`@bxKBsz=0{gGUQcpe4pWuB7%UWD7YYA#@GLte8BAiOg*7>!^M~r8-02 zd!;r93<~*<%$rEQg1*QktvOi($}-JVB1{Uxp${y8gQc-JaKis`ph2q9qmM`;*hg8| zGG)&;#=HkVE=hotxOvP+9fYbdZa0g~N9*I`;|-qi$o>oYDGQE|;PAW) zR(5y44p4@Lg-Lkp(Ov)EfGX31ISAzQxoYg<@uGFG`7V=9*UARJ`x|=RmUUI@(au^1 zo~X?T?!94RB*IAdH`XpM#6h;&fMtfNR9fXuf->pazF5^-)eN7s^>;g)ebm*|5_*?yY+yicjvFk8 z_@>Jxd~&uDb!FFVvSF_ytY%hg9j)wZTk(9JR#Pj~%GA@Z{rtUFifN-{?5C49w_&|) zVj`j*38o6yR+|WD<*i>mC!tmi*Lt|FpU1uI{R&S<#}yUmTU@wk=n-%oG-^CK{f+d| zv$y;O$lyT7=VVfGPq1KL?|h_7)MS9N<6^zN#78rlH$m=1O7PEHq$)wbFW{O#>tvqO z?W;95G@KXDx*K`fXubdr8D!#~Gb_~+4m%5^IudW=el3lIus|SXxSZzTSXWXt#wx>Ctd1ra;W(uJx-2-`)KR6ukYr_hcV%tG?x2EavGQk5&B9H1N=t#muK-S5!(-Hz6tP1D`a_@7?g9LV?|E-hYNI|5;- z#sz>i;J+!ccr|iA`cUX8J)-S#wEU?-h-w?L`K>XxbLRF)=KAeB`w)o3E4Ap^t|2eX zFt%$x5d%B=!-m#)dgZn#xz=PW1q_l!FUC}Am1_D0&p<0VS42R>U9Gpq&R}J}uTwsxwpy*8LiU9J zUc*TYpjRIBImQII&axL&Lq#3K$5KWn7oKgRkQ%Iv3%N$XJ@bX<(eAB}u#92V6vHV) z$Y3GLNs=EJ@};Gt#-7Um$&cXs_fw%iO_F7ils=G{mWkMj47L?a#6T{dukbV?ysez2 zKZglfkQViY)}5VK-k4mTrb0!UZ7*va7DeAiB;1Up2?OK9WfQ#8qrh{s3P};%le9Hl zN!jCYdJ7oNW)aE=3u2{w;md=g!jitKC6$DX^jgXu==3DHVTOS-qRCtSdjZyTNi1T^ zrCfK(>ZZ~vCizrb(2hrW%~9?`BrSxC2l~wxhy8Z^nHdaZsU6vN!A?eRs!mY1$0YEN z>!9N02_u@kA}M52kqI6tMPd;GGKlYuv4KZI9t$u|8vd=rgcBwxLk}>MjGwp?BoqJR z8!tZCDJu)dG806a1hHc?AaeN`Xt&!Bo{`A0hK8pZR75ZnfEw7-niIAAmALRhsze}j z4RB}KI#s#omf`NZCAb9m3!CidelN#R2sdtEkoYgY2H$F%EG*7Y=6KN3t+;ZVr>)aR-!D!A)6*?SGd}L_ zsjg1LF=uY$f>vxLpl`I?=Z`p+P}HmY)`MY#aT2wI9{zdnoqTK}Ow0`o}jR!4T^ zp^E5NM|=GK^vTApfy-;?vg(B3(3Mj)%9LNSvh2goOajnhy4swNxZYxdOD2q0?r_hn zf2hwW60o&DB_R<>)6zN{yRVqK@Dus|m%r*EQ{8GI_hAXRO}{&zIT7#gFM%K{Ejr7# z$>y51Y3Hae_1Y^Ukz2s^F8Z5ZiWbkF=w@YD1+Y)rj{aTVk~0Wa4OVlgn3u}fH(Xq) zcbs)2>*|e$K_PG6B&2Itrh5re<4UyllUGNWO2*6$G<42HWJ4W>WN|rrk$I`>zOy4%6yu%eA zbv1PTCY0@9(OYf%^Stuqwf@kcbekdOlfBhJU9)|K??=xz-J!_}Tj4$SXAPxB!cRJI zfXsnhGQ`v|>6pU3!^eB4N5f@%AZoh;T|z=aG=B}S%cd6 zrsp)>({SB21XL)8;apdTabsiY%iO|nDOSmuM;#~HZk9||u1w))*Bw6^9t}^==^wRL z655aytCDL_M*R^}!9JF}Ai-SJu39W!xGg|Nhuw_G<%NgK%Vu;{gvXop>p3mImgTQX zhkT5GN?l)S-6D}NR)jK$k%E&`tU9 z4{)GQ)OWQXT%@et$lPqvZR~WL9^D(a8&A2iiCZoVHH-`opF7g6@5MOUJzsk_4ZJ5z zM!dt{yZzb0^BlkPJ~qI;sQ&rvVL!9#_PH-5s16U*bRMJgV0`efJGv=zcY88&H1%_| 
zg61S#@Im+IF!AQ{(7ooJ_G;Nw&Z$b1hBG^wV zd_jKGIw<-DVS)<-|J%>P5r4usEBW|7CbB{TA1hf`@4q*LKZBVdq&;8rcW)()D^!H0 zW}qn(HS@>=ee(*~Wj{}fVZRj07N)V?lMP47f>og#`kn`mcG5zWBEa4|$^C8L=y`jn z%uxEFkb385->K&YCqQp;n74v=l3}2ac9Mmo)xDZDb}1Iz{ZwXCUt20)1wxsznq^5U zm6UXLN)th(RHN?MYpRIR8)_0LABhF2nJN{7PWltW=ugj%QDfv!!ha*_o{>LBlY39! ze2GufQ!=Xn{6~KaWLe(I24ah0pNL}@QV=}9L8gQ}D@aB%u7mOt-zxVQO!&qvJZ-M7 z#0`kAL5)pJ=2Peg_2+5mf1-ucd3S}glTn!!&_h|iQNm1qEu7;r!lM*ELwH$Xgn455 zC7dKxoGl`ZlYJ_Qte;K;;qSW~t0JhN|u?u4tMT1g-~wnbN@YRXg7H5!JW3)UGH)4ZGj{+burk50Ao z94&H<3F*V##34;c)J>T-F@Pi#x9zm#yafHMFj7ZA%L~}`#~lwG^c>Qff0}qN;)rBc z@z{Y&!6#yj{h~HEU%(M$1#SNvi;PRVAJj}y>FdWwwSO+&Sb}z$ZhE5zJlu{|r&2BS z%I-$o+}%qwfCu0e`QDmcmraK$*SO&f;LzE*ep0!kTMs&PI@RBqI2ba1I5*}&p8GIWBH)=V<tD; z_)L`GEGgmPq^R%QSKL?5Q!niQdqtG$Hr?50!s}sS)#U13Q{S8;OylfdE}5H(3ts_YqMMDk)lG6X!dB|?Iul4E{mS)cHO}g&Yt+$tDjzrdt}r!ZQeQm` z^byNXklI25U5prwf~}}&mEH$vbRy+;S0L0Q&`ohNqCU_*GXeHki3z3ppo)G;^)ik>iAOq}5 z2#Rk_Lz)uuDO8X%x=Fvrop|?p&TlmvZ+;_5U0TmzR^eyq_OWEuUSxV|)P!mK?t-Jt z|4k|0S7`SIPer=@mz$LSS5d}(_oD)T0b=D@QdF^x-wEDSD5Sd^rpr0Qiau?=DYBnc zcj)li>X_|}uLOs3e>jT(FH+{8MBQ9pdtg{!D9bSOv3*8yx zN~&hXgr1b>S6dE>YbL|%me-I3EHm>ZK+mKulJw6Qg0ZG;OrKb;7&Y@H?U^PB>rW4C z4XZG!eJu1sI%3rFQPg|m6c(nigt)WkR@jpQ8;6zrsb(r*?Wt-7vp6D1raC738Lzg7B717^-6>?+)DX^Dw|VJ=w||FC~)57|$` zE+155#Pg4vWSwgAUDwN@@B}?suo037L>~nCA#BKC5jp8?Os=(f!TN~t*mv+`?kYB2 zi=XNhi&)Pe6LZVQpVNivBb#(+MT^KG)zR6IHkYxpBLY2T@E|QAlj*;tNqrA>xN=xg zFrk)eJd_xOua%0Nd?g>6Y8!|(4kCf-!QqLpyo8)|xpih1i36W1U)=ASYb3uxuMX`Q z8OU{m^4|GF>jm?ITs8E~q7O;2(13ARnq0}(w6H#9N%Rv-Lx9{8HWvP8&}eHiDGa}} z?-fb`+VkS6ii|208WL}6XZBhAEs+?@_N-+^7I|hNc)@`? zJ6r?*)>%hR_K;D`cEt8OYbNA+E+xQl;XC4ytQ;ocPip{dEf@w(5}%h4aip$rg?`t! zW#S*7tmEzZj?}ZYwFa!|@e19LqW)~r!>8UmRG|Y9*0^zE{b*1(x|BCFNmFefgo-zk@K50&hA||Sqi5hAUqVDgrw+-{GDC#0gdW~ zMUzMzGCZAqbl|(C{9zoIE?_|N6`ZD!xEjCV%V$}x`fwcfqCpKCDf=%6H-BM^7f@wO zqDK@gTA=vE@*Ao`Dt6dRS10%ne@R6}gti`74)^1*MxeGE z<#L+8kYYr6Xp#KO0m8$;t*x71_X7RFTMs{3~|6Js}cy*8+ zT|*cO(I4nDA5;90v7zU@{6$^fA7F1*IUeBH#^vTT281eiYkxZYkb`s*&Ngr`l9a#$ zReCc<`WdBI2udYYJ!@@Y!UK93q2{w>PaaMMorCE{I4 zXH!d46V-o_qj{I3GB>9e?WcQyQ~Hc+0Jn;6-v55?ZtL_^UHV?88 z&4EuU2B*x7)+fTUw9B=4BtU6F^sFgZ^z=!jb+4ISNOtwX)eXJ+KZAmLGHw1sU6HZs z`VZz;cJ+D|u7Xf>NO}Spi+h72w=lZj07zN`O)GNe{=qByJTo1E3cQQ61 ziK!w#3G)sl!O+Cg3Zz(919Bl~IpH>HHp>B*Vb%mFy@>n2WPp3w>H*1?Q8@Kg3j0e=9A zHc+3_+*ChxP9b_8aOgOl!fuF9tiM8C{GdB&y=o%XNJ~5}j zdaNVD?HCJC@ESg)5)`$Wax6%^q8?8TPA(B>E%qr|K`Y^zrm0But0J|?6JbfPD=TFsD_h<1w#n- za}u$t_ns#jpmcTkp%yrQqzKTLkt)0>^z{3m<{6SiL}TWr-ccu1)%D>Jaa z@Ex%dv8tUu?*sTW4w!2JSh__3XI(=bt4h*Xc9eW272A zbG(!KYeLjSUc_~xV(`;;dh|tA>}UT;YHCdk>%hRW6Bn<+a7d+XC?qn}c!H=-t0Il@ zt(^~W+i!p$`)Twu(3F7GGcsZaLw}Y~vJE%eh=ud@{3eYUJ#Vs~Z5t5iZ}|;zj@5HG zW#>#r+G*;g^mETPxF%%By>~`CZv2EkKA$YBvCDpvfI^Z6u~u)JV2n1?=Yc8e@R?<; z-eQOKtnfxzrL7_o>gxU_E;e>;M;(~T^>hz}8H?S<>;qGY1;2;!l42gL7Za`+QV?jl zz!zvN`QI&76w1w>cii5NmP!1=UX`_e8H0<~bi6Ayw9<21-WWN^Wm++F_bmRNGOB;G zyUI2q*7Ig&rnRFiWQ+$@g%X{-gn$&J}kua?NR0facQ+Qve7;)pViF9eGW~;U7fev&3fu$#(&0t z;s?}ja^b8~9(sv_=` z#lh@mZ5t50dV6})$8dBX0f1BE4kkJK#kL#ojtlR1jjV&W)3qfsH%H#;+?JelXZTUl zTRNZhQiMu)VtLdvddMuJvr(ibOFnP->45+q{F8+MSf^K_Zt{@{SOaQmiN(c7z{|`| z!iszTwP?6iu&>rff~UVg+R-e!aM?vPO>|`E>~oD>^S(wuAQULl41$yDm%C+?H>4aaEqL61-gkSC1xLLXS?H@N&d~~@eZt`@t zvQn&H=?uop9C<)Z7iVW@{N`zcn9{SO$-g_VCDu@xk&}}2@$e>oj!g4v71lEy!XJ0Z z)ZxrbDWR;LE`FkqASr^=+Q?9mcO7Aod`fAH=GIM7ezT5Yb3*nnkt;vr19Gm`yBPO6 zg%#oa;Tk!BQ}TuPCs+wyK+x&wSn(ayOg=21?{Xmdo)j@lE;Smf0ts$+0o+fVvB9Qdvo552M=tr*MD3;l^g zxDj&ejcL%<8%4$s5(;nzEktQtI~7XuBj%e0J66CnLbZndYrVHt!@G>|2=Z-a7~y{) zUV0p;Ma>}X(_j?JCKowV#L26Z!bnCyiu@U`OlZXWksA^MN54h%SDzf|IntWDirLGVEwHKbeuUuAgX;T{2%ocxaDZ`e9272G+3`cK 
z7CDNyNlW#Ns+190`Y;nr^`PiUZUXpyi3l$l~_W8m=-go>b$9 zp{ZZ3HGAb6dK6;)TK)RfDJ_K)=BT$T74Ijj{!CiuDURnf_v2xSqX`9eekeg7UxcUW zG(%tt+gY<*w?K_5OdGl`PU!;fvkZ>L4KBYoIQ{fZCLT%T<4|$7R9hg3&#KCQf@ZvyfHLGoAa9`@}+NSyot@~X;SPrmwDx}vpq*sMPLNy2HgkF*e)vG}9 zBYJRd>^cV_uVx#N>-Mw#96o5Bcm?0_vx?B7H|GPZM}E)r2ns0xqONQnO$ zcJ=*D|C99of?Bs)U6KS_A4CMK3RR{W8qOyNljQAMcY3BT@+{^HB%J-#Q?tQjWEn~i zm^i+5F)Rt~uNj2DkM3kt{247&1mF1`)^u7gKm5kkkgcrDzHQPraLad0L2s-N)uvaY zUc10%VE^n9NNm&9Fl^xyr8CaY%h1|dm-Fi%6}a>2MAGHbRWGz1=UZHNhWF3sG1+Wv z?!Yc-Joj+qBQ0Yj1>CqvI4;ooUmk5=O?w|;Ltcs4=~*sDd~6NW?ZU zWR5e=%9cu(@x7DtW1f~NdPeSN8?{zY zG{(W##l^+3Aabr$~iRHh8!axj(Gitc`aJR4C%SXHF1L0H*710UC-rk(r zysNOVD?5y5%u6q|z@g=SMEWj@jxO%v;<)WH?tZ_q{boMv5rB#FKe|!`e#}uS55>FO zy>_3&!{x5^r1h?$F3Zi2oAorg_e**heVP01;HcEsOh15#+SyU5Fjh1t)l=!j4;*xMAWD|{F}!LfhC0FFW|U?m`E0!MxSk?>Mro54*)K2@Xcirenyv2 zaV;f>ne8(4{Gh6E-6IX7kAEC!P6FFP($H`9JPBa0?M=f`fwI!S`X5K<9nR+aw((GE zwql1WVn^*Ao1(<3*n3l}YVW;b)rwVAjo71hZEEk@MNzeCe@jvOeSUBLcO0JM$dmiJ z&+9xtCqB3Qe{!Yc=h9yiGu}vHzkCh&bDXmu`7wiS9|MSZi#SA)Cep08O)f2t|Ke%W zLYy%-shWfJ)-VXAfi0^BgehQ=pSV+*S@cCK6IcrW;2lW#93Li`8RiezH2|yDBhq*+ z>G~Ek={CWxXjITijd^$QX9*|JEed1sYVj zNd;#pq0!l-*>KZXnFkqhy1q^TL~kob@+r^jg+t^a#!sOM=ULgj!N2ow zsCgGiP9?j$x}RH1Miry|o$xM>e^7CP4DqgBnVY=xzIFK*G(o*OauaQfu)>v|I^F2C z5uz0yiiZ@%!9-lFSK+xg>g`6=N>`s;jMG)Yg|oxs1;#HhNE{|Y=wo~&!k$~;&0Kli zONSjniOK*@x&U+cSdOiGDnVlgtb!PPx}G!-9)2oCxvY9b(J;)5jkhW_uIOmRj=XUp zQUl~m%?R3Ni?pbtVxgULOg-8jgm|oR0kPcFGYz)1J+7Qou$676tE1<4pYzW`Ff8HZ zUB8pm*H@nrN+aR8Od3kMWx%51zm(Ik%PxJMRL^qrIaSJEvK3bJC1fN|hXCFF>f+yD zPaEnx|4z0-usp?2-kaUGW`>!o=J``HcWH|<*Dhx(SG}1)#gFS@cdHo>9$pck28BH2 zn_shMGIZU{B8KhrpNCxUl>!rNA>*l$)pH{zAhBdcVOHNmI>(9@}M{k5a zM3$IO$-SR6&;2qnu(7dmBqoCJDl0}xavhwDztN?Lx^U7h_$>h2UNKIquC9L8uJ{q? zNUl2Ft7w`60%(&i>+yt&QAV5!bOoU%<`&K$29sXlwOqIc`Tlpcy=&29|#e1@nh(bmoA4*WNwDSf*{ zX=WsPzw@qLXJo$VDCZU6NW1sH*hR?f*pDE#2qsl#wG^gZKQk~W5*6O4T=fzWUZ^8axR*PYowX#eQgfy=i2tK0 z3;8OWU&%TtHeT&fsCAeT75`rt)+-H58f8Q_frac7Sd@j-3tKc9$O2Pc;rPa2kbWM8 zLb}W2=j))d%E2#HkxW+R>@Z!zCjRI&5-2{@nwZwT7yGtCN2l%F;LpXgMUVwg(qM$s zl=;5_D<~*NdO#XmRyXy>l6`BR z#=9*4-vK#^N(9fs82?)**1mtR7yBY>sk8r$^<7XcnpRCwH=&`Lj#Q4*PTIW_$%z!9 zPfF4PJUY*FH31_1YiFi@blld=m3`&qhbg?GYF^i*eC%7_27ybNTXu$r2AR8}hi9Jl zS*AXn>lS6z9<%Xc1oIK^vx%few|;(Rq1jr#+zfOU7(J^4&jgTwqbTr^EOBxECj{Ri zv%aN-X#@_{+;QV@(HUIOU((1?Tj=HGrORg^UQ-8=M{xwMs4D-I+GR^Qa7VK=w!r+2 zR~Wx8M?tLPQnZbAXHrRa^%1c z&pR3rgJq<`sX#OgyYP6KD3GYuC3ei6myq1t0Cr9lwy>`xOq&!V zFSliLV^eM06B$Vm8(XYFP^>pjXHxYz;z&{k@THdqd29kj2%A}}w2fV~J4uR=u$eBdXk@jv zGx+K%$j~O8Rx-qw$t<_~dcV^6`X|6_IE?NCe|;kk0xjNLSp8Xc4N|wYyC0YpTmP}z zb%WP^hxcds-)54`{cS|)!>M`j?mfdp)ZMP8b!hJRYHf%hX0ADtnwomSXMFq#&`g)P z(CI>Gx|J=juxmo4K%C+}dl~drCjrXx%}7)j0ms=UXD1WKph@vD6tcwhQH&IB#lBVm z4vs#3mD9Yv?bz&ZT1x)=mF6YSAA;&0%=4a-xfxfXWM*b&_ovgmfNv=}==(_Ail|w) z&&fx6Vr2N*rI&WZun9FjEen{0Ix|UMru(4?c(J7DfT5&Fd;oHg3Q&ZeI8;^RX$wBr zI5B0o^1n_it%k)_89j>36Y<}kI(*Zx#FCxBe`J@4lBa@33JA~kdJ3#1@qhfXwD3t* z(8Om|MRKfWcXMRHS&5?r^z%%CupycGm-xaYJYN$%$`#ZOVQ1D%6TGzBnQ4PS5YV~a?}S=kh(ix}fTC^<`FEn^n> zN`DN60honHo{3?I3@F4wuC?kZFfqM`;<6&~YaT_Qut~9)nL!*_^7!u~mJY`Y7%|sB zX`)nev#W@Djd#_^aZ!;jo<0f6-Y$>n9D>yi~ z3mmkb|FPDtK~b2i+SsJy`q6bUggF*B}gLuTnf&%i&0dpE&Pus~0nD?7k*y#w(L z=M@Y|LuOv`jB=bFeKR_`B9_xa6Ein<^RD&?i+%cb^*1X{WldC4vT+Bhyua+~_VYYS z<~vo_YPAt6_AY5QIz4os7gzH#-Ap57K1*|G1HMnjrb?BkkeQP5_g>)iTyPU7gn+Ss~pq( z7%D}Incca1-Y6iEBxid-(5d<;e-bzkUVdL)TU}zfN-Nv!MD-1IfO$o#v#>pFSB142@}MLB2R?{9Zyy1Vboh(Ox# z)%8{PRVMR# z2DfY>o}Fg}6K(hnZ?!mdb?03s!G8?9WBKi>MBmQxhVQ4X*X^E{3)rD*epR#>G`u)1 zFg7>XIXLq5@hMh)&Nm#naM4S|MB1BHhyxoFX0hy~{`wA)AxUZqY%H9whfX?Wf`{T% zAPMZcBoVzmQ~MMCv-52ZM)rqRb+TK)+P8@){xgd$7v3ss-|0~NE8|A`T_9bbD|K?y 
z#Ws0)CSVJQb!S$7b^Zry2aE5YzVE)gTG)WfjtWY}2HK0=JB8dhxorhsBt6W(lDS=y zkf;S(-5>5Rv2V9lPh#f6RTXvfOicoBM@j*c(xbE&gDZA_FD|Bk=ZU%k(Y!&he2IcTO4XGlAh%=t8x$1->#`Js4shQa`+cP zrE-pNA~OM4fi`0_>60 zyF>d2B4)SUvr~BD>BF$2 zB%iT*6Q%Yg=Y`;=TKBVG*&fGLIWE*I%-g!L6t(i}x#f~7$89w!Qp(_I_IIghkFW&U z=o%6P>=#S=DIk2bRwyHQ#hFHngub%4vI_1#I_PAob$4F?h$IE>0xw%qk>R6o)|e3w zNa*PrwQh};;cqPH=S-vk(JoVbPY+I3_=$6ZmhbGgt`=?>ex8>+zK$Vcg!%D_paxwL zU;j_NYJCLFYZx2e!oWfp0LwCAw4#=hrB&)1ml9*4z;>S?75khiCHtw&kSJSR9}+R5 zz>aM~Yt2DR{0&86JqH4sA412`RZhl{DJOSrdA5ckyy;X%5pl8~O%NkrSEz#^?QE9NIrkD5bXijEq~FzS0@YHN(FYb+!cQy2NV;2dR!$_KH{0iWcyH&F zw92&SYLr&|*SGQQ?HBV>5fdBO)947vsW6J71eL0V142Sno3T02IF(6)C-#MZ#rGt$ zNZE+QYj9ju)HPGD6q2TCJKDt2r?JwkP7*&r8%ZutfqBQnN5+gLKxQH*UBgt2-^~;X zEezb=7F|3zS7{hgixvRtGj_f3dtb0Iu~yU^(95EcB4g~ClR0uU)+XGd)hDcO;7=xW zS%Dpuc^*Y~2R-ftpX2tZVwVP38{g^>FOIcwjq9RXoSRNbA6bBwb=|O|w2k7qw#RAM z=8TLf>kEkKQE%ejB+vjAZjY5tI^`9GF2b+7^hqPZhAKt}c1KvB+?pg@bN}O7@?AQY z7PUzjiL+d3w#|5f2_z?V>#Q@PW_k%#_-&}n+{`x?8Oa%G5j{K(9Y!xVjHxQvBpjT# zL3Ff5)JYRlZ26x;g>sCXo?9akl-RMMu@Z|>>RD%wTmaCpy8{gKi!CR)E22hyXQZ`` zF3QbLV`~0ryA}hAMG{Y5dNmAs6>{upC81`{`)uFcNd&dx~+N&hQH(%)M64u8I=W1xPta_&-dAWqFL4$igjzQVrLPjCEAr+|E36eU!F7I`#~&$IfT>I$%@tVy(@qvHk4#bP>bd zuf8$$kh7DJSaXJ;O24}pE6=v&Eo2v0^V;wQ14+D-XsvCZWKz;;0x$r(YWfDV z1NXqD{W3kUL&M8B;==TzDEi|bowq6%1{>|~8=)~02$H;uCl>e1W5D~M~(#p!}s+72R+n?PQfPO;8hC_vM z{9yN~GVp_G?5A`HuicS7$z&urrZxzdlvny6kMElny{wG_?HQDPg4=3T6)03nKZ$+Q zyTeCskvbkACVV6_6M2>Xl-i;@~~Kvaj|7vpik-A)tu6wq(wxB>(OFqF|t2}(xO|1c$z_sN0b3N zjP+3z&`};Oa;yCmK7;rCJ3!+N5ukx=T-S5G(uIk^?LhcQzi2w!gcKASSyks1M9KQt zjNqih{4E_5VK%7e=6$yI3S(i#ON0u`c21UjydV8I3zox}(K7}qb_R?Mp%JR&M4{&O zXo-qKHhS(ds9uc}$`Tr>WoP)fTtzxg)`*IGDt3;M0UwIAV2DV?Ngj4mP-|1_|E-3g zAaakzeMBmUjudF4x59>6;iK813d-TBgv20$N6+B(FWTC?8%mVh9%;f9VUT`hAk>8g zI8Vi<<}E%Iv*1aQWgtCJ8S44N*LHjm7G+Q}8I#r1?32Jza?&Lbj&mb24yLf1bE_On zY9%8Uq|;K#UODf0?+XCDyd{n2yzXW)PxsdNs*;&2M~_#(^AmwA9o4Z1j;8B5 zcQ;+UFY#>~UB(2^L5C|7j!sUJkW7yYBoz#E)v%p5zlq%mwjw9ACN}6-mcvOS;Ko>W z3er4c$6_o}AekuC8~FCgz4_VGnD1~E`Df(T3gBG6g1-d%1(P;<*}kkaGf|VTg%io6 zfQt+49pA9*nA&y@#+DaE6P29(E726!;=Q#xjM}Iao$44j*aqPOQA9ypbWMlkVTl?$ zbDsc_F>Ybm?V}2;fh!F;7}mUi1lbC`1IHEe1ufEKCx7Lr?6^F#Vm3_YA8xj^TQMB- z0KG9j(04)>H)$BbcLk~swwFcn&`64orlu&DeA{fc-f`)b^R;*Zi1myCw(J`#Glzqs zOB00Q;yL(~MQu2BqdyEUf{@VpgOYar#D0S{wLHhmx-OHZDZQfUvaCj)j)wT!-{y2*<;-S=wD?|~hdkU@kByBT z&N4)%?XD-Dz;`(^D=IN&E3d z6L4K%Fy#`^aXX3{oEvr&V;TN1A^G%Ay_xixeMGOtPi-5%Ry#E#Hf_B{k`w=vG2^0* zcfQM)oZ=p>G{4>b-+gXq5PZH1!`Q7BqiI(Vu$T%^XecSE3-LG?`Z0*I@Ujbw9YeQe zDe(M9TZ8uE`PuBi8=?IKOSxxn^$Yi`@p=9}a|-&Rr(Vz1XZ6LX+DwSZnz%rre{8Ak zBIj>$nU+EMt0A}3)S3Vz zOXjw>`}SmgMP~F@!r2C;`>Z0cFfNhxI%SgD|5w89OCM`e#om+&d6 zE(O&bz8qpHuDRN%#x@|(_j7Jykk8Wg%8GI2{?areF;8QlckACj-=$ia`+wHLtT0;%0{-;*g{PZ*7A;2A zoNynEiHW(h$TMXGqNC3tTIP zv0S5OWK92vIy#SRFUILu1R9WJq);{%1RPiqZ$mc23T#~nESK|GU^zIH(*R|Gw<>sTYd(KG4ncMAydwevw9?7KG zvgX0n^znjLXKnZHVuxzXz15j*HcR_Z^3_jHy~1C4$F+}(pM#1&Hn*>CE@oEFBckVA zee9=r+G%cXj_{lweAi&~XSl!zkDlB$h2YRL-9=Rx>$bd@OQT1PRIXF? z#P;vb((|XFYakV|F^I?H0mxN8rV;-;aNf>G4@r=a02~^u@}}UI)K6xJplvy(FvaF% zBv!BAe>W?6LiOTvs4p>VXBKk4a1jM{V}fMHs<+Vw(FM9cc!Rzq#q^?vQIT32%uxcADDcVUJD-T{GQ|U{umV6 z=I`b1{`J+)8!!9L;gGYxDjr(yld4+W_G4{F>}e&O9ZSbdRCdQo_SN%)1W**m;Lmc# zLWUvas8H7*NPy*dDAZWex#Vl9D(S`QV9fKw-qiCI?k9F;i%G7${C0lkHShKBGHT4t zU5CKdLO}j}a#QSmVO_G9%93)rWLx4&q6SRZIQAsyB%tW5#DQqTt|6~+%&o-<1IUoN zTxZE8`a8EvJ9rQC`N0=`n%#H554ufc1hu&JU;WWB`+7f{|L|*!xBJ%74batcMgE6~ z*qGkmxfmsUrdk{vc)iEY5O`GNBjbCs{cis9Zp?nyJosvRK=b3d`H#TcrEbG&Jr_X< zxL*LjX=hhgaGf7Cvg{Ey%B1m`RX`7h;vFT^dpCAW_;)Di=i%vB272d}?EhMl!I#p? 
zgbnfM*P_6lyF@=CL)QVA zI^-HC9A6n@2QJ;hJ`7U+p1T_>KF2?3#MUlM$K^PB`3YprPx*+AURa~Qxre%wKW5%p z8l8`j=$J=k-D)_QPCc{ZNr~kEH5Bh{^o`i0YSmuy$l`gx-&^bIBDGghH|0T1OK}dS zM4c-9k?-1Rv=NCcCwJKU6*@kO%o6+>@BPKTsMH2<7ZjI>$WO#rD*rJe9Pd$59Z`Wb zO_<;>w}EjDZBhU_HlYJ<6xe3Ef9sY56H10c$?-iR<@Xq%uqG~C9q4VHG-_uEs1_zK zMpLAKtx8@a##0B$6yzDKU=fUj#@o3sW*+rq<$?>JeWK90Gw`SVU@^?mjz1n(hc1i* zeR7}&Pp9Wv_f|C`&5F^FUg;8s0?AdrEo(GaR_{n%w(tNt@6s^LRw?f_IiURLbEtaj}iHiDjAG^Ql;1+yns+Qoc9P z)8BPyY$}jax;Dau1v38f7utH-dYpwBY=Oj)qp}o0Cs7B{8~#hUuJG@~$R|oE1O({)LA|!nGp7 zB6YJ)v+7GEe>#Nl36BOFQcej*k8H=9H_fW7U#=WHd-*fw5YA*%EgT1V z*`w2(v$9!Hy}Wl2RKsqhULqZ-qOJac^QxxiV2@`vx8aGxT-CBAB)M2`XTzq;thQG4 zpxGr~D{GW4)pcbd^{_b)U4Wip@tlP3H7?kjn_dQ)Kj|6&_k1&#ocabCR|;@oz3~LK zbIKY2zP`%!m=xY?-q3A-1m68qY_^9d2<<=mW-8sfMeji_?JCm>Yzgm^)+v27PeKeQ zWFGc9HPMnoM`!WeJ^3aznPc0hE9-EvFE>g0CxIRRzTa2)U!86z{R_|5^?Ul>v7iAc4`1$| z{W=afp)?iAQ8&)}X+vfNaFWcWrEbn^L%98YAMP$mO7VQ2>OTQ6a`mz7=h)@o=>toN zY&E^+=h0#43Ap)b*Fl@2ENr{b48S=>ui6M0YW@Uf6i-PPd;e?Cjt)Z~qh=QgXkFEg zm2lI-u^-(~U3Cfk=eQ`ID^Kgon ze|ydw{3rg^kugK?zm0$)AL)O0>on1mM%;j`?86_tyLxhaFLS5)R72z5{NdUj0Iq#C zGh5A#nxKk~_}(QL^oC)=V{*#8>+c%xk^%!3F)2Yx74oLK_F#E|bFj>?Buhwk(vZNS zxM9LyJa6)l;`Q(+d3>Q+s1mL^%=Txs2yLDMm^L=swtBglwNlNLH}kBmnRiO0FIn%^ z5Td-Kf>PCRq{M*?4dmwS%>(OOyA~qE7hYY;_&(ehy=v&|I~V(#^UQiYSw7a*fp%A( z`NfQ_)b&^Ru{X6SHFd2SnLRr;Y$iP&Nv2fkLW&k6!Y#LXK3nWSq`DfW!~)4SVL_s5u$CPb=(Rr? zqiR(6V+O=i1OU^KLu2rN;{3pfIXTWPl+MK^Bi&U9patKXkLIUbx%lk;TmV4sez8aC%4#Rj#%%#n5nEJR2uZ2GDdA6?Q+arJ%#Q3`1qmKG-v?x zPt3Gxxy5cg*KA&a#p7uLXFJE5t?=Br|0!IAASoPqT>~ zXWk;P7XBpDW8a9_`2-^b*zgU;(Qv9_iSKfade5r}`?9S@Sf3vbUC;6lBg`lf@!|$1 z+w?^C$=43h$S4l`3_4DXTBXNiavbA-(^!<|?>wJc(ZrbLd_b`uGIZ#w0)-6(>H^>TAa)}eE%4AK{PZzb_8A|x(06Uo(tV&{9~Yh zAj$RUzS(NnoyuPy7vKHy>22%VS1@o2b21CLE~M`r&}}MAQ$E=s!UoCGRFlenCX=GwEbL#80PHssh4`yz?e`3sZq|bTbXN(~Qozg5{QA3#3r74?z|!;I z*o2b;T|2R%(~eu`ktRUZnvRoBl(Uq(J6zzVntz2i>oaka^y3HoGCXaeEqKkGL<4`N zb4|P-EU$igu>Mbp<-4=U(f*{fKR!O_bFO|5Gio|Bh~^e@#eTpmk9pWk3TBt=`u(=~ zVN2$k-SuvF{ciW+Hed5w=iTuN?-Z}}-QV@2NwX8P)j;Q#6Cfdm$I#JY#BJ8mMRDF( zU4b0kG4p$9XISgL`K@{XSMz>5|Na1Q=HB!^{I@7`6zp$$zSDPdKM15VE(b0KuFiF3 zet=VE$onCJDlo0T{PvaX@Ew#HqP)|lSn4JmThY!YUG2wqo!55@ZuarKc)4VuxquVJ z^2D;sFRY)aC`n8)?g>P# z0X%K##Q2dyt>y#qibzIcYdr+zqe%(cXwiamx4$j{z7Nl}KKiWPJnwI-&QU&dM$ZBr z9g2t|FAZ5`m#FvwXFYW^njkKvFe*wl(5Hl9Ed)}`i5fX_Uda+k*fiJ!3@=1$0VnP$ zy4GB3Y{KWPj_sPy2`mZRCyV%5amGkP15@}}D}@Bvd5oNPfiE9)vZu-mFH0NyF;(k3 z*kD$!U=J!wRBmgW)&70TXPloCV*71WAFb5nJ}nptqdhoP7_I&jSJ~VY**!e?P+)+I zVoV=}x8n?EiKK&HGLd>b6=H!e+jvZ3H6RJa7!je7sJMpd!NhF%zAZBr6oWwVN2Quf zvJG&(lBsrLeFKR`;*-XbGqGjJb1*Z>PST_dkKur-DF#RmJYGcN+ZHvINS6RN4(o!V z>{wLFbC_eA6$MNJhQWKxML{fBbQr1}t`29$V^vHQ06RDfEfjJ>anfMBlAMZa$&fOm z$gpfz1JG8pn7Wu4tGxGD+hH6|fRb;c^rC?ZrkGGf?%LL1$jH1C2_HxW$r3(Nd`DI0 zsn;viCRG-uTqsAyGnQMYm7+ab5JShb+g5X=lM==D9kCK74<(y>>n%#X$eZg?SSI@Y z2A!JALH+BTkI-6I%Il_+C})0SYeV$pku1S%&cW*Z@t%!9c2@x5tBKCv55EIS&+CA` zfJ?PQ?{>G~0AJsKG3!@6f*}jlvC8xxTU=f&p3Yb5XQC8@TmiWOX}X;|t1d1knXqrC z+;a8`k0I#DvX+j?%39GjV_*mp?@(J?>*@{k(CJVV?-#%%pKfT?#__ubq?2>2|@cl!CT^kz4+6AyR+5FaZ zsWM&(f@EE^5wSUdvM#EFa3I8>*$g=`Y%UTx6Nk>(ul?|l=&KF2PIN{cr%sQGtrZ)F zqJSFbxk}P-oQaCiJf{|&d-B(aC7ZXKpAvpRAjiH2U%kbqECVf3JiRFJ+WzQHLxW~_U>nfmQc)fGrRV^V z{E~1*)X$nFTZ02vr5L0`yp20O0#DXD|1jKdtj@M~pZB>20Xc%qUw|MVbiNt9dp0+5v!gZ;W?&D@F_}Z_@*W!dvI5 zy>|>D4RG;qvkeCadF_!E37cQw>l#igCo}U)6Jry)?dH0hWhA=|in!y9ajJKlXF&3N zI}gV6Gv|2$JJ?a<^lb$39Z% zf4+ZC&0B+IQUB~H<)F*dE~x~zx-J>8V}7of8Pxm9rK8KG_%_;1F`?Z@vOj&jo-9{e0bN&7w@5BC* zeNxEju1rOfAJCE<6!dRZ=GNO+=kD*Xvz4W-$>j1Z+DTRllHt2KSYA1C<=qGG3d$nN*jWC!$I8K=^T_5tvS|>Qs>`F#WX0 zVz!zJU(>@EG{iLw*9NW_8(CpAAZ@iGQVjC@$z`sfV8Z67ZRg)g@M8oFj*sCC=TIru 
ztD^GRA7Kc&+Rp6zPf)>RK@8WRlL9)e&*H(fJ+}Yzr%iU^v@$oTZuQ%h=9&YEF$sc5 zAv?AMH@YdeMUo&TN^2c@u-kg#rkK^Re0g=3S6#kF23NMyQODE=`GnBc`nIzwbSi!0 zyz9?jT58v5G>GL#(Mn$yB*+Hl#WP^Vrr6Gp9L$+!w%y0tjB0hS%})=?K$fAAQP-9I&nNN?x*NN?SNh=jMWUa2SuvG5S$e*w{u zBJ@z0t>HLS@t-P?iW3yuo z1&Wk0TqOtxtCi+6}*RI=A>TSiW2tABigrDLZVNvm^9yU;(ewq!hOHE=(6=Irf%)qaQL=L6@v_dvSS=0JZarmMES zhUmZx=s7y}7k`qg#B_b^1H5t}A-2Fx?8=fR+S1q@NZU+ZMF1s3mZGqM?UB3^2R3&z zU)oD*M0uqAa<&mAQ^n4QeG^wB@ZRxF<@#+RiCM>Qv#vltf|l4Bi6Pxtx=8Bzl9?85 z>jb#NF3@Okgha^izq-u$Z{goYOcMQ%fRkBs>B;eIL(n5o=)ZQY2oTmMSuD`ip+n4n z`!@dVdo@kf{_z{X01vh9DCZOL>`ek4w!tzZO4ADi@6lJ-Owea9$Gu-8U=j^S6-^Tf zay750kyWJ)IHcsCr(lFQ?MXQf!clKL3I>H}Jr9+#!wUF3#$m-`CYQ`l>*W<1RbOC^ z>z-hQSa(c{iA3Bqkm3EJ;fmuJiYKFR{bUT<`l5orP!GmR*GlH-y^Ig_wC=d9BO11V zPf?wv&N_>=-d1vWQySE%x^>LKLiCmFV5pDC1O78mmme_(vBA_*^u(7A6Sc;KZ%Dy7GiEH zDjcDW6)O(qADRh^g!SkrGt!NIj7}Wc@!}EW((9Zpo&GEQF#A_O$$b0ZxDBvK_zTIU z%Zzs$-YMm(@dRJ!(#*UOp6Bc^I=Q^5j-TDrS#2nKR+(ye`qoSd-@P>N1<~rL|GmFY zli%sS+M~v*3z+vNVTI7K2(S4s@0&xCbKqboq^nwV>T<__onuG$DlYl@b3l`K<$dlV zpN8g#SF$+MBsV;Vc;=&R%QbqHd?i1%nZCIkiJ88AoBjg6%X@b;p!k>fB3~wi=Xy?L zpDD`0G{L6{m*>ttT@E}ejHMtx7LMT@2Ba=9KF z9!403AP?KXooMmlzLF&5AfU1@tv2LcNWhuA$U!ErIpCx11|}X4k6<^qKO^jmBn;Al zogwFE`41*_SBDK#8V^5nHmB}+|Jd7WGRPc>yH#bFcZYGRb=@R&xg=a|FI}w`%^z*g z15?yT9#omsN0=+a!(PZ`TK5t?*YVv%X@0f&T8-;lkEHIq`&TjM0ktPL)cFsN4NLfF z{2sWG6F)UpuCTI0<5ZM(a zO%N1=)2qS!?r#q&lEKR+$IlP*QV4NE_2g9y-4P?4kc*a(KWW{-$vAIgye1y(`>TLPXF`Y|Bhm_-}D&jo(vUl z=_q@(w?ALuXw*Ul6HnSYac8F@C=BooG=vx@kAJ{n1Koj-nWM$Szeco)rP=k4%D=kY zS)kPsDEd|ilo=6eiwd&jS(oeEiR6r!N*(siUjY{jZD!EBKQLCksw^kGpEF=7k%==7 z>jbYYG!n4OXjR4COxlJ)^bsX!H9Nbfbu9~K?1JyD+vCO9(&Dh|b{R3Aq~)K;uvMp@ z=_8V**thL^?(6knf)p~yZY=kjr4RWj+4l14OFSQSJvEysXAgsDGX>{~l(R@X%k zwC5sICD=jx{o|5<71QFfVrA`35pwu&`-(_3L?B%rV?k&L<7yr5!A2qEhY{=wjfJPd zhv6tKDh_!S8q!bbK_$EA;pqBDJRlPmLR{R>!xjI2cTnj*rHNaRXY{jCqfw&9fI(tx z3aN<}QKuyl(kkWrk^d=e6xJ&$l>)7!tFLNUR5;5ioD}fo3b@88`6&zp!{%l#v(j8& z3`yd0fs|F0y(3-}LMULY6?UjH8?qTY?}Rc8@6xNCNV!i5)2uXBu}tL@4JlEmYhbE_ zZm0KVut9Mg1Z6b}l+c9=9Ln;lXutdppP=Q&3d&W_9~hVAT>Q1~L-8=*C*#O82Wm{Ik>Fgf@znI_I=!;<%scqr6V~^x3erpPO zOa_xSesF|Dt9SEa`q>Llrasg^AF5+$hiopBu(Pwz&L?EAtf7|a4c}Q_P2SxM#Sb(q zOPR&^dI3y;%W7+=g3Eh(G52_N(?z#xB*JrM-fNXk2)?_GfLM=){>FL9GraA}Toh7U z${UHocm1~G0MveTN%9ha3Xe>F4g3Uzw>UBH8#kBg2{GV-#%$?^4#SOOzN`wXlbgCG zjMQyW5Gkc5DBxO%igSGK^j%#yQPC#EwT@NLD=teOGQd$fYc(J1AG_o6otYv_!46de+A)cBqs8&%=ZSHs+&+#!z(S_|OhOHumtUvFJ9ramWi ze>p>zt{hBj4C9VMdRvuMjLYR92?Naa&q2(>v~qqa>MuxX;!?Q`_Or#=w0CnW)gjRZ zPfQktkydsg*?=@2jRW#3!l0?e!K8Bo>%_`$-X}gb0O09Qb*rOwhKEfAIHNFe()enOG z|KH!!5+~|$={tXV`G{}29)8~xa_a@4?{wEE3uvMnbkN5aoYcisVWDJKc*-7hh%(pmjT zvMXwzlvZ5bd0) z?abZXVFkT<2a9T5m0RCqo-W03>HY;zXAiFpb{-0v`YBJk7qewDjVj`|1yG_jp>L z)BwZXVu*Ck1pS)S(XV;Ud!L8JiHLlsUjNxARj)T5_CxMu0#3J7w0`aH@ApZ#-VGL! z$Z8z(oQ%GjpO~1? zD3qTqBQiJJU9F}jBgI-)_N&qkznxNwYAik6o~?jQcki)MLX@>-IV*_xu9`nuJqrKG z%zdf?rT~Ex!7^Qn&^m>W^igll+dG4ceFo3~DQpNTM*Rnrrs zDnF4Bc95+YY5cQBt^#9aXUC{P)!4{IT^5|Ce-jAC*UuVFJR8S5Khn@-BX0)?DO}d3 zD^@JD9%4CKlYp^E50fWuyGtJQ=ig`yOFDsNEqZjsZPL(yiPB~qE|J^^Zd_rsN=iXp z^8+aUYH*6AWA$duqtGV+Y}SdbJyCS?4d}5N*2{%i^o$9`iIjt?LT=H#_n~Xkg1k;H z^iqS->JOY%N6z>Kj{p0|Z2U^)^;jAlQQzi@lZR4A<^;{2+E&CDUa(^CVSwrG>wP-%5sea%m3yB6UcQ{I2rQ|zA00I z*;_z4EEM_=fxcV{1;T7CipK&1+guJTw)n$F#`c7;6fTg;!Pi`E_N+6?Bn7kw!1Dss zsVz0uty{JhEtYAN2EwpzRDzd9ah;^8+9_U=^A`>o>op62S|^6@22sFgoquC}GEi6O zth7qN2}osOq3Y=;1xZGupYKxh%0dJAuWW6 zRs3b`G~by-)cmA|PP(4R70Im@s~0xmktzp-udw>9JnUI(wN9|y&}%V#23Z(7vV=t!d2N3thyb)A;m2tEf-;8AfmgS|-Xk`thXaOx? 
z`YzhV0es=GHag7)>T0`Kx_F!E*nkPeGBTJWMjIN^l76|ZRx3#c1$YBTOShoMSe~tT z4huw`_2$%VbFZ7xi5|{arqL_gMEeYvM1u2cU*DqnX>GR`=yXd8VN-^R4UON@@G4n@ zAK71i;(_i-FV7ls#uDuKR>&>!|D^XGq{h&QOzhNV^2(~Ru(Jg_a7h*{>arcK&32Co zH2RT`f6yf^on`K)gx^Q2fz#B3-?VusSWa+-ZL9#VlG|8l z9E#e8yF|H};Ua^1f)5n1IgPytx78tdc4Hwx=kkd4`G!M7pdn%et|X-Z(H;>MH_>lt z0%$tQ+X%v-NvwOm8eCavBv~OBws6hlV?y@M&FI3ZyjnKNWkm0J`I-EZC$@MHG4N~0 z)M%!+x0mktkx%d9i;nSoihTiX;1#`)S+h1u)WR4$!)s+#5MAfGt_eL zJS?ooVLDA(tDoX@3$}q;WnGp7O{8DMpEqP$y$e|ioF>P zpC;pFhv^xj)VJykEi|t2W9>{cN2B9&4h@lf#$+P5O`~GJcqNcbl`#W4JPI(p;# zX9>mlpB;?tbShWBZMl$Asbu4E>65zL%ep~duf}Z0obDZ6FW=wo8~UD(DKam7;8@-J z<340QLxMRtFCBganzSfErQ$+zesUotEX%!^Yo`$dM({;|AUy{hU1B)@W*4Uc^{~UM zIkPJBD$(-pkzj`(7k{pqZx0*%t^=iRR??PlF#JXxj?C>3!Tx8LTkW5H0j!>ht&5xA zx6Ff>Roob+MX|P%kIRnWHnzm;4{3R?{fO*tERU?cs`PfzgZYNr@TNcOw)x1;#mnpJ-%jaPCGW}02H)l7*Dd$w z7{@~W+l~LL^EUX1S>Jtusc}Rxg$D;2;lk*y4a(hg>XxoM^nykp{2w%eRqDBI%7Z_u zKye<~YtR7)U6D=x5O*^TXvT6_JCGbVuR`h!18}N>_#Ibn9s&p_k1GbL4f=a}HDPkI zN_%Ucb#eVF_u@oMbA{x?zvg_S)6T$_nRmbK)l{K7_Hp zVbdC))?E2V#ek(?Qw7@=|L4Sg#AgoE{E$f3q6JgN!_1S0#w!b ziPLq!uu2vM$+beJ@fBQt6i7+G{(?^$3o8y5>><@N`!sm?TWk0@|!{ZSau z2us8*3Ywyf?1Red%5s~^Sy+9~7;oacbfPZafN?&vs<)o?ejMvUgFxCkGZC#X%!2a{ z-TsWry|bx1vD>9L< zKiRs8pFB{O-_GfZ%>{s>_4ub{3X}`cO)J`<2OiesV=B@RZIU(7d6+D@3y(%F3ul=| zBBCcwgH=TlmQO3z`wN=^-bP1h6j=|4>^`Uell5nqcR;1qY2fYx_-*;Zq( z_M0WvT*bQu1Zrv%Q_yi49=wEBhFbO0@R1IEtLZpdaxp7f%aQf7Hk28@^C~&FRfV7c z%{P21_N07N4ha7&BsYfyXI*?P%9Y8`3_QgeY*C}w#m;K`h*SGP2T}L~iU51oeRI`E zaoj841o{frc~l@QPg|9bFKZZr&O2&%J6BH;!1McO8>;rr$A=!3Ov@vs!L4lFIdn$N ziw+*mg`n)sy6GzhZ059^#FJDuldKF#u5?plBb|#b+;g?lO@}XzI-kGs_H$l{>FgQc z_m^oJTa>I3LZ(re12q4mgGn*k>o{+i zvLoU5d&v9^w&GrQNf$z%BmXEbMCP*YUZ>sn->0U@2H=5T_ZY+{{a(9T2ND!Q0s?hf z2>hY%px=%dl|*a6X`*toDc1rOANRs(BND7R20*jD`4nZ@>6CJ$3CNSrb?q7Ig}kp) z;+}(W>M8MLh5Ei9y#6*{|Ac$;g}MqFHE=dmXrT;T8eIM6>pz&jMB~2Y&&^>8WBK9K zTz}SeaZB>G2TWX6mq-_~L=Z0foeB87-HM8C6%ndUu+P5U+BHMKolV1w{1*Xog?+Vh8m@RHVs>?(t#sAOgugc?V%`pOCzlqG?mbaiO6AG0&l523}0r1e~J>Ngf~KN~_zX4x738 zw}Zoq68*ey_pK1jjEp`FKCVC#3=qx_h#EGZ_HV^B+@Bx!x8MBa1_bY)o5c-lCknAI zc8;%Bckozss}Br$F?T??$DFOaE_eLo(lEIxrSstI1y^64Ye|tBQ|0v^uNlGNvndD0 zUeTWp4X?{Tnp|Z!p&pAF{l0ye=l;XaE3vj1*liwDHaQgn~$}Q2PaD*=xIv6MeIOIYFM${2_*UDWUb>@vwh)(_jr4Id-W*3^w}s= z1ykly-3|BVgfU8n3>o}JCnHR~cudM|e&5d_h32IM&Ryh5Vv*9MAteJP zw7_`EydYe2s4Q|o3@c0afD@P$6W;;>AmJ0hq$5PcV-1jLpVNDP`urog780b%O>Qggn~5Vj4< z+c)^}S7uXh2I&Q`$&f;5-ui8xN)gDP3@LL4Wd(wvT zl@^+mIr`tMF*k2jX1F8?MRHykFEzS6mUr_NTag*Ab9z}>Qi3c>4TusRdyKG>%s4P! 
zG=Ae-*Xkuk9B5e3Z$Sxoj|i!8#Ga6AhVX<%48f>L*1Kw|xvH|m6@)r3&EQZxd_~^y zR}|c(30#h+=CVlQuy3u>iXC&r=F{UUEC0#B2|9``$^~8p7D;P!iyAo}S5Xoc3Osn8 zoB*S0EY*&qLN2B(8Gqf+eJ*Z^0A{|4Ej^S${nFGd==CTygs>$_nkN}AFtJlC9v4JS z3^QAig$Yr-q6OnbT9lM<)AVWVS~pch5~4M=X?f91RMz|&8t`bHZ-{z%kTe6g((lRR z_eeSFUwr8sK#%DG{J#Z0)^V6Az6_L7miiQ?tmB52!$()7ZGVv-(gT6aMxX>*!Q^G* zVq5HS2tqCb2$<-VO1&qp8o!$nysD%G>@(siQsOkk=ukI}UQe%z(23%Iuip8L6T6oF zTE3P^AIbp|EQuAC4db5{`!Mx*>x*zT%8Y0;or4=T6dcVzaL(`WqAnAYT)&-7)K7Tca9(+G8JqzzYaW#i+Z)r- z3DykZ&04U*O6j%u`84|95Kpe}?hbSr!y8Qr4hi7J)m z1v)6%XxRYr@HM8k-sZpms9YH3!f7iXyfSJ1x4ymI$4vlo;p-<$8z!ZYSNENdIqMkW z(1a^rxVy|P=9|_b(Z2%1NK|AwbKBGS{@xo@NcFec+HDKa(3#7Mq~|nl4*+az2(^;x?FV`Pfew?x<#_P_r+-if5@eGhIO8G za_T#TI&trrf*ErojrYXStP1J1(M22@ROh6WKVpe`%Q}01Q5J^tN% zCb{LKQ`l5tx#G9q?WsrHBa?O2<{#%Z+o1TAqBA@Wp$m(+Jl`!@KBy+^0pse=xGf|B zGIR%j-$vugmktgs7xhv<2r5fMc0Ed#E>X@T$-?R^;#=I6+LSWrr89#0IlKBz*NH|A z3T!X`Wr6)?lYAP%C)3Efbl;n!-kXKUbl_Fy9}icS(lWof>Cm16BDOdTI3@ZK2vG*S zy7E`OOG%k^(+wC*NlCtd?tmo+z=}DQS*$Nt1HRZl`<2zXAM&fvJbv=(m%?%zQ@xSo zn>TO#t}8B#(aCcAFRy^mp5I}?MDa+iM7!tJj1@Qp)c`NnKcMCIXqJW9V3eWH7>t&S zl*Lm2T2z;qMQJ3=+0lFfN&;qFtvfM*l}48=q+#xmEE9k$-|h_VVOkG=5-_jq8Cuz( zts;Nrv93>*){*}t%bT8W2&NHs*WssXBwITm-{Y&G3Ij^&+~Q%|eZj=0DB{v4`!x4v z2FJ>ZkeVtR`pY(Qp($T)UtfuQ2XF|3&iuRf1!I4&`~ADA!L8=S5ns2no68fd?*;Q6 zz;X5g-eap6VZ~3t&(-ubo+tJgaG$7?FqX%( zNLuQY>8m2k-M$x$Hhy(7vrnEk zF>1Rx8ukCT+8y}|Tz>FL!XJaR?l&NR{<>5= zM~r>=i~;%cB}FJiwlj2}k@b-*kGvue2hq2L>@l|<@F+2;KLWf~0*2X{k#;;Kc%W{K zUlu_~oI=dkRcngFQTPR|Bn&PdQKm5CPbhNd_?TCdkF$ng;rgbXSF27D#jnGtyq=aV z5MTUk5iHG4UaUaS5k?3~$J*P&bI4sZ);SvVA+&t*&!4bI>xFEC_2cm&pW_%QoF)gT zZ4glc1XeWBV68s#X#R(3*3r|Q!UB(V;RypFs0i00nYwteISD-lod^Qnm|hGfTTxH4u6jiKxO2wI$4+zJrUsM zrVM}!BT0hL)Vjmdc*XhD^r3OG$9`O5L=dy6WE}8}GcJxOT;wy=4fRw zDp-LREW>^ChTfUq#e zrPS?O9rr{SnWFj2zA3LpzpS>I;LWV(q8q#C6J&35UWx|2}$16iy;1E^c!(?1dS&v?c_rB4hh{ zsuBc<5QqZZIr;LK0-I(d1s8AcQlBE*9h~HpYcpH9f>L@Cnj%fZk~VD4xV_F)4X`rz zFewk;UK(Oz%t-&;Y6>>~*Jf`(Hb4TI6i#^7T7%$lcRx$Z$m%7o@p@|Kho=BYgb&a#(YQ=w#gyT4m{>*rrP zQQBNHKGaJQNbf zj&SIU(}c2fqj`r0UYO$h!dFWD7is+G+-DOzu3&&b%#jGl?p{&ooK3&>2qI6)k{Xsv zT~1?SV&Xj<9<8h#B$e3?soL{+dADCl_DB+VF!e6A`#GNg+-7uYrtfh-mCN6M&!q07 zvA2h}F^6{>AN(~JQv&5h#;_TgOqHbb`}6g_5Hq8}yCmO>lIyFTX0Z=$@W_p7fg1z= z_ia{$ARo^shYO8VO!Y@&83$L#FYossmrhk(-1RcwpYHC$4}0TZN9Ii_0~s&)n^rH= z2Plbpy;7D(r`_4i9`NFU$t7>Lxbh+1b&zzhGGTY7BH0u*K6(K3e)d5CtEyp>p!65r zJ;bMJTKfa-$)4mrYL}4*-YakF;n1@Kk7bi`UGtsu>sD-2Qw72P*>C?V?EUfO$=2r$ zgWT)LP+%Ex^~b3>Sw~@cX~`-F$lN^131$X#0W^gA;2c@}x!`Htrc^!`jye|{yjle! 
zZ8eF;zvd5aRyS&vZ-$p|rbXJ$ZUHE)yWu-EiSlQWa{^{S6eu4LGzH#TF!RG6A0I!H zx*a<^b31B-Jji-7k@H5h;_D2#uxXNFJ4QK;K-syzSMu4WG7N~#r0{L2vV?o()KDNk3*-k$^91JNBLZHA zYr)~g0*{P-V??Tz+0d}bZyjIAWd;3M&NEPAg<>CVkzeQBS_>j@{)=R%pmZM5s6kmT zD53W$=sdsDvFLb~b4jr@7>65VMz>_~Az0Hp1?PQ2Y|4canpyyWNjq!j2dIpsglU?& zYDWknEwR17lMv$?%46d5_@%HvreKYE1fjeFk6+((3Cd0pg1bqarJPm4<>u7Eo_xQ{MTd0dI6Jwm-m) zwnQW`FS@TUzsh16;08mIksL5Ujsbr)AQBW9&O*2>D%w7 z^NOh}CEG=7X{DU9gw#%M!a@_71{D7fO7i=3vQGQq?c^TgR{WaHQ(1DVz_44hmJ9FY z?+*_yuBUh711~Cf)1mV$TbJxTI{)f60DmRqUMaRDCD0g z|IRqKs6#dm0ATcOuZiS@7OfdK=EqwLYk!7b_GfqiGuFn`yn@2L-uA1hO80lZQAZR* zhv^GqUe_|1dd)D#OE3SyA)eK31^g&IRvdiQU}bVn_v^OQdU!Eoi_QR99)AdNH@JB{ zML#jU{`M^}fNCXU@>@IbxZ8Z}-@`VOt3Xnu^h}x94wuzTQz4xB{@5||jpw+Dkt|d| z2#Q9RkiI2-i{;sC;$=I$xHd^r$sEG+rka9Ni^^+1&JEF=q@5VB9^_mw^vQF^g0N4YqqI|<3 z*4zguLs|8Bq~+Xf(|>*@I#2p3Wjo+K{8pM2-=N*xw(%O#!Ac?jn6sq^}uym}T^#3jpHT`QA4*x+fvzA@F>f7Dq8 z7$odZ5pWc%+_w@qF>B$xwGtGKie_gcGCNk)Ohg!pd@TPD23O`&VT}RYeTKMkf)Qco zV#?%kKHM@&Y6v8>*NO4r1_JJPvai94K;vhtH=zWgf@P8i3fH&n!mUqbpb2X115GLX zucG*Sw5#~Tu0RKO@mjMb{W!A6eEm2irUHD% z-p+}=Gv)7@OxHS2#NC|yV8Y(V-ecNf2SCD~_O%%9qQ>*E3%4U4ZJY|@k}6dIeKG0} zrrlG#UT#qrF&iw_QfUT1IsaR)P}nil3*Id{skrtpE^**CDo%@2Cb^ zr=esvuB}`M>PxFW=^n&Dn!~>TkKNeS=E7UPo2z&4+I{Np&L8@n@6Sd0pLBt1T8$hV z45DRtq#qt;515+ zTtiwj1dF@^d2pGaSFo|m!^cb43$2o3Vgz4-xQv57K81HvQ`3MXsN;b1Qgn#34cpso z(GRXFVpK%eQg?`{6_BjN@_Rh=NaO!qfik0)L#nPXAfLxgT7A{A*pw`Bz{D&iA@Zv* z2lX%*Z2!?O4K)MSpTVvDojnxQl@$n&jS=AwY7^Rm&Awi38SSFt;=A(-w)O>X2uV*d z;)mzgsambbIuK^>Kvm@NJPHv)m<*{62=h~v`0c~}>~U$;3)waBxolGZtup?fOmT-) zg++MToQ?jEAm3B9WNNm0`H_-PvagD?a`=9OBVs&@$u9OuVWR2NI+hiS1@;&lJC9!q zAIDTbdSWE*k5#7X#a=1nu3`WL`sy&Psqk56zfpPwx$Kik2NBYSPDg>f8Dx8%*TvP< z)kl9odQfkJt$D{#9x^T@2es53a&X=L?dI@fCgh>HW9mWu>)*L0C1-IV-a1B+6#pg; zDsfiIMMuS$nNpu(d8dKR?D{j>QHK!IADX6;SuXD!937*zfW-zEP{}> zwQ`Osjn@fAl(7V@n1~%tu)sxJ~+ctHG$2y(vIMpU_V za;Xp{6C{Yw7^{;b+Z=J=KHVvoyo3*-|hPCRR7UL;alcCKn-?SC=D~!kTL3DVk z@a+K~v1N?`GL57Fg`yl98Vb^@C4m||g#61+O*bVZq}u1#U^_|1(W2I&L}Vh@)jJU@6D0=LD;mxJG@B0q|!wR&I~)F$lOed;g=nJrB>yFMwWzA;_X}DzB9p;R$z0i<~H=kQ`Tr^s_@z3#>Ua= zC*ZtD?1y>W8-NyUBNn-yJQE6+C`1sBlzt<$TqVW{4{O1q5c~pwD+S&^`K^{Jdi3&1AlVezP|h6Gv4h%x za*2yN1viVvdKpB{jj%ZT5zJLxMa&IBYGL~U-Z++8>zCKh2%}L_<9ale_~2pMn{fW! 
[base85-encoded GIT binary patch data omitted]
z7jllAjT!AY;B~hnzZ*+yGy-lI5|VuY{sxMthUJ$DH9EN{pRu0>xTp^F$qOovo5u_w6(F7*sUq{j=LJpBy2@5|FW>=08>y zfUc{a?eVKCMcr~f@>QewWNosRadPp$ zryhW8Y2ObN?{Fc4?9kKtq79&Td=6;DQ1vtZX(&>(G728PlEtEVL)kDfJ9QLR7{s6| z?4FKDWJQp|$yE}FS3SdC9lPic)pqV3F*I0*6#@x}sl*9$CRfCOvboy=HgQ02OM}NH z0SQlOkUG`z@EUIQvKTSrcZyBPz5(;{MLyKpn&n4Py7uun-UQ{A-*ddvCX9NI#?W(} z;mmFlb7-=8DVwI)>36?R);Q#6BTyctM7SB~(KBVc5S;y&eLA2uIy$mGZ1b%(vaW-X zMpMR!C*}n7V4>H%;%7~|kBoMs`fP0aW2$CMEMMrN7MWmlW`u56Hq4CqXDVmBCaRkS z2UjMKA#d5sPVWk#P9`hLtY0QL$PTT^&?yI zJ~eN@i#C|jef>hBtd1c2x*?!-BjY2W-V>S}_oo6RKv>Klj|Ci6&x@(c@)HG?5LlScyF$< z7Y?JOiHBM_n;>gQiyt;RF*xuP6BUX9^mHx&w%S%*xtU4LCL!$d5Q-fDA~Py#9_Rx- zP@^8V3zU?K&y^@V8La$vCGX9946YS9Vh#@{7X|V*moBE&j_To=91}MB0yB#(4FRV< zP4%*KZq)Df3N$xK36gCxkApjOeVYm5Wme|iHL2*f(8G1 zKK8;8$sF~KQ^Oj{>#2QaWCe(1k~7ovg$3E%_hl7=2sjHCN}s~Y{s2Z(MB+TVC?PJ0 zi6fE!53Gb2j+4}WF6%;Sl0k0sl;<7y{&JcUd9`wM za&qGDyV$gkQSLe(ld=rBH|#n=@vpQr9rrNaxnBzA6+9DP>etul4JhE}n+~}>SrG0J z?37+$ASHp60qtFWN3t2@Cce#6>5%YvR@*+ShD{BPO)ci*RGeg%cy&Xrj%TyBYXU3Pa}2t*4o zGt*T%zdv)({w73m|kJ8SV-JY-rV{eZzc6pkyk>}&oao*$~FG~RQ zSXpoB0YI@kNHZq2Lex;}@Kuw&%L|-K~xm6LL#ULi|NoX_|;h+hc zD1IO-N@u1u^%*`Xoo&=k=OMq=d7soe6<=^xU>79g>Er6>W~%XRvKyy?=udw*`@J87 zT=xLfMdJ3g#RFf#;%_5#ASAp6@Rfc1&Kv9bT*$?-uX_k9%|XOgQ6`U#sXPLah|c;9R(OqNlL*oF3-$&6B+pS2{@E@8SZ+TNt;Rwif(hh8 zN$R2%4^c{x=?82n*@$`qh;wOPP?e80042+9KX()n_Tv%~=C&rM(19rsM}IN!Nh`gY zHL3bSm71bv@5?ITfFc5COs2AU;1URx>SQm_rnmiR+$Tc9`L=zUYrI3w0ZJGD((U+u zSM!f^-Bt#NfjCfY*U@6T@zW7)rij?pa$-&y4+iel_o7q7})jCRi3B`#nXv{cy zWWT^jYNg~xUPePRi{ZZ&wUotLK7QKS!sM+sHGth(9q_u3q-9v9`rn!#n5?F*sM zZSH8Rgy^_Vd@SsxIYQ?XAl}w&4dl@48Ntwi5*~OM0mHS!s~?Bn9Lc=sXGP}CqzN-6 z(tDdf!{8i5T$Ju}BOg3BSDQ~>(i3d3puJ9PyHLI-137MOa6&;!uG z)uol$UHN@m*CjB`-~2f&OYXd%2)qATcsta4xtmxkee-ww-{5)J`MTxJU;fT>m##CH zLdGt@C+Ff=BC^A}jG46v!uC}>LB&SJk`>lW5ef6am(L^PbDj%AgmA%Uvf>(1d_cb$ z!ayGJ6rj913>bSuSzVh8L<4QfDb?uGXZT$y7CD7r8H*onRk#L+cP;UtBO|3mF~ItC zbR;)BXQH??I?lG2lL9OfubS9SlF};|rE6jG>YwF*JfNaiZxHA-&3DJCmH)n7ndVyEsbF8e3k zpK5l^9zu#$c~EA&NH~kG3T36T#1{EgGvHRbi*o4s=})rt&MxvoigQ@!3Ed`K;r0~XdwTF zewhxZKg$CB+_w66bLnMPAb9+*!RzqW`m;st9L2EWu7mgFDdWJY^JOU`qgm+H$@bp> z%*x^8Oe4zPV{}h(B_+4b2udvoQvU~tyHz+2CA5q#vo7-Ww*ga7u2&8JjWdQ`t}4%~ z@czoM^J1>E6$%PBTnlqp=UDp`tB+j@WH@`PzO(tFiGmvK<>qjn(QV~^m(+VR>U;OA zhl5d0^mJoN0h>JwOX-Zj;N148avZ^X^z8M|y(RV?+TI(A`wJX_F z9ut76M8+hp^Z2y*w7R5M*47FSG@6^5mWy~3tY1_cR_Vh=Mg&lWHfMZbX)y!AXIpRFN-&x%_{ZMPDYPu^}46gf|uH3CGZgQLIpQR zBxiAc!m-qX;EMtQ=sgP>Q_L4_by~688b0_DqKUU0prI%J84TVS^y1%!~gDpJ>`F_^kNJ z-Q3Yk(}doxh%+n|1b^GFm+2Z@eE!dhi|fOrr!`v#Ml2Q)_eR^L!vKeL5#WC4{poz0 zVf1o#m>!A%+5&YNoA!oyty|v-rACqmMzzu#l|-{GqIpK%Bx@P?~Nen^&_ON zawQxQLJahQ?9$9G?xQOYgBXoc8tZfL5i0g5y|r`sXks{YeJ3t|wspT@mRAE*gVbm4`=|zcT4YGTm_Viw37!hXwpxMF9eD7RK`N7i~sgEq_7C$#x=IW_63_ zHiO^n_Q+eLb$-cXZ2M@E_-LXec2dk*!Yk0YYsn!wPB2RF9UHE8+;U?FbG*c+%19m< zoXQ0PIUoejcJ0MXNe0wUqUARS3gn@UoHitvDo{sJfirLhTm=zl!edX(4BgXZ5<<{X zlBaS`(8FOBPq6CjY=WE!TwZ<*5eP(pb0x0i1}b`O)sfn$dF&esE}oUl=A=qd4o76- z$S^VE7UK@ke|4_>_)~w6o|be6Ze_i)MbxdJX_ak8Jfd#et`*P8(0|*hzSKnJpu9=w z0?582$}OKi>u9a`OV6^H&Yix!#lFPm<>-}65VHTt8;&9bp&twO#kdws{Ei8%P_g@h zZHUdz8n*LlYX9=ZD32G2DF@x`-Q2Vr$I`CXn~q|=L=W3)7*Lf?^lf}zV+=OIr!$6@ zx0Qjwx3%bw%gcqUk;DD{?Vmt}CCy*b?60>acfWuCZq!`iP40S>rQ1Qd*L?3qCdP z3~;@f>DUzW1`%C&gw^p%5lUi;>^EI3{Y}Gq%|qKygT})g0P?|g^O_phufdhDqx+k~ ztjCg@)a{smRWMo-2C?2bm1(D=RBJbAABB zuSKe+GJ%vvshi5w<-ds8);eWq-fYPiQpgxrp#oku6LxQo8T%&1Eo2|#S>5<<8uyr= zQ4Q{XTh$z{&Gmx-D$hk8@hwtO1@!itM&ouY2nPgGg(~Qw@xLyLbG$mg?z9ZOhWPDV zEm^jQT%WatT>^jksq0r-M@e}+umAxP2gC{Xd{@ET=b!!f0H_MTm?-J#>AA3x5-Yr4 zyE(jH{C_6OBdETiL3p_`-fHF{7_4XF^2cJ4RR@n1=$q2B8m^ZPyXsEo(7LLjdmNkP 
z>nkoUuKx|V1PjKJ`S|$kLq$J~mI_^$%=MA=j|5-rIq6W^t^mK-5uc=z+WG`_E*PCs ztoMX?Gp|a~+Fpm>CR+j;8hn2NSZZf#6bWPCN@IkNLCPku5g7%@SR7Q(L!IlX*IG4w zSQAC+HyTR+pgP03(>puUIiD7Zh2_QE{E^uzd|J&EItq?H)Tl(QjUv=l>A|UL4BvPE zG_NK{l0VKuN2^#~!9H1maENL~Lo|i1qG5Nw4sU}5pW=ach<2Z&`8(S0Rw}AC+9nd(BWeG2Gy*6G54>>p za4-CHKnkC@#!T|{(Jr{4b?V*U8^;rxm?D+>*c>@=d->`$`nZfF^Sv!Lw1A?c*{q?_ zbG|TRf^*}f~r5Kvy1!}ZCq^JgvR$? z6g3%*WDMjSu1`#YbF;Z37&x`L4NFY8GWcg9pKusTX!SRdD0M5QCVCKZbcmK;75YSi z>1Q73tFsE%X4cB-Zj%`MW(DRkF$G8B_nZjzv6g8%Ry9rmu1P{pmqA-fZ&e`^BzebE z4XpW3ecD`49AOdASBY2f2hg67rdi*4p$6(6#dIIDu(LmHoZVnI}Srq-`KkJX$mUwcsBzn=} zZmT=+V4$wc^!^u?Aga4+ycy8Q3RaHEp3S!%mt(5d0x0D+ZH2OEkH2G5hky12 z{=7_9?K1(95Hc#J|B4oIN?w8Sn%flFzXQCMK8ipzs7J-fT7;G7I`~ zdRysQ=hxcs2^f(YO7pz5)hWH#%8=f!@tn4&^_Ph`s)-l_!Hv9hR@>=l{w%XNWVQ|+ zPI?4l13_$=FbIm{Kj$F%d+0sIQfzV2%Uu(^r?jE2Yd(jKEx)bLe@@eGrm|MiN{t61 z9~gpwxY6f0g>}bMOALl?q`u9~>?NN`I`u5?mr-33`I<{Ir@&#R=ske#vS@@mdkF~j zRcN%ev=FiA&xvZq#nm6QpXc+2{*7W>4!BPeYkrz9^RK`#nLv$k8d z{(Lce87uC#ToC~nFAI|m+=GKh&P!cQUlM?bt31cv_zUIGX1d}4#eV#C1`HycDm*3C zFq(&><-Y>j3@)b`W7oxO4y*+|vd-VO^8i@`@q67Hee`K_Az<=bY3peY`2w^7rLg?6 z-T=pPxJAJ{dS3cn;YjnB1BSPYL+Gc(I7*!k)W5>mUU+3}DV<&vFw#3WDpe~a& zMiPRPjFtS)S0ihOjHTVSi{~L;l0lw_Q)TAgr)5I+2Vb=@X6|q396KJ=GyHfRbO`81 z3vx0qg0Ft($av}1naXzg15FJlA+@i5BXg{)o7com=UG!a_gu5~PXG?eHWXq%xlkGO*EM7{fPwbo1xuukI-~3E{q* zy}#YSh`rj^nDksta`(gL%^Se(-)n{5WESi}OVt9W!(T;#e}5`y`-&`>`Dkob`W(eK zh}zA)OWm>gqlw`jrg+H%*~e|I`H)|LMG|un>sA*%C?=u>w}fm@0!*eiXPnApR`nY_ zzCIov3OLs_x}UXSq;)`2-avg1U~8PM(t=;Aj8-NsJB)pdmvDIRh#(5U2UVAHCgcHQ z&ATt=e0>?Cajo!Jht^ z4J~RiCz52?jg8iJfA{4e18(F`%EcJt@vnH5+VALZpBrpKL6`^#qmj^`z9liihjx z;r&TJ;M`d)47$}P3-QXLY3TK2F@D*Tr}LU}_?z(~$YMpK;8Q#ZgbEJJg^g8rlTRcR z8=+nbL0iA)U5`8iDgG|zq{tw#Ki9&*L!(mwzY@}?c?Ax!j zEJ6>$VJ?tX6?v$`{@}%=`&$7gE&C*42yQ~o0xXi`4FN=`o*T9CsD8Fd!E>yP289^K z0Y5T~5o^Sv35b1$D#fjbCZ?xp4Up$`oyMbb4Y1FOk_dYG56 zh~dMi#N*=>Bf8(TtZdTa+A-@XNPsK~%GwX8_L2xDL(O$~-#_1fdSeaEP7ZfQb*ab`23FwmSI%Jpc zbMJWWH!WK`ug{j6R~g9G02}h$YKS>>l1L69T@v060jbH5A2E;i0QUXu$y9m27MX#= zjP#rj+QFgD>^;h7L-WdWfj7!RUw@{T6_RHJ%?1br8nSY7;!)C19^3HYfS`h2wqzhq zTQok&?x{(RPMhP4#mY)rDUgZ+gU!_pzVMvs0|>;!#6>|F`lHOSnmJJgZ`R~N>a5At z$jB@hOqCnBMw0@jA}79+YS|)WJ@TB2j$`_EwcYa{Ht#SJlS%wkuhajv!}w{shrcKJ zo3>VI`kG&M&anhckA@U)2|#=M($|Sy0M3Rq#H-$KU+C;_;VtHE`>{A*Qol``O;x4j z`SCCJKg04+rudxkoyOnCs5TI26xfVjE64(7v!DsHyn@i(l83HK?>3z~EY#9H$DTx< zTeL2phxmmoH|_7W6+Vq2L&L^eWQ0Rk-ix2^Yey^PBo@=+S#7oa)08Ng4`Osp)t41@ zfHo$^$Cd5C_I@9)NxdLW-fG-yt!dgk^44^1143F(J^Pj&zPsXXp}%;+e$Z&wdP zTtwtngiP1@S2ej}mccp=!zM1LJAEg9HP7xDLtDE7P*+ukYyRs=F-}*VZ|fvvWI_)A z1loMZU_1W5-V>%BHwaNfN!n9W7ThZ;7+~s@B*>7JUL9&^ClGw&b_O8t}_V(~n zE9~#W(GD><|NU`;!`fBYWnP0vX5ZhTzrOiZ4veelY!P=Sy>3x~J=p8{Y!RWpH)jmC zD!O*>lw&EtG=B8x)1$vdm6cs5Rrj9-w@MgqygMw~k|r=Na|uf&+kn5HWVhAeLxz_o z1NSg|Zb;%4TPG#$m8|veP0yUbHL>F}>2hT78m@fT4jg91ilPLVoZpR=p}H-!HY(4+ zmv-Mu9BmNKjL@R|{w-$j*aHuJ;!7q5sK-pI07uTWHX$<>5L5QP<`*!^NZ#)O?Q!c^ zbHwM%&&e`NbIqL>Gqn~0hm*6jneff2JLHGu^A2|2VYu<0!Af_}wN`<&#ARJIuHe`u9?_y z+wH#IZ{i01Es_b}p(zEGC=`!$2(mj4ey3Je_3kAKRxe8NaN;cStd2ptTr?fGcwXv1;3)3_3>2WcX4^TqoptzQy)SXm<| z4aQn3;{N+jU&3Lf@jUco;v!`J@@Q3RAs9fhh0bKK75Q+XtR$h;0F;|tk3+Erw1sCu zYDR?*^~8HSpfUics`wy^o$J*WIJ7>JH846v6Q__Mf%S z-}U1EpI$RQ%zgcFBQc6S_D#|L1srvkkO)DL`AEQ56rq1rI-xBdXcFm_nVq2@GGfB; zoh=n<_CS6&*Q~tjYVQ8(+VXnwYnK*YVQ6dc&4tH9aOk>7zwWxPJxR*{ztWBQ*LK+Hd%Ja3JL_}Xy(kTcmOqZWt?DcQdE7v@ z3~9!Q^OLx{k~1h(Nb-;bk{hm?rUe+fs52-$oBAtlFzrSK`Qy8-p`TP)lA)#Mqc(_) z>S_!(X*C913?E_^@-l#NovPiayMT3S$Wvwi0}C#mO4hXws9KnaTI34K<3MnUi$LUG zhrPXilyXG23RGltq7Yu1mVSpcA53d?Lr+N;8O-Ddv_bN?1mVx<0U#jEPx@}uB|zlG za08>vt$UM6p;n=pnXs^|U~ygVoi#Ywy31J?=FAqd#&|s~GMaqx^Y35x_1^&0>u57^ 
z4hjF%3Bc=&p?ZBQ)~0#1HkHNf5d9ol8=rvd-@g^tM}Z7Vq4bR|s*3yZS^0I)VY?Gz zo6Fcm#z(MksrT>AjdacI$dl*Q233!8&qA~~Ufb9ljcs5i98dI`t7dkP<3Q8m<)tu{ zQht`e`oey-qs_(EmrtOBuh$8Vcq9 zar8T!Ka&e>mjs;TazRIbHa0dM?Ia}3%ynIzim&>wPyOrR0B&+!LHED-pD_YeU{X@1 z-s9E?{;+*zkAV8ue(!?pjK`Mng<}%|zvd|}Kn2$<`R8wK-uC4p_X@VVMB1X5%PmF9 zTwUcB6lD6g4X7SS_G_-JTeBqG(>?``h!d-L5NVq$?@UqAS z80k~_I(PXt_oKKgd_@Y7v+n_r^KYf^`ILa}a?yE6r3zC;4og%G`XW4>6KK~Vrz7g| zF9}n_bDT6xP8|FWn_Sy0nkBGb-hjTc_4CfXV*qMY&l3QUP*UG&kcAJVUW5p&v>=A} z5)}tR<*~3$D8EXuW{cVDuta6UJ$Z>ejNGEdq9)^L1q?qhFtcLKmPQ|wk&RC}%NEk? z%Rh=wwjC+*UH|u`0tOk1gK@nFxQ*(+I+`VzSZJS?@aug-!ajOL7m$PSaLGw1zSEXq zRGMb8;0m0t{bOUPA^{(PDQ*=&_yilv@6}p=I09>~X&ZPlspI@i4Oq*y(i)eFp6S z@`a$9>o8?{#T&$LF)nHr<&P8}Y!C$RppsZZ5PhR=r52x>vK2@mZZ?}XO22Z#{tIT% z**(;ED;f{QC#YQel`~u{aVnS635|V?!iW`HBhXpLiDzehnlkPji5YRj6Y-lyyKA(Q~jV(D23ZKZ+U^NIW8czHKAu92T3Eth8LR?R>3>Nn4*5;%X zSy8Zsd5!EPcy>#5sLj zQQUXg<&Gd4aIINDko0O;#%R=g%TfTKMI3qlY;c2w{>Ki4XyazM!SE$sc$1c$G4U=E zn$BEQ^Z<;aGIp7}T;Qo)X}{Wf%j@s*k5=|_qx%(^Y{1cmDrV;6FBFlWa$AsVUBLD% z56zt0n=t39!n>OqSF*OLg#fS2tg5iimK)Qqe^<8m+@Roy(M=EYKj=YQp+n%E+Fp`( z?AySXVEyE2DV{wZRGuV`T&NU}3l~%rK3imkG4d}1+7~&>wH`)A)Fa~3fEY=7ug==J zNd3r@A56c8q3P6dYz(DObd~>c5s<`H1irR!q623L(O{3M3i=01GpsVhR-oe}7sEgK zL!FrMLAN_t3tgDcK-`PZEj7D0SAA}s79_uI&`aKoU^&%cKoYOXVvcrU4mdp5>^r_} zzloMAZtx8(PCjfd2iQ*Sf!SVSW^pRk+OX3O@jgy<=!SVWPJ&3l(R!iz&55ec6Y4A) z%loUj;bu3|*q0tSV4kWWAkqt%-0ES_xaOwhTF<7KMCGvsy%J+~IH4gfAfnrZtoQrjKa47E-<2u~?3dMl1KSnAS#GWtBTQt<-e$C4yx%uzz5?}}m zF5Yk5->!{Vj!fg<{yq#jkC@eZY~gL5WfULBc+#r#%Coh>XM1BVt7pmKcHEvZ?5gKf z9ILNSdaBK>Gp=P_N~2VuSx&hN+m-Tjdjot`mjkE-TJa$^!?pII5sh1IF3|wl<)VMr zZ*MQ>EP_T4*MYQK-`G10b@>o&lmSce^xw%=e2}Wgm@(<*!Le3_*xSx~&2eVTu*KD{ zGl8nmURPO|>k=Q$Zs%bP5Rd0O1A3=OKnXI&qPn*O4!++ z@m(Duv%2L%o5%j~jAb=YX5%lpxUkUD*dmp$sU~c-^}!B&BUqohS0SngDL(i&tzgY$x(0LRURUD2*znRL@vH<)uc3XJ%Z2Jzx$9 znA@&gM2iFaqV%@Qt72m_|3lY0h;>K9>;a+=IKu^m+nLkEuzmEErAei2C6BDvgbFo| zwRBo?qkh=9nhCIPsY`k#OiVd1Jfy@f*FjK}+OxTV+hCnk_TIs=^@jD!?O&IdE#D&| zOcIAAwDK4p#|rtBBH8RU1!0dPiSd~I@^ve1nr7+sL9f97R(Wti1A;i=xEWp+^ztr5 z!9zv5TeSI}v@8vrdecpsyH-RXP6ZfOXYikl_z{=wiAn85!bHCIbE0C_e-h5k%_#D3d2ZBP+b}ChlZfF+q<7K?MHV>;@OQHRAY5iKOorr zs<>b-(n(0H z$jv?7b#3F6n$VK&A`ge!YSEODL;*^=$lJ~NQAw8&r$fapfQtw6|(Ss`J!ch z9&1(jA!KafLeAxO?vN1%HPHUj_n9}F>*-7zqs*P`mk;7^JGBearU%S(5$rx?@fzH$ zpbMk)I9!TuawS#>N*cVWx)el~kZt^M5}u($L=lO~BqBD-HpI?a7>I*$azIW>=3H`n)yu0rk#Qc`ZxX2i&2LE^S>hQeqtg!W!)?QEI#UoJN&mtyu znl0l>i3}Xu1_}@pk@-bp$X|y?_w6(vL6cp}jE@{&uD-7W4bSx?h6MzGK;c9n9Kvvd z7ZhN9=4qdfN1eRGoM^AHK&Ik&ej5ak{H`S4p)K~Q;bVB5{@?kDsxyP|aUl<>mHkH7 zd}WpEA}Ep>Xd?(2S52er&_GRp3OoZ?w_mgX?-Jujpg1vT3VZ5!64)@c?>pW{Bb7gf z8e2f2*hJe}+uN&qCJqkV+OjFY?5KifjBBIqGt-YaoZt@cs+DsjZ)T_3^3% zM2KD0dyY1pPmTHeNa9nJo|$?Buar#fq4@aqA^pD8j^b*kU|4{gtIzfc8vStBP@0_# zsDt~Ik+{3NyVTILZ(E!!yY1W7_NwC{*)x&x!~Z6dKJe@7V?vsn+uEcSizXZ!U#$ad z1i$bxbF(%{Kd;cx(95fLI6tS( zlD~ln=0vS3R}~F4pV^nc_g5=nFL)o8huy)Nm#E0vB1-G`6KVL*9FVgnPbYy%+t&4T z|83ay$Xrd>tzqXcN=|&ZJVGG9b=!L<_CKsy0Od}r*w>NZY8U9pi_rTvIc*tMmZr02 z8-yLL6gDz(_1d7U{hY}QGKpi27)wLeKTaZ7WYB}Daw!syeMC;CxsNF!`e>j3m`RES z;CVzYX!1Ne;{=$?)0ejIM>#dir>tba#!Osx;-eXGP)eE8>L}KJi_<-gH3?ydcyN8Q z7J}_pO#<)=Q+_Xow>Eln-rnuh)|guwo1430@xf1%&);_3Zjb$$=OJIGpr{Z0AY^Gxa3LZTKg5+sXy@g5I2TUC4 zMAq?ft0i8{U)YC*Y$!pmLxT^X5k_2DQ?T&aSFQ2#vMN$DBowMq0Xf&W=`kDX}A&ZOa7iOkeS2O}v zO2k~j%QY-CE@Z`r$B7bSXNcOiYFw(_U5p-yQ|HRSUjD9jYhJP!%Yu(JQMjPd0BBeq zLNTlMsqCZj_vr85FJKxF9d|Y3Ll*`V^2`wmoq|Kjg;zvWDeOpuIpPxp>0L^Z4}zp= z&(GPXENWw4^4n$7Z|4aMxsH!CneeH4uV*lUi>;KuNy635rr`z=7N-5|V(rwb+Q$a8rvi zRCltX(y35UH&FSAAi>~SSuq!B$DFSY!MRoSq0r|fVDKx(j^UTv!?TTOBW9D@pc|$+Fy%oA82qyR%KG~Q)m789?ac`3 
zHb6fy8i-1HV)ExI`H7PX1bsvWSJ@wj?GOt8d1%ukC+%2J)iZ-eQn2vS1J1w3n=;r`#v=!M%1*L-Cb9D!kp7lzn zxc~N}XiBZ+x!P0f-!(erX2zS?a8e1qP>CVOahYc7CguwF6!%DeCO4L7{cj{uc;9PI ze|Mtf)%dbUZrSWZv`?gR9XW)8Tyf}ow>Dwk$4TWy=VHv`iTJN@A{>b5AJ-hS$STX0 z#Z0x$)Q6%HA-=bYZ8M`Nxp|lP$6G)}{m&=dc7u=fdGn2f*#ZLWTrqRIPYVkQ^YhVf z8-HsaB_G}`(XL}>tj*_WEG^qS4~SJj;RGNYBpxUQd`s(Sx$Sd6L{uby?Bp=a`I@e@e<>>E^Nu*btIrF~6Y2!2(&4(=0D9=h;`svIzYcOMHa8CAqIas4=B(dtD?k0M0F2D17W-FX^T9 z%tqR|EM^cpy9L;es?Z~mrJ2f1?^V{X&;Yq~OU}kl5 zr31XLCuU;-Zz$PnU|mZKo%m9|CU!DL*i!1)`L^(`dRl%PT8edSig6WuF_H zyRTIM)?vdJF7NmZ=hkTWi5rnAIk)J7Rw7=#W>p1p6B$%F(=B2PIxTXQ!z(4XQBe;V zegjdtzGAc#w>?l8wuCaM(%)boKmM=ar#t)+b4)31r1gYIlBzHl$|IHK6;A#^HMt_8 z;HRgle@`2|HHm+9XiZLMdzS%igWzI$oi?We>>FFa0PtcbNtq>&fF*iB2*jyaFV+Ti zrBGDS?64siCwWFiC;$f8KF7n&WqnPK>->`zD2lgLQcY9oRwQ&_`d*}W(1HCEA5T&O zb;Y&9pwd&AUiruqqc1h!QskKNX1!22Ii0#MwY_l_MgJ-l_0nk|8Qw(VU2Q z!K-=ze;p2Og!R%2)9JWJ;wBN=vX)T9k=Q5rg&lpm!YvH_cY+;00SH=tl@C}*;)DnU z-e}rNf}peHcJGQxN&YAZ2=~*9QbsC0rALdZI0i@Q@Nh9W*&x2D*-<2n1%B9kj(ze- zNr&CZ%6@l!Z?8IW{_tEFg7%>TLDcNeEF4Eio-#+g9mRS&evxVIpEZ|R_^k6nX`V4H>H^t?(S2UmjMa@G@vPOsQ%3p4yf&Pdt%K*I=P}0 zJ)I(#XsS9%cz7AFZfb%a0i;Bq5Jw`FiSFe(V1ZPbn2&=qKeR=RP_PjYMWjD@Nf;^k zj<$4G_2Bbk$kCu-@7EC>Fo-f-)<2@&DK~hP(#dnVkTyf~d*^3k99gjec7egNZ55{& z1Rh=<=O8&gF+wPsNw;nc@`_+GB=~CT)tT4n>F(UPez}b3YmQ92g>kA9@>EcGBs+x@ z0o2U&1M48Gzr7pRAf@VSdGcjkf|QP872w9Aw04nv_h+S^S8h4%uUglMTDd`|8R@&w zN%dDt`M#Ef%;x!{BOAth&bvZ_!XKXdUheI1ogS^3n3+Y}AG86Tg<60sI|xv5r*z$a zeTy+9du!GedfOW#-hMuQ-V{Ub&^6s65cc}%*1e)rFty^5@Zx$HTJrARgo9BVL@W|X zAfQXs;!Yb5_ z+c-P^DBsTb-$7gYt93Edt+r9(Y_0qnGZzyA?=+GB$5u3B=$Eyxw!*)!m! zCCEu&Rw5+Q*O!UBEt|{!e19@^C?op$LY%i_kr6_ zF3;YqgxuX+4kQiNFL!lF`Ia2}`ktxOOm5Ryw0-t-GvuN&FnRd!Vc5+d%iE(Q%Q1@_ z((>1MJw)nf=Y@<M-xwG1v!U}Co#eurh-JPTgsqb{Pe{k*^Xmx)ux)lmPP6qq%Nv=+m~i>W#1c# zy{w;H4NCnpXiJeGqA`_G9Tk_F2Y05Tg1CvZM++pXR?X1k#X`K(p+H5JAT5dFJK(IT zUw$?6DK>P`8DrtKp5v9sI()ts=D#=AoOrVdaHDy6{2l^^{9p-E;L!Ae0AM;%?TTOy zSMRz@UL{-j|LF8S+hYL8a{K4kzfN-0viMZZOUF%e~$L%zR}BCW(;C$yFXh zUq8RB{`4gl|3E+x=D+S5BtJ&meKTr5#~D%p6L{l7Eg>R(809J=> zx+L~!e$2g7+qq1thd!}br7Z>9GMSU4la`GnO;CXg#(@flu^QKa&o1z z`9u0RQF@fc&6GkQ$LQDrTz#gYM&<7WK1{!XPXS`Qcoi5xrYHuH$IEloQS-9BcOD*j z{#+)2$hb~Vw*=FqNZvq=8$Dn|U{$M!_$*`4mtDAg{qxcDo8GM2=$5a^rT~BSl{TB| zH+cmCJW9n4ra`4bXgY(fD_ZHDS(%Q`iijgTlh%ysH!dL^f{iO9hPK#HfkTx$1Qm(Z z`-sQhI9spT5B&6n*uk820m&gT1(eZHNcqgmL~8`4vwdX97}NqmA7ea)*o`~)K5m1(xh^6 z;|x4u4LUMkc&~wI;UIn|sq(Xe05qExgH-IJ^-7Vql2K)|ek^;H`4E3368WzFH4=8E z%tdAVn}`TefwG+v!FRUzEsBeQ<0WiK4(A-4e0ad4L?BzQJI#D(B!LnUg%`h8##%c= ziu7f{tdZ13-tHE}B)BnYPWaj}{bbJ=ob2Z;-k@9zjeuT|llgsan@C+4S3Ix$i!+kE zPtx<+6~v%CL)A3;`mbOVJ7nl#ZRn|}pe&F>4G9k3I?pX@8D+?)YZe>tuplFg1>6py zm+n43GrZmyz2ktOj~@&%4K^QC^PV3udATmW*0!{;yzkrm;CFZO@Oq|9ZgmZ~1h0Sh zV)>smnV?tu(nU1LX1D4CWVDkW9>)IX;`+J$$GbCJ6_Dc;3FrkLXg~%euMKif^ToXo z4%`@5pyOZzk^|uTqx^IxWLlAvS&>s3YlYqF7$Rl0fx_Ai^I}hppO-vN4(kdkA`gq{ z@u;6}nXB0dc!uwS?*9Z}KlT|MR4ed4qbMS3YdkXCHuraCoT6l3K~8f zuF9XIu3c^q_VT{Mg(SQe=_?ghc5ywLrhPzx5Oho+ILo)ihezVPQ%R2yMJRq7H#hwpk*|j4+9H*`>x=>s~r48y$hck@5lYKLXTcZ zQ7YmcHqN4bytYPijAonH?&B0^EplWUUKbw7$Y?Z_Yjyb_e6S4s6+@How&HX=AD;}Q zJ;Vh<{6OR=uXfqoHAC=vZ0< z3;x#~c3&cQR~TB`0O#F*E>n578g_r{cfTLTolv~Y2mYLGxov2HZoyg_Fm#JPJqRl7q1b|lE|3WW{()!FQ{#nlmesePbj$fdWaU;@La$5cK>oKg zU+P+~^2GtbemBOepT;s^)2jV%df|<2!8gNB&zB?H$!|lJgY3r(WP(n;K6#xM-u>(T z_U#*kv#p5{5UtL>+d4J{PaVz$7IRF z{2|+FyNUJ7tF;G=Ne*S?We)+s#>2JR@jQvCtI55~+S{ATeO9RsshOTEi@@z(p8f8# zGoZxB(!?b2^wF{WXj>PiU(n(TgV~IcUj5+q_UUNHtZ^7~RN4H&8a=}7@b8QK20`WS zYLuCAJ{dE~t8}Ng(vFq0a2K(+_&PxY3d3(6t2fV-MaNao@vPK*1fm#a@dNa9Nzp(n zk(6qtT~0RFH%F0wJq~1yD_u-(=dgvLzP(HGhr3DdruPEP7)T#egFp 
zMM&IkgAySmuqUHc{L>_io?uo*&bE2NA&Ta6J(E6?Pa~#8ggKGOibv{c-zk;tD&K#c z+g(`;l+IEA_8A{U_n&P8Ao}I9dj;f{`K(WhU|!Qplp$s%KR8j4+aTfg@I~^xQxg{( zDq*dOS4_{TB0=9oG&uCYsY=r(*Yy;NrEl_*tC@sQ#*-pRbzf7<5{&976kMLuC&!T{ zuxi;b?M6|s#^ovFQVWqWZ;+NdJ`q(UChNwpWu0$zuHpk5*GW!b@|k`kexyx2vd2lq zCqmM*M3ul9%;S)8t?4&+er}%(D?jkBv~MrwoX_aQB>j9`piPfYsPzW%S|87v_yL>I z4l7=*G$q9Ya7>ERXe5h7y8Q-lgG=RPuMrWMfGM=9Gz?JqtI$rU7`9pYkjA92O7t0J zL#_zvf;^2~fMcEK`By?6VNl$vtw?oQmKOTS6KOTIGg)~BT??r9fUd*kp>YazrB1bWOLMxQDVIt;=8h z^l{6>RG0I9Q^e-+uX_vV@V?)p2F#9%0QCG2)q*Xo_un11q^QFghKX6?XRl1I(hTx-0VV|SM>07i)#cvJPS z+ud_s3I=CXP(&6qj`1(I{}q2JdGL0(cLG3L`pKOg;U=GGfDhvO#)dd3G*3FWoLLti{eedYt@;GqV`JOHuaMWm%*oeYMw|9c?f2w|gBLOP_8l9;0=T~Y$l9n#%0V01_$G17s=KpLc5T3X*o z3n)m}_x1VxJpAc7dd6$p^LgKo$8~Kb|DnFn_TKwXN+hW?Nyvws7KT*9Qg2zXl(%># ztB=&L#G=VYG{wKHO({arrPBv5R&!b*upK$0MHji?yCc^RCdzdUS>V-?59vCzFUw~L zPE>QsJfkP-fA3hDINe@NkGb8SxozK{{hU5FJ;$~maRzqoZm@Uw8N96*Lf7`R57x z*6znsxO^xd^LpEREdyiZzhph|E#%MJe-W~tTPSmSTO*FcW8hWx;{PH2z$ zI`rr35Su%b4^-6DM{AcC>!BA+8s^zYrvXf^jBc*3Ph_)#wtc%!fGXgb^wx|)+p31$ z%zt`N5hknO_ZH3ijaRFVr(@ZJy)oBvYxy#?0(^XVw@WRe>V@sz&7oe6w_VPRJGVt1 zSD@S|TWXWh=WW2vVY`Jkv!tW!{4PzCyG+2egJyAjd@N~zzW3cbJtlOX>kF0jK)0i| zBU%2qug!-AQr=bG-hNiM&OQBi9m*N~PIk3#@?6&a{p_r)_UQqo*2<`V*c&YM?~SP` z^yjAH-6X{&cjOVzeb-9GQC zpO1LnAEs#nou0wJ%&Mu&_nt??X`Jsf8=Mb@GF@%{PL19`QmTJsCDbQ0$?1OT zL^6F6--j&TL^JV>b8Bs@?^5D`l$7Md{kUcf`C-#*fqyxig|fHlr&rgcaseTt9_Iyf zX|{HDb_lhNVWzu(MnFyrD>x9;+R$uqyK&u7GOaEiuu;6-)KvH+16$jkpZH23>QBu) zqDeXyQpwNVL#!KmG~CPZ3m}eXzeEYC*3)RJttvebO5;Oe4Z>cXPwBRhfXsuDaIGoKPMlVGTQX$rc<%E48g$txS(lPB9^dO zlu(R)+ZobOKPf<%gGnwxw*O0GYD>WOH4vf#W7p6n*N(%(bI_xv>s3F z3KzPbEnAI49Lo|#th)~bmtsjcMAe_ZDMJ4@>46oLVv)jnS{g48Z8N2M-^!;aa;Ob4L|K&RE`Nc)f_qs+Z`I(2 zg^|Qc*t1%L`DkEXNF>`sP;r_th`GVOSq%k!IWmTVf`U-ch`!l0udjscjdMtq-|1Sg zci`{@0*3AsA&y>@j6&vf^_e^^rfwVljdl;^oy}o(X|Jv+!o~UH1A|F%p=~H+1uW9rQ_31JC714)@sxpVNG~3Q)TdK(63dbwe z0gIfsST}3lq&0$a?*f56x0|Kcljd*yFa8uAbX-pyN;flgzBULHDGw=nBeum&ioZdo zO#RH%y;tE+W`M_UCFQ}SZSU)?)7twV=b?Yv?k}1%Gc&2W=pY_Vve?g`wQ7J5%+TAd z#lrQaWvN!_m8GQ-*QJb%jAwjQigl7JQUI!HJ53A}R&`=zr6|8Ain8jUv0aJ# z*Wlq=Q{ryyPAdizvdEp3Acn`t`U&vVz(oZbhaxAgpKXku;9J@T2Mrnry0?oytL{Bg;504M{ZQN0YF6rC{HX@Je9&&w zI6ph(Ur>%jev#ybcqrJ0UimaT&bq6m$!HK&9U~A#Q~KWeQg)d9);erc;wkVBUIeON zOx=GjF8mr7g06PUuXdRi0`9L%?s6lKZqk^<)JxtV;9MYRBof$ult0TiGdG_!&oMGL z5A~mZvh?C$bs^-ky6dL-ZX#hacp|NlO7?*G|9cRR??ZyF|5iz0oij&^7n#nVthPiV zi(g&s2L4BS#KgdWaytd?u4`ub+1c8Tr(T_(>c3Mq%-DGRb?dx276_75pOFsKgn^e@ zN=lxR*-0mKAIdGTM7Lp4XAT{zT8j05(vIc04^HkgoT8xBd^<*53d{MYTtw? zaU!#rAl+&k~-wXy4hrf{>l-}04Ji(p84K!35D zBp2-Nw7$;BX3kYOhJ zhCkTLtMP+dxkJER&Wqni7#TC#V9|Oxi5IPnXBT9vGWYwh@8YY8h=}4ZfAe)3PHT7rGsir7Wt*O8R|Iuc^Vf+DsNOyr|2-iaU%(-h~{gFfuNA z3Atg73t?@5!Sar08rTJ0d~Uh-xhF_GI~)0gg_s_DEhKQNE~%#R(H}ZGX=AI~Ge~^? 
z{8+r}vCs0E{#z7Sd;WQy9__H?P`SHiJv<4L&rBcd0Y@T7UwM)V2*}7Wd{J=u^hc7MGGs^@ zG&B_Uu$JGkr!)$(2CVxD_uca~*m-{bTckc_(o^ zZpGSSm6E=&963zuq4DPp;BsZ3yIaxooeif+>b>E>NNFtbTA*cWAA_z!M20>gAxHpv zY)}mV#*5QcUuy5`)!Dc&vCS`)iiu#X7Sk(I6rV&%Nl9oSQXE0F!Mv)eN}`j#i4$++ zawi+fUQiNK#z7tsQgSG zVB(&q%)C#r5Fw>AGv)|sSyhzqN)ov9Xsn#kR)V>oc}fs!R0w zOZbVU(8gF&x2vYlS@YpaEB2&k4ba{d26+PXA1rpuv2FEkZHv?__;ik_>&6@Jx6XD$ zVtkm|%amU)UR28G>A~@O?M3wy09{zFtIIxO6z`?FPFxzC(v~U=G!}GBmSq(DPnwB< zWuSCm!$Z@j{k;0dZay$J_Hg})T0s|+s$!O?BOqZlW zmpPM+d!gut?KP&isw%)cRFFo^73dHKh_zgx7iGGHJ|*0&IUQ$%|ZsOTR6*@cRgB+B{t&1xz%7 z(zvPB2plfs@uN}8IR5Wvys#ilM|)>eVd6U@{=dqviIstirzhrbE)q7QqodCdKnYc#O;|NGs4Sz*A@BXJ7hs6kv>wq0h6GZr zK5eUpyX6{Hr5QF~Rp8FKdUgX()%P4%n1F+=QZG;qoOsBg<3uIl9PY|LAqo5|oZQ3} z(@Vvx;c&dDy8HkXxT?vZlocEoE`HvWX_uKTITZ_21W^>Ny3ku`eQ6)slW@Qq7e5=> zr9h9?f6`X(OgAs=SlT~z29!r32bg983F`b@zirbniBFF>FJ8W_1{fo;K$GI?%e3*! zml!V}{V_8cZ|L;!<;iw@H1OXom>u_$%1@tPR8a|M<$~skAe3>)aMK#BQ9?R+PW%Mq zRhruUoFc*?3dxQu(kvJ&MEIEH0-37qe$d#>g$IUiz!9{PDFm!G zc+-OLIJ8$c^9w?jgvPbNXvU)sHnneG40)l!pyCR&QfzN41K^#7nr@&AEWf98xn+UExc0kq*HXLlq5}*$I|$kJs^hG zWuw~q-rQIOF>Hp;E>WWU=ZgJ4fLaH)6NEkvC{E{VZZEa6zv0pve>T=+vKS;1!%iaZ zTV7rM!VvFfY%UxGiVaItgVFj_uR4gItz943N-&I!0EWT(=gwsbLX|E&;UITQ3U&w| z-i_o%JXD)Mks2km>gp=Bb!S?ZZX9s-dHDdilWm@!p4wivW2rQ@9dcZ=dPo7myv)U7 z_NCv_SbMv1g(VQ$5&l5d{a~a0?WbFT z(j6(D8c(QMqNvTTb!J3)2l%t*v9CLU<+9wx_;P9gF+Fwd%9lDzfP5c+YhDOD5j+i3L}Hp&x_Me@@~g%@(tb~@x%@bn1(Cb9DL zV{wp`Q&TmTqyRFtLjg_6qV%k)Q^|e`_0HTnmJ#UZDpY?Z@g$-5Hfil{!nd>GaxD#@ z$6BQWB@P2!v98&!ze`TFnBYJM;Lq+qfVx7R)&Fi9ssGR#Qy+@IJm+ux{kQ z7zM2e5lg-Fe6HNR9d{M@#)yHw*unDwVB=jqNBA2%AE59V`ku=-Ez7UJV~tSo;p|_wV#_IT%sn^37MlIS(QKRjWweo zGw;}}&lj{NockU0 z7Uf`rr=?GfKAqR$s@wNmA01^bKbO`f>JkUu|i%vEcW5FYhbuxXQ4T%`Clj4SU=+NaEK=M8y znRS?Oefl6A4RyM1>Y=ZLj(|Hx$Uz2C{-&M`A5d{r#^6&%jH3`vPv|iOXd)cdHlC(* z3nuymV@?LfK?`n57{LogjAx2?n(DugXpW}7DXDbfwCO>b@W+Zwx{#(Kf=^@PsF4vR zVb*uxX?i?HMLd@1caV}t4?)CZ0M8Tzx3EH;u)j=4_#F}D< zEbQw4yEUF$UTa!UxZiQ`)1Bt2J%(a|q7n^Avx;jHgzU^rrF}g*TGtuRHkuq^TlIHLlv)dTC0iH1}IMqWudzgH!+en64unSI;M~9%X*hZ{EIQWCR2ycg@sh zL{CSB2700wfuxMl)5iJ5)T{NID|5{XE5YEVuE5Wp-(FuH1U4sWP@7AhW?c41i>m9F zP%Tw=f0X~#TEenV`bnsa9hVEpWo6o|20eT3Qe+?EVC&Lhm}Ty$xec&W9D6H%2%Nl; zGa)`;n|c)F{y8)mf7|rC=f+<{_l=H8GzAB<3XXjDe|BGWkvn#8827gC?iOZ58GXoE z`lhG9x=Z|CyLYIb%V`S;_&b*E=&&f+Eahg2s?f1H)^2#7=&jt^F%`0Opz%o#&#HUH z|Dxw$x!L9g@P(P+tFPbxv$4j>ocAbLSWkV1!OsQrg2Al^Vqmg()80u|Yx?&?y;_{< z55-I!{9)t$(xKAyyjR>dRT&VN4L{~ofoDDFWL)|Fcj2#t`6U6&{j|WXPQ8zX~TaGde+toHcHv!2FHUL#ZDvLVhiXPx~G8n_u3C>*(zv(@pg@=^>E3J?vPht*T<2@Y*EWe?@KPznL-W zW@-jJtr9N^wvHQ$xq`v*+!Xh;E-Y+x^c>wGT?Vn=j8!)pppYeM*54}JEd*s*g3%uZ!70NnDUW56B&Z@%#6(_IqSGI%9tE2q;(mSC(i_QJ zVZvnKZQ=mb7`{v0PKrBKj`I#{UWJK2< z!^4Hv;b6;4eFW;^7IA8XMUsO-{IIw>98HEcoq(gkAv|rRB6-nF?0i`hNHD-w?t8Fw z$nMo~!X~|L7I}~p4RYu!8@{OjU|?;#;8#3tZgwLsr+i@oI4-?#B}PW|ewZ!C;mrf^ ztTs0an*yVjGBhFLmJG1SRvBltkHvOt>rU7-a8ejolE)8?jXopOPM~Ccg7T|t8f{qJ zcqzlc4g-HPQ1FVwxZ_KFo?~TaO$0nmgcyve)Oz+V>Wq|2=_?q417;=EA=gHlIRYyu z3$-xZ+>gw-;mSa5gOD-egN`Ln?oQM|@zC2nXI;A`W-Vt}RXSAGvJr;h784tr+UXOg ztj7Z$YZ4(vfyX=_X-X8IrZ*M&ta)#%SLYKKB>I015eFJSa}+`dDuy7nl+D<3s5JyB z9KeaT=FV&861IC2VTOP_!RdZLVB3qr&2#^XbS{1iGZ`)u{St6Hr~7Jq#@9k~XzFhY z-UF!sdY#&Li##)SuWQUh{~jj*yv)11p}_hR4RDHRG?+s?!SZL%(~sg*v0AF^(NLpm zE{?a~^eDAu4^EYj51)5kdWPPI-UG-8b2;A5XAOJ>YUy}IAS$S_z#Ga>a+lT{!)tyW z3whKcM8kXGQir9N{}%rY_eB1BuVK5i<>ea^a%bUX%^VZ|xw&~fBlpQ;@gs&Ue={O% zW25Nbjj9@>Z@=t=BqSjC^{kv0kKaGwO~Ck&6h>#?j7^8R7#8q7UhP)smBptnPU3T+ zfJ(>Ozd}acysp=~i;yA=O`($Z4avw>mh7WZBr9pu9cSC9qv!DU_%-V;u@Wg2B`CS# zh<_b8W+@@L(vICvOeC^H_2eJ=$*kQ`Ybuv$YunlwZ53Hz#H?F+s2y?up+Jk+dO-X9 
z#L3Ot)PZl;^{Sql`IpXsgKB{;AOb+$3M|)gX`v5Q;HWrg-E+na8*ZRdRCcH|8G>Eb zcUIGX`1tii&%i*lcZapjl8tTG2?g}slpn&oF%*Ev^j(F8ULeQ<*xhdt}bKi z1+&hp^zogi)vg_rZ*Y34-(tL3MkiDKDHUwjxB%|m{b!BF80WGUz?s1x(&U=a3(PcJSi_CED4pFHC`BP_qmFA!nLU4_h{NjN~;6F z!T7pe92TxFNuHz$gkFw*5Xd((+dF(hOigXKe&J*;cN!gk6&gl*|2O8oIpO|#zqae{ zbktlr_{uA0%Mz8l@dyxy1>SFz{Es-ZGXA{tc)u%PYs7d)w9W&eQr!9kCmkVX@yw1zd0hX=Y z-GMSG(~G>wc+C9qzQFAYutlC+k@4J{t!&vmoSse{NuO`60$iun+AYDKdn58ZGjTz1 zYtSEATnm&Lw-|Pgpz>>AtvoC1!+FGmJc+#)4_W9EV2F}0iO+=V!SWj!^F8q>tcW9o z6bQa%;U%Q%7$g9DosF`(d3+LNMJ#Z2{_pq7*fDC}7z_+I~lJ9lqDVR!xST6=4N+&QeV?sNU4)iz|ji zA;Sf)TMI z76wHtRBhy`8A3-#bccWoObOqS8q$}22%Lv4pxV`6#Zv@7Qc!5@HL6gEaCcxo)yt<7 z_P`bJiykUzXc-+T5N@He9ULEq>G4D{Kg8D@;xuBLkmjVTOxI4y^5=WOBkDbCEX|$^ z8E(*t(}HOCNX-_h3d8yK0yrGjI|3I1?+2=D-`}1&k(vMt??4VNwDsm=PqiNZ@^4&~ z)jtGPBS?{8CBX>1WCWR|QKnM3r^ef!OraE>sZJl&R6P1@3R^IUO>Z|C#ZH(ti`GH3 zR;1grAS#`WapdVtdfp@YLuyTTCz`gUJZ;`&+EC`U52d7ihofU@(i>p}^rGO+Y?%k{ zG^nd3=>!^LuzP|gRGn4PhCTyvw3=>6hj{~m(yDS+;mrAk3gc$KibnEfW$BI6as4#>|Y)^(m@dF^#@GUd@ap7+@HjxIc6r?(p zm7em9cEf$)O>W`+&9;DP7gc%?PT$|Z9!#PoxI!$&lfj28=vPkC=chm4&X>GMqoquR z;64QruBM!RCsN>0=aN99qxy6y9-35$5z$I+kT=O1%s;y;; z**3o%dEIJm=C{52+{$Ht>eti{;B88#?c?Vsk>}6h8GL#}JrKEZ5P5*biY*w9l$_v! zjM=*@%%JHFp9)*6SKFO#qPy5g+rEZlx__kj6FiP9+dd3$Z9Tg-N{I6cve?mR- z?($Eh`IY_t(-wwmPMd^VfuPd_x8Td=(b3Tj>>dy~>bjP@)5x@O5-6Zc8uX0e9{Mlc zIQx8+wE2AP>L(_X*7c{)A(0WMbYsdJl{?b=@%u}lKJ>qz`!3S^)i$}Cr7m-0^TMgo zF`3Zv;QPBotTL}{F?1l8+7n0 zbpK|wdV3belSFl;N=HMNDSMDoO zPbpPXl@pz!W6Wsio@xhAFH-o5{MDhjuW$JshoHtv9Ztp)#f%#bVNC;anE68Qd!806 zcufGZ1}UcSd{1wb*0qa!E#gbRmRyDTeT%prBVMfKCUc=8q`762YH@{s&80{_JR?Sl-G z(ZH2h41{08DPQ2S8$n^8^*sKNv5Q3hfV@POVpG^q;%gbQhD8ui>;tIw7_#E2Lh&bZ z#~ivn3ZV=n`1?m{9hDe3UMyKuoWf@=jL3j{Mc87Pr0PB(M935L1cA>QXi&`GX*Qgg zV0mJy%uR7L3N7$Q-2Hm)%tOIoZk1!Wjzr>Dt1=x36)Ts8t^51vR6GjHZ`q$Jf2K(I zdHpju{_A8Od=Sw2eZGqJkco*~o7OAD!a-& z4?*`pmg&nGwGK{9O2bb6%Sjysqga@;hRWfOl*&1A7Cdeu^z5u8Xei4ttf6zA{`+Oi zqHg}r!*AEz+l=8#S1A;bcH6J`Cp5)s6~wZ78U#Hx$7|Jaw4%XlsR1tFcU4jCP+)_0 zNiNb=F(-}6!QsYyMxq{X>-ZB-NLG@AYCwh~dT*J@kfnNfntk$Q(XQD`OR6T5eUM0N zOL2{|3jRmj4|5C`yo;qk-BX+cc=TDZsT{-ZMQu9>xe8bx#R~L&a`jM<>sEKW$wJ|mqCev&clr6sq)Fq4jcXm>JHHsoHkf9H% zq9OgCUfS51BwL8t|L3urvu3q6l6Y1P7~^k4j4~65(uVP_EinBG)1vBa!rpH_6py0X zMD%Zm`uet=cKi1~{BK?U6TyO3Tar>Gn*-`Y$yqi=8F`)qqQi zhcYA?TeCWkx zTi~3<>DCqc)d$y`gbxdy9zF+N&+jz<0>)*tyOznxTk7&wA|gNc*=6at29E5)o`&fs zUxo+teB&BEP?`4ru4ymmIY( z1bs^;o!m}T&2-~7wWS`3dFKv$?d;Gu$jr0UB< za7PLHm!%^pnvD7J%hRi7_P)r_pu~YW@biFk@#oKDm>5=Dz03WYl6S1dB}qqDyUxt- z4g^B}#;fF@4CU^g2*Ca}i*wrcELR%2{DOpFKuGBFUS7!Kd;@O|qy ztBX*@#1(5Hic&Dynd!npsXJ`y(usSHq70{g>1?sUlF>auC6$ebgOyebv8bAsIfW=m z8EIQ9<-5+ke!!hpOay}Nv#FwH=8dg@a0eKHU@S=rH?#!z{D`H01p*h-zyJjds{=b) zt>n?4l(ZkQFbpIjpX+D-Ys4Ny5IEcfU|r;EOpK|!Pqged3Jn-GRcfHq21=pw2-@#u z{>G&az}dHaAWQAqthQN9YVZ6yjay88=Ij8N$>#8&;}U0$?WUhDRh}cbH*uoui-}Sw zsk|jM$eyCv%;~J;8)3w<$>};OY_@t_(VwgsC1HX?RViZfzvUxvb&HZe|K!0J?i*vn z60$%z8A|l5QJ-x7bQ-X8Oh>K6K-7L&yv%F_fz)t8O*ud$FmP}T24g;Y9B@R`!=w39 z*CAgG+)m2K-A+nIrVs%~YVUwmiba2f=@q<(5?>m$tYAS}6Z-FmN?SsX&wlx12(sf=yU)sXPLNrwl?`M?uCSXkw%F zs^(PpTYb=)GmZ6BP@?df=TZy_K>o7z)x|4F>G-+3`vx5EYYq zLZPTle~`rxn9V~-N$?-V17J-DOwFKsMcQ-A|cU*I;|3oLvR0Qq^c!ATQJv8*+PG>`Y$7)H7NnqjO z_cd*iY&AGjz9&H$h*`oY+bAkC$0*+c3m7gp7qew1N!2bV?-u3AV*>N%ov-&+mZ&Et zyogvM(%a7y|Enl4eC~ih9&!Y?psdfZYy7=JU8t{|9h(n7v;Q?kUBm+ zoR~j5Sl$4F1le=fMCWhs7bhl2WH`1N>~=5zJ$%hjm~T|zgsB%hT4{aZ74lk^I-Zq( z0R|tQjscs zz}pMT;^ceGmQbjFY3Ovo5&?-|CHL1v(&Rk?F$xWqvN9pl__Pu1mB(wEoB$}ts0v^a zmOX}3A#5?09Qn&ckA!fc+BRuyJ>B|}1hJpz#rshzM%-}_%_l3^ZHP8EDJX?9{NoQ? 
zcgyK+W);DgdU4q$m}tmj0Q#ZT$YS|CUVR4HF2X>P(EUkz<_m$yGJdMZ4F zDT=Anapl?Gu~_a!L+xWNxl&@kV1%@+>k(jz0ej4JX%&x;dZrEHquVFF)bLm=gv&Y2 z5|<4GS239>+l_by%2BCkJeL(x-ICO!qyJ!d!SgKzs341Zug&Ts=dMI2F-gO_1*OQ1^G5n)=@(?nkG-iJ& ztOC%y(z3y=tsgR^&-TRVsoI`_rrB7OD6a4>lbWWCU(Nn5{+3OokgO;5cjAFAs4d{C z7Y;F^W`cd!7sX9Ruk%WjDA``=n(;{HWgNb*mXOvaOPX{8>U+h9edsuO~Vl|vF zxg&H|Ffy47wOkd7EUb$3rb zsQ?gvA0OkZ(?!hb10r$F0f0N@w-b}#IdP?JXA8LJX9}D;LY{!;y>UT%Zg6I)XH4T` zV;61Ja>iF{8}x5C5l|1;CK0DeBKw}o+y%2Xk4^O1F6Q)==A@g8v$I;jqnW#(Yaynl zrVO865N%!9jZ*M{Kr*X0ULGDE^JDZccG*s>q)99hzrz#Y4|g`rhwU$Ub2bEcn8rSJ z9%BTk6jMGmp@bGa?xoi+Cl;4uYP-A|E=KwRxEp_-v`D^2+RUp99Q6=i?kKG~NB)}A zQ@nRB(bX?q%IA$ujQ8_$#nX1Z=z=Tu1Eliw^f#*qBp>Nk>|agw5#v8N|3l)};=1H1 zY%VL+&`lKUb6II>8tvN5YbP{$Vvw@s|4eELxG}Y)zY~od$T|Qhk@W`E(%5n#lJnfv zlrj8Bh0UwppP%LSk_N3pcSh>-T-7kxUJs!sNl6^bisFG%M`mWGvTbJxuFcmLHeLV@ zyGqvZjR~(IwP4%v^aznaySq)LXLk(!{oB0OjQ zwGgCn4msC#q}y07tY#Pv)ki21=Jh5$)V<6eSjac>-wu8JKe%8m+c@jx#JJY{ce{uYw zE9mezU~Mi$E=)PqG^XqmaHQ722$dO*e9#ogFTHuVZ-px9E%L0X2U0EUg;CnsqHh&A zZwxTrshh@5`MjWtAbtIrpB6^|vC;kuwxz`=w~vhEZmCP=pd;L8gsZbeThGJR;7gI+FO=J1-I_N^i)ThIA1`RW3s$$1~W(o~Zs!~?zQ35Qzm;O^#FsrC!5?RE*$xm1_AF9A@B&f|C82;;pnoah@}2gK>HsKnIG+Ivj+e z%y{Z>5=Y3lR4|!F@u)lxYaEZTX9b@!m{=UnTQX0Fi4`hy;Tnw{US`4D`~+x7y+me2 z6XuO|*F+PBtQj%geT_?<4j_&gSm;YuC;tqwn8U6y=uJ*K3Q_Gf)Z-Ns*=O zj{EFsL<&wW$ZS?_kxyln2C;oy(h27}DSB){WX^18)oHsIPqRzZ4E3Hx@e#7jHqsHX z7)#mNS|m-e<3@#*Xt(GsW%#Q})!8glRCr{_&^B7iHCA|AW`*Mr4?=YEd^2>(GIXBM zRSL$y5sMN`;Vhy^E^=#IXc+7pPrQ&LwNCcu8cP&_-o z3AwXA)YkeM2^#n+`6xDtCjgi@)uSoC^6W+^SF*~l8N~Ch?$U5Pb;Tht(Tql=;NYZB zPeZjC4V~DsA=-!Gw2`V#xD?Ew=qL!T|GaSnGmb@aHXmJkGhj)X{%_@`#k zjShDBuH*`I{F|;eziFY_7|;wDENq>cRzaxFr}5OlBsgHA5JL0Wb{L(k3c96f)U=<5gn zITN8HtNC5{qB-bbH1u}c>{1FP)pda}2hv@DinnO5kH*1?U46zxq_*R;&xHKl;!A)C zUvYLhbzL}8{o1a!Y$a19ExR&*gd`H&voEUd@&b=EEaC^-=f{ROHvSuS*L-^!U1tHA zHqs*6++x>XPI^|`8`>(SVb97tiKwbSx)?o~^R6YRbND?0dU8N~Fi?IF5@?td(D7vU ztyF%ig6;yAZojqWR4W>yvz#cTlRl88H(cYGq#;lsWmUkya30VxU))^J-#a)s zI1gYJvAjmw>!`!K6%acv!U{887n zE$c~2H4Lot@12CWE7|3@=cjiQXUpeni!`oR`MW?dX}hIqexgYLmKg6jI{GD`R5k)H zv#as$___K1ip=QPEHKW>mY^ZdfoYPzodXEXSK|h4)$xTP-&#D+U2C6?#wcfJ?`E|9 z=b%f(;|U@cz{G z^z*AI#{l!0!)$E)bF+f$`LX~0v-CyJxHg-tYNWEoJ_oi(kd8Q?WUTMwO4B)eJ`=Kotc}A#TEbe%xahA-k1Sc0EI|=8kyAv~q=769_Q2B}M~j;su-9P807{V72%M}>-5nH$RG;~1vCZ&VW*CVBI<~;f8FQ+p zvgIJz8Wg-(SqXoD!K`ru;wg*Q_Jof?6kxCh!%-&lJLgHv82`eA*ry*C_F3#LJj6sr z$m*YlU${jx%*>uzP$+QgOV_5Sf>Rf&lU%QmzJ;>58l!sV5Q%>vNHU@5KAOkuca!ZX!WSabV%h_Rg! zzJ;%^uRsm|__Jq>iKP4WUcVY=g~L*HxPcLk2TBO0p~9^x9qk}v@(x$@Ga2^<4fZbt z2C3wP5yFtBxe(Y!xkHX^c%GB#Sh~(L1En1DvT}_m&u4H;$Pc4rX1q8&YjpZNIaC$% z63o?+4vVm0()kt^7Zpj!Lloq%m8?YP0Vk@owg{y(0 zq?m=g(bijJd|Y6hEXkDAh!{O07SkDf3_$}vo1_4GN=dCOjc-Ckgvz>dRu>M(Q67D@ zFo1nRGOF~Ce(Gq9s$`0*c3JQ0T>+zeycB}t@u{~m?+$RN80PFUbHRg3t^;AK@-U4} z)@^aIJ|vgTYY0kt(ud?p*U7J)gp&WG=qwzX?AkCs8WAK!LVC0)-7QQ&Qjn04cG3u= zQw3qPl)xCBj#9egrCX$HAl;)!^F81Gfo)HmbD!(Juj_Yvd%G^8VN`A=HU1uvo~}V? z;*&C5WG*v%GWls@4_)g&YYDz0&va$ff%gq94-IL?kFNX1+Qbfiayb8+c$au9GY+Iu zGunC!ShNQ0>{>WjJ`z^rAHEs5bX6Mve8ts8ta{QOFoFR)SBN?oS?e$1sy_)~<*?3epvUDnq2>flo0a3!Fn{u{Y=&0vbt1I-($juO$FpRal~U<${7+9yza{o0&9RP2obcPL857f~ zRbUA`0usE|?EoygyuB8#=oS0hJ$%}4YggkzwUMH3k0W4C;`fJ ziY;I`aQSGQZ@*Jq=hp0K`nSfQk%ys}$+o*vUpHKs*xjAr?+&QAjYNy(F+{x9Q)V_IezjWZ%k3*M&1bJ8M{^uT@?UJ&IM zeK-#X9_T@Q4G9HM`_8KnTLEfV=f8uIj`5be)sT>o{_%eRDEaUo;8u9CXlyPo<88I! 
zXmLBW_w5_dj4e$BoPEjMJ#^GgB)tpgdYv#t2U5#9M6)aX_@MVC>8}>T>i(xLMPT5&OG`UlPf#{O z)45{jr@1b*o>IPL5*rw&7@u$$qPrs-n*Bl0cz`E23Aca>MV&Wbm}e#RH0EmxoQKMe z_WcVH&>tRFr9o-~12IK=R3P0`n@G_pA+A?r+aBUr=4h3?PIR^X({(7Ao=e zTMc_EfZ+iD?|NCM@`0kTZnXiTaLkqrPWQB|Mv;^aUm*1lQHN$`#*{Q(1gQA!ca8uu zA(f$b+4uLq&$v5&tZgT=oRi&m++H!EsbLTm=$d|VvT^b^1h9XSi4y0gFDCH35gias z(8JV^>6yN$o=_29HO%D@5hIEpCM+8R&4Lhi*(BeTV$zb6Z6LNouFZK(_u-i9YM*ah zYCA8=<@{ZJZ_O2g?ly0<&BG}eUjpI=N3LZE;;FRAJn=Katl@@12^}&djM>rJox)xH z=NJtrk`H1lR)XWWORQ=Mff#F)quS`$X@AEOfC$M@3rd7ym=?J0W_!~Ea;VCrKgQb= z(&d|DvfaJWfI^$~{(60Xw1EX;63&(`|C6X_rT(a}6nwk}U~+q7L2R#eyDe>HX~Kin zbPW=JwL#h+<>dh(#t(VGoNs^4Sh}zAliI*#D?iX#c7EZzLe2T4gtIBxO=P9yqQ>5q zNBB*r9Ib85o*H^zJ7MgXD!gW@TdgURl^6jlh0OWm1hcAi<~0)oHAtgi?V~#5nbI#b z#8aNV@UNN>ef{2rkwVTCC(YK9DR%#)G-CR!Dj4DqPWng6V&6?9YzLBGDZ#4bITB9* z;u;(>BZm7hQj#KHjuf~705!V)fyK*3D13B;IaQVR<`?}-IAdXiKmmS8d%0hqm`F9s zL6B;-SR2A5hI05}ZSOplik9oEyf_-W|9{L^;C1@_ z)yDQYx5C|>lX%GX(O{dGsnOVo?c)BB(_b@zg1v#KNVQjZ)d+)0qUW5iJcx@&MBMn7 zl~R=rCST~(P_yEMsYDQ;V;cW4grJCqVz)6=%x|v^a_f; zSSEY+->|a6S-4f}{Qmi0uNLzxS*9r=@io9M$Ci7NrQio3w{9^3*p6P7z-H`NqAJT@ zQ~L9dZ6|-x&qo@Czu7xhlY=A)KtAa%pqsCzK@m4fL_Wm8gi5-<(FQB1y-G^bb-Zxc zjZWnRJzZ+@<l?WQ}5*69AG`_tW#?^$)gR7=*j_1PUl zfS+I9B88Skw*Gf>z1RL6!@)A=+RM=hrdOjFYQZnhVSTVI*g9K=C2GV@IoRY3rty!63yalDA z86!G7LZ{=uR4bYTH+Prrrb;SWubfO8%U9`vjHN17j9m2#(04o$M`$|IC_QoY>?jJO zjb6>-m%lmM1{x-(i6u+dXl!kfbk=D1>Du=8$NRV}CKeW0VtU~}8CBkyE?4ne7fuF{ z&$*uWYRZAwQfjP`W5%pQjhoZ6?U3Qb**vWB#WoO&dUd@GM8h}5jbSH3CxZ8(L^kfN9o7*a2bdIGEPuFTN z;pd_?S_lXZ+O5L8FIv0FDf$r^kK?Bbc{hyn+2LjHG7_QHSnF)+!V@Yu@qP(AYM6!0 z%ybcu2UtF{S4C&Q9a&Yyp^S@tWG-`Yb?3VHe=_3O@HvpPn3y*kAg}5Ug}&-_dp>BD z{zF+?YIAUqNLEC|-+e^4x8oJ}ADq-2)CfP5e!}2xv@f^-7bN-;(*CXzlmNlCL>)sgsiut4cxp^>h6@U-=F{uiyvZOSNylO01mH8qA#`BMYF3SMAF?U`it^>V#wo4u zfPK0D?Iz=YggDX|b>0v^cd`$hAUn2@_j1^#Hou#!gWCTm^-s5YRLJc7chcP%4j}JL zAY4qT^xwIm(;vYC=p-TRT-Q%>!vR7!p$*FC-zfYZK3be008)TRJHNl={(Us5$+27R z{Ga%`^8i`6NlBKLo;n-7(Z%WIrp3()EqVEnjzA`n3(DSWL~8oS3k{};-`>x7Ca;1@ zP_|DVcy)CvPKv;~5imkwuRZ+I$WOqumvO;z{zmRo5Hbyxm&W@_R}dHKh2Sir7qgS> z>l|Gl+y7>2#4p4nO5j>vv=aF>z+UnxA)RbOt)Y+CQu?^8eJxHnge(ga!&lTkn z-xa>x=Hs&#ASCj`?O;hS`IUmqB`~HwS=V;w*!BYak6ZR1-(?0RsC>-`z=4`0vq(FC zHTj%g?N%7kG0O)4c@Ag$vxYr$qGVG{s0~dkX(oE9Q0Tk_sA)Mm0{K(@&jaIbn)&6; zv+Sv>t=YFP=@vTXz0s|nA2C@1sjY~cxms;l|+$~3wh69oJHt`9$=Pi#zzn+yJK z45VnLYrr-v759M`TqO9Y^U#-mbaXVU8LMobl~EHFJ~}$G1X!`L=tNEnGqah9w+40A z;GMbdm`mj5{>r&^E%4^wjGcMOXDI~SEGt})--Vo8U;bTWP_<}r#|{c$YR06DJ^``D zY9;&bP4At>(~EAS#>dA;2fKlf>iw+u)hevd8x62|KC)yY(%OQ`?4K@)kJX(dFFKze z?o2OshYKvcUG?#-ljz>|YzD$lvCsL_3x5;5mwln&dttPmD15s8uQQzG?q>W?Z8|`B z)$=)9t((8=#vK zg!riq>EqrZ#UC*A%$2qG`N}0XAZwIxk7mjTKEnWtBQVKs(IEJ|;1AeLMP!(p1FCPNTKbzZO8Z}9^|pfx0ncqn>9pM(PVVr5V< z0S-t`sA~6d6|{1{)~E_8@}lAfLvrOqwyxI;3iSzIoJwmy0a~bo z+sKD+%ThhXC^9btr~BdzO|FmJqYA+{mr>)x4mJP%H?*)-z1v9FRO`&SM=>UX_t{w2 zz|O}=#^AQs9cm(2z8A37^E8&e%gY_v(IQV}};o9k;;1o28PJ6?a6B8aW71}5` zU)SC-oNMcy7W}2?YuC`d5L^yD8_3`*2xxw5E`ewi=YhJxdL*1?LGp*7CP>kK@QY9) zjj~Ja?TpxPegNW%Z>fJ$_nrGKd5ihI92DZk`o_!ei+SmM;KD^O=wPG1~ zG}p1%xvQGa%^kSJPW$-hUtDRHmmR3sy)UxsW}u8TsWeOIR1Et^8vSAfRl-w>c&bQZ zAD|0O+|Hr6MUfiu{cYJ34j=b^Bc(ZKvUU~y(S?go^o3T?byARRCS$rm^2BH5-z&+u z+a`#@ZRh&H-SZ&t)2;EMj_;AS^hnr9ZxX6kF}8!}(^k0Fd~Lrvzwev}_*FMvd%+)p zayYzBQH{%ij8IeX+wV7s!2Xsth&hc$WIIC#b4lI1+w4S2^X^9Q6O>`rykGRiL%%-I{W@ zH6E_eDjj?aKx-=60Eh zUQ0FvPU#^=d}+}5+7TRnMF1)f5{?i(3->xXjGgNpBRUtBBd%+8zuuWFS-Kc*dt!4F zZ5cSz=69dm=|lX`E1c3*RM3hzRlWb^TlEB3nJ0G(%4J-Tv{O9eG%(A{zG@b^-e_K- zF#k7%i2S_mx|42|EPXHQEw%*@LpOkHPh${_#bZI$yM_ID_%5UrTK7{`U4ZNvBtnoSZ`X ziw%MO{osEOEe`97g>6x@ 
zEDvv*X^RYgnz|8;cUI8V_R;O;o9vA4b$5Cvmd_&}AaMYY(|hyeHFEJ{EQ8{|gPn3` zQ^Ys^=h|0Qs+&6Jh=Tzq%-MmXMf25@g*dC=xmdB#U+cOM!|mDasBv0a76rdkyW2NF zQZ`g!4ZyK>cjo1p`qyn>T5s!O5B7oNJdf!{K>nHa*lFR;MZ)Mm)R+|}IF?Bnf!_4p zt2>xMZ%H#y35aKF&q0d%z)(B^h_bBaD-o}r)z=hbt$2>fq10J_a!`$7Z!CN!dXaPg z9p%hW(1he#D!Q%EHQ?|RMBDdC zdH-fB%(t(3y;>H+;A-A<)>D&ya=6?vDt~`-l3Ttj{U~T0(-T#I+Ca?Lo{^QHk{jpu zZPlQLr0_nGN1cjO6Of#UsBT=D0V(gOAM6R_u>k_}?S#u2u%N#nP#%I$Co9+wn6z~h z$NgM7*CRmlm{tkytO&8T89~cX>b`>3A`}g-sE`_OAQ09sc$M0^PsCBnJU{fhW*XE8 z3#eHl;Howdr-aIt&uM96CU1`THJ&5`NI6?QgV<+adbJ1qaw1#)Q1mbE(2;snnhxkq zHqN=HHzikM^mBu!(B0r~d%U0su?G~Zl3l#Ba}$Od?mcMZBF}<-JD`foR<Oe!-M_!TDSw2+AQHK@E|L(JD>Eoc+37o zZt4#N_2ya^0Uz-_ZCd0fJas5(LKJF(x*yw)g?;~E2L(1vz(f&0%0f&;3HS~=p z1w2Pf>g7!Q!!a>9oIbZ9zfg%Wv`=3ZawtsMF~9|WP?BBwfL)oa#uAv~n|WqX5c5og zXpd2vIV|EONrIk(eI%TxU<$RRNy*nmR~ik;e@rJxhI>8jqR2AJ?ELoa!zWq1xjHf~ z^PRdWV5(@nrKX;k52q8~$s3-5A97#<^~!2$Gv~G}dF)c@n_-3^YB@hJu0ruMMWaYf zuFa_K6(B3k@%s9D^xm%0-)&!YyDi##_5M8gguLxCW@!VvwAGO^uTdvqEFb9R*SHs# zYAW08+v|C<{cnBx?Iv759!4{*IsS#OY$m2DOxJGsQ^5|pZUtR@2aq}3hz2X~hI>i<_0 z!2<$ug6cC52b&g~DgsaED}v8{YGB&#?}Befm#$Zfg10QXfyjqNxyC>rpNqM(PrFku zq|HRN@|@>IwNhU+{_$+yY_Iu)qj=qnzF&UqXK%UTHZNF}w_}XoL9ykkZx&?bw!H4c zfcyD6Ir!dNiMaRr;{S>KnAsK+$KU{^#0M7Gy4HZr?UHM_PYiBdLXt`cY&(m5FbLZ) z8r1UsJg{wW7YS>73J}rgk0oQ9yN02wqw`jDpOnGYbNN<((uu;G(&d8NOslU|$EVwF zjwdbry~Wd=O+SrTbq+;84u^9^zzHcnEKWUqef(!CE?npT|1L330WFg}$|5g!TcvQf zcNhMVJRGPy4i}1&nzfXO-%m4nh6juXZVv-N36cT@H+T1EcV0~`x4RXsCw#z>X1^kC zl_&Aiz2T;RG}zp6|AS!dWJLwSG|?z1DDWIMjgeIKkycG(KkIy`(u{mecFhu9GW*f{E8k^o7=_o{7+@t{4xd}V4=xEe-$bG-A^?4?QTBj=V z*B})aG>lWIdp#g@}vtfq{2bIzK)x$z&|G9G3)Rfp>1$qEYhy z6iffwn>FCmJ^K-$C9nklr;{b$J-RW}>@DfP-eVfx=IgNzC@ijw`}_Mf;p2t&(8NcS z;cQWT{qDm<3El-sR{E4~9Y3E%{zSos_O681&2VCHT8xxzKD?oLEb{ihqy*JQYJNzP ziW@&D5(m`u?8%dk&!+f#V8xJpdde|=`-reuHQr66?I}9SWF#%wLCirhU(y4t_Yz03 zoBsnD$PS24E^(emJpupM)hQ?%8)3&YImEwR08P-d{s3{ZW(Bx@#4lf}j}$1G6z0l^ z;Jt!8r{DSTfi<-M!^)%(IALPY>@&Q$-o%l3GN!9&OCS)DBG4k+bgSXoayMw< z=X`Q{cT0UOa+x{B(TcprB@*xp)qglm(EKyIrzrfj)&k5tj}4u$-7)QQlY{1)@9Mm{kG?bHKJx^~^t4VRO}UvFv;!s81dd2pA)bFRp&kjI@V!#tk1!y4 z4oAYFhX%*;!>lSM5ezOSU?CkWKQ&h7EjWUx*-lnVK0wQym4l!1P-!z24@`Z5QxMup zrfO0iy=D%9*d1TBE?`Ng3&d|;G|M_F>#DBczSA= z^p>!kzy1vgt|jT~$dKiJixKLDhRXZnNxrzC8%a#0Lf87SY@=P+my$^)N+A&*LY7Jr zI)pyHT_9sGWrzq@7Dx8O*9~)Fp=mj9c$2x`{+!GP5vQvDXW7Ydm9mr;xXy=ZANN3je{}Ev0&7HjTl5y2% zC9g@{Jc1l#$pr-+TL6Ich1TmifbiDT;u)kfRSZ;qCn8$KnKhH9p?TnE8w(9;p(Cv+xu^-%rtmY!W?_9F`ZIJRK@gWq`@l( zctH#75t34+H7nkKiC6;uWcp%m1-*c+Ox)pzMH1fK{7DkPbo+Kuzo6G}IoSm7TRqHk zqlIhGJG$9->5K{>6363=FzmUEDJOAv?zQ`DX_c@?`P^MOo5-Y%)jV#6 z;L{MD7A%(?wAvc(9FrP)! 
z*~q_&k@ELVwfDz+S7*C3wL(1&u0y3Py~vZ;XIo{ zo6%c{LN+4w^^_RskA=2Ymc^sTbGO%73l}KCLtq~^x;en5%mIgQI@JJH%7O; zIWGlGJj(swY}U)|Lo`oX3iK!_QNN1Jo^v0S8yN7XhrLNFF#N#PtEbZY+vp5!qcuXE z$8f@5|N5yTryFA_iHM^fmr)FUeavsnHHZt`txC#99!QMy0QO=86G43ZYk!(@UYtt* zrN}1b8%7^o3K}s2ug-VYWMx@SH3B9p>q3llbkq#9blsXY_6QTl4%-pvED~lqecx*U`zmUs}51Zo3(5yQv5~8~k^UM*FkK`JeA`-{h(E~$#=#9op7u@(i0{6Ke2PUlpt48O=-&Vjx!wCK`XoN1Mp^;e z(K@j>c6}(C(>B%|1y`xEA1^=IB5rWLGU2uoju`Z45Cif_QB1#6wx;(E;AU1GTk z8~MWbnEpG85SgTJwhBzcYVo~$^D}IPc#@DsvfwMc^ug;%QV{j`h*zqLsZHBJ>JxAD z2ibZ*Sn2u2Hal&7c`I zhbkl~3<7Z$rR3$6++05FH?{EHEpfe@`~RAEf9O#~%F@9CgwEfeFWv9Aoq6ANJYSUc z4XFI_UdH*Oy?Mw)db*F`VtH1w^ZItZgzKsY;8JP|p$AJVeAuX@r2JA2kPbhIiae>n zeJUAtu@f_2GgUIae|MMsuV#>O8U~{C4jl2FjsxqJ?*Z%A@y*JtyLUjP0|Hv9;Ctn9 z4KZ&9$R6Ibb(?PTA@m>*PTU($t+imayXI3_=d41cKM(ftFxQ}>jo@~X|_4oA3dH(69vu*o zIC#sgry(^ZSqG?{BA)+#OZ=FlpxSrFBH+sH#AmkF*`bqa?Q}&_jyPCcmM#n@$nWZA zwC!HwtEx%kM{nH5;nD9J%Fw!n1sxmj0vOg(I-{p#(N@_0{6AY^H<2aa%uo%=j^f%3 z_;ix3{dXJf(e&-bF2yHmCx0kU%!8c)$+EbC-p0dwj&6025ZLN1>{|m?0rD&+@c7@X%k4!sd;7}U zS~KcsMjOOh5;rOv;hk8TbLXa$M8o?-Z^t$W;EGViuJ)mAn=Yg7PvVB6S8RjtcNo06 zmhN`OZbmxB#&o^Q0Kv(js0^Fgw~h>7_ky|A!T<#BxL%~guv7{MNRU)H+U>iUBU!5; zOucIeCvYxSWdn()^vXFP9ab(Sa>Dv8je9}efT(rCdXEp3oDZjGSe9J76L#_PjdX&J zAfK>9lm={+AfL3ydMdfVf*fFcTxHtXH%tvCe7trB9KuyOkJ#`DY^)nY30_-4F#%;y zE`?oDQkxW}AW&^#RN=&M6ZCH+e4=XzVfmD+#?XL37;=inYPC)8$!zk3ZhMf_ME2P= zOsOA7Ju>CxR0&Sg!~MJ?OF22bv$EzP0%!>`ca3{JnBc?5pm(bNo0tIqyDILXX0&tI zqKlX4WP5nMXfu6XvF8(=XP7+msje{n7NM(kC zlJi~YthS1Qb9)go=P1-xdljuoHB0k{X{?l}2>UT2?lY))9gZN#( zL$B3ztr;Wq9qqoFQ1#ozBn{=@C4G$mH~hBxM@lD_W8PF41Z!u`;Sw(Bn<>|u`JtObi&%k!PIv4o_}kKFfq?z9rHhp|Pcm~k znfC{^>{?G=SH5#v-S@9}kuu2Y;pf*_lL*YjFfBCubuOMtzUne#+mxMR1_^$)p&g4N zq05fX-$5pJ=vzLQPTOY!+A4n!xIkg#vv zqO%zLp|DCn+|>?`T6tN-Lc{f62G{1JwSkqx!#CVOVj%fbj2zJ>a@|R?yl+TevgBYKyE#1t5_Gv(TMMfx6bV($m^_7ZUb#F1@PZ|CD z|3*;hK_@xTVZ9qI8sA=<<&codD9<*$|Hfnef<)%I?i3(O9re36EX-@ajN08KY!6iI z(&H1c(LrC2P3xV0Z%*Saa+z}xn0{IDw?+VclN@|F58RS5*W0qTRitBJqwfh+rtNCB zxk<8|-Tz-k=FD^yT;-@uz5!~OnWBs?F==9R?RxmO@n466=H4dY(_2<9{#r= z?|)-nfxRWdxPe?>{TVMv zwT>AxQX{MmAtBCi>~-54ejHEE1D1+o!^bH%EVJa#*3zp$MyD}>;M6>jiLMCLBQ6r@ zC&E>^Uovd?O;oi;aIWVjsi} zUg+}~Or_}cBJ;5T1p3tKw@R3Du>FyxFja`kr58>|f~T0#$i_(&`+W#M-f?VjA(b~@ zV}#sL$?_Piw+P31Y6D@iG{M*H9QocVjUm{tUnYE#skk|}v!Y|0G zCn5zq_#omLq1U=Ui-o65x)C<2Fg21y=k=|I3wYHQ6ySZ+`s$W+Ag{>*%9# za@QN$`wJEcoxf-gx5JtiQ;P{_vmb1PlZ9mK68sv3u3m^+=4ZiuAs?ata>S5ilzg_6n0Iby+*RT*6!zN}FpM6fT*MjLoO<1cxbYQ> zE=z+&m;d85QfNE2JQ~>RJMoZixjJsq8{jS(eU&d}WN08?>zA|geyxvf+NArd{!3eo zb)jBaLM0x>MiSKr)*r>isL}H*mIJ2ScWs@n$C%qag_m6f+B8`&&_S1jiT!KnjxBUIgMAzF1~%u_ZHD*!gWujHA;Aj^pr6IX`@ z8cypCQA4m^88{TAyo6`1Zhwd;VnhNjHUz@x`-V0WBc7Ry%m4Yc;qa&#OFGfRg~wVM zkCV&#i4s{iDEuOD+i+pFMn zQm~so48@KQ4+EuL19Lee7) ze%cUsR4ISu$U}gAC}85XXuQvngb3Uws~J!+Y+o{#k*N|-3*{g$i75jnvA0 zc)R^ox`g#53@x=f*fXmWAkp5sGoE*J_RE5O4hv?duM=hZJxi_fsaN>?ZK;R6Kg2p+6+7)?Tes3SR7*ThP6tQ z(}E^(XpnXpm0yG|Y5G6a;Dw!YJ`u$eIg+$<-$|1qr%@_t zY85Kp2S@l2p^4fwMx+LS@;Ho31s~MWq^E8O8kT0%q^ED_RYKB0=Bvqw zWWA63`5PuM7A00?swNL`M~#sy`aMgQvyQ-qGvkz8_D8Qhz@zKGjer)Dv@M^nbg?UH znE=JHVC4f95M4GAVVDX;mWWb!AU30@YlsN5EO7fbWpmev`DQ(2otqq3a(>GN;KC&R zPS6?fI81L4Y1}-bDoRy7Vq{j7@gicfF9U83RLL)nSnRkKe650V`F1Ao$OF3QfBix zu1IgmA~0a>GU6%>GX=iZ^7r&vpu+yjzCepFc88Y1p=WW-yY(bvH2A!;);VJk=<4>E z9iZzQC0fKXvPS7LU;!{~$YN=e-@afeR^fIngpFHKP&e0N!h!y^!96r~;{COZYgFHP zD>5s~HP#!w-4ph3@i~{3`|8z=yr?dvr9X?Q`2DK@^y9{v;YPZS2Z?faz zl&S04Y8(b=c>*nzz4CzV&fv@8{i@(E5$D{A$5c~@1{^oT+M)vLr$QgLfB1W2nr{XK zfvN`E!mL+JT3H4^n18 zE3et1dcDiK?NM+8`X1`Q(grZDUtA#2`z{md}X7QW9Mm{TMI(R{$ z3#8!|W+04LnZPs__Tpln^+<5o^3q?4IGROk;q~cpWNf~k&nP0Rsv2TdV|7MqrpG>B 
z-oH&>CA-a+ze|b3v=IV0`zFtmgY9=-I$3*XP5oS6@3FfT0cQols|CTw|FZ7$f=}bL z<%9N+yN~4S{m!0wUJLaA5Ih1#V7^Ft=I-w9e687vmi54sKg}0IfRizwX9ty`79kLR z@_+))Gl>uS{F_=4tVnD2V1)NOc&g8)%%!@}o|vc?Ke@9HoeEniia(Ak|-5abot_DFPlMxwIy8RA{a(*8W?{N<} zOcf6P=n}i^6$x<_SZT(Tu(`h!0IP!_-_#(&#W4JN&rT$$Kz&+L5$y z8otzZ%9js?{RAutHCnIGtUtsGSF{S3r$pHM9Y@!o+g*!$7K__tpg3*wzF7fW6auj<;_%x?29#Vm`i@r< zer+e{p36Lq(mbz&Mr7qSZTi81`PrG)t3NiVl`PM8VG zWQq>4C@2WCr3*JW6nP`Y^^;)h?{JKk9lc|j0l{!7!e%4e@RbOkU~Pk6`^I{e_fjBs z;D%w8S5%m2DaXAD@P z8WXi9BU_#8WtxLv>5=A076n=J{T@H8OTY(`4#bEmOnla6;~nDz1BS2IZY3v4+ZKv-wRjL*wQB+|8lrC)VXPXSE}qAI@kdxf%DUenx(v;cnE zOR8ULx_tLWlDA+j$6F}&NlJQ5X%lUh0898skYMa~SsfZBoNByjGwm6rW#m}X=6C)v z_+nF`{-P83Z{Cx4qa^& zj8_u13qfp>L4dF9e6=?&eE6i7og%4uP_7osQd-w&&!zeiD_`0;lfl%4Fh5mpdB;D> zoKNKCcXtA0{f>GCF0Im+8J5&XuSyPlL(D=p&@TcyfllgOBQqo8o{J}2=+Szf!bOMz z-Q7uehmK3D7a%tKGN7Biu?O$3^k{ANnlab%m%>|@CUs52F3CoK&nQ^{vUap_v$wZ5 z>&-lE#c()xG-`A_8XK{dALs(Lq36{zj}BbgeOf#J;%*WSKkz{4A@JQt0^GNqZURCt z@18B(8;@MxF9W6n7L#U@zsD@Uf4RZrsIb8abW!?|G1z}vpC{EA5e6Fx=AdmlyRk8Fd2n1P+ z9}X=^;m{5Vr;zp9Zuzg;_BS@dV8m{xWU8W9wdER!yAg~m8S;rr=%hGyn zdf%@Te7=YY!d@#}jRVsP)-)KI&4C+u8hLsHtRtRuH-}6FI<=hUJAy)`a6W{k!bX%G z=4pB$C^;jYOjM-Wyl5B1tl$9kCCzu3GFx$!__ERt6x}{T*t?*N&+?`!iKAX`iBSrD#Us_s^=`Ix?zBe-fz27>e} z_Vk#;RV>gTC)eO@>@KuN9dx}h!3RmKsd-4f4L8>v7F2t(KA5~(re@w$n9!yN42rEm zUwlfYRw_0unb?jr%L+an4L-aiXSum3nMMa<7lVJ@t)H2?2A=Qz8x1YWEh2aL(n z(o`)Tlf+Jl$M_VXWL_i>5ukqrMWTNvib}7&_ytscs8!+=jk&;Aoo*sQS0_4H^R~@$ ztqLR~GV=TbWbKC%FM(XLo;&k=?JHYIxw&ImSUH?Wg@f^A${zrI&xrIcKP72#Xs|VO z>CH5IJNbPTpURF3nL35A8E&WSj(l>CmUu^OkB`Ge6}Q3TMEM&^_($n0*#d$oxoZC^ zV87kjBiT~c<2orv7-Zb7IHsX3q(T76&ymKogumo05i{u*q|_aZjUXEQZ^}T=pvCk==CuIn^KO7d8Myxt!w67xE<5M1y${en84gv(F}wvr`;e-% z*jr26tzX@R084w-Lwd2T0q$RsA?E<}C6Br{0h~%3 zBBcCMqQb2H3i8?pV6P#JISdVY$=;nkIzJ!NfH%tX={b@yi6uUIKB1JxMET+oIoAn& z(^H$%7)+yfZL>+)v;6X13dFuwleV!z#*Soc!G?~)muByjom$^17o{_+5nk81O=Xt;GTpe0s@qy?bUd60R|G zXeH@%-q@1ve=hx;CTi9yRg=qOMvE(v4hcJ0Ys8`jy7$}a>iw<@j3v_1($YTb(EFWC z#>*<1O~y%&h(Llc2Lg!6p#9;7i%Y z4Q^fpPKd9nL_o#BXs+7>hqA{}xd`IsES1`2JaU`OCg^5QczUAF2yCQ;*m++4g=HnZ zKP=RsU}JfN|*awJ|ydOHeUluJ%a?ohYy5Se*5T84O{VBinZHmp{`8IAcOq9o|rF>RzEs_5s~Z8@uSEU`i!#YJn`pM5xUb% z*6}Rk@Ti}Pt%;Ub-|_eO77yp#>%e0_l!24A@3TA!?!s?uP@01Q>qJFGLsRJpoB|bJ zKfaKOH2&eHm@&7hn<#+O!X(uCgUQg1e%++ads*Iv}d= z^nUg0upQb_{`%2e*g+y~iS1}EHaYo*`j)KCYL5-kpWRWIA%)k$O%&Y9o_f3UL^iFT ze#Y}eRIE;%j@{Ah1rMCge2z!+8#7*rW9C&~%%zDTtKt^@F8V%BZVi5Y=MP0G*sFP^ z3udp|GwD$xt?Fp#zM)}(#KV*%Sv|(b642*oEG{O7FxZLSf;ajP`c$S}`a-yjmLgtJ zGj-WuUzxFu?wQ{Aj8R|PO+L*cs`R#W3c`OiXeMG`hV!yx5O8oZu09DJQ^e- zHDC4YOCtJ+KI6gC(Tkgl+^7@IzHme6SvK>r6Z9r*A`ru7$#VK}sM`5P_DAhwB0O|? 
zcH)I@+FDMzX3#a*+{O%ju zFclXUw>LP{^e&oIM!mxt_dHk_&bL1 z4cwitX*wd}>yK_JhcL7GY4CL>9SR|WQPN?5DJ=&*Q~T9mByo;Mb&Dz8dYz(akuMBn|rfnHz?YKcnkJlHwvJm1CN=MYzC-@M{&2nmhZ|I1x& zG}0!mZVY@Sc0DYxMtkG?l1;Z(qZ`H8IG^4FVLGPz7kmbPXR>FxY5|j}+*|IyOC6qZ zsJushitNuZ8vmAsD@{5>l_<4h130u;#=}3z%k32$NYA-%fAHr*Jp0*fyS0v%p6lZ4 ze{AQ^R>nTrS{}|=w4lqiyty5)?$xbX3YQ5#DxEm~O!(++ajX;MXKChXq`ud(C>YXK z+8HVO;h)%A-grW3yYuf@HFZBR{!TW*33pao46`t*o~SfF&TmL}&B}RP(dI;QSHYx* zL)V z>C(99(H^T^a;qIp>B@_HYBJv38CIo{K+OCrG3lmkNIg8&$lx0fn#0M6hqOf{eLV*~NQzOxCm?oYOV+ zKbzvP5K2TM6=A~ikCck!olYlxw4{U5Y^74A+PWTagkzmJ7Ge+cRap_Q>~0T#x~1ym z_9wW`Y(wnAP!>Krs~nyLODH?{qfZ#|vd^}j$k%UvvDDh(rPs})BV9IHSJ>Y~QOOCF z+C2l?-3#xPWWLb$)(f!flF^^L6 zZNy3EOV~SNafQ`s+7ZfcUJdI(RjgC{_W9{&(l6wKmt5A z?0S7V?V|}J(_BJke!xq8I|*oXkr^`0X5h|sznsQ+dRQn9t9j$Xg$F*QcCfS98g$(i zK|g$iYpfJFVd{&8aeU&ra=WEx-lRfS4s#K&Kl2%~_Dyp2tcuB9u*$WsJLL)27Zd7&T zF)ZBUMAk&2f}d;z$R^s>?=GnGXC2nUW;)FK>e{CUo-|UrXRaYEO2i8$D!8;9=5J_Q zxCrvqNTRRum|bTyJ8yE&%_J+($h?rXuMTc?KEf&{rAOV0^-kWI1Q@oc<-;>>c3p9k zcq^1+T;x*G+OPYb8xM%YZ(kY_O1uV=tM|mTc^awRg=Cixbm3|GKBFV{5iLRJtboN# z&Ou?vS1`QS+RpHZZP?se^K|vvUQjH!_lGnc`Vc2?Suyg|1bF%!E=>`qYrW=U1bdvgcAd{+a){b93%^Pk)~4%$+#YM#}RsZmxA{=IF$Ca4|^ zrZQ`>vEv@yAf)ViFiTT9R&S2CH1(9G@#Q|1(uFL?4eZhlym;pw{efkmWfI<#$+`uZ z^`==~hlKWKGLxBypK*1mSbS^Z{+##DQ#FhJbH#7}jPt>OvYGDRV!+!%dU|T^aT7ng zEXnP|XpXcfY2w_PWLZPmmQ&rmCXFvZJ3hyh7-n{~v8lTV#+XX}(oyX0iT!c6L&S2&i!L|aM(h3sqxrgF>=T2x(Qa+=i_JgD|tmspTY z++4)FIEQTY#b<=kPJ;jQ@k(LJE7h$ji&E&eG2Jzn=67PhoiMI9@%252E>(MRZ&Vg1 zgQO|6f8|Yg4*fD(Kd6V%+IPB!f|^NM;b(1$Y1h2-xT{i(C}DC$)t-yKm18?_Mb*7W zgFII@@bz7Ra?VSdmt2)3>o9Z9c$Zi8MJvK*IlmPON-o4%rp*@;sR;=QOXk!LJ`Z`I z8Q=T&TibHjDp5zNa%^zzGwRp1_gL9{L16jM?2(9&L2%wSdti6$L8mWW*(pA&^fBy5 z0qH-Sx6y(DHK2ERA*>e>_3g^rfS~;+Bfgc-FM~EsY_tCyXeFsWS?h39#h8mA;`@TxZMNvLxskyag^pW5!L<6f|b@Mb>4rg)x9Lp>NNV1HKFbN25o zw709dq{pX69t>w}|u*Tys!S~Bt9KdqOL~P{i#kL+#PT@A^)&~B{9#5&XR&b|lic-y-bH0`twK#*eX59{cePo0 z+P&skU(kAQsys!|r>}01JFr@LCORGG_lU5ik6T|L_9tlctDJo1`nDmrzlplkxt61@ zMq3K|w?fNyhJ|8Y~VoTgk{--g@=oo zn?rhM*7Nar?>erOpP8IdI-|BRGi{jTB(P(DDyefO3~#7*+==9ke_Fc9bMD4(ZKC*s z^KC*Je|TV7p9Jy}q;KD!#+?4q6%|k741YbLtOJObk}Y<}PruW%`^q~pL=qZ_yeE9N zQ}$B6%QY{U!XkFb1%+BajxcE*cJDbQMg*Z^?kBG>piV@LzCzIN0(bI@ldGEiI_n|W zF1MRpt^`(o4)i$v$rvUGY-Xb9uw2M zHk1C9WLA~(_KZO2k-cW@*wN~bBlo%?U*3h-)~*M@RvE%O_6o)GN(+p*e>sSI@WEneSWdy##0=U2XN4(GsDZ+xjm`BdFss}bMw$P@Kn zNr)H*z0}=P7r1Ypx=weLF0UH==hxm*Q!fUmvJ=aG>9k|SxG2BCt@49qFyuLM2|U9=}fR9jjS`RMUmh5Ad{FlgY-$guJJ=A!bx`m+tQ%^R=FNzI}2fJ6|x0yao-B9PMXA&Hj^pj;F3iZ5E+ z%X!n=#|cARQ!M&~iKTo*CbyySXQ^J(9#p-JTgIs0j9r~uR0V;wl0q6JGJke6LtG4#&$ zY2w02qNGR7-Nk{b_zGq{ZXle_uC>}!WkB<4;cdRyJ&4HnzO+QV{es%xa4F3C5 zS|jKvd2^k%BptS85w^7+HWx+*-KMf_6KSo@`)o&48oBw1wkLhmqH$PE`zL+Wu5neR zPWkHoU;7VE^#H!`+LBu<_nK;STMMZm9GlC8l9zq=T3B)PnIK(g64#lYRXT=ibRSd~ zta-IyL56x_?{qKppuZYFEERn)mK({gfA%U9!HUG0M9-U+!sYND-}XyBkIW@HE}nWh zwCEd~mfI*x3t2WIlM(mlyYAV`kn($!JNs)FZ|)9H`NY)#28GHSbSN1qyuB(CIU_0j zk`a0=v>NdKHv029lds|^Dd7bCO;$)ECOsbaqqosn53=x-eAx8)_UV9=%dL-}3%G~fRpaBIH)P2?MxHJBQuJgo(*s-b zYiQAWKXmU;Yb9-Pt2uc7wMFRGn!iWLEVmhiMtQ9>ZH`=C>x-{k{yJ@%gbEj4mc2ndu^sOmgmUrIdq>gN5 z&*uOOdPG7Q0uh1XdWwL;8E&eDJ=$6ZF1WwTM*A)@bNdB43bq4Ip=>lnWo6j`F#K%$p=+ZlmRWp9FV}qJ-Mazv(joW@ty>~9D zPca#gtwyE|6*3y0`i>@P*H-<>#AdWF{fC;^*SZmTEDeZtAtN692dvG^e6}42pw5wfBC*~6?+>MERW-GnY4E>0e*rRL7?^`a#Sy5o zlxL!dR{&=rCXC26O)A65DL>VRQs4I@3LlNHDQ_PN5g$Do@AApX$bI*q_p>(Y+%qk# z)a@Y&vV8nA`uhU|L&j0VUe;_O(;h}lh%X_{6PY8EU?MkK6Wjjw-)Z<&4(PqbML=Px z$afz6^VSYiO2F;%pdS3YK(c0-riQrL-URG#MI8tBRSrNkM2e!t ztXv4r<2gAw-h72M&)(vj26R^jvA_#@jE{Zbtc{yg$My806V6y!N_!;s!2Qx!+4rN$aDmez0Ouc+!zYHEH|Ci*nm)NCG( 
zxsg@xZ!0Y_aTY!jc#e%$tkm`+s^=J<|5!i&(y`K#55}&vGF3Fhb{QjZ#an%9w6{NM zlSzX(?WE>Z=3Jf=LJK~exsn46As!(scbgCP*6AT~SHnr#5pBc&XfkZG=17`0TMn{j zkAAZqtTHW?PK&P@}67DH404hH)7SE2JOmpbgPAimQhnC}F`Zj`)P zcF=*kPeC$h1V^t6b^?cA7~rIPi?gARthIhWvQlObGq!t-AZixsjeNj9SaRTAarfzC zJpWO=fO#PeOy-o0#G;*GYx@4-PK5==R*T;HU%h=w0?%KIwL}|y=_)9Fmxg#OU`m;| z83P&jsW=~llI{_PFTKNa(1kxY@j6>|fjREEh|-zs+9(A@+u+9oX?vL^yzu$ZN`sJ0 zdF&mbW7p32ldLLB>K3;KHWO(%8an~%If4_!?Ou;iQyaA-pdE;}9SaSqyBGTB&)-f# zAOLQfa8sNMXZU2$PLgRG$o;#JIVbtrq=FIU<*+p+n6p?+`MI!#IofDhd7YS;XcV$e zzxWRGb-x+}pfE<&!t*iT+hqzqv0q;>#6rirM0tiS|6C7G`MiiijlFokq=za+I_M>3 zykg>(QMGWvVC>{?yLn47BH5FkYl@d;tqR&CzneG8?SfOr z__zpENhaextS8bm;7Nnjf54kDQN#o`AtgQ^Wp^)_ozvjd)Kpga zM6LH?pTWOjyDOO^m6(Bvp!v1SK{Zm@r^X-2T8ImoEq<_wEI)hECFrc-%u(#LtSFIN9w$WP%8K+)Z)Z^s7qE zMfn7Wuas933{YDn_PGrFgFCk5+WV@#7-&Y54r+Oa1rB3Prv|E3H7aF5UyR_YjY0z% z@_MEz4+KTP|8D41l?MUGh3ATU+`!~x(344iF0vldz#+AgHBCFB3eskSs>p90E63hz zz(C#Csp6=?gK)8j3qz>$Eh#3Milp~e(9FWJP=~UihtN-ZxtyD#f8Nmki}+c~$^gHX zp&%Ub-rV&ncxY%S6(i(qSYHo6!^&2ZGXH&l0Bm`^R<7EY`c%+D2(L`GUO;uDRDg$ z`+`ZXGImoEiU`@9vKTh$z82C~R{~n9>f1=U_Pnp| zQiven4=E>g&s&EYpw#-k&)A|A829~?1Czc6BSYavnVVOqH+Na}L=hn;3b_hzyGmAo zy$6fE*%^r)?>UX^XEfD!e45ZX7tu$Gu3O))Ln9Ee_UA}3S2yp=a(wLT5p z#oL|4!QVsTGDz=?#lTE7O3y&w+1B0cb)Lm=-){<~EW)#9yz%c+OZh=Jcy7lBW`{M< zDpS7Cl~3=_+aAsP3mjdA9w$QK$Ho8T&`QGqx@cE}RvxxkGaW{qoNA7f{ZikfGTj`I zVq&6$(AURee+fsZdl7ug^ifNv;ip^W_}*7lFN93H!chhH5pp7u&mIwc=PyIIJx&@j zL1+BvGkK9B&#`e5VufXl$jb%Tw1Q9M^AC}CdGg!%(D4$&5o8}(alDJwtBB`9KPAp+ zV!hGmL}L`}IWd_Jk#K!7B>pnu)eq=eNiA`l89u-a$;hKnaTF<@w7_jkRG-Yd<%kIE zdkRG_4$HY}s(%(pk@ya4rpD0Uz+qG9D-BO7Op;`xczrD~m>_j6{yNx5a zs^bmzYgey?`R{s4kpjYm!M|TD#pKMU)w+OY$)zeX&vA@#jS_!#1_xsc&O$sPgD!Hq@>B?Je6 zU5+9VW%5w?Z7w*;x6$TeB39W*7yZxyE_NRA{cj!aG%;TGqvYSbHj>IOkRn+MtB5#A z%YVU+7eZddMeB*x5eMS1iA_f^C_!*VIl()9GV8oB;zCIk$MrtTg|qNzbAkfvGwh`~ zm#UbIC%(Q_`{qX!|K@##gZZ4pRmYs8_F1qy*KB&j#|FD5^OA56U!|BF=+FT2J7mSR zdBCc)hFvqwujW5e^;GEL2)k$SZ0Fp7$w}e+BG8O*tlpJ4#H%Og5tiK{rl@R?fh6- zEFE435E2@-x7U!uguKh^u%jK(Z5#3@HEiC44q_IvU*~d=xDL&ft&`k-OvKFv?2e_G zJW=}(pS8(sFR{i$0{}jLmeb;urh}0hvt=tkLz=>9>Tb@> z)B8X?JS^(pJT8c?y6#IwQ&^*(8@@8Zd(y>JVo8HdWc_%Y3XE*!NahjY?A=t86kZSq zY{R3m5L9MoG}eSoW%2EC$SVz4x0qAzNCQOP zz;Jd=bhPXKNRR3C^L*$oIOPmxmz{nNt4ofye8w-xIOIccJN)>j*<%o}a2 zts84Y0zqV+=Q-ebT-XRc*Ljwr3CoIFSy@p9leS_%UteDfck+tD6GD~FvKEZEp8jFA1Y zaN`4Ts&npit{Jz!*R;Jc-;)`R+vl3h+8dsqf7oe=^4SKrkf6&5eF4^}*2VrXbwks4 z!X33S#}GyQrOUscWG)8Yp<+Y}6Xsy&s>ccAV8iz)Utb7xAA-VP7>H%a)b2N^2CoNA z?-Ro|M~>D2Q)n@V2O7FqVMVYB+IQoLq)km9=FmfT7j#weLk(JQ z^RCkFe}nFZg?;LJxnHWv;tPM=Z>bI^8Hsr!wt^3WN!-@(qn_bIWcSWcDocq5_aRvP zBtM8M)%K+>y~SfBBXYkI6jIM=VrSprtow_c-+se>O(DHcqJz$yM zMc>-+LpCa$#zV&;-ka!Q4y2<8UYrZcRTZ{>^9;*#1%BNFBiaVBpVHh}KHx~8^r^<` zrACphPD&v107p~(rU;Vh8vF))vXMt_lVf27UPXQxjVr_v`REZJ`QXto=<$*Z;Y1vk zl|pLZIwxg_J|&0n;x-TyV?@7>B%QlWa@)On-T z)F|il+oia`>31#~6?jo2?5LT2T$Ez^Sd1Uy`v)RX({%V55F7Q#XPkWxpk-2#>tVEg zBBpsaqq#0?Q?1`21g)=v-rMMrewax+=%*db{~cEASM0EI)mp7D<4+Kd7_9#f+PS&Z z>v=9Qr?DO+D;K`TMoz|HH$1J%M>Z}0v&!1;SFM+smllVq(RMmDnjZOMju!%@kEMN_ z{ufd^KLi6%E`vtwd*sbQ`E)@2GmCbzDLcUgeU0rt*@!b-Spk1W+d`%PEHxS=?4{si zkU0|sxslGLYA*phk>`4SnXLg(^Ckg$=hDjbE>+;_lV9u0iD3tGY*S%@Y69gwSva~L z?be4XvRk*xoF<#f#4Gghrz+6Q z^j_7pk3SMr- z+Abr5py^*h@b2@ob|H9KF9t>~SdTbs0;L~0{kafAS;YRfWr8!Za;n$uPt!_$jX>iT zm3A~uoANkX0HeRuLxA_2JevRQ<2BbhD>R42<~zp?++GOO`3Q<+U2FH)Oz#>7!fQ>` zf28kotmh;a8*H-Ra0NeV+Wm97rB0ZA#rl1}R?u5YGRMcXpwXhM^^^erAT@!0@)ZP! 
[... base85-encoded binary patch data for non_greedy_mv_test_files/raw_1.png omitted ...]

literal 0
HcmV?d00001

diff --git a/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/raw_1_12_12.png b/media/libvpx/libvpx/build_debug/non_greedy_mv_test_files/raw_1_12_12.png
new file mode 100644
index 0000000000000000000000000000000000000000..92941218c83e84926a939e55bc8cef249488b381
GIT binary patch
literal 919025

[... base85-encoded binary patch data for non_greedy_mv_test_files/raw_1_12_12.png omitted ...]
zoSmoa%j}q){qd!mT^XwO@#Kx3d0+@>M+N))1aBmRL56=n5qT_v1 zYo31D)0SN&enpHojfT`WO|Md84QQ7>LZ`yzEUjro106!5|~K ziZYIk6$qvQ?U~4MoQfkFFk1lV{Jn8Hfe}k5bPTU_fJsOwk_5IQ| z5`g6@3Ez#^MoQsxvbF3?yR<~NV6!gxoJhip&yoQUfu)K@Ja3*@_l*d9lUUQ;q%(-( zEj77ZdK7vfwuq+ain`#A=mj(riv5VfEE&4IH_|b!*Q_c*g03|q0Mo;-emq-E$csjzcK9JBwA4&|g z!moko>+mjA{aGN^R0Yg7i05_+T>Nb>NLyMSd9MOc4e@#?l25!c0n~8P4>OU8T3GP~*19 zzYW0pES~sfgm_>l$-#FD001mHpgv*{tY^x@inVZ1G0oGp5rTfdta~Bri{rdu4NKEW zl};{KKe`-jJF~gaZH@VgZDOtp0-G8v=EQ1JQ;_sz)pRCU4vw@?x;^*Y5r*3i+*^R- zL>`{oMrxNempt?dO$SK+RT9zNbk&ZoXFFl= z+MdN=cyQ9ls@^)C|zfVM3>gH(BW+~62 ziRviJYv@cScyog~@@$HZsb2@thzu)f%u^wpFqPS2JvCho)1J@%yy{O7CY)Dr`>Y+&Y500H#4}(e#bxZGt%6W znKU%%^=OpouCe8m-ST2FeXUDyvTQlkMoJW(Ig1?LkM#SnIR;N-L81<$5#>}(t!})S z_>8&q`{D20gq9QZd7fHoliBDgW!nsymWVieQcL^sj}LZ>)x|D^^t2>Oohd(FG`&ed zwI%g_VfZ$)dhH*3dLbS+h^m{DlULE1zf^g*tJLWH(RfZzQahuOYxfeG#pH_-q4wE+ zcGL3>lk##GBZ~YVU;psWe)NMc{)2zG6Rh-HNTm^{aZUCa7!0^j=xw3R2x$xy+kS?-k#Edu@xzUnbdR?1VUYJ}So1 zpVr@Hzxmt${#NpX+^>B*Y=XTmaL3-@sgX+!4Jr4r;f&Sg?njj%^lm&)dUBc9E;lw) zMlj*YvKCM4X3VUR??3cr!d5c-lJMigwH<2?{q(ADu{0lJ8^og%=};+8OIbm4786;F zv#v=L;v--NuopsarU@Q5rC5@@tc9pihxZ#4AnK4?P=GmSW}1u;%1OzyV41whpq;02 z6p&56zm#b8hI+8zY*IWKr!z4k&dfXL3nyz2GxVjlYit^7>Y=1=+P!6Re>ua{=N3Y0 zd5?*??4Dwf+f1u}GT)wUHn@4J>btViQabKtq4-s*ogMI(GCi=$NrHt)ZnfR*DQ%~{ z=w?fqf1}~QlhIpxNxj(>g@E$<_Y(DZ;<2{+=*#EpmC7DhAR9w7J~obfo5Q}{km9jX zg{w^-Y$ukb<~ORV_al0(OGk^PrYjA)Y-dB`i;s&pkKbFbiAug-m1V#!PkFUKlGq_8&F??w$t;{(CvnN!`)%8!O|@W)TyditEOl!GOOkKXHeu9ete`c}i23;!J&rQCiyc=JSxkXF%lQfOeZTHcN?T zUhLeVrZl)K-KLXQY_!%I$X%ExXiy6^ATF;^F1jlZOa1^}!l!{aSrAr%d6w22Btw}r zvy5=#%t6XldLc}&6ZIIBJ=2;m&&Inz=y)1H!%2sMtBqQaFEXpK#t6qYh@}}WZMci| zgmU8QE(mZsSfU-VG+4l$lSCF≦{r3IcKo2#Ox9)l0`E$S z<`tOb-Rzm^T|?(96z0H0w%I5ch7X6B3yc*{l3JKq0I&x~g#fov({}bJVfGB-BtF0K z_Dm2tI*7o}FhBKX3lSrKvb=X#nU&K;IYN5rX>7m%fR3rT14v^y*aXDYXc6XHfitg| zekbyWC@Ul1V{jbrjet9}^DK^WnUcMvvmCDj8DLUddP%`t9k0wS8&_g6(5)aYz-wf6 z-)v&_92l%8bkt4XPDqz2v}qMYAA?we1zwT(wo&D}ZY~KO@pzTYz)7-Z)+x41e)$FT+gvb-Q~NM zd7-&{kcsM~l`PjdG zcjsHTnC9>QgMF|0rSs(ARc@9%9G?uD-Oo4YpX%08Klvmu={u}PpAlF8@X8%qiG%U? 
zUM-4S?zOlkx7Ks%a(r2+eO(%_Q)Dm{XGn8%_PUn7Qh9s1vD@?M7ejcnJ<=kYwns*F zIRf<^p{s*t5W+LZqG(uNWStW}q5;4C9!FbY93UDZ_~JOf-yPeFE?yL_JtRdC9YpHeUx1 ztTf!{h#!L7y(qsR?+(?Q(a|pf{9KRrm^sezN~;)Pio;~F*_dQ@f)xL%u)%gS?0jb1 zvrkn6-sSdoW0Dy;SEhY?H9vUOAlYg==#?-B8g<{bMmLmv**}|~p`2*<59h0oQbfO{ zjOlWnTZrjyBs=ONkYaAzqT@@GdnvWP4YCdM2wiREV%U=&>xqlgZeO84nEQEV=uJrFoD=qW%gK@}wWT&o{*~W&@OS^S zf8*h;yY*Y!pIzMB_;_z#aI1WwQX^kwgVK1doOu{qdA-Sw=hN5&W|=X1^6jR;(%tNV zcP5NwEo1QQ+%Nucwq~2uNfhII({w1z%dFvVldfej;;pImXhId5Z#BJ0?Cr!ke=?sz zn$A%wqag9=R6@)L!!cY}Y48megFt@a&JYjgq)wrM_zc;RcUnt}(>R+z*k^1FS$zUO zPQJTjYSeLel`@Dq$1Kv6JJ_3O=t#`UBoV)4LM|}h?=X`yt?f{W={tu65s5p(c{Z5d zo30l8g^?ns=1oONr+w-)d)Dv2z8ZYdzRqPEmkXM)2+g3K76R!bYLI#}5|6rY6|!P7 zbmqBL`Z8ik)l@B*7HYa?S4#)>NLSuJJ6e2Wmv51nw$r8gy5_BR+U;#g+oAD{&8Xsc zl%EpR(+t(NM)qPdSBkP_u9P+ds+ymed%yBDd(Suv`z0;qSf%k*Jgv@_b9a%9%tHF7 z_ZJ#PE|-(pgDuUHoTsTt?v{}%J94K4|C6H6<(B8gUu-qiW72XMRUSte=K3b|dCnS5 zs|+7Z`M31}!*#rvE2A-JZtCnFm$Zzds%0&+sW284m9$_biq(9%Qj+@D{*%k;PxH6B zgxFe9N@jToXO%x>9G``?Q#vpo*fbtY&gOR!4$roV-qxH+*8R;cw%wRUS=k6l)8Slb zg6Y3bj>fdWfM^_tZIYTH zW4K39^TqmTKu(*H#dERUwAPLW!j2R~u$l2})>uu~M_B6vTn2SMtV!Y^#zgGm6NmpE zi4Y2A@)@);v0OmJM9Q(x-DwIqNn=1@W^@rJqi~Gn4ngdtT~3ySh#vQ}?y2F?i-0gT z0-GnHCY7kp$|}%5#E_Et4t>eRJqvFqyg3iuW?^CU5m~uQQnN9Z>1bUbQ4cJ|&8!3n z02UO{@Dh#ueXPj#b*A9yi+gshK+!YF2-gfomQG-y8_i?#5?#WD}pYAKVcJewu?iTR2!#V7^;WXwiW zS|IyW22VC=TK0+Y%%Zrg(WNcr*vKF$0T||{XiI&o*dN0sG}8qlPck817(TIFIg`YM z4o-{r2NW!D0wQZ^m%x*m1gFxI2|tOCwb!#BzW$ega{Vtj{kCLH1g2_@04dI?W@5k_ zSN2V{yz#ZIu<|ICk<$;C>EnyF!x&yjV6yXMDL7XRDe#4&nDg?_Ok$+X7q}5uOE`n> zf#)^El+LU9?A-n1(?5Ox!JYW_I2P(CP)mr6i7v$PNNbM!YRG3TfdsvJbtLcvmQS;H zG;IsfAw(^n`eoOEsHS6PAwRTW^O#XRI$c(4ch{q8!K)ozuP>4Y@zS2cCi}O9EKmq& zmB~zk1vtcQ{s81SxA@5m^TxWibVwnU@~c=`V_P_vg4wk>fihSzhgf!yoJe?%n6Xb= z=v-tkpV@|<0kg<MA{n%ru8qMjRaWAk*Hs*npipD87(hvU!tx5TeL-oF&V1@h{BiIDENUjB~1 zy^=qR4}z0THesXmOY$VacD$LfD`nOeN0o%2LwBxQ8LwUzd`+J_wcFbhGT-%cbnmbi zKTRLU3ii!dtmBibsrMvPGhFRT+`e^JspRCj^WZmsE%jUU?|gUV?)Q^_N!h;6U+i5m zH{n<+Zs3x*4A!W%*t~u{NhF`XSNu>7)L?i^b(v0N*P}OyX(UcGZE}=Th5SI;)DBaI zA-!5NIn=jl~S$nURbb6R#N8SUoxR<|uD*I!vlT9pmgkW{%`47nojwXQJ8$bVX3 z&QcfowCa~WwKlfsOcJ33bi-JWO^ioGS;_G6LVy@grl zf&)Hf-*8p>V6-wj*Iqw=bw`XPh;W%V-dtBV%<0cq`aHTU&%41J3(PC5JKuaVfx5g2 zdY#q&DKoDSbEdlVnkT%!*MjkYd9xxp>;<)qIde+WGs0n z;MDk)zV;%Py`pl*T)Y}3{5b_>H8xQ|U%IQDKK*I~xbboOXDgE#+rixMjO7 zs=d}`c0+Y2JP*FsXL2L6Rd%h}E zlt>;?#z6i>jH`v~1kZF^z)zIt2HMX2Vv2%*s}X&AXss`64;(`yfEu(S&v$qY#dsEi zDAF9%9y-7t(Tap(ltwn;i(QC5xZFrcN2v<|(-~X#=uQ-9#4%5fnUri0ZU$!DMjjya zNrSOtqXQenUGC`8HamR+2`HEWnK_Q9DN`pN2xNd|YJl8yDM>|eU6O=hx5ycI;Bah> zQ>mCz+?IGDr{^Un63J!IOKaxDqR>bFYmpM4w(BS^qvsAxYLtv@ziy(8%=^Z;MKydIG3o$~79Xq3{FIWY4Nr+% zcSD8fAf&fNbrA=V&wKI3EdZFHP)>0ZFCn6X!h$QJJwvjA)(xESOGGw&_=-CxTWWn%AqHL;A14Y0a35i7u~Vz0 zrR}A04}7@v#Kkx1ZE{;ue0y2BO)VeEk9>}KP0mil^Pf!DWs6O1KHaCv>aXAWR(W@= zx7K-Q{PW<`A8ma5-rIWSe8c)8EhkbzFNYrC`mNjMv|b-qweW^eru)`ds^s92dBoj0 zx_AamY=bEo0Iy8bbdUuNVt}Ot^C||XIF*S*3G@IBFq0}eptoZO66SB*-pmqKNq27! 
zTQ1@~EXCxwlCLe#$J?JY=7Bjr9aED83VM-_>nhr=Sp^irGbe6Nh<3)t=!P6yH}sPw z<_IB58!%%TT&vll`px;>*l!!7(=CE2{N7DRT;}wO+p`UZUpTw`B$n!I$wp8eZRkPV z+mBTK*{I1wuFV~}k|K!Da+e!h-Xx)YeS_G(P6*lI!!=?-ydWwuT!W-WD}tW9WmgB) zb#=uw=dPx2 zt+PM9)?z;S7yGBFrObFgHOXsFxD#y`-(D56_1}K)ul-x=Un^{_=DX!(v8t73-^&}z zx43tHy%BcI_tl-!H%rl17u!c?_mY{T$+8wt={MRmmV38Mthv$~tUz~ftrNwhZB$x2 zH~!2W%`4{eC;u=!SR4oG)_OaNE8~ET-@e(ho>S+wG^lFE^(H zBKzh2YbuS#?KA(~#Q`Dd{DiAl=d3YmU;A4LV{vvg3+ICKTo5nX#3ELkh0f{5fm`Am zC4I3w`5JZo^Y!Ueo&kc~U2ZVZs!g=dUk75Z0xheL{S+^IlsHWKx~u|V)7c29@#PWR zWzh8Jm;#O+Nq(FW$ap^NOT*^|sE@9dE<=703t6USjEJqu!RoBgEG0-e&xD?oAu99A z!>_8^MO}wL4)Zc7rq$@zs7)3~(@SF}0i z=F;5=y%x1XVXw{ZjU|-}Jt`n(@EWUF4scd0=mIVO+g)H6xOF*>0eNM1`;xZ&6 zsl)cm13E^#^Hp(7i~L4_Y%T0;*9z(J_Fn5O0V=fgO~%}0y0*j^_SWI^X074(FT0#d zLopMsI6>%uOg}cnAZCRIl7MnrBB!m=S z;=%*flv*%@%tHnh6`N~vb`9(~kOODJS?OhykOaVvpE;D0gQv#W>mKIroJdRY%7Ox> z*h>o;>zeNl%@!zAT^J$l!&@>~aMH|$;CaW^49PMeZ~AX35K-eAVLXUojwYc5`$oC) zlM#geH727$whmYVod>}ov@{c-Oc($v7len!n4PC&r-6eA3n|(5tI*CdN%;QM;}AfT zW0=owY=(&^7s$u7!Fq)UACjvQSd@kq6Tm8MLB5xLS;3Bto*_~pM5BT!5_K#=78B}= zhz#_N%MD^g8IpKG(HE1UDzt&?TxY?Wk`XQOIV`Su0n1;qVvI7TCg29L1zYIaUBXMO zze`zMiT0umb2ls`#^l%_FXLEfkp*;F!{g52(wP$?7Kvt*(3Cf}Hg~TMi2`4HX11Fb zUSiBBf&7x^oS4t#m~QLY*|;s4;~R5nzPhNJv5&b4mR^+Q6c-~a$9vZZP;XfGvO*MY zPJ2_597ZhQAOnKpd9C|St15}P@7~+ntm^Zw3Qz;(^nW;g`J7Fht=!#xw5{Zq8$e$I zKe&W;i?2f3$4G;-J!2~M*Q;Yy0RZr#vKtLY;<7+ULoOq8QFlwNtWCTxRlR=bc?&hrF2j z+qZr*o9Vx`m6n#L=f!6k;n|_t0zM`AEAzV3%&&icZIg9^i~0E`yL~I|-%K;{A0TYk zc(K-1ME#eukN>!}{OqIGA9L;94wtMpIAszgt=y!xvo_wFr3Oj4a30c*>DVC|=Escc zUS?m!LE_Epz!e=>sn@Wzm^WVlG;)%YBb=>OOjtmOm2_jejgW{ zr6+W|m#g{hshA{agVQUet40=1ay@MLx7SXxum8EEZ$`z#tn=0NHG8|l490m(O7!?r zjux11_PSq1BFB>T;N(hKYq*Ws<;sbXoXhTlxHc$t;_InetUE~v^3g`+28TZlF!JJM zWB-?$8y|2YA7?IQ$H(NYcridC;4TwOnee&+=`hkyhRhpFfg!=3c|M&rZ`B3sd6Vb& z3)+L8vZYOqKJdSBSh2^V=_VpG`ipUWL?WXD1IA#B{{nvwOum8!QB+Q=7cChie zt$Hb9m0EqY`mg7|Redzt%OTa3?|+zVRy2A__lR%ZVc+~f_@94}x_$3g|CPJ{>#gF^ zM>p;$H_OJ|bIKp4T14#Ld-tUl$Mf}dW4$yVle_%5VvSFWbS!s$r+%bZ@#L+ZI4>lC zlj>W|)WT~~%YqRgdR!(BZSMx$th6$E zY_Ip_&0orKy>O;8lAkD&38Xkp*cy{pV@ymLzKd>lwgjepW2~GpL%s>U2o{n|*s9s+ z4|v3ywc9hHuF55CHMY0jXNDx*qjtR%w|?$7VdmpX6p|MimM7!DXB!rAyLj z@*Xj9b>||pUT{`tG)oN@ePbzpnZA-Lm`GhZVBt^lZ_kdR%|YeD^&g!LVn>;7W$Dk+ zabt6nouAYnSsU%)vgw{9hP&dNpr^%tzacIbdi0Qi`yLt&O5H zoSCuP2i>~pZ1oSPEV&}@sjG_ZFtT%-4WFc z`M|U~TIu$DnYbW|#dy9ijdi9~il{^K%^UYxqT7SY`*BN9h{+u59n3lyUef)&2V|qB z%9|Un%#9`3o~&}Pa@UWp-Wmvl3DKJDh(ZG9vcIO{bSi}3l_Mh}N9O0?;!6wEBpZ!6 z4)ca!FxC{woQFm~^EE~{+GfUs;VX(av0Phci!ms-)-hQ@YvmS^!9wfENlrkr&(e~? 
zIo${$4wZCZG{4K?9iP!;%L(;yh%RsiB2N062>sj&!FpQ8E!Pz)j0v z4KaXf0fQR=uD)g@l0p$kU8}ZmIt#OI8a0XK*|y3wcBq^N9X@o%EHnYSS8jDFoUNfs z(#7ABy3_Awyoh-iq%yfc0`dzJpf5pEihmrSH4=P)h$R?bVT^PFPYvAS+VeR|Fe7CH zHD@qcp-c>?7B9_3ZG~sGaioI3PK6w%063m;B0yxo*pu$W2Z4JT@N+LK5r+Ca==I@g z(+Lu@7LwH3U_wnHQ%J-!4!cgx*Yl~xlDG=%#^+Bz`G5Yw^^c!DOBrZa*Rh}(3x*;& zIa3I!SHf`Q<@uVuZ=sV+0O<8_4s~iKxOvf8gBGd;4P>}Ap4zo(U^`*AK>w4 z#}aJCADY|W%{-m*>DMR`5bGjX~_rX{P~$<2d*^wIU_ zKfHc5xm?>UWOmKs@9u0#)$6zA7+@sl6c8Ac<>nAuHFW`!&a`a1*J#qK?K!QnD>Hm5 z8!jlC9IvT`_3{-EZzd4V`{gMC!nwEPy%{3eFB_ivk2w+$fsZk z`7@R$W4v@5+0_ciUHVw8u6-}F1um)zn)#&Ag!yz51Bdr)Uv+Tnw$*%$(n8au;Tf1rZ`i)>Tj}}Lcm6DI`}gsMrmiRwlv+R zGQAYzOj*{5gmIlRBBW=8utvT00wPeQO5-Gi(H)Ewb&yOBLkhDrN5n`Mos{6@;@m=) zk|HQEMq$2vcep8@-?T=9AhW%)vMrDa_p2}d{!cH($trw(XitvUaE6pNwBwkFA^QU!h7{(M2j$%#(6__j!HYqX(6roKyYn#7cRFpJ@Ft-qTv-RraN$UK*Fv zk@qdkKT;buPNVg^5#7-9Sp_q;=%=8Mgn4hRm|~oe-&U>Fx7me}Ece-iRO-@RDzky} z!pr3rxwac;C)2UUo0C$wce@vP##}Dn&Ku%ZZINi5pRBssc`W}#aVKyc#LwV)`f>8v z?Uz$&b!jDu*f`52C4e>+A2Wm*0A; z_3M}Qa&Kk(-CKoUdn5CW@U8XLhl3!?3**&?|2k2qQE+3d_WawWU;97*-v8!z{_?#y ze>ZoxwKK&5TX^QR<2^-ESCW_1P1D~FOIk@=Kj9j4_j-+|zGl_Yb9<9bVY=pgKDk4` z^C+}lAFaRc^9gJE(K0Ejr)gDBg~7xc%O(5i>wGX9tQh1my{`_Ym3iP=r39B7op+Zr z5Rv4a%Y9H;)OGOV1SKct+^xv0M@G0niUbxIsj~}X5jdQ`A;gbpbsyGsnUUc~$ETti zqYrGhIKLRP-nUj3UyPUG?AeoN)`rUY}O(Z7ejjw-(V|qN3GaJFpM-o+; zRqO=0-k%TdCcZs0dkr!5AQoOvFAiAEmjttq;q>APIXHN_zB=TqDUf9Ggx`bAiaF9+)X*~ z{23FYX$UZkx|CXMK?O7*v^g<_rr@l3)-(`RT|P9iCwM4OTTy31B)MjftY*b>HUM*# znQedqA>(4H<}_WY5C^PzYbS$oVjO^IiN=&JJR(|+#k;cQ5%J2BwBtMrm-4t01$5d* zNu5j;tBgcbDdm8x60)g}ipRokbFp z{%N)FK(ZDPgle%>c(umF=UA6SfKNC#(<_M)GN@0&bh;BNinxq<@x$lg?6ARq@9T*j zZ8=u%zBJV0ikgzZhQ}sLnlX|;F4NLMvZDAm@9v*|N(+0Fkr`A(%}y$DW}ugI6RfBm zsHN+JYxAa-IjW4;K_yLzowHiTR={uT(nlZutVJwesJ}(#(qaaoiv|CO0)C5*h{p1h zRP26TQj8J8njeqCniG4L!*&R$LPBi^3E6%9=AZtvmp}jPuZDpY-cHPZk$GR{gWoCd zef)s9N7uu@(0TafUuFJE)|)=@{@(FvtssnqR!a8C@yg8I~sJ^3F+IhVSq{q*E-J%94~lvxia=Ogv4<)iC$I(4qY zv!TD*G`1dT)jzDy=hN2DbHn$&M;Gs!^iN}-&?_n7dc5DQ8B7GKqb~({nhD%6eaoNC*)D}B zLOgcPVjBnnLX~0f53b9(+p#eyhKO-P%TPJw0{!UIlP_{XHCgP<9uic-yzE`@=5K#3 zsUdkf4EM6ucq&y~nv?@-6mTT#9V5I8-&u|3Kmn|7fAsp7A3pwXu0H?Hr|R-s)lbH9 zntV3DS;W8gvzqb#CFkdV{=faV=Ktn86I2A-UN4n?V_a~T`5b-Kf9z<5RD2i1T#p=b zmR^hL>3M3Nkn0!hhYo%47rrKQufNXc#rhp{aT&Ig@#uijGDsX+-4uU|F{VTMiw!l; z@WXw`_qd2QxLnE@1nXiV`jv%q&QQjRnzRg}?zlnRu;UUgOB~5?O5c=RNe0`A}@wr@lt9Y3?oo$1bv6kp{Jww8+BNqAztRO*1t&2IgKFfuox*=?zc3DHS|&of*3EXji@f&p(bmfJ^OG;na=X8+ zH-A*k+?DGI&Ggi9|FLL_`nf#w+l}$)(K;@iubTL`go77mVI;NFmZf^}g`C8fuety5 zVPZlAskzXq2(weAvQ@fKQf1O3pp^a`TB-jchdILhx9%8BqJ>gM*^ zN{~EgQR&=U7PO_rzqL1f7UAo!glloJGOAt4K;{m^fo^B42Lri{=-tr#Y%nL zp&Ab8a@Ft3Tz+upN%mRwvP4ry(sw=ng(VM{N-FTm2LgG^^1d(-|H24;&`nKBi$x4S#hG1iBw=k#X5g8+z9=h zM`brUG(;(xw^_rQH*#6oT%^-!F@pU0YvHXj#`6Wk$Do89lgsgNUJQ~6uF9crncS*) z?b{iZW!gJ!^33Hn(pQ91upE~u8oMJMMnUwZqr|$$OT}09QW3aI2XH7^MCkFO3eryG zOd6aZS7*T>MTZ@jQncI+UYz7_?SKN7sCqWUv-U{Ir0#PoVA^riVAN-x%aBtBuM89& zij9FBF$o1!3~dQo60s%nW5hP#_lFBNE4#=M4!YcOcVzc7^IQ4`;_VQr1ZaIW$|vMJ zU2oGt8JKAlo8%&$Zpa`Z6!Eg?-nZvTI+<8XkhO_dPq8o1s^w#7NZ@WBj55O<-HtZv zI`jy}Ov*_CgYFm=1{NRAgR!)>!oi3o<34b;XP6+iaK?n2Vd~V!RuTj$Ynh5F15iO2LrB?3_x&g@uax13RJ^7VuuaHJ5q^J2W`&LwR&)F`S6;W$zjk)lyf;4uj-ixHGscn=5hV)Cik|XZ0MU|8Wgp-YsHz^U zHrDF5eUQ1E8Itm~c2i$j+g<8_9HoQtES(7^Op@nb?mb*T7305J(Ew!<>;IytJwj}A zrhvI#NyY?R)VBGT?wyBG@2dIys||j~HXr}F+ItIt@Z)e2)Updc4YD-BbUUTLzU7B` zW_-+UryX}|2TJ6-HyD^fcP0B9VtudTFpdw7zA1Hz)^7v&!9V)3c$0YazgHv;{05-W z!APf8S^jYwdnITDl4X*3tu-HC{ZIe-)$ztmj_=Jsx_4*N2;bVdDM#^Nn)DNycb+29ofA`zFNd-`J{Zt-q1*^K? 
z#a6orifCqGfGN&am0n2JVpuH92j&8W^;heE&;P~uhuP#^bpeM5=(gD`&*pyWj??kT zYB8w!3izqrk43H7KmEZUTG@X!*g8A-?D==(RmzSQ?<9q*<;U)^Tz~WHtK^U3wFKFH zQC{A7R?0r_l>asIPlsRDoJ)dZiphl1_?i6-m-XkH=X?4^s}QC?39)>~HLi`>a9)z) z*|k{HY{@)37ovQM!D;qk>CGAJm^W@Na;woLEv$*FFf3iNSS;8Cj|ANZ9rMq22bT$rYT=%*9PKX(mP8@^Ja*f8b-x;l=)FM{B(JA$4k5Lmcqqqv2zMq9NZU~cX+!d}z z($?mjObP%ak106`ym$H6|4$NV@@t<^Cqdy{GCy@=%N+*=} z?)lb=Nl(qh^u^ye6)LfFCiXY~clZAJx4rLlVZ%;q4q;c6Rb+^aer7FheG+z*V{RLx6{cUj9C$>oBQ|2U-C?7r7w3^|k zSwXS#+ro`?$DS#w{b_aloOt-=>$Xz63Vj5hF6tKbu<(=apHG$MS-;hAA2x0di*f4H z#}ZoI`pU1b;4|lHZ6fE$^X9cE)6ePiZ=quU?8P8H7-xP3N=v;gTESCeBNrd7lHOp< zh3OL-b!Sjo?LS%2z?!g))&9R3_Tn(Khxi1R2p<%c!DU94bdeZYr`peFSPDRB( z$sLu3xxUl9ou7BFzOmVgIc{>yI85XxR9WvFH)+&TCBZCARxyhVRiwe`e#y1ceIO4Nde zWF@NG>#Daw2bt@?lBbiJUgag8Nti}* zdn5`sUGqs;r?$H6fqhPthy#Z$wL=tZJ$#Lc?~)gOq_EyZbd7M1Oz?VYh_TH(a%Wcz zx(#ymddNx8mCGY z-y~shaw%MCPRCv^KEgl?ZOO~s1)_OLo6jgeYeHWPvHVaF08k<;%PY#5G=~BejsuWG zMdzjLl?^5~rD@-z?ePtXW~XywX4E-E!inYc9H~}gWFO!G5-UZ8M-f{4<(*<7-}V4k zfd?M2U2ajcDfphkx%3#b6nU3g^xIIBmxwVFgHHHYz@4!*VK!KdkQ|dRw$2Jls+}Er zs%`paTmcHah|K*LOZO>aWf3ePP+)*mG<0Key$mfc97G#h!=+15S3sdEj!a&CkrGZDP#ClHmg94Ssu_>1}1FR`Uvkpc7Dq z)I)B-#vgvc3(a7T`C$A!^TlVA(HxKdjbCB@@>{7&x6hpZ|ChL-B%^`dy|UzoNc!kw z3f`@+%_`sg;O$c4z2gC~qv^GivnMY<|IHTaly1MrqiEV($Qj9wbnZ^TRHR~FSll^pdji@Zy?OWmqGOmTRsFI zOOc$J3=~yFAq}{9sk=@0=lSysaJ0V783_Gl^`=rMMWm|FG&B`2Ww`%ed?q)r<*&E_TCQ4+*%>Fxgzh%is0*lwA`p}7GG^mlY1BY zVgJh~f7rQ^W-*JMmbAU&HTPuV=BQ-)ESeBTf$YLdtux!kf+OXLu~V4xe1#sfx2Z}@ zt&v=s1eZ-t%FN5#i;k|orR7$_QsS2rW#&xj3{551O7rc5#W&8&C=Do!6YK?@v%Yb8 z$&C4Fd}~}C1`+$5qa?!dIAE4h)DE`Y0DMkmCSyb=`oyc(7Dr-K{L3!IgQ}_Lg;};oRFiKBP^GAb+ zt=B|iCeKrAqr=NwX<#l7id1+S#b5rX?dvrzD zc0lLvJa)8Fj5O*ml~iC{56P?aGNxNBzB=1dRY@7l%64TBh064*$}9sc-ihM8UCAX& zE!**F(VUiyfjwQlmXMv+i_MZ{`eLL_9BZD7EkM@5OrUNr!=bg1Lp)$sWT|v))X*1= z8A~7&G9%jzId|I(p;s6_jrh_xrZF-etql6>Zu0xDq@kX`a+|zxlHBl9*&PUOf-7&) ztxjb60-Fg|1?|vnxORzrP;iB~&pVUcuq#|H>EJEq?5sVeGVb-PgGLVZqV>`so;Xah-93ySu+ST1ierCuG?3RlIDy7W|Fz*r+G` zI2ogyKa!JdBGTxK?df?Vb|g(QBVLD*@T%v+4Nx!!B!UlpVi3w??{MqV;Hhl0B?Ti5~qusc`=3 z2>oi7yO@2$N?&&gGd&rZ{9GBpfdp zx`WUPW;ORa9g!eD>5cF9hI%erGG%iLp*;wD!WVeW6^f}{^DUxHaku2XpkvxrXUa3shssIqqG=<~(n_4{M6d^ZX(1AZ?<6oY`YlGJ zDznN8%u%W5;nsqF>j~02uj%utb`7BZyzKezATgmWHr{A2nQqjm^3~$#5F(i^k0zI@ z0eD~vfbl)WnWliXA)S=69srXwiR?7Pk9-(RI3K#ss)K%d>I5?iafuEHw_#wM4FiaM zIk%-5W7lj*Scr28(2%@K_(-(4OOR320Hig=G50bD0|CP}f z*<_+q?k){!&LW)>;-aX*j(j@SNSwDab`M9uj^K}P+Ad5 z#9?1>06NQ&zhv_+#1KaYmeTZmJXBWR#?bC=DQ({fN6fORhrAk%;T{Cc9L~)!Juw>& zAmz(yn-nhzvnlx$y^8Qg5lR>(kqMVZ0I{{{B32$}gQ+PlM53)77YU$^jad2(YXbCH z2o^&9(jzJ@y)B<8$`G4F3iGLn7~qpO4(HBU5))}0SkKT&)U&K^r^nlf97--yqUw)N z(7!mfUX2c0YPnF@{mu2}?d`mnUGSkfg)u<5t52EUr`_e^;mx%2bn2Qk(Y|e`f$$W9 zpCxmb#Z;U(5(|MI-e_9$Gc)ZL(jvTZ0p|=+UDZIvYEjI^E2F-D;Z~NOJxRMpE})Lq zZ>2Jp)*n#cNsO^5(n^LRfzSs5)Sxd+D;N(f8j71*R4r|OPp~0)cTaprYaRTL7jI6q zfwR0azU)}?blTJ|$G*?2|MmC}pIvskFFrX9fAq@}*(?0P@YBU&(w^DX&+gT(&7b|P zfAFvVr+;<$Oy_}bzJ=4%LhuqKf)n5N@~UF zr;WE;OmI7WkxzUZnk69%-T<`esww&&wK~z*vL-Mg$$8`%NuQ*gO)}v zn@8LPFpE9IFm)Oa+dO5^U{bKo6EZLFw&JNd>JMDXTz*xIW4*mpatBzn_vN*&E6kuG zSPqWmFNepXlze*RHV@{1bkhw1Mi@~`8X;8!`|i34Kb8}M-B(v% zeop1xwa=$v9vq25+$xA#3o|>4Egp8~;fSJa;Bf+jBFeUABKtNy{bC2!m%R(4Ap zi3RsoeQ#ge92!&qpEZp5!6-(^ME%}PpSh>TB3;sgMgP^W*q;tMe>yQpZ_q>gU?rZ9 zmsi#M$?2_B^!l7W?FbvuY-_m^a_icw_x(Pbx#=#SnJS0fRRh;pJB$F)3%nMpgz*fe1e zOI6w_`S&>=kAmR1Y3#S-unjx&AyV=#s}1L4*)m_RWV9i5F?tr{`ti%sI6O8sgV>sm&>1~gPaiad+o%6O zlK#6{vjjg3^L{7i^PcnOoNm6iyKl#tp0qogvAX~kflCrh5Q8m=l4V*dxymk=ELFKm zQkk+P(zZ-fHiKjc1i1i#1vc-@PVVXHbh~d(Z_erD9Q=o$FY*`|1p^{TrTOLNbc@CW z6g)**6_Ph6y6Y|1j@t`sdUVMow`U%=GJ+3(9swB_X(F|<#O9OuqifhYpL2`39OwKJ 
zvzFu}YuWPiMl=|phIF>OTu;qov@~&E)@AIuC-X{eg-|d6mkk?iDzEp8^F1t6?HkWE zXuIIjvle16ORZ%CO{_ZdD4%^6j6k-0kYZ?;G=u>CPA{#DQU5wA6k`vTci{%&Mw66M zB@dahNaNZD)IzYg2QEZbMqP6_?R`R(z)_ULxR+dvr)(DDXexkjUju6dR4~wR;Eu*h zDvkF$P@K{f7MEfdSV~XiCXfUGFmo{Va!i9{cLqlEgu($1A)+8RCsy!%LU?UhTN3P1op-w@J0oY5&g#2}7bA{<< z#Yw0@G^b|)r$TZw1YVWUH~;{E&K8^ll*~-vyWYTs5y+e);^@Xav$Tegm6!l#ct)@w zx#UrpE=qCf28LxoF6GWsCD!i3!hvxS6-Y$A6Z!)Kmd!*K=CsMSTXo*fZ|KE zp9BJ^@?>Wen@}I}Aiyf+!D^;HSuw~GZ0}Q|1t1E|>Xqp%=N^<}0#>W1i4X~50AG+;CY4L*0D&E) zY2)eOGvSP3yr)D3CF}Y+azdwv6ef~izC}^*o=b#$OSz%+8%8OtWwFFwoRTQ<(!sy1 z-ELJSR^1A3t52_cXWrA|)?Jx(+bm3PN+omB8zV^-0us-Vut5UUYUH(Lh~t#NgCWM- zdOQf%OVip-mH?B72+l#IN#2`jtiCEexl&Hg)7$dy>woPxZ{>u7{C1Hg0igNwKmWh| zINMp<{`TK&6z<-xYfn&lVVgr2~!#&6vIm=rg^)Ud`@fm;%21}jIECwv#nh(3jBNME$ zID#!s!q14AH2u_`!kn%y00GTsBYJKSRirBqiF5>sa>cSpds1hHY=^Vg{NpThb;n ztZy4*!D6a&+4Pc>UX1)`x7gixLOJ8_2x&KqwyrbZboEXm!X##EZQItjFAB|D1|}kd z8n$=fGK&{$@s%tvl)^q>=Z&1UA}dOkOKNmCEfZF|M@ft z9@k_sqQC0mDMQ;_xM=h;y7bfHFz1|Vt&~leS%jE|IN|Jy9_M^PSH(1w9HheA4%}*I zt+A5{QWO{1!pgaZd2UE{>~-d8m~$@INmg{X)?bzPkU}*KaC_+qSl0ja0X47KSA(zg zR`-js6d^ust=C}{L!Va={u(S`^SXJJQQSe>NHx7E#m>PmtZaag@atxF*{oyETvu9A z<*FNb5lqnXqR*7abTM~p-=B8RlBkBe_1nWWE}sbLPi=HC{56r>=-%pQAFb%Jw32OM z4Zj|eVBI{=*N)98zLA-2R+<D-qI`d?oo>(M;fgxAS{s1*pQt_%VahZPRxDLYtBR(Rz!!TRaD zK2S0VB0!|O>JFI0z^9;PWfMz)(8;PLYJpPkuce8V_+>qTT+&vlA+85;o;n0&No4^H zW*5dhP8Tyd-wUfn8gsGAW_%$lOCWKCI12%VaLe`qwZGsTTZ@(J8JJ#)mhVqyp;iSH z02l~1;R=@49R-q%KLwNK8M|#`04C5rTsdGlBD?|z7FK~a89zhpc@%5z0nE^*5CV@W zOF+6aok?cK05?G-^9&bPtpuoEX$J=kCeJWfsRW$iem88x7C`L)Iu6h@SPlmCFmpM8 zg#>AY)-YR#xCm>ZcA$d44$HPk(0Js#V*r~+(wEPbL=_O)HBqln8C3w|i@|nkZHk(N zno+kPnPM|Kk3tkc0Jrp8pyz;@r?0|GBwQAs!W@c|6=WiK0MNoau>h-a06?y15Iw35 z`7|Nn@l=@^qNm!S1yG>?r6+S&2q1PLk)R6I1YAoo8be&V^E_QZF9nd91cOE6%^(ph3aU|( z@7Wfc8cqUCW%LffN`@Jg0C>5g;!S!TRq?gxNNi2>K_Ruq-3(Jn?$vu5*-1vyU)xL< zl(p*m^I~fHR_Bv|y}!0oFGlBF;cv&gxf^KBUNHH_{onr2{*!x^lu9PR%P#PxNupT? z2q2KyUp>)P@Ov=MvAmyl9OV01gTT;1G16`6bAU&fTxx{I&Zm zlstB=Txa*BjkRRDefVGh)Bo}P=Ld9xdh37v?eD(zySU2UxVO#_zj;%D!DJ3^9apbX zLZ!(Dv%mYhjhjOFE&5y4o40@K? 
zuDI7|VwpWE<9}kp3TWg8%zGQ`oYh7Xcn>;diTAHYn44y z6a4dQ^iidVn9DFVCG?cWD*iYP+)obfLO6eG1>;n_w^7^AuVX{o&Q~o>Ucn01X3o6Iy&KLIz8em+?HuC(6X3t!vNOx879nRAmC-2(VXzNjzIz zmp;o+Vr6(Mca2OJu8fsKLfp-aO~-nl%igeRJV?8o&*rM`nv$q3LT@Imx0CB@Mq546 zz4aU4MC%Qn;eUB}LXSt(lWtU*EX7y z)zkwqZg!*yCa1%B&1_MoN795jNwlGS?R5G;r@$3^4USDls+j76tGtL2A< zC;553V=d<9{)++^Hear}`BMw~B6moS3(?TTE7w_is%?^olMx~=&vLUY@kh)2F*q6b zargqc?9L|_&y`h)3+;1u^IZ3=kHh5tD<@rhPUXGY%(BntFkYElX7gh4)16bkdM;*j zmuHSSwfUIz!a2?a?!FT)&g}1e&R37K>Dl_=g0Vh%;@5z#3fwZsT@~o!WMwwk15_1+ z({V+%y)CV6v7BbXIM}&N0_OLIS53;2y|8UNDciSD=IBuQQ(x|<7dl$Zzbrq$Z0hrCH=H=+_@@rWPKuj&lNSN(bPa+##BG3x|rKyGIR8h7r#oSb!!X&{Yh8^C5rH*lW|)q$Ftq`-yL z%8nTgeUr6Uxg9CE=nHc>r@WYqM>dv7xaXOSwqSI%m5TXst5}6R z1TS|KL?qF(^a((@go}?rPJ%FZONm_e7-*F$9($Go`ZC5>AlmM>4)mJ7J^{pE0cm<9 zt#hCd(EAD)p?u^E0PBk)m>2|AU~j1C2EZhc0~81ouxM_^NF=}xP|yNogUIlZP#l#a zfjjAeWq^73z+sv7xX6=0o1Ox_LM9NC;Ao(EAcljrZcJVW-@D~9Cc89O zr#?XilWmx$F{x*n6DR*nY5;JQWL*PNRGVw63JW&IczvArld6wQ_;fK=S76m?cgdp; z#ST=a1$uRDL-!!RLXzMupb!H(lM6o z?LqSdaz5Coh1h^2Ib`AyH*g57(H!NnZV`S-w{%sMvi$oP=LiW{xtyeeS*FGo2onI9 zIq_tiWWSqoBU6C%FscI^@E9|SrUCic&iLG3ev%q* zDaR}tH8-eu1OrfRM>vWj4nR`^NAmZKIpKNw>8WI28fpQ;l^sQ;TT>~8oTcyGzm_Nf z8U_fwbuSO89-I-s`26zm=n-OpW|Vp8uly;kUFS43F~+3!Bn%+aoriEMnIh|%+szCf zU(0Bj@C@(i`{9ok*Ncy+hPnWBEq?an)%f<=5t$NyrXFm)RiF@)SY*viY4nGm&ZM8W zKPg4n=;E6@zaG`{xdgHvk&F$LD4VD=Wlb=YX!Aa0sl*1L94seWJq>`Wua`e+rtteT z%AKE1#D0qga7Lg48?yKksH{zuptaEC+uVFWV$}7sm&Ebt+4UzwV_=TH{LmwE#m4S8 zAFNAT#q8$iA3*0pK5_la!U%H*_2)a{_w+w6KY3vrD8>5Tqs>L?b-c2k`2POd-`uP` zFo@y>;&l8hae<>c(zqHShu9)l#CAn&YIOol^+NGYRZ@_vCm!%gMF!Xk5E#sW0EktS z_Otg1n5LH?JE2eq)ltd@FPJpJ}KQ^4ryh9MSxda0f^E7NV5pTAF_QoTD+PQ6;6H{Y%d**l(zqr@;TXz@iGENT(?agFkxVc{>J^7xz zRjhq2MXm}niOY~z?P7EMfJL$7=Mb*|{~6Cr`CXsd(8CO%$hQFAkw$?{p46Lqi zrJ5s*B*xDy&{)PjX0gvJDFb(&5bno#@;6le#9it4m5jku>J35Ht?N?ITx)$ zx=M|rfR#6J0=z)}Qt_3Kur--Z$H~yizr3vODk?p&@k3@+gG5NDDi^qhAD-2oU5_GP z=+Uwv1WoF;0y3WKgg=dBrlPu zPu|-S?$o8yd2(euupabRRm#4~A9pI#*^qd*jYrL3u>Lq5=Phb_q;|`4D`bWuKhqJ6 zExsyXiQ9^rl<}(%lybvKZc^;q@omzx^kuVc1)@Y&ZX!&hdMf6y)x_n}cDXdaE!#}{ z>JD``e50#J&?sA2P{Uh!t%>>iI8 zs!3ZH*Isvf5rnV64SmyowU~L8kuhDERWyxOuXP)n6RgEMM=OQn3YQ46DAk^AdH8ic zalA>*;bBw2BsSpq{7tx5K=sxa4o1*Obs*7$>lyDJCi0r8zCpfC-{E{hP#N3QAta%#r}es1Fa&;%Oyck`|%a_jjScQv0mwd4jPwu$RaGbN`Y zIl(OutJ>fPpn6uF&uq>Xa>KYWhZuaL4#p^Md+52KBzAkE<1KglP6Tce$z_#sGHHfT zUV7Fh)EV|#qsdTpw4NkF9jqhdqdgK@Gf1?S**4GwT(uJ73Sfp>Hq28i=%TBOKEaxw zEk6i@8xDXMgu|8;7U=0|w6y~OO+d20vfbexfbSZSi6XQ;ltHw!d$3kw+iNI$E7Pxdq;hifR0EJ%CfF&fM#sSCx=Y$I)AWfiwm`7j$9P276!vf$G zpmM`O7c7z1L||6a)sluw&}V5ktS`hEa0UoGbh(6dH_5fGFjLYakO&QS<@gxnp&s+#VUr5W0^{Xu3h1S(PZv5{NIOHH0&Qd2ASDm* ztFO#(D#pRnV_^mWwvh#B1mKiuJrcK$ zf2b$`xIG=81&!y@#I4v$HO!a;69*!m6J7zfKx&JnIfe}ZfB_tq!$U_248;lw}x&{(dHPpVgGqWN%0dZrD!Q;wJSjdB(%C|58P3boU3D{1xwx0jk=c{_BbV^yCWLPJA<0 z{@RTj*ET5YKYiqX^!fbS(nh_>Y|2YGtK8L&!h?1Ps824<(Em3@%GFKs_5?D;Ch4a)xN{s}b3k-nyLw7D* zJU4Je99|gz%`g7VKYg!`vKwFDNJ;pE&f@6e^H2P!*7*|lHA^Kh#z@6(AOWQVfOYYY z|BHY4-~F4PO(x4Xck%VbNtU%*DN#gK{)Up>BWF}{xyl|X))Poh!+p;43|W(eSFP*;Qsbc zCiB!FzytWb-;w{f|GU4nO;&8a zkrHO{tLB-eRl0X(DU@=KlT?)JGxO|JRCC~^jW|e`f-0+jb3u1G)v#z?sb4mW7!eHxi z&Uvf!J2(zp7?1wN>NKsKvXSchpEUGKJN?-Env+ z^a9T%q^3MKgt7lE!t#5HF&ub8LkxHii9a>N&n<51;}JrrNe19mj~_)77l;C`zdJL}gO z6!J9xRF5?(wQib;&mF%TWZuhVyh~vuNP3mo6NRlzDP%C6$Z-in1I^U4X+MKJEh}JNMrd@f{kmHp3hahdm%!o6WRiLGty6|Nz6(>lqi5+ zK%|CR0?CalMkYuu40Q$gvJViWF{57q1i7N-bto)@VdE81AxlcwRi3i1Pu>9+q>P6* zl1>^x0Xla&kcWNAWI2$b9F%~%*MteTC46~;LwE}0fT7labMoBQ6!jn~*Uq)iBMu7* zC{RQN$jGFS9Hed#djL5;3aKRr5*zY9Ex26D~M6fBQ_(218fDwd`{VWC(R|qzc(ODHoLi`@&XE5?7CdM!=Eu9w` 
zGnFm$p`0Xe-NYf!uCjFxRuK8Im|T?L!fGO``V{0oP@)eI#Bop%h7}-@l)?u9SL6(F z$sVP00|Foq~BL&pPerv?z#OhuRlv~b@$u% z){yV9N!;7gSb>C7(t~N0aHAbw)aQw?!hj5k$Zr4XpO`;;jI0zT0-`nR1Q)jR-cfw{ zeDM72sa9KEZ7eS4juU4}qme{}OFq;^e6+4$G#FXZne-Sszzaoc5r?8xoXVjXR2 ztHdU~vrvds>&Upc_&@#g|LXt#lkh2VN0NS1`ZD9(PB-4U`OF6E|MP$Ohfhpz)%8Ar zgz&{1oHw7z{Gj&f+KUZIzz2KuIGpAi!eaJ98V#-mZg9iWy)DLIjrI^!&em` z1Y;Wj_FVu)n5L>>03qrQ6I%9?dGxX{Gp7ts2{)EYwcD3F4(-K-(qH|(#=Gggjgnp3 z1R;mgd4|Q*WoYg}b7~Bi^EurgttO@pj@9~?|LC9p(RlSkefraJuGeJ{{Wgyq;`mCX zoYTlIxsz6MO0d7oXN2POc|1)P{r)g`q7611O8j>2c9f;WYz@wDaeb?G@aehldn*_j zZ6^diTrI_DrLp-^eaf-L-faE)hJ8$=ML%7D7Zt>!rVkM)?!<%Q6*?SFaP8?mkLA>aJwzxUl$N>i|n zPjuUFgzGgUO5XNdJ&umMb`?J@=xovj-|mr9V324#^3oa%`bhGQLxhlG=!7~6yGdu( zz0X5^v+#8h(pj9XgcG6`?|<$+;=Pr{`VNc1J5Bir<@nXfqEqOrQL|^_+K;vxEiVrg zZHH&8$bh9aHbkJ(Vidxfv`wKfLYT&0ECK-dK`T@9u2}@8;ZvX!bvfd)~j8t%Gw8({-5?&anQ|FRgI0rIKfqzqN)H%m^fL*N$nHFnJ*ZlzW z`BYDf#&T17QK3#N7y130d`CD=QlG^4k@09FZ|k`c1-BkxIT!!8ZZC_NWp3D}4;y@| zW-pRPFD~s95#!U4m1yHqoNeAq(o=zRpCVYnlN8=YbphU+dM%r2=hv9IH9CC}YWN_Z zwxLIquTX?!RGY*P!?Sz$TI*zHXmy{|^;f9BHp8}3Yb)~dcnQ$}vo~PqaXUY8HDbEC zm<8E%(@%6OX?YwqpW&OzLy269*)#NWeKANRS}PxC?zuOEN3aApnGSQ3qo#8@>|e%c z*E(x^NNI$nXUMBWZ~nrsfJ&4E%yFbDYB;Is$coFQ0Y!AcKCIw7mYK+Utr23sAEUFC z1Yil{s7#a(7#u*tT9mGxZcaW#$%9Ka_NXtqS0-4cZa$b8lLa6N{+UO)i~P}s5}1Vhh)83Tn*j>TZs;1!_dglI zk_cw90M|5J5E-~sS1?SsW?4bMfx1%`3X&i*i_ST7`m?2ZR8+EFy=c+XN{y0joAzlX zF?}X&Wp3N5Us)?uu;lhAZFpT6pk4|pLYcqvz``Jqy1*vLc?ojG;tlKBkWAT=ffU0$ zONI&RA6HksU%o$1J~HwIZ73Vbzxti+in12u~mi~ z)4bLFN$;lIpplEf?IkUDmBgWts0Q39{w$2A3RsKaPtUI&{fUcyxALFd`~L5I zH!*tCemqgvziSE`XmrD$>F;iTd;K<>`mOK3ODBA3(TQ>exyAQO;_s-rn@I6x^8U3} zM2cS*Y(~cCn0SqmfzK?dlrQtG;{N>HKgfHladO=@+B5nBgX-S2GhL2{mtP;vX>8q! z8L%G5kVbWINKHUt=6Ymz_-{+dv1wmt67o~cv{L{Ococ-va@#wLEb|=n@HDksq4-+| zWR#77Sy%6W`OXW;3P?%ZN4~%R;KAf1Cz(!rwU{3{685#~S6_qk>SReT==Rv3sk$O^ zoUq6)`HeaBp1_HHz` z?`$1rwk@LoA!xD(aJxe|)z0Sqs=^3d@YW%2@tsY z8U_SX95MEl2U5}18iD<(;7WgCIz~Y0R8E382^L8XhMymqoXL zYTvrQS0*=JixTmHdOh-yj%~t7f7TI1Q`E$$AC8X=k#&%Sa8tciCfkpUk!jl6b@A*S z+g=RM*oD0v^ukS@D_;&4w}ArJ1GxanvbZ5thz`>W7&MjrR69+lAukH8k8mYd6vT=X zk{#r>jerZV?P z#y7b{3ep9Ml;za(mCfb08zNW7^(DJ}KHsS%Z>^<$H=<=*9!gssPy>Y+qxmtlJx(r0*i?rU2UKTot5nZH z;{vu7u`nhfk3+i+7Y<&L&mj$qo^JaP!I36p=9;q+xd3iD2kJW`SkD9sK#@$R zl0czgq~ygqvI0vvPgRrA6i!l%%=eD~=6ncCvNQqQw*XiafZoD0^M%LE@fK7SR8lek zSQLSN3mTX{!jv-~5(cYgx2FK-eGQ~}R05fkYb5C)&>zdWK(#dUFdOY@6Dnf>&`V}m z^&wH=7AYJdrw#-e$7v1H<~k~t3Q*yo2Nk(svLHMH;uV0AmYuk#XFzaMF$!w%GGvY~ z(8k#yala(wuHO`~<4!j1R8F}$eeW_!OPARC6ge0hgg8W;Wd1KC>y#mjX7Pel4>jmU zJh@~$PjGw==oB;?j#iTWar$DW_o6U?KpMT8$C~refpJe--~Wc(sB*>f(8uS6RTF~3 zXPq;{JEoyvKq`qW+0^4g0ho%O-OSAFPMbd={Db_W7_>Y#wK7jWiN>caq9zKDN_WyX zhOD3ISae-&@~U5c?a3{ORu@16cig5J)s^f8Bhoj)eK~(m?Vn#Bc-P2jYq|i42MYco z{e+jVht5S;oi3+)zqZ5Ozw?`oHlMHL9P%d9)S-LN5M#6l zD^oh}d(fOyTZV|iA~d#EqZg&;(Z1^<&w~u>eB~B^3?ih{fXRV&xWDYzfuA-&ECO?F4_PdU%YAGgW15eKLd#6Y z1IEEtwWsaaj6z(N31ihV&a!kZFbB1C%mGqMB}k8W9<}hgmz3SFGN_^^_VK*1Gcl!s zgQH)REVSu_PE;_+s)&hH?4I#EWBwvo)RrfNUpcN%A~&0`uCWf|*%YD~`Qk;xXm0JIohQf^habhAZrh^Y$C9FY|ZF(B}%g%iKGQAeIDW&oGY|`iOZrUz8$+37Y-v5;8Cg^Mf8Rv{r z6)}$%X-_sEY*{WGTewr^4)|Gjl)apYSI0wt_0&kLpA?EPdm$fOXRD-=QJrDZnkG{z zMry6p!l;+?XIFEY+)Axy+=_}Qt7o<09W^JEM|hKOA)UBB&4^a0G~2^;n9@jW!i2-%cgc2FxAj&>Z! 
zLgicu2v+5-IpLB+Qt-aDGdgK}c3mCDqv;NM@&MmRZeR>^-8M12tkFm^#Vw1#@yhv% zqc&3ZBx_1zlnMK`p=-(UlOEz*r)F|d<{-B$EQUe9o<aU|QV!kj74zBVAjjZ1@( ztHt#zht(FG_h1te<13<4>{;6y-Rr8NJH|X6UAlY{M%oqB2(+^^yzoav(3$BksbD~} z8PRo+F{{KW9l3bidXMjccM1EY$&d9ZWS z*dh+rVxTc%0MHK8NWy;+?p4*&E(|$N%A$5kn{xDohyMOXgtq3d7@KnkXD)2TlZLaR zbkxh1#dsjEEDTyp2$DsQT%Xh+H!mg6`jC!n`6@Y4mll#aZ+=cKUt$cr(oFcna=4{>0*@Bj|3b1)780)O>BU@M5OflY(kr7WNVSxbO4 zLge1qffewEZZ1GGfZ=CQgHdKP0|{g{{L&f+SLGrcrhNyU2`ma{0uX7xz|ZNcn9JppL5VOZrLmB{)M(?*9p z1(&t{3~Yk57BDm++cD%T*#J5MvPKNZh2I4WK=CC&S&SQAB_9G?xEQ25CSAI^Iq)TQ z6%j|l<4lC_ki~-ssY241(bVx{br*qd5H%)Es#mxK6Z_G1_^a&kGGd&L2~Go%Bo90e zsaeboGp_dOB@6l8?nMzWY|7e$oZTO73?$Uu+zwAo?b%gn{3Kny^VL%N8zd|x9qsTi zBWCHDj00|nGrioYk>^SN9M=^AV9Ot$fJ1PwD(9JmlQI%@4{2%j>aZ|AS&cUKl%#59NbI_Gc&`pmPN=Nh?@Xm_Jy)$oRP$VUl0 zcc&m2`rEDY&1UdT)0(ErqOenqy94zza(fpdg+k$N7_e;8Q^kDU~G~s0s$2W z652>)02A8h6GxxM*Gtm3ptJhS96-D%K}zDj)}&@1edxEkuV6y4J5iB(KDeCyqO?|+ zy=Y1AKDjcUKb>W+Z=g^erTVQ81=eK}?D=Vn>|E9MxP-`lrFN5M_E&hKRcU|R`SHG) zmDf!nRZ#IK4w9L$tRdvM^Ms>Ntpv0Nq!)Y+ClRzj>4Cl+t(Ps6mGOmVA%wPAboJ4R zF`XrI>hu{=604M&BoA?6!DzJTmqp>G$daYSo}9ji3Hva=x`gOv#LSrSQO_U5`Gey; zTo>7+IL4Noyx{7vGBCZ>e~UK@DC8F)>d9DT^cg%A!Gi8=!X?>t~)4nlZ;>H)FxxXBb);G-FTPNO8T99+Ktk>cCTTcO1Bt`UP z@ty84ePg9f7aiQmzzr(%~2xcKNx35O`_V~}~41)~J$64-OO z5L^|2qqckOxuwpk_J!|}tMp;HaJH02OS2fz@|{3zsucL4H87=qh7TiRiL(fyU%$W> z@(wQ7Xtb1iwN=6;$H@Qyyn3mw6LA*E1tg%^vX$l7k`W^|hN$r%Tcc(kn!hH`b{D77 zD8(HP*m404OPPvO@=xb3bjiuZx)#lXbD!k~k^tLQ)eGaLndHl0WwBDmaD45l^UD!&z(&%WnzJ}sOT}u7Xnq9ELZlBt~{#X z_m0%&V$__qT3^PD^+c8+Hp3!gg&~auEMKye)@<7gD9s5ry-Xa%IZ~aV!9@nYS1QLd zHul0|u^m#`iF8n{td(YZcl$NnlWy3vOtSRk=at@IF%lt}Z%q=+=R03Tx=YR)-f|^! zpVJ7Wk+Omv+9+2%-e#4B=oJ;eG3>HDx4)Vu>=ZQ)Qdh=hbdEeJ-5}RdRS?zqlTSYN zu4`Gs%I^0rU+BHJoL*BW#+kgQY={}Rni=QGyD~GSaTwo%>#~@6GVD`RuIoJ)y@~@V zL}y zqRTgMIk+sI9KCwnz zEpvDUv1Sy6Rn#Tt6;_A^>UK$0>sYo9a3`r)iB%4Aq&Ads+7`C~a(ORQK@1wy9hiet zdTnlDP9hk|E5XBHAtR-*gk7D7Wf&ta=|B!Vpb?;|8m)(Hh~%6m3lWR{29txN12!q< zSios=2IqO06L4q4A}u})%NeL2msb^7spB#Ti(diMhMo<) zH_+d`o&$J}IQ*Ou%!4%^g#-X4a*8XG;0r>LfQ?{?Tpca^BIp1#tl$P{Lccfas4kxK z(HMgH5CJeQDw9CyOOV_LQ~?wqoYQeKXZSoQGc19YEjZaPZ96D?84`H{GUp6K-GFHO zq+7E9bbwkf%=$%3GS_un^okURlpqnfAo96=A{_(FEr_RtN4P*P=FC(WKLH=`2(t|q z+bBWZg7Ld6ndnH7d9oG1nZ+xeB)mxnwt*hD96BAmtd6TYD~`Ba1~#V<($=;qk{d!n zSbM%Z9{(DUi4L&AOX!oc8e)la)&-LlH*D}ySvgzE2|+k0#u*!+lH`m+M{8{$oA}!1 zwrU#yg3tNm;$kvNJCO~$jJxq?ZUf!ape%N21AvdE9iZt~Ad4VhNpybfUFwVOpZ&u8 z=(#bo<}TiRW-deqmzlI3QLRBIvx|0;daA;)LE>)ojqvp*FEyhn+E-Nv`&G{S!ba=% z5RZ}U+xfXWfl_}ACCxZaJ4pvvYM=p(bVz%FIRoIXRqYJ!KLi1I9cEJv*sQX~T<-7w z&YQ>!Wme}F0KMK3aYf#`|0Rp`VXw#nM!Nv63cvj5^ndx!pZ?%a`#&3)`Rw%m(x)N8 zNZHxp`vs-5ck~_^lCr}8FW#BmsulV_z8L=7S3lB*i{sN<|LA|79}Ison#qOf7jw(} z9?@ow4QW*!mI};PHejZzA?YuBIj_3tFp^Ht__=q@o&DiaR{;^ZH8_GU%lOt5fl>`r z=`fpcixLda$uogrApjE~W?qS30^J?QyPGu_UeD%C0O%^W_ca&$vS^9dO@$Jh$8ER3#{6;ELvON7F^A&Cr9aWU@&nX{=5X=Z zTeI$`!%{IrET$DA${2;OGJ?l^bMM>t(+}&bd>E#r@brOvg{d=-@96@ zOm;ViKVZwIm$iANAZ;!^TmN#LeAiydBP0zMzu~>J=;Z6A#tf^H+x`lBSsB&7CuiQCatG65OvZ_ZMKD>0Eye|hAMMW_(v?v6ONHw?`#sUQ45 zp8xpcSI8ISCb5%>H4^Y0t2=Z0rJ#B%UL@0py@8x{f0iKt}1XUCY2WrswWBC|rJ7GRan)C(tyzw*k{z zsa1(Q8{%i%6`THynpI#x#CqO)+R;0`<&(Z~Hdrc&G+n4CG=#o7 z??5k)FrboWc_P(-d2emvb3D5F@nFsh9&w%7x<7R5-mO4MOqV}eb4V=~d7qk zSNBf0l7&REEDtFR9c!tUGYaZnMukKhX&puGq}g9~>8Y!8FYUfVjfDO7iiHSrOl>AG zGyN>olNg0%7i+reu$gUkA>xi*4m4c0C&iN7am)RHkm&(`t<$eC+3x8b69(km!`tzVvXghAWQ|6f{ zaQvBfEm(2F3}=92YBPJ8mM{lRc~+-KJ=CdE{;h029}cYuDNfM!l2$uZZ@x@k%jIF5 z1^2SD94>9230pgY=Us-;Zm)+Iid2eN?33j{P$t*5AwAjT<{lVKUZ2-=xc52EYgbb^)YiRFy)V^c$Bp!E`ASMpFS z!zWTftHnMT8q3*&{S1f*&W{q!9+;=< 
zRWybal;M%sjf#OS7+{UT2+SG{@EM4_$GGdGQFW-1?kMR;lWuIzkjrIkEo&RuA@B|# zEMN>BTnoWKTdri4(B}c337Ae!fZ_3;JDmgoKm)jd zxyz%%9jE|0pi&`KGFErMgzSdF@Ky{5r8r?G+b^eU%}NRi5XW^+r7i2jA59lyjY(1L zO5Y0d%UNmz!ZZd2>7-fah(s~F$}OCICW`-Dw^nkf4ocnzWG28?*XRUHr>L8>T!27Zr1ImiaT?mB32R`C)%odF=(e%ynJG^S91LGDXyU;7%$fCyA;k0t;p zm;T^07esJG8}mW*aSe-&jg3Y;$=!`_y!}@aCp*>x>7UqIN!wdxH+rXb{MC}+!P_FTzHw03%ekXN*FuzF6$D?8D^J-JKy)9A(7(omQ z19N@``PGYeymw^ESnM9?L>9}$b}+;L_-8Kt<41q6`s9>7>JUv*7B~Bu+GiB9{NemZ z@p<<>-hF;(lHo;pjqh|P5r&6X(7`w?6 z`q!Gz;53?7M zMG7)DgX^|&l}Li`(?v-WJF%8r;#UJ7ii`gB?rNA!S_LkbTFj1#^6fqA)kjKEgnQz{ z*PQa~D`3OX=O4Ln{wF_ODxW@k{_`KV-~T5c{^Ha*Ib#28ddWielE3&;Ru6DIzvf1M zDltF{^jvfa-hgb_@K-s%pm}3x2G~cRkp(nw_!t0;Qd{UOLMQq-b}VX4V%ghZpFkar z_w?;i8QfK%yik#qpdHT?gcK4{sbq({gfujsO{2Su?A{8!Z~)phX@%nH_3@X{<RU}mT_I0?u>~rDxDx20` zjQg1I5!OLVd_&oameq82JiogNHZi+j!cf0XHt^~PWpZ+*Z^W}#^~9~Tm)t&fS4SE9 z1>kN^`)~f(`Q20QIk7no?|kYn8HHpz21BtOQgJe==I1y3j!3cbq)2qLaZd z6)SUF8Lh3ztIl0*IgQpialE|z#WgkvWZ%0-2pV=V+lTu*wRuDy{>Z!=(p92%{)o>VC&yK|%znnw^+;k$8xhV~~X|*Ixejc(V$DuK?vd;e6 z%*-jBp+&LPRa4HoUn->Z7LdIUTjlOj(TZMHE+w9Fp6g93>BZhij6YY%p=+@_p*J$} zQ|=m)7q38(@n}jevZFV-v>Zy~b0n9lC1wmwGu}1-DP0t2!=k6i1{!!ZEh|= zS${&Iau$1ml^NkW&;N&P>4W8fR|V$IgS>|M?qGw;vR@_7B*5*NNM=tmOL3y(82Os(PZJf!g6w%3wa@XftrFv2-+4Ghj>JdG z;95nnwPBD@g5pzBMqpqmTOMF?r^8bCwHsAgjA8@77LBscvg!O6lg-N_vp!ec$w9$7 zW{IgRiiOVv$!A29)8Oi*xRik!7E79OXEyDZp$ib6FO)2k#8a{fm)az#fs-do4(tkS z$Fu2;C5>8@J%GwYEhG|7er13-et0gh=ZRz#8G0TZL3XKm@Q%9}6Tu503P5BHx0FEb zWJBdjtMw#`7m~=F0opV)aYaUTt!Q~$VL4zcohGj0^P7N}v<=WXgLj2g1le&Bpt8fM zFaly#HbCV2EF!B2j6kAuxeXs|AF2TAI$*uegTpIewvW>DIn%#+$?%7iEwaf;K9&cXIxrO0xU&?6Twa~ga=^c5Kr*| z03wAocGEE4&dAHmfR!1Qg9SBMageehG<{wwiIEbGdJ>yTEkHqahVZo<5TF3P06>za zeGYj5fC{ryz|p5T04$-Kb8a3PaCjA#x^51_S$#f|;8~mRf|T@AON1mz>R!cfFTh0$ z!^`o+9y+bsUkl}eT&_3Qi*Hdl2-f8)>wBZdRlFUUpO$L=lzyoe*bnLP9h%@$p;1fl z!3=T$8uWD7$3;qEZCK>y6Z2^2&j!xZ)`BpSP;ypH(MnqA~C=*rh0mTGNpfqK2i12kvpa}?ILUqG&Xvkx!@EejgJg7v_VBMe%Q z;l`jsDukCQf?@x)G{D<0Ug)uwkJ92Y;^rqBl+*<*aXbCt%OCyZUuoa{Tcy3P@7)>f z6{`k7z~DLW^>Uj3%H7}odTsB^zx!8(*T2pFTJt|dcfb6#uig3m2Y)Ya&|fcay#4p& z#@_bdCBO5|y_}3M%p;o5F!@+pjF+RW+@!09ywqnlRd0aND*IF#{b*6<>GMUa1#FGA zEJ$qwY+q`rI%pzrb1FXp2vSB(={Z^GLgKYJb}fyBc?~u&s@I0)1^fml5&+A*oq0H^ zo&yl81hbW%k<;OmB)0~RlD;cdGl+ABeeg$r(0QKsx4p&GVbP-Zlb=l&v^Kpl@Y{3q z*u%F<&g*;jSGT|Zm%sO&z0$kicyRNtU^~B&tlax5``S*T&EQ37ysUT80GJU?~7U%D+K&8J8x`mH>BPOs)ZSTPVavVVb9pl6b&p`Y~hlT*YSR z*Xv-ZGMQAU0oEQ_UZ?ukf3gqBMCM7I~ z{G>SyU%k#Rk;hZzI_I~O(=$XOCNm~K9U+%dxzEw4phtGOL19Y$SLPip13aGZd|+5+ zsgJ%G?Y}Vyc3^!me58nN+@8+^!>Mu)zArZ9HtA*z^ass2P=k65i^3nx(5%mNZZRg4b z!~~@cV-ug;0?#9&%lVun8g*e!ukz-#`qUhRP9_?q2cuFtwbfM>6??M?!Hkit%4w%; z#O{_U2?ORQ6R&w0xhYEP-8!H7UhaQpe2@|qhy(VMJV+#e1HJI%BrHMCB(8PHjOPXi zaRTxP3aeDnHb6uOThfs?LayL+R7aKC!R>7q9iADDE8%gCOEs)Si%)v9%Xa^!A#(G7VB|lZ)%%<&v zFHggJ9Y|bA``66vadlZX6C1!c!jj>xi4GlVXwYI`FgrfZqB?PYXUb2pg{7Iz7@vPm ztRr7LEngVP;rh;5ykYt5bnbWZBNaSXu)_yfDjyKShnF7tM4ho#%pxpy4(x76@$(l+ zpIlQ&64keMUoDN-pH7r<)Rsal&>c-;*-P`t_|V zq?pN`oeNCc#L?=+x?|4E563~!ok|J1m0MRnoxv@-uQ(USVztqC>8;g;DsIEg^srd< zGJCIPt*P>dnYtnnt(c3eASZ{!Aib62UgGhhkw#aLox#PnpljiPbu?rpg?5UB>WCed zVSJ|6NXOCRaD~kvG$EImxrODTaR3upO-9kC(LpJm(1Vnc`s8}?ERsaBS334eI(-aF}@KB}+nA7Gm z0mGeZDo+E0g20+f3jwPR6Qj-HhL;4qEZR|oVF)Gz?*YI`c>s`>7hM1nNHm5*1y-Se z1C7+wG{7A0p1X?HBc8cS0I*oI)FwePBF|ed!bx#rfz=ge5M%)J9~(j!Z1rTJ0E{FY z0L)S1@mRGkw?9ex$z|!-0pBJ~tCTcHOR`_8As*OMG$GczHeaD>GxS98q>Y#XFI61n zkAp>?%eda?1QbTKP4qQG;!YgQK5!Klij3ZFhmQp9NdOYY_v{6RR+Oa(Xr+jwfcEeAk$B z2Y6Tl0D{wXn6cL-!8-9=V||V~QhsZFc0|5w0s&*7 zWv#L^;w3Ii6RI-)^pAh^Uyd&>9GEwxNGwP&`UV3T 
z`%0+ycLN`TlGix0s|^11VAR)fG{dQsM?-ptdk*LS`ri&`*Vitb%U@4C*x&cTeA;wpGE0v~-m2zqR(xZxx97cQ^0ed+^q- z@{VxNuKnfgx3RFW@2`HVNfvikTiqj_SFE^NWI@dCSp<%4&GMNb)eRTZ)9bTW4 zA)EQTfAGKh_y6@j`tN`K|NifP@Z{(J^v^!~=>PTd(Vrc3-}~k4=YROK5B~GN{qKGG zJDH8&{@Uh^_O~WC2ID-d8}`|ga#@}y64e-bCo;?@7x^Au%_T!_%Z3<$YQyxE318Ts zx7z;O-DrqGd2H+s-SW?1x(ZSh8u|Lgrj+`isK*Yz!M<5vxp%^OA;XXM z3;S;t{@#4Uug|`6o?m>c^fBUQOKWMAj+w+WRmrZ zviEBgb5_NX4Le3}=1}}be(`d1)U@6h#YYHFx!ifrs1nz4gqN?Xz=?K2 zRZD?8W_78-Ee2fqLH}*t(IfxGYP81Ig{i)0Upy*rnfk_wHQKDi9phQLAoWIDad5o2 zb+m;d5x9lb1>OFZ$Edv{rUW?BEZkt5WbW1dZ1v1t&{W@=T3%UF_uIHv=S}#>3j&b#pJ-QFBZ0s;Rs$JisvmTa z+Yz)AYor6~++{+qo0%=g3Z{Thkr|^SY}t9sV#NVZ*{{g;1|3s|pBtaIb*vsI1CG@_ zNK}~>0`j0u+7F}X6M9v|XggZu;@trZE2ayOEv8}9ba`;>G%qmEE!kDb#oq~Z)iGX;{*t$Xr{90 zIS6Uj0SsRNjsQ&HUO_#I$wHN3T^Lhpe#$4r0_;M?bFa%(ZUeY02?CeO)PohwXiN4a zjssu-R)?uZ6=JUxW`K3OfNxU(+I<+JbS1xkCj@~0y_(uMd zi2yr5#J@5>2u6r$Fnm1pbDF%cN?Xa}`X=+_UyUCBy8SH}g1!diNHAD-RQaM#lrJF` zsK|gJ?U$-jazZiGv9VJzw7QKrmw~_|yeYuiXZB*-F}2l`AG~V*Zgrt#W=}?w=YR5F zPG0<0p2TjxiIKR40Chgl*o{OP`fY$Z#WG{3AsBmMb zOPRo`2$le_i8<+nu8?cN7^3_pKLIFcWt>psXl-IBw*O-C`Mb1PqNrt$zI&8?Cf`-x zfO2N2xTyKwfBU(-^XpttZ@+}(4L?kYsiW!b*O|Zr@FC!Vav;+F@Z;#`Fx~84bbs=5 z(@0(M+SQMRoEooGT6uNNoc(7P9|{s?>wKd~{PD^yvL&-MyD2tXVmCLe4#?Kf_v>ab z=BxE5beyZZ(ywl7Td&6F%PIWb<>-L5h^SYL?G7M?$gj=J>klL^Dr(JcEi;inVmN7E=j<8gKmS?2-J|fr? zW+SOm8JS<4X!9L;QGNV+es{FW5c8L>%=Fz_r$>_oZTbO;m!jrN5H_6Jv)VxwUtf4Q zM%v!FOeF}W-S$?|@ZswPK%?}d34pi9-C}TwTCKVW4ztZ%!yfPnukn22z2BM2SFj!f zUJfhz+gsWo2p6B6U&R{%<5$X8xmlXH7M@5>@oaR>)Nhmu<9u?sXYc;8z%VXaIvZd2 zh=X+4I3as9DIToeeaQRI(M-tvi{sTEnCt)U_x~LDzs6eL(8AXf(Fzn@TRY9d76@8+ATvG2&qc_ur|g&RviV7=o(zOr%JQdmqYTUY0e431E1>hYvl3Lx$Uiyp zzll|x9vU>Uvy+-*%gP&Ee7yagP4r6xwaKc|EUN9y{fjKgQC5k~5 zB>`p$c)aqVSDRqV+=81VtrBYH`x6z(>BvOt(K^CIb`FbsDRkJ!;U>+GG!tDP1a`t| z-VRpgJ8O%8LGJl?Q{>cp(eV4vHbiInrB{inifp{&ZK{*NMNiU5FU5$WFh?w7VPRi* zfr~N2rSD4C;Kjz{0p-jclh3gTQW(fc>Xh$&?61GCB`Q9$jFV*N=5t;DjlIp8| zDv(aCwUJ4D*6c9}!r3rWvt_dP-unT)Iy>G5qcSF_jgdrl!$e;UF3u2hx-6thl#;$C zxIbjj+qtaHw<~)^RKfA-w=T-V6wjAhA9zjU+ADeeC3TH3v*Ja$f~*J^^S9=5TI z2|9a4_gJ!ma+nFFJ3`9KZC0g%81=R?dr6(cG^-Jw5GAT|DTf6uMa~Gn!Ym1-vqcI& zg{c$<9BGYEZJ-NH8#R4$rz7kWiL7wDf2yoKl{fUcxLOdS-0th9S#jsHx;NG$jaJav zn2zV|mxT4g2$t;8r6EMGwClC8DBVpRz2FM(61Z;@7r;z3nnmXah54V@@??@pdy#vr zao6M`uykB>kiF%pIkGk=d@F^u?UJhM)e^?0w$m$K?CQ+vwNj|-F%(qkw9m5uV_%M` z(Lvpa7pQp~N)0i=LcvlXiu7qwWb%`lrE+5+NS01nKmyFMQ(oQ$VPbCHfoZLisz3*w zH&>A00O#3&K$=B+9K*<_Nga}}07nQD2LSvgN>?icV|S4(5I~38j}=PBnF7U_0Q(Gy zPyq1ka{xptC4mWL4j^kd`0G|tMZt#D1T0DdM7Nt^5UXMuw1{_9Uu_bxTL(mUrUlWK z5_{2meUDi9JecSq2{HIq3X}kQ1%q$^0Nxm2D+F|7be;^beykY)0DFQ(!kh<3gCb?_ z7#6*1hWN#=2#6v@yz-mJP$dYP0RyuLR7`49qGXaL%Ovc49&nWMUjQiKGNC{d=pnsl zPKXv9D-d5M91hhna5VtMM`Iwb0Fq#AP!vG27A8w9o`nJlAgso`2$%>_3WrvaP`U+bA0!!yAaPxgeA0fR^ZbBWLJ*PnzUj`fj}T<6 z5vi1{d^$S%=+ld5@AuyyTu&Eg*azeI`o|ykKjhdb@B{`?+|3OqFr90Ni=!CMX*7d& zPqXG^*;gm+!#b?=qfh&@MS&qS(bAO8M5O*mm>B7oJ7z_A`rP=r`dL5&j9>MC_Ot)v z|Nf^x_!BoDt~N-eU)f&jF{Pg0;4&tYqKyISYf)YT#@fo_J zu-M7*($H(tH;#)ke5q;Gp8b!hj~-RK)y}Z>pZ?%q{IjR$Yv24^f9Ln>iG3jIg;M#O zqX>)#q8%GVY(^S*FLj6w^|_(5nXA#$_x{PtKl<^%{b&Ev{2%|Bb#>Lo>tElEHoc|g zb#8TR?_}ufhe}mwvi*6_pIJZn#j!Z>CUHTkF}vCQ)Nmj5rw+md>4)D> zrt?s|Sxf{aeQ5#@6zWl8*0nr#rDxZw*`3w6`n4S~N6paA;tFhWi$oI1{K!`f#~oGg zWwNu%+q2cjWcs)w! z#|hakbH?O8XT239O8%|R=O4eXJY9eJ%J0AYfjv08II@2-?G69HTl-;s-3gby!sCr= zA!2W8#~awo(bHcWo=`rEvulltK!i%Re=%^eI)7R4mpJbhq$T@YJO_d|3 zmhS7FALVAVhv|;U? 
z+*+y|G|iu%X)HczH}0N-!+K$LUfNu1hk#RwG<9?TQ{GHS4I>QVb|rlg_wMF6eJk@Z zQENrq0J-0LTK~$DqSA<2rtKT^T$J9|SCcbib(QiC&)?;z`m%~+)m)f3Ew6jg zlEcwf#!0dK71q!60(vk>k@KS9CT{4|J){1$Swp`QP)R-xq7jo9Kc@dw!?X+3aTcw^ zes8_~8QZ^QuP!pDmEvgR+`Xu^j;9y;b~o9+cW)GJSxJr^u18G^TI-h)y%=v6L;v(~ zyDiIn(6lsrmI^irOFqPs_m_bufN2S}7{-+hF_rVEhaSCwz6Ij_bP<)C*f(Elfu-s@ zIZXeYqX)N0IRLlL_Hhfvl;6f!GB+~G-jck=>K$QwLPP8TUB3}?Y>yF?m-5@i8CJ<( z{x}Tmz+D~&qCl@#b}=i;2U!Kl>3Vk#{sPRa!gqL{pUm?fWR}nm(bcqgT}QXp;R;9b zbf1QN=)zKliV5Rk1l#ZujyEk%Sy(Ys8|(4wYFhFj0N}JKatznt=2J1l_#FiNhp^GQ zSSZjBdr*M6q=?aa!tQ$P`u^q(lJt1!fx@7}<^jgpVtr3)l;lgaQx# z3c}d}0O$~bl}98#8-O?sX(Ss3%>z2Pr&>vLF%vOiMhzz08m)40BP_%$2L^K+AJ*t4V!E{?6&nx9s^O@1?cA( z7BW>~qS!+Knv!D`qbrOe(X?DY-@a- z2VyPk5J0YGzyTmw0AL0IT)_;c`M7#!gB>JH7cdqAVQHQq+&9`?NWs-%0_>SbP;rwN7x)POJD7oMvbMfioP2|NwC%bd2U_rcC zsmmYU2kq1hBIU})xLZHH9KO49)DF|JK9#aNI;E{$z5tJU^-ur&gPHR|90C>mjoRON z_XcEHVIk7%^XY;~RjUAdk%13wl<>`mA|5A{DA@s^R5*?Y#v6vL(E$x$_$&XX|NP0n zX)T9N*8R=b>HIFD9b8$)jcGg7ntvt2kVo`lDC>>M!1(jm0pXX7BcQza)ltpjycgXqd)d z#b&K>>q&_P?*so8Q--6-amq)peax<9SPi+hFh*^qx9*^2Ch(UP7`Y;ZtOjY^==WX$ zXlZz7MU+wBd;ru01)35#mki(>#;S$`n^jyQ{fZ2PqJCOQA~!UAY5~dIru*6#C&Xm& z5;6;Ew36sm*!k5e$K0NhjZq)>qsd~UU>D5 zk?@3l`q|0kynp*@KDMWPc=9s8)QlPWuh{tIi$~atvjhJZ?&|o$2#>MTqI8gh!Tq|- zPF8~b7>YXo!SwYhlL8!i`UnP+Bl1`12AU1{4T@V-(8Lj(@6H!^qVOpUo(?kp+IsZS z(<$sx!~S(t`&A?+q$|I5Nq|TumRnGrgu6P++>FFFyI-g}*+R^+$~}y(psy}OHO^g3 zZq5-IrpJWd+g0edEr5gg0!;xL`B_rWNIo~tX%L{%aMiSzp-6jT7LL1Nr1Pi?=LCd{ zYu!yHq7hmhLeV1SEOk&CwFDc9YHTNaFyWBFW5-ukjpRy?Jxn17_qNXb{Sb3nY~mF6 zW9sMD=e4O{5*@*f4l9*a^5kq&&as_F^+xJEZR# zq|_DJD6UYWriMm!q39&`SF6(IIG#_cmrIv8>ReNymUZiQ7IU{AIHaiO*lTmN+WZw~ z?g=B+X^GtN^B25g;h&%*s~lVn^6deVUX~}BL^JG%G5H3^>x~;C*$O`(Q{N#GJU(ib zJI=;~=B=T{?=01ORX5!~g8hCdb|~>G9x&$B9|M-0rtU7Yl;5Y!kD_hDk`kC8@?t~d$|HXUk~vWaFX@9 zL#-;1)K2!z^*3mgCPBfjLw$mTre^9u_V6`0Q3CH0a$mu(R3@z62s96U2?mE30vrmC zv?CRU21CZY*d5w_flMb$b1W`;#px9Wz)O1l+)0Qma6uS=JMa-W0+^vU8En9SK~FpzgPs5(P*`atZJycPnVocVIyraF-PK(=m%oo> z{FpN%f;LR(*(GTX1_ZclR#K$}j47Eml5K!j3J?+g1-6G-aAPS+P8R?TB|em5ETK~6 zcDF0t_^G3CR2Gv4fWXw7Kv0C1qCWyq6qqGhBMMf0A>{z$fWpwD_6dw9Y>dHUz$!rS zBCtllR>=^c4vRLJh6_BPeGp;{8wbGqOD4(yFb<2a#l0*f98-kyWD2FGL(RiU1g{j5 zfRoe$5Mgj`1;Mfl2tW%HRhYY}a0C##)GXu{cuxprH(_8G_-x`Jpv7OzyJo>0`6WbuB0jM&C=^EvS{nzXG1Tpl6=akcg2nJ zAN{%ae(S?O^|P;#Q}6*0!fH2>tP=qMKp;RK!^h{BO`eGofLdvQyt1E0UTIyRAitzl zP@XdnKl!~Me)Vhr?%#Yv)&JxB?#dfqzI<{IV2|&PL z%LEH;M=aeDi|yb%!i@n6emY*5cvsj1ZKh=My~(f|wR@|%w}zdeiS3YQ65*^E-Q?35 z3jiIqJRib2E-LCaY6meDz=dQ)(r98Ss_#0=h^N;Q{A`$>Df+3H5V>1IcJ9$Z&WFVu z%1Z#-EX>CG_cID#3fHdRiv*P7Gt1_pv#qbswy8tAk0 z%kJq81F#Z3QH}P+kDkukSYV$BcJ_t1A7TzOzz{W-sxs#Ba*m_VVGz{9ugxVndUE7Q zuWfQ{cCFYuITURb-P+k-Rf|8CJ#KQPk)RLh@IB^OwK) z3xB?Jv3;ozH|_&J$T-J@$U^GfH`$)IL%DYr#Ucg4y`UYh55}kQEJR|!1v^o(SBsKm z6uPh4iPe?C+W2B~b*;RRS7ajQN+uV`$b>6kCe$3!YBUKlwc~a4_B^|(jeiiz$gwj* zw1*Xc^=xHbEflB4-f}Uif)AvV*a!O~0%ZVfMuh4b48k;~#`BuuP*})DsOVeTz8Kn8 z@d}Ve#ZJ04%L9%};j<3E1n77HKm-0LERwBaiuiawm=%GiCYCf4`H3 zakX#gc6^LPteS6A!Y@PuId~%!K{|kDsCVNqFd*BL>GC% z&E`!aGdM9R4kwn!FEZIlr(yJ~!)>Q?8uhH@PJc(5@Y7i*4R3f_r95!ahn>yw)bqjt zUc5>n&zbWVTX}7uP4URekC347>ynzm^g1HAxggb1jT?C~6)g@gu?&SLm=W%5S4!Vz zQWve%^Eg?ODDkuVVLyy6Nn31F+$4V>MfnQj#V47hL@W=Mm-a6ucEv~wi0`Fsp7b6i zB7vIygnHgKcG%4o4JqH!%+!vVaC)2x7f<%2c-Tap!JA_k+Z1b&1sFfAE60+Cg43va zmy?M%q0*6LcW|b(CutCRG-qRdXC^ji+W@~7&#bAN$YQ=&iD`ggr6;;Kz}8n^4R-w$ z`V{zmEKHo^fM`?ys^K^Cg$96kVMZ@q#9jJ;&zuB0nhZp&HgZ&(Nt!iqUBDNr+5&@H zOhq=|n%Tz&kYn*kNlSdnK`vq>&Jw-Eo>2vxzy6d4#o%-Zgg&+a@Nb}(Ef`>m$Y_!i zzCM^;!5|%U1d?_g(8dr5fB_It(=!=97ZF9QJso6CBAb#UE}-&}7&Gi>u92AC=cbk@ z0Ws~nz(F$sR0cX53=yH=LxEp3!p;%kfXB8&3}QN+QXzmhV17mn8VPVYJOY3jPI7>l 
z1OZ!f#@eO$h}sinq`qLt#n8v9#1Uv3-nwGvvf?5HmjrAbM9ARb5|BC;f^!jG2)&dK z4hL%h{=`CBe$8Gt3~1;uD>0f{l_6uH06IWaC|DS5AZdt3X;2Xpl4BYLEjN4T3}_if z&7#rgr6qvY1jb|PJm5>|5w(JUwLh5v$brWIP#l+p%qc)~F7qz6MvhDv79tkFjRJK_ zPelZyR2TvUK#UAyhNUc6mp$y79(Pg_3IGiNi2l9CW7P+xQPfZ+lO3i5t z4T&*~V7Q9np2P{$Xy#CJ4InggE{GL+2sQ`emeja?Svgovnt5@HTUrP8W4~#*<9~V) z)$ikSRmGj%dh?RK#{mMxfh|sx-_3lH=hyPo>CQ#s>&ni5{nbD9xBt{X{$>lw&BL6G zSP)hHHB<#0|CI50LY&pNu#@T6zS8-F-}u9CJ?Z_^Z~d#^{q8qkoW%osA_v!Rd)l@Rwte#TTa7K9E^Mdp)y6hS1{t(ns zYk$)T&?_sn+1!clP)=786p9dxUhS|Mi}H0pcI6e94(rR+B|k+(Ww$^BTmY<|79|pZ zvvEnBqu|gCQ}9&Pj7bpsNu>=2EY)!+3czUX%xS}dv=0|mXSwYo&!_ge;-}UgTzqJFeeQiHTiWm0&BSCbyubDq>(U^(0rrAT$rIvOE8dflrn-@J z)8xE)YFr2xTC!5fz!iRVPm(*7pDZLc-uTO<;C2T1r+2cin}6>0KN|bV&7WwL$xPvm zD3MOYu^N--Z7jO8trWzWRR`D`i{bN5ueeo&sj4}ku0f)di}Gr>J2NM{!jvnFpu#QFh+&ePMO|f&^WFRU!qQG8s(s|mzB5NY zmfeM9IAk}{rmy$qTiuJrsE zZ&%;5#{q#r6tz)T8vHb;dCd1ViBG(%%OO_Qn(_768h9coyZ7lNbZwmx>mKLSC-3D zwvg2(Qv44zwzS-$l%Bx7GDG5_b@zG-Yi0f0bEz_IC(}!`)eg)4P_q+S#V|ia1TXo{ zYw)-caZEX00EqPQt2?#+IsSC$@38nM#ei-nJM8jeJ$^X{CH@$5tjO8FB)M5^S{$TN zbv!d`OHwgQBmyaqxqdy3?q*Ekqm1V?YXfW&%00aF@j8jSUsPs8?Bwd?fD%(L%u>3U zAWy_Zva@SX9<%FnBW^7MS;r2SQ4z~yM3gKN(}}%xd6eQZ%Uo>Kx8+G#<(-tSQb=c7 zVpLvK7S3LYJYIF?thLKEQ>01IM~9-dU%KLTcS|(H8CcFFO?)v!BR-O_xBD6;Uv7z!w%X~LNk^RpF00jQQ> z9_gBwuBizPnnQ?&U>uS-Op-+4S42RRE+5hhHOn9`v$+E@=0n574Ce%heKnB{4^qzv z?}xD&q(*S&nt&QMNJW$_&p*(jZX zv~jHm5l6s83~8jzbNW*)1{TOLQ4|EG-ShnQlVs(!Zfnuu>5mUzKxh2*c!hL-ku2v5 z;GSOE_k z?r06oVxhb`$QOg1caQ$?y?hUyo?JY#&%gNumrZPWWt{S27p2es;%l$`^y|gjg+;Oh zTXa%>eJpMbQ}MitfTXrA*3u9A;XvPl5;>{KPVvTitIMWGr~HxEcEN5>AbegnN3o8< zb5`}8jPo-~2-a6x+|5x<(t7aKyGRFdbYPiFNmzc9}2cV4M z@=+_%Is;Due;sB5ZO36@BQ_F%eK7-0SHa>v#FCFyUUWZ@hWKjGO-HT=W1WcQ!z1N&i^N!3f4RJ5$@-{H5E>&Zn=2t$hXB zhqK=>Cg#X`wrEc9LE-f(qB{v0()KWYT9l9qFM< zj@gHz+-RMZ&edm_B~L?fba=P~s8NFjkS3IJyS zF1TRaL~3ImF5*y|tNm>;EU;N8<^mm+3gVobX5JH!Stx`vr^e4;{I}r0HUH0U=ky1k zES|5>C#7)ZD0-0nq_|&$=d&oD@0s@FI;FF6d`@9~4@)i}7FRF~U=}kkom(%yNyX<9 z7b$4x){=(A&Ql}Pv zBLF(1Hx^X-v}@_y>G>6F1UqY%><^g8etD{Ax*G%*emGrL>jjA;Dsvj8E3c|elUu!# zu3*Gl850&;7orrElWQZbYP(EvNI zwYe8BNSEi_;<%r@!hTpui-Xi*IE#PH4&VR1ldpFE;Qb$*f9<2aZ~Wm`yTOYi!}+cI zXNUb$rTLjppZ&_$hZFDp`QZHb4ve6uY2(&ND9~UDX!NEzHfCupKZWxXvxmu8tlXQgho?klP+Llv;wsZBh`q&b3dX0F5R1&E(}12L z+-oO-_e!U6t}xM1Y$hN^2Rc1Uj_+7xT~S+0E`eRkpiv)d6v&nG6?}h65jp0RaZkEzI6)VT!rU;6jR+~vjH z598x3vXsYL$v#2W(Qsq3XxRQ($4BJpW7~_^i;V8&>3-F&-X5cm{R#0A&sB3Oi|Q>V zCzG}YV=q&JAFz}Xz$mh)fZg8mboOkvid#5CyN+2037+rMoXTrm>8QbQWes9cPg`sG zYB&In7>1Jxk<}o-Hqy3YwvEt>C)-&!%W#?ixY$}W=3HjpF+2by@>>8w&s)SZ5aS?o z#WkS_3YG*`9T4jZ+MnVg0$a9w)t>O$O(7LZ&_uhQE}Q8UQ908b1aGx-Z} zn!W<~LQL7Tek00CH2|0ldPMdF$;~}tp0KhAlt5T?-EPsNks|^o8P}9t5QAXM#E5mb z#Ltp)-i}zZ#SlbaO00i%xiZJ4{LY^3*K_L-00pda> zI0YE?r<^@OiH$`v)`W?^(ov$HnBLEb|^=_gc@B@=zr zFLf17a$Z18##DnWm*)AKINmMn$j^=zkwbT1y;E>`==%jn&PtiW3lxe5TYQl`0Z`#A z@eOp{Wtu`l7rf8c`De(2ji{FcaZo`*RSRl-=}}| z|JwP-Pe(T4_jaNEhlH09lW6IfahpO$;exDvX2r+L^-&Fw8*6-w$m9p|jVl3HTe<27 zJ?|2&v`G*fXRXXPjO0Q&h6xH*FJjl9mtYP`LfFg8Jt2Q%f@t^RLD;^aU_6C9rVl9q zaBV%S?J21Q!~!rbQj0-Ye>rSFyw6M%ayF+S`T9IQ?CzCXXpw6C*ylEK?>_tLcitZZ zVs5>mi!1aOyz6*UW`uKR>JR4j(}U@Rn#Ku5OII;w(}gL3^*#Au0@0GvXv16Of~Pv| zUVFZm3mzx-b^lWs*5hj#VBqG6+Ror9&+5Sa@r%t3CKKDv32crEjz8%xtmzguZ1D+uo+_s)dhkF>KwGYCLJ4Pf_ z=LvDeN-1Mf-5uXQCx7?g+us=ez}`80_rM$eHuChB{x9X>uRr|O?tA~jKKzgPG+%7m zZkO0B>49c6!e1NV-?NThelJKQkoO4vn<;7Or~aV=`Q<4|o~hp*2rntaCa0UH)*v%b zFErh=);lk461}$uJu0#&NNmHQh&B>X=`W%nbslop6ihfJ8e%WlCyLcTUB(VMC|YF? 
zgLpa)i>;w17ZQnC3Lh;`V=HS*S)nYJiDU@lbk{cZqdG40KtORl`|#5|$ixC87KCHA zye&%tM)ebg39QBz%pr<9q`@&<4)ddVcDLD;6`+zmD5i|%fK>e@lRIv05=+q1dDpQc{Q zF(Xsv0<}MtE%B8OYUS_r%4FcOHkDv1d3|zbs!sOdZVP6UgS{-@sYdSVQQA2Y+T%CX z?o<9b>ZWK}9`rDl)!_u&Zx_f1on+cc!?>9khLbeO4I$F)9ZIvfxOiHgIg(HGIZn&Hknqjp8JDN&$YLXk5bE^C1eRRhGLU|d;HXJI9`n!}(ZJPl zbhe$>!Xv39e3H@v7w#Ax3WIu=TkjIb1TNAz13)+=Saw_k{OL9ElYrrb*NwHyZO0XR zpOATgvNk}`11M)un?elPa3G0rv#!+Wj;?!$24Gs=0ts?%JO*COj|iYtH>w}>H%uO}6DEL+Rj)3k;%VTaH0(54QO=A90+GIl`V4 zRv;?E>vB&xuIqQST8!Z61LB!`-o~or~1DpmzYZWsNfB>TaG9RaR-17_=WEV05 zuTG=*h(b-7KEr4r2_Ezl)ddy2Y&Z@obNva?I$!u9baoelTJF}GMAlf7pw5Vlu>&fK zr$Ivpx}e4x@4g_7(hNJ@r$rzxVTaL0mJvJzn9B^UL}tW4gxJChSLT~6LyX(A(X?5d zYk;u8A(irM>R45D2*7if%Bh%#F0)+=*RbKi?o~U- zJnm&WGG)PrgU4chvIKU;46qr7;%FRMZXYMyKzBZD>pXYfLdEeeF&rCn1iX2YBv=(6 zy-$|YWJoLZYeXEGCSza*7r)gM@Sz0oXxOO__FncUrD|iC_PE@9h!TbZitB0WB$&4r z^JDthQ4S#BXIY2*d=82zEfXfJ$r9$SmNwq{@>^o;%1;&Ee%(rGhkySM{>eWfFWKgg zUm^=)g5-n$`g_&!0>`0=CfI2?IZ%2iriEBVT~QH)SzuE=Ku+aU;g5a!Z~VUBVWygjPsCfy*4 z7CJi(o)@w|fG3?AJ8R0NHc#g&ilr!| z4ZVOnOC!%`&f4uHo0axzNZ7jwso%5abjY!X!T7pVEul^M>DOY(K8>wc{ClgJcE)); zfu|-utK__5x%DS;J2s z-%)n1Td#TZ4#d{4jQ6vi{lKxN;XY534?4q7j=uXX=iu?=WbefU;u9m9eMFQp=pslU zi$v{^6{dExUsSN5;f(G=YH+uSUd2n&5?3sEhZVXoAYV^b*MF=&b8{476eP=>PCHn-0rz*)#@$UM{`1FI*qkcR1jWcxsANzI(JmSE#aDt--A9?eYD zsSkY&bZ%x(tgH#5 zwZh-XyoHaqduyXjPuFw9q-flHy0e?h-$Ify?&~YkcRue;SI63t1xmwxixv`N8@UShlb;Wl=vVyC^<7g4`;S%1?hGW|lnvxEbj zk<^*lZse?s)i&x<(^Wc(o#`OZv%L-=In@7%pxN5OR32tR+WcMapD{>t1u}S)& zTB342=hCF)PhtCE)w}3S$Lq-u@H9-tncT5@iwe<#i5_C>s7%9d6((uytUrWoCy}Kv zI=yT|M2-CwNJaoAIHif@6CpJDQT*$R38L13>)#R1>Q?D92&c zVYKvAl#Ep|gjbdL5-juHy5V42Aj|-w+FAe={G{FCof%Z}U`HWLL%@Lv8X3=<8Ck?# zX?TE44CYFx%~oYI1k%JF#}*U<;0>N)1sYBlh<6H<6TL5x2%G^xR}5J?rRJ2J3>-*d zCIGCc3u#E>;MRD3Kt2OVt#yWu(If}O+!=`&TTs3w=>(*<1YnP@Q52= z&*@JCK`(9s2FJw4WR|umz&2$LF66$^TmqO9mJQI$aYQ-|mlZ%!JYcw0ndnm|l{Fp4yCIFWS+l-=ktrk`^9|1FBmI1cNRyX&S2@#*?(P3+u({^7&ll zTmzC;`1HmWI29eM}l}i}4auogA%o z_we-;o>gIBYqMyxRA_Qi%`zV&h$L$Zf=HsNq7W@>lZj^ReP|wIiH==W}>ofA25;<=3mp%5LvSg7D?H?>F;J^w@F`!g*ghxV&8z z7f{(QW}OlPy}*Nh4y7TWSWA<87Wim_MvLeRCZ=RdX^mNuA6iI;Q95Tr0r<5^0`ZHf zKwwN+=X0sxx$|JYoDD+8IKri{@>v$(Rz&z_3>h2U=luLdE&XhY%``Uy9S;>eN^mJk z%AslFRinE&sRRtPbb!I^IHceR6$LV)0v{ix+uydF{=HvZw#E3Ip(sW`=5`_t<_!Mk zB_C%Bh78QX3+Rm9Ft%b2j29=S04M`sohQne=Do7Dmi8dcHc)uz$!j^4HJj zX}|;F+5L>xxH#8i*LU;rq@K!}c@Rw72H!(SvXdouBA7YgoZ8Ds97kO5S!Ov|HdX@P zu_kiN?E;%biXZ@yocQ1X7Obx>J2gCjaSnTIT4`y# zb?oFCpk2LI_$#+kD})J=kcfu<>DsZTH!HPKQn_HNtry?>H~;qIC-+gbaCdTJb9&AW zE9t4C%=0svL6*M-7bB3WvpqLb20u78pZF!V5|uu{2B`bwqD&5lPJwA@QNbkp+Eo}` zv(Z?cVB+j5a#V%jHhugQT0ScF>cn@1`VehDFsKg~_qyapy)|QGBHuyKQEmVJ>d@)o z3i~;l9V6XK4+yIv&(Wc%;_Xsm9rbs~tQ!2@nQ9*+owe|2p%zpd*4grzX2*TPKJ-`W zlNc%cQmePgrB~jrT-~f@YD)=omKC!D2-RyBthH-4aK?nzX5nI6PG zg{bWk7~HTlGHBRJ@HzZX6#`%}0gf8%S?3BF&lf35EcGg^g(B)mbdx^C1DQKkd$4H3 zS{fm2&>g9g7`m`~=k6+x4pYybmXo!l<8 zyg2x>lZvyURX0Q1a04EE5IQjGg)xj)V_*Oj9O$Cijoy9drTYqs#XsCmb^3f)%3^h9 zh~Q&c%YNwwNLJUg@P|JPdi`Ey%&u=C{mhuq@omB+qckzeMTPMeHKK4oIs9n*Ie&Q( zdvZyp5S=N7CMH0lti;KD)Xt7_m09+)i}-qfXdlx;dI=6IlP|^N`r9Vd(l<|q{>aHY zZ|%qDM5lrhPSW^Tj}2#OSvk9RwoF#>-VRUi!?_jySet7+WyjFBBI0;*R2wFU)gE28 zP%oYQksTaWF(lR>f$?e|)_nt+fAQ_=y4FrSH8w`^y>)NFGT5h22s(Z6R3Bi0nIs5h zYcW-&Gf|Orc5B>oMW#RQFQH40aZl_nNFaA_nT z4^Ee7M1AQ%(K^%{b2ybs8EJWhrcS-(4DI1ox8Yt}*h&BO-~R@iGT|hYJrR?)|?$o zz7-LeeMmrH0$>opN(loB*8oh0Vq31^Uh@&B>h`%@1_v7~0$|RefHK%Yuo!mR$}k0! zb_#}*!W9ZaaAn7>tB^)34V;4de&`ky@4*tJfRdj2W0wyIduGkX7#MCWn4ukkvabO! 
zuqjeI(xxgU!e@0@{&%k%*ZvBRTh5)|{WVud_1n1ko{eF{W3 zMXtbVrw9}ZQ6s$F8cC?Y>7aUNg%I>l!E3J#gf~S10RR9=L_t)c!WomJ;sUfx9f-n~ z=T~E6xfvJdU|dO#3RFG;YdT{zVC)Pu>S8c1AWPLmk8lAp@vRw8N;W02rnvxWh+dk` znJmLfK8GXt0N~_A9jhbSnc1~nS@x}OJs;Eufi3kw#dc;4Wv8w~UXg@5g_UU{dmr(eDdNOyL z>6IT_t$ZdvI}*!ZxcW{K2mmcW(!U(G@-YfreB4rdV`*C)W%P zNOz(7`C5ibA7d6Sq>#sh_+r=wNUWyVgyiN3S$R7xH%j?pjE>Ji4%>dh20o*!I>W}z zn@+x{DPzFT>;)ecrr1yzk1c397E3gD1N5NRPsElro8=?QsfFyfh!6J1LW>8PmXa8>p2fWEa3;4xVaHRNsch0?0%T zDDAF5pTzoZBdX@CMYo@Ph%``u^pX%hPbBiM-pcRXfA?XeLfg+A_75|So!|dZ|DEun z%ZYDhAC3C{TDRAVK@XFYq)7n}IAsCZYg@FPXtIUupgJt>qSwBQ+NV`csQ&E#a^g@i z&x3gg(NQ$nPsvwh&R(`{RtG-R#W}iWQ+VH)!$+@d!Aag1SGkht8@9vo1^WWyX$zOaFKH5 zd2{xm5+=B`pQP-+|1ElJ@W;AI=TGj##ff|Z*uN3n`PfhEerFf7S3w$REKpG!XJig1+FDvR|96lADYmZ4Lg=( z*g=P*8w%KISVo-6frG?u48H-ziXLoz!ciswj3$E5mMYQ+1qivqSVDn&HaQVWg#+eD zxhgAZUpopx%6kbEY|xe<#FdbNmTcW&Fq}q5z_<0G+O5a$FQ>t1KsC1QW{%Xj7*Y8AI?GDE8~1x9Ev06Or(nZ zlSJr*U#Mzonew|xccqQu1xRTdu_rN6nqJOs<7a>$3{qmg!r*9EdMQ+zvm`na7RB&1 zjq>6vuDZwb!=t@ChMjg2^5$CZ1~tWoVj(68iO{)}(WUH9n<5fl*r{e|*N#Yp^EYO% zfD^ttyYwbu$6}q~y{;Xew$IIvR5(V1A4Yt9W56|8{^E-2c4)@q5%mn6EC?U6MLLVK z>Gm$GMG2%ES6;4$<;;wo6b9p^5fSJ!0>pFUgR^CKb{U3yIA`Qf-Enub zw#>*&=5&0?(NLqY!xTqKSM}wQuu_1AY>pVn#UR~2e_kY5nHz;fEn7#ugnF4|h!|Ip z7{$p=qOD14>v<_jW&b~AY-7}jcAQ2BnoN;>NTU4$w3zk z97+ns@dA{@0%^a44O>@@H=xH^0E z05lM9qvu$f1z?t;)RlcN3YP$pa5$?|g@{t1oFtAhfRWpR)Plqz4NecFP*9J7_d#ES zl(rn3*D?n%YI>8_M4bVHL;NVfy+ZfUC2bE9dXA(K7AWxG!c*g+e+SQcY~~1C^w=NV zZ=B}j^%D1svc~SGkR85={@K&D8hYXu;_B$tqNj9E~q*{@U{-q0@Nk@qU% z6SRW|Q2dNY*J|JXE$18Kvs_uMXJYGVLEPKI3*P+h2i7Gt88>lb!MR{S88sJhSb!&( z%l!@;IAQLn_AHOS?d`qy9cSdGb+dJj1Aww062nP$+=f&X<{-*i(_P1*5JlQ-^K*+$ zolL$xAieltX%aivNO+n{uMB%hwJrB@)P~P^FbLiD>eKz-{*8|flsdX^zl?N^W|1mf%+3X2?>LOR9r?L=MC_0--owx$!4Cc!Z z8y2~g!*2Yg?6MnnjMLxy)_KbhsoA3s#7lqri*)9e+1)1^z2F=i5>Uf405T0gMw%nS z6BFH6@Ykt%XX0TH%$hoD3qV(2e(ZNM=R9vjE{?e3w3xAjPRb4J=(VBVxUV%6i6voN zCTN)mZ6m!^=r5$RoOAYZexb*5N8ht%@l*9;SGkbzn1hhW@PN;-hM77)h9#%z-DyT@RNB5wZ{P zg3T02e(BM)jLHFLJ64G0XdE;)`g=e^zpVJsj z>3NF`kv%quQjsOD`d=0H&$)%4WxulgOK*Jk3Xk(%!$I}?!E%hwp}x2-)1uva&|AFU z9F6?_jSvZ);7mES+Td(XhCKSKbhxt5U`aE5%twqP_TRsDr?pw751&ZZj6HpYFUd!$ zQhb`KrKI?y@4sEooT0I9wy^XAG%Hc#d%5ND0gi23@->JKlw2>@DNX3&ShDzh{q#tO zEdR>Ohq#q~xIl99B0JFrLeO;|HjQtGXN>o3G_qF5+zZ%1H?3tz#Sg^$sz^^Sxr9#F zLfO1A;RP%`UXuNH+e6A;YGp8PncsnPliJwkqCavKhaOAHM>|I<{3+b8%NbHpWvRnaKc# ztxb^kEz7Bk4oGN>1B3^zn*@S_xHBbmW6e|yxo>xAB76O0fHfe1faqWNZ5~vL*$2u` z$`JxCL8$AJ&I6jwCz-pS-{6bu6mfg6_$Acmr=#Zc1KB@qEvYUk$J%>yj~k_1`%O31 z!1G3ovFw|O7)U5zfMsK#vh3+6%?o*B_9%N@@k~`O?OUS?u9UQoa-14)NEKpX%E0E) zTUjQ`nlZVs5yFr{UI*+rJs%gpIC#Ja1hMVLB$Jh?Z1j>X^!V9wtIMU7k13*>R5sjx z+ldMNwzRWDhg$#H?L^A1Ad}Rv&zdD~R5^aKTAG=OP#24p!&vAXCd5@Rq}&xfz{x3m z@DzU$G|Hug)Y985OmESaI~hOBTZJe!>q$G?DZ!qY&13ds&d%(WchobVkBZrD>N*mN zoOKTdtFrw{4=Dd^Z57dJo#9tXM{$A{v(ICPqp{aYoQ1x-j~7SI6n%aaQOn)w;M7;e zeirHT6Io85RZcy-M@Cg`{c@*^3E8L7jT{?`ovX!3HeT9zY#!OZGbpV`Lh77ZD|7mC zqQGf3p{Su_#j>95#M`@8dFiHOvMOY)3#|{fXK*ZW-tz1ZF1m?mIWZnyh2zV#?7#)_pWguaNQz3hl z8%KIjCBwpGF#t%xb7fX+*?Yt3qj6{&LJG7yHG+!e&JM>!_+!ZIN3P6eQ?gmVB0W@x%;0{S|m zq^wX57M>jd`<4b%BqSg^U4^s&?u)HQ;o)7VlW8EF(SewuAfREKgE>Go2N~$Y2^fi? 
zI(Z(?%{m=Ap`xbS;zCs*d1Ev~c5sxH`m-i?b4YY}2*p-PY#?e;t%Gxi{_XA1;& zV}Zv5a3W2@e9klZfMtmp@G$n&699?QRFN44G{)ZpY4`@4aG?`VVU;Em> z{+;i9|DS(y@_P+3k#~CMVSlNpWHNrL1oW{czNF_{sT!tM(&;_)>MWIHgwddfmTWT< zDebUSS}b?D-H|eF#1Jkues;dS5|;YY&)xm+{=pC5zq_^erTV2Vty5KBLNDc|4lL=JF;<;?h^lU$vwN`Q| z?NWIKUwQHJ=ws^OWi=DG-l*LElRsYiRyPcVj^9V;K`xkZ>e)losnoOTQEdql-WgvMmYV5 zOl8BD!>|1KJZDczCEbn^Ug9$Z&7d1Ny8E@?`2V~t!w5>yp@%1+gWvI)?uHWp91D7W z6AXUs1Cup@g45DC56lAQw=oRI&w~|`f>f*p(JV^AdZDz3L59n@T>^8aW6ZTJ7fPr8 zvLkajen-WN;rd6R5f;4P4-?bzXDjY|hqLzKXeES~&sq9=yB9y)nJ8e8)Q%4=oI%*c zu3;?C9Py~ZAAJ|kJr_zFf8sCQ{M9f1x`T;esY*W{;bPDtC%0$`1w4d{mMTBg#Ew1b z65Q66BqRCOa+iYG8Ce^UpADFHV$|UljDfn^k`lXf_H^mKV#i+9iK_ML^f`W(yIIz1 zJ11g(bZ0|%@Ee08A~yk{J{>RjCr_;FsdQQ_`o9j0L=Ez1S055hPjck}8MO4#0ogm< z+P;E@m$cBH+a%(RKQyY{;~pVZI`_aVrVgcW3LpJ@{cGR6 zKX~!Y=b!EdJwu2GMyDW9{KZ0@qx~CS`GsHj>|c@r&=N(Q!pm3~`oMjfYPBJHk>6MW z0`M@u=rQhM88ihuabs4FsT6~(j+8x9_7t9NasyZ|RcqUB7?8PSI3_1=F)PMhSQ#CC z{3&c4#O4ns!01~KsM9mF7)Ji-C|&0A?rypZ*lzxK?v`LaPSQ{%_@a0z0l1-_#)}+^ z&?K9r)H&7i&k_j=Y!Lc*x~nqKQ?TW1ICvU~{sw?Vu~$M?T3|8(kdPqWR3g9@rg9-b z1JHe=8=FLQ?#)q$LCjZ1%UFuTjnR4)v)T*k z@RG!xYK+SCP2qatZWb1q)+ZDfSFqtmZ|&rnJIKVp+gx^&-q&Fq-_vX^b)gylo}%_V z_L6wpHpdClo$+zeU)sd_2s`YL@-=77st;?VkRjLu*O@OSuOXUsj!kiW@|rd#mf$n@ z{K*9Yr>vr~qNmQXgP@b?cQ0N2M94@3NWOrRb&eY*+K_K7v$aR@I2n@u&8jkEN)O_z znVf$c`gIeeK)tl$f^MpKOp_s#v&n1h_1UW#X?UrBS!|{qv)MGC?B&J1IC{Rp*uHpi z_h^c4Yl3G0-Znc_h7sAbMIO)Qa)*3#DK=EM7tV?|XR=>LhK)mssuCnQ?oa_vA3h_N z`Wqi*^9Vj)8Yy6LR%6}xW`8^>6&?PadXfI3oZ2X#ps0R0h{>?B`~w{RISdB)M_j)RbT;- z-Ato(mIvV$>pEO2Rsk_N<}G+oBMMi*G@zL3A|ukVK+vYfL^oi_ogQ>-J!EYaTq6?QO;^Zk%h3t!3=PqPR?c-WX?@(AjM8` zJPN?o06f}o?@h0x_W&HEd)!61RlOr#ux(2QC>{6raX3HD!SWD7B2HfmA@&+hTXA3w zttRF30}x+U%CYQzOT=>uu9Co8o*YcZw0LUIwat9!9HTVtM4<|9s~&0CiDd|7z$0S4 zny|Bg4Poxkv)af9dqL}-3Ocay8)S^Ls&ieT-puRdFBFp{rA87^$#s& zl;ZiUYY~p4sHWdP1lTA>Ic@W#O{P*!dpKl%a76_VYWdG`-OKDF>uRAOv$twtCaz#~ z#uenD|HaizL*9DFMb2lf)T{zUOqW;y z5*;4qjZrGg$C0Vd5P=>7MFAS1gtuUGAWh-qiSaxD^KK<*Cnnu`q30%otz8@Bo$?EI_LVRaj4y)|r?G;}=e>>XTEWmeb@J3JGd4 zOe(6gy3g$@7grni$)7pG1Uq~a#y64dwiYc^W7!V~a-1LUEpa<0JUzDuPOG1Y7!>0I zv+q-xMkFS(X!e--Aqv_X%RiE(86yVi zRp@ABF*fc;Ml$MQnpDBhW6}k;pjPGcVmHM+1B5Z3@rCc!hmj^27nwapcvN=GlU6~j z4AQgn-5TN6nk(9K6cS_g{A~+O%yB91z3p8SjWo!icFn*rQlz=(G`$-u^!5HWRLE5# z2hQVzuUsAuEe{`+T-u3wV`@89*^j?mxF%`4{#hJhb6D@@n*DAc9Qc9fSdt(c}q_ zjaubwBx&KR%p^LOMzh0j-cn@xd4g~kc8>UFp7a*W?Vy3HV?EHGl4gBVaZ|%#8?EKB zz&wv#JPyvhOtxG)^*y)b68>&(JA*wbzauH>g;zMQKCtgtDX3={QM>_H7f)tNBd#y{ z2A9Zo!8pa^^hiqx@v#$3JNSywlQm?wib;?}j~rGNx|)=X{nv3ye?nY1m~kyr=mmn8 z`yeu{C~OPT(kg9kPllPb>`?;QJYdyWEUu^f)3e#FovCHXt+>XlG4$#jLQ#2nnmf7} zU{*^zp63i<$o{KcTLG7cn*J6-ps81bQ-XUDQt<#5%#V!q_uM zM48Ud*`o(yF)f7!M7_g`r|(w?bO#Y_wBVbcR|>VORI@uf~sJ+&YO2ttukJ2 z(ixr{tJbmbMH`Qqwn-j#2Wd^=$d;sXaKQ9}$xIEX4c!)<0O6(ZjhUG%rC7(eO;bK= z^K!J{?Za!ESe`?cx?{M56v$3@WcrS7__Gh%&FLc4;%o57fcfE z#yAU^x~On&pZ66R2CfQsQs4pt(bnPu#8;cK0EHAV%OEs$xejv#iVlJ+rm1pUIM^Wo z0Dy%Q>M9G|lSv-jmnMq1dn3(nN+z^@rlg9V-<{7O*TkJDUXFvfs>W!?F)4>(TLCyLPrjYzDrT~G(7axk8KItmQ4pui?ED0YR!>JvRf7RAb)6D`(b;owU^4CbfKEYL26KZh z&l3@hB%hzSz#qUzU3OgB7;PpWq7R&5@I%+?42~rIPL|EdH)%=o+;6WOEoXhsmw471 z)`3!)47rBbHeLAS`Y1+qg@q{_{TRZ7s1dtr#RIRbpi&)TJIxQ<2D-{Dz;Ct_sTjNb z)`a%?UWy)n95m1Bhu--6AAZ_6>->Ov@1H+3Klx;eG3Yco^h7e zsG=5erS+++5=aW4td8}naoxSTIG?ro`#|GL68i- zJw{X1?zgw6*RTDHul%Vmzk!zcs4Rjwfr|XvXVH}J53u;0+Jsj?qrs!04<^eO)`3rB zV|IB(9JgqjNBJ4dqnGD1o??~YfHFD$yH9>|=l(ai;P8L>m)}>>i!c27+jrDj7I=UE z{xyE$tj)fAc;T#~LmdkR6>AV&-*0a&dvF0WFu-)%=2nIP0tnOsVeEi62_>x`r6jv_ zUK%Vt|M_eFux=8gn!#ERDSJ{cL`3HUVgegxkJ)w>cgqE)OlNysx|TZWn$9@NCwlO5 z*mJUSXS4tLOQU-8ZadiF11oJzz#{Q!TK9YhfcF_T6s}5ckI(Y^CGvEh7L({SvEmtI 
[GIT binary patch: base85-encoded binary data elided — not human-readable]
z12Pc11FQF(<~&b_*CDGG9tz?WHcUy8(E2*27coOp#Z-sciz9RI*-0DYMY&a!yOr5eLp?}O2q9)A6c zH*bG4wFO*iie;I(>-RH3ek~(_ypvDP4Swud(5%_J-h*fUudm)%y0v+CYkoL%h`ss@ zjY+Im`_bXGEo1#4AxnPWV)C}B_*C>~KJ8|)TeA$*A;Ikjpm6}ZW2Ad9zez)dAWoV1J?Bs%nm>OV|z{sV;H~oo$oaIPpf}c={WD$ z<=^@4zpXs}-S@@c`}F+L+0MsCyYT*I^!MiD1Np!P6LlD~qiOlv8F$P*|B;rthr0E)19VO>WT2SoOcmPSlI_wJV`1a-3gS%nDinWe-lx8|&-8E^L_axBv_F ziaLiLeuE`Ma)Tr7TvLar>VjnY&_<)id*+P;?51yX7^-v(weozfJsdyfXe5`PuYneX ziGDCxxGRNSu<~37bm4s2S3~6xAK>Nfq$iG6)iP?%kWg@Mx#p1_mjm?>dg>pXz}(@& z4^Dl=dD8q4K59E3h>v%Xw$8qB+V#YI>9Y|)VPjp0ztc>|Xzu(S1v%F+^}{ek)>a?4 zmX$cVDr_f)(T%Iu7H+O&BlkujS-oTHuebajQfBzc_c5yHPPX3CF%rvc3U3!$S4r;6 z9xN!3-QLyNo1n(XRD#{UvQs!LYNCJP9vApUcBo1pzVp8Nofj{Dccr=i`FPkm`l0&% z#rF??bolrk``O2Z(trHu4-PK=12AWSpn}2*y%AE-SHnmLGZ=Pt3^cON${^nWR6@!R zW+<iecCN zq7(Gv5btdg0ZB%iTGqBf2w+XKJp+b)4EWAm91Zwx;u3G6#>~h1|NjKx1)OAgVi;)t zA7d~xW<)Yo7FTt5HO_PoXfz{PXj_(Hz4Dr2_m7Tkr2%I-iya{-v`EAyr-JhfZ#z}cM%P_MSy^Av%rr^wJ}W$heW3#!^zfw8N}Tk zmlhm6#AzuD%R5O3GIrJ`8;O?NNgGjN#KtJS#YsJO##)Y^uW8Wv9V^aXoX7pis!t3B z#>)pNOf4z#L06wjFFwqT-IyEc!x8$BR)~lUg42mmgOEQrh}mgXTQEHa+LUG6ibHP0 z@0>NK!@f#}Y5bKA+|7-X#&qf*jW-6jr~X*jUFwuhjE#?ubw2O98UInWEg%Fn)=6;=o)gRrc|Q! zi92~I5cHArgXv&EU|6eRX_6DS#wsnvE1ku@*EEqbua&qifU2U`m>z7W@#guoTB15a z);k@I+X3fW-9~Y~%G zZWZD}5XGCrF4raJ!i2|d68bBux9(bK(Zjj~Ir*sH%baNYF@<&oBF4N;5W1{Aeo8x2 z3|us1acm_|KE|Gi+7=L^kiL>$=z)3I!EYlV2FX{+Oxg6|ooYw583)ZHp|k5oWoqaz zVr%lj8Sf5viB9h$&F-9KId_Z@w{SjHi{ur%saR4(Ux*ws9J%0ofL1Df#{*6LvAob=S@nuC?9S?e%f_hisl;3hhgaOtQwgNX%R<#IER z$p+I{5Rv{9B)z=~mb~*~09=A3rY=*_sH#~dltOc^U^lHA?EDfkyqHIK)wkO>vS9~d z0()SGw;e1hnS)^(2cIFN76Z>=5#N-zr9Mi4F}Ve%0P6GQE8vLipwSyeFU<~FkOKqQ zbP#<2sd@-RokNll2RzZ0uVY0|t*JW!FJ`KCv}2#K9)%$khMWbk3ye7?plKb4Tos&7 z4irQonH}}RD%2MhHretmoO`>$S*RUN5kAmt@E4^I{^-Ha&fC9J+nSELr67Th-&+}m zGCoBxB8dZK)a6TZUa55bCzH}LAeuej?hM40U8A3O1>*w=aiBWbFiq7Q`6J<o-^mK-2SCYq?hI+3SbkY?-bE&xMcFg=8C$C+x!0R^FEfe)xa@v}MPW|C{OKKQcd ziWsOYTWi;U?p?ew8FmN-4IuL86`^8N3hY}GFwyZ)jxeDgQIp32UL z*KigZ%b&HjegJKYb6Deun}2Fl(_6%6^Yy|a@rTT{B-Hr${P+WYr~6)Hy>}`mMdMJ$dS(6xQSi5aovM_CNa)FY1^*$)@M6rD!~XEe=ik&ne|rCde|-4x_YW&SeY`(x{+AC=KKi!sfZ_CTaAr5) z9oeq-*%7ZvUi7`yVf1zNm86n;;H7Y2GlRDR&Mf7BV?`~zzIx|rQ~z}0X((OE`0`c5 z_ofKhb3*zeC!=Y&)BaNn1U(2pz|C>tY;Z1kA^y zew%-Bauvd)t)jG5&PfKU1{Tz8F5pw81ClrU8DT2sSrWkaFa0f^`4_~vBK!13scq|iihVgwX zqbZi>+@Re*Jv`B{=F1P5NB2&7Zt&5xeb+1Q{=JA2Tsx0XSKD%We-afD8mAFaM4mjzn00b+k#9d?O4NrU^A43-17Cc~iw;bw}WipPv z;WaGCd{i1C9gn56i~?{J(2C~;7u^tca1b_fR0Yhu-`l(EVsD^k&7U8c8-NzGN7GM<}-W4*rBT8zBs*f2^q>?>4kNOT=lI`kBPUfDRC;iiGE06)@vpSkF8!(LL^lsIk$ zc$dhNpQbM8iqa9wE-dXy;ya1jCrtB7FysxpF6{*8_b&##%=X82M-AO$MY7g(Z#cSQ znA}N~tud$+*!eWYa4z!PLgjsv#Fq8|VV_{zO4+3OO_=LG3k4^w-s#wlTANptGdC*s zrP-9WH^ucCBs7ib2gdsCCp97HdB9GcM6yKuBqBbWjI&J_fgr)S5GrTG*+Ci?x0~m& zF>Gz<+R1~lc;XeLtOnso;dmIC4tx>?$VoU8_8<@y@KOmNT74O|(;Y3eoFYsvd`0n3 z&t_j^RYzi}If45Hq1JW4f=6QWxTDMQME=Yn^4<%4bqiwr@9lPNhTu-2Xa~Y8x|eHb z#~oib0e!N4)E09mcC~fs5b9{Kph+a@Z<+0Ll8y&Yn-<)&s;(Cuf50%kSna;#JPVd4 zwD=HYe0sM{xA8=b$XMEFI?~S_{i2->j??t0w?Am~pDAuOoVfb^utYz99yt`IN2~~K z6kIR1h4W)&EoNZnHpajJzXH;{flQ~5a^zVJuII@kzBTEtY3Q805pgf*Ndohz-Segn zCT5FB^|yP*5FLD|Sf<bYz|1oBwf4OXS6ggM951V?h%VyB^MV1kioK4uuhqYH5fHdG)E#}l6Lp=?Nq z*)%nPmKlwe?4HA|IfPQozMdF(XT))Yx~-HYrsM!_L+$YH5e0W+^27v|j|W3sqkRn> ze3|y0S4I}g1e)X>@d7fzoFVPVtUT)n0~UtG!VKBtCamAO*no`BA%yPuV%FYB9CtyF z98XoKmz{zD6msg0-T;6zd9QQH#GDB#b`-c6G_$?+a&D4y$1e{Tw zpc|*m#nW!NmUlWqOmp<+5?82^6Y58SZ=c~PyNzi4;hlOAAzVBwh~h&Hyg0-?Y#Mud z;0Vzv0eL<(^w1@k2hG!%JKf*+&xiOFdYxc8vN!-ja(MQ$!4+p+9MgsJjH&~cpc=Dw zUriTWfkt5};aA-3`Sze`CnWq0B1qVu*Ecg6$8l!#`QZn(9rEtOoy>Xe!mH70Uv+n{ 
zWkcViPLk=dHYq#y_qRCnt0Eknak(uJ0jsdm7_8-8*RPCB^soAUt@rJpK7DZ3D}k9` zNdJ%j_YCd=QD7oIXPpa|)Jb>Gpc2+nc&_J`$nFpdI+^@FG;ReP*{kL?r~K8G^73~M zzt!5>Nz~^r_x%$P8?^#_uYZD$nzB=~0UCaLf&?b3qx9j2*51H<`?D$d^dXG`zNlG9u_W{v{gxwu4ZD?e#UmNI(^SIo9 zxcl_Q_sbvtHZmC%69{;B-dFr_u+?2E&JSoonPbGmlKS1qO6--SP>i1}N1NK@rSRxK zeomfLp8x5~^V9EK{PW*8|KwYr-`|=1b#HJX`Ar6}!Znyn4 z-fTMNQo3s)I)HlWL}bY7qG+^(0;Dk^@2SOKzMlBC-}3X0V*wCgU;SZY7>3p`S5(+7 zHcEw$n!OGdd8OIE$A2Ej8+PNg+B>Se)EhCvz4vsIK04p8bq(}wr76x1k3(MUNN;+^ zr3t#-ZR;CksKP2Hjj~+^1brRntb0jZI?HFegugVw+GSn}D|*)&UZ@&bYcnf`cL6~o z6^ENR1<^_p^|lA0J{hI$m-K48bJzi{}`F3Csoe;#JGjgRW=aOt{R z%GxthH1P2WfrH-2k^Z^NR4545@9nX&!CM_r8iCvez@=s<*zp{GsS8lwL zYJK@{z5RM#4qv`lY31{q^3h|ztJ53^gEh&ix<-Zt0#)z0!wlNx{#0sMIOhY0yd6UE zrUnGRs1Ioe1yliy2N&m>yckE%*^3pDo5w+)HsC_9QZbx57{=L8y#VK6-0-GH;oYI5 zTITbax)G!p0(9NZPJ&D1@JJ@;_5!XAX5%XA2Jfpz;-ORcg@Gs~RKuAW5r#eSHqPbX zdw>Nf7>9K4R2`ZXVGOV{Y-I$A#o`=p!vIj7fk&s8C)m+gsd!ESF7!BtsWBJ;Z}P!z z{+c59V4iNke1d*gwEcXbIov6xIX*iW%7Oiu*~hio)ZdL4yL+y1);hi7I`Q?A^jVK` z$M6ZKl2Z@iJn8miF=0o@7#klFAX-K_n7s%_@`V5L`7CK3Jr|fqGslfIa0PgA@#DLz zq`DD;54^!6F6pPZyq%zA%41Hebw)fUp$V+{$vhLcb#-8**gjNemwH{Hlbd!FPxZ8yEZzYa?maUBWk!SZ!_of z{R$+**e2nGWLNZpYaX0aN8GFMUQEGUQ@xS)dIEFZX$5zt^+6I5@u=9Tp()csGcyr5 z;C?CkDbDe5h@>3Z^Il*=bWSl7pFVW89W!W{H9kM5HWjSa9E$!~qs>?*O^#BHH`Jqk z!-Zp|E1EU+@vuX5EuS$Im+{HNDKrb(zPf@3vFOxdJi9wHs@8=zMRGNTSgxsb6o|9% zS&Ia-??i>73Og|9szbjBi1Xd>xqjaz2hRfQG~s}=$H>qKk1f}1WEfx1B52pF4PLA2 zxRddkP&GEvBZH@M+)ri;WCuCKyYBgPfUF;SDnHbSl7=E`zfjFKLphm2!PF5ps-(j0 zl!g=-+7#mA?L^)*N^%=;F8x)2)kjb#%rt+Z|myG+AQ;l)xfndyF;%5S331|Ud zLOcnCV4SG75y;zc`eO&}Y&I0cBBJO}Q0QglnIASLrPP75I&>SXH}$}{9~ zOWk0B`5Q?JD}u>6;JZR@X8E0@>r8>jfln1)PHwtReEs&ZuPy`yMG}Ld-oVx5%P7YX z?ORN<7?^GjpNK`<)xP@)mCi>Y+5zeYFlT^1B=dM3dAAy;Lb)(Cb92zD;f^=p$6iGD zfEEd$P&EPo_Y-b9*&7BV*fBDdDL}}~wC;Iw43s1>KJQdQPwM4I_wM;eS$gs9r1SP& z=xIJIS#i`3m|d!n9wIxA)EK1$Ux>_rGtu_-P4f9RAQ9Q;cc_hf^0WOMXx_tzh8KK% z_p?9#?2+)vi=(UWyb6U^DDgz=*8GXdF`qk@x8GA^C{a^cHHXe zSKo!CspjH}*MD`faWXg~K}i6Y;*jpn(-J0isQl}1`%I<~aLl=bkz?R>Q8Rqn$vM2` zN~HmWaddrF7m9%FBSW03t3XG52?nCp-ZqYkuFa974t1`Y+*~W^_cP%9ZnxfVuTe_2mlK|QP^0$-FhEeuU^S#N(WlTHub z{+rC(zvTAjn%TK`vT;Q4hgDz|qZzApMU7*h)a7U-w*LRV>R>M&`cQmB-||T(pS4hC z#TyzAi!r1CVwV3P2pvC}D&xu{dnk;oItRtofeUWUo=XYcBuwt3C9K)Vefb9fPvuXI9qYG9q%>M z0bu)x##9+Gc03AT+&NHyV+lC-c*?T}Kk|t6$)+)7#}h}yx-f~h3Et40+Ev5d^v8LC z2LJ~=;EY2@@xJ{sG<9CbqAD2bS)c~xc2S=)2bXQ|j6YFO%)Qz}Sznglb?S-~osMyW z0!C0I>^6=wpR!$h+9IR`*^v|&i{BSl=IlNGGd4!vC_^3tF zAMoZiksrj=TUxi(JQf%Ywl!`_Cq2E+M=r5>=GmFLa5HUA{jN1`y~?>6a`0HK;elgg z=UUn^OEZcu)g0;$$Z*?gvS;@P1)js^PRjc|37wSu!?x>u5IGD5UUe?apuGUh$!{-3 zO$DL0kb&Esw(?!3FCSgNgGfN1pOOp}bus95-!Rv;}h$Ni1evkvgmz(LiP73Mb!md>-job768M#GqCm)-`b(J99FQUb8-bZ8fp$J~C&53qm38 zr+^wV7hWw!@grywf1$Z5qIhvKNO2MsG#MoC#C;R1=}>K0U~}1Ai~wyxn#!t@v#sY6 z29_=ZUa@zN-J+%)2ZfsH1L&3>fh10-DK*Yr$N`fgfy->Wc>IX7jUB@dCHv#8$a1t% z*PM!@6mc583J~U{Wd;}%;7aH$f_TPcC*u1c0rzn1w8?m)+X@c2)+OLm~#(RT%)gKBUp0+k~ zOP<)BjenHA8C+Za(tB@z;oYwvHxG#pC7`Y=75%CKO)4NZL$V_$zlr<7=NI#^$GF*3 z1^!G8`G9ydhAcEjhfbvYcuL?4o$~ty93xR|W*q`{;E0F}rXvn)nd0gH%lG+wHp_98KSt zxAJ@L?CP(T-*mrm#U1qp76LxKJ5qw4kw5J{`2mXtiOIx5ZLJe^%d7j4ddaeuNdN!^ zhZa?J;?<(Hh4?FlOYzb|hKaJx%0J)T|JnXon-i5J-+lDtRgFD|GpAn9Mqk&x(q7}4 z#f9PG^mBP%_|Ct#X`d8{(KTX^f8WWl!BNXMa>7?bE2fZHB z*@mhPuwMlus7=JrndS6YRNFC^f8a$Ltg{#(&0{*khpMLxO$^(1mtAbCD z;U0Sex$FMv*RcVK*Lu+qOnZZae%Ema3%}`yogHpncDaLa6e;;!(ghYr0kaq^ z1XFJ` zZ~U$AAGp!Y&;a?3Yb#&w62qsVQTj}b4_|SxK8~FW**X~6h^dVb#@00%>RGjZXj0|;kg)1LeiA}5h<)C)#b9p^0ulNdkU z!&fKMiH%A(rt==fE$Cj&!?~tx00e@G)1`UPeM|&Hn3a@!%GHGJ)Fk_foHaThxbz(Y zdfz1-m$3CE(-k>M4&m0+N9|7s`?Vi^bM*1UgY!+fG&qPnPc^eE-HB7AQ8U=XTAmk= 
zH|s&_$~C;YT(!RLB|W~cCwy1q5=JdToa%8+jyAPPW$JuGz&#$P5yC{t_kg{%5rR_G zq_fYJD_ke|jAwuH@a!Y?p7b>iy9tj3zw>HD?qbfZ=7b0i04+6%Lu%BykAfx=y9`Vh z5>#>A3psou;ha`ku;$^YEwnrYL>tH=*f{}^pEq6o;|@;M`x=ScikLt`Jy!&!@RSaA zWOa_FbgP0@K*#{@TG4$E@c!6^Yl+SbGk#{R$Kq*sFbnHFpq=)sxGI*=XHm{}^H~nW z6oBN_eda_Qd%6gJS4Rf`P^~b*uy}vQ7f(J&1NOp^GMp2IH%Rw|l;<@kXaN^$abD2# zj!59uL{At4Gj)@#w}9J%+xpY6G3Xp??m|?0!6s1B)Y4tvd!4rPa_| zsLS;l)?*ysRC?60JZM9G(`y8Dch4aeiM@2Zg^e!rIG!n;_|!h$gnDO04r~zqkA6O7 zZyvHsk7O!1a_6)2;11JE3TA)G^AW6JF7{eR8YvMC*?=RTYljH#Q^?i1<)yain)&*V z3}cAHluo2q{a)vm4ML7M!5)dY4c(m#UvOdhcQI(13`^ePCYWVgN4_ zcL;M3(<=%);d;*MG5>@4s2{KGuSK-vHEa;1Xympw>b9-sBVwx|J{$P;AVf{+ zCO^uRASpws>Set#enjgjT*+63Zv*WTJQSA;$gu|tOlUJpj7D?T=ekF^$d0sY$~8~# zWdleIe!}gD&d}@&lbZ$I%||B-sTBk(Gs55)7EqAgH|nQMev9_ zCzayeJ5^uL6J~RsQgu}^!*Cc(H+t~c!oyz0m%-GRF5BAe%X-)fU^!JG9SjtvV^D75 zR@_j0e3U6TFD!CiyQ$I-DIKW;0Qo1lB-oE-v}r5tA=Z4R9RbM7_+K$pX2 zYPNz}Bd?Y{ZN)7D9YPhC?Q~#LK1MXDtO3d%id;zM7{ zyw)!&A?br#uzyFJlA-DeRYk7k96Ny;1or)3MJ|BjS$s5ZWZ-_SJ8=X(JB$hoiZTXf z^`)ui%FsjqP_;`tYB=F`$Hx_WdhmVX-UmNW2-a0C)~x4)eyBzG^v9=Rnt8u{QLm0{ zooj;TZ;y1}TETa1@z(NbZTnlhO)=jZ1Y-}8kDYrLC#4gZ^@5r-hH8b4NtUr52w)Be z+|nq8S&4*9P}#RZu4Y#z11DQc+kiEG76Qw zOo$Xyvw5vH2F8YXbyGvhuW%AyY%QvnE-*~Us7}Za>smB1? zP%MXcS|BlAIpDnygm!g|Dn5?aMko6oG)2&!CGE!Rcs6$4_aJ2gD;Np|B`nMi5KRW6>k;;)r z?u7v6GUi?$J=-Sly!Fd{n_VhVwK~#V-t}isbr^y|@yzQE`0)HH+LlFQa6wMp_1MTd!$-=R2FF~mMH~(siR}l72aK1;Z;{^yp5_F(m1tVn z+uU_Zng|$l!%R*lk)CG7vccq%@a}sHg`hI?8cR@&PWXO+Q@=C5*m*f>+dr(08e6(- zW^wY$)--TpCmoFwGNkoJKdc__ z_aw}n3X3T=f=%E09~U-m=*1I|1|DqaLcNVivy6513GBBzCtlltbx{Uhd}U&|!2CE0|(%Z3#zV|kz#?bjz>#ztSCT}Y$V+_B+1(CaKKLhf?0sX zFso9wVEMb!FDf{mcQv_GRKRAX@tCvAVFekv$_JOIR;t*9)2)d|@;kbT##nff zo1t8$cd>;zCr)d51;N^fwoKLAcqt}uuROTMnJ=JUT@Tervf8{VkrLqwt3jnYO*IpZ zCY(4EaHmI?p2JI|o5k9b+@cKW@j(!l$pkAe8Kl{J*#FcU{LJ;En{3M-Wu!xv?@zs$ zjpBP+{J0wkvSnh;Nvv!1pF4$C(H6X2KvR^}5SSV$X=2%+6EvL_k%h zni$;XPg}yh2(~y1BRU|wPFaTL78u4Bab%QJG_HqSLMPHmOsR+Q@LpdKw|$?`RCoaO zvJsy;-Fzk(jD=q=SK*piP!o=e@;QCNd(^7#gxT|k*buf~j9?ZF`gvb!kc#vM<1}K} z_0S}gsYAq{fFpT5yq_kSoAQA z#+%`w28>SUoF}@TIDj4~f#Gqz!9Pon8aQy5JTN_&@(o}eHGPM(R!`&6-6Wk1F0ZOrlIHcVFyz%-Q11gm zfp!M+&WLXGXT3`96>&n>!QBhIUlv;-AC<)}P&%WuZ;oGax@QoEkyKFa`?W+iipTvu zeMS*5xBd&4EH6;D^q)REeYUW<#|(swRi^#4RA?mW9S<1)_>bQIY*c*~d7>wCJ?w5Y=>0YubS8F?$bub#7<#I|kt;ePTHH!F|=@9Bh!BAKt2@lXh z3Qu)%D$^dQyYukG`9e*w2t1`Q<~v(*X!8TPp7*%I@7MslB?Fu-f62U@1jlUT5kv( z3SShwO%-mosFl5T52G4y0O`EEO!F-#FkIbB&v-u1CO||4FbIYI=;^;bgq~KYhu?k3 zrOWPYU{v@z{zY=`=JeXIv+(Ul_n+6F_we(5Dv!r0X!6BUo#%UEqa^&e-Yd0RC$TVv z`Fm%>D?tzX%?vn?`11}{F35IBqj{37dr^^zJvTX1AZ zBwjsqjgHDQ&HsM>KmS1Z-A_ix@u4Pe$0}7l2!umGo<(Kpo_7P=cuq|}uUF58VMk)= zofkJ_xxYBgf8Bk1HgEg&w;kd0KFrLABixm_Tkn4Hjm($+?mxKl>Yrcm|E1a0|NR@w zD=CkMS$*sEURCrBhjh}(y2El4Fx}JYB!&@+djx~`3sRun!DUDW+R2=qVQIJ?h7;q z53EydLGvsGBY?>PJ-jqYhEts8tXg3#)|T>w$*u}91z=9GXX5(Fg|7@e7uH)KYIwHH znNsZL3g5*a{tS-f(Yn_5>b?Z2ssU0Fp?r82OBdhx#X!2N2WJXv!FJYl25j(^Gsac{ z-f-SoUe|TAII?%9;CpTTpj>Ut#~UIg_@h23``rE_UsV6#V$W6Pw^ zolAtuvz2P<))=Ry)>DO#By2u(o5q1a{# zg@BP^pi?LUkWM0H09EsO;yp2z@YDt%$>vB2xR=7ZgT)b~oP?BEj}D7d*-FPjYiqKp z3zd1xH)&R!6fc2Up&2|G>q8KWFiyO|X50G?&}sH7xP$Bys76oX)22UU>|mj=A00V| zng?|Q9*pW2Bc~@0JV{V1hk-uz>JA*s> z9-K4eMBu5%dlLwV?3CQLTiOyj_HIBPD`I2osSA4kAfe=Fy$Hbd)~f-A^#uYa5+)q9 z{YKgN&>9#fi5D_KUy`VI49YLm&N}sBCD2)tq2ue~AP|&F&NGtg@?8j$`Y)OdQ=!o~ zgpD0%d^>qTCq5k~yL2jw>zv!0+szJF-6yUi%X`V-#@>3~Jf081jBPK1QD31v?#DLW zI&X1Lyb-!_z=$6Z(_(t_k-16ql^>1ImRn?{Urv#xLy|b?UaAghA3C?13@UU_EBUeI zP|MLs5sw~w9=n}nWBIJEb=ZX2aUcnGKF`0jMt#yX0cf*11>6x`=#9e&F%o^Rf(>?z zALJA?tX9j^`atrxy4{pEREk-BN7B3KnD=%X@@Xh8Ir^!YSJ6AXdw5oLK%G7q|4*`BrC0cH_X 
zqG8YKcCh`LUGzO&vTLQqu_Nz8Z}!b;f!}+MNgI{E$h@g zI&MF0JH;`o*yU~!P^>#z8!K>%f|^)3^Y$Mp#@N}3VD3XG%9?ye;~ zX7sk*DCyud%l5pow_AcLZ0VqCP@Z4Yd{=Y9Necjw=H`0!sn>7R;Dq859r zy$o)o@>i0Px0^(owWMJgfD5)l13+9@N7SL2ng|l|+t%Yh*!$D&#ka7QNv{%0TWJ};NLk%iqaM3tEHsE?wMLi8m|>mUr{BSnJ<@A6?1Y(G9##2&h# z&%RcB@g1U=3P88;oe_PFaC?iYTYIY3PlcBcU@bLx_6w)E>+Jf-AFVSYCw=x>*fU*o zVIB;f_Ygzu1OQS6FqrkF@o`oX%M(|p&dy8#<_}pbZVd>$ zjglSkNB`5m`7fXU;NWN2^D)##j=o<$-aeG_p_cJC5O`xym?@#{Jm{J;Eo`uWcW z`zQNKo>LqQ z*HQ`$wmE{bvFL6%3wm8pwhiLcKUDxf1TP~65djXuyB-GRHF=pQXB}tLMt!u*oQY-L z1^g}mP7Lj2gaqNBKJA`^@nHz^4sx{ay$3XwaJd{;BXDuc{2SCP%kfyqU z<%QwMUc2v ziUCQ6Jj8O>N&n9VoB6;Soet3&Id0q*Q{F6Ol78D-{u^$JJ&9p~In=bB$DL(__kCTC=KO9Iii$=$oBjzj@@qDv{M89$)( zfW5EJJCgt&h9Egg7JHpKGskyYpf(uK+zA{vNz0_XHMRcS46h=JaL}iV_~H4bd9Q3@ zj8uDqm|+g32u%z&rYbjlz?i@B-bn1UB^Nov`0NxedVMN19B#@!r@c$px$94+09!z$ zzjC2FUV>Z)edYTh_|1%_9H;J8G@$WmDgp8GPDlhDfS(fydTHAu8P!f-PTV z2|vOEx2t&i! ze@<)p9nw``sa8&G;fW==4Itw9H0Ip^Vb9# zqIIz@!zN=q#oOl!k=6}&5d;rEoEnP5hb zTLjMF><7qeeu)Ithy|aj?|*pso4;7Ry$rVgQ)Ta{Rz3;bUIB^4D~T5xz84v;I~q<` zsdM~W`AcSSP9sz{OQ2m_96$`(O<18?7MBKtW95U@P*7h0nBIofND6lJvCR*D_RW9r zXsdU8M$LMiryyOU4ljlVmwX?8Do<`*`pb*k+oh{jX#4$V|MkA(Yvzu-n7MRO`AIwb zAI3*-&M}haraFv2o7DY7jC&{G)Axo0QQ8e8mpbG)5x6E#U;Mh9IbK15?>XZ3>+%%Y z2_g#g-#72x`b~^@H}gfR=?J{@3>w`QP=%}h?%ONGEsHR}=J%ctzcL$?S_z%+64!wP@k&xFaRSI_)U4I=e?KFY| zNQdKr+H*7GQo`v)MOHnQ0f07T+%_+~Y@ED1Yt7NA@%72nrW&K#6W30{^5{i!?d;G| zm}4EnspsVG-goQdo3!%HYvmAmb4RWX#q3+qvpwP(H68ZeQG*Q(r&Au`a4^i!s<5~^ z>~z!ZP|@ltN~Gy}%ECdyD333Klr6lW?s2YAPa*v?GoG;CKO2Tk%FMf?J!+ibQWi=T zyr<;@w-UTIS$ru<8#gCH*;R|wDh6c$M3F}xeU9-~Po3|Q?H`;(i1SvNFSD6$rYWlGG5UMnaKO62(^|GEp2ed0*=9onGg&a~>G*xPNAoA`M<;YM|H?*gelWPt&V|qJ z-|uL5(yO73<*Vb${@$$P75mAqGoz6d_pQ*@Z{4~9n`0%^?vx4wyCt7Fsd;fep+uPg zScLNZw__=;c0wUJI5Bp3tfQX>=9V9T_zKuw2Wk!6e;*{)96|?L35JKdBs*!yLM;!HuORTfzuUA9BF2Z4ZWyhFy!XbW@em4yM*TK8pj;;4ySSQMu{8$ALM5K(pt; z8>p-!&%98frVlfdRxWdeT58Cr7vshJmkp-1-Sn<{;e$OKzN9x@XUOrCOm!KsL@H-B zIUTUNRhtd)ZVA$8@l|tf#6Gbip@6;R>cyJ2o?SUSs6=xPH*aXcBiR8anK)D|w&^CW4b4t=hBQGq?~aSO@hV-r(BH^VctY^~6E-1WUFgY)G;VR+uK z)3&cR7-y5k50z$690dmQ!cl@qh%MKGV}bHKcq$VIO*8BbXyvLIBx#S<+CzPY=i|ZC zWoCJMdY+}6_5p9s`G5g)pi?(eL4gI?7OJqaXfE}!bA&+K?n%%|j!Pee)7ke!XwEr^ zsxGxVk2WfhJ4~W&oggQIowf^uoI0pLk-L0$j%s$K0{xAnDoQ(&Tu{YTzN=Mly0Cke z)xBDaoV*gYAD0;{?p{|AR5^n6bmAz0`!soU;G-5ZxBD7}k=tlG=sdAD-WscD2yYThUUy zi(nzQIVXyzWN}B}-e7u~VFAITMQzo_T#Kb*BLNQ06S`20Ht|fCBpiV=gL%t`2p#WJ z1Y6}C?eQ6%a(jTa66~Blg%STSoKRlu1TY9dt{^9>d8^$S_`R*>$JMQzOiTVy-W!ZA zFSUAD+yQ(U&>0J+2Rc4*<~kbVSB!8AS7v$A+;2?V&P;TnEq%av2)M;vJ}4TovOn5A z*V35#s5+dybo}W2d+jWHu8MV-L%BZ=+;%k{C6V?#yIkk_^Y6WUS?j^bm5nk)5r-M? 
zRn{Hx_Umnr>0C7ZeE(&4dc_qy%yRZPV)VvenqHf|ew%O@0ASarRewCydA?9x29Y6P zVsLG?mKw)i16x0GOs}WciDx|BQmxMhM?SOTn~m%apjU5ONPQilHWnlM11t4TM0jSE zj5gF4@#gfyc2iV0kJ=wyRDKfis?BHmx%2ev2fME=T)qeTo!W~*3>drh@Ef-?_q_u+ z!l7~@YkdAd;=;J^DnR*T=+1292WETo-q+t*>Ylo>_7%<^nnYh~O!qd=uDyZYy!j2# z?HXR=@*;F;)&TXIm`$Y`{dO&f)GRb?oMr`+1vi%1@ML+%% zga@Nbuvt!l$q#)mYoh-)i3+W%O-6*~6M2BUosp!QKvYuI0b~+d(2v=nm}^MnkmXzy z17He1tbL2EkwjtC@!RXpTr!>cGpcp5t8SAm^iNLv9`pm^?0U&ruCf*qQkkN@H2uKWkw*;B(KP zHJ%GD^mS(1lD&tz%aefOPfjjvT)!)uhMIBo)Ca2v!FzrgX^z?*2H3;pNCX`@iblQZ z{l)p@Ijk4mY&+#FI>wtVx1CA_P5WeRa;4x6_Z(-UBDBbM#N!)lGdX{Q8z6g!M*tCH zD&sr3IC2+Xh48T*#%0uIDhc`T9U$Df3>oBm{hqf|%S4T64gb?#UWwH-d2 zWD8Ng-3dkyAldO!?vs*2(3e(bp1 z1^)3!3ETQr8b0;_lkTz;n)=4; z^1?Ig1a$>gE2rXoOwbvEk0N92qgkG9y92$wUTAR}!R{Qqf%wNKF@A26fNQ>Gdiqh) zHx3wY@@OLL;5ERv8GC39uW>UzdsrfBMZzC(XhU()@V(g9amU~&aYnCVTWAN(b>t-x!b|F=!diN z+vk%tmTrfVu+-J{7aV=aVwbLTJ0SlSu&xL(}rpt%`S zI{dk-v8n?`4lnlOH6;-4I*2F_V(Y?z&eKw?)865m{^KjD!#~&*2Q$c5u?qow@eS$o zhta>X7OjWEJAsD_LI35Vm$h5~3@OEN=2t#x{c&a!+}o|x%%1YGT;=bNr2*m#U84ju zIV&AV|Mma*(^Bfu1P0uLpTPYWrD5FOn9h2^2=1r*O=TzJ^vj&#fB0YSOYhzO^bh{@ z-re7R^Xlu4JX&oxSzKs5@cy0Wilg-M{obe7**{+z4;q6Kb13-R#Bh=(m5^Prn(=rIG5l&JPYf zYcq<976V`FTJHx7umd!i{h{{fU%w)^Pkf!2vb>&@cOPrYU_41Wxm{}MtE1DEYd5{2 zprSj`zWlYD{XZ?dvoe_aFH3Nlle6Y=(^y~Tc27g7D8r-u{S%g232W7ItF6f~x8F~F z=im?pBDPs?g`UYr^e}E~1A;F*{?OZ_NVnsqP`TKhT>z;jZZR6tzSebgdJhGs=J&0n zh3!K$np@{PQ3Ko+x--|`)sI?WaTXB30Kf*)ADw)Rx3C*4+=-hdKY;wABcRyk=^AHobF&Q8H~oB8-fEp^3v0ZQy%oGotYP&ja=yogu*``)Vc zXqjIgk9#sTwm)p0EhoY)WLgq-5yptmJL(NAJU6L4MZ#CA9o{)?%(WvQe5vul179e4 zhKuPQ7nsS$c51cotfXkpcC{HqvZMG9G<130^yI%}F#pY5Tkd+3DTp1K6Z3q?mrrX3 z6*?+0SD5vhwB^ZELoSmHu!(6^tT$esh0sgcN8)IWR-{Oke^<$jR2ZHSNOIEYqn1@48BGs^}QTcEZhg zt?Ua`200h-g(g6~Jk$-6;=rwTs1v5(i}uh5SFd@CU6FkWon!;cA-N35H2}edN-9??ZcU0+z( zCx2pdM8Ow|b^9s^kVyAfI;@Cuxx#%Hh~#whGjzCwc{HCYA z+|HqAd0y$2ig2spzfwp|>XMHarlE;-DLVd8kG*mE($3_i*Gl%H{M^KSI`a5*5cJS( zk>0)ToPXRi!oKBsU;F!e%v&iZq*XHh)iY=Pt*4XL&ntDXuXU%YRpN`{4qO{toi11n}`;zNSp0duhQc=iOM0b8_06`X(qhKH_ zWi8^@jj*Y3E=C`ABz`tFbLb1uu94Ms+eoh}@L`@)65@a$k%}C5kA3O$lTTJAq)SCbBH`!unFg6z=>?pbVd*4vc`ikDtF<;$z@~%!K#Zy=o$U{OiDIhLd=$Nr zI=FXfHr$I??8zJW#UcJWI#=0wzOXb;*^p)Doim%p77R1DJX8ZpkK3Ew^yQe<-%GTT z^(>?LNzfO>yT(k)h~=&{Py30e1JujDxav7fp&_*TbQ)_ACwKfIV$vFEF7Bn6j6o62 z(YoG@N&acm7fysR;6h*Ln(8&&p_U56*09|MDVpw-PMAOuBgnpp7H;z%k572fpgo@S)7(g;9QU*IY@H|rt?h`=yw!1%aJ%l?)d%E$`^a2hh^uZg z`uMLqf4w(#BybN^}cVDe^p7cE; zmVA@k?h(OgZFuZLtBv!4w;ySJ$8=n(3tauP^@p~hetzsw9jTbKgZ~6;Mvcf|jFcBk z$X*P-@#LxXZkGFFS2%IzxLk1Xb4sF>9t)=50@Dye#x8mSj19Ycj`YUHz*t_-sGyux zr$_B4r;j&<ix%3V-URLR89u9L+dZy$$65^ zV&_Zsf?Yn`{K1sd39@)RkXYfkH*0MeOlcH8!+Q6A+z3X))8Ue4KlS-admjgWJg?K! 
zUh^2TJ3Idy;d;j^)>f@M_4^+^di>!jk_kyy$mq$TAW^nJR+cWD%M#KeP+XC;p-z48l#til8K;j0+cAg*TK-i^73=PEoY)=~G={6iUi+ph|(UfK$cX$MZ1vx{Waa|r%( z^l$#+E55}5&ua$RfX}i&{p82e-j8m~z2oq|lT}pZ6lph3h`E zZA~8SoYuA%5q%s2!v>KIH^X?a`;6;{A~!;kZYBKcxN&8Axf0%u4(B^LF`>8|HmjKD zQQ-ITSG4I9m-!T|-g>lMN<}N&epnB9U$oCcsuArCt+H#AJwO=e5HQ|8N^#{~lKVyw z-^R6gGejO92o!~fZy8qW_7egA^4C|2g8#5G5i=8dOCTcyqwksbaeH_%S>d)rkzt~~ zRM#;WE)5czzq${ca}2=bU5VCGT9m>DV_^ljcX(~I0NjqC@T8ZdyW<#p zU=* zjf?L5M!0`!s`Z$*&`D8q$apZ}UGeK7d+J06F@LDXtGEoE5ccdnWDwM5u%RDYdT}y> zF}?+hg^};Vn2O00k z&KSY{OKkxs$EoS<>C>+5Xl#z<^z1?{KDbnBed6@aID&emX^R1W)2Oa}eWpD)9BrGy zWWP`Mi8d_>F`ME1pa{n3<|ub$O05)n zczi$9SxnvxeSRO0zV&i?426;e!zIXiv*PlcUq`TKoW);A0l}Z1klLZ|Vn-o;$!a_cPjXtQss# zO!T`XI)75Wa8A_d)KU>vhCuf$aef12H`gWMTtqLA$8 zYhVRB8+!AQZ{pQ_lXhdH+iB3&TkRW@k>`#eJyB5p z%pZ-4cxW2L|JVO zUeN!(7sb9l#!G|} zrJMxhxM+dBSjq{^&I02A6^*6T3oisPZMc{9{p9h#``7>c{l3*rFY0{PE|i7UBL1SM zi=H|=Nee7XcvAQIhu>JgWMI%65Y}sti7&+-zQ2d!vxf+QJpMhv4z;84DI0bCaYy~OJ|SK zTJJ{}{cF_elW*d^L4)kV6AuTt41sO!FVA3;i&C0)#PNm?lIjmU%LyNwS$wwvp7%cyArM6n5$-BIPG3Xx*DsJY&gO8o_x;m{J3zyR&N^4kK!rAe#>@vlx ztq5+RMyG3qqE4JHK%G`!q^8+Qal-s8_+&M8t+RFHGFM&3=@Z=V75u?eE};JC1Ha`v z&l#r->RND2&Kv3;Pp%MkRB^Pod^9}d%)(cuTRZdMq~*EPYCMV0LjSECnlZO`MRLix z)|7*^w>|0F`$XM2`h4j}y<^C*D3( zt@umbEl;@8JjKkF!+;M}dZv15LTo~|CqkM}La{H&iPLH8=d=~?$@8P*Rcib+b!QIq zg-*V`Us4jSmzzQXNE+);>F#i=dEB?06OAukw6-(!cJlxDv&&!3NcEUE35-f=Ct&sk zz!#k{9;lbV!~k$7mSDl|2oOhuy@fv6ol7R16$3cpdI3!U6!a$XDGvBK5)y}q2>yf$ zzg7kc1!R{$Qw0xTu*x&0fi0B*__El_xt6D(OK9$wxc4?-&s?dfV}bzu&B=E^>Hf8u zVq^ArGMcSAzyd5ZjvP~yHX#t;aDl*YX}&+kHF9bzBiE!vhv!49^$ixi=kT%zMm#Lmw({qm$qm66t<=+l z7Ql;o86P>xlt3KEPRMqU$OL0zVADLlUPKOlk{k{B8*cXDxScfeI1R?Rs=2Pj0)td8k+%2_*JE|u}l=CzLz67Q=O#(@{KW&&rqFRI)kz!}u?rtl5 z?q}|7`W{_-ZgP~ah_Mf)%I$^qNv{uLOu9_=&*NfR@m?*XJ2ca5?>p&PeUGuV8e^FFi!9Ps)-!aY3CuHy%B|q@tUp#&CXyon} z?N^&A%WSNO`_)~;gQGqs z1l9&rcRE73g%@;p(NG14cdH`5fw@OJHR82g_7n7F6pkf=?^FhlMri0}4VuW8y^Z6~ z9Vy5-O~UzM@~qp4f5B=Vc;^z_E^w3$F~AaG!Nnov6)pdg?`nan&ljs2HvF>J?T3oj za9N~ZQ!WNXV$G0i66-8Wt;=Jihfjd(dEm%(n9FNRvbtJ#K0I6l+~Q7r}aL zy2~7NKS<1;VCRd1aK99qG3^zt-Gx2#$#)Lh13FoMRn1O^T`4bj=poOCSmaQPEpfw- zL!;t>FJthMFP=08T1QG2l9B+3bG6w%4aVmzf(k}K_eHQ8R!}-V9!`oUvzfn~X{q4q z?YXDpJ@2|9@Ue?CCSYH@oEp%cW8=#Xi+Fi(=u$o7(aCO}kGSSqihy~V&MAL>&Vva~ zC((3&;gN+!OemqvmuH6bsVmY^hn{%0A~EpEpNlv;V#r2znw6K&{&Z6Q?&a>Um~SI@ zl6@?738_qxIcu~aX0tvB7?XS7`rzch|MPzayf@q#LwIt!Mj5lYUtIVGu`rH`^LWgD zt8j-DB}Br2aCAWR)Z@e5p9m1mJ~hsY-$@=?&wIi8-tR=t(hlz=F!_GLfr%$K^Tps| znfZ}RLRK<(^;~^Vp46WI=wvR^$p zg2l))<$d&#b8>n_dkJDSj=wlg=2zy0D`yu6S7di;nUHByv6b9hMmN zAunJ!6~vPZ3cS;Y7pT3De*drjJIF(WV@E|F0kkmlR(7vaKCECMpSo3>x@T#Jp6cS9 zG@^qy;MSCzYd9aUeSgBeViU(x$G!I}UP%2Qb*g8azs`ETniLODFVb(ZZSSp%X)~)I zQ;&zIF#^BeA59413rXpT&Q>j<^T8i>XVm8NE)-R=)bQ9ZkEI!U+Zp!uRXco`RRG3& zHb+O3H>NadL-b^YU)cX;rcja_Qz*`h=RwSO%})OK7>X7L190i~Dzz;g|5h1!G1lCf z>EKlFtLpUt9}LL?$tVL&!o5XiKEyvemJQ#p-v;ADKxbD#M3QB&d>@;a$+BywQ+tA}s7@{J*>>9dsEqcoAoka9VqE4ND(#V?-)Ik*>$@ z=nM$p9RV$o9UK}U?(y8#<()=kcF?YmU5CATkW1mlp?oy+XbB4K^|_Q2Dtzj&u;bpc zYx%6$_0xR&ev_{2*cWE|I_2;aAZ)USjnM+_lDc56=6s1qXPU0dQ4ei2D~Ia7Jssba z=YVBb90Gf~G-iq5eptu#-lJ-2@`4S#G99G0ol2Wqme2 z8}T-vt>f+dzx?a}(?9qtOV%6@w@3ysQ&9E;Op~b@*4GAQOu`1VI0j}Gv?Mv~qDq&s zls+?Y0s^##Toek>RRT;zK_ws`&B9$%n_=pppXm)-iLfO){PwVJklE+cZ0r2UT>15(C zn6zyq6%@1{>OyrxM)!JGm$(CvA9bsJLd8${!Rw>dt>h@W%QlptE29+7@lLh`xq@3w z&U;Y1H5Yi!x?FL)%X^DFQJlP2<0hvq&83s6(1@b?)ejC6<|IvL^6$Owij zU2+^7CHBE&L8w2Vw1bi3g2=rN^E-*w8blRERgNm2%<4F^EQ=~ZFm%|vM7!N6ji&)Jn@S)op6w1D?#rP;UB9wq< ztvUR_If(45;;abB=J4xS@GLsp*abd{*UKZS&|jN)Pu*e91z>eZhULM0cD4J%^OZ%e z6kb8Fv7(-)s`U}E+-gg^5;UW@94m6SUmaC(*Sm%IcX{iM=cp0xVq8tPk6VLoWA_t 
zaRQ*tIahU0st~Q%$*t9Vi*O(H4*4|~jb-S~qjs`z>7Ihe{d|yj_30_5Pkr5tl2f5! zO4egntGi)<*Ko+aK8Kuh5spAFPiPa*eZrSE?l}0tDA%XIOE~A%>XQ@E;c-CKTGr*2 z3j+=KEA?87HC_kP-QYax?*c#479Kk&s^7`8D5yAli^I|YQJvAqK385To?@Ek1P@U= z?2p{@;qV2I0DAz87Cg7mLPY1O3_C82Gv&d14fE+u_mMn$Z$zKFB%dFj0&HXh-$VBR z?;=a)=)_(qt~%Qtgk~o}=k(fBs_INqj)A*oje*jE;jn#azix-~JIT{^=_`}+!P4p{ zXH~ zs`gkpPdv5toEc}^V@yvs)}F0ApH;EBfNKD}j@H>Esv%w>ez3rg4^jVE&=PFR*A8@0 zK5VCkj-z2Ii+Bz%JiASPhpDWh6^ zJR7#AgozKiww>1yG^{^=jCr_!pBcSVsm&FVu+n3*E7uO6ew=~z@Ci3428)k%r~9n` zv*hs4anHAonVq*#^tyNOe^pY>{H)R#!hzf+v-PtvbK+?ROt!)}iwEQWmGA=pBx8|S zcYJVmZ#L*q3AEnbj{kHWKKs@6tH>;R>D4Q<9xv&C3`T+Wki#(-AkI1H)n?W2zRmoK zt+_g8;+YVL0zwYa~t^t)gA#|eLJ-|e^-Pw2Z&Y#AJn&M%K6{Q%vn7RP@T{H1ds z*Y|(>kN?8}^8PA*d&lj#b~WJPW{XS>TU|T;m)}Qo*x zaPsKR4=ZkP`y!qh{I@5MDfl0yW(v0R!Q9~RnHTWSV6B7M>8+C=wEbIA=Ib+4bBB%I z**g4*&no(m;zoA2-TC3@@Jh#jn}(Ghqbfie6H;EYvV>GJ25ti5t%0wcUHoDa1^?~+ zPyhJ8{_DoR9Ry7fj^cgcc20F{*BO)B1`Mdr%j+MI@Qs#TV)_%I{6y_6rhaMoWXC7U zUUoe+ay5;^(%0J{g#b%>wf61LzddJNOg+cx#WmO#f7+D`p8cLR5GRduR_^Y7i@zZL zZb|UT6pDB|ZkK*Cs0qQDNad3~$T7}M3mM&JCzad+llr!&UNeR_a zX}4dWP1;857o4wsz0&#ZAHB$=zHs}>It@@-@nXppk=x_HkJS_RZx)Nyc2DkYTXDL$ z*hEm&&8&XmCNna?7OXV(*5CdW4Ego=uS1FdaPs`}8}oE+G+so*uq{j@u4qdbtTfb} z4KrU}$?9WsqOzRopJ`ROx3KgXqK4++#$@@CrS*d&Ttsq<3U_w3qx8ZvL#>{)QLf!2 z6R2S*A<6Rkm`cy? zSb(0o>Fg=oh@GBYEVK6>m6FSDV{-JGlQgHRLc^vCFKg2zgNLhRSJPYV6}9&q#nvap z-nmG|!kNp(uF#u_u@`11`DVLDJx}#!j#JkW7iA925jG8BO1z~Wmk^*WCVuOOzqjQ3 z1|zVj1ND~T^+dPFh9+H+$6>6i4aoM28Qvs@VuJRTm5$5fbJZ7+gw0#e0GLOk5kKgN zz!F*>1NAzB{pq-s^aNpKUP|o1<5<@osHg++;aOmBGQm8sM|{8I%+0Czn6TQQY=E9M z|78@MOhC^uP?~;fgi0acWZmcx6ac7lK-Bt~Ou!B}v}TZQ7{e^}*^~oelj(>W7j!0j z?HT|KLWI+<4#9PYJOCtQ)?H~2cOLiB?| zp6wwHv=&&LpxyZ(R9Ssu_7}|Ub|7JP6LCrHZ?~&yc77?XOscLSJ&JQ+3af+4G#GXB zD@d&F84Yz4CWvqe!e)R6oHfMspy=>nk!0ztDs|C-5zc4I-xXekom;C~^t&w=HfyKO zA5~Zkg7VD^-^8*O`{?+#PpDHG_lV+d_!(^{rl6Y6KqbyT5hn#_Xbp?&4*9S#*+4>h zA1|~w8<;cPw_nikQsf|d!i8>@@#tY2yJ54xw9Td_{?Bi;zkz3_~`;-5Qg@tD6ywiT;r# zP<7FN0aM{f(dvGG4qw}!>Z3jcImY`*IAnJdZ6?9TYUETgFXa(harB^823tDR5O;~9 z;w>7y8j0-Lo6*J%uVC>PM?+Son=_-;{lIPe8jELsg=tv9Hnt@6eW(m+L z!O>KX2qNW%KkTfw4!uqq7O1loYCdLX;o6C2Rk@9vIkm=e8ar=8STqJVo(^d!v#oDg z*QV>Ax=(e(z38r=xxl4Co`+6cZqIJ@Cr*0f;CZ4D5*t-$W=$n}-+`2OlEaHbQwsT^ z$-`GeX)cA3o2jTS-%Jdxh3Rk^JVaOM8q#`jM4;0bsXe{jiT_MKe7E@1 zSH5)P=dl=>NlE|=VgNl%tKJUY1La`pH!E%h1?c7Udy9YgR$@91Ek2QPeb?)F4L$+a zu5~8>IF+n19ciS;yx{3czuI0G?N4^vxf=t^YEm~tq{oikaNM!y*OH4I58SagKjcd{ zUXusAR$Aqau-utP)F-MuBhp!8ZQ+;x=Kqk&1l~Iu&;H?uzpuRa|NQD}Z*Nc#@DLBZ zY5Ta+`lT#dfAgyw|Ms8#FSPZQ-}=p8X;9#|V68qJ_^|EQ*&E+Em;V0M?*9x%-6^k# zqp+d5fz$M0*h|N2!3n}zprW6K+$1%;R&Gz&!Trzve=l1vWNKtMO}sxy`j=Q1{;_ms z5Z;1MhSm;{(+nv!A(y)ZCb>5@ZZcmB9zA!OQxv(^&r%^%o4@u7{ycskdk`;=R2^J% zW4^o+dL{}>%e6@Hzn^@2Jk;nhzke((U%n!9-3vnrJq6fx3{8hjySc7=q)Z!wIYb)v zZi04Y?Nag}rTF)~t+iU_6}qn_0={!E=Fz2nzMVfC@A}`onx6ggdg5|idKo@8?_O&7 zg5lj6N5r7}&AC)Bl?}oa;AIvA|#Y zxu4Je{;z%It-t-hhc4lLkMmT2)_XANf5~_)uvvecj9jMOh#|gE+Tr=2mz<`+&f_+6 zcfjBMr~g>;COO%7Qga8@i_Xg}%Xyt&P?++r7xiI_^>!o7HT z=44yHxm0BtYC;D32U8s88iKCfPW0yk?rwP?T!dA^?G3a)*)-s=q>iZKIStyqT=HZu zDJIu8cZTUpJOPe^gWM==SHIxTZO*ug>*M4^ZAt#~SEAPrho=oPJQHz$;1q6a8HrxT z3J>|8wL_|B!@W3LNE)mqRo<8RD6D>T=#?%tZpYVt?w>t*XI3=M+-MX`h3=Nq=cQ=& zXdF~MVh{u^kTiUcvqWSMp_lAI0K4V^&%rcT2hON<-GC29Aix1H2_|DnG(3q*08lXW zd{obnkWPjX&nQ*)UATr0lNS*m(a6C5r=l}81P9Ccq+jZBxE2AKQ*a~#$hMQXpELl= zg0sn~;+iP{umd6(8TPy=9RO6BUH@){cFlq;UU_Ce?)R_YXWl7<1cPgrwmt1sT zhTh3z_a5AweyMJlL8?=q)l4LSvZc;;oRl>v&d*howD-Phq;t4IJA0ll`KXhbNsgBQ z#y;CmdaY9y3V#$kPPxNPJ~|AyEcd{LY+!ve<1$X8Q>s3hM1`+NogJDj8|p$Ng3&!) 
zfmf#(t|M$*Q%dB-$9P3L6zB}a3r?-kxHt!h!f8DpF&A6~|pp&`G!O|35)^ z0Vr8|7zVokVX8XC%vHB8W8d4{(>PBoeqU=^iH!p6tH#;L5jX6FM~DfZ75a~w6F<_GPnYW6 zi89MBD!6jmYAn!HsCul0KRZX~xbv$19_EkNrU>mok~Z@K1D^4qM#PiwQHzvkI_0~s zghvN%2%h##&N?BkXN@7CYK2w;$Xw09AWk5iplHmG4{MDO5k=~5`(BLArRhgR&A_rb={tTySq~J?U2aD3iW~OAugntYcN{K@jf7` zJ|i*4RXb7eJi%)p?G=|ErwPa57!#u_i&ug%X#hKgWHzP^Y4&Uk(5WI%$<6}sbcZ7a z4S6QWFfj<^HVke@HqS_E!})v$gHDqG0RR9=L_t*SUv$>maE~w(9<6d*mD*jHR2UL$}~`K96O;(vT*zG}c{ojt}?f@ca@Ek(3L6 z;zE~R99GG#twfh_&{$&)ikAxRoV@$TKln}^*uWaKOYSCr7-jglM0mWU*^qJW{h+KZB9M1Qjt$W(CbzL2TwoIPzlUS4ec+}jG2E4a={H}B_w)whd;Y6BPCu;wyhuM>p; zhnK%UWehV2C6o$=*UxvJa*PU8{zCJsTT~)(?6=jRNbAOz?Hc1a}xYKhc z+4`%|;$G#`X0ZxYEr%yYmTAp$fRNX1JS-qrXC{YJViMZ9s{M~&|MnmK;h)Sp(!4Sb z5IK}hIWo-tQ^8niB>GqrC)_7=mrX6E8V85>uD|-`4}Rhwe}7WTZr}N2SNc$PhA_S6 z4Lk6HcSgyrDuHJSM<7nGg%QbR#hQJ#{TEuVbVP6_xQ_6sgO&?YIK2Et<>cSz$z|{K zMngRhU$)spqJPRzIhaX$t4B)-PDSbTWI#5&g|zf#IT}nw!#|Vs2H+_fT}p?4@7eRe zdWob}exhx-{mPI?%EL{4@)z~nvk&Emf9ns1_(T{ykY-W=&Fl*YOXt;|#~Ys1+K{5R zv^X;AD3F_UsI_vJU=oeD{^#MZjQ_#qzq&A!U<=W>bInQAa3P0&-p~50((lc zAO*mVll4fbK6M^KAi56tBQcH`g^bHtZ}@`o$yCo{4j{;HwN6pI5KLo;IR~U8;K9r(5&vWW=rmuYTySABCfO(gQe9AH zc^QKcLsrJ2jthL=NkfxZwhlVIVK(H`)DvbVSAv0G7Rqro1#FUK{Ni0245SXuVx;bc z?52R)5XsKyqcqCje}+i}Uyx|hmc$WQ1)j4SrvSu=>!PV(0ZfnTBE+ax$Dpz(nxa~e zE7W@GkS+<-5w(37XV(hEdlNH*?y1?!?lXRH;QumNX&bgXR<1olp;UH34;O1a=A@fW ztQEOJFsR(o{BwaxY#ynd33}Tgnb4=>dX`aI0gpkM`2C@fj9Cla)KR^Fe5RGaQ{z)_ z{ngImPVlp|RH+7ft1nzcw^g3afcD_fq{7Ftt6X{0L37|-u=@q z>su(hYmtxfb1x^bPTNO1G3v#b&v5A>+gO`K$O*1FF}nK$D;&25tSB3s_R{X5xVH{x zRVPrKt+p9p=Z5aOKQFcJ(ZmWRMxTbu-eM%Vri?%JHdd;e0&_k)Fr-c`ncm$FZaI7E zK)8@iyPD7SM%-RJsy|rXXL`=08wWVfb6KC-M%-f$!E4zR@GNKKt*$Dft)QY9M-9>m@T$`>UagE2b0N zgVyvKa^Xd@u|DUoTLhsc@#keKNCtZP_zIXqQ_Y=bn_Qvnig1|jM&TLnOo{6!tf?t$ z5oO^d7k3w%?mq1i z1jK0F>8dl$5+<<5$Gl@9?oK*_<=%?~%ns%@yY#!?{qCRN{dSvbNB{P%8|yyyk(SGy zA4t~CRG#cW)%bPCp*?m_41DZRIN= zGr7UAKi50II|;4*?I{1H&(N|qaz^A@0}oz=^n-92eQvnt8CPuyoe{fsG;GThlHo)f zOIXc6E`cCOX;#%k%=mf#c*Y=Kv|S8EjVWp-P_XH~yEWrUdwu8qZhtzj>goKKsOFdc z^51#&(DN{*N1G%z15nLM%1keu=*P1(R!7htT{-4W>-AWdFqeMtsB<{Hvr`H4Z$y&2 zpfi!w`gb^ldT>w`mP27=mTNc*mb_)1uD&+EmO1@5$IpzorGmf7kq;rD{8>Q#QXSJ1 zq%U$|9*JdN40|ygB<0hyGtzefOizK;gPzmyU$Xlq7d(3U;Ny4y z_~NgAGbEwvfEv_d?ty!QW50))(tv zcuYTgg}B5F!6ypX2zki~;E}F_EdB5woc_i4-o`!G8`2QO2@ixvB3Sk5@q{?dx4x8n-Lim%jOH|KJyq#j6)YvVrB`JH3m&v2?sw5Yy4TMs)TjZ5O8uE9Ax- zmQQn?XM-0V|8+YeRrU@{;HK~MA;7}EMDGUSw&%jj_v=pQ!Z?kM^V-f``^*=(nzAA( z?Pul}7en3$EjXTwE;vxHjhI9Pq6Ze0CfU-xGP_M`0dnwLAC6@^_t7Um!mCe>@L=1x z$7?0$@zD$jQYnSD38@>?dX=Z%%(6oYD}6&^!lPB-`b6xN$gv(n%#z?~^;QDWyRKaJ z&;HvF;qt>@__Z&GbCI~zZJmyR8WKz~x1%J@D!=uW#zPmFpc*);d zaTE@hT#-ZbmFZvp2vVaGu@m15BH^boSVkUpW`gmKdfwjRWT0Ae7eWRzR_7L+&>*C3l2E zmWhBE7D6H9T&|@ZoM@YX%7AH8kKGuFMZiD9ykJzcLCFE2v5u>t4U;*M!2!2NjR651 zV{X*IXKDEio&(L6%dSZx$c%?o^hxVAQ~Qo^amzJ8@W?ct>${0?h3ttEGGjNUNV^NK zj0-zU{8gvEeKJ`3$*Z;FvTi0A(S*$J?Ii8}NKPeEXreT-ZOj^yQiMorhyG?4ZO@pTaXR9H>!ttXhAPdSyL# z{JoZ)nW)&rK&@iSr&W@P(uQ(O(KeBa=+&uJQuO)@w+ok|h33j}7ytk_Y%{DQu} zR3SH#kz=FduzUz5G=|xzzif$z5WLJ8XM{ahT z-7dD>FFyvxWw)b91m)?6uE``5QyYDF>XzMz`Qef6!2?0MnvL}Zipb_;yJ9h3Sqhf; zCwBWv!q_L=x+%Nti^q4bdV)MjUAMix9u7UN%c3e;6mK#WJ}T)+=X) zWY(i^W6;ZP_0iNo9xSA9o}Nm3^Bc?_yepQjkh41E2B|&{&>qs!21f%gDkyHqF$SV# zQfNvm)|pq_ClHBwmblZZJ3F<8;emO$*7A%iqG`EuA$eDu!XvSDG@4#u%~NHRC*08{ z#G}Up*LX_#3Ndx1Efaa07;ery>GLULZUwKldiS0y7g0p<1dim2>ev-kKL;3{_}P=kgP?<=M;uFWyR1~+pcYN}k*nB2EzhbA9>Zn~d{Pc;(_0<{xNn{Ctdz~98q}j4F2Q*d( z>ZHVEHvBBp8J66~+xJk9enU+-f>(a&ACY(O?#82k-Mw5a{z!iD{y~f#`IciH!?4wQ zYEh<(t_`=oQ}E_>AO9Na;L+S0S2MQ%WLYO#`cVJ`Ir8Y#nq+poow@n`0UGsV%|16b 
z`Bd~tW8ZRqfOHA-FYdJC&4&6PPPd=CDK4yT%1m$OS0+UJvv0YS2I9IoJ6Eh?nUrdl zgdhFj;g`SlZ`kRxnW-_B{CP!-s`AL=iDppU)=)llz*t}GBZaIWmz?h5#rf!;L=4Ek z0m8!}feiX-Nx4@ajL_r8@bQZeJ)Tf(v@cCxKah@e%=Cpnj&I@ zpWO zn4elkR?njc)fyLIQ2xSn_Gn%zJGk^EM*3cDh~nx0{n!8Q`r8aAhCr#~AqSvVVhR)$OBziS7r~B7e&C>&Yb;zs8NXC~)5)t_t z!UTu9<(-wE1`Yb&Bn?h00l!_cwlKE1BfR*J^5R>&H#g=l&t~r4smmz8K-*3BjvjH& zm&3vN&-a-Ai|uc(ZDrrR_sHd8p^C&gdU0>^_>}M)G9OU{mv`$^{pf<)eTK+BuPRsi zLx!CldL$C4xIbv@tCBzJ8x;hMonCj%eY`V3=pppn6VHxXO&a#P2gfiu0d6XIXN$P` zqPc6){IHyh*svPuDTl$=KZnTM*%wwgv$Z`rFGW-Gb|E&2jv9flBn9}qJ#x(%!%3Lf zk@{Zd)m2KK0PXWbZeJw06mYseA8|D6PK;}IVoR}1>8w){jbxd)b8#FbFMkc^A9YKosU;|6He=?SSdbfi* z2iWvR4ZCw8mD`8tr37L>EnjEjpFU}D_AP|11zLFttA>RBiN+&s|710q|E3!esYh!^5CAkU@8v;`PXG zduH#&5V(yRJSm4|vLuV?@xwbF1a_uD45-~ZAFuR?a zz+_rzGh)5#FWl?}`et-+MtR{_v>aA$b^Jl3^LUKH+7>$4MJU_!k+e5{-(54&)c{kj zopO=+w=ZTQI6Vm0eI1uHO|1v}V{1`TtBTM6RR}rkx?pUSjX!=5Tnx+iPrU@+#oo9u zesFl?pF(n%@uuVgme%FsOK+&B``5bQ^8I3}j%bf|fWV|=^Du572&z)!v_6?A- z+u7y%AkQK?jttP7G4W}5%fI^(v=uokMHL$HQ^wTEpe6~T@w-xeQ`5^)VJ7xQ?Wj)I zIAQEk<}lxLrnvw`yZbJ$%c)G=djHv{(`BzsugK+UkS<)K;8RC_*VP7sR^?EikZ8GD zv*BC7C|Q07)oi#gAeV`@t}G3;wjy_RqB77gEZf>11Q>48LJ%~SlO_z=ZbW@SvuI88 zgAa|svYQS&n)|~QsxBaHgL{*<&OP;hK~GSOvl)Dm=$}ulfb)JzyfHXx&bp#cN8^DH zdrKeP57L)p7Rlz$hP%q++#46Kk-~uJ+^*r#)lkw*{o0k^_^G%@NwZ6FP%YmjCiCzY zIO1{gAkw3Rl|UA9P$i;e+9=L9qbMs6fTmt|gv1Ln)m^ z9SiXAQSqb8FTeGmtS|U2XNBs-VlB{JKxMOWR`6w=`;=?0TcRBhh|E!-dKjG2LMT(+ zE6!Lr@LM5lMrI}i0NZVv$)KR+2BW$oL#xkWLR_wkt8 zojcF@dOMv}V=Q5oSe*viwEktxK2CHi`s*kq*Gx)(LAk}FKRVc2W8O2^n-&#_qWhGb z%U$wc&oaj9EbjW#_WNIX)S3vp=h)Y-6yDkOo8Yvc#8%vlV`+hMUAS?jlyY3!hq$(I$A3*(Uzj^o1RCKx~duh+W*(C2drFnGe!5TX! zf@k#{JZZPaGh@;6ts;fu&@7sStMuQJWAYR9_QRYA#?TrU1K0S*KOVi+zCqS$VbH&dc{?YejHGXi( zrzCE?Zc)LwnRL-4=#1_RPrkqX1p5@ELAO8SDi=`J7IqH%LVA6^y~`)vg;>C0nsat- z(udLTzxZ$ep?xVvfD7(QWw1*g3X*9*eKUugpDvK#$om@QR2fD?+fy*> z4Z+y=n{y^Sa=uOo+(Mc_7J`@)J3wGk2i+3?!STt6O7Pw+Fy=@6A}V*K&MppWX=0?AWYk zpIUqdP-U<|ZvCh)2GcgnAy2E@BrWzGX3+fb!N@&d4S8AQ!|&Y5d3I2TxATNbhDTcc zjZ}{E$sxtT%QLeV=$X;SgWkL%dHor8bvQP1gNj(4ZPvT7jqwV&P=0FLAi5aC@Mxcd za=7ZKtNmg9C6a84=N|&3h1W6*qmPoISNX-=H`H9W#uoi-q;tHi22#cq0%C8zqzkSQ z1E8yTY|YU1OExm_d;Na>u-00#=haD<=v3fYPb7m0%&c?1a_}ISO?Hw@7Ha9w?Zzgj zXp8Y-({l7Q;4lcuPbbP9qaBvxz###-@6e*EOC|$yIst*Q3*@6KXaoTZvXb>8M z6NEAZPeg7_G7H1nallE!G5;|wlayCAUA{aYB{@)oh2@zb)`H!8&*BxX6~+XAgYA#b zdQdGX#t?cWcp6f>+1G3fb1W~Y7@9;I#=`+r!_%FxZ>q~KZ=F9%$m*SC6T0E+XX&M^ z)KdsXSXtjy+;G@!E}n++Zl zBqFZF?Im>tER6Pi#{1nV*4`kan)n2bRZGpWFKAwJY_AyDIPI~NOEt3V3F}d}RK!y) zk;%d5+O$6M4{SLT%LR6b6;}+LM7ZdF2s_L4&S=55#LC4FdNjIl2*iYQDYRsa0}Q~? 
z2Crkq7e1#P(OUOT5OZUTb2qF6)-6OK^bFHiH^w!~f2tOxrHHYkR0qMSLnofk)u~zb z=``HGgq>L-Og)FEi@A%+2y@d;Y)D*E4`(i|?6*A#%G<89euW@qV$vK{A}xH2SCVYFce7@uj<=$W_cNkA|U+mF(z~P`Uk@(v=o=G2T1OP0@#w>ZqnkSp}UL{ zUSafb$u^5VAuD+(v>6!Ie1~~Y64x9q_`2HKf9Qfo{%mB8s25y$Nj9w-3#8M2*kRTa)p>KOzq-hd0(AdL?}B%Ilx|M&icmYXu!{ z$yO>hrn`8Dffhh&8h24`%mv(LHnQ&NvhAUMCa8S1(Aju$_OxgbXOs%sRr*FSBas_I z+_e_Sdt8XKv~w1TF26kCE2|m4D$`BO#x#)Wj651SK-9@ub+B54fJOqpIKnlg44mao zAY%Z;mkVNvZ98vxdK%;`93HRCgUASPDAfu9Y7tW$g8Vv&Wl8d@g(Is1r~LuidM&qVpk8XT|$5`HM;-!?ZdO~ZQ*2gqpdiU z0`QS}lT_-3ZS!opS33HkH^2?HNod}+rL$F~jzG!7G|nz`C!vM!>`}R}|Fb{8wUWP- zieMfd1b{Ee-6WWV|1?H6^b_DOt)&-k1C~k@W{1MNL^++janaE@n`M5=5$Dl4&!znA zYi2qUyFyWP2pVS^ddNk9(zl+W>d|@g6Rd)HwyZ1e{`_a&h~8Rt8&~5IkVm|p$ao8I zEfhFy+EMZZ=RL}P;FDH;deywG`%ln4kb_1em3KG^H^8+B;43NLYq^&bN-pR-vT!sv z8@T{3uD5)u_6+weY+ia9jB^-cXraD0nofOjIa5+22-0?}D43J|Cl`Kkg}Lrqhbp}f!n!|w0>mlq!spAOG)PzUq$;nlh9nv>zR6iE#WPcyg&UPvLGC~-J2 zK7xi{{`LRuwOgMj#vqiW!E)jfmV9e7rDb+aWoee?#M3&@*G(X0O zs55|@bHWz7TVQ zbD=AR`&zdJTJ+_|j;N|kyS`f&Za5ckG!QsJJ+EDn!jADid~NWuSWn`%9j%-3<;hY& z?gPt5+kly;o=brvN^o1xsW@TLJ+0ga0Rb6+@yBvw76~lHDRSdf@>5sB&AsraM<0ye zm+QDmkBICb=AyE6()Emu_q!L#1--E-5T4M8%BP`oH_2=AzL#z2vr*o`qmb&s)>s}L z3+r0^TpHL0reJfSdpxJMxRCA!+7Q4=`myugmr@;}wlgW1eqHFs0EyPwks2hC-f*vN z+~-~X9#q_GRkp$T+l|ug;{R$1Pity5C-zZcC~;E$;>$nnyE*%rg-~RDu12Ywp9vZI z*5=&hN!A)RjxTN3eVukHCow6WsJE$OS%JUiRWevBCT^F}ts(c$$NB9_@u^&zIp1b} z?$)(elviG#|I$wrKkI!Xx5i!!#IpgPcb;ELx<&BKaStR0-s!W+li`vp2%H1XF$OaP z+9T}`JkE2VmZar?fIBn}0oBXgEbQu8h;^^w(-15vE3FgCU5-LaUNsI}>R=3&T-r#f z7ieG;8TW^#BHAqzwC8P^fMt*em(mwP{g5~Y(U;M475J-N@00fD8wC&mq}nxhG9-MI zh15N5ipRkc63o0sDJX`XUPRWapGoHDbvwq+>*)xCCab3B8TGpmiKO2QU4LHHmkra#-_e$Q- zLdt0MIumc)`2Z;*Hl0zKL+uq^J2ZRh6?Py(OXK^{wWZIBGrJN;R8}y&C8bFHbTmt^ zy-)`=;i8LseylkHP#s%zx_SZM^>NS$s?{p9I6p0%-tOcUH9;AJ>Ivlt#z90Ao-Qw4 zlE(@%IYs%xC8w*Y8grd=VFjku7x}JOyCP@XfUsoL<=nRG#E&M3FA=2pDh0vqwmS)T z8e=%>!(d1WnAk&~!;1#cP?eT9pUi| zd#Q#W5yVWtlaSG>WRi27qqG9Ie&7_|;v*tFI18+sUG!Y{kWJPW#vU@RYW@A1wF0}C zir~b;z#$@#>oinDZK;dz1xc; z2M+Cnt%G%XGg_xdhiLDru>3$BxX6qj6*Ms%fto+?q+J#9qi9dDz1dFmz_*heu#g$= zpw9srMaqGQ^uhSr6qj6fU`4>M;m2i#d&h4+D@0;lPv`l0oOU2Qj;u>$mZc5@iTVwkEKvl z4{|KtNm-tjYjS%lU?>gE&4==l#Z+vE z^}gHy*4cBqgY6dYXkYux*chmmdvganp>RnrC)tLl?ec)FwqW+IJ*H_vV0?9 zG%&X+xnHG0s;^itle1R$_uAI*wcG!6@#^6)cjIqIFTQ`?&pZ_Zm+=MltJnen;&ftb zOxrSbTy*cO>OZ+fCyzEb29?K-qWRVT`b(;>2!o3nIJ)V8y7^#SZHcQSct47eQJ3ZD z3uf-(l{NGE5yZE*=Ay@{J{da7rBc$DO>R~l@9)Mg{Nh6%Tn!c)`Q=`cTboNob>h=< zpWl4zl`JenV8P~34QIma&22;>cJvb0P-f>k0C^69&tHnZ^b5JiPs)tf3}uE-b>C}S}JL}ZA~?(y?|cl)pX!}b?ZE(u^OJ92R zb>$~D_oH~o+oh?KAZ)x>KR^CpyVTqtf=cXTx^7$CoL4B!yP#2{>+&Z1R`=fZP3dem zla167j*EhEm$m1!mQz#h0w>_EFn7m zq1ooR$p)Wg#O@aLW>q>27~Hex-|v9K*^VZHdC540L$l3(A`$1>^s%!Q^lTxO z7kyM$rw%QD;l``!^I+aNvzOrvVH$Gh)Q(itq(s;a zvs`%QsGRgThfIvpCm0Yk$r?n3K9>sd09X@Cu$87+8+jAMqn=2fD@)#q`}(wg?+Wq#-2NkVxwxKZwmKmRkp zg249Siue4sbGcQF%=x;c+SSKF0kQ-F_niMm%#l4(&_A5=p91^d10SNn>5q#BH>I3S2@2!2M!y z#t(KmJe8EIPJg^>)JD#^nA&T`hBy%b=_49Y99SWt69;fl`hyE0NPvfaV4MQGWA--6 zrA%XN<~8iZgu{#Oit9}rk~D*`<)9OVM643eO?Hh z+Jqwobsvfeto?R`zRuC!1+0yD3EYH*hgIi=5UoW(q>jqC1!_k-3om(^yUkViT>o+5 zV4ZkN#KsxaFpt_;z(PiK=d5pbh!W0$+hcm9V zvJjgd71WE1)q@xF7f@$&)1Nl+b_p!B59|7ynzPmeE&oa?-|!wa+Lp*b4hXM2=&k25 zHI1a(LA%;R=GDNN5%KuXcYBCaT?3KlA((YOXX|0yS`_)=#S zSXDPPzWmC3?$E6BZGy?Ggjx{rMBq-?NXkJFZNq1wdvZGgA{dU-yxJtwO2|P z(`s<&b~z)B%|Nhq)F%?^>IffAXhJsjW^-Oc60362>B|b-Xar8nW->BH0c&wqPvmBx zZu^|$LP;`uKn5>%Gy|Q$T$+worotY4#myL2%H`D0>P{(U#)<9~DBHEW@VP0|@p`zj zQQE9|dT&~z9qag&EM7%u1n%DHZLR71BZ%^pdfn*jba_v#wF!EYckEA{^Dwk!@=s+} z!qG3o>MmUDmhSKW3C5)YXr6};W(h~Jk9fNq=`SkBLy)j@605|x97yBt@h4@;eZ?4| 
zcpP|7f$Hz`xNFuG;Oa8%(Dj?>t>nnqUnGy2a7Op?pa zO6QT=^)EWtb}c;@n~+WVQER|lr$#a{=BB?h>-|{|NSvHKs;d8^0luP6e}+wLM+fwA z6|P2Y!MSjObL<*X)bYAER&R)*MOGDl^vM3JSTC#n>EU~O^?2xP>!){q_njY{6tp?- z=RY|@nNFv0_9x?)m!1*6eSqfWr}pk_&nui96*?`~2k36Ba)5vCt@OkmR7&sen^sMB z#&-|TR{{>Zb$I)O$HpIh@b7B%0sy9ff^Iw>=m~L^e9QqW0xd^DhkgZd2*&jJsk3}- z@z#xh@Hakptb$Nm@VZ03fXzQ?)GY1YXI)cDJ_XmeVoyG8GFNjQLkq;zRU>zlV!i^6 z4t??EbqNkaNs zz}Y(-UK8yH>0<(pgCP>V>`1bxaa8(gELrR7`$)>~q6*Y0>4~~~_OqxDe?ESB0TqW^ z?fBE_*mF4&tGDXW8<&laF0Y;jo6z@A5*5auh1SQcL#y*pN^imi*fqbPSkEQTy3=wG z)yRDO3Flxgx1w~eGCm(4IufB)+0oAjF4oGx-a!4I0`gAIm3-Wl2s(bo(AkqS!*rdg z{;QD&#rsb1Bf~8erHdqdPO%`}HyQ@oVSLm&cPP7kV?_{UIqg>K4Q?$0AN{b)(vHYn zlRHPHH70UYS4B3LfuEaZ--}-DozU);yBRYbELR4iD6ntBja?&_bdGn?umU8aCkSlF z2p40=^Aqkw>}Oh_5NZWVA^HHF%#JMgj=Qo1JE=(KdnX>%2+xdZUp5@7x$)|VeIt&V zn(;gooafI4tN|TB&*s))<;Z=;+Z1m`7K5BH%u$f!SbXhiV>q|u9re3##ACaIV)?9g z82sCD0<4x~(LiVl;Z0sy$G|R)txv&D)W1BIKkkbDWx6+x&C<39VO=q8(8jeqSkx1} z2~Y!vS!Jn(jB<)M$h+=@fqx9XfOR{g(6|d2i>MC)n{5q;S%G?*7+!=dd{hJ22>3=S zy5t!LZ1@CB(jpV`*iSt4rP>s8`^V@#16*Z^$7MjvO8hDrO6Y%tHeLo9I33t)aWBKR z-|%kEAs=)H;hNfEAq1QtnkSeR2Bjysk17F-oJ=rthD}&@ytT}q(lgNY-E8DWEd(bNMqsM;`pUt-qBW)x#$M4r?lBC{3U#O9l__1T?JW3DI zC;n~(c6HidkF933Bler7|A ze>or2mBdl2a6O^-jy#amL2ylyuMID3uhg+q51By(T4EUt;^%(D?=;TgX!H5-#s#=3yMmLV!bV3=13`azj?4_$XThXnSXV|Q0@VwUv_NO@DN>`SvzKAw$D_V`iE0c=9yc0c zlDj0!cHVW4x27JM8b(R%7jr#<--Rm4YPaWn~_er8c1j0KKC=|a0_S|SW@0HaTigp6?h zG`l*L;Hw&;#f3pPsl$eO+9#bJLYKOcp`Ti^%8v)v!)kN1JWfQT+*2-54mBNGc4Jz8 zVIWtUiZbWZO?@c9Q*SJRpAM&+yCM1fA*vQigeWrKl|o4f9Y?0)qArS8R6k{5U7JOu|lic7zQBN zqnSvNb1e9|XCS){noSUM4%D%q$bvDj+@LuHeh1J*z%XD0fL8=)OKRomD6m1>1|Aqp zz+M6HQE)YFqSlE7Cu!gW0CWKe*#Oi)IR!F60tFb%kTnw|5zsdz8r8ePd6wD&sAU2e z0ZlNCseQXMdU(9Mvr|C7{PIuFf0nKHp3w~d=-c1_(Y;%9S1*3X>Blx;NRR=j^-kAB z=+7!>*je{0=6y$Ha;Cia5xV4AFy64w-)lBwv4FBK4d~!Y3ipU}w>QpXwOVWUhW~H< z-2YZwn!1YRuC|6#=eCH@s?3u7r^C6+M&GqjA3nJhNg+(V@UQ;#dtrt7=HFfaXxCH< zLtZ|nysxTgy{D0&KNyLnmGni-^7St{tVa`V>F^eR{jhr${pw=n?ONxVu<|lED%CG6 zM7nCR|D@q~VZ8T^>(`m*bBWKPcePSRo4+(uPjLmSt&>YciKqpCXQJz3`zysuBk zKPnT~5npC@@S@IY`Axb_2R5c7Onm*Y`a2zdmJSQ2JI|xD&TuU=-ICcy;$EmTqc4I; zcA8MKCpEP!KDqSOZ~o@j|6wAY!ji5K0FxmweLdIVosYC8`Nv1!YQOJOsy_1`3TP{=^lBOYP&fwdku_^;nBbn)A_VCDwh3r zzVGpd>BgY$>&>Vb)z;JDd|PYygRb_I=-~CJvR9h()~$*VoL#{s*-Y98RUGlum3G+e z&dAMCgd7R(a?QKpaTLN-bYL zcm~k04ihZb3VI%sLWTwZ1b5hJ3U*^f;G+FjC!foOFH}Pd^rmdJI|6qC3;htP_*H zzHXo{VQuIhiSP-*drrE;eieQl#1B+%>vR9`uYCjcexVidz5>8h?M!-Mgw~q~h7%xI7>X+hU39w{ za2f&xBuLu;T6HqpV44AhHUSZKac2a4a1nRAO)wD+yN0SOV0y%RE{pZA)3F2J&2 z1c8L9BTj6*1Ip{Y%kR1hNdir1F{duoskUZHDeQs_T28?9?9q6PB&!Z(*@IbQ2A7|V zYs+rR0_%=`t%HhVPYeNlZs1+wl6~K=eCyx-`9J$NV*`c(a5TBuI&*%8=FE8V_h=6o8%Qg@vvE7-Y1PoELXgTN%)mv~7GxKxOqpi?KJv~(*- zVq7nD(LdI}Vys5H0!EF~F!yxfOl_?C^iOP0 z&TTk`nvKdhmWoXKwGpm{^uXe5@9i?YY-4@ZP{=9bm`Umf7`RYG&?{=s9{ZoWxs~yY z9dH$jQO0Obmr_~0sd`aN(|8crkj)3{gKKxtvH272_T9|aiNX=<4k_GdEVL3~@e2IQS)N>UwHrD3g&EUhOPsPrv8dKKM7^@TG&1%=i)#B>*Iu}iYZWFvr8>9b(c1Z=!cpWc#!>Sj<71eeIi-?p@EKAG zdP0!p5!<&y7vEg8#~r_NYxWLH*)k?n$Vp z&v(+w>{;*F;c=l1l9xdBeCl#1>+U)ob;)G`XB>XbE`|ehHI5mY@zW#fiQmi)6A6Pd5l%uT{V+xyw&6)qr;wSD{gmhC1HRo}7hhS- zp6}c8X8nsuZl?y#_St%9pt{K}-pi|th0%K#gc(`#sB;q%^(bv0@Mz(x+`VA43Nb*2 z42CM@$uymWM7ASvJT1^Pgh3yjMVs zZZKLL^jm<9b~QkoQ)dvi20VZV0FLOWHPqa#Li55p^#0(Znb9}r1!50Y^_Ol_ z?=PGpr;#2!NdS>o^?QxTC6WI<7)mzsoMl#=CIRaF&hR}ZAz~j`aXGo?%kE1uavv7JW)Gh zJ)?)qDaI8h5#a@j-tbf_kQBv;r?QAWJhl{hhp^YTozcwUamr`jZ=bWS@j?D4w<^yD zxT)w`1>sPDZ2}o%a0PsM@F&!YlUa7D&1b#~{v?PZhs62ltUej61_I(2 zFMnobKICvjpWDaVfByYLn?sMAGhn7QL%UOK-kfuXA9tfd;jY`!*xl{SITI6FW&9xr zKBJe;47Ax7eXn1+S?@fF-0~fKSm)u?U;meX`Rm{OM!Y|81|Xn>d!$;w|Jxt!a(-Pp 
zdiq`Gc#@d8Fi2ppv`7$Dz2nid7OB1LO%ICqs^*25^=v!*{Xc#3)XU{aP-{n=k@iT- zlcO56NrhVOaj=qjk?zb3aEtMK(L<-U*l(B)*IChAyfJNcHfPL6V~oDGNFQ{kp%`|) z$1bO$LgAl|3RQ**;%3)(-P%!0n})kI@Asp5Px_x#!Z-}u{q=@)o8A%}Xrc?&n@=lk4}3!cVp?sl1-c0cFWXBP^zI!Y_T-1f*`Ffkc7(UX&|EJ-P=f>yIA2hc>+=!N?s)2! z6~seRldw^f(T-&^8btf`adJIhYVvX5Y9JG{R&Y{bijt&~(hKIdSgxVzvaumiA7!Z5 zItOp3H*>{a*j_m9-pv6w^%J==_d{Mo;}!4W^o?s5&uqcd(7KU&a_HWv1YMqS*n=I5 z63wO(oTWH4x7L3wsTdXv<9o6%;Xb+Kan_rj7cd~Kfisb}>JzWurqp9e+Z+kSI+}SS zJOprZEIk)nv)QCpZ_{Nj6(ff%u#_ex83Xb{Ko$>%VqP1?UM1Vd(YCJo9nHEc=;Vy^ z4)j{o?t!WlbFQ`gkf!Y>J=c+XsWxmxy^=!Z#@-}7elqN)*dXef@Vhug79v}Lllzps zrs5UCA5oly=;2^BnR?M6p|m`%S=SvxUzs8Oe5n9W>dE}_zA9$YUbEFZn#GwBLQY`k zpd&ER0K(avHS$sZfJtKB5mfhym$!T8v?FpcTJ3QugG5B-MFHNNA8QVTpNN_j9!jQb zrc&?Qv(7|ch(_@5jCXI@pC5)_AaxZ&Jv8D{-1TyAB?ihqy>s4OMCZ<&UZ-Vs5P8-K zE7+9k!g@7tf2M_)C*!kKe6VZ3=9QGRWA{ZT5%b9KOwz1FMDJ-KOi~%vr#ot*vDN;l zZ(JL=)cNyzh06PLu-bV$j{XedYK=N-$-No{6?w|)9^gdVGMn6kk-1`lHb>h+-jgA!h+#D< zUWQ;O-W)WE>fJ#0SJD|Esv=>%Ly9ZMfd*QNJvgyfSi^Cx`%FX`{;BhMk}uh#g+yGK zPUh)AvoqK1ey|4g9>o;bcz=tqPsR|Ki#sbNgokQu#rCqH=M8B!M(j4HA($Qv%`V%x zK=EaF+&nW$dJ-Oy}|2Q4>HA9WXarRzKoG}~ckZ0D&`l$XRlUW`* z6qnDL<2oJ1MeA{MxgH%M5WGWcKYfTaW@5Zm=TM+J zLQ4QwK&Zc{SnYL52UFJId=uRf_oj3ztIs%Q&-63b$*s_>&EVwQtuf>l!VUGgLkqVd ze7bPnYBCp6SJ6?3M&BKbW@qT5#}jYLbH8B5==#Pg)an_~Hm|JG9Xy?#4{aF1qZfx` z4k1xwSUpOu#Ira3j}O{YJUz)bHR!9jENaXj;nKGi&)0p_1N6t z^uLuYf5gFJoxEffzwu`A<9!b_Z@U^+%3gvLzhm_ir-(6AH8{(6r^_8n;VjJBK7dcl0F5#ElU)c9<&ov3c zZFem_@t6PoKmSW#%4=322m=EE2DEyZ+3zv(t;@c83KrXYAcJNHccesB!J@d}l&X@k-tEWmwS@d~NMc^>bV!_#}X4yUX z5C&OKpM?X?3U6&uTp#b5fMLMM9E4$Iz{b~#pphFOz+dZuSU@Hq3O?5wd40LXl*T}U z0UZUFaWar+(?HW*B#v^lxP#;3Jyd z(-@Zqm-fN=JaA4xKVaY(0?W`01%LoNIQSR;=fC(bzy05Y;RyH`;EZM_?ZHT)9N`{3 z1p(Vx9HKrCydD^Sbd~_F1|G5lXS9X6n0&icqfCofRL2J<%Hqb*1AX|;JN>-o9KLyp z`skBkhQ?*2SD2rVyj$_6AR4iN&8n1!_u#uhoULAHxUG!^ zb0Q*>cFV;NZrfKcuL*j&rlnv=3?e*hO`GF6(t#KwPF|_S;J77I_iE}TY>^W?F#E(! zZoMMm(Km2U6N|;;VC`ht0qu ziS?@L4dD`654NhvRMr_f(q^&!nZz+FH*Or1&b!tWQq|#^nh6zfkca*B$6cR7*PcbbVP$>dp~F zp0|59+`f_LxqO=J5jbMCbS6SY{V7H7A5|x7lQ82IyE^bCsN-{Civ~?Ir^UK3-(KWn zZ`-CDYqPj(A;UAHp6H|4u_IMSTZb!7OYKsbyv@;K$J#GQa?6J7>@AM z@0Y?Cl-yJG2Vuq~#Ths-FhA;a>*_lWC_BYSyp`a?hao66zuw&aQ8wk9w$`1#@VfSbKXeP)TqDPzP3s0UJErF<#5F<5s&;8~;04(a?Yak>|CCiZ*KrlV38zD8g9$lCR5JPwEY&kzK_$j?-p z?^$!!+A~A^!e*;|J6T=oye!%0e%e~itLKk8+pC`IN<=aE^FhF^zMxsXf8uiG9pSU% zGsV(!OXgmwvyz>4_94aQ>yYF~(b?WQd{OOvI`%_n&rA{>wPF{Nxx@|gqg~+!1%A-I z{>7Jm@bObl=!B@`G}f&ka%L|rrfkS1da+=)7~rez#Nu6#>!NsJCh>07^{eYYynEmJ zG3dBX;<)g{MxtPhE=91(%<-uPYwH+bNzdT zGS&?TL(fR(SJ`FOymu^4_GQa8awM2w`ntpW5WHUz^q>9d-~9Lg=<6H>%{2k10Z#_J z^M`Lw&Yrk~uwzR6N!RuGqkng6uB+KAT395rk#y5fpxFt+sGbw~qovqL(k~sKN%YO6KNG!zm!~FODD7mCl^q36CF3YW#D;f| z?>yjaw{pfaF3Mz$V3RHXV#3==n5ARBJ?w|1i)%7-@8X>RnFy8VPyNcZ!D16dnD2qN)2r*^VE*451((h>A-3Sv#y7xlcIu$j}KTsQxP~aWH+z? 
zuJws}mI3YP40;7$slfvUq;73W54wEU#xYc_ ze8FLC652hF{c0t>6Oz>NQ_~=j*oDQxQO&;E>D^JhG-#|7i%(QR0QEHx-UqTpcrRH- zF;)iD*YNJB*Fo`oI3l z8>_z%0+*%$yr+TW1jGPvF@UM|YjmIvnO;94mBeVc&H84DDyZ5MjJB(|>m@@vQD`|X zgqyt%k#ki{&paYdxjr^<`|?N&wA~U88L+_ysAhCbR$-rIi$jt#Ex$n#xCk*W&N22N zMNpXqJ|1AcP81GWC3rh!Ykg@6FcuAJCWwZuL<{B!zbJ%vLt%*Q!T_HHL>^?RN z(m(yS8J3!8bB|L z>exAKanN$y?jEMvHxKE~)gb;59WOBsu}kCKJpe@CVpA;_aGv1Q>nHkJg}4U`{`bOX2BYQP`EXg>sm z=3GlX4uoN)g{x%B@HL3ONb0I%P_)>X#kw^%5YWSgh~nQJp-zhrOuJz#{0PPurFLI) z8-9=zvG*?z!w*+xe7m5S_0CtnUGdSNZX{oIw4O0z51Z5`E_FZ|X&q<`wcI0lr0M5r z;7&B_6v)D4-|`X<^kDd0wJqj_uKy&|mL>y!&h9E6m}N4Q2XCA<6t>^DRf6^hqA=Io z%9y9BRG+rv+-I#&mEETYGqToH$yJAGuujC~m3Bu+_h2V(T$0|5O8)cy1M_oZd;9b4 zcp*|mr=i{~v)dQGF_<<&LCwhl+}21V^dfzXKFzphnx0N#$Fv>v>6JjLaDMK3bADXB zojBN}R>{K16bDW|UdkC3;+R$HzDoMp%Uyp58ca8?WlG(Wab&N|8^uzU!{0((4?|;h zH?c69mUAA+H^`k>ei-N*vZ1<9*=q7&aqJol<8!gmS(KeUZ9Pq=T=xzuxhSlOsy*}4 z+ynSotillTLY3(l;ph0s<*|{Dd!A$;5S0K(jQen8e)r)EDZe;Aoi53>BUQbG$&bCZ zr-F4t%3^=Cqd2B~R5AMHk^IuSb7xy}b?ekrQI=$y+8UJ03)WdP{g>d?`AgIP_&e_( zX+{`O2?HTEom9I$>Q9hhqE_=D-Gfw^9zX$8=?lPXRhogNuq8E~x0Ur3PYGsTw(;W0 zM8+E{3924$9@)=LKg7Ox!74cp=+Tye^+qH?YSJ07r=go)6Q1q~ zHogifv6iL|r;!nL%{^5tmJ>N03y%G;i zD&Qx7dh)Cs!?5=Ar+_=-e(H*cyy7-@=u6(McbUUin%pO}Jb3sNySUbES5Vnc8X6o= zcWa+UX5I_|Rd3tR;5#$Hi?&|$N=X_{2>BnhzmpQ78Y*qfeRZ`m8O+Jm)06Z1YH*9s zxz`HO@Yj6r+V2g%ErpEelxSe7I{ic#rAUHIBEVxphaHu+98Q%_tRj%T2BelYH{qWM zH-G*oZvNbFej^HGC!kCK5aoOS<2EgiQcLc+>z`ABGJR6}=l|>%?tFMuWld?4!otTF z7{(V9FS@gv^O+yjdmBB8zq0v<-#Z=mc7qI4z~fG>?g!Eo4+gm20FH~0syLnTeq|xK z+EYZeGH$YYb|4?dwnKI_!jrV!m_&(#2?ni(S4xPy-pp2=l)h47>KTX*lCmA?vcoMq z^0DE&t~lA)QOEYh=k56uy$6r(2u;s1(%N}Ysz+V})b3Djx$XrqW2TYz{JY_(4*dqlKO(`Ogs}>zNR4w<;l_kS|lFoKaWFz0hj>2?yIP zq#iFuEMGDK^<4u|x{M5YHK~&Osz)IsECIodHi7V`u2dMfDXi-4ob)2gLuBGQXPe%E zyI+>5^~ANw^vRgC(&+Li@`O0GEo5diBC^_H-8sX|wuO;M;oe{oAcltAoC~21H5g5W zBFjqcyn8m-OJ`UylfS|t4{r=w2fzAPX5aYc4EicH08R=F$AF4TCLp&T^TDL*&V+5y z_sap50!SO2OI0Ela05E<2t1CDdvW*XBL&WDmdI*JU;se!;}B*X(|*4x;}fbm1U^ca zIgAbh0Rn1wyckmJBd3v~#~KI#Fja@uiR;S=bRSo6i^Pjs#5^s}uLwN>4>~~y;FucI zl%1)|^b7>tpXWdjR)z`EI#Z#%70`%obNXL@>mU8jzy0kX05ux8a6pX%9njALPb+KU zs2M%?7T~&pxFE+QVX%#fZ85q2Kxw3hS6bm9(GlsGE;pe&J&t=^rd+<7>%=c! 
zQ(>F!Ry>|oPYon^q8V~e8qJ7@9{9=0_UY)#g_lFe+VkZGjg5o@JG8|NYu;tmv#0YS z5emlB6a!$0v7S&oX% zw)%maY)t6a3EA6a-o=Sq{vKZq6o+Jpj9bKUAW(Ct*&*FOi|b!LcJVgag#|Af9GP7u zuw<5cVOj@1uh0z!uY4SNI=>S$Man}zDsllPGm^W0%*h&C)p2TViyku+F2ep zR25^OPql)dzUDP2h6YR896jZ|Vo$sbxsH^kuZ5VKDc0HUO=#87V!IR3#C>snl98?F zs2Z|eo_wdjueulA$DQbcj_r^W=h*c2QFWmkg;1uSAG77BoVM-=Q{=%}C^UoVr%lH- zRd^1Igzb&=!Gv>IbA7uaR<1!yCUz>5E@L*Jo9+3wRBY%k-0NMt$?*?5^}{Eh+P4$K$Fr>Q`-&Brk+f>t_$DX@43@*WP$@qjBG@|&;b$-gQ26V`jrU@iyqc`j9p8r=Y2U& z(U4<1!|2;w^1mc0CvyX$LS*5`zr~3O(!v z@_C~j2h8ZxzO3N@({V2pSa0k-PC(0)#f=lF^yu8kesQaN(BqBn z=S=;~jyerp94SoFJNj}z_VMigMedig zSEmzw@#f?E+QwQ>ZJ;U}Frbq#Kx`{2m~+^)U<{7i4lg^suIGQOK}m;#t3ik&z*zA3KdU^(uYnnE>kQ{F#VxBp`M}3^25fW`_=CF zPft_R8~)ESo2~bE2H4ru?{tBtx4{~&>&N-Zg7X}HpdF@`=JLw?y1Q@Px^aGRu#{9= z5$lJ4_AHS5Qt+elG!IeevuC?=p``OdGf&gPPjYy!W>xBKKitN>vKQ<_x}E`eZiYmT%aO@MFl)4{?Ac?n2B#WuDb*Y z%ro8xKYH(X|J6Tw^620YSk5$fxf&aOW)`hImV44SKgYJ#XN?D~^g82ZuC|DcLy%unVr3J57?|LJm$kVatf_B*>@TtV=cHwsG%H(U+ zFNVXD#ed}?GA>mf6PzWQCX*h6dX7ujBq&he%9+*k!6ewJ6xLx4G%o2EF>JgK@MOnn zg!zCv07MRS0bq1_jNq(M(duK)oz0VY!xG{Hj}A=PTVy*45HrEBU?wh0PmY~wh@<8m zdk5InWuQ*`e&ThpesCh09$hDk(fsr!w^qs7D!T*$+W}S@8!x-)pEMTiO zd_h-_!z}0w7hTX1uXwPq54wBMk`N1_jfJrwTK$Jd9u7^d#Lg^~aB=zo-U?Cod0bU| z=(rIKa()4wq7W?jCh67q@Nogt8N9`vTv$y~b!4pf3vAXW$tFF$5%BFl^{}18gQy&x zTh+S_j#-G>+r4f=Wx$$HsL>`A0;#%h0MUYWDmsz_2RsK+lTjnKW>xk*)1~>zePKQG zP$=f9K+mDQTwN{=_98K+SoKqhN7MMa!6P%y`f!>$BgGg?N6o_lp4H?7ZsXeLKGIL! zBcY4+#$B?bGp`U{#cK&(gqaRc$HD6%P=@TKAoQeCy)0flWh=ElLIsCka55jT?(nE= z+ic(N=^fRJrGz!dd|lB5il%f%&^cQ)a|Gnk0=p@J^)S=3ZWMrt3YDGotIGJ{)RoKw zLlm2|teu6J5Q5Ytc8%4$V^`{gezqvMo;OEf4?gk1v4Kvx;aOBF7+dAu$FSp;ZEuU= zSs*FQWHL5q(}|P7B)qNqDCi)kI`Sm3%eQ4{#>z<6V{OT8hkf{`G`pv;>*GCUDQXN( zt>&D^t8ykO8g^r}IOlkAKBGtX&;8ia*gu5#v}q#MgXC^YT(?O(iki9t)S8(WCMR+!& zOo70JgQV6~u}yJ$K5-ezd$N`AZ&Un6gys35M{g^*bVKx?k0#^%lp2y|50jCS78E5R zv)ReLd-Uz!`49iPpmp5Dly|bBf!PUKj3m$4VCn>|>71+?5!^!LGG_uK>T|SoN7Ajf zySNvwVe%FrP8*Ok=P}RPAV~!zZD{$ak;b~rwlSeukSC*?w_?hVzB2&40(6XgSC@Pmu?U{>LeEs*Djws)2C3-H@QVTp)( z%94p|BLNF7s_vG@OiYZUFgZ_k+ns16dyGHNfs$y}g2Vaq zq-$IU`Ow)JLt4Vr$>VAOV;xOmGYuZhMez4-XV973FKwl21HQOF0`|;Ck8dr~9+%Lm zh8?boZ%CyjY>xsCofC2afezd{?Eg4jTZq2lsYu*(Bz(RYe)tw4JRcbBRI&%lOGN9P z^whP(sF5ZK%~aWecNfY=rdQFsz*l|vl$>4KX^J;|eieo|X*#VAUcGTsGraln!HJsU zUH7%o{PN~cxduC&ctBjp^_%15kW8tvIw(ABl;D?1$Zd`b)q5JIv_pux6GE zfAsdZWDpkN(ToyH$?vQh2E$8u1EX>5BP8lC!rW5r z@#roPmJ~GVa+JLg(meIH&9TP?L+J*}xw2DlKevyT(mEFqtOXyKvf=3F#ZZ|ONmBPs zLe?_)jOK5+FQBd>HR_CP`+@bXhyU%E%lMMW|FNFk361veKKtd*eXV!NP!Fo-@AZ!A z#z&TdafUy*x_Cv*X{piS!HvK6S1-Qyg;gfySRfk!_}awxZvTF9crb@t{KMZr)Q$i5 zU;oB8OyK68w-gWZgQLSWhJE^qKXvRH^q@n3>QJ`M*Cf9(w0(L^{uTS`P*QTQKjyd}37t!ER z9-SX#>R~;khu``-Rl1Kkmt!aR-gLXzuWCrQ@Qs)L-t}KKJD-gbKsEv81P;7i_rSCi zf;@mM0WRC7nqHWzT)8s>}_y!hqPo5+Rt-N2Z7D zj_DKyWCMsb5cGqd1;nJ#KEbGY8$P3EbZ|z3jVyR*gX^c@sY?%^VUrM$!(hp<3!3T- zkaKP0$CTu00cnl(L3a^wbLIhPW~V0Q;Kk{HZSVh+-~Z?T_rG-las&i)DQ=uJVcWMr z3=ZY_wQPI06ujx|-PJ*0iSLi25USagky8zM;=J9EWEyCaO9lK*8r;bM)NU0b8oB4E zM|B6FNHVSZJB=~#207};pf=R0BE~;$O+Htv^lM>yt9_MVp9V{+;lUq2={H}=ChHAn zX0g^OCxqBZK6T|>Inx=85AQU`-r24IuV)tzw+{c;5Acg5Qgr~vb$W1eZGO0vbQTg8 zctkd>LT>yyYi;C@t2=v@S9MPbX^K-!;(~Q-nuwN0n^_T6UNP&Xdi=#GfK3>}5rQIi z= zS>x%suR*VRm~S`g%)%o5Xv!}ki4(Q(wcdPhK2{#^7x)cx-rj+qZ#`ctW+GQSC1HQe zq$YmjE8*A#$Q?2lpgoqwpOGKF)yWjr>y@3={Nw}9z7CrwE+XG>BkoOFtbr?4Y(J@m zw)fa{A};DvjZrd+uNR*p#p;m>x;N>*LsUEi+%iRy1%FNj zpLwZL`Dj{DXuI>SrcfGrvl6V#YtVgdC_qqfyF38s4}!GCnHi|+Q!jveZ;xaZ-tK#@ zy2PoGto=E^2OfD68LHK_OQ#ITGz*pY3MOz?Xb}M&z>`6~#VP4QY08RobdJEe-V2il1kxE3_>e%(?Qq`6XicP~uj3S1!D(;Vv~7w>QCooipb zrrpEw)O>TS(`g`~Y%mm6@lv2!eExL#(fI05{NAl!TL)7GK;%d(9qu2O 
[base85-encoded GIT binary patch payload omitted; no human-readable content]
z^2UgpATFs>nBr5FiPj?WqD~4qu`w6Ph`C4oT0HNgoyqS<2l}@rb7>p$x0n;|T+(Pq zAF`0{p|D7VZR1=;T0I%2U5?1d?kQA@h0;eXFa7aO;A-g`hW>eC8u@Ez;PK+s4*IwL z$+sW8o`pbKQvi`NJ*_}54EZq~tfgH5bO1_sy>0+NM-E=Pz|{rX#i==tjDi*o2okuW zfaihs6P7rE=K(nY;-pJ^a3BZd6OMCM!PUgfZi2Ua_9fPprrH)4g4@6YdGr==gbxJ} zkNClO;PP^SrEBISUO=Z#RFAvpri&swKy$e$FOUq~Ss`f)oO`0(-cu(E;Q0g~cP7FN zH3e>ish8TXxVW99JQA3!ht3yT@GOsB@HCvhkj0RrSHE}zcpr5ohM^mdIw+wwDkBm) zbD*>IaQD7&J&rYYd8J04KSKoQep!mVQdO*qdh2FAIdb^1T(WsSq*B;) zZ;-td84+Hjae_IQO5hP!o$WHIt-Jk;l~`!+py7v=M@q%>Ilq;g&D$rOgb(Ycn3DZk z-1|;P-+*4w1KsJZgFw+WVvxhwketo6DoG@f^QwKebVCWxkHgi(*g@PrNM=Kikx|xZ zI>l*x!84{7xvpMA6?gHX&antSzH}1TGU+fFPKYz8`K)(~Y$~|Z^>|Q#=Y%;Ua@Ycv z)yNDc_SE%+@hy~;)5z$BCE;G%=TruP!O)y_G2#Fd@tR-LQHt-$3!iZ#wZYWu7nN$d z=39dGf_G(EI@@z1EBc9Mq*&E_+QMj*Pr~kcD$>Cvh#`oP(nvPk$E($?b1YXZ`od0K z^*~YdQhU;>>RAe?h4VbZ1X|y%%F>=s(CoN&0YXxFN(Qt=L^w|#j6d*^i`PfhM@bh~ z?3HGG%&{r^M7(3n;p&-%iKh>93xQeh(wjjMjMMl3!(V-Mzr6Ztr1Utj;F=iiPkyf- zS=pKmBg4Wi=g#+JW-_4Y!tvC!`jID8&}CNeVbx;s+3~W1ADr#(E?-|Q^e%=mb*`=s z*R%O3yzwMCmcDBISntHJSf9G39VHgl-h0;`(O|@VPdkya4l$=jmuwqmmVQ^N{mS~4 zzUO`E)9+OQZm{24hT_XFzutdWFF(^um2nOf@E`5$_@+C}VVWg7yB|fT!692}obT$_P`O3Ba=wkT6Cr>$?D}O$? zSv1O(bJPE!T(LU012&hhCZB=axcHZKA=b(naT zet`-RT3)o+T1Rj`eVl#SyWma6C;9+zOOH$6jNe%SeLPz0mA?Ja!`iH<0!!Qh6qQj!|Bh) zkq6z01x+2bOQndu(zqCe6RbMoNqzFa#AotKZ6eMIMwsj=*4xqB{X{kVI@2r3mUY__8% zZ8w1SyeFS6Yv&$Ca|aL>dbM!&E{|rA&8e}gw=NMtWP)AFZ0q0BaF6 z#YHXcUR2-%-BzAC??>k-oSO`lp*tpfoM;=FrP%I@JUH)3il&F9bg*#*uVDib#99WS zxVJXv)Zrf&Kt%4)YoABN&WJitP0sIW9D29bOi6S)Z?1Sx^G%_j!s*6%on4>YmvD8NXqi~-sX z;0O&1&Q?%fu5PGDc=1hin;3E0X#5fQuQAK-x=>8M&9D%fpmRq}imS_+W!EGxc4_Nb zi-UtXteVw)RJ&DC&xbeDP(Jp0xqgBE_~1L(IXQa!X{Z7n8Zo8(=YRC*H-3dCQ&V9W z)I-In#sC}ztULlh0gj-H;Kyd)r})+R9osnr6ejDK!XR7#qOaSAT^>|73utCWFnUT@ z^9T9j-(TJ};Z~0>(vPqqU&V6Fw+<$ht$CLx+%9A-Ut0V%>;o`!%Fp0TGC)3r% z;OKU@`Pxq#TcWJmF#d=m@ zCnx!??0)@M@BHpxRfGM03ZZKbOQq~#*W(4c0{EaKTXI%`@+ZtxbQsC5`v~UgxKFGo zEaNaILNE_FXy|~O*-#5PPq@HEjDWkWL1IdH(B4O3WI*K4ikE>JKGdb=c55O{`G(gX z$1Q`;caA6Pm#=XF5xe*t@)JzTdDf`<$FtIzP|f{G==Has-3ao40StV6 zyMC^5RyM98n zAQ2&7oKdqE7vH@Txlt?R^7Dw^B?r}c8oe`!x>up}FeIY+<`lNEOiL*G#aY82<{XCx z8EV(F{yEt*ysHtey;RqKG5bJarqnrY`1+F+IrM71_$o)jHM{TzzWS5bDqBYFlElTJ0YdkkvWKHMup7u(Kxf(ZI9ihqAx%@ ziR(y5KK2wql2DQs6jD~yQ@DLoh_ zTc%~th!cfXH=E1{uzGDPc<10D;9on6AEj+GeRh=@=fR-aTY;nV>-gk>@1)uQw%Sex zq&rib+7d%2>s<_~&CgviM*0B>W=NFtE?oTZf1tHGDNo+p?JTh=W z4%-?-DhFnO1p@9NP=+{XgLqMqhk>9U~~tZ1vzJ!tK~q- zFEx8k2IWbwOIJ`D>9ozDQ6jp2W69aSr-y=}VWveAfa=fZE}zYb8G3+^ToXoR8enc{ zKqk=4o({vnPmHFDz+{~R69f4vuO%~lMZYjxG(gQsb%Le^!vZCt1Xv;?fOED81PUOu zzAlq_I7iVZ&1^KegUqvf#r<-29U-ccc-!Tv;LlTL8LMP%(i|jKA(o8XF+q~}CYp{! z$BM;-bsqtW9ZT70k{!trpIwmZOCMh`P*Sj~^TU99s4*Yh<{3UhhRjK0j=s8Y>T?9r zV|vqExYkzP!%n(AP6iYMuPn4)s|e)88eKeTVqVs9K|WP=`<4> z_iQZZ4&D}nR@u{g@&ga{cGrv1i*a!Xac)=+WE{ss2ko7#b*;<|SN5)EId)cKEnb`F ztsUR0Yqaa1?m5ZPwFirlU{U&2kfNa$t%!ENSH#(YtOP})o3CUzH^)Mt5gVWJeqc-? 
zKa;T)5$PJkA%u)RC94*JB~dy?yU=VTa>HtmMnU41Ui9HxT_P0cDwj`~bjWEuId?AU z-@ZD7j>7TFc64r;D>6Ak4kAR-*+nbGp_akNHBaA6by$Xi0g7|uA?hBJ<6zAS;7+Fr zl`kpb^;liqFmEJMGsokJ8M(bm&r{P*AYABy^-r9u4sX*;?}Xb~~8mMu|XU~Kt4 zCgybX10yH6>HsTp-<2^{TbBUQ(I}n;ZblF|y;k_}e(OV9?Gr-}0@xxXje&(6)<}Ri z0~O5`_BH_vj(bJeNKUlSz&F*wJQ;(SnmH-9d7Xz>1pzoL1Da+!tRc8*4+CO=nXtnx%K z#!|ijg5XFF@ckuF>Ws!!BOJSEc?ZOpby9pZ?tK)|9ShTAmjGhO)H)Xa?7qYVS#f}5eGt0@~2lMHbzL7 zvJoTTLGSUG=4*b`i{FyON}%kah~Dsr{k~rZI>0bn zE5XS^#=E__9_C&^#_Y!CThB+3CX6jS)8uXh*B4$%c6_hb=6>q+U*nK*#)EyM-OjvQ z3gi5yqQhV~ACX?`nV&~q`d9zxZ@w9N@mFK%fBH**tA}mthOD8a#?A0>i$S1Be)UpFM4}!7_0>@#IheZSS;<*rx02o7 zgvgU62sHpv?^X;m>}}RE1_P#1BKcWsZGNa#Rmtn*H~F9(SNLF(b4Bud zfH5~${h)1b`hb_$_JG9W!SM9*)^7I_Dhu*0FXdrI26mX=fEp`38d zebYCT#LHC=AIqvCfZFhgEk#u;s|W-hbtJIYF9ONS8s;jQ_jx^EJY&PyBdR6uft z;+9>>1)@JKH!mp%VW|1A`+++Q%5od9Rm{OxX>gRfxsA^UxTK3>J@j`PPmAy6KQ**g ziw*>VJX#)C6_<(X+LvUEq>7&uO6b?4j*xw0%Q4y0COHy8UslKA!`CFZMXestUJR7 zU=1G(&;jzKK@kfia7=BkIckcb8v|7=dKzJ?+yR#zbHHIY8n_2URR&Bg{~Tt*HUMkD zWAlW!Hy?*D*$rJG*dVM0$c)vI;97VX_mCLowQNTPBdg%LMgSq61M_GQFcIgn4?_FB zEQe02eV&C}l^FN*!t|v+B8OY@3lHPd%wg_vQ`+7jL@D`Fpf#@N>X)^x^f>Ag(4$a+OmEK7)v;&RcOHo- z#}5}J7u=O3MdLW%{&9O1bynf0ej!*Tr8$@ayG}Rchl`N|CCi6aF+K=rITUzH+|Rneh`th_oC5cK1}y zOeX%ocnU~zyb=u`47JnSs**WT)#R7NQoH>z0D9?#Fs_%@L4`#NHKOD>R@`Hxo`=d~)KV z=wM=OGLhRn2y?=-K|6O{k+}PnUF;5~%tP=6aOo1{)FtIDM9dPz|%wkid+_`o!*pq4#`j$;tcBP@?QbE6hB zKU@NZ+b4tY%@;r=(eAUpW9rI)$Rrc`pYoB-9qd;l12WfFI=mHmMgIGYIv zgIcIp&3BZoP8d)M$M+hBcht#KS--Rwl*j^SDt$TTT1jf9Llrco z!3@Fopcsx-r=!&1r!Kq^z0ks0<3vZFMRQfe3~P%VG;6sIY2%G10^p!X9SIHJTu6k0 zp$;@X`CK4(Pz_v4+I1ezP}qBCod5M&Y_eTyobT)4xB9~ZhkK*$8ILSJBGGJHf2=$< zeXg0g%*_4HkdPg^^(oIdCx%Ei05nqP z)!F&zrOF4N(w#d$qU61Y&1m%(Zv5&s?B_A;U^s@W3C9gW5S|3MS3i5{P3+eeOTqN# z;e#Q{gtb&Xd4^S#^MSKdjQxL}1 zc{C~8_ z@$KAL+`X-;>gNgT=D+`k*Av-r6oCNqQ(yjvZcbtnIG~~4L)58&oOj~#pfRVT=(559 zeqL2vL&tZ2_m7xQ-dS4nb2p;wnUdWMUd_Dv?hn3twEve^*1q`8{)dnE;eYY3{&Dg* zek=R0|GQJfoRL6c$$t^XA+2NQ{^t*F_n*0Td4;;dV1x&tlAzQ+d9d@3zLNan_KUOO zSMe2B#Eb&%_a5EX6IO`t9UVAYn5ymk2*tx%-*9NmK z*W+HRt1~?!t0SjQump42XDEhAi)eMe4}Dbi-yEMDiHpb>BWfvbQRL@1!HCUv@zngeZ6cL%^&+QwD-sjIaObZ&`TUHdj_8R z)l{HeRw@hBvkTcf$LBMUvtY|~qrF&0TD;({yEM5w>BXpcLmO%1?s9T1(rtHYpNn@F zQ=9K>E$aKH-4qQTi{lV9byu}JGoM^5c^3uxFk+;7J12{g)rUn>jvT8+p$AXBq}ea{ zVl!}d62#Y&aEw8IPF(;b_r2q)Os*J*;?NoL*#~e#o&?X#J}!!YgmSAl{H3aC4?L=6 z>2|*oXF!)okKN=;sqoSG#pTj)C)LPQYAl$48lT{u%g^@p!ew9F5S?sjWInETwN`kn z`ACSR;D!(h8f`s%9oJ8L!xt8Mj@85{Kb>q-VP}>uwEFqNBw2SfUOS8Zc*(gi`LwO3 zFK5W((lF_urjt+G%qlzv=*(D?n89*;E`&zPWSgZ(>>wm~sxyrJ=xo9D^6ue@$tB_# z;QR6_=_(MFWHAWM5R34>gUS?q+T|{JG-~H(@qiIcOP*8xozFP9rOT_^ug*fHU;MxR z&tFWgJEXeqB=C{X>3{$ZuG;Q>X|QH_fdK;*FjEz5P&8QW=hP5|!U6z>vEoAk=5I9l zsGkCfkz`DQC>Zu9iD6hSQ0kgSIZ{0xB&ahm;$R;M5GNR!Ow?rdm=J?avbH|QtsQ(KmF*A>E!I22FiLVWtyEmfHg>ILwPmTl5FOzA+Ar0O~B) zM#3rx4TO{PC;&X`n(m=-pos~M5L6N%v|9pQt!9E`#cuhG3t)SVV;-jb%n5~dm(*Cb zI$g6ZdP7Cp^X1mlS$$S*wU@81y9e;0$=cDV!SkQKNUx-?{>s( zI;GFt-!1q@SF!z`@2ztCVm1IlE!RDFhylphaDB!FKQhcpznwR|my22W*&H3b!S1-Y08aJ9_Dyuz&s`Ft=~gl#J)Ocv0V5%XPUf#h^r zk0Va(B4i>R1~+^?kIV?`;v(^CarC3!%u;PLYu)sbgKHqN z$0U=3D?5%5`uJ?va{}1x7S+Jv5u+(-sH4^mZaxk zrO0Q3(R;RWqgrjO`^KNx<0RJU4`GZ5?t~-3Vs9+LUQe?OasG+Z60W{X%9b!3dD7El z;OuY*oA=dpJ~>?scKK-_t@<1u_f*zIW8wv(ieARuRA-P{3pDLOtPM(`ZYmoJ0H6W= z>eY+^bZKKTezxdem{wzMufjQ>2Vi0^iPP@Y?s^?IW5EHyURnSIXrmxhA6h;h27W-Y z(%#s)&Vm2G0szr^Yd;pc0-!p0lj#G11D9ojpF~U>*Z|~|$|91i5y1TlVEO>OVt|q^ z|0Bn^ubC+|X5auNK_3850}O-bCYmr`b5H^z|AR-e} zf@#_Xw2p#L`8E`xePG+s0U-Da01yCR0A%kKY7F17z?mETT@g%VMIBE3tZL&Sh#bF3 z`au}H@|#^SerwEuO8}97z+IN!Cf^1_-)7PPSAO{=P(1*D`tTY$+kN$mf8*`B{2%kv 
zdAs1bKl)nq!JEZ5EaJIe{53BCf)1t_c%=$}&m3i|LFbK?8$cm}0|n}NL!Bp<_A;mV z3~qeHWfn%@d&;{jq14!nMzI^l&wk;{>QIltFbE)0A90e#FFg0<`nK6uhwab!NdW2> zN6>TgqfYY1=bRqk2X@R!fA@b)73zrx#TQ@6T*U~`15><>gs=AWo*zH}0N`2a3QlTW zL%~2ZWJkaS*Q#JC;Q);0PMhzU#phdZbJ_0jlNRnzKx@5+Uw^u##WrpR#wY$~zkK<# zvtPy;DfSFm{u-VR6VDFA&XnG|ZTps8CP_}+>HGlc8cn?Z9 z{NivMx2jO0XGHg>E39@*cfvX}5FBH?HXkK9OD^yw$HX~BGq;?Ngx#K>0lJqTAPdr1IWQv%<4D z-G|>z9m8+Pye8MlxBdO^`(x_a-Mmgc!^>JNeM!Q z{wzBQ~X5Wu)P}uss(CUuKz{-pgn@!K@Wi~buScCGp>qZr>&|5i@C)rQFni6YAG34}I_ZEHLXb6Th>Rqst4)fCg~24GOw z0M7SaK}s_6mYCL&DC*b^>^Ws%!yc~_Z+M=1I(QfXA1+zyyoL8Ph zt>M`k&9-rFDR~C_a2GOO;;NrNjZB@-K^M^C729@>^Tle%IcJrgYKuh^+Hj#f;}TAg z;VldI1ZXc%e5)m!^$7u8@c@V9DZ0t_w;h^2Y!_?i-62VuL{+Hfp$J1lRGapxG2>d~ zJ5ETG;_*J$kA(f!{z^1d{}k#l!bsqT*Anhqp|Q(hsa=zZv>AEqs?85kq5-|zb}`z~ zl{tI=+Ra*2aw>^dKYH7J1Cu#T2#muxYC6unp57_#!yH95DzsXHSFXb8T|9`oopzKA zu=3SX$D>&xFUQRVW9}A2O3P%|@a{`9xom{Mn}OyW#QOEQ=f3VRTq8lcH_fWOT5k&* zOjAuG(mwV_ipf=DmHa%cd9~qKqcSC9>5ZS7r44+HO|4J`Dtu-nt(%6MgBINuNgU#4 z#gP>2{w51zJ+I>^q)%NQi9f=}pJQD;Um&81A=4hWq;wsEodi(4jA1sqhg5Vq(Fi>c z95ul&TN9Kj?Z#lo$Mc!ywC#;m7ARq5*_v@d7lk@#Vl|&H@3ls{90?=*<0N{mzGz0o< z2>|E%1Q6*~k-=p}ey0AC*f z7(|8+y5>&}1wa6F2*9-Eo7#ex_Zjo#z~RsJ^r{|^i=#c$j5}1hZ~Ilijex{fVOX4? zG%yUH11abLAmR=H%wefQ5Xj=x1kwPQUO?$qlLg%=fO?=e=rRF^BN)gt@h3Fi9Q*TK z@Nq=p0XpOWuj$o$kRu)~uUCb@NGPkOX@epFiRIKnDZfR%dO5atCvxs?*N4;W$MdVd zOnuS$OcY-$x2lwQ;034v0RR9=L_t(yq8JEI0h%ySkfWU5m9g3CH^@q z(36^fl)uQL$Vm6Rb(AjfD}!?A)IMpW4Nxc0o)`K3|M|)8WBCE?v}AB5Aysfe-g;O( zv4^36#*0MQ6hN?Lz%>Z4lVIb5zISkJ5a_>rbdSH-I~o!OJKl%=4FmAJb!r0B_A4^o zh8*`H6?O23ALe>bzXckmBILjSpWb6z)#bm%`41Ac_tnpa^#Az>-+AxJSI61uh=E^u zo0gIy@}*E$=4)T;t}Qti!?VbY&k3;dt#@v`5WD>IuLs=8DDVVqkg7I59+rRd?Jxh! z@EhFR%`FW4JPLmNm*;Q(*55m)cjrV_TU>I}HmTu94`D{O67gC2v!Tf)G5^kk)~x2~*v=O){4c*YB!9Hr!KF-nB0>6o_y#mJ2n8(*iJjR_}|SAg!1 z<-HPk;3@cKHhZ(qemS(*@4&(^;X+yZ0bQSXQ2ncj7)rKIO160X!3A+QF7dgbQ6Mh~)0F)4ObUb-ac;Ob6k8uWxEe zz%G-Qm>@l&#xh@AM5ScZnFt>2PQz^r-^Ssw-t{|bT{d;z&zL+H1l~EwPLm0`h}r>1 z$TdHxJdaI~P3(XBcRw-w#V@VSr?{7Ic>nfad*SE)6XK>EfkJ(;Q%E~F#y4?0&nap)gN?2?>y6W_*9kg?`gv0~%I8G@6ww@VVOo4}} zAAL!Rs{r;wKy1^d|4;{qwJ7Zr31+%gHQ?5M6N~iESF8q32Z*4GJGf)HB6~^(03hb) z^5$w{hg)Cd2HfO8W+$s+6nC)JrQ$gK>wh%8XS3P?vwDeBbl+M7eiY7;-0x{ zQV6UN?O5y~=tT`^kD4OyV#G&T!>g%KR70h10Dmzxv2axG)E8PGEY_U)QQJ!qax}FV zrDF%}8W6^!Fij^<3{@daq~>pBdQZbicVe{!t697S3@C&_dUOY?gWstt}MLeuUnWCf7Rmvmk_-( zHQGS{ci(Q8O{8_tR?>RHFFI9A)?1!YOF1v;2v3%`?A$~McZl@;sEhK4^V0K*9tZ=L zPwjgV+%=kR8SRMJmkYcx6Nz0t6dg*ePOVS*%xW?=;#aBZqBl+6m8nr8@ET%FM%ujle@Xfu5CY7 z&4V_8#@geuF&*iJL*v6y+|Gjoar8kkQg7eS9RtY(_2Yhvf*+57G~$&!gMMhkTDM$< zxSXgR?Gqn&%OlNH8uxa@ar~QdzBIJ^R`{e{ck#4R$r;WDrpu;t0BRg_UaFn|soeAJ zw_CUE?xVr{py>xWqh}&VT~IhXe>c_v6}xonwAGt@pEAy$g09}HosE9~&N*6tRIV1h zInYt^XI-&mb}K*1q5V!}I1!5vj6&h`iDxVU=WUEvYU4AQkjdexE?6L^_D`x$_TPK( z=>U?(YSS?C@()h5!up&`c-EEN#oFV>sTrfg=Hns4-fI+Oup@}YyhFR_v?_)2vE9g9 z`-XkdGx}g63H^pcs=eYl8ADa=JrCrB`svz9m5g7zgO1<(|Ngmt)?V~mq{|9uAa@T8 z_8xuoufErJg^507`FBu&n7+=)5)qx}4!Vzc#S4wDYdfO5W%nOdkPpgdAME75T|2yVW%bSOmxlal?27B#-EQe( zFGOvAwN*s+?mTHW5v~38zxrmWaIPYfopKvSeN2o?dbKv!^2~g3Sxe;rI03*3f&cL6 zk1i(v=uug3|4n6iCgvoyKYRGyKmN=AYuMOD{V%P&;NP>|Zps5u_m$&2&KKd&FSKU@ zEALDjjYxMEFdm zDIKdlIul`y997D#rm%6-a>VQb$cjeX<2a6EvF8sK;z+kN84IA<`Zl6UPjvNOcs_GA za>6=x%RW}ve!+~SE91kcuQID}0ky+Bx^sR5cxsb0n}sxYDyQ51_Rjog5qZqv$4gYE z-iG@9F`5c#OYQ;a-tdpP;Qif41G($T8NI@^=;}g*dOu<|&xx!nd?O~93(iTZfs(X> zgZ{Vay>#~$<$aZe*2~J9!581ef8q6){^67JPrrTgN5A>OSN{9s|M8z6{nu|#{`4=( zkBhWsdthFop@zbKT7x{L*$Pn9;ItnAp^DUD=Ug@OlL$q+?r(0DYpMY6z$RqOFokX_ z>QsHjv}0tCv_-ul(3Kp}2ASkAJJMebZhygTL`%gf0zTvaN$>KF?NGeoJ+7zv*yzFf 
zj>YJ7HXK&Asead;>u4g+o(%VatBMRwtJ6R5U=LOXN4#9{uC=7h6~3mo)^Ox#+Bj-x zOJ%Oni+&n|ed0CG0CC@up$&}`-wM~4stut&4X-Hfh7)H;_iHKVq(9O-@W=oxORBb~ zD8A5l{ZA$Hhr5Hy&y;(ZSRr%;v$+6b~*Zps?A zP%yRmk$Y%Z2F{$St^Cje($A2Ku;^ni{TE(eH4SY7l(z^~yjK8!G^60ss~a zz?T6{&3wgGC?)Z*<{(O;CD*NS&|g3PEMtvF-b^Md z5br7CavRk%dGbC+Q|SjCcA;I1#8YiY#biJ1j$*)P#K!6K?tpOl6^UF8ookpOH~JNB z1+NdhtF@wWV?{1QF88=&wF41+TzBN5h6`QaOq5cOur`|Z(i=phvf=AFJUPrMX zc(QIdVbh9Gm5bV%cjf~x^||t4tL&2C5*xn0O5BsmAC9aPLalwx&~=Hasv9n6FdesLR_k`oXfY^VGf;XhIye+#Nm=GLAUseHwyBh4^ZO4IpP1 zqvo^}2r`Do214hDkCy0^0gJvMYsw2ly08zMv{uGf34~W81Am@10>fBKtA(MKqhbwg zOeGyK6!&6Nxx`L=PV&B-hv-bIr8v-tlK}Z2gJ**3=@4Z$_ipVoKaZGw&Dc8X+L45* zC<(_py$+?FVBB`(VdDvJ+G)`I;h}Gk@I*4rdMou3dH)EyUfQKH z=|1c)-vucv2~ZE{|FJs$V&SR*O2kYng|n_QUZy#c9zJq)HYiX>dX-=_(NVhq>$CUi zZn|O)o)P-r&f}VV5J|tVAb#Z`|5AUBgYvEE{F&K7Yd+wIr;5RUz4z{$@_dCmAj_A^ zxJ?@J$)xcNKHpRQ#JK_$+FjjR+nw91%AE}#Fty>*5g6_!NBB&W)A%0N*t8mL* zTl!(jyY8#^54>uoEAYeF`%CR;?*ZIetSd8VubHe5RmfD|tvl7D zHExNFG;BvF0g*zrSQ~4t$v#Csnq+SZ%~8vWW(px*+5diC+e!yf4OxJ_I(Fl;80yny z_?e9wTHb}}EAHfXZ{2ZJ%fSSh+)8{d!4=GY{Pz0E*?uO%GBA1nfOt8BS$?ahj*JGh zvPezjq!6*T;PYMowy7>)Cv4T-N1DWJvVGjnps?iD%fd7_t`y(<^a1z!m+$^r9!(Cf zy!=n4mD>5kfh}iz+`0XoqfB6pAt$%n)|YzO|1r$PI)a;B=qu( z@VeZBw27r&690tdt8k8369e*1WH;MzyUa68a+ok&y6hFyN+YW5d$X*Q#q4Ai<1 zqN$dzqqg{n60F1Zj5LOanNrf_cWa@GMh1L|w&e1KSMl!K~!yg&ht_nP_}IpJfV;i!=`YO z@JyIe{G@;`TABkDMqYLU=bw%}o1Xhs?v(lUtHhZw{Ad6CcNgA#t#m>Z|Htoq>u0|w z3=Zo2(nqtyU4^}Dl=~n%915FDja)U#c>_9rSu}rS%q3sb^{;WTr|oM#ODlJ+7}p`z zNw#D)=SzK2ueqp~H}uFMxYNai#YuG%cP_xo@}fk9#=orI`A{K#O?p$gR`GkzN^{p% z2gE950sE*^Sv)iIThA@z+=cafjIpB4iR@|m@cPad!7~pXCFX9bx!mO>cHafh$OSil zoluIyaMa;=bdkpeNB{}`5)v)KK6r<6W}!-fH0zEE8)>}DDKQQjJS~G*u&1^Ctj|7jK=v3!)d=szgPw}D6bXUiiR@@O zeH(FeG-myt={{&JrK1;7*}}O%3-gZOAIGm&#GV^6pox9zJwrTRk-u1(2Ai{3^rB+} zkIfhk7MX#WhNS`BPaa7b0M3XrZ3?LPgs!Cnlxf3=DA6E}n>87>r<%`{Dcg;z(BK}n z%rw2{1wQ0{a;z18QW%lybW~sVuIinn;w!eaF*`!an9aJ1;sZu< zos47kx+RLs-qP`G=U|fV*jKHdFE}_IaXqM*VAX~w3A;Td;Yyg!DN2$~)`#&KS3{~- zF%>S%7z2D_+Zu<49nyEaF2;f4u7yCGsmMZ5e6iF`h0(C%IcO-FDC-yasViJ@1Bwi| zXJua?;9>DvV39lqUJirmH7|y{=DYXVp%Po?dr0ysf;UKjdeBd9@8jdXI)Rt&d0CZM zIS)NJ+b^n;r#HOs9J#X>uC5p!-0^nnv4x%i;R)>zjlb|*3=mW>1vu8$h#?FzW$BAgJ z2}GYw*7yC(0qIFVDeB=xcpNh(P&^?={8ko7@>Eh{?49U)XH(dPxV=~`e-bSu(B*OK zAi2C~ucOfH!7O9jl_IKkTHfbi!3hFg$Gkb^8-sosxPHQTL301+S!&d$L)fv{A`zdV z%{2HqOjpZ&%h95xupP9bLH&@tRzofb8^431(My0iUkd?8kA5M`o=s2l6FXu-<4B?H zK5q5LK}}swu;@LNS{XC?;^@@8-SosLOf2Isl+`W}B?R)$5K)lau)Rhn$ZAmcn#O95 zhz3)4{TG&o?sn>9cTOIF9>u??UBC^DVEGiSD08BvaPm+>J9Ot^p?Oc~@}C~m zYG`<(3~octiBiAA1ro zN;U}f?%e(GtH1v1`9Jxl>gRp0`RE2>ZX)@80Xq1@pJ7(77+G%C)qnIfpL@6Z+%ZvU zpF3@u@OQ(?3>=AvKZkBvt3e~>bd7fV+G(_{eRB50M}{wQm8cr6K(f0uYGFnH$-POv zcHcwjXp_p@gYy&-)uO8BP(v0OVwOU0nW5lzMF` z8r$MF0UK6imI(+GAw3zyw|Hc@B+69C^y7`W6VMAdGNE63VeW~bcHnJ(sF7^OQ?!Z=6rejx$TF&fCbuUZ%_t4O-z!R$-xp^NNP)mH`CA} z9|(H!5f4V)Z4&j@_2rwloKB>)J^Ov`xm>=izNX0Yk9v}8Iu>?Ww zKi23G^8Z?Xos9(s)tR?L*4BJrP?j!-!dJALi{`11VSS$JSmcB$UV%&lJxLv~2oy(~?e>0XjWFLxr%>R1faA|81h!kTpwDb@N? 
zU;y7)Yx3^UM+B)_`+m>iye1r4rTM!|msvVXafw%3g`kFIPC*e*MLf$PP`!GFP$EuZuV5Qz@?$!j{}^6U zR%ZY-F+s2aa+GzX86yBwg5YffSoIDW``J?fyw$Ph%GChn1U7$~Sq3EVyXWK_5Lc>z z(zH4RTjPj-$F$Ax>uxr1!TEI3dV(s3}h!Gs4Qsu58@QZm(rD@QRoEawOZxwh%3NYln}lM9=E2#Bz9iKNnj-hK7Ts%(Hz#cFuDKZ>lw4 z@Y7BqV?UpV1kSPgt{>cViNQ)Mxzc6N7Ww)oAY;4lOY`_Fly}+tka^QB8;zTseqOZ} zxQy)RK$>GX@UC80&fc*Wy*_Jo-RzzsGg83Fg3?_%yEqXvzl!;ND^=qD^vufz25H&2 zt(51X<<0;^;Z0NMc$biRx5A?IbWpq!9+q1zNPf|2P+skhgyT)RgO*(8bfrk?D6Yj> zXMC!TqV=Ut`cY)K?w$Xh;7D~X7JzuZ--)00-W+ke!t57bczo_taqSbz_DFuh zB`5=TpO|N#kQ3UpPol}>fn+e}CvxcRcGXLNJanyA1N;@|uq5i9ZjSKH*@4U1DKrHt z$I{bS9Tw{2GT}WYcLV1i4ASH=KJS$;i+2gWF-bc~rZY=0p;zcY%^N@+E~@5vv{htyi%WS!1Y&RCv@@)HbF zShqR$qbb34`*;vzWJ?P>9RV^wrKw;ieGqVSfZrr(Z5S56(ei~y=&XqyAY)1mToOA4 zPiY+Hlg)}%p8IsJa3c_(l=A-G9H}9tao6RuE>6T4>^pHg0zE7}KAJ3Ewo8*{p+0|u zYPs?rgSd~zzaS2tTy%r_Y?~A26ucXjGXt%g@&ve zZf#s{_P+B?>a{mseZZ1Lpq0SIAwbU;<$ymZKFmi9*628_R-v&?5Y^i3lG_*;tJCrn zlk7;^p>E;j{bM#2AMPwMcPv-RB3&QOHC+#ohO2%_n3IM`7&lam^Gv`Dsujsh)YEGp zF4QFPxy-%q?q|IVgNfk*evv>nizDON&_d7CUqyfA#O`k(%QiHA*fi(pw69(&pH++- z&I;AGN8O5lTQmk)C#V7MK>4EowNdLH7TE|y&b#}ntG}wzr(?O$&HsoizC7ERJ9m8K z6U;f!(hh&THeP*aAVPt-^3GwPTE4t*(+;A0o7OevoPNdfVK`dDB{q~>zu4q`x^Wv{WBM+3!df&dsHV@zXuFp%b zUtUrj$_LX*t8(w;DxCNLs+Om~eIjJ|;0O>?dVMqIKd(2Jvh@8=vH6!xWZz-UOiXxY z?ob#lv!R4C`jKZEm!ymoJ&pCoI_{bU-G?poWt%b25y=10=qan5e-x^92IxzcaGx2t zn_%Qy^IR-k@=S(v=E`g!<$neN+jUS5`TLn7B9nF`g(6SAte}=>%}ZkucsL!%F-^9J(j5@A6GAH zE~f(z1-$}14D^z2X>>YEEj*(TL;pv^^I#Qv>F;3t3O7c!AwMSS;y zfAP{S{oxVObd!P=!OjFJ6TR3RDECTRar>^gsHlU*$h3TxnswYRYaod!YrS5VW+S!` zN5gmB!)q50R7OFkqNe$^w3&k=YwocGQkPXOKl5gsim%-*iR^mGjemig?8<}VZ=XBd z%bpkryMhdZZD*C!r{s)UgVJ^DV;$VyY>j!s!EK){v_iyyxxD}~T(i@+4976DUhX&2 zeIGO#A}wy+OOBVHj-E#6_(o1u=3>sMERr_TgHcP&dRmti`(xfUbgFYJMv25FtLF$G z)TV#82c*jl>OSQ$x;;8`Ss3+c(x~3DLz#k?-hwn4)$5HE24)*!SC{BcJ_DRWdUzalg zH{d#CCfIS^+KlwhEoX93)eXim^mhY(FF20^TYxb?ut|oI0TBRcWO8Ut{Jls+Py(}# zPm0u>320ql{Oq3M_bPzFLFZ&LaP+km6+wYD0sw&QqI(1hIj4@8IzR#rcl~q>gJ}zR zARXfeDr$?i*Vcz~fO6A`LzmX>9|<0i@sM5KYDjQWaE?~4Jd)u1A3G7w*_n@yg2WRa zR0O)G*-fgErpPpjqcrXw1^S5*CbL@DMG^M$nVY)EZ)AG#Y5gLGK90tn09QNL$qoFS zBMrT@$5&BgVG5~D*_6b_Kz{ikSMXTp8hMJip-zNVpC9zvm!;HNzq_ztHDu zX6M{JX*J~YTb>BHRXMV=DAq+45}PF4h%oVDP0Ww8L9H*#oS*B7S;7s2>NU?mQ5}_V ztSUx>J-!7z8M60;)hCA%Q8?@heY@J&^-^aKk;cAu-z}EW(t_zf?7ALuI(q8#OBZSPNuKlA%!cB!A-UkYe#1%iaXeRI*qQ2FcKAZM={s+z zi&5VotWIrfR;Bc;(Vk6=uTyPCPu6-`>4o`mX{-QJR^1;zMdJvGFOE8v{!Tvjj6Zib(Cmx8fcAc zfl;+vcTI;2L9g2w79F?e{RDw^8q6%9C?I4tggRil9)UjCB?3#d&o~^cF&KH7R!zzs z8WKP*pidOK>De1?ZcAWz_(I%sj>)R79-3LbN@-O2 zj5ztde9_o@JO~N4^aWpNLVj9Cn@|7v>tFxYxBhbb_P5VwWQ?0QPFqExG-7+5z{AM) zRY!BfQp!&3^BqU26F@rVIfvzIZf?`l+GcSg$mfqC7(T1RlhC8dTiuIw!%Pu-fm;iG z3H_2YcBQGwibtAIec~g(yy`c5BjR2>Wev2cTeKmbF%Rr!YaET5mp+(Tc`Ny>#_FG$ zL>$OC=o7Jd9aE>vsBB1`(ZY%}!$Kz+w+Y$PBw_5%hutyhIth+NaY%d)ZvTw`;tD)d4UL0Q z;!_z%&P^Hj0f2hL)0MYhLO*lUIpeN-hc#s&5uMS{A=;e3mLh=D88FhuN7e6qnESm+ zhoCc4uj^&&o42UV_^(}k=JZbS&TqcohX!j`N7MM7%;#PRrk{J-_rLS0@`FbYdr!u< zzW%lGli&G^Z+v*?zx?%2e*f#zNf8Un{x;{lzUWQdSWGNEH8lc;(a*i;PK9+?`oa4j zN*(i8Jn+?(EA4K?4yo@>9>L%L*7arm@Bhv185du&9_-)BTW<4nf~pu|0yIevKX(MK z%JJ;wpNMzh_~x3wcGM-ml(KZJ3-zWEWtn)`@VkceN%i5h!Ok!F*O%(cA1c>o%vavF zA(Y$lFP`fFd0sDnJkyk;lUA>G5?tvkhmvWH8QdRZ#Ev8Q(X5*JYgZD!XG{9aq2{zp zx2Ynp5|hxP`wdpULTwmdT;BL=*A4PI?bcoM-pHE^+dn(^Dgt+7AP4PDowUdWjbHxh z%blB%3TJ-y)i0s4ra;BmWpfxG9%({BuQpwrK{LtDX@*8S5Y9`LVRe=+@~CD#`R&VTXZ$QQ1& zF7eIm<+*C%t$+GkUwN;)zYp=lhur=1)yg^6!Rzmh`#T?=j}ydJ4)YIZ^?JLi)Z~KKWt2 zcD`^j5&41$;KAgnR)8B1A%mp(n4Q>eoi|2}iis4QCsJoWdGx*Fr~jjPg6|YR&egT2 zCyg2r($lfi)0^cjQoKx$mv9IpwGFkwj$`mjtJi$ue7fMUiIBQS@?L!mv4|0Ub>f~t 
zv8T`c+*;D#UP`W{q*L+O?U1PyJ;aouXeYluC0UtziVXUV%i}5QC|!|1(&Fl^P`qba zu9+_OiavESi{ajkdle3}kNL#WH!=WXa2uq|h7V%U-h~N_>M#*=wi-ecG+--StOAdi zm`>si45ViOIMIrJb>agkEgqbLVbC3;nm78eMFYsqo;dYxuXg<^9JAF2HT--gHnPDq z6_ciLKZ~9)x`ef4B_PGjO5dFKfdv8$+J{I9l>ESGO0_Nn!~UQjD3q;&+O?3KL3tTv zn;M7&^oD)L(;&R$jbe_qyA)8VA;1t0E1#Mp)qg!FnSdaN4Y3{qS_F(d0G_igD2S0O4Nn=;g4s&9TO$qU(F}|^-9-&GQFFAX*qvjKcvMpMF8xpAd3w_&x)a99 zq+&QHA!Z?9n$rB~btzq)5|nctmYk^1>ne$|6y=|W8?9Nd z;HP_t0%+=I6^kz`26Q~bnr+71x&A&IYobDyRKSWk@@w1Tr2yA?vuPqd79 z@nEzrPunw4jOI6(%rI=vZLW=v8_6&gh0{UQvyK8=n&4#**XDyg{WK9^(aty`?Pjsd zNcQnq6%uRtbCJR#KDGs1dGr&#*%d8AHI+5#q!V1wA4kt06uoe;CugCClK1OkDrJxD z0?`}IHU|?Xns9l;b8G#KqY3xttJcD2y8T!_6qv7QT8IeW#Xb7zxLp?lWyOYzkUL*w zGb63tH*?kA`~9}ES(68w!IPT+8K=`C>OOMTbZ>T#{)(hzn$wz!1K&N(*fbS__Ke9Bb#Da})w zAtUj-Pn$A@GYkHokUuC7>*3*u=p5XSfMZcSWY(wWsZ`xC; z*Di2-aq4&J9*m~~4~5K7>lAkVJoMpdw4rCPQ_FquOJ;u5{^9utJ5L4zAcb(qdHP7d zz=)nU!UJu4B5^ZInWbtKA3C+EN?*q(uF;@u1EO@u;d{yRfVLXXQmHK^iM`&c_HTl? zVc`aSYCUg$Zslhg3Q2|crxOu-Su*m>@G^ll$t)q;RmbJ2&{-uWK^Prf!}7d!Q*M<0Izp8VlA|N7ti@!i{*u)ULI*xr>|3Ks9( zv5rh92zsu*;6~s0rbB!BF357e8bN>ec1wqlEQqSwjcE3>lw)>6iq*8F!6@)}1*JXm z9UpmmFWC22OwYy`D_MlLBfRjd!fU?uRn6yIMZTB<&(6=kK;Mi^^zAvqdVJwXTaap& zNjgAxa4+j6lf?EWmLg&`IpKUGmUhMwPVYTe{h?<+DRmR2inJ_w#oqNtBrAV0^3&|9ID2Nt)0gPd1{BCpf;ZVzYQj#`jsckk1ClVSVzX>{tiUwZIKPlS1pfkX$s z@w3Q=*<5HD{~ik7Z1=_X$b2lG9njJte!sk*BTFM+0R*nb%CQ7Qt0g4OlXSNvlNX-g z@@NnzkL6eXZx~pn-u+VW+G{r|Z!*W*nQtvGz46A2p|_qhSIzu>H1|mqt&ZI>e#Qxx zRlh@)oU3%^Ct~C?m$pnqG`8)LfK6c!2XRbjGuKstP03*)W0?z+=R8CTkkB*dC!%k2 zQdstLYgRw&4uVSbr#(@3+mjdLKNwGwTUMxJ*XGa{eSQPS^b7F4-f;A2pFhgw|NXae zy+`#%;z_L}I4YKVXscZz(WAaw@49}>`!5CGUS14kSP z@|*z#w?G?UtN=t9Yw$ocMr<$Bg3r7$z+>Qq8_f>^*XPgaW*39(7SI{mwP;OLfCIM$ z0~#KZQ)FS_3@76k!g~X#(y}l5HQ*gJB>+OW<(Yt3r#n;j3y9|2MUrErJpnumG;D)T z0OxUsM#Xp=&_|eHfT@d8K>Sdmbi$210S0as_YEJl!2dr4;Rm#AX<`uO{yXPgRl9Po zQ&p!<4mZcXox7)JhS6xmNFXE-2%dQ+Eb#JzW$b1ASOy#O9x|4QA_*i$8fi4slRMq+ zzB!$o bq?Fu`m_k94&fY=RKc)V?sGDlI?7&C!J`nyjF03skx(X&CD>(o=I2qrQ- z^hne+W(l)UfCA#QrkXf%2t;7D%_k!aQGzspLq1jAUIW~TEeNu_BKEyY#<3m_a$spO zIZ3s+*ob{yO#~lKL-8rD!-y{ye+@r0d001j&8Z*B?nQ@}1$WHP`<2}0*7ata%P+a@ z5=ezn!4_2CPz^c(h(Nh&!zFah3V_bYNn16*!I`~r7R=cO<~>L7h00<`dYV>4f$1FT z37LAoY1n5I?oE6`&&(+vE@yp5_~_LesjJi)6jR^zG|r-$<`tmrI_j-45D(H9dMPbBK*p>v)OdK6?+8l1(@AZjPnUCw`T;L_NCEf*J9&Jrw=&`lU29XjCeT>oQxKF zE5n5*n56x(uQCYU=gXURb^nVCCv)Rf2Le-N%iopMo>hfr#E>->%dv^4cl~E>Gk$L; zSumC9!Fo3e6}B?WlIqqb>NO#R_1_Nh5f*8KbGw~c$$+WH*jphzEE1Qmdq!S+ce!1*+8!xK=vo+uJt z>drs(K_hFoAxb+!Ym@PZ?Kl7Le;EGV-%Z_0;C?#12X=;&ndU~Y28#9SKSPk_Cr zOiGAbH#f|2u2Cjsc%G$A$$e27MR;dp63ye9CgVT)kq_#uL-_QC{#w9k3lbFW5V{C6 zE|%({rYSgOykz>?SrzB7%o3+>;VbF23v%|`-R+0xMefC@;d3@6-k(VZDW3!&WQk8) zQ08o#D@RZBMOWd%2n07B!9Y?RB;~2`a872*XQ!uAMD}dFH2QOmZWo{J-tV#|dXdfr z=WjCCzIu(A6TM*h`PS2ijZcO#oEg8h@c~I;@R9oL#p(AIV?6Lh++D9j92lNv@0#@` zU8h0}bNS|ah8i~-DvbWpRnl)|AC#vTFBbf*rqk+!B0l-fZ+~(BJjUo;a`<#~@^owb z!B_sLTUWpFZ|X;8zucy8AGZz)`3r)?_~7cX8SXSD@NCmDL8Nv`7`w8G7r#LD676XA zQuko=vVQb<&~>JjI{NIu!gqTg{t)0=Wq<7dD)Zh!yNAAiy5 zesS13IokbhVdsaRI~}C;r;&^O<5S7+tv*Y>y#LvUuUvod!_TYx&qw;j@?;N>SiL}X zW`{iK>F@AcM(Cr3114TMnu7fmS&ITzYE3IFk4_d}Rgf7mU(R<|jM=JjX0=6Bb6s(V zmew=XelD<$N?-nQ_?US7aqF}Har^K7gMaove_8qIm*RJSr~DiL_>2Gb2mk)dKP0y+ zZq(Hf15xYb%Bsr~&3JxFZ`|4Z+E+K{J?m>bb@kEtEBHj3R&IvDJzoIq_#-elFOv0E zvUV&QJHqbIsqM#YO|8}cm`0+)&5t#CXH+J%S@)!YoV`$1Fq$||ZeZ6OU0N2C;_KlLsavOAM|k_E&b*(`(pUWp(6u>_*S9v0V4gsQt}bU*g?1@GtcK++SgG7wdKAV2#ko z5_9e9+DQ$7FIt|2T+d91MF8fGKnQRlkAa|sS?j4BJc5AJSQLwcSqE^8&{shp9bKhT zOJ&6Nf$oczI)jH}kr6cmfSK9C3E&H&6$}W?_lVutR}$IR2jzh)!W^^^Rgj9-xKMc_2sQ=xl@m0fb?*Dft0dinzE21I1yRSQsgW>OlrdyNd&skOS1D 
zlLiUI1Hd99xey(+;gzX3?uOWu`uOZ6h}Jy|osUe~y*Sh2?x zl7sR>yLc#4WKiDMB{Uaho}ez-jhl6@-3meZvNH=Yz@v7>NjugY_c-4DlAY`5y@I(- zc^U0ubU@S5c^%%E%*+fMuK+Hw(@okn!F@3tL+-^Xn%5Fxre7JAYL3j~X>UrCYHufwajPm~WauJU zg~H{av2U!;pWBAC2;g+{uFh)}vb@W@rJO~*v*tL;zhVRPoL+>SN+*!M*rf6_ek=`5 zijWuEW5hH~e;F@OQl$rJ*Wyk0;w7=!v>eCTo+GP1e+#QGw2~w>+^`1GLz7VCw3!U9 z1+O0^VKSUF)*`)05n4h%d*56wo(W?uDRh4jD01qx;9uxmu!FO#6+R#b7H*0xSdJhz>{&zenxIUEhWP} zhctZp91fMc$!;()1(z3DLXi7PUDKArP4SzKCM7&ZlWmbB-si+p z;u`Xf?s8>ZUZ&ksW0r+E5Xsh0&cqz~YTZ5AZ4AW7+LHbi14YhuuJtPHeWPLOgW&j3 z^_W(c3!|cW8G$s;Jnwxgy!`B9K6>cT`e0tD62hjZYDJ1&)!k@{Qc4yr%(&6Amj;L3 z*c9_GaDn9v=UH>Z>WP+iuU%@k3JB&b*H(RPt$t)y2)oe=lM%ie0tHhID?qZqV;6k< zTH3#4OkX-Z%%~(*)l829$#1Ry`ZqqnQk|du+fRA^h^ZDn`q9VRmBak^ z+^;(S_v!QVNu%<7xSyY%9W)OwrK&e)VAyUnY!+G8Vh!!9Mw*RN!}`?$Eb3zM=^Wlj`^% zw_bsk)8Gq9&2!R`%&3CL`(p8MHg&HI-5L%E)6WOvZPcl?m#(gPFC`Mc_M5juZ$&<} z#~<~0th7}RMjfHNU=?i8+d13*BkADj*sRumFetpbm(x|oGr@aYb1*X$PPeO^s`&~@m6V=w- zhAVjno7ypXwYB0SSBy%S{}>@J&oTL%3*Ojk$bpvYe~R=EKVDLvDa%X>&95*I$&Poe z-4rH~u;rLLQl{a)-*v{P05If`rtCH^eu6&Oe<@F-UX#^wO!UvUzV|gxZUSzF7GG_9}3Ef!^q^iraMKVmx%RPUXokM>e_T=fkA8q}Y zhdWZCf77*YJT_mchxzuQfBq!&CRS}s!G>pYFRBPU7tIVd6PcMmxuUxPv=YKYY3(+& zjQ03i+dqX}m%8&aN8ATk+!d#ubi(zTm6q2$?nd7?4+$Y(ZM;wqB6pG%TaW7g`36p* zBG363^ud#hC$bAfL8p5m)A5G81%L^opw=8d1(?M21=$VJJ|6&1wAuBuY8p7N9G$eL z^qgFmG~dk4gY5M%+*dp!H{zxerkIZTjXEfTM!W|4%QC(hbPXJ9HSR@5+l%cXPa&bD zB$18z#-+Du$Tzb9E^0Y(8+d^|Mi8jmqSR3YbF2tH#S%s{2+fxoLGUF(qcnu}!95n5 zSk8!s9T+UsBkmba^Z-8`Uie6hP5@J*H6&8U4O-;zQ+7!D2%|PA7;biECXa{+0$6qt z(!fMQlny8(I3Nxu@Yro+1^AR7>{Ax}BIjA#5PC2?gF^MGL-o^z)*P;AC2l}=&Zh}# zg&h;Lxsybz3mG1U&daH<(@DB8jLb6!>so6*sYy7!;w)ta*5e-++tO?6!I*uu`g3`C z+Qw)s;z?1qrzKp?%<`4v!R$w1G9@yBqZPg2hwOXnJ3VI2A**ea@dV*EG|ix!qlr8u z58W)p#HZ|TZ+6n_k*E!O^y)YiS08BsV=#FuLKhPf7F|?MzUn$tuO7`%LkhbQ9FqM@ zSv!|rMKOLHqb8=*bziBwqqQMY^s8g1r?;iMfk{Fregkg3R-|sdNk?7g$+CP?N^l`u zN!5h-jQ&R3SCq$hW(m|Sqr|T1sL;S_P2061ULEFB;XYsYghN{+wCIiweQN21>MnCA zOr@g~=kGm@jsrvDQUPnv^{|>>N0n(<@VUUCBbMgAyo)1NT=040o63m3o`9VAv5#_3 zB)U=A@V2m~1H5Mh1Q3??dzwh!;g%qT2xyYm#=Tvn<%e*s@>F8`Ja9;FeY$PFy?Bt+ z(AK&kH(IHCJC~tR+okss58lDdPD`l@$+Ol*rY@YS&ZnJv7I6whhNb$qRo;>M00U z@2N`2IIOK>+G|96(WOPj8c7^wJze@~a(uNiw4o&}48oF39;lj5x(!xzN=)M}XmDQT zX>^6^Cm7!{DgyTPk-u{j&aND|JdbGJ^TM3+XbL9%X=~`znrzQUxU5KGIE%=Fk#`f= zDZYxRt9H`MrJce{`LHyx)5lXBYmYeCGVv$$Nr^E<*5oe&A>! 
zdX^?^PJiQvf!@s~+gtGwT zr{WAOOnPv#BpCm~vxL@lH<3%-49 z9IyB(U2s0VR72duX4MQgo^2kMmVYC;sgLcGMcBd@a089R3tx`4z5Qps*bL6?p#`V8 zdW zeRub#```ck#kBrvYfhqP^GI`c_VSnn(NXkwy7`zQngaINuFYP#N=F+}?}FXM35maE ziBWA?$Q*}8lj1J<)r^xEDmNybhQ(0Y?H6P2(>?*#7Y`YDoI@PuH@U$ z-#9_8EG1umZRxLtZ907UYj4c0zaiWmqYN7|7{!YQy1Ro;Zd!x=&dUX3ks9;Z>(I(_ zQT}T-@t54;zavT?tS|rL+_(Ri|M@Te;|TxFude)3HhXyfY`p&uyw%@H{mtO=+?~r@ zVCn0zrRBumx`y8XGeFG0T)FZ$qM@%PgTFSM+tfSWK0QAaf>qGomONvW82a4T(+;W z>0YUt#$wZ5Oef^+Z19gFSj#yb zU(aqY1tDOUXq&#UWr^Wubd@1EeB_NNDevY0*qe^MFo=zZDDL%BO47P_AQAiF&t z3K-=KIop&SSn3O;z0ixiG*ct93 z-8n~(bc4hBQrgBwOe|R&$nf#`X)knrhLg*x@99>Z2wORBxipO^N28fbbWVQeIr6Sp zzUF4nr5$QKr?e&akn9gtkY=jvPI8AhaII{l3k&&!m+m*4)7Iuu!+Yz-#q12D3qI(z zBYjtXwRY#~eOGkCdW4x0{jbtbn{TjGREP7KhA(B%40c1N_)XCU?F3RaBRn(@hopm?Q6?f%_>b zl4Q0=kb~O(y1U@$ako|M>oG5ULe@h`rpdWf$3!RVLx5i z_gUUEnK4N|=N$4`jvsijt_gRYv8<1Fm($e#uurJD79g+XeRe$29rgmu-1Y95oF*?$ z+Fh68EBYwmNirA`o1(>@o&kRFX{ zC#9+*KKm&{Tt|hBZ+L-?=)$UoD=WLla57@du8bznv%}bz)Tfn@Mid8|0lZF}^FMJ_ zL$8MPl`A0|5tdWq^7hWTMp1YE#yb0>y?mNCMpB0(b4r;>I#K^-ameiGY7?A z-k~8I8m9-faQK($@Q>?5x9e7rM%KPmn$6V(9m%dt$2&hM{sB5}FU2$aPya+jVY4H+q3%QhaXgy0 zjrpmG#)JFD;fqdLP$= z(#hj~C5c7UIU}A5Up$&L@LG@Q^RCg6Z#BdYQ)9BfEKk|%IW|c9goPCbG?j<#fTKee zW_$E{cwRXB!x=MwSi=SMDf&iy^Tzvc>sLSf-cSEzvvOX;>=EBx=_n@+)b{I)(8)-9LV;~wZ4rd2Go79lMSV_~Y-aII*$f-nR~f=hlI5GBL#!H#F#4O4+)7W!|d$M~5%p zdDrlPOP`Q)+{edWiXJ2b4VQV(ckAg_8_^fmr~i*?ogTlxIJ32bXM2H8Ikyh?jv{Qz zbFGiJ_l}!)20H%1+^c_QmSCXjBuYwIID-i^PaEIx$8pQv>EBOcsaxJL?jv_J;|8YS*(2x>&Y<}0Hisx4Nt5O##4M4U^M|evB7Pm zeQp!z%%u=OnICRk_^SQJ7zHFkjmwrDPx(#*gguIag6;D$TCXT4vY;xfbjhV4)Fj+v zI(_>Eg5i5B5vPp@CTPos zAkq4OSpH&^HnhoWt?`o$nZBUKIAJ}bbHNX#m!c49H*T(Wb(~}4OPF9mzhV9@&mrMovuVj>2E1F#|DVvM#D4WeY z`8Gd4PgpXMB$}mC!OhN@m&5WDop%Pvnm8-RTo}H4$#ihu8X;z1^oOQ>9hc*|d$m#H zBVu}cPY(3w8dpvR?r%Y7$NTRAaLC!_WdCx=>lTilOR2R$(<&KFinDpHJsN0EkeS<4 zW{c8ry9h1FLHH$!zmFuJPfek^n@kZW=P*6y?Z~hc<8)6qin^QRD_XcBx`v}O$UQ~g zEhFcoaxzOMZ((dl@9rh#n^j+P7^Fk??X}b5Fvy?L+DpaRbU=->8Vs}kVurAm8|=-G zH)@23>7Q>4jixJ+a7wn}_j%3xJi2_`jwR5a(OUEzEXtl89149o>)t5LmCP{9pH{eG z!teXR{zU}gm>_L6I_;*&;DZoZRf1TQ%QJZ>t$Z zAvT4Lk5(?Yp1T1V!iOIBO4T{c_IxgE^!R)ldd<}+z!6^HRB2$nla-O;;lPtXR0NUK z@%5(sv=kg%`f^lRie_dS)7OXbAucxKK5U}ToA6@)X0cwMvx)b61%x1%x{}85I3} zq?&)#bS-+v1y2$I%5DE4-eL3^pZFrEMr7N*kyOu5qs95D?Xm(IPl+pwyJeXFlL2z& zV{Gn9r$Y_>eGrYtPUw1yoIHPUkw)!u>SpWJo#Ui(&F#yGO%K}eg;IAh`%#6N6hC%+ z%aasKn4B8Nd-?wT2xHIbKeE#|mfBx`~RdLBhbQf;t&5_#E?M$X}ps*j@ zyZi3SyLbZ(1#ae`zx!ABHUzep5~5a z0rOS#8QZs|x-?IWny8;=uLXw~vbEF-f7qy6uJ_fvNzPN>oKv>Ld&z^-^=kCLxl3es zuCVpmU-&Z%O2a&JJzoe=KHkTRW4_jH&qWuerM)EYy8zut>3YNK1( zB>#c^-Z%gIj%Vn2H^WN336-}$|E=Z}zWllU?|)bXzZd=2&_#|M`kc9{LWxyt*d!39 za~8wu*74QQ?QNImqx>JlYnLuIPZJh)zDQh}pIlA+>g8_?1CRKH@@FoYJ5~l?Je9Je zz;U^uBjC$0abr|E>w|*};3mgKGPQk&pbt2c>qN?nt(QxiXZ;^>qszbW9&=nb?fyt} zO;L*56g|2eZ!P!EGO?WUkfW*kLr zc6u>2v>vLach^o}dr@;cbK|*oYi6|p6_i31m$2N@hhZ+|7N>x48-h&a4@WUvk3qo03w2EGjMJf z^B-PiAL^zPvg& z@bYJrG_i46^J`>k^Ab5+*B8jCbUm5j>XY`}I`AB`OOF^SgI|zTVju5tMQ^k^AWj(W zEL2bT$~EJXcJxwpxsSUg+@(BnPbcn1vTMXH#cc7m)t@;*sqdc6R38a+9L8e~oS#Wd zN7p>pCfvNcIw&OH)bNvso~6mDPLmV@4c;`4mH7GEiy?Y=sbKjeeF-!a2R9F8%pUYR zV|`r6j{?`{AhYAn*^BntbIn%)Khq;(Z#>QuyJb3KduKUcw}LXmP|sJ4)j=kHkd3jF zYjk<)m`Y0Ot+%^McyTJS%^-W4o5AET?5G`oU}^KGvKNqs_KMntCuD5abxQ4eRqSNV z$J-0)4EY!)*jx)Ac^0(UeB#WdGNXAOHS)$vH%{-Dd4@Xj_OEaffn3nI%A5cRrdLXy zL{UkxB5kC1TRUL%KDkOSc^tJgM`kL$Ni}eynwBeh89($G;kPm6kr%lutHHEU9_4(T z%_LJL1Y;Z|qax$5Z1YreIKjt-Y&OOXYXu&m9oZMG+UG*u>-P(RtA$i%+-I!14=(1Y z%GjmZMAGQm!SDok$aCEEo;_H@`e^J-@67d}mJj&(^*1XcYd)T3_1bmH-Rb0`qdztR zsg@H>NQYcp@HygJw%fiZy|sKQ%$Kj~=EEZFa^xUq!;Nlm)T*V&7dda2oTFn~X4{_d 
zI1E<&GUj&0e?kaPU1j{T-yloSnSShr`r6tA=!1c;P8jM`$1MtP|%PyE|_m`_yL_Y0{ zuFsLutD9@>m8OIBulk!Z*S~R%sr38NKpQn{sd=-L?IBT5>mt254MZ3KoUG@06@t#I zH=yT(_yQe#7H`JJiq08^)fMaUxgJ=^{n@o`7tS*n(p7n*a>L29@l2(U40UiMI~|4! zG@i3V)R6pRYYNhP!|VGm1W#MVqo-ohuZJ&BZ@NoPeSm)4b|6h43?*p)=SSMYV8QOB zgN$6r*jd*OJA0%(#YpqilZD{ilHJ~x@`{PPKla~9c*yN<=BWE_slpDa!!lek@74ai z&@RYIs5O1Kb&^E;darpc(dFg|zwc&O?|_{_VbCksR4pCwjr(Ef#k0XhD7$49XYk?_ z$2Q1~v-~L^zEGK)URw9% z26kiV>IZPc=U-`v`#<*W{l{|c>8>T8n5HLr+uMSYGuBsFdm={7{?98!%-oxx2b7&= zkxl}dldoMG5noDhC!kw3{qrwAAMKyE|LBvJcnF`fQ*Mc@YIa~+?(8~f5xLc5Shb64 zM*L-`O?Uh49(F{3p)DanyJ{_W%IlO;tyO9@{Dss%`7U$r7#`JOF0^LM)zRF8+pgs0 zViKadp6fiGMDvsdRA$;O4cHV5Rr?_2@{zRKWqkbyXW!{uj7yeT%zth?{HfXbsP>T) zev)d>zg}7S-Hhie zJ^s&s`ni0yeEOuZ$n=g}vwZWMn=QrfnzvH!W#5GMA4h5QwD^;8{=4t#5ubLt)IxP> zu>9l47r%Y>;TM1MyZ@y6&yIicYzaI48sp4=<=UUmu7A+JzUtnbYt6SHiHqll>P>P2 z53EP0+v3YlJCPr6pMQCLJ!9Lxa>I3(;0}h2$}ZBoQH3Z!%$8p+2>7cj@K~o7RJLAlP(K zoo-R~#>uTRb176>6q?S4yG zWV%RM1}4EWZeuvFl@kDOhG`lM0NCeddf%DA0E~ut)iKmLvjp6i`4(hY=Z5UasdFwA+YQ9(b;I` z35NWQDJW}Wf%il0X;eckH{UVwht;-&r~KUm;FEkl(WT3*(xT)nAO=s(DAr)HnHD0}N+MUr*KviLA$W<__YM%uueljePGkiUno>JoI zh1C}@w3H3T+D#d?*>_J?7vVHLB2a8@s-1jH1 z9Kx{GudX8gLi9_i@8Jqp60Y_UaW}*^!esBn7dLcG@~&_NdwRao zxHG`d481Y0be}Saa*@ZFS$u0No=cY>RI=n+86HK=fRF&B92%L+c`yqwOnH+Jnw7nJ ziSOWg5(B0BW`uO)%M=KF_S)|2JfR+qly^CPTMV|RZSR=>sHP+rEQn+XrDj!B0AM3J z<5I5fsq(hZyzrLZ=?cYhM8^*-??%{?&kE6QUYe}I@8zW0*3n=-U@V*0o6E&08>ttG zNqnk^+U6TK>mYjYs4OJDlK{Way}TF z)^D@M!@PB!>D}2&2$!Gr?|hxZYYV#zeUCJ3YeD53&7Ys?YiEI6(76~$eJ|y@(1&x8Lws*h{bg;CuMM<5 z1-X65sHD{VflJgMy0e`hW!>~?KFmir*VKNIZ;5IVuq zLY>EZ^ucaoYr~UnNYyxF(s)I)3;xj)BnggIzUQc7K6J?st8(z1*}l)Jag6Gxyabkn>f!K7y9$&&_#>Y( z$GtR^H8lO&pLuQeW)?957eCu8+RuXr=vsElDpdbwxOZ-*VSe;NIbOOHR_>#&y7gkk zzI*(6HW1%_bm7jw1;Wsu+aUQ${X?Mohx>{Y8^ZEmQ!~F8=W>1OrD>i5AKGt@rGqtG zVk}{oIpNPHqos_mK?`Q-hC3r)10MEch*BbUWqlOdQiMy+G}oDzvge5|H}J3ew6VAC{@J7&X4Z^7-PJH?GCnadsb7d26fBoVr z_dodlw_jBN;rej&WAhJ(zjOa%N#t?B+byF3?uH`ua=)~w&Vsl77YDQVsvl`Y{Di&l z>iv2DmZ&(QzpX2YX5aBbxYt69b1wB1dHb^Rr{glT3r?=Pte(E%ZsL=YKfk?O7bNz9?;=Dc%kIW6XAY;wC;uEIzX;kXP$MAE6 zqZrR@G<-$Hyl3GVEKaYoVO^1?Q+_@@?M0J)|Gow#pk~R+B^C%loS`cSr5(C^Y9jKT z-No0VPj`j`u25rS_hvR)Wb-eA=rmjq)H& z;H~H+TCp9!nj3lQhZqV^rwx=Gp=tvd8NxKw8DaxeePMz)=(c8t-)&5eDQ5`0RCLRVGIzjY@{a(>f9l`JLd@n98?F%!-QxP&4A!*}=Kpo$x_D;|bSo*OT%$cLET=RYlakP!_ zbYRJ_#tp8(*v!(rSfqdGer=I^SaPx(Bd;aaf~hahOxdn3F1e3K%lO6FoFOLw&q`>a zw|qNTEqx|~_0Tw8YlhBxY2RiK&eNea>v>UMnL`Wo&wQDr_edELoYVRG`5ZPLyzr9nZv52#uUE+& z7N)xw=Lboh4aa62t4Yn?D_>;B79Fa+N~}TA;^`=SBSlw=%>$wBCXyUXkcmY*Z*ei9 zNHn~i*;u%$4(*`&BX)S{%%=NYSKHemmZpMs%M6>?;U_3ehURMbReEvA9UEfGoUs!o zLNpXN`H809Z`WA=mkWIQ{Lb;$%49F_PODFIjb`T#`%CTUK=k+txJ*Ur$-#5|YR-;Jyujyi8ZlN~z~?c}W2 zbON`fULouA&h1v3v%!0C@^RFGc!Da~ek8@*ao_)w2jxlm`zHte&kEtT7?EAm8JeSg z?D(KV%yHU&5%Avkd}hZ`L~~`F!^rLw>+4|IS~=#Q;G+$FVvK-diAwf*_t7JRm@ZqdKS>=xFNfLqT6$zuDQfuX0m{Dhq}QDFtHHN_ zso54`b`UDX_!KU4`6lP%%7YD0>;P?cEyc%Oip}nuaD8R{Z{7Gs^>}mmffr8s z1*JXfKwse7e^xdg=0Y8_u>quu*Af*am;bkt? 
zqp*Gw{Q5@kKOgcrrw(loE0|4Vo2`9%F?3_X4c%R9&QD;+<@uG+^Da4=br(XSOfE@7 zQ-C_RDsThuIds@M&AZ_Pv1Q5Yyk(#XgnrqM4ec*8ImSHd_FxZ)%)&AbpEgW)nB{#&8%6s|MSNcwfV=7zTX8Q$h8IeqS{O_Dx;7c z@t$!(mmaF{m6d(q*xd*6?Wl|6^3M-KC^a!w=JVb5(f1R1Jg+lUEIh|V&w=rn(MSd?!Ip@LH(1&r_ zr{hd~?#P_<9V|ZAV@4ZeYO+xjzOWn-LN?M&KzqzWu{{BVDs6fpNfh$%b>ERnUn#ks zU0vk+l|Br)hKZ-SFQeNHX(Ki@iNxvD8P|qX(j(y`CaGe3BM1de1_5hB5ME=S9rYC& zzpl9+r&#DAG@l)wA3M=&*oz-K0Q!gH*w-KUl5fmibJ1^Sg1wRJ5)%RQHV}Kj7ojI( zF*@P<@+TXY-BB~p$XP~{oD+$Op_8D+OiniX+}dxqA~z4YiX9YqU8 ze}B`Yz~LO!?zOI&+~n?B@6js=oVQHfi@Dom*VP95fl;Un44YACNf~m9r0goU4}yU1 zfFJ-fuxx{xI~eUXCB{C(0f0{;sJ{Ro8Ko}mXYk=fOd_4e^b#YfdF6y_66O0BNy=bf z*#W!cLmhnI&;!??VV6G*+R|rXqij&LM4ekNB$z$ETvqnvS4uoaQS<#=Xs$EuZNT_oJ`6Sagk+XH)#(He7V+ay4|*uzB0dJ0F_X zpf@wG?7H-dweJ!}O-8O7Ox3*5LT^buw8Ea04Ba~>7`hd*z?kK)_^8slJ+y*} zz24BS;pv#mc23jY1-rHraQg0oJ#G=UcW}`X!B~W-sv*6uG+&RWvRy2Eq#D&sW?BQIbDYJD;|ZaU6>O&z4ls6ijf_l1KybF8wSAss=xG2Cx7|K0JenX*{q*uA`4yT^ z7MJf$n_ME4A*G3veq0!~8D8#A zJz(Y^t|Ln!r{_MQuw^Ii8&MRwxvi7w`hrxh3vo!s-l9i*3)F#HtaHr$FE4xG;9S&wpga%9no9h-5CeB^lgnLOJz z_H+6gby?Z(yL+REJ=mbPb$HvQi5PW5vEFRxi&SLFsZrUp(z6(HDZEN@LRYQzHN=*5 z;DO)^r6J-PO)+`18MG#Y>brF8JX~aAnZTl&Ae7e$xM4LqyBkaFiSlg+(@!1wm{i9sA?4`$j}Fih1l>*8AH^rLRG39Q zCvOl??7ij3VPQN>eK0rm&#+UY^U-6tb?6?_HRY`#m5Q-%Mv&_kbkH18h_QDEC~i1Y zppLK2)^u90C6qRLAtKXSsbyI0hHvru@$^r@OMk>UG>#|-_6l~ef4>@AbY7W^@31VN zM_}l^boTfF{(*@XXXfxmT`e)_%p|LXTY`T3Whl%ZyN z$@HeT;Be%&FMQ)N@69J;FTXjQP$|8CHkwh#dOxGOKHlDLr8~jf%YKY6Gx*t|N&x{m z+YP^pH4;dAs{+W&PUHeXotr(+_sl6aT*BCn18X|U9^|NNbudv zrM&kR`>qW?I(~Gg`)`PT@hFVJ&|Jf3U;Emhc{h#4epS7B6GX_@%#4*oixfCcKuTph zNsNX$emo2g_t@5f4^z#perL8VN85{m-myDX`w)~x_WI*^do(VV^cf8yJSOHx&|om0 z$t@51bXR(o#LTh`$zZx^}js(z0;+;irse= zFKJB;JAk5vnvR?wPtQjAmylL%KFE_EeH*D~1Q4bCdPWXjLG9*tjJ20HFUNDan`82g ztAEkw26D@7T1qWkZ$wAdh}6{ydTd}7p=A{SU5UZRqoYXZ_v?`dOXutOyXM(&_tP!U zc@w^l@b6;p{}b1hYv|u`Vc#rJxC_2OoRW6@L)6undcvnn2|L|Vwi{iOJ+IB|jaMUe z2m((+gY-V+odHzKZX9L|UGMN=FF@sC%@vmFyU@hbNxn6arre>T#9lhA1JXw~ycG;L zS3-^IjH()tH@n>XZm>(mE}5Ox`vUArI#J(j`}Jnv$d=$FIcAN8Bp&Rwg3-G)e7juK zYz$0$7w&SJ2;G?mC$!Js=_yX7q*mr-s_COb*N$#gxYJL*=sbBa)vekZ{W6@R9DsF! z@_bygz`?ZSM^QElLrd?~Y{1$Lvojz`jkfgq2~-#_1HboyNf}Uc>}`&mSoj$9EPXn3 zndi|;7##Gyv0N0jMku?T*iJO`o5dH#`P41eVW| zqIiFxFOPwQ@!fT9=>p7JVipK5Lk=;~WB9qlW`PB$QP;8n20#?1aSnDe5>OeBqH2{? ziv#=sLt?x}q9dZA@Y7Jo3dN;JGUoqlNlMmF7H5i7~b$*eO>O^Zphb3Ro`ajwK`%?`)fzTuwg%+B0CV)#~GZjkyw z+Fdse3MOBLPMMb8&8kdzWxZ*6T@&THd8kaI^01-KX;cOeG~wwiv}=~KbZikXRqJYY zd_;N*&w>%tWriKIYkDsB0Ir1epUwQtRhd|}*0`H(Csb~x!egg;T_3naDW}7<(EjbTh?oqA`)<4WF!f9UobRt)k>B-@VI$2AOmT$MC|PJ$?m}Bcz$wW zic=N=*P%}m4qxZKFia+8OO#X~{jJ>GlGHA9%8;~oVYo8JN zrs!rL%!eW)a)26Al|kuoCrwF%cz>_&z4Ybu*;O@3B|XE5j?Y|OKbb4~TtjwN;F(w5 z5|=Vv)TCwgi4Y@5^l*_;ABHP&?V@~F4x$o)#>27q#PZcU1gcAW#`A_6>^NL9?*L|~ zEk{Gb*7?vnXy#p2;?V0UNXuUamztF%FeQrA(eu@x3&`r8(_T8gDIkYa(q74XEJU7G zTOLOO6gK z(t%&_p_;p7^|xUQyG|qX0hnxKp4_Zs&)4c9$Rv0R(Pm<^26^7KM*PJ zoPV)2zTi5&$#~qHJo?GwXIq^GTToqDFY6cJ`qWQzEx^w`+eKGBG(Xu%m&8faC(vhc z@#rV}oL?mwt64{ogto+CjXDga)^8%Cv+w`6o$vo?s}O4SVe+TDy`lK|wFu;jz8Ti| zH^iGEy&J$5juu@Xeg4ywq|Xvr6osVc=7X2Jc7HDfEe(K^?lXUc*>{syn_bBJP=947 z`-zCO`~E6lcQWm6eN~FCmi&f|MEL^;JUe@Fd^%*{!HODkFZ`#@Z~fL@@%xX@{+MdN z$S*uHDN6wJASVKuw|S&f={nZ{JoP%>!ANTb8>1$v_n5*gdC1VAXja(?achqp#ZxZM?H~eKJ`tnpTWR8`fykqL>1gis3GNV~1zq(rL z)HY1d4Ev0{$*jKduAjaA^>nx&R9?#H9{1?UV*ib%-3Ym^ps0U2nZ&z+*Twg4T3=h? 
z2)X|1+182LgZH8KAmqFFMRD5e*=;MrmndRZc}%x;Hw)ke9F|$z4f^Vy3>v%}b+|Ql z?A<1uBmbHAS2n#6bA=gu&U6x)Td9x!aX7yoMX*U}>I+AyvdJQ$AmJD;>l z($mLY?SeC*sya8sUh}3zIyLr512@&I*#mcTK~q7^P&B`<|H#c*{Lpx+bihhodM%)B z@a@>~+&3rnMfK(T?Rq$*{@pvnzdlX;{lE0h-~M-h|1bRhznqrV|Ks0!_<#Pr@BFh5 z|I1krH5B{8Vje$KKJJ%Vjg4MMRQFH|A0DX*3`8N9LvsLdQZszbL$q2(&u~qo(a&E* zTDGlvgl^V1a$@8dgwId8Zipd1pB~rE(nY&$aQ$NigDo#PtST_n4AtAk&jm1?wFiiO zDv!5@Pk6v@^6S)%?B1s(>!rSPLiv7eMf~cVZz2Abnf=#m$RYx=!W1rbk3z#$H7GEX zTSv4RhqHC(c6qwim)4%RHMUwd)mhyT0~cM+iqrL;Gg=LVJ_IFK@}bxJ-i|u`n@mD@oBdFUSo_-qWyOGllj_^^qjhzunuop z1@i=I7@Ep$#(HYY?qu9^$B7glBI&j_mr+VxC?v)D3?YnfE>yI%JzB*;9se`{3>>6 zjx)(=q2qoS^)9&2q;weVspxy-m2fzS8j#Hi5kzQ(b5HW%iBQ!Puz=^;B33V+`NRWn8aLAAvVqDA$En%Q_ zBxO@U*JkqJ8|}-xH^`cy4XFpZdbr4 z6N!r)q8~LLUTQIl4RhUe_b`bax7>3Re z*Umsnh(tV$2Ws=U(ziXH5Q2|W58!6z;$}9v)%Xl^^Z7Yn%#7Cdm^+AQMzeM&ueoZy zV#LMCq4vgp{8h+Y_C(Bj)Vfr#Tt4U2c3q4w&7Ibeen9cUwVzDCa)<8hb#>o7ac5u* zW07A9Ir`xbt2@Btb8pj=XNBAPr;kqHPrn)p&Kqd}zdBmJE%$*nr zZzHd{CoksOkqhd~@7#1SXVhi;v%wods2|OMTrYa;J{=;H0sk}>T~At z<{*XGz!V}V;iS}7}1GTS7jxp#Kd4{ z5<6Uch#oU5^=<_NVg&+;L~!;{pSuyFI`qZj+;X~2zTDr-#EzbH5&PC<(BuS;v{ne6;4-+=8M?tdu`>ybKo0;xq`5pdCp&ew~QM< zPvhc$`K_Nn61TtQd2`vf%;7t|rQnHI>t=VzB<}dfEpYBBxsYPw z2Wj7G#3zfRtN!X|#7c7EOdP_ln9FDo&y_oBAD^! z+}J_trt8rChCVpQU%{LU^n(AlY$NSelc!K? znT3u18BrVN_3y%f%o-MS^_|7$>;>BBbesin>DIgelXLXWfB1B-2oYcKYU+4>?1HpP zcQ{CfHRlCFbKlB*>%lX8OV-f|s2Ktq8+j%{4B_OfhD3(Ee16>aNgTt8&_~05CK(t> zUiEY|i64;Dg^LKJNvBV^Xh=hPY$o~r@BT=#(WQWfjSORaxDRgv2KEP@pr%bS^W9K?;kI?Lj@uHIkU^{JG}rU!b^hPnd?4!N@oGqGF<@a z)q4uL(Dl|ah@ktRw`KXRyo1;7*9FKw&lk7V^c*5+pS%Qdc7M>c^5GM1pD7?A+Ff8s z(?VsS0M%mDDg$Fq6_b5-lBvx^#NRoqrr6ZQQG4$OjFNrzMeiF(h;B7?rQ&%JL;iML zUI{{fCqvrdY8$NgX-Q?&w!1s;wd-2uponY@ID*tA3Ko1tV zaG9xlJ~WvWT(4HT0gs?gM!YvjHdH%FTW`fT042M11@@o%~w(A?kj8UZpe z_V)FI*d2^~G}v!=1diG~;2uYf-V89 z!DR&Q&^RcA8ul2L=vXY_pSD1EX(kkhdvpHW7@$2}4R-|M@)iJB>)>gSc3>6ssb3h@ zf5OMC3An?2Y9sFu9q^cs1+7W?5@X3=3%1D>B;~A22hfw8mj_QlK=p{Ntu{K?LIK*-c$sB&;T?5 zRG4m&fj4K}Q(hUdq;G~?02(09?gDzW($6V&7iqRSxdj3qIdx>n$<%bA6-TI=c*D6k zNpL00$;n;%%2~OS#)!U)JJ!VoiXldxfJ^ko{L@Ao&6YwfpBUks##6NGt0Px#_D>Jo zgs)P|4OZUG{hE#u;RUB zOzB1Vku+4qfKNYeKced!>))=txPMQHkI{rmKrO`Kth62>Jg8Y+J`_%Hnsu{>3l9=A zxUt)%v&YkE;_F4^{JL10SpnzkCc3rjn4S^GaCAE!_)vX*XB@`JmHOF>+$9lO+Gw5d z>pZa~Ee0k9W4W(dgk?-(Zlp74&6$Iz%duMqs@bzkZK>s+8=1xM+r3tO0h$v$Loe1Y zcSvpHK|Ht))dps|)5_e;YP-eKP`I&$6l{8;3cYJ`uyH~{ zLR~nwj5vLDvF>yONPu(r!XQIK6KQwL4v1bY(Qm<@b8~Orlbf$j=!i>-S+}aBbA41j zB_Fi+uFS>To5|QwmkeB|jmPpEv)0TVv2@$-zGiukK#WkjtnAG;HnxbmW2wvle}#<~ zI-qXH18eZ$%L)AEdM!V&77hVe4iLQ-?63Hb(??Wdb@BM!O5_v0kpYoBoxBFxgoUa;PCbjy2%U`iW z_F;3pROCcFUyUW8-NRNA+N|n!DnS0%?e0BwqEGA?QDkmrKmTITf1OR|HygSiDm6l( zt&ZPmwBz2l;PPl@pO6D6pL0Q%c4t-E>e|r7EDn9vS!yZhO-trb1`}N!r;wq&WUf9C zmj_+7V|lU!1zn}hPQcPs_V||vzc*OkNCy*seRB{@mju1*P4@w zYy=k-@KHj3R8e_vRLP^s@S^PiK|sF0c{w}5YyGca>LJQjTgLZY(KnL!W_yw=6+a9e zs+Mt;Y+Z1E&Suid#;0KDn>ig5r{?EqI&5&d_E~Gh5Uh{LTz@AsnHlKpRbctb?ZLkj zzwiI#+IA>vgJ(_YrD3%nK*+qkdlOw?8RtbJ5x@BO$zG>5PY2M~)+WpnwH>dV%LS7W}jsgPizS^LQSrZaHuPHMmwfd8+g`+aGJc%Aki{fExK zhF@uwM8@9QBdA2#u;v`Ge&%0Y>}TqhJr1ACND{j^#sj<x;F^5*f7v1eoyQ-Y8(~wlA`}H-f!>`L_s((U`kPB1 z8;0~l3(Fo35<$<-tCzFB?~%I|IfYtS9=;Tfqtnx&tFTEg8%XrK+p0clIaI$N-2^9n znkOfYE4_4B5yPB*gxCB2VMB|)Icj_^96JBGbU*PV7XZXzvw4?a)Q3{dKfTp|aWP2Z z2j#Js#@XSuGw-FMZ~GSy?&jTEY-Zx^)g)jjck*V$Vv+beFbjSyCNKTf+h_K1AQ&iaz5Bx}^Y9vGmYfAEs&hOxSSHE`aoFap)z^hfV_1@Ti>^ zH+}hSKBA0fK0a(drC9Zp&PhM=tMiQS+kVq@h=YIAdF^Jf^&3&|WkeVcCV)Q-jWGCqHBG;Mmxj0p6uF^h=uUL494i(AkDeR{kRr1oSKi4iXw zD$(x#xFx<{gNln1LB%SEO*U=%`6cLPD}tfCWn__2iE$;owe-ttb+V=TvBs&7GTTr# z>K)@_Fc|3;OBbO^i|j0wEi^fg^`7*ZrG?oSUutuaKHr~_Z+WCs4bxYefiJO+x1xOb 
zZ~yn}?-PKz)RJSXtI`BsZhQKz5tG}-tgj5gk#4xyL7Q7J40=3;@2t20C>N<@8h9GM zLmGG*;3CMP1nx!RqAD*8=-#Fp-X>kk?HEaZ>ci+=u)@_a;~m`q8%=P$1R4tIw$w_v zAM^O^1E9u$whW-U-oA`q8inNp5LpF=6K=Ai&Y}~nEVKj&BBybg9A*P4pxIgf!im2C z9h*ux?6C_hP$o(u-M40T$)y|!6A7Z179Zpegd&}7@e+0+-H9g3@x0o)2U%^l6I6Hg}B#qT{yBB=)XuD-V(2qj=onIq)S1Qj(-h*@@sH3$>G8(f^>o12PxZ}7*SfOn2Zi~^amna{0 za;KIYo+^RJRBTb5D=fEdt?>srI3-P5ZcD}1LRC}lW^f1tqVYIOz)eCYEhLo>J>o7> z?p)IjiY632Niy64Ub;0*bJxsPZQj)pDP+9s(|UM~qlRym;@@|5m#gq6Y~{|{;Im;m zVHo#2Efihv$$E?ZIUvQ#Fqq@Zt)r<&<{KV_nV<+q!f4iQJl_pP?%F3Be2XNkCuMLa zd_IVn%5a{UjfeO8@@Zd~rG{k}m2y!^30c2~)>_*W6kQpGZ11VhbEWb8lyZ-C!VPt= z_vu$%=@-VMykbmmDD#}8?bxha0XR0Kr&onvqoQmd|WFPU&pT@Eb0j zHaQ%M&G463+E>m82SQ@`7(Ti)^|#hgB!9=^4(~z{G1m?rW(0Q9H76dyL)OO?6?es> zEPi+f5;8I@7FEX3jD+&v-M<^v+llB(zf^dsH4_R<-Xuav zEUpHg$+X_DQtpLbyzU&q^?3)F$@!}Av2lF0%zozmEWWT9E!@MGk6zbD-5+ms-%y3a z=Jqw+TMx{8V!+k}5ku_SxS*eLfon*i6aviFH=ZIt%6)oeBpvea?6>mELxjXO$?#^BIzLorSD)nvvd06`_XIPugnB+@B_DOCeRAK4jxX_u z@5;M(IQR$x=tM|`I>+b&*{r`VaDQn16y^;oMEf3&>nn%2>t04YdqK^+>`ynIydnC; zGjgWT>6?+#IaRLi6qRE!*rOBgczzG!ENcfI$i7poL^+n4DmX*hGt5psvQv;nXa_*n z^bE=Qs6(qF%_Tt>d0Rt2^YXWWbpZN{@N#wXq{1(*Dn&AkPA=vZDDH}A-b~sQpO*c5 z%crhgi|iFt=t{Y?g}dfir`67V<+UZ&2)I=!{>!xB_A%O@{*)F{mwfTEW_pp~?lKsb z*&b4KJ)8XwRlMCt+Sko1`*eA(BG=Dqds2k#j-z3*Oa?&H?U5z}bd86L4f8=H{~KWv zc=7zFv#Z?rQi~^0%rOdMrA1dkrZpgFI~@f5N!n)@Ur#Z;_U3CkZ7m*KsC$K>+>;qs zk24{hcJ1rE@Z{^&tD=jV`{0D-nV$ByR_5_v48k0D<~8j>@lWnQ&)$9yL@1L0!+v9I zcTfMt%hswl|$X zR~SMd&cKS|zX-G<#pClN$R43AOywm71TZAfmD)+gH6yGBoCavG9gzZ8hJJL_+*tACBOf^zxMY z=7RI1w~N9fGDvuuPT2{O^TFO;!@T42TbQ~-E^Qu;I-8R}CV*oWF7vU2`}Lj%w-%1J_p|rFZ8QmeJ28>vIb!oH5$m2#QDyp^Jfp^6IgoyS{XDM6 zhNqC{2qaCjIX{QXPez}g03Z1O$-`TCrqlju`)QY7346C|Qn!Jci;GbY2AG&VEEdwv zb#>JrFiS1M*P4n6T;s=#&I%S&xKf{QZVuochat0Ob8;}vyd8q`_>~)_<5prMOa*ZQ zAe$>+_ez5s=tjJsKsJ)n^JG1_mqe3#uX$H%wO#eVld;i<@v zIQy3zC6CRCU(_@YG?w8N}wi zA=Opo@L0P`*+?HD@v<;vb)TYn-zxNXg63pEy1jYV@Y@RV=^W(|ms<78Wr6G4BZZj> z3gDvX^bE%8d~a#cmI^sF9Zjb$oUURHP! z*Gz9dqa0U?a(_V#JQIIJCmL=Uig!B&R1;o@hJ?<{YI*?^%hRxVqsyLAr=ER*T61&5{flV~h2bg5OSnO&YxHwV315Q9 zF>bVy2EK1BC$x?g!_czsBV*&06T%V>>jh?*v57Gpc3Bt}%`4~7c+oSs5I5tov#Ajm zPexe}1B}=a-C9kNr%l)G*m$>1YgBtfU6A!UjT40pF8f)hy$GAlJeWYG>o=1!%$9XI z(hF12Y2>g_yR`uHaAr{3$y|2%YS^iIHkn_g#5z5;0w{Xst`L~O+tg_2rkgX3hcSFC z$dN8P4BD=0+~Lj|7@N*Q_LF)mc_lO|31i_rgmCVbUGTfv@i9q|r=DCGu7~sLfUqE@ ziv<=Q*Yv3F@kG_L5t1^{wAURpzyr@y|E|`;xH#;f(cp{TbS!vC z#TL5!lbf~Jwo8v40rC1slIA(F?DH}mp9dZ08B5SOHKb~1?FrPn>|f|W=&Yj3ky4^+ zjH36JkmCwFWzSd}^H_5lwLXMu5skml+R1h#>OU-2QLNl#<=rv`_V&)KtFnQ^6bYPX*|y+yPus?uEubp`uqf{ zkVs4JuNZ-&rE1LCsFMU}ANs{K#2!vL6Oyl|I>XkDdU`+54YfqZ6|T9?J^C-=wU7Gb zt=H};b-ga%ot($s_{;mA$*3^vA9B<83(bA8+Q?rLK(R+MT7Db3CSY=JHSx4O&1P20N5Y!x z2XH(|Q1H~9P)s1bOO!uf6&kd0(jQN$P z$~_eP0zdaz)tqhAJbOMXf}&WR8Nx(ezZ4W1^vMY27tq^pa5AgD`HgMnSU|qR*`Y z;#O=84-W$VB&S*mKJ)t%vg4a0kM#$mqqdb#+otX2Jcs&VZGL4mnegs`S5_Xv3;&b& zUgC26V!uja&K_~{Hnnoz98bhn|8nG-3d%H=D|c`H5C7mTYKZ>7d?qI5r^4cNY($0% zNb-OK<_y>@z;ys51$YoJDr;PhKmq8Rnv4?iMF*P?ik*>nS|3`gBMo|cIJJY=K1}K} zqZG#83V`MbaNK|qFC29q=>aFaL~#`mfJfFVt?A7eJ3d$7*rd&vnMwE+ z)>)+{6|iIhhz5Fk=mqTm^hdw^um9YK00bGNt5ArGwN(WuW7LPloH#~-L0z37Dr3z? 
zAOykybip-bu&vwHO*Z{(P!WJNipSzL3rUeEPllRUh|L{MM_98?!T~ngNwhsMF3zXR zEj9B_!8Z^J6v2cc`{>QHw@m+MTzs~KGfKtxoZ>GJw*h(M{?zA>*0Pn>sT_Rk=E-3n zWJj`JFeE@Z$IalaFiez*=K*Tb-lfOuorr;y26Uv4LPU}ww~jLC{O%Z!gti1fiyu$n zRZYdDFCy(lWZDa*=uaAioeC`+HvuFY1W6u~ORK9kuI2kUS#1rROTnGUWF-%~i(;Nb zV+Ys@?UdnW(3r9#%mxc`xMg;k*gQ*}O`|sHmej$)ax*D~xjag&K;(z@-kc+?Yl{+5 z!!sTt#zGAnoF9bTzuw;AMs9iUH&UOaxzG)}USOAUY^@kk#G z#Qj>V|C+u~O&=f=la=M^^=X$&C9DH~W2Nut=J-I<60UBM=(&P!g!D+D=~e7;XFs_U zG;@;~&W*T?nSn3M%7~Q+wL9w1b(Xu?Hsg^|VuVdc;$>kpwh7SHTr@j2hULP@e}!{b z@m9vrX5*DWVsHXt(WZ>7MCii?9JZrHSoa^jY_GpYw8{ftBN^z_af^|hIwD`gCbq8tBvb5`x&}K(W zvw{TQ8q0%2Qgu@)IT?vfw@$_c62~TW+FCu8)v!ISX;Sj;coc^HO$b>$WtGNeLXZr?~Z>0b7`Y8!=#8l)pfZUJTCl^bQ!JWf9V^SWoV>`ZPIO$DjOYL znN@$D@UHwIl)C1vp>*6n$z!?Kk^GaBlc|>MgOhPDFxLo0{4?xMukzaz_V*$wVd0j8 zb$;@pVXZWNeymtW9DfH+4e(xexo6RYsKEuR(p_S}OPX zSoCfEi(XjOrdn_FFYrUYoFw(N(GH>ct9|;2173j;`S_ZGkMrcho5Wj*FTW&1AN32- z2wPU?sZ-q}Kyp{>u3x!23`I&|KdU$=b-;2Q7ZZFTItx%8eSTup{=2+~V2c54Nv;T|7-X zROicv^LhvI%+vpR^3)RhSB;GmY1?D*|MonXJ+htHC(pmsS3Hes-B?Od1!O-x9Z_RX zDClgLLc`R#gRdpqeI3mDMxYkdH>&hFH-;y{G)h^Meb2hv)W`dmlOz1<>(R-7|& z5SsTgy_x1qww_ly{@5icuaAJWGPOE?$Qf^!lb;bEGawdSMhC%|@Y;>2gBM&+$XGvt z7;LK9d(aS0=7utIsovi8CYLfJvT05D*5H~aMa!Mi2vUfm3-Z}Uxb7E8t-V14Kjp={ zz1U4V-qJqa%cw!cuT#I<{bF%^PmVu*G|5^Q=eUZXm(k)W#@_4Zx)dB*BrcDyKe2WL z6h0K(+#ekNwx_SYsJoLP+4ti3vjyzxMEmfr`?ccYJjqd|cy(I8U(rME5^hjdC|m@mf}t+YQ>)Ul ziTzR8iZus2A#UMkqVs1sq2)jXza4@6Kg05jHjKQ%462kS3o$rvF7W05^n3rx(?UMK zdi8I|(dF4G;a>!+4j2KWshHy$Bs-_n!n{DuY`{(hy!vE_#RVna2}+3`9;B-hRfiv5 zU5j@mR8&49d|?g9WCs!`E?kx5%!C03qedf9>1h*wH&TZnq0;tA*kCbssa+emT*iRM z$T)IR48~(qmf^Uj2qJJ8xH>@STJc*(tEJ#TaN1z`>1_aZ0A4k9mYoS9x!T2m5<~j+ zB0mEIR>-#X{3Xn@6CBNgQ*Pg9zwEHyV2?P{Xbi9fgZs@M8x59`5(;4bjQd2Th^vMT zK?07vgkraKc3(u;>xFd> zwB+&CGlcg+>S$Jp4j9z}aMh7Rc5v;AeR1SN-MuA)u#0}p-?9f8_uR6(_Yn7??OL1g zlGT9_dWw|4`h20j(VSX`QaAl5vY4*_cv;PTCwpntPk=4WS!;R&e9f`+y;>yR z-;soal+6`-;v0$7K2DtPYX5(N@B>_u#4rpqE9dH*b9mh^=Qla*&L->vAOM082@=Yd ziw&bh0rs;jCi-~Yoa z+EOWT6q^ZnXccWW@gh=O+X-47RXX4&mkE%E7MJ~>f4-NYvf8O_M_1xuX~JEQNMzPu z?4`z8Y0;+J644Vv^A7?_`?Vs zjKSm##xAF@TF-5Owr4*9d1ncYXl4kj>w6J<=_t7Z^NAC45GLT)UWN438Lk)QTQRan zq*K1>1CKdo!0;k1Oh-M4^;D=?kc+qbt@MWH@TK#nV^$_ssD~BjHMZC-G(5=_UMrA6 zoQwHOrEDmgeP&S#PQbIt)E}nX08k;+ysA=_WOOs$g@MokpP@;Q}&yNKjs^ZdL+ZBXJz!yjf-1j`qpwa#uN< zn3y)dywK_`B*?@_dUe~3bo?6Bpyw%Hdy6aoBIC*C37w-(y>~F7p)Nx@J_~hCue$b( z3@vGZLEVdY7Q*p(fAXbR^yBMm7Ev{x)_QYDnhHS=XXy3DzGyjV7lgt!?24X8S99G=4h=JgE`c$*&n zhsk&Hn&6aoM1djW$AwS4eeS&F_1mh7*QjVVoi?J&L;6MKV6Up9gzn}(sQQZZFFotp zx0l{n+FZKyGO1mL&s*$A#Zy%M6hD{ZSA?FmYD|uFkE=m=zug!Mr`}oWU~wd^Ltj(W zmXvgo2W3aXpn6s4j7jLVzRJ#)-yY(}_y2$YU#|R;bHlIm(9140ZU4eaqVhxDxaDSR zBcU>zLbHLn>=^Zh@EYVBPKuq5;_!r=i9lO(S@EgFpQN&i?Zd;vX;D;s&M53cKu&U8 zf4%J>Nn&zNw!-IfYL~FKc~=CY9R*%}W8JWq+m+46e)tzc$1=sotn%nP;OH zXR^8lZz^7M;dKvfTou4SGmY1Nkh}Tv`M=PjE^-IVy=YvEtuxs2-lTc&^!dWePgyxO z!Nnl-@NUwhsr?UZjPFoS^J>1MG&p!6Vcu1CHaYu8>CgZCkLLb8QyC;hr?`C}fA``>0#edTBEV111#%PFt*X-9}`$dH$ z0-5#fW3B`Xn?&#s>|5ul{>YPzjJF3a5a$3f81x#2b|aZ}cY~cl-Qy!WXLo~c|0D=6H#aUfBW6+Uq0czWC`xZj8_P2Ds>u1=-;dJVU-U2Y(=lagK$M{;KNtvx8z8MG@E) zpt~L*<8mifO$U_JA%1So=QWv=VriS&9E2X+GZ~#zf_PYQXv5aX4R(=7^s^D@%A5_| zL=T@8kGxdgo{3ot?+@o!!HbEuP@KPKJD0}f%cL0o6~F0NI$z$kFQR{jedf7~55jad z3X{9BgonGT%ys@87c0DD=HSCR=-@i~Xs5~4%psZzzcxKM_kB;nfBaoaY)@yB(;XNL z#vS+87;mXi8Y*$$h`jrUD*LuZxi7|{%oLb-XiOt5IK5=+Qj{yMu-AFS9RBfz3-Ufy z8hZBS;u{0!;=hUM^SA+=8o`WoEc^ zp1bh!-qy5=2No_cEbj1hc$!B~wZrt~g`>UJN`M5=oUW9K)7S{zbuP-akF^s3m;f+h zlT(P4PsbsEtvMmh3ZKEfpc8oI;(!_iidF;AfMC$yGhJTGnvnw7kp@6~o}bhp zG=_*YhHJN%ziKt+pngzJBMX{Za$10cUFmwh44=-IHl0^NMV}yW$vpQ*+2Gzt-wIsQ 
z0b_I6dydS{Q5NYA$3pmFzV-)X)lK{f(o>nLm-`$~DL9qocOvo}^rJ5YGVK^YPHj7TxLPr=apC79Fdn5E4QM zwR$qbhV{yLJo?a9B=E8nscgdbdCcVaVgN_NEnHO0nk9-evz4g9-f4LH@W@j}2 zR0FrF;BkZ_aSRVeVPjfZS?OyNlh=Lhmut=Z=L{i(z&t&Do`M6Du-2p&{p`663|Z+H z8O{S2M@PBW?39N%e?QOPD*onyPjC*#rzd?j=Wmc;rsu!r`+B=s6n#+gl8K#s&Sk%y zhi4}$mVD8GL!Mze*8)&r{~S-|9IcVcqC9g3jJ6p~$$QzUd9L+RE!5h{9Ue|VO;;|D z7xt`+h3evfP07)U2N8Bd+lvAZ&f2{I5qNrxr}``z7$_@~5>T<~Syn0q8Q_!-1j&87 zWf+Ppm8wjXtXw#2MiYy`;v(ogM`jP__ndsRJU?C#g6kFJL>`=~h8uSGx1XCAPDZBM za#W=!EW3}xn`*3IIAd_}Hjw+E)J5k+|0yZ1LnHV395>H_hee>Z57A zXofQC-+lUEfB^2J`JcwmeUE(QFtF*<<@ED;=W;itvu<%z^yq}s7K(0%)28YrUMI9Tv`45 zSnHoTy=yYs;f;5q@#Aaa0k=6X70Ox9gK1$SqRZKLyBF>3&hg2UStsh>i6Emvk__Oz z(F+*D0x+g3wzckTe~5K_6TIYoFprfF#j(@C%B7kkZ~%^>CrY2~_%NUXLGKmXu$S%t z5Xbug=cm&Lz6cPuLzk$Zel!KH7yNsyMH5{pZ|JApD{ZQ;t`|ww2De#&8T*<%V00c@}13A(s4Rd*lAXSKbr71YihZUl8NlHx#bi__0&*&Ep+ZO=OlXmd=$WPIn_Y^JCMI~985Nzv_l-*o9eE_8#;e!|Y z+Qy!A9%@v}ilv1-z}idKAX6B?bnkv}Hh;45>9e0kV=Hng) z?o4id?`CCh@aHh+XsZX1E8a=);G){pM=sVd=HhvxPYth~&^ye2{6pp7y{0Sdy6y1sM2t`k;1HF)&uk=>LB~_X#|fZsaVB8_?%o9a%~*BU zOhk=HNj4~DMl3(zhwr?05q-5Ya*Xrz5?~Fy{VFi$)?QSI#T9_k7dttiRm}1MVCt&} zltdHPeVPN_lBKdcL9QX>A3zGAQGhjGOoj1kdddxyYn9x-dN%PBKo0X$5{R>d4UxUvp12|~b zaS8+r60w<7&8bh*UuRaeK~=<#+Q8L0*h|UqeyLzboapn5%|KQ<(!nq#sAFhNV`8g- ze6O}|0?|Ars0G)szWaVdnoCa|h?6`2FbcmXih(-ikEDNJmwtC29J-C56%>p82qkx2 zDccY>5{ z-VH!+osCKp-bY{C5$`GHH6)Bg$TD4xH_}AtT%4d+s&72)KF<<N`|jJ#bu(#$L@zsl*~k@-BTWtSZtM z#PW1r*MZDt0&qC!aeJz~5v#f`$0r2=cC%BiBSWfZ+&(pKFA@!)zfoT_7nBrYe!e&S zPK>(Ciuh>fak-wJhq<|WqVH*pqiH2zjHb;gwf%MBRnO{g_Uk(LvU2PD_@M@P?NWko zYvnYzsR~5fE{Rs29sba1yhcw3<7Ed6M6nAC%7^d(tAlhDtnLMa^%%c)OxA>PZj_A)qp3lJrAR}nphN;OQVOL9BEoV8?mMd}@@ai8+Zgzn)QQ8KpX>bkVkc(ToklFQUMf9yh6nrGQ|D0ds3yG7L&3%IEP7oP zU(5yPg<-4QjpEbnV5iaeHMNo=8ZpVjN4c@VgaY-tO{u{(ZxYg53x`2xFd`R6CdgTUpmr<`IOdt@)sM}yh=b2tX|Yk)*kA&T=9 zxXu#QDI)4fYQ_<*z=J06iUyqBiI<R={vWh|iXZ8T14D*}ieDChZ1Xr<%%49BZy5ADam9|T9vp1AM%n*E5|}1{A$%$ne21! 
z&B!yo5*cY%8a{Oa19;d}jI<-?nXiyFo0xeSXQ3lv+t*`*>*g~K$wfc!5CiIu$ZlHD|j~(vi3BJ3Vzp11|jy{WKZ+FNkTbc}2 zm@GRMBaw|2#YS{bv4N%WkYU}ps87kUG0R`QbKKlJtxlyJnPBt9$Sopc z>pjqoQj6rfv70ZNwEpMKPt=;T*lhI!Yrs)wjZIYj&)6BzhEE3*Ct>o)a-?ob|TP3(O>H=|bt0*<>>0~1os7Cw@Q)9~Rm+O@q zNLo?E=`;=$yTWzUeQ-4J!8VrviXX}qbH4|m{$l7g^9+i3mPdhJlA8BNqszf=ZJdzk zZgh^yih%c@_-5?HH)uC3qh1_N`r-KM5`xq+p>gmLfrk6D-~PRTd?4QXM;kH6j%(1+ zfyDjKqrHp2!ll^LfzIdfuHUis%D=iPGRcqRyup;nPVkLOzd9j$+Eq6yz2p?dWHGNPr?9&^B9gG9yv}umm90lRn^hG=Tb<1djz4AAlVd$oloOAJCgb+K z77B)!pcxMFi{y-IP}TMrq=~0B;iXep#2HGETD-({3w_oF#|7d2g2hLTDb%VK{uqAz7yskl}TUbZKiPorL*m95f20CqZciN2O z7d_&pV_ejNP_Q&8^~49=fhR zP{-bB*^Y{$QlzpGHtuvWBr)Yo5qY4EA9cO z1qPWCz)b1H5z)8g*FgmCmXYUTaV`QcXX@3)ljmmBJ3U9+`?YkbTZYTdU)szQ>ws+J z8Cl=3?YU0KvB{y7k8glf*;Q*~kX*!x-I|raIEYFzbZstxzi%(J=YpKIf7ycuNvhZ; zJ24`F^xK1W9hN!AMNk5~fJVMbRh_>&8xG19NuRbhz8V#g@mYiXLSHEEzkn#1I3Z`PN6FE{NVlWD(uP#(IF$01 ztZA^1IJ$^#8MIM4l#dQiN5GV(;(a3JgId-FA(AO!af{}G@w0AF95%uz6~H;~0pgjW ziDNZB&dt=b^bo#>cr#zcI*?Gzc>IeDHmNxM?{?0SQ`QN_>pRFeCYAuf(K5#KJ>fz@ zfk$1S8`z!rb=Z5=G(y2io3|EEmu*fUo=H7>C4{Vx3NEe{GCW^vvQb}WDVAPbn=tcJ z|I3+yp^-!?oncl|Sy<9<>^m{PC$I!Ah+8{aH1$q?D=_w&p!@2=N^Jh}rCWFY+V}5# z>+-enf^*YkOeJ6QsDeHHE zdZE7`h3iuQ9MDV<sCY;rg zgE=Q+4pK+l4JEW6!WRY+5>-241)|^53p2I* zr~HGXV!;%VlY`w(>#ezX(WUyI zW52Vq9lq+ymVq}@@;W{rJoR;d(l|k-Cuh#$t{&<$&yA0EzcQb6WOH`6))Cl&Hd82~ z>`ET0v!cXskP+kQo;ERG)M_Qeh@Oh!G=1QjKoT!EZnC{_oKr+sJM4k=d z5vA17aWwksJQ~@2&yl`5=X!I|cr6k5tA63VxD~&$xD^}w0VmFGrXlU+=Vbtux`7a- zoqC9p90e}fSUsLf7YT!|q|ACgV-Q$>x*2Uif6obAiHx3lI`2 zg<0mQ)>6Ik#*q_dq`&5YU}iozo=bK>+gouE0!;^BS_vy+x8r$!NRLC!7Y#o|heJ2< zU0*pVIDFc~OZ=&o6wlB`ShZBw zbzJl-4Uc~Ag`tqqr>cRYfum=XfsHmgaFw@f@jIe5>GsFfT&|Hi5%>2nhu%(qaXNRR z6^c(54tv~_BL7pm@DuNZ?3qnRzkCpHMciOM_o&lMOf!sw*ja@QuXBIbF7Dmy+<$TK z#lt5b-hKSx`}ZDv{_;OoxZ&yH!;99Bf6@N;FTebQ$7i2Se)XWL0s^P(9{opHFKK&~ zL2DZqJa|F6c#Py9{G9x}C;qd&AOHT-|Nd8h_V+p0_$ng*M)q56rR51eo8JtFa$PHM zIk24rGQd0ExxO8HT{WXiQ=`c#x0<3S;pK-tFN7HFP7`*yT<|lt8XAZ7By(v(Vs-zv z2e8rbD?hN8PETm{5=Bqb`Ov6}N+&IEoIFC9s?b1y1rL^-@{$=J~Oh3^=4vl{(0@HDQef$I+u>p7u|z!8SrY zpDKMsSR2zv;N?4^$yo^|J}#D5tz0$R8KV+&=4D5buHZ-{q~2K&01Cb#Yc5R;p>CU5 zkU&tf?VnfOyBTEEVK-piNHK-F9Hh?6l&KMy*UeyQvqLo}h&k5j#FD>zax8_(Kq~1! zbMemMH0GQC${P85V056dCM^2LQ5@qHd%Q$}5kvFC12 z!OBON9%H->uNs!_3Je=j!|3ER$TDl=_UEP zO>kxO9MMxQ`S@to2_2}ej6UO+-GI|upXyL0}|i3~{eli*jEty}fEez2i0 zd(BdIy7P;MI)9_7R8ha(!$jKG!g;E*0ne@ z+=*(9{$96{%Z5*0M3%wDXYGU&Jzx-Q!y~wT$k$1fhnsvvK0BLB8!^8?X8X92AE? 
[GIT binary patch data (base85-encoded, non-text) omitted]
zXG3jTW7!L&(P<#ptrpnVdQhKtd3I4Vv0E64X*M+jKKsSg8n2#6co3ty4~+R4@6=*j zkw%5%PmI|WTYKfMn_uwtr(3gm^mOj#CVd1YBi&xS!I*PIOU1`i&s;Vqf!#yMKNEp< zGvmwa43oE5jfKtF}wt+L>R+^d}nTMR6N2oXpyR&1rM`(uJX~MQI%RX}eg&_Yx9^s0N zx}DR7sMU8=UY2i^gW34begVKD1-uhMsLzsx+hyy${PWkG?$er_wsaP$@~PLCKjS6a zxoZU0SzFXi?CB#c8E{t%dsfrvWwagm(p>xG*mQ;R4ky$CsddvFS#g~|UH5&PH9tNb zhfIP9c(g1GXS)2t;quPxi|4X`rO$&}B`ZXnW_vQ45G&J$x_XiouP3wnpz4Fa*r&Fs zZOwfn#&tIo4)h)GY=pcQd*q_ot_d*9cn2Mh$OnQ8J*w}oFO@BI2giAca+QWAaTE*RU%NQg6L2;-bmXLWL1k|Q&boa_uH%2WY$1&NnIr&PD< zm~A}Ud)IzRLe~7aZaEoWT+~E#y*tx~CsA)(^@q+lU(*wHa@rTs z1`v)})9G@*DewFRm%3t4$hCkc*6sLg(Qyy9Vkm}%`qJP_k#$UgId3aG^fg=i%kZmr z+|TBGYCn5)WBJ#-EHS>Za&o@!7bRjAO8D*BPliwSTbAm$!t<9kJ{}EheDe~*GIxLQ zLFdOUI60B`$J|$y=hW08(f{b-zdQbyxmNZ(YI@7-=sfG(^u8RZE*F9GFrp2AMO;{e z*WRD}m-m0zJm;?`uG%xW?J7I^+5Jf!zjRZe<0DVP{j=v6Q)dZv+-yN7WHlWSp5JgS z8SXTanZ=rP52=2BRF8~9R>{%Xe4dhL`|)PvQ6jk>sB?`hWJ(8~#bke*&Vzh}cm-e@ zt0#AkCNrS4_RyN$?;GoetytJrLM?u|^wKx3`Tu|Ar!#=ydFP9<#OAlH;6Hz1hh^z) z$`we3k49>5oQtWhUXN#=YIg7PD9BOoo%KkmKqLNxGtROY>qtj05#&)*)e`{F!6$Ex zELquZ!_N>D=*r#z032WHc`VA*k?0l=Q)5l#n^5M(?n4BRS<(L({Lhg)iGQ)!AGOBG zlFM_`z8AKp3Y%X{{oltWPja+#FA)i1fA(MYRvLCS*%fj90NO5K0u`N4kNCz= zudTr89LDP^?^_B2C1}zgTCE+GgfU&v1lD%AS)M($4P%W)%N+yy3K(}PU|~^X6j$!3 z8pE%`e#$j>)msXzxR;(=OQ}ZsW(!6w5GEF!p(EsR1YhPFM+NK|P>RFKWI}d!i=L9T zzj(PFXaK=S*}i*biyE)6qbSJia^;#6(uB}D}<(T8rW1|siq?B6iXP|8Dv{^xGH8-k|wCRl> z4g3B<+GCTSV#Ub2ujh~TI6xv({}P*(6*k`+7|+N4@MW=a=9!`Ely}uryi*%G&2$d3 z!hQHeJ)IQz3e*o^%>4Gz7zz@;{=hIvo(L6FkB0|(*=at1|I^7~MV0)6rDR01`b~W@ z$oWJ35L&66)g0*ce?V>6-!-^4qCcUA_PUQStz5b%9P3hqPS_yS@LP~G3nNn9;XXHa zJ1n>ShrooA{+(5ub+5tW?quYKiKtuAW0yu}d8fy~RTp*6SoEoA85#3oZ8!j?9G$x> z2usktCWyM5MQI!|Ev7o^03Lm0q9i>L48X7fj^?mS&`vB+t{mw=Sg<2#eFSMKCDHAJ zca|exM|AjEZX685_h}_F5vwcK`Dh`3I?EAr*RJ()$NKu%n+<0~&PgQ1aVNrYrSGPt zXB;ap!)wGNLu#G|X0KR3-*PT@PslF?PFg!i<7SB9L3in}yx}&Uze6e~#F}2(KAO_= zip7~mR&kLAFvxU#9m0{9N3o#w*ru%?pSe9~m4tlPHM4&O-udiG z*b9QhW@)-J1L?krF7}seABkEo4*%NSNAsf|;eJ5=h*LRe8M4$C5o7n>g$)`euxscHrE z{*ujh0HB-uAi9vp8lE%vdNUz!T6%jk^Io6gHxb2T;X4EwVzaSly^ z^Xy#I6dV4DkDaSmi?O<>lAY72&mnW};TMDCW8Jz6R6ob)gt5HXwCi}Wz}%o?MJ!KO zos?aDB+1->f+u=xw)!#TOtjcu=wP0=)}#(-3BqR`>&kR;*qO8H`-YN;IBCEw0KLwo zGyNFF8O{l(4i@fbP66KGm|4-!Ax>YY)3T@N&3Xey+|8xUYhCG&$~A9 z+6?wscPyq93w7u7&tG=WjiIiLa&0}l*0tvc&sk;dy-tBnjBT={CGCT&XvojS^PXo5d6CeN^CrOy25sYccun(wcF9$Tb%4hxI zGz|d2*a@R>Eql}c zl88+(TR7gpfq)9AT7F}LZZs0C#{>oeUKMcLjDM&Dfe?_)QSA(HKz5eLo;*8>$1RPM zOv!3G8wXL#m6B3_ui<&yWfvXYrU|>|Eb6b$jLJ0+`O%H(lU(zLfT^!F{wUX!oWB{I zAG$AG+Yz_)R?DIfTt1xz>OZ;z zI*dUwwz)!0?gQ%9Q^aDoviOi=XLv&hbLzoqxzeClOyOJC3m|=iWwhGKWcw#$aO{C8 zIq&(&fByX^4QbV3JNWtc50w)FRgr~Yq|m(f^|!8m;RX^g0%HW10&|fZ(32nW)5PsyuBWWASwGt=UBDBMMxN zm}j~4ren0$A8RZ0PAm9>+~;#y2m2MDts{YQ3$Lr_+GV15^rB~3ecSSWo%Bd~yr%!` z`?;iH*huiG!?qJR@*{Jm4vhF$K!pSlW_<9UCuAAKjf;A=~z1 z8{umOWA-O#)&t|fssHer=zILa=wW^Ba;#XYtg7nM_Kc4kkL8s;1fa()K{b0re>D8a zn_C$7^F28aV?>AO&MNFUMs$Xgkdm;@ofS`Sy3;2x=^r$Q(#$$=r$`K6B5m6hH=fT4 zB%lHD2-zwbVjH&uP!2;?pKP~`n~SvZtOL2rOPTP+Uh3#_nff@CCd!S>Kl#e7#V^cz zyI7B%T{(rgg|Jil^Rn-FZ;PyL%S^4J?Y+86eV)?*1{CT+@4Zs_Tf%qM)rqF$#|Y^ zp<^A=-2z7wVG20MQ*&zIE?Ma>F^t)Yd`3rSA&VM|1&OcqZ#|cW9DGa zknm5ni$j^PjumgjHK=B-e8)Vyg`o$u(r(vPbiNJ)*BPe8DYGdh<-zV__*@7@Mztc| zCR=a-Pu+jVX>OOtN1iTv3_Kg=E5GP_rYDr^Wu#Grq9g@i)@D(28~Qm{^7n(S%oxUj zC_h)sxZjaR@cUaQe5o1so3(zkD=UV|A`FIHK4MF@INtiyW5wMCdm;jj2PU}LS^-9w zBZOg&<2o6HYXp$eIes8Nr-4~fZ83twG1Yb64FCgxZS=U22*d*&q(RgUNs$%ic3S;7 zEDB;306jFA756sqp6WOkYn0Rh<4qN~(ib1M=0X#q4I#1Y=7t=27LzT8S2OS0| z2cUAHDN>9?W0S!t3e=~1sse(SZ3DP z)UUzvJtnxeJJ@xwVc=NA!c&jBMosf|2!@wU^CXWzBn>XsFxcws$z^oN4sSr-U5atk 
z-A3TE_9)~>8phBCU-1Qg338jNKDM|1%quq??E#md^kw@fT3mFaj2OC&f|Hd zPG6|UXDM~=M>rbJh-B7kM6fV4l!{em&G+~)s$Bb7H0K(Ut4jy9qv#b=Jyg_aY;& zL>lmLo6gxwaUVM~G4o-j1@n-sT`BFJ0J8#_pBb!F=B8+E0vK~7dRGPsZ1&gXqenr4 z24!Qw*d)n0G`M+a*tE)i(HKc!-sfEa)kcwusSto=N6yJ01=}Bz72g7ZgFUGK*1oUw zX1n>PM)F(8_((c6#_70!tik1BefDX;98EH#lh*RpnVMDPvg)Dmf)zyczPru+{KEss zm)}^9C24bGo;VY9>qr3jsX?`i8(zIYE*I`idtVg;2PEXg`r?T*YE<0Gw8sfn?;rFc z^F4~B$ZqfAhQpV?hz#KaG6RshQU^G0>P8j*h|)6?a1vT_6%PmR=;6w;<)C$jZp{tf zR8|(;;{K`Q*qgT?v$}>9kBY3~M1q>rm;*w&+C$gXFMNG*ek6AP^ik#R$N$N{yy<3r zRPhW>2LwSKA9s`OX=}r4|LCOM-?@Waa#7@p*8zK4*zs<4Iq7uA z(B-36b{N|C)FFvF8@<w$2C1>9vR^nH$gkF31YcQ;_oUpfCnb}IA$wIyckwE-c z#^?0i;VED)1_m~n!AeXwbQ-~=vJt8rYGm3ttxp+?;(1n1{?(UxC7j{1=O@ofg!wO* zZa(y}dyjSj&kTCiwO&uRDO3Xi2HxMgQW&*= zNg#mkrAxK?)QjnDrG2>;nCiU&-|K~$`D*acZUAVG>ANx>qzO&Bl6|wUoddYC;^G8> z9IH-Et7DQ~>>=Z1kezhj5Nc-7tAsO?ZM|bJmntxZ3i=`g@3#ZaAQx_#l_}a9!s7Xyz=`&_yr7Zc9pNW&k&h0X0te z*~0PAVhgs1fe=`zE(hZ;4x0|I1dP96{*V9sZ^M8yjP|up41LBD@rEy++{yhR^hb@K z8Ya2e6Q1FWK1Zf1Y!j=lBGBwxL+9{%d0U-wwcF{{O7>d|vpN|E8QZM;SrmqiMrc-!F-nW$g1v^2CJ#eO2Ou!L^f%T{f9& z_1O6MTueJfC&xTDgs4}Mkks@rH%WRHD%e16;LDlPLD4!xrD>-2Lhq8aUy2Q|(~P|h zwdRRvpAt$Xi`{7g?n%UE8#d#&R_t>_alffuPhe-MgBZUg9-{0eLpxK6xOW6pz~0z= zX-0=5v+eAoJQydu__sZwXP*}Dhlc^vWu&>3KH@N&U<619!Uo-v>ujV|FBC@?iEy3sOJkKc6tZTD~eKJoL*f5{qp zrmFnz3&xe{NvdRPz7%oiveqcJT+h4y;MK4DziE}eq_r4pND*sb-RoX*$mbnB=#{)G zC>|l&C8qum$OLNt%OC#u2mk7KMgR)}lF3K_3;|X)!hnP0A^L^^Zv*2I?ggSXU<@Sv zK>qY4l;CJ|BK2}k5@;H0HhUe^d#q;YI=J_A*SN$^^8?p~v*m{ME)>B5GG077*3HZY zsFa-wU(yjk_RfSf7U#9#oEH#(0F)N?Xz8qdIYNx6@iU47s?;kMdr^f8lDt4Ps-W&> zJyYQ*fGs`g#;o>jq|730-S9+#a?X-@MIN3FR77CBubu67!s&*MTj@A@K5>!^SU&%^ zKJ7cUx#llHB{uT8bfp*e;k{2UroLs-x9jBa4)I0$IOheXM%3DzH~1VMFlf@<97!gPvt?&((eZwbK}`M7vxE%_rBL+qh;smO%Q1wgPtVy5t1toPdAr0yh%kXM*Dy6vh8IL&GgP}crl*PJy`YPjID)NxL((>7`5NAGyvi}*1HWSB1G@_ zOd#Wu(zlQOxqcYEdXj2&KFNfZdKE@@HHs8Uiy+|us`=`?)qgOtIc9o`qu^<+V~y%S zebrpcxjjVjdBX!{YL0Svstfw6tzRo8+TFr9VYxwep1pVQZ+?{TxNvDC%};w(VTmMq zW82u`bfTS7zUZ|8qcL1p^(lrlGurEoZ*45~a!um>`yXg-%sP+>)IBlriChmPOkDG5 zj;VCvmE;j(2pd8CQ%=V;!beOiJrV$1h^wRZIKTV zs3S+P7^b_`i9Tz!!M-sMGI(fuerJieF`%vE-Z|wZJMA&>&YQn^Exf>wN;I%`D(5k= zm;LnKz&5KltLM%A*hYqKc8@yfH!saW9E89^w?~&nvOsU8G%f0lHaUw%N)i(LlRx+! 
z&nBD!F7KTQevIF$NbgtwubIQpMfYN?%FkH*jqR=Vnx*<}B1Q%O>~tW`Tg|iG+(|NY zF5`=*Y@5AtbIDvt+g(bZ?>k!alVD%sCqdi%u2%(f`^cl><3Uzn*aYB}U?MOI+=X7C zjM+HY{WWlHe9hH5mNL>|78nxIt1yt(&IOom}Ti%=sCIm#}30t(emFpM075ZOgRL^6UvU{$p_fR9w;q-}3_Xgj zLz4?>3cnSpGtiPZZf1{16Spllk$M#k7+F_2YZiT%x?R^VY(43%`<^ewK&X#RJHgzcL+_%jE*>VtZXZ;83QlB{*QL z=51M9|L)6YpH=pc_LBk~wQ!Ie3f_)G*Jgh5Z~r4)e&yKMZ6qn9g{2bZtD!ZkA-!u~ z+NBl>fA&v*5wgX3mn}^@TlJ%A>#zUgOH2Nxk1Id9^x@3?vsOKX1!v-{U>z}@Z;J!degy2gt*9k00oc<0!BdCV&3h@CBS;>i zjb^tC29jh$HPQ_iW*Ic`0oYHD^kF!9b2{@Y3AS0)n&nfAY)RDpigk6&4RId^*P5m` zr@|;Z!a4%)Nm}`a6tTgM&?4xH3WJki8iU&FWQl}d$y)xB*fN=wrF&YT7b)@g%S3~}b zYGK71*{<)TW}Jh8g|9+h3{vQfOh9e4^#y=BoHgUJHhEDhdoU-Nq* zy#xLI|LsSs^8|jl7GEKhN8O8Av^TJYE$}?i$hMp;jk---;c_)+%2(a`oxk|$PyYE2 z0buL|_&%UI38#?(f)`K~9!6Amg=oVLl9BKlAm|+#aDYHB?u+JH8Vs!Kzz&e|0cPi2 zu6oa6ArO} zMKv*x$i;BYPS2Zdp;NgcCR;m2T0j?8oqUMcD@MlsFn+oP@n^hL`a1LcgO)upfG;A5z)0z;t2t9) z?OcFs77X`uTle(?8i)&0IdvZ8M+CxDv4b9+w5AkL(UJ)`yA^f=<5*P4)>J(;vQ+r` zv<*xFjo+hTHzweAt!)#Q#+!F{JL>f=eQc*(UG15z^{O6v80gXhy9h{mhj@UlIf3^B z9`ndYN6I$CjKI<()-(#fXu8<~+}_qXypw8$zh`*IB#=hb+VP|kirE(_wCR3`9WPZo z6Nj4fqf8U$*kQHgi1C{i@45G4uKKFhCkG^IZ$7|GZS^TR`AW*0ow!5ScfkvQNJ6pl zm}p!sd58dcNKx%ENVsQ#E{OHLRGaPjYf&ezQ%ZZ#m>XO_6uXS!L*nCJ&xA6%uLxQV zffHg)-*f}&mD^q-;OqVRciKl~vyTDj7$DAmba5%h_Am{_Cn0v>^)2skx#MDzQ;z4a zo=-+3_BgQWa@JoC{&qp{ATzMLAsTI>&s${boQO^<2kE)RST{ay@aOx2u_>%o%oaZ( zoOVnX+o(Id5T6JO1pcAM5jPe?_Huj*-Wj@6$q>;i&nSuJZ?vGjz(jdWp zDGsYrh&*2NIwzmCX^`-s39W^dYaM$P4E{#iVyk#YfKgnJP#3)+WdI>d9GlC5d>qFG zx`f!S%J@){hCSP7_Qqw?Re#bW0KkF3;sRJv);bKfWp{_g@G?N)ZYhKe7*FJx$s#piu#gG{HnfpP>*H@(i&@z5@U zT~O~g2aRVxB4+K6tDP&xMnBBKotgQs!dvgX0Qq9?6FuWtnEBpYc64s<{=YuVaPs+U zZEjN7nmA+lHD?sM9vdAsegZev-LX`7Gv~{G{#b^aV)ur9P;AqB=7#&iY*`QT6W(p} z0;NBLY*GJ=hf{J0+Q=WeU11Z0(K!snNu*%4tLU+{Pil5{X2_j)sjBLCB3US5gIe;a z&Cd`7OSYTSIcY-a;;HE2HP~NsQ)>H8FNcRPC079^^OV0bA?a2_0EC)D2Y$+tqSK*} z17EGfaG_WN&K}vi3$XiHBkYW-nKC%p^@CFn5Io@M?^xABcQ_0fzWdx$5CN=m259U9 zlq;n5td-SDn4r5)FAT;@hO4Fh? 
zP1k@S;}!93zyyZMs&{Kk_5iJYyMtzpTs^u-?o<;DkfWq%t6nr?R6@t-W(`xT@SGYq%%Dp+K8@GdlUh!y zJy3z@a#4Id)_P+M^;oj*pike3D*OJz*$IadJqK59MZx)1+od>AcnET@{?Lc8 zPh#YG-Vq>=wdcT<*RAD@=Fc$}TSRE)CuYFD3^LEn9$>LH-upjR&qsX_A}1D~`uyMg ze+w+agwPUwuw*sPkFFd?wmd*)B7n7J5@vDf(_)QD%acP% zVsOJCS$^zuB3}G*@8tc#EO2e$s{jH7*oAQe@I;sm{(KLV1Sz1O1Q2*Q`jKyU_4E=z zPyL;>%EaD|1TAd>*I3`EK4TFZD_?tFey=|rEdW%m>&GM1BXx51`O$&X8qyngig=j+ z1hguXpXa}fv${7_u8{E9c>3wxPn@KK@MK4=`O!Xa!lIRw>;zu6GjW|FI~9*N(Gy3M zqt~3+BLj8{@*ew4Q_I2tZB4=>;YG&oDp_}YqR;LG>O>gqR=RHvg##%cY-5*z~VGx{mGO(7zlpA)1*bza@{ijls;41I4X*| z1pss(VClAu5Ksr1dUXo8rd=mUecB6K_--3v$AMiYRp_J#2$GgN&hDhTz zHT|5HuCiivJRe$^4CP%?4-1Uu1kB46dNPe|dmF()No~`@#*1Jfv7d|DR#?Nz$0n}~ z<&MpqcHlIU1&gG|9z1sOg*-``H%1;A1reNz`l7WGJ z5|~WRMC&|VbNhegk(NGj<%WakO=8pv%huN1WL`Km0QZHfK}%zK0?SV);aLY%3C+5^ zo=>aCn9Yy7pwsgg5XqZQuT#jV>hJZY0e?!_Zye98($vIt%#p#!`Ti#>)Aaja!I8!N(HQ zQ5Bsd1O-d)n5~%s+ObRxZeRuCMmIeH5Bvr-EM>~G1Lu&H=007nzT|f``s=nty{UbB6HT^J1yAcWa46_kx*F^D4k=FjY zvND=JJzaVA)kWJb3|-J?au#18?g!3imtzMzuiOF)jKh0|qKop;y~jx{@a$IEtzG7f ziL2E_;|P{ime}CrlfEt0n6xQnOq`!Axi$|pst|@lO$xA%R}8u*6HV`EE!gXh7-*#Q z6hdl?uHnLSVEZ<1KXein1C^>VvD+*uD^|3=?}4wv$)d^0XH+!c{-0dQ+~543ON3fn zYc>f#>b71FrHTv}n3QngX0?Cru>pgu)4Bxhj+qg5&$%0lP?y-GRq})5t!-w7fUS}C z7UvC9Jir%gz)NA&2-b!rbmGkTnXK7g8rnNrE7z}B)~?!w5dyM~=KgMc#UTOgmnw+M za3^DO9*}|T1nY^b*P^BUJhw76@_%ax1Yxasr49Ui&bPtzYxRZ%dadxtteXBomGr`% z_*G>0P%rp8m-ge23#Z~N7keWF?CKoUUxdV2;^jU(GNyIc#gBh~7(^V_^2_?G>A(NK z&3Str|CxIb9C8M?-!Z@YhR=WW%lFUTetBLjDrFZ^?*7F$hSWQ+-@Kc*ekItMKPdbH z;OKp=Hvks@TA?(=o)a8c*lPZC68UdD^ZS=ybNyo!dBNaFMLrFb{SV^fadj-`2cY)X zyVxH==(3MS!NC|H9KW?{&k*k4|M16_r+vVgc)b0|lh>RZHhP9xeO~;$R%+gkD(c&- z_Crwb{-EIofpQo5bdb`zSrXW=&fH%8LSx2lX7fDhcRfWxYAp{+FN79CdXBD0r;!@7gL9QKVgO3N3n~d~9-EDWSxig$f z0g=PdZXyH>b~)O?6pV{i-F*_NS(|FlPHgC6e&EMK)QDv5c~PYLo(`)9Eu$0C>Dr=Q zAXqmsD+wA^(e43wT|o(l-6IT3^8`L%e3QY1Lz-;d5-CdUfyYV0sxs;=VB~p*j}2>j z-5Z?kbb6EoDT-7c=x`d6L}6ABH|C>}=&TR?7}w$~+!yT9xI3t3F4oa-M=^n49B8yNkZ)_GPoM3R_E)dBx0e z(Ocp21JMd_9Nk~@o5#S6QmC&x!#kw?67B11E=V1L8Zht#NF^ z@H}AmMP`Wkk>z2&t(Tc2U0wAt`=io4pbXt9#T!p4raPbJH1DCFW_IY5I2^s&r<5M z7Ysez({(v1xx0?G&pu_&M>Z#W0?>f(GfM?akEDuMMO)TBJ9n>Nn4(in@QUkI*LfQ! 
z7D~0FOP9mUvtGOIGK_|blsGdLY8_@VCjfd}L<5>D zI&=1(zZkKPlt($|lG!uV5$fvfsB^2nq8);j+3C0em$+EUdtk*cYa2~`<>m5DJ^T=s+7svw`Iiz01^X%vc=BBkX&a0q#g1DwN*v= zT%zOvgX+Bi0x6$dT1*X(Z94Ht!lO!Qur%W#c&iEK&JCt%PAk^%vUxZdnJO1I`-ZiT zoOghdi?I}6VErU<>=hTpfe6i^q&E_99u6l?Gg@z#5}=!@#O?8^I_WpM^F6&6#x^9r zKKTj~ExtIKgFLJweKZN+S1O+k7=Je=W$OOkYc0!h9SGrZ-KxKjMAWbrg1Utb&+bEfAQMtV zy=T>P5Tg?wT*h^DILY}Q^{31V%X%q1)^IvfPXH^FI$o>yI(;S zP#j*U{HpF$9VU;}dWgx01&`sZ53nwxd#Vl&vY;ne>&@jZ&|W+poQkP86rsPL&uU7t z%(g~7-{tj3t-bKrJiTdiaZ<8b!>avB9PS7-@zBybvRqP^to*ECQdgWLPw3p@pRGi-JhN2-GpWaPA2CzA1 z7@`mpu5ME{H#byZEgO0Fm%NA z^r>tLt$NT$Tk&yKD=CGf(46F~LEChjQ8u7S)yRZ14Fy4eto!_5ix3Qku& z;^L?ofM!7sJ@F8Wfkh>8~Xe{Wn?SOAq*V;>w^RSKx3H+_7bEcD!y!gG))+rMk`G$S@d60Xy z^Ux?CJJ`eWc?wv${ja{VcspT}$r^q+1MmP~InGQYYYYQivtPHl;v0dZ#`ovj$@}kO z3HQA7(lP|(pS6$dfq!Q6ezKH5I=zoL7`m)swXK~FI}CPj!f*ebfB!p1v;2#p?d^f@ z^LqHhivplkDkVIWmSN&W<^PeLm&sqY`p$ATm4bvm`P8Y3e`Mto1P!;?*HxC zZ0@M}$^F~*#;!vj1z@!r@#3C|aF9>*uBCHw2q7v_Jzxc9vDD z9`atpATTs2DPO&j`Y}*mnd=={R~U4^d*Cp?T&EtdLa~Dgm)U0W|B z{zq_enRJ|ueZ8iPH$79Z#F-M0iIn7)O$-hXoOz(}S|Y+tBw1cEp-b4+!sTBz)!u3^ zRJ>9b^Cnk54vCH_(3v_Zi&c-?0deRuj+&NmayUk%sNgfN^w_5p1JH5~0D(?O1)sOy zz8l}2?JB^fxMf?~3pW8x%ixkxeqv7oO+Cd;wnKW@hgG}&Ftww1oc5KT^AzuU&c0+t z&ui%sS)Zo~NprYM4#ajSTw$B)( zu-K*X#U_J4j}U+v$>2{gu~p=;?xqc>$fIs$&0{ZW0FNGISZ2R7VFrUZ9<7zFN68DM zAozzmKryza0~8(a0?YMl!+Y(sR=tHf>|-Xjy4~Lfyrxofb(_|uSwJ51=Gb3c@>_BW zL>-Z>sBKlE;Hhaps^Sb}nNnR6%Y-_FSqN{G$X zI%jT^aJyBhYB8Naf51n85FiVm^V5{B6M#=LhlwU!AS}+4fw>MBiy&Z}3K?N`8>YQ@ z4I@TT!w^l}zB1+Z8HbC{3vCA@YQwv3lsHEA;CW1Hy6lEiPYdAC5w^B=&!fvW zf6eoB8+qyWX2W_&^R6DsqB%DJ7W%`U%@2ax6Cn*n=~2Dm#a$+SgdIo23FQ8Oq}sM% z+zD|b3{S1)%AG4`=v?racjH1O_Q1vx<)eXBjbr;Tkq%5Dvzt5(Nb8;cxwgaFs7%i~ zW;9{oYNMCq7o*`?d)eA`_jUJk^xQ@{P3=^RIJ6sWuPVz;4Hl=)0CG}+ ztSv)uM94{ZY-RkqG4;XX!3BK7{dgey2PA^x+Z_iy&%SbIwl8yUoykwk8Nx_FSC67x zs2}T&r_^WRXKSO#r-*Hi7Pxjj7O=Ln7c^AI41*Ci#+DqY*f2vy*HQG`k^^Ny#@BH( zhjT^utbM>UXMMx!Hukv=uddURyBL}R(J~8++udGc?uu9L7F7S*O!)cTTp-zUVW4V~ z$Qvzl{lVoO>eS_&wbYK}zM`^`jAOEM2AMv^$X=pc6zci$R>^O7=vKz+CUv=!vnqkX za9oxW%?YBV4{7%`X;2M?9+M~bX2^+Hp7S92N>9!E3>$iD>X4Bn{HU(vjQ7HploYo*bn+`WHpkom%dpIX7T$@q0as#m<*W>xr!_ zJsgB^B1e?2+2ri-N)H-v<5>?Cj$YYIbz67PMET@rb@6f}a^syFD*w830JOKJgEexW zZ-(_mv)XL`?0ux2E9gvk>YNifO=@_iyU4Kf zMAZroGeBQEbrGS}ne(TT{F~9&PRiz|ABv&9pWcPb*CX?>;Fkv&AdMd7SorcP@{=di zZ~yIo__YNoi!9xI|6juy7hf9nirgXGcK(Y{;v4@@@^<2t)hlQ9V!1OBt;A_{_d;qC zxEi9<$xbI_`!?xA&7MbwOOtCWYBud|XN7NE$?z{KjVwQpa5bmr>T>bC(3y5gOmkRi zwDG{hq>4TM*7|GfH!j%%EpAx-`3Du|@%KmF^)GyRl0Sx?KUw^}_l}$X9B^|vLwJ?p zurEx83AI)6znwh%@Zc@nRTZ2t-gm$B1bH0D8W5#TW46uM?VzwG_r_<%x z3rW_J4foWyPTT?F6hE>OjDs+XKz*j99(SdpVMZljD+c-4Q3o0sgC-Jm^EChv{Qy>l z1GBv@B2Z4~lQu3j3k;w%y1$oxude40vzI;&_w;SZ=z0Ob>;@bFk9hk6Yq56*!j%bj zH?5UvHA$|a2rYGs94BAyH;x(ou&?{(;Z|Aw(6vLJ0kP%&sRba4gT%1^yz8f(4oi!{ z6@{P0ZO9?fP6IZfItJ3BzWt~;zd*+K#G^p!Z>Qm>HQl_G=<1%jw>~YP_(w!M;yY#p zKsIfrV`pxWu;)3jPtWx}+}VqXUF7R;P}%KWxYqg5Lrep@r#y(bBBP2^H#T2O`KZ7h zkh}QgXzyMDxfBZ8)5yx${*zM*a7^HFk7|q?V57u_A0 zzO>5`D{bf1fGs_Oc~H*SN?;#5%t+CCkK;p@gsu+0R2kj3cR)7~8FTk~g>Ikz5~Y$J zAkUp{S1nH5jgEVapkevZbItTx{7@;sRRSD<7q=l!PI&^sfPH^;NM?8Xkz~*@5{wiC z9HD3al%$NQo5}y2@24iu#?a7h0v*XgjB7Qk%i^&pnxzx*kOS5BSo21p@*&aFMb<0f zOo5iW>U6(*uB4qw&01SUm=QMz?hukO3fQ0XhYmWyUnn9L!)OpR$*8;~*fE zI_u&j2S8CoVYa4b%}?erIS&~<6<@%HAZjph>Xo4gonwNpGti)-Q%+_TslvcD?wEfz zcrkr!PFr>~+(ebJxoExYJUtyj)GV~>C#H~3auT&&-q{w1QK%fs95+_IXv6`X@_zcI zPZ$&qio3fxl2DC9Y-oAz_s$AbbJY8JH@;twS6YWvC|J|lWPetL%avsX?zjK)Neyxm zpT|Y(ful%7p|SHkkOc(VJ)TBRVeA|g+lFe|s+N}OO7=94Z`hCJE`xDbn+0;YT?#Jp z`$fomJ$DJT)Gv!?S?pX^Ts)bv97C!Lu*;Gj_lTH-&y;D)Qoa*qxtS=V)6;;(YB!KH z#Eu;2R-T#?0gxf#5m3|rpUch21~>$n&tC)FQ4 
z=XyNY%AJl6Ef`WQKmH_^(wvd|e8| zmmvr{D7I%Er4_u~8Pz`v4ZDg5YUaQ&>KU(1rxDSK4FY{>+>j6NN_TvA#)>YLV=gB2 zG2B(hFOkyd86ZYpYg~YAkRN*chT{VEfV?aNsB|68P@8Hb)&`{DMov`-L!xasg z*~Lz-ay`nFy0OoUp>j46TwcVz0Y<(Xa64m=I6U4J5??1FQ;r4}of?2dgA+Y1>>kU;v(hwIb36}oGy#r~! zijPJkH+M|8epJgd(k>PTD^}(VqFeL3rAeX`Gw8EZV^G=BY-TUf+yB|IGA!6$hy2tskRPP`Q7Y#ro_vb=1X*Cdqt%$`YpoCM7KOGnC@Pi`kLGA z@C_v9R^v_|_Lp*3@atcm`{wdn&QnwCdjYYZZw!!0w}#k{y{D%K{A9PfRlU&A-D;q` zymQSddVxRs^!dO1pa1m!r@I}GRf-5VA$oBbSU3D^4ui^M9L*5Stua`6R4#pxJ^lTU z{?UW27t7_^3zpglLF=x8W!Nn5*FoxJs!Qo!|{v0w(AlnrP&9L)uDD-mCvur8&6;C=Zsr>tZ4iziPLV=lOSDLwz{^|TStE=EE6Jb*C3*RId}jb%9?TmNo%$?>E56@jLo?}v@b z+R@(pyYxFt1`At5Ro}t;RK>r9UW>&d0IPyhzlNo-nZ<=ygP6H#7=!;mLGS@!X@U?0 z`p@D0>E)c?s~o$kyQ}j|PmY^+S>m#wqMUdLDtG81sOO1_0s;~Qb`dtPX?ABP&-Bdn zbneQz>eb6Rr$5L0KIJNp0|2aNibS%Hw^n9aMPBG%eV4q^HLtn)KZC@V9TB`14Xgq! zOkqh7pqp3C6w`)lqmIg|bG3a41{r45CquO4>j@7=gFR3Vxl<#Gx6xU&k!-noG)e9o zjsz`|oKrUfeyTM*#zGO7zi}E%|qXV`$#`k)*7^ocY@b$XSb3}ZkgFuvrfcV_!$gUm&ip_<&k%?%O9}b6v=gOVJp-=glSAP9x?4iLizhK`B{jC3bm^~jQe3Nw6 z+&@IlGT>0RWk^@o)9SqliUEr$BVR`1!8$biGo#xWKow%I#_Ivm+(9r#O z�LrKdxV$?PF5~f)h1mXkTY3$pzSicF+QBC&m1@1iVI~As+_VW}?7CRRlWs;Vz$QIYK+L8z20oHLz*Ts$UIiCA=kf z@j(=0pi<~a7ed}xr{_`!p>YtoXosDB&E$g_PiE9F5G*z~4<6WV9?0q7gzv;NSLlU_ z^J8113({#v^x`>vnCDL8rSsfXI^-EX7BDho9dyS;f6dlfkX*}b&(tPP9YZSZEL@Gy zC1+TwE9aG!3@$p8mEGR8cn`}v)P_lv2hKz$?^VI1@KFTFsc3WTb_x3mQh%y_W-HoM z&bO^m_q0|Wnka{@R@!wJ(zIcuA4Ud7y#ph_CZq^MJL?uY69arWUMbaua0qP%`=h?! zZ5h;sS)Zc2Guz49BXBzWvz4Mha2bGG?zR;+IzCr7@0oQZD*;&x_TmwV7xglcaE7M} z=sp}2ap#&R#A9}eCMJYir-zK33fThSbBI+4RLE<1DiK{~$pt*_M0jSn?C76kSDjUB zPAe1POXqy>6>l5oCMq42JC%x)g*tw+q-(|RvOX#olPA{I5MocJ`xvb0EiD&trsen< zk8&+Xyjsf)kLL=`%7ILXk{uiMaJ<3Bc)TH2NI2r2xO`TV)IvCcM5lbS>C*N9SZ4rdj&)ZXnGfLyp9G@dPOkX>BHl+LDa?jRAO{4gQ0e%Ou7n9+G;y#m` zZ9~9*4d9!0d%@$0uYMdEMDBq@nF?uB^WSVGAkX^?nDRRK)IL|sAG2q^xuQBMW-YtZ z@{6|NP#;M1?hA18+LT|FB1KHc2PM0A(o>c~d+c==P;J}p=0|~>_IX9)v1&z(MY?!E z=~5z=eK)|OH!Hj*OQ_i6-8G+Yo8K$tcWno| z+qRApmbY-mTD^MH0FwYO#m0LUx`Qlz<`>p}>zCM7mvsP)UHOPIvItZtX5?ZcCx8#x z-2O=;6ivbCra#8e&+fevda}s&_uksot2uOWwXGCB`K$MCnU&t!Czj=0?qRiG;>ttz`~&cqs>`26;nr1 z1NB!sN1&9f^wrKlUi@h*`IJgLEm+gY1$FEvV!nE_EVfiP=6?P&J4zIcei8Nh%#|<; zFQu1Teb`o@t%!{ghvluQ8*#U*l=LLu(>?880dHizB69w6PLYOE(`vbl1wg=xYy}zE zMXS!9$9;pIXRNs09wB0!kX^S9s1@bDPl@|ctP@NXi?EJRi4PoZ4VxH5DQ7@z6dpQOpa<&t!8!np8*V$ApLPiY7vLj-y0@L2 z^{d?j)F8H&1v|+CNw!}#pk4#xLUHo!$-|OJG=z> z;aC-8L!sOg0>+zQi18~d;k%JMzxXR_zxL^0b>Pl-j8)XbC7dE*Bzut>A_#C_#rluO zO2z6x(*pIzc=)xVw40X?1ULH9$&Q>g(h9q$QOXF?vS33U0mb3*ZtuD z5GnvS+QFOgsi`MYzcTeXV@cR7p=Z&70uI8>GdDO^eV#;Ea_*>Ic5;?ml33bmmfzp2 zWC#VYLmjm~^^@!6mI(!|S1o5Fckmcx^KQrWb zhwk(T@jMG&%AVm5!y&NQ06Lxo5k^%SE+tNb(zD^Z3&xZ$16k8-Ee#FW&O!rWl$45H zx27|Qe7u=>#Q;YTRjhY~&?wLCyQhZ^6`qI>1?j}?Tr((P?{L)C3CLTe9#IjXUQ)*$ zXcywuUa!^Zmc~w5^>58Y6z9ZQ*|AS}fpppJ1VLcf2n|T8PLI<}2sd8BAm()614)6l zWCp_1sc)))H(y_4qxQ-$sE1Uq-MgQ*Mx+DU-6WzKIRFJ%v*|066t3rM2fFtm#keJh z2%1_Gz}v^s@xo6I{`i;vP3}w)rd#Kp|2GrtbFm@SdLwOSkE}hb#sCkyM#R1qN4!1B zJQ44;3U|CE3rtvdJFu~^v%G3eOkt5mcYaBJAM5*MVoGPC1)xal@Zw75}Kbq9OFi^oj_51!EmFu^r( z0FVHPc6!B}P~CKN9LAjMdE2Iw5H^`NFxu=>?YQA?+ax9=BBx!=d7pet737K39?IK{V`~L24=(*;P+sg3~{2 zq9fm4EuaIi=q}I4(HXQXzFfdo0Z1F{jGbI3bohPPQG_Q8C(MD9U~_G?>0W zT*a)<&xo=%H|qJE?cpjOH~ZZqxz*Ge?z8$WQv{ICsSpa;B_tnue`=;X8)?8@m#8&> zbyZL4<5)y)DsFm3vWv`U;o-R);d3}1w1TXdeu2aI^yqT9#l^v9O z`U8CIon0Mp5*islbqV{Zrtfl7j!cc*G?+q7m`{bF@q3;SuA2Ig(`=N~fx87OpUdF1 z=@^<^7jasa&G+PjtzNP}zCXGl_^sVJMj-3EJ=Mr>mK)VC2Rt@rbnB?6T5y&=K{D>~m1R33@5e0jtC1dLrL3BU(kJ zHUpNSKRfaOZJ~xOoT=!h_f_&LO?>iDPmc~lsbyCkM5X9L>G;tnfBP5xQ}F2}c^~U- z#hoGuI3q9o>)22J+^;VB$AD9uu88jS9Jy1AP{qHYcnI9li4pNbyn@PaH 
zRFiNNJgbGoSn#PoH7I;(_S`?c`urDH3{L+%JOQX#C1RsSp^KBD zByi`Qj&os_t?O}D&F$-^4F7VtUpy7M#>Bwu1xyjkV@O}0>p1cNuNb&N_2=`Bgvh3Y z@h3l5P1p(dBDplMnXQbct9Hk(fE5(|HfoFwyrbCB@Dwv#?M^2K|I}aK{FndsH-3Bd z=U!Z0Glj*ufBM4t1ONpET}7G+@ULxk<_h1Ci@y|8(@1r32z#-cDHt+Dtxs zA$1cl+0`?X{VVYx#CBJle<+&CG`9$X>gVmg@mD!O>sEz_~rI|bN zSWxhTFrdcFwt@G3HiomD=NxW!p&Mq@dwr;bbcyc*QL*W1*{3Xa81pH>F>0Z7D3T$< z%i+PBrj0Y9kbPp3*Kp@_3y>%Q-+q)jA4+T8V+btCLKq|%P(A4lzg_&=K^Kww`ML!I zflGj_k4;F!tC1?7-;dDffNC}!H6VUGbku+_zp!X$`M|)mxw~gxHx;J!Vs@0d%L?6x zeXNA=KtdN$slSKM_hRJf9{D8xDW7g4`&-g|m6-9M`QxbU-X@Q^~t4q;Cr*T?5- zp`{^$U4R>D7lex8yc-JwfG2@xn{DS1=+Sz;UVF<%E_a&4br0?sDN;t)+ByJmK*~R| z(KrvHg^3frJC8bzx(H+8A>c$QFN?R^0N%49Vh?S!3A&Mw&~4ZX`#D!;C>yaD5P+a+ z7v^#JsXfk8OS!fLKwA*rSORt;j^-`Q+vDV%vUQcV)SgUVCCL@Mi=!d+m2*?)$B$x| zzr)tlycI4|)+p77-K{q7$uPs3F;czQI^#I`AgDTkeSlpxQ0~#}i1+Byc-z5%AbdN9 zx9+ev{{%k@}ZHz3hqm0AL1e80ZaC>lE@{En?bASaw+3pE85!%IF4AI zxHHt^Q;Ke1w|dg_D}#JOsk=S>X}iKq+uR*oJdg4MIoXF0Ch%x#hx1tu``F5fU^>Pw zuHhV2_O7hYKeg-(`hwJh)qP_y=XRLwER=sgs7IM``y+9@I zGn$z}Vz@Wl^%ShuhRZk@uLN(8Lt|y&xrvPV8MLbRPu;4}KW}6mV0(44XEh#=s&V;* z_cdFNlUzWUnocu{?jdIB`+G#kTkm)X)D5XU?~X0E@V4P@l(3J&Rmr+(ox3H$O|#7?QuRksqzS^gISpnYdBVLSi%8?T=K z$^>Gaq>k0HaIxYAGRQFYVUf-8wly?y6a1x@iA}FJC{n$lpFHZa?_u!~b5CMkI0+JG-3^@u%Bo z0%W@kucgyxe1g!2dfA%BN+6cs!(Grz(`A$cMsFRniDC=zn!hX7AFQ-&w)TK(d^*$k zr%UAE#ut}|lL8a$S?QhVS;qA302X$~b*~b-g?8`dK1K!t!L~yA0$z&;7U)7z8MC6Z z6$3S(y2AW@M)1U#m>Ba{@CkSE)Fs$Jri%;l5d(ln%)Z0$nmjwun(NzVPu7(_my>F7 zA%U8VnewPLLaD?7BM1(qocAsO*vAomw(m9BubJgWWVy0 z%VESN105p_n-5MXbuA}Ej}0y?GLWvy+b*1>{N?2^C|BkQj;#k7s^F^i$)*E zbu*H;U5wT^$b{^P&d0;Wmih{&Sgh)1nO0F*5_J{mdBR=XZfUz@8YuBgZRnQXb` z5jw7pN%(9IM-aT->Ti6vp!|5uw}kV-3C%6rOB)S<{OE(`uYB~alYSS_s^*kT88%tp zCB)s%S!!WeuwJ((X;S{X_n>B63_zH=fHv)F&!00m4f2r@aIa!cd~W=`?!JCPN6pIK z&cm(BE+JL>bsaEUc&kMMKE!IlDjuvNWmU53q-yW8TWr_g`$AHEytMvva>JifPiSS#)vt3Bv; z@q2NQ8E+=?M<+?wr28*$QOR_UQl)uox@SVmn9bj7c9np~>C$j~ss?VPWbDWptp%9? 
zgb8j37rP<1EQ`Z0bWRxiQ8f(s02=^sK(^r(kutyteqlzdj(MKZ0nvS$W=MCS6zWVc`G>E&Tdhu}uEcdu~rtFE}GD{PJV>GZhrr-*pg`l@|+N`nHtr!zkcrM(f3M!-(?^qr?4>;*loD6W73rsj1$1G;OA)6(jq zfu~I$FgBweQzg~W5Mze1q&#e+UJUlaXqmnqqxZ*M)1V8@fV-3N;fURCLr-9MWn_+M zU0UxaL~v-NUHiU;M$){N%_>gbFK*tfkpvnkzqUoC&qMjdTmk?b3ig?RJHy=$osm z2=*+$=%>DoIzQN9-xsX;)sRl9tWWAnPX z5kGfIx|JL|4h+Cr*p4~^bCCjtL($0e$Uv8ghG1)V*1X+KN<)2_NNw_otbdt58tAUA z63}C{$|>4wfnf9r#aTOs29815Y_}QFrrVag6RX7egdimqod}PM5KIN+?bhU!`*Is~ zY@_=|*S2Vu_c;k~b*&Q?jFF^$&Zp_+7Vcg2^s8NM&JviNrhTLJdY9Sn#z4AmfkjOTH(PW}w9Xeg-=UTJ)m{B_BE`fAa?|dDULsf)^ zgtmcx(c8&4(h8{n5a~ZN>s+Mql8J*|wp2Z58!ee|Um(1_Wv4OKen67JI_)_tRn9=# zXUHoPEgjP>QFRmL&SRUS#AqmyDMCPXae=0ne30X)+YWSR#!rvpV`Vdmg!M%}?CVNh z^CCMw?8L}(&!f&i%JJ-4f8y~%2Sic-#MN_*Eo7#E73X|`9P zpS(CK#d|i(#xBiVaV5$3$P>+$YSx607OhyqiYHR*ARctzOJ(*?8^G8(CEAsFBSiMP zQbh@1RncW`{qX&_{=&6}eR1XIoe{AGuJ;aer&@6#LDs5#P)^0O|L(Ovd*)Lw3V`jbTR7*KtL!!=pE&pBul~*FU;WI}p@aYa zz3=?pH~%VDt{`Z{JxU;d`vcpH&?{e^y0QA~{&3iBk2Jj+$td#T9maqI_Ui$2W^r|5 z<(a6!j+_z8?bzYPNEshK)Dd#HDCO3z>ARw)cM|WR2!2<2Y)qv4&HHZ&WYcoAq(LQ6 ztfUyc%HI9U<`4h&FK+#y7^Q&45fOjRb>*COhgV2BkUZxc!Ef~@@l4l_O`~eA+#ZI) zmRnW-Iyc{Mc!{9gs!AlVAD%urHctdBfISzP_?dtFLhR;+XKuRrG?X{q#e_;vKFjar zPut*+THid%3AN&w>MtD&&co639B^(l6A&zhugk13bbupG0zS1OOk^jVKDIMNM1?-d+udE4>1Rd>ziGD} zaS(ki z8)4e)?t;s`b5;!E$$gI-;l}XDD7W;+sRp!Yaq3-CD{;U*woer$lgV=TF=44qc+i0y zkU9;Qm9@yh&>>K{RL0v_rUpQBUIYM!L8C2qfdeE8uW3q35hRs9=T%wAF!d2od$`xk z^#G?r2sR%io09}s?PK(qw?Ih(0VpVjA$E>cM}Z>BdxpS%x99AZLsx+;Fi@R7MweEa z=UvV7#`eNvT%lJcM^6PU92f}Hoj1uYECL8}`k;@_CNQ}Y34y~nY_Hz@< z!Y|{Ez5oYUlQV+Yo>X@e@R05W1`05E2wVOeSJWQ%!zRu-QUHvCJI?8m2O_F{w1P@~ z&WgFzPg1HsfSG}Ymw43b3}kiW=Dn*|j2m;CRETds7#CH4v{`1wCOfXNkiwU?W4si0 zR*(}d*nsu=SW>Cp!~o%Is#8ibmHPQ5B_*jjN6@XVoNs78NA@A_%yVRgzS36C-_Y=m z-#h|xetIEG@6mfT*?wE^A08pu`hthNp1~b#p^8dY%=PK|{a!G!C|X=gZ=nNtVvex$ zYKxL(viV#%}*meaBqi0H6uV0sx8k*cF1x1jcc!`6*_?3*HP zxFYeK)OW%o)Evj!9DB}l0=nN1Imvklo#9U|(Q(_;xl?PRQ|UyVB*f?JNDw1U7KU-e z92{verLXJTde+tSodBx8D5MuIfDG^u9jtra#yS7sin-+HfJJ-;^ zO+WLMZqeOOB5a?V!e%(tU!!`7b1^TX{z$rGSgr+$nc9}dxhCvG+dG4jN3ti9jg*$# z&WrL{ZwR{`U5Jaj6ApX6T4Xxov$0LtZ#8Jxp14G9t0#?;Ap74p&Bo)s4}2XCEF9{G zq{l-taHc0UV|nMOY#EYdHQAR8S?)1|#rjRnbzC})9wXd3#t*6OO#1=bSFf;W6s!l& z4z+_g6G|Y}OZIP0_X9@0phagAx#P6$@C+DngVE?#V&8n?WCb23MwgAayF9391}R~U zmLII`YyFikQOT#$dv@UQkN#@wQAXqicTKzL;J&aX!$hAdW=6$&Er0yM$*lry_K%%! 
zj(efw?|Wy>@E1PCjhla2`x7CbS-?~K?D{V}_pATx|N6}@eojDf;NSelH?x2J^#-Ms zuS^#G<^7MpQG3e$RZwd$od3{swFEujNxpS-??9??hk@RlM!n>WekR~HRv61mQ}dm^ zmsDL$uXPoiT}-b`fSKZQVoyX`gMNAUsS|Kpt(2@=>$ZudPq7m8c-PrBCL!SF`Q$^z z+_tp3@yYyUzJFcv9M*t1=@qqdNZBrvybU;*u$9If(Uw_yd z$1b1TT_xFczzmu~U^-K=PVn{nP6V@Ipm|Zg)lhI zlGTeRk1|72a!%$R(ZK+8DhM3USKKbkwlu1@jcL1{qCD6sQA8gTNeh=84W5QDhY$e!iZs; zA`LMKIH~|p8g`Pn(;aDO;|AGI0!qbAR`b*l0JsK>s{P|GeV!YR!%ooc9{`kr@leNV z2Q+@ypK?xxy73XjUYk23I`<1%U!B#3xrjnBbC)xBmPViY;a={rd(WnA=Lg{h8{Vgy z5D~2T9{^ep%vd2u=!;XfP1bwb0tUKu@{AQSLfnfcMCpLot+-z^ z<)gXqal-j`TfsXohBDg4#jfaCDoiFa7=9yRck*T(BN*f%kTD`?hXTbvd)` zNZvW$WcTG}UWk9xY*Nsyj4WXk*ECvVpBsl*_9Leaph!-R1BbO^jg8mP zGE{QZ5Qn|59cTP7TJ%A^p7We1?b{!Q_9V|pnS{r2(hl-=-z?gv?Z8LVUbukuytE7}X0Ses$MroyX*!lX?het1?OMwc=cd_3$ZqeB zx}Leb?mE?_PLy5pIH25^>+h0$tO7Y>E*DLirJ(;r^lZk*nB8YYs++AG z>khUR_P_)Ihzv=N7}DrRDaSHBr+#w#!5@AbvGzPS6PhHgW@^SHKr3f;qS%-AxFSR} z8hXF}3;*WE&;8uLRDfkBnD2Q3m>^qh(c5C2YV4DdMLD5nkv?(dIG}5vWY^t9Ta#GL z1tYyLZ-b370g(H!n>&Ct(C76IfhID1vgsR;s5}_8ny?BGdd+L=a~MR!bdO`_)UIrA zIob&{DVZi_C_Z4qaTzfiP&6+x;GSQxvOphqPc0^7jxH4v$gULgvJ)&2%!NsOR<(`WZ7drz> zt*q8%=hCgfli=>a?HOXZEb}no${g>XVkb@gt;$Ezc{|8vD{wVG4Ok!$d6cBq&G9qXEVZaDdOyx8~_Pxo*F9d4N4%>AyQYS znmlTCOv~CJOa>iy(-?&HhRUW^u}9tE9Qha_Z8J{SNwS0Zr;&3b%eiLqAv#xcpk`%J+nr;Tj^GL z*c<-gl}})Wn}UcMp5j$*=^YSbcrsw>gE2NDaCf)a+Jcmd&h&~1-c-qR$Y}8scL=Ej zqgRI^XU&EC3^lgTc>6LQwDkv53^7W`aiio}8jf9FPsnsbW!kakhQQ?<+sk}ynhaJB z^lB2u+6v{i4M8v0xAN)7SJmebAXuPx(L~Bd`DpS`pO#5>U^b1TcaLoY?5GV>pY1jf zSIAYGI7(Eq=l$TRzBF0eo~LEtjBUwhkSp$`bX+6@QYbw?*T~nz@N7J~zcDnnRUk0( zwiJK{2q85YUml&#`fpF^onpM(L9U}y`E%WO+#BF?vM28e+~FecZepj~_M>`deSFgO z@XmG#uy3{uI376b-=jygz%Fp_Z%Y)ApF&A%>?uk5kfh0K#Wt>30ZLna%8h$1x(+`o z$?tA|^sRsQ-9P`%_x0^=`oc@oPdYA5dY*YaP(0bxeTF-hWKjk zzzeR1LlgOH2F@A}A8ngmyN<48-umFZPPwic4+ZK^j(0X5iu5^+OAuZGuTMSE7E!lW zkBEw28|yeZ3?_yrMA)WJpOTkN?{XW6<;P812~Qi;m?&?P@KDjqq7-BzyKBIlQ4BI? 
z6k2Ll`!8Y&4*-Az2letO;^l{ZwhYv9T`YCG=G4@MMM>=U!CuXMXger1a9k~dqhR3_ z>4d^z>_CfF65Fk)R9o87d0PQchez08DDu0>COmJMm&V2Jf9R0hAuwCI-#l!1^Zod<$<7A?^wjL_EBei8XOuF=A>2NYO zyt|lpDpo(CEO6v6`sWU1(LDq?tCYXSLmRi?BJp!ye%!JofIOnMsR|CPYZ4&s~z}|oUDD%dL-~3T_XXpV$J`k-% zaZHZYj!~H&P>IpZU;ID6Wr*^d<&Tg0M~BT=UaU>R6JvV6gAc|SygRB~fX702@+b`s zQodm@*0-a5DIwcTx$0`rDn<_sn~CO}H3Xo2@wxg!(y`y&ZtxErUck?^r%3l-Q@r;$a;9{PoWuN)O z(!_YN?wG(BSW7h&lmzHas$Chv1~LLvZ#Ybi)lowWsZ!ht9H3)UQX^MJ{jArixt!yH z3zq7nTz;Sq6hugz;>uVO_|}xW1M+Uo0d%9;m|+Vv!HXQ(7dV0>?@CHaLF9~$5H8xK znT6EjxTLUc_KPR@GavuLsxoou$@xysDShZ3UbFj_3leb8&0qWVmwxV_|ICw-sW|}N zfv2WHz%tGzh!%0sE^?*$vAvl$BjlLr1v_1#x8H~yCXCTsxv7(uv*A!WlTKQvoRFIw zgo->R+igJG{%G9B6ha3KnQqGcUMqYoy2qe}*->DmLy7^Ywqsy0)4|8;v`G?^CVgL= z)R8*sz=bf}g4^DqmYp@N!ko`g7MjLUGvFb*0Ss1RgGxOzu%WYcAB|=J1g$%b;Iz72 z0r##ihth}op+u|F+uqDYV`)g3XRm(RG0uw8z)LAe6^yPh1DP9V0MzO9o9eVAZu0xK z;gz)x)1JPcD#z-9O|Cps)(_^MxH78M=G{iSi=$LKE;yNv4sl2e=>_x3f`9TgtLt`U zPtKglzR&Mr#7jeEn^$#GlgG;VAW&JbhjMKkV{OI#+C?(kJi8`LNWQHA4VRu!dit_z z@6Be30Wd$n?Nc5Bk(9Y7tQ`>S0k-(`0EXC#`;L2OX6T-sg&JTf?89ohqi@%I0jhcA zVP2uggT8B#)oS(rliof6+{556#2OM1$sz+$852Yp8+XChji}f82K{E*CP27nfXEb^#)phnt;OT*-aE5WBlcuO-BF zv3IG$S9A=Fmk^pV_CRFK-h1+NYHv}(&SKhZy*qSX_MB9WndZ9MjaHZJRJ}UIj*p!v z(ZVID?d&#+=S<~*ar^ur(-2z@t@_C6^DH>M6-RI00d_{49w^0KkaM@(5$C~!=ulnv z+md_8Jiqqgm4V~_aSXS)>e9xT^*eKGV3M9;r9@@pb!}nxq?YoysiK5U+JlJ!=3lta z<(4}w(Sm^1q)G}lAi1-W^W1kP=M^^u+0>=~Jb0+|gn8#|Kh>u7@ z*V|!#09kFmAn zsPNOO=XQ1J38HXJ%g&BWPD{PM8!JqufD?nbT=S|(88~scqpJG-lVYq?zWZx;y-Pn% zB8h9C|MZ20g*SfOzItu}80G*QOwrt9zdrH^cwLcXb)E`7{Y$_5`~Us_xpdw$|11B* z*!u15|F+9|CjY}f{?hZm@h>K6pk@NU{s;eOv92Uk`^(l$xpAksvxi3(E}e@)Gt|b; zNA@W4u>LmF1j1ndUUtnf*SHycVq#9NDVuEJ0(l%d6pLZ3O(}t>ysb&GN1fSzBKH7W;U?!^A7SE`@x>= zMCg6i5hDW;ZjzZ2Zf#G0o`0}~IV7U1ISp}M4M}Gs;M{Vleu`uBHl_H|Qgq$Osu9|8 zCwXEQELPe+{^TVnk;kx6_1wr&Zg*cmuo+BZ0Z1pf_XDM#)lCojCy#`H_F}) zMBPD9dd%dXK$)^&))|KY%3^=Bn?0`?t<|8>90m-F&p1=)UZ7z-R+-s3msNJt4?4rF z)A5SdJ}&588_(LVBM#kLmHE&S?HyzuMtVz&ZlSHoY&7Keo^?OAjpm!va4p+sd+n$V zA2@@jeZhA;kyM5m8tAKbBxN?U0p>h^cZ6C;G-}BTQ#^4ku@OPF)7Rb^ij(TrV9dpR zsFQSOj*vkjdVTz{EoUz(4(_2M#1UyS3gugPFqz0~WgSOelA~2!YZ>j(LRfs?_ik}bAyHTHb^ zdGnRwFvCPwf2Mh|Lr6M>w{0nJ5Y7KY-MSS(Ql+WELgeaXKMUD^k`n8QXvUW4?iJ*0 zr{j>lCo$(7vV6&2&TfHe7v}Y^o-5|lj`QE`Hha9M70}v;We89QAs7?_fbSCD@Ayl^ zcaVAer%pFE=MfqU(0`o!@I?<$f2VRKxps&544|!PY<|?=rUIbvI0(|_b+WtT^5TwT zZy6m(7778mW6tY$xrk$XFE*?{tOr7ovtllsa_3miNr)4b>Onjb5zFE6MH}=;qn;$e z4QLLOc)*>(9W$<43+Bj4IQq!kmtW^{>Nh&JYmdH>_0QSkRCCnqR8OZ9Eff zQ&nFg-Ww0*85`NC6}W}=u(!xrSv(zLj~hCQsi7DW*|ALu8n2wcz3EcoffuJdv8Vd;fI z+zr}nurrKhi09oE-RDg?ON!I|=(yge_pKbj8EM~_bJZocI^nm8cF~p_0}~wAX+*p? 
zv}U1_aL;hnp4lKE%Z)f(d(#cuPJisT5ei5L0aZ?N;{oqRASnw0QCupI1QeJDt&F1A z6f2;R18@F?AL^V0!mQ@7ZbcX)A7H%xA$F$}g03xM>17wNRTbJNvUvum23 zf)}1w2S)(i$)i`uK!((kIjIGe2^+%^mT&~=s6GS?JODGyrf+lgOao0x%NMDj$K>3} z7tZ!iL2MAdH=?~p$>X(_2hlcs_fco=`N*TXgBh!NGf+Q4eg|lp!=C@+!H5NN+c&4WGALzmC9rwc*uvAs8dZnmHP(eL%?0Wkr+7}GeG`<>b)>P z3IrUD*}A-|ak+-n@M>@`bmExv+ZKPs=D z73(e88D=+fJvJS*x`+~vj_Z7-gUp)ksDHXLQ@~q&IZP-O1R0byBdmx`hePN?jp9?@ zSC6C0W2=SL`xa%h#~HOFmK?*L=0NKAZFHg&bVa#lb_a^neC-?xxS&8sj$2_Bsb{P_ zx4Lp?z@>d9X?^P4tS0nt+J(DjG6oH4aU6FQv@G?`^y18|i}!UB?j`IBVt+X%eyc$?bV9g%<{;R2*JcM{+|3 zD0OyrX@4YHZI>lfN}%82n5*?i^<9{ST8eSrT^VjP2VHl0mi63rKL#Am=lWpd*c7Im zM1r?%(Xh9^femJ!OSWV`9#}`S9i6h}%RK3R`nXxc!n|6|y1gSa6Ef}6$zT!<0<*GW z^5sa+YlUE;-msePW;}wzj$3M<;%!lSv<1MJ15$(7&^nVcX>bvzswSZg`=o|??a;dL z*h9u5f_PS0PA%zdmX_HD(}3%aV}NCh9o8`sJO&-1eY?Fn%=rkyp$-Mt2yahz?0swg zGH^Pkj+(>ifDf3zcKMy1TOo!^6YY(oGs5xwgzpq-Py3mYXLfK5rb{RHAt4+sb7rNV znKEZpy((`4J}|{sTI#jO`~&;+opt5P1V6A<`gGGz2na?jsjY6p*=1;^vb9ojfK$$W zx$C?6a-h2xnH;xA{@U%j={g}iKktqUC76z3W}uU(JL+d(={&Exm*GdnZe=iz{-rtf zdHLvNDwv0?A=c{WetPbg`R@H<<~uLw>l>{}0x5m=mCuDf_c;~zro;HflPhaWC?Epw zeCs3LnufC7?~e0W4FC+_h2Q+W&;2a*wTJ2H>yGk4PTCw?N(O%Q$)A}2*Z(*Q0PE$! zPyE+Eyz#+F{6b)~;QH?NN2>*EarT<^;>E+r=E*(VxM^&U+tz-OmQo-awLf241g@k$ z!<2R|&#>F~+Z9%PJVC_AJ)xCab7EeuCXr5q>$HloEW5r;7r2|g58sJu(X%-W8Xg_b z&c^VTGcbU_u`dsQLKJ%2zcxv~@EgDQ^~WD(f9FHlzu0(O`p%27bh3>9eW&c7S`yzl zEcZO2P)S22>GW_}?bQYNe(vf#eLLQ}^g8_--*mW`gl^kh%Y9z39kuJ&ZTpWC+3{e% z#Dti(1vC%FB5TKm>9hF_S7juOlXcyZ3IP`6!ZpS{+bm>>vr=!Sq)d@v?VM2^4W{gA zwO4>}C&38=x__vQC;GZ~(o>}QpdT0*?s%))i&~czVbAnFoonZdOHOsC%E{j8eWB3~ z%yX@6NDuq6Udui}8Nmc>D5y2>1whBy3&jg%y7m>wl^fR?9aJV}CXca>xpkm$H8Y&* zi&y}g+V8*PNhJ{2LyXMfiD)-fw}1b*8J*c@Ar4Q68r_;>I@7shzdS=!dP3dJ4IuyQ z^kA65%YA-^CQkZFl_DT#{H6l$<3>Mhy7Wd(er&i9>hR$F%fQ9(SJyD{t;Jf%#a?7* zx)SM`w*ADUJG;+B)3MX9fAGfmZTVp}^!~8oF_Yrp@# z={j|?^E=&6EInZL#pZ`nnletN=Jq(p=Wbl!;mf}1@#Kqid*_>(-@XU*W^qi^Lva4e zCC)ru5(bayPt3R1*UP`~=rgW)`K6RMNK${Kfi$vlp47K`h-1lB+cR0@B8{D&Ld5a+ zLHhqvL_iq)#lI(^c70;PrB_FlFNdvM1^l2>9YAxUjxPGnMyVOSIYru?;D;MVN-0ax zqyWQ7nNHR_TO)7CdAe;TocvBuyV$Orh7ECqMyt>jXQ$OwfK%X6XbklHyU`AWNn(zg zz@6F0E(Vj%NKtJL@w)%gOU}*)9!B!(&8C$-f7f@<_JM~0;>cnFfzL%mP6BLB{pi3$ zIW&Z(pyhN-(Y1)mSN$0!d^N>igq#Q{j0`jGqgq9p2lr*p5rSd`sWpK731+tE*l`fi zZV#HkNH{dMeC+1?z{;XFJg}*wuyo6d$5pe){wq@3ElF!HRO-svOJqC*{3lULIn@Zi!9 zl5f9AqTL-b6&Y2L-rO+IXyzmRqv?zDr!~k2n+Zqvpb*4x0jKm+)jcP*X7#Cxy(D*{ zmn^-8+$$uOW&ww6>xHsx&4=5%(gdJyo$1b$n1;$*vJ1IzBf9m7aVS8$tT{Qmtn~MQ zSu7wEqj6vW@HGVqZWnqhEb&p^fsQ~r)z0OfGlXw!PDUO8!b@?y@TM1>aLjOzj1%n` zuR8a+yGheN@EOf^D0rh}He{%F=$>*O6V9hrG9?q$5A2hX>Eq$CpVDuPTJl7;`GBXE zRo+dc7EwGqT(gQ57mu_F z(s^lfL@$@1$|H?;AN*)qr+*~w!C(GY|L^lZ z`&m&3{_C!O`#1mPXIJgjHBVI6OK)smo7U#$*|3FdO_aueQ;gpvYkhS17zc(EA}NQx z{jmGhWH`$6gx%5Xa)z^VCdaOB?)HeM^2kd1H}7v1IQu3HTGibs_HxPC|j_X?i?YRgCG&|4=XAg-#fQrnuTB zg92kwZrfC^EyH5v-h*6jO zohfUlPCfv|P6pPlxdb??qb?TVzAtxX-7U6f=h_pO$pb}-?lI+yrx zXI*@K^3%Rf7I*;0F8@sF?6vn>p*_d<)L>xtV0h@VIVruC@S7*ryL}xV9UgE;M$CXm z-2(Iw{zcC54o4zh^K)t|J4h4`y1(@j^rO>-`w#AKUoSAN@JnuFNw)=SqtPG{iVMr- z)G1xyJY4lX9V*{DtPu~1QuL{Vr-u`(&#dkK<%aU@$*NxQ4RpcAI5wpR%IU7O-eyXI<& zsM#A^Ehl$F6^|8H+uo^`_pw;?g9aDo5_Y|)W$jR`trpw%cJidN?O#p8S~jS{hG84% zT7a`tZY8r(avw|HfMcU6govgYwuz%zs&&r@PPL15-y&jW?6sKWDv&jMgUaK?r9q`I zJ@AsDok1y;+v?h$G~t#6vIrRoN&31@n|m8C;m`lu-~30yPxw4-&wGb=D#g|r;1pV~ z=a-VJhwMlsfG2x3h^~#^K}Wy(g7W?6FE3TSmxoJPtRt1(NCeP$z-;?ccAzAlHEa_u zJNZx+QNa3KkNWn<(uH`jFg5KnY&G9UbGuJ#4x=m$yTCnw7$_EVYcLM(Fagit7=W&e z*w#2X0BW+wh9h8b96VD11Jd@P4xj_6K3X<02kFKrN$%JgzN)d!pnD9ycC?3##$l>^ zt4=;Icttzih#_#d;f4bQEwe-Tv_%A0Ga#o`m!rGL(OOcI}Ec$Ctx}*;1%Y(_V&d 
zIE#>Jh$S~k=1y>9T4RHV^8-e_cn|7diP#1j+!z2CqDC&CrGX&Y>*Sem1gQ3x8DVRf zrbxv@noex*7APzDjilpn5|AG}T%1qmPeIJ)HuWBBVRhLveI+~WPT4xUO`9WjlmQ}4 z2TH49NAjeYvyZ`+jx3CU9dCyPnAI3_^yp-uhS7i)=`fX`He#D@-#G*jy6+1lsxg?| zu%S6yW1ROtBdQOBAQH6+|48UY~EFxZfb~aUSc=xu){Xen$)aZ-iPC< zkuHpzrJ%p*>y7!r60B1yQbI%+!IY@_%eBE zM}Ekg!2!Qtk=+ry$<-Ll*i@VZPt3EbgC?}$s2TNvN8XrNsf&hVVw`~~D?!8G81)w8 z_U1j3aywzZ=;q7@h^c*8<#N&U5L~^gmBh5Y{r=$m7m0({Nx)M?fyqft9^-7YTbj|m z4^s)I4vEHz=5X1Ju??UUX+uG6ybbb7tZw(v)Q~!nPE|?VWnrNx5Hrb{V=m!2{(|EI8As zj7B}kiPt^02(Axmfl~SYxBG=_9`W95x469RcYgMF-hK2JfAQfP=b~y!naWk0&!>hz z`Ln-%to3A*yqX>I-dPEc3+$WXD0UROp^IzB5r#lap7UeG+|;-C?4?o)3NN{(m{-J{uVw&d&I3USaH; zSMP!O4EKsW+~5D4#~ty$YfPpNS&oxm z{OrGsfC{kYJa0+)d%HI7GVAL-)|+lEC_hlNR9MDJPRcM{F8yJt?`5LQC$jj2hSCO$ zI2;(-A>)D4K}pV0cT$-PV4-=pS9YdQEHM|~LbtWi*7%WU;#Zd!B2&6-Gwa_i6(Ve; z(=P9SaBHtrX}3b`l9AdhsCFW7Ww|LeyTd|^Uy6D%CIDclHh@>y@vz<<*gP^N_GsSI zZbL*IqP;AC3&>s^j>qDR&H2%pGU=P5tD7_zI-@G_fgM8?oyZR=j_3tas1K7f!nIeQ zDb<$##~=OPJ8|G501R^z_CyBcJa;?8mm1iiod*hRQfo8Vdaxs%I5fJ)M;r;soz{d=8M~3`+*f- zeOd6l@$!_%)tbo&t*Jcw=S<$)Xir$r|Dg5pKe%r9KJjyZ{KxO&gY-*3x-hxOrV%O6B$HGH^jVZ#EEqs5Y@Z10S4{Q#gJ}-LTX2leN;nmf`@ z3N!X#+Ltf2R@|{uwBjQt9L%u5*D!{QoLDksvlCOiw>Fsc6gJg>8*y~j>$HOwstx^# zu&sJp11J1)n+*7*(|pzIO%jGEO5n^=sIu2?huRE1f3mOtgzR`z&zM|y?OgAjB4>vp zEGlY_9Pnt$nnLk|8gqqhWvieK^6HcFs?pwXgW{d|;DTckYgdmO3+?~@&wuSAG_{u> zMaQq{#NSLeI&Z%&pz>~t3RK_#t{o#+u!}<5SCX7 zcMt#-XG*x!a!p|3i?Shlm!*Iw>D82OMqn%sbEtrHP#YWGC3H|tkKIM|KmF%FdhPGN zhFF>ql$4re01OD2)(ziGfjzReZyLad0*(0OsKr=lKVJl#4mYN2NKC{Z1BMf?fP5HV zc8Z;>)&lQ;uc*KVw_KCU&i&C$O#8aJnA8qv*DTx^n~4T+f58ed^zgRvoUh*#rpzat zT6XkX|LNa7|Lnz7KWVJ_dZP%UAKk#RjNh?iOj->B%&ddw1eYIa6<6{^_xdS0@-@%G ztxi4}opnXarve#s^q!x-s3}+ zI3Z_xy%CIMQQ?Z}&@FdG9L06Wu7FMVSg%dACdPgXXc#(DNb~kbajYjtqZ_^Ys_63? z_A_BKZ;wt5PH4yF(g2Dtx4E7u9r#cEE_m=(91QxqWFdr^!hq!w4wPG?1Td%61pQo+ z?W-`^svmaYG~O175Q<3M7`Ri_E_c+7m7JK~t)aNQ?YZW=KwE>IhS&af|AV)V4(U+jUyT zp&3BRc4#!ztcQTvqXy_8D#J-SFtjyvoHHE^{;=O)%&KRR;LIWAngzQdrQcgLU1b93 zB=4|~R$ESGksE)wv47Uk#LinM?PWu9uZ6}2Llz2mOC8cFJU{V&KlP87(;w&mcIUsn z&3j+?*_V3j>GQ9$qsO63g4K{`B?8VtC%01`x7f0{`)we#4Q(e9NCH$7u39;VqiGnCtXYC{3K2G+vX~!gz`3w#C|MIqjQf;XfyxUp zMwE%Qq|XB>{;}12z;>tUI=KJx+|7P~OXJWU&CXe}eh5`Ak7o$hCDM&DXkN@oP1l8I z0)O|zY#dSk(H8l?frf96z9)O8Vhn6k{e?@WnR&CcJ9pV+F{?h>jpL$bS2nej&@pQ?Gt~~JWBbYLQ7@@(%CkRboic(#b&ytNMK86N1^KmmL ztRTV7#|AJ75TR?bTLIerL}a8*#L%0G7e+7C>}9*C9vvN=>$R%5cx>qPXTJF2YlG?E zfAjb4w;c@>#AXy}7x?NwP|yF`FM1#5k2XH|#unHO3ig2koK4yMN|YWCG##xCWt#1s zO=qcbOxgECo_2>ytRe?xaT@Llj7pwP5wpik9w>21(sGqNkDWMxR3_=ZZbc-cM!Kha z5Bh4-pB%Nb5+VUHJ2Rk$ZOJx*iVh#q_q#j?Euud@QZxu=fz;2Ny~D*=kkX>gX!}9c zxy(PjWm;idwN37%9x1BFzaJRG%~7%mfB<_siKj9N1ApW=zgE)C^l5FBMgltB9tYb} zW~jj)DCe&`)u{`D{La{&)HkZJsnrhCnrMilbk%sY(6raNey}su_fr#Qq25FFYQwk2 zz?JqN{)<=d?88GwaXfb2+jkCAe|Y_g38=^I-mZ4?qvAAzplpBOEjsqU@qb_3X?=;L zbw?}ItemtbH5K-mpc!**q3g~u6?dqueqwPx@yF@amFv@L6R6r6FONy2>>jwhl~4oCwP+usTaIzJVKC%m zQz8A@$w)o&xaLJQuE-xzfQ7}bRI9D_NITb8bFQ_BzZ)u*%d66n7mD0n^m4mR3r>!S zeTU0kuPU{H`4!)AdlVfdjnSD)Um7Gv{rX-SSs8k|!ybnN4v^}>u7RgH-(boh1mRh; zMI+N@zCgJ&p4Ex6C&7y4Uep#U%V&TP=gq7Sg6pmsz?nx1EmE0*Q(33}l4Kph{g(7_ z8lTuS+~=Cd8k(q@B-hI_>8Zkr&*SU)?LE}GRt^3wSi4v+l0i~-5}8qYse8N7PEZT1 zeb`5{a*JSFev$S)3}v3kSYXkkJ}ow>%Bm8rP^#z1R;0;ujSo)wCBfNRaQccvdJL&l zHrRILCjGIFCR2E`#RXtd>9-96v^nU|repKG?868GFi!Y`X~ebF30#^vY^`5#`zs$( z)M|Sx>pK@H>eQls(4^NmLm4YA1eKgU4l{s{D(3YI6ozTSCr01f;}>wbicflzXFUY4 zqxpLMRjmzlu?Et?7L0M$z9adSLEK@BT6Q`siuE%U`t+m=4Wz8Ah9NR~Swip3{Q*wTt<~{`sL?n_6 zjcq@4b)rWxJSy9Py(Srl0=Tb-m1L7)yFtE<8-ab=iPgQ+Hs!6zHdD4wN8H_k(`PWi z?soe#E5=(J^s5waro@ebEIo3Ou}K`2aaScbaJqX<*;UDRms0~KJh{Kun~V|1d@c+| 
z*JtG?vWo_`lT|g}oP&8h`u+@Z)3{<-wN!Js3&f_KyLpUFX2x)5vaxmy?GsR#BOr5Y zMv2z15TRj!=ge>X-M~$Co^^M+8hkl9`1mdSLuL~w@lwZ%42Di^+`RZCiQm7!RpyD% z2ledFIA6K?3!i(zkA409|6;69{rVTLeLDbnA;>XwI5nlS?-?Y`2_JT7e{k#Ho>t61 zE4)UjR(;`@SAOp2|LjVa{jDGV-q*H%ymAqEarJXREpl-0|CxAo;i-CHB+QP`iyxdk z`qZEQqi_B0ALI4*SDyc=?`^&=?RPVQ*wO;^g`a))jRHRW4}Zl+J9k@0Aq(|?42e%N zBp7{KZD+2q;+yKW@OaCp;U{oM(;af;a;n2cJb?Wx$&eVe@!9U3ZM%C4umG7(52)8j zWtvM?a~;I^agzUZZibh0Q$hCRggs*nF?bq5;}ke@NY>F9Ad0QrvPH^2CdHudb&ZQHw_9N+%l zqkg)QZ}B&vlGiZYNl2k^n0M(`h%V zxVGCP9z|D7Tt*uJ<#ucQE-yYOPOVqB88!;DwLGehn3TU_szoC02HdUcK^&I?1Xg6* z$Z7KFz*z$#*G+(_KK+8tgM6A?1A?_cK67gvH%G_P{zg5ROr0EW+Q!SHxS8)4C+xn< zzTl|e?U9{lT(+Z;LlF-p2SM8VD$fni*`I_FdtDgVsGh`f6dg38`TSvc(et=pqnCpA zy)Mz4=-Aeub*GNbNr5a`321(O1R77m05gSD?!5jcG^)@k=kcB1NM7c~+lcG(mZLT; zo0}k)$M-$CNLlG3G8)j#U5arwY7x}_GoOC79}$9BE4Y1UNIvC#-sK>DSU5g1jpZ%d zfBm0-ymRw-0e|B7gLR#paE*$CAnl5o_&dFY2{rcOmng6EiNIXPDjX>tvy8uc_oJDF z7r26Y%D7Hyzx)63cWWijGDcK#V$Z$o0k)_4)D3!~h}jP1JQ$=Obfg(smj^O9#gcMP zE3(O;S3yeFxwYeWGOJO#G6LM>bBmLdWGyqwAq(`u8v!f}8_lMHv4<%e*1GQ*xYub9 z0_4c5J+wPQR`sJTfmuNMav%Up`3mNCW5)%_b}?&hfV{J`G>jHN2jp$DxS;POe5uu| z&zhhShnK*>W1pHj;-Me^)?c5wh6i;lK#IlM&1SfMNoe*p2S`^v5b5ul(I+U@K6m`6 zly+{v20Y1akt}!DfrDxo$qGo0iO%ln0VFsu?P_To_rawLu94UI(HoW(FN&GsWw3?=7;F(5)BC!<4QL>)0x&*tApp|rZdV#PNFc9KLp zXDZ`B5uJ&37!L)$--R5JEE%7ka_=9WB9*pl*@_5Ihw;GOJ=5mwbe(asYqM48IKx=T$I>benv` zXJQS)sm`+LeXG&iHc}HjP>J*cS$uyYMHa9tT)uiHAVDAO%=A4@CTBHfPHbl{7<74D z?@cAmQ5mVaUFS>AlG(A3TTsM4Xw}JC4rn>bkm;0K3T`@ZbJVe$ED=@}01Uou$j^y< zx1n5{D*ed2r_4xx2PDB>g4I%Z51(cQh2MU?~PXVviEND<1U_cWzs>( zPy*RS0OxuN1X|J!$4b{IwT7N<=e7%Ydc>1c-tt}FoM$`N#s{&~NN$ob&MDVTrPn$G z7ba5~moZUlkF;pXms;w+_IldyIKdr#%o`8{laRb{>?}NX`F^g zAZI|$2&gqP7XHGj-Rwr7y&V6}U;gPoy*@`r6TOpLiwoB`KiPb}vpYU*?*W@L_Vd4b z^V2^lXU_JE)uV^-RjvBIf-bvm#8@&!2;I-(iTXc_{{#54i#!@soFf3_!BH@rma((y z_dd7-3M|Qa>woxH`xhs6Z2a%Ndefht`d9z@e^u-2M`!&fre+2yWg7v0hBkinC;!-< z;84U?{o_CXWpC-phr;W4FF?I$ztuZmDedhX9uvSLkbk0Ydp}|PgB5Rf7>xX*FZe&q4-_D9Ru(y_1|qRK zTR}@RfI|T3DpqCHP^WofpCZOgw6s}J>?N%eI&a6vgl%<=vR9P%H=2iyx74ECUir!S zS8232nGnQU_l+MBM}XmBH+>_N^yuisv(?52?KH3`pu;?lP@%QOmAQ09UZlHs-+ynn zUCj+8)DL?t;qhq?#YwlD?)ywR+jUDT?cwEecx%iGL0pBRCPBmJ4wz4k{ zUZZBXf@9j<+3^QQy@LqmG23NpVV*pwR^!C(eznuKzBEhfbI{h_T5+pj^^3remGU@`l{-$fJRh$GpZQzw?mE)Daq1+1vnVHnLCHbF@0;!+55d8$t zdowyhN6eX?_g-Nq{f_X%2ZxkPAORAefvQEn&#)-EHCTAj^VEgsJ@+fQ`X=|R@nY<& zuQ_n|_H=G@sEKqFuq1cFp+ZUJ{8@VCA_zSk)gP^~G2J)qc5YXAdK7WSJnUGCUZLjpRs zJ1jZ{0mA(q8|r(c$V2b&B9}O63C%YgPNs3z zJCAmE%K$Y%%D+70<<@t2M_2OqmLCpx1Y$-xyw zYihV)ho*&LGNC)hiO^RsW9zlUnSwHOBMPi`w?Z~Y8<~^TJ4pbKHRpC6p+fET6)jfo zX+zEC{EYYD(J>ipL0!g!Q$1&N{b4uIIL=$-s3Ds$$g|r6!Vav;p|`A z$;`3MKFKSPvs)78H1^mDpUdv(YZSz|x6pi~z2d`4R16(GI{9e6;;qwu9OIoEhvUY@ zQIdRu)A~GKE#?46uOU+on|Ri*%bN3cjXKxtew6Azsm6z$#$k;GX6?(TVmBmV`{BXt zzBzVUo$OVn3;^jYg3Mc3?fjG`oRDY5d~6mGBFe^@l@9h>Lu%Rs7A(fB7i;PG#9g)* z=}QT#a)^4U!-;^$)F{TfJ8n29{K0&%+i0bHYR3jyzFG@O;XcVQAp~#@3CJEJw>n%@ zQ!XSaUOK7_!xT=OG~0F$8SK4bo`-EsW!g2W+|?Ti&Q;YVALJggHI6490G?+Hoejir z0wC#|Dh=;=kW2d~A5Hjz#!;uQwM?A?uP23v=5ivnD;@e^uD|WAd|}VG!4>B(c`El! 
z*5}K16d)b{&R)+wwCo>scAiLtk58nz;HRhXom@|gfF?1>@}#jO!0}0o7tzr^E(AbU zvNi`s)H54(x2+I%y3c5?fukUGZHP6k`5>lKHt${REw|4GkpyyrMqaA)$}4v2sF;+b zM>Y~8*-@|^*rV0zft+w4V2XF}n{Nq}s}HiRk?NUoZXormYk{E_4LQhqanX*vGwPA+ zD=E8W>DzTM&}hzF=$)Vwl$!IJ?ub%9Se$k6CEmEE*>@1N#iCZ%YYQR0Pfo;?v%C2j z5Al}0SRF{uF>Bu!t~nz`xT{&YX(4sshRM<07&DjUi!tX|=8_}g3_iHz^h-XeQ*FY* zbbSMY1N(T9`DS(kh1VjF{4c!boSTRQ(q1|9}0e>K-e;Q2$Ub}JhJXT!mTaQ zj=BlqcJl|oJwI>NvY&m1ckPhu4R@J%+;axsd_!a&{>i`I-aY%9kJ|R(kw3Hdote3* zTgsbJtUFr$bb$Hx!~b=*7`XVvr+$S4wR;Ys^wCMZ^+zy@4tkB2$2NbqA78Z)NxbPO_iy*A_fwxON(C&#aHV9$U`vVNj){6cR$hF)frjXuAyChzo)num~YG zAD#k_Nt9i&DMi&UqP4zb0M3wZrd9vK>P{#KZ|*e1t&(bUWP}36r;`_Upt{=FtpONZwOItkI(ws#Z2tr`uVh2?G}G^N z7h<@(d|gtk1NW#{^Uc-Uw{8qliiarm=O=XY$Vr&hMhtN5(t~7ld0Jb>{01|;SLjZ+ zrsos?%I(zD_Tl9@|4E^`?;utrJ#Q_3g&ppf9E59eE86nt+o5so%>1h1PAzteReSqd z0r&tGgA)lCJt_hk)CmvgGu-3Y7cRfRXX8`OKYLv@_=DifpTerF{&Hc_JA>r{IsZrM zf&*M-(u6$Nw!TgkKl|iFtizIlOz&?2&z1b#wOH$X^I)qp=OTQm=%FzDDfji`=Iu-C zBe|@$r+hfdSTBtz3Ng8Q=y^9^rE3l2^3?RW+g1Mj;+lW>a28??k4_er+8ec)y*FIF zhCnAi5x#4Ee=>%?sQ&7Yzx(_7oVKS(*X+Ruf7ker-o5!>{_N+`H&r^|n$y;LE9~=z zNski2CmwCQMSEonyYwsn=1yIlsx}Ir(pI*dO6IWBI}`44ZPak? zub}NW4@$ z2TSki7jlyzap>6}eS30OY;i}_htVIw5#9Js^ytZr378_bd)hal(pslhaT{yx=hcTF zBw~y-cuXtc{SWv9Yh%sK>b<9)!D7m+c6?d+(#j7u>@Sk$%LO zBL-d1ti5&Fb9GPr)|02J7R#|lkXe|z%A(w>7*nG?Rl5DsGnb>8=AMGg*}ErM(#CWL z$JLiFKc`RCscg7@Cba;5z&)P4$gvB*lTNoktzX4SD@#lCX!WvulGPlGO>L3#l%zz<8F1)~z!Y4()o72l&-t@jCymimetRPeor~XM^_9D8w`b3Oun*^o@{N{3sa>W8R7`He181)5F3j5zPCW)07`D z)t!1L;*6U9<0U^Pe!|Ztz`ozZ85_co&LyM(z~ZOw(TgeSeg^XE&C;tRJD3}eIfE?; zImf|has{lczlam}XpPk@bS_gYqhCsQ@1FLN=~T8n^eOx?asl5dT#%#(LuEp#y+vqq z6K1>2)A1O3ltJdf@c|t~%Iv^h8wJT=VJqyKc_)y%);+El02>(b`*>9voi?!rt5ESf zA&Ei(R1{`&)Eko}4@I*U+Fs8rPsl~db*n7BiYBYZ-f`=usEr~$%MAyLg1(K3DMA<4 zI@qD+xQI0M4diHABW=QSeG>Heu1F`htjV>(XzZLVcTb~eslPxgl_|iH&?*JcgQUEk zA!=~U;={?-O5mVgJx8Cj%~a3!{9$*l%UPXP@^#>t9-_>+AH`fJ>!2!N<#8YN+fg>u z?+?u@4P^w@ih3wZwbMt6a6V`|QqSyU0?0r9IOt#-oPs8tSkKeg1H8X@xzYcA{d8^Y z*j|)&{d*%|!d00(RBaJI|4Q{P+H^L{tQ+s4to#Ui$Gv__}0MH>tEQQw?@c^~aJ zrgrUb-_TMOEi61|dVf%8;ZrzdbZgZ0w*$olP~7H|1@$CoGV&hcY!hHgGq(_^f zZo-wwzu?J~s25(L^jn1T$|xXOkAOc2Y`6Zr^x^)my_(4f>8QI@**U0v>_hzNM?Fd1 zF^~QF|404l>kl4W?yu5f`OyHp9Jt7`R1!Dd1&dD-3l9_Zn_2r)quJZ-?kizOFVX~V zVnG;-r2o&^mH5HV$}G3{VRy&i@fpDtc9VnQbNa@uyGY_oacJtfEB7lB#!wqSdV>ey z!4_>pMX%t)OCmX8k4_d7j_WLKPdT-Rp9pDNyk>oJ=V%=K2{8PJ4JddHifaV_Ju7pC z;6}|eptv+v6^O!Q2i+AS-fMKU-4@!+pjY{E@$rt3cdnxChUWH7UHZ`pNFLu-B4kwc z+f%bZ<=&t=V@`{i#u)UC;-;R+j>q(Jw2>&R-Zux%P8I2@A}GftYD?_$<|H}l1jeH_ z)~OD|5nIRAg=z>>0z8b??eyv-G=j}US~H3Rn1%fq0D5c(7QOgn`J)YD5kp+DwgM$m zowgxHIUyu}mcITD$IkL6iu;+Xp%2EQ-$CDMy_oJ*K-YaqzDbwvXoLH~>v?1wl-`|| zeGS2SH1U-<@rEDFb}=H?JuPUH6LZr070@)u0pw_8zV{t*SRfql~jJ+|F>gNf2`@vmU%BIL`hX(~L*V*ra;dH}K zHS-~OPT#A?XZ)=_RDI_Y_!+(X6iuJm))taojW}sqWmI}G59n7{-0k*2m9=2pQa|e$PJ_;hw^Qjedch!;@_v1 zLtb}aiLBjAO*!rs>p)v}Bgj!86glE1tX2FenMXbqI%-BoVe5QGPma6a6!!-;343mJ zlRpNZSc{(AI{VZ#H?};bBBL#iStK#cwF}}ugFN% ztIhvfIz9@sGH_BOVKVRAYWDRAaC9>L-P>3PzsLmltP_7Yp3mk&Hb$@9M~{)1lOW z?44Te^Iw`;oj1XP^NwCQXEj>1+3mP-r)%;;w4a%{!JXV0e-?KxX-m!SP!r)kOzbi z;rBxPAA9eYKmy4N5E6pR8k@Cy_xjv%pX1(r+TFXltGcSo|KB%1&Vzj<3lVOk0`2g! 
zic})|9R+6~qp-X_)5=JI!M5Y?D?)*&<>ipcu3if$X5D2<)Zrn(j@W%0v_#o@8WK`O z#Wbqh`XdF_ADiP~HKo%S!fHbsVKHe;g`eh4r-rUat(#D|y<>o_;ZDD6sKbLT1427= z_psf!V4WK82Q*o(VP+d+bn^`WvN)-(7>pEDZ~l{>&wfZc%vY$zA3;{1cqG@UkG)+p zK2GA@fdS1cG8h=lwm^>V$pBs~Gl7`t^h9wRNH$(@6LzU~^Gv7I_5%}ky~Y~{r6X3D zp2m(|o~W~H$;&xTYqXyYv`{d7LJfRzxHQt+snMm0vxi3&x8iPhYw{_v?k2u@RF0w) z-=xoKeI~*~k4y9inRu4|;)XR3uWS5^|Ig4bdx1kVeYFfXev%aIqS?=@j$+ z32BO@6#dDSuY{=nsDHkNyAaI(Vjg zLuI&aYVMq^)O4dK2X<=j1MTV*W2I*};nj?HYO^u4>Mt@Ld-s5D1AD+nnEs{o$M>Ie z$1gLEoA9*gcJ<6Y^kT14CdhlmLr!Pds(eo@UgG&QpFIoOL3gbF{yxeA9UpAk=)k{v z`%8cSr+zH7{`PnN<4|84z5VjRocpbto-Wh+?swi7MYhixW9NcPxBo$!e^uCeZ&8E4 zx=2%jujX+>Ir*aXs^<|N-O_)$xUj`-0J*RyL`E;Xb(VrG`$NUe_qRXN^m(qL-xVB| zarLq5lcf$hakSW}f20oQLa!Us0jl#sCh98J%~iwEyx#`^Vycf`_KWz7zt}%)q_JRL zGEX@Xgn47`;yPxEEBg(8W@t_My@%?a0Ajf$XgSnsIV1p$ie?XG=Q^-uI4F4?-M6JXNG~`l?%I8GLw56a+0{oUzxL+r=^y@e*nY(yhk(|24fJ(NDMN@> z?30e|&ArzIUp9tS_WH9Oajl?iQk-qCl!dbwgzAxJY1Y&j9lpPZk_xZ~$F*Re*CVP_ zHp9RI;P6KI=A{X@*$3KRM0O@@|GB*J3ED;8@z?4J7MSUNzWT!*HIDevWH+aCLBpHt zgl{|eM<-HzrFB|Oha2ZvDJ?Ok4rnPk^KMX9nh-zNLitvEHKOpr(h00rd%bt~%93Ut zpSVVBvWz`gwEUOoS@~pO%Vv{4DBS(@@$h}}sB>p<@P6i%Fbe#;moML3c3%A2xh)_0 z*Q0}#i?+*wOaD86*O-`eW$7*|+v2)302)Ko@MROgfzakD7+BMMX`v9GHs0H;c?P9U z^QIu}=ru?$*VrrcfWueoLTw&kcnkEdVZsqcg^D2n*&klNG>{4tIPVA@d z87~GyQY48RKT=KVPsT4N4PJ1%GH9A2ruNg+qp4fV$M-R3u9dD2;%1NzH?sMdg))oT zalyt9G2@bFHIaNd1Xj-7iVQ1Hr?vsoXLmUS^=QE|)jJ-vu8XcJE}<5)A$@;`83O?n z+*xJHT%UefX6+}LzW-UTYj>JqQ629tzqtUdg=9wQAw$o!>D^TP1~J-RjM~Le9@Iw4 z@VU8FMji?W)i#h02R~D+oKm_|o-^*l^bE^0MpM&uYAl#>Nj$>p3lQG5npvXk5==uV z&=m@~-W5-%)vj#=NJKdVm5#z&_%~pFxQHg%Azicy@ zN1g_H0XCRP)Y*CuXzp~SjzOdRql;ok=|>`~h9!f&M~(zTCH$g2w-F}uiq4>IPMbJ4 z0!zx6mVzeRMyZi#7Kirn)w5aYIahUsNS}-X~7Aw1F24R^3$#ytX9+@KBh{n$-Ll#1>|9YeFsu|wJzq4xv_f;NCE?Z#v;cy#d1#oc7z6~s9eGYbyA`P( z(ZHt`RuTe;wbabG@O4*+9b25NkP1eOKh29 zybVs_Rj6Mp_Zgc6fgToapw%$$>i~TdSQEk%OnmLd;jJE(@9bQzAO|h&&DL1+*3RP& zW1kw?_v)0O7B9H3+Get*QHx74l+z#?A-&i3 zprOW+*OjBp_Q=qa&F4t*jsXrEgTw^MZLkJcA%_ihsA-8#kUD8hySNwhlk!l-N)uVq zP*0i{$NNl>I&R`!U=JG@<%>PP$2GM~jZ$hCa!NvHoDlbhK3gJ2IVwvEqIh1*7;O3o z5u{K8PYhK%-tXl4X@?|>es2P_`0=qt(qPusZp5lwr`R33si9&Knlx|kS!zi-hHM#1 zZc|ViSj6-loT%l>w6JOMMe8skCnNT@ywxmCaL-%9&9r?4vDRRwzBGXfX9T}x7H-=} zG319_TC3AV{Z6szz{4|Fldn1b<}aPcf9dC=KYMHGHsMWh!Fb55-UBwm)X+C{&b{iE zhI?tR|ElD{JHzZtzw11D{@Ry-N@JtAUucGLpy#j~fEoK6b7y=tgFTPFw)C5qP;%X+ z12HSXq#krO=6?~odiL5<dj5sm`{F=vAe^=8vCLbXeXmtcl#fHgP%gdK$nT0m2`ssF!h9z3 zg)dmWyxV#<^!4+;g&tSGaP&@N4)ZQ>3x_^7bx13FjhaPa>Ub(Q`Bc~-a;Q%QQK&! 
zT=Bt*DOq!rY=Qom9w1qpDL6(i_06%^%3#i5>9hy;UgS3ij`s6Xa?pGjt#`!CQ>OT+ z2uLq{1Tmg8%ijy#tz(UqVFewLJBt5=Wo8x@fLWnXx1dGb*&T~ZL&dzQg6tmoZTCmd z=sW52_rh~@+rLBdBdil7j9Ei9M4uTg4Zirb;OB_^|L-ouTAXynJ|2Tf>ir(h#y=-YNB8 zoqgRc)=V3FWVQu-k`#aM)6>zP{TyHl_v_O5nDSoNbut?vBu^Usuc>)Y`(*0Vz|Z~B zFRuLD=ed_&;0Rb}ZG_xMR2lNFRgyMaQ-W=qZ++&Mte>80zS7_f6(`x7+&lP7$nAYr zA1HGqZ*oQCG0*hr{as*K9&X5*c$m&0Vmp-J@)%k6oS$I)BV(U5K{r~Z(`{3?dy;we z@KwEXB3sgJ%_3oA5M%)9xq*FnZX^ZZ{s6X-X5gJha_Na4EN7e*RavVSt4DyY;$sJ3 z9V?g5YRTWQd_BZ*WmA_B0p3UJJ!2$bbcoM$2N)Z9`u6Sz$4~zzRmnfN@>?x1`am?chxP8anGgSIR6=|=XKBWxwl$$hd%+&n`W?u!B1Ngv(DEt zosu}S;M{8sVtsiADygzzuosd0Eti;ScH^ne8*eV3@dCGom+Fdtmp6BgrQ4v`oXv{E zSiiyU){94OHCy0A4>3ltoNKwP%AVVDR=QT+5(WRDJ+rCr(2rfdT-pFNv zzL&p%An{3Ix}*9BSOjkN4G^G>2S6veK#Qfa1~|rmfruH+DHnsZB;DgC$x4e~8qt&Kt_74n$qp_|PhmsHC{6M~Ryj1cqGEG613|54!v33&9(9fz zN`hW!pG*pKc_^c%M%dL?(d@EA|5!uFQp6CoFS>aU>JQ};2W~I%MZXk07PV$F6CLUJ zUf7WIVMrS(l%S~HlpEs+gMs^8*tDQ+xQx;2iFDt56bF2(x8%BRr z#Ew?8IsU8<9?w@iM;4%Ndc66{een%JYZ|460x^zE2{7fy#k|uz)l+Y?uIGT(%5gy_(>#KZkbpmsVl#P2J)Eembwe16#oPdmv?!gF zZGhp3``Fc;=+Nkz*|5Q@%gX>o4}56Wmb<(KG~z^~)d+*rKBr=6(mcx;t2UhODMd?p z%EnYWCY8<&4rlQRylcR7W+rQ!UP@Q?u>ovtIaySNMiaXY&V-to>{>OjKgUv?*&ndb zTIAB{|9S4Y<;}BA_olsYW}u+X4pmK5gbdN0=ZGEuN(a!F%bbKvlb`Tg*YKNF)3`9M zn&l_XsPk^uJ4J;@m0g}Rsg!%Ag?~)iXPR47e9bhfb$PFd)bS)?D(GFY>b3I%i$r|+ z-Q6X#rAuGc7WO?y@2TUrFx=RexojdNw2qup?(jB_O%z$ygAOgc0$EK(6^)9hPZ`({ zuy7(9$+ph9v&9`@2{u_g*5II9c8dLmeITdsEm>L^lY?SljP7HWzMM=L4n0Q5nVG9z zt-flB=*ZMjM2{YoPr5d{Q!Oy#W*Jnk5{|7?v&jk!Q0qak6?{$}U2mFY4l+OYvB}0m zw1eP2DNo5tMsga?)YOLCu0NDny1@j7P0l;$x#k>3uTwXK%s2nhLj1<+)u9yZ{nS6+ z6U#6D`Twac2O70O7gf=0)AuHR+>X|obGN5FL+FIsyqig0L8fVmq7Uxrp2x!%pw)=&`8?OU%arfhfj0-vn(8O* zz?YLh`_FEtul=~x`m~d^tN-7YnlNAR{#4ZU_IKY7uH6dXT3Bj5+1={!@lTxFRQvmh zKahNO41b4nIxq#zop{^AkF1V?79eM?gjQc$a0@typAKw8x3us8NUe6x^u_&fd$b!K zPncZ#ZqF|YofE%g&H<;{f=iA9!B36!yVec%P}U<7<$+pTG~90MEjT>^uDyx<*>}HP z%{1ol=D-?`Ei8X!S!W?DYYCh*P^692Ee5rDH@5brd~dY-5H`q5wK2xW7GVfS0w|;)xjQuvDh4gBt?M9kSj|A*;c?y` zohbGv&B}@7hquX|1!JiJzGMJS?KP^jc1jdNW)L3xj#A7#^~81U|1O-Aq>eRCeUc(! 
zUHRdk*1hq+)E=1pugSZAbUKvd-WaZRTCM@T;lrc0zGD!;%qj1`c{BZC#-{})|NiGj z4?E!tgoe7DV46HM#6KB%^heQ5`Pd3f@SEpMGaEfu+_WihvrK~#Va#!q9xgG3`2NRz+6T^P49=`s}zE^KK zoW_Hg&wEp6?bO$H+aIaaQ65V4m?ZN)8A1Gb@q4zeNmF z8L-=>`X?_?A|B=8Vah$<9&Hp|g@{2jE&PJx!`RQyepy`miB~9VGTj)vJw2Cc{8#8# zf-e(_AAM#{pqV2_*Qvw<1E2{nsI9wxDcZ9>NrPEG%@nJ~H|7oBPf?vE=S<&Xb^%_G zU>$J06dmU~!5X;1n(UJ`{?xWB6O#zO74E zZW;)4z*Fo}pm0xIwT};2cA`JM0dy#;9oBj!6!1l$!ho3Mpwx^=ZwJ^3J)AGKrCWsm zrBF4(l&K}c*3PhTeK`G?amU*{Q?!^-kIO%<+K-k_jC9B$w)7P&FQC)zsO*mx?mhm~ z76xI5?dm0m$u2gId&@EJ#g4t8dFXFGysOrGvJ}zsPm5+g^mvzWJ2MijB7V|a+ zT@SWEy!eovt7Z&+XVxM5M8f7ZB5;?5&=DJpuRdq@n0AK_FP7-oAd+JQlld6dl5n?U z7>nwrLSBc5#V8=*(57G z;o0$KEnyb=pCQz?VZz{jf^LRTw<@eS!CnW>IeAMzZnT2^6V*8<9gKLYV9sDx6cTekY@8+oCTNl%j{NgPu|+J9#&i!eNdY4qXWRRZM0EgN zIK^2M2s_ez-BCVA`+5-2$a~=I}P{k{Gl@1MyB1#8uX&B$U@qVaXQqV|5anmmB8ljRUmVCPZ9UEo|0p&4nCt z^XcLV{KV~jJ!t3`8Z|sTAt6nL?Nh3?e88Kdt5bulI0Dl*i>|!U8R>TO3F{(9zA_0e z;AyKH{;EJ_UEWo(ad(_k^io&6!X9B{D<|hZM>I zhAIFj#R)%&?5NH$03bWwm22T#VX!7u6PXbsjD>0tqKpLCSmFcs$0z z#-!l-fEr$NS5SQk$_&9%BsF^G>^tqG{0VA`{lIee!e}q$b>SuVTN$|&|H-RiRT zP5j6KfN)mme=z{1)`2#=1e7a~Mmba#aUH-nnyQ>^Ka2 z&W%@Yg+Tq!&p$r6dvt%R_{6!nT(Rj{R)_e7;v$Xu2K;mS^!cgfEFWGOYWncPg=ujtU#Q;1Il{cdvetB zel)zhSO3&6F9}cW_b%f9_V(Ym8*Kl)e0A!||4x2EQoc`P7rkG2xw7>bt>h%`J zz(4xczkBViUwT;iUbK^}sl*fguvQT*ELgn@`d^cF%b`xT+DBv#;i{?;9aFwD5>m8j=o4uiH;Yc7JDxaQ(~xR+#D#8ls-O6m+OBQe zmSK-aVW1ftk66TIke#YUjfv0(y=y=97NPoie1yBe6VK^r+eyf>Y!>Bd`O=qs70lFq z2wf)^8`h$5X*I%oebI(tXwJ?RZ2+lF4Q@+cbA0r@lgpE2rdM*>XKa_VKTvIN2KD3P zL{tJBZPQg$?8HiXI>B8!ym38p$2hum0u)x>^}HJqrF!CKz*99x%8a~hnWcaFZ$H`Q zKiJ*$6e_=Gc$4GB(Dh2>BYCPSL?yBm6$EshY(v?b}w}tM`Rtiv5Ih%k{6C= zDNDC2_={Hki>?~%vRYqgakCBbyskBj-jVALS;|WPU%&QueDilLFN2sTdM!KtiR0j+ zm*j1g$F1F6Ek+xb0EfZelRxv?0AcU`(W^2cUpp-yhvdCgynP(5wl}^MC>2rb6}hk@ z1gUU$rD4f+b+=;N+cuGWxvPUK@6Fwiq5L`Hryj*$%>LE4-mjbESs@Snk8ch)KG*vC z&n3^`it?>%ctg#4xeT}-ucO)&I9RQICrNoIX{zt+70D6@Rl7&SEszb zKWXm$hyD-X;0m2TKs*NYQ#AD415O_V!&5L#OUHY;qh6YdJ!7qJAX{LnIyA*7-;vUb z(dJy4wXR-susLofu4v=glnQkpDpTVevbG!7E;KhfD_6}#&}zWjuZ!H1a{;G!-zR=f z*1gBr$O)8Zj6l+QDN@Y+mlklvJlC@hA8W_K-!nDCRnUi@%nZXtz+$)AnBnVYBfSc# zoc9~vPvNrvRNU!R`>TL;F~%ay%`uAlbMJp!)mMjRHdG5GeXr$}M?$Z%Y8!QMDZ>qN z^c=j#1uYm0&7^?dwQvIr6Fha8h1GWKim-T`37%)HAv(hLA3 zwQW)3^Hzz=xxzu03r=rIeX!OTTx}Z_0=&mk16ht4L`Idb+WE#_uYeW$j>k*PjtiAhW& zSVIP3LGQz61A#flEr&_SsG$K~$_$A_)N*?uZ?#<(SD9%#4K_$cdIF8t8F+;q0<57< z8x+Yr>0n+AdTad%fXfXL2jnjAHaIBLa6GEAV9+N)B`aV#!CC|DhLbzej0~?R=L()~ zkcE6sh8xgok>SGLZlQlZ)-7_3P;}`x+tFHJFpkWPYCo`B15_i2DvKnmDSk%fjB1c; zzEmSt|Aw|5STr$ByRI>!-8!}Fwir^olW0GuV|o*$9f5$X)u+1hfjhR$W)5isDeP{Q z7a~?UaG_ngAM5%F7u%C!A#)E~!t);>LNFhrRy!yAVA5N)_1sw<X)LeCVGexku-=7yE?DMTdb_pFx5}lJ6*l zcOJv<@CdrAkKYW=G{C)RcRh1BN=zMp(wW9skd8I6mUEZPUm7}e@pR;!iq~5EGr_5< zm>t4i7bx)q!7ZtEn@wExmLAmHbQ+M;JG#EF-F9E|89CzAsV*{8B&o@&2 z-+KSwg_8bHk1wpQz!mdkOt*@5{tzJF`Q;yu`rqwmh15heL8baQ?z4EJ_A^8~^T`L# z`c;Vj#e_8!`hpNVeOCJSsc+Ab{(r>01lB&~UM`s4b(M)`L9A@zSim=lUta?@YFQ0C zx>E`8pZG7HZ41UDM!H6OiQMLv1Dv?5jC%7EzxA)3fA7QJPyg`$`qFQ5XPe*tf4_ZE z7l=>)y#Qp#`@mHOR?`Cr}BX^aPuGhhSIXvxBln*kO63w2+Zx%V9-I;N0pJ!W>hWb>atDFypqql z$s(OFLAIdxK;E4luG~LJQ9cuBgKb+M?s#>>gh|~PJ4akUa_JnfpK9x*f#8*95T2Li zp+8vJPGh49^Ndb*PN$4ZS1;b0eq+u)E+C3gI~{Ck9>?mlY3UqDr+0FY(+*3xbImlz z+74?^C*iE3wr-8Np}Er(COuUgi| z9U@unon9o@j>+!pG`9=;rB@&C+|3NDT>zCZht=fF^i>bcy9jhBTn$==j#b_)u0{!a z&6Jtw2m@o1XHH!?dc)llJ2A{Rq7=nuk_gqc+ip;ehR+RJT@GIUkSSh1n93Qy&y?H& z+A!d)*REGLbFcHxJDGFQ@cV6{n zvj{i?Sh*_o9s2tGrPV|cLSKVrRTnd!<#o|N2LEW3qQtusHD`%64f2 za0$|fhJ8(id%(G`UVaEX6LxHG{^GK5)Z6<0ku2Ef;?;{^m>PHJPx3F`SUB5ICalAc z@7TT&&1GBk`08|~aA<}7eddw16CT`hT|2BFd~ol?@>AAd=J5ZqA*(}@_ac=5^o 
[GIT binary patch data: base85-encoded binary file contents omitted, not human-readable]
z2(7I5iKb8E&ZlC0>IIGNZcv;HcA#-E)QNz8p-U$gT7|X&k5)2E+b#5xzFSM>>na8= zd}!?WJmz&aR)hC!3>mKn)A!&i%upmsHyY(P;&1<`b=MWCN9#$HDetPANzh-Oo!KC} zJe~GEst!gOTOoZt@yv0$@yvCwp`OO>7CJ)adz~LMYWRg#86A3-!|R-1vXCLnNID3Q zpi4iM`mHbB1QI`MiU6|nM}PN+oz9={`~m(EGBGcpSgR?tJPAs6#a~t7a@B|~ z$QzdTT$$JN&%NVv=p;~EsSg{z`sHOyI`MTD54^@r zqgIYC(ECyc>2{<1g$a5NJ3dfUy|VD}OM!Hu2VjA6ayNI3LjF8k;iT**qsbLrEnSa# zk7ob^JUz3n&NXBo4ZjJnLa5WAo?qdI5Rl1g-m7#J;U|(beMZp7| z$*Bt;Od&7oTPJ(RqQ<(!63(vq03Jm9| zQOkzQO>k?`8t)*Hfbl3GXZCm^72iL3#?8l4_0iNrGVW>RE)ev4+WlGbIyEYZe%Kti zlLTrTjptL;O(a|r;V@;5$$Owu$v}9?UJY1>lo)7dhSW3sEZTr%mPydy$(TwYa4R!_ z2YGnV4|sG>AzOy<0snzJvAi%n6oI*hqtggnXYl0~){9kr;(B@bf(Wq=0WNhi_vCI4 zJB@dpZU7TwN%ycco~%#d-K^|73NKpIO30&4ER7miITCGzz4?9p`3lYv2%$D0$QTyp zRGDcA^>cyG`&6`dxWIZ&1<%m3>3}Wk6#@g? z#Z=kP^XeGNW|uQVv$rrh9JE6Z%CmG5N0~{xBOn1EcGMik$$5wA2V6oe8)H8ny8&8V zE)ZIz!L_>WwPW@aamiIT@>E6Rr8Fqu~}`H$6dRA&aK9K zKt0Rxrh)X8dZ{<2Tv5-a1DzX;qz&QA1M<1IXJG+)l%O*G38-_sj_Hd_uA!D-(-U_M z-P-_%-4;I)m?q-l@ork0*7>js^Z>ytW$Bw|BucxiXxctdn|EN*zLnm(z}t7i{J zKhRTOI*i5tmM53bp6X9LXo~7YyUC;4?)0LcSG&DRE4M&8?8Sl5ulBY#qMJ|pO2lEh z{(1Y^dq_5yb~lm@|1>)|_%xlTosl+>v8r2PKOJNGAv0uNX zxf=;2PjjG{7T*ThF&V_xOrbot8XrB++sTn^H{eN!lk==7<4*7?M-J2O|m#f$kPRUuo) z`22=B8C+1r_mMXMtbdI#p4nOs<{IU`vX)svxf_4^avlvDbk54?lZqyuH#c~u&ep(s zW~^dvwqWym(hPnAw&*Atu$^j`Rhw+`!i4VzD{8~LCdNNaV&TeD+eF7%6gb}b?f)|ppvymF$8o0q!LScV9*c`ROZp(>nH(EaUD7HdQPALWg2c%c1FxhwvY3@)g z^8_~*f}W2?P%*+^x8-tHc;N6eB*N zaK6l3ZorBq&d}J%FNBRf!SJDz6%_9~qfSc-St}}hF0!<%c-H6IsxKT_y>Rbohr-ZJ zd7)&#pB^o>^@_bQGi^W>623W#d1{RUJ#A8_%Q?Uj7$8+}T3+?f9)os)4K6;F*#&Cs zw4Oco!+?aN%bdR$8pXt5mifouc>U$vPp9BHAZ%9v$P^CWc~{tb_s8eI|Lso#FgvPD z{m49qjnGTsSR021NzX!E@EYJWhJgw*O&q#UT_sWOq+=Tp_soO>2lARV90j%6$OVK3 z$w!Nw#pHSBL9#hYsyWb{9ZDjwFbxatf-4-%4VA}_Kwyixs?{IYnG5qW??PG?&WxD# z0qhE^8o)u20X4+73%KCNh3JSn3jZYH(8GgOGC!843zq*~2_Q3zxr4KA45c3(MZ-{M zZh2_+ZUhF~C7%!XTOgw92Z!g8^&9~!oxVyFPEU@p3#X-{JbYWqe*YBmB&;UeS%$W7 zdl#tTRTYL7P5yK%6FdKYPL6)p&t-5GZ;6-yL(!-*Yc>NQI~-RmYMtRDw)Idz@(qJZ zU#YqGw0<8TbKU|wBcF2X7iSL;Ys+eR`l;il2Z)D<@_`TCGaWioyja6ecn^tpng@x? zH~cT!${e(hgMqqk>y6e$)4%-%U+jPsgn{W90Cu`co%y>NB0^o*KP`K?-H7Y*K6t)W z!&^RY2UEhddur5TXdfD0!j-2*8#CTQ_;*7rG3zLs3FBib7HWFO>c|aEdOYo^*Bgqq zL4$9NYFum<+uMw zBXReeOTgjp>W9(`;DdJel&#GbG-II;Jq7x4fX;fyBRSCX)~FFZ3CE`x=!3NzasEel zj}ixy85IPCH*U*?Cy)UwLSx)==6iOYyKs# zP!rre)J-nd`VS|GKtfO^FX85+Jqw8$E(`(tvk67xP)p=r62+NEtYP3}-~usG<)}Q@ zDKj^<{q7bdY%6^q6B#IEhJJN9R+yC@-gB=qdP5okhBL`n3li0E_X%zVZ){T>elTmnKGadKU%r_ z^o*ZBIy^CxUUZK?iLOx**L8M~S&DcdxEFr;tKKcj96Q!ohrs#1Awzx8PG=pZrii$I zZF=-=#v2(1t{T8W%I^uE1$QsBL#;{U^l{0A!X>(WJ9dLw^sMRia+~U#PJnO_`g|nM z80xEI^N@%vksp3y$8YzOXWp9IlkW76

b7<~WzEwxLp&z;RungLz`#RY#s551=@3 z&^>Fw{xv%4#jHWZ<)89=P#q$$Ja7hXC|c6h5DAPE(8@&K)g=ExRA-$yl-}!?ynWl8 zubb8N6XR5LOLM$G1cY!^YonwkJqUaus62I3Hdelig7Sx=)>Np%>F$?Do1dZIO?w^p>wB5i8vsB_5=7J8runyi|Ns2Kul|2M9AHj>DF6UT0H^}4$eaS4d$T!s zr{D>vYn|52b^}re2m)01O_}+KmOK(OA*XOKE|pbZit?#O|1w|t1AN?%{X!n+AK z)h)qO5OUN@jb*@1`6A(rd0w%^NS$eL;SPB~Oqbp5N;8UX>CU#>@Q+r!1Kfyr zNZa4>0`sd(tv?+0E+@Wp@oS=Dq#pISjhouj-hr`u`jbzNrbr7gaw{r1n}_sjIc#N& z^hxhUB)Jhima9_%w}COxVixCUwlR8~=Pzqkr%W zA9ZGPpwb(2B6$!etpEdbfQ6Xc>@LKwrv_2^b$UDujhW#RSdEww5L*L1##osW7onybQL4{u0@Y_=mQKyq>RKaa$(r6`oi)L~bIB~rQcP{t0&hQ!8+r?8p z@ewZ}^mwC~J6UPXL7e^!60fm=S(E0l(F+7{fnDw-)JvCf*#%|It`xo)9*`$%wM)HF z1!$|LCidxUqTj3X)_8XGQgVI=5;FR%v|~WIu>bpT?G2;`T9*gfvx;wFRy?9-sjH#R zUIU3ZIRQkvM%_)J?!2Y9)&_VRZPQur>0_Z4aldAtijf;&wO*C#gl5(epFKCNet&o+ zIh?pQ74rW6Ebs*l@55&!d85dbdhc~oSE~I$66#{&k@mm5apNn0(Z3eEN|A5j z(HdIy3_iYh^v*j!sT6;s3i|@P_yL8Y7ug&1ijB80fWec&*7Lhb|CJpS=CSSgQ{x9l z>~%%7$o+kCgL98okCOGQ>}|^e6|muPTMFc2wyS(BcG%#x$@u=z^Ty31y~7P!3Bb26 z=}8vW%jUU_E-h61?Vo}@pStsH<}~*qu;3om)mV$5V1LtP`ZCVh6pKhw-%Qfu$(Y{_ zd)}1weO>F?3$Krl&!ZztUbT>O%?ZcDpE}^nA!+~qQxB77W~k%_eEAT*4fF}0d!`hG zNr*SdwQ1vcY9@W?MDR>KAS4f`FH5a`C~dpq4Za_N-GtdJj=_%Gc#XCs2|6dVY?YhE z5PWq0eCp5mBjBhQr2Mcn>X<%dJPEC-f#KFC)KOBKb_N!~Q7mjLHl~dI*4OIha2?QQcflmAQD#{79m4<%gzv^orlt?dO+jhLU zQMDv9-buN+y455F56%yOL)QdY$m5UpGhR0%nqyqE7CI%5a$u11!c&gp0(0|t4))Q-Iv4WqW7pclmM`vl>cLeJ%Ef`K7lkV%Qd-D^C4L1qpdF`H^(bdj=r0_@uv*dPBRi z90E(mglq`U)tmXyAj|j8&dIfyjfnJqq?zm6gQ(wyj6D3g2K$5*309)44ZMvP4&BEH~8r%A-ebEnfMg0qAZppBy7C z{yDmMVK2df9G_8|7!Cqt%QD?y6s$tLb6RNPT&IiAOGZ!|K%_IUjPOIoRt%j z6kV+hBNMw7k7Vmg%@3^inP;30h;WFij?FLv1E!?dD+U0_t~!ZNW&{BAm;izH01&zI z>UD4b(qkM8XJ7w2-*8q{Q}eN%T*K;d%wRUJ8V9r;9aVu_px5Ggp_F1 z$TbCiH_+ItTEqSA=^y?$+WDdIN|y`KfeM6RFh-`RLOU1M;@znJmBufN)S^uZ%eyy5^RNc{JYp6vg@zh{sj zo}Y9NGQg&1g&G)7lBX;CoCQY&JaU0ilOc4XXNsK;Iim1r!-b#B%mR>T6wR>ze?w=(-|+FEU`T z4maId9yq@$M4CI3Ps|dU2w~udkNRo0iGcBb+(S=}d3CkHeG4qKti|imRz-+yj)X{> zXpkrEkspW5aY<$*%Ciaus7~!LE`$vJfU-%eePh(zCt{IHEbyr=F9C8d6GM*&;vEJ? 
z@3ubta(-!~6)}I_U9~>;3}1!O-pp2oS;+4{y4!N!a!+Q)RGGoi$gAT?dmJ6DZX@kr zWdBneL(q(;U92TeJU@?f^z?)J4*XucFj#>KHMH1JgNs#$z4g(#VGwg9|r5QLvu z?mazcmhgU*bv^uKnn=xQFAhsSDs1_@TQ%ajXSU$z6ETRhjTxs7s_PM7o6-7gYwAQi z(;;*gsd;rT$t_F56HQq0ENf7&1(Af@r;8@>IhQ4g@+7LCcTc>^+^jlykdC~;V^hnf z-G0fKu+6C!gfjyL3~oZq`;FA?NSPcqH4Zw<#x6FnA?kMpcc%rgq10tna(zE>kngM0 z4(al5v0UjzS%`_=wAEH7{f|=m1>UF0A)xN@-)^23D(Pqh9}LdUr94;{=jwEowwsCjf28<-#sSVQcb#rQA7>G;0T^wf z5O6QYTjWS8d0CpzxIW!3$Koh^FgJ?tKwUSV+whdOCyVCUF&l#0Fncp)ZchlLi!1h2 z!`Q`v)k;^Qhu+>R$<>`60*~IUX|L#4?p~O3wF9`t(d)dbJ~{4rF2PrZpkA6Aq@bo` zde=$(e0D;QgRZGF!dO}CFQpj=EH!|+z%kk4Yq$|h^k#=kdRsB`q%&5q1f;acU=z_l zH|G11;X%C^NUoxNTJG}tbP#@NyP$hQRgi4hr-?ys03&Owt&_fcZO(J_tVzJ8$QdB- zMtvqq2GZNbNlR=o0G(bLk~()hY)yyWbzs;-1Mt-WdP5#zG!?rSo}~BT&O9>F(14eOe+Y~&9(0UfPOCpWj9gvnzVF8|#2%G~ zf+xsV{E_7LCS3MFQt?faR+5M7m$p+|Kk{to!2obr+wk5M|1rq4Ab()ApS9wHG@bNkXiQXF#!V4dMv`gattQ2b_ZK8;aulE}zEw!Z{m&223354xXnZ)%i zIJ?W(QN~zS)2$Fp^&oe&1-63+d+#@~oIa>(ToGuwek_}VyRFx+AT~4STLn{BZZ~xO z3;4$Gefyuk^Pj&f>Hz8oVC16JY>TU1e8~A)LkGc3E2%Dw_!CoOkgka2$!>i{*&&1J z=m>zc2s5pE>M<>-yPKR1e&gboM@d(fe|*)v!~fF!$TSm?uL+aJaAZyW5=C&bx9IK` z^hrQ{L7%-+xIB=*)pHaWOzrfM0p84?5D!wv!%@4JQ{Z>M*2RzIA^+9j@`b$Y?eFcr zG+3#TLB_0qZ~exCQ73mQrc#gY1Cd8*fW-aB87GV#P-*}RdViDV;u5e}m~fQalFxyyHF$1}sbKz%9%{(fcWw|IYSECT3>130!r(yB{JbR#A1 zF2_*4+32u)X%#(gLA+zh=?jE79N;^Ab&12QDcti7%nC8;k{-$yI`#RjwI;<6xxqkoC|cRrT!aBnWGgs4ET3W>ru8nNQ74C=OFwz<9&0Hjg4MK`8hE zRwbFlpjz{C)YAi3bX9RfkmovpzUZl*tKD*!jy)~=mR7ox?Xhz$`DK~h<&`IH=-T#B zPWAPJz(C&-+mP}#37d*Oc<0D_-d3JZ6SRcs!5?}LZG`R(s2 z&;G;Vu9g7+H^4W)7|0)Z=F;qw3x1^W zhlqUUm$o)!kLPtn1Tel(6FxehS?y0teM8?P3HYu%xH=eBs+TzBLE}0WStGC?rjV%4 zxKZWfoi>;7psI}bq!Krqk1n3oj^_iw(xq>t+Nx!!Kah{{V}XUJlI>Q#N7>Fo@dk6f zkJP}CLYQ(NfFYAcJKfrbFI!S(VMG%7h-K8w^GIgN>`x_zM>_)4bl2SlAd;p|9qN|h zmS@FrBBeLmf&uWFJVgPj=T`xQnnO{pJ(MY9q-*?K!qntqFB{~vuCI>GL`F~3KfikE zTi^cEavD$qNNl_=wmY_9$my9M-0+k>D@R?v2yIrUR?a_r06FXC?0g#3{VZXhvl>lz z*x@=ev`&}7>$R1uOZLj*TGuK~C-o1_4?0tX4e7o|^l9AJo;DW`S@zJe0J91BZ*ysFn^voV-X8vQh z1H&Vw4(crA52NA;!t`1|Ah-B-53r;=A2S9&5mgzPc6Ce%% zO&9Jc5r8Z6GzE^dmN&H4h!%hY001!nytMY^eqZ?9^63(~hW+9e7l!u<(zHF(gBBVZ zQ4fJbj|C5urepajVp74x7fK8tg?aA-MM;hKW^ujX@&R!_%Co?`-x>^=D9jgFXe)7! za=`JyYhh6+Oqzt&^{1|eo^=`kPzzTs4c15SaJV|oJSB)QaK=iO9fXGTQ0f93D?sxQ z(Bl62*|;_{UAfislU^q74vJ`}bvn)7=1SXGRv1*m#-fio_6o=#bVHu+zV`B+9ArL+ z#0@isHOGCMnXDk@xc8IO#-#Y+@MtYD8%_LihCH>$A<8$Rdj{)B`@zLk_Xm}7Hm(&_ zW_7P+>q|qR8Da<4^p1d546kEoHa(VgmTqgz?bx31)!faA;b>aq3qQ9y;;L;a^xD^! zUETwseWVL$0l)fZjL!AC@DZdMO**@3UD zRHIx+Y6=zqykD9nu1fE2+h2y3&Xb=iVwlA>^XybfZ*ZJ5l~&J$!?|{O=Ff$t8Nf7k z?SgrHJVi-71oH}tdQY3JRS!VS&*w_w9!nhq+jc2KcSNg(F264cV2UUJ8#iQWKLqJx!x&X%5E?cXQXA#u>FwcCvH$Fl4#eX?p~9~MB0D1D zm*Bkut`$+2S0pdgOZ!Rp^?vyD!oG9{)077jwP1`o@5^+>B`=Gy(fX-b%TkF_q8W$V zm3CxBIXM8Zl#qA$&IlH;MezCWp>M1WKWOF%px+)1*TXw|#M`OHcuG7H;#US$?d=Qx za!+R4Bgmg}v`{($mpU_-REu$;oW?fVz zr)^FKtR23u-*%*>Ja$G&01y^ri^-rEBdG5e#EVj|TE994&y;i`M zhIF#OvO8^hSA$-7FjIm@h3eG{O!p^#W2Ip5eL}dSlV$Lr6H2a*ODDeNY_~MEZ>Ufi(wLXzdnr~$R|PPhZQWStnr)c!dXSs8nZ zde1$Pb`lg|xDtxB9)f=`3nnmNay$qGOazYULJ3u;EmtyZ_Pmr8PFn93f=RzMhQ=w! 
z9P}*+jgpI6za}H>g8!si6e#w}+4OOAp45S)cj%M)?TH{8p$H*2+3Pm6O{UY80|6k^ z%YI94G{zw9jRwI&Z@TIm)}~_Hx*R5s+g?8120OIm(L=PYPy9h&zpN^zgBS`yOxik^ zg&Ngah$O`5L@{FisjBn_A?ntQ277j8k#_|LU@=a$wT3U&rcX9X%|02!0|0xflw(!^ zjAr1&ZF7+pT{^5K*se#yWraZ9y2t;Qu3ntg2VY+Jl0tT$LQVa>`*%h6pl}8vHJ}X= zGFPVQrEYub3+>@hAXHsJq6h6JRf<(|KDShZ3~bKo^!*+M(r$mlH>h~+VQ=RVNI`JB zkH(|=IoG=iku8p2vWJDbTmj!SnWJ9>@(+ejaUV^sz`OMrS!gEVtlB=evxOlIK*Pm| zOX-abUSlB6lD(s35tb-%ikWhYXpvqbSh-#U+B#`Q+)Z*sVgNb}0`{bD#6n1|E%8C0 zn}>_lo|w+~h(_F%9+g`j4>D~CQD=5M12S~9N*bMv;XWnY{l10ft1cRtJ&Jhzq|2O7P|Zxs^o{N;@et_c0!x#pE?N7IT+&rcFBbVQeWaLAN`O{W*g$zK%T z485)PyrSQRx=jw_Z~fHzi-`2f)3>*4dypt?el9Q(XH!A>yz2XmB2S)|6h9daQ` zJ!_So%r0GT{|Wo*y3ngop2g%@nJ_T)ln(YgO#{dgA)XBPb;`sK}6p{UCj z``**i1rjGL$ZUAE?4ix zo@TTp*CSNq)LwRP?B!~K!XT6EJ#)>a45ow%2`v{hv2A1$M>?}hHgKP|b`kSJb$u__ z>6rJ|I%}}E^TNFFO`N!IeHarG?$)TC#m4TXCHongid%iiqGqjE{rM8)1~q9atCn9Z z+m5xn|Ge6}_rp^rhSfT!I;WZTm;XY3q2$)T_2K^nJohtUAl(L=2@hZn-GJtn!Kd*Q zetL|Ox~&6OIQHW%AC2nGQ3v(9LKr`5N-76%H!bN2JSD_X2LKm0FM!%?F092mp1Xf{ zZsl}|a85W=o))?qp0jrWB+$}mwM28_AxTB(9)EwzBw>3x7B4vHN6Kej&GhhdPxX8A z^4fsh2FTI)EHeo)He8`oc4VKaM_PsrhX-MHWH*3WZ%xP@q*1b3VPa-PN}RfaG*;Iq zl#5mf0}=sH6s(*=pLi%R!eU1+uG;ETf(T)_8qE^bvo4?H#(FC?V0=1oqPTs2uO_hE z#dO^LfRW~cN!qO}TuA{Xy07}!@rO^R!lOUBOUuh9eS(t^=2f*>L|nz_ACp!_lH`ea zI`ZVyQJV`y-1sn6H}0;+ffmd&`bFpTI0z8bBBY$Zw6vrj4D8R5+&0VSlErq z81gcm=_L-m_^zN%23ES$MG>*LX%9}iIGbue>)Z-iz@~d53$TILW)#}Cuza&#miZ=} zbm32AI-Lr(fm&?hk)0-lc&1};iXGr;I-H_+pPKIUIM4zcl$7;dvFAwZ;69}C`=!Ag z>J40besVBJk*9!~7jjTgWRUjc2!mi0@pt91eJqstv67Db!jpyVbRU%Ll}xeF)iE{- zhz6)G@<;ma+EYgH&<7)&RXnu+DG~^;)_s#@pjs*t0O?t9DFEYds4cs-iY`xL!z$_j zT)TgWS&q$MhiT{+Bv>5DpC-D=>6<%BlmBS)@)o*no`utFL&815U}$_?Jq**yz~ISL zxGX0QfuYO6^Cw0%)Alw!N~k)n>ZK z?aZ3NVs`-nB0y3K=>>{l1RwMwi3}W4B)Hz@O&BHH#@mIpt7h0GK@gA>-p;Bit&h4P;nO870`5)2s8mueLdwFtKccyYPUWn=r2+rU=2Ez>~#%B@Uqp(o2ri zmtrLn=`sl~B2=D49+pIu?aRc!_vGT!`1_;q!4E**nzS2mcx@#;vl>q=K8d^c z#laQE9gLUm#iU3)dFh_z%jE+>e-3ZX-2w-JP#m3k-j8 zQ%^lf4!k& zD4cC65W9wkSN;97cI!m1 z$1tIIK}aFCTL`mt`((Y5g2Is_cg)i(O05Z%AkzS9;{D~~vu!s5pEuqxtlFAjGstIW zC|A}{$BX(wh%j4oH3r_r=6~vxa%M3awVD>;PR82>55-MWtF;fed&Pd0k0IX(dfJ*L zZ4`9^ZgdvMSEjwi_`RWhnvz9l&DT#Y2+vxKWjj)E0tfSkHd6?d7d8^s!2sDc6gjsU zjxNl{>s*MTy!D-X99w$|?Z4=#Qxl_KM-S1T0^{}$HyA1SFd<%hmclEGIgRXcH^{sEQk+F0 zZ}hY=zIN|>V)*9&BXdMt`OiEt4@-*>CfPZ3%cM`d2o_d`lAajj>RWKcJZgL#8*f6l z!r^ZnVZF&isM}we1zWz&M(<{fX9j1c<@yO1do!lJUritR(-oOo*}PZ%eL*}SZ4Jjt z^l*P%t0Ff>R9qy;ooV|JqlaE5fVAAb!}Y`McEX|-d}4|(HbblO%l(drjL{v;tfjhN z&FzX8#3D;vP{>Ei)Annn9t?jJHU8S7@AUg}KDneq7o9o02FQi)Y4VxV3t&8+9Vmgl zcMjMWsd@ROe)wYueG15tn(hQZLr}mQ=ZGM%*U`5_$zFj3N!euC0)=X z>n~6LkdiD((^Rneqc{Q57xND$)%Y4I`yg?z|IeGnL?pyX7P$DRb2v)AffivS%Q z5!eWj@QgC00M!iwLqyhA61ByU93l91=t2C03;G}qR=4wNBIXCk?NhSpIm>F$E6QFc zGo7VXO_mW_0Zz#Lsx?4Ym08COUGmSsteQf;majA6AyBjx)DqVv>8ouzKV7DSFSGVy zG>>>#aF&dd`_H?XFd~HE>x=C!bEFV7Ct6cavY-&XM$@w0A<9v>MIVhi?mHW?I^A5D z$>cm4wOci7QnZQ^);uM`*8|tE*|S$E%Rcd|;UE;4_n^?mZW*DfXr%D{ao#O5KA1u5 z?s2V`L3^Kcyj$nOza@V2KrvoTX*qltX0n1K0oxDfmq`sOv;tG?+mR46uPf9`Jg|b` zmqyT|!zJNcFJ^LAi2}>KegEx2>|S+g(_RT7L3UZ{V7KnpXW+M_C$&Lrw$humcswks zi=c}w%9qlp8s&7ZjegTch}9P;ycR>;R#RV3O&c_HbfQOLm-FK0ijnBN)g2w95rgo~ zwK>}zBIWDJz?;rAEOy+t_{5uVp%OcpFP3?_W9z*faciVCfNOv2ranNr`)4GPWR~c! 
zU$m}u@jy{uWL%hC>pclvs}9DmY!GwZt%P@|Md{P|yDaiK$wcE#Q@t%mUPUe`ohP~` zpN!}O?b!xww3UEky^Y`aZ1VIq#pZ`YUh;1}DE!TSxET8P`ONpuykX?Vb?56LRV?0Y zDIhQO6cjF*v8(9EIbktN1Wh z6VKY9_NqID+-TCQHKimMm=^>BQc2AYH|^slbzTbM%atEz0C7I(Ky zp|k|*-#Sui81VMs#()@i{?kT&8w(`Az3Mq}Xb&aedEsX0O>1-mO|{~;Ex8^KLQa2Y z_-=R2GV;pcn@c7pMfAR)@yCLNM6y&xnQBIQ;$+e6A^U_rOJH|P+*4vGwIfCI%_UfW zJI7uAYN+x~=WL3sg?&wry+L1P+c1ij!kc{jm4y6Z><9U4Uz^DLmmkl2v2XZHqTv5H zCbTzK{{Gtkg>!I#VgOv3&8h;6-S)ws{EGYP(a)ZJ@o&7i_ffg`c)+fk#d&h466i{y zxj0{jn1?Z(VvNef2}C28_Q_iI*6{TUI?8%h@C2&f7WK=>&j#-5Y?%H1U8{clS`FqR zi7mTdn}-(BIwF@ZwtD5)bftM-yPcdtL6bE8NOHdsnVc|b@e?_{yN>jiPwr|8YJesM*KcHwmK8+k1ed=sCP2go@!PNfUc^WJOnE;|&O;E1P) z+07jw*4Cg{_f1CuL4L0uY?f=KdO$4Uk~p8>@gnQVwW+_I4ohgH+J@dlE_r|JRQNU) zSq()rp`lObQ{(WWns5oVs0ntsFyOr9-;0I}eE1u7Ux%@r}%42|e#x@zogUY0#hO>+;;RQ3S;%zO?rJA{f4IdgH@=4_*u-b`Fg{ z8b0?;(ut(*M@FYtQKg7Gl;KM$PZeNIqv%*o9p77`j-`6_6q|(+t*=2KHikL;S|S#G z+&#L^Sug!rf_I7Ljp^n>liBZUsFZk)9q}y^w>RNyJ@K^nru1@E!zt8e}0Q=P7V^89ghoan`09}kA<@!CM?Zrq5sUSLpe zG^v)ggL-ylJJ$=OKd59C_-2s&*}wtm=!1cD^pEb}m2o_0uXjV=wwg=g{AD4(Cv|o) z=P9|CnCkoKvjkF}TYcv;Xx((*ZQ?!F_gUXYBx11}HS{i~E9Q9@TmSB&bGfq_WQNt} zhC~W~`Ue7XO?gA!Q^b-D&8x*PsW`k@b*Z#ig448{gxjX6YwM) z&z5bkj-tIOo5mjwW+bPVMxVVhX=m2cXF|)DGp6OzEI`9DsLR;(rIN$1tkHYd{+m1R%IU)UmyI!N zh-P9seo?O>iGtK+&TQePw-`r=lrZcta+=Iti6_AWM!p55JESNni5y~xLCk7TBP9jb zhqE_rr5detU?{vh+8yc^^%hGX%u<5+2#m|tc#;7()g6ROPPgLYkbgq3& zWIh<$)O_lI9=U&0{Z7YZOUnxUx7^FO&>Z9}q&Ps(jBlgtRfpAg+s@Z|kyx6ObXXy> zRZoU#+xwHbcoSrKdeHTflzX}9WX6b|8Vx|V;j+kUaQHGFaR1;~()DX-b zcdFmneIckYQ0_(l`si05scXYn-{Q^I5vRWKBIOhpd`P?TONd!74WlRQxAnOcl5KEr z85!w2yiP6s&hM6QetU5tz4{Yk<92R6QChw6N0|rnEQpXv2bg_8@JGilW8s;<+|#t5B0(~>?~;KGg@pv zcadoVI6iwuknf*wsPxaD@xS{+F#77^&F)%ZH0r?~GsKD%=V;m-SJC@3*dzirT~4czW?@6#m|A@sq3d z(tJ8uW5S;iWbnjDZ$iDJ`C?^dw=TTNX~449l626zISL=zZA{&q8`Dv=3u5|rm1w80 z%QOtRV!M)tUXQ9bc=t5vQSIQM(FaMVm?h>SZqM8L6kp1&X%pAnHA1zzv9gkQpp-eW4XXBBI+%N$udTSQ$2`(PvW8QLjEm|S~kbh0m!lZxgG zQ8!-0j1?+v3^T{A+!L9e?#An3;YNWwV{__;;n!w15y3^w=JBUrUAiKgikL&7{Yz-3sVsY(=`3 zE~QFeo%Eak{_;;>{!8~_1mbk46d8W`hO;pwLO-D|>2rc3%%$|^H;xPN)>gy0n=k)L z1vL~eBFk}CKxx|#SC=E!5nZUHmMz!YsQ0u|4>8^!$c>Gi>&TC( zKREJaz2UPJ@z=@l&HkA1h+%X3_O)zJb+94^y;g*1HIIs7j|5u-$@Waml@AJW%AR58 zPAHl*V$I+%H1)_^Vf0`MC){|lYP{(7b*PT~Qm-YCJ{o-VnuNHIxMg}JeCO2qCS#XW zRoe}iEzF&O(dK{&-h=EX3Vdg}F*_rgI*ITmSaBPQ{>i$rX%W+Q5 zOJE^H;}-CcuFMv!2&_%h5nCXxIgcCDC>NqS-Lwo{d@oA;6pKgey1R$BQbX|LqE-XjvU^<_|d)o9`Vy)hhAns&v$n27kcDuj-43h*MMSRk2(S5CJ zzVK$R{@Xtm#d$Fob`*0egdk!_nJEeM*Z?^tT`&!CAs-XX8B^TV+7mCr1^rW|$-1yS zgCo`OygmUIMTJ-h2~iwERLZ&xMb7OhXD_YCNA_$c)r#I)v-WbEPtK8rmteTgMDk;t z#42DACUPjQwxhH;@GJV&X+B^X*p682nC)=Zx15wcwHHfz_|%vrsJx3C36eT)o1DKy zB`zz3+&NxhD&4UOQ#LvurDEtaJGg5RGSly9k)V$ZXVRUdHFH<$i3vHvOOurHqa&*s z0;}2T>k%x$I0~W|J}Frt zuvxCjukLY3TL|n^M=8}PyHN6$$bK0d`X0Y=h`nDzQFB!&FkMvL^xFKB4a=X2T zOHaQD-*$S)&TMQ}==QV=R%CX11Ll7LA5tQ-VQ-8jz?D03@AYB!=GLf%Ocp&H>pdA* z@6wy>y2v}NbLDOXQgBALfYRrn!$;3c!S$?~Z|zNPze!&N(dLPL_a7gAHG+1mKvawj z<4(`Y=anwE>xs4U1eZR#Ok9O87}hDk{O6s0Z~5`S%#$f)`%`xWL$kEyH^+~0h;Bn8-_OPzrSM<1vd_2phQ8zyvF8=N= zbBxAaZ48v_>rV3|G)<_H6Vi$X4f$hnnjFTk{)ll5iRQGUGS?bBN)O-TJ;RL_pcpsVZUI);c*Qxe{L~38{ z?J2c$yu;q zrGGL7R(yD?r%Goli<@LJuF1`E;`sH^-N3dGn%rR6K`k;!udq1O^W;I^x?y63*X6n@MNw-w3awFE)X>F`DWI_%gspwo-MaeglS<#4i0 z$A0~@e=F3^zj!@d%HLdYvt%>s>=8^)c!_C>PTtCeu1;0TP$}nJ%*N0f}W zs#lErST#~O6=!$UCC<4V-21)XW4=|f-_MW} z&>Oa5r`J4K{d(_f^6-0iHu?{raoOk^$UO;rN$n*RE7J=XBEh7AckyQOCtGWu6OA!+ zz8b8)?2uJiCzdkmk!(e31ah-CZpJEx_U5>pPj`;5kq?FeJN_m>rpGVJ$$Izct{F3| z*sx8Zc`uuIgInOWXRcgwR6LqY#}b7KF->)$rvb5Hld<6x!Q5~#`BRg-{pz_=wC70B z^E*A8uzr2|&;RW|52FsICj)uEzM!*kBWo0ER+B^kQUVJ;j~)+vp9{;2;aX4`%`rR1 
z2PdJeX#EuN)L?0HIf>lgq7R|1jgobY3i-)c4<)vmYv)`eN3<|_MI4FhJE8VO!xjiJ z3DF-64Ko&e(!tgQEF2hUp;@c9hQ?+|9iIjP>J-M~35ND%ibX`m2MxN+K=rhSO_yP( zYjVbJ1UVMdNkzu>SxiYiNtVj)%gSW&n$50pm`aL0#m&Ny7YWYva$EBf0ZEzrJ_lls z(elFoaO2w-t(AG)a%_0&2UaHP#iVg#D}SC*iw`5+kQKLQN+hARCoxn|hIwl{NgQZU zX2402;nJKCW|on}^x+DA+X`Mny}U%{is|m-&h;qw1Y&7nsy^@=^+Cl65B*Arc%JY` zMI^%n6UwC0@g>3E4u1!a|60t77^ThrOsHit*+9(Jqfyn>Zb;ctyLY8H?z@obp69>* zu6eyd*=vd3XUb-%4`Lg6PT!9rm$CQHznm~vuhJSqJC3$Exc~QgHcov}N zW2cz8I6Gv6r?XC68Q21xzB?Fplbu-px)bjW7rkw9!^283R&ba#Q-0LiH!^WTuu1}G zmA&&}AnCWmvlN>11``3FKx_hST((W#ERaPBiYxUuQ-+^bQp^$VqQVk^#N$DPXMZv6 zRYY3ikX`vKFoAjZp^Zoz$^69gGO&FcW{YgTEM`X*lGhzm#N7m-DuUL%?OAv$@U2Abq_kNBOSxbH2K7idw!pbduy-OS3FlXn*BsU0UEFGE_h?L){3i8WKHCF=+SJ4gIcWX4C3kw_%`Y=oJsEt}Z&+Y{v4B+yuCbXxv06}_Nc z4lP5RhVRcOR0pQ3xD2HqXwfeh&%!F`ld=1|=|c%sU3CLC_gfQYJ(ty@{`sMqtXATe zvyOcZVI{{qCBe4h4U)IHO9&9hDo!LL$Qy{m#BNyem`Xm>ovKQ4MjYkB3~?_Wc^aQ` zZ{pY=Qt7`rw$s-O6epw;aL9!UY_#F-FhOA*`3~3n*2!`&fA;*ZKsVQWeszTe)0x-t zH$^|W?5xk^kA=`}_uB(1Nr`*Z;Nj$2k&`D|O#1NHmam_t#-wFb>`BA2Nq&=eC9=&- z{JQu%+-`gMwj2B3;K44R9l6tY1WX5-Wdzxcot+}z>3;gq%Orv|XugN558pr7pG3I%)YUaZ*Ui1z{A(BsIUE~n5tniUB@!TM^jl+( zx0Vt(Im*je!zBMc5`G4S*Y{?qXGU0tG|8vIlo<%Ew+1V@wUhkpCX?I6JM-MqPp=*{BQk&ryBLJeNXu z&~8Gyz96SxFwMe<)2h8Oo4MH`>(O zu}(OeBRi&=YvZ*tLb$LJD&Jr3gvXw&rJDD6Qaqo{x^lQ*#4Qr$FYwjVW@mp3qJt2VelCi>c#<2v{#H zQ*)xWh4KsFkh-L5WCLu!`h@p}H|e;&R3S_2bH4H?>OSZ1Qnk<+48glD0R-@8G4 zUH{DoFV>~t8#?+Rem4?u(UU%wp{`SUn zGTBMVNenXT*fEXUv;NvjT%0A@Gmg(;cAzmZ?iAD0j69?B0ms~(fU~`U zzX)PS9lrRXbTJ~dlR;R@1>y^BAZ@LO{b_QVnC>)@gjO_DWP@0673!XqsEwjhoVe_v-A8x&3yfcbwI^25Gxin%Qkl z4I(;c%t3@)ZTdDFjp#iPBpjOOzy1>Z269rJQs67c#tX=o4WjWSQoN;FvKR9v% z8r#@R6uU}Dj#>}8vvXZS`mlNv*I%(EM(tO9MS?jbM4!4oZwcfcXP(PbN#Q1&PLkaX zv|lgO2)uxX_mj~8Jbf`*w&p#&z`#dPJd7O)E)iEC3c8cKesT2tKDqu)sHq$LLNhkU zGY3eQJK?Hf593I6>Ln+*4jbr>`IUxxeIuq9b*xF-Ku>4$)5%m=p&a4TjP7KPV}9`2 zGMRfBysDJ*p}7;Vn5L#p`t@RhQtL#GcH;sa+g>%0mhw6R^S;MkV0U-WC$hHiX=`+R z&AIlCr*DbMWsZBdb9`P{69!E>qa)I?F{7|d7*SXSYXmiEMN`CU+bKLwq|d#vI59k5 z-W(>sKKtt0D!B1a|HuO9(Tf%Wa zM^(CUvz%zY>@yLjPqAQf$u|cdqs|XXnOWmi?FJXF7zOnn%^z{|_PC_!7Hu?nX)%2fi9{!YwUueccScLDGLM#bP<)KuqCC&a=dX(Y)a!N3iNb zXc3}HN2vjbLbs*l%XF)JzI!_@?yEDIPR;aQqzN};OFa;-ct>K3-wqEKOXXQjMY4pHq3+f@bDUr4#hLFhk?pmO@O60gJ8w@%i+J^@)ZQI? 
zwvv+VUm1yI&Vn>WTfw-$9b4Swv~g&OTjtLBP&nNDpe{4q{@_Km^MQt%!Sbyt|fK0@7%K zkIsgHJaq)YxGh?GGWpIbayHabw=43e`*DQ2TytilV0;$Yr-qpWiujJ1db*1IBnDl+ zZvIs9Gv0WJ4=eN<+B2dAdmh;MyUyak>@*10kBsi5w;2Pkua4y>gE;H-b+vOL;@Ayq z$-mBKSc_F*!)iIPcY4k-d!aJbRxe&qKS@DS2qM2+C^0ZIBCDs37{K8q@>Dfe5Xx41 zaZY(Y$d^*hp1<5<7vXXUeHMappROCXUl5V}Bvr{6o~X~gb}!-v1$r^Sq8SGd!?U(c zxVRBR@6Lrq*;x!2oyLMaO9);VM!n6xo!0B^el+gqxoQAMj)}I>O`bGgreDC2QfR!MVHT2zd8}N3H5c%i;dIS zRPbv4X#_9yfbKNBEjZLb0u=hXEByelM;q8Y6NbNf#Rn@{$2%oE)L9BI)vpG-e5IAK zAP@{XJ8%6Xgw&I}3q>hy`g)^vKIsn43no+sTJP_@zoUr%#Yk$B7|C5vnw@T97`~Vd z>Vu52^qa)ATzmZFvkSTf{VT4~?>K;Qfx*U8I3?m}H8D>H3ZOsjrb{_eB#&4(kDUpl zDU65JY!n$fT!g&TTWOcr@++g>!6Ncqes)Sz#v>fQ2@%7EKQP#pLX$MpN#&=eAbaKhDeg*M*`h4;bKoP@Ah%K!jhDbZM58BLH3 z1Elki{Ejz2U<2mJ4#1GCsxSpOxx5H#VImOl+f8HTra0}tHxlJrcz*z_nK*sF`1UUk zy7!jFZ}9aG!IQonyK06wY#>2!B?6y#5dh8E?!4+njLnj4D!t_Rde^;}z?se9sWSjC z-1johbeamY#H(O9@4q_L16Y+?Z%K1XX_H{bDvJZu$LOTomr`h}Re`+e=H}_59qk9_ zTs~hZ@a}@#SkaA6mfpDp-h=O!Hh%Pn8$bJ>>|g%XfB%;UUo3BJL>kixFsejo!YqXN zYIt}gnWa6@YmwWKawW69%WQhx?GJjlWOp*TQ1^UYU6(KBD**2z2`%Ww3tdhjgL(^% zm#$RW4u{gpqx99rF((B;XX3pRhIDuf^`1`(5A5cvnd-O^QWw>a#aMZP5yWtjBeZXy73D-hp=X{rU2X^B#V;QJ`7i&Cc!;^T?7%Zt0SZ|5!39!U2}>#xhDQ@E zD|#@H4GMvKtQs^DEVkC7c*!415lmCj9(S zY*82q_)1SY0#f3B^1iO0#OHW-RlG%m$&;IMT{m z{Wm{7+FF&28ZsjhQuapeDKd$Q^gJ*}H1%peV^%UD$q>Qjoz2vUP}EMyzWA$TrSJ_D z+Bloo(bzQ(yG$PtpJKz?DXv~<%%AVr8z0H1(`}!U@(vlukdB8 z@i-MJGjY0UI#Ec8CPQbx39Q`hMNnK^*;sr0&0oUYI}dWiY0OAO+w+I(#7f6=mI44jlyIi)7s7 zlw$-eA#x-@sVe^coy;HChTds5hs|cIasn$L3n7I)Z9m_}((NghSR!+eldT{36VvOa z(1hp9@>WAuW|`(j)YCiK}X5jNs;_)AqKqp$PbIGh3qeeJS6 z=aM)MJM+do7vCF76HM;zMA)kb(&E?Q1P?~Ta4^0<(_M&8tA}i%6dQY2Bfn_HzuM~F zH`vB!KKggmfkxogqA?xkJl>rqX!YHX9)#0?&qR(VTm^gd{6(Yjsbh_(Yd=PgT8-f1dZb|FR=!XAZtgvG z+`^3yuKg$!F#(Xlh3P$i@$$5F@${3gJI$x()B!l=@0e2yfU6LfOWf<1y!BBnJX?Jn zZ|udxJ?y{~b2%a2OZALl^i~iWbO~fzxIFB|Vh=K-<{U6a)GNx-6K#~Y_1+!^eO)uD z)!ikdeQ^%Td4lFWLDAFv%zl*aauR+wC3gi{c6|j%6XH6{yiSi?6JrHfns9E|#)R`s z@|`~XSi$4VsH!F+hx`7$Tx)dk;UMwZGDxLReC$lRA((N2V2t9ErSpieOaMbbyuZ3y zn1rqf1)$u`A+{)W+b@dYz399*!3-`!z4Av<4abx?vKoxc*lF$6Dn(tCGxF@bphY}< zeK@?-auj35pybdWX%kb}D^!{@X)1CFrd>d?VuKu4hJp$Ma|V+1ia=vIc+!;TQ-Wb# z(j`2@-bxttAn(r=Z8{6ZnE+fQXT8~)V!d_%pxa4plS&=KX-IgPq;qC+zS(y_w=35z zbHo^|;tgpxKD4wjW~ImIk z={BEQ9jcv3o35;&u7xq7>RIrdMeuvUzay=v(y#w~Tss}GsUen&bhz*zGPfOUJ_88( z1(*4)4i9I3Egjf_*hV7+YXb$Dv;gzG*YZ6sDRis)qE&h)y^2q#N6FXW&+b@-1A4?y z#Rf_OkcdrDHtdr0*lO^BWXID1y8vykIzbsDI0zR!`)m?S%#ua=|8Z?-9KoVhs>2Ya3}Aj2Mo zpV-jfPudH4@qCQ+Cdu_k2i7-c1(Cfv$4XK76ES%UnJ^)3md@&}1ErRmQ!BLTxD+Nge@l1CPs6 z-9j$in231H=jNNBS&J(XA@Wz77?(44qOHQwj)KiUpT%1b8=bgK%e(?cCM0 zuZ2)JW~x;x`GyZ5wU$4)%D>IcS}WFVp)V{wXkDEZ66uD%c+b!L)brn2vO;QKjcz6# z;>u{0I2PNFelt0+3>y6rCO`Jt?!;|(|L6bVUjVt02>@ulqCFdn0fkVmQ>*RSna+iA zfIx^fRb|HVRS*S)mUOfQ1q>w?q+l^yz*;pMW`*|Sq?j)*LzsP7CKA!fVLGO#X1wd!@G(B~i(?IG26zZgoroxq#Ob=WpozSr`Vkkxlw*8}#R|m^FNj#CoEu6B8}8;XHh~x}hDh zdzg=e$b+0y(jZwltBW_*cnb-WRW7Gtmidm5>KcPyFqR&fMO}Ifyi3M3$1GI_3t!&|w^P{`_*f4pQI0)^Dr#<;2zVXNmH?!vegN z9cm|t29!`B#lel|BP|=g*B_q7<^!Z?2#(vI*@-vaeyUtv9p}PY zGe9DAmk8Qg&v`Zi+ekpAreTyGm1S%!;bxYSd4xW*l@B|?FI{Vy&9tLIdvwjeu{VgCnnv3g3;PDY>(TsyE-gW?w?=#$9Ljcnp15 z+Q#)qZH}9Kp?sv*U=UgkE6d$G%+ zOHSsLQ^CDCv5NoLx-s7doX!Y7^TZkM#(qR>&clX){PO(q_^0(|aVJ~lSAJN!Hq$1H z*qtA}yMa{t^sEk`to1f9%iI2u*Ih~jaW+0G89zSvJR_N$Ke9C$p8afJ3 z(J7J*fl&mb45&I?c=73HGYR7};XdCrr%7bY1QR?I8h9fh=-f2@=B9B|Y;-jQ3&g4f z-R#P9tRn<&{En)8js=R)Awn!m8p{{XGr(K%-1J0W~IlGLs4kfkIQbEM-yIVPg52b;Sw-TAg6i_$$=mQT+OR z+*X-0&D%|u=7WL0>&0pP{5b&0aHy4BPx6i8*tYwb9#}95NonoGLe_BJon}(piT0o>xNEe`-PQM z;{?N!-Sr4tQvp$*&L9xZ+-5vYLY-o}bpjw4Kum?EHq41CCeiQ9i>n<^dee=Ekl6foCk?&mV&t-w!{Xg8{6Q3(Ogb^< 
zUbnoj?r*LYl%KW5jisDp7)v<*4QVk?lxPTyvBaHw{gvN-uX?d6Wf=!wY*B%8VmMT# z$;ot^mP(t~+an8=$cG0nz8XFr9EP$vtAs;cDKwxL$fXlSr6C$6G2aQ&QYuyX{E~{V zLO`163o>N!o!KEuS{x`uZK(&rYCsP`F>^8w(db|mgoT(XE&$1DH%aixInI9Y-tzp_ zuxygN5f7O5?6W+z)318z2Y*7{>$0Vf%a&m{VOPHLPo(`54ui|v?ee!Hz;B;vNM zI_M@F2Vb71BK*n+->+; zG(tI}UPu_st}P>TB?3u_;Ffr6?0vpP4L^N5On;k=%JfpX0Vb&lAm}YOG!f@=H8^x! zp8ldH2g|(>QY?`t|NEbR`yc-De~I=z8`3y`xsz^^eIvT&dlRNVephgorL>S*m?$hoT+`fEkEKrAx|Q8lssSk>-nTI2D3PLQuOVL=tqu znBm~gGg8}=^|QD^9|f=BiV%m=6bVOD7sw(^2DRSHq8RC?Yz*mNZts}G^iPBl4O2*N ztCbv@jlB;*x5H=2zs9)a4~)RTlKR9npTT*4=2Nehp0r zg0kR3Hz0dq5gWqVNzg&d-v(T$UM^hAiH`pY0GXxWkZ(m zwdSa<&Jf9ElTcQUMt_oIulpdo9ju?gzzG#8pPL3Kv$^ni1cVV6o=ufJF@8Q% zc2H>yZ^`zh7Pj77evo8hYu}A->{L$vY3H-qC!&BC%3&FK$jW6pR#UVeZ|>aufBwPS zWF+o`v-T8VydIQ`b&EUuqD zS|s1CU!J8jW0}LQIyw0EU@#)LIjm+)fs&48;Yq^{(69V(EG#7Z-h5KRyeES9UUlgL zVDbdH8+-ZJi{MVpk7bno*b*aw#fpFTW9?%=RZ%CsI+;bT$%B#T+3RNKa1qot<@BVq zRhykw^6+q)NyZn|mi5LG@I;Y1oQgo$4Bl)}pPI|n(YQzPG0F@x%CM#*(e+63H(YoV zd}BPlv7ld!^_&M`_sS7N(~^AX+0dHWfn*i}%~LN0M z?!tn%gkcSx#W9zd)n}#B9O(gtE(}<8g<|JdEjJwV18AZJ>ak6-{E7IIXVY8A_}iFo zoP1b_9ZV+eY#?=^z&qvM0e7G1K8#QKb|O-ZQuA2bISu9gxe>}=DdG~I8)>ggT6lr0 zvkYv?Wz3UlS%5-2bjCCvT_ipTUieqbFV^=Rz+*jPHL4xUY?SC2{B=0la`!nv!3#G; z$6x2k>YIlkh&jb7sexm79u=T09Ih`C~ZC`0-%$OMPN#AeY=TZ86`LPdPYw zla4;|PO`yppjPN?y|gd*JZ$YF#I5YOV^_3!zRC_#<1OaH^`HEX(|I8hX1I8h{d(Ho z2+Uo9?t7_XEn;L%v(&!$_&5LK3kwj9n`iuUmtV=B_Z;U(bN^Wa)I^M9H&z$;)>WYsvV|=781u=UP)~ZY2ed~Qj-e29w z;Vo1O$!snDM%9DmC$9P~ifT#6c~Y_8(}cxI3U~GSifM=Ch01T>XzEb^xiMSjtL>fB zTUW_$gGw$*1Hj3+m>#i7Pp5~~H|(k_@6$VaSn!Fg(nh50S3I!}8xlg@fscOmW|K|K z88C>cYZ>igeJuF==+Ax#@3aBJ5S>#fO?ByemBTM$ zD0)`B5%Zde7LTP2sO|Zo)Fk>cHwo7{e%?`|*)I}2cRt?&J^m7!{$7xNI*7A{FE(TE zDbx9hAn;t7iP|&F4!+P*^qX%I-_JYG95PV^$?2CfY%?DGQTOSuHnJbQPF@jp^9L-NiA9}K@GO`o-IV@SLVZcJNoEgu_9@Y#~vT8#mTgF7z>t2A$*XLW*ba*+*CAx{sJ!m}Fvw#eOZ6B+gPq zyni25|D@U2H0Ahu%Fz9>y0w%$zr3i~#dWzxG3zyJ70G|5LYc%gCPuE74dxt)Z-f_( zZwH0-P#+3zsPZY+1+5C$rU)33q!7z|MVzpxwi4Zl9m{8-cu81pmyTUqA;ue4VRZ*X9w3ZK+#s;!R{iGjEHnFH_aoV3^*6l_yv8_1)B& zNVCppEMjq&fI1p~KY9Gh^zR-;_pd@IU?83^lvW2C%sSnU%c*z;YXwY2>Jo~ zQs;aEzl|1&c$X3DLf{sguwKaK63x$P`{qcRO$s2zm55#Z7A zXseLJGt$5aOkVQ&h@@`^tNrMS_Lab-(psG1$DMHpN+lDPtIT)Q#3Rbxh^dz7pG}aR z?+fCbXg_zB?oRrAZ1zpOb7$+FpACdq#MQgfB1v&)mr&-`+q6DpNh9J9zUqhW zM5qZQ*@tN7-DGxrU=hoD?-e%>f$g$oU&%2&)LcuB%yGu=ueAk?R4o zN5s4qpt84l=`)E|-NjOmm}FhFTN(!*qAAuy!aOZ6b%Jde=Fygp$Gaw*6O}m=g50!* zKg5BQiYA;xUk71f6*t?;2NCC2MrW1I4HU$KUEj5d1xD?5RFhVvFeUdal4b&2&|dQU zGb$ti0%v+c5BUCuoSf!ew>?M(8qZ!CdeTU;tf@*^ zlpN1hi;gnrLWG?A+|4&9=gU|RHoQ5F*32RrgfbmF7ix+DKso`O(EDN%3Jn}RmjvNf zg?zs@{~DQf7wN+9s)6v=DH6Nim1pTJGpKTMubr_1tJxo<3;D{psKEj zp|SXZnRuj)hIn`d)Tb5`!?`mHWdX8IHD7Wb5(UgvDhCF0@DqbI;+x=D8iKj~9Wryy zYyASej$Xp*Ti{?)O57qX?&Oq5-d~#5J^{v-t5Wfuu5=PlWeTyu@R?Jy zt*iax9)1-O0{J_|wXjuT|4Acyu^jP;efA|f7qZ=;p#{IX>+Vhr$L^Y~guj7|`T{Al z0W&V`?PKK9`DA_^Sm>D{>{Jpm;a0y#f54=JdJl@2iRhGEef^K=&(HtG7<5%P5-7@Nv&Cf^K*b~ zt*%1y{1_6EE)fe03&c}%ThXiYX$(qiygeFpL2`IYS$i!H(zY&AP&q2dZIE@z{_4`0 zTF6RgFSGnS2$5rYWiGN|byVtk=`k;cvWk`-W%;-~lVsI3tb-)jLR+JVdmI4Q`;AlO;s14Bvf&SE>> zWSA(A&3bOKr3JffQYb9qt@bz;$sk~=Dl0NGKn;_x=%Ny2q?D(N%H3P}-e2~jw=!Z6 zC2{>Yg;dT-lBD8k^}su)$8UwTb2k}WXtc3#l1XOBNo)RaIAVczP0d}z0OF5aL(luA zhO6N3cge3*ZA?0E?P;Gw*=rN#;&NcF^{RdJ&|O~%)^)!iZt+g2zkU*N&@*&ZjlNK4 z7R9^avy4j7d3V&LDjQo@omQy`!HZ0tZj^E`jK3TAhF8`oY>**lMFY>#!G?9CW(WUHRzk4M9s{n&YNy#Ec@eQPM6tSKew-pDz>78so4<$$64X&f1J zD{`&2eck@ti&jtP=nn$yuUi^NJX|hSh&k5tS?K%knJ z3216*Fl$;|D2F086V+R47WVoUpf!9ZA4cQ-SmM8|_m0L%+Vzx;t_>>0Q7sDRr_Nlu z$OnTb-YIb`1|DK*vwPpIjc}jEB{`2 z(3yktc{7_za|m-GOKSySc18c&RR5!X3aUhnH+6-DMOF*m3EGbt&a~{fU(^k0Y^Oo^ 
zVMY(>GbEM?6CIrkBn{Ad9vCWvVoXrzUR3 zrJ_C==ovhU_&hQ_)V2-h>ud7f;x*G6R{Ra$F=W>)74 z8=p!_nB*})mYM4lUIMY#p;%$NQGPg?vtz|B>FyJeOyg|CD=vozQ9tEd%i-5^~afC3O~2L?DfFw7eIyH-`Rfh>I*H0 zz{p-g5s6$KRrPGQJh(6F&zKd~fNb{HsjH>_mbtW2flRMq;3@n!=_lNLkLcS8ciupa zfTqO*XiyIm97YA}*wfhloyCe$n$E>ZikO=8E;x+#5?>aS^i=dR%c|3%C4KBB#iUq0 z-cY%2S!X6jUjcLIZc05;2n`^5%n#PSUkV)g!IMtU)^}Hf=lsBF`FC1@SDSP}To_Db zKGO+^9KlAm?SzVRTMg+-qFYpOC-4Vm|KFn&$UudKC{HlBfDxUz4gsatt^kyeB^TK_ zH^q*eph=#YJ`H*muw)l=68tq70hgPMQ(!JO6m_r(#HT=uMFZn(^v1cRZlmtu z2+kXx7Fx6yCIpEwhQ}YwX21EZYxLQ9Siq=R=)AZT1e#ETfUn}C4G?+I3*;s z$ID=YC7(|BBDKZHWy|20i6=Vw8AfKYbP7;Q1REtqbQd4wLj;nJIziG_gcpH4k z4mp`%P=-h;x!=nCpZ>AO4_C=G*|3o{Q~0dcFU6q1Iek&;+~jUbu=EIZe|NU?<@mI? zw@m~v@y#K=MsQu9U`<#$Ikk%cY>l$=(^ z`PT0{!(X4S#@?|BO9{5D;Vmxu6&M$4p)*Tcx$CNvEo}THju%drzldI9$7l4%uZ~yH z>R#fvZ@$&b6hQ4Tkh~|39`^tp!VSac<7&SZh!-B>lj_c7@hodz&+-4EbMJavI`OQf z567whO;rAX>m7VWR@OQRlAi^yeJi+8?5`lM5anI6VNS!jMYo=yH+)D^Kk}~i$ol2* zw=?-aX|5~}(Y_LD}(=hlrXg#4lz+Tdyi z|B-_I|Bn8v|IhzRk7msIM2}dh3aC8gA94tg2mm^OU<H7X zlwRB_i8*^zbI}yXXUXXl2bdj!hc+Yz*K=lfA$g+GC&e{Th~)i~4pO;? zX%jQtUIlTtZ?>{TC&d47P=7vUZ)b)b^8UDXMM5CEhkN3AbDUyeYuLnxR5y$kf0#DD zTGP#0?D}{ycSo8U$1xOy##e7jH-Cig!$*~GHop;__lm~~9GHT*>J@nbPVb(F`U7DW zJmiu)QoxGYkd79X&p!XEl-UEzU(JyF&^I`3U&(1?CCrXE7&)29K9{5XCPJmc&g(h4 zcS1%Y{x5EXN}+!7`B-MrF+}<5pm?@Q+=0CDsh(PyTO(VgG7jIdLMawg?mnLmdpe9&ouqJ{dlK}H|l@80pIDW zwGnAnrLbvvSH-|y+H%^-Z#nf^hg}2);mza2)6HT=(S_ddDL31Q7VF|&U;T?EJA$i< zy}H|?hPSl##|!0VtZZx6csgao)MWt*+4vqjdk!t!BXu#{tb`ztGa&{e<@oaQMgfqiOkQem@X8d6y6EFQ%*Slzx3EMV#pB$$sl ziC;K=!0T3cet+1#ckkx05y+uq3j>TaTUIN7<@$HXMaQ}#g7KYW`MJ%MvP(7SQ-ca< zqks(wn{4p}+Kl%f*8j8ooiD;mk)va62cWYtHCQ$pRSd-3oGeM#u_5_c4JF| zH)(Y(3<$kKaMc)%I2?hi(u<)-QVuc=>QajSe#8AGwYNnyL!hrkvd7~JG<(Aokc5sa z^M#nY#djai6HeHV&7_>DPdAqA$z|jwSRXEZHMzt@RI6SO5BZSv?Q8?IY8aGMR!k@q z)*zEVSP;RWJKJ}Q`N_bJry}C{s#$)?NU-KkQU;|29NwW@8P{v&u+kMd{jkvgvnB&b zH9&@Q8U;zLm^M9w7R`Cm9qI~A>}cr*+OIf^F;LS?u%4gJ=5fp`ra{9~EVUxC?n;YO zD?$(Itn(a@i!fUEU9WrWElO}uaMM&#S*UXUYOz$f(>8^;1bqRc@hAjh7yPUoUl>Pd z11GoB$>H#+Vc|g{)giVXNWXZKE{JM7nz_w>ZVm25md)`1>#OJV`Yly`9O_g?c_rZ| z1GN?s3e;^U8wb8u4F=sjvcNXWL_$89Fb^)~o$FBK_2we2Mw3I&dw-;#`PAVj|H<_5 zp+sBoT>g<8e+VoWE-mcGyR&q?kSlbCqfOUsijc}e%#3Bj20qL8sBT2+N+2)i+F^f% znGL#z1XkgtPALR+E`m8FhJ42|4jBh~?Pk`h3(bt8paBbM1TO?`yS}UsI!%}g-zu&` z(81V=Zd8_pk-YZ9zhnBEgmRlBv=`up>Bh1f)+T*YbXU9hlRa}L8`sI4FcOV&W!1Sl z^9$kHxQda-Io_NJlfi5^$moCyNn;}n**kduq!jJ7mr}{c*Mo`4xB79(DR;o@%}Hry zr`6=N?f8X;rLOuQW5-05k}Vfa)fMq}1#T(%2$3N?kE7gL@AET9*DoF|z*0}nZQQ67 zNOLpIbfr)q%f%;8(*Rs^voF3gx%_uM?tP1Vy&gn*v10ngG#M|pF;O3l^`sXSby9@5 zx2f=Rc;&5)~|2B;DTt&+AdZ1vsrs)nAel~dv~Q$%E$F0)r}oYLK@ zR6Ac8QFA8ZZV7mQ(D1gFkp3cwdF?ElcMMSsIZY3^7=daLD~ zvwCk6Opn&%+y&-4QJLJwjb$r-0n~WJjj7nN$$-pK47qaKs*CwrmA1#D?b&t*2qfr) zq0DMQv0Efu+89G}5Kj8co36|J-GKDDR*YP31u0^b{i2bIOez0X}R4;?!=+q z{Gt+$yvR`ekvX+%F(!GW1W+OT;}0j~#kcX6kn2ATD*WO*Y?@Y?_o=XaN| z>CK{Sz~7xR2JJNi3mq(Gtr)j@6>V&tp@+QmRth`bkI(_OAJFno)C>s|3H2nnIa+&p zyJe0KSFi^`zxQbpx~X~7ez)IC3v>8>CFc&u%1vJEe6uo)zsN~Ul_I9BbWy8Y;kUAv z%|%s4!f>jVdFuPV3b4!|BW^{LQxVKEe#hctL*Z6o@$6xH4k&1F#^9mgJKq_+XaoZb zJ+}l$FOoC*+zuy%di)~)B>H8-yn#4V$uncc2zGQf-}AMZV5MY9l^0#6q%&tEBi+?s6+}!tmG5xYO zm-qS8##2D8@iD<@8__LYGm7+_aZ!-PFnh`ICR1h+sYU4OAUKhx?A9y&GMUzx^DL0b z{7!xF1>O3Qwk|}7T8iYvlcdxA3c9<6*qA;(k>RxM_HPhzcNCZ<&mHpSTx(`c`~+hQ z$^~34>ldRwW&@-M2^TEA6GmOS5N7mR?LxEM375-nN0iUK962Jmn2|f0^r*BiLnF|% ztLwq5TA&Kd1s&U^N1t9HXjBhONVk%V)sN~ah7s9p!q+9wN&8ZN>ZG@BEDj$c{KheI zf(t$qTc)U)TGN%)5JafBlTHafOY>i8}@@v;6S3$WMkq-z;cih1MB0~ zOW%oW+8E!UsRq>Luy8mK*YTDO49Y-iVkGW!VnW$zjrPkd3FVwnPZ25=MH~zz4$R=y zO>2P!MqYnMjm@D+TgYcgQAPq-Um{j#3%pFnhSDc#s=_SvdU*~N_aYsM;%nLw(po~m 
zYcvH>*H_tJ&{1?$&-b?6aBT*}0#2WXTU#?)Ue2;cAuih&@xFbrFmsXg;9`ic`h69@ zq3I2UPSCyAFt;s32N~$WioE@F<+( zq6c~tbU7SJRrR{5=)wx^$q?Ne%W+yA>l_JOgA7z~q&8%LEr-C6+6g8uB3wCp`HN(0E7b?L<3FZY=9J!WJ{m2P!eP2a{~I{d*2&h4$Ot(2i?mB znt*z)uG@5$W@3Zz>)bgQ<1t4J1!)Bs11iGw+58$|`B!bB{FYWb&IVH_bEXKyW<<(o z!{nI6!*$E&uvsIME&Y08QY5h~wSo@+99xhSmTg*tn?5AphP&fY=8Bv!{@ja&%&{C= zs-h?4IMGdLQPdF;%JU3uOa?iBLd`ihM0nwzC=d)3OGc%0499C?AVUQtPSJFlGa4op zR;R>Hk!hq-WmVKYpV?9-r-daOZhAvc@#4egcW=|9x|hgLU20~L3}@{gvDqyZ!H84oAn9+nwQ!sVXa=D^4r7l zIKxD=#Rzp4()!Mt**lj3Ge`98S@aU{l z)Dp>Fb*FTa7BFjiTtjs}zrqeB_f9Z&iPE2w^}WLo7f0sU=tZmlTjbld_%RY7mbf^u z1N`Mf7rxb{fGPQN-j%xpR?jEPsd}R2vx)c4` ziLB3;l9i`BU=2FYJ@BF#PIo>Y@BuUw666_#HEe{x~1EQ|0_!dSZu`*K?a2 zJRHM)$_b(i0DJ&+FjbBTrsCyE^Q0SZUK|Pgulsw!{h!+}zxepvJ^^(hVV-#&!QU`j z}oG9ph5nZ+e(glnzdD4971^jd?3+;*O zrK5wpxPOKvpb{NdDcMh{F?1>pC2!fx`q0SEskR>%jlR#${ahw7jL`B>6c8H?%u{A{qCnXoHgp62 z?O*!heWFoAPSK?`Y@%Eie<1)UbogOfuBq9(2 z?6puX#iU^`FkkdQNM<)9&g0N7WsPJo#|^1WToWuYOIfo>*~Q#-W4vJFT!b_x{xmY# zUP=31g!b2Z(jb^9(4c$dI^X^N=2HI}wJwMqe<4N#$) zaRP7;G6>TksrBUHSD!BK-pRB_o~TxHg~YVdz(1LJm`4F*XxzX<&z0dKnJUYS5I55! z+xPOiHFV1-3*{!gfN#f}hYF8)7q-7e`}jF?k;)~Bnh-;i1k}G^H<)On0r&WaXdoW) znz6(2>A(8d{|x|>stODcQnwapecLGZonvqxwZ3u!ks`4)$J$n{XSjg^U=ScJGVyt6 z0FF^CjKQXs)-nyCuLq&P%BEfbOP^2su|#ZTR;%M=n=WUSp60p)v~5%K^5kqnCmYr< zT879s{y4N01szO{7%Re5*1>Q+OR2KlnyY}_h{`HcY--s&a;oasOdGCp0c*C?xpSC# z0_)WnR;PQZ{WYSbgcQ)37%H%`SumUgAuu;{GB#u>MIOh%W-#h%k+!7Kd^|pXb&_O? zw%QJ_$7vcp>Eg0A36N6AOip0m3ed|IGi2um*)I(5c&U?%vmf5L@ok(XbRkRD;`Y7$ ziJr&j3W*^L9xOA}3unBH3!178Vz58^K7Qw-OqZxvZ*jmofpE|!zwJ9z^bcG8fus2qp zCKD^Ntv_|?_mh-wiB6sL(^tw#>HdoSrBjZd&gb_Q#e*PmpW46bf6rNbwh{HFa7tM^ zTm3Zqx4r!8-z$i3_Qkf4UqTm-0#5e8<}h`$e);%XF!l(CelXV`r#vIqjx-`mG}^EP ztp#5TrjB?%>WwdE!*VpL&j_z7M;{;lj7)B~BJdJi49ESHkMn)W+l!&@to{vVmHZ9{uUFsFlTnl*?70~#qoHL?e^S&IaL3{0>VW~?$xGBDV6HNG^gX)Y);yNf z`lsrkbSEW^FOzG#+-&%A(4o3nWe-ngq<5vtBc;y_&F*&0J{PW7;ZDZ}c<|i3I150_ zM9CjWoWH4>0*cKl`Prw!(t2ONOmwpmr1_G}0o>lUv>`EfLJ6J?$BqyW&EU7VM27zW zu1*QAxICC?#6YHs@~h9}fby!z1-qwP4)jLe&J+dfQ|9uAyUXST+WYP;OEebV>iuPD zLbJa$(&xcaDwb9W*@mfnA0^UV+stAuFh>Ep|7?UEbpF-xC!^!p+ncHX@qhjAKniS* zR$7UPLvUHca!^|I8 zN8E`=?$Ru|vtzq^rl-5ByUS))dKcci7xChGukRamQLeDAG&nEplb*F-i_T9MK$yyG zY#6Ji;xzd}dVEHhtePUBD4g-bok6UHDf~Q3@|}9L#epL-=K`uR<#P#KwHHaS8Nxtg zxgpn-jf@D7)3`ObuMw-jr`!_9A8w?7Cv zXS)%$dvD7D9v>9N_{PoJ6*8T4hPDn2KbjP0oBQ@sPUQoQ`G0@jJ~6&pU7nPE+Q>0; zeiT1L#u+k=k!cg`SKG_Oo%IdBPk78}%Mt&`A-<^2?enB;4O!W3ctH_Itxzz@1STEj z7rP6?@x22wS4W#m@QyHF5C@?$P43>iwijH;LR^4v%NXnqs1*{bXD@iOA5?x7CL5qeRX>40{I^JyrbdsLlCH> zw>D#szy$Gk-M!ZQ-PzRpdgJ~fllfOc|B>S_+!y!qdxs2|Ba!7QCYXb>x4r-2zxm(!90ph^${WTA zaLi#Is7rq3&YZofTrtgH6!%m+7T2*kvgBh(FxRaxk&56Ig+gYaA~*mwk+1-6$?2Jf zE`-kRsCu+p=e&b>r=!^j5Mj#Q}&PLaSEj@OKe5?|Ps>X7N2*qYbf z$EAg0K=?CmU^a60afmla%%*aU*p87gJN& zPR!{idyg;9ejnIv`2D2)B0j_j_4*7p#9`uRF?tv2HGp9rsFJQrEPt~7w2LE}$#1o_ zMUwC(AmhMJpU~j57{VznK~5|p+xkLYJld&w2t9r~iEI{(eLpZB#hs|U`v`42w)4h{ zrO1MW!swb}Eewi7M3RJW;6>C&1r|Z{!8j{z>!vHecRW6ihsj{|c0+~Io78s^$x|aq zpzJH)jMVf(Mr!dP2if1&Ut;X6uew-(exghm$%Q>H&3CVK}(qJfQC*DBWqsA zSRRuY(0Yv5zKr^LFBXejfSWb&42C(N8!Fv1QubE|&pS8C6mS7Bap5efNWLQtJ&mlfI5Ae0wb*jfT&-fRe;dXhunlKp zC)e)>j+`>1-qUHWF82*gWdKP)w!f9oNN+{A#makcI$ylIFFt<#@n~^448Nf#2d&_eB-6-hl~L96WK@2W4z^d{uKJl|5LPsIkjW_n7pmDO-~O_JR7Db% z#wd~t*%ry&bpnw?)MbN$rqHHjyk4tgp|=s{rZ{ERRX6CYiOHiCl`uS@ib;J~@66tm zJ1C!V-b(hLxbe%YZg1t=2U4;jthS^2X@E1>2nuja2K@z9fG~BfQeA=r=sFmkjxLhJXr?AkFW;r^ zKu6!t@Qs;2-o=EMlki9RQYKl52?PXY*@Gu*e|DZ6r}?xnU>6xB4X`+Xb^riUft-Fk zM(!j5;EeR0;mg1L=4Mhi&2PUx7Q)N(zY-e&3;^h3*030omhfS+llm^VGHp;=gK-kuHXLLOe!JxZGyCD9c!<#V zxz)?tWv-SdZvmmQB%A6bxzfuRX63h9+DR(XprzE4;N&n5t-n?%AR#;*zk1^u_hRK$ 
zqNxw7#ts&*bGUVJ3g-j>W}Neo+y(dnKDEJk6>4!k;A?g>=Ju8#Os-@ zwdf8&YXYI2FBEE=qhox?IqZcg0JtIGdNl3(h_IE1+n0zC`FJ6QO2>=e)5}eHbx~U2vt<3 zJ{Fo0*Fj{upUtLP$nd~Q{Y%Lz)u*i^rbM53HBsv=kcd`sChIa_P8_yh;{^QBD%}7r zuNo-IU4P~86Q|exht>60mYTc`T_K6%7PmgSu2=Wi_C97P!sOy?N&zD8Kt|4eEV~2!2>Y}>pp4SQR4K2Pin&*m>dsvsLkd||8R^(7 z1O8*bHAQ@KR_3yk?gAs{q#29FkNj((m;EEgj!BkAmsn>q*2$F~F4m$%HxFY8H`c(# zG4=n8oqxD2hk9>GR0{DP$)TE-zi`zmlmH_!qsj&s+W)F4Slg-)Po|i z0E!eljzXGY?g4{av=&q6&gxpR=(o*7bpU}S;Jt@`k#M+NVLm?$t4N}4uk;ONeud=| zP#?n+-@JsL?aFEjdc`X2Ny>wNTmLQRu@w$19)H=ZqGq4vH-0B%UyXI!7@Nkfo`1ieJcV1 z)@3|1)NurrVS2DILm&b;FoBZt`quIc?n`5NLCph?d$`J9GAFTVbwtjdO@rTw`qn~P zVG0xBS02ayk^af&(dS;)+qzksXD^_B*367oCI6F~L_PH36v=k`es#@JZsr(}#(}vC z7`=oU1yKeAScV%fOkq~mABVXY^`kBP$%XTlig9K;}igUlfVGgQ6o z#3r7aK>0zmPx-2^)uU?Y7c@#E@z>PN0fY|$XKcxL^up!6%S}r@16ruHW#%WjsYYZ_ zZxq99A0>vay6fcv5yskUFg!~Bm^y#0Zk1|hGlp)j&_7vv(+x&>?gNu^XY-HGzxwrB zdQWgk#qsyo-(FjpYdrP`vE&Q*xiwt-a}@iP+9^vBy%juX`SoGHU#KK_2l3|?Sp%D9Ah-mC-i9>o-1ywxnllAviPhSz zJ#iB~)Oi9!4ds&A*_}9GEa?WOcjw%BlZF>Fhn>v%Em`1+zP&kBx(W!b0!A&9&z4V@t` z&*ee5^ndKZLzw)*K{c0o>_e{)OaKFY=4nul*Y zD435D(~gKJgUKg5)arI&&E2VurXx~WyIhV_H?X_yt;M{&uzRLXLnpcITs@^oet7n5H(75yo`7e#s-Q+}=g$-RxZWA=Rbbs-5(XOZ+1l@a`7gGhlk4?QV#QYY z3jc@i=Oi$*!u56Zv$uRAz3rs1>{^vyCCHs=sK+H{%m-@d-SZ!ByMq(e3=2jr;+8(64S^jwVQ|xW-8B*pJDKbOcdrhPV{HkNCZ6%Q z!S-Xr2q9)6OS1;w9!g>H`i@z2S3}_LRQ(|t$)xO0R#McqH;Y!k%}s>H-jcSr)AJ6f zB!@8rNG#Y;I{Ad^Ufrax z%4LE7l5Wt!okcj6!c{P1SLAioDtBYc_l?Ep4j|`Jj7bbhD{b(h>_j=HtLdrNmhfTm zB5)H>RO(#d=pFspVeqTWG0RB-Hxqhk)~q!1FCsVYhkCY?9bz!=;uHSzDKEc^W01Lk z#>zBWuW)3znmZU($WvbL=&hIIizGH30M!`Q$&|Y@uq=l*q|utzFPt*a&+xs1%O`GYSY6GKIV5m z)Y?a6P3>uqwj*tPQb~RE#g(znc0Cy$n3+4U(KCjiiw<*sdgT^tnai&un~tdQ%)Pe* z4a1t#ofPQ!e26V7w%3_`w&h>V_Tup$j^}G>tUqE}c$4O$n1?S5kuVkQS|Ri9D}zoS z(BSd;6nn}Qd(M}G?ionzCpE8Qsqr> zyK(*$_g3nCefzL)NPg&UbdUi?#T{TvhWa;@RbDv_EG&ci`7vF54~_p0SMEh1GjqSy zHg}UJ4!&Q&y1I_FJ&FMq^SqV5Dnfn<}P!6XRC(RT{7f z;KAkg5pcdF08p;xiz;#vEpz#}c6*z`;&6f+cSb`l!)OLc7zrJ|fy0C#S%bw&h~WoI z?==$fNyF%t5YJH(!(t=R$(ux1=LkTv@2>$1s0z(v=IYe|DY7e!JX zq_L@uqm$l7nVk(sigmZJTEYpR0(u=dzoNQ;ncu-yXKfYO+Pps)^>dLoB+!L9<%C@X z)ZA!FcY*RAdD6>;j%S+7(*D?JS0n0bHrc(qf1T_yEy9?VHuGN}DgZzh<0BLCcA4_a zxe6yE(=`A+S=hP8fTA|=JVdb7UMS>b$6Ji=ZSh}Rqj%HR0*S{I{M~LM*{%-@gZ*#z zOr52lAz?7_ecD4wrhn3iVet90B;VIqwVY4BtKYi+IINsjf0!d5m}jG&!_**pT%Jj@ z?HYYKMZ^M^Aea7|Nj&`IJUDX7LVO;3uiI!=iwvZhI}h-cAz($eW(N~8O25mrGC#2T zgEyqV<{!tA^=6j0#I>!{|4GlJNL7?v;yOL-hj5l6sp|x5+rF(fh-4-^8ja4daj8rs z^*C?tP5AFL_*q3+nb5QarfMuuo39Ql>GsKdY$(iP z8eoAUSfs$oJBi+4@ZL?GpD@ZFAUnUkvwZN?FoKJH8@VbgcO4@XL5gh?*H7w#_c+zN z9Ev<8wN~vE{@BnKxm4rrpj1MXQ<8mBzv(hHg;pzqHaWuR8uz!!=YPIVxxAT2+ z{;==OrUUi8P3&wKmNSTlXXwDzJKij{$}Pc;-!*Kkz2Vm`_ zQ{)yYr81MNVOHjcR^Oi|%8;rH#;neO2S@dvJu7efE#o3%Z-e;df@_8SJ4Seo;CvCU zCZf|wOJi?gv`)~tyKSqYgd|CU$p zA3FWbTRp_cmbV;C?1L$RZ0-!#rCoOny5`K*{C8{K+GWZqUdf&9_WbuuIfIYEm@EuX zoiR%;iWjR66~@y>YfyZ$E^bwp&nE=ffji1&;&k57yZ!M@Sl6{5QuA`=qdV87D7=)b z>+_I#{Kv~k`QuH(-}z+YIEww9tb}ca0OOi7pNc~rkSYKZS0S(%0r(?8aehAkw?e=0 z{rIcttKWHW?(|hhv4UX}UePHazWnn;^w(8W!j{%#KOL)ITm4x455d+KZj!+v>S8f3 zjd6N|aV-K*A?SpT1|vfh<-9(*W|sF_{7u?pNOvg8`0x8DSJE=QI2t$(1+e+#n@JnE zCGQr2qYzHP2?e?3mPmS?vAzq(xQaQjwUIOFMyVZdMQC2=%(4mbWS}i+l7TfnRo$5P zuIBdR5_m=X%d?s&wJ2k)A8PRrmiCuRgOI_Xm=?6NKla5soTGr!IAOhL0BTnH{(FI*I!cxF|jI)vx7nAQ_m+OUr ziPN*2CSF8%l&dS#0F3Amv>_iyvsyUV;=cEiw*Mz-e5NrYY!M+Aonfvvq znU9rS;tljF2bFW9)U?K1%3B~zXRYb0o$=q-w9{B-HZPW&k>j}eiuElHRlM>P9)KAk z)8qP3ySz=>mkB2J;;1L-E{09*_Yb-+=2C^m`%}URZa1@u<-(Di@Y}&)<5?VN$8T5G zPc`#JO-d5I`Ynpf>`Ge79yS%x^*0)48m~(UT7k>>@Ej5oF)a-i-gTWv!h~28Pp=lZ 
zlHyQE%VPUa$YsM_R%LM##6y_Q<08@opfzJ^lYYE}mOuT`uJ!=LT<$nfQ&!A|)>a6a)^3xSz_;K8K`*O}ySnqc_1UK}7_hI+Z?n#UlSrY7SaAH=C#6Lvp> z`$m0hu`#SWWNAF4SH%krOjGhs`lJhFasi-ur~Ym#wal)$tru&NbEy93ch}J=GNj7c z9K0DO_tu$NYR&iO#A-;M-?4Iu@3|L`MXngCH~hS~h;EIXxP61J?yM%4+$x>{#lz3q z)F~4;U5%ST@D|^^7?mIac-G-`=v^C2dW45k>d@dfoTL`CLs3k_qj%#XB`X^^Q+8*+ zL}6%wfE2UdZv`mZkAUQR==eW>%zSe4_TPN>FaPKN2Gq0(0?GO<_LtyRi$>z^#Gv8fC*}F_< zdcKcUeD#_Z4qkS;vOvKU;`w!C0HN79O1Lw@LR!ee&L-fowOVPP?E~MP#)>RdzXunu zeM{%f2B)I)@vnrGB$6AH*T(wY;>axPg(AAv0sGdO*iUop^WcG-Jy{N zBt7NMD6cxV!QNJHTcD@!FdNmitd2C}8;IM|0wH4QAEAivmw%r<+~b{S93PXacS9Vw zj9wN+tKCWFygLE-I!r~UGddqb5$on2@!%VJiu%h9m~Pzl`-_ zvj?^1bp7%Af*QNAn!0Gljz5exuL)M0T%5&Id3!xAv={m*J{LefvK%X8qVpotydHiU z!PL{ouNS`|l|Qg5BXEV5ZuRzK^1(4Z@!2_dsx*p~_vEDqtHyKO-eciav2A=K$pUpQ z&XNIX>lSPT_GZ~#EOP2no4~^`6EjG!)|c&CbKR<3rM{F;KHWvKUtwd!@b_x^3x!1% zBpkHn3csI@rW>Gm1gSY@cMNOj3?JSlYd3$R=RPvOhunQm#@l$iL{XpnsMrDvTx-Qk zY+n?^w)=<}yr#G(q*GVNbzV+s7GOZT2Qd9_YG3_RHfVp20;Q-gIQxT-WBB;3ZF)gJ zEg*w;Vr{Q6#Bu*Q7ta$xEj4>P*x2Ce1icSly>33b<+>k6F~=EdiR4trL87y7M|R-D z4WxzD6NF8Znfh`^fL;%gz2xRw@Y_u^u1~5_rG0Uh-2!`~WI;BEV>pkVox3q3Pw)eq zo8)_Gqc&(?tzd)pk_#ta1HaYxDdh|o5u5-n2jQcbc&10uP!o#MvSH+Rr8A_SvTu~; z4#g~OC)DO@qC)w;7QelVZ(n}z?_`xpU-q}=a+?`}%=fWnDGz75gn#0qAB5Jg3xh{L zx29`YIgz~zHbv*9sc{y#1Zk1FT+Fk~H7k7j-T3WiV68{FWIIDYyS>5bfyKmlnug?zU97kGfGSYoc4im5MbM(JN3~^lAjfL}KGMC|I<|++;v~k~q zfAsJ#y3`;Z?YKO^35kmvD6`#WX1j(8q(x`y;i>{){7PLmm1W4xq;FflzM6p1WH$Gk z|8BY9E`I4~2?wWz22=RszTN->!D>_E>M}PEdPXaYZOr0eywm14kp8U zoAl8}X5W3ibaq5>H6KMw&)RTTI`ibZ&etz2RSGSVh#n`&P7uUL%^`)@+^a1#N zL_ccukny~~DfB}WS$0Vu2tw4>vQ>>WK4{n~P*AMqIGTg}1`RjpMZ$duu3qr5ZFk_A zDae)7LIb}}tg`)B3j+|&+Me2u5Y4S1kF2I9ArazBM=A)`IBd`}ah8yn<<(s`-QTo5 z-jkQy`P>#4!Xh?u7L*usaO4BrIV^a?E?=CQr~!AFN!pXzeqKmvsIN}P7z?S|fu6g@ zGP$h=gzq5c6?2M>QnXt^OG|Rt9ih8r`qoE88}!DrplB~4mj&7 z+)81Oo3pp8jsV0amCbSCbZ3}70!J*$8MkmuB9}b8OOFg4S)qLOA{yU=-hCO~|C7u9 z6+R-J;*&6-$BD^Zk8Bw&x^AG|wH2pp? 
z(Z*O4r}hWQx5+@&ylkq$T)l77-*#({9+Jjur~Uybj>kM8S?pCO^^Z%7&bGl|Xw}ju!mF%Ytsc_bl;Wp!9}i;o(~6 z8Ihl0ZlnwiHvm^$S2Jod;mT3ij(8fjIKymX33h^6;5i!KWL8is0mA4kZyI4Nf8B|( z!~Xteh0X_B{}?sy}Hw#_1?U_@c1&O`#Dmc461RoZYH}E~tY?q|D22B@TlYWw^0@zt+bXK7DZ^xJJy$|?VQ$bjI zxy;AOH_M<3;o~7Ht#^BTp*USn>epZP%#Ab-h=mfp>sG z8(o0^Y@EMYZ_3~N>|cHJSO0S6(|$yPLM&?zdafQ&3EG@uI61}5J)b#=bT*sU#zTtm z)0Eh6vl!5U2*4Nc9aT7aJdPdL+!P1`Rf$Ffadq2|#pC+cENn=&keHooyA}Dw9|8e` z_HSZ&9?%UN7QJ8^B?L7dPaQHxq}q|Hqe9-t|EN$k3X z5h8`8GFEqp>a~FEC9F$b0OI1l)x1dM+;gvl#apvsJh4w=?OvH;zBwaf0KA^i`^>ZP z;EkJH0+|fZ>jEZMF1S_)3wS!+Glx|f>1Y%*_C3I|`AGqM&=D>~r$VVq)O1!)3*os1 zh%BB7#`?uQ@JG#{8N{cl;Z`hmbrIh~4V^=&+=`n(5xu%L94(+WhMbxf0G@Q^BS^m= zZ2&v{UC;j+`6e1~-+RYTWC`N>sy_+?FX1gfX%k4BgKjY`shozr8N96)wiLMo0~>{_2l493G|D@>eW;VAM*z1u4(3<* z@cxWtF)jr{T^uH1cx-}mdPFkC;_bLVOHJTyZ1_lCADU@8Me=XIW}Nikd>emq0-o2* zUAmN6y?JpwGlYXX*+9Ip`(&unW6O)D8Jx1|YiXf(7T+QQ7^NHCURkD!IKduS{9jq*4y|`(2>k}&uUieR`}c4uHf|X+QPKy6vCd} zPEj~C8v%j@c2swEs@X{&b_=6nT+z>&8Xg=ui#Sp&(DPML0A2UuWS;$Swb#b9oH3Z^ z;gsL|S#$3ny+5|U70-4!tUq9Dt6`WcBS2gfA#)dQy*;=G1XBMNLiZu5u0YA=Om`IcmbidV9<%7-yT-SduMzL?`G8%x zv&o?q5^9&{5}*cJd0#y%aR+*(ebv)xEkmMBT`xjO87^MsUrB0O!w2P#Wb)oc-zXm> zZkka(oHTRf`xE&M3O~WMa}|n#;iZk!hO)h99fz`Q0ck|RhJvXrAs_`8nE^&-r%jkk zP3LSeoGn1Z2Y@+6i8Gt|r`YT_=$fLQ#RA5d8#tMYi{o3^2PSlii1QXgrRxJjhG%fN zJL1N`C{3YYhV0+E1#~h_K#SV8xf;h0<7GyEZGW=?%hN|F6^6n)r4KP+Vx@P5N{O(p zf;KheT~epg>;}5qxjGyp$+ACc#bO`t*K*A?O#?-42==51U}xKSoCjfL zg1_l}iB`(V)>6(Txn*`-XXkxDJuR&CR}+}K zgSNlNH@C#@B*5+Fj0Aijky(iY5CuK%??yzK-8OsHQe0TujrcVmz!3+P zun05lW~{amHNRV0W!@;o|1_;NJTLZ|{P=Es)y^TYdY>xQHvbFk;TNCTca;WsyEXlE zDAx}S@243DG^S#>INW=Vz{Ovy9BG=X|M zu6Cv;{X^*9J!>VV;ra_QTpUTCWbLWSmcpRU34^6l-Q?bP)Q%Szbll=^ZfZ|F35Qn8W0=wsE_Tqm&#he{rBJfyI=q1{|ww6U`K!nJ?D1|sy?4PeuaUV z8?d*o(>@cBSmM$;qB*|G5XPJ$9-^ybs}&9aY9nv~<7r#t6+oOsOhw8SPllguf z0Q?Si$AKrZrgIWVxYD5#Y*%<|+MmKAy>3LyayfHB_LUw%FAMw95kSUD-*NnwwYXCmR zNa!gsJqN_q_viilL2(furK6Usii_r%^(;PA_C$yagYI z=&6XJ!Gy8`ItF4!m$9w-n>+_emp{=msrkzxs7k-#~Zhxd(_`l1xuk2VrYDJcJo@=cPdZ1WVpb z8p|c{-oAUKqzKYwI-}MVZ2XjxLH+EqbT7kAq0i~&jT8-76%+9ItnALhFl>zEwDYo`b{5{6JXY)3RDquBB13JNC2!C4_e;PV)d}4uN8R`gLi&H1b<% zd#jtd#HmbmCN-GopRChYzYpwxM0Ng^oBwo>ul}#|_M5R=jN%z)`17`Xuh&u8C&c8t zfU|Kv`1S3t+W;~?+2{V$NdI89Vk5%ZZ*?%~Q)VMbtNHBXyBp4ncOO-Ym)l&8*_PSy zT64oM&O`vRC3ZQ;#B(>vuE?cua0mg4*N^~cLe58HtM>cg#YViVohZ|oxKjK7kN@;? 
zXXgCJS;`}Z{A(gxx`~+75a!9r&9Jj3LY)PkzF!XBbjkN{{e-5p-YpT|PH)-H;VrG_ zbYMCNsZBN?WrChO2wL%<#NEHqn6l-qh7(-mX(7~x78s+F6P^_=-|&K1cUhzg_~-2{ zGI6nF@WL6_;pkGTJBK-}zX*ba=>zUv88O&6G;vobwv2qqq2^e3F{~7IRsqfg{+{Mv zro?EuA;4dXUgGLgAO35Oc3KLsl@wPN!81y;7l~k!jD{JS`KF3PIxVSlf`<}p~9e|;a=Wq-I}X%qSCzv;3QttJ7+OpGI+@v-q2uVQTuZKoz(*; zwh+NW9Bj_{Z3Jrhz`ZT#Cgcd`G)Nzy7tQK+?YD#Ilg{Z?>RB@W9hrRD@Ch_bN7v9mc27WUFxJB6YlV zy}QcK!xNj2WbJMZuYbh`W)$DQW+<}x!$OEy72Ct4WgHoqumX3|!&vI^Vw}Kc*YTaX zZz?EKOvrmfK}BqSt8x#6kO2E?Cz~JCe#~8JJ46X#;VZwD>f|?*sRngnB~0Y|wcMkn z9T>=RGca@>IcxRJcCxs0%OZaSzL@ltXq+`4KAK}+s=YtBV}El)vAI}o!7jMBx>#qXkuF^wkK;79Sv}OoYFIN1p;u0K92Se0|^on|c)b#1+3PLR-sd z0!onMtD?iXC!2EVW<{nK&>N?RvZ-h}HWmSyThn7-3+0E{H%PByKeW=**(juz^eUx$ z;!t_+TwI^Ujb8k-Kf+?c-PxK>)zEug@AuQd#tcgdQXh1=pNqYn56k9^n+}$A(uFXn zsntrYzul?br+8rS>Uu%_NZn{sw;&~+a8j}2o7eSE zCApRBU_s)ZviibFm10I<=%8c++&Zkx+`{GL3;gC^{NMjCL10_sXF%!<_Pjuh3b%}m zPkR!eQ&2^voP2ulb{2~k3x@V}nasD)B`}Tv00XvN`CMNCfU{mnfHEyayI}NsrP4v( z0yHV95r)vDv~_z;%^YKwT(^jPit`%SIJy$jd=jWkfF?58CR5)R+M74vO5Y-kvlM_n zhf^6LGgnSGQr>tN+H1bx#(Fr!>a@RXgC_7E1AHF>L^l1z9JkbP0s+Hx7Es`F!fb<3 z*qj2C#6we*iW|%M76=?m5P-lRNu}>+mZ{z#5C9U@ zP1QWxWHwBghp8DpYndY73Fo=d`VQ>58MxkpgxEDRW3F&ECxUYdHYbP#q9c1Lk`NwA zq-{&v;V~NDzIye07WnY;@$Jr7*D!K*4a6yNzAbv#_pzWVgJs2S%|~0Ce>#E3v#C*@ zF`cS)XLx*3(AO97iGr>sz_QvWVWwGU6Y}j={F#z6@+S`#>!7@^3g4>@CI&8-`E|iU z=kF$hn;Mn>^7#gF_odBe9{^*Nhj5zNnc>&DTCa6;&)-?G zy*}syRC4yaYXx3Ci&f>8wgZ-5eY~uFlbWP%^K4lgU0)_= z2`Ake`CDw+B**-vF04P8+?fzp!H!INFQebT*O3hlFvOa4*KxjS%@vq_jCF>U45bht zAI%p48TfPn^`mbq+IrXD$Pw3~7(HJE8H@T3FeL$cemQ#<)g%<-%D5;2-$qea2;vRg+#*9HG2>9l3Ys-BzKzPJVTI z{8VN8krm;tUyR~M{-|u!DUCn>BpzN;v3&;6k&9vLAXUwHNy{9=LNGsBb1RoSg|^bl z>3}3Rf2)b)I(6@iX`~$DwTBiXt4S44^PQi33t8nc!sV6k(Nx+qopW}xWd8yn3o(~} zero4))6or&xM1eTOm)6$d-jdi<7dH)SeL!Y&G)`kvqX=KKHNw&U;oWK_N>6yyR>|_ zwLh;apxXdWVto<*`la&_BeD-Ky4ZR`qUGzCeYEJEXUv1SdgW}y%|p>%3$18W1{lQ% zDWrdn#Hr@=PEr^%f)(p;_P4E46JDeTi`(sG@_GlB9Tt7`7XQoa=#I&+?&z^M%uJY5 z1?r|<{uAJOSU|{T96-jm0>h#!kcj!g%<-8bnyty@?eUm0z8we`!)eSoPxREL zPY^p%|6l*+_{x@821ws`v~OU~TslZ8Ypxms$oO=a?F19S&d`aoslSzuFW|T#8*DS| z(p>R1^{2N7As@PQiHp#}M=%6s}yR+Tk?vJQU1ggT}}}vx$slyI@vLK@bGb zO_jDIekN$n5ML)O2!i`P9!PXGpdbSU8*U_fv0L_3b^$|>@EE$dSZ+N@kWD12)t%!t zgsEK6zsK-&@(dYmH0i6DR+!kGjr<>6flC1C9L)XGnC~vQ1giPGt~3cCPS)JmEp#s7 zaTecP(yf@1hK1E?Hkzj0csq2#819F(Fl%fU6JHQU@;U(pgW)E=<>-lK{05oYk65%D z``E$$ef0(y13?ChK@Wt>@A@1S_n5hR|Hss;pL6WNv}w$X$U%G1{(^?v*~6_*m?5OU1kxtpy~}9idPB1p%uL=6&_YgDBrCN z6VM&u?=jDaM>kz27|vN%-Er5)5)rirSk7wM$#@4j5eJD^t@Vd@9jNNc7m7d4GF#Ux zrAQE-=E>Rn^8DPIj6#|G%@kY1I%kZ$i>foM+V;R))cA1boaJKREP1Kutb``)8J^Ge zZgHINr3~PTS2MXxyV+QS!*;6@FHGX-lt@0YUYt$JypM%^i-p$vUT23(kv=vF-P+@m{P(;F!L*2`s{%~AEe+me1V>l4}{1=BJwgAtp!9RgF zXX1~3l+k}kIQzEQyIidH*}IB-&R&=(aSze!gIp3`Aj|LT(8tLPvgoFB*_DbkyjU$! z?_9%qP?(L)L?R=I^DfLrqSZR&(G8FnE&Ps~po7wM=nknG=4w;JIE zsUn()CH06Zud9$XDWKRl3d&OIYLVZiRpfN48XcEi_Agnt?*N6D$5FUx?$wzG4D=($8tv1aWK=>0gq|{BTgoxgCyI9^ktOR6@X5htZC@E zDupSuCXC`2z^E2)+1TcLb$aFjgGcP<6WXqKd(9}kL(kuIy)fofxzY}Q&*xR)*pCkM zmQK_d*v0^~l}BdpUgZ)Me%Ri>w>3-?x?IluyRXL|l%-Dk*O#Xc@7*8Tb>E()8mNo_ zZ{*zs_tnW!ou{ksXR_Bo!H`~LY~1hWyvGxW(!&YvjMG_b)_Na>e%;Szp#fdu{lyYU zBph#v6wt|+?rfqbqE&c5lDjE}SzbdPt{6ldT|!$MZT~foF)?(95=|H*xas_LOa1I# z`t`LT!92HCmu6^)W?LpKF9+j1LHBCicP%XV?mRBvQ(s#oo~r$`Btu;K9l`h@2Kc`- zzDl&2N~XD@H^Gb!lQT?xxX`*HiXercPr&HS05RxNs%G7GA&$|(YjD-KqRcme{eTpe z1&=LBfX>RX^W5s_8*>8qPlzOO+`8Se_fl(ajJ2)}C!4QPC>WCV<(G-vqch`_tzrY4 z*tI#=vuA`{(nXTtUV z4akZ%=3*e!y_1T%vlTEbp76##He$j|M zErb`#1T}iqzketD1KNq8>28?u5+XkXYPqdmC7l~@WI|_^JYUP+O>&wUivvIswb-m! 
z8)V;0q11+SDw9V|boiB>NZFa27pQ*IIaOBYIoBzJ~H+-Dw4Yu~q zU!9hBA!LxbKrSO>bEDK;+}auvs}2@Jc55Op?+4IwzEX)tk@l+zUBTki+Teo0wl2(t zM*~)NvX6Ub3b11l697IC;8z30&nxtVbg&ACpWS+Zw8LSqV*^{~O^Xx=pPWldYP`-a z>NiY`p&1>ir%9PK18$$6e>3lTR7wJ&{>t1zG$2Q{-pB-??bW7}+jtaTTqGsB0~+ha z;@L|8%fS9@z>u;nEoGkI-Udf&`}b~s@M_%nmQTf;Q`-hMxHX*~7)L+^fT=v@AP54E zrww2jkNt5$=^Z`|KRCL$i21(DlJg+-46sk7izU96y{-4}#*2MhgSEsE4K{dcl9)Zh z^^?<2@Xc16QMblA<|oV!H?1Lt>4@~N~Mst;ZC#*PTcg9Rj5fq_SD+u@tX zEdA~E{fCtwG)JL<4-P8=_WUXxfB&bS@gs}K{%E}ry0(=g_rcgWca6KANgEMc7D*pCQU8dPdH*%d%;LJC=%UFY8 z{XhlA(sDQ+;UNrS^EEg=7EAc-YO!qM_+#KgWFI#~k-J7?v)hXrOZyHrCZL!nOJetS0<=>-f=SRv;Vw$r}uY+!P>!(%avWEw=4XCvQo{#Nhj}!Gcl}6 z$YFOWrQ?tZX_mr{;52;vTj4!A0EDYK`kt;?afS+rj;L++(ptG|nOiMuDJRwKYa>Yg zInbF+sOBIY6@PHOXRfU7|J6^w|KxKGGIJ_HtHCAUt%)h27X9Vlo;Lo+e||UyXbv#k z7!V5g%8gC}@Xs^+%$5Pye}dOf;x2SeXut}7)gc zyTn=-I96Fk@XEoRdGPhXF6QBOm)c)TD%0z(KtCajrAlAAQHEd^{aJoJ>^h=UCEV{V z1zsiydJ<7FU^bpZfDn@yXGs9@4ijXBl{3Z$^Xx{!wO3|)i9>5)EpQo;o@mWpE?GDl zClsoKzy@3rkduBC1EdI07i%tnW}md(5ri12(KKrUZ2)Kj3_$4|1iL(uUs0Y$MAa_5 zC^WQgI-3VpN>V|`MMU*Nbg-UCF3Ei3O5Mt0ra8L7vfB>ROMI{PVl2AQA!4UQPbRWK zoYS-FM33JEKvfuFL6{ul?}R3uyEhn<6xw+d;xAPx533&RHSPvA6Q@T8j1s2vJ`o>h z-_IBQS?lPPN!`^hn=jsuKO>Ok_3CbL(J=-!Q@etxwa;e@h@QV~siO=&rw)P4|Obao3){#@9CE-x73e)^c#~?_l&_Dx+OA5frir9+tJ$vq}G(&3m7| z^8@sHB`ov=IHAwPgYN7BL)O(yl}hnI2r~4*HW|z7aPxp z>q3!YL&qG{YY(2QOhxK#cH<|Ob50Tl1z$$)qMQ{TH^#H=%Dg*TpB>*rCQp0<48 zeY-j6-yc3<0Zs8I$tTQh{mQX}DuIKSTkxNu{l((W?Zk@7XYL-Hyo$dY46=O2N>I{` zG$M4FedQCy$i^ZR?q2}+6SGt25k>wSQOmuJ)9lPy*nmGzCiknXTkLx-l6?1UWzQ^r zI{Blme=wjgoL5nKznox0sLj5vsi*?UyZFW1hz4#8ZCd>^ZhEu?ugrxv58}Acx*dU_ zl4IxZ3^mqB1Tjic#D5&0{7%kq#dalBF`4@gFS2EMZT!;ugjpD+2VI{|$}1j~DQ;oS z?mSyVr`=fS%<`k7PW-M4_C1}Uu3TvI@_Hk&cX4w%J({)Ax1D>ScbaWar_51%oEeU8 zLA%1{y31fO89oi5Uk9msMdi@006}ltXWGVsvpp;VoXzzS1qergmU8$4q1>E_nY4Wp zh4;O}l}Qq*5z<^OkmQ=s24>)G?Qvpo#IW}D6!NnI$Fnux?ob{d6h6@uTt(|%EO0SY3-M^5TEC9-Ix~U z70n+w!v~2P?j!Rw!(1IS^{C;O0#!t^~ z%iG|2w**ZN$sgn8Wqf#w2;Nl^{Ntaet}t&5gtV(+nOZbJIPvx*K>=4*JQ29+l3@T| zqM`}QEg-$@Ec=}9GP!KucFN7tO};Ok-hZ7;g?lU$EDU~O#ck>Iy*G(>AA=us=`Pw1iz49+SW7Y>&NGyrAIjrAkew$I zsaEtV-g_FAn$e($M|f0wV-M4?SF;Mg)Aeot-7g*a9~s>d=IWO|DYp;87=SVhg9rm-Zea0;aO^wmQeH6s*^&Q{se{L<<&UYTA!MQJ>~?za zoHy593s>VOS0YvVWnJx zPM7-194$x`h#5!%fjFf4`7Q#ll=EU9a#tGf2x;`An7c|H7?s4(YQRIT?X3G;Bl5#x@ji)7E+A1^aQw2vrkxY;ZlMdS0}!_ zzjWW>)aLbRk#GjfkMYD`3(bw^fnw0InDCfsby9t`d%npY*qy7UvHL*M2aG%KD~U#x z`!81U?q!6(%&vJu!L+S>TfWYT8v8Mm~wQw^QFWr(TSsHW992wh|IzL{#)W z+2%a%7PzuNQ3y!~$jIQSf)yzZr6E1tQt(RhQY>@&GLeSF=4@rpEhIj93~{in@q>tAoi})+o3! 
zdxwuA=lr>Or`QGiKkiO?%Y7%_&(1+>pAWiX542^-_|Dk9gS!xZ1NK-ZeqnO&-5T68 z|3B)xwDpbY(%A?=G|?I=e1+(!s}J5%i|X;P_wjkvOHqGCm;Rh=)^CHRf#31WRU?hWHmr+rg`u2W7GlIRsGr*m7_1l=@L>}-x{obn3+G7`kzleT*GZ= z^43Tij;f{7p`(7ZLPlu~n&#j*l?2bp5gU~7WeZv=V~G{j@@kj=py@3{*YOjzux@N4 zqc6QE@20cwv(74jX%B4)Y&Gxa>(N*RPB%OM;Ks52Y`s21KiKQP>`%!lgdHCm#f&hS z*}OYhq$jK$hsJ1ncXjgSX=?Ac9J2oX9Nn+|!%xPy?qAe>`|s{4ZMU{DBa45Uyfto@ z-Cc-ELi$KNNiY8BGQ}pvjtzH^HqRFt3+3M4!u;Fs+Vdv`Zfq4Fuy4<1)z|;x>v5t@ z@KZc{|KA*cRU1TM*Fa-=Q-JWsF@pyUs00ruOg>N9M_2Rg-UH1##V31etXGL3pP$|~ z6~N2TcIc7>1P)5Eif4LVRIddGm+4<7rUXus6Vm}nAxg%JhqEh+D~U72V627O$NdMA zrVlnCxzp=gs}Hd4Z^S9S(m@xb34dNp0_WVu!BuaOA__?OrnS%?mG2#Dhlm0Q`vfqy zBNdotE4*ze6Ok0AYs;`@(;lE(^0H!WnrR~~z}moV211RbubRhLHHwqF)9BD2Lrf%Q zWz-tS`P6J?WW0{yVJ7v-$hn3jHW>P~c(!e=c2jk5IGYh$NDym;(W|S&=f0c{_fq>7^)!iN57}fKjF8cP-DCms#iIzI6;}Zk6{oq0RHf zxXF(&aH|;2y4bX++;`KnF6hv(97DBEMnc~8F~%&zJN*99;Fk{;S$CiL!_40Cluvz} zHhq_W_=nTpJ~5}RgJWVp4g=)7$owWe6yjYZ42$4`P5kELd`OAha+YcI{- zea)S@;(!^3+v5T<@RcJi^s-{AR#sA1KyS6x4DD#*AlJzkd z$)UsasVKgVv1aH^^Np~1FF|z`F{B*^I3A5kG7)dnHx>L30^)^w;|@0ky8gFLm~UU| zIT_-Z+Iv>8cmR=MrLjN&`YbT0m{E-8q1$0v^{#i- zEn&0OyrGap(}wOl$R*_Dz(iuZw={T~$i?*Qp?872f9LAR&NK3zdgD!yUZVg=QcnGj zGzGst|I5Gnf6WyO07*O|nUtZj7y}L1E1(M|6M19;^!a%KwI?8M!%k)b!(p@_Je>w; zhcN;Dz78}fKr!P02nqm3KLW|n>%Ptc0JOQ$8ect$up2EtHxXdH5488TopJX)k_H%T zsVe{w5;4M71QA#)o|w;0#1=VKQq|~moB?tUv|2$_ksbChlOEIXQ#R3z-hf2aSD0IC@hJUu%=tiWA9{?*$-#^$eiKQy5?W2wzO4_^)E#x7zlY}nvYrYX%y~oY-e~W zfAGoR@Od*5Hjn`F{TySxqn865zdsWl*K1nR{=K=@on@P8rHs5t4|gc4gp&)AEx>ic}b#H{KidVI&*YIGGa6S&IC;J1;)mjrw-g1)nUo(dQ^Uro7A_e|Se z%b$q+V_rLTf=#?K)?e*jY_1-97%aHScw*(ay6T(2^`h}|IL#)@QswI6jlBnKch>{9 z>u~EOv-vuK6})I~k-oN^#wS7fFLn5?w^$PKVnrb5^|vdAB^1P@Q8zYE-mrba3kfQ7 znHaARve|X*_6lX(bClYfp=-yZzxpq8JKwPBvGIHj=Sq+cl02GWzd3=VArK>!Gqdm~ zfb$$w?xgP=Q}Z9;y+$uO6VZ=CaukNtU6H82{!Yo?!R6)n+%(8MU+i24;z6*GAo#44 zdTd_&n?v&NmxAAIGiBSq78qX-0d5!Bj+AK1A~FDr@zwhCgE>vx*xt3-%=AkTaTL$&d1IiTS)qd6`cAv z<@g=Dvt+`x&U*L}1Rtu$eP_MO1ZUste^P0`)DtqgSgaDc6&6xzpRtInfT3y#0}P2^ zQ<;Fg`IK$N6f7{xhTpuXG1OUYeK}yzSv2F*?SsCB(aqIza; z-ufNl2(l`>kQ{RTJg&z!Vtgcwud7S*Ydixi*==?;ByC1JxX1PMLVw;amD1KR03~Dh zjL{W%O*}3(-<3Yy*JlkXl(F^%+}kA>BU<*S5ay_8+ZkkxQ^?$Hbo;A8V*02V{fBEi zb%2eV(?ps`ZbL*Y5%w&?}L}?V`PjGs* z4JO}0mM_xan9s*z8#`uacs;&N_LoY!+`n8!y5l5!Y9SRPWd5afrTzR}Z1UlMto~aA z%t{ya7tO3VJuViu@ooqCxby0ZTrLEb9Bk%%w$UF_?2^=WI_tL?559t+f69cve1~9Z zY{bEL+MT%#K!D%@R0?y431d7FljtBNO1Ic7Y&<2`n+gn9+=(*dmwQpIJ0B&W4;y@o z&G(=iKeWw|O4<39YXanssOZYxIN!&6;|>}E)Q-A30|=rxkDD0<8%Pci%*f#z$X zc17@wm8Y4D43g#Fyk^lX?n;ogUn;BxAE%uD3RU*+p<0Ll-m8M@cAIp&%`E z`+K8GLn2wiU&1=M-bDb0d6}3oln3$v3Y6Bzt`=dPd&9K`*WnVfjXJ!Kwc&{CkT|C# zMvH)cTcK zxW;5u#*zXUEM(jT>Wfacw9K}lchC+S_r>9r3xF*)1*DP8H3C5>)^uR!k zcw}!JOc!TKE-ej(COG+shGRit!JW9_PyBS2-t_zU5lqnGjI{HC}{@_>j-~O{u{((0#bkcdFqR*zVwaG7sq7VzCk=vU=#njM+uI2g0 zEQY3{_2%Zw#S0w5mHfq>;o5R1YrYzTG?td5P^HztU*2&FtbY18Y&kU)Y-)fB30A)u zZf4Q%yKpXwJIS^akqgCsyq3CZ4LJ;}p7(#h!7h6J)2Sv1@ho4acIjfH5QSqs-H+Kj z&h3gRCN-srWIV@%I$~UGD9}p4?z#{?6sd=E>CEgCK@>JY)O5{jkOX&>00ME`#Hzy06;A0iwe9KaR;ilMzF zlZ`vzJW@bEO~6Tw{JotJODDVvC>pr(SpYI2#7`vC0i{jn1po{LXy^(YC;^m~M!BIk z&ANPF3;~Fu8Pli1-vqeVFK;D0P9OYe}=9A!`r4HExU)k!Rl~xCj<1;tnp$T18f7RA^WJ?JTuXF-WFG| zkAK>;ugWZRlbZqcIWW%^qL>BX(4WuU=c|Rm@27QNhh|`y8*A-sxUSt@pfiFp&U^EN z%H}aLlhFbbyMlhVhZTOSDI0a2{H!|sP^k^q~a|h3o2c&9%-%>zfm30?a{_2uk-iw&7iT_)h28E(m{Q-uXd#NvxM2#eh{qUf!a>V}N7`bJAUvIp*soTX^QmQ$<~- z>x4tb_PEq>cET;7HCCIod64d5F!^)(jG;!vVAsf}WGc z0HZuJ^cZq-z_6`bbcNXWIPF{Vg97lYFvNg<$@DR7`wAm+5NjNMF&FoB?7IgJd7H%% zDz$-iop;9Jd+Yk?3liLwYjJv|q(btmCS8e-ioz+!|4;X9ar8(aQagl_w@L>ZdA+o9ig^6D#^Y=CV@p&CS)11Lp8_l+Rh`s&3@f 
[GIT binary patch data (base85-encoded, "z"-prefixed lines) elided; this span was garbled in extraction. The readable diff resumes mid-hunk below, in libvpx's top-level configure script.]
zy>rSp=Fo#hXAwpRANRSD-Qes(@jjrQjUO`+*xW=y3*MK{s_j{qaS z03ZY49l#YbtQ=sVx+uWOAuR!62@6MYnQl8u6m)_Bf^fSS61NeVb%+9v+GXh7oDSyW z2$-&-uo19JXvW9Z>R?yfno#GfMS3d^t^;XpYhAM5&ey+&sLY_en;Yhe^Gmu53h)s_^m zp%Yd&*svV+mUsAvROHp()H%8WrUpNfU(SNy92-l8r`{0mYsyu*Z*M>p@#q(0aV`BD{>OB9c)9HcSAY0W0p~NvYL%Wo8+vYkVYS zzc)ePo|9U4)~mWrp!9uI=rx@LHmb8ZO3F>094C7?h2)YxPszRCu z#TNd!sK-0;c-hc~u|;zDF`xMTt;FC^HiUe$81gp^g`BKCj3j;3i)nRfM{#FxP3pHi z_|W7XK{yKXlG(0)dK>9#R#>F=6P^fvP!AQ^6UK1 zg`WO>7^iAtQ~`kqJ`{=1)x>dB)|Vr@jsa^2(Aemq;k=|_2wnKO(iq6Y$$V^vcbM@- z-~d<%WTMDi^Gsl!3X&UMC%WFeNVf;~FMWh&UrL zosoM^Z=jyyczS$ZFZ*nm-draS-N7XVggtnk!Is2C@A@&=;A(^RrOrf>`7Ux}3oK=@ zv5QfU#^8r?bbd63<~@(ygM|E1)uzGsP=sgV=Q)8=K%_Ng}C z%nDkuo7-B)I#ReMUu+py@XaB*M;UGFNjrQh7e4Jo|A`Sa6Y2=0^0!Isn>?Jm%Lk~; zrf{T58yzR)eI^OLeW>9$Ku4X6J!zW|to|IIr&dwB03RO+i~o?TZ1?Eld;C^j#c*OX z{4rLz@wGWPb5iS>d$u`0e!=Qnn#UwS`Q>~b-}(O)1Ru?fn+E_uKYZvN1WAES6pKVE z)M`t132oMC(yWdmX!UqctcM^eZ zLtAO0?9+=%5AJRks~r=Zk?^1!w&9Q8jpxR0y9`lmmSi>JKIp$2`fAPq00j_9L_t*7rv^_#HE~YooG}_p z7(4^_H=Gbc=`Q^PlD7JQ(Vg?6Q$nJR+cxBH^r75C zYZjL{(};V=8)1I^_UCE|5^Z+gU< zohA;4*RvX~dnd;w5fDiWC^%jHVLfWBuS?qS?*uV~rvgI(@C>YJ?HWlpxV{K0e6nd@ zBF=4GbnPwrFAz;fl;9`;>COG!fOx<^e{#T(Gjj5|mMv-oSjK#4s}!nZ58P`yy()v% zwglo9>_)YmF;k~2cWEAAxdZ%yEcl0N4v?U8^};7s{AM$4kA<*vrFJH+)~J>oKc1X= zTOBO`l^WzhDbHPfL64>L@p4dEw|1$B!QsEg7 zwt4aNM6fX=Y$MC%@ueu!f7&~a!@w?+8t0c)AngVSrcjTe2fO8Zg%_z z#E`}NYmHEn`R*Tpsr?uFq``ztUv<(gIXz@&ELP3yA=#Uw@i`QwIc1h45Dp_yjn;Fp zapG3!>H4{Q)t=%T5xyX?pl(*KK{~@r2UJ3c5(u=Ph}hVKpn_w($AkB$W|GxNO>eEV zV}#e-GJzeWE3oTT7F(&%EJVILGR+ICsVsqD6P4J=xIWz8%W~061^|p<9PSR+A9Pu> zD$BiZ3eb$%GR{Bs%UqhD*&Kh_qv+u}OWu}IQ+hOVs$rLN;xY7uXRlf%n0GV2-cb?{ zimlDK<5}6G=QmQ59LHTA6e4kJ0RIszj+Bg{N{@jCUlL+QwTqrz4P@Sb%T$9{L5^-j zIa-vBs6YwilYqHC+c^cTw6$9?RkM&oa-sadi2TdRm_EM-H%uW=B|J$E`!+4(Q3=oY|RLEq-Z zH*372x$ZO1i7ENCfhMP(?;{gIh4}0k=kl@eOI20b6GIKVu1jbEhH2-q4Hq7VzFFzmQpvjB( z2Pl@##snaU@3E*dP5B +#include +int main(void) { return pthread_create(NULL, NULL, NULL, NULL); } +EOF check_header unistd.h # for sysconf(3) and friends. check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports + + if enabled neon && ! enabled external_build; then + check_header arm_neon.h || die "Unable to find arm_neon.h" + fi } process_toolchain() { @@ -599,22 +624,39 @@ process_toolchain() { check_add_cflags -Wcast-qual check_add_cflags -Wvla check_add_cflags -Wimplicit-function-declaration + check_add_cflags -Wmissing-declarations + check_add_cflags -Wmissing-prototypes check_add_cflags -Wuninitialized check_add_cflags -Wunused - # -Wextra has some tricky cases. Rather than fix them all now, get the - # flag for as many files as possible and fix the remaining issues - # piecemeal. - # https://bugs.chromium.org/p/webm/issues/detail?id=1069 check_add_cflags -Wextra # check_add_cflags also adds to cxxflags. gtest does not do well with - # -Wundef so add it explicitly to CFLAGS only. + # these flags so add them explicitly to CFLAGS only. check_cflags -Wundef && add_cflags_only -Wundef + check_cflags -Wframe-larger-than=52000 && \ + add_cflags_only -Wframe-larger-than=52000 if enabled mips || [ -z "${INLINE}" ]; then enabled extra_warnings || check_add_cflags -Wno-unused-function fi + # Enforce c89 for c files. Don't be too strict about it though. Allow + # gnu extensions like "//" for comments. + check_cflags -std=gnu89 && add_cflags_only -std=gnu89 # Avoid this warning for third_party C++ sources. Some reorganization # would be needed to apply this only to test/*.cc. 
check_cflags -Wshorten-64-to-32 && add_cflags_only -Wshorten-64-to-32 + + # Quiet gcc 6 vs 7 abi warnings: + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77728 + if enabled arm; then + check_add_cxxflags -Wno-psabi + fi + + # disable some warnings specific to libyuv. + check_cxxflags -Wno-missing-declarations \ + && LIBYUV_CXXFLAGS="${LIBYUV_CXXFLAGS} -Wno-missing-declarations" + check_cxxflags -Wno-missing-prototypes \ + && LIBYUV_CXXFLAGS="${LIBYUV_CXXFLAGS} -Wno-missing-prototypes" + check_cxxflags -Wno-unused-parameter \ + && LIBYUV_CXXFLAGS="${LIBYUV_CXXFLAGS} -Wno-unused-parameter" fi if enabled icc; then @@ -685,7 +727,7 @@ process_toolchain() { soft_enable libyuv ;; *-android-*) - soft_enable webm_io + check_add_cxxflags -std=c++11 && soft_enable webm_io soft_enable libyuv # GTestLog must be modified to use Android logging utilities. ;; @@ -694,30 +736,23 @@ # x86 targets. ;; *-iphonesimulator-*) - soft_enable webm_io + check_add_cxxflags -std=c++11 && soft_enable webm_io soft_enable libyuv ;; *-win*) # Some mingw toolchains don't have pthread available by default. # Treat these more like visual studio where threading in gtest # would be disabled for the same reason. - check_cxx "$@" <<EOF [a span of the patch was swallowed here during extraction] + @echo "ENABLED_SECTIONS += samples" >> $@ diff --git a/media/libvpx/libvpx/vpx/svc_context.h b/media/libvpx/libvpx/examples/svc_context.h similarity index 83% rename from media/libvpx/libvpx/vpx/svc_context.h rename to media/libvpx/libvpx/examples/svc_context.h index 462785075cbb..c5779ce8a9f8 100644 --- a/media/libvpx/libvpx/vpx/svc_context.h +++ b/media/libvpx/libvpx/examples/svc_context.h @@ -13,11 +13,11 @@ * spatial SVC frame */ -#ifndef VPX_SVC_CONTEXT_H_ -#define VPX_SVC_CONTEXT_H_ +#ifndef VPX_EXAMPLES_SVC_CONTEXT_H_ +#define VPX_EXAMPLES_SVC_CONTEXT_H_ -#include "./vp8cx.h" -#include "./vpx_encoder.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" #ifdef __cplusplus extern "C" { @@ -35,10 +35,8 @@ typedef struct { int temporal_layers; // number of temporal layers int temporal_layering_mode; SVC_LOG_LEVEL log_level; // amount of information to display - int log_print; // when set, printf log messages instead of returning the - // message with svc_get_message - int output_rc_stat; // for outputting rc stats - int speed; // speed setting for codec + int output_rc_stat; // for outputting rc stats + int speed; // speed setting for codec int threads; int aqmode; // turns on aq-mode=3 (cyclic_refresh): 0=off, 1=on.
// private storage for vpx_svc_encode @@ -71,7 +69,6 @@ typedef struct SvcInternal { int layer; int use_multiple_frame_contexts; - char message_buffer[2048]; vpx_codec_ctx_t *codec_ctx; } SvcInternal_t; @@ -106,15 +103,10 @@ void vpx_svc_release(SvcContext *svc_ctx); /** * dump accumulated statistics and reset accumulated values */ -const char *vpx_svc_dump_statistics(SvcContext *svc_ctx); - -/** - * get status message from previous encode - */ -const char *vpx_svc_get_message(const SvcContext *svc_ctx); +void vpx_svc_dump_statistics(SvcContext *svc_ctx); #ifdef __cplusplus } // extern "C" #endif -#endif // VPX_SVC_CONTEXT_H_ +#endif // VPX_EXAMPLES_SVC_CONTEXT_H_ diff --git a/media/libvpx/libvpx/vpx/src/svc_encodeframe.c b/media/libvpx/libvpx/examples/svc_encodeframe.c similarity index 85% rename from media/libvpx/libvpx/vpx/src/svc_encodeframe.c rename to media/libvpx/libvpx/examples/svc_encodeframe.c index f633600c799e..a73ee8ed6607 100644 --- a/media/libvpx/libvpx/vpx/src/svc_encodeframe.c +++ b/media/libvpx/libvpx/examples/svc_encodeframe.c @@ -22,7 +22,7 @@ #include <string.h> #define VPX_DISABLE_CTRL_TYPECHECKS 1 #include "./vpx_config.h" -#include "vpx/svc_context.h" +#include "./svc_context.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" #include "vpx_mem/vpx_mem.h" @@ -95,17 +95,11 @@ static const SvcInternal_t *get_const_svc_internal(const SvcContext *svc_ctx) { return (const SvcInternal_t *)svc_ctx->internal; } -static void svc_log_reset(SvcContext *svc_ctx) { - SvcInternal_t *const si = (SvcInternal_t *)svc_ctx->internal; - si->message_buffer[0] = '\0'; -} - static int svc_log(SvcContext *svc_ctx, SVC_LOG_LEVEL level, const char *fmt, ...) { char buf[512]; int retval = 0; va_list ap; - SvcInternal_t *const si = get_svc_internal(svc_ctx); if (level > svc_ctx->log_level) { return retval; @@ -115,16 +109,8 @@ static int svc_log(SvcContext *svc_ctx, SVC_LOG_LEVEL level, const char *fmt, retval = vsnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); - if (svc_ctx->log_print) { - printf("%s", buf); - } else { - strncat(si->message_buffer, buf, - sizeof(si->message_buffer) - strlen(si->message_buffer) - 1); - } + printf("%s", buf); - if (level == SVC_LOG_ERROR) { - si->codec_ctx->err_detail = si->message_buffer; - } return retval; } @@ -169,6 +155,7 @@ static vpx_codec_err_t parse_layer_options_from_string(SvcContext *svc_ctx, return VPX_CODEC_INVALID_PARAM; input_string = strdup(input); + if (input_string == NULL) return VPX_CODEC_MEM_ERROR; token = strtok_r(input_string, delim, &save_ptr); for (i = 0; i < num_layers; ++i) { if (token != NULL) { @@ -208,6 +195,7 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { if (options == NULL) return VPX_CODEC_OK; input_string = strdup(options); + if (input_string == NULL) return VPX_CODEC_MEM_ERROR; // parse option name option_name = strtok_r(input_string, "=", &input_ptr); @@ -294,8 +282,8 @@ vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options) { return VPX_CODEC_OK; } -vpx_codec_err_t assign_layer_bitrates(const SvcContext *svc_ctx, - vpx_codec_enc_cfg_t *const enc_cfg) { +static vpx_codec_err_t assign_layer_bitrates( + const SvcContext *svc_ctx, vpx_codec_enc_cfg_t *const enc_cfg) { int i; const SvcInternal_t *const si = get_const_svc_internal(svc_ctx); int sl, tl, spatial_layer_target; @@ -471,8 +459,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, svc_log(svc_ctx, SVC_LOG_ERROR, "spatial layers * temporal layers exceeds the maximum number of "allowed
layers of %d\n", - svc_ctx->spatial_layers * svc_ctx->temporal_layers, - (int)VPX_MAX_LAYERS); + svc_ctx->spatial_layers * svc_ctx->temporal_layers, VPX_MAX_LAYERS); return VPX_CODEC_INVALID_PARAM; } res = assign_layer_bitrates(svc_ctx, enc_cfg); @@ -485,11 +472,6 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, return VPX_CODEC_INVALID_PARAM; } -#if CONFIG_SPATIAL_SVC - for (i = 0; i < svc_ctx->spatial_layers; ++i) - enc_cfg->ss_enable_auto_alt_ref[i] = si->enable_auto_alt_ref[i]; -#endif - if (svc_ctx->temporal_layers > 1) { int i; for (i = 0; i < svc_ctx->temporal_layers; ++i) { @@ -514,7 +496,17 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, enc_cfg->rc_buf_initial_sz = 500; enc_cfg->rc_buf_optimal_sz = 600; enc_cfg->rc_buf_sz = 1000; - enc_cfg->rc_dropframe_thresh = 0; + } + + for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + i = sl * svc_ctx->temporal_layers + tl; + if (enc_cfg->rc_end_usage == VPX_CBR && + enc_cfg->g_pass == VPX_RC_ONE_PASS) { + si->svc_params.max_quantizers[i] = enc_cfg->rc_max_quantizer; + si->svc_params.min_quantizers[i] = enc_cfg->rc_min_quantizer; + } + } } if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0) @@ -548,8 +540,6 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, return VPX_CODEC_INVALID_PARAM; } - svc_log_reset(svc_ctx); - res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration, 0, deadline); if (res != VPX_CODEC_OK) { @@ -559,56 +549,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, iter = NULL; while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) { switch (cx_pkt->kind) { -#if CONFIG_SPATIAL_SVC && defined(VPX_TEST_SPATIAL_SVC) - case VPX_CODEC_SPATIAL_SVC_LAYER_PSNR: { - int i; - for (i = 0; i < svc_ctx->spatial_layers; ++i) { - int j; - svc_log(svc_ctx, SVC_LOG_DEBUG, - "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): " - "%2.3f %2.3f %2.3f %2.3f \n", - si->psnr_pkt_received, i, cx_pkt->data.layer_psnr[i].psnr[0], - cx_pkt->data.layer_psnr[i].psnr[1], - cx_pkt->data.layer_psnr[i].psnr[2], - cx_pkt->data.layer_psnr[i].psnr[3]); - svc_log(svc_ctx, SVC_LOG_DEBUG, - "SVC frame: %d, layer: %d, SSE(Total/Y/U/V): " - "%2.3f %2.3f %2.3f %2.3f \n", - si->psnr_pkt_received, i, cx_pkt->data.layer_psnr[i].sse[0], - cx_pkt->data.layer_psnr[i].sse[1], - cx_pkt->data.layer_psnr[i].sse[2], - cx_pkt->data.layer_psnr[i].sse[3]); - - for (j = 0; j < COMPONENTS; ++j) { - si->psnr_sum[i][j] += cx_pkt->data.layer_psnr[i].psnr[j]; - si->sse_sum[i][j] += cx_pkt->data.layer_psnr[i].sse[j]; - } - } - ++si->psnr_pkt_received; - break; - } - case VPX_CODEC_SPATIAL_SVC_LAYER_SIZES: { - int i; - for (i = 0; i < svc_ctx->spatial_layers; ++i) - si->bytes_sum[i] += cx_pkt->data.layer_sizes[i]; - break; - } -#endif case VPX_CODEC_PSNR_PKT: { -#if CONFIG_SPATIAL_SVC && defined(VPX_TEST_SPATIAL_SVC) - int j; - svc_log(svc_ctx, SVC_LOG_DEBUG, - "frame: %d, layer: %d, PSNR(Total/Y/U/V): " - "%2.3f %2.3f %2.3f %2.3f \n", - si->psnr_pkt_received, 0, cx_pkt->data.layer_psnr[0].psnr[0], - cx_pkt->data.layer_psnr[0].psnr[1], - cx_pkt->data.layer_psnr[0].psnr[2], - cx_pkt->data.layer_psnr[0].psnr[3]); - for (j = 0; j < COMPONENTS; ++j) { - si->psnr_sum[0][j] += cx_pkt->data.layer_psnr[0].psnr[j]; - si->sse_sum[0][j] += cx_pkt->data.layer_psnr[0].sse[j]; - } -#endif } ++si->psnr_pkt_received; break; @@ -619,19 +560,13 @@ vpx_codec_err_t vpx_svc_encode(SvcContext 
*svc_ctx, vpx_codec_ctx_t *codec_ctx, return VPX_CODEC_OK; } -const char *vpx_svc_get_message(const SvcContext *svc_ctx) { - const SvcInternal_t *const si = get_const_svc_internal(svc_ctx); - if (svc_ctx == NULL || si == NULL) return NULL; - return si->message_buffer; -} - static double calc_psnr(double d) { if (d == 0) return 100; return -10.0 * log(d) / log(10.0); } // dump accumulated statistics and reset accumulated values -const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) { +void vpx_svc_dump_statistics(SvcContext *svc_ctx) { int number_of_frames; int i, j; uint32_t bytes_total = 0; @@ -641,21 +576,19 @@ const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) { double y_scale; SvcInternal_t *const si = get_svc_internal(svc_ctx); - if (svc_ctx == NULL || si == NULL) return NULL; - - svc_log_reset(svc_ctx); + if (svc_ctx == NULL || si == NULL) return; number_of_frames = si->psnr_pkt_received; - if (number_of_frames <= 0) return vpx_svc_get_message(svc_ctx); + if (number_of_frames <= 0) return; svc_log(svc_ctx, SVC_LOG_INFO, "\n"); for (i = 0; i < svc_ctx->spatial_layers; ++i) { svc_log(svc_ctx, SVC_LOG_INFO, "Layer %d Average PSNR=[%2.3f, %2.3f, %2.3f, %2.3f], Bytes=[%u]\n", - i, (double)si->psnr_sum[i][0] / number_of_frames, - (double)si->psnr_sum[i][1] / number_of_frames, - (double)si->psnr_sum[i][2] / number_of_frames, - (double)si->psnr_sum[i][3] / number_of_frames, si->bytes_sum[i]); + i, si->psnr_sum[i][0] / number_of_frames, + si->psnr_sum[i][1] / number_of_frames, + si->psnr_sum[i][2] / number_of_frames, + si->psnr_sum[i][3] / number_of_frames, si->bytes_sum[i]); // the following psnr calculation is deduced from ffmpeg.c#print_report y_scale = si->width * si->height * 255.0 * 255.0 * number_of_frames; scale[1] = y_scale; @@ -686,7 +619,6 @@ const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) { si->psnr_pkt_received = 0; svc_log(svc_ctx, SVC_LOG_INFO, "Total Bytes=[%u]\n", bytes_total); - return vpx_svc_get_message(svc_ctx); } void vpx_svc_release(SvcContext *svc_ctx) { diff --git a/media/libvpx/libvpx/examples/vp8_multi_resolution_encoder.c b/media/libvpx/libvpx/examples/vp8_multi_resolution_encoder.c index b14b1ff39726..e72f8a01970e 100644 --- a/media/libvpx/libvpx/examples/vp8_multi_resolution_encoder.c +++ b/media/libvpx/libvpx/examples/vp8_multi_resolution_encoder.c @@ -61,7 +61,7 @@ void usage_exit(void) { exit(EXIT_FAILURE); } int (*read_frame_p)(FILE *f, vpx_image_t *img); -static int read_frame(FILE *f, vpx_image_t *img) { +static int mulres_read_frame(FILE *f, vpx_image_t *img) { size_t nbytes, to_read; int res = 1; @@ -75,7 +75,7 @@ static int read_frame(FILE *f, vpx_image_t *img) { return res; } -static int read_frame_by_row(FILE *f, vpx_image_t *img) { +static int mulres_read_frame_by_row(FILE *f, vpx_image_t *img) { size_t nbytes, to_read; int res = 1; int plane; @@ -471,9 +471,9 @@ int main(int argc, char **argv) { die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h); if (raw[0].stride[VPX_PLANE_Y] == (int)raw[0].d_w) - read_frame_p = read_frame; + read_frame_p = mulres_read_frame; else - read_frame_p = read_frame_by_row; + read_frame_p = mulres_read_frame_by_row; for (i = 0; i < NUM_ENCODERS; i++) if (outfile[i]) write_ivf_file_header(outfile[i], &cfg[i], 0); diff --git a/media/libvpx/libvpx/examples/vp9_spatial_svc_encoder.c b/media/libvpx/libvpx/examples/vp9_spatial_svc_encoder.c index 0987cbfb8585..b987989a86cf 100644 --- a/media/libvpx/libvpx/examples/vp9_spatial_svc_encoder.c +++ b/media/libvpx/libvpx/examples/vp9_spatial_svc_encoder.c @@ 
-25,13 +25,19 @@ #include "../video_writer.h" #include "../vpx_ports/vpx_timer.h" -#include "vpx/svc_context.h" +#include "./svc_context.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" #include "../vpxstats.h" #include "vp9/encoder/vp9_encoder.h" +#include "./y4minput.h" + #define OUTPUT_RC_STATS 1 +#define SIMULCAST_MODE 0 + +static const arg_def_t outputfile = + ARG_DEF("o", "output", 1, "Output filename"); static const arg_def_t skip_frames_arg = ARG_DEF("s", "skip-frames", 1, "input frames to skip"); static const arg_def_t frames_arg = @@ -86,6 +92,19 @@ static const arg_def_t aqmode_arg = ARG_DEF("aq", "aqmode", 1, "aq-mode off/on"); static const arg_def_t bitrates_arg = ARG_DEF("bl", "bitrates", 1, "bitrates[sl * num_tl + tl]"); +static const arg_def_t dropframe_thresh_arg = + ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)"); +static const struct arg_enum_list tune_content_enum[] = { + { "default", VP9E_CONTENT_DEFAULT }, + { "screen", VP9E_CONTENT_SCREEN }, + { "film", VP9E_CONTENT_FILM }, + { NULL, 0 } +}; + +static const arg_def_t tune_content_arg = ARG_DEF_ENUM( + NULL, "tune-content", 1, "Tune content type", tune_content_enum); +static const arg_def_t inter_layer_pred_arg = ARG_DEF( + NULL, "inter-layer-pred", 1, "0 - 3: On, Off, Key-frames, Constrained"); #if CONFIG_VP9_HIGHBITDEPTH static const struct arg_enum_list bitdepth_enum[] = { @@ -97,6 +116,7 @@ static const arg_def_t bitdepth_arg = ARG_DEF_ENUM( #endif // CONFIG_VP9_HIGHBITDEPTH static const arg_def_t *svc_args[] = { &frames_arg, + &outputfile, &width_arg, &height_arg, &timebase_arg, @@ -127,6 +147,9 @@ static const arg_def_t *svc_args[] = { &frames_arg, &speed_arg, &rc_end_usage_arg, &bitrates_arg, + &dropframe_thresh_arg, + &tune_content_arg, + &inter_layer_pred_arg, NULL }; static const uint32_t default_frames_to_skip = 0; @@ -145,7 +168,6 @@ static const int32_t default_speed = -1; // -1 means use library default. static const uint32_t default_threads = 0; // zero means use library default. 
typedef struct { - const char *input_filename; const char *output_filename; uint32_t frames_to_code; uint32_t frames_to_skip; @@ -153,12 +175,14 @@ typedef struct { stats_io_t rc_stats; int passes; int pass; + int tune_content; + int inter_layer_pred; } AppInput; static const char *exec_name; void usage_exit(void) { - fprintf(stderr, "Usage: %s input_filename output_filename\n", + fprintf(stderr, "Usage: %s input_filename -o output_filename\n", exec_name); fprintf(stderr, "Options:\n"); arg_show_usage(stderr, svc_args); @@ -217,6 +241,8 @@ static void parse_command_line(int argc, const char **argv_, if (arg_match(&arg, &frames_arg, argi)) { app_input->frames_to_code = arg_parse_uint(&arg); + } else if (arg_match(&arg, &outputfile, argi)) { + app_input->output_filename = arg.val; } else if (arg_match(&arg, &width_arg, argi)) { enc_cfg->g_w = arg_parse_uint(&arg); } else if (arg_match(&arg, &height_arg, argi)) { @@ -237,6 +263,9 @@ static void parse_command_line(int argc, const char **argv_, #endif } else if (arg_match(&arg, &speed_arg, argi)) { svc_ctx->speed = arg_parse_uint(&arg); + if (svc_ctx->speed > 9) { + warn("Mapping speed %d to speed 9.\n", svc_ctx->speed); + } } else if (arg_match(&arg, &aqmode_arg, argi)) { svc_ctx->aqmode = arg_parse_uint(&arg); } else if (arg_match(&arg, &threads_arg, argi)) { @@ -251,11 +280,15 @@ static void parse_command_line(int argc, const char **argv_, enc_cfg->kf_min_dist = arg_parse_uint(&arg); enc_cfg->kf_max_dist = enc_cfg->kf_min_dist; } else if (arg_match(&arg, &scale_factors_arg, argi)) { - snprintf(string_options, sizeof(string_options), "%s scale-factors=%s", - string_options, arg.val); + strncat(string_options, " scale-factors=", + sizeof(string_options) - strlen(string_options) - 1); + strncat(string_options, arg.val, + sizeof(string_options) - strlen(string_options) - 1); } else if (arg_match(&arg, &bitrates_arg, argi)) { - snprintf(string_options, sizeof(string_options), "%s bitrates=%s", - string_options, arg.val); + strncat(string_options, " bitrates=", + sizeof(string_options) - strlen(string_options) - 1); + strncat(string_options, arg.val, + sizeof(string_options) - strlen(string_options) - 1); } else if (arg_match(&arg, &passes_arg, argi)) { passes = arg_parse_uint(&arg); if (passes < 1 || passes > 2) { @@ -269,11 +302,15 @@ static void parse_command_line(int argc, const char **argv_, } else if (arg_match(&arg, &fpf_name_arg, argi)) { fpf_file_name = arg.val; } else if (arg_match(&arg, &min_q_arg, argi)) { - snprintf(string_options, sizeof(string_options), "%s min-quantizers=%s", - string_options, arg.val); + strncat(string_options, " min-quantizers=", + sizeof(string_options) - strlen(string_options) - 1); + strncat(string_options, arg.val, + sizeof(string_options) - strlen(string_options) - 1); } else if (arg_match(&arg, &max_q_arg, argi)) { - snprintf(string_options, sizeof(string_options), "%s max-quantizers=%s", - string_options, arg.val); + strncat(string_options, " max-quantizers=", + sizeof(string_options) - strlen(string_options) - 1); + strncat(string_options, arg.val, + sizeof(string_options) - strlen(string_options) - 1); } else if (arg_match(&arg, &min_bitrate_arg, argi)) { min_bitrate = arg_parse_uint(&arg); } else if (arg_match(&arg, &max_bitrate_arg, argi)) { @@ -303,6 +340,12 @@ static void parse_command_line(int argc, const char **argv_, break; } #endif // CONFIG_VP9_HIGHBITDEPTH + } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) { + enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg); + } else if 
(arg_match(&arg, &tune_content_arg, argi)) { + app_input->tune_content = arg_parse_uint(&arg); + } else if (arg_match(&arg, &inter_layer_pred_arg, argi)) { + app_input->inter_layer_pred = arg_parse_uint(&arg); } else { ++argj; } @@ -358,13 +401,18 @@ static void parse_command_line(int argc, const char **argv_, if (argi[0][0] == '-' && strlen(argi[0]) > 1) die("Error: Unrecognized option %s\n", *argi); - if (argv[0] == NULL || argv[1] == 0) { + if (argv[0] == NULL) { usage_exit(); } - app_input->input_filename = argv[0]; - app_input->output_filename = argv[1]; + app_input->input_ctx.filename = argv[0]; free(argv); + open_input_file(&app_input->input_ctx); + if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) { + enc_cfg->g_w = app_input->input_ctx.width; + enc_cfg->g_h = app_input->input_ctx.height; + } + if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 || enc_cfg->g_h % 2) die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h); @@ -429,8 +477,9 @@ static void set_rate_control_stats(struct RateControlStats *rc, rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl]; if (tl > 0) { rc->layer_pfb[layer] = - 1000.0 * (cfg->layer_target_bitrate[layer] - - cfg->layer_target_bitrate[layer - 1]) / + 1000.0 * + (cfg->layer_target_bitrate[layer] - + cfg->layer_target_bitrate[layer - 1]) / (rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]); } else { rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] / @@ -502,14 +551,13 @@ static void printout_rate_control_summary(struct RateControlStats *rc, printf("Average, rms-variance, and percent-fluct: %f %f %f \n", rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate), perc_fluctuation); - if (frame_cnt != tot_num_frames) - die("Error: Number of input frames not equal to output encoded frames != " - "%d tot_num_frames = %d\n", - frame_cnt, tot_num_frames); + printf("Num of input, num of encoded (super) frames: %d %d \n", frame_cnt, + tot_num_frames); } -vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz, - uint64_t sizes[8], int *count) { +static vpx_codec_err_t parse_superframe_index(const uint8_t *data, + size_t data_sz, uint64_t sizes[8], + int *count) { // A chunk ending with a byte matching 0xc0 is an invalid chunk unless // it is a super frame index. If the last byte of real video compression // data is 0xc0 the encoder must add a 0 byte. If we have the marker but @@ -561,106 +609,386 @@ vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz, // bypass/flexible mode. The pattern corresponds to the pattern // VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in // non-flexible mode. 
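(As a minimal caller sketch of this 0101 pattern, mirroring the encode loop in main() later in this file; the layer_id, ref_frame_config, encoder, svc_ctx and frame_cnt variables used in main() are assumed here:)

  /* Hypothetical caller sketch, not part of the patch: alternate the temporal
   * layer id with the superframe count (0, 1, 0, 1, ...), then let the helper
   * below fill in the per-layer reference/update flags. */
  layer_id.spatial_layer_id = 0;
  layer_id.temporal_layer_id = frame_cnt % 2;
  vpx_codec_control(&encoder, VP9E_SET_SVC_LAYER_ID, &layer_id);
  set_frame_flags_bypass_mode_ex0(layer_id.temporal_layer_id,
                                  svc_ctx.spatial_layers, frame_cnt == 0,
                                  &ref_frame_config);
  vpx_codec_control(&encoder, VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config);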
-void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers, - int is_key_frame, - vpx_svc_ref_frame_config_t *ref_frame_config) { +static void set_frame_flags_bypass_mode_ex0( + int tl, int num_spatial_layers, int is_key_frame, + vpx_svc_ref_frame_config_t *ref_frame_config) { + int sl; + for (sl = 0; sl < num_spatial_layers; ++sl) + ref_frame_config->update_buffer_slot[sl] = 0; + for (sl = 0; sl < num_spatial_layers; ++sl) { - if (!tl) { - if (!sl) { - ref_frame_config->frame_flags[sl] = - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF; - } else { - if (is_key_frame) { - ref_frame_config->frame_flags[sl] = - VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - } else { - ref_frame_config->frame_flags[sl] = - VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - } - } - } else if (tl == 1) { - if (!sl) { - ref_frame_config->frame_flags[sl] = - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_UPD_GF; - } else { - ref_frame_config->frame_flags[sl] = - VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; - } - } + // Set the buffer idx. if (tl == 0) { ref_frame_config->lst_fb_idx[sl] = sl; - if (sl) - ref_frame_config->gld_fb_idx[sl] = sl - 1; - else + if (sl) { + if (is_key_frame) { + ref_frame_config->lst_fb_idx[sl] = sl - 1; + ref_frame_config->gld_fb_idx[sl] = sl; + } else { + ref_frame_config->gld_fb_idx[sl] = sl - 1; + } + } else { ref_frame_config->gld_fb_idx[sl] = 0; + } ref_frame_config->alt_fb_idx[sl] = 0; } else if (tl == 1) { ref_frame_config->lst_fb_idx[sl] = sl; ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1; ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl; } + // Set the reference and update flags. + if (!tl) { + if (!sl) { + // Base spatial and base temporal (sl = 0, tl = 0) + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 0; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->lst_fb_idx[sl]; + } else { + if (is_key_frame) { + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 0; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->gld_fb_idx[sl]; + } else { + // Non-zero spatial layer. + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 1; + ref_frame_config->reference_alt_ref[sl] = 1; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->lst_fb_idx[sl]; + } + } + } else if (tl == 1) { + if (!sl) { + // Base spatial and top temporal (tl = 1) + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 0; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->alt_fb_idx[sl]; + } else { + // Non-zero spatial.
+ if (sl < num_spatial_layers - 1) { + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 1; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->alt_fb_idx[sl]; + } else if (sl == num_spatial_layers - 1) { + // Top spatial and top temporal (non-reference -- doesn't update any + // reference buffers) + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 1; + ref_frame_config->reference_alt_ref[sl] = 0; + } + } + } } } +// Example pattern for 2 spatial layers and 2 temporal layers used in the +// bypass/flexible mode, except only 1 spatial layer when temporal_layer_id = 1. +static void set_frame_flags_bypass_mode_ex1( + int tl, int num_spatial_layers, int is_key_frame, + vpx_svc_ref_frame_config_t *ref_frame_config) { + int sl; + for (sl = 0; sl < num_spatial_layers; ++sl) + ref_frame_config->update_buffer_slot[sl] = 0; + + if (tl == 0) { + if (is_key_frame) { + ref_frame_config->lst_fb_idx[1] = 0; + ref_frame_config->gld_fb_idx[1] = 1; + } else { + ref_frame_config->lst_fb_idx[1] = 1; + ref_frame_config->gld_fb_idx[1] = 0; + } + ref_frame_config->alt_fb_idx[1] = 0; + + ref_frame_config->lst_fb_idx[0] = 0; + ref_frame_config->gld_fb_idx[0] = 0; + ref_frame_config->alt_fb_idx[0] = 0; + } + if (tl == 1) { + ref_frame_config->lst_fb_idx[0] = 0; + ref_frame_config->gld_fb_idx[0] = 1; + ref_frame_config->alt_fb_idx[0] = 2; + + ref_frame_config->lst_fb_idx[1] = 1; + ref_frame_config->gld_fb_idx[1] = 2; + ref_frame_config->alt_fb_idx[1] = 3; + } + // Set the reference and update flags. + if (tl == 0) { + // Base spatial and base temporal (sl = 0, tl = 0) + ref_frame_config->reference_last[0] = 1; + ref_frame_config->reference_golden[0] = 0; + ref_frame_config->reference_alt_ref[0] = 0; + ref_frame_config->update_buffer_slot[0] |= + 1 << ref_frame_config->lst_fb_idx[0]; + + if (is_key_frame) { + ref_frame_config->reference_last[1] = 1; + ref_frame_config->reference_golden[1] = 0; + ref_frame_config->reference_alt_ref[1] = 0; + ref_frame_config->update_buffer_slot[1] |= + 1 << ref_frame_config->gld_fb_idx[1]; + } else { + // Non-zero spatial layer.
+ ref_frame_config->reference_last[1] = 1; + ref_frame_config->reference_golden[1] = 1; + ref_frame_config->reference_alt_ref[1] = 1; + ref_frame_config->update_buffer_slot[1] |= + 1 << ref_frame_config->lst_fb_idx[1]; + } + } + if (tl == 1) { + // Top spatial and top temporal (non-reference -- doesn't update any + // reference buffers) + ref_frame_config->reference_last[1] = 1; + ref_frame_config->reference_golden[1] = 0; + ref_frame_config->reference_alt_ref[1] = 0; + } +} + +#if CONFIG_VP9_DECODER && !SIMULCAST_MODE +static void test_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder, + const int frames_out, int *mismatch_seen) { + vpx_image_t enc_img, dec_img; + struct vp9_ref_frame ref_enc, ref_dec; + if (*mismatch_seen) return; + /* Get the internal reference frame */ + ref_enc.idx = 0; + ref_dec.idx = 0; + vpx_codec_control(encoder, VP9_GET_REFERENCE, &ref_enc); + enc_img = ref_enc.img; + vpx_codec_control(decoder, VP9_GET_REFERENCE, &ref_dec); + dec_img = ref_dec.img; +#if CONFIG_VP9_HIGHBITDEPTH + if ((enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) != + (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH)) { + if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + vpx_img_alloc(&enc_img, enc_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH, + enc_img.d_w, enc_img.d_h, 16); + vpx_img_truncate_16_to_8(&enc_img, &ref_enc.img); + } + if (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + vpx_img_alloc(&dec_img, dec_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH, + dec_img.d_w, dec_img.d_h, 16); + vpx_img_truncate_16_to_8(&dec_img, &ref_dec.img); + } + } +#endif + + if (!compare_img(&enc_img, &dec_img)) { + int y[4], u[4], v[4]; +#if CONFIG_VP9_HIGHBITDEPTH + if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + find_mismatch_high(&enc_img, &dec_img, y, u, v); + } else { + find_mismatch(&enc_img, &dec_img, y, u, v); + } +#else + find_mismatch(&enc_img, &dec_img, y, u, v); +#endif + decoder->err = 1; + printf( + "Encode/decode mismatch on frame %d at" + " Y[%d, %d] {%d/%d}," + " U[%d, %d] {%d/%d}," + " V[%d, %d] {%d/%d}\n", + frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1], + v[2], v[3]); + *mismatch_seen = frames_out; + } + + vpx_img_free(&enc_img); + vpx_img_free(&dec_img); +} +#endif + +#if OUTPUT_RC_STATS +static void svc_output_rc_stats( + vpx_codec_ctx_t *codec, vpx_codec_enc_cfg_t *enc_cfg, + vpx_svc_layer_id_t *layer_id, const vpx_codec_cx_pkt_t *cx_pkt, + struct RateControlStats *rc, VpxVideoWriter **outfile, + const uint32_t frame_cnt, const double framerate) { + int num_layers_encoded = 0; + unsigned int sl, tl; + uint64_t sizes[8]; + uint64_t sizes_parsed[8]; + int count = 0; + double sum_bitrate = 0.0; + double sum_bitrate2 = 0.0; + vp9_zero(sizes); + vp9_zero(sizes_parsed); + vpx_codec_control(codec, VP9E_GET_SVC_LAYER_ID, layer_id); + parse_superframe_index(cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, + sizes_parsed, &count); + if (enc_cfg->ss_number_layers == 1) sizes[0] = cx_pkt->data.frame.sz; + for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { + sizes[sl] = 0; + if (cx_pkt->data.frame.spatial_layer_encoded[sl]) { + sizes[sl] = sizes_parsed[num_layers_encoded]; + num_layers_encoded++; + } + } + for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { + unsigned int sl2; + uint64_t tot_size = 0; +#if SIMULCAST_MODE + for (sl2 = 0; sl2 < sl; ++sl2) { + if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2]; + } + vpx_video_writer_write_frame(outfile[sl], + (uint8_t *)(cx_pkt->data.frame.buf) + tot_size, + (size_t)(sizes[sl]), cx_pkt->data.frame.pts); +#else + for (sl2 = 0; sl2 <= sl; 
++sl2) { + if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2]; + } + if (tot_size > 0) + vpx_video_writer_write_frame(outfile[sl], cx_pkt->data.frame.buf, + (size_t)(tot_size), cx_pkt->data.frame.pts); +#endif // SIMULCAST_MODE + } + for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { + if (cx_pkt->data.frame.spatial_layer_encoded[sl]) { + for (tl = layer_id->temporal_layer_id; tl < enc_cfg->ts_number_layers; + ++tl) { + const int layer = sl * enc_cfg->ts_number_layers + tl; + ++rc->layer_tot_enc_frames[layer]; + rc->layer_encoding_bitrate[layer] += 8.0 * sizes[sl]; + // Keep count of rate control stats per layer, for non-key + // frames. + if (tl == (unsigned int)layer_id->temporal_layer_id && + !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { + rc->layer_avg_frame_size[layer] += 8.0 * sizes[sl]; + rc->layer_avg_rate_mismatch[layer] += + fabs(8.0 * sizes[sl] - rc->layer_pfb[layer]) / + rc->layer_pfb[layer]; + ++rc->layer_enc_frames[layer]; + } + } + } + } + + // Update for short-time encoding bitrate states, for moving + // window of size rc->window, shifted by rc->window / 2. + // Ignore first window segment, due to key frame. + if (frame_cnt > (unsigned int)rc->window_size) { + for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { + if (cx_pkt->data.frame.spatial_layer_encoded[sl]) + sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate; + } + if (frame_cnt % rc->window_size == 0) { + rc->window_count += 1; + rc->avg_st_encoding_bitrate += sum_bitrate / rc->window_size; + rc->variance_st_encoding_bitrate += + (sum_bitrate / rc->window_size) * (sum_bitrate / rc->window_size); + } + } + + // Second shifted window. + if (frame_cnt > (unsigned int)(rc->window_size + rc->window_size / 2)) { + for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { + sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate; + } + + if (frame_cnt > (unsigned int)(2 * rc->window_size) && + frame_cnt % rc->window_size == 0) { + rc->window_count += 1; + rc->avg_st_encoding_bitrate += sum_bitrate2 / rc->window_size; + rc->variance_st_encoding_bitrate += + (sum_bitrate2 / rc->window_size) * (sum_bitrate2 / rc->window_size); + } + } +} +#endif + int main(int argc, const char **argv) { AppInput app_input; VpxVideoWriter *writer = NULL; VpxVideoInfo info; - vpx_codec_ctx_t codec; + vpx_codec_ctx_t encoder; vpx_codec_enc_cfg_t enc_cfg; SvcContext svc_ctx; + vpx_svc_frame_drop_t svc_drop_frame; uint32_t i; uint32_t frame_cnt = 0; vpx_image_t raw; vpx_codec_err_t res; int pts = 0; /* PTS starts at 0 */ int frame_duration = 1; /* 1 timebase tick per frame */ - FILE *infile = NULL; int end_of_stream = 0; int frames_received = 0; #if OUTPUT_RC_STATS - VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = { NULL }; + VpxVideoWriter *outfile[VPX_SS_MAX_LAYERS] = { NULL }; struct RateControlStats rc; vpx_svc_layer_id_t layer_id; vpx_svc_ref_frame_config_t ref_frame_config; - unsigned int sl, tl; - double sum_bitrate = 0.0; - double sum_bitrate2 = 0.0; + unsigned int sl; double framerate = 30.0; #endif struct vpx_usec_timer timer; int64_t cx_time = 0; +#if CONFIG_INTERNAL_STATS + FILE *f = fopen("opsnr.stt", "a"); +#endif +#if CONFIG_VP9_DECODER && !SIMULCAST_MODE + int mismatch_seen = 0; + vpx_codec_ctx_t decoder; +#endif memset(&svc_ctx, 0, sizeof(svc_ctx)); - svc_ctx.log_print = 1; + memset(&app_input, 0, sizeof(AppInput)); + memset(&info, 0, sizeof(VpxVideoInfo)); + memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t)); + memset(&rc, 0, sizeof(struct RateControlStats)); exec_name = argv[0]; + + /* Setup default input stream 
settings */ + app_input.input_ctx.framerate.numerator = 30; + app_input.input_ctx.framerate.denominator = 1; + app_input.input_ctx.only_i420 = 1; + app_input.input_ctx.bit_depth = 0; + parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg); + // Y4M reader handles its own allocation. + if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) { // Allocate image buffer #if CONFIG_VP9_HIGHBITDEPTH - if (!vpx_img_alloc(&raw, - enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420 - : VPX_IMG_FMT_I42016, - enc_cfg.g_w, enc_cfg.g_h, 32)) { - die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h); - } + if (!vpx_img_alloc(&raw, + enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420 + : VPX_IMG_FMT_I42016, + enc_cfg.g_w, enc_cfg.g_h, 32)) { + die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h); + } #else - if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) { - die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h); - } + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) { + die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h); + } #endif // CONFIG_VP9_HIGHBITDEPTH - - if (!(infile = fopen(app_input.input_filename, "rb"))) - die("Failed to open %s for reading\n", app_input.input_filename); + } // Initialize codec - if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) != + if (vpx_svc_init(&svc_ctx, &encoder, vpx_codec_vp9_cx(), &enc_cfg) != VPX_CODEC_OK) die("Failed to initialize encoder\n"); +#if CONFIG_VP9_DECODER && !SIMULCAST_MODE + if (vpx_codec_dec_init( + &decoder, get_vpx_decoder_by_name("vp9")->codec_interface(), NULL, 0)) + die("Failed to initialize decoder\n"); +#endif #if OUTPUT_RC_STATS + rc.window_count = 1; + rc.window_size = 15; // Silence a static analysis warning. + rc.avg_st_encoding_bitrate = 0.0; + rc.variance_st_encoding_bitrate = 0.0; if (svc_ctx.output_rc_stat) { set_rate_control_stats(&rc, &enc_cfg); framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num; @@ -668,6 +996,8 @@ int main(int argc, const char **argv) { #endif info.codec_fourcc = VP9_FOURCC; + info.frame_width = enc_cfg.g_w; + info.frame_height = enc_cfg.g_h; info.time_base.numerator = enc_cfg.g_timebase.num; info.time_base.denominator = enc_cfg.g_timebase.den; @@ -679,43 +1009,65 @@ int main(int argc, const char **argv) { die("Failed to open %s for writing\n", app_input.output_filename); } #if OUTPUT_RC_STATS - // For now, just write temporal layer streams. - // TODO(marpan): do spatial by re-writing superframe. + // Write out spatial layer stream. + // TODO(marpan/jianj): allow for writing each spatial and temporal stream. 
if (svc_ctx.output_rc_stat) { - for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) { + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { char file_name[PATH_MAX]; - snprintf(file_name, sizeof(file_name), "%s_t%d.ivf", - app_input.output_filename, tl); - outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info); - if (!outfile[tl]) die("Failed to open %s for writing", file_name); + snprintf(file_name, sizeof(file_name), "%s_s%d.ivf", + app_input.output_filename, sl); + outfile[sl] = vpx_video_writer_open(file_name, kContainerIVF, &info); + if (!outfile[sl]) die("Failed to open %s for writing", file_name); } } #endif // skip initial frames - for (i = 0; i < app_input.frames_to_skip; ++i) vpx_img_read(&raw, infile); + for (i = 0; i < app_input.frames_to_skip; ++i) + read_frame(&app_input.input_ctx, &raw); if (svc_ctx.speed != -1) - vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed); + vpx_codec_control(&encoder, VP8E_SET_CPUUSED, svc_ctx.speed); if (svc_ctx.threads) { - vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1)); + vpx_codec_control(&encoder, VP9E_SET_TILE_COLUMNS, + get_msb(svc_ctx.threads)); if (svc_ctx.threads > 1) - vpx_codec_control(&codec, VP9E_SET_ROW_MT, 1); + vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 1); else - vpx_codec_control(&codec, VP9E_SET_ROW_MT, 0); + vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 0); } if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1) - vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); + vpx_codec_control(&encoder, VP9E_SET_AQ_MODE, 3); if (svc_ctx.speed >= 5) - vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); - vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, 900); + vpx_codec_control(&encoder, VP8E_SET_STATIC_THRESHOLD, 1); + vpx_codec_control(&encoder, VP8E_SET_MAX_INTRA_BITRATE_PCT, 900); + + vpx_codec_control(&encoder, VP9E_SET_SVC_INTER_LAYER_PRED, + app_input.inter_layer_pred); + + vpx_codec_control(&encoder, VP9E_SET_NOISE_SENSITIVITY, 0); + + vpx_codec_control(&encoder, VP9E_SET_TUNE_CONTENT, app_input.tune_content); + + svc_drop_frame.framedrop_mode = FULL_SUPERFRAME_DROP; + for (sl = 0; sl < (unsigned int)svc_ctx.spatial_layers; ++sl) + svc_drop_frame.framedrop_thresh[sl] = enc_cfg.rc_dropframe_thresh; + svc_drop_frame.max_consec_drop = INT_MAX; + vpx_codec_control(&encoder, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame); // Encode frames while (!end_of_stream) { vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *cx_pkt; - if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) { + // Example patterns for bypass/flexible mode: + // example_pattern = 0: 2 temporal layers, and spatial_layers = 1,2,3. Exact + // to fixed SVC patterns. example_pattern = 1: 2 spatial and 2 temporal + // layers, with SL0 only has TL0, and SL1 has both TL0 and TL1. This example + // uses the extended API. + int example_pattern = 0; + if (frame_cnt >= app_input.frames_to_code || + !read_frame(&app_input.input_ctx, &raw)) { // We need one extra vpx_svc_encode call at end of stream to flush // encoder and get remaining data end_of_stream = 1; @@ -723,140 +1075,97 @@ int main(int argc, const char **argv) { // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates) // and the buffer indices for each spatial layer of the current - // (super)frame to be encoded. The temporal layer_id for the current frame - // also needs to be set. + // (super)frame to be encoded. The spatial and temporal layer_id for the + // current frame also needs to be set. 
// TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS" // mode to "VP9E_LAYERING_MODE_BYPASS". if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { layer_id.spatial_layer_id = 0; // Example for 2 temporal layers. - if (frame_cnt % 2 == 0) + if (frame_cnt % 2 == 0) { layer_id.temporal_layer_id = 0; - else + for (i = 0; i < VPX_SS_MAX_LAYERS; i++) + layer_id.temporal_layer_id_per_spatial[i] = 0; + } else { layer_id.temporal_layer_id = 1; - // Note that we only set the temporal layer_id, since we are calling - // the encode for the whole superframe. The encoder will internally loop - // over all the spatial layers for the current superframe. - vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id); - set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id, - svc_ctx.spatial_layers, frame_cnt == 0, - &ref_frame_config); - vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG, + for (i = 0; i < VPX_SS_MAX_LAYERS; i++) + layer_id.temporal_layer_id_per_spatial[i] = 1; + } + if (example_pattern == 1) { + // example_pattern 1 is hard-coded for 2 spatial and 2 temporal layers. + assert(svc_ctx.spatial_layers == 2); + assert(svc_ctx.temporal_layers == 2); + if (frame_cnt % 2 == 0) { + // Spatial layer 0 and 1 are encoded. + layer_id.temporal_layer_id_per_spatial[0] = 0; + layer_id.temporal_layer_id_per_spatial[1] = 0; + layer_id.spatial_layer_id = 0; + } else { + // Only spatial layer 1 is encoded here. + layer_id.temporal_layer_id_per_spatial[1] = 1; + layer_id.spatial_layer_id = 1; + } + } + vpx_codec_control(&encoder, VP9E_SET_SVC_LAYER_ID, &layer_id); + // TODO(jianj): Fix the parameter passing for "is_key_frame" in + // set_frame_flags_bypass_model() for case of periodic key frames. + if (example_pattern == 0) { + set_frame_flags_bypass_mode_ex0(layer_id.temporal_layer_id, + svc_ctx.spatial_layers, frame_cnt == 0, + &ref_frame_config); + } else if (example_pattern == 1) { + set_frame_flags_bypass_mode_ex1(layer_id.temporal_layer_id, + svc_ctx.spatial_layers, frame_cnt == 0, + &ref_frame_config); + } + ref_frame_config.duration[0] = frame_duration * 1; + ref_frame_config.duration[1] = frame_duration * 1; + + vpx_codec_control(&encoder, VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config); // Keep track of input frames, to account for frame drops in rate control // stats/metrics. - for (sl = 0; sl < (unsigned int)enc_cfg.ss_number_layers; ++sl) { + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + layer_id.temporal_layer_id]; } + } else { + // For the fixed pattern SVC, temporal layer is given by superframe count. + unsigned int tl = 0; + if (enc_cfg.ts_number_layers == 2) + tl = (frame_cnt % 2 != 0); + else if (enc_cfg.ts_number_layers == 3) { + if (frame_cnt % 2 != 0) tl = 2; + if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0)) tl = 1; + } + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) + ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + tl]; } vpx_usec_timer_start(&timer); res = vpx_svc_encode( - &svc_ctx, &codec, (end_of_stream ? NULL : &raw), pts, frame_duration, + &svc_ctx, &encoder, (end_of_stream ? NULL : &raw), pts, frame_duration, svc_ctx.speed >= 5 ? 
VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY); vpx_usec_timer_mark(&timer); cx_time += vpx_usec_timer_elapsed(&timer); - printf("%s", vpx_svc_get_message(&svc_ctx)); fflush(stdout); if (res != VPX_CODEC_OK) { - die_codec(&codec, "Failed to encode frame"); + die_codec(&encoder, "Failed to encode frame"); } - while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) { + while ((cx_pkt = vpx_codec_get_cx_data(&encoder, &iter)) != NULL) { switch (cx_pkt->kind) { case VPX_CODEC_CX_FRAME_PKT: { SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal; if (cx_pkt->data.frame.sz > 0) { -#if OUTPUT_RC_STATS - uint64_t sizes[8]; - int count = 0; -#endif vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, cx_pkt->data.frame.pts); #if OUTPUT_RC_STATS - // TODO(marpan): Put this (to line728) in separate function. if (svc_ctx.output_rc_stat) { - vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id); - parse_superframe_index(cx_pkt->data.frame.buf, - cx_pkt->data.frame.sz, sizes, &count); - if (enc_cfg.ss_number_layers == 1) - sizes[0] = cx_pkt->data.frame.sz; - // Note computing input_layer_frames here won't account for frame - // drops in rate control stats. - // TODO(marpan): Fix this for non-bypass mode so we can get stats - // for dropped frames. - if (svc_ctx.temporal_layering_mode != - VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { - for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + - layer_id.temporal_layer_id]; - } - } - for (tl = layer_id.temporal_layer_id; - tl < enc_cfg.ts_number_layers; ++tl) { - vpx_video_writer_write_frame( - outfile[tl], cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, - cx_pkt->data.frame.pts); - } - - for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - for (tl = layer_id.temporal_layer_id; - tl < enc_cfg.ts_number_layers; ++tl) { - const int layer = sl * enc_cfg.ts_number_layers + tl; - ++rc.layer_tot_enc_frames[layer]; - rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl]; - // Keep count of rate control stats per layer, for non-key - // frames. - if (tl == (unsigned int)layer_id.temporal_layer_id && - !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { - rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl]; - rc.layer_avg_rate_mismatch[layer] += - fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) / - rc.layer_pfb[layer]; - ++rc.layer_enc_frames[layer]; - } - } - } - - // Update for short-time encoding bitrate states, for moving - // window of size rc->window, shifted by rc->window / 2. - // Ignore first window segment, due to key frame. - if (frame_cnt > (unsigned int)rc.window_size) { - tl = layer_id.temporal_layer_id; - for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate; - } - if (frame_cnt % rc.window_size == 0) { - rc.window_count += 1; - rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size; - rc.variance_st_encoding_bitrate += - (sum_bitrate / rc.window_size) * - (sum_bitrate / rc.window_size); - sum_bitrate = 0.0; - } - } - - // Second shifted window. 
- if (frame_cnt > - (unsigned int)(rc.window_size + rc.window_size / 2)) { - tl = layer_id.temporal_layer_id; - for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate; - } - - if (frame_cnt > (unsigned int)(2 * rc.window_size) && - frame_cnt % rc.window_size == 0) { - rc.window_count += 1; - rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size; - rc.variance_st_encoding_bitrate += - (sum_bitrate2 / rc.window_size) * - (sum_bitrate2 / rc.window_size); - sum_bitrate2 = 0.0; - } - } + svc_output_rc_stats(&encoder, &enc_cfg, &layer_id, cx_pkt, &rc, + outfile, frame_cnt, framerate); } #endif } @@ -868,6 +1177,11 @@ int main(int argc, const char **argv) { if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1) si->bytes_sum[0] += (int)cx_pkt->data.frame.sz; ++frames_received; +#if CONFIG_VP9_DECODER && !SIMULCAST_MODE + if (vpx_codec_decode(&decoder, cx_pkt->data.frame.buf, + (unsigned int)cx_pkt->data.frame.sz, NULL, 0)) + die_codec(&decoder, "Failed to decode frame."); +#endif break; } case VPX_CODEC_STATS_PKT: { @@ -877,6 +1191,19 @@ int main(int argc, const char **argv) { } default: { break; } } + +#if CONFIG_VP9_DECODER && !SIMULCAST_MODE + vpx_codec_control(&encoder, VP9E_GET_SVC_LAYER_ID, &layer_id); + // Don't look for mismatch on top spatial and top temporal layers as they + // are non reference frames. + if ((enc_cfg.ss_number_layers > 1 || enc_cfg.ts_number_layers > 1) && + !(layer_id.temporal_layer_id > 0 && + layer_id.temporal_layer_id == (int)enc_cfg.ts_number_layers - 1 && + cx_pkt->data.frame + .spatial_layer_encoded[enc_cfg.ss_number_layers - 1])) { + test_decode(&encoder, &decoder, frame_cnt, &mismatch_seen); + } +#endif } if (!end_of_stream) { @@ -885,41 +1212,45 @@ int main(int argc, const char **argv) { } } - // Compensate for the extra frame count for the bypass mode. 
- if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { - for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - const int layer = - sl * enc_cfg.ts_number_layers + layer_id.temporal_layer_id; - --rc.layer_input_frames[layer]; - } - } - printf("Processed %d frames\n", frame_cnt); - fclose(infile); + + close_input_file(&app_input.input_ctx); + #if OUTPUT_RC_STATS if (svc_ctx.output_rc_stat) { printout_rate_control_summary(&rc, &enc_cfg, frame_cnt); printf("\n"); } #endif - if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + if (vpx_codec_destroy(&encoder)) + die_codec(&encoder, "Failed to destroy codec"); if (app_input.passes == 2) stats_close(&app_input.rc_stats, 1); if (writer) { vpx_video_writer_close(writer); } #if OUTPUT_RC_STATS if (svc_ctx.output_rc_stat) { - for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) { - vpx_video_writer_close(outfile[tl]); + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { + vpx_video_writer_close(outfile[sl]); } } +#endif +#if CONFIG_INTERNAL_STATS + if (mismatch_seen) { + fprintf(f, "First mismatch occurred in frame %d\n", mismatch_seen); + } else { + fprintf(f, "No mismatch detected in recon buffers\n"); + } + fclose(f); #endif printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000), 1000000 * (double)frame_cnt / (double)cx_time); - vpx_img_free(&raw); + if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) { + vpx_img_free(&raw); + } // display average size, psnr - printf("%s", vpx_svc_dump_statistics(&svc_ctx)); + vpx_svc_dump_statistics(&svc_ctx); vpx_svc_release(&svc_ctx); return EXIT_SUCCESS; } diff --git a/media/libvpx/libvpx/examples/vp9cx_set_ref.c b/media/libvpx/libvpx/examples/vp9cx_set_ref.c index 3472689db2f5..911ad38630c6 100644 --- a/media/libvpx/libvpx/examples/vp9cx_set_ref.c +++ b/media/libvpx/libvpx/examples/vp9cx_set_ref.c @@ -68,128 +68,6 @@ void usage_exit() { exit(EXIT_FAILURE); } -static int compare_img(const vpx_image_t *const img1, - const vpx_image_t *const img2) { - uint32_t l_w = img1->d_w; - uint32_t c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; - const uint32_t c_h = - (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; - uint32_t i; - int match = 1; - - match &= (img1->fmt == img2->fmt); - match &= (img1->d_w == img2->d_w); - match &= (img1->d_h == img2->d_h); - - for (i = 0; i < img1->d_h; ++i) - match &= (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], - img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], - l_w) == 0); - - for (i = 0; i < c_h; ++i) - match &= (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], - img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], - c_w) == 0); - - for (i = 0; i < c_h; ++i) - match &= (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], - img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], - c_w) == 0); - - return match; -} - -#define mmin(a, b) ((a) < (b) ? 
(a) : (b)) -static void find_mismatch(const vpx_image_t *const img1, - const vpx_image_t *const img2, int yloc[4], - int uloc[4], int vloc[4]) { - const uint32_t bsize = 64; - const uint32_t bsizey = bsize >> img1->y_chroma_shift; - const uint32_t bsizex = bsize >> img1->x_chroma_shift; - const uint32_t c_w = - (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; - const uint32_t c_h = - (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; - int match = 1; - uint32_t i, j; - yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; - for (i = 0, match = 1; match && i < img1->d_h; i += bsize) { - for (j = 0; match && j < img1->d_w; j += bsize) { - int k, l; - const int si = mmin(i + bsize, img1->d_h) - i; - const int sj = mmin(j + bsize, img1->d_w) - j; - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(img1->planes[VPX_PLANE_Y] + - (i + k) * img1->stride[VPX_PLANE_Y] + j + l) != - *(img2->planes[VPX_PLANE_Y] + - (i + k) * img2->stride[VPX_PLANE_Y] + j + l)) { - yloc[0] = i + k; - yloc[1] = j + l; - yloc[2] = *(img1->planes[VPX_PLANE_Y] + - (i + k) * img1->stride[VPX_PLANE_Y] + j + l); - yloc[3] = *(img2->planes[VPX_PLANE_Y] + - (i + k) * img2->stride[VPX_PLANE_Y] + j + l); - match = 0; - break; - } - } - } - } - } - - uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; - for (i = 0, match = 1; match && i < c_h; i += bsizey) { - for (j = 0; match && j < c_w; j += bsizex) { - int k, l; - const int si = mmin(i + bsizey, c_h - i); - const int sj = mmin(j + bsizex, c_w - j); - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(img1->planes[VPX_PLANE_U] + - (i + k) * img1->stride[VPX_PLANE_U] + j + l) != - *(img2->planes[VPX_PLANE_U] + - (i + k) * img2->stride[VPX_PLANE_U] + j + l)) { - uloc[0] = i + k; - uloc[1] = j + l; - uloc[2] = *(img1->planes[VPX_PLANE_U] + - (i + k) * img1->stride[VPX_PLANE_U] + j + l); - uloc[3] = *(img2->planes[VPX_PLANE_U] + - (i + k) * img2->stride[VPX_PLANE_U] + j + l); - match = 0; - break; - } - } - } - } - } - vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; - for (i = 0, match = 1; match && i < c_h; i += bsizey) { - for (j = 0; match && j < c_w; j += bsizex) { - int k, l; - const int si = mmin(i + bsizey, c_h - i); - const int sj = mmin(j + bsizex, c_w - j); - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(img1->planes[VPX_PLANE_V] + - (i + k) * img1->stride[VPX_PLANE_V] + j + l) != - *(img2->planes[VPX_PLANE_V] + - (i + k) * img2->stride[VPX_PLANE_V] + j + l)) { - vloc[0] = i + k; - vloc[1] = j + l; - vloc[2] = *(img1->planes[VPX_PLANE_V] + - (i + k) * img1->stride[VPX_PLANE_V] + j + l); - vloc[3] = *(img2->planes[VPX_PLANE_V] + - (i + k) * img2->stride[VPX_PLANE_V] + j + l); - match = 0; - break; - } - } - } - } - } -} - static void testing_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder, unsigned int frame_out, int *mismatch_seen) { vpx_image_t enc_img, dec_img; diff --git a/media/libvpx/libvpx/examples/vpx_dec_fuzzer.cc b/media/libvpx/libvpx/examples/vpx_dec_fuzzer.cc new file mode 100644 index 000000000000..d55fe1571be3 --- /dev/null +++ b/media/libvpx/libvpx/examples/vpx_dec_fuzzer.cc @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * Fuzzer for libvpx decoders + * ========================== + * Requirements + * -------------- + * Requires Clang 6.0 or above as -fsanitize=fuzzer is used as a linker + * option. + + * Steps to build + * -------------- + * Clone libvpx repository + $git clone https://chromium.googlesource.com/webm/libvpx + + * Create a directory in parallel to libvpx and change directory + $mkdir vpx_dec_fuzzer + $cd vpx_dec_fuzzer/ + + * Enable sanitizers (Supported: address integer memory thread undefined) + $source ../libvpx/tools/set_analyzer_env.sh address + + * Configure libvpx. + * Note --size-limit and VPX_MAX_ALLOCABLE_MEMORY are defined to avoid + * out-of-memory errors when running the generated fuzzer binary + $../libvpx/configure --disable-unit-tests --size-limit=12288x12288 \ + --extra-cflags="-fsanitize=fuzzer-no-link \ + -DVPX_MAX_ALLOCABLE_MEMORY=1073741824" \ + --disable-webm-io --enable-debug --disable-vp8-encoder \ + --disable-vp9-encoder --disable-examples + + * Build libvpx + $make -j32 + + * Build vp9 fuzzer + $ $CXX $CXXFLAGS -std=c++11 -DDECODER=vp9 \ + -fsanitize=fuzzer -I../libvpx -I. -Wl,--start-group \ + ../libvpx/examples/vpx_dec_fuzzer.cc -o ./vpx_dec_fuzzer_vp9 \ + ./libvpx.a -Wl,--end-group + + * DECODER should be defined as vp9 or vp8 to fuzz the vp9 or vp8 decoder + * + * Create a corpus directory and copy some ivf files there. + * Depending on which codec (vp8/vp9) is being tested, it is recommended to + * have corresponding ivf files in the corpus directory + * An empty corpus directory is also acceptable, though not recommended + $mkdir CORPUS && cp some-files CORPUS + + * Run fuzzing: + $./vpx_dec_fuzzer_vp9 CORPUS + + * References: + * http://llvm.org/docs/LibFuzzer.html + * https://github.com/google/oss-fuzz + */ + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <algorithm> +#include <memory> + +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" +#include "vpx_ports/mem_ops.h" + +#define IVF_FRAME_HDR_SZ (4 + 8) /* 4 byte size + 8 byte timestamp */ +#define IVF_FILE_HDR_SZ 32 + +#define VPXD_INTERFACE(name) VPXD_INTERFACE_(name) +#define VPXD_INTERFACE_(name) vpx_codec_##name##_dx() + +extern "C" void usage_exit(void) { exit(EXIT_FAILURE); } + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size <= IVF_FILE_HDR_SZ) { + return 0; + } + + vpx_codec_ctx_t codec; + // Set thread count in the range [1, 64].
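+ // The low 6 bits of the first byte past the IVF file header give a value + // in [0, 63]; adding 1 maps it to [1, 64] (e.g. 0x00 selects 1 thread, + // 0xff selects 64).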
+ const unsigned int threads = (data[IVF_FILE_HDR_SZ] & 0x3f) + 1; + vpx_codec_dec_cfg_t cfg = { threads, 0, 0 }; + if (vpx_codec_dec_init(&codec, VPXD_INTERFACE(DECODER), &cfg, 0)) { + return 0; + } + + data += IVF_FILE_HDR_SZ; + size -= IVF_FILE_HDR_SZ; + + while (size > IVF_FRAME_HDR_SZ) { + size_t frame_size = mem_get_le32(data); + size -= IVF_FRAME_HDR_SZ; + data += IVF_FRAME_HDR_SZ; + frame_size = std::min(size, frame_size); + + const vpx_codec_err_t err = + vpx_codec_decode(&codec, data, frame_size, nullptr, 0); + static_cast<void>(err); + vpx_codec_iter_t iter = nullptr; + vpx_image_t *img = nullptr; + while ((img = vpx_codec_get_frame(&codec, &iter)) != nullptr) { + } + data += frame_size; + size -= frame_size; + } + vpx_codec_destroy(&codec); + return 0; +} diff --git a/media/libvpx/libvpx/examples/vpx_temporal_svc_encoder.c b/media/libvpx/libvpx/examples/vpx_temporal_svc_encoder.c index f5736ea45d21..925043d10667 100644 --- a/media/libvpx/libvpx/examples/vpx_temporal_svc_encoder.c +++ b/media/libvpx/libvpx/examples/vpx_temporal_svc_encoder.c @@ -19,14 +19,18 @@ #include <string.h> #include "./vpx_config.h" +#include "./y4minput.h" #include "../vpx_ports/vpx_timer.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" +#include "vpx_ports/bitops.h" #include "../tools_common.h" #include "../video_writer.h" -#define VP8_ROI_MAP 0 +#define ROI_MAP 0 + +#define zero(Dest) memset(&(Dest), 0, sizeof(Dest)); static const char *exec_name; @@ -89,19 +93,21 @@ struct RateControlMetrics { // in the stream. static void set_rate_control_metrics(struct RateControlMetrics *rc, vpx_codec_enc_cfg_t *cfg) { - unsigned int i = 0; + int i = 0; // Set the layer (cumulative) framerate and the target layer (non-cumulative) // per-frame-bandwidth, for the rate control encoding stats below. const double framerate = cfg->g_timebase.den / cfg->g_timebase.num; + const int ts_number_layers = cfg->ts_number_layers; rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0]; rc->layer_pfb[0] = 1000.0 * rc->layer_target_bitrate[0] / rc->layer_framerate[0]; - for (i = 0; i < cfg->ts_number_layers; ++i) { + for (i = 0; i < ts_number_layers; ++i) { if (i > 0) { rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i]; - rc->layer_pfb[i] = 1000.0 * (rc->layer_target_bitrate[i] - - rc->layer_target_bitrate[i - 1]) / - (rc->layer_framerate[i] - rc->layer_framerate[i - 1]); + rc->layer_pfb[i] = + 1000.0 * + (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) / + (rc->layer_framerate[i] - rc->layer_framerate[i - 1]); } rc->layer_input_frames[i] = 0; rc->layer_enc_frames[i] = 0; @@ -114,6 +120,9 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc, rc->window_size = 15; rc->avg_st_encoding_bitrate = 0.0; rc->variance_st_encoding_bitrate = 0.0; + // Target bandwidth for the whole stream. + // Set to layer_target_bitrate for highest layer (total bitrate). + cfg->rc_target_bitrate = rc->layer_target_bitrate[ts_number_layers - 1]; } static void printout_rate_control_summary(struct RateControlMetrics *rc, @@ -164,38 +173,60 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc, die("Error: Number of input frames not equal to output! \n"); } -#if VP8_ROI_MAP -static void vp8_set_roi_map(vpx_codec_enc_cfg_t *cfg, vpx_roi_map_t *roi) { +#if ROI_MAP +static void set_roi_map(const char *enc_name, vpx_codec_enc_cfg_t *cfg, + vpx_roi_map_t *roi) { unsigned int i, j; - memset(roi, 0, sizeof(*roi)); + int block_size = 0; + uint8_t is_vp8 = strncmp(enc_name, "vp8", 3) == 0 ?
1 : 0; + uint8_t is_vp9 = strncmp(enc_name, "vp9", 3) == 0 ? 1 : 0; + if (!is_vp8 && !is_vp9) { + die("unsupported codec."); + } + zero(*roi); + + block_size = is_vp9 && !is_vp8 ? 8 : 16; // ROI is based on the segments (4 for vp8, 8 for vp9), smallest unit for // segment is 16x16 for vp8, 8x8 for vp9. - roi->rows = (cfg->g_h + 15) / 16; - roi->cols = (cfg->g_w + 15) / 16; + roi->rows = (cfg->g_h + block_size - 1) / block_size; + roi->cols = (cfg->g_w + block_size - 1) / block_size; // Applies delta QP on the segment blocks, varies from -63 to 63. // Setting to negative means lower QP (better quality). // Below we set delta_q to the extreme (-63) to show strong effect. - roi->delta_q[0] = 0; + // VP8 uses the first 4 segments. VP9 uses all 8 segments. + zero(roi->delta_q); roi->delta_q[1] = -63; - roi->delta_q[2] = 0; - roi->delta_q[3] = 0; // Applies delta loopfilter strength on the segment blocks, varies from -63 to - // 63. Setting to positive means stronger loopfilter. - roi->delta_lf[0] = 0; - roi->delta_lf[1] = 0; - roi->delta_lf[2] = 0; - roi->delta_lf[3] = 0; + // 63. Setting to positive means stronger loopfilter. VP8 uses the first 4 + // segments. VP9 uses all 8 segments. + zero(roi->delta_lf); - // Applies skip encoding threshold on the segment blocks, varies from 0 to - // UINT_MAX. Larger value means more skipping of encoding is possible. - // This skip threshold only applies on delta frames. - roi->static_threshold[0] = 0; - roi->static_threshold[1] = 0; - roi->static_threshold[2] = 0; - roi->static_threshold[3] = 0; + if (is_vp8) { + // Applies skip encoding threshold on the segment blocks, varies from 0 to + // UINT_MAX. Larger value means more skipping of encoding is possible. + // This skip threshold only applies on delta frames. + zero(roi->static_threshold); + } + + if (is_vp9) { + // Apply skip segment. Setting to 1 means this block will be copied from + // previous frame. + zero(roi->skip); + } + + if (is_vp9) { + // Apply ref frame segment. + // -1 : Do not apply this segment. + // 0 : Force using intra. + // 1 : Force using last. + // 2 : Force using golden. + // 3 : Force using altref, but not used in non-rd pickmode for 0 lag. + memset(roi->ref_frame, -1, sizeof(roi->ref_frame)); + roi->ref_frame[1] = 1; + } // Use 2 states: 1 is center square, 0 is the rest. roi->roi_map = @@ -563,12 +594,12 @@ int main(int argc, char **argv) { int layering_mode = 0; int layer_flags[VPX_TS_MAX_PERIODICITY] = { 0 }; int flag_periodicity = 1; -#if VP8_ROI_MAP +#if ROI_MAP vpx_roi_map_t roi; #endif - vpx_svc_layer_id_t layer_id = { 0, 0 }; + vpx_svc_layer_id_t layer_id; const VpxInterface *encoder = NULL; - FILE *infile = NULL; + struct VpxInputContext input_ctx; struct RateControlMetrics rc; int64_t cx_time = 0; const int min_args_base = 13; @@ -583,6 +614,15 @@ int main(int argc, char **argv) { double sum_bitrate2 = 0.0; double framerate = 30.0; + zero(rc.layer_target_bitrate); + memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t)); + memset(&input_ctx, 0, sizeof(input_ctx)); + /* Setup default input stream settings */ + input_ctx.framerate.numerator = 30; + input_ctx.framerate.denominator = 1; + input_ctx.only_i420 = 1; + input_ctx.bit_depth = 0; + exec_name = argv[0]; // Check usage and arguments.
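// min_args adds one target-bitrate argument per layer on top of the 13 fixed ones (min_args_base), per mode_to_num_layers[layering_mode].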
if (argc < min_args) { @@ -621,6 +661,9 @@ int main(int argc, char **argv) { die("Invalid number of arguments"); } + input_ctx.filename = argv[1]; + open_input_file(&input_ctx); + #if CONFIG_VP9_HIGHBITDEPTH switch (strtol(argv[argc - 1], NULL, 0)) { case 8: @@ -637,14 +680,22 @@ int main(int argc, char **argv) { break; default: die("Invalid bit depth (8, 10, 12) %s", argv[argc - 1]); } - if (!vpx_img_alloc( - &raw, bit_depth == VPX_BITS_8 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016, - width, height, 32)) { - die("Failed to allocate image", width, height); + + // Y4M reader has its own allocation. + if (input_ctx.file_type != FILE_TYPE_Y4M) { + if (!vpx_img_alloc( + &raw, + bit_depth == VPX_BITS_8 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016, + width, height, 32)) { + die("Failed to allocate image", width, height); + } } #else - if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) { - die("Failed to allocate image", width, height); + // Y4M reader has its own allocation. + if (input_ctx.file_type != FILE_TYPE_Y4M) { + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) { + die("Failed to allocate image", width, height); + } } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -675,6 +726,9 @@ int main(int argc, char **argv) { if (speed < 0) { die("Invalid speed setting: must be positive"); } + if (strncmp(encoder->name, "vp9", 3) == 0 && speed > 9) { + warn("Mapping speed %d to speed 9.\n", speed); + } for (i = min_args_base; (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) { @@ -722,13 +776,15 @@ int main(int argc, char **argv) { set_rate_control_metrics(&rc, &cfg); - // Target bandwidth for the whole stream. - // Set to layer_target_bitrate for highest layer (total bitrate). - cfg.rc_target_bitrate = rc.layer_target_bitrate[cfg.ts_number_layers - 1]; - - // Open input file. - if (!(infile = fopen(argv[1], "rb"))) { - die("Failed to open %s for reading", argv[1]); + if (input_ctx.file_type == FILE_TYPE_Y4M) { + if (input_ctx.width != cfg.g_w || input_ctx.height != cfg.g_h) { + die("Incorrect width or height: %d x %d", cfg.g_w, cfg.g_h); + } + if (input_ctx.framerate.numerator != cfg.g_timebase.den || + input_ctx.framerate.denominator != cfg.g_timebase.num) { + die("Incorrect framerate: numerator %d denominator %d", + cfg.g_timebase.num, cfg.g_timebase.den); + } } framerate = cfg.g_timebase.den / cfg.g_timebase.num; @@ -766,8 +822,8 @@ int main(int argc, char **argv) { vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kVp8DenoiserOff); vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0); -#if VP8_ROI_MAP - vp8_set_roi_map(&cfg, &roi); +#if ROI_MAP + set_roi_map(encoder->name, &cfg, &roi); if (vpx_codec_control(&codec, VP8E_SET_ROI_MAP, &roi)) die_codec(&codec, "Failed to set ROI map"); #endif @@ -783,10 +839,14 @@ int main(int argc, char **argv) { vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kVp9DenoiserOff); vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0); - vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1)); - // TODO(marpan/jianj): There is an issue with row-mt for low resolutons at - // high speed settings, disable its use for those cases for now. 
- if (cfg.g_threads > 1 && ((cfg.g_w > 320 && cfg.g_h > 240) || speed < 7)) + vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, get_msb(cfg.g_threads)); +#if ROI_MAP + set_roi_map(encoder->name, &cfg, &roi); + if (vpx_codec_control(&codec, VP9E_SET_ROI_MAP, &roi)) + die_codec(&codec, "Failed to set ROI map"); + vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 0); +#endif + if (cfg.g_threads > 1) vpx_codec_control(&codec, VP9E_SET_ROW_MT, 1); else vpx_codec_control(&codec, VP9E_SET_ROW_MT, 0); @@ -822,6 +882,7 @@ int main(int argc, char **argv) { layer_id.spatial_layer_id = 0; layer_id.temporal_layer_id = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; + layer_id.temporal_layer_id_per_spatial[0] = layer_id.temporal_layer_id; if (strncmp(encoder->name, "vp9", 3) == 0) { vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id); } else if (strncmp(encoder->name, "vp8", 3) == 0) { @@ -830,7 +891,7 @@ int main(int argc, char **argv) { } flags = layer_flags[frame_cnt % flag_periodicity]; if (layering_mode == 0) flags = 0; - frame_avail = vpx_img_read(&raw, infile); + frame_avail = read_frame(&input_ctx, &raw); if (frame_avail) ++rc.layer_input_frames[layer_id.temporal_layer_id]; vpx_usec_timer_start(&timer); if (vpx_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags, @@ -898,7 +959,7 @@ int main(int argc, char **argv) { ++frame_cnt; pts += frame_duration; } - fclose(infile); + close_input_file(&input_ctx); printout_rate_control_summary(&rc, &cfg, frame_cnt); printf("\n"); printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", @@ -910,6 +971,12 @@ int main(int argc, char **argv) { // Try to rewrite the output file headers with the actual frame count. for (i = 0; i < cfg.ts_number_layers; ++i) vpx_video_writer_close(outfile[i]); - vpx_img_free(&raw); + if (input_ctx.file_type != FILE_TYPE_Y4M) { + vpx_img_free(&raw); + } + +#if ROI_MAP + free(roi.roi_map); +#endif return EXIT_SUCCESS; } diff --git a/media/libvpx/libvpx/ivfdec.c b/media/libvpx/libvpx/ivfdec.c index f64e594ab0e6..3e179bc6ed2b 100644 --- a/media/libvpx/libvpx/ivfdec.c +++ b/media/libvpx/libvpx/ivfdec.c @@ -76,12 +76,12 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, size_t frame_size = 0; if (fread(raw_header, IVF_FRAME_HDR_SZ, 1, infile) != 1) { - if (!feof(infile)) warn("Failed to read frame size\n"); + if (!feof(infile)) warn("Failed to read frame size"); } else { frame_size = mem_get_le32(raw_header); if (frame_size > 256 * 1024 * 1024) { - warn("Read invalid frame size (%u)\n", (unsigned int)frame_size); + warn("Read invalid frame size (%u)", (unsigned int)frame_size); frame_size = 0; } @@ -92,7 +92,7 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, *buffer = new_buffer; *buffer_size = 2 * frame_size; } else { - warn("Failed to allocate compressed data buffer\n"); + warn("Failed to allocate compressed data buffer"); frame_size = 0; } } @@ -100,7 +100,7 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, if (!feof(infile)) { if (fread(*buffer, 1, frame_size, infile) != frame_size) { - warn("Failed to read full frame\n"); + warn("Failed to read full frame"); return 1; } diff --git a/media/libvpx/libvpx/ivfdec.h b/media/libvpx/libvpx/ivfdec.h index af725572b48d..847cd79f3fef 100644 --- a/media/libvpx/libvpx/ivfdec.h +++ b/media/libvpx/libvpx/ivfdec.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef IVFDEC_H_ -#define IVFDEC_H_ +#ifndef VPX_IVFDEC_H_ +#define VPX_IVFDEC_H_ #include "./tools_common.h" @@ -25,4 +25,4 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, } /* extern "C" */ #endif -#endif // IVFDEC_H_ +#endif // VPX_IVFDEC_H_ diff --git a/media/libvpx/libvpx/ivfenc.h b/media/libvpx/libvpx/ivfenc.h index ebdce47be8f6..483f2d2c591f 100644 --- a/media/libvpx/libvpx/ivfenc.h +++ b/media/libvpx/libvpx/ivfenc.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef IVFENC_H_ -#define IVFENC_H_ +#ifndef VPX_IVFENC_H_ +#define VPX_IVFENC_H_ #include "./tools_common.h" @@ -30,4 +30,4 @@ void ivf_write_frame_size(FILE *outfile, size_t frame_size); } /* extern "C" */ #endif -#endif // IVFENC_H_ +#endif // VPX_IVFENC_H_ diff --git a/media/libvpx/libvpx/libs.doxy_template b/media/libvpx/libvpx/libs.doxy_template index 5a8f847280e1..1eacc8fe2db5 100644 --- a/media/libvpx/libvpx/libs.doxy_template +++ b/media/libvpx/libvpx/libs.doxy_template @@ -943,18 +943,6 @@ GENERATE_XML = NO XML_OUTPUT = xml -# The XML_SCHEMA tag can be used to specify an XML schema, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_SCHEMA = - -# The XML_DTD tag can be used to specify an XML DTD, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_DTD = - # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that diff --git a/media/libvpx/libvpx/libs.mk b/media/libvpx/libvpx/libs.mk index a3e2f9d0ebdd..569dbdc50bcf 100644 --- a/media/libvpx/libvpx/libs.mk +++ b/media/libvpx/libvpx/libs.mk @@ -11,7 +11,7 @@ # ARM assembly files are written in RVCT-style. We use some make magic to # filter those files to allow GCC compilation -ifeq ($(ARCH_ARM),yes) +ifeq ($(VPX_ARCH_ARM),yes) ASM:=$(if $(filter yes,$(CONFIG_GCC)$(CONFIG_MSVS)),.asm.S,.asm) else ASM:=.asm @@ -88,7 +88,6 @@ ifeq ($(CONFIG_VP9_ENCODER),yes) CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS)) CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h - INSTALL-LIBS-$(CONFIG_SPATIAL_SVC) += include/vpx/svc_context.h INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/% CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h CODEC_DOC_SECTIONS += vp9 vp9_encoder @@ -113,13 +112,6 @@ ifeq ($(CONFIG_DECODERS),yes) CODEC_DOC_SECTIONS += decoder endif -# Suppress -Wextra warnings in third party code. -$(BUILD_PFX)third_party/googletest/%.cc.o: CXXFLAGS += -Wno-missing-field-initializers -# Suppress -Wextra warnings in first party code pending investigation. 
-# https://bugs.chromium.org/p/webm/issues/detail?id=1069 -$(BUILD_PFX)vp8/encoder/onyx_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered -$(BUILD_PFX)vp8/decoder/onyxd_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered - ifeq ($(CONFIG_MSVS),yes) CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd) GTEST_LIB=$(if $(CONFIG_STATIC_MSVCRT),gtestmt,gtestmd) @@ -147,15 +139,12 @@ CODEC_SRCS-yes += vpx_ports/mem_ops_aligned.h CODEC_SRCS-yes += vpx_ports/vpx_once.h CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c -ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) +ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes) INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += vpx_dsp/x86/bitdepth_conversion_sse2.asm endif CODEC_EXPORTS-yes += vpx/exports_com CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc -ifeq ($(CONFIG_SPATIAL_SVC),yes) -CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_spatial_svc -endif CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec INSTALL-LIBS-yes += include/vpx/vpx_codec.h @@ -206,6 +195,8 @@ vpx.def: $(call enabled,CODEC_EXPORTS) --out=$@ $^ CLEAN-OBJS += vpx.def +vpx.$(VCPROJ_SFX): VCPROJ_SRCS=$(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) + vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def @echo " [CREATE] $@" $(qexec)$(GEN_VCPROJ) \ @@ -218,7 +209,15 @@ vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def --ver=$(CONFIG_VS_VERSION) \ --src-path-bare="$(SRC_PATH_BARE)" \ --out=$@ $(CFLAGS) \ - $(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) \ + $(filter $(SRC_PATH_BARE)/vp8/%.c, $(VCPROJ_SRCS)) \ + $(filter $(SRC_PATH_BARE)/vp8/%.h, $(VCPROJ_SRCS)) \ + $(filter $(SRC_PATH_BARE)/vp9/%.c, $(VCPROJ_SRCS)) \ + $(filter $(SRC_PATH_BARE)/vp9/%.h, $(VCPROJ_SRCS)) \ + $(filter $(SRC_PATH_BARE)/vpx/%, $(VCPROJ_SRCS)) \ + $(filter $(SRC_PATH_BARE)/vpx_dsp/%, $(VCPROJ_SRCS)) \ + $(filter-out $(addprefix $(SRC_PATH_BARE)/, \ + vp8/%.c vp8/%.h vp9/%.c vp9/%.h vpx/% vpx_dsp/%), \ + $(VCPROJ_SRCS)) \ --src-path-bare="$(SRC_PATH_BARE)" \ PROJECTS-yes += vpx.$(VCPROJ_SFX) @@ -233,8 +232,8 @@ OBJS-yes += $(LIBVPX_OBJS) LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS) -SO_VERSION_MAJOR := 5 -SO_VERSION_MINOR := 0 +SO_VERSION_MAJOR := 6 +SO_VERSION_MINOR := 2 SO_VERSION_PATCH := 0 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS)) LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib @@ -274,18 +273,6 @@ $(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm $(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(SO_VERSION_MAJOR) $(BUILD_PFX)$(LIBVPX_SO): EXPORTS_FILE = $(EXPORT_FILE) -libvpx.ver: $(call enabled,CODEC_EXPORTS) - @echo " [CREATE] $@" - $(qexec)echo "{ global:" > $@ - $(qexec)for f in $?; do awk '{print $$2";"}' < $$f >>$@; done - $(qexec)echo "local: *; };" >> $@ -CLEAN-OBJS += libvpx.ver - -libvpx.syms: $(call enabled,CODEC_EXPORTS) - @echo " [CREATE] $@" - $(qexec)awk '{print "_"$$2}' $^ >$@ -CLEAN-OBJS += libvpx.syms - libvpx.def: $(call enabled,CODEC_EXPORTS) @echo " [CREATE] $@" $(qexec)echo LIBRARY $(LIBVPX_SO:.dll=) INITINSTANCE TERMINSTANCE > $@ @@ -345,10 +332,22 @@ INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc CLEAN-OBJS += vpx.pc endif +libvpx.ver: $(call enabled,CODEC_EXPORTS) + @echo " [CREATE] $@" + $(qexec)echo "{ global:" > $@ + $(qexec)for f in $?; do awk '{print $$2";"}' < $$f >>$@; done + $(qexec)echo "local: *; };" >> $@ +CLEAN-OBJS += libvpx.ver + +libvpx.syms: $(call enabled,CODEC_EXPORTS) + @echo " [CREATE] $@" + $(qexec)awk '{print "_"$$2}' 
$^ >$@ +CLEAN-OBJS += libvpx.syms + # # Rule to make assembler configuration file from C configuration file # -ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) +ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes) # YASM $(BUILD_PFX)vpx_config.asm: $(BUILD_PFX)vpx_config.h @echo " [CREATE] $@" diff --git a/media/libvpx/libvpx/mainpage.dox b/media/libvpx/libvpx/mainpage.dox index ec202fa4fb50..4b0dff08710d 100644 --- a/media/libvpx/libvpx/mainpage.dox +++ b/media/libvpx/libvpx/mainpage.dox @@ -25,8 +25,10 @@ release. - The \ref readme contains instructions on recompiling the sample applications. - Read the \ref usage "usage" for a narrative on codec usage. + \if samples - Read the \ref samples "sample code" for examples of how to interact with the codec. + \endif - \ref codec reference \if encoder - \ref encoder reference diff --git a/media/libvpx/libvpx/md5_utils.c b/media/libvpx/libvpx/md5_utils.c index 093798b83398..9ddb104c8a61 100644 --- a/media/libvpx/libvpx/md5_utils.c +++ b/media/libvpx/libvpx/md5_utils.c @@ -163,7 +163,7 @@ void MD5Final(md5byte digest[16], struct MD5Context *ctx) { */ VPX_NO_UNSIGNED_OVERFLOW_CHECK void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) { - register UWORD32 a, b, c, d; + UWORD32 a, b, c, d; a = buf[0]; b = buf[1]; diff --git a/media/libvpx/libvpx/md5_utils.h b/media/libvpx/libvpx/md5_utils.h index bd4991b3ad96..e0d5a2d1fbd4 100644 --- a/media/libvpx/libvpx/md5_utils.h +++ b/media/libvpx/libvpx/md5_utils.h @@ -20,8 +20,8 @@ * Still in the public domain. */ -#ifndef MD5_UTILS_H_ -#define MD5_UTILS_H_ +#ifndef VPX_MD5_UTILS_H_ +#define VPX_MD5_UTILS_H_ #ifdef __cplusplus extern "C" { @@ -46,4 +46,4 @@ void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]); } // extern "C" #endif -#endif // MD5_UTILS_H_ +#endif // VPX_MD5_UTILS_H_ diff --git a/media/libvpx/libvpx/rate_hist.h b/media/libvpx/libvpx/rate_hist.h index 00a1676a6177..d6a4c685195d 100644 --- a/media/libvpx/libvpx/rate_hist.h +++ b/media/libvpx/libvpx/rate_hist.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef RATE_HIST_H_ -#define RATE_HIST_H_ +#ifndef VPX_RATE_HIST_H_ +#define VPX_RATE_HIST_H_ #include "vpx/vpx_encoder.h" @@ -37,4 +37,4 @@ void show_rate_histogram(struct rate_hist *hist, const vpx_codec_enc_cfg_t *cfg, } // extern "C" #endif -#endif // RATE_HIST_H_ +#endif // VPX_RATE_HIST_H_ diff --git a/media/libvpx/libvpx/test/acm_random.h b/media/libvpx/libvpx/test/acm_random.h index d915cf91336c..3458340a124a 100644 --- a/media/libvpx/libvpx/test/acm_random.h +++ b/media/libvpx/libvpx/test/acm_random.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TEST_ACM_RANDOM_H_ -#define TEST_ACM_RANDOM_H_ +#ifndef VPX_TEST_ACM_RANDOM_H_ +#define VPX_TEST_ACM_RANDOM_H_ #include <assert.h> @@ -34,6 +34,23 @@ class ACMRandom { return (value >> 15) & 0xffff; } + int32_t Rand20Signed(void) { + // Use 20 bits: values between 524287 and -524288. + const uint32_t value = random_.Generate(1048576); + return static_cast<int32_t>(value) - 524288; + } + + int16_t Rand16Signed(void) { + // Use 16 bits: values between 32767 and -32768. + return static_cast<int16_t>(random_.Generate(65536)); + } + + int16_t Rand13Signed(void) { + // Use 13 bits: values between 4095 and -4096. + const uint32_t value = random_.Generate(8192); + return static_cast<int16_t>(value) - 4096; + } + int16_t Rand9Signed(void) { // Use 9 bits: values between 255 (0x0FF) and -256 (0x100).
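// (Same pattern as Rand13Signed and Rand20Signed above: Generate(512) yields a value in [0, 511], re-centered around zero to [-256, 255].)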
const uint32_t value = random_.Generate(512); @@ -51,7 +68,7 @@ class ACMRandom { // Returns a random value near 0 or near 255, to better exercise // saturation behavior. const uint8_t r = Rand8(); - return r < 128 ? r << 4 : r >> 4; + return static_cast<uint8_t>((r < 128) ? r << 4 : r >> 4); } uint32_t RandRange(const uint32_t range) { @@ -73,4 +90,4 @@ class ACMRandom { } // namespace libvpx_test -#endif // TEST_ACM_RANDOM_H_ +#endif // VPX_TEST_ACM_RANDOM_H_ diff --git a/media/libvpx/libvpx/test/active_map_refresh_test.cc b/media/libvpx/libvpx/test/active_map_refresh_test.cc index d893635505ca..a985ed4f1190 100644 --- a/media/libvpx/libvpx/test/active_map_refresh_test.cc +++ b/media/libvpx/libvpx/test/active_map_refresh_test.cc @@ -74,7 +74,7 @@ class ActiveMapRefreshTest ::libvpx_test::Encoder *encoder) { ::libvpx_test::Y4mVideoSource *y4m_video = static_cast<libvpx_test::Y4mVideoSource *>(video); - if (video->frame() == 1) { + if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, cpu_used_); encoder->Control(VP9E_SET_AQ_MODE, kAqModeCyclicRefresh); } else if (video->frame() >= 2 && video->img()) { diff --git a/media/libvpx/libvpx/test/active_map_test.cc b/media/libvpx/libvpx/test/active_map_test.cc index 1d24f956f59e..03536c81ef48 100644 --- a/media/libvpx/libvpx/test/active_map_test.cc +++ b/media/libvpx/libvpx/test/active_map_test.cc @@ -35,7 +35,7 @@ class ActiveMapTest virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { - if (video->frame() == 1) { + if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, cpu_used_); } else if (video->frame() == 3) { vpx_active_map_t map = vpx_active_map_t(); diff --git a/media/libvpx/libvpx/test/add_noise_test.cc b/media/libvpx/libvpx/test/add_noise_test.cc index eae32c33bb8b..0d1893c524a6 100644 --- a/media/libvpx/libvpx/test/add_noise_test.cc +++ b/media/libvpx/libvpx/test/add_noise_test.cc @@ -8,8 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. */ #include <math.h> +#include <tuple> + #include "test/clear_system_state.h" #include "test/register_state_check.h" +#include "test/util.h" #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" @@ -25,7 +28,10 @@ typedef void (*AddNoiseFunc)(uint8_t *start, const int8_t *noise, int blackclamp, int whiteclamp, int width, int height, int pitch); -class AddNoiseTest : public ::testing::TestWithParam<AddNoiseFunc> { +typedef std::tuple<double, AddNoiseFunc> AddNoiseTestFPParam; + +class AddNoiseTest : public ::testing::Test, + public ::testing::WithParamInterface<AddNoiseTestFPParam> { public: virtual void TearDown() { libvpx_test::ClearSystemState(); } virtual ~AddNoiseTest() {} @@ -44,14 +50,14 @@ TEST_P(AddNoiseTest, CheckNoiseAdded) { const int height = 64; const int image_size = width * height; int8_t noise[kNoiseSize]; - const int clamp = vpx_setup_noise(4.4, noise, kNoiseSize); + const int clamp = vpx_setup_noise(GET_PARAM(0), noise, kNoiseSize); uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, sizeof(*s))); ASSERT_TRUE(s != NULL); memset(s, 99, image_size * sizeof(*s)); ASM_REGISTER_STATE_CHECK( - GetParam()(s, noise, clamp, clamp, width, height, width)); + GET_PARAM(1)(s, noise, clamp, clamp, width, height, width)); // Check to make sure we don't end up having either the same or no added // noise either vertically or horizontally.
@@ -70,7 +76,7 @@ TEST_P(AddNoiseTest, CheckNoiseAdded) { memset(s, 255, image_size); ASM_REGISTER_STATE_CHECK( - GetParam()(s, noise, clamp, clamp, width, height, width)); + GET_PARAM(1)(s, noise, clamp, clamp, width, height, width)); // Check to make sure don't roll over. for (int i = 0; i < image_size; ++i) { @@ -81,7 +87,7 @@ TEST_P(AddNoiseTest, CheckNoiseAdded) { memset(s, 0, image_size); ASM_REGISTER_STATE_CHECK( - GetParam()(s, noise, clamp, clamp, width, height, width)); + GET_PARAM(1)(s, noise, clamp, clamp, width, height, width)); // Check to make sure don't roll under. for (int i = 0; i < image_size; ++i) { @@ -108,7 +114,7 @@ TEST_P(AddNoiseTest, CheckCvsAssembly) { srand(0); ASM_REGISTER_STATE_CHECK( - GetParam()(s, noise, clamp, clamp, width, height, width)); + GET_PARAM(1)(s, noise, clamp, clamp, width, height, width)); srand(0); ASM_REGISTER_STATE_CHECK( vpx_plane_add_noise_c(d, noise, clamp, clamp, width, height, width)); @@ -121,16 +127,24 @@ TEST_P(AddNoiseTest, CheckCvsAssembly) { vpx_free(s); } -INSTANTIATE_TEST_CASE_P(C, AddNoiseTest, - ::testing::Values(vpx_plane_add_noise_c)); +using std::make_tuple; + +INSTANTIATE_TEST_CASE_P( + C, AddNoiseTest, + ::testing::Values(make_tuple(3.25, vpx_plane_add_noise_c), + make_tuple(4.4, vpx_plane_add_noise_c))); #if HAVE_SSE2 -INSTANTIATE_TEST_CASE_P(SSE2, AddNoiseTest, - ::testing::Values(vpx_plane_add_noise_sse2)); +INSTANTIATE_TEST_CASE_P( + SSE2, AddNoiseTest, + ::testing::Values(make_tuple(3.25, vpx_plane_add_noise_sse2), + make_tuple(4.4, vpx_plane_add_noise_sse2))); #endif #if HAVE_MSA -INSTANTIATE_TEST_CASE_P(MSA, AddNoiseTest, - ::testing::Values(vpx_plane_add_noise_msa)); +INSTANTIATE_TEST_CASE_P( + MSA, AddNoiseTest, + ::testing::Values(make_tuple(3.25, vpx_plane_add_noise_msa), + make_tuple(4.4, vpx_plane_add_noise_msa))); #endif } // namespace diff --git a/media/libvpx/libvpx/test/alt_ref_aq_segment_test.cc b/media/libvpx/libvpx/test/alt_ref_aq_segment_test.cc index 64a3011eb994..6e03a478525f 100644 --- a/media/libvpx/libvpx/test/alt_ref_aq_segment_test.cc +++ b/media/libvpx/libvpx/test/alt_ref_aq_segment_test.cc @@ -32,7 +32,7 @@ class AltRefAqSegmentTest virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { - if (video->frame() == 1) { + if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); encoder->Control(VP9E_SET_ALT_REF_AQ, alt_ref_aq_mode_); encoder->Control(VP9E_SET_AQ_MODE, aq_mode_); diff --git a/media/libvpx/libvpx/test/altref_test.cc b/media/libvpx/libvpx/test/altref_test.cc index f9308c2717a8..0119be4da0ab 100644 --- a/media/libvpx/libvpx/test/altref_test.cc +++ b/media/libvpx/libvpx/test/altref_test.cc @@ -35,7 +35,7 @@ class AltRefTest : public ::libvpx_test::EncoderTest, virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, libvpx_test::Encoder *encoder) { - if (video->frame() == 1) { + if (video->frame() == 0) { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_CPUUSED, 3); } diff --git a/media/libvpx/libvpx/test/android/README b/media/libvpx/libvpx/test/android/README index 4a1adcf7f48d..f67fea50c7b8 100644 --- a/media/libvpx/libvpx/test/android/README +++ b/media/libvpx/libvpx/test/android/README @@ -3,15 +3,16 @@ Android.mk will build vpx unittests on android. 
./libvpx/configure --target=armv7-android-gcc --enable-external-build \ --enable-postproc --disable-install-srcs --enable-multi-res-encoding \ --enable-temporal-denoising --disable-unit-tests --disable-install-docs \ - --disable-examples --disable-runtime-cpu-detect --sdk-path=$NDK + --disable-examples --disable-runtime-cpu-detect 2) From the parent directory, invoke ndk-build: NDK_PROJECT_PATH=. ndk-build APP_BUILD_SCRIPT=./libvpx/test/android/Android.mk \ APP_ABI=armeabi-v7a APP_PLATFORM=android-18 APP_OPTIM=release \ - APP_STL=gnustl_static + APP_STL=c++_static -Note: Both adb and ndk-build are available prebuilt at: - https://chromium.googlesource.com/android_tools +Note: Both adb and ndk-build are available at: + https://developer.android.com/studio#downloads + https://developer.android.com/ndk/downloads 3) Run get_files.py to download the test files: python get_files.py -i /path/to/test-data.sha1 -o /path/to/put/files \ diff --git a/media/libvpx/libvpx/test/aq_segment_test.cc b/media/libvpx/libvpx/test/aq_segment_test.cc index 1c2147fbb2c9..3c4053be7f31 100644 --- a/media/libvpx/libvpx/test/aq_segment_test.cc +++ b/media/libvpx/libvpx/test/aq_segment_test.cc @@ -31,7 +31,7 @@ class AqSegmentTest virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { - if (video->frame() == 1) { + if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); encoder->Control(VP9E_SET_AQ_MODE, aq_mode_); encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 100); diff --git a/media/libvpx/libvpx/test/avg_test.cc b/media/libvpx/libvpx/test/avg_test.cc index ad21198e4b43..72e16f657813 100644 --- a/media/libvpx/libvpx/test/avg_test.cc +++ b/media/libvpx/libvpx/test/avg_test.cc @@ -11,6 +11,7 @@ #include <limits.h> #include <stdio.h> #include <string.h> +#include <tuple> #include "third_party/googletest/src/include/gtest/gtest.h" @@ -22,40 +23,43 @@ #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" +#include "vpx/vpx_codec.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/vpx_timer.h" using libvpx_test::ACMRandom; namespace { + +template <typename Pixel> class AverageTestBase : public ::testing::Test { public: - AverageTestBase(int width, int height) : width_(width), height_(height) {} + AverageTestBase(int width, int height) + : width_(width), height_(height), source_data_(NULL), source_stride_(0), + bit_depth_(8) {} - static void SetUpTestCase() { - source_data_ = reinterpret_cast<uint8_t *>( - vpx_memalign(kDataAlignment, kDataBlockSize)); - } - - static void TearDownTestCase() { + virtual void TearDown() { vpx_free(source_data_); source_data_ = NULL; + libvpx_test::ClearSystemState(); } - virtual void TearDown() { libvpx_test::ClearSystemState(); } - protected: // Handle blocks up to 4 blocks 64x64 with stride up to 128 static const int kDataAlignment = 16; static const int kDataBlockSize = 64 * 128; virtual void SetUp() { + source_data_ = reinterpret_cast<Pixel *>( + vpx_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0]))); + ASSERT_TRUE(source_data_ != NULL); source_stride_ = (width_ + 31) & ~31; + bit_depth_ = 8; rnd_.Reset(ACMRandom::DeterministicSeed()); } // Sum Pixels - static unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) { + static unsigned int ReferenceAverage8x8(const Pixel *source, int pitch) { unsigned int average = 0; for (int h = 0; h < 8; ++h) { for (int w = 0; w < 8; ++w) average += source[h * pitch + w]; @@ -63,7 +67,7 @@ class AverageTestBase : public ::testing::Test { return ((average + 32) >> 6); } - static unsigned
int ReferenceAverage4x4(const uint8_t *source, int pitch) { + static unsigned int ReferenceAverage4x4(const Pixel *source, int pitch) { unsigned int average = 0; for (int h = 0; h < 4; ++h) { for (int w = 0; w < 4; ++w) average += source[h * pitch + w]; @@ -71,7 +75,7 @@ class AverageTestBase : public ::testing::Test { return ((average + 8) >> 4); } - void FillConstant(uint8_t fill_constant) { + void FillConstant(Pixel fill_constant) { for (int i = 0; i < width_ * height_; ++i) { source_data_[i] = fill_constant; } @@ -79,21 +83,22 @@ class AverageTestBase : public ::testing::Test { void FillRandom() { for (int i = 0; i < width_ * height_; ++i) { - source_data_[i] = rnd_.Rand8(); + source_data_[i] = rnd_.Rand16() & ((1 << bit_depth_) - 1); } } int width_, height_; - static uint8_t *source_data_; + Pixel *source_data_; int source_stride_; + int bit_depth_; ACMRandom rnd_; }; typedef unsigned int (*AverageFunction)(const uint8_t *s, int pitch); -typedef std::tr1::tuple<int, int, int, int, AverageFunction> AvgFunc; +typedef std::tuple<int, int, int, int, AverageFunction> AvgFunc; -class AverageTest : public AverageTestBase, +class AverageTest : public AverageTestBase<uint8_t>, public ::testing::WithParamInterface<AvgFunc> { public: AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {} @@ -119,12 +124,40 @@ class AverageTest : public AverageTestBase, } }; +#if CONFIG_VP9_HIGHBITDEPTH +class AverageTestHBD : public AverageTestBase<uint16_t>, + public ::testing::WithParamInterface<AvgFunc> { + public: + AverageTestHBD() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {} + + protected: + void CheckAverages() { + const int block_size = GET_PARAM(3); + unsigned int expected = 0; + if (block_size == 8) { + expected = + ReferenceAverage8x8(source_data_ + GET_PARAM(2), source_stride_); + } else if (block_size == 4) { + expected = + ReferenceAverage4x4(source_data_ + GET_PARAM(2), source_stride_); + } + + ASM_REGISTER_STATE_CHECK(GET_PARAM(4)( + CONVERT_TO_BYTEPTR(source_data_ + GET_PARAM(2)), source_stride_)); + unsigned int actual = GET_PARAM(4)( + CONVERT_TO_BYTEPTR(source_data_ + GET_PARAM(2)), source_stride_); + + EXPECT_EQ(expected, actual); + } +}; +#endif // CONFIG_VP9_HIGHBITDEPTH + typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref, const int ref_stride, const int height); -typedef std::tr1::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam; +typedef std::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam; -class IntProRowTest : public AverageTestBase, +class IntProRowTest : public AverageTestBase<uint8_t>, public ::testing::WithParamInterface<IntProRowParam> { public: IntProRowTest() @@ -135,6 +168,10 @@ class IntProRowTest : public AverageTestBase, protected: virtual void SetUp() { + source_data_ = reinterpret_cast<uint8_t *>( + vpx_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0]))); + ASSERT_TRUE(source_data_ != NULL); + hbuf_asm_ = reinterpret_cast<int16_t *>( vpx_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16)); hbuf_c_ = reinterpret_cast<int16_t *>( @@ -142,6 +179,8 @@ class IntProRowTest : public AverageTestBase, } virtual void TearDown() { + vpx_free(source_data_); + source_data_ = NULL; vpx_free(hbuf_c_); hbuf_c_ = NULL; vpx_free(hbuf_asm_); @@ -164,9 +203,9 @@ class IntProRowTest : public AverageTestBase, typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width); -typedef std::tr1::tuple<int, IntProColFunc, IntProColFunc> IntProColParam; +typedef std::tuple<int, IntProColFunc, IntProColFunc> IntProColParam; -class IntProColTest : public AverageTestBase, +class IntProColTest : public AverageTestBase<uint8_t>, public ::testing::WithParamInterface<IntProColParam> { public: IntProColTest() : AverageTestBase(GET_PARAM(0), 1), sum_asm_(0), sum_c_(0) { @@ -189,7 +228,7 @@ class IntProColTest : public AverageTestBase, }; typedef int (*SatdFunc)(const
tran_low_t *coeffs, int length); -typedef std::tr1::tuple<int, SatdFunc> SatdTestParam; +typedef std::tuple<int, SatdFunc> SatdTestParam; class SatdTest : public ::testing::Test, public ::testing::WithParamInterface<SatdTestParam> { @@ -212,12 +251,7 @@ class SatdTest : public ::testing::Test, for (int i = 0; i < satd_size_; ++i) src_[i] = val; } - void FillRandom() { - for (int i = 0; i < satd_size_; ++i) { - const int16_t tmp = rnd_.Rand16(); - src_[i] = (tran_low_t)tmp; - } - } + virtual void FillRandom() = 0; void Check(const int expected) { int total; @@ -225,17 +259,29 @@ class SatdTest : public ::testing::Test, EXPECT_EQ(expected, total); } + tran_low_t *GetCoeff() const { return src_; } + int satd_size_; + ACMRandom rnd_; + tran_low_t *src_; private: - tran_low_t *src_; SatdFunc satd_func_; - ACMRandom rnd_; +}; + +class SatdLowbdTest : public SatdTest { + protected: + virtual void FillRandom() { + for (int i = 0; i < satd_size_; ++i) { + const int16_t tmp = rnd_.Rand16Signed(); + src_[i] = (tran_low_t)tmp; + } + } }; typedef int64_t (*BlockErrorFunc)(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size); -typedef std::tr1::tuple<int, BlockErrorFunc> BlockErrorTestFPParam; +typedef std::tuple<int, BlockErrorFunc> BlockErrorTestFPParam; class BlockErrorTestFP : public ::testing::Test, public ::testing::WithParamInterface<BlockErrorTestFPParam> { @@ -279,6 +325,10 @@ class BlockErrorTestFP EXPECT_EQ(expected, total); } + tran_low_t *GetCoeff() const { return coeff_; } + + tran_low_t *GetDQCoeff() const { return dqcoeff_; } + int txfm_size_; private: @@ -288,8 +338,6 @@ class BlockErrorTestFP ACMRandom rnd_; }; -uint8_t *AverageTestBase::source_data_ = NULL; - TEST_P(AverageTest, MinValue) { FillConstant(0); CheckAverages(); @@ -308,6 +356,27 @@ TEST_P(AverageTest, Random) { CheckAverages(); } } +#if CONFIG_VP9_HIGHBITDEPTH +TEST_P(AverageTestHBD, MinValue) { + FillConstant(0); + CheckAverages(); +} + +TEST_P(AverageTestHBD, MaxValue) { + FillConstant((1 << VPX_BITS_12) - 1); + CheckAverages(); +} + +TEST_P(AverageTestHBD, Random) { + bit_depth_ = VPX_BITS_12; + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches.
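+ // Each pass refills source_data_ with fresh random 12-bit samples + // (FillRandom masks Rand16() to bit_depth_ bits) before re-running + // CheckAverages.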
+ for (int i = 0; i < 1000; i++) { + FillRandom(); + CheckAverages(); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH TEST_P(IntProRowTest, MinValue) { FillConstant(0); @@ -339,27 +408,27 @@ TEST_P(IntProColTest, Random) { RunComparison(); } -TEST_P(SatdTest, MinValue) { +TEST_P(SatdLowbdTest, MinValue) { const int kMin = -32640; const int expected = -kMin * satd_size_; FillConstant(kMin); Check(expected); } -TEST_P(SatdTest, MaxValue) { +TEST_P(SatdLowbdTest, MaxValue) { const int kMax = 32640; const int expected = kMax * satd_size_; FillConstant(kMax); Check(expected); } -TEST_P(SatdTest, Random) { +TEST_P(SatdLowbdTest, Random) { int expected; switch (satd_size_) { - case 16: expected = 205298; break; - case 64: expected = 1113950; break; - case 256: expected = 4268415; break; - case 1024: expected = 16954082; break; + case 16: expected = 261036; break; + case 64: expected = 991732; break; + case 256: expected = 4136358; break; + case 1024: expected = 16677592; break; default: FAIL() << "Invalid satd size (" << satd_size_ << ") valid: 16/64/256/1024"; @@ -368,11 +437,12 @@ Check(expected); } -TEST_P(SatdTest, DISABLED_Speed) { +TEST_P(SatdLowbdTest, DISABLED_Speed) { const int kCountSpeedTestBlock = 20000; vpx_usec_timer timer; - DECLARE_ALIGNED(16, tran_low_t, coeff[1024]); const int blocksize = GET_PARAM(0); + FillRandom(); + tran_low_t *coeff = GetCoeff(); vpx_usec_timer_start(&timer); for (int i = 0; i < kCountSpeedTestBlock; ++i) { @@ -383,6 +453,62 @@ printf("blocksize: %4d time: %4d us\n", blocksize, elapsed_time); } +#if CONFIG_VP9_HIGHBITDEPTH +class SatdHighbdTest : public SatdTest { + protected: + virtual void FillRandom() { + for (int i = 0; i < satd_size_; ++i) { + src_[i] = rnd_.Rand20Signed(); + } + } +}; + +TEST_P(SatdHighbdTest, MinValue) { + const int kMin = -524280; + const int expected = -kMin * satd_size_; + FillConstant(kMin); + Check(expected); +} + +TEST_P(SatdHighbdTest, MaxValue) { + const int kMax = 524280; + const int expected = kMax * satd_size_; + FillConstant(kMax); + Check(expected); +} + +TEST_P(SatdHighbdTest, Random) { + int expected; + switch (satd_size_) { + case 16: expected = 5249712; break; + case 64: expected = 18362120; break; + case 256: expected = 66100520; break; + case 1024: expected = 266094734; break; + default: + FAIL() << "Invalid satd size (" << satd_size_ + << ") valid: 16/64/256/1024"; + } + FillRandom(); + Check(expected); +} + +TEST_P(SatdHighbdTest, DISABLED_Speed) { + const int kCountSpeedTestBlock = 20000; + vpx_usec_timer timer; + const int blocksize = GET_PARAM(0); + FillRandom(); + tran_low_t *coeff = GetCoeff(); + + vpx_usec_timer_start(&timer); + for (int i = 0; i < kCountSpeedTestBlock; ++i) { + GET_PARAM(1)(coeff, blocksize); + } + vpx_usec_timer_mark(&timer); + const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer)); + printf("blocksize: %4d time: %4d us\n", blocksize, elapsed_time); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + TEST_P(BlockErrorTestFP, MinValue) { const int64_t kMin = -32640; const int64_t expected = kMin * kMin * txfm_size_; @@ -415,9 +541,10 @@ TEST_P(BlockErrorTestFP, Random) { TEST_P(BlockErrorTestFP, DISABLED_Speed) { const int kCountSpeedTestBlock = 20000; vpx_usec_timer timer; - DECLARE_ALIGNED(16, tran_low_t, coeff[1024]); - DECLARE_ALIGNED(16, tran_low_t, dqcoeff[1024]); const int blocksize = GET_PARAM(0); + FillRandom(); + tran_low_t *coeff = GetCoeff(); + tran_low_t *dqcoeff = GetDQCoeff(); vpx_usec_timer_start(&timer); for (int i
= 0; i < kCountSpeedTestBlock; ++i) { @@ -428,14 +555,34 @@ TEST_P(BlockErrorTestFP, DISABLED_Speed) { printf("blocksize: %4d time: %4d us\n", blocksize, elapsed_time); } -using std::tr1::make_tuple; +using std::make_tuple; INSTANTIATE_TEST_CASE_P( C, AverageTest, ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_avg_8x8_c), make_tuple(16, 16, 1, 4, &vpx_avg_4x4_c))); -INSTANTIATE_TEST_CASE_P(C, SatdTest, +#if CONFIG_VP9_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + C, AverageTestHBD, + ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_highbd_avg_8x8_c), + make_tuple(16, 16, 1, 4, &vpx_highbd_avg_4x4_c))); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P( + SSE2, AverageTestHBD, + ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_highbd_avg_8x8_sse2), + make_tuple(16, 16, 1, 4, &vpx_highbd_avg_4x4_sse2))); +#endif // HAVE_SSE2 + +INSTANTIATE_TEST_CASE_P(C, SatdHighbdTest, + ::testing::Values(make_tuple(16, &vpx_satd_c), + make_tuple(64, &vpx_satd_c), + make_tuple(256, &vpx_satd_c), + make_tuple(1024, &vpx_satd_c))); +#endif // CONFIG_VP9_HIGHBITDEPTH + +INSTANTIATE_TEST_CASE_P(C, SatdLowbdTest, ::testing::Values(make_tuple(16, &vpx_satd_c), make_tuple(64, &vpx_satd_c), make_tuple(256, &vpx_satd_c), @@ -472,7 +619,7 @@ INSTANTIATE_TEST_CASE_P( make_tuple(64, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c))); -INSTANTIATE_TEST_CASE_P(SSE2, SatdTest, +INSTANTIATE_TEST_CASE_P(SSE2, SatdLowbdTest, ::testing::Values(make_tuple(16, &vpx_satd_sse2), make_tuple(64, &vpx_satd_sse2), make_tuple(256, &vpx_satd_sse2), @@ -487,12 +634,21 @@ INSTANTIATE_TEST_CASE_P( #endif // HAVE_SSE2 #if HAVE_AVX2 -INSTANTIATE_TEST_CASE_P(AVX2, SatdTest, +INSTANTIATE_TEST_CASE_P(AVX2, SatdLowbdTest, ::testing::Values(make_tuple(16, &vpx_satd_avx2), make_tuple(64, &vpx_satd_avx2), make_tuple(256, &vpx_satd_avx2), make_tuple(1024, &vpx_satd_avx2))); +#if CONFIG_VP9_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + AVX2, SatdHighbdTest, + ::testing::Values(make_tuple(16, &vpx_highbd_satd_avx2), + make_tuple(64, &vpx_highbd_satd_avx2), + make_tuple(256, &vpx_highbd_satd_avx2), + make_tuple(1024, &vpx_highbd_satd_avx2))); +#endif // CONFIG_VP9_HIGHBITDEPTH + INSTANTIATE_TEST_CASE_P( AVX2, BlockErrorTestFP, ::testing::Values(make_tuple(16, &vp9_block_error_fp_avx2), @@ -525,7 +681,7 @@ INSTANTIATE_TEST_CASE_P( make_tuple(64, &vpx_int_pro_col_neon, &vpx_int_pro_col_c))); -INSTANTIATE_TEST_CASE_P(NEON, SatdTest, +INSTANTIATE_TEST_CASE_P(NEON, SatdLowbdTest, ::testing::Values(make_tuple(16, &vpx_satd_neon), make_tuple(64, &vpx_satd_neon), make_tuple(256, &vpx_satd_neon), @@ -570,7 +726,7 @@ INSTANTIATE_TEST_CASE_P( // TODO(jingning): Remove the highbitdepth flag once the SIMD functions are // in place. #if !CONFIG_VP9_HIGHBITDEPTH -INSTANTIATE_TEST_CASE_P(MSA, SatdTest, +INSTANTIATE_TEST_CASE_P(MSA, SatdLowbdTest, ::testing::Values(make_tuple(16, &vpx_satd_msa), make_tuple(64, &vpx_satd_msa), make_tuple(256, &vpx_satd_msa), diff --git a/media/libvpx/libvpx/test/bench.cc b/media/libvpx/libvpx/test/bench.cc new file mode 100644 index 000000000000..4b883d8250e1 --- /dev/null +++ b/media/libvpx/libvpx/test/bench.cc @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <algorithm> +#include <stdio.h> + +#include "test/bench.h" +#include "vpx_ports/vpx_timer.h" + +void AbstractBench::RunNTimes(int n) { + for (int r = 0; r < VPX_BENCH_ROBUST_ITER; r++) { + vpx_usec_timer timer; + vpx_usec_timer_start(&timer); + for (int j = 0; j < n; ++j) { + Run(); + } + vpx_usec_timer_mark(&timer); + times_[r] = static_cast<int>(vpx_usec_timer_elapsed(&timer)); + } +} + +void AbstractBench::PrintMedian(const char *title) { + std::sort(times_, times_ + VPX_BENCH_ROBUST_ITER); + const int med = times_[VPX_BENCH_ROBUST_ITER >> 1]; + int sad = 0; + for (int t = 0; t < VPX_BENCH_ROBUST_ITER; t++) { + sad += abs(times_[t] - med); + } + printf("[%10s] %s %.1f ms ( ±%.1f ms )\n", "BENCH ", title, med / 1000.0, + sad / (VPX_BENCH_ROBUST_ITER * 1000.0)); +} diff --git a/media/libvpx/libvpx/test/bench.h b/media/libvpx/libvpx/test/bench.h new file mode 100644 index 000000000000..57ca9118bac0 --- /dev/null +++ b/media/libvpx/libvpx/test/bench.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_TEST_BENCH_H_ +#define VPX_TEST_BENCH_H_ + +// Number of iterations used to compute median run time. +#define VPX_BENCH_ROBUST_ITER 15 + +class AbstractBench { + public: + void RunNTimes(int n); + void PrintMedian(const char *title); + + protected: + // Implement this method and put the code to benchmark in it. + virtual void Run() = 0; + + private: + int times_[VPX_BENCH_ROBUST_ITER]; +}; + +#endif // VPX_TEST_BENCH_H_ diff --git a/media/libvpx/libvpx/test/blockiness_test.cc b/media/libvpx/libvpx/test/blockiness_test.cc index 2fa10192f159..ced6e66c6207 100644 --- a/media/libvpx/libvpx/test/blockiness_test.cc +++ b/media/libvpx/libvpx/test/blockiness_test.cc @@ -11,6 +11,7 @@ #include <limits.h> #include <stdio.h> #include <string.h> +#include <tuple> #include "third_party/googletest/src/include/gtest/gtest.h" @@ -25,10 +26,7 @@ #include "test/util.h" #include "vpx_mem/vpx_mem.h" - -extern "C" double vp9_get_blockiness(const unsigned char *img1, int img1_pitch, - const unsigned char *img2, int img2_pitch, - int width, int height); +#include "vp9/encoder/vp9_blockiness.h" using libvpx_test::ACMRandom; @@ -141,7 +139,7 @@ class BlockinessTestBase : public ::testing::Test { }; #if CONFIG_VP9_ENCODER -typedef std::tr1::tuple<int, int> BlockinessParam; +typedef std::tuple<int, int> BlockinessParam; class BlockinessVP9Test : public BlockinessTestBase, public ::testing::WithParamInterface<BlockinessParam> { @@ -208,15 +206,15 @@ TEST_P(BlockinessVP9Test, WorstCaseBlockiness) { } #endif // CONFIG_VP9_ENCODER -using std::tr1::make_tuple; +using std::make_tuple; //------------------------------------------------------------------------------ // C functions #if CONFIG_VP9_ENCODER -const BlockinessParam c_vp9_tests[] = { - make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238), -}; +const BlockinessParam c_vp9_tests[] = { make_tuple(320, 240), + make_tuple(318, 242), + make_tuple(318, 238) }; INSTANTIATE_TEST_CASE_P(C, BlockinessVP9Test, ::testing::ValuesIn(c_vp9_tests)); #endif diff --git a/media/libvpx/libvpx/test/borders_test.cc b/media/libvpx/libvpx/test/borders_test.cc index e66ff02e25e0..b91a15b80036 100644 --- a/media/libvpx/libvpx/test/borders_test.cc +++ 
b/media/libvpx/libvpx/test/borders_test.cc @@ -31,7 +31,7 @@ class BordersTest virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { - if (video->frame() == 1) { + if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, 1); encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); diff --git a/media/libvpx/libvpx/test/buffer.h b/media/libvpx/libvpx/test/buffer.h index 2175dad9d903..b003d2f0d02a 100644 --- a/media/libvpx/libvpx/test/buffer.h +++ b/media/libvpx/libvpx/test/buffer.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TEST_BUFFER_H_ -#define TEST_BUFFER_H_ +#ifndef VPX_TEST_BUFFER_H_ +#define VPX_TEST_BUFFER_H_ #include <stdio.h> @@ -379,4 +379,4 @@ bool Buffer<T>::BufferSizesMatch(const Buffer<T> &a) const { return true; } } // namespace libvpx_test -#endif // TEST_BUFFER_H_ +#endif // VPX_TEST_BUFFER_H_ diff --git a/media/libvpx/libvpx/test/byte_alignment_test.cc b/media/libvpx/libvpx/test/byte_alignment_test.cc index 5a058b27561e..0ef6c4c5192f 100644 --- a/media/libvpx/libvpx/test/byte_alignment_test.cc +++ b/media/libvpx/libvpx/test/byte_alignment_test.cc @@ -171,8 +171,9 @@ TEST_F(ByteAlignmentTest, SwitchByteAlignment) { TEST_P(ByteAlignmentTest, TestAlignment) { const ByteAlignmentTestParam t = GetParam(); SetByteAlignment(t.byte_alignment, t.expected_value); - if (t.decode_remaining) + if (t.decode_remaining) { ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames(t.byte_alignment)); + } } INSTANTIATE_TEST_CASE_P(Alignments, ByteAlignmentTest, diff --git a/media/libvpx/libvpx/test/clear_system_state.h b/media/libvpx/libvpx/test/clear_system_state.h index 044a5c758346..ba3c0b386a1b 100644 --- a/media/libvpx/libvpx/test/clear_system_state.h +++ b/media/libvpx/libvpx/test/clear_system_state.h @@ -7,23 +7,17 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TEST_CLEAR_SYSTEM_STATE_H_ -#define TEST_CLEAR_SYSTEM_STATE_H_ +#ifndef VPX_TEST_CLEAR_SYSTEM_STATE_H_ +#define VPX_TEST_CLEAR_SYSTEM_STATE_H_ #include "./vpx_config.h" -#if ARCH_X86 || ARCH_X86_64 -#include "vpx_ports/x86.h" -#endif +#include "vpx_ports/system_state.h" namespace libvpx_test { // Reset system to a known state. This function should be used for all non-API // test cases. -inline void ClearSystemState() { -#if ARCH_X86 || ARCH_X86_64 - vpx_reset_mmx_state(); -#endif -} +inline void ClearSystemState() { vpx_clear_system_state(); } } // namespace libvpx_test -#endif // TEST_CLEAR_SYSTEM_STATE_H_ +#endif // VPX_TEST_CLEAR_SYSTEM_STATE_H_ diff --git a/media/libvpx/libvpx/test/codec_factory.h b/media/libvpx/libvpx/test/codec_factory.h index d5882ed9c8bf..17c9512ca8b0 100644 --- a/media/libvpx/libvpx/test/codec_factory.h +++ b/media/libvpx/libvpx/test/codec_factory.h @@ -7,8 +7,10 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ -#ifndef TEST_CODEC_FACTORY_H_ -#define TEST_CODEC_FACTORY_H_ +#ifndef VPX_TEST_CODEC_FACTORY_H_ +#define VPX_TEST_CODEC_FACTORY_H_ + +#include <tuple> #include "./vpx_config.h" #include "vpx/vpx_decoder.h" @@ -53,23 +55,22 @@ class CodecFactory { template <class T1> class CodecTestWithParam : public ::testing::TestWithParam< - std::tr1::tuple<const libvpx_test::CodecFactory *, T1> > {}; + std::tuple<const libvpx_test::CodecFactory *, T1> > {}; template <class T1, class T2> class CodecTestWith2Params : public ::testing::TestWithParam< - std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2> > {}; + std::tuple<const libvpx_test::CodecFactory *, T1, T2> > {}; template <class T1, class T2, class T3> class CodecTestWith3Params : public ::testing::TestWithParam< - std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2, T3> > {}; + std::tuple<const libvpx_test::CodecFactory *, T1, T2, T3> > {}; template <class T1, class T2, class T3, class T4> class CodecTestWith4Params : public ::testing::TestWithParam< - std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2, T3, T4> > { -}; + std::tuple<const libvpx_test::CodecFactory *, T1, T2, T3, T4> > {}; /* * VP8 Codec Definitions @@ -264,4 +265,4 @@ const libvpx_test::VP9CodecFactory kVP9; #endif // CONFIG_VP9 } // namespace libvpx_test -#endif // TEST_CODEC_FACTORY_H_ +#endif // VPX_TEST_CODEC_FACTORY_H_ diff --git a/media/libvpx/libvpx/test/comp_avg_pred_test.cc b/media/libvpx/libvpx/test/comp_avg_pred_test.cc index 110e06583657..56e701e09cbf 100644 --- a/media/libvpx/libvpx/test/comp_avg_pred_test.cc +++ b/media/libvpx/libvpx/test/comp_avg_pred_test.cc @@ -29,6 +29,10 @@ uint8_t avg_with_rounding(uint8_t a, uint8_t b) { return (a + b + 1) >> 1; } void reference_pred(const Buffer<uint8_t> &pred, const Buffer<uint8_t> &ref, int width, int height, Buffer<uint8_t> *avg) { + ASSERT_TRUE(avg->TopLeftPixel() != NULL); + ASSERT_TRUE(pred.TopLeftPixel() != NULL); + ASSERT_TRUE(ref.TopLeftPixel() != NULL); + for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { avg->TopLeftPixel()[y * avg->stride() + x] = diff --git a/media/libvpx/libvpx/test/consistency_test.cc b/media/libvpx/libvpx/test/consistency_test.cc index 37b4a45e541c..875b06f4aa14 100644 --- a/media/libvpx/libvpx/test/consistency_test.cc +++ b/media/libvpx/libvpx/test/consistency_test.cc @@ -11,6 +11,7 @@ #include <limits.h> #include <stdio.h> #include <string.h> +#include <tuple> #include "third_party/googletest/src/include/gtest/gtest.h" @@ -127,7 +128,7 @@ class ConsistencyTestBase : public ::testing::Test { }; #if CONFIG_VP9_ENCODER -typedef std::tr1::tuple<int, int> ConsistencyParam; +typedef std::tuple<int, int> ConsistencyParam; class ConsistencyVP9Test : public ConsistencyTestBase, public ::testing::WithParamInterface<ConsistencyParam> { @@ -198,15 +199,15 @@ TEST_P(ConsistencyVP9Test, ConsistencyIsZero) { } #endif // CONFIG_VP9_ENCODER -using std::tr1::make_tuple; +using std::make_tuple; //------------------------------------------------------------------------------ // C functions #if CONFIG_VP9_ENCODER -const ConsistencyParam c_vp9_tests[] = { - make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238), -}; +const ConsistencyParam c_vp9_tests[] = { make_tuple(320, 240), + make_tuple(318, 242), + make_tuple(318, 238) }; INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test, ::testing::ValuesIn(c_vp9_tests)); #endif diff --git a/media/libvpx/libvpx/test/convolve_test.cc b/media/libvpx/libvpx/test/convolve_test.cc index 70f0b11a7705..d8d053df5fd6 100644 --- a/media/libvpx/libvpx/test/convolve_test.cc +++ b/media/libvpx/libvpx/test/convolve_test.cc @@ -9,6 +9,7 @@ */ #include <string.h> +#include <tuple> #include "third_party/googletest/src/include/gtest/gtest.h" @@ -77,7 +78,7 @@ struct ConvolveFunctions { int use_highbd_; // 0 if high bitdepth not used, else the actual bit depth.
}; -typedef std::tr1::tuple ConvolveParam; +typedef std::tuple ConvolveParam; #define ALL_SIZES(convolve_fn) \ make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn), \ @@ -114,6 +115,7 @@ void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride, // and filter_max_width = 16 // uint8_t intermediate_buffer[71 * kMaxDimension]; + vp9_zero(intermediate_buffer); const int intermediate_next_stride = 1 - static_cast(intermediate_height * output_width); @@ -213,6 +215,8 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr, const int intermediate_next_stride = 1 - static_cast(intermediate_height * output_width); + vp9_zero(intermediate_buffer); + // Horizontal pass (src -> transposed intermediate). { uint16_t *output_ptr = intermediate_buffer; @@ -412,8 +416,14 @@ class ConvolveTest : public ::testing::TestWithParam { for (int i = 0; i < kOutputBufferSize; ++i) { if (IsIndexInBorder(i)) { output_[i] = 255; +#if CONFIG_VP9_HIGHBITDEPTH + output16_[i] = mask_; +#endif } else { output_[i] = 0; +#if CONFIG_VP9_HIGHBITDEPTH + output16_[i] = 0; +#endif } } @@ -450,7 +460,9 @@ class ConvolveTest : public ::testing::TestWithParam { void CheckGuardBlocks() { for (int i = 0; i < kOutputBufferSize; ++i) { - if (IsIndexInBorder(i)) EXPECT_EQ(255, output_[i]); + if (IsIndexInBorder(i)) { + EXPECT_EQ(255, output_[i]); + } } } @@ -672,6 +684,74 @@ TEST_P(ConvolveTest, DISABLED_8Tap_Vert_Speed) { UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); } +TEST_P(ConvolveTest, DISABLED_4Tap_Speed) { + const uint8_t *const in = input(); + uint8_t *const out = output(); + const InterpKernel *const fourtap = vp9_filter_kernels[FOURTAP]; + const int kNumTests = 5000000; + const int width = Width(); + const int height = Height(); + vpx_usec_timer timer; + + SetConstantInput(127); + + vpx_usec_timer_start(&timer); + for (int n = 0; n < kNumTests; ++n) { + UUT_->hv8_[0](in, kInputStride, out, kOutputStride, fourtap, 8, 16, 8, 16, + width, height); + } + vpx_usec_timer_mark(&timer); + + const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); + printf("convolve4_%dx%d_%d: %d us\n", width, height, + UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); +} + +TEST_P(ConvolveTest, DISABLED_4Tap_Horiz_Speed) { + const uint8_t *const in = input(); + uint8_t *const out = output(); + const InterpKernel *const fourtap = vp9_filter_kernels[FOURTAP]; + const int kNumTests = 5000000; + const int width = Width(); + const int height = Height(); + vpx_usec_timer timer; + + SetConstantInput(127); + + vpx_usec_timer_start(&timer); + for (int n = 0; n < kNumTests; ++n) { + UUT_->h8_[0](in, kInputStride, out, kOutputStride, fourtap, 8, 16, 8, 16, + width, height); + } + vpx_usec_timer_mark(&timer); + + const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); + printf("convolve4_horiz_%dx%d_%d: %d us\n", width, height, + UUT_->use_highbd_ ? 
UUT_->use_highbd_ : 8, elapsed_time); +} + +TEST_P(ConvolveTest, DISABLED_4Tap_Vert_Speed) { + const uint8_t *const in = input(); + uint8_t *const out = output(); + const InterpKernel *const fourtap = vp9_filter_kernels[FOURTAP]; + const int kNumTests = 5000000; + const int width = Width(); + const int height = Height(); + vpx_usec_timer timer; + + SetConstantInput(127); + + vpx_usec_timer_start(&timer); + for (int n = 0; n < kNumTests; ++n) { + UUT_->v8_[0](in, kInputStride, out, kOutputStride, fourtap, 8, 16, 8, 16, + width, height); + } + vpx_usec_timer_mark(&timer); + + const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); + printf("convolve4_vert_%dx%d_%d: %d us\n", width, height, + UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); +} TEST_P(ConvolveTest, DISABLED_8Tap_Avg_Speed) { const uint8_t *const in = input(); uint8_t *const out = output(); @@ -787,7 +867,7 @@ TEST_P(ConvolveTest, Copy2D) { } } -const int kNumFilterBanks = 4; +const int kNumFilterBanks = 5; const int kNumFilters = 16; TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) { @@ -1040,7 +1120,7 @@ TEST_P(ConvolveTest, CheckScalingFiltering) { } #endif -using std::tr1::make_tuple; +using std::make_tuple; #if CONFIG_VP9_HIGHBITDEPTH #define WRAP(func, bd) \ @@ -1053,7 +1133,7 @@ using std::tr1::make_tuple; x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); \ } -#if HAVE_SSE2 && ARCH_X86_64 +#if HAVE_SSE2 && VPX_ARCH_X86_64 WRAP(convolve_copy_sse2, 8) WRAP(convolve_avg_sse2, 8) WRAP(convolve_copy_sse2, 10) @@ -1078,7 +1158,7 @@ WRAP(convolve8_vert_sse2, 12) WRAP(convolve8_avg_vert_sse2, 12) WRAP(convolve8_sse2, 12) WRAP(convolve8_avg_sse2, 12) -#endif // HAVE_SSE2 && ARCH_X86_64 +#endif // HAVE_SSE2 && VPX_ARCH_X86_64 #if HAVE_AVX2 WRAP(convolve_copy_avx2, 8) @@ -1183,9 +1263,9 @@ const ConvolveFunctions convolve12_c( wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12); -const ConvolveParam kArrayConvolve_c[] = { - ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c) -}; +const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c), + ALL_SIZES(convolve10_c), + ALL_SIZES(convolve12_c) }; #else const ConvolveFunctions convolve8_c( @@ -1198,7 +1278,7 @@ const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) }; #endif INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c)); -#if HAVE_SSE2 && ARCH_X86_64 +#if HAVE_SSE2 && VPX_ARCH_X86_64 #if CONFIG_VP9_HIGHBITDEPTH const ConvolveFunctions convolve8_sse2( wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8, @@ -1377,4 +1457,16 @@ const ConvolveParam kArrayConvolve_vsx[] = { ALL_SIZES(convolve8_vsx) }; INSTANTIATE_TEST_CASE_P(VSX, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_vsx)); #endif // HAVE_VSX + +#if HAVE_MMI +const ConvolveFunctions convolve8_mmi( + vpx_convolve_copy_c, vpx_convolve_avg_mmi, vpx_convolve8_horiz_mmi, + vpx_convolve8_avg_horiz_mmi, vpx_convolve8_vert_mmi, + vpx_convolve8_avg_vert_mmi, vpx_convolve8_mmi, vpx_convolve8_avg_mmi, + vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, + vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); +const ConvolveParam kArrayConvolve_mmi[] = { ALL_SIZES(convolve8_mmi) }; +INSTANTIATE_TEST_CASE_P(MMI, ConvolveTest, + ::testing::ValuesIn(kArrayConvolve_mmi)); +#endif // HAVE_MMI } // namespace diff --git a/media/libvpx/libvpx/test/cpu_speed_test.cc b/media/libvpx/libvpx/test/cpu_speed_test.cc index 
404b5b44f4f2..2fb5c10eae13 100644 --- a/media/libvpx/libvpx/test/cpu_speed_test.cc +++ b/media/libvpx/libvpx/test/cpu_speed_test.cc @@ -44,7 +44,7 @@ class CpuSpeedTest virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { - if (video->frame() == 1) { + if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_); if (encoding_mode_ != ::libvpx_test::kRealTime) { @@ -152,5 +152,5 @@ VP9_INSTANTIATE_TEST_CASE(CpuSpeedTest, ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime), - ::testing::Range(0, 9)); + ::testing::Range(0, 10)); } // namespace diff --git a/media/libvpx/libvpx/test/cq_test.cc b/media/libvpx/libvpx/test/cq_test.cc index 20e1f0f3deff..474b9d0fa2ee 100644 --- a/media/libvpx/libvpx/test/cq_test.cc +++ b/media/libvpx/libvpx/test/cq_test.cc @@ -65,7 +65,7 @@ class CQTest : public ::libvpx_test::EncoderTest, virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, libvpx_test::Encoder *encoder) { - if (video->frame() == 1) { + if (video->frame() == 0) { if (cfg_.rc_end_usage == VPX_CQ) { encoder->Control(VP8E_SET_CQ_LEVEL, cq_level_); } diff --git a/media/libvpx/libvpx/test/datarate_test.cc b/media/libvpx/libvpx/test/datarate_test.cc deleted file mode 100644 index 31a8523d2191..000000000000 --- a/media/libvpx/libvpx/test/datarate_test.cc +++ /dev/null @@ -1,1876 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ -#include "./vpx_config.h" -#include "third_party/googletest/src/include/gtest/gtest.h" -#include "test/codec_factory.h" -#include "test/encode_test_driver.h" -#include "test/i420_video_source.h" -#include "test/util.h" -#include "test/y4m_video_source.h" -#include "vpx/vpx_codec.h" - -namespace { - -class DatarateTestLarge - : public ::libvpx_test::EncoderTest, - public ::libvpx_test::CodecTestWith2Params { - public: - DatarateTestLarge() : EncoderTest(GET_PARAM(0)) {} - - virtual ~DatarateTestLarge() {} - - protected: - virtual void SetUp() { - InitializeConfig(); - SetMode(GET_PARAM(1)); - set_cpu_used_ = GET_PARAM(2); - ResetModel(); - } - - virtual void ResetModel() { - last_pts_ = 0; - bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; - frame_number_ = 0; - first_drop_ = 0; - bits_total_ = 0; - duration_ = 0.0; - denoiser_offon_test_ = 0; - denoiser_offon_period_ = -1; - gf_boost_ = 0; - use_roi_ = 0; - } - - virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, - ::libvpx_test::Encoder *encoder) { - if (video->frame() == 0) { - encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_); - encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); - encoder->Control(VP8E_SET_GF_CBR_BOOST_PCT, gf_boost_); - } - -#if CONFIG_VP8_ENCODER - if (use_roi_ == 1) { - encoder->Control(VP8E_SET_ROI_MAP, &roi_); - } -#endif - - if (denoiser_offon_test_) { - ASSERT_GT(denoiser_offon_period_, 0) - << "denoiser_offon_period_ is not positive."; - if ((video->frame() + 1) % denoiser_offon_period_ == 0) { - // Flip denoiser_on_ periodically - denoiser_on_ ^= 1; - } - encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_); - } - - const vpx_rational_t tb = video->timebase(); - timebase_ = static_cast(tb.num) / tb.den; - duration_ = 0; - } - - virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { - // Time since last timestamp = duration. - vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; - - // TODO(jimbankoski): Remove these lines when the issue: - // http://code.google.com/p/webm/issues/detail?id=496 is fixed. - // For now the codec assumes buffer starts at starting buffer rate - // plus one frame's time. - if (last_pts_ == 0) duration = 1; - - // Add to the buffer the bits we'd expect from a constant bitrate server. - bits_in_buffer_model_ += static_cast( - duration * timebase_ * cfg_.rc_target_bitrate * 1000); - - /* Test the buffer model here before subtracting the frame. Do so because - * the way the leaky bucket model works in libvpx is to allow the buffer to - * empty - and then stop showing frames until we've got enough bits to - * show one. As noted in comment below (issue 495), this does not currently - * apply to key frames. For now exclude key frames in condition below. */ - const bool key_frame = - (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false; - if (!key_frame) { - ASSERT_GE(bits_in_buffer_model_, 0) - << "Buffer Underrun at frame " << pkt->data.frame.pts; - } - - const int64_t frame_size_in_bits = pkt->data.frame.sz * 8; - - // Subtract from the buffer the bits associated with a played back frame. - bits_in_buffer_model_ -= frame_size_in_bits; - - // Update the running total of bits for end of test datarate checks. - bits_total_ += frame_size_in_bits; - - // If first drop not set and we have a drop set it to this time. - if (!first_drop_ && duration > 1) first_drop_ = last_pts_ + 1; - - // Update the most recent pts. 
- last_pts_ = pkt->data.frame.pts; - - // We update this so that we can calculate the datarate minus the last - // frame encoded in the file. - bits_in_last_frame_ = frame_size_in_bits; - - ++frame_number_; - } - - virtual void EndPassHook(void) { - if (bits_total_) { - const double file_size_in_kb = bits_total_ / 1000.; // bits per kilobit - - duration_ = (last_pts_ + 1) * timebase_; - - // Effective file datarate includes the time spent prebuffering. - effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0 / - (cfg_.rc_buf_initial_sz / 1000.0 + duration_); - - file_datarate_ = file_size_in_kb / duration_; - } - } - - vpx_codec_pts_t last_pts_; - int64_t bits_in_buffer_model_; - double timebase_; - int frame_number_; - vpx_codec_pts_t first_drop_; - int64_t bits_total_; - double duration_; - double file_datarate_; - double effective_datarate_; - int64_t bits_in_last_frame_; - int denoiser_on_; - int denoiser_offon_test_; - int denoiser_offon_period_; - int set_cpu_used_; - int gf_boost_; - int use_roi_; - vpx_roi_map_t roi_; -}; - -#if CONFIG_TEMPORAL_DENOISING -// Check basic datarate targeting, for a single bitrate, but loop over the -// various denoiser settings. -TEST_P(DatarateTestLarge, DenoiserLevels) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 140); - for (int j = 1; j < 5; ++j) { - // Run over the denoiser levels. - // For the temporal denoiser (#if CONFIG_TEMPORAL_DENOISING) the level j - // refers to the 4 denoiser modes: denoiserYonly, denoiserOnYUV, - // denoiserOnAggressive, and denoiserOnAdaptive. - denoiser_on_ = j; - cfg_.rc_target_bitrate = 300; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) - << " The datarate for the file exceeds the target!"; - - ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) - << " The datarate for the file missed the target!"; - } -} - -// Check basic datarate targeting, for a single bitrate, when denoiser is off -// and on. -TEST_P(DatarateTestLarge, DenoiserOffOn) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 299); - cfg_.rc_target_bitrate = 300; - ResetModel(); - // The denoiser is off by default. - denoiser_on_ = 0; - // Set the offon test flag. - denoiser_offon_test_ = 1; - denoiser_offon_period_ = 100; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) - << " The datarate for the file exceeds the target!"; - ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) - << " The datarate for the file missed the target!"; -} -#endif // CONFIG_TEMPORAL_DENOISING - -TEST_P(DatarateTestLarge, BasicBufferModel) { - denoiser_on_ = 0; - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - // 2 pass cbr datarate control has a bug hidden by the small # of - // frames selected in this encode. The problem is that even if the buffer is - // negative we produce a keyframe on a cutscene. Ignoring datarate - // constraints - // TODO(jimbankoski): ( Fix when issue - // http://code.google.com/p/webm/issues/detail?id=495 is addressed. 
) - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 140); - - // There is an issue for low bitrates in real-time mode, where the - // effective_datarate slightly overshoots the target bitrate. - // This is same the issue as noted about (#495). - // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100), - // when the issue is resolved. - for (int i = 100; i < 800; i += 200) { - cfg_.rc_target_bitrate = i; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) - << " The datarate for the file exceeds the target!"; - ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) - << " The datarate for the file missed the target!"; - } -} - -TEST_P(DatarateTestLarge, ChangingDropFrameThresh) { - denoiser_on_ = 0; - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_max_quantizer = 36; - cfg_.rc_end_usage = VPX_CBR; - cfg_.rc_target_bitrate = 200; - cfg_.kf_mode = VPX_KF_DISABLED; - - const int frame_count = 40; - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, frame_count); - - // Here we check that the first dropped frame gets earlier and earlier - // as the drop frame threshold is increased. - - const int kDropFrameThreshTestStep = 30; - vpx_codec_pts_t last_drop = frame_count; - for (int i = 1; i < 91; i += kDropFrameThreshTestStep) { - cfg_.rc_dropframe_thresh = i; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_LE(first_drop_, last_drop) - << " The first dropped frame for drop_thresh " << i - << " > first dropped frame for drop_thresh " - << i - kDropFrameThreshTestStep; - last_drop = first_drop_; - } -} - -TEST_P(DatarateTestLarge, DropFramesMultiThreads) { - denoiser_on_ = 0; - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_dropframe_thresh = 30; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_threads = 2; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 140); - cfg_.rc_target_bitrate = 200; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) - << " The datarate for the file exceeds the target!"; - - ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) - << " The datarate for the file missed the target!"; -} - -class DatarateTestRealTime : public DatarateTestLarge { - public: - virtual ~DatarateTestRealTime() {} -}; - -#if CONFIG_TEMPORAL_DENOISING -// Check basic datarate targeting, for a single bitrate, but loop over the -// various denoiser settings. -TEST_P(DatarateTestRealTime, DenoiserLevels) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 140); - for (int j = 1; j < 5; ++j) { - // Run over the denoiser levels. - // For the temporal denoiser (#if CONFIG_TEMPORAL_DENOISING) the level j - // refers to the 4 denoiser modes: denoiserYonly, denoiserOnYUV, - // denoiserOnAggressive, and denoiserOnAdaptive. 
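For reference, every pass/fail decision in these VP8 datarate tests reduces to the leaky-bucket bookkeeping kept in FramePktHook()/EndPassHook() above plus a two-sided tolerance on the measured rate. A minimal standalone restatement follows; the names are hypothetical and not part of the patch:

#include <cstddef>
#include <cstdint>

// Credit the bucket at the target rate for the elapsed time, debit it by each
// encoded frame, and require that it never underruns (the real test exempts
// key frames, per the issue-495 comment above).
struct LeakyBucket {
  int64_t bits = 0;
  void AddInterval(double seconds, int target_kbps) {
    bits += static_cast<int64_t>(seconds * target_kbps * 1000);
  }
  void TakeFrame(size_t frame_bytes) {
    bits -= static_cast<int64_t>(frame_bytes) * 8;
  }
  bool Underrun() const { return bits < 0; }
};

// Mirrors the recurring ASSERT_GE(target, measured * 0.95) and
// ASSERT_LE(target, measured * 1.4) pair: the measured rate may exceed the
// target by only about 5%, but may fall short of it by up to about 29%.
inline bool RateOnTarget(double target_kbps, double measured_kbps) {
  return target_kbps >= measured_kbps * 0.95 &&
         target_kbps <= measured_kbps * 1.4;
}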
- denoiser_on_ = j; - cfg_.rc_target_bitrate = 300; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) - << " The datarate for the file exceeds the target!"; - ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) - << " The datarate for the file missed the target!"; - } -} - -// Check basic datarate targeting, for a single bitrate, when denoiser is off -// and on. -TEST_P(DatarateTestRealTime, DenoiserOffOn) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 299); - cfg_.rc_target_bitrate = 300; - ResetModel(); - // The denoiser is off by default. - denoiser_on_ = 0; - // Set the offon test flag. - denoiser_offon_test_ = 1; - denoiser_offon_period_ = 100; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) - << " The datarate for the file exceeds the target!"; - ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) - << " The datarate for the file missed the target!"; -} -#endif // CONFIG_TEMPORAL_DENOISING - -TEST_P(DatarateTestRealTime, BasicBufferModel) { - denoiser_on_ = 0; - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - // 2 pass cbr datarate control has a bug hidden by the small # of - // frames selected in this encode. The problem is that even if the buffer is - // negative we produce a keyframe on a cutscene, ignoring datarate - // constraints - // TODO(jimbankoski): Fix when issue - // http://bugs.chromium.org/p/webm/issues/detail?id=495 is addressed. - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 140); - - // There is an issue for low bitrates in real-time mode, where the - // effective_datarate slightly overshoots the target bitrate. - // This is same the issue as noted above (#495). - // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100), - // when the issue is resolved. - for (int i = 100; i <= 700; i += 200) { - cfg_.rc_target_bitrate = i; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) - << " The datarate for the file exceeds the target!"; - ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) - << " The datarate for the file missed the target!"; - } -} - -TEST_P(DatarateTestRealTime, ChangingDropFrameThresh) { - denoiser_on_ = 0; - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_max_quantizer = 36; - cfg_.rc_end_usage = VPX_CBR; - cfg_.rc_target_bitrate = 200; - cfg_.kf_mode = VPX_KF_DISABLED; - - const int frame_count = 40; - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, frame_count); - - // Check that the first dropped frame gets earlier and earlier - // as the drop frame threshold is increased. 
- - const int kDropFrameThreshTestStep = 30; - vpx_codec_pts_t last_drop = frame_count; - for (int i = 1; i < 91; i += kDropFrameThreshTestStep) { - cfg_.rc_dropframe_thresh = i; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_LE(first_drop_, last_drop) - << " The first dropped frame for drop_thresh " << i - << " > first dropped frame for drop_thresh " - << i - kDropFrameThreshTestStep; - last_drop = first_drop_; - } -} - -TEST_P(DatarateTestRealTime, DropFramesMultiThreads) { - denoiser_on_ = 0; - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_dropframe_thresh = 30; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - // Encode using multiple threads. - cfg_.g_threads = 2; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 140); - cfg_.rc_target_bitrate = 200; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) - << " The datarate for the file exceeds the target!"; - - ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) - << " The datarate for the file missed the target!"; -} - -TEST_P(DatarateTestRealTime, RegionOfInterest) { - denoiser_on_ = 0; - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_dropframe_thresh = 0; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - // Encode using multiple threads. - cfg_.g_threads = 2; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 300); - cfg_.rc_target_bitrate = 450; - cfg_.g_w = 352; - cfg_.g_h = 288; - - ResetModel(); - - // Set ROI parameters - use_roi_ = 1; - memset(&roi_, 0, sizeof(roi_)); - - roi_.rows = (cfg_.g_h + 15) / 16; - roi_.cols = (cfg_.g_w + 15) / 16; - - roi_.delta_q[0] = 0; - roi_.delta_q[1] = -20; - roi_.delta_q[2] = 0; - roi_.delta_q[3] = 0; - - roi_.delta_lf[0] = 0; - roi_.delta_lf[1] = -20; - roi_.delta_lf[2] = 0; - roi_.delta_lf[3] = 0; - - roi_.static_threshold[0] = 0; - roi_.static_threshold[1] = 1000; - roi_.static_threshold[2] = 0; - roi_.static_threshold[3] = 0; - - // Use 2 states: 1 is center square, 0 is the rest. - roi_.roi_map = - (uint8_t *)calloc(roi_.rows * roi_.cols, sizeof(*roi_.roi_map)); - for (unsigned int i = 0; i < roi_.rows; ++i) { - for (unsigned int j = 0; j < roi_.cols; ++j) { - if (i > (roi_.rows >> 2) && i < ((roi_.rows * 3) >> 2) && - j > (roi_.cols >> 2) && j < ((roi_.cols * 3) >> 2)) { - roi_.roi_map[i * roi_.cols + j] = 1; - } - } - } - - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) - << " The datarate for the file exceeds the target!"; - - ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) - << " The datarate for the file missed the target!"; - - free(roi_.roi_map); -} - -TEST_P(DatarateTestRealTime, GFBoost) { - denoiser_on_ = 0; - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_dropframe_thresh = 0; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_error_resilient = 0; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 300); - cfg_.rc_target_bitrate = 300; - ResetModel(); - // Apply a gf boost. 
- gf_boost_ = 50; - - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) - << " The datarate for the file exceeds the target!"; - - ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) - << " The datarate for the file missed the target!"; -} - -class DatarateTestVP9Large - : public ::libvpx_test::EncoderTest, - public ::libvpx_test::CodecTestWith2Params { - public: - DatarateTestVP9Large() : EncoderTest(GET_PARAM(0)) {} - - protected: - virtual ~DatarateTestVP9Large() {} - - virtual void SetUp() { - InitializeConfig(); - SetMode(GET_PARAM(1)); - set_cpu_used_ = GET_PARAM(2); - ResetModel(); - } - - virtual void ResetModel() { - last_pts_ = 0; - bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; - frame_number_ = 0; - tot_frame_number_ = 0; - first_drop_ = 0; - num_drops_ = 0; - // Denoiser is off by default. - denoiser_on_ = 0; - // For testing up to 3 layers. - for (int i = 0; i < 3; ++i) { - bits_total_[i] = 0; - } - denoiser_offon_test_ = 0; - denoiser_offon_period_ = -1; - frame_parallel_decoding_mode_ = 1; - } - - // - // Frame flags and layer id for temporal layers. - // - - // For two layers, test pattern is: - // 1 3 - // 0 2 ..... - // For three layers, test pattern is: - // 1 3 5 7 - // 2 6 - // 0 4 .... - // LAST is always update on base/layer 0, GOLDEN is updated on layer 1. - // For this 3 layer example, the 2nd enhancement layer (layer 2) updates - // the altref frame. - int SetFrameFlags(int frame_num, int num_temp_layers) { - int frame_flags = 0; - if (num_temp_layers == 2) { - if (frame_num % 2 == 0) { - // Layer 0: predict from L and ARF, update L. - frame_flags = - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - } else { - // Layer 1: predict from L, G and ARF, and update G. - frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_UPD_ENTROPY; - } - } else if (num_temp_layers == 3) { - if (frame_num % 4 == 0) { - // Layer 0: predict from L and ARF; update L. - frame_flags = - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; - } else if ((frame_num - 2) % 4 == 0) { - // Layer 1: predict from L, G, ARF; update G. - frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; - } else if ((frame_num - 1) % 2 == 0) { - // Layer 2: predict from L, G, ARF; update ARF. 
- frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST; - } - } - return frame_flags; - } - - int SetLayerId(int frame_num, int num_temp_layers) { - int layer_id = 0; - if (num_temp_layers == 2) { - if (frame_num % 2 == 0) { - layer_id = 0; - } else { - layer_id = 1; - } - } else if (num_temp_layers == 3) { - if (frame_num % 4 == 0) { - layer_id = 0; - } else if ((frame_num - 2) % 4 == 0) { - layer_id = 1; - } else if ((frame_num - 1) % 2 == 0) { - layer_id = 2; - } - } - return layer_id; - } - - virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, - ::libvpx_test::Encoder *encoder) { - if (video->frame() == 0) encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); - - if (denoiser_offon_test_) { - ASSERT_GT(denoiser_offon_period_, 0) - << "denoiser_offon_period_ is not positive."; - if ((video->frame() + 1) % denoiser_offon_period_ == 0) { - // Flip denoiser_on_ periodically - denoiser_on_ ^= 1; - } - } - - encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_); - encoder->Control(VP9E_SET_TILE_COLUMNS, (cfg_.g_threads >> 1)); - encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, - frame_parallel_decoding_mode_); - - if (cfg_.ts_number_layers > 1) { - if (video->frame() == 0) { - encoder->Control(VP9E_SET_SVC, 1); - } - vpx_svc_layer_id_t layer_id; - layer_id.spatial_layer_id = 0; - frame_flags_ = SetFrameFlags(video->frame(), cfg_.ts_number_layers); - layer_id.temporal_layer_id = - SetLayerId(video->frame(), cfg_.ts_number_layers); - encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id); - } - const vpx_rational_t tb = video->timebase(); - timebase_ = static_cast(tb.num) / tb.den; - duration_ = 0; - } - - virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { - // Time since last timestamp = duration. - vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; - - if (duration > 1) { - // If first drop not set and we have a drop set it to this time. - if (!first_drop_) first_drop_ = last_pts_ + 1; - // Update the number of frame drops. - num_drops_ += static_cast(duration - 1); - // Update counter for total number of frames (#frames input to encoder). - // Needed for setting the proper layer_id below. - tot_frame_number_ += static_cast(duration - 1); - } - - int layer = SetLayerId(tot_frame_number_, cfg_.ts_number_layers); - - // Add to the buffer the bits we'd expect from a constant bitrate server. - bits_in_buffer_model_ += static_cast( - duration * timebase_ * cfg_.rc_target_bitrate * 1000); - - // Buffer should not go negative. - ASSERT_GE(bits_in_buffer_model_, 0) - << "Buffer Underrun at frame " << pkt->data.frame.pts; - - const size_t frame_size_in_bits = pkt->data.frame.sz * 8; - - // Update the total encoded bits. For temporal layers, update the cumulative - // encoded bits per layer. - for (int i = layer; i < static_cast(cfg_.ts_number_layers); ++i) { - bits_total_[i] += frame_size_in_bits; - } - - // Update the most recent pts. - last_pts_ = pkt->data.frame.pts; - ++frame_number_; - ++tot_frame_number_; - } - - virtual void EndPassHook(void) { - for (int layer = 0; layer < static_cast(cfg_.ts_number_layers); - ++layer) { - duration_ = (last_pts_ + 1) * timebase_; - if (bits_total_[layer]) { - // Effective file datarate: - effective_datarate_[layer] = (bits_total_[layer] / 1000.0) / duration_; - } - } - } - - vpx_codec_pts_t last_pts_; - double timebase_; - int frame_number_; // Counter for number of non-dropped/encoded frames. - int tot_frame_number_; // Counter for total number of input frames. 
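The temporal patterns implemented by SetFrameFlags() and SetLayerId() above repeat every two or four frames. A compact restatement of the 3-layer case, as an illustrative helper that is not part of the patch:

// Frame:  0 1 2 3 4 5 6 7 ...
// 2 TLs:  0 1 0 1 0 1 0 1   (framerate decimation 2, 1)
// 3 TLs:  0 2 1 2 0 2 1 2   (framerate decimation 4, 2, 1)
//
// LAST is updated only on layer 0 and GOLDEN on layer 1; in the 3-layer
// pattern the top layer updates only ALTREF, so it can be dropped without
// breaking the layers below it.
int LayerIdFor3Layers(int frame_num) {  // same result as SetLayerId(f, 3)
  if (frame_num % 4 == 0) return 0;        // base layer
  if ((frame_num - 2) % 4 == 0) return 1;  // middle layer
  return 2;                                // top layer (all odd frames)
}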
- int64_t bits_total_[3]; - double duration_; - double effective_datarate_[3]; - int set_cpu_used_; - int64_t bits_in_buffer_model_; - vpx_codec_pts_t first_drop_; - int num_drops_; - int denoiser_on_; - int denoiser_offon_test_; - int denoiser_offon_period_; - int frame_parallel_decoding_mode_; -}; - -// Check basic rate targeting for VBR mode with 0 lag. -TEST_P(DatarateTestVP9Large, BasicRateTargetingVBRLagZero) { - cfg_.rc_min_quantizer = 0; - cfg_.rc_max_quantizer = 63; - cfg_.g_error_resilient = 0; - cfg_.rc_end_usage = VPX_VBR; - cfg_.g_lag_in_frames = 0; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 300); - for (int i = 400; i <= 800; i += 400) { - cfg_.rc_target_bitrate = i; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.30) - << " The datarate for the file is greater than target by too much!"; - } -} - -// Check basic rate targeting for VBR mode with non-zero lag. -TEST_P(DatarateTestVP9Large, BasicRateTargetingVBRLagNonZero) { - cfg_.rc_min_quantizer = 0; - cfg_.rc_max_quantizer = 63; - cfg_.g_error_resilient = 0; - cfg_.rc_end_usage = VPX_VBR; - // For non-zero lag, rate control will work (be within bounds) for - // real-time mode. - if (deadline_ == VPX_DL_REALTIME) { - cfg_.g_lag_in_frames = 15; - } else { - cfg_.g_lag_in_frames = 0; - } - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 300); - for (int i = 400; i <= 800; i += 400) { - cfg_.rc_target_bitrate = i; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.30) - << " The datarate for the file is greater than target by too much!"; - } -} - -// Check basic rate targeting for VBR mode with non-zero lag, with -// frame_parallel_decoding_mode off. This enables the adapt_coeff/mode/mv probs -// since error_resilience is off. -TEST_P(DatarateTestVP9Large, BasicRateTargetingVBRLagNonZeroFrameParDecOff) { - cfg_.rc_min_quantizer = 0; - cfg_.rc_max_quantizer = 63; - cfg_.g_error_resilient = 0; - cfg_.rc_end_usage = VPX_VBR; - // For non-zero lag, rate control will work (be within bounds) for - // real-time mode. - if (deadline_ == VPX_DL_REALTIME) { - cfg_.g_lag_in_frames = 15; - } else { - cfg_.g_lag_in_frames = 0; - } - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 300); - for (int i = 400; i <= 800; i += 400) { - cfg_.rc_target_bitrate = i; - ResetModel(); - frame_parallel_decoding_mode_ = 0; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.30) - << " The datarate for the file is greater than target by too much!"; - } -} - -// Check basic rate targeting for CBR mode. 
-TEST_P(DatarateTestVP9Large, BasicRateTargeting) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_min_quantizer = 0; - cfg_.rc_max_quantizer = 63; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_lag_in_frames = 0; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 140); - for (int i = 150; i < 800; i += 200) { - cfg_.rc_target_bitrate = i; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) - << " The datarate for the file is greater than target by too much!"; - } -} - -// Check basic rate targeting for CBR mode, with frame_parallel_decoding_mode -// off( and error_resilience off). -TEST_P(DatarateTestVP9Large, BasicRateTargetingFrameParDecOff) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_min_quantizer = 0; - cfg_.rc_max_quantizer = 63; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_lag_in_frames = 0; - cfg_.g_error_resilient = 0; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 140); - for (int i = 150; i < 800; i += 200) { - cfg_.rc_target_bitrate = i; - ResetModel(); - frame_parallel_decoding_mode_ = 0; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) - << " The datarate for the file is greater than target by too much!"; - } -} - -// Check basic rate targeting for CBR mode, with 2 threads and dropped frames. -TEST_P(DatarateTestVP9Large, BasicRateTargetingDropFramesMultiThreads) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_dropframe_thresh = 30; - cfg_.rc_min_quantizer = 0; - cfg_.rc_max_quantizer = 63; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_lag_in_frames = 0; - // Encode using multiple threads. - cfg_.g_threads = 2; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 140); - cfg_.rc_target_bitrate = 200; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) - << " The datarate for the file is greater than target by too much!"; -} - -// Check basic rate targeting for CBR. 
-TEST_P(DatarateTestVP9Large, BasicRateTargeting444) { - ::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140); - - cfg_.g_profile = 1; - cfg_.g_timebase = video.timebase(); - - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_min_quantizer = 0; - cfg_.rc_max_quantizer = 63; - cfg_.rc_end_usage = VPX_CBR; - - for (int i = 250; i < 900; i += 200) { - cfg_.rc_target_bitrate = i; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(static_cast(cfg_.rc_target_bitrate), - effective_datarate_[0] * 0.80) - << " The datarate for the file exceeds the target by too much!"; - ASSERT_LE(static_cast(cfg_.rc_target_bitrate), - effective_datarate_[0] * 1.15) - << " The datarate for the file missed the target!" - << cfg_.rc_target_bitrate << " " << effective_datarate_; - } -} - -// Check that (1) the first dropped frame gets earlier and earlier -// as the drop frame threshold is increased, and (2) that the total number of -// frame drops does not decrease as we increase frame drop threshold. -// Use a lower qp-max to force some frame drops. -TEST_P(DatarateTestVP9Large, ChangingDropFrameThresh) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_undershoot_pct = 20; - cfg_.rc_undershoot_pct = 20; - cfg_.rc_dropframe_thresh = 10; - cfg_.rc_min_quantizer = 0; - cfg_.rc_max_quantizer = 50; - cfg_.rc_end_usage = VPX_CBR; - cfg_.rc_target_bitrate = 200; - cfg_.g_lag_in_frames = 0; - // TODO(marpan): Investigate datarate target failures with a smaller keyframe - // interval (128). - cfg_.kf_max_dist = 9999; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 140); - - const int kDropFrameThreshTestStep = 30; - for (int j = 50; j <= 150; j += 100) { - cfg_.rc_target_bitrate = j; - vpx_codec_pts_t last_drop = 140; - int last_num_drops = 0; - for (int i = 10; i < 100; i += kDropFrameThreshTestStep) { - cfg_.rc_dropframe_thresh = i; - ResetModel(); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.25) - << " The datarate for the file is greater than target by too much!"; - ASSERT_LE(first_drop_, last_drop) - << " The first dropped frame for drop_thresh " << i - << " > first dropped frame for drop_thresh " - << i - kDropFrameThreshTestStep; - ASSERT_GE(num_drops_, last_num_drops * 0.85) - << " The number of dropped frames for drop_thresh " << i - << " < number of dropped frames for drop_thresh " - << i - kDropFrameThreshTestStep; - last_drop = first_drop_; - last_num_drops = num_drops_; - } - } -} - -// Check basic rate targeting for 2 temporal layers. -TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_min_quantizer = 0; - cfg_.rc_max_quantizer = 63; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_lag_in_frames = 0; - - // 2 Temporal layers, no spatial layers: Framerate decimation (2, 1). 
- cfg_.ss_number_layers = 1; - cfg_.ts_number_layers = 2; - cfg_.ts_rate_decimator[0] = 2; - cfg_.ts_rate_decimator[1] = 1; - - cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; - - if (deadline_ == VPX_DL_REALTIME) cfg_.g_error_resilient = 1; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 200); - for (int i = 200; i <= 800; i += 200) { - cfg_.rc_target_bitrate = i; - ResetModel(); - // 60-40 bitrate allocation for 2 temporal layers. - cfg_.layer_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100; - cfg_.layer_target_bitrate[1] = cfg_.rc_target_bitrate; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { - ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85) - << " The datarate for the file is lower than target by too much, " - "for layer: " - << j; - ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15) - << " The datarate for the file is greater than target by too much, " - "for layer: " - << j; - } - } -} - -// Check basic rate targeting for 3 temporal layers. -TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayers) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_min_quantizer = 0; - cfg_.rc_max_quantizer = 63; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_lag_in_frames = 0; - - // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1). - cfg_.ss_number_layers = 1; - cfg_.ts_number_layers = 3; - cfg_.ts_rate_decimator[0] = 4; - cfg_.ts_rate_decimator[1] = 2; - cfg_.ts_rate_decimator[2] = 1; - - cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 200); - for (int i = 200; i <= 800; i += 200) { - cfg_.rc_target_bitrate = i; - ResetModel(); - // 40-20-40 bitrate allocation for 3 temporal layers. - cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100; - cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100; - cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { - // TODO(yaowu): Work out more stable rc control strategy and - // Adjust the thresholds to be tighter than .75. - ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.75) - << " The datarate for the file is lower than target by too much, " - "for layer: " - << j; - // TODO(yaowu): Work out more stable rc control strategy and - // Adjust the thresholds to be tighter than 1.25. - ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.25) - << " The datarate for the file is greater than target by too much, " - "for layer: " - << j; - } - } -} - -// Check basic rate targeting for 3 temporal layers, with frame dropping. -// Only for one (low) bitrate with lower max_quantizer, and somewhat higher -// frame drop threshold, to force frame dropping. -TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - // Set frame drop threshold and rc_max_quantizer to force some frame drops. - cfg_.rc_dropframe_thresh = 20; - cfg_.rc_max_quantizer = 45; - cfg_.rc_min_quantizer = 0; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_lag_in_frames = 0; - - // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1). 
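Note that layer_target_bitrate[] is cumulative in these tests: each entry is the rate of that temporal layer plus every layer below it, which matches how FramePktHook() accumulates bits_total_. The "60-40" split above and the "40-20-40" split below therefore appear in the config as 60/100 and 40/60/100 percent of the total. Worked through for a 400 kbps target (arithmetic only, not part of the patch):

// 2 temporal layers, "60-40":
//   layer_target_bitrate[0] = 60 * 400 / 100 = 240  // TL0 alone
//   layer_target_bitrate[1] = 400                   // TL0 + TL1
// 3 temporal layers, "40-20-40":
//   layer_target_bitrate[0] = 40 * 400 / 100 = 160  // TL0
//   layer_target_bitrate[1] = 60 * 400 / 100 = 240  // TL0 + TL1
//   layer_target_bitrate[2] = 400                   // TL0 + TL1 + TL2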
- cfg_.ss_number_layers = 1; - cfg_.ts_number_layers = 3; - cfg_.ts_rate_decimator[0] = 4; - cfg_.ts_rate_decimator[1] = 2; - cfg_.ts_rate_decimator[2] = 1; - - cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 200); - cfg_.rc_target_bitrate = 200; - ResetModel(); - // 40-20-40 bitrate allocation for 3 temporal layers. - cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100; - cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100; - cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { - ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85) - << " The datarate for the file is lower than target by too much, " - "for layer: " - << j; - ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15) - << " The datarate for the file is greater than target by too much, " - "for layer: " - << j; - // Expect some frame drops in this test: for this 200 frames test, - // expect at least 10% and not more than 60% drops. - ASSERT_GE(num_drops_, 20); - ASSERT_LE(num_drops_, 130); - } -} - -#if CONFIG_VP9_TEMPORAL_DENOISING -class DatarateTestVP9LargeDenoiser : public DatarateTestVP9Large { - public: - virtual ~DatarateTestVP9LargeDenoiser() {} -}; - -// Check basic datarate targeting, for a single bitrate, when denoiser is on. -TEST_P(DatarateTestVP9LargeDenoiser, LowNoise) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_min_quantizer = 2; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_lag_in_frames = 0; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 140); - - // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), - // there is only one denoiser mode: denoiserYonly(which is 1), - // but may add more modes in the future. - cfg_.rc_target_bitrate = 300; - ResetModel(); - // Turn on the denoiser. - denoiser_on_ = 1; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) - << " The datarate for the file is greater than target by too much!"; -} - -// Check basic datarate targeting, for a single bitrate, when denoiser is on, -// for clip with high noise level. Use 2 threads. -TEST_P(DatarateTestVP9LargeDenoiser, HighNoise) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_min_quantizer = 2; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_lag_in_frames = 0; - cfg_.g_threads = 2; - - ::libvpx_test::Y4mVideoSource video("noisy_clip_640_360.y4m", 0, 200); - - // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), - // there is only one denoiser mode: kDenoiserOnYOnly(which is 1), - // but may add more modes in the future. - cfg_.rc_target_bitrate = 1000; - ResetModel(); - // Turn on the denoiser. 
- denoiser_on_ = 1; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) - << " The datarate for the file is greater than target by too much!"; -} - -// Check basic datarate targeting, for a single bitrate, when denoiser is on, -// for 1280x720 clip with 4 threads. -TEST_P(DatarateTestVP9LargeDenoiser, 4threads) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_min_quantizer = 2; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_lag_in_frames = 0; - cfg_.g_threads = 4; - - ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300); - - // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), - // there is only one denoiser mode: denoiserYonly(which is 1), - // but may add more modes in the future. - cfg_.rc_target_bitrate = 1000; - ResetModel(); - // Turn on the denoiser. - denoiser_on_ = 1; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.29) - << " The datarate for the file is greater than target by too much!"; -} - -// Check basic datarate targeting, for a single bitrate, when denoiser is off -// and on. -TEST_P(DatarateTestVP9LargeDenoiser, DenoiserOffOn) { - cfg_.rc_buf_initial_sz = 500; - cfg_.rc_buf_optimal_sz = 500; - cfg_.rc_buf_sz = 1000; - cfg_.rc_dropframe_thresh = 1; - cfg_.rc_min_quantizer = 2; - cfg_.rc_max_quantizer = 56; - cfg_.rc_end_usage = VPX_CBR; - cfg_.g_lag_in_frames = 0; - - ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 299); - - // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), - // there is only one denoiser mode: denoiserYonly(which is 1), - // but may add more modes in the future. - cfg_.rc_target_bitrate = 300; - ResetModel(); - // The denoiser is off by default. - denoiser_on_ = 0; - // Set the offon test flag. 
- denoiser_offon_test_ = 1; - denoiser_offon_period_ = 100; - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) - << " The datarate for the file is greater than target by too much!"; -} -#endif // CONFIG_VP9_TEMPORAL_DENOISING - -class DatarateOnePassCbrSvc - : public ::libvpx_test::EncoderTest, - public ::libvpx_test::CodecTestWith2Params { - public: - DatarateOnePassCbrSvc() : EncoderTest(GET_PARAM(0)) { - memset(&svc_params_, 0, sizeof(svc_params_)); - } - virtual ~DatarateOnePassCbrSvc() {} - - protected: - virtual void SetUp() { - InitializeConfig(); - SetMode(GET_PARAM(1)); - speed_setting_ = GET_PARAM(2); - ResetModel(); - } - virtual void ResetModel() { - last_pts_ = 0; - duration_ = 0.0; - mismatch_psnr_ = 0.0; - mismatch_nframes_ = 0; - denoiser_on_ = 0; - tune_content_ = 0; - base_speed_setting_ = 5; - spatial_layer_id_ = 0; - temporal_layer_id_ = 0; - memset(bits_in_buffer_model_, 0, sizeof(bits_in_buffer_model_)); - memset(bits_total_, 0, sizeof(bits_total_)); - memset(layer_target_avg_bandwidth_, 0, sizeof(layer_target_avg_bandwidth_)); - } - virtual void BeginPassHook(unsigned int /*pass*/) {} - virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, - ::libvpx_test::Encoder *encoder) { - if (video->frame() == 0) { - int i; - for (i = 0; i < VPX_MAX_LAYERS; ++i) { - svc_params_.max_quantizers[i] = 63; - svc_params_.min_quantizers[i] = 0; - } - svc_params_.speed_per_layer[0] = base_speed_setting_; - for (i = 1; i < VPX_SS_MAX_LAYERS; ++i) { - svc_params_.speed_per_layer[i] = speed_setting_; - } - - encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_); - encoder->Control(VP9E_SET_SVC, 1); - encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_); - encoder->Control(VP8E_SET_CPUUSED, speed_setting_); - encoder->Control(VP9E_SET_TILE_COLUMNS, 0); - encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 300); - encoder->Control(VP9E_SET_TILE_COLUMNS, (cfg_.g_threads >> 1)); - encoder->Control(VP9E_SET_ROW_MT, 1); - encoder->Control(VP8E_SET_STATIC_THRESHOLD, 1); - encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_); - } - const vpx_rational_t tb = video->timebase(); - timebase_ = static_cast(tb.num) / tb.den; - duration_ = 0; - } - - virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) { - vpx_svc_layer_id_t layer_id; - encoder->Control(VP9E_GET_SVC_LAYER_ID, &layer_id); - spatial_layer_id_ = layer_id.spatial_layer_id; - temporal_layer_id_ = layer_id.temporal_layer_id; - // Update buffer with per-layer target frame bandwidth, this is done - // for every frame passed to the encoder (encoded or dropped). - // For temporal layers, update the cumulative buffer level. 
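A minimal restatement of the per-layer bookkeeping shared by this hook and FramePktHook() below; the helper is hypothetical and only sketches the mechanism:

#include <cstdint>

// bucket is bits_in_buffer_model_[layer]; budget_bits is the per-frame
// layer_target_avg_bandwidth_[layer]. Each (spatial, temporal) bucket is
// credited once for every input frame whose temporal id is at or below its
// own layer, and debited by 8 * encoded bytes when a packet is produced
// (encoded_bits == 0 for dropped frames).
inline void UpdateLayerBucket(int64_t *bucket, int budget_bits,
                              int64_t encoded_bits) {
  *bucket += budget_bits;
  *bucket -= encoded_bits;
}

The test then asserts that the bucket never goes negative, except on the base temporal layer, where key frames may legitimately overdraw it.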
- for (int sl = 0; sl < number_spatial_layers_; ++sl) { - for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) { - const int layer = sl * number_temporal_layers_ + tl; - bits_in_buffer_model_[layer] += - static_cast(layer_target_avg_bandwidth_[layer]); - } - } - } - - vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz, - uint32_t sizes[8], int *count) { - uint8_t marker; - marker = *(data + data_sz - 1); - *count = 0; - if ((marker & 0xe0) == 0xc0) { - const uint32_t frames = (marker & 0x7) + 1; - const uint32_t mag = ((marker >> 3) & 0x3) + 1; - const size_t index_sz = 2 + mag * frames; - // This chunk is marked as having a superframe index but doesn't have - // enough data for it, thus it's an invalid superframe index. - if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME; - { - const uint8_t marker2 = *(data + data_sz - index_sz); - // This chunk is marked as having a superframe index but doesn't have - // the matching marker byte at the front of the index therefore it's an - // invalid chunk. - if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME; - } - { - uint32_t i, j; - const uint8_t *x = &data[data_sz - index_sz + 1]; - for (i = 0; i < frames; ++i) { - uint32_t this_sz = 0; - - for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8); - sizes[i] = this_sz; - } - *count = frames; - } - } - return VPX_CODEC_OK; - } - - virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { - uint32_t sizes[8] = { 0 }; - int count = 0; - last_pts_ = pkt->data.frame.pts; - const bool key_frame = - (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false; - parse_superframe_index(static_cast(pkt->data.frame.buf), - pkt->data.frame.sz, sizes, &count); - ASSERT_EQ(count, number_spatial_layers_); - for (int sl = 0; sl < number_spatial_layers_; ++sl) { - sizes[sl] = sizes[sl] << 3; - // Update the total encoded bits per layer. - // For temporal layers, update the cumulative encoded bits per layer. - for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) { - const int layer = sl * number_temporal_layers_ + tl; - bits_total_[layer] += static_cast(sizes[sl]); - // Update the per-layer buffer level with the encoded frame size. - bits_in_buffer_model_[layer] -= static_cast(sizes[sl]); - // There should be no buffer underrun, except on the base - // temporal layer, since there may be key frames there. 
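For orientation, parse_superframe_index() above reads the standard VP9 superframe trailer. A worked example for the two-spatial-layer configurations used in these tests (a sketch, not part of the patch):

#include <cassert>
#include <cstdint>

// A superframe carrying two layer frames with 2-byte size fields ends in
//   0xC9 | s0_lo s0_hi s1_lo s1_hi | 0xC9
// 0xC9 = 0b11001001: the top three bits (110) flag a superframe index,
// (marker & 0x7) + 1 == 2 frames, ((marker >> 3) & 0x3) + 1 == 2 bytes per
// size, so index_sz == 2 + 2 * 2 == 6, with the marker byte repeated at both
// ends of the index, which is exactly what the parser checks.
inline void SuperframeIndexExample() {
  const uint8_t marker = 0xC9;
  assert((marker & 0xe0) == 0xc0);            // superframe index present
  const int frames = (marker & 0x7) + 1;      // one frame per spatial layer
  const int mag = ((marker >> 3) & 0x3) + 1;  // bytes per size field
  assert(2 + mag * frames == 6);              // trailer length
}

FramePktHook() then asserts count == number_spatial_layers_, i.e. every emitted packet must carry exactly one frame per spatial layer.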
-        if (!key_frame && tl > 0) {
-          ASSERT_GE(bits_in_buffer_model_[layer], 0)
-              << "Buffer Underrun at frame " << pkt->data.frame.pts;
-        }
-      }
-    }
-  }
-
-  virtual void EndPassHook(void) {
-    for (int sl = 0; sl < number_spatial_layers_; ++sl) {
-      for (int tl = 0; tl < number_temporal_layers_; ++tl) {
-        const int layer = sl * number_temporal_layers_ + tl;
-        const double file_size_in_kb = bits_total_[layer] / 1000.;
-        duration_ = (last_pts_ + 1) * timebase_;
-        file_datarate_[layer] = file_size_in_kb / duration_;
-      }
-    }
-  }
-
-  virtual void MismatchHook(const vpx_image_t *img1, const vpx_image_t *img2) {
-    double mismatch_psnr = compute_psnr(img1, img2);
-    mismatch_psnr_ += mismatch_psnr;
-    ++mismatch_nframes_;
-  }
-
-  unsigned int GetMismatchFrames() { return mismatch_nframes_; }
-
-  vpx_codec_pts_t last_pts_;
-  int64_t bits_in_buffer_model_[VPX_MAX_LAYERS];
-  double timebase_;
-  int64_t bits_total_[VPX_MAX_LAYERS];
-  double duration_;
-  double file_datarate_[VPX_MAX_LAYERS];
-  size_t bits_in_last_frame_;
-  vpx_svc_extra_cfg_t svc_params_;
-  int speed_setting_;
-  double mismatch_psnr_;
-  int mismatch_nframes_;
-  int denoiser_on_;
-  int tune_content_;
-  int base_speed_setting_;
-  int spatial_layer_id_;
-  int temporal_layer_id_;
-  int number_spatial_layers_;
-  int number_temporal_layers_;
-  int layer_target_avg_bandwidth_[VPX_MAX_LAYERS];
-};
-static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
-                                  const vpx_svc_extra_cfg_t *svc_params,
-                                  int spatial_layers, int temporal_layers,
-                                  int temporal_layering_mode,
-                                  int *layer_target_avg_bandwidth,
-                                  int64_t *bits_in_buffer_model) {
-  int sl, spatial_layer_target;
-  float total = 0;
-  float alloc_ratio[VPX_MAX_LAYERS] = { 0 };
-  float framerate = 30.0;
-  for (sl = 0; sl < spatial_layers; ++sl) {
-    if (svc_params->scaling_factor_den[sl] > 0) {
-      alloc_ratio[sl] = (float)(svc_params->scaling_factor_num[sl] * 1.0 /
-                                svc_params->scaling_factor_den[sl]);
-      total += alloc_ratio[sl];
-    }
-  }
-  for (sl = 0; sl < spatial_layers; ++sl) {
-    enc_cfg->ss_target_bitrate[sl] = spatial_layer_target =
-        (unsigned int)(enc_cfg->rc_target_bitrate * alloc_ratio[sl] / total);
-    const int index = sl * temporal_layers;
-    if (temporal_layering_mode == 3) {
-      enc_cfg->layer_target_bitrate[index] = spatial_layer_target >> 1;
-      enc_cfg->layer_target_bitrate[index + 1] =
-          (spatial_layer_target >> 1) + (spatial_layer_target >> 2);
-      enc_cfg->layer_target_bitrate[index + 2] = spatial_layer_target;
-    } else if (temporal_layering_mode == 2) {
-      enc_cfg->layer_target_bitrate[index] = spatial_layer_target * 2 / 3;
-      enc_cfg->layer_target_bitrate[index + 1] = spatial_layer_target;
-    } else if (temporal_layering_mode <= 1) {
-      enc_cfg->layer_target_bitrate[index] = spatial_layer_target;
-    }
-  }
-  for (sl = 0; sl < spatial_layers; ++sl) {
-    for (int tl = 0; tl < temporal_layers; ++tl) {
-      const int layer = sl * temporal_layers + tl;
-      float layer_framerate = framerate;
-      if (temporal_layers == 2 && tl == 0) layer_framerate = framerate / 2;
-      if (temporal_layers == 3 && tl == 0) layer_framerate = framerate / 4;
-      if (temporal_layers == 3 && tl == 1) layer_framerate = framerate / 2;
-      layer_target_avg_bandwidth[layer] = static_cast<int>(
-          enc_cfg->layer_target_bitrate[layer] * 1000.0 / layer_framerate);
-      bits_in_buffer_model[layer] =
-          enc_cfg->layer_target_bitrate[layer] * enc_cfg->rc_buf_initial_sz;
-    }
-  }
-}
-
-static void CheckLayerRateTargeting(vpx_codec_enc_cfg_t *const cfg,
-                                    int number_spatial_layers,
-                                    int number_temporal_layers,
-                                    double *file_datarate,
-                                    double thresh_overshoot,
-                                    double thresh_undershoot) {
-  for (int sl = 0; sl < number_spatial_layers; ++sl)
-    for (int tl = 0; tl < number_temporal_layers; ++tl) {
-      const int layer = sl * number_temporal_layers + tl;
-      ASSERT_GE(cfg->layer_target_bitrate[layer],
-                file_datarate[layer] * thresh_overshoot)
-          << " The datarate for the file exceeds the target by too much!";
-      ASSERT_LE(cfg->layer_target_bitrate[layer],
-                file_datarate[layer] * thresh_undershoot)
-          << " The datarate for the file is lower than the target by too much!";
-    }
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1
-// temporal layer, with screen content mode on and same speed setting for all
-// layers.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TLScreenContent1) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 2;
-  cfg_.ts_number_layers = 1;
-  cfg_.ts_rate_decimator[0] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 1;
-  cfg_.temporal_layering_mode = 0;
-  svc_params_.scaling_factor_num[0] = 144;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 288;
-  svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 10;
-  cfg_.kf_max_dist = 9999;
-  number_spatial_layers_ = cfg_.ss_number_layers;
-  number_temporal_layers_ = cfg_.ts_number_layers;
-  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
-  cfg_.rc_target_bitrate = 500;
-  ResetModel();
-  tune_content_ = 1;
-  base_speed_setting_ = speed_setting_;
-  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-                        cfg_.ts_number_layers, cfg_.temporal_layering_mode,
-                        layer_target_avg_bandwidth_, bits_in_buffer_model_);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
-                          number_temporal_layers_, file_datarate_, 0.78, 1.15);
-  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
-// 3 temporal layers. Run CIF clip with 1 thread.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 2;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 1;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 144;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 288;
-  svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 0;
-  cfg_.kf_max_dist = 9999;
-  number_spatial_layers_ = cfg_.ss_number_layers;
-  number_temporal_layers_ = cfg_.ts_number_layers;
-  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
-                                       0, 400);
-  // TODO(marpan): Check that effective_datarate for each layer hits the
-  // layer target_bitrate.
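CheckLayerRateTargeting's thresholds form an asymmetric tolerance band: ASSERT_GE(target, datarate * 0.78) tolerates roughly 28% overshoot, and ASSERT_LE(target, datarate * 1.15) tolerates roughly 13% undershoot. A small worked check of that arithmetic (standalone sketch, values hypothetical):

    #include <assert.h>

    // Band implied by CheckLayerRateTargeting(..., 0.78, 1.15) for a
    // hypothetical 400 kbps layer target.
    static void check_band(void) {
      const double target_kbps = 400.0;
      const double max_rate = target_kbps / 0.78; /* ~512.8 kbps, ~28% over */
      const double min_rate = target_kbps / 1.15; /* ~347.8 kbps, ~13% under */
      assert(min_rate < target_kbps && target_kbps < max_rate);
    }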
-  for (int i = 200; i <= 800; i += 200) {
-    cfg_.rc_target_bitrate = i;
-    ResetModel();
-    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-                          cfg_.ts_number_layers, cfg_.temporal_layering_mode,
-                          layer_target_avg_bandwidth_, bits_in_buffer_model_);
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
-                            number_temporal_layers_, file_datarate_, 0.78,
-                            1.15);
-#if CONFIG_VP9_DECODER
-    // Number of temporal layers > 1, so half of the frames in this SVC pattern
-    // will be non-reference frame and hence encoder will avoid loopfilter.
-    // Since frame dropper is off, we can expect 200 (half of the sequence)
-    // mismatched frames.
-    EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
-#endif
-  }
-}
-
-// Check basic rate targeting for 1 pass CBR SVC with denoising.
-// 2 spatial layers and 3 temporal layer. Run HD clip with 2 threads.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLDenoiserOn) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 2;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 2;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 144;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 288;
-  svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 0;
-  cfg_.kf_max_dist = 9999;
-  number_spatial_layers_ = cfg_.ss_number_layers;
-  number_temporal_layers_ = cfg_.ts_number_layers;
-  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
-                                       0, 400);
-  // TODO(marpan): Check that effective_datarate for each layer hits the
-  // layer target_bitrate.
-  // For SVC, noise_sen = 1 means denoising only the top spatial layer
-  // noise_sen = 2 means denoising the two top spatial layers.
-  for (int noise_sen = 1; noise_sen <= 2; noise_sen++) {
-    for (int i = 600; i <= 1000; i += 200) {
-      cfg_.rc_target_bitrate = i;
-      ResetModel();
-      denoiser_on_ = noise_sen;
-      assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-                            cfg_.ts_number_layers, cfg_.temporal_layering_mode,
-                            layer_target_avg_bandwidth_, bits_in_buffer_model_);
-      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-      CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
-                              number_temporal_layers_, file_datarate_, 0.78,
-                              1.15);
-#if CONFIG_VP9_DECODER
-      // Number of temporal layers > 1, so half of the frames in this SVC
-      // pattern
-      // will be non-reference frame and hence encoder will avoid loopfilter.
-      // Since frame dropper is off, we can expect 200 (half of the sequence)
-      // mismatched frames.
-      EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
-#endif
-    }
-  }
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 3
-// temporal layers. Run CIF clip with 1 thread, and few short key frame periods.
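The SmallKf sweeps below lean on the temporal_layering_mode == 3 pattern, in which the temporal layer id cycles 0-2-1-2 over every 4 frames; sweeping four consecutive kf_max_dist values therefore lands the forced key frame on each phase of the pattern once. A standalone sketch of the mapping (hypothetical helper, not part of the patch):

    // Temporal layer id of frame n under the 4-frame 0-2-1-2 pattern used by
    // temporal_layering_mode == 3 with 3 temporal layers.
    static int temporal_layer_id_mode3(int n) {
      static const int pattern[4] = { 0, 2, 1, 2 };
      return pattern[n & 3];
    }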
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLSmallKf) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 2;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 1;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 144;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 288;
-  svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 10;
-  cfg_.rc_target_bitrate = 400;
-  number_spatial_layers_ = cfg_.ss_number_layers;
-  number_temporal_layers_ = cfg_.ts_number_layers;
-  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
-                                       0, 400);
-  // For this 3 temporal layer case, pattern repeats every 4 frames, so choose
-  // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
-  for (int j = 64; j <= 67; j++) {
-    cfg_.kf_max_dist = j;
-    ResetModel();
-    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-                          cfg_.ts_number_layers, cfg_.temporal_layering_mode,
-                          layer_target_avg_bandwidth_, bits_in_buffer_model_);
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
-                            number_temporal_layers_, file_datarate_, 0.78,
-                            1.15);
-  }
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
-// 3 temporal layers. Run HD clip with 4 threads.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL4Threads) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 2;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 4;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 144;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 288;
-  svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 0;
-  cfg_.kf_max_dist = 9999;
-  number_spatial_layers_ = cfg_.ss_number_layers;
-  number_temporal_layers_ = cfg_.ts_number_layers;
-  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
-  cfg_.rc_target_bitrate = 800;
-  ResetModel();
-  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-                        cfg_.ts_number_layers, cfg_.temporal_layering_mode,
-                        layer_target_avg_bandwidth_, bits_in_buffer_model_);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
-                          number_temporal_layers_, file_datarate_, 0.78, 1.15);
-#if CONFIG_VP9_DECODER
-  // Number of temporal layers > 1, so half of the frames in this SVC pattern
-  // will be non-reference frame and hence encoder will avoid loopfilter.
-  // Since frame dropper is off, we can expect 30 (half of the sequence)
-  // mismatched frames.
-  EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames());
-#endif
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
-// 3 temporal layers. Run CIF clip with 1 thread.
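The three-spatial-layer tests that follow use scaling_factor_num/den pairs of 72/288, 144/288 and 288/288, i.e. quarter, half and full resolution. For the 640x480 input that means per-layer frame sizes of 160x120, 320x240 and 640x480; a sketch of the arithmetic (standalone, not part of the patch):

    // Spatial-layer dimensions implied by the SVC scaling factors.
    static void layer_dims(int w, int h, int num, int den, int *lw, int *lh) {
      *lw = w * num / den; /* e.g. 640 * 72 / 288 = 160 */
      *lh = h * num / den; /* e.g. 480 * 72 / 288 = 120 */
    }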
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 3;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 1;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 72;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 144;
-  svc_params_.scaling_factor_den[1] = 288;
-  svc_params_.scaling_factor_num[2] = 288;
-  svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 0;
-  cfg_.kf_max_dist = 9999;
-  number_spatial_layers_ = cfg_.ss_number_layers;
-  number_temporal_layers_ = cfg_.ts_number_layers;
-  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
-                                       0, 400);
-  cfg_.rc_target_bitrate = 800;
-  ResetModel();
-  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-                        cfg_.ts_number_layers, cfg_.temporal_layering_mode,
-                        layer_target_avg_bandwidth_, bits_in_buffer_model_);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
-                          number_temporal_layers_, file_datarate_, 0.78, 1.15);
-#if CONFIG_VP9_DECODER
-  // Number of temporal layers > 1, so half of the frames in this SVC pattern
-  // will be non-reference frame and hence encoder will avoid loopfilter.
-  // Since frame dropper is off, we can expect 200 (half of the sequence)
-  // mismatched frames.
-  EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
-#endif
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
-// temporal layers. Run CIF clip with 1 thread, and few short key frame periods.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 3;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 1;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 72;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 144;
-  svc_params_.scaling_factor_den[1] = 288;
-  svc_params_.scaling_factor_num[2] = 288;
-  svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 10;
-  cfg_.rc_target_bitrate = 800;
-  number_spatial_layers_ = cfg_.ss_number_layers;
-  number_temporal_layers_ = cfg_.ts_number_layers;
-  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
-                                       0, 400);
-  // For this 3 temporal layer case, pattern repeats every 4 frames, so choose
-  // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
-  for (int j = 32; j <= 35; j++) {
-    cfg_.kf_max_dist = j;
-    ResetModel();
-    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-                          cfg_.ts_number_layers, cfg_.temporal_layering_mode,
-                          layer_target_avg_bandwidth_, bits_in_buffer_model_);
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
-                            number_temporal_layers_, file_datarate_, 0.78,
-                            1.15);
-  }
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
-// 3 temporal layers. Run HD clip with 4 threads.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4threads) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 3;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 4;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 72;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 144;
-  svc_params_.scaling_factor_den[1] = 288;
-  svc_params_.scaling_factor_num[2] = 288;
-  svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 0;
-  cfg_.kf_max_dist = 9999;
-  number_spatial_layers_ = cfg_.ss_number_layers;
-  number_temporal_layers_ = cfg_.ts_number_layers;
-  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
-  cfg_.rc_target_bitrate = 800;
-  ResetModel();
-  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-                        cfg_.ts_number_layers, cfg_.temporal_layering_mode,
-                        layer_target_avg_bandwidth_, bits_in_buffer_model_);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
-                          number_temporal_layers_, file_datarate_, 0.78, 1.15);
-#if CONFIG_VP9_DECODER
-  // Number of temporal layers > 1, so half of the frames in this SVC pattern
-  // will be non-reference frame and hence encoder will avoid loopfilter.
-  // Since frame dropper is off, we can expect 30 (half of the sequence)
-  // mismatched frames.
-  EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames());
-#endif
-}
-
-// Run SVC encoder for 1 temporal layer, 2 spatial layers, with spatial
-// downscale 5x5.
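The 5x5 test below fills the rate model by hand instead of calling assign_layer_bitrates(), which makes the units explicit: layer_target_bitrate is in kbps, so bitrate * 1000 / 30 is the average bits per frame at 30 fps, and bitrate (kbps) times rc_buf_initial_sz (ms) gives the starting buffer level in bits. Worked numbers for the 300 kbps base layer (standalone sketch):

    const int bitrate_kbps = 300;
    const int avg_bits_per_frame = bitrate_kbps * 1000 / 30;     /* 10000 bits */
    const int64_t initial_buffer_bits =
        (int64_t)bitrate_kbps * 500; /* kbps * ms = bits; 150000 */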
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TL5x5MultipleRuns) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 2;
-  cfg_.ts_number_layers = 1;
-  cfg_.ts_rate_decimator[0] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 3;
-  cfg_.temporal_layering_mode = 0;
-  svc_params_.scaling_factor_num[0] = 256;
-  svc_params_.scaling_factor_den[0] = 1280;
-  svc_params_.scaling_factor_num[1] = 1280;
-  svc_params_.scaling_factor_den[1] = 1280;
-  cfg_.rc_dropframe_thresh = 10;
-  cfg_.kf_max_dist = 999999;
-  cfg_.kf_min_dist = 0;
-  cfg_.ss_target_bitrate[0] = 300;
-  cfg_.ss_target_bitrate[1] = 1400;
-  cfg_.layer_target_bitrate[0] = 300;
-  cfg_.layer_target_bitrate[1] = 1400;
-  cfg_.rc_target_bitrate = 1700;
-  number_spatial_layers_ = cfg_.ss_number_layers;
-  number_temporal_layers_ = cfg_.ts_number_layers;
-  ResetModel();
-  layer_target_avg_bandwidth_[0] = cfg_.layer_target_bitrate[0] * 1000 / 30;
-  bits_in_buffer_model_[0] =
-      cfg_.layer_target_bitrate[0] * cfg_.rc_buf_initial_sz;
-  layer_target_avg_bandwidth_[1] = cfg_.layer_target_bitrate[1] * 1000 / 30;
-  bits_in_buffer_model_[1] =
-      cfg_.layer_target_bitrate[1] * cfg_.rc_buf_initial_sz;
-  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
-                          number_temporal_layers_, file_datarate_, 0.78, 1.15);
-  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-}
-
-VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES,
-                          ::testing::Values(0));
-VP8_INSTANTIATE_TEST_CASE(DatarateTestRealTime,
-                          ::testing::Values(::libvpx_test::kRealTime),
-                          ::testing::Values(-6, -12));
-VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
-                          ::testing::Values(::libvpx_test::kOnePassGood,
-                                            ::libvpx_test::kRealTime),
-                          ::testing::Range(2, 9));
-#if CONFIG_VP9_TEMPORAL_DENOISING
-VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser,
-                          ::testing::Values(::libvpx_test::kRealTime),
-                          ::testing::Range(5, 9));
-#endif
-VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvc,
-                          ::testing::Values(::libvpx_test::kRealTime),
-                          ::testing::Range(5, 9));
-}  // namespace
diff --git a/media/libvpx/libvpx/test/dct16x16_test.cc b/media/libvpx/libvpx/test/dct16x16_test.cc
index ce0bd37b3df3..9ccf2b84f11f 100644
--- a/media/libvpx/libvpx/test/dct16x16_test.cc
+++ b/media/libvpx/libvpx/test/dct16x16_test.cc
@@ -11,6 +11,7 @@
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
+#include <tuple>
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
@@ -229,10 +230,9 @@ typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                         int tx_type);
 
-typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
-typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>
-    Idct16x16Param;
+typedef std::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
+typedef std::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
+typedef std::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct16x16Param;
 
 void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                    int /*tx_type*/) {
@@ -744,7 +744,7 @@ TEST_P(InvTrans16x16DCT, CompareReference) {
   CompareInvReference(ref_txfm_, thresh_);
 }
 
-using std::tr1::make_tuple;
+using std::make_tuple;
 
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
diff --git a/media/libvpx/libvpx/test/dct32x32_test.cc b/media/libvpx/libvpx/test/dct32x32_test.cc
index a95ff973287e..94d6b37fa91a 100644
--- a/media/libvpx/libvpx/test/dct32x32_test.cc
+++ b/media/libvpx/libvpx/test/dct32x32_test.cc
@@ -11,6 +11,7 @@
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
+#include <tuple>
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
@@ -18,6 +19,7 @@
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
+#include "test/bench.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
@@ -66,7 +68,7 @@ void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
 
-typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
+typedef std::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
     Trans32x32Param;
 
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -79,7 +81,8 @@ void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
+class Trans32x32Test : public AbstractBench,
+                       public ::testing::TestWithParam<Trans32x32Param> {
  public:
   virtual ~Trans32x32Test() {}
   virtual void SetUp() {
@@ -99,8 +102,14 @@ class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
   int mask_;
   FwdTxfmFunc fwd_txfm_;
   InvTxfmFunc inv_txfm_;
+
+  int16_t *bench_in_;
+  tran_low_t *bench_out_;
+  virtual void Run();
 };
 
+void Trans32x32Test::Run() { fwd_txfm_(bench_in_, bench_out_, 32); }
+
 TEST_P(Trans32x32Test, AccuracyCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   uint32_t max_error = 0;
@@ -237,6 +246,19 @@ TEST_P(Trans32x32Test, MemCheck) {
   }
 }
 
+TEST_P(Trans32x32Test, DISABLED_Speed) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+
+  bench_in_ = input_extreme_block;
+  bench_out_ = output_block;
+
+  RunNTimes(INT16_MAX);
+  PrintMedian("32x32");
+}
+
 TEST_P(Trans32x32Test, InverseAccuracy) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   const int count_test_block = 1000;
@@ -292,7 +314,7 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
   }
 }
 
-using std::tr1::make_tuple;
+using std::make_tuple;
 
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -371,7 +393,7 @@ INSTANTIATE_TEST_CASE_P(
     VSX, Trans32x32Test,
     ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_vsx,
                                  0, VPX_BITS_8),
-                      make_tuple(&vpx_fdct32x32_rd_c,
+                      make_tuple(&vpx_fdct32x32_rd_vsx,
                                  &vpx_idct32x32_1024_add_vsx, 1, VPX_BITS_8)));
 #endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
diff --git a/media/libvpx/libvpx/test/dct_partial_test.cc b/media/libvpx/libvpx/test/dct_partial_test.cc
index 4d145f589153..c889e92d709a 100644
--- a/media/libvpx/libvpx/test/dct_partial_test.cc
+++ b/media/libvpx/libvpx/test/dct_partial_test.cc
@@ -11,8 +11,8 @@
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
-
 #include <limits>
+#include <tuple>
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
@@ -28,8 +28,8 @@
 using libvpx_test::ACMRandom;
 using libvpx_test::Buffer;
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using std::make_tuple;
+using std::tuple;
 
 namespace {
 typedef void (*PartialFdctFunc)(const int16_t *in, tran_low_t *out, int stride);
 
@@ -39,10 +39,14 @@ typedef tuple<PartialFdctFunc, int, vpx_bit_depth_t>
 tran_low_t partial_fdct_ref(const Buffer<int16_t> &in, int size) {
   int64_t sum = 0;
-  for (int y = 0; y < size; ++y) {
-    for (int x = 0; x < size; ++x) {
-      sum += in.TopLeftPixel()[y * in.stride() + x];
+  if (in.TopLeftPixel() != NULL) {
+    for (int y = 0; y < size; ++y) {
+      for (int x = 0; x < size; ++x) {
+        sum += in.TopLeftPixel()[y * in.stride() + x];
+      }
     }
+  } else {
+    assert(0);
   }
 
   switch (size) {
@@ -77,21 +81,25 @@ class PartialFdctTest : public ::testing::TestWithParam<PartialFdctParam> {
     Buffer<tran_low_t> output_block = Buffer<tran_low_t>(size_, size_, 0, 16);
     ASSERT_TRUE(output_block.Init());
 
-    for (int i = 0; i < 100; ++i) {
-      if (i == 0) {
-        input_block.Set(maxvalue);
-      } else if (i == 1) {
-        input_block.Set(minvalue);
-      } else {
-        input_block.Set(&rnd, minvalue, maxvalue);
+    if (output_block.TopLeftPixel() != NULL) {
+      for (int i = 0; i < 100; ++i) {
+        if (i == 0) {
+          input_block.Set(maxvalue);
+        } else if (i == 1) {
+          input_block.Set(minvalue);
+        } else {
+          input_block.Set(&rnd, minvalue, maxvalue);
+        }
+
+        ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block.TopLeftPixel(),
+                                           output_block.TopLeftPixel(),
+                                           input_block.stride()));
+
+        EXPECT_EQ(partial_fdct_ref(input_block, size_),
+                  output_block.TopLeftPixel()[0]);
       }
-
-      ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block.TopLeftPixel(),
-                                         output_block.TopLeftPixel(),
-                                         input_block.stride()));
-
-      EXPECT_EQ(partial_fdct_ref(input_block, size_),
-                output_block.TopLeftPixel()[0]);
+    } else {
+      assert(0);
     }
   }
diff --git a/media/libvpx/libvpx/test/dct_test.cc b/media/libvpx/libvpx/test/dct_test.cc
index addbdfb463f8..ed12f7756917 100644
--- a/media/libvpx/libvpx/test/dct_test.cc
+++ b/media/libvpx/libvpx/test/dct_test.cc
@@ -11,6 +11,7 @@
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
+#include <tuple>
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
@@ -28,8 +29,8 @@
 using libvpx_test::ACMRandom;
 using libvpx_test::Buffer;
-using std::tr1::tuple;
-using std::tr1::make_tuple;
+using std::make_tuple;
+using std::tuple;
 
 namespace {
 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
@@ -40,10 +41,60 @@ typedef void (*FhtFuncRef)(const Buffer<int16_t> &in, Buffer<tran_low_t> *out,
                            int size, int tx_type);
 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                         int tx_type);
+typedef void (*IhtWithBdFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                              int tx_type, int bd);
+
+template <FdctFunc fn>
+void fdct_wrapper(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  (void)tx_type;
+  fn(in, out, stride);
+}
+
+template <IdctFunc fn>
+void idct_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type,
+                  int bd) {
+  (void)tx_type;
+  (void)bd;
+  fn(in, out, stride);
+}
+
+template <IhtFunc fn>
+void iht_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type,
+                 int bd) {
+  (void)bd;
+  fn(in, out, stride, tx_type);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef void (*HighbdIdctFunc)(const tran_low_t *in, uint16_t *out, int stride,
+                               int bd);
+
+typedef void (*HighbdIhtFunc)(const tran_low_t *in, uint16_t *out, int stride,
+                              int tx_type, int bd);
+
+template <HighbdIdctFunc fn>
+void highbd_idct_wrapper(const tran_low_t *in, uint8_t *out, int stride,
+                         int tx_type, int bd) {
+  (void)tx_type;
+  fn(in, CAST_TO_SHORTPTR(out), stride, bd);
+}
+
+template <HighbdIhtFunc fn>
+void highbd_iht_wrapper(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type, int bd) {
+  fn(in, CAST_TO_SHORTPTR(out), stride, tx_type, bd);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+struct FuncInfo {
+  FhtFunc ft_func;
+  IhtWithBdFunc it_func;
+  int size;
+  int pixel_size;
+};
 
 /* forward transform, inverse transform, size, transform type, bit depth */
-typedef tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> DctParam;
-typedef tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> HtParam;
+typedef tuple<int, const FuncInfo *, int, vpx_bit_depth_t> DctParam;
 
 void fdct_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
               int /*tx_type*/) {
@@ -81,128 +132,123 @@ void fwht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
   vp9_fwht4x4_c(in.TopLeftPixel(), out->TopLeftPixel(), in.stride());
 }
 
-#if CONFIG_VP9_HIGHBITDEPTH
-#define idctNxN(n, coeffs, bitdepth)                                       \
-  void idct##n##x##n##_##bitdepth(const tran_low_t *in, uint8_t *out,      \
-                                  int stride) {                            \
-    vpx_highbd_idct##n##x##n##_##coeffs##_add_c(in, CAST_TO_SHORTPTR(out), \
-                                                stride, bitdepth);         \
-  }
-
-idctNxN(4, 16, 10);
-idctNxN(4, 16, 12);
-idctNxN(8, 64, 10);
-idctNxN(8, 64, 12);
-idctNxN(16, 256, 10);
-idctNxN(16, 256, 12);
-idctNxN(32, 1024, 10);
-idctNxN(32, 1024, 12);
-
-#define ihtNxN(n, coeffs, bitdepth)                                       \
-  void iht##n##x##n##_##bitdepth(const tran_low_t *in, uint8_t *out,      \
-                                 int stride, int tx_type) {               \
-    vp9_highbd_iht##n##x##n##_##coeffs##_add_c(in, CAST_TO_SHORTPTR(out), \
-                                               stride, tx_type, bitdepth); \
-  }
-
-ihtNxN(4, 16, 10);
-ihtNxN(4, 16, 12);
-ihtNxN(8, 64, 10);
-ihtNxN(8, 64, 12);
-ihtNxN(16, 256, 10);
-// ihtNxN(16, 256, 12);
-
-void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_iwht4x4_16_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
-}
-
-void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_iwht4x4_16_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
-}
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-class TransTestBase {
+class TransTestBase : public ::testing::TestWithParam<DctParam> {
  public:
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+  virtual void SetUp() {
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    const int idx = GET_PARAM(0);
+    const FuncInfo *func_info = &(GET_PARAM(1)[idx]);
+    tx_type_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
+    fwd_txfm_ = func_info->ft_func;
+    inv_txfm_ = func_info->it_func;
+    size_ = func_info->size;
+    pixel_size_ = func_info->pixel_size;
+    max_pixel_value_ = (1 << bit_depth_) - 1;
+
+    // Randomize stride_ to a value less than or equal to 1024
+    stride_ = rnd_(1024) + 1;
+    if (stride_ < size_) {
+      stride_ = size_;
+    }
+    // Align stride_ to 16 if it's bigger than 16.
+    if (stride_ > 16) {
+      stride_ &= ~15;
+    }
+
+    block_size_ = size_ * stride_;
+
+    src_ = reinterpret_cast<uint8_t *>(
+        vpx_memalign(16, pixel_size_ * block_size_));
+    ASSERT_TRUE(src_ != NULL);
+    dst_ = reinterpret_cast<uint8_t *>(
+        vpx_memalign(16, pixel_size_ * block_size_));
+    ASSERT_TRUE(dst_ != NULL);
+  }
+
+  virtual void TearDown() {
+    vpx_free(src_);
+    src_ = NULL;
+    vpx_free(dst_);
+    dst_ = NULL;
+    libvpx_test::ClearSystemState();
+  }
+
+  void InitMem() {
+    if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
+    if (pixel_size_ == 1) {
+      for (int j = 0; j < block_size_; ++j) {
+        src_[j] = rnd_.Rand16() & max_pixel_value_;
+      }
+      for (int j = 0; j < block_size_; ++j) {
+        dst_[j] = rnd_.Rand16() & max_pixel_value_;
+      }
+    } else {
+      ASSERT_EQ(pixel_size_, 2);
+      uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
+      uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
+      for (int j = 0; j < block_size_; ++j) {
+        src[j] = rnd_.Rand16() & max_pixel_value_;
+      }
+      for (int j = 0; j < block_size_; ++j) {
+        dst[j] = rnd_.Rand16() & max_pixel_value_;
+      }
+    }
+  }
+
+  void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
+    fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride(), tx_type_);
+  }
+
+  void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
+    inv_txfm_(in.TopLeftPixel(), out, stride_, tx_type_, bit_depth_);
+  }
 
  protected:
-  virtual void RunFwdTxfm(const Buffer<int16_t> &in,
-                          Buffer<tran_low_t> *out) = 0;
-
-  virtual void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) = 0;
-
   void RunAccuracyCheck(int limit) {
+    if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
     ACMRandom rnd(ACMRandom::DeterministicSeed());
     Buffer<int16_t> test_input_block =
         Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
     ASSERT_TRUE(test_input_block.Init());
+    ASSERT_TRUE(test_input_block.TopLeftPixel() != NULL);
     Buffer<tran_low_t> test_temp_block =
         Buffer<tran_low_t>(size_, size_, 0, 16);
     ASSERT_TRUE(test_temp_block.Init());
-    Buffer<uint8_t> dst = Buffer<uint8_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(dst.Init());
-    Buffer<uint8_t> src = Buffer<uint8_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(src.Init());
-#if CONFIG_VP9_HIGHBITDEPTH
-    Buffer<uint16_t> dst16 = Buffer<uint16_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(dst16.Init());
-    Buffer<uint16_t> src16 = Buffer<uint16_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(src16.Init());
-#endif  // CONFIG_VP9_HIGHBITDEPTH
 
     uint32_t max_error = 0;
     int64_t total_error = 0;
     const int count_test_block = 10000;
     for (int i = 0; i < count_test_block; ++i) {
-      if (bit_depth_ == 8) {
-        src.Set(&rnd, &ACMRandom::Rand8);
-        dst.Set(&rnd, &ACMRandom::Rand8);
-        // Initialize a test block with input range [-255, 255].
-        for (int h = 0; h < size_; ++h) {
-          for (int w = 0; w < size_; ++w) {
+      InitMem();
+      for (int h = 0; h < size_; ++h) {
+        for (int w = 0; w < size_; ++w) {
+          if (pixel_size_ == 1) {
             test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
-                src.TopLeftPixel()[h * src.stride() + w] -
-                dst.TopLeftPixel()[h * dst.stride() + w];
+                src_[h * stride_ + w] - dst_[h * stride_ + w];
+          } else {
+            ASSERT_EQ(pixel_size_, 2);
+            const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
+            const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
+            test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
+                src[h * stride_ + w] - dst[h * stride_ + w];
           }
         }
-#if CONFIG_VP9_HIGHBITDEPTH
-      } else {
-        src16.Set(&rnd, 0, max_pixel_value_);
-        dst16.Set(&rnd, 0, max_pixel_value_);
-        for (int h = 0; h < size_; ++h) {
-          for (int w = 0; w < size_; ++w) {
-            test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
-                src16.TopLeftPixel()[h * src16.stride() + w] -
-                dst16.TopLeftPixel()[h * dst16.stride() + w];
-          }
-        }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
       }
 
       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block, &test_temp_block));
-      if (bit_depth_ == VPX_BITS_8) {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, dst.TopLeftPixel()));
-#if CONFIG_VP9_HIGHBITDEPTH
-      } else {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16.TopLeftPixel())));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-      }
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst_));
 
       for (int h = 0; h < size_; ++h) {
        for (int w = 0; w < size_; ++w) {
          int diff;
-#if CONFIG_VP9_HIGHBITDEPTH
-          if (bit_depth_ != 8) {
-            diff = dst16.TopLeftPixel()[h * dst16.stride() + w] -
-                   src16.TopLeftPixel()[h * src16.stride() + w];
+          if (pixel_size_ == 1) {
+            diff = dst_[h * stride_ + w] - src_[h * stride_ + w];
           } else {
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-            diff = dst.TopLeftPixel()[h * dst.stride() + w] -
-                   src.TopLeftPixel()[h * src.stride() + w];
-#if CONFIG_VP9_HIGHBITDEPTH
+            ASSERT_EQ(pixel_size_, 2);
+            const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
+            const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
+            diff = dst[h * stride_ + w] - src[h * stride_ + w];
           }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
           const uint32_t error = diff * diff;
           if (max_error < error) max_error = error;
           total_error += error;
@@ -211,14 +257,18 @@ class TransTestBase {
     }
 
     EXPECT_GE(static_cast<uint32_t>(limit), max_error)
-        << "Error: 4x4 FHT/IHT has an individual round trip error > " << limit;
+        << "Error: " << size_ << "x" << size_
+        << " transform/inverse transform has an individual round trip error > "
+        << limit;
 
     EXPECT_GE(count_test_block * limit, total_error)
-        << "Error: 4x4 FHT/IHT has average round trip error > " << limit
-        << " per block";
+        << "Error: " << size_ << "x" << size_
+        << " transform/inverse transform has average round trip error > "
+        << limit << " per block";
   }
 
   void RunCoeffCheck() {
+    if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 5000;
    Buffer<int16_t> input_block =
@@ -248,6 +298,7 @@
   }
 
   void RunMemCheck() {
+    if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 5000;
    Buffer<int16_t> input_extreme_block =
@@ -265,6 +316,7 @@
      } else if (i == 1) {
        input_extreme_block.Set(-max_pixel_value_);
      } else {
+        ASSERT_TRUE(input_extreme_block.TopLeftPixel() != NULL);
        for (int h = 0; h < size_; ++h) {
          for (int w = 0; w < size_; ++w) {
            input_extreme_block
@@ -279,13 +331,14 @@
 
      // The minimum quant value is 4.
      EXPECT_TRUE(output_block.CheckValues(output_ref_block));
+      ASSERT_TRUE(output_block.TopLeftPixel() != NULL);
      for (int h = 0; h < size_; ++h) {
        for (int w = 0; w < size_; ++w) {
          EXPECT_GE(
              4 * DCT_MAX_VALUE << (bit_depth_ - 8),
              abs(output_block.TopLeftPixel()[h * output_block.stride() + w]))
-              << "Error: 4x4 FDCT has coefficient larger than "
-                 "4*DCT_MAX_VALUE"
+              << "Error: " << size_ << "x" << size_
+              << " transform has coefficient larger than 4*DCT_MAX_VALUE"
              << " at " << w << "," << h;
          if (::testing::Test::HasFailure()) {
            printf("Size: %d Transform type: %d\n", size_, tx_type_);
@@ -298,6 +351,7 @@
   }
 
   void RunInvAccuracyCheck(int limit) {
+    if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
    Buffer<int16_t> in = Buffer<int16_t>(size_, size_, 4);
@@ -314,100 +368,85 @@
    ASSERT_TRUE(src16.Init());
 
    for (int i = 0; i < count_test_block; ++i) {
+      InitMem();
+      ASSERT_TRUE(in.TopLeftPixel() != NULL);
      // Initialize a test block with input range [-max_pixel_value_,
      // max_pixel_value_].
-      if (bit_depth_ == VPX_BITS_8) {
-        src.Set(&rnd, &ACMRandom::Rand8);
-        dst.Set(&rnd, &ACMRandom::Rand8);
-        for (int h = 0; h < size_; ++h) {
-          for (int w = 0; w < size_; ++w) {
+      for (int h = 0; h < size_; ++h) {
+        for (int w = 0; w < size_; ++w) {
+          if (pixel_size_ == 1) {
            in.TopLeftPixel()[h * in.stride() + w] =
-                src.TopLeftPixel()[h * src.stride() + w] -
-                dst.TopLeftPixel()[h * dst.stride() + w];
+                src_[h * stride_ + w] - dst_[h * stride_ + w];
+          } else {
+            ASSERT_EQ(pixel_size_, 2);
+            const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
+            const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
+            in.TopLeftPixel()[h * in.stride() + w] =
+                src[h * stride_ + w] - dst[h * stride_ + w];
          }
        }
-#if CONFIG_VP9_HIGHBITDEPTH
-      } else {
-        src16.Set(&rnd, 0, max_pixel_value_);
-        dst16.Set(&rnd, 0, max_pixel_value_);
-        for (int h = 0; h < size_; ++h) {
-          for (int w = 0; w < size_; ++w) {
-            in.TopLeftPixel()[h * in.stride() + w] =
-                src16.TopLeftPixel()[h * src16.stride() + w] -
-                dst16.TopLeftPixel()[h * dst16.stride() + w];
-          }
-        }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
      }
 
      fwd_txfm_ref(in, &coeff, size_, tx_type_);
 
-      if (bit_depth_ == VPX_BITS_8) {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst.TopLeftPixel()));
-#if CONFIG_VP9_HIGHBITDEPTH
-      } else {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16.TopLeftPixel())));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-      }
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst_));
 
      for (int h = 0; h < size_; ++h) {
        for (int w = 0; w < size_; ++w) {
          int diff;
-#if CONFIG_VP9_HIGHBITDEPTH
-          if (bit_depth_ != 8) {
-            diff = dst16.TopLeftPixel()[h * dst16.stride() + w] -
-                   src16.TopLeftPixel()[h * src16.stride() + w];
+          if (pixel_size_ == 1) {
+            diff = dst_[h * stride_ + w] - src_[h * stride_ + w];
          } else {
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-            diff = dst.TopLeftPixel()[h * dst.stride() + w] -
-                   src.TopLeftPixel()[h * src.stride() + w];
-#if CONFIG_VP9_HIGHBITDEPTH
+            ASSERT_EQ(pixel_size_, 2);
+            const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
+            const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
+            diff = dst[h * stride_ + w] - src[h * stride_ + w];
          }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
          const uint32_t error = diff * diff;
          EXPECT_GE(static_cast<uint32_t>(limit), error)
-              << "Error: " << size_ << "x" << size_ << " IDCT has error "
-              << error << " at " << w << "," << h;
+              << "Error: " << size_ << "x" << size_
+              << " inverse transform has error " << error << " at " << w << ","
+              << h;
+          if (::testing::Test::HasFailure()) {
+            printf("Size: %d Transform type: %d\n", size_, tx_type_);
+            return;
+          }
        }
      }
    }
  }
 
+  FhtFunc fwd_txfm_;
  FhtFuncRef fwd_txfm_ref;
+  IhtWithBdFunc inv_txfm_;
+  ACMRandom rnd_;
+  uint8_t *src_;
+  uint8_t *dst_;
  vpx_bit_depth_t bit_depth_;
  int tx_type_;
  int max_pixel_value_;
  int size_;
+  int stride_;
+  int pixel_size_;
+  int block_size_;
 };
 
-class TransDCT : public TransTestBase,
-                 public ::testing::TestWithParam<DctParam> {
+/* -------------------------------------------------------------------------- */
+
+class TransDCT : public TransTestBase {
  public:
-  TransDCT() {
-    fwd_txfm_ref = fdct_ref;
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    size_ = GET_PARAM(2);
-    tx_type_ = GET_PARAM(3);
-    bit_depth_ = GET_PARAM(4);
-    max_pixel_value_ = (1 << bit_depth_) - 1;
-  }
-
- protected:
-  void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
-    fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride());
-  }
-
-  void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
-    inv_txfm_(in.TopLeftPixel(), out, in.stride());
-  }
-
-  FdctFunc fwd_txfm_;
-  IdctFunc inv_txfm_;
+  TransDCT() { fwd_txfm_ref = fdct_ref; }
 };
 
-TEST_P(TransDCT, AccuracyCheck) { RunAccuracyCheck(1); }
+TEST_P(TransDCT, AccuracyCheck) {
+  int t = 1;
+  if (size_ == 16 && bit_depth_ > 10 && pixel_size_ == 2) {
+    t = 2;
+  } else if (size_ == 32 && bit_depth_ > 10 && pixel_size_ == 2) {
+    t = 7;
+  }
+  RunAccuracyCheck(t);
+}
 
 TEST_P(TransDCT, CoeffCheck) { RunCoeffCheck(); }
 
@@ -415,177 +454,150 @@ TEST_P(TransDCT, MemCheck) { RunMemCheck(); }
 
 TEST_P(TransDCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
 
+static const FuncInfo dct_c_func_info[] = {
 #if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    C, TransDCT,
-    ::testing::Values(
-        make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_10, 32, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_12, 32, 0, VPX_BITS_10),
-        make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 32, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 16, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 16, 0, VPX_BITS_10),
-        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 16, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 8, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 8, 0, VPX_BITS_10),
-        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 8, 0, VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 4, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 4, 0, VPX_BITS_12),
-        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 4, 0, VPX_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    C, TransDCT,
-    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 32, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 16, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 8, 0, VPX_BITS_8),
-        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 4, 0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-#if HAVE_SSE2
-#if !CONFIG_EMULATE_HARDWARE
-#if CONFIG_VP9_HIGHBITDEPTH
-/* TODO:(johannkoenig) Determine why these fail AccuracyCheck
-  make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 32, 0, VPX_BITS_12),
-  make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_12, 16, 0, VPX_BITS_12),
-*/
-INSTANTIATE_TEST_CASE_P(
-    SSE2, TransDCT,
-    ::testing::Values(
-        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 32, 0,
-                   VPX_BITS_10),
-        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_sse2, 32, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_10, 16, 0,
-                   VPX_BITS_10),
-        make_tuple(&vpx_fdct16x16_sse2, &vpx_idct16x16_256_add_sse2, 16, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct8x8_sse2, &idct8x8_10, 8, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct8x8_sse2, &idct8x8_12, 8, 0, VPX_BITS_12),
-        make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 8, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10, 4, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12, 4, 0, VPX_BITS_12),
-        make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_sse2, 4, 0,
-                   VPX_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    SSE2, TransDCT,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_sse2,
-                                 &vpx_idct32x32_1024_add_sse2, 32, 0,
-                                 VPX_BITS_8),
-                      make_tuple(&vpx_fdct16x16_sse2,
-                                 &vpx_idct16x16_256_add_sse2, 16, 0,
-                                 VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 8,
-                                 0, VPX_BITS_8),
-                      make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_sse2, 4,
-                                 0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // !CONFIG_EMULATE_HARDWARE
-#endif  // HAVE_SSE2
-
-#if !CONFIG_VP9_HIGHBITDEPTH
-#if HAVE_SSSE3 && !CONFIG_EMULATE_HARDWARE
-#if !ARCH_X86_64
-// TODO(johannkoenig): high bit depth fdct8x8.
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, TransDCT,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_sse2,
-                                 32, 0, VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_sse2, 8, 0,
-                                 VPX_BITS_8)));
-#else
-// vpx_fdct8x8_ssse3 is only available in 64 bit builds.
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, TransDCT,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_sse2,
-                                 32, 0, VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_sse2,
-                                 8, 0, VPX_BITS_8)));
-#endif  // !ARCH_X86_64
-#endif  // HAVE_SSSE3 && !CONFIG_EMULATE_HARDWARE
-#endif  // !CONFIG_VP9_HIGHBITDEPTH
-
-#if !CONFIG_VP9_HIGHBITDEPTH && HAVE_AVX2 && !CONFIG_EMULATE_HARDWARE
-// TODO(johannkoenig): high bit depth fdct32x32.
-INSTANTIATE_TEST_CASE_P(
-    AVX2, TransDCT, ::testing::Values(make_tuple(&vpx_fdct32x32_avx2,
-                                                 &vpx_idct32x32_1024_add_sse2,
-                                                 32, 0, VPX_BITS_8)));
-
-#endif  // !CONFIG_VP9_HIGHBITDEPTH && HAVE_AVX2 && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_NEON
-#if !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    NEON, TransDCT,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_neon,
-                                 &vpx_idct32x32_1024_add_neon, 32, 0,
-                                 VPX_BITS_8),
-                      make_tuple(&vpx_fdct16x16_neon,
-                                 &vpx_idct16x16_256_add_neon, 16, 0,
-                                 VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 8,
-                                 0, VPX_BITS_8),
-                      make_tuple(&vpx_fdct4x4_neon, &vpx_idct4x4_16_add_neon, 4,
-                                 0, VPX_BITS_8)));
-#endif  // !CONFIG_EMULATE_HARDWARE
-#endif  // HAVE_NEON
-
-#if HAVE_MSA
-#if !CONFIG_VP9_HIGHBITDEPTH
-#if !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    MSA, TransDCT,
-    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_msa, &vpx_idct32x32_1024_add_msa, 32, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_fdct16x16_msa, &vpx_idct16x16_256_add_msa, 16, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 8, 0, VPX_BITS_8),
-        make_tuple(&vpx_fdct4x4_msa, &vpx_idct4x4_16_add_msa, 4, 0,
-                   VPX_BITS_8)));
-#endif  // !CONFIG_EMULATE_HARDWARE
-#endif  // !CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_MSA
-
-#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(VSX, TransDCT,
-                        ::testing::Values(make_tuple(&vpx_fdct4x4_c,
-                                                     &vpx_idct4x4_16_add_vsx, 4,
-                                                     0, VPX_BITS_8)));
-#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-class TransHT : public TransTestBase, public ::testing::TestWithParam<HtParam> {
- public:
-  TransHT() {
-    fwd_txfm_ref = fht_ref;
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    size_ = GET_PARAM(2);
-    tx_type_ = GET_PARAM(3);
-    bit_depth_ = GET_PARAM(4);
-    max_pixel_value_ = (1 << bit_depth_) - 1;
-  }
-
- protected:
-  void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
-    fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride(), tx_type_);
-  }
-
-  void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
-    inv_txfm_(in.TopLeftPixel(), out, in.stride(), tx_type_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
+  { &fdct_wrapper<vpx_highbd_fdct4x4_c>,
+    &highbd_idct_wrapper<vpx_highbd_idct4x4_16_add_c>, 4, 2 },
+  { &fdct_wrapper<vpx_highbd_fdct8x8_c>,
+    &highbd_idct_wrapper<vpx_highbd_idct8x8_64_add_c>, 8, 2 },
+  { &fdct_wrapper<vpx_highbd_fdct16x16_c>,
+    &highbd_idct_wrapper<vpx_highbd_idct16x16_256_add_c>, 16, 2 },
+  { &fdct_wrapper<vpx_highbd_fdct32x32_c>,
+    &highbd_idct_wrapper<vpx_highbd_idct32x32_1024_add_c>, 32, 2 },
+#endif
+  { &fdct_wrapper<vpx_fdct4x4_c>, &idct_wrapper<vpx_idct4x4_16_add_c>, 4, 1 },
+  { &fdct_wrapper<vpx_fdct8x8_c>, &idct_wrapper<vpx_idct8x8_64_add_c>, 8, 1 },
+  { &fdct_wrapper<vpx_fdct16x16_c>, &idct_wrapper<vpx_idct16x16_256_add_c>, 16,
+    1 },
+  { &fdct_wrapper<vpx_fdct32x32_c>, &idct_wrapper<vpx_idct32x32_1024_add_c>, 32,
+    1 }
 };
 
-TEST_P(TransHT, AccuracyCheck) { RunAccuracyCheck(1); }
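The FuncInfo tables work because each templated wrapper bakes a concrete C function into a new function with one uniform signature, so plain, hybrid and high-bitdepth transforms can all share a single table type. A minimal sketch of the pattern (standalone; the type names here are illustrative, not the patch's):

    #include <cstdint>

    typedef void (*NarrowFn)(const int16_t *in, int32_t *out, int stride);

    // Adapts a (in, out, stride) transform to a wider table signature.
    template <NarrowFn fn>
    void wrapper(const int16_t *in, int32_t *out, int stride, int tx_type) {
      (void)tx_type;  // unused by non-hybrid transforms
      fn(in, out, stride);
    }

Each wrapper<some_fn> instantiation is an ordinary function with its own address, which is what lets the arrays above and below store heterogeneous kernels as uniform data.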
+INSTANTIATE_TEST_CASE_P( + C, TransDCT, + ::testing::Combine( + ::testing::Range(0, static_cast(sizeof(dct_c_func_info) / + sizeof(dct_c_func_info[0]))), + ::testing::Values(dct_c_func_info), ::testing::Values(0), + ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12))); + +#if !CONFIG_EMULATE_HARDWARE + +#if HAVE_SSE2 +static const FuncInfo dct_sse2_func_info[] = { +#if CONFIG_VP9_HIGHBITDEPTH + { &fdct_wrapper, + &highbd_idct_wrapper, 4, 2 }, + { &fdct_wrapper, + &highbd_idct_wrapper, 8, 2 }, + { &fdct_wrapper, + &highbd_idct_wrapper, 16, 2 }, + { &fdct_wrapper, + &highbd_idct_wrapper, 32, 2 }, +#endif + { &fdct_wrapper, &idct_wrapper, 4, + 1 }, + { &fdct_wrapper, &idct_wrapper, 8, + 1 }, + { &fdct_wrapper, + &idct_wrapper, 16, 1 }, + { &fdct_wrapper, + &idct_wrapper, 32, 1 } +}; + +INSTANTIATE_TEST_CASE_P( + SSE2, TransDCT, + ::testing::Combine( + ::testing::Range(0, static_cast(sizeof(dct_sse2_func_info) / + sizeof(dct_sse2_func_info[0]))), + ::testing::Values(dct_sse2_func_info), ::testing::Values(0), + ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12))); +#endif // HAVE_SSE2 + +#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64 +// vpx_fdct8x8_ssse3 is only available in 64 bit builds. +static const FuncInfo dct_ssse3_func_info = { + &fdct_wrapper, &idct_wrapper, 8, 1 +}; + +// TODO(johannkoenig): high bit depth fdct8x8. +INSTANTIATE_TEST_CASE_P(SSSE3, TransDCT, + ::testing::Values(make_tuple(0, &dct_ssse3_func_info, 0, + VPX_BITS_8))); +#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64 + +#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH +static const FuncInfo dct_avx2_func_info = { + &fdct_wrapper, &idct_wrapper, + 32, 1 +}; + +// TODO(johannkoenig): high bit depth fdct32x32. +INSTANTIATE_TEST_CASE_P(AVX2, TransDCT, + ::testing::Values(make_tuple(0, &dct_avx2_func_info, 0, + VPX_BITS_8))); +#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH + +#if HAVE_NEON +static const FuncInfo dct_neon_func_info[4] = { + { &fdct_wrapper, &idct_wrapper, 4, + 1 }, + { &fdct_wrapper, &idct_wrapper, 8, + 1 }, + { &fdct_wrapper, + &idct_wrapper, 16, 1 }, + { &fdct_wrapper, + &idct_wrapper, 32, 1 } +}; + +INSTANTIATE_TEST_CASE_P( + NEON, TransDCT, + ::testing::Combine(::testing::Range(0, 4), + ::testing::Values(dct_neon_func_info), + ::testing::Values(0), ::testing::Values(VPX_BITS_8))); +#endif // HAVE_NEON + +#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH +static const FuncInfo dct_msa_func_info[4] = { + { &fdct_wrapper, &idct_wrapper, 4, + 1 }, + { &fdct_wrapper, &idct_wrapper, 8, + 1 }, + { &fdct_wrapper, &idct_wrapper, + 16, 1 }, + { &fdct_wrapper, &idct_wrapper, + 32, 1 } +}; + +INSTANTIATE_TEST_CASE_P(MSA, TransDCT, + ::testing::Combine(::testing::Range(0, 4), + ::testing::Values(dct_msa_func_info), + ::testing::Values(0), + ::testing::Values(VPX_BITS_8))); +#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH + +#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH +static const FuncInfo dct_vsx_func_info = { + &fdct_wrapper, &idct_wrapper, 4, 1 +}; + +INSTANTIATE_TEST_CASE_P(VSX, TransDCT, + ::testing::Values(make_tuple(0, &dct_vsx_func_info, 0, + VPX_BITS_8))); +#endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && + +#endif // !CONFIG_EMULATE_HARDWARE + +/* -------------------------------------------------------------------------- */ + +class TransHT : public TransTestBase { + public: + TransHT() { fwd_txfm_ref = fht_ref; } +}; + +TEST_P(TransHT, AccuracyCheck) { + RunAccuracyCheck(size_ == 16 && bit_depth_ > 10 && pixel_size_ == 2 ? 
2 : 1); +} TEST_P(TransHT, CoeffCheck) { RunCoeffCheck(); } @@ -593,117 +605,109 @@ TEST_P(TransHT, MemCheck) { RunMemCheck(); } TEST_P(TransHT, InvAccuracyCheck) { RunInvAccuracyCheck(1); } -/* TODO:(johannkoenig) Determine why these fail AccuracyCheck - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 0, VPX_BITS_12), - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 1, VPX_BITS_12), - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 2, VPX_BITS_12), - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 3, VPX_BITS_12), - */ +static const FuncInfo ht_c_func_info[] = { #if CONFIG_VP9_HIGHBITDEPTH + { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper, 4, + 2 }, + { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper, 8, + 2 }, + { &vp9_highbd_fht16x16_c, &highbd_iht_wrapper, + 16, 2 }, +#endif + { &vp9_fht4x4_c, &iht_wrapper, 4, 1 }, + { &vp9_fht8x8_c, &iht_wrapper, 8, 1 }, + { &vp9_fht16x16_c, &iht_wrapper, 16, 1 } +}; + INSTANTIATE_TEST_CASE_P( C, TransHT, - ::testing::Values( - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 0, VPX_BITS_10), - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 1, VPX_BITS_10), - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 2, VPX_BITS_10), - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 3, VPX_BITS_10), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 0, VPX_BITS_8), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 1, VPX_BITS_8), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 2, VPX_BITS_8), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 3, VPX_BITS_8), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 0, VPX_BITS_10), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 1, VPX_BITS_10), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 2, VPX_BITS_10), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 3, VPX_BITS_10), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 0, VPX_BITS_12), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 1, VPX_BITS_12), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 2, VPX_BITS_12), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 3, VPX_BITS_12), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 0, VPX_BITS_8), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 1, VPX_BITS_8), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 2, VPX_BITS_8), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 3, VPX_BITS_8), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 0, VPX_BITS_10), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 1, VPX_BITS_10), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 2, VPX_BITS_10), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 3, VPX_BITS_10), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 0, VPX_BITS_12), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 1, VPX_BITS_12), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 2, VPX_BITS_12), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 3, VPX_BITS_12), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 0, VPX_BITS_8), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 1, VPX_BITS_8), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 2, VPX_BITS_8), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 3, VPX_BITS_8))); -#else + ::testing::Combine( + ::testing::Range(0, static_cast(sizeof(ht_c_func_info) / + sizeof(ht_c_func_info[0]))), + ::testing::Values(ht_c_func_info), ::testing::Range(0, 4), + ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12))); + +#if !CONFIG_EMULATE_HARDWARE + +#if HAVE_NEON + +static const FuncInfo ht_neon_func_info[] = { +#if 
CONFIG_VP9_HIGHBITDEPTH + { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper, 4, + 2 }, + { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper, 8, + 2 }, + { &vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 2 }, +#endif + { &vp9_fht4x4_c, &iht_wrapper, 4, 1 }, + { &vp9_fht8x8_c, &iht_wrapper, 8, 1 }, + { &vp9_fht16x16_c, &iht_wrapper, 16, 1 } +}; + INSTANTIATE_TEST_CASE_P( - C, TransHT, - ::testing::Values( - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 0, VPX_BITS_8), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 1, VPX_BITS_8), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 2, VPX_BITS_8), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 3, VPX_BITS_8), - - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 0, VPX_BITS_8), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 1, VPX_BITS_8), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 2, VPX_BITS_8), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 3, VPX_BITS_8), - - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 0, VPX_BITS_8), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 1, VPX_BITS_8), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 2, VPX_BITS_8), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 3, VPX_BITS_8))); -#endif // CONFIG_VP9_HIGHBITDEPTH + NEON, TransHT, + ::testing::Combine( + ::testing::Range(0, static_cast(sizeof(ht_neon_func_info) / + sizeof(ht_neon_func_info[0]))), + ::testing::Values(ht_neon_func_info), ::testing::Range(0, 4), + ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12))); +#endif // HAVE_NEON #if HAVE_SSE2 -INSTANTIATE_TEST_CASE_P( - SSE2, TransHT, - ::testing::Values( - make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 0, - VPX_BITS_8), - make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 1, - VPX_BITS_8), - make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 2, - VPX_BITS_8), - make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 3, - VPX_BITS_8), - make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 0, VPX_BITS_8), - make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 1, VPX_BITS_8), - make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 2, VPX_BITS_8), - make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 3, VPX_BITS_8), +static const FuncInfo ht_sse2_func_info[3] = { + { &vp9_fht4x4_sse2, &iht_wrapper, 4, 1 }, + { &vp9_fht8x8_sse2, &iht_wrapper, 8, 1 }, + { &vp9_fht16x16_sse2, &iht_wrapper, 16, 1 } +}; - make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 0, VPX_BITS_8), - make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 1, VPX_BITS_8), - make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 2, VPX_BITS_8), - make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 3, - VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(SSE2, TransHT, + ::testing::Combine(::testing::Range(0, 3), + ::testing::Values(ht_sse2_func_info), + ::testing::Range(0, 4), + ::testing::Values(VPX_BITS_8))); #endif // HAVE_SSE2 -class TransWHT : public TransTestBase, - public ::testing::TestWithParam { +#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH +static const FuncInfo ht_sse4_1_func_info[3] = { + { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper, + 4, 2 }, + { vp9_highbd_fht8x8_c, &highbd_iht_wrapper, + 8, 2 }, + { &vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 2 } +}; + +INSTANTIATE_TEST_CASE_P( + SSE4_1, TransHT, + ::testing::Combine(::testing::Range(0, 3), + ::testing::Values(ht_sse4_1_func_info), + ::testing::Range(0, 4), + ::testing::Values(VPX_BITS_8, VPX_BITS_10, + VPX_BITS_12))); +#endif 
// HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH + +#if HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH +static const FuncInfo ht_vsx_func_info[3] = { + { &vp9_fht4x4_c, &iht_wrapper, 4, 1 }, + { &vp9_fht8x8_c, &iht_wrapper, 8, 1 }, + { &vp9_fht16x16_c, &iht_wrapper, 16, 1 } +}; + +INSTANTIATE_TEST_CASE_P(VSX, TransHT, + ::testing::Combine(::testing::Range(0, 3), + ::testing::Values(ht_vsx_func_info), + ::testing::Range(0, 4), + ::testing::Values(VPX_BITS_8))); +#endif // HAVE_VSX +#endif // !CONFIG_EMULATE_HARDWARE + +/* -------------------------------------------------------------------------- */ + +class TransWHT : public TransTestBase { public: - TransWHT() { - fwd_txfm_ref = fwht_ref; - fwd_txfm_ = GET_PARAM(0); - inv_txfm_ = GET_PARAM(1); - size_ = GET_PARAM(2); - tx_type_ = GET_PARAM(3); - bit_depth_ = GET_PARAM(4); - max_pixel_value_ = (1 << bit_depth_) - 1; - } - - protected: - void RunFwdTxfm(const Buffer &in, Buffer *out) { - fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride()); - } - - void RunInvTxfm(const Buffer &in, uint8_t *out) { - inv_txfm_(in.TopLeftPixel(), out, in.stride()); - } - - FdctFunc fwd_txfm_; - IdctFunc inv_txfm_; + TransWHT() { fwd_txfm_ref = fwht_ref; } }; TEST_P(TransWHT, AccuracyCheck) { RunAccuracyCheck(0); } @@ -714,24 +718,39 @@ TEST_P(TransWHT, MemCheck) { RunMemCheck(); } TEST_P(TransWHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); } +static const FuncInfo wht_c_func_info[] = { #if CONFIG_VP9_HIGHBITDEPTH + { &fdct_wrapper, + &highbd_idct_wrapper, 4, 2 }, +#endif + { &fdct_wrapper, &idct_wrapper, 4, 1 } +}; + INSTANTIATE_TEST_CASE_P( C, TransWHT, - ::testing::Values( - make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 4, 0, VPX_BITS_10), - make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 4, 0, VPX_BITS_12), - make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 4, 0, VPX_BITS_8))); -#else -INSTANTIATE_TEST_CASE_P(C, TransWHT, - ::testing::Values(make_tuple(&vp9_fwht4x4_c, - &vpx_iwht4x4_16_add_c, 4, - 0, VPX_BITS_8))); -#endif // CONFIG_VP9_HIGHBITDEPTH + ::testing::Combine( + ::testing::Range(0, static_cast(sizeof(wht_c_func_info) / + sizeof(wht_c_func_info[0]))), + ::testing::Values(wht_c_func_info), ::testing::Values(0), + ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12))); + +#if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE +static const FuncInfo wht_sse2_func_info = { + &fdct_wrapper, &idct_wrapper, 4, 1 +}; -#if HAVE_SSE2 INSTANTIATE_TEST_CASE_P(SSE2, TransWHT, - ::testing::Values(make_tuple(&vp9_fwht4x4_sse2, - &vpx_iwht4x4_16_add_sse2, - 4, 0, VPX_BITS_8))); -#endif // HAVE_SSE2 + ::testing::Values(make_tuple(0, &wht_sse2_func_info, 0, + VPX_BITS_8))); +#endif // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE + +#if HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH +static const FuncInfo wht_vsx_func_info = { + &fdct_wrapper, &idct_wrapper, 4, 1 +}; + +INSTANTIATE_TEST_CASE_P(VSX, TransWHT, + ::testing::Values(make_tuple(0, &wht_vsx_func_info, 0, + VPX_BITS_8))); +#endif // HAVE_VSX && !CONFIG_EMULATE_HARDWARE } // namespace diff --git a/media/libvpx/libvpx/test/decode_api_test.cc b/media/libvpx/libvpx/test/decode_api_test.cc index 4167cf3e0f07..d4b67ccdb8e6 100644 --- a/media/libvpx/libvpx/test/decode_api_test.cc +++ b/media/libvpx/libvpx/test/decode_api_test.cc @@ -138,8 +138,30 @@ TEST(DecodeAPI, Vp9InvalidDecode) { EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec)); } -TEST(DecodeAPI, Vp9PeekSI) { +void TestPeekInfo(const uint8_t *const data, uint32_t data_sz, + uint32_t peek_size) { const vpx_codec_iface_t *const codec = 
&vpx_codec_vp9_dx_algo; + // Verify behavior of vpx_codec_decode. vpx_codec_decode doesn't even get + // to decoder_peek_si_internal on frames of size < 8. + if (data_sz >= 8) { + vpx_codec_ctx_t dec; + EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0)); + EXPECT_EQ((data_sz < peek_size) ? VPX_CODEC_UNSUP_BITSTREAM + : VPX_CODEC_CORRUPT_FRAME, + vpx_codec_decode(&dec, data, data_sz, NULL, 0)); + vpx_codec_iter_t iter = NULL; + EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter)); + EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec)); + } + + // Verify behavior of vpx_codec_peek_stream_info. + vpx_codec_stream_info_t si; + si.sz = sizeof(si); + EXPECT_EQ((data_sz < peek_size) ? VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_OK, + vpx_codec_peek_stream_info(codec, data, data_sz, &si)); +} + +TEST(DecodeAPI, Vp9PeekStreamInfo) { // The first 9 bytes are valid and the rest of the bytes are made up. Until // size 10, this should return VPX_CODEC_UNSUP_BITSTREAM and after that it // should return VPX_CODEC_CORRUPT_FRAME. @@ -150,24 +172,18 @@ TEST(DecodeAPI, Vp9PeekSI) { }; for (uint32_t data_sz = 1; data_sz <= 32; ++data_sz) { - // Verify behavior of vpx_codec_decode. vpx_codec_decode doesn't even get - // to decoder_peek_si_internal on frames of size < 8. - if (data_sz >= 8) { - vpx_codec_ctx_t dec; - EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0)); - EXPECT_EQ( - (data_sz < 10) ? VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_CORRUPT_FRAME, - vpx_codec_decode(&dec, data, data_sz, NULL, 0)); - vpx_codec_iter_t iter = NULL; - EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter)); - EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec)); - } + TestPeekInfo(data, data_sz, 10); + } +} - // Verify behavior of vpx_codec_peek_stream_info. - vpx_codec_stream_info_t si; - si.sz = sizeof(si); - EXPECT_EQ((data_sz < 10) ? VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_OK, - vpx_codec_peek_stream_info(codec, data, data_sz, &si)); +TEST(DecodeAPI, Vp9PeekStreamInfoTruncated) { + // This profile 1 header requires 10.25 bytes, ensure + // vpx_codec_peek_stream_info doesn't over read. + const uint8_t profile1_data[10] = { 0xa4, 0xe9, 0x30, 0x68, 0x53, + 0xe9, 0x30, 0x68, 0x53, 0x04 }; + + for (uint32_t data_sz = 1; data_sz <= 10; ++data_sz) { + TestPeekInfo(profile1_data, data_sz, 11); } } #endif // CONFIG_VP9_DECODER diff --git a/media/libvpx/libvpx/test/decode_corrupted.cc b/media/libvpx/libvpx/test/decode_corrupted.cc new file mode 100644 index 000000000000..b1495ce89ff5 --- /dev/null +++ b/media/libvpx/libvpx/test/decode_corrupted.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <tuple> + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/util.h" +#include "test/i420_video_source.h" +#include "vpx_mem/vpx_mem.h" + +namespace { + +class DecodeCorruptedFrameTest + : public ::libvpx_test::EncoderTest, + public ::testing::TestWithParam< + std::tuple<const libvpx_test::CodecFactory *> > { + public: + DecodeCorruptedFrameTest() : EncoderTest(GET_PARAM(0)) {} + + protected: + virtual ~DecodeCorruptedFrameTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(::libvpx_test::kRealTime); + cfg_.g_lag_in_frames = 0; + cfg_.rc_end_usage = VPX_CBR; + cfg_.rc_buf_sz = 1000; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + + // Set small key frame distance such that we insert more key frames. + cfg_.kf_max_dist = 3; + dec_cfg_.threads = 1; + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 0) encoder->Control(VP8E_SET_CPUUSED, 7); + } + + virtual void MismatchHook(const vpx_image_t * /*img1*/, + const vpx_image_t * /*img2*/) {} + + virtual const vpx_codec_cx_pkt_t *MutateEncoderOutputHook( + const vpx_codec_cx_pkt_t *pkt) { + // Don't edit frame packet on key frame. + if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) return pkt; + if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return pkt; + + memcpy(&modified_pkt_, pkt, sizeof(*pkt)); + + // Halve the size so it's corrupted to decoder. + modified_pkt_.data.frame.sz = modified_pkt_.data.frame.sz / 2; + + return &modified_pkt_; + } + + virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec, + const libvpx_test::VideoSource & /*video*/, + libvpx_test::Decoder *decoder) { + EXPECT_NE(res_dec, VPX_CODEC_MEM_ERROR) << decoder->DecodeError(); + return VPX_CODEC_MEM_ERROR != res_dec; + } + + vpx_codec_cx_pkt_t modified_pkt_; +}; + +TEST_P(DecodeCorruptedFrameTest, DecodeCorruptedFrame) { + cfg_.rc_target_bitrate = 200; + cfg_.g_error_resilient = 0; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 300); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +#if CONFIG_VP9 +INSTANTIATE_TEST_CASE_P( + VP9, DecodeCorruptedFrameTest, + ::testing::Values( + static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9))); +#endif // CONFIG_VP9 + +#if CONFIG_VP8 +INSTANTIATE_TEST_CASE_P( + VP8, DecodeCorruptedFrameTest, + ::testing::Values( + static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP8))); +#endif // CONFIG_VP8 + +} // namespace
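The mutation hook above shrinks each non-key packet before it reaches the decoder, so the corruption this test exercises amounts to handing vpx_codec_decode() a truncated length. A minimal sketch of that call (illustrative only; dec, frame_buf and frame_sz stand for a live decoder context and one encoded frame):

  // Feed only the first half of an encoded frame. Any error except an
  // allocation failure is tolerated; the test above only rejects
  // VPX_CODEC_MEM_ERROR.
  const vpx_codec_err_t res =
      vpx_codec_decode(&dec, frame_buf, frame_sz / 2, NULL, 0);
  EXPECT_NE(res, VPX_CODEC_MEM_ERROR);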
diff --git a/media/libvpx/libvpx/test/decode_perf_test.cc b/media/libvpx/libvpx/test/decode_perf_test.cc index ee26c3c04646..aecdd3e99931 100644 --- a/media/libvpx/libvpx/test/decode_perf_test.cc +++ b/media/libvpx/libvpx/test/decode_perf_test.cc @@ -9,6 +9,8 @@ */ #include <string> +#include <tuple> + #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/encode_test_driver.h" @@ -21,7 +23,7 @@ #include "./ivfenc.h" #include "./vpx_version.h" -using std::tr1::make_tuple; +using std::make_tuple; namespace { @@ -34,7 +36,7 @@ const char kNewEncodeOutputFile[] = "new_encode.ivf"; /* DecodePerfTest takes a tuple of filename + number of threads to decode with */ -typedef std::tr1::tuple<const char *, unsigned> DecodePerfParam; +typedef std::tuple<const char *, unsigned> DecodePerfParam; const DecodePerfParam kVP9DecodePerfVectors[] = { make_tuple("vp90-2-bbb_426x240_tile_1x1_180kbps.webm", 1), @@ -137,7 +139,7 @@ class VP9NewEncodeDecodePerfTest virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { - if (video->frame() == 1) { + if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, speed_); encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1); encoder->Control(VP9E_SET_TILE_COLUMNS, 2); diff --git a/media/libvpx/libvpx/test/decode_svc_test.cc b/media/libvpx/libvpx/test/decode_svc_test.cc index 69f62f13bde3..c6f0873f8973 100644 --- a/media/libvpx/libvpx/test/decode_svc_test.cc +++ b/media/libvpx/libvpx/test/decode_svc_test.cc @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <memory> #include <string> #include "test/codec_factory.h" @@ -53,7 +54,7 @@ class DecodeSvcTest : public ::libvpx_test::DecoderTest, // number of frames decoded. This results in 1/4x1/4 resolution (320x180). TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer0) { const std::string filename = GET_PARAM(1); - testing::internal::scoped_ptr<libvpx_test::IVFVideoSource> video; + std::unique_ptr<libvpx_test::IVFVideoSource> video; video.reset(new libvpx_test::IVFVideoSource(filename)); ASSERT_TRUE(video.get() != NULL); video->Init(); @@ -70,7 +71,7 @@ TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer0) { // number of frames decoded. This results in 1/2x1/2 resolution (640x360). TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer1) { const std::string filename = GET_PARAM(1); - testing::internal::scoped_ptr<libvpx_test::IVFVideoSource> video; + std::unique_ptr<libvpx_test::IVFVideoSource> video; video.reset(new libvpx_test::IVFVideoSource(filename)); ASSERT_TRUE(video.get() != NULL); video->Init(); @@ -87,7 +88,7 @@ TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer1) { // number of frames decoded. This results in the full resolution (1280x720). TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer2) { const std::string filename = GET_PARAM(1); - testing::internal::scoped_ptr<libvpx_test::IVFVideoSource> video; + std::unique_ptr<libvpx_test::IVFVideoSource> video; video.reset(new libvpx_test::IVFVideoSource(filename)); ASSERT_TRUE(video.get() != NULL); video->Init(); @@ -105,7 +106,7 @@ TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer2) { // the decoding should result in the full resolution (1280x720). TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer10) { const std::string filename = GET_PARAM(1); - testing::internal::scoped_ptr<libvpx_test::IVFVideoSource> video; + std::unique_ptr<libvpx_test::IVFVideoSource> video; video.reset(new libvpx_test::IVFVideoSource(filename)); ASSERT_TRUE(video.get() != NULL); video->Init();
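The four DecodeSvcTest cases above cap reconstruction at spatial layers 0, 1, 2 and 10. A minimal sketch of the decoder control they rely on (assuming an initialized VP9 decoder context dec; the control name is the standard libvpx one, not spelled out in this hunk):

  // Ask the decoder to reconstruct spatial layers 0..layer only; layer = 0
  // keeps just the base layer (1/4 x 1/4 resolution in these streams).
  const int layer = 0;
  vpx_codec_control(&dec, VP9_DECODE_SVC_SPATIAL_LAYER, layer);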
diff --git a/media/libvpx/libvpx/test/decode_test_driver.cc b/media/libvpx/libvpx/test/decode_test_driver.cc index 48680eb8e975..ae23587759d3 100644 --- a/media/libvpx/libvpx/test/decode_test_driver.cc +++ b/media/libvpx/libvpx/test/decode_test_driver.cc @@ -52,9 +52,10 @@ void DecoderTest::HandlePeekResult(Decoder *const decoder, /* Vp8's implementation of PeekStream returns an error if the frame you * pass it is not a keyframe, so we only expect VPX_CODEC_OK on the first * frame, which must be a keyframe. */ - if (video->frame_number() == 0) + if (video->frame_number() == 0) { ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: " << vpx_codec_err_to_string(res_peek); + } } else { /* The Vp9 implementation of PeekStream returns an error only if the * data passed to it isn't a valid Vp9 chunk. */ @@ -97,7 +98,7 @@ void DecoderTest::RunLoop(CompressedVideoSource *video, const vpx_image_t *img = NULL; // Get decompressed data - while ((img = dec_iter.Next())) { + while (!::testing::Test::HasFailure() && (img = dec_iter.Next())) { DecompressedFrameHook(*img, video->frame_number()); } } diff --git a/media/libvpx/libvpx/test/decode_test_driver.h b/media/libvpx/libvpx/test/decode_test_driver.h index 644fc9e90dc1..04876cdd7cb6 100644 --- a/media/libvpx/libvpx/test/decode_test_driver.h +++ b/media/libvpx/libvpx/test/decode_test_driver.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TEST_DECODE_TEST_DRIVER_H_ -#define TEST_DECODE_TEST_DRIVER_H_ +#ifndef VPX_TEST_DECODE_TEST_DRIVER_H_ +#define VPX_TEST_DECODE_TEST_DRIVER_H_ #include <cstdio> #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" @@ -159,4 +159,4 @@ class DecoderTest { } // namespace libvpx_test -#endif // TEST_DECODE_TEST_DRIVER_H_ +#endif // VPX_TEST_DECODE_TEST_DRIVER_H_ diff --git a/media/libvpx/libvpx/test/encode_perf_test.cc b/media/libvpx/libvpx/test/encode_perf_test.cc index 0bb435502b39..142d9e2da8ef 100644 --- a/media/libvpx/libvpx/test/encode_perf_test.cc +++ b/media/libvpx/libvpx/test/encode_perf_test.cc @@ -48,7 +48,7 @@ const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = { EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470), }; -const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 }; +const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8, 9 }; const int kEncodePerfTestThreads[] = { 1, 2, 4 }; #define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0])) diff --git a/media/libvpx/libvpx/test/encode_test_driver.cc b/media/libvpx/libvpx/test/encode_test_driver.cc index b2cbc3f05bdf..8fdbdb62ae7d 100644 --- a/media/libvpx/libvpx/test/encode_test_driver.cc +++ b/media/libvpx/libvpx/test/encode_test_driver.cc @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree.
*/ +#include <memory> #include <string> #include "third_party/googletest/src/include/gtest/gtest.h" @@ -128,6 +129,8 @@ static bool compare_img(const vpx_image_t *img1, const vpx_image_t *img2) { bool match = (img1->fmt == img2->fmt) && (img1->cs == img2->cs) && (img1->d_w == img2->d_w) && (img1->d_h == img2->d_h); + if (!match) return false; + const unsigned int width_y = img1->d_w; const unsigned int height_y = img1->d_h; unsigned int i; @@ -177,7 +180,7 @@ void EncoderTest::RunLoop(VideoSource *video) { } BeginPassHook(pass); - testing::internal::scoped_ptr<Encoder> encoder( + std::unique_ptr<Encoder> encoder( codec_->CreateEncoder(cfg_, deadline_, init_flags_, &stats_)); ASSERT_TRUE(encoder.get() != NULL); @@ -191,7 +194,7 @@ void EncoderTest::RunLoop(VideoSource *video) { if (init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION) { dec_init_flags |= VPX_CODEC_USE_INPUT_FRAGMENTS; } - testing::internal::scoped_ptr<Decoder> decoder( + std::unique_ptr<Decoder> decoder( codec_->CreateDecoder(dec_cfg, dec_init_flags)); bool again; for (again = true; again; video->Next()) { @@ -214,6 +217,7 @@ void EncoderTest::RunLoop(VideoSource *video) { case VPX_CODEC_CX_FRAME_PKT: has_cxdata = true; if (decoder.get() != NULL && DoDecode()) { + PreDecodeFrameHook(video, decoder.get()); vpx_codec_err_t res_dec = decoder->DecodeFrame( (const uint8_t *)pkt->data.frame.buf, pkt->data.frame.sz); diff --git a/media/libvpx/libvpx/test/encode_test_driver.h b/media/libvpx/libvpx/test/encode_test_driver.h index 89a3b1767e27..3edba4b926a5 100644 --- a/media/libvpx/libvpx/test/encode_test_driver.h +++ b/media/libvpx/libvpx/test/encode_test_driver.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TEST_ENCODE_TEST_DRIVER_H_ -#define TEST_ENCODE_TEST_DRIVER_H_ +#ifndef VPX_TEST_ENCODE_TEST_DRIVER_H_ +#define VPX_TEST_ENCODE_TEST_DRIVER_H_ #include <string> #include <vector> @@ -128,24 +128,37 @@ class Encoder { ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } + void Control(int ctrl_id, struct vpx_svc_ref_frame_config *arg) { + const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); + } + void Control(int ctrl_id, struct vpx_svc_parameters *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } + + void Control(int ctrl_id, struct vpx_svc_frame_drop *arg) { + const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); + } + + void Control(int ctrl_id, struct vpx_svc_spatial_layer_sync *arg) { + const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); + } + #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER void Control(int ctrl_id, vpx_active_map_t *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } -#endif -#if CONFIG_VP8_ENCODER void Control(int ctrl_id, vpx_roi_map_t *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } #endif - void Config(const vpx_codec_enc_cfg_t *cfg) { const vpx_codec_err_t res = vpx_codec_enc_config_set(&encoder_, cfg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); @@ -219,6 +232,9 @@ class EncoderTest { virtual void PreEncodeFrameHook(VideoSource * /*video*/, Encoder * /*encoder*/) {} + virtual void PreDecodeFrameHook(VideoSource * /*video*/, + Decoder *
/*decoder*/) {} + virtual void PostEncodeFrameHook(Encoder * /*encoder*/) {} // Hook to be called on every compressed data packet. @@ -273,4 +289,4 @@ class EncoderTest { } // namespace libvpx_test -#endif // TEST_ENCODE_TEST_DRIVER_H_ +#endif // VPX_TEST_ENCODE_TEST_DRIVER_H_ diff --git a/media/libvpx/libvpx/test/external_frame_buffer_test.cc b/media/libvpx/libvpx/test/external_frame_buffer_test.cc index dbf2971198c8..438eeb3ecdf5 100644 --- a/media/libvpx/libvpx/test/external_frame_buffer_test.cc +++ b/media/libvpx/libvpx/test/external_frame_buffer_test.cc @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <memory> #include <string> #include "./vpx_config.h" @@ -113,9 +114,9 @@ class ExternalFrameBufferList { return 0; } - // Checks that the ximage data is contained within the external frame buffer - // private data passed back in the ximage. - void CheckXImageFrameBuffer(const vpx_image_t *img) { + // Checks that the vpx_image_t data is contained within the external frame + // buffer private data passed back in the vpx_image_t. + void CheckImageFrameBuffer(const vpx_image_t *img) { if (img->fb_priv != NULL) { const struct ExternalFrameBuffer *const ext_fb = reinterpret_cast<ExternalFrameBuffer *>(img->fb_priv); @@ -335,14 +336,13 @@ class ExternalFrameBufferTest : public ::testing::Test { return VPX_CODEC_OK; } - protected: void CheckDecodedFrames() { libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData(); const vpx_image_t *img = NULL; // Get decompressed data while ((img = dec_iter.Next()) != NULL) { - fb_list_.CheckXImageFrameBuffer(img); + fb_list_.CheckImageFrameBuffer(img); } } @@ -393,7 +393,7 @@ TEST_P(ExternalFrameBufferMD5Test, ExtFBMD5Match) { #endif // Open compressed video file. - testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video; + std::unique_ptr<libvpx_test::CompressedVideoSource> video; if (filename.substr(filename.length() - 3, 3) == "ivf") { video.reset(new libvpx_test::IVFVideoSource(filename)); } else { diff --git a/media/libvpx/libvpx/test/fdct8x8_test.cc b/media/libvpx/libvpx/test/fdct8x8_test.cc index 5021dda9b38f..1d4f31871f0c 100644 --- a/media/libvpx/libvpx/test/fdct8x8_test.cc +++ b/media/libvpx/libvpx/test/fdct8x8_test.cc @@ -11,6 +11,7 @@ #include <math.h> #include <stdlib.h> #include <string.h> +#include <tuple> #include "third_party/googletest/src/include/gtest/gtest.h" @@ -43,9 +44,9 @@ typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride, typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, int tx_type); -typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param; -typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param; -typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param; +typedef std::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param; +typedef std::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param; +typedef std::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param; void reference_8x8_dct_1d(const double in[8], double out[8]) { const double kInvSqrt2 = 0.707106781186547524400844362104; @@ -628,7 +629,7 @@ TEST_P(InvTrans8x8DCT, CompareReference) { CompareInvReference(ref_txfm_, thresh_); }
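The std::tr1-to-std::tuple moves in this file and the ones below are mechanical; the parameter tuples keep the same element order. A sketch of how a Dct8x8Param is consumed (illustrative only, not a line of this patch):

  // With std::tuple the four elements can be read positionally:
  void UnpackDct8x8Param(const Dct8x8Param &param) {
    const FdctFunc fwd = std::get<0>(param);        // forward transform
    const IdctFunc inv = std::get<1>(param);        // inverse transform
    const int tx_type = std::get<2>(param);         // transform type
    const vpx_bit_depth_t bd = std::get<3>(param);  // bit depth under test
    (void)fwd; (void)inv; (void)tx_type; (void)bd;
  }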
-using std::tr1::make_tuple; +using std::make_tuple; #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( @@ -675,6 +676,7 @@ INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT, ::testing::Values(make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 0, VPX_BITS_8))); + #if !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( NEON, FwdTrans8x8HT, @@ -735,7 +737,7 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12))); #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE -#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \ +#if HAVE_SSSE3 && VPX_ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \ !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT, ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3, diff --git a/media/libvpx/libvpx/test/frame_size_tests.cc b/media/libvpx/libvpx/test/frame_size_tests.cc index 5a9b166e5b1d..f66972b4a19a 100644 --- a/media/libvpx/libvpx/test/frame_size_tests.cc +++ b/media/libvpx/libvpx/test/frame_size_tests.cc @@ -34,7 +34,7 @@ class VP9FrameSizeTestsLarge : public ::libvpx_test::EncoderTest, virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { - if (video->frame() == 1) { + if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, 7); encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); diff --git a/media/libvpx/libvpx/test/hadamard_test.cc b/media/libvpx/libvpx/test/hadamard_test.cc index 3b7cfeddcfe3..6b7aae3d50e9 100644 --- a/media/libvpx/libvpx/test/hadamard_test.cc +++ b/media/libvpx/libvpx/test/hadamard_test.cc @@ -25,13 +25,13 @@ using ::libvpx_test::ACMRandom; typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride, tran_low_t *b); -void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) { - int16_t b[8]; +void hadamard_loop(const tran_low_t *a, tran_low_t *out) { + tran_low_t b[8]; for (int i = 0; i < 8; i += 2) { - b[i + 0] = a[i * a_stride] + a[(i + 1) * a_stride]; - b[i + 1] = a[i * a_stride] - a[(i + 1) * a_stride]; + b[i + 0] = a[i * 8] + a[(i + 1) * 8]; + b[i + 1] = a[i * 8] - a[(i + 1) * 8]; } - int16_t c[8]; + tran_low_t c[8]; for (int i = 0; i < 8; i += 4) { c[i + 0] = b[i + 0] + b[i + 2]; c[i + 1] = b[i + 1] + b[i + 3]; @@ -49,12 +49,15 @@ void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) { } void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) { - int16_t buf[64]; - int16_t buf2[64]; - for (int i = 0; i < 8; ++i) hadamard_loop(a + i, a_stride, buf + i * 8); - for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, 8, buf2 + i * 8); - - for (int i = 0; i < 64; ++i) b[i] = (tran_low_t)buf2[i]; + tran_low_t input[64]; + tran_low_t buf[64]; + for (int i = 0; i < 8; ++i) { + for (int j = 0; j < 8; ++j) { + input[i * 8 + j] = static_cast<tran_low_t>(a[i * a_stride + j]); + } + } + for (int i = 0; i < 8; ++i) hadamard_loop(input + i, buf + i * 8); + for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, b + i * 8); } void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) { @@ -89,205 +92,229 @@ void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) { } } -class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> { +void reference_hadamard32x32(const int16_t *a, int a_stride, tran_low_t *b) { + reference_hadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0); + reference_hadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256); + reference_hadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512); + reference_hadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768); + + for (int i = 0; i < 256; ++i) { + const tran_low_t a0 = b[0]; + const tran_low_t a1 = b[256]; + const tran_low_t a2 = b[512]; + const tran_low_t a3 = b[768]; + + const tran_low_t b0 = (a0 + a1) >> 2; + const tran_low_t b1 = (a0 - a1) >> 2; + const tran_low_t b2 = (a2 + a3) >> 2; + const tran_low_t b3 = (a2 - a3) >> 2; + + b[0] = b0 + b2; + b[256] = b1 + b3; + b[512] = b0 - b2; + b[768] = b1 - b3; + + ++b; + } +}
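The reference_hadamard* routines above are separable Walsh-Hadamard butterflies. A compact 1-D form of the same recurrence (unnormalized, illustrative only):

  // One in-place 1-D Walsh-Hadamard pass over n = 2^k values.
  static void wht_1d(int32_t *v, int n) {
    for (int len = 1; len < n; len <<= 1) {
      for (int i = 0; i < n; i += 2 * len) {
        for (int j = i; j < i + len; ++j) {
          const int32_t a = v[j];
          const int32_t b = v[j + len];
          v[j] = a + b;        // sum lane
          v[j + len] = a - b;  // difference lane
        }
      }
    }
  }

reference_hadamard32x32() applies the same sum/difference step across its four 16x16 quadrants, with the >> 2 shift keeping results inside the tran_low_t range.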
+struct HadamardFuncWithSize { + HadamardFuncWithSize(HadamardFunc f, int s) : func(f), block_size(s) {} + HadamardFunc func; + int block_size; +}; + +std::ostream &operator<<(std::ostream &os, const HadamardFuncWithSize &hfs) { + return os << "block size: " << hfs.block_size; +} + +class HadamardTestBase : public ::testing::TestWithParam<HadamardFuncWithSize> { public: virtual void SetUp() { - h_func_ = GetParam(); + h_func_ = GetParam().func; + bwh_ = GetParam().block_size; + block_size_ = bwh_ * bwh_; rnd_.Reset(ACMRandom::DeterministicSeed()); } + virtual int16_t Rand() = 0; + + void ReferenceHadamard(const int16_t *a, int a_stride, tran_low_t *b, + int bwh) { + if (bwh == 32) + reference_hadamard32x32(a, a_stride, b); + else if (bwh == 16) + reference_hadamard16x16(a, a_stride, b); + else + reference_hadamard8x8(a, a_stride, b); + } + + void CompareReferenceRandom() { + const int kMaxBlockSize = 32 * 32; + DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]); + DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]); + memset(a, 0, sizeof(a)); + memset(b, 0, sizeof(b)); + + tran_low_t b_ref[kMaxBlockSize]; + memset(b_ref, 0, sizeof(b_ref)); + + for (int i = 0; i < block_size_; ++i) a[i] = Rand(); + + ReferenceHadamard(a, bwh_, b_ref, bwh_); + ASM_REGISTER_STATE_CHECK(h_func_(a, bwh_, b)); + + // The order of the output is not important. Sort before checking. + std::sort(b, b + block_size_); + std::sort(b_ref, b_ref + block_size_); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); + } + + void VaryStride() { + const int kMaxBlockSize = 32 * 32; + DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]); + DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]); + memset(a, 0, sizeof(a)); + for (int i = 0; i < block_size_ * 8; ++i) a[i] = Rand(); + + tran_low_t b_ref[kMaxBlockSize]; + for (int i = 8; i < 64; i += 8) { + memset(b, 0, sizeof(b)); + memset(b_ref, 0, sizeof(b_ref)); + + ReferenceHadamard(a, i, b_ref, bwh_); + ASM_REGISTER_STATE_CHECK(h_func_(a, i, b)); + + // The order of the output is not important. Sort before checking.
+ std::sort(b, b + block_size_); + std::sort(b_ref, b_ref + block_size_); + EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); + } + } + + void SpeedTest(int times) { + const int kMaxBlockSize = 32 * 32; + DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]); + DECLARE_ALIGNED(16, tran_low_t, output[kMaxBlockSize]); + memset(input, 1, sizeof(input)); + memset(output, 0, sizeof(output)); + + vpx_usec_timer timer; + vpx_usec_timer_start(&timer); + for (int i = 0; i < times; ++i) { + h_func_(input, bwh_, output); + } + vpx_usec_timer_mark(&timer); + + const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer)); + printf("Hadamard%dx%d[%12d runs]: %d us\n", bwh_, bwh_, times, + elapsed_time); + } + protected: + int bwh_; + int block_size_; HadamardFunc h_func_; ACMRandom rnd_; }; -void HadamardSpeedTest(const char *name, HadamardFunc const func, - const int16_t *input, int stride, tran_low_t *output, - int times) { - int i; - vpx_usec_timer timer; +class HadamardLowbdTest : public HadamardTestBase { + protected: + virtual int16_t Rand() { return rnd_.Rand9Signed(); } +}; - vpx_usec_timer_start(&timer); - for (i = 0; i < times; ++i) { - func(input, stride, output); - } - vpx_usec_timer_mark(&timer); +TEST_P(HadamardLowbdTest, CompareReferenceRandom) { CompareReferenceRandom(); } - const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer)); - printf("%s[%12d runs]: %d us\n", name, times, elapsed_time); +TEST_P(HadamardLowbdTest, VaryStride) { VaryStride(); } + +TEST_P(HadamardLowbdTest, DISABLED_Speed) { + SpeedTest(10); + SpeedTest(10000); + SpeedTest(10000000); } -class Hadamard8x8Test : public HadamardTestBase {}; - -void HadamardSpeedTest8x8(HadamardFunc const func, int times) { - DECLARE_ALIGNED(16, int16_t, input[64]); - DECLARE_ALIGNED(16, tran_low_t, output[64]); - memset(input, 1, sizeof(input)); - HadamardSpeedTest("Hadamard8x8", func, input, 8, output, times); -} - -TEST_P(Hadamard8x8Test, CompareReferenceRandom) { - DECLARE_ALIGNED(16, int16_t, a[64]); - DECLARE_ALIGNED(16, tran_low_t, b[64]); - tran_low_t b_ref[64]; - for (int i = 0; i < 64; ++i) { - a[i] = rnd_.Rand9Signed(); - } - memset(b, 0, sizeof(b)); - memset(b_ref, 0, sizeof(b_ref)); - - reference_hadamard8x8(a, 8, b_ref); - ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b)); - - // The order of the output is not important. Sort before checking. - std::sort(b, b + 64); - std::sort(b_ref, b_ref + 64); - EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); -} - -TEST_P(Hadamard8x8Test, VaryStride) { - DECLARE_ALIGNED(16, int16_t, a[64 * 8]); - DECLARE_ALIGNED(16, tran_low_t, b[64]); - tran_low_t b_ref[64]; - for (int i = 0; i < 64 * 8; ++i) { - a[i] = rnd_.Rand9Signed(); - } - - for (int i = 8; i < 64; i += 8) { - memset(b, 0, sizeof(b)); - memset(b_ref, 0, sizeof(b_ref)); - - reference_hadamard8x8(a, i, b_ref); - ASM_REGISTER_STATE_CHECK(h_func_(a, i, b)); - - // The order of the output is not important. Sort before checking.
- std::sort(b, b + 64); - std::sort(b_ref, b_ref + 64); - EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); - } -} - -TEST_P(Hadamard8x8Test, DISABLED_Speed) { - HadamardSpeedTest8x8(h_func_, 10); - HadamardSpeedTest8x8(h_func_, 10000); - HadamardSpeedTest8x8(h_func_, 10000000); -} - -INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test, - ::testing::Values(&vpx_hadamard_8x8_c)); +INSTANTIATE_TEST_CASE_P( + C, HadamardLowbdTest, + ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_c, 8), + HadamardFuncWithSize(&vpx_hadamard_16x16_c, 16), + HadamardFuncWithSize(&vpx_hadamard_32x32_c, 32))); #if HAVE_SSE2 -INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test, - ::testing::Values(&vpx_hadamard_8x8_sse2)); +INSTANTIATE_TEST_CASE_P( + SSE2, HadamardLowbdTest, + ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_sse2, 8), + HadamardFuncWithSize(&vpx_hadamard_16x16_sse2, 16), + HadamardFuncWithSize(&vpx_hadamard_32x32_sse2, 32))); #endif // HAVE_SSE2 -#if HAVE_SSSE3 && ARCH_X86_64 -INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test, - ::testing::Values(&vpx_hadamard_8x8_ssse3)); -#endif // HAVE_SSSE3 && ARCH_X86_64 +#if HAVE_AVX2 +INSTANTIATE_TEST_CASE_P( + AVX2, HadamardLowbdTest, + ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_16x16_avx2, 16), + HadamardFuncWithSize(&vpx_hadamard_32x32_avx2, 32))); +#endif // HAVE_AVX2 + +#if HAVE_SSSE3 && VPX_ARCH_X86_64 +INSTANTIATE_TEST_CASE_P( + SSSE3, HadamardLowbdTest, + ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_ssse3, 8))); +#endif // HAVE_SSSE3 && VPX_ARCH_X86_64 #if HAVE_NEON -INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test, - ::testing::Values(&vpx_hadamard_8x8_neon)); +INSTANTIATE_TEST_CASE_P( + NEON, HadamardLowbdTest, + ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_neon, 8), + HadamardFuncWithSize(&vpx_hadamard_16x16_neon, 16))); #endif // HAVE_NEON // TODO(jingning): Remove highbitdepth flag when the SIMD functions are // in place and turn on the unit test. 
#if !CONFIG_VP9_HIGHBITDEPTH #if HAVE_MSA -INSTANTIATE_TEST_CASE_P(MSA, Hadamard8x8Test, - ::testing::Values(&vpx_hadamard_8x8_msa)); +INSTANTIATE_TEST_CASE_P( + MSA, HadamardLowbdTest, + ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_msa, 8), + HadamardFuncWithSize(&vpx_hadamard_16x16_msa, 16))); #endif // HAVE_MSA #endif // !CONFIG_VP9_HIGHBITDEPTH #if HAVE_VSX -INSTANTIATE_TEST_CASE_P(VSX, Hadamard8x8Test, - ::testing::Values(&vpx_hadamard_8x8_vsx)); +INSTANTIATE_TEST_CASE_P( + VSX, HadamardLowbdTest, + ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_vsx, 8), + HadamardFuncWithSize(&vpx_hadamard_16x16_vsx, 16))); #endif // HAVE_VSX -class Hadamard16x16Test : public HadamardTestBase {}; +#if CONFIG_VP9_HIGHBITDEPTH +class HadamardHighbdTest : public HadamardTestBase { + protected: + virtual int16_t Rand() { return rnd_.Rand13Signed(); } +}; -void HadamardSpeedTest16x16(HadamardFunc const func, int times) { - DECLARE_ALIGNED(16, int16_t, input[256]); - DECLARE_ALIGNED(16, tran_low_t, output[256]); - memset(input, 1, sizeof(input)); - HadamardSpeedTest("Hadamard16x16", func, input, 16, output, times); +TEST_P(HadamardHighbdTest, CompareReferenceRandom) { CompareReferenceRandom(); } + +TEST_P(HadamardHighbdTest, VaryStride) { VaryStride(); } + +TEST_P(HadamardHighbdTest, DISABLED_Speed) { + SpeedTest(10); + SpeedTest(10000); + SpeedTest(10000000); } -TEST_P(Hadamard16x16Test, CompareReferenceRandom) { - DECLARE_ALIGNED(16, int16_t, a[16 * 16]); - DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]); - tran_low_t b_ref[16 * 16]; - for (int i = 0; i < 16 * 16; ++i) { - a[i] = rnd_.Rand9Signed(); - } - memset(b, 0, sizeof(b)); - memset(b_ref, 0, sizeof(b_ref)); - - reference_hadamard16x16(a, 16, b_ref); - ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b)); - - // The order of the output is not important. Sort before checking. - std::sort(b, b + 16 * 16); - std::sort(b_ref, b_ref + 16 * 16); - EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); -} - -TEST_P(Hadamard16x16Test, VaryStride) { - DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]); - DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]); - tran_low_t b_ref[16 * 16]; - for (int i = 0; i < 16 * 16 * 8; ++i) { - a[i] = rnd_.Rand9Signed(); - } - - for (int i = 8; i < 64; i += 8) { - memset(b, 0, sizeof(b)); - memset(b_ref, 0, sizeof(b_ref)); - - reference_hadamard16x16(a, i, b_ref); - ASM_REGISTER_STATE_CHECK(h_func_(a, i, b)); - - // The order of the output is not important. Sort before checking. 
- std::sort(b, b + 16 * 16); - std::sort(b_ref, b_ref + 16 * 16); - EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); - } -} - -TEST_P(Hadamard16x16Test, DISABLED_Speed) { - HadamardSpeedTest16x16(h_func_, 10); - HadamardSpeedTest16x16(h_func_, 10000); - HadamardSpeedTest16x16(h_func_, 10000000); -} - -INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test, - ::testing::Values(&vpx_hadamard_16x16_c)); - -#if HAVE_SSE2 -INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test, - ::testing::Values(&vpx_hadamard_16x16_sse2)); -#endif // HAVE_SSE2 +INSTANTIATE_TEST_CASE_P( + C, HadamardHighbdTest, + ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_c, 8), + HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_c, 16), + HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_c, 32))); #if HAVE_AVX2 -INSTANTIATE_TEST_CASE_P(AVX2, Hadamard16x16Test, - ::testing::Values(&vpx_hadamard_16x16_avx2)); +INSTANTIATE_TEST_CASE_P( + AVX2, HadamardHighbdTest, + ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_avx2, 8), + HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_avx2, 16), + HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_avx2, + 32))); #endif // HAVE_AVX2 -#if HAVE_VSX -INSTANTIATE_TEST_CASE_P(VSX, Hadamard16x16Test, - ::testing::Values(&vpx_hadamard_16x16_vsx)); -#endif // HAVE_VSX - -#if HAVE_NEON -INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test, - ::testing::Values(&vpx_hadamard_16x16_neon)); -#endif // HAVE_NEON - -#if !CONFIG_VP9_HIGHBITDEPTH -#if HAVE_MSA -INSTANTIATE_TEST_CASE_P(MSA, Hadamard16x16Test, - ::testing::Values(&vpx_hadamard_16x16_msa)); -#endif // HAVE_MSA -#endif // !CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_VP9_HIGHBITDEPTH } // namespace diff --git a/media/libvpx/libvpx/test/i420_video_source.h b/media/libvpx/libvpx/test/i420_video_source.h index 49573823b4f5..97473b5c2f1b 100644 --- a/media/libvpx/libvpx/test/i420_video_source.h +++ b/media/libvpx/libvpx/test/i420_video_source.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TEST_I420_VIDEO_SOURCE_H_ -#define TEST_I420_VIDEO_SOURCE_H_ +#ifndef VPX_TEST_I420_VIDEO_SOURCE_H_ +#define VPX_TEST_I420_VIDEO_SOURCE_H_ #include <cstdio> #include <cstdlib> #include <cstring> @@ -30,4 +30,4 @@ class I420VideoSource : public YUVVideoSource { } // namespace libvpx_test -#endif // TEST_I420_VIDEO_SOURCE_H_ +#endif // VPX_TEST_I420_VIDEO_SOURCE_H_ diff --git a/media/libvpx/libvpx/test/idct_test.cc b/media/libvpx/libvpx/test/idct_test.cc index 3700374d7a90..3564c0bd5d1d 100644 --- a/media/libvpx/libvpx/test/idct_test.cc +++ b/media/libvpx/libvpx/test/idct_test.cc @@ -72,6 +72,7 @@ TEST_P(IDCTTest, TestAllZeros) { TEST_P(IDCTTest, TestAllOnes) { input->Set(0); + ASSERT_TRUE(input->TopLeftPixel() != NULL); // When the first element is '4' it will fill the output buffer with '1'. input->TopLeftPixel()[0] = 4; predict->Set(0); @@ -89,6 +90,7 @@ TEST_P(IDCTTest, TestAddOne) { // Set the transform output to '1' and make sure it gets added to the // prediction buffer.
input->Set(0); + ASSERT_TRUE(input->TopLeftPixel() != NULL); input->TopLeftPixel()[0] = 4; output->Set(0); @@ -174,4 +176,4 @@ INSTANTIATE_TEST_CASE_P(MSA, IDCTTest, INSTANTIATE_TEST_CASE_P(MMI, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_mmi)); #endif // HAVE_MMI -} +} // namespace diff --git a/media/libvpx/libvpx/test/invalid_file_test.cc b/media/libvpx/libvpx/test/invalid_file_test.cc index 79220b0f69e6..8fb9859ae492 100644 --- a/media/libvpx/libvpx/test/invalid_file_test.cc +++ b/media/libvpx/libvpx/test/invalid_file_test.cc @@ -10,6 +10,7 @@ #include <cstdio> #include <cstdlib> +#include <memory> #include <string> #include <vector> #include "third_party/googletest/src/include/gtest/gtest.h" @@ -89,7 +90,7 @@ class InvalidFileTest : public ::libvpx_test::DecoderTest, const std::string filename = input.filename; // Open compressed video file. - testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video; + std::unique_ptr<libvpx_test::CompressedVideoSource> video; if (filename.substr(filename.length() - 3, 3) == "ivf") { video.reset(new libvpx_test::IVFVideoSource(filename)); } else if (filename.substr(filename.length() - 4, 4) == "webm") { @@ -123,6 +124,8 @@ TEST_P(InvalidFileTest, ReturnCode) { RunTest(); } #if CONFIG_VP8_DECODER const DecodeParam kVP8InvalidFileTests[] = { { 1, "invalid-bug-1443.ivf" }, + { 1, "invalid-token-partition.ivf" }, + { 1, "invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf" }, }; VP8_INSTANTIATE_TEST_CASE(InvalidFileTest, @@ -144,7 +147,7 @@ const DecodeParam kVP9InvalidFileTests[] = { // This file will cause a large allocation which is expected to fail in 32-bit // environments. Test x86 for coverage purposes as the allocation failure will // be in platform agnostic code. -#if ARCH_X86 +#if VPX_ARCH_X86 { 1, "invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf" }, #endif { 1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf" }, @@ -202,6 +205,8 @@ const DecodeParam kMultiThreadedVP9InvalidFileTests[] = { { 2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf" }, { 4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf" }, { 2, "invalid-crbug-629481.webm" }, + { 3, "invalid-crbug-1558.ivf" }, + { 4, "invalid-crbug-1562.ivf" }, }; INSTANTIATE_TEST_CASE_P( diff --git a/media/libvpx/libvpx/test/ivf_video_source.h b/media/libvpx/libvpx/test/ivf_video_source.h index 5862d2649f8d..22c05ecde9ea 100644 --- a/media/libvpx/libvpx/test/ivf_video_source.h +++ b/media/libvpx/libvpx/test/ivf_video_source.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TEST_IVF_VIDEO_SOURCE_H_ -#define TEST_IVF_VIDEO_SOURCE_H_ +#ifndef VPX_TEST_IVF_VIDEO_SOURCE_H_ +#define VPX_TEST_IVF_VIDEO_SOURCE_H_ #include <cstdio> #include <cstdlib> #include <cstring> @@ -16,7 +16,7 @@ #include "test/video_source.h" namespace libvpx_test { -const unsigned int kCodeBufferSize = 256 * 1024; +const unsigned int kCodeBufferSize = 256 * 1024 * 1024; const unsigned int kIvfFileHdrSize = 32; const unsigned int kIvfFrameHdrSize = 12; @@ -103,4 +103,4 @@ class IVFVideoSource : public CompressedVideoSource { } // namespace libvpx_test -#endif // TEST_IVF_VIDEO_SOURCE_H_ +#endif // VPX_TEST_IVF_VIDEO_SOURCE_H_
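kCodeBufferSize grows above from 256 KiB to 256 MiB so oversized, fuzzer-style frames still fit in a single read. The two header constants follow standard IVF framing (assumed from the container format, not restated in this patch):

  // IVF: a 32-byte file header ("DKIF"), then per frame a 12-byte header
  // holding a 4-byte little-endian payload size and an 8-byte timestamp.
  static unsigned int IvfFrameSize(const uint8_t *hdr12) {
    return hdr12[0] | (hdr12[1] << 8) | (hdr12[2] << 16) |
           ((unsigned int)hdr12[3] << 24);
  }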
diff --git a/media/libvpx/libvpx/test/keyframe_test.cc b/media/libvpx/libvpx/test/keyframe_test.cc index ee75f401ca5d..582d448168d1 100644 --- a/media/libvpx/libvpx/test/keyframe_test.cc +++ b/media/libvpx/libvpx/test/keyframe_test.cc @@ -38,7 +38,7 @@ class KeyframeTest if (kf_do_force_kf_) { frame_flags_ = (video->frame() % 3) ? 0 : VPX_EFLAG_FORCE_KF; } - if (set_cpu_used_ && video->frame() == 1) { + if (set_cpu_used_ && video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); } } @@ -68,7 +68,9 @@ TEST_P(KeyframeTest, TestRandomVideoSource) { // In realtime mode - auto placed keyframes are exceedingly rare, don't // bother with this check if(GetParam() > 0) - if (GET_PARAM(1) > 0) EXPECT_GT(kf_count_, 1); + if (GET_PARAM(1) > 0) { + EXPECT_GT(kf_count_, 1); + } } TEST_P(KeyframeTest, TestDisableKeyframes) { @@ -128,8 +130,9 @@ TEST_P(KeyframeTest, TestAutoKeyframe) { // In realtime mode - auto placed keyframes are exceedingly rare, don't // bother with this check - if (GET_PARAM(1) > 0) + if (GET_PARAM(1) > 0) { EXPECT_EQ(2u, kf_pts_list_.size()) << " Not the right number of keyframes "; + } // Verify that keyframes match the file keyframes in the file. for (std::vector::const_iterator iter = kf_pts_list_.begin(); diff --git a/media/libvpx/libvpx/test/lpf_test.cc b/media/libvpx/libvpx/test/lpf_test.cc index e04b996cd868..9db1181c6c61 100644 --- a/media/libvpx/libvpx/test/lpf_test.cc +++ b/media/libvpx/libvpx/test/lpf_test.cc @@ -11,6 +11,7 @@ #include <cmath> #include <cstdlib> #include <string> +#include <tuple> #include "third_party/googletest/src/include/gtest/gtest.h" @@ -56,8 +57,8 @@ typedef void (*dual_loop_op_t)(Pixel *s, int p, const uint8_t *blimit0, const uint8_t *thresh1); #endif // CONFIG_VP9_HIGHBITDEPTH -typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t; -typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t; +typedef std::tuple<loop_op_t, loop_op_t, int> loop8_param_t; +typedef std::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t; void InitInput(Pixel *s, Pixel *ref_s, ACMRandom *rnd, const uint8_t limit, const int mask, const int32_t p, const int i) { @@ -74,9 +75,9 @@ void InitInput(Pixel *s, Pixel *ref_s, ACMRandom *rnd, const uint8_t limit, if (j < 1) { tmp_s[j] = rnd->Rand16(); } else if (val & 0x20) { // Increment by a value within the limit. - tmp_s[j] = tmp_s[j - 1] + (limit - 1); + tmp_s[j] = static_cast<uint16_t>(tmp_s[j - 1] + (limit - 1)); } else { // Decrement by a value within the limit. - tmp_s[j] = tmp_s[j - 1] - (limit - 1); + tmp_s[j] = static_cast<uint16_t>(tmp_s[j - 1] - (limit - 1)); } j++; } @@ -93,11 +94,11 @@ void InitInput(Pixel *s, Pixel *ref_s, ACMRandom *rnd, const uint8_t limit, if (j < 1) { tmp_s[j] = rnd->Rand16(); } else if (val & 0x20) { // Increment by a value within the limit. - tmp_s[(j % 32) * 32 + j / 32] = - tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] + (limit - 1); + tmp_s[(j % 32) * 32 + j / 32] = static_cast<uint16_t>( + tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] + (limit - 1)); } else { // Decrement by a value within the limit. - tmp_s[(j % 32) * 32 + j / 32] = - tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] - (limit - 1); + tmp_s[(j % 32) * 32 + j / 32] = static_cast<uint16_t>( + tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] - (limit - 1)); } j++; } @@ -402,7 +403,7 @@ TEST_P(Loop8Test9Param, ValueCheck) { << "First failed at test case " << first_failure; } -using std::tr1::make_tuple; +using std::make_tuple; #if HAVE_SSE2 #if CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/libvpx/test/md5_helper.h b/media/libvpx/libvpx/test/md5_helper.h index ef310a2d900a..dc28dc6283bc 100644 --- a/media/libvpx/libvpx/test/md5_helper.h +++ b/media/libvpx/libvpx/test/md5_helper.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree.
*/ -#ifndef TEST_MD5_HELPER_H_ -#define TEST_MD5_HELPER_H_ +#ifndef VPX_TEST_MD5_HELPER_H_ +#define VPX_TEST_MD5_HELPER_H_ #include "./md5_utils.h" #include "vpx/vpx_decoder.h" @@ -72,4 +72,4 @@ class MD5 { } // namespace libvpx_test -#endif // TEST_MD5_HELPER_H_ +#endif // VPX_TEST_MD5_HELPER_H_ diff --git a/media/libvpx/libvpx/test/non_greedy_mv_test.cc b/media/libvpx/libvpx/test/non_greedy_mv_test.cc new file mode 100644 index 000000000000..c78331b28521 --- /dev/null +++ b/media/libvpx/libvpx/test/non_greedy_mv_test.cc @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <math.h> +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "vp9/encoder/vp9_non_greedy_mv.h" +#include "./vpx_dsp_rtcd.h" + +namespace { + +static void read_in_mf(const char *filename, int *rows_ptr, int *cols_ptr, + MV **buffer_ptr) { + FILE *input = fopen(filename, "rb"); + int row, col; + int idx; + + ASSERT_NE(input, nullptr) << "Cannot open file: " << filename << std::endl; + + fscanf(input, "%d,%d\n", rows_ptr, cols_ptr); + + *buffer_ptr = (MV *)malloc((*rows_ptr) * (*cols_ptr) * sizeof(MV)); + + for (idx = 0; idx < (*rows_ptr) * (*cols_ptr); ++idx) { + fscanf(input, "%d,%d;", &row, &col); + (*buffer_ptr)[idx].row = row; + (*buffer_ptr)[idx].col = col; + } + fclose(input); +} + +static void read_in_local_var(const char *filename, int *rows_ptr, + int *cols_ptr, + int (**M_ptr)[MF_LOCAL_STRUCTURE_SIZE]) { + FILE *input = fopen(filename, "rb"); + int M00, M01, M10, M11; + int idx; + int int_type; + + ASSERT_NE(input, nullptr) << "Cannot open file: " << filename << std::endl; + + fscanf(input, "%d,%d\n", rows_ptr, cols_ptr); + + *M_ptr = (int(*)[MF_LOCAL_STRUCTURE_SIZE])malloc( + (*rows_ptr) * (*cols_ptr) * MF_LOCAL_STRUCTURE_SIZE * sizeof(int_type)); + + for (idx = 0; idx < (*rows_ptr) * (*cols_ptr); ++idx) { + fscanf(input, "%d,%d,%d,%d;", &M00, &M01, &M10, &M11); + (*M_ptr)[idx][0] = M00; + (*M_ptr)[idx][1] = M01; + (*M_ptr)[idx][2] = M10; + (*M_ptr)[idx][3] = M11; + } + fclose(input); +} + +static void compare_mf(const MV *mf1, const MV *mf2, int rows, int cols, + float *mean_ptr, float *std_ptr) { + float float_type; + float *diffs = (float *)malloc(rows * cols * sizeof(float_type)); + int idx; + float accu = 0.0f; + for (idx = 0; idx < rows * cols; ++idx) { + MV mv1 = mf1[idx]; + MV mv2 = mf2[idx]; + float row_diff2 = (float)((mv1.row - mv2.row) * (mv1.row - mv2.row)); + float col_diff2 = (float)((mv1.col - mv2.col) * (mv1.col - mv2.col)); + diffs[idx] = sqrt(row_diff2 + col_diff2); + accu += diffs[idx]; + } + *mean_ptr = accu / rows / cols; + *std_ptr = 0; + for (idx = 0; idx < rows * cols; ++idx) { + *std_ptr += (diffs[idx] - (*mean_ptr)) * (diffs[idx] - (*mean_ptr)); + } + *std_ptr = sqrt(*std_ptr / rows / cols); + free(diffs); +}
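  // Fixture format, inferred from the fscanf patterns above (illustrative):
  //   rows,cols\n                first line: motion-field dimensions
  //   row,col;row,col; ...       rows*cols vectors for read_in_mf()
  //   M00,M01,M10,M11; ...       rows*cols 2x2 blocks for read_in_local_var()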
+static void load_frame_info(const char *filename, + YV12_BUFFER_CONFIG *ref_frame_ptr) { + FILE *input = fopen(filename, "rb"); + int idx; + uint8_t data_type; + + ASSERT_NE(input, nullptr) << "Cannot open file: " << filename << std::endl; + + fscanf(input, "%d,%d\n", &(ref_frame_ptr->y_height), + &(ref_frame_ptr->y_width)); + + ref_frame_ptr->y_buffer = (uint8_t *)malloc( + (ref_frame_ptr->y_width) * (ref_frame_ptr->y_height) * sizeof(data_type)); + + for (idx = 0; idx < (ref_frame_ptr->y_width) * (ref_frame_ptr->y_height); + ++idx) { + int value; + fscanf(input, "%d,", &value); + ref_frame_ptr->y_buffer[idx] = (uint8_t)value; + } + + ref_frame_ptr->y_stride = ref_frame_ptr->y_width; + fclose(input); +} + +static int compare_local_var(const int (*local_var1)[MF_LOCAL_STRUCTURE_SIZE], + const int (*local_var2)[MF_LOCAL_STRUCTURE_SIZE], + int rows, int cols) { + int diff = 0; + int outter_idx, inner_idx; + for (outter_idx = 0; outter_idx < rows * cols; ++outter_idx) { + for (inner_idx = 0; inner_idx < MF_LOCAL_STRUCTURE_SIZE; ++inner_idx) { + diff += abs(local_var1[outter_idx][inner_idx] - + local_var2[outter_idx][inner_idx]); + } + } + return diff / rows / cols; +} + +TEST(non_greedy_mv, smooth_mf) { + const char *search_mf_file = "non_greedy_mv_test_files/exhaust_16x16.txt"; + const char *local_var_file = "non_greedy_mv_test_files/localVar_16x16.txt"; + const char *estimation_file = "non_greedy_mv_test_files/estimation_16x16.txt"; + const char *ground_truth_file = + "non_greedy_mv_test_files/ground_truth_16x16.txt"; + BLOCK_SIZE bsize = BLOCK_32X32; + MV *search_mf = NULL; + MV *smooth_mf = NULL; + MV *estimation = NULL; + MV *ground_truth = NULL; + int(*local_var)[MF_LOCAL_STRUCTURE_SIZE] = NULL; + int rows = 0, cols = 0; + + int alpha = 100, max_iter = 100; + + read_in_mf(search_mf_file, &rows, &cols, &search_mf); + read_in_local_var(local_var_file, &rows, &cols, &local_var); + read_in_mf(estimation_file, &rows, &cols, &estimation); + read_in_mf(ground_truth_file, &rows, &cols, &ground_truth); + + float sm_mean, sm_std; + float est_mean, est_std; + + smooth_mf = (MV *)malloc(rows * cols * sizeof(MV)); + vp9_get_smooth_motion_field(search_mf, local_var, rows, cols, bsize, alpha, + max_iter, smooth_mf); + + compare_mf(smooth_mf, ground_truth, rows, cols, &sm_mean, &sm_std); + compare_mf(smooth_mf, estimation, rows, cols, &est_mean, &est_std); + + EXPECT_LE(sm_mean, 3); + EXPECT_LE(est_mean, 2); + + free(search_mf); + free(local_var); + free(estimation); + free(ground_truth); + free(smooth_mf); +} + +TEST(non_greedy_mv, local_var) { + const char *ref_frame_file = "non_greedy_mv_test_files/ref_frame_16x16.txt"; + const char *cur_frame_file = "non_greedy_mv_test_files/cur_frame_16x16.txt"; + const char *gt_local_var_file = "non_greedy_mv_test_files/localVar_16x16.txt"; + const char *search_mf_file = "non_greedy_mv_test_files/exhaust_16x16.txt"; + BLOCK_SIZE bsize = BLOCK_16X16; + int(*gt_local_var)[MF_LOCAL_STRUCTURE_SIZE] = NULL; + int(*est_local_var)[MF_LOCAL_STRUCTURE_SIZE] = NULL; + YV12_BUFFER_CONFIG ref_frame, cur_frame; + int rows, cols; + MV *search_mf; + int int_type; + int local_var_diff; + vp9_variance_fn_ptr_t fn; + + load_frame_info(ref_frame_file, &ref_frame); + load_frame_info(cur_frame_file, &cur_frame); + read_in_mf(search_mf_file, &rows, &cols, &search_mf); + + fn.sdf = vpx_sad16x16; + est_local_var = (int(*)[MF_LOCAL_STRUCTURE_SIZE])malloc( + rows * cols * MF_LOCAL_STRUCTURE_SIZE * sizeof(int_type)); + vp9_get_local_structure(&cur_frame, &ref_frame, search_mf, &fn, rows, cols, + bsize, est_local_var); + read_in_local_var(gt_local_var_file, &rows, &cols, &gt_local_var); + + local_var_diff = compare_local_var(est_local_var, gt_local_var, rows, cols); + + EXPECT_LE(local_var_diff, 1); + + free(gt_local_var); + free(est_local_var); + free(ref_frame.y_buffer); +} +} // namespace diff --git a/media/libvpx/libvpx/test/partial_idct_test.cc
b/media/libvpx/libvpx/test/partial_idct_test.cc index f7b50f53a113..e66a695eb0d5 100644 --- a/media/libvpx/libvpx/test/partial_idct_test.cc +++ b/media/libvpx/libvpx/test/partial_idct_test.cc @@ -11,8 +11,8 @@ #include <math.h> #include <stdlib.h> #include <string.h> - #include <limits> +#include <tuple> #include "third_party/googletest/src/include/gtest/gtest.h" @@ -51,8 +51,8 @@ void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) { } #endif -typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmWithBdFunc, InvTxfmWithBdFunc, TX_SIZE, int, int, int> +typedef std::tuple<FwdTxfmFunc, InvTxfmWithBdFunc, InvTxfmWithBdFunc, TX_SIZE, int, int, int> PartialInvTxfmParam; const int kMaxNumCoeffs = 1024; const int kCountTestBlock = 1000; @@ -324,7 +324,7 @@ TEST_P(PartialIDctTest, DISABLED_Speed) { << "Error: partial inverse transform produces different results"; } -using std::tr1::make_tuple; +using std::make_tuple; const PartialInvTxfmParam c_partial_idct_tests[] = { #if CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/libvpx/test/pp_filter_test.cc b/media/libvpx/libvpx/test/pp_filter_test.cc index 5a2ade1ef4fe..1ed261bf9b69 100644 --- a/media/libvpx/libvpx/test/pp_filter_test.cc +++ b/media/libvpx/libvpx/test/pp_filter_test.cc @@ -11,6 +11,7 @@ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" +#include "test/bench.h" #include "test/buffer.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" @@ -32,7 +33,6 @@ typedef void (*VpxMbPostProcDownFunc)(unsigned char *dst, int pitch, int rows, int cols, int flimit); namespace { - // Compute the filter level used in post proc from the loop filter strength int q2mbl(int x) { if (x < 20) x = 20; @@ -42,33 +42,52 @@ int q2mbl(int x) { } class VpxPostProcDownAndAcrossMbRowTest - : public ::testing::TestWithParam<VpxPostProcDownAndAcrossMbRowFunc> { + : public AbstractBench, + public ::testing::TestWithParam<VpxPostProcDownAndAcrossMbRowFunc> { public: + VpxPostProcDownAndAcrossMbRowTest() + : mb_post_proc_down_and_across_(GetParam()) {} virtual void TearDown() { libvpx_test::ClearSystemState(); } + + protected: + virtual void Run(); + + const VpxPostProcDownAndAcrossMbRowFunc mb_post_proc_down_and_across_; + // Size of the underlying data block that will be filtered. + int block_width_; + int block_height_; + Buffer<uint8_t> *src_image_; + Buffer<uint8_t> *dst_image_; + uint8_t *flimits_; }; +void VpxPostProcDownAndAcrossMbRowTest::Run() { + mb_post_proc_down_and_across_( + src_image_->TopLeftPixel(), dst_image_->TopLeftPixel(), + src_image_->stride(), dst_image_->stride(), block_width_, flimits_, 16); +} + // Test routine for the VPx post-processing function // vpx_post_proc_down_and_across_mb_row_c. TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckFilterOutput) { // Size of the underlying data block that will be filtered. - const int block_width = 16; - const int block_height = 16; + block_width_ = 16; + block_height_ = 16; // 5-tap filter needs 2 padding rows above and below the block in the input. - Buffer<uint8_t> src_image = Buffer<uint8_t>(block_width, block_height, 2); + Buffer<uint8_t> src_image = Buffer<uint8_t>(block_width_, block_height_, 2); ASSERT_TRUE(src_image.Init()); // Filter extends output block by 8 samples at left and right edges. // Though the left padding is only 8 bytes, the assembly code tries to // read 16 bytes before the pointer.
Buffer<uint8_t> dst_image = - Buffer<uint8_t>(block_width, block_height, 8, 16, 8, 8); + Buffer<uint8_t>(block_width_, block_height_, 8, 16, 8, 8); ASSERT_TRUE(dst_image.Init()); - uint8_t *const flimits = - reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width)); - (void)memset(flimits, 255, block_width); + flimits_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width_)); + (void)memset(flimits_, 255, block_width_); // Initialize pixels in the input: // block pixels to value 1, @@ -79,37 +98,36 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckFilterOutput) { // Initialize pixels in the output to 99. dst_image.Set(99); - ASM_REGISTER_STATE_CHECK(GetParam()( + ASM_REGISTER_STATE_CHECK(mb_post_proc_down_and_across_( src_image.TopLeftPixel(), dst_image.TopLeftPixel(), src_image.stride(), - dst_image.stride(), block_width, flimits, 16)); + dst_image.stride(), block_width_, flimits_, 16)); - static const uint8_t kExpectedOutput[block_height] = { - 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4 - }; + static const uint8_t kExpectedOutput[] = { 4, 3, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 4 }; uint8_t *pixel_ptr = dst_image.TopLeftPixel(); - for (int i = 0; i < block_height; ++i) { - for (int j = 0; j < block_width; ++j) { + for (int i = 0; i < block_height_; ++i) { + for (int j = 0; j < block_width_; ++j) { ASSERT_EQ(kExpectedOutput[i], pixel_ptr[j]) << "at (" << i << ", " << j << ")"; } pixel_ptr += dst_image.stride(); } - vpx_free(flimits); + vpx_free(flimits_); }; TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) { // Size of the underlying data block that will be filtered. // Y blocks are always a multiple of 16 wide and exactly 16 high. U and V // blocks are always a multiple of 8 wide and exactly 8 high. - const int block_width = 136; - const int block_height = 16; + block_width_ = 136; + block_height_ = 16; // 5-tap filter needs 2 padding rows above and below the block in the input. // SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16. Buffer<uint8_t> src_image = - Buffer<uint8_t>(block_width, block_height, 2, 2, 10, 2); + Buffer<uint8_t>(block_width_, block_height_, 2, 2, 10, 2); ASSERT_TRUE(src_image.Init()); // Filter extends output block by 8 samples at left and right edges. @@ -118,17 +136,17 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) { // not a problem. // SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16. Buffer<uint8_t> dst_image = - Buffer<uint8_t>(block_width, block_height, 8, 8, 16, 8); + Buffer<uint8_t>(block_width_, block_height_, 8, 8, 16, 8); ASSERT_TRUE(dst_image.Init()); - Buffer<uint8_t> dst_image_ref = Buffer<uint8_t>(block_width, block_height, 8); + Buffer<uint8_t> dst_image_ref = + Buffer<uint8_t>(block_width_, block_height_, 8); ASSERT_TRUE(dst_image_ref.Init()); // Filter values are set in blocks of 16 for Y and 8 for U/V. Each macroblock // can have a different filter. SSE2 assembly reads flimits in blocks of 16 so // it must be padded out. - const int flimits_width = block_width % 16 ? block_width + 8 : block_width; - uint8_t *const flimits = - reinterpret_cast<uint8_t *>(vpx_memalign(16, flimits_width)); + const int flimits_width = block_width_ % 16 ?
block_width_ + 8 : block_width_; + flimits_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, flimits_width)); ACMRandom rnd; rnd.Reset(ACMRandom::DeterministicSeed()); @@ -138,37 +156,78 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) { src_image.SetPadding(10); src_image.Set(&rnd, &ACMRandom::Rand8); - for (int blocks = 0; blocks < block_width; blocks += 8) { - (void)memset(flimits, 0, sizeof(*flimits) * flimits_width); + for (int blocks = 0; blocks < block_width_; blocks += 8) { + (void)memset(flimits_, 0, sizeof(*flimits_) * flimits_width); for (int f = 0; f < 255; f++) { - (void)memset(flimits + blocks, f, sizeof(*flimits) * 8); - + (void)memset(flimits_ + blocks, f, sizeof(*flimits_) * 8); dst_image.Set(0); dst_image_ref.Set(0); vpx_post_proc_down_and_across_mb_row_c( src_image.TopLeftPixel(), dst_image_ref.TopLeftPixel(), - src_image.stride(), dst_image_ref.stride(), block_width, flimits, - block_height); - ASM_REGISTER_STATE_CHECK( - GetParam()(src_image.TopLeftPixel(), dst_image.TopLeftPixel(), - src_image.stride(), dst_image.stride(), block_width, - flimits, block_height)); + src_image.stride(), dst_image_ref.stride(), block_width_, flimits_, + block_height_); + ASM_REGISTER_STATE_CHECK(mb_post_proc_down_and_across_( + src_image.TopLeftPixel(), dst_image.TopLeftPixel(), + src_image.stride(), dst_image.stride(), block_width_, flimits_, + block_height_)); ASSERT_TRUE(dst_image.CheckValues(dst_image_ref)); } } - vpx_free(flimits); + vpx_free(flimits_); }
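The DISABLED_Speed tests in this file are built on the AbstractBench helper that this patch adds in test/bench.h. A minimal sketch of its shape, with RunNTimes/PrintMedian/Run taken from the call sites here and everything else assumed:

  class AbstractBench {
   public:
    void RunNTimes(int n);                // run the benchmarked op n times
    void PrintMedian(const char *title);  // report the median elapsed time
   protected:
    virtual void Run() = 0;               // the operation being measured
  };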
+ dst_image.Set(99); + + RunNTimes(INT16_MAX); + PrintMedian("16x16"); + + vpx_free(flimits_); +}; + class VpxMbPostProcAcrossIpTest - : public ::testing::TestWithParam { + : public AbstractBench, + public ::testing::TestWithParam { public: + VpxMbPostProcAcrossIpTest() + : rows_(16), cols_(16), mb_post_proc_across_ip_(GetParam()), + src_(Buffer(rows_, cols_, 8, 8, 17, 8)) {} virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: + virtual void Run(); + void SetCols(unsigned char *s, int rows, int cols, int src_width) { for (int r = 0; r < rows; r++) { for (int c = 0; c < cols; c++) { @@ -195,71 +254,67 @@ class VpxMbPostProcAcrossIpTest GetParam()(s, src_width, rows, cols, filter_level)); RunComparison(expected_output, s, rows, cols, src_width); } + + const int rows_; + const int cols_; + const VpxMbPostProcAcrossIpFunc mb_post_proc_across_ip_; + Buffer src_; }; +void VpxMbPostProcAcrossIpTest::Run() { + mb_post_proc_across_ip_(src_.TopLeftPixel(), src_.stride(), rows_, cols_, + q2mbl(0)); +} + TEST_P(VpxMbPostProcAcrossIpTest, CheckLowFilterOutput) { - const int rows = 16; - const int cols = 16; + ASSERT_TRUE(src_.Init()); + src_.SetPadding(10); + SetCols(src_.TopLeftPixel(), rows_, cols_, src_.stride()); - Buffer src = Buffer(cols, rows, 8, 8, 17, 8); - ASSERT_TRUE(src.Init()); - src.SetPadding(10); - SetCols(src.TopLeftPixel(), rows, cols, src.stride()); - - Buffer expected_output = Buffer(cols, rows, 0); + Buffer expected_output = Buffer(cols_, rows_, 0); ASSERT_TRUE(expected_output.Init()); - SetCols(expected_output.TopLeftPixel(), rows, cols, expected_output.stride()); + SetCols(expected_output.TopLeftPixel(), rows_, cols_, + expected_output.stride()); - RunFilterLevel(src.TopLeftPixel(), rows, cols, src.stride(), q2mbl(0), + RunFilterLevel(src_.TopLeftPixel(), rows_, cols_, src_.stride(), q2mbl(0), expected_output.TopLeftPixel()); } TEST_P(VpxMbPostProcAcrossIpTest, CheckMediumFilterOutput) { - const int rows = 16; - const int cols = 16; + ASSERT_TRUE(src_.Init()); + src_.SetPadding(10); + SetCols(src_.TopLeftPixel(), rows_, cols_, src_.stride()); - Buffer src = Buffer(cols, rows, 8, 8, 17, 8); - ASSERT_TRUE(src.Init()); - src.SetPadding(10); - SetCols(src.TopLeftPixel(), rows, cols, src.stride()); - - static const unsigned char kExpectedOutput[cols] = { + static const unsigned char kExpectedOutput[] = { 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13 }; - RunFilterLevel(src.TopLeftPixel(), rows, cols, src.stride(), q2mbl(70), + RunFilterLevel(src_.TopLeftPixel(), rows_, cols_, src_.stride(), q2mbl(70), kExpectedOutput); } TEST_P(VpxMbPostProcAcrossIpTest, CheckHighFilterOutput) { - const int rows = 16; - const int cols = 16; + ASSERT_TRUE(src_.Init()); + src_.SetPadding(10); + SetCols(src_.TopLeftPixel(), rows_, cols_, src_.stride()); - Buffer src = Buffer(cols, rows, 8, 8, 17, 8); - ASSERT_TRUE(src.Init()); - src.SetPadding(10); - SetCols(src.TopLeftPixel(), rows, cols, src.stride()); - - static const unsigned char kExpectedOutput[cols] = { + static const unsigned char kExpectedOutput[] = { 2, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13 }; - RunFilterLevel(src.TopLeftPixel(), rows, cols, src.stride(), INT_MAX, + RunFilterLevel(src_.TopLeftPixel(), rows_, cols_, src_.stride(), INT_MAX, kExpectedOutput); - SetCols(src.TopLeftPixel(), rows, cols, src.stride()); + SetCols(src_.TopLeftPixel(), rows_, cols_, src_.stride()); - RunFilterLevel(src.TopLeftPixel(), rows, cols, src.stride(), q2mbl(100), + RunFilterLevel(src_.TopLeftPixel(), rows_, cols_, 
src_.stride(), q2mbl(100), kExpectedOutput); } TEST_P(VpxMbPostProcAcrossIpTest, CheckCvsAssembly) { - const int rows = 16; - const int cols = 16; - - Buffer c_mem = Buffer(cols, rows, 8, 8, 17, 8); + Buffer c_mem = Buffer(cols_, rows_, 8, 8, 17, 8); ASSERT_TRUE(c_mem.Init()); - Buffer asm_mem = Buffer(cols, rows, 8, 8, 17, 8); + Buffer asm_mem = Buffer(cols_, rows_, 8, 8, 17, 8); ASSERT_TRUE(asm_mem.Init()); // When level >= 100, the filter behaves the same as the level = INT_MAX @@ -267,24 +322,41 @@ TEST_P(VpxMbPostProcAcrossIpTest, CheckCvsAssembly) { for (int level = 0; level < 100; level++) { c_mem.SetPadding(10); asm_mem.SetPadding(10); - SetCols(c_mem.TopLeftPixel(), rows, cols, c_mem.stride()); - SetCols(asm_mem.TopLeftPixel(), rows, cols, asm_mem.stride()); + SetCols(c_mem.TopLeftPixel(), rows_, cols_, c_mem.stride()); + SetCols(asm_mem.TopLeftPixel(), rows_, cols_, asm_mem.stride()); - vpx_mbpost_proc_across_ip_c(c_mem.TopLeftPixel(), c_mem.stride(), rows, - cols, q2mbl(level)); + vpx_mbpost_proc_across_ip_c(c_mem.TopLeftPixel(), c_mem.stride(), rows_, + cols_, q2mbl(level)); ASM_REGISTER_STATE_CHECK(GetParam()( - asm_mem.TopLeftPixel(), asm_mem.stride(), rows, cols, q2mbl(level))); + asm_mem.TopLeftPixel(), asm_mem.stride(), rows_, cols_, q2mbl(level))); ASSERT_TRUE(asm_mem.CheckValues(c_mem)); } } +TEST_P(VpxMbPostProcAcrossIpTest, DISABLED_Speed) { + ASSERT_TRUE(src_.Init()); + src_.SetPadding(10); + + SetCols(src_.TopLeftPixel(), rows_, cols_, src_.stride()); + + RunNTimes(100000); + PrintMedian("16x16"); +} + class VpxMbPostProcDownTest - : public ::testing::TestWithParam { + : public AbstractBench, + public ::testing::TestWithParam { public: + VpxMbPostProcDownTest() + : rows_(16), cols_(16), mb_post_proc_down_(GetParam()), + src_c_(Buffer(rows_, cols_, 8, 8, 8, 17)) {} + virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: + virtual void Run(); + void SetRows(unsigned char *src_c, int rows, int cols, int src_width) { for (int r = 0; r < rows; r++) { memset(src_c, r, cols); @@ -306,22 +378,28 @@ class VpxMbPostProcDownTest void RunFilterLevel(unsigned char *s, int rows, int cols, int src_width, int filter_level, const unsigned char *expected_output) { ASM_REGISTER_STATE_CHECK( - GetParam()(s, src_width, rows, cols, filter_level)); + mb_post_proc_down_(s, src_width, rows, cols, filter_level)); RunComparison(expected_output, s, rows, cols, src_width); } + + const int rows_; + const int cols_; + const VpxMbPostProcDownFunc mb_post_proc_down_; + Buffer src_c_; }; +void VpxMbPostProcDownTest::Run() { + mb_post_proc_down_(src_c_.TopLeftPixel(), src_c_.stride(), rows_, cols_, + q2mbl(0)); +} + TEST_P(VpxMbPostProcDownTest, CheckHighFilterOutput) { - const int rows = 16; - const int cols = 16; + ASSERT_TRUE(src_c_.Init()); + src_c_.SetPadding(10); - Buffer src_c = Buffer(cols, rows, 8, 8, 8, 17); - ASSERT_TRUE(src_c.Init()); - src_c.SetPadding(10); + SetRows(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride()); - SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride()); - - static const unsigned char kExpectedOutput[rows * cols] = { + static const unsigned char kExpectedOutput[] = { 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 3, 4, 4, 3, 3, 3, @@ -338,26 +416,22 @@ TEST_P(VpxMbPostProcDownTest, CheckHighFilterOutput) { 13, 13, 13, 13, 14, 13, 13, 13, 13 }; - RunFilterLevel(src_c.TopLeftPixel(), rows, cols, src_c.stride(), INT_MAX, + 
RunFilterLevel(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride(), INT_MAX, kExpectedOutput); - src_c.SetPadding(10); - SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride()); - RunFilterLevel(src_c.TopLeftPixel(), rows, cols, src_c.stride(), q2mbl(100), - kExpectedOutput); + src_c_.SetPadding(10); + SetRows(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride()); + RunFilterLevel(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride(), + q2mbl(100), kExpectedOutput); } TEST_P(VpxMbPostProcDownTest, CheckMediumFilterOutput) { - const int rows = 16; - const int cols = 16; + ASSERT_TRUE(src_c_.Init()); + src_c_.SetPadding(10); - Buffer src_c = Buffer(cols, rows, 8, 8, 8, 17); - ASSERT_TRUE(src_c.Init()); - src_c.SetPadding(10); + SetRows(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride()); - SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride()); - - static const unsigned char kExpectedOutput[rows * cols] = { + static const unsigned char kExpectedOutput[] = { 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -374,67 +448,69 @@ TEST_P(VpxMbPostProcDownTest, CheckMediumFilterOutput) { 13, 13, 13, 13, 14, 13, 13, 13, 13 }; - RunFilterLevel(src_c.TopLeftPixel(), rows, cols, src_c.stride(), q2mbl(70), - kExpectedOutput); + RunFilterLevel(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride(), + q2mbl(70), kExpectedOutput); } TEST_P(VpxMbPostProcDownTest, CheckLowFilterOutput) { - const int rows = 16; - const int cols = 16; + ASSERT_TRUE(src_c_.Init()); + src_c_.SetPadding(10); - Buffer src_c = Buffer(cols, rows, 8, 8, 8, 17); - ASSERT_TRUE(src_c.Init()); - src_c.SetPadding(10); + SetRows(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride()); - SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride()); - - unsigned char *expected_output = new unsigned char[rows * cols]; + unsigned char *expected_output = new unsigned char[rows_ * cols_]; ASSERT_TRUE(expected_output != NULL); - SetRows(expected_output, rows, cols, cols); + SetRows(expected_output, rows_, cols_, cols_); - RunFilterLevel(src_c.TopLeftPixel(), rows, cols, src_c.stride(), q2mbl(0), + RunFilterLevel(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride(), q2mbl(0), expected_output); delete[] expected_output; } TEST_P(VpxMbPostProcDownTest, CheckCvsAssembly) { - const int rows = 16; - const int cols = 16; - ACMRandom rnd; rnd.Reset(ACMRandom::DeterministicSeed()); - Buffer src_c = Buffer(cols, rows, 8, 8, 8, 17); - ASSERT_TRUE(src_c.Init()); - Buffer src_asm = Buffer(cols, rows, 8, 8, 8, 17); + ASSERT_TRUE(src_c_.Init()); + Buffer src_asm = Buffer(cols_, rows_, 8, 8, 8, 17); ASSERT_TRUE(src_asm.Init()); for (int level = 0; level < 100; level++) { - src_c.SetPadding(10); + src_c_.SetPadding(10); src_asm.SetPadding(10); - src_c.Set(&rnd, &ACMRandom::Rand8); - src_asm.CopyFrom(src_c); + src_c_.Set(&rnd, &ACMRandom::Rand8); + src_asm.CopyFrom(src_c_); - vpx_mbpost_proc_down_c(src_c.TopLeftPixel(), src_c.stride(), rows, cols, + vpx_mbpost_proc_down_c(src_c_.TopLeftPixel(), src_c_.stride(), rows_, cols_, q2mbl(level)); - ASM_REGISTER_STATE_CHECK(GetParam()( - src_asm.TopLeftPixel(), src_asm.stride(), rows, cols, q2mbl(level))); - ASSERT_TRUE(src_asm.CheckValues(src_c)); + ASM_REGISTER_STATE_CHECK(mb_post_proc_down_( + src_asm.TopLeftPixel(), src_asm.stride(), rows_, cols_, q2mbl(level))); + ASSERT_TRUE(src_asm.CheckValues(src_c_)); - src_c.SetPadding(10); + src_c_.SetPadding(10); src_asm.SetPadding(10); - src_c.Set(&rnd, 
&ACMRandom::Rand8Extremes); - src_asm.CopyFrom(src_c); + src_c_.Set(&rnd, &ACMRandom::Rand8Extremes); + src_asm.CopyFrom(src_c_); - vpx_mbpost_proc_down_c(src_c.TopLeftPixel(), src_c.stride(), rows, cols, + vpx_mbpost_proc_down_c(src_c_.TopLeftPixel(), src_c_.stride(), rows_, cols_, q2mbl(level)); - ASM_REGISTER_STATE_CHECK(GetParam()( - src_asm.TopLeftPixel(), src_asm.stride(), rows, cols, q2mbl(level))); - ASSERT_TRUE(src_asm.CheckValues(src_c)); + ASM_REGISTER_STATE_CHECK(mb_post_proc_down_( + src_asm.TopLeftPixel(), src_asm.stride(), rows_, cols_, q2mbl(level))); + ASSERT_TRUE(src_asm.CheckValues(src_c_)); } } +TEST_P(VpxMbPostProcDownTest, DISABLED_Speed) { + ASSERT_TRUE(src_c_.Init()); + src_c_.SetPadding(10); + + SetRows(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride()); + + RunNTimes(100000); + PrintMedian("16x16"); +} + INSTANTIATE_TEST_CASE_P( C, VpxPostProcDownAndAcrossMbRowTest, ::testing::Values(vpx_post_proc_down_and_across_mb_row_c)); @@ -481,4 +557,16 @@ INSTANTIATE_TEST_CASE_P(MSA, VpxMbPostProcDownTest, ::testing::Values(vpx_mbpost_proc_down_msa)); #endif // HAVE_MSA +#if HAVE_VSX +INSTANTIATE_TEST_CASE_P( + VSX, VpxPostProcDownAndAcrossMbRowTest, + ::testing::Values(vpx_post_proc_down_and_across_mb_row_vsx)); + +INSTANTIATE_TEST_CASE_P(VSX, VpxMbPostProcAcrossIpTest, + ::testing::Values(vpx_mbpost_proc_across_ip_vsx)); + +INSTANTIATE_TEST_CASE_P(VSX, VpxMbPostProcDownTest, + ::testing::Values(vpx_mbpost_proc_down_vsx)); +#endif // HAVE_VSX + } // namespace diff --git a/media/libvpx/libvpx/test/predict_test.cc b/media/libvpx/libvpx/test/predict_test.cc index 9f366ae5299d..d40d9c755e67 100644 --- a/media/libvpx/libvpx/test/predict_test.cc +++ b/media/libvpx/libvpx/test/predict_test.cc @@ -10,30 +10,34 @@ #include #include +#include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vp8_rtcd.h" #include "./vpx_config.h" #include "test/acm_random.h" +#include "test/bench.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/msvc.h" namespace { using libvpx_test::ACMRandom; -using std::tr1::make_tuple; +using std::make_tuple; typedef void (*PredictFunc)(uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch); -typedef std::tr1::tuple PredictParam; +typedef std::tuple PredictParam; -class PredictTestBase : public ::testing::TestWithParam { +class PredictTestBase : public AbstractBench, + public ::testing::TestWithParam { public: PredictTestBase() : width_(GET_PARAM(0)), height_(GET_PARAM(1)), predict_(GET_PARAM(2)), @@ -204,7 +208,20 @@ class PredictTestBase : public ::testing::TestWithParam { } } } -}; + + void Run() { + for (int xoffset = 0; xoffset < 8; ++xoffset) { + for (int yoffset = 0; yoffset < 8; ++yoffset) { + if (xoffset == 0 && yoffset == 0) { + continue; + } + + predict_(&src_[kSrcStride * 2 + 2], kSrcStride, xoffset, yoffset, dst_, + dst_stride_); + } + } + } +}; // namespace class SixtapPredictTest : public PredictTestBase {}; @@ -341,6 +358,14 @@ TEST_P(BilinearPredictTest, TestWithRandomData) { TEST_P(BilinearPredictTest, TestWithUnalignedDst) { TestWithUnalignedDst(vp8_bilinear_predict16x16_c); } +TEST_P(BilinearPredictTest, DISABLED_Speed) { + const int kCountSpeedTestBlock = 5000000 / (width_ * height_); + RunNTimes(kCountSpeedTestBlock); + + char title[16]; + snprintf(title, sizeof(title), "%dx%d", width_, height_); + PrintMedian(title); +} 
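[Editor's note on the bench pattern used by the DISABLED_Speed tests throughout this patch: each fixture derives from AbstractBench (test/bench.h, added by this update), overrides Run() with one batch of the kernel under test, and then calls RunNTimes() followed by PrintMedian(). The following is a minimal self-contained sketch of that harness, written from the usage visible in these tests rather than from test/bench.h itself, so the class name and the kRuns constant are illustrative only.]

// Sketch of the bench harness the speed tests rely on (illustrative only).
#include <algorithm>
#include <cstdio>
#include "vpx_ports/vpx_timer.h"

class AbstractBenchSketch {
 public:
  virtual ~AbstractBenchSketch() {}
  // Times kRuns batches of n calls to Run(), recording each batch in ms.
  void RunNTimes(int n) {
    for (int r = 0; r < kRuns; ++r) {
      vpx_usec_timer timer;
      vpx_usec_timer_start(&timer);
      for (int i = 0; i < n; ++i) Run();
      vpx_usec_timer_mark(&timer);
      times_[r] = static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
    }
  }
  // Sorts the recorded timings and reports the median, as the tests above do.
  void PrintMedian(const char *title) {
    std::sort(times_, times_ + kRuns);
    printf("Median: %4d ms for %s\n", times_[kRuns / 2], title);
  }

 protected:
  virtual void Run() = 0;  // one batch of the code under test

 private:
  static const int kRuns = 10;  // repeated batches make the median robust
  int times_[kRuns];
};

The fixtures in this patch plug their predict, SAD, post-proc, and quantize kernels into Run() and report per-block-size medians.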
INSTANTIATE_TEST_CASE_P( C, BilinearPredictTest, @@ -356,17 +381,13 @@ INSTANTIATE_TEST_CASE_P( make_tuple(8, 4, &vp8_bilinear_predict8x4_neon), make_tuple(4, 4, &vp8_bilinear_predict4x4_neon))); #endif -#if HAVE_MMX -INSTANTIATE_TEST_CASE_P( - MMX, BilinearPredictTest, - ::testing::Values(make_tuple(8, 4, &vp8_bilinear_predict8x4_mmx), - make_tuple(4, 4, &vp8_bilinear_predict4x4_mmx))); -#endif #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, BilinearPredictTest, ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_sse2), - make_tuple(8, 8, &vp8_bilinear_predict8x8_sse2))); + make_tuple(8, 8, &vp8_bilinear_predict8x8_sse2), + make_tuple(8, 4, &vp8_bilinear_predict8x4_sse2), + make_tuple(4, 4, &vp8_bilinear_predict4x4_sse2))); #endif #if HAVE_SSSE3 INSTANTIATE_TEST_CASE_P( diff --git a/media/libvpx/libvpx/test/quantize_test.cc b/media/libvpx/libvpx/test/quantize_test.cc index 40bb2642e425..a7497742ceba 100644 --- a/media/libvpx/libvpx/test/quantize_test.cc +++ b/media/libvpx/libvpx/test/quantize_test.cc @@ -9,12 +9,14 @@ */ #include +#include #include "third_party/googletest/src/include/gtest/gtest.h" -#include "./vpx_config.h" #include "./vp8_rtcd.h" +#include "./vpx_config.h" #include "test/acm_random.h" +#include "test/bench.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" @@ -33,10 +35,10 @@ const int kNumBlockEntries = 16; typedef void (*VP8Quantize)(BLOCK *b, BLOCKD *d); -typedef std::tr1::tuple VP8QuantizeParam; +typedef std::tuple VP8QuantizeParam; using libvpx_test::ACMRandom; -using std::tr1::make_tuple; +using std::make_tuple; // Create and populate a VP8_COMP instance which has a complete set of // quantization inputs as well as a second MACROBLOCKD for output. @@ -116,7 +118,8 @@ class QuantizeTestBase { }; class QuantizeTest : public QuantizeTestBase, - public ::testing::TestWithParam { + public ::testing::TestWithParam, + public AbstractBench { protected: virtual void SetUp() { SetupCompressor(); @@ -124,6 +127,10 @@ class QuantizeTest : public QuantizeTestBase, c_quant_ = GET_PARAM(1); } + virtual void Run() { + asm_quant_(&vp8_comp_->mb.block[0], ¯oblockd_dst_->block[0]); + } + void RunComparison() { for (int i = 0; i < kNumBlocks; ++i) { ASM_REGISTER_STATE_CHECK( @@ -166,6 +173,13 @@ TEST_P(QuantizeTest, TestMultipleQ) { } } +TEST_P(QuantizeTest, DISABLED_Speed) { + FillCoeffRandom(); + + RunNTimes(10000000); + PrintMedian("vp8 quantize"); +} + #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, QuantizeTest, diff --git a/media/libvpx/libvpx/test/register_state_check.h b/media/libvpx/libvpx/test/register_state_check.h index a779e5c06a5d..4366466378fe 100644 --- a/media/libvpx/libvpx/test/register_state_check.h +++ b/media/libvpx/libvpx/test/register_state_check.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TEST_REGISTER_STATE_CHECK_H_ -#define TEST_REGISTER_STATE_CHECK_H_ +#ifndef VPX_TEST_REGISTER_STATE_CHECK_H_ +#define VPX_TEST_REGISTER_STATE_CHECK_H_ #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" @@ -28,7 +28,7 @@ // See platform implementations of RegisterStateCheckXXX for details. 
// -#if defined(_WIN64) +#if defined(_WIN64) && VPX_ARCH_X86_64 #undef NOMINMAX #define NOMINMAX @@ -138,9 +138,9 @@ class RegisterStateCheck {}; } // namespace libvpx_test -#endif // _WIN64 +#endif // _WIN64 && VPX_ARCH_X86_64 -#if ARCH_X86 || ARCH_X86_64 +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 #if defined(__GNUC__) namespace libvpx_test { @@ -178,10 +178,10 @@ class RegisterStateCheckMMX { } // namespace libvpx_test #endif // __GNUC__ -#endif // ARCH_X86 || ARCH_X86_64 +#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64 #ifndef API_REGISTER_STATE_CHECK #define API_REGISTER_STATE_CHECK ASM_REGISTER_STATE_CHECK #endif -#endif // TEST_REGISTER_STATE_CHECK_H_ +#endif // VPX_TEST_REGISTER_STATE_CHECK_H_ diff --git a/media/libvpx/libvpx/test/resize_test.cc b/media/libvpx/libvpx/test/resize_test.cc index e95dc6651a14..5f323db5abb3 100644 --- a/media/libvpx/libvpx/test/resize_test.cc +++ b/media/libvpx/libvpx/test/resize_test.cc @@ -93,7 +93,21 @@ struct FrameInfo { void ScaleForFrameNumber(unsigned int frame, unsigned int initial_w, unsigned int initial_h, unsigned int *w, - unsigned int *h, int flag_codec) { + unsigned int *h, bool flag_codec, + bool smaller_width_larger_size_) { + if (smaller_width_larger_size_) { + if (frame < 30) { + *w = initial_w; + *h = initial_h; + return; + } + if (frame < 100) { + *w = initial_w * 7 / 10; + *h = initial_h * 16 / 10; + return; + } + return; + } if (frame < 10) { *w = initial_w; *h = initial_h; @@ -248,8 +262,10 @@ class ResizingVideoSource : public ::libvpx_test::DummyVideoSource { ResizingVideoSource() { SetSize(kInitialWidth, kInitialHeight); limit_ = 350; + smaller_width_larger_size_ = false; } - int flag_codec_; + bool flag_codec_; + bool smaller_width_larger_size_; virtual ~ResizingVideoSource() {} protected: @@ -258,7 +274,7 @@ class ResizingVideoSource : public ::libvpx_test::DummyVideoSource { unsigned int width; unsigned int height; ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height, - flag_codec_); + flag_codec_, smaller_width_larger_size_); SetSize(width, height); FillFrame(); } @@ -277,17 +293,35 @@ class ResizeTest SetMode(GET_PARAM(1)); } + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + ASSERT_NE(static_cast(pkt->data.frame.width[0]), 0); + ASSERT_NE(static_cast(pkt->data.frame.height[0]), 0); + encode_frame_width_.push_back(pkt->data.frame.width[0]); + encode_frame_height_.push_back(pkt->data.frame.height[0]); + } + + unsigned int GetFrameWidth(size_t idx) const { + return encode_frame_width_[idx]; + } + + unsigned int GetFrameHeight(size_t idx) const { + return encode_frame_height_[idx]; + } + virtual void DecompressedFrameHook(const vpx_image_t &img, vpx_codec_pts_t pts) { frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h)); } std::vector frame_info_list_; + std::vector encode_frame_width_; + std::vector encode_frame_height_; }; TEST_P(ResizeTest, TestExternalResizeWorks) { ResizingVideoSource video; - video.flag_codec_ = 0; + video.flag_codec_ = false; + video.smaller_width_larger_size_ = false; cfg_.g_lag_in_frames = 0; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); @@ -296,8 +330,12 @@ TEST_P(ResizeTest, TestExternalResizeWorks) { const unsigned int frame = static_cast(info->pts); unsigned int expected_w; unsigned int expected_h; + const size_t idx = info - frame_info_list_.begin(); + ASSERT_EQ(info->w, GetFrameWidth(idx)); + ASSERT_EQ(info->h, GetFrameHeight(idx)); ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w, - &expected_h, 0); + &expected_h, video.flag_codec_, + 
video.smaller_width_larger_size_); EXPECT_EQ(expected_w, info->w) << "Frame " << frame << " had unexpected width"; EXPECT_EQ(expected_h, info->h) @@ -464,8 +502,23 @@ class ResizeRealtimeTest ++mismatch_nframes_; } + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + ASSERT_NE(static_cast(pkt->data.frame.width[0]), 0); + ASSERT_NE(static_cast(pkt->data.frame.height[0]), 0); + encode_frame_width_.push_back(pkt->data.frame.width[0]); + encode_frame_height_.push_back(pkt->data.frame.height[0]); + } + unsigned int GetMismatchFrames() { return mismatch_nframes_; } + unsigned int GetFrameWidth(size_t idx) const { + return encode_frame_width_[idx]; + } + + unsigned int GetFrameHeight(size_t idx) const { + return encode_frame_height_[idx]; + } + void DefaultConfig() { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 600; @@ -493,11 +546,14 @@ class ResizeRealtimeTest bool change_bitrate_; double mismatch_psnr_; int mismatch_nframes_; + std::vector encode_frame_width_; + std::vector encode_frame_height_; }; TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) { ResizingVideoSource video; - video.flag_codec_ = 1; + video.flag_codec_ = true; + video.smaller_width_larger_size_ = false; DefaultConfig(); // Disable internal resize for this test. cfg_.rc_resize_allowed = 0; @@ -512,7 +568,36 @@ TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) { unsigned int expected_w; unsigned int expected_h; ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w, - &expected_h, 1); + &expected_h, video.flag_codec_, + video.smaller_width_larger_size_); + EXPECT_EQ(expected_w, info->w) + << "Frame " << frame << " had unexpected width"; + EXPECT_EQ(expected_h, info->h) + << "Frame " << frame << " had unexpected height"; + EXPECT_EQ(static_cast(0), GetMismatchFrames()); + } +} + +TEST_P(ResizeRealtimeTest, DISABLED_TestExternalResizeSmallerWidthBiggerSize) { + ResizingVideoSource video; + video.flag_codec_ = true; + video.smaller_width_larger_size_ = true; + DefaultConfig(); + // Disable internal resize for this test. 
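+  // Worked example of the schedule exercised here (illustrative only; the
+  // numbers assume a 640x480 initial size, which is not shown in this hunk):
+  // from frame 30 on, ScaleForFrameNumber() yields 640 * 7 / 10 x
+  // 480 * 16 / 10 = 448x768, i.e. a smaller width (448 < 640) but a larger
+  // total size (448 * 768 = 344064 pixels > 640 * 480 = 307200), which is
+  // what the test name refers to.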
+ cfg_.rc_resize_allowed = 0; + change_bitrate_ = false; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + + for (std::vector::const_iterator info = frame_info_list_.begin(); + info != frame_info_list_.end(); ++info) { + const unsigned int frame = static_cast(info->pts); + unsigned int expected_w; + unsigned int expected_h; + ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w, + &expected_h, video.flag_codec_, + video.smaller_width_larger_size_); EXPECT_EQ(expected_w, info->w) << "Frame " << frame << " had unexpected width"; EXPECT_EQ(expected_h, info->h) @@ -582,6 +667,9 @@ TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) { int resize_count = 0; for (std::vector::const_iterator info = frame_info_list_.begin(); info != frame_info_list_.end(); ++info) { + const size_t idx = info - frame_info_list_.begin(); + ASSERT_EQ(info->w, GetFrameWidth(idx)); + ASSERT_EQ(info->h, GetFrameHeight(idx)); if (info->w != last_w || info->h != last_h) { resize_count++; if (resize_count == 1) { diff --git a/media/libvpx/libvpx/test/sad_test.cc b/media/libvpx/libvpx/test/sad_test.cc index 67c3c531506e..e39775cc4628 100644 --- a/media/libvpx/libvpx/test/sad_test.cc +++ b/media/libvpx/libvpx/test/sad_test.cc @@ -10,19 +10,21 @@ #include #include -#include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" +#include "test/bench.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vpx/vpx_codec.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" +#include "vpx_ports/msvc.h" +#include "vpx_ports/vpx_timer.h" template struct TestParams { @@ -46,6 +48,12 @@ typedef void (*SadMxNx4Func)(const uint8_t *src_ptr, int src_stride, unsigned int *sad_array); typedef TestParams SadMxNx4Param; +typedef void (*SadMxNx8Func)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sad_array); + +typedef TestParams SadMxNx8Param; + using libvpx_test::ACMRandom; namespace { @@ -84,7 +92,7 @@ class SADTestBase : public ::testing::TestWithParam { #endif // CONFIG_VP9_HIGHBITDEPTH } mask_ = (1 << bit_depth_) - 1; - source_stride_ = (params_.width + 31) & ~31; + source_stride_ = (params_.width + 63) & ~63; reference_stride_ = params_.width * 2; rnd_.Reset(ACMRandom::DeterministicSeed()); } @@ -108,29 +116,43 @@ class SADTestBase : public ::testing::TestWithParam { protected: // Handle blocks up to 4 blocks 64x64 with stride up to 128 - static const int kDataAlignment = 16; + // crbug.com/webm/1660 + // const[expr] should be sufficient for DECLARE_ALIGNED but early + // implementations of c++11 appear to have some issues with it. 
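+  // The enum below works around that: an enumerator is an integral constant
+  // expression on every supported compiler, so it can be used directly as an
+  // alignment and array bound, e.g. (as done later in this file):
+  //   DECLARE_ALIGNED(kDataAlignment, uint32_t, exp_sad[8]);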
+ enum { kDataAlignment = 32 }; static const int kDataBlockSize = 64 * 128; static const int kDataBufferSize = 4 * kDataBlockSize; - uint8_t *GetReference(int block_idx) const { + int GetBlockRefOffset(int block_idx) const { + return block_idx * kDataBlockSize; + } + + uint8_t *GetReferenceFromOffset(int ref_offset) const { + assert((params_.height - 1) * reference_stride_ + params_.width - 1 + + ref_offset < + kDataBufferSize); #if CONFIG_VP9_HIGHBITDEPTH if (use_high_bit_depth_) { return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) + - block_idx * kDataBlockSize); + ref_offset); } #endif // CONFIG_VP9_HIGHBITDEPTH - return reference_data_ + block_idx * kDataBlockSize; + return reference_data_ + ref_offset; + } + + uint8_t *GetReference(int block_idx) const { + return GetReferenceFromOffset(GetBlockRefOffset(block_idx)); } // Sum of Absolute Differences. Given two blocks, calculate the absolute // difference between two pixels in the same relative location; accumulate. - uint32_t ReferenceSAD(int block_idx) const { + uint32_t ReferenceSAD(int ref_offset) const { uint32_t sad = 0; - const uint8_t *const reference8 = GetReference(block_idx); + const uint8_t *const reference8 = GetReferenceFromOffset(ref_offset); const uint8_t *const source8 = source_data_; #if CONFIG_VP9_HIGHBITDEPTH const uint16_t *const reference16 = - CONVERT_TO_SHORTPTR(GetReference(block_idx)); + CONVERT_TO_SHORTPTR(GetReferenceFromOffset(ref_offset)); const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_); #endif // CONFIG_VP9_HIGHBITDEPTH for (int h = 0; h < params_.height; ++h) { @@ -201,24 +223,28 @@ class SADTestBase : public ::testing::TestWithParam { } } - void FillRandom(uint8_t *data, int stride) { + void FillRandomWH(uint8_t *data, int stride, int w, int h) { uint8_t *data8 = data; #if CONFIG_VP9_HIGHBITDEPTH uint16_t *data16 = CONVERT_TO_SHORTPTR(data); #endif // CONFIG_VP9_HIGHBITDEPTH - for (int h = 0; h < params_.height; ++h) { - for (int w = 0; w < params_.width; ++w) { + for (int r = 0; r < h; ++r) { + for (int c = 0; c < w; ++c) { if (!use_high_bit_depth_) { - data8[h * stride + w] = rnd_.Rand8(); + data8[r * stride + c] = rnd_.Rand8(); #if CONFIG_VP9_HIGHBITDEPTH } else { - data16[h * stride + w] = rnd_.Rand16() & mask_; + data16[r * stride + c] = rnd_.Rand16() & mask_; #endif // CONFIG_VP9_HIGHBITDEPTH } } } } + void FillRandom(uint8_t *data, int stride) { + FillRandomWH(data, stride, params_.width, params_.height); + } + uint32_t mask_; vpx_bit_depth_t bit_depth_; int source_stride_; @@ -239,6 +265,30 @@ class SADTestBase : public ::testing::TestWithParam { ParamType params_; }; +class SADx8Test : public SADTestBase { + public: + SADx8Test() : SADTestBase(GetParam()) {} + + protected: + void SADs(unsigned int *results) const { + const uint8_t *reference = GetReferenceFromOffset(0); + + ASM_REGISTER_STATE_CHECK(params_.func( + source_data_, source_stride_, reference, reference_stride_, results)); + } + + void CheckSADs() const { + uint32_t reference_sad; + DECLARE_ALIGNED(kDataAlignment, uint32_t, exp_sad[8]); + + SADs(exp_sad); + for (int offset = 0; offset < 8; ++offset) { + reference_sad = ReferenceSAD(offset); + EXPECT_EQ(reference_sad, exp_sad[offset]) << "offset " << offset; + } + } +}; + class SADx4Test : public SADTestBase { public: SADx4Test() : SADTestBase(GetParam()) {} @@ -253,18 +303,19 @@ class SADx4Test : public SADTestBase { } void CheckSADs() const { - uint32_t reference_sad, exp_sad[4]; + uint32_t reference_sad; + DECLARE_ALIGNED(kDataAlignment, uint32_t, 
exp_sad[4]); SADs(exp_sad); for (int block = 0; block < 4; ++block) { - reference_sad = ReferenceSAD(block); + reference_sad = ReferenceSAD(GetBlockRefOffset(block)); EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block; } } }; -class SADTest : public SADTestBase { +class SADTest : public AbstractBench, public SADTestBase { public: SADTest() : SADTestBase(GetParam()) {} @@ -279,11 +330,16 @@ class SADTest : public SADTestBase { } void CheckSAD() const { - const unsigned int reference_sad = ReferenceSAD(0); + const unsigned int reference_sad = ReferenceSAD(GetBlockRefOffset(0)); const unsigned int exp_sad = SAD(0); ASSERT_EQ(reference_sad, exp_sad); } + + void Run() { + params_.func(source_data_, source_stride_, reference_data_, + reference_stride_); + } }; class SADavgTest : public SADTestBase { @@ -350,6 +406,17 @@ TEST_P(SADTest, ShortSrc) { source_stride_ = tmp_stride; } +TEST_P(SADTest, DISABLED_Speed) { + const int kCountSpeedTestBlock = 50000000 / (params_.width * params_.height); + FillRandom(source_data_, source_stride_); + + RunNTimes(kCountSpeedTestBlock); + + char title[16]; + snprintf(title, sizeof(title), "%dx%d", params_.width, params_.height); + PrintMedian(title); +} + TEST_P(SADavgTest, MaxRef) { FillConstant(source_data_, source_stride_, 0); FillConstant(reference_data_, reference_stride_, mask_); @@ -463,6 +530,46 @@ TEST_P(SADx4Test, SrcAlignedByWidth) { source_data_ = tmp_source_data; } +TEST_P(SADx4Test, DISABLED_Speed) { + int tmp_stride = reference_stride_; + reference_stride_ -= 1; + FillRandom(source_data_, source_stride_); + FillRandom(GetReference(0), reference_stride_); + FillRandom(GetReference(1), reference_stride_); + FillRandom(GetReference(2), reference_stride_); + FillRandom(GetReference(3), reference_stride_); + const int kCountSpeedTestBlock = 500000000 / (params_.width * params_.height); + uint32_t reference_sad[4]; + DECLARE_ALIGNED(kDataAlignment, uint32_t, exp_sad[4]); + vpx_usec_timer timer; + + memset(reference_sad, 0, sizeof(reference_sad)); + SADs(exp_sad); + vpx_usec_timer_start(&timer); + for (int i = 0; i < kCountSpeedTestBlock; ++i) { + for (int block = 0; block < 4; ++block) { + reference_sad[block] = ReferenceSAD(GetBlockRefOffset(block)); + } + } + vpx_usec_timer_mark(&timer); + for (int block = 0; block < 4; ++block) { + EXPECT_EQ(reference_sad[block], exp_sad[block]) << "block " << block; + } + const int elapsed_time = + static_cast(vpx_usec_timer_elapsed(&timer) / 1000); + printf("sad%dx%dx4 (%2dbit) time: %5d ms\n", params_.width, params_.height, + bit_depth_, elapsed_time); + + reference_stride_ = tmp_stride; +} + +TEST_P(SADx8Test, Regular) { + FillRandomWH(source_data_, source_stride_, params_.width, params_.height); + FillRandomWH(GetReferenceFromOffset(0), reference_stride_, params_.width + 8, + params_.height); + CheckSADs(); +} + //------------------------------------------------------------------------------ // C functions const SadMxNParam c_tests[] = { @@ -639,6 +746,24 @@ const SadMxNx4Param x4d_c_tests[] = { }; INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests)); +// TODO(angiebird): implement the marked-down sad functions +const SadMxNx8Param x8_c_tests[] = { + // SadMxNx8Param(64, 64, &vpx_sad64x64x8_c), + // SadMxNx8Param(64, 32, &vpx_sad64x32x8_c), + // SadMxNx8Param(32, 64, &vpx_sad32x64x8_c), + SadMxNx8Param(32, 32, &vpx_sad32x32x8_c), + // SadMxNx8Param(32, 16, &vpx_sad32x16x8_c), + // SadMxNx8Param(16, 32, &vpx_sad16x32x8_c), + SadMxNx8Param(16, 16, &vpx_sad16x16x8_c), + 
SadMxNx8Param(16, 8, &vpx_sad16x8x8_c), + SadMxNx8Param(8, 16, &vpx_sad8x16x8_c), + SadMxNx8Param(8, 8, &vpx_sad8x8x8_c), + // SadMxNx8Param(8, 4, &vpx_sad8x4x8_c), + // SadMxNx8Param(4, 8, &vpx_sad4x8x8_c), + SadMxNx8Param(4, 4, &vpx_sad4x4x8_c), +}; +INSTANTIATE_TEST_CASE_P(C, SADx8Test, ::testing::ValuesIn(x8_c_tests)); + //------------------------------------------------------------------------------ // ARM functions #if HAVE_NEON @@ -867,7 +992,15 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests)); #endif // HAVE_SSSE3 #if HAVE_SSE4_1 -// Only functions are x8, which do not have tests. +const SadMxNx8Param x8_sse4_1_tests[] = { + SadMxNx8Param(16, 16, &vpx_sad16x16x8_sse4_1), + SadMxNx8Param(16, 8, &vpx_sad16x8x8_sse4_1), + SadMxNx8Param(8, 16, &vpx_sad8x16x8_sse4_1), + SadMxNx8Param(8, 8, &vpx_sad8x8x8_sse4_1), + SadMxNx8Param(4, 4, &vpx_sad4x4x8_sse4_1), +}; +INSTANTIATE_TEST_CASE_P(SSE4_1, SADx8Test, + ::testing::ValuesIn(x8_sse4_1_tests)); #endif // HAVE_SSE4_1 #if HAVE_AVX2 @@ -894,6 +1027,12 @@ const SadMxNx4Param x4d_avx2_tests[] = { SadMxNx4Param(32, 32, &vpx_sad32x32x4d_avx2), }; INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests)); + +const SadMxNx8Param x8_avx2_tests[] = { + // SadMxNx8Param(64, 64, &vpx_sad64x64x8_c), + SadMxNx8Param(32, 32, &vpx_sad32x32x8_avx2), +}; +INSTANTIATE_TEST_CASE_P(AVX2, SADx8Test, ::testing::ValuesIn(x8_avx2_tests)); #endif // HAVE_AVX2 #if HAVE_AVX512 @@ -971,6 +1110,9 @@ const SadMxNParam vsx_tests[] = { SadMxNParam(16, 32, &vpx_sad16x32_vsx), SadMxNParam(16, 16, &vpx_sad16x16_vsx), SadMxNParam(16, 8, &vpx_sad16x8_vsx), + SadMxNParam(8, 16, &vpx_sad8x16_vsx), + SadMxNParam(8, 8, &vpx_sad8x8_vsx), + SadMxNParam(8, 4, &vpx_sad8x4_vsx), }; INSTANTIATE_TEST_CASE_P(VSX, SADTest, ::testing::ValuesIn(vsx_tests)); diff --git a/media/libvpx/libvpx/test/simple_encode_test.cc b/media/libvpx/libvpx/test/simple_encode_test.cc new file mode 100644 index 000000000000..e93a1a46903a --- /dev/null +++ b/media/libvpx/libvpx/test/simple_encode_test.cc @@ -0,0 +1,154 @@ +#include <math.h> +#include <memory> +#include <vector> +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "vp9/simple_encode.h" + +namespace vp9 { +namespace { + +// TODO(angiebird): Find a better way to construct encode info +const int w = 352; +const int h = 288; +const int frame_rate_num = 30; +const int frame_rate_den = 1; +const int target_bitrate = 1000; +const int num_frames = 17; +const char infile_path[] = "bus_352x288_420_f20_b8.yuv"; + +double GetBitrateInKbps(size_t bit_size, int num_frames, int frame_rate_num, + int frame_rate_den) { + return static_cast<double>(bit_size) / num_frames * frame_rate_num / + frame_rate_den / 1000.0; +} + +TEST(SimpleEncode, ComputeFirstPassStats) { + SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den, + target_bitrate, num_frames, infile_path); + simple_encode.ComputeFirstPassStats(); + std::vector<std::vector<double>> frame_stats = + simple_encode.ObserveFirstPassStats(); + EXPECT_EQ(frame_stats.size(), static_cast<size_t>(num_frames)); + size_t data_num = frame_stats[0].size(); + // Read ObserveFirstPassStats before changing FIRSTPASS_STATS.
+ EXPECT_EQ(data_num, static_cast<size_t>(25)); + for (size_t i = 0; i < frame_stats.size(); ++i) { + EXPECT_EQ(frame_stats[i].size(), data_num); + // FIRSTPASS_STATS's first element is the frame index. + EXPECT_EQ(frame_stats[i][0], i); + // FIRSTPASS_STATS's last element is count, and the count is 1 for single + // frame stats. + EXPECT_EQ(frame_stats[i][data_num - 1], 1); + } +} + +TEST(SimpleEncode, GetCodingFrameNum) { + SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den, + target_bitrate, num_frames, infile_path); + simple_encode.ComputeFirstPassStats(); + int num_coding_frames = simple_encode.GetCodingFrameNum(); + EXPECT_EQ(num_coding_frames, 19); +} + +TEST(SimpleEncode, EncodeFrame) { + SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den, + target_bitrate, num_frames, infile_path); + simple_encode.ComputeFirstPassStats(); + int num_coding_frames = simple_encode.GetCodingFrameNum(); + EXPECT_GE(num_coding_frames, num_frames); + // The coding frames include actual show frames and alternate reference + // frames, which are not shown. + int ref_num_alternate_references = num_coding_frames - num_frames; + int num_alternate_references = 0; + simple_encode.StartEncode(); + size_t total_data_bit_size = 0; + for (int i = 0; i < num_coding_frames; ++i) { + EncodeFrameResult encode_frame_result; + simple_encode.EncodeFrame(&encode_frame_result); + if (i == 0) { + EXPECT_EQ(encode_frame_result.show_idx, 0); + EXPECT_EQ(encode_frame_result.frame_type, kKeyFrame) + << "The first coding frame should be a key frame"; + } + if (encode_frame_result.frame_type == kAlternateReference) { + ++num_alternate_references; + } + EXPECT_GE(encode_frame_result.show_idx, 0); + EXPECT_LT(encode_frame_result.show_idx, num_frames); + if (i == num_coding_frames - 1) { + EXPECT_EQ(encode_frame_result.show_idx, num_frames - 1) + << "The last coding frame should be the last in display order"; + } + EXPECT_GE(encode_frame_result.psnr, 34) + << "The psnr is supposed to be greater than 34 given the " + "target_bitrate 1000 kbps"; + total_data_bit_size += encode_frame_result.coding_data_bit_size; + } + EXPECT_EQ(num_alternate_references, ref_num_alternate_references); + const double bitrate = GetBitrateInKbps(total_data_bit_size, num_frames, + frame_rate_num, frame_rate_den); + const double off_target_threshold = 150; + EXPECT_LE(fabs(target_bitrate - bitrate), off_target_threshold); + simple_encode.EndEncode(); +} + +TEST(SimpleEncode, EncodeFrameWithQuantizeIndex) { + SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den, + target_bitrate, num_frames, infile_path); + simple_encode.ComputeFirstPassStats(); + int num_coding_frames = simple_encode.GetCodingFrameNum(); + simple_encode.StartEncode(); + for (int i = 0; i < num_coding_frames; ++i) { + const int assigned_quantize_index = 100 + i; + EncodeFrameResult encode_frame_result; + simple_encode.EncodeFrameWithQuantizeIndex(&encode_frame_result, + assigned_quantize_index); + EXPECT_EQ(encode_frame_result.quantize_index, assigned_quantize_index); + } + simple_encode.EndEncode(); +} + +TEST(SimpleEncode, EncodeConsistencyTest) { + std::vector<int> quantize_index_list; + std::vector<uint64_t> ref_sse_list; + std::vector<double> ref_psnr_list; + std::vector<size_t> ref_bit_size_list; + { + SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den, + target_bitrate, num_frames, infile_path); + simple_encode.ComputeFirstPassStats(); + const int num_coding_frames = simple_encode.GetCodingFrameNum(); + simple_encode.StartEncode(); + for (int i = 0; i < num_coding_frames; ++i) { + EncodeFrameResult
encode_frame_result; + simple_encode.EncodeFrame(&encode_frame_result); + quantize_index_list.push_back(encode_frame_result.quantize_index); + ref_sse_list.push_back(encode_frame_result.sse); + ref_psnr_list.push_back(encode_frame_result.psnr); + ref_bit_size_list.push_back(encode_frame_result.coding_data_bit_size); + } + simple_encode.EndEncode(); + } + { + SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den, + target_bitrate, num_frames, infile_path); + simple_encode.ComputeFirstPassStats(); + const int num_coding_frames = simple_encode.GetCodingFrameNum(); + EXPECT_EQ(static_cast<size_t>(num_coding_frames), + quantize_index_list.size()); + simple_encode.StartEncode(); + for (int i = 0; i < num_coding_frames; ++i) { + EncodeFrameResult encode_frame_result; + simple_encode.EncodeFrameWithQuantizeIndex(&encode_frame_result, + quantize_index_list[i]); + EXPECT_EQ(encode_frame_result.quantize_index, quantize_index_list[i]); + EXPECT_EQ(encode_frame_result.sse, ref_sse_list[i]); + EXPECT_DOUBLE_EQ(encode_frame_result.psnr, ref_psnr_list[i]); + EXPECT_EQ(encode_frame_result.coding_data_bit_size, ref_bit_size_list[i]); + } + simple_encode.EndEncode(); + } +} +} // namespace + +} // namespace vp9 diff --git a/media/libvpx/libvpx/test/stress.sh b/media/libvpx/libvpx/test/stress.sh index a899c800ca63..fdec764c7aef 100755 --- a/media/libvpx/libvpx/test/stress.sh +++ b/media/libvpx/libvpx/test/stress.sh @@ -30,7 +30,7 @@ SHA1_FILE="$(dirname $0)/test-data.sha1" # Download a file from the url and check its sha1sum. download_and_check_file() { # Get the file from the file path. - local readonly root="${1#${LIBVPX_TEST_DATA_PATH}/}" + local root="${1#${LIBVPX_TEST_DATA_PATH}/}" # Download the file using curl. Trap to ensure no partial file is left behind. (trap "rm -f $1" INT TERM \ @@ -72,13 +72,13 @@ stress_verify_environment() { # This function runs tests on libvpx that run multiple encodes and decodes # in parallel in hopes of catching synchronization and/or threading issues.
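# Calling convention, mirrored from the call sites below (the argument
# values here are illustrative):
#   stress <codec> <webm file> <decode count> <threads> <extra encoder args>
# e.g. stress vp8 "${VP8}" "${vp8_max_jobs}" 4 "--token-parts=2"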
stress() { - local readonly decoder="$(vpx_tool_path vpxdec)" - local readonly encoder="$(vpx_tool_path vpxenc)" - local readonly codec="$1" - local readonly webm="$2" - local readonly decode_count="$3" - local readonly threads="$4" - local readonly enc_args="$5" + local decoder="$(vpx_tool_path vpxdec)" + local encoder="$(vpx_tool_path vpxenc)" + local codec="$1" + local webm="$2" + local decode_count="$3" + local threads="$4" + local enc_args="$5" local pids="" local rt_max_jobs=${STRESS_RT_MAX_JOBS:-5} local onepass_max_jobs=${STRESS_ONEPASS_MAX_JOBS:-5} @@ -144,6 +144,19 @@ vp8_stress_test() { fi } +vp8_stress_test_token_parititions() { + local vp8_max_jobs=${STRESS_VP8_DECODE_MAX_JOBS:-40} + if [ "$(vp8_decode_available)" = "yes" -a \ + "$(vp8_encode_available)" = "yes" ]; then + for threads in 2 4 8; do + for token_partitions in 1 2 3; do + stress vp8 "${VP8}" "${vp8_max_jobs}" ${threads} \ + "--token-parts=$token_partitions" + done + done + fi +} + vp9_stress() { local vp9_max_jobs=${STRESS_VP9_DECODE_MAX_JOBS:-25} @@ -154,16 +167,17 @@ vp9_stress() { } vp9_stress_test() { - for threads in 4 8 100; do + for threads in 4 8 64; do vp9_stress "$threads" "--row-mt=0" done } vp9_stress_test_row_mt() { - for threads in 4 8 100; do + for threads in 4 8 64; do vp9_stress "$threads" "--row-mt=1" done } run_tests stress_verify_environment \ - "vp8_stress_test vp9_stress_test vp9_stress_test_row_mt" + "vp8_stress_test vp8_stress_test_token_parititions + vp9_stress_test vp9_stress_test_row_mt" diff --git a/media/libvpx/libvpx/test/sum_squares_test.cc b/media/libvpx/libvpx/test/sum_squares_test.cc index 9c407c649f41..d2c70f4d4b4d 100644 --- a/media/libvpx/libvpx/test/sum_squares_test.cc +++ b/media/libvpx/libvpx/test/sum_squares_test.cc @@ -11,6 +11,7 @@ #include #include #include +#include #include "third_party/googletest/src/include/gtest/gtest.h" @@ -28,7 +29,7 @@ namespace { const int kNumIterations = 10000; typedef uint64_t (*SSI16Func)(const int16_t *src, int stride, int size); -typedef std::tr1::tuple SumSquaresParam; +typedef std::tuple SumSquaresParam; class SumSquaresTest : public ::testing::TestWithParam { public: @@ -102,7 +103,14 @@ TEST_P(SumSquaresTest, ExtremeValues) { } } -using std::tr1::make_tuple; +using std::make_tuple; + +#if HAVE_NEON +INSTANTIATE_TEST_CASE_P( + NEON, SumSquaresTest, + ::testing::Values(make_tuple(&vpx_sum_squares_2d_i16_c, + &vpx_sum_squares_2d_i16_neon))); +#endif // HAVE_NEON #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( @@ -112,8 +120,9 @@ INSTANTIATE_TEST_CASE_P( #endif // HAVE_SSE2 #if HAVE_MSA -INSTANTIATE_TEST_CASE_P(MSA, SumSquaresTest, ::testing::Values(make_tuple( - &vpx_sum_squares_2d_i16_c, - &vpx_sum_squares_2d_i16_msa))); +INSTANTIATE_TEST_CASE_P( + MSA, SumSquaresTest, + ::testing::Values(make_tuple(&vpx_sum_squares_2d_i16_c, + &vpx_sum_squares_2d_i16_msa))); #endif // HAVE_MSA } // namespace diff --git a/media/libvpx/libvpx/test/superframe_test.cc b/media/libvpx/libvpx/test/superframe_test.cc index 421dfccd609d..8c8d1ae2904f 100644 --- a/media/libvpx/libvpx/test/superframe_test.cc +++ b/media/libvpx/libvpx/test/superframe_test.cc @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ #include <climits> +#include <tuple> + #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" @@ -18,7 +20,7 @@ namespace { const int kTestMode = 0; -typedef std::tr1::tuple<const libvpx_test::CodecFactory *, int> SuperframeTestParam; +typedef std::tuple<const libvpx_test::CodecFactory *, int> SuperframeTestParam; class SuperframeTest : public ::libvpx_test::EncoderTest, @@ -31,7 +33,7 @@ class SuperframeTest virtual void SetUp() { InitializeConfig(); const SuperframeTestParam input = GET_PARAM(1); - const libvpx_test::TestMode mode = std::tr1::get<kTestMode>(input); + const libvpx_test::TestMode mode = std::get<kTestMode>(input); SetMode(mode); sf_count_ = 0; sf_count_max_ = INT_MAX; @@ -41,7 +43,7 @@ class SuperframeTest virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, libvpx_test::Encoder *encoder) { - if (video->frame() == 1) { + if (video->frame() == 0) { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); } } diff --git a/media/libvpx/libvpx/test/svc_datarate_test.cc b/media/libvpx/libvpx/test/svc_datarate_test.cc new file mode 100644 index 000000000000..d6b247723f2e --- /dev/null +++ b/media/libvpx/libvpx/test/svc_datarate_test.cc @@ -0,0 +1,1428 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "./vpx_config.h" +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/svc_test.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vpx/vpx_codec.h" +#include "vpx_ports/bitops.h" + +namespace svc_test { +namespace { + +typedef enum { + // Inter-layer prediction is on for all frames. + INTER_LAYER_PRED_ON, + // Inter-layer prediction is off on all frames. + INTER_LAYER_PRED_OFF, + // Inter-layer prediction is off on non-key frames and non-sync frames. + INTER_LAYER_PRED_OFF_NONKEY, + // Inter-layer prediction is on for all frames, but constrained such + // that any layer S (> 0) can only predict from previous spatial + // layer S-1, from the same superframe.
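+  // (For example, with three spatial layers SL0..SL2, SL2 may predict from
+  // SL1 of the same superframe, but not directly from SL0.)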
+ INTER_LAYER_PRED_ON_CONSTRAINED +} INTER_LAYER_PRED; + +class DatarateOnePassCbrSvc : public OnePassCbrSvc { + public: + explicit DatarateOnePassCbrSvc(const ::libvpx_test::CodecFactory *codec) + : OnePassCbrSvc(codec) { + inter_layer_pred_mode_ = 0; + } + + protected: + virtual ~DatarateOnePassCbrSvc() {} + + virtual void ResetModel() { + last_pts_ = 0; + duration_ = 0.0; + mismatch_psnr_ = 0.0; + mismatch_nframes_ = 0; + denoiser_on_ = 0; + tune_content_ = 0; + base_speed_setting_ = 5; + spatial_layer_id_ = 0; + temporal_layer_id_ = 0; + update_pattern_ = 0; + memset(bits_in_buffer_model_, 0, sizeof(bits_in_buffer_model_)); + memset(bits_total_, 0, sizeof(bits_total_)); + memset(layer_target_avg_bandwidth_, 0, sizeof(layer_target_avg_bandwidth_)); + dynamic_drop_layer_ = false; + change_bitrate_ = false; + last_pts_ref_ = 0; + middle_bitrate_ = 0; + top_bitrate_ = 0; + superframe_count_ = -1; + key_frame_spacing_ = 9999; + num_nonref_frames_ = 0; + layer_framedrop_ = 0; + force_key_ = 0; + force_key_test_ = 0; + insert_layer_sync_ = 0; + layer_sync_on_base_ = 0; + force_intra_only_frame_ = 0; + superframe_has_intra_only_ = 0; + use_post_encode_drop_ = 0; + denoiser_off_on_ = false; + denoiser_enable_layers_ = false; + } + virtual void BeginPassHook(unsigned int /*pass*/) {} + + // Example pattern for spatial layers and 2 temporal layers used in the + // bypass/flexible mode. The pattern corresponds to the pattern + // VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in + // non-flexible mode, except that we disable inter-layer prediction. + void set_frame_flags_bypass_mode( + int tl, int num_spatial_layers, int is_key_frame, + vpx_svc_ref_frame_config_t *ref_frame_config) { + for (int sl = 0; sl < num_spatial_layers; ++sl) + ref_frame_config->update_buffer_slot[sl] = 0; + + for (int sl = 0; sl < num_spatial_layers; ++sl) { + if (tl == 0) { + ref_frame_config->lst_fb_idx[sl] = sl; + if (sl) { + if (is_key_frame) { + ref_frame_config->lst_fb_idx[sl] = sl - 1; + ref_frame_config->gld_fb_idx[sl] = sl; + } else { + ref_frame_config->gld_fb_idx[sl] = sl - 1; + } + } else { + ref_frame_config->gld_fb_idx[sl] = 0; + } + ref_frame_config->alt_fb_idx[sl] = 0; + } else if (tl == 1) { + ref_frame_config->lst_fb_idx[sl] = sl; + ref_frame_config->gld_fb_idx[sl] = + VPXMIN(REF_FRAMES - 1, num_spatial_layers + sl - 1); + ref_frame_config->alt_fb_idx[sl] = + VPXMIN(REF_FRAMES - 1, num_spatial_layers + sl); + } + if (!tl) { + if (!sl) { + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 0; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->lst_fb_idx[sl]; + } else { + if (is_key_frame) { + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 0; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->gld_fb_idx[sl]; + } else { + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 0; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->lst_fb_idx[sl]; + } + } + } else if (tl == 1) { + if (!sl) { + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 0; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->alt_fb_idx[sl]; + } else { + ref_frame_config->reference_last[sl] = 1; + 
ref_frame_config->reference_golden[sl] = 0; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->alt_fb_idx[sl]; + } + } + } + } + + void CheckLayerRateTargeting(int num_spatial_layers, int num_temporal_layers, + double thresh_overshoot, + double thresh_undershoot) const { + for (int sl = 0; sl < num_spatial_layers; ++sl) + for (int tl = 0; tl < num_temporal_layers; ++tl) { + const int layer = sl * num_temporal_layers + tl; + ASSERT_GE(cfg_.layer_target_bitrate[layer], + file_datarate_[layer] * thresh_overshoot) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(cfg_.layer_target_bitrate[layer], + file_datarate_[layer] * thresh_undershoot) + << " The datarate for the file is lower than the target by too " + "much!"; + } + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + PreEncodeFrameHookSetup(video, encoder); + + if (video->frame() == 0) { + if (force_intra_only_frame_) { + // Decoder sets the color_space for Intra-only frames + // to BT_601 (see line 1810 in vp9_decodeframe.c). + // So set it here in these tests to avoid encoder-decoder + // mismatch check on color space setting. + encoder->Control(VP9E_SET_COLOR_SPACE, VPX_CS_BT_601); + } + encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_); + encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_); + encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED, inter_layer_pred_mode_); + + if (layer_framedrop_) { + vpx_svc_frame_drop_t svc_drop_frame; + svc_drop_frame.framedrop_mode = LAYER_DROP; + for (int i = 0; i < number_spatial_layers_; i++) + svc_drop_frame.framedrop_thresh[i] = 30; + svc_drop_frame.max_consec_drop = 30; + encoder->Control(VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame); + } + + if (use_post_encode_drop_) { + encoder->Control(VP9E_SET_POSTENCODE_DROP, use_post_encode_drop_); + } + } + + if (denoiser_off_on_) { + encoder->Control(VP9E_SET_AQ_MODE, 3); + // Set inter_layer_pred to INTER_LAYER_PRED_OFF_NONKEY (K-SVC). + encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED, 2); + if (!denoiser_enable_layers_) { + if (video->frame() == 0) + encoder->Control(VP9E_SET_NOISE_SENSITIVITY, 0); + else if (video->frame() == 100) + encoder->Control(VP9E_SET_NOISE_SENSITIVITY, 1); + } else { + // Cumulative bitrates for top spatial layers, for + // 3 temporal layers. + if (video->frame() == 0) { + encoder->Control(VP9E_SET_NOISE_SENSITIVITY, 0); + // Change layer bitrates to set top spatial layer to 0. + // This is for 3 spatial 3 temporal layers. + // This will trigger skip encoding/dropping of top spatial layer. + cfg_.rc_target_bitrate -= cfg_.layer_target_bitrate[8]; + for (int i = 0; i < 3; i++) + bitrate_sl3_[i] = cfg_.layer_target_bitrate[i + 6]; + cfg_.layer_target_bitrate[6] = 0; + cfg_.layer_target_bitrate[7] = 0; + cfg_.layer_target_bitrate[8] = 0; + encoder->Config(&cfg_); + } else if (video->frame() == 100) { + // Change layer bitrates to non-zero on top spatial layer. + // This will trigger skip encoding of top spatial layer + // on key frame (period = 100). + for (int i = 0; i < 3; i++) + cfg_.layer_target_bitrate[i + 6] = bitrate_sl3_[i]; + cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[8]; + encoder->Config(&cfg_); + } else if (video->frame() == 120) { + // Enable denoiser and top spatial layer after key frame (period is + // 100).
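+          // (Recap of this denoiser_enable_layers_ schedule: frame 0 zeroes
+          // the top spatial layer's bitrates and turns the denoiser off,
+          // frame 100 restores the bitrates at the key frame, and frame 120
+          // turns the denoiser back on.)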
+ encoder->Control(VP9E_SET_NOISE_SENSITIVITY, 1); + } + } + } + + if (update_pattern_ && video->frame() >= 100) { + vpx_svc_layer_id_t layer_id; + if (video->frame() == 100) { + cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + encoder->Config(&cfg_); + } + // Set layer id since the pattern changed. + layer_id.spatial_layer_id = 0; + layer_id.temporal_layer_id = (video->frame() % 2 != 0); + temporal_layer_id_ = layer_id.temporal_layer_id; + for (int i = 0; i < number_spatial_layers_; i++) + layer_id.temporal_layer_id_per_spatial[i] = temporal_layer_id_; + encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id); + set_frame_flags_bypass_mode(layer_id.temporal_layer_id, + number_spatial_layers_, 0, &ref_frame_config); + encoder->Control(VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config); + } + + if (change_bitrate_ && video->frame() == 200) { + duration_ = (last_pts_ + 1) * timebase_; + for (int sl = 0; sl < number_spatial_layers_; ++sl) { + for (int tl = 0; tl < number_temporal_layers_; ++tl) { + const int layer = sl * number_temporal_layers_ + tl; + const double file_size_in_kb = bits_total_[layer] / 1000.; + file_datarate_[layer] = file_size_in_kb / duration_; + } + } + + CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, + 0.78, 1.15); + + memset(file_datarate_, 0, sizeof(file_datarate_)); + memset(bits_total_, 0, sizeof(bits_total_)); + int64_t bits_in_buffer_model_tmp[VPX_MAX_LAYERS]; + last_pts_ref_ = last_pts_; + // Set new target bitrate. + cfg_.rc_target_bitrate = cfg_.rc_target_bitrate >> 1; + // Buffer level should not reset on dynamic bitrate change. + memcpy(bits_in_buffer_model_tmp, bits_in_buffer_model_, + sizeof(bits_in_buffer_model_)); + AssignLayerBitrates(); + memcpy(bits_in_buffer_model_, bits_in_buffer_model_tmp, + sizeof(bits_in_buffer_model_)); + + // Change config to update encoder with new bitrate configuration. + encoder->Config(&cfg_); + } + + if (dynamic_drop_layer_) { + // TODO(jian): Disable AQ Mode for this test for now. + encoder->Control(VP9E_SET_AQ_MODE, 0); + if (video->frame() == 0) { + // Change layer bitrates to set top layers to 0. This will trigger skip + // encoding/dropping of top two spatial layers. + cfg_.rc_target_bitrate -= + (cfg_.layer_target_bitrate[1] + cfg_.layer_target_bitrate[2]); + middle_bitrate_ = cfg_.layer_target_bitrate[1]; + top_bitrate_ = cfg_.layer_target_bitrate[2]; + cfg_.layer_target_bitrate[1] = 0; + cfg_.layer_target_bitrate[2] = 0; + encoder->Config(&cfg_); + } else if (video->frame() == 50) { + // Change layer bitrates to non-zero on two top spatial layers. + // This will trigger skip encoding of top two spatial layers. + cfg_.layer_target_bitrate[1] = middle_bitrate_; + cfg_.layer_target_bitrate[2] = top_bitrate_; + cfg_.rc_target_bitrate += + cfg_.layer_target_bitrate[2] + cfg_.layer_target_bitrate[1]; + encoder->Config(&cfg_); + } else if (video->frame() == 100) { + // Change layer bitrates to set top layers to 0. This will trigger skip + // encoding/dropping of top two spatial layers. + cfg_.rc_target_bitrate -= + (cfg_.layer_target_bitrate[1] + cfg_.layer_target_bitrate[2]); + middle_bitrate_ = cfg_.layer_target_bitrate[1]; + top_bitrate_ = cfg_.layer_target_bitrate[2]; + cfg_.layer_target_bitrate[1] = 0; + cfg_.layer_target_bitrate[2] = 0; + encoder->Config(&cfg_); + } else if (video->frame() == 150) { + // Change layer bitrate on second layer to non-zero to start + // encoding it again.
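+        // (Recap of the dynamic_drop_layer_ schedule: frames 0 and 100 drop
+        // the two top spatial layers, frame 50 restores both, and frames
+        // 150 and 200 restore layers 1 and 2 one at a time.)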
+        cfg_.layer_target_bitrate[1] = middle_bitrate_;
+        cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[1];
+        encoder->Config(&cfg_);
+      } else if (video->frame() == 200) {
+        // Change layer bitrate on top layer to non-zero to start
+        // encoding it again.
+        cfg_.layer_target_bitrate[2] = top_bitrate_;
+        cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[2];
+        encoder->Config(&cfg_);
+      }
+    }
+
+    if (force_key_test_ && force_key_) frame_flags_ = VPX_EFLAG_FORCE_KF;
+
+    if (insert_layer_sync_) {
+      vpx_svc_spatial_layer_sync_t svc_layer_sync;
+      svc_layer_sync.base_layer_intra_only = 0;
+      for (int i = 0; i < number_spatial_layers_; i++)
+        svc_layer_sync.spatial_layer_sync[i] = 0;
+      if (force_intra_only_frame_) {
+        superframe_has_intra_only_ = 0;
+        if (video->frame() == 0) {
+          svc_layer_sync.base_layer_intra_only = 1;
+          svc_layer_sync.spatial_layer_sync[0] = 1;
+          encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
+          superframe_has_intra_only_ = 1;
+        } else if (video->frame() == 100) {
+          svc_layer_sync.base_layer_intra_only = 1;
+          svc_layer_sync.spatial_layer_sync[0] = 1;
+          encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
+          superframe_has_intra_only_ = 1;
+        }
+      } else {
+        layer_sync_on_base_ = 0;
+        if (video->frame() == 150) {
+          svc_layer_sync.spatial_layer_sync[1] = 1;
+          encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
+        } else if (video->frame() == 240) {
+          svc_layer_sync.spatial_layer_sync[2] = 1;
+          encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
+        } else if (video->frame() == 320) {
+          svc_layer_sync.spatial_layer_sync[0] = 1;
+          layer_sync_on_base_ = 1;
+          encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
+        }
+      }
+    }
+
+    const vpx_rational_t tb = video->timebase();
+    timebase_ = static_cast<double>(tb.num) / tb.den;
+    duration_ = 0;
+  }
+
+  vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz,
+                                         uint32_t sizes[8], int *count) {
+    uint8_t marker;
+    marker = *(data + data_sz - 1);
+    *count = 0;
+    if ((marker & 0xe0) == 0xc0) {
+      const uint32_t frames = (marker & 0x7) + 1;
+      const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+      const size_t index_sz = 2 + mag * frames;
+      // This chunk is marked as having a superframe index but doesn't have
+      // enough data for it, thus it's an invalid superframe index.
+      if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;
+      {
+        const uint8_t marker2 = *(data + data_sz - index_sz);
+        // This chunk is marked as having a superframe index but doesn't have
+        // the matching marker byte at the front of the index therefore it's an
+        // invalid chunk.
+        if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME;
+      }
+      {
+        uint32_t i, j;
+        const uint8_t *x = &data[data_sz - index_sz + 1];
+        for (i = 0; i < frames; ++i) {
+          uint32_t this_sz = 0;
+
+          for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8);
+          sizes[i] = this_sz;
+        }
+        *count = frames;
+      }
+    }
+    return VPX_CODEC_OK;
+  }
+
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    uint32_t sizes[8] = { 0 };
+    uint32_t sizes_parsed[8] = { 0 };
+    int count = 0;
+    int num_layers_encoded = 0;
+    last_pts_ = pkt->data.frame.pts;
+    const bool key_frame =
+        (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
+    if (key_frame) {
+      // For test that inserts layer sync frames: requesting a layer_sync on
+      // the base layer must force a key frame. So if any key frame occurs
+      // after the first superframe, it must be due to layer sync on the base
+      // spatial layer.
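+      // Aside (illustrative, based on the VP9 superframe format): the index
+      // walked by parse_superframe_index() above is the trailing chunk
+      //
+      //   [marker][size_0 .. size_{F-1}][marker]
+      //
+      // where marker = 0b110MMFFF, FFF+1 is the frame count F and MM+1 the
+      // bytes per little-endian size field; e.g. two frames with 2-byte
+      // sizes occupy 2 + 2 * 2 = 6 trailing bytes.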
+      if (superframe_count_ > 0 && insert_layer_sync_ &&
+          !force_intra_only_frame_) {
+        ASSERT_EQ(layer_sync_on_base_, 1);
+      }
+      temporal_layer_id_ = 0;
+      superframe_count_ = 0;
+    }
+    parse_superframe_index(static_cast<const uint8_t *>(pkt->data.frame.buf),
+                           pkt->data.frame.sz, sizes_parsed, &count);
+    // Count may be less than number of spatial layers because of frame drops.
+    for (int sl = 0; sl < number_spatial_layers_; ++sl) {
+      if (pkt->data.frame.spatial_layer_encoded[sl]) {
+        sizes[sl] = sizes_parsed[num_layers_encoded];
+        num_layers_encoded++;
+      }
+    }
+    // For a superframe with an intra-only frame, count will be +1 larger
+    // because of the no-show frame.
+    if (force_intra_only_frame_ && superframe_has_intra_only_)
+      ASSERT_EQ(count, num_layers_encoded + 1);
+    else
+      ASSERT_EQ(count, num_layers_encoded);
+
+    // In the constrained frame drop mode, if a given spatial layer is dropped
+    // all upper layers must be dropped too.
+    if (!layer_framedrop_) {
+      int num_layers_dropped = 0;
+      for (int sl = 0; sl < number_spatial_layers_; ++sl) {
+        if (!pkt->data.frame.spatial_layer_encoded[sl]) {
+          // Check that all upper layers are dropped.
+          num_layers_dropped++;
+          for (int sl2 = sl + 1; sl2 < number_spatial_layers_; ++sl2)
+            ASSERT_EQ(pkt->data.frame.spatial_layer_encoded[sl2], 0);
+        }
+      }
+      if (num_layers_dropped == number_spatial_layers_ - 1)
+        force_key_ = 1;
+      else
+        force_key_ = 0;
+    }
+    // Keep track of number of non-reference frames, needed for mismatch check.
+    // Non-reference frames are top spatial and temporal layer frames,
+    // for TL > 0.
+    if (temporal_layer_id_ == number_temporal_layers_ - 1 &&
+        temporal_layer_id_ > 0 &&
+        pkt->data.frame.spatial_layer_encoded[number_spatial_layers_ - 1])
+      num_nonref_frames_++;
+    for (int sl = 0; sl < number_spatial_layers_; ++sl) {
+      sizes[sl] = sizes[sl] << 3;
+      // Update the total encoded bits per layer.
+      // For temporal layers, update the cumulative encoded bits per layer.
+      for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
+        const int layer = sl * number_temporal_layers_ + tl;
+        bits_total_[layer] += static_cast<int64_t>(sizes[sl]);
+        // Update the per-layer buffer level with the encoded frame size.
+        bits_in_buffer_model_[layer] -= static_cast<int64_t>(sizes[sl]);
+        // There should be no buffer underrun, except on the base
+        // temporal layer, since there may be key frames there.
+        // For short key frame spacing, buffer can underrun on individual
+        // frames.
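+        // Aside (illustrative sketch of the model): this is a per-layer
+        // leaky bucket. Each superframe it is refilled in
+        // OnePassCbrSvc::PostEncodeFrameHook and drained here:
+        //
+        //   bits_in_buffer_model_[layer] += layer_target_avg_bandwidth_[layer];
+        //   bits_in_buffer_model_[layer] -= encoded_frame_bits;
+        //
+        // so a negative level in the assert below means the layer overdrew
+        // its CBR budget at that point in the sequence.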
+        if (!key_frame && tl > 0 && key_frame_spacing_ < 100) {
+          ASSERT_GE(bits_in_buffer_model_[layer], 0)
+              << "Buffer Underrun at frame " << pkt->data.frame.pts;
+        }
+      }
+
+      ASSERT_EQ(pkt->data.frame.width[sl],
+                top_sl_width_ * svc_params_.scaling_factor_num[sl] /
+                    svc_params_.scaling_factor_den[sl]);
+
+      ASSERT_EQ(pkt->data.frame.height[sl],
+                top_sl_height_ * svc_params_.scaling_factor_num[sl] /
+                    svc_params_.scaling_factor_den[sl]);
+    }
+  }
+
+  virtual void EndPassHook(void) {
+    if (change_bitrate_) last_pts_ = last_pts_ - last_pts_ref_;
+    duration_ = (last_pts_ + 1) * timebase_;
+    for (int sl = 0; sl < number_spatial_layers_; ++sl) {
+      for (int tl = 0; tl < number_temporal_layers_; ++tl) {
+        const int layer = sl * number_temporal_layers_ + tl;
+        const double file_size_in_kb = bits_total_[layer] / 1000.;
+        file_datarate_[layer] = file_size_in_kb / duration_;
+      }
+    }
+  }
+
+  virtual void MismatchHook(const vpx_image_t *img1, const vpx_image_t *img2) {
+    double mismatch_psnr = compute_psnr(img1, img2);
+    mismatch_psnr_ += mismatch_psnr;
+    ++mismatch_nframes_;
+  }
+
+  unsigned int GetMismatchFrames() { return mismatch_nframes_; }
+  unsigned int GetNonRefFrames() { return num_nonref_frames_; }
+
+  vpx_codec_pts_t last_pts_;
+  double timebase_;
+  int64_t bits_total_[VPX_MAX_LAYERS];
+  double duration_;
+  double file_datarate_[VPX_MAX_LAYERS];
+  size_t bits_in_last_frame_;
+  double mismatch_psnr_;
+  int denoiser_on_;
+  int tune_content_;
+  int spatial_layer_id_;
+  bool dynamic_drop_layer_;
+  unsigned int top_sl_width_;
+  unsigned int top_sl_height_;
+  vpx_svc_ref_frame_config_t ref_frame_config;
+  int update_pattern_;
+  bool change_bitrate_;
+  vpx_codec_pts_t last_pts_ref_;
+  int middle_bitrate_;
+  int top_bitrate_;
+  int key_frame_spacing_;
+  int layer_framedrop_;
+  int force_key_;
+  int force_key_test_;
+  int inter_layer_pred_mode_;
+  int insert_layer_sync_;
+  int layer_sync_on_base_;
+  int force_intra_only_frame_;
+  int superframe_has_intra_only_;
+  int use_post_encode_drop_;
+  int bitrate_sl3_[3];
+  // Denoiser switched on the fly.
+  bool denoiser_off_on_;
+  // Top layer enabled on the fly.
+  bool denoiser_enable_layers_;
+
+ private:
+  virtual void SetConfig(const int num_temporal_layer) {
+    cfg_.rc_end_usage = VPX_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 1;
+    if (num_temporal_layer == 3) {
+      cfg_.ts_rate_decimator[0] = 4;
+      cfg_.ts_rate_decimator[1] = 2;
+      cfg_.ts_rate_decimator[2] = 1;
+      cfg_.temporal_layering_mode = 3;
+    } else if (num_temporal_layer == 2) {
+      cfg_.ts_rate_decimator[0] = 2;
+      cfg_.ts_rate_decimator[1] = 1;
+      cfg_.temporal_layering_mode = 2;
+    } else if (num_temporal_layer == 1) {
+      cfg_.ts_rate_decimator[0] = 1;
+      cfg_.temporal_layering_mode = 0;
+    }
+  }
+
+  unsigned int num_nonref_frames_;
+  unsigned int mismatch_nframes_;
+};
+
+// Params: speed setting.
+class DatarateOnePassCbrSvcSingleBR
+    : public DatarateOnePassCbrSvc,
+      public ::libvpx_test::CodecTestWithParam<int> {
+ public:
+  DatarateOnePassCbrSvcSingleBR() : DatarateOnePassCbrSvc(GET_PARAM(0)) {
+    memset(&svc_params_, 0, sizeof(svc_params_));
+  }
+  virtual ~DatarateOnePassCbrSvcSingleBR() {}
+
+ protected:
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libvpx_test::kRealTime);
+    speed_setting_ = GET_PARAM(1);
+    ResetModel();
+  }
+};
+
+// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1
+// temporal layer, with screen content mode on and same speed setting for all
+// layers.
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc2SL1TLScreenContent1) {
+  SetSvcConfig(2, 1);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.rc_dropframe_thresh = 10;
+  cfg_.kf_max_dist = 9999;
+
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+  top_sl_width_ = 1280;
+  top_sl_height_ = 720;
+  cfg_.rc_target_bitrate = 500;
+  ResetModel();
+  tune_content_ = 1;
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78,
+                          1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
+// 3 temporal layers, with force key frame after frame drop.
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TLForceKey) {
+  SetSvcConfig(3, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  cfg_.rc_target_bitrate = 100;
+  ResetModel();
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78,
+                          1.25);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
+// 2 temporal layers, with a change on the fly from the fixed SVC pattern to
+// one generated via SVC_SET_REF_FRAME_CONFIG. The new pattern also disables
+// inter-layer prediction.
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL2TLDynamicPatternChange) {
+  SetSvcConfig(3, 2);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  // Change SVC pattern on the fly.
+  update_pattern_ = 1;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  cfg_.rc_target_bitrate = 800;
+  ResetModel();
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78,
+                          1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC with 3 spatial and 3 temporal
+// layers, for inter_layer_pred=OffKey (K-SVC) and on the fly switching
+// of denoiser from off to on (on at frame = 100). Key frame period is set to
+// 1000 so the denoiser is enabled on non-key frames.
+TEST_P(DatarateOnePassCbrSvcSingleBR,
+       OnePassCbrSvc3SL3TL_DenoiserOffOnFixedLayers) {
+  SetSvcConfig(3, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 1000;
+  ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv", 1280,
+                                       720, 30, 1, 0, 300);
+  top_sl_width_ = 1280;
+  top_sl_height_ = 720;
+  cfg_.rc_target_bitrate = 1000;
+  ResetModel();
+  denoiser_off_on_ = true;
+  denoiser_enable_layers_ = false;
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Don't check rate targeting on the two top spatial layers since they will
+  // be skipped for part of the sequence.
+  CheckLayerRateTargeting(number_spatial_layers_ - 2, number_temporal_layers_,
+                          0.78, 1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC with 3 spatial and 3 temporal
+// layers, for inter_layer_pred=OffKey (K-SVC) and on the fly switching
+// of denoiser from off to on, for dynamic layers. Start at 2 spatial layers
+// and enable 3rd spatial layer at frame = 100. Use periodic key frame with
+// period 100 so enabling of spatial layer occurs at key frame. Enable denoiser
+// at frame > 100, after the key frame sync.
+TEST_P(DatarateOnePassCbrSvcSingleBR,
+       OnePassCbrSvc3SL3TL_DenoiserOffOnEnableLayers) {
+  SetSvcConfig(3, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.rc_dropframe_thresh = 0;
+  cfg_.kf_max_dist = 100;
+  ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv", 1280,
+                                       720, 30, 1, 0, 300);
+  top_sl_width_ = 1280;
+  top_sl_height_ = 720;
+  cfg_.rc_target_bitrate = 1000;
+  ResetModel();
+  denoiser_off_on_ = true;
+  denoiser_enable_layers_ = true;
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Don't check rate targeting on the two top spatial layers since they will
+  // be skipped for part of the sequence.
+  CheckLayerRateTargeting(number_spatial_layers_ - 2, number_temporal_layers_,
+                          0.78, 1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC with 3 spatial layers and on
+// the fly switching to 1 and then 2 and back to 3 spatial layers. This switch
+// is done by setting spatial layer bitrates to 0, and then back to non-zero,
+// during the sequence.
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL_DisableEnableLayers) {
+  SetSvcConfig(3, 1);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.temporal_layering_mode = 0;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  cfg_.rc_target_bitrate = 800;
+  ResetModel();
+  dynamic_drop_layer_ = true;
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Don't check rate targeting on the two top spatial layers since they will
+  // be skipped for part of the sequence.
+  CheckLayerRateTargeting(number_spatial_layers_ - 2, number_temporal_layers_,
+                          0.78, 1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Run SVC encoder for 1 temporal layer, 2 spatial layers, with spatial
+// downscale 5x5.
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc2SL1TL5x5MultipleRuns) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.ss_number_layers = 2;
+  cfg_.ts_number_layers = 1;
+  cfg_.ts_rate_decimator[0] = 1;
+  cfg_.g_error_resilient = 1;
+  cfg_.g_threads = 3;
+  cfg_.temporal_layering_mode = 0;
+  svc_params_.scaling_factor_num[0] = 256;
+  svc_params_.scaling_factor_den[0] = 1280;
+  svc_params_.scaling_factor_num[1] = 1280;
+  svc_params_.scaling_factor_den[1] = 1280;
+  cfg_.rc_dropframe_thresh = 10;
+  cfg_.kf_max_dist = 999999;
+  cfg_.kf_min_dist = 0;
+  cfg_.ss_target_bitrate[0] = 300;
+  cfg_.ss_target_bitrate[1] = 1400;
+  cfg_.layer_target_bitrate[0] = 300;
+  cfg_.layer_target_bitrate[1] = 1400;
+  cfg_.rc_target_bitrate = 1700;
+  number_spatial_layers_ = cfg_.ss_number_layers;
+  number_temporal_layers_ = cfg_.ts_number_layers;
+  ResetModel();
+  layer_target_avg_bandwidth_[0] = cfg_.layer_target_bitrate[0] * 1000 / 30;
+  bits_in_buffer_model_[0] =
+      cfg_.layer_target_bitrate[0] * cfg_.rc_buf_initial_sz;
+  layer_target_avg_bandwidth_[1] = cfg_.layer_target_bitrate[1] * 1000 / 30;
+  bits_in_buffer_model_[1] =
+      cfg_.layer_target_bitrate[1] * cfg_.rc_buf_initial_sz;
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+  top_sl_width_ = 1280;
+  top_sl_height_ = 720;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78,
+                          1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Params: speed setting and index for bitrate array.
+class DatarateOnePassCbrSvcMultiBR
+    : public DatarateOnePassCbrSvc,
+      public ::libvpx_test::CodecTestWith2Params<int, int> {
+ public:
+  DatarateOnePassCbrSvcMultiBR() : DatarateOnePassCbrSvc(GET_PARAM(0)) {
+    memset(&svc_params_, 0, sizeof(svc_params_));
+  }
+  virtual ~DatarateOnePassCbrSvcMultiBR() {}
+
+ protected:
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libvpx_test::kRealTime);
+    speed_setting_ = GET_PARAM(1);
+    ResetModel();
+  }
+};
+
+// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
+// 3 temporal layers. Run CIF clip with 1 thread.
+TEST_P(DatarateOnePassCbrSvcMultiBR, OnePassCbrSvc2SL3TL) {
+  SetSvcConfig(2, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  const int bitrates[3] = { 200, 400, 600 };
+  // TODO(marpan): Check that effective_datarate for each layer hits the
+  // layer target_bitrate.
+  cfg_.rc_target_bitrate = bitrates[GET_PARAM(2)];
+  ResetModel();
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.75,
+                          1.2);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Params: speed setting, layer framedrop control and index for bitrate array.
+class DatarateOnePassCbrSvcFrameDropMultiBR
+    : public DatarateOnePassCbrSvc,
+      public ::libvpx_test::CodecTestWith3Params<int, int, int> {
+ public:
+  DatarateOnePassCbrSvcFrameDropMultiBR()
+      : DatarateOnePassCbrSvc(GET_PARAM(0)) {
+    memset(&svc_params_, 0, sizeof(svc_params_));
+  }
+  virtual ~DatarateOnePassCbrSvcFrameDropMultiBR() {}
+
+ protected:
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libvpx_test::kRealTime);
+    speed_setting_ = GET_PARAM(1);
+    ResetModel();
+  }
+};
+
+// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
+// 3 temporal layers. Run HD clip with 4 threads.
+TEST_P(DatarateOnePassCbrSvcFrameDropMultiBR, OnePassCbrSvc2SL3TL4Threads) {
+  SetSvcConfig(2, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 4;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+  top_sl_width_ = 1280;
+  top_sl_height_ = 720;
+  layer_framedrop_ = 0;
+  const int bitrates[3] = { 200, 400, 600 };
+  cfg_.rc_target_bitrate = bitrates[GET_PARAM(3)];
+  ResetModel();
+  layer_framedrop_ = GET_PARAM(2);
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.64,
+                          1.45);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
+// 3 temporal layers. Run HD clip with 4 threads.
+TEST_P(DatarateOnePassCbrSvcFrameDropMultiBR, OnePassCbrSvc3SL3TL4Threads) {
+  SetSvcConfig(3, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 4;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+  top_sl_width_ = 1280;
+  top_sl_height_ = 720;
+  layer_framedrop_ = 0;
+  const int bitrates[3] = { 200, 400, 600 };
+  cfg_.rc_target_bitrate = bitrates[GET_PARAM(3)];
+  ResetModel();
+  layer_framedrop_ = GET_PARAM(2);
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.58,
+                          1.2);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Params: speed setting, inter-layer prediction mode.
+class DatarateOnePassCbrSvcInterLayerPredSingleBR
+    : public DatarateOnePassCbrSvc,
+      public ::libvpx_test::CodecTestWith2Params<int, int> {
+ public:
+  DatarateOnePassCbrSvcInterLayerPredSingleBR()
+      : DatarateOnePassCbrSvc(GET_PARAM(0)) {
+    memset(&svc_params_, 0, sizeof(svc_params_));
+  }
+  virtual ~DatarateOnePassCbrSvcInterLayerPredSingleBR() {}
+
+ protected:
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libvpx_test::kRealTime);
+    speed_setting_ = GET_PARAM(1);
+    inter_layer_pred_mode_ = GET_PARAM(2);
+    ResetModel();
+  }
+};
+
+// Check basic rate targeting with different inter-layer prediction modes for 1
+// pass CBR SVC: 3 spatial layers and 3 temporal layers. Run CIF clip with 1
+// thread.
+TEST_P(DatarateOnePassCbrSvcInterLayerPredSingleBR, OnePassCbrSvc3SL3TL) {
+  // Disable test for inter-layer pred off for now since simulcast_mode fails.
+  if (inter_layer_pred_mode_ == INTER_LAYER_PRED_OFF) return;
+  SetSvcConfig(3, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.temporal_layering_mode = 3;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  cfg_.rc_target_bitrate = 800;
+  ResetModel();
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78,
+                          1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Check rate targeting with different inter-layer prediction modes for 1 pass
+// CBR SVC: 3 spatial layers and 3 temporal layers, changing the target bitrate
+// at the middle of encoding.
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TLDynamicBitrateChange) {
+  SetSvcConfig(3, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  cfg_.rc_target_bitrate = 800;
+  ResetModel();
+  change_bitrate_ = true;
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78,
+                          1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+// Params: speed setting, noise sensitivity, index for bitrate array and inter
+// layer pred mode.
+class DatarateOnePassCbrSvcDenoiser
+    : public DatarateOnePassCbrSvc,
+      public ::libvpx_test::CodecTestWith4Params<int, int, int, int> {
+ public:
+  DatarateOnePassCbrSvcDenoiser() : DatarateOnePassCbrSvc(GET_PARAM(0)) {
+    memset(&svc_params_, 0, sizeof(svc_params_));
+  }
+  virtual ~DatarateOnePassCbrSvcDenoiser() {}
+
+ protected:
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libvpx_test::kRealTime);
+    speed_setting_ = GET_PARAM(1);
+    inter_layer_pred_mode_ = GET_PARAM(3);
+    ResetModel();
+  }
+};
+
+// Check basic rate targeting for 1 pass CBR SVC with denoising.
+// 2 spatial layers and 3 temporal layers. Run HD clip with 2 threads.
+TEST_P(DatarateOnePassCbrSvcDenoiser, OnePassCbrSvc2SL3TLDenoiserOn) {
+  SetSvcConfig(2, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 2;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  number_spatial_layers_ = cfg_.ss_number_layers;
+  number_temporal_layers_ = cfg_.ts_number_layers;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  const int bitrates[3] = { 600, 800, 1000 };
+  // TODO(marpan): Check that effective_datarate for each layer hits the
+  // layer target_bitrate.
+  // For SVC, noise_sen = 1 means denoising only the top spatial layer, and
+  // noise_sen = 2 means denoising the two top spatial layers.
+  cfg_.rc_target_bitrate = bitrates[GET_PARAM(3)];
+  ResetModel();
+  denoiser_on_ = GET_PARAM(2);
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78,
+                          1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+#endif
+
+// Params: speed setting, key frame dist.
+class DatarateOnePassCbrSvcSmallKF
+    : public DatarateOnePassCbrSvc,
+      public ::libvpx_test::CodecTestWith2Params<int, int> {
+ public:
+  DatarateOnePassCbrSvcSmallKF() : DatarateOnePassCbrSvc(GET_PARAM(0)) {
+    memset(&svc_params_, 0, sizeof(svc_params_));
+  }
+  virtual ~DatarateOnePassCbrSvcSmallKF() {}
+
+ protected:
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libvpx_test::kRealTime);
+    speed_setting_ = GET_PARAM(1);
+    ResetModel();
+  }
+};
+
+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
+// temporal layers. Run CIF clip with 1 thread, and a few short key frame
+// periods.
+TEST_P(DatarateOnePassCbrSvcSmallKF, OnePassCbrSvc3SL3TLSmallKf) {
+  SetSvcConfig(3, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.rc_dropframe_thresh = 10;
+  cfg_.rc_target_bitrate = 800;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  // For this 3 temporal layer case, the pattern repeats every 4 frames, so
+  // choose 4 neighboring key frame periods (so the key frame will land on
+  // 0-2-1-2).
+  const int kf_dist = GET_PARAM(2);
+  cfg_.kf_max_dist = kf_dist;
+  key_frame_spacing_ = kf_dist;
+  ResetModel();
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // TODO(jianj): webm:1554
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.70,
+                          1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 3
+// temporal layers. Run CIF clip with 1 thread, and a few short key frame
+// periods.
+TEST_P(DatarateOnePassCbrSvcSmallKF, OnePassCbrSvc2SL3TLSmallKf) {
+  SetSvcConfig(2, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.rc_dropframe_thresh = 10;
+  cfg_.rc_target_bitrate = 400;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  // For this 3 temporal layer case, the pattern repeats every 4 frames, so
+  // choose 4 neighboring key frame periods (so the key frame will land on
+  // 0-2-1-2).
+  const int kf_dist = GET_PARAM(2) + 32;
+  cfg_.kf_max_dist = kf_dist;
+  key_frame_spacing_ = kf_dist;
+  ResetModel();
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78,
+                          1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
+// temporal layers. Run VGA clip with 1 thread, and place layer sync frames:
+// one at middle layer first, then another one for top layer, and another
+// insert for base spatial layer (which forces a key frame).
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TLSyncFrames) {
+  SetSvcConfig(3, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.kf_max_dist = 9999;
+  cfg_.rc_dropframe_thresh = 10;
+  cfg_.rc_target_bitrate = 400;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  ResetModel();
+  insert_layer_sync_ = 1;
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78,
+                          1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Run SVC encoder for 3 spatial layers, 1 temporal layer, with
+// intra-only frame as sync frame on base spatial layer.
+// Intra_only is inserted at start and in middle of sequence.
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL1TLSyncWithIntraOnly) {
+  SetSvcConfig(3, 1);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 4;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  cfg_.rc_target_bitrate = 400;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  ResetModel();
+  insert_layer_sync_ = 1;
+  // Use intra_only frame for sync on base layer.
+  force_intra_only_frame_ = 1;
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.73,
+                          1.2);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Run SVC encoder for 2 quality layers (same resolution, different
+// bitrates), 1 temporal layer, with screen content mode.
+TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc2QL1TLScreen) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.ss_number_layers = 2;
+  cfg_.ts_number_layers = 1;
+  cfg_.ts_rate_decimator[0] = 1;
+  cfg_.temporal_layering_mode = 0;
+  cfg_.g_error_resilient = 1;
+  cfg_.g_threads = 2;
+  svc_params_.scaling_factor_num[0] = 1;
+  svc_params_.scaling_factor_den[0] = 1;
+  svc_params_.scaling_factor_num[1] = 1;
+  svc_params_.scaling_factor_den[1] = 1;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  number_spatial_layers_ = cfg_.ss_number_layers;
+  number_temporal_layers_ = cfg_.ts_number_layers;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  top_sl_width_ = 640;
+  top_sl_height_ = 480;
+  ResetModel();
+  tune_content_ = 1;
+  // Set the layer bitrates, for 2 spatial layers, 1 temporal.
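+  // Aside (illustrative, units spelled out): layer_target_avg_bandwidth_
+  // below is bits per frame,
+  //
+  //   kbit/s * 1000 / fps, e.g. 300 * 1000 / 30 = 10000 bits per frame,
+  //
+  // and bits_in_buffer_model_ is seeded with layer_target_bitrate (kbit/s)
+  // times rc_buf_initial_sz (ms), which is also bits; both feed the
+  // leaky-bucket check in FramePktHook.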
+  cfg_.rc_target_bitrate = 400;
+  cfg_.ss_target_bitrate[0] = 100;
+  cfg_.ss_target_bitrate[1] = 300;
+  cfg_.layer_target_bitrate[0] = 100;
+  cfg_.layer_target_bitrate[1] = 300;
+  for (int sl = 0; sl < 2; ++sl) {
+    float layer_framerate = 30.0;
+    layer_target_avg_bandwidth_[sl] = static_cast<int>(
+        cfg_.layer_target_bitrate[sl] * 1000.0 / layer_framerate);
+    bits_in_buffer_model_[sl] =
+        cfg_.layer_target_bitrate[sl] * cfg_.rc_buf_initial_sz;
+  }
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.73,
+                          1.25);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Params: speed setting.
+class DatarateOnePassCbrSvcPostencodeDrop
+    : public DatarateOnePassCbrSvc,
+      public ::libvpx_test::CodecTestWithParam<int> {
+ public:
+  DatarateOnePassCbrSvcPostencodeDrop() : DatarateOnePassCbrSvc(GET_PARAM(0)) {
+    memset(&svc_params_, 0, sizeof(svc_params_));
+  }
+  virtual ~DatarateOnePassCbrSvcPostencodeDrop() {}
+
+ protected:
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libvpx_test::kRealTime);
+    speed_setting_ = GET_PARAM(1);
+    ResetModel();
+  }
+};
+
+// Run SVC encoder for 2 quality layers (same resolution, different
+// bitrates), 1 temporal layer, with screen content mode.
+TEST_P(DatarateOnePassCbrSvcPostencodeDrop, OnePassCbrSvc2QL1TLScreen) {
+  cfg_.rc_buf_initial_sz = 200;
+  cfg_.rc_buf_optimal_sz = 200;
+  cfg_.rc_buf_sz = 400;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 52;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.ss_number_layers = 2;
+  cfg_.ts_number_layers = 1;
+  cfg_.ts_rate_decimator[0] = 1;
+  cfg_.temporal_layering_mode = 0;
+  cfg_.g_error_resilient = 1;
+  cfg_.g_threads = 2;
+  svc_params_.scaling_factor_num[0] = 1;
+  svc_params_.scaling_factor_den[0] = 1;
+  svc_params_.scaling_factor_num[1] = 1;
+  svc_params_.scaling_factor_den[1] = 1;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.kf_max_dist = 9999;
+  number_spatial_layers_ = cfg_.ss_number_layers;
+  number_temporal_layers_ = cfg_.ts_number_layers;
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 300);
+  top_sl_width_ = 352;
+  top_sl_height_ = 288;
+  ResetModel();
+  base_speed_setting_ = speed_setting_;
+  tune_content_ = 1;
+  use_post_encode_drop_ = 1;
+  // Set the layer bitrates, for 2 spatial layers, 1 temporal.
+  cfg_.rc_target_bitrate = 400;
+  cfg_.ss_target_bitrate[0] = 100;
+  cfg_.ss_target_bitrate[1] = 300;
+  cfg_.layer_target_bitrate[0] = 100;
+  cfg_.layer_target_bitrate[1] = 300;
+  for (int sl = 0; sl < 2; ++sl) {
+    float layer_framerate = 30.0;
+    layer_target_avg_bandwidth_[sl] = static_cast<int>(
+        cfg_.layer_target_bitrate[sl] * 1000.0 / layer_framerate);
+    bits_in_buffer_model_[sl] =
+        cfg_.layer_target_bitrate[sl] * cfg_.rc_buf_initial_sz;
+  }
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.73,
+                          1.25);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSingleBR,
+                          ::testing::Range(5, 10));
+
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcPostencodeDrop,
+                          ::testing::Range(5, 6));
+
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcInterLayerPredSingleBR,
+                          ::testing::Range(5, 10), ::testing::Range(0, 3));
+
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcMultiBR, ::testing::Range(5, 10),
+                          ::testing::Range(0, 3));
+
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcFrameDropMultiBR,
+                          ::testing::Range(5, 10), ::testing::Range(0, 2),
+                          ::testing::Range(0, 3));
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcDenoiser,
+                          ::testing::Range(5, 10), ::testing::Range(1, 3),
+                          ::testing::Range(0, 3), ::testing::Range(0, 4));
+#endif
+
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSmallKF, ::testing::Range(5, 10),
+                          ::testing::Range(32, 36));
+}  // namespace
+}  // namespace svc_test
diff --git a/media/libvpx/libvpx/test/svc_end_to_end_test.cc b/media/libvpx/libvpx/test/svc_end_to_end_test.cc
new file mode 100644
index 000000000000..82259ac30cac
--- /dev/null
+++ b/media/libvpx/libvpx/test/svc_end_to_end_test.cc
@@ -0,0 +1,481 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_config.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/svc_test.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "vpx/vpx_codec.h"
+#include "vpx_ports/bitops.h"
+
+namespace svc_test {
+namespace {
+
+typedef enum {
+  // Inter-layer prediction is on for all frames.
+  INTER_LAYER_PRED_ON,
+  // Inter-layer prediction is off for all frames.
+  INTER_LAYER_PRED_OFF,
+  // Inter-layer prediction is off for non-key frames and non-sync frames.
+  INTER_LAYER_PRED_OFF_NONKEY,
+  // Inter-layer prediction is on for all frames, but constrained such
+  // that any layer S (> 0) can only predict from previous spatial
+  // layer S-1, from the same superframe.
+  INTER_LAYER_PRED_ON_CONSTRAINED
+} INTER_LAYER_PRED;
+
+class ScalePartitionOnePassCbrSvc
+    : public OnePassCbrSvc,
+      public ::testing::TestWithParam<const ::libvpx_test::CodecFactory *> {
+ public:
+  ScalePartitionOnePassCbrSvc()
+      : OnePassCbrSvc(GetParam()), mismatch_nframes_(0), num_nonref_frames_(0) {
+    SetMode(::libvpx_test::kRealTime);
+  }
+
+ protected:
+  virtual ~ScalePartitionOnePassCbrSvc() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    speed_setting_ = 7;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    PreEncodeFrameHookSetup(video, encoder);
+  }
+
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    // Keep track of number of non-reference frames, needed for mismatch check.
+    // Non-reference frames are top spatial and temporal layer frames,
+    // for TL > 0.
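+    // Aside (illustrative): with three temporal layers the TL2 frames are
+    // never used as references, so the encoder may skip the loop filter on
+    // them; their decoded output then legitimately differs from the
+    // encoder's reconstruction, which is what MismatchHook below counts.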
+    if (temporal_layer_id_ == number_temporal_layers_ - 1 &&
+        temporal_layer_id_ > 0 &&
+        pkt->data.frame.spatial_layer_encoded[number_spatial_layers_ - 1])
+      num_nonref_frames_++;
+  }
+
+  virtual void MismatchHook(const vpx_image_t * /*img1*/,
+                            const vpx_image_t * /*img2*/) {
+    ++mismatch_nframes_;
+  }
+
+  virtual void SetConfig(const int /*num_temporal_layer*/) {}
+
+  unsigned int GetMismatchFrames() const { return mismatch_nframes_; }
+  unsigned int GetNonRefFrames() const { return num_nonref_frames_; }
+
+ private:
+  unsigned int mismatch_nframes_;
+  unsigned int num_nonref_frames_;
+};
+
+TEST_P(ScalePartitionOnePassCbrSvc, OnePassCbrSvc3SL3TL1080P) {
+  SetSvcConfig(3, 3);
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.g_threads = 1;
+  cfg_.rc_dropframe_thresh = 10;
+  cfg_.rc_target_bitrate = 800;
+  cfg_.kf_max_dist = 9999;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.g_error_resilient = 1;
+  cfg_.ts_rate_decimator[0] = 4;
+  cfg_.ts_rate_decimator[1] = 2;
+  cfg_.ts_rate_decimator[2] = 1;
+  cfg_.temporal_layering_mode = 3;
+  ::libvpx_test::I420VideoSource video(
+      "slides_code_term_web_plot.1920_1080.yuv", 1920, 1080, 30, 1, 0, 100);
+  // For this 3 temporal layer case, the pattern repeats every 4 frames, so
+  // choose 4 neighboring key frame periods (so the key frame will land on
+  // 0-2-1-2).
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+// Params: Inter layer prediction modes.
+class SyncFrameOnePassCbrSvc : public OnePassCbrSvc,
+                               public ::libvpx_test::CodecTestWithParam<int> {
+ public:
+  SyncFrameOnePassCbrSvc()
+      : OnePassCbrSvc(GET_PARAM(0)), current_video_frame_(0),
+        frame_to_start_decode_(0), frame_to_sync_(0),
+        inter_layer_pred_mode_(GET_PARAM(1)), decode_to_layer_before_sync_(-1),
+        decode_to_layer_after_sync_(-1), denoiser_on_(0),
+        intra_only_test_(false), mismatch_nframes_(0), num_nonref_frames_(0) {
+    SetMode(::libvpx_test::kRealTime);
+    memset(&svc_layer_sync_, 0, sizeof(svc_layer_sync_));
+  }
+
+ protected:
+  virtual ~SyncFrameOnePassCbrSvc() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    speed_setting_ = 7;
+  }
+
+  virtual bool DoDecode() const {
+    return current_video_frame_ >= frame_to_start_decode_;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    current_video_frame_ = video->frame();
+    PreEncodeFrameHookSetup(video, encoder);
+    if (video->frame() == 0) {
+      // Do not turn off inter-layer pred completely because simulcast mode
+      // fails.
+      if (inter_layer_pred_mode_ != INTER_LAYER_PRED_OFF)
+        encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED, inter_layer_pred_mode_);
+      encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
+      if (intra_only_test_)
+        // Decoder sets the color_space for Intra-only frames
+        // to BT_601 (see line 1810 in vp9_decodeframe.c).
+        // So set it here in these tests to avoid encoder-decoder
+        // mismatch check on color space setting.
+        encoder->Control(VP9E_SET_COLOR_SPACE, VPX_CS_BT_601);
+    }
+    if (video->frame() == frame_to_sync_) {
+      encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync_);
+    }
+  }
+
+#if CONFIG_VP9_DECODER
+  virtual void PreDecodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Decoder *decoder) {
+    if (video->frame() < frame_to_sync_) {
+      if (decode_to_layer_before_sync_ >= 0)
+        decoder->Control(VP9_DECODE_SVC_SPATIAL_LAYER,
+                         decode_to_layer_before_sync_);
+    } else {
+      if (decode_to_layer_after_sync_ >= 0)
+        decoder->Control(VP9_DECODE_SVC_SPATIAL_LAYER,
+                         decode_to_layer_after_sync_);
+    }
+  }
+#endif
+
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    // Keep track of number of non-reference frames, needed for mismatch check.
+    // Non-reference frames are top spatial and temporal layer frames,
+    // for TL > 0.
+    if (temporal_layer_id_ == number_temporal_layers_ - 1 &&
+        temporal_layer_id_ > 0 &&
+        pkt->data.frame.spatial_layer_encoded[number_spatial_layers_ - 1] &&
+        current_video_frame_ >= frame_to_sync_)
+      num_nonref_frames_++;
+
+    if (intra_only_test_ && current_video_frame_ == frame_to_sync_) {
+      // Intra-only frame is only generated for spatial layers > 1 and <= 3,
+      // among other conditions (see constraint in set_intra_only_frame()). If
+      // intra-only is not allowed then the encoder will insert a key frame
+      // instead.
+      const bool key_frame =
+          (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
+      if (number_spatial_layers_ == 1 || number_spatial_layers_ > 3)
+        ASSERT_TRUE(key_frame);
+      else
+        ASSERT_FALSE(key_frame);
+    }
+  }
+
+  virtual void MismatchHook(const vpx_image_t * /*img1*/,
+                            const vpx_image_t * /*img2*/) {
+    if (current_video_frame_ >= frame_to_sync_) ++mismatch_nframes_;
+  }
+
+  unsigned int GetMismatchFrames() const { return mismatch_nframes_; }
+  unsigned int GetNonRefFrames() const { return num_nonref_frames_; }
+
+  unsigned int current_video_frame_;
+  unsigned int frame_to_start_decode_;
+  unsigned int frame_to_sync_;
+  int inter_layer_pred_mode_;
+  int decode_to_layer_before_sync_;
+  int decode_to_layer_after_sync_;
+  int denoiser_on_;
+  bool intra_only_test_;
+  vpx_svc_spatial_layer_sync_t svc_layer_sync_;
+
+ private:
+  virtual void SetConfig(const int num_temporal_layer) {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = VPX_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 1;
+    cfg_.g_threads = 1;
+    cfg_.rc_dropframe_thresh = 30;
+    cfg_.kf_max_dist = 9999;
+    if (num_temporal_layer == 3) {
+      cfg_.ts_rate_decimator[0] = 4;
+      cfg_.ts_rate_decimator[1] = 2;
+      cfg_.ts_rate_decimator[2] = 1;
+      cfg_.temporal_layering_mode = 3;
+    } else if (num_temporal_layer == 2) {
+      cfg_.ts_rate_decimator[0] = 2;
+      cfg_.ts_rate_decimator[1] = 1;
+      cfg_.temporal_layering_mode = 2;
+    } else if (num_temporal_layer == 1) {
+      cfg_.ts_rate_decimator[0] = 1;
+      cfg_.temporal_layering_mode = 1;
+    }
+  }
+
+  unsigned int mismatch_nframes_;
+  unsigned int num_nonref_frames_;
+};
+
+// Test for sync layer for 1 pass CBR SVC: 3 spatial layers and
+// 3 temporal layers. Only start decoding on the sync layer.
+// Full sync: insert key frame on base layer.
+TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc3SL3TLFullSync) {
+  SetSvcConfig(3, 3);
+  // Sync is on base layer so the frame to sync and the frame to start decoding
+  // is the same.
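+  // Aside (illustrative timeline for this test, 60-frame clip):
+  //
+  //   frames  0..19 : encoded but not decoded (DoDecode() is false)
+  //   frame     20  : base-layer sync => key frame; decoding starts here
+  //   frames 21..59 : decoded up to spatial layer 2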
+ frame_to_start_decode_ = 20; + frame_to_sync_ = 20; + decode_to_layer_before_sync_ = -1; + decode_to_layer_after_sync_ = 2; + + // Set up svc layer sync structure. + svc_layer_sync_.base_layer_intra_only = 0; + svc_layer_sync_.spatial_layer_sync[0] = 1; + + ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + + cfg_.rc_target_bitrate = 600; + AssignLayerBitrates(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); +#endif +} + +// Test for sync layer for 1 pass CBR SVC: 2 spatial layers and +// 3 temporal layers. Decoding QVGA before sync frame and decode up to +// VGA on and after sync. +TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc2SL3TLSyncToVGA) { + SetSvcConfig(2, 3); + frame_to_start_decode_ = 0; + frame_to_sync_ = 100; + decode_to_layer_before_sync_ = 0; + decode_to_layer_after_sync_ = 1; + + // Set up svc layer sync structure. + svc_layer_sync_.base_layer_intra_only = 0; + svc_layer_sync_.spatial_layer_sync[0] = 0; + svc_layer_sync_.spatial_layer_sync[1] = 1; + + ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + cfg_.rc_target_bitrate = 400; + AssignLayerBitrates(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); +#endif +} + +// Test for sync layer for 1 pass CBR SVC: 3 spatial layers and +// 3 temporal layers. Decoding QVGA and VGA before sync frame and decode up to +// HD on and after sync. +TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc3SL3TLSyncToHD) { + SetSvcConfig(3, 3); + frame_to_start_decode_ = 0; + frame_to_sync_ = 20; + decode_to_layer_before_sync_ = 1; + decode_to_layer_after_sync_ = 2; + + // Set up svc layer sync structure. + svc_layer_sync_.base_layer_intra_only = 0; + svc_layer_sync_.spatial_layer_sync[0] = 0; + svc_layer_sync_.spatial_layer_sync[1] = 0; + svc_layer_sync_.spatial_layer_sync[2] = 1; + + ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + cfg_.rc_target_bitrate = 600; + AssignLayerBitrates(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); +#endif +} + +// Test for sync layer for 1 pass CBR SVC: 3 spatial layers and +// 3 temporal layers. Decoding QVGA before sync frame and decode up to +// HD on and after sync. +TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc3SL3TLSyncToVGAHD) { + SetSvcConfig(3, 3); + frame_to_start_decode_ = 0; + frame_to_sync_ = 20; + decode_to_layer_before_sync_ = 0; + decode_to_layer_after_sync_ = 2; + + // Set up svc layer sync structure. + svc_layer_sync_.base_layer_intra_only = 0; + svc_layer_sync_.spatial_layer_sync[0] = 0; + svc_layer_sync_.spatial_layer_sync[1] = 1; + svc_layer_sync_.spatial_layer_sync[2] = 1; + + ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + cfg_.rc_target_bitrate = 600; + AssignLayerBitrates(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. 
+ EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); +#endif +} + +#if CONFIG_VP9_TEMPORAL_DENOISING +// Test for sync layer for 1 pass CBR SVC: 2 spatial layers and +// 3 temporal layers. Decoding QVGA before sync frame and decode up to +// VGA on and after sync. +TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc2SL3TLSyncFrameVGADenoise) { + SetSvcConfig(2, 3); + frame_to_start_decode_ = 0; + frame_to_sync_ = 100; + decode_to_layer_before_sync_ = 0; + decode_to_layer_after_sync_ = 1; + + denoiser_on_ = 1; + // Set up svc layer sync structure. + svc_layer_sync_.base_layer_intra_only = 0; + svc_layer_sync_.spatial_layer_sync[0] = 0; + svc_layer_sync_.spatial_layer_sync[1] = 1; + + ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + cfg_.rc_target_bitrate = 400; + AssignLayerBitrates(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); +#endif +} +#endif + +// Start decoding from beginning of sequence, during sequence insert intra-only +// on base/qvga layer. Decode all layers. +TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc3SL3TLSyncFrameIntraOnlyQVGA) { + SetSvcConfig(3, 3); + frame_to_start_decode_ = 0; + frame_to_sync_ = 20; + decode_to_layer_before_sync_ = 2; + // The superframe containing intra-only layer will have 4 frames. Thus set the + // layer to decode after sync frame to 3. + decode_to_layer_after_sync_ = 3; + intra_only_test_ = true; + + // Set up svc layer sync structure. + svc_layer_sync_.base_layer_intra_only = 1; + svc_layer_sync_.spatial_layer_sync[0] = 1; + svc_layer_sync_.spatial_layer_sync[1] = 0; + svc_layer_sync_.spatial_layer_sync[2] = 0; + + ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + cfg_.rc_target_bitrate = 600; + AssignLayerBitrates(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); +#endif +} + +// Start decoding from beginning of sequence, during sequence insert intra-only +// on base/qvga layer and sync_layer on middle/VGA layer. Decode all layers. +TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc3SL3TLSyncFrameIntraOnlyVGA) { + SetSvcConfig(3, 3); + frame_to_start_decode_ = 0; + frame_to_sync_ = 20; + decode_to_layer_before_sync_ = 2; + // The superframe containing intra-only layer will have 4 frames. Thus set the + // layer to decode after sync frame to 3. + decode_to_layer_after_sync_ = 3; + intra_only_test_ = true; + + // Set up svc layer sync structure. + svc_layer_sync_.base_layer_intra_only = 1; + svc_layer_sync_.spatial_layer_sync[0] = 1; + svc_layer_sync_.spatial_layer_sync[1] = 1; + svc_layer_sync_.spatial_layer_sync[2] = 0; + + ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); + cfg_.rc_target_bitrate = 600; + AssignLayerBitrates(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); +#endif +} + +// Start decoding from sync frame, insert intra-only on base/qvga layer. Decode +// all layers. For 1 spatial layer, it inserts a key frame. 
+TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc1SL3TLSyncFrameIntraOnlyQVGA) {
+  SetSvcConfig(1, 3);
+  frame_to_start_decode_ = 20;
+  frame_to_sync_ = 20;
+  decode_to_layer_before_sync_ = 0;
+  decode_to_layer_after_sync_ = 0;
+  intra_only_test_ = true;
+
+  // Set up svc layer sync structure.
+  svc_layer_sync_.base_layer_intra_only = 1;
+  svc_layer_sync_.spatial_layer_sync[0] = 1;
+
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+  cfg_.rc_target_bitrate = 600;
+  AssignLayerBitrates();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
+#endif
+}
+
+VP9_INSTANTIATE_TEST_CASE(SyncFrameOnePassCbrSvc, ::testing::Range(0, 3));
+
+INSTANTIATE_TEST_CASE_P(
+    VP9, ScalePartitionOnePassCbrSvc,
+    ::testing::Values(
+        static_cast<const ::libvpx_test::CodecFactory *>(&libvpx_test::kVP9)));
+
+}  // namespace
+}  // namespace svc_test
diff --git a/media/libvpx/libvpx/test/svc_test.cc b/media/libvpx/libvpx/test/svc_test.cc
index 482d9fffa1dc..4798c7718323 100644
--- a/media/libvpx/libvpx/test/svc_test.cc
+++ b/media/libvpx/libvpx/test/svc_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
  *
  * Use of this source code is governed by a BSD-style license
  * that can be found in the LICENSE file in the root of the source
@@ -8,782 +8,127 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
-#include <string>
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/i420_video_source.h"
+#include "test/svc_test.h"
-#include "vp9/decoder/vp9_decoder.h"
+namespace svc_test {
+void OnePassCbrSvc::SetSvcConfig(const int num_spatial_layer,
+                                 const int num_temporal_layer) {
+  SetConfig(num_temporal_layer);
+  cfg_.ss_number_layers = num_spatial_layer;
+  cfg_.ts_number_layers = num_temporal_layer;
+  if (num_spatial_layer == 1) {
+    svc_params_.scaling_factor_num[0] = 288;
+    svc_params_.scaling_factor_den[0] = 288;
+  } else if (num_spatial_layer == 2) {
+    svc_params_.scaling_factor_num[0] = 144;
+    svc_params_.scaling_factor_den[0] = 288;
+    svc_params_.scaling_factor_num[1] = 288;
+    svc_params_.scaling_factor_den[1] = 288;
+  } else if (num_spatial_layer == 3) {
+    svc_params_.scaling_factor_num[0] = 72;
+    svc_params_.scaling_factor_den[0] = 288;
+    svc_params_.scaling_factor_num[1] = 144;
+    svc_params_.scaling_factor_den[1] = 288;
+    svc_params_.scaling_factor_num[2] = 288;
+    svc_params_.scaling_factor_den[2] = 288;
+  }
+  number_spatial_layers_ = cfg_.ss_number_layers;
+  number_temporal_layers_ = cfg_.ts_number_layers;
+}
-#include "vpx/svc_context.h"
-#include "vpx/vp8cx.h"
-#include "vpx/vpx_encoder.h"
+void OnePassCbrSvc::PreEncodeFrameHookSetup(::libvpx_test::VideoSource *video,
+                                            ::libvpx_test::Encoder *encoder) {
+  if (video->frame() == 0) {
+    for (int i = 0; i < VPX_MAX_LAYERS; ++i) {
+      svc_params_.max_quantizers[i] = 63;
+      svc_params_.min_quantizers[i] = 0;
+    }
+    svc_params_.speed_per_layer[0] = base_speed_setting_;
+    for (int i = 1; i < VPX_SS_MAX_LAYERS; ++i) {
+      svc_params_.speed_per_layer[i] = speed_setting_;
+    }
-namespace {
-
-using libvpx_test::CodecFactory;
-using libvpx_test::Decoder;
-using libvpx_test::DxDataIterator;
-using libvpx_test::VP9CodecFactory;
-
-class SvcTest : public ::testing::Test {
- protected:
-  static const uint32_t kWidth = 352;
 
-#include "vpx/svc_context.h"
-#include "vpx/vp8cx.h"
-#include "vpx/vpx_encoder.h"
+void OnePassCbrSvc::PreEncodeFrameHookSetup(::libvpx_test::VideoSource *video,
+                                            ::libvpx_test::Encoder *encoder) {
+  if (video->frame() == 0) {
+    for (int i = 0; i < VPX_MAX_LAYERS; ++i) {
+      svc_params_.max_quantizers[i] = 63;
+      svc_params_.min_quantizers[i] = 0;
+    }
+    svc_params_.speed_per_layer[0] = base_speed_setting_;
+    for (int i = 1; i < VPX_SS_MAX_LAYERS; ++i) {
+      svc_params_.speed_per_layer[i] = speed_setting_;
+    }
 
-namespace {
-
-using libvpx_test::CodecFactory;
-using libvpx_test::Decoder;
-using libvpx_test::DxDataIterator;
-using libvpx_test::VP9CodecFactory;
-
-class SvcTest : public ::testing::Test {
- protected:
-  static const uint32_t kWidth = 352;
-  static const uint32_t kHeight = 288;
-
-  SvcTest()
-      : codec_iface_(0), test_file_name_("hantro_collage_w352h288.yuv"),
-        codec_initialized_(false), decoder_(0) {
-    memset(&svc_, 0, sizeof(svc_));
-    memset(&codec_, 0, sizeof(codec_));
-    memset(&codec_enc_, 0, sizeof(codec_enc_));
+    encoder->Control(VP9E_SET_SVC, 1);
+    encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_);
+    encoder->Control(VP8E_SET_CPUUSED, speed_setting_);
+    encoder->Control(VP9E_SET_AQ_MODE, 3);
+    encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 300);
+    encoder->Control(VP9E_SET_TILE_COLUMNS, get_msb(cfg_.g_threads));
+    encoder->Control(VP9E_SET_ROW_MT, 1);
+    encoder->Control(VP8E_SET_STATIC_THRESHOLD, 1);
   }
 
-  virtual ~SvcTest() {}
-
-  virtual void SetUp() {
-    svc_.log_level = SVC_LOG_DEBUG;
-    svc_.log_print = 0;
-
-    codec_iface_ = vpx_codec_vp9_cx();
-    const vpx_codec_err_t res =
-        vpx_codec_enc_config_default(codec_iface_, &codec_enc_, 0);
-    EXPECT_EQ(VPX_CODEC_OK, res);
-
-    codec_enc_.g_w = kWidth;
-    codec_enc_.g_h = kHeight;
-    codec_enc_.g_timebase.num = 1;
-    codec_enc_.g_timebase.den = 60;
-    codec_enc_.kf_min_dist = 100;
-    codec_enc_.kf_max_dist = 100;
-
-    vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
-    VP9CodecFactory codec_factory;
-    decoder_ = codec_factory.CreateDecoder(dec_cfg, 0);
-
-    tile_columns_ = 0;
-    tile_rows_ = 0;
-  }
-
-  virtual void TearDown() {
-    ReleaseEncoder();
-    delete (decoder_);
-  }
-
-  void InitializeEncoder() {
-    const vpx_codec_err_t res =
-        vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
-    EXPECT_EQ(VPX_CODEC_OK, res);
-    vpx_codec_control(&codec_, VP8E_SET_CPUUSED, 4);  // Make the test faster
-    vpx_codec_control(&codec_, VP9E_SET_TILE_COLUMNS, tile_columns_);
-    vpx_codec_control(&codec_, VP9E_SET_TILE_ROWS, tile_rows_);
-    codec_initialized_ = true;
-  }
-
-  void ReleaseEncoder() {
-    vpx_svc_release(&svc_);
-    if (codec_initialized_) vpx_codec_destroy(&codec_);
-    codec_initialized_ = false;
-  }
-
-  void GetStatsData(std::string *const stats_buf) {
-    vpx_codec_iter_t iter = NULL;
-    const vpx_codec_cx_pkt_t *cx_pkt;
-
-    while ((cx_pkt = vpx_codec_get_cx_data(&codec_, &iter)) != NULL) {
-      if (cx_pkt->kind == VPX_CODEC_STATS_PKT) {
-        EXPECT_GT(cx_pkt->data.twopass_stats.sz, 0U);
-        ASSERT_TRUE(cx_pkt->data.twopass_stats.buf != NULL);
-        stats_buf->append(static_cast<char *>(cx_pkt->data.twopass_stats.buf),
-                          cx_pkt->data.twopass_stats.sz);
-      }
+  superframe_count_++;
+  temporal_layer_id_ = 0;
+  if (number_temporal_layers_ == 2) {
+    temporal_layer_id_ = (superframe_count_ % 2 != 0);
+  } else if (number_temporal_layers_ == 3) {
+    if (superframe_count_ % 2 != 0) temporal_layer_id_ = 2;
+    if (superframe_count_ > 1) {
+      if ((superframe_count_ - 2) % 4 == 0) temporal_layer_id_ = 1;
     }
   }
 
-  void Pass1EncodeNFrames(const int n, const int layers,
-                          std::string *const stats_buf) {
-    vpx_codec_err_t res;
+  frame_flags_ = 0;
+}
 
-    ASSERT_GT(n, 0);
-    ASSERT_GT(layers, 0);
-    svc_.spatial_layers = layers;
-    codec_enc_.g_pass = VPX_RC_FIRST_PASS;
-    InitializeEncoder();
-
-    libvpx_test::I420VideoSource video(
-        test_file_name_, codec_enc_.g_w, codec_enc_.g_h,
-        codec_enc_.g_timebase.den, codec_enc_.g_timebase.num, 0, 30);
-    video.Begin();
-
-    for (int i = 0; i < n; ++i) {
-      res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
-                           video.duration(), VPX_DL_GOOD_QUALITY);
-      ASSERT_EQ(VPX_CODEC_OK, res);
-      GetStatsData(stats_buf);
-      video.Next();
-    }
-
-    // Flush encoder and test EOS packet.
-    res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(), video.duration(),
-                         VPX_DL_GOOD_QUALITY);
-    ASSERT_EQ(VPX_CODEC_OK, res);
-    GetStatsData(stats_buf);
-
-    ReleaseEncoder();
-  }
-
-  void StoreFrames(const size_t max_frame_received,
-                   struct vpx_fixed_buf *const outputs,
-                   size_t *const frame_received) {
-    vpx_codec_iter_t iter = NULL;
-    const vpx_codec_cx_pkt_t *cx_pkt;
-
-    while ((cx_pkt = vpx_codec_get_cx_data(&codec_, &iter)) != NULL) {
-      if (cx_pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
-        const size_t frame_size = cx_pkt->data.frame.sz;
-
-        EXPECT_GT(frame_size, 0U);
-        ASSERT_TRUE(cx_pkt->data.frame.buf != NULL);
-        ASSERT_LT(*frame_received, max_frame_received);
-
-        if (*frame_received == 0)
-          EXPECT_EQ(1, !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY));
-
-        outputs[*frame_received].buf = malloc(frame_size + 16);
-        ASSERT_TRUE(outputs[*frame_received].buf != NULL);
-        memcpy(outputs[*frame_received].buf, cx_pkt->data.frame.buf,
-               frame_size);
-        outputs[*frame_received].sz = frame_size;
-        ++(*frame_received);
-      }
+void OnePassCbrSvc::PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {
+  vpx_svc_layer_id_t layer_id;
+  encoder->Control(VP9E_GET_SVC_LAYER_ID, &layer_id);
+  temporal_layer_id_ = layer_id.temporal_layer_id;
+  for (int sl = 0; sl < number_spatial_layers_; ++sl) {
+    for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
+      const int layer = sl * number_temporal_layers_ + tl;
+      bits_in_buffer_model_[layer] +=
+          static_cast<int64_t>(layer_target_avg_bandwidth_[layer]);
     }
   }
+}
 
-  void Pass2EncodeNFrames(std::string *const stats_buf, const int n,
-                          const int layers,
-                          struct vpx_fixed_buf *const outputs) {
-    vpx_codec_err_t res;
-    size_t frame_received = 0;
-
-    ASSERT_TRUE(outputs != NULL);
-    ASSERT_GT(n, 0);
-    ASSERT_GT(layers, 0);
-    svc_.spatial_layers = layers;
-    codec_enc_.rc_target_bitrate = 500;
-    if (codec_enc_.g_pass == VPX_RC_LAST_PASS) {
-      ASSERT_TRUE(stats_buf != NULL);
-      ASSERT_GT(stats_buf->size(), 0U);
-      codec_enc_.rc_twopass_stats_in.buf = &(*stats_buf)[0];
-      codec_enc_.rc_twopass_stats_in.sz = stats_buf->size();
-    }
-    InitializeEncoder();
-
-    libvpx_test::I420VideoSource video(
-        test_file_name_, codec_enc_.g_w, codec_enc_.g_h,
-        codec_enc_.g_timebase.den, codec_enc_.g_timebase.num, 0, 30);
-    video.Begin();
-
-    for (int i = 0; i < n; ++i) {
-      res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
-                           video.duration(), VPX_DL_GOOD_QUALITY);
-      ASSERT_EQ(VPX_CODEC_OK, res);
-      StoreFrames(n, outputs, &frame_received);
-      video.Next();
-    }
-
-    // Flush encoder.
-    res = vpx_svc_encode(&svc_, &codec_, NULL, 0, video.duration(),
-                         VPX_DL_GOOD_QUALITY);
-    EXPECT_EQ(VPX_CODEC_OK, res);
-    StoreFrames(n, outputs, &frame_received);
-
-    EXPECT_EQ(frame_received, static_cast<size_t>(n));
-
-    ReleaseEncoder();
-  }
-
-  void DecodeNFrames(const struct vpx_fixed_buf *const inputs, const int n) {
-    int decoded_frames = 0;
-    int received_frames = 0;
-
-    ASSERT_TRUE(inputs != NULL);
-    ASSERT_GT(n, 0);
-
-    for (int i = 0; i < n; ++i) {
-      ASSERT_TRUE(inputs[i].buf != NULL);
-      ASSERT_GT(inputs[i].sz, 0U);
-      const vpx_codec_err_t res_dec = decoder_->DecodeFrame(
-          static_cast<const uint8_t *>(inputs[i].buf), inputs[i].sz);
-      ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
-      ++decoded_frames;
-
-      DxDataIterator dec_iter = decoder_->GetDxData();
-      while (dec_iter.Next() != NULL) {
-        ++received_frames;
-      }
-    }
-    EXPECT_EQ(decoded_frames, n);
-    EXPECT_EQ(received_frames, n);
-  }
-
-  void DropEnhancementLayers(struct vpx_fixed_buf *const inputs,
-                             const int num_super_frames,
-                             const int remained_spatial_layers) {
-    ASSERT_TRUE(inputs != NULL);
-    ASSERT_GT(num_super_frames, 0);
-    ASSERT_GT(remained_spatial_layers, 0);
-
-    for (int i = 0; i < num_super_frames; ++i) {
-      uint32_t frame_sizes[8] = { 0 };
-      int frame_count = 0;
-      int frames_found = 0;
-      int frame;
-      ASSERT_TRUE(inputs[i].buf != NULL);
-      ASSERT_GT(inputs[i].sz, 0U);
-
-      vpx_codec_err_t res = vp9_parse_superframe_index(
-          static_cast<const uint8_t *>(inputs[i].buf), inputs[i].sz,
-          frame_sizes, &frame_count, NULL, NULL);
-      ASSERT_EQ(VPX_CODEC_OK, res);
-
-      if (frame_count == 0) {
-        // There's no super frame but only a single frame.
-        ASSERT_EQ(1, remained_spatial_layers);
-      } else {
-        // Found a super frame.
-        uint8_t *frame_data = static_cast<uint8_t *>(inputs[i].buf);
-        uint8_t *frame_start = frame_data;
-        for (frame = 0; frame < frame_count; ++frame) {
-          // Looking for a visible frame.
-          if (frame_data[0] & 0x02) {
-            ++frames_found;
-            if (frames_found == remained_spatial_layers) break;
-          }
-          frame_data += frame_sizes[frame];
-        }
-        ASSERT_LT(frame, frame_count)
-            << "Couldn't find a visible frame. "
-            << "remained_spatial_layers: " << remained_spatial_layers
-            << " super_frame: " << i;
-        if (frame == frame_count - 1) continue;
-
-        frame_data += frame_sizes[frame];
-
-        // We need to add one more frame for multiple frame contexts.
-        uint8_t marker =
-            static_cast<const uint8_t *>(inputs[i].buf)[inputs[i].sz - 1];
-        const uint32_t mag = ((marker >> 3) & 0x3) + 1;
-        const size_t index_sz = 2 + mag * frame_count;
-        const size_t new_index_sz = 2 + mag * (frame + 1);
-        marker &= 0x0f8;
-        marker |= frame;
-
-        // Copy existing frame sizes.
-        memmove(frame_data + 1, frame_start + inputs[i].sz - index_sz + 1,
-                new_index_sz - 2);
-        // New marker.
-        frame_data[0] = marker;
-        frame_data += (mag * (frame + 1) + 1);
-
-        *frame_data++ = marker;
-        inputs[i].sz = frame_data - frame_start;
-      }
-    }
-  }
-
-  void FreeBitstreamBuffers(struct vpx_fixed_buf *const inputs, const int n) {
-    ASSERT_TRUE(inputs != NULL);
-    ASSERT_GT(n, 0);
-
-    for (int i = 0; i < n; ++i) {
-      free(inputs[i].buf);
-      inputs[i].buf = NULL;
-      inputs[i].sz = 0;
-    }
-  }
+void OnePassCbrSvc::AssignLayerBitrates() {
+  int sl, spatial_layer_target;
+  int spatial_layers = cfg_.ss_number_layers;
+  int temporal_layers = cfg_.ts_number_layers;
+  float total = 0;
+  float alloc_ratio[VPX_MAX_LAYERS] = { 0 };
+  float framerate = 30.0;
+  for (sl = 0; sl < spatial_layers; ++sl) {
+    if (svc_params_.scaling_factor_den[sl] > 0) {
+      alloc_ratio[sl] =
+          static_cast<float>((svc_params_.scaling_factor_num[sl] * 1.0 /
+                              svc_params_.scaling_factor_den[sl]));
+      total += alloc_ratio[sl];
+    }
+  }
+  for (sl = 0; sl < spatial_layers; ++sl) {
+    cfg_.ss_target_bitrate[sl] = spatial_layer_target =
+        static_cast<unsigned int>(cfg_.rc_target_bitrate * alloc_ratio[sl] /
+                                  total);
+    const int index = sl * temporal_layers;
+    if (cfg_.temporal_layering_mode == 3) {
+      cfg_.layer_target_bitrate[index] = spatial_layer_target >> 1;
+      cfg_.layer_target_bitrate[index + 1] =
+          (spatial_layer_target >> 1) + (spatial_layer_target >> 2);
+      cfg_.layer_target_bitrate[index + 2] = spatial_layer_target;
+    } else if (cfg_.temporal_layering_mode == 2) {
+      cfg_.layer_target_bitrate[index] = spatial_layer_target * 2 / 3;
+      cfg_.layer_target_bitrate[index + 1] = spatial_layer_target;
+    } else if (cfg_.temporal_layering_mode <= 1) {
+      cfg_.layer_target_bitrate[index] = spatial_layer_target;
+    }
+  }
+  for (sl = 0; sl < spatial_layers; ++sl) {
+    for (int tl = 0; tl < temporal_layers; ++tl) {
+      const int layer = sl * temporal_layers + tl;
+      float layer_framerate = framerate;
+      if (temporal_layers == 2 && tl == 0) layer_framerate = framerate / 2;
+      if (temporal_layers == 3 && tl == 0) layer_framerate = framerate / 4;
+      if (temporal_layers == 3 && tl == 1) layer_framerate = framerate / 2;
+      layer_target_avg_bandwidth_[layer] = static_cast<int>(
+          cfg_.layer_target_bitrate[layer] * 1000.0 / layer_framerate);
+      bits_in_buffer_model_[layer] =
+          cfg_.layer_target_bitrate[layer] * cfg_.rc_buf_initial_sz;
    }
  }
+}
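A worked instance of the allocation above (illustration only): for SetSvcConfig(3, 3) with cfg_.rc_target_bitrate = 600, the per-layer ratios are 72/288 = 0.25, 144/288 = 0.5 and 288/288 = 1.0, so total = 1.75 and the spatial targets come out as 600 * 0.25 / 1.75 = 85, 600 * 0.5 / 1.75 = 171 and 600 * 1.0 / 1.75 = 342 kbps. With temporal_layering_mode == 3, each spatial target S is then split cumulatively as S/2, S/2 + S/4 and S, i.e. roughly 42, 63 and 85 kbps for the three temporal layers of the base spatial layer.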
-
-  SvcContext svc_;
-  vpx_codec_ctx_t codec_;
-  struct vpx_codec_enc_cfg codec_enc_;
-  vpx_codec_iface_t *codec_iface_;
-  std::string test_file_name_;
-  bool codec_initialized_;
-  Decoder *decoder_;
-  int tile_columns_;
-  int tile_rows_;
-};
-
-TEST_F(SvcTest, SvcInit) {
-  // test missing parameters
-  vpx_codec_err_t res = vpx_svc_init(NULL, &codec_, codec_iface_, &codec_enc_);
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
-  res = vpx_svc_init(&svc_, NULL, codec_iface_, &codec_enc_);
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
-  res = vpx_svc_init(&svc_, &codec_, NULL, &codec_enc_);
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
-
-  res = vpx_svc_init(&svc_, &codec_, codec_iface_, NULL);
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
-
-  svc_.spatial_layers = 6;  // too many layers
-  res = vpx_svc_init(&svc_, &codec_, codec_iface_, &codec_enc_);
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
-
-  svc_.spatial_layers = 0;  // use default layers
-  InitializeEncoder();
-  EXPECT_EQ(VPX_SS_DEFAULT_LAYERS, svc_.spatial_layers);
-}
-
-TEST_F(SvcTest, InitTwoLayers) {
-  svc_.spatial_layers = 2;
-  InitializeEncoder();
-}
-
-TEST_F(SvcTest, InvalidOptions) {
-  vpx_codec_err_t res = vpx_svc_set_options(&svc_, NULL);
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
-
-  res = vpx_svc_set_options(&svc_,
"not-an-option=1"); - EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); -} - -TEST_F(SvcTest, SetLayersOption) { - vpx_codec_err_t res = vpx_svc_set_options(&svc_, "spatial-layers=3"); - EXPECT_EQ(VPX_CODEC_OK, res); - InitializeEncoder(); - EXPECT_EQ(3, svc_.spatial_layers); -} - -TEST_F(SvcTest, SetMultipleOptions) { - vpx_codec_err_t res = - vpx_svc_set_options(&svc_, "spatial-layers=2 scale-factors=1/3,2/3"); - EXPECT_EQ(VPX_CODEC_OK, res); - InitializeEncoder(); - EXPECT_EQ(2, svc_.spatial_layers); -} - -TEST_F(SvcTest, SetScaleFactorsOption) { - svc_.spatial_layers = 2; - vpx_codec_err_t res = - vpx_svc_set_options(&svc_, "scale-factors=not-scale-factors"); - EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); - - res = vpx_svc_set_options(&svc_, "scale-factors=1/3, 3*3"); - EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); - - res = vpx_svc_set_options(&svc_, "scale-factors=1/3"); - EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); - - res = vpx_svc_set_options(&svc_, "scale-factors=1/3,2/3"); - EXPECT_EQ(VPX_CODEC_OK, res); - InitializeEncoder(); -} - -TEST_F(SvcTest, SetQuantizersOption) { - svc_.spatial_layers = 2; - vpx_codec_err_t res = vpx_svc_set_options(&svc_, "max-quantizers=nothing"); - EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); - - res = vpx_svc_set_options(&svc_, "min-quantizers=nothing"); - EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); - - res = vpx_svc_set_options(&svc_, "max-quantizers=40"); - EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); - - res = vpx_svc_set_options(&svc_, "min-quantizers=40"); - EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); - - res = vpx_svc_set_options(&svc_, "max-quantizers=30,30 min-quantizers=40,40"); - EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); - - res = vpx_svc_set_options(&svc_, "max-quantizers=40,40 min-quantizers=30,30"); - InitializeEncoder(); -} - -TEST_F(SvcTest, SetAutoAltRefOption) { - svc_.spatial_layers = 5; - vpx_codec_err_t res = vpx_svc_set_options(&svc_, "auto-alt-refs=none"); - EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); - - res = vpx_svc_set_options(&svc_, "auto-alt-refs=1,1,1,1,0"); - EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); - - vpx_svc_set_options(&svc_, "auto-alt-refs=0,1,1,1,0"); - InitializeEncoder(); -} - -// Test that decoder can handle an SVC frame as the first frame in a sequence. 
-TEST_F(SvcTest, OnePassEncodeOneFrame) { - codec_enc_.g_pass = VPX_RC_ONE_PASS; - vpx_fixed_buf output = vpx_fixed_buf(); - Pass2EncodeNFrames(NULL, 1, 2, &output); - DecodeNFrames(&output, 1); - FreeBitstreamBuffers(&output, 1); -} - -TEST_F(SvcTest, OnePassEncodeThreeFrames) { - codec_enc_.g_pass = VPX_RC_ONE_PASS; - codec_enc_.g_lag_in_frames = 0; - vpx_fixed_buf outputs[3]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(NULL, 3, 2, &outputs[0]); - DecodeNFrames(&outputs[0], 3); - FreeBitstreamBuffers(&outputs[0], 3); -} - -TEST_F(SvcTest, TwoPassEncode10Frames) { - // First pass encode - std::string stats_buf; - Pass1EncodeNFrames(10, 2, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); - DecodeNFrames(&outputs[0], 10); - FreeBitstreamBuffers(&outputs[0], 10); -} - -TEST_F(SvcTest, TwoPassEncode20FramesWithAltRef) { - // First pass encode - std::string stats_buf; - Pass1EncodeNFrames(20, 2, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - vpx_svc_set_options(&svc_, "auto-alt-refs=1,1"); - vpx_fixed_buf outputs[20]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 20, 2, &outputs[0]); - DecodeNFrames(&outputs[0], 20); - FreeBitstreamBuffers(&outputs[0], 20); -} - -TEST_F(SvcTest, TwoPassEncode2SpatialLayersDecodeBaseLayerOnly) { - // First pass encode - std::string stats_buf; - Pass1EncodeNFrames(10, 2, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - vpx_svc_set_options(&svc_, "auto-alt-refs=1,1"); - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); - DropEnhancementLayers(&outputs[0], 10, 1); - DecodeNFrames(&outputs[0], 10); - FreeBitstreamBuffers(&outputs[0], 10); -} - -TEST_F(SvcTest, TwoPassEncode5SpatialLayersDecode54321Layers) { - // First pass encode - std::string stats_buf; - Pass1EncodeNFrames(10, 5, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - vpx_svc_set_options(&svc_, "auto-alt-refs=0,1,1,1,0"); - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]); - - DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 4); - DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 3); - DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 2); - DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 1); - DecodeNFrames(&outputs[0], 10); - - FreeBitstreamBuffers(&outputs[0], 10); -} - -TEST_F(SvcTest, TwoPassEncode2SNRLayers) { - // First pass encode - std::string stats_buf; - vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1"); - Pass1EncodeNFrames(20, 2, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 scale-factors=1/1,1/1"); - vpx_fixed_buf outputs[20]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 20, 2, &outputs[0]); - DecodeNFrames(&outputs[0], 20); - FreeBitstreamBuffers(&outputs[0], 20); -} - -TEST_F(SvcTest, TwoPassEncode3SNRLayersDecode321Layers) { - // First pass encode - std::string stats_buf; - vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1"); - Pass1EncodeNFrames(20, 3, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - 
vpx_svc_set_options(&svc_, "auto-alt-refs=1,1,1 scale-factors=1/1,1/1,1/1"); - vpx_fixed_buf outputs[20]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]); - DecodeNFrames(&outputs[0], 20); - DropEnhancementLayers(&outputs[0], 20, 2); - DecodeNFrames(&outputs[0], 20); - DropEnhancementLayers(&outputs[0], 20, 1); - DecodeNFrames(&outputs[0], 20); - - FreeBitstreamBuffers(&outputs[0], 20); -} - -TEST_F(SvcTest, SetMultipleFrameContextsOption) { - svc_.spatial_layers = 5; - vpx_codec_err_t res = vpx_svc_set_options(&svc_, "multi-frame-contexts=1"); - EXPECT_EQ(VPX_CODEC_OK, res); - res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); - EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); - - svc_.spatial_layers = 2; - res = vpx_svc_set_options(&svc_, "multi-frame-contexts=1"); - InitializeEncoder(); -} - -TEST_F(SvcTest, TwoPassEncode2SpatialLayersWithMultipleFrameContexts) { - // First pass encode - std::string stats_buf; - Pass1EncodeNFrames(10, 2, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - codec_enc_.g_error_resilient = 0; - vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1"); - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); - DecodeNFrames(&outputs[0], 10); - FreeBitstreamBuffers(&outputs[0], 10); -} - -TEST_F(SvcTest, - TwoPassEncode2SpatialLayersWithMultipleFrameContextsDecodeBaselayer) { - // First pass encode - std::string stats_buf; - Pass1EncodeNFrames(10, 2, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - codec_enc_.g_error_resilient = 0; - vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1"); - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); - DropEnhancementLayers(&outputs[0], 10, 1); - DecodeNFrames(&outputs[0], 10); - FreeBitstreamBuffers(&outputs[0], 10); -} - -TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContexts) { - // First pass encode - std::string stats_buf; - vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1"); - Pass1EncodeNFrames(10, 2, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - codec_enc_.g_error_resilient = 0; - vpx_svc_set_options(&svc_, - "auto-alt-refs=1,1 scale-factors=1/1,1/1 " - "multi-frame-contexts=1"); - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); - DecodeNFrames(&outputs[0], 10); - FreeBitstreamBuffers(&outputs[0], 10); -} - -TEST_F(SvcTest, - TwoPassEncode3SNRLayersWithMultipleFrameContextsDecode321Layer) { - // First pass encode - std::string stats_buf; - vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1"); - Pass1EncodeNFrames(10, 3, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - codec_enc_.g_error_resilient = 0; - vpx_svc_set_options(&svc_, - "auto-alt-refs=1,1,1 scale-factors=1/1,1/1,1/1 " - "multi-frame-contexts=1"); - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 3, &outputs[0]); - - DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 2); - DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 1); - DecodeNFrames(&outputs[0], 10); - - FreeBitstreamBuffers(&outputs[0], 10); -} - -TEST_F(SvcTest, TwoPassEncode2TemporalLayers) { - // First pass encode - std::string stats_buf; - 
vpx_svc_set_options(&svc_, "scale-factors=1/1"); - svc_.temporal_layers = 2; - Pass1EncodeNFrames(10, 1, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - svc_.temporal_layers = 2; - vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1"); - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]); - DecodeNFrames(&outputs[0], 10); - FreeBitstreamBuffers(&outputs[0], 10); -} - -TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithMultipleFrameContexts) { - // First pass encode - std::string stats_buf; - vpx_svc_set_options(&svc_, "scale-factors=1/1"); - svc_.temporal_layers = 2; - Pass1EncodeNFrames(10, 1, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - svc_.temporal_layers = 2; - codec_enc_.g_error_resilient = 0; - vpx_svc_set_options(&svc_, - "auto-alt-refs=1 scale-factors=1/1 " - "multi-frame-contexts=1"); - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]); - DecodeNFrames(&outputs[0], 10); - FreeBitstreamBuffers(&outputs[0], 10); -} - -TEST_F(SvcTest, TwoPassEncode2TemporalLayersDecodeBaseLayer) { - // First pass encode - std::string stats_buf; - vpx_svc_set_options(&svc_, "scale-factors=1/1"); - svc_.temporal_layers = 2; - Pass1EncodeNFrames(10, 1, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - svc_.temporal_layers = 2; - vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1"); - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]); - - vpx_fixed_buf base_layer[5]; - for (int i = 0; i < 5; ++i) base_layer[i] = outputs[i * 2]; - - DecodeNFrames(&base_layer[0], 5); - FreeBitstreamBuffers(&outputs[0], 10); -} - -TEST_F(SvcTest, - TwoPassEncode2TemporalLayersWithMultipleFrameContextsDecodeBaseLayer) { - // First pass encode - std::string stats_buf; - vpx_svc_set_options(&svc_, "scale-factors=1/1"); - svc_.temporal_layers = 2; - Pass1EncodeNFrames(10, 1, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - svc_.temporal_layers = 2; - codec_enc_.g_error_resilient = 0; - vpx_svc_set_options(&svc_, - "auto-alt-refs=1 scale-factors=1/1 " - "multi-frame-contexts=1"); - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]); - - vpx_fixed_buf base_layer[5]; - for (int i = 0; i < 5; ++i) base_layer[i] = outputs[i * 2]; - - DecodeNFrames(&base_layer[0], 5); - FreeBitstreamBuffers(&outputs[0], 10); -} - -TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithTiles) { - // First pass encode - std::string stats_buf; - vpx_svc_set_options(&svc_, "scale-factors=1/1"); - svc_.temporal_layers = 2; - Pass1EncodeNFrames(10, 1, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - svc_.temporal_layers = 2; - vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1"); - codec_enc_.g_w = 704; - codec_enc_.g_h = 144; - tile_columns_ = 1; - tile_rows_ = 1; - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]); - DecodeNFrames(&outputs[0], 10); - FreeBitstreamBuffers(&outputs[0], 10); -} - -TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithMultipleFrameContextsAndTiles) { - // First pass encode - std::string stats_buf; - vpx_svc_set_options(&svc_, "scale-factors=1/1"); - svc_.temporal_layers = 2; - 
Pass1EncodeNFrames(10, 1, &stats_buf); - - // Second pass encode - codec_enc_.g_pass = VPX_RC_LAST_PASS; - svc_.temporal_layers = 2; - codec_enc_.g_error_resilient = 0; - codec_enc_.g_w = 704; - codec_enc_.g_h = 144; - tile_columns_ = 1; - tile_rows_ = 1; - vpx_svc_set_options(&svc_, - "auto-alt-refs=1 scale-factors=1/1 " - "multi-frame-contexts=1"); - vpx_fixed_buf outputs[10]; - memset(&outputs[0], 0, sizeof(outputs)); - Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]); - DecodeNFrames(&outputs[0], 10); - FreeBitstreamBuffers(&outputs[0], 10); -} - -} // namespace +} // namespace svc_test diff --git a/media/libvpx/libvpx/test/svc_test.h b/media/libvpx/libvpx/test/svc_test.h new file mode 100644 index 000000000000..f1d727fd9deb --- /dev/null +++ b/media/libvpx/libvpx/test/svc_test.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_TEST_SVC_TEST_H_ +#define VPX_TEST_SVC_TEST_H_ + +#include "./vpx_config.h" +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "vpx/vpx_codec.h" +#include "vpx_ports/bitops.h" + +namespace svc_test { +class OnePassCbrSvc : public ::libvpx_test::EncoderTest { + public: + explicit OnePassCbrSvc(const ::libvpx_test::CodecFactory *codec) + : EncoderTest(codec), base_speed_setting_(0), speed_setting_(0), + superframe_count_(0), temporal_layer_id_(0), number_temporal_layers_(0), + number_spatial_layers_(0) { + memset(&svc_params_, 0, sizeof(svc_params_)); + memset(bits_in_buffer_model_, 0, + sizeof(bits_in_buffer_model_[0]) * VPX_MAX_LAYERS); + memset(layer_target_avg_bandwidth_, 0, + sizeof(layer_target_avg_bandwidth_[0]) * VPX_MAX_LAYERS); + } + + protected: + virtual ~OnePassCbrSvc() {} + + virtual void SetConfig(const int num_temporal_layer) = 0; + + virtual void SetSvcConfig(const int num_spatial_layer, + const int num_temporal_layer); + + virtual void PreEncodeFrameHookSetup(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder); + + virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder); + + virtual void AssignLayerBitrates(); + + virtual void MismatchHook(const vpx_image_t *, const vpx_image_t *) {} + + vpx_svc_extra_cfg_t svc_params_; + int64_t bits_in_buffer_model_[VPX_MAX_LAYERS]; + int layer_target_avg_bandwidth_[VPX_MAX_LAYERS]; + int base_speed_setting_; + int speed_setting_; + int superframe_count_; + int temporal_layer_id_; + int number_temporal_layers_; + int number_spatial_layers_; +}; +} // namespace svc_test + +#endif // VPX_TEST_SVC_TEST_H_ diff --git a/media/libvpx/libvpx/test/temporal_filter_test.cc b/media/libvpx/libvpx/test/temporal_filter_test.cc deleted file mode 100644 index 655a36be9a4c..000000000000 --- a/media/libvpx/libvpx/test/temporal_filter_test.cc +++ /dev/null @@ -1,277 +0,0 @@ -/* - * Copyright (c) 2016 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found
- * in the file PATENTS.  All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <limits>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vp9_rtcd.h"
-#include "test/acm_random.h"
-#include "test/buffer.h"
-#include "test/register_state_check.h"
-#include "vpx_ports/vpx_timer.h"
-
-namespace {
-
-using ::libvpx_test::ACMRandom;
-using ::libvpx_test::Buffer;
-
-typedef void (*TemporalFilterFunc)(const uint8_t *a, unsigned int stride,
-                                   const uint8_t *b, unsigned int w,
-                                   unsigned int h, int filter_strength,
-                                   int filter_weight, unsigned int *accumulator,
-                                   uint16_t *count);
-
-// Calculate the difference between 'a' and 'b', sum in blocks of 9, and apply
-// filter based on strength and weight.  Store the resulting filter amount in
-// 'count' and apply it to 'b' and store it in 'accumulator'.
-void reference_filter(const Buffer<uint8_t> &a, const Buffer<uint8_t> &b, int w,
-                      int h, int filter_strength, int filter_weight,
-                      Buffer<unsigned int> *accumulator,
-                      Buffer<uint16_t> *count) {
-  Buffer<int> diff_sq = Buffer<int>(w, h, 0);
-  ASSERT_TRUE(diff_sq.Init());
-  diff_sq.Set(0);
-
-  int rounding = 0;
-  if (filter_strength > 0) {
-    rounding = 1 << (filter_strength - 1);
-  }
-
-  // Calculate all the differences. Avoids re-calculating a bunch of extra
-  // values.
-  for (int height = 0; height < h; ++height) {
-    for (int width = 0; width < w; ++width) {
-      int diff = a.TopLeftPixel()[height * a.stride() + width] -
-                 b.TopLeftPixel()[height * b.stride() + width];
-      diff_sq.TopLeftPixel()[height * diff_sq.stride() + width] = diff * diff;
-    }
-  }
-
-  // For any given point, sum the neighboring values and calculate the
-  // modifier.
-  for (int height = 0; height < h; ++height) {
-    for (int width = 0; width < w; ++width) {
-      // Determine how many values are being summed.
-      int summed_values = 9;
-
-      if (height == 0 || height == (h - 1)) {
-        summed_values -= 3;
-      }
-
-      if (width == 0 || width == (w - 1)) {
-        if (summed_values == 6) {  // corner
-          summed_values -= 2;
-        } else {
-          summed_values -= 3;
-        }
-      }
-
-      // Sum the diff_sq of the surrounding values.
-      int sum = 0;
-      for (int idy = -1; idy <= 1; ++idy) {
-        for (int idx = -1; idx <= 1; ++idx) {
-          const int y = height + idy;
-          const int x = width + idx;
-
-          // If inside the border.
-          if (y >= 0 && y < h && x >= 0 && x < w) {
-            sum += diff_sq.TopLeftPixel()[y * diff_sq.stride() + x];
-          }
-        }
-      }
-
-      sum *= 3;
-      sum /= summed_values;
-      sum += rounding;
-      sum >>= filter_strength;
-
-      // Clamp the value and invert it.
-      if (sum > 16) sum = 16;
-      sum = 16 - sum;
-
-      sum *= filter_weight;
-
-      count->TopLeftPixel()[height * count->stride() + width] += sum;
-      accumulator->TopLeftPixel()[height * accumulator->stride() + width] +=
-          sum * b.TopLeftPixel()[height * b.stride() + width];
-    }
-  }
-}
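A worked instance of the modifier computed above (illustration only): for an interior pixel whose nine neighborhood differences are all 2, the 3x3 sum of squared differences is 9 * 4 = 36. Then 36 * 3 = 108, 108 / 9 = 12, adding the rounding term (1 << 5) = 32 gives 44, and 44 >> 6 = 0 for filter_strength 6. Inverting yields 16 - 0 = 16, and filter_weight 2 gives a final modifier of 32, so count gains 32 and accumulator gains 32 times the pixel value in b.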
-
-class TemporalFilterTest : public ::testing::TestWithParam<TemporalFilterFunc> {
- public:
-  virtual void SetUp() {
-    filter_func_ = GetParam();
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
- protected:
-  TemporalFilterFunc filter_func_;
-  ACMRandom rnd_;
-};
-
-TEST_P(TemporalFilterTest, SizeCombinations) {
-  // Depending on subsampling this function may be called with values of 8 or 16
-  // for width and height, in any combination.
-  Buffer<uint8_t> a = Buffer<uint8_t>(16, 16, 8);
-  ASSERT_TRUE(a.Init());
-
-  const int filter_weight = 2;
-  const int filter_strength = 6;
-
-  for (int width = 8; width <= 16; width += 8) {
-    for (int height = 8; height <= 16; height += 8) {
-      // The second buffer must not have any border.
-      Buffer<uint8_t> b = Buffer<uint8_t>(width, height, 0);
-      ASSERT_TRUE(b.Init());
-      Buffer<unsigned int> accum_ref = Buffer<unsigned int>(width, height, 0);
-      ASSERT_TRUE(accum_ref.Init());
-      Buffer<unsigned int> accum_chk = Buffer<unsigned int>(width, height, 0);
-      ASSERT_TRUE(accum_chk.Init());
-      Buffer<uint16_t> count_ref = Buffer<uint16_t>(width, height, 0);
-      ASSERT_TRUE(count_ref.Init());
-      Buffer<uint16_t> count_chk = Buffer<uint16_t>(width, height, 0);
-      ASSERT_TRUE(count_chk.Init());
-
-      // The difference between the buffers must be small to pass the threshold
-      // to apply the filter.
-      a.Set(&rnd_, 0, 7);
-      b.Set(&rnd_, 0, 7);
-
-      accum_ref.Set(rnd_.Rand8());
-      accum_chk.CopyFrom(accum_ref);
-      count_ref.Set(rnd_.Rand8());
-      count_chk.CopyFrom(count_ref);
-      reference_filter(a, b, width, height, filter_strength, filter_weight,
-                       &accum_ref, &count_ref);
-      ASM_REGISTER_STATE_CHECK(
-          filter_func_(a.TopLeftPixel(), a.stride(), b.TopLeftPixel(), width,
-                       height, filter_strength, filter_weight,
-                       accum_chk.TopLeftPixel(), count_chk.TopLeftPixel()));
-      EXPECT_TRUE(accum_chk.CheckValues(accum_ref));
-      EXPECT_TRUE(count_chk.CheckValues(count_ref));
-      if (HasFailure()) {
-        printf("Width: %d Height: %d\n", width, height);
-        count_chk.PrintDifference(count_ref);
-        accum_chk.PrintDifference(accum_ref);
-        return;
-      }
-    }
-  }
-}
-
-TEST_P(TemporalFilterTest, CompareReferenceRandom) {
-  for (int width = 8; width <= 16; width += 8) {
-    for (int height = 8; height <= 16; height += 8) {
-      Buffer<uint8_t> a = Buffer<uint8_t>(width, height, 8);
-      ASSERT_TRUE(a.Init());
-      // The second buffer must not have any border.
-      Buffer<uint8_t> b = Buffer<uint8_t>(width, height, 0);
-      ASSERT_TRUE(b.Init());
-      Buffer<unsigned int> accum_ref = Buffer<unsigned int>(width, height, 0);
-      ASSERT_TRUE(accum_ref.Init());
-      Buffer<unsigned int> accum_chk = Buffer<unsigned int>(width, height, 0);
-      ASSERT_TRUE(accum_chk.Init());
-      Buffer<uint16_t> count_ref = Buffer<uint16_t>(width, height, 0);
-      ASSERT_TRUE(count_ref.Init());
-      Buffer<uint16_t> count_chk = Buffer<uint16_t>(width, height, 0);
-      ASSERT_TRUE(count_chk.Init());
-
-      for (int filter_strength = 0; filter_strength <= 6; ++filter_strength) {
-        for (int filter_weight = 0; filter_weight <= 2; ++filter_weight) {
-          for (int repeat = 0; repeat < 100; ++repeat) {
-            if (repeat < 50) {
-              a.Set(&rnd_, 0, 7);
-              b.Set(&rnd_, 0, 7);
-            } else {
-              // Check large (but close) values as well.
-              a.Set(&rnd_, std::numeric_limits<uint8_t>::max() - 7,
-                    std::numeric_limits<uint8_t>::max());
-              b.Set(&rnd_, std::numeric_limits<uint8_t>::max() - 7,
-                    std::numeric_limits<uint8_t>::max());
-            }
-
-            accum_ref.Set(rnd_.Rand8());
-            accum_chk.CopyFrom(accum_ref);
-            count_ref.Set(rnd_.Rand8());
-            count_chk.CopyFrom(count_ref);
-            reference_filter(a, b, width, height, filter_strength,
-                             filter_weight, &accum_ref, &count_ref);
-            ASM_REGISTER_STATE_CHECK(filter_func_(
-                a.TopLeftPixel(), a.stride(), b.TopLeftPixel(), width, height,
-                filter_strength, filter_weight, accum_chk.TopLeftPixel(),
-                count_chk.TopLeftPixel()));
-            EXPECT_TRUE(accum_chk.CheckValues(accum_ref));
-            EXPECT_TRUE(count_chk.CheckValues(count_ref));
-            if (HasFailure()) {
-              printf("Weight: %d Strength: %d\n", filter_weight,
-                     filter_strength);
-              count_chk.PrintDifference(count_ref);
-              accum_chk.PrintDifference(accum_ref);
-              return;
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-TEST_P(TemporalFilterTest, DISABLED_Speed) {
-  Buffer<uint8_t> a = Buffer<uint8_t>(16, 16, 8);
-  ASSERT_TRUE(a.Init());
-
-  const int filter_weight = 2;
-  const int filter_strength = 6;
-
-  for (int width = 8; width <= 16; width += 8) {
-    for (int height = 8; height <= 16; height += 8) {
-      // The second buffer must not have any border.
-      Buffer<uint8_t> b = Buffer<uint8_t>(width, height, 0);
-      ASSERT_TRUE(b.Init());
-      Buffer<unsigned int> accum_ref = Buffer<unsigned int>(width, height, 0);
-      ASSERT_TRUE(accum_ref.Init());
-      Buffer<unsigned int> accum_chk = Buffer<unsigned int>(width, height, 0);
-      ASSERT_TRUE(accum_chk.Init());
-      Buffer<uint16_t> count_ref = Buffer<uint16_t>(width, height, 0);
-      ASSERT_TRUE(count_ref.Init());
-      Buffer<uint16_t> count_chk = Buffer<uint16_t>(width, height, 0);
-      ASSERT_TRUE(count_chk.Init());
-
-      a.Set(&rnd_, 0, 7);
-      b.Set(&rnd_, 0, 7);
-
-      accum_chk.Set(0);
-      count_chk.Set(0);
-
-      vpx_usec_timer timer;
-      vpx_usec_timer_start(&timer);
-      for (int i = 0; i < 10000; ++i) {
-        filter_func_(a.TopLeftPixel(), a.stride(), b.TopLeftPixel(), width,
-                     height, filter_strength, filter_weight,
-                     accum_chk.TopLeftPixel(), count_chk.TopLeftPixel());
-      }
-      vpx_usec_timer_mark(&timer);
-      const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
-      printf("Temporal filter %dx%d time: %5d us\n", width, height,
-             elapsed_time);
-    }
-  }
-}
-
-INSTANTIATE_TEST_CASE_P(C, TemporalFilterTest,
-                        ::testing::Values(&vp9_temporal_filter_apply_c));
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, TemporalFilterTest,
-                        ::testing::Values(&vp9_temporal_filter_apply_sse4_1));
-#endif  // HAVE_SSE4_1
-}  // namespace
diff --git a/media/libvpx/libvpx/test/test-data.mk b/media/libvpx/libvpx/test/test-data.mk
index f405e4ef14f2..905f0138e64f 100644
--- a/media/libvpx/libvpx/test/test-data.mk
+++ b/media/libvpx/libvpx/test/test-data.mk
@@ -3,14 +3,16 @@ LIBVPX_TEST_SRCS-yes += test-data.mk
 # Encoder test source
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += desktop_office1.1280_720-020.yuv
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += slides_code_term_web_plot.1920_1080.yuv
 
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420_20f.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422_20f.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444_20f.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_440.yuv
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += 
park_joy_90p_12_422.y4m -LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m +LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420_20f.y4m +LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422_20f.y4m +LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444_20f.y4m LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420_a10-1.y4m LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m @@ -24,6 +26,7 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += noisy_clip_640_360.y4m LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_640_480_30.yuv +LIBVPX_TEST_DATA-$(CONFIG_RATE_CTRL) += bus_352x288_420_f20_b8.yuv # Test vectors LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf @@ -734,8 +737,12 @@ endif # CONFIG_VP9_HIGHBITDEPTH # Invalid files for testing libvpx error checking. LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf.res +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm @@ -783,8 +790,13 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-2.web LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-3.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-629481.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-629481.webm.res +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-1558.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-1558.ivf.res +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-1562.ivf +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-1562.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-667044.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-667044.webm.res +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += crbug-1539.rawfile ifeq ($(CONFIG_DECODE_PERF_TESTS),yes) # Encode / Decode test diff --git a/media/libvpx/libvpx/test/test-data.sha1 b/media/libvpx/libvpx/test/test-data.sha1 index 99b4e1e4654b..8f0084c47052 100644 --- a/media/libvpx/libvpx/test/test-data.sha1 +++ b/media/libvpx/libvpx/test/test-data.sha1 @@ -1,3 +1,4 @@ +3eaf216d9fc8b4b9bb8c3956311f49a85974806c *bus_352x288_420_f20_b8.yuv d5dfb0151c9051f8c85999255645d7a23916d3c0 *hantro_collage_w352h288.yuv b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv 76024eb753cdac6a5e5703aaea189d35c3c30ac7 *invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf @@ -17,13 +18,13 @@ df1a1453feb3c00d7d89746c7003b4163523bff3 *invalid-vp90-03-v3.webm d637297561dd904eb2c97a9015deeb31c4a1e8d2 *invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm 3a204bdbeaa3c6458b77bcebb8366d107267f55d 
*invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm.res 9aa21d8b2cb9d39abe8a7bb6032dc66955fb4342 *noisy_clip_640_360.y4m -a432f96ff0a787268e2f94a8092ab161a18d1b06 *park_joy_90p_10_420.y4m -0b194cc312c3a2e84d156a221b0a5eb615dfddc5 *park_joy_90p_10_422.y4m -ff0e0a21dc2adc95b8c1b37902713700655ced17 *park_joy_90p_10_444.y4m +0936b837708ae68c034719f8e07596021c2c214f *park_joy_90p_10_420_20f.y4m +5727a853c083c1099f837d27967bc1322d50ed4f *park_joy_90p_10_422_20f.y4m +e13489470ef8e8b2a871a5640d795a42a39be58d *park_joy_90p_10_444_20f.y4m c934da6fb8cc54ee2a8c17c54cf6076dac37ead0 *park_joy_90p_10_440.yuv -614c32ae1eca391e867c70d19974f0d62664dd99 *park_joy_90p_12_420.y4m -c92825f1ea25c5c37855083a69faac6ac4641a9e *park_joy_90p_12_422.y4m -b592189b885b6cc85db55cc98512a197d73d3b34 *park_joy_90p_12_444.y4m +79b0dc1784635a7f291e21c4e8d66a29c496ab99 *park_joy_90p_12_420_20f.y4m +9cf22b0f809f7464c8b9058f0cfa9d905921cbd1 *park_joy_90p_12_422_20f.y4m +22b2a4abaecc4a9ade6bb503d25fb82367947e85 *park_joy_90p_12_444_20f.y4m 82c1bfcca368c2f22bad7d693d690d5499ecdd11 *park_joy_90p_12_440.yuv b9e1e90aece2be6e2c90d89e6ab2372d5f8c792d *park_joy_90p_8_420_a10-1.y4m 4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c *park_joy_90p_8_420.y4m @@ -852,5 +853,16 @@ e402cbbf9e550ae017a1e9f1f73931c1d18474e8 *invalid-crbug-667044.webm d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-crbug-667044.webm.res fd9df7f3f6992af1d7a9dde975c9a0d6f28c053d *invalid-bug-1443.ivf fd3020fa6e9ca5966206738654c97dec313b0a95 *invalid-bug-1443.ivf.res +1a0e405606939f2febab1a21b30c37cb8f2c8cb1 *invalid-token-partition.ivf +90a8a95e7024f015b87f5483a65036609b3d1b74 *invalid-token-partition.ivf.res 17696cd21e875f1d6e5d418cbf89feab02c8850a *vp90-2-22-svc_1280x720_1.webm e2f9e1e47a791b4e939a9bdc50bf7a25b3761f77 *vp90-2-22-svc_1280x720_1.webm.md5 +a0fbbbc5dd50fd452096f4455a58c1a8c9f66697 *invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf +a61774cf03fc584bd9f0904fc145253bb8ea6c4c *invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf.res +894fae3afee0290546590823974203ab4b8abd95 *crbug-1539.rawfile +f1026c03efd5da21b381c8eb21f0d64e6d7e4ba3 *invalid-crbug-1558.ivf +eb198c25f861c3fe2cbd310de11eb96843019345 *invalid-crbug-1558.ivf.res +c62b005a9fd32c36a1b3f67de6840330f9915e34 *invalid-crbug-1562.ivf +f0cd8389948ad16085714d96567612136f6a46c5 *invalid-crbug-1562.ivf.res +bac455906360b45338a16dd626ac5f19bc36a307 *desktop_office1.1280_720-020.yuv +094be4b80fa30bd227149ea16ab6476d549ea092 *slides_code_term_web_plot.1920_1080.yuv diff --git a/media/libvpx/libvpx/test/test.mk b/media/libvpx/libvpx/test/test.mk index a3716be60c43..b4a5ea0cb365 100644 --- a/media/libvpx/libvpx/test/test.mk +++ b/media/libvpx/libvpx/test/test.mk @@ -1,4 +1,6 @@ LIBVPX_TEST_SRCS-yes += acm_random.h +LIBVPX_TEST_SRCS-yes += bench.h +LIBVPX_TEST_SRCS-yes += bench.cc LIBVPX_TEST_SRCS-yes += buffer.h LIBVPX_TEST_SRCS-yes += clear_system_state.h LIBVPX_TEST_SRCS-yes += codec_factory.h @@ -22,7 +24,8 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += altref_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += aq_segment_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += alt_ref_aq_segment_test.cc -LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += datarate_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += vp8_datarate_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += vp9_datarate_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += encode_api_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h 
@@ -46,9 +49,16 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += decode_corrupted.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ethread_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_motion_vector_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += level_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_datarate_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_test.h +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_end_to_end_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += timestamp_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_RATE_CTRL) += simple_encode_test.cc LIBVPX_TEST_SRCS-yes += decode_test_driver.cc LIBVPX_TEST_SRCS-yes += decode_test_driver.h @@ -67,6 +77,7 @@ LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.cc LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.cc LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.h LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.h +LIBWEBM_PARSER_SRCS += ../third_party/libwebm/common/webmids.h LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += $(LIBWEBM_PARSER_SRCS) LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../tools_common.h LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.cc @@ -161,7 +172,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_scale_test.cc ifneq ($(CONFIG_REALTIME_ONLY),yes) -LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += temporal_filter_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += yuv_temporal_filter_test.cc endif LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_block_error_test.cc @@ -169,11 +180,14 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc ifeq ($(CONFIG_VP9_ENCODER),yes) -LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += blockiness_test.cc LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += consistency_test.cc endif +ifeq ($(CONFIG_VP9_ENCODER),yes) +LIBVPX_TEST_SRCS-$(CONFIG_NON_GREEDY_MV) += non_greedy_mv_test.cc +endif + ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes) LIBVPX_TEST_SRCS-yes += vp9_denoiser_test.cc endif diff --git a/media/libvpx/libvpx/test/test_intra_pred_speed.cc b/media/libvpx/libvpx/test/test_intra_pred_speed.cc index 1cdeda410a78..0be9feefd99e 100644 --- a/media/libvpx/libvpx/test/test_intra_pred_speed.cc +++ b/media/libvpx/libvpx/test/test_intra_pred_speed.cc @@ -313,6 +313,8 @@ INTRA_PRED_TEST(MSA, TestIntraPred32, vpx_dc_predictor_32x32_msa, #endif // HAVE_MSA #if HAVE_VSX +// TODO(crbug.com/webm/1522): Fix test failures. 
+#if 0
 INTRA_PRED_TEST(VSX, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
                 vpx_h_predictor_4x4_vsx, NULL, NULL, NULL, NULL, NULL, NULL,
                 vpx_tm_predictor_4x4_vsx)
@@ -321,6 +323,7 @@ INTRA_PRED_TEST(VSX, TestIntraPred8, vpx_dc_predictor_8x8_vsx, NULL, NULL,
                 NULL, NULL, vpx_h_predictor_8x8_vsx, vpx_d45_predictor_8x8_vsx,
                 NULL, NULL, NULL, NULL, vpx_d63_predictor_8x8_vsx,
                 vpx_tm_predictor_8x8_vsx)
+#endif
 
 INTRA_PRED_TEST(VSX, TestIntraPred16, vpx_dc_predictor_16x16_vsx,
                 vpx_dc_left_predictor_16x16_vsx, vpx_dc_top_predictor_16x16_vsx,
diff --git a/media/libvpx/libvpx/test/test_libvpx.cc b/media/libvpx/libvpx/test/test_libvpx.cc
index 30641ae8c89e..222a83f8c748 100644
--- a/media/libvpx/libvpx/test/test_libvpx.cc
+++ b/media/libvpx/libvpx/test/test_libvpx.cc
@@ -12,7 +12,7 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
 #include "./vpx_config.h"
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
 #include "vpx_ports/x86.h"
 #endif
 extern "C" {
@@ -26,7 +26,7 @@ extern void vpx_dsp_rtcd();
 extern void vpx_scale_rtcd();
 }
 
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
 static void append_negative_gtest_filter(const char *str) {
   std::string filter = ::testing::FLAGS_gtest_filter;
   // Negative patterns begin with one '-' followed by a ':' separated list.
@@ -34,12 +34,12 @@ static void append_negative_gtest_filter(const char *str) {
   filter += str;
   ::testing::FLAGS_gtest_filter = filter;
 }
-#endif  // ARCH_X86 || ARCH_X86_64
+#endif  // VPX_ARCH_X86 || VPX_ARCH_X86_64
 
 int main(int argc, char **argv) {
   ::testing::InitGoogleTest(&argc, argv);
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
   const int simd_caps = x86_simd_caps();
   if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter(":MMX.*:MMX/*");
   if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter(":SSE.*:SSE/*");
@@ -56,12 +56,11 @@ int main(int argc, char **argv) {
   if (!(simd_caps & HAS_AVX512)) {
     append_negative_gtest_filter(":AVX512.*:AVX512/*");
   }
-#endif  // ARCH_X86 || ARCH_X86_64
+#endif  // VPX_ARCH_X86 || VPX_ARCH_X86_64
 
 #if !CONFIG_SHARED
   // Shared library builds don't support whitebox tests
   // that exercise internal symbols.
-
 #if CONFIG_VP8
   vp8_rtcd();
 #endif  // CONFIG_VP8
diff --git a/media/libvpx/libvpx/test/test_vector_test.cc b/media/libvpx/libvpx/test/test_vector_test.cc
index 1879b3d27737..5a9737122f6f 100644
--- a/media/libvpx/libvpx/test/test_vector_test.cc
+++ b/media/libvpx/libvpx/test/test_vector_test.cc
@@ -10,8 +10,11 @@
 
 #include <cstdio>
 #include <cstdlib>
+#include <memory>
 #include <set>
 #include <string>
+#include <tuple>
+
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "../tools_common.h"
 #include "./vpx_config.h"
@@ -29,9 +32,10 @@ namespace {
 
 const int kThreads = 0;
-const int kFileName = 1;
+const int kMtMode = 1;
+const int kFileName = 2;
 
-typedef std::tr1::tuple<int, const char *> DecodeParam;
+typedef std::tuple<int, int, const char *> DecodeParam;
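With the added kMtMode field, each test instance is now driven by a (threads, mt_mode, file) triple rather than a pair. A hedged sketch (not part of the patch; the file name is an arbitrary example) of building one such parameter value:

    #include <tuple>
    typedef std::tuple<int, int, const char *> DecodeParam;
    // 4 decode threads, mt_mode 2 (row multithreading enabled).
    const DecodeParam param =
        std::make_tuple(4, 2, "vp90-2-00-quantizer-00.webm");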
 
 class TestVectorTest : public ::libvpx_test::DecoderTest,
                        public ::libvpx_test::CodecTestWithParam<DecodeParam> {
@@ -54,6 +58,25 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
         << "Md5 file open failed. Filename: " << md5_file_name_;
   }
 
+#if CONFIG_VP9_DECODER
+  virtual void PreDecodeFrameHook(
+      const libvpx_test::CompressedVideoSource &video,
+      libvpx_test::Decoder *decoder) {
+    if (video.frame_number() == 0 && mt_mode_ >= 0) {
+      if (mt_mode_ == 1) {
+        decoder->Control(VP9D_SET_LOOP_FILTER_OPT, 1);
+        decoder->Control(VP9D_SET_ROW_MT, 0);
+      } else if (mt_mode_ == 2) {
+        decoder->Control(VP9D_SET_LOOP_FILTER_OPT, 0);
+        decoder->Control(VP9D_SET_ROW_MT, 1);
+      } else {
+        decoder->Control(VP9D_SET_LOOP_FILTER_OPT, 0);
+        decoder->Control(VP9D_SET_ROW_MT, 0);
+      }
+    }
+  }
+#endif
+
   virtual void DecompressedFrameHook(const vpx_image_t &img,
                                      const unsigned int frame_number) {
     ASSERT_TRUE(md5_file_ != NULL);
@@ -77,6 +100,7 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
 #if CONFIG_VP9_DECODER
   std::set<std::string> resize_clips_;
 #endif
+  int mt_mode_;
 
  private:
  FILE *md5_file_;
@@ -88,19 +112,20 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
 // the test failed.
 TEST_P(TestVectorTest, MD5Match) {
   const DecodeParam input = GET_PARAM(1);
-  const std::string filename = std::tr1::get<kFileName>(input);
+  const std::string filename = std::get<kFileName>(input);
   vpx_codec_flags_t flags = 0;
   vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
   char str[256];
 
-  cfg.threads = std::tr1::get<kThreads>(input);
-
-  snprintf(str, sizeof(str) / sizeof(str[0]) - 1, "file: %s threads: %d",
-           filename.c_str(), cfg.threads);
+  cfg.threads = std::get<kThreads>(input);
+  mt_mode_ = std::get<kMtMode>(input);
+  snprintf(str, sizeof(str) / sizeof(str[0]) - 1,
+           "file: %s threads: %d MT mode: %d", filename.c_str(), cfg.threads,
+           mt_mode_);
   SCOPED_TRACE(str);
 
   // Open compressed video file.
-  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
+  std::unique_ptr<libvpx_test::CompressedVideoSource> video;
   if (filename.substr(filename.length() - 3, 3) == "ivf") {
     video.reset(new libvpx_test::IVFVideoSource(filename));
   } else if (filename.substr(filename.length() - 4, 4) == "webm") {
@@ -131,7 +156,8 @@ TEST_P(TestVectorTest, MD5Match) {
 VP8_INSTANTIATE_TEST_CASE(
     TestVectorTest,
     ::testing::Combine(
-        ::testing::Values(1),  // Single thread.
+        ::testing::Values(1),   // Single thread.
+        ::testing::Values(-1),  // LPF opt and Row MT is not applicable
         ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
                             libvpx_test::kVP8TestVectors +
                                 libvpx_test::kNumVP8TestVectors)));
@@ -144,6 +170,7 @@
         static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP8)),
     ::testing::Combine(
         ::testing::Range(2, 9),  // With 2 ~ 8 threads.
+        ::testing::Values(-1),   // LPF opt and Row MT is not applicable
         ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
                             libvpx_test::kVP8TestVectors +
                                 libvpx_test::kNumVP8TestVectors))));
@@ -154,7 +181,8 @@
 VP9_INSTANTIATE_TEST_CASE(
     TestVectorTest,
     ::testing::Combine(
-        ::testing::Values(1),  // Single thread.
+        ::testing::Values(1),   // Single thread.
+        ::testing::Values(-1),  // LPF opt and Row MT is not applicable
        ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
                             libvpx_test::kVP9TestVectors +
                                 libvpx_test::kNumVP9TestVectors)));
@@ -166,6 +194,10 @@
         static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
     ::testing::Combine(
         ::testing::Range(2, 9),  // With 2 ~ 8 threads.
+        ::testing::Range(0, 3),  // With multi threads modes 0 ~ 2
+                                 // 0: LPF opt and Row MT disabled
+                                 // 1: LPF opt enabled
+                                 // 2: Row MT enabled
         ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
                             libvpx_test::kVP9TestVectors +
                                 libvpx_test::kNumVP9TestVectors))));
diff --git a/media/libvpx/libvpx/test/test_vectors.h b/media/libvpx/libvpx/test/test_vectors.h
index 3df3e8113332..0a4be0f1a283 100644
--- a/media/libvpx/libvpx/test/test_vectors.h
+++ b/media/libvpx/libvpx/test/test_vectors.h
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef TEST_TEST_VECTORS_H_
-#define TEST_TEST_VECTORS_H_
+#ifndef VPX_TEST_TEST_VECTORS_H_
+#define VPX_TEST_TEST_VECTORS_H_
 
 #include "./vpx_config.h"
 
@@ -31,4 +31,4 @@ extern const char *const kVP9TestVectorsResize[];
 }  // namespace libvpx_test
 
-#endif  // TEST_TEST_VECTORS_H_
+#endif  // VPX_TEST_TEST_VECTORS_H_
diff --git a/media/libvpx/libvpx/test/tile_independence_test.cc b/media/libvpx/libvpx/test/tile_independence_test.cc
index e24981c68d36..1d1020a9d3e1 100644
--- a/media/libvpx/libvpx/test/tile_independence_test.cc
+++ b/media/libvpx/libvpx/test/tile_independence_test.cc
@@ -48,7 +48,7 @@ class TileIndependenceTest : public ::libvpx_test::EncoderTest,
 
   virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                   libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
+    if (video->frame() == 0) {
       encoder->Control(VP9E_SET_TILE_COLUMNS, n_tiles_);
     }
   }
diff --git a/media/libvpx/libvpx/test/timestamp_test.cc b/media/libvpx/libvpx/test/timestamp_test.cc
new file mode 100644
index 000000000000..41c1928875fc
--- /dev/null
+++ b/media/libvpx/libvpx/test/timestamp_test.cc
@@ -0,0 +1,101 @@
+/*
+ *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace {
+
+const int kVideoSourceWidth = 320;
+const int kVideoSourceHeight = 240;
+const int kFramesToEncode = 3;
+
+// A video source that exposes functions to set the timebase, framerate and
+// starting pts.
+class DummyTimebaseVideoSource : public ::libvpx_test::DummyVideoSource {
+ public:
+  // Parameters num and den set the timebase for the video source.
+  DummyTimebaseVideoSource(int num, int den)
+      : timebase_({ num, den }), framerate_numerator_(30),
+        framerate_denominator_(1), starting_pts_(0) {
+    SetSize(kVideoSourceWidth, kVideoSourceHeight);
+    set_limit(kFramesToEncode);
+  }
+
+  void SetFramerate(int numerator, int denominator) {
+    framerate_numerator_ = numerator;
+    framerate_denominator_ = denominator;
+  }
+
+  // Returns one frame's duration in timebase units as a double.
+  double FrameDuration() const {
+    return (static_cast<double>(timebase_.den) / timebase_.num) /
+           (static_cast<double>(framerate_numerator_) / framerate_denominator_);
+  }
+
+  virtual vpx_codec_pts_t pts() const {
+    return static_cast<vpx_codec_pts_t>(frame_ * FrameDuration() +
+                                        starting_pts_ + 0.5);
+  }
+
+  virtual unsigned long duration() const {
+    return static_cast<unsigned long>(FrameDuration() + 0.5);
+  }
+
+  virtual vpx_rational_t timebase() const { return timebase_; }
+
+  void set_starting_pts(int64_t starting_pts) { starting_pts_ = starting_pts; }
+
+ private:
+  vpx_rational_t timebase_;
+  int framerate_numerator_;
+  int framerate_denominator_;
+  int64_t starting_pts_;
+};
+
+class TimestampTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ protected:
+  TimestampTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~TimestampTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+  }
+};
+
+// Tests encoding in millisecond timebase.
+TEST_P(TimestampTest, EncodeFrames) {
+  DummyTimebaseVideoSource video(1, 1000);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+TEST_P(TimestampTest, TestMicrosecondTimebase) {
+  // Set the timebase to microseconds.
+  DummyTimebaseVideoSource video(1, 1000000);
+  video.set_limit(1);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+TEST_P(TimestampTest, TestVpxRollover) {
+  DummyTimebaseVideoSource video(1, 1000);
+  video.set_starting_pts(922337170351ll);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+VP8_INSTANTIATE_TEST_CASE(TimestampTest,
+                          ::testing::Values(::libvpx_test::kTwoPassGood));
+VP9_INSTANTIATE_TEST_CASE(TimestampTest,
+                          ::testing::Values(::libvpx_test::kTwoPassGood));
+}  // namespace
diff --git a/media/libvpx/libvpx/test/tools_common.sh b/media/libvpx/libvpx/test/tools_common.sh
index 0bdcc08d7875..844a12534deb 100755
--- a/media/libvpx/libvpx/test/tools_common.sh
+++ b/media/libvpx/libvpx/test/tools_common.sh
@@ -150,7 +150,7 @@ is_windows_target() {
 # empty string. Caller is responsible for testing the string once the function
 # returns.
 vpx_tool_path() {
-  local readonly tool_name="$1"
+  local tool_name="$1"
   local tool_path="${LIBVPX_BIN_PATH}/${tool_name}${VPX_TEST_EXE_SUFFIX}"
 
   if [ ! -x "${tool_path}" ]; then
     # Try one directory up: when running via examples.sh the tool could be in
@@ -404,12 +404,16 @@ VP9_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm"
 VP9_FPM_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-07-frame_parallel-1.webm"
 VP9_LT_50_FRAMES_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-02-size-32x08.webm"
 
+VP9_RAW_FILE="${LIBVPX_TEST_DATA_PATH}/crbug-1539.rawfile"
+
 YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
 YUV_RAW_INPUT_WIDTH=352
 YUV_RAW_INPUT_HEIGHT=288
 
 Y4M_NOSQ_PAR_INPUT="${LIBVPX_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m"
 Y4M_720P_INPUT="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.y4m"
+Y4M_720P_INPUT_WIDTH=1280
+Y4M_720P_INPUT_HEIGHT=720
 
 # Setup a trap function to clean up after tests complete.
 trap cleanup EXIT
diff --git a/media/libvpx/libvpx/test/user_priv_test.cc b/media/libvpx/libvpx/test/user_priv_test.cc
index 4b5de094e952..7bea76b0a94e 100644
--- a/media/libvpx/libvpx/test/user_priv_test.cc
+++ b/media/libvpx/libvpx/test/user_priv_test.cc
@@ -27,8 +27,8 @@
 
 namespace {
 
-using std::string;
 using libvpx_test::ACMRandom;
+using std::string;
 
 #if CONFIG_WEBM_IO
 
@@ -73,7 +73,7 @@ string DecodeFile(const string &filename) {
       CheckUserPrivateData(img->user_priv, &frame_num);
 
       // Also test ctrl_get_reference api.
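A note on the magic number in TestVpxRollover above: 922337170351 is just under INT64_MAX / 10^7. If the encoder rescales a millisecond pts onto a 10 MHz internal tick clock (an assumption about the encoder internals, not something this patch states), an intermediate product of the form pts * ticks_per_second for this starting point sits roughly 33 seconds of video below the int64 limit, so a short encode is enough to exercise the overflow handling. A back-of-the-envelope check (illustrative sketch):

  #include <cstdint>
  #include <cstdio>

  int main() {
    // From TestVpxRollover: starting pts in a 1/1000 (millisecond) timebase.
    const int64_t start_pts = 922337170351LL;
    // Assumed 10 MHz internal tick clock; a naive conversion that forms
    // pts * kTicksPerSec before dividing by the timebase would overflow first.
    const int64_t kTicksPerSec = 10000000;
    const int64_t max_safe_pts = INT64_MAX / kTicksPerSec;  // 922337203685
    std::printf("largest pts whose tick product fits in int64: %lld\n",
                (long long)max_safe_pts);
    std::printf("margin above the test's starting pts: %lld ms\n",
                (long long)(max_safe_pts - start_pts));  // 33334 ms, ~33 s
    return 0;
  }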
- struct vp9_ref_frame ref; + struct vp9_ref_frame ref = vp9_ref_frame(); // Randomly fetch a reference frame. ref.idx = rnd.Rand8() % 3; decoder.Control(VP9_GET_REFERENCE, &ref); diff --git a/media/libvpx/libvpx/test/util.h b/media/libvpx/libvpx/test/util.h index 1f2540ecf22c..985f487094f7 100644 --- a/media/libvpx/libvpx/test/util.h +++ b/media/libvpx/libvpx/test/util.h @@ -8,16 +8,18 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TEST_UTIL_H_ -#define TEST_UTIL_H_ +#ifndef VPX_TEST_UTIL_H_ +#define VPX_TEST_UTIL_H_ #include #include +#include + #include "third_party/googletest/src/include/gtest/gtest.h" #include "vpx/vpx_image.h" // Macros -#define GET_PARAM(k) std::tr1::get(GetParam()) +#define GET_PARAM(k) std::get(GetParam()) inline double compute_psnr(const vpx_image_t *img1, const vpx_image_t *img2) { assert((img1->fmt == img2->fmt) && (img1->d_w == img2->d_w) && @@ -43,4 +45,4 @@ inline double compute_psnr(const vpx_image_t *img1, const vpx_image_t *img2) { return psnr; } -#endif // TEST_UTIL_H_ +#endif // VPX_TEST_UTIL_H_ diff --git a/media/libvpx/libvpx/test/variance_test.cc b/media/libvpx/libvpx/test/variance_test.cc index 421024ad8891..e9fa03c680a2 100644 --- a/media/libvpx/libvpx/test/variance_test.cc +++ b/media/libvpx/libvpx/test/variance_test.cc @@ -20,24 +20,13 @@ #include "test/register_state_check.h" #include "vpx/vpx_codec.h" #include "vpx/vpx_integer.h" +#include "vpx_dsp/variance.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" namespace { -typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse); -typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride, - int xoffset, int yoffset, - const uint8_t *b, int b_stride, - unsigned int *sse); -typedef unsigned int (*SubpixAvgVarMxNFunc)(const uint8_t *a, int a_stride, - int xoffset, int yoffset, - const uint8_t *b, int b_stride, - uint32_t *sse, - const uint8_t *second_pred); typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride); typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src); @@ -572,15 +561,16 @@ class SubpelVarianceTest if (!use_high_bit_depth()) { src_ = reinterpret_cast(vpx_memalign(16, block_size())); sec_ = reinterpret_cast(vpx_memalign(16, block_size())); - ref_ = new uint8_t[block_size() + width() + height() + 1]; + ref_ = reinterpret_cast( + vpx_malloc(block_size() + width() + height() + 1)); #if CONFIG_VP9_HIGHBITDEPTH } else { src_ = CONVERT_TO_BYTEPTR(reinterpret_cast( vpx_memalign(16, block_size() * sizeof(uint16_t)))); sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast( vpx_memalign(16, block_size() * sizeof(uint16_t)))); - ref_ = CONVERT_TO_BYTEPTR( - new uint16_t[block_size() + width() + height() + 1]); + ref_ = CONVERT_TO_BYTEPTR(reinterpret_cast(vpx_malloc( + (block_size() + width() + height() + 1) * sizeof(uint16_t)))); #endif // CONFIG_VP9_HIGHBITDEPTH } ASSERT_TRUE(src_ != NULL); @@ -591,12 +581,12 @@ class SubpelVarianceTest virtual void TearDown() { if (!use_high_bit_depth()) { vpx_free(src_); - delete[] ref_; vpx_free(sec_); + vpx_free(ref_); #if CONFIG_VP9_HIGHBITDEPTH } else { vpx_free(CONVERT_TO_SHORTPTR(src_)); - delete[] CONVERT_TO_SHORTPTR(ref_); + vpx_free(CONVERT_TO_SHORTPTR(ref_)); vpx_free(CONVERT_TO_SHORTPTR(sec_)); #endif // CONFIG_VP9_HIGHBITDEPTH } @@ -692,7 +682,7 @@ void SubpelVarianceTest::ExtremeRefTest() { } template <> -void SubpelVarianceTest::RefTest() { +void 
SubpelVarianceTest::RefTest() { for (int x = 0; x < 8; ++x) { for (int y = 0; y < 8; ++y) { if (!use_high_bit_depth()) { @@ -728,10 +718,10 @@ void SubpelVarianceTest::RefTest() { } typedef MainTestClass VpxSseTest; -typedef MainTestClass VpxMseTest; -typedef MainTestClass VpxVarianceTest; -typedef SubpelVarianceTest VpxSubpelVarianceTest; -typedef SubpelVarianceTest VpxSubpelAvgVarianceTest; +typedef MainTestClass VpxMseTest; +typedef MainTestClass VpxVarianceTest; +typedef SubpelVarianceTest VpxSubpelVarianceTest; +typedef SubpelVarianceTest VpxSubpelAvgVarianceTest; TEST_P(VpxSseTest, RefSse) { RefTestSse(); } TEST_P(VpxSseTest, MaxSse) { MaxTestSse(); } @@ -756,14 +746,14 @@ INSTANTIATE_TEST_CASE_P(C, VpxSseTest, ::testing::Values(SseParams(2, 2, &vpx_get4x4sse_cs_c))); -typedef TestParams MseParams; +typedef TestParams MseParams; INSTANTIATE_TEST_CASE_P(C, VpxMseTest, ::testing::Values(MseParams(4, 4, &vpx_mse16x16_c), MseParams(4, 3, &vpx_mse16x8_c), MseParams(3, 4, &vpx_mse8x16_c), MseParams(3, 3, &vpx_mse8x8_c))); -typedef TestParams VarianceParams; +typedef TestParams VarianceParams; INSTANTIATE_TEST_CASE_P( C, VpxVarianceTest, ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_c), @@ -780,7 +770,7 @@ INSTANTIATE_TEST_CASE_P( VarianceParams(2, 3, &vpx_variance4x8_c), VarianceParams(2, 2, &vpx_variance4x4_c))); -typedef TestParams SubpelVarianceParams; +typedef TestParams SubpelVarianceParams; INSTANTIATE_TEST_CASE_P( C, VpxSubpelVarianceTest, ::testing::Values( @@ -798,7 +788,7 @@ INSTANTIATE_TEST_CASE_P( SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_c, 0), SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_c, 0))); -typedef TestParams SubpelAvgVarianceParams; +typedef TestParams SubpelAvgVarianceParams; INSTANTIATE_TEST_CASE_P( C, VpxSubpelAvgVarianceTest, ::testing::Values( @@ -817,10 +807,11 @@ INSTANTIATE_TEST_CASE_P( SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_c, 0))); #if CONFIG_VP9_HIGHBITDEPTH -typedef MainTestClass VpxHBDMseTest; -typedef MainTestClass VpxHBDVarianceTest; -typedef SubpelVarianceTest VpxHBDSubpelVarianceTest; -typedef SubpelVarianceTest VpxHBDSubpelAvgVarianceTest; +typedef MainTestClass VpxHBDMseTest; +typedef MainTestClass VpxHBDVarianceTest; +typedef SubpelVarianceTest VpxHBDSubpelVarianceTest; +typedef SubpelVarianceTest + VpxHBDSubpelAvgVarianceTest; TEST_P(VpxHBDMseTest, RefMse) { RefTestMse(); } TEST_P(VpxHBDMseTest, MaxMse) { MaxTestMse(); } @@ -1384,15 +1375,19 @@ INSTANTIATE_TEST_CASE_P( #if HAVE_AVX2 INSTANTIATE_TEST_CASE_P(AVX2, VpxMseTest, - ::testing::Values(MseParams(4, 4, &vpx_mse16x16_avx2))); + ::testing::Values(MseParams(4, 4, &vpx_mse16x16_avx2), + MseParams(4, 3, &vpx_mse16x8_avx2))); INSTANTIATE_TEST_CASE_P( AVX2, VpxVarianceTest, ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_avx2), VarianceParams(6, 5, &vpx_variance64x32_avx2), + VarianceParams(5, 6, &vpx_variance32x64_avx2), VarianceParams(5, 5, &vpx_variance32x32_avx2), VarianceParams(5, 4, &vpx_variance32x16_avx2), - VarianceParams(4, 4, &vpx_variance16x16_avx2))); + VarianceParams(4, 5, &vpx_variance16x32_avx2), + VarianceParams(4, 4, &vpx_variance16x16_avx2), + VarianceParams(4, 3, &vpx_variance16x8_avx2))); INSTANTIATE_TEST_CASE_P( AVX2, VpxSubpelVarianceTest, @@ -1539,6 +1534,27 @@ INSTANTIATE_TEST_CASE_P(VSX, SumOfSquaresTest, INSTANTIATE_TEST_CASE_P(VSX, VpxSseTest, ::testing::Values(SseParams(2, 2, &vpx_get4x4sse_cs_vsx))); +INSTANTIATE_TEST_CASE_P(VSX, VpxMseTest, + ::testing::Values(MseParams(4, 4, &vpx_mse16x16_vsx), + 
MseParams(4, 3, &vpx_mse16x8_vsx), + MseParams(3, 4, &vpx_mse8x16_vsx), + MseParams(3, 3, &vpx_mse8x8_vsx))); + +INSTANTIATE_TEST_CASE_P( + VSX, VpxVarianceTest, + ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_vsx), + VarianceParams(6, 5, &vpx_variance64x32_vsx), + VarianceParams(5, 6, &vpx_variance32x64_vsx), + VarianceParams(5, 5, &vpx_variance32x32_vsx), + VarianceParams(5, 4, &vpx_variance32x16_vsx), + VarianceParams(4, 5, &vpx_variance16x32_vsx), + VarianceParams(4, 4, &vpx_variance16x16_vsx), + VarianceParams(4, 3, &vpx_variance16x8_vsx), + VarianceParams(3, 4, &vpx_variance8x16_vsx), + VarianceParams(3, 3, &vpx_variance8x8_vsx), + VarianceParams(3, 2, &vpx_variance8x4_vsx), + VarianceParams(2, 3, &vpx_variance4x8_vsx), + VarianceParams(2, 2, &vpx_variance4x4_vsx))); #endif // HAVE_VSX #if HAVE_MMI diff --git a/media/libvpx/libvpx/test/video_source.h b/media/libvpx/libvpx/test/video_source.h index 54f692865b6b..e9340f21e9ea 100644 --- a/media/libvpx/libvpx/test/video_source.h +++ b/media/libvpx/libvpx/test/video_source.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TEST_VIDEO_SOURCE_H_ -#define TEST_VIDEO_SOURCE_H_ +#ifndef VPX_TEST_VIDEO_SOURCE_H_ +#define VPX_TEST_VIDEO_SOURCE_H_ #if defined(_WIN32) #undef NOMINMAX @@ -255,4 +255,4 @@ class CompressedVideoSource { } // namespace libvpx_test -#endif // TEST_VIDEO_SOURCE_H_ +#endif // VPX_TEST_VIDEO_SOURCE_H_ diff --git a/media/libvpx/libvpx/test/vp8_boolcoder_test.cc b/media/libvpx/libvpx/test/vp8_boolcoder_test.cc index 9d81f9382ad2..c78b0b3b6cc2 100644 --- a/media/libvpx/libvpx/test/vp8_boolcoder_test.cc +++ b/media/libvpx/libvpx/test/vp8_boolcoder_test.cc @@ -93,6 +93,9 @@ TEST(VP8, TestBitIO) { } vp8_stop_encode(&bw); + // vp8dx_bool_decoder_fill() may read into uninitialized data that + // isn't used meaningfully, but may trigger an MSan warning. + memset(bw_buffer + bw.pos, 0, sizeof(VP8_BD_VALUE) - 1); BOOL_DECODER br; encrypt_buffer(bw_buffer, kBufferSize); diff --git a/media/libvpx/libvpx/test/vp8_datarate_test.cc b/media/libvpx/libvpx/test/vp8_datarate_test.cc new file mode 100644 index 000000000000..95a1157f6c2b --- /dev/null +++ b/media/libvpx/libvpx/test/vp8_datarate_test.cc @@ -0,0 +1,416 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "./vpx_config.h" +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "vpx/vpx_codec.h" + +namespace { + +class DatarateTestLarge + : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params { + public: + DatarateTestLarge() : EncoderTest(GET_PARAM(0)) {} + + virtual ~DatarateTestLarge() {} + + protected: + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + set_cpu_used_ = GET_PARAM(2); + ResetModel(); + } + + virtual void ResetModel() { + last_pts_ = 0; + bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; + frame_number_ = 0; + first_drop_ = 0; + bits_total_ = 0; + duration_ = 0.0; + denoiser_offon_test_ = 0; + denoiser_offon_period_ = -1; + gf_boost_ = 0; + use_roi_ = false; + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_); + encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); + encoder->Control(VP8E_SET_GF_CBR_BOOST_PCT, gf_boost_); + } + + if (use_roi_) { + encoder->Control(VP8E_SET_ROI_MAP, &roi_); + } + + if (denoiser_offon_test_) { + ASSERT_GT(denoiser_offon_period_, 0) + << "denoiser_offon_period_ is not positive."; + if ((video->frame() + 1) % denoiser_offon_period_ == 0) { + // Flip denoiser_on_ periodically + denoiser_on_ ^= 1; + } + encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_); + } + + const vpx_rational_t tb = video->timebase(); + timebase_ = static_cast(tb.num) / tb.den; + duration_ = 0; + } + + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + // Time since last timestamp = duration. + vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; + + // TODO(jimbankoski): Remove these lines when the issue: + // http://code.google.com/p/webm/issues/detail?id=496 is fixed. + // For now the codec assumes buffer starts at starting buffer rate + // plus one frame's time. + if (last_pts_ == 0) duration = 1; + + // Add to the buffer the bits we'd expect from a constant bitrate server. + bits_in_buffer_model_ += static_cast( + duration * timebase_ * cfg_.rc_target_bitrate * 1000); + + /* Test the buffer model here before subtracting the frame. Do so because + * the way the leaky bucket model works in libvpx is to allow the buffer to + * empty - and then stop showing frames until we've got enough bits to + * show one. As noted in comment below (issue 495), this does not currently + * apply to key frames. For now exclude key frames in condition below. */ + const bool key_frame = + (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false; + if (!key_frame) { + ASSERT_GE(bits_in_buffer_model_, 0) + << "Buffer Underrun at frame " << pkt->data.frame.pts; + } + + const int64_t frame_size_in_bits = pkt->data.frame.sz * 8; + + // Subtract from the buffer the bits associated with a played back frame. + bits_in_buffer_model_ -= frame_size_in_bits; + + // Update the running total of bits for end of test datarate checks. + bits_total_ += frame_size_in_bits; + + // If first drop not set and we have a drop set it to this time. + if (!first_drop_ && duration > 1) first_drop_ = last_pts_ + 1; + + // Update the most recent pts. 
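The FramePktHook above implements a textbook leaky-bucket model for CBR: bits arrive at the target rate for the elapsed duration, each encoded frame drains its size in bits, and outside of key frames the level must never go negative. The same model reduced to its core (illustrative sketch, not the test harness):

  #include <cstdint>
  #include <cstdio>

  // Minimal CBR leaky bucket: AddFrame() returns false on an underrun.
  // target_kbps * duration_ms yields bits, matching the test's
  // duration * timebase * rc_target_bitrate * 1000 accounting.
  struct LeakyBucket {
    int64_t bits;
    int target_kbps;
    bool AddFrame(int duration_ms, int64_t frame_bytes) {
      bits += static_cast<int64_t>(duration_ms) * target_kbps;  // kbit/s * ms = bits
      bits -= frame_bytes * 8;
      return bits >= 0;
    }
  };

  int main() {
    // 500 ms of prebuffer at 300 kbps, as in rc_buf_initial_sz = 500.
    LeakyBucket bucket = { 500 * 300LL, 300 };
    // Three 33 ms frames: two on budget, one badly oversized.
    const int64_t frame_bytes[3] = { 1200, 1200, 50000 };
    for (int i = 0; i < 3; ++i) {
      const bool ok = bucket.AddFrame(33, frame_bytes[i]);
      std::printf("frame %d: buffer=%lld bits %s\n", i, (long long)bucket.bits,
                  ok ? "" : "(underrun)");
    }
    return 0;
  }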
+ last_pts_ = pkt->data.frame.pts; + + // We update this so that we can calculate the datarate minus the last + // frame encoded in the file. + bits_in_last_frame_ = frame_size_in_bits; + + ++frame_number_; + } + + virtual void EndPassHook(void) { + if (bits_total_) { + const double file_size_in_kb = bits_total_ / 1000.; // bits per kilobit + + duration_ = (last_pts_ + 1) * timebase_; + + // Effective file datarate includes the time spent prebuffering. + effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0 / + (cfg_.rc_buf_initial_sz / 1000.0 + duration_); + + file_datarate_ = file_size_in_kb / duration_; + } + } + + virtual void DenoiserLevelsTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 140); + for (int j = 1; j < 5; ++j) { + // Run over the denoiser levels. + // For the temporal denoiser (#if CONFIG_TEMPORAL_DENOISING) the level j + // refers to the 4 denoiser modes: denoiserYonly, denoiserOnYUV, + // denoiserOnAggressive, and denoiserOnAdaptive. + denoiser_on_ = j; + cfg_.rc_target_bitrate = 300; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) + << " The datarate for the file exceeds the target!"; + + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) + << " The datarate for the file missed the target!"; + } + } + + virtual void DenoiserOffOnTest() { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 299); + cfg_.rc_target_bitrate = 300; + ResetModel(); + // The denoiser is off by default. + denoiser_on_ = 0; + // Set the offon test flag. + denoiser_offon_test_ = 1; + denoiser_offon_period_ = 100; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) + << " The datarate for the file exceeds the target!"; + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) + << " The datarate for the file missed the target!"; + } + + virtual void BasicBufferModelTest() { + denoiser_on_ = 0; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + // 2 pass cbr datarate control has a bug hidden by the small # of + // frames selected in this encode. The problem is that even if the buffer is + // negative we produce a keyframe on a cutscene. Ignoring datarate + // constraints + // TODO(jimbankoski): ( Fix when issue + // http://code.google.com/p/webm/issues/detail?id=495 is addressed. ) + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 140); + + // There is an issue for low bitrates in real-time mode, where the + // effective_datarate slightly overshoots the target bitrate. + // This is same the issue as noted about (#495). + // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100), + // when the issue is resolved. 
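To make the EndPassHook arithmetic above concrete: file_datarate_ is simply total bits over elapsed time, while effective_datarate_ drops the final frame and charges the initial buffering window to the denominator, so it always comes out slightly lower. With round numbers (sketch, figures invented for illustration):

  #include <cstdio>

  int main() {
    const double bits_total = 3000000.0;        // 3 Mbit encoded
    const double bits_in_last_frame = 20000.0;  // last packet, excluded
    const double duration_s = 10.0;             // (last_pts_ + 1) * timebase_
    const double buf_initial_s = 0.5;           // rc_buf_initial_sz = 500 ms

    const double file_datarate = (bits_total / 1000.0) / duration_s;
    const double effective_datarate = (bits_total - bits_in_last_frame) /
                                      1000.0 / (buf_initial_s + duration_s);
    std::printf("file:      %.1f kbps\n", file_datarate);       // 300.0
    std::printf("effective: %.1f kbps\n", effective_datarate);  // ~283.8
    // The 0.95 / 1.4 bounds in the tests are applied to these two numbers.
    return 0;
  }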
+ for (int i = 100; i < 800; i += 200) { + cfg_.rc_target_bitrate = i; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) + << " The datarate for the file exceeds the target!"; + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) + << " The datarate for the file missed the target!"; + } + } + + virtual void ChangingDropFrameThreshTest() { + denoiser_on_ = 0; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_max_quantizer = 36; + cfg_.rc_end_usage = VPX_CBR; + cfg_.rc_target_bitrate = 200; + cfg_.kf_mode = VPX_KF_DISABLED; + + const int frame_count = 40; + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, frame_count); + + // Here we check that the first dropped frame gets earlier and earlier + // as the drop frame threshold is increased. + + const int kDropFrameThreshTestStep = 30; + vpx_codec_pts_t last_drop = frame_count; + for (int i = 1; i < 91; i += kDropFrameThreshTestStep) { + cfg_.rc_dropframe_thresh = i; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_LE(first_drop_, last_drop) + << " The first dropped frame for drop_thresh " << i + << " > first dropped frame for drop_thresh " + << i - kDropFrameThreshTestStep; + last_drop = first_drop_; + } + } + + virtual void DropFramesMultiThreadsTest() { + denoiser_on_ = 0; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_dropframe_thresh = 30; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_threads = 2; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, + 288, 30, 1, 0, 140); + cfg_.rc_target_bitrate = 200; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) + << " The datarate for the file exceeds the target!"; + + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) + << " The datarate for the file missed the target!"; + } + + vpx_codec_pts_t last_pts_; + int64_t bits_in_buffer_model_; + double timebase_; + int frame_number_; + vpx_codec_pts_t first_drop_; + int64_t bits_total_; + double duration_; + double file_datarate_; + double effective_datarate_; + int64_t bits_in_last_frame_; + int denoiser_on_; + int denoiser_offon_test_; + int denoiser_offon_period_; + int set_cpu_used_; + int gf_boost_; + bool use_roi_; + vpx_roi_map_t roi_; +}; + +#if CONFIG_TEMPORAL_DENOISING +// Check basic datarate targeting, for a single bitrate, but loop over the +// various denoiser settings. +TEST_P(DatarateTestLarge, DenoiserLevels) { DenoiserLevelsTest(); } + +// Check basic datarate targeting, for a single bitrate, when denoiser is off +// and on. +TEST_P(DatarateTestLarge, DenoiserOffOn) { DenoiserOffOnTest(); } +#endif // CONFIG_TEMPORAL_DENOISING + +TEST_P(DatarateTestLarge, BasicBufferModel) { BasicBufferModelTest(); } + +TEST_P(DatarateTestLarge, ChangingDropFrameThresh) { + ChangingDropFrameThreshTest(); +} + +TEST_P(DatarateTestLarge, DropFramesMultiThreads) { + DropFramesMultiThreadsTest(); +} + +class DatarateTestRealTime : public DatarateTestLarge { + public: + virtual ~DatarateTestRealTime() {} +}; + +#if CONFIG_TEMPORAL_DENOISING +// Check basic datarate targeting, for a single bitrate, but loop over the +// various denoiser settings. +TEST_P(DatarateTestRealTime, DenoiserLevels) { DenoiserLevelsTest(); } + +// Check basic datarate targeting, for a single bitrate, when denoiser is off +// and on. 
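Note that these tests never ask the encoder which frames it dropped; drops are inferred from gaps in the output pts sequence, exactly as in FramePktHook. The inference on its own (sketch):

  #include <cstdio>
  #include <vector>

  int main() {
    // Output timestamps from a hypothetical encode; input was one frame per pts.
    const std::vector<long long> pts = { 0, 1, 2, 5, 6, 9 };
    long long last_pts = 0, first_drop = 0;
    int num_drops = 0;
    for (size_t i = 1; i < pts.size(); ++i) {
      const long long duration = pts[i] - last_pts;
      if (duration > 1) {
        if (first_drop == 0) first_drop = last_pts + 1;  // first gap seen
        num_drops += static_cast<int>(duration - 1);     // frames skipped
      }
      last_pts = pts[i];
    }
    std::printf("first drop at pts %lld, %d frames dropped\n", first_drop,
                num_drops);  // first drop at pts 3, 4 frames dropped
    return 0;
  }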
+TEST_P(DatarateTestRealTime, DenoiserOffOn) {} +#endif // CONFIG_TEMPORAL_DENOISING + +TEST_P(DatarateTestRealTime, BasicBufferModel) { BasicBufferModelTest(); } + +TEST_P(DatarateTestRealTime, ChangingDropFrameThresh) { + ChangingDropFrameThreshTest(); +} + +TEST_P(DatarateTestRealTime, DropFramesMultiThreads) { + DropFramesMultiThreadsTest(); +} + +TEST_P(DatarateTestRealTime, RegionOfInterest) { + denoiser_on_ = 0; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + // Encode using multiple threads. + cfg_.g_threads = 2; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 300); + cfg_.rc_target_bitrate = 450; + cfg_.g_w = 352; + cfg_.g_h = 288; + + ResetModel(); + + // Set ROI parameters + use_roi_ = true; + memset(&roi_, 0, sizeof(roi_)); + + roi_.rows = (cfg_.g_h + 15) / 16; + roi_.cols = (cfg_.g_w + 15) / 16; + + roi_.delta_q[0] = 0; + roi_.delta_q[1] = -20; + roi_.delta_q[2] = 0; + roi_.delta_q[3] = 0; + + roi_.delta_lf[0] = 0; + roi_.delta_lf[1] = -20; + roi_.delta_lf[2] = 0; + roi_.delta_lf[3] = 0; + + roi_.static_threshold[0] = 0; + roi_.static_threshold[1] = 1000; + roi_.static_threshold[2] = 0; + roi_.static_threshold[3] = 0; + + // Use 2 states: 1 is center square, 0 is the rest. + roi_.roi_map = + (uint8_t *)calloc(roi_.rows * roi_.cols, sizeof(*roi_.roi_map)); + for (unsigned int i = 0; i < roi_.rows; ++i) { + for (unsigned int j = 0; j < roi_.cols; ++j) { + if (i > (roi_.rows >> 2) && i < ((roi_.rows * 3) >> 2) && + j > (roi_.cols >> 2) && j < ((roi_.cols * 3) >> 2)) { + roi_.roi_map[i * roi_.cols + j] = 1; + } + } + } + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) + << " The datarate for the file exceeds the target!"; + + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) + << " The datarate for the file missed the target!"; + + free(roi_.roi_map); +} + +TEST_P(DatarateTestRealTime, GFBoost) { + denoiser_on_ = 0; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_error_resilient = 0; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 300); + cfg_.rc_target_bitrate = 300; + ResetModel(); + // Apply a gf boost. + gf_boost_ = 50; + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) + << " The datarate for the file exceeds the target!"; + + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) + << " The datarate for the file missed the target!"; +} + +VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES, + ::testing::Values(0)); +VP8_INSTANTIATE_TEST_CASE(DatarateTestRealTime, + ::testing::Values(::libvpx_test::kRealTime), + ::testing::Values(-6, -12)); +} // namespace diff --git a/media/libvpx/libvpx/test/vp8_multi_resolution_encoder.sh b/media/libvpx/libvpx/test/vp8_multi_resolution_encoder.sh index a8b7fe78eea4..bd45b5381f6d 100755 --- a/media/libvpx/libvpx/test/vp8_multi_resolution_encoder.sh +++ b/media/libvpx/libvpx/test/vp8_multi_resolution_encoder.sh @@ -22,7 +22,7 @@ vp8_multi_resolution_encoder_verify_environment() { elog "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi - local readonly app="vp8_multi_resolution_encoder" + local app="vp8_multi_resolution_encoder" if [ -z "$(vpx_tool_path "${app}")" ]; then elog "${app} not found. 
It must exist in LIBVPX_BIN_PATH or its parent." return 1 @@ -33,7 +33,7 @@ vp8_multi_resolution_encoder_verify_environment() { # Runs vp8_multi_resolution_encoder. Simply forwards all arguments to # vp8_multi_resolution_encoder after building path to the executable. vp8_mre() { - local readonly encoder="$(vpx_tool_path vp8_multi_resolution_encoder)" + local encoder="$(vpx_tool_path vp8_multi_resolution_encoder)" if [ ! -x "${encoder}" ]; then elog "${encoder} does not exist or is not executable." return 1 @@ -43,22 +43,34 @@ vp8_mre() { } vp8_multi_resolution_encoder_three_formats() { - local readonly output_files="${VPX_TEST_OUTPUT_DIR}/vp8_mre_0.ivf - ${VPX_TEST_OUTPUT_DIR}/vp8_mre_1.ivf - ${VPX_TEST_OUTPUT_DIR}/vp8_mre_2.ivf" + local output_files="${VPX_TEST_OUTPUT_DIR}/vp8_mre_0.ivf + ${VPX_TEST_OUTPUT_DIR}/vp8_mre_1.ivf + ${VPX_TEST_OUTPUT_DIR}/vp8_mre_2.ivf" + local layer_bitrates="150 80 50" + local keyframe_insert="200" + local temporal_layers="3 3 3" + local framerate="30" if [ "$(vpx_config_option_enabled CONFIG_MULTI_RES_ENCODING)" = "yes" ]; then if [ "$(vp8_encode_available)" = "yes" ]; then # Param order: # Input width # Input height + # Framerate # Input file path # Output file names + # Layer bitrates + # Temporal layers + # Keyframe insert # Output PSNR vp8_mre "${YUV_RAW_INPUT_WIDTH}" \ "${YUV_RAW_INPUT_HEIGHT}" \ + "${framerate}" \ "${YUV_RAW_INPUT}" \ ${output_files} \ + ${layer_bitrates} \ + ${temporal_layers} \ + "${keyframe_insert}" \ 0 for output_file in ${output_files}; do diff --git a/media/libvpx/libvpx/test/vp9_arf_freq_test.cc b/media/libvpx/libvpx/test/vp9_arf_freq_test.cc index 48a4ca73926a..9a3455b4aa15 100644 --- a/media/libvpx/libvpx/test/vp9_arf_freq_test.cc +++ b/media/libvpx/libvpx/test/vp9_arf_freq_test.cc @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include + #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" @@ -190,7 +192,7 @@ TEST_P(ArfFreqTest, MinArfFreqTest) { init_flags_ = VPX_CODEC_USE_PSNR; if (cfg_.g_bit_depth > 8) init_flags_ |= VPX_CODEC_USE_HIGHBITDEPTH; - testing::internal::scoped_ptr video; + std::unique_ptr video; if (is_extension_y4m(test_video_param_.filename)) { video.reset(new libvpx_test::Y4mVideoSource(test_video_param_.filename, 0, kFrames)); diff --git a/media/libvpx/libvpx/test/vp9_block_error_test.cc b/media/libvpx/libvpx/test/vp9_block_error_test.cc index 0b4d1df9922f..71a0686d7a55 100644 --- a/media/libvpx/libvpx/test/vp9_block_error_test.cc +++ b/media/libvpx/libvpx/test/vp9_block_error_test.cc @@ -11,6 +11,7 @@ #include #include #include +#include #include "third_party/googletest/src/include/gtest/gtest.h" @@ -35,7 +36,7 @@ typedef int64_t (*HBDBlockErrorFunc)(const tran_low_t *coeff, intptr_t block_size, int64_t *ssz, int bps); -typedef std::tr1::tuple +typedef std::tuple BlockErrorParam; typedef int64_t (*BlockErrorFunc)(const tran_low_t *coeff, @@ -168,7 +169,7 @@ TEST_P(BlockErrorTest, ExtremeValues) { << "First failed at test case " << first_failure; } -using std::tr1::make_tuple; +using std::make_tuple; #if HAVE_SSE2 const BlockErrorParam sse2_block_error_tests[] = { diff --git a/media/libvpx/libvpx/test/vp9_boolcoder_test.cc b/media/libvpx/libvpx/test/vp9_boolcoder_test.cc index 5dbfd5ca5912..0cafa6730e25 100644 --- a/media/libvpx/libvpx/test/vp9_boolcoder_test.cc +++ b/media/libvpx/libvpx/test/vp9_boolcoder_test.cc @@ -66,6 +66,9 @@ TEST(VP9, TestBitIO) { } vpx_stop_encode(&bw); + // vpx_reader_fill() may read into uninitialized data that + // isn't used meaningfully, but may trigger an MSan warning. + memset(bw_buffer + bw.pos, 0, sizeof(BD_VALUE) - 1); // First bit should be zero GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0); diff --git a/media/libvpx/libvpx/test/vp9_datarate_test.cc b/media/libvpx/libvpx/test/vp9_datarate_test.cc new file mode 100644 index 000000000000..b8be275eaf7a --- /dev/null +++ b/media/libvpx/libvpx/test/vp9_datarate_test.cc @@ -0,0 +1,901 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "./vpx_config.h" +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" +#include "test/y4m_video_source.h" +#include "vpx/vpx_codec.h" +#include "vpx_ports/bitops.h" + +namespace { + +class DatarateTestVP9 : public ::libvpx_test::EncoderTest { + public: + explicit DatarateTestVP9(const ::libvpx_test::CodecFactory *codec) + : EncoderTest(codec) { + tune_content_ = 0; + } + + protected: + virtual ~DatarateTestVP9() {} + + virtual void ResetModel() { + last_pts_ = 0; + bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; + frame_number_ = 0; + tot_frame_number_ = 0; + first_drop_ = 0; + num_drops_ = 0; + aq_mode_ = 3; + // Denoiser is off by default. + denoiser_on_ = 0; + // For testing up to 3 layers. 
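The memset calls added to the two boolcoder tests above pad the buffer tail because a boolean decoder refills its bit window a machine word at a time, so the final refill can read up to sizeof(window) - 1 bytes past the last byte the encoder produced. Those bytes never affect decoded values, but MSan flags the read unless they are initialized. A minimal model of a word-at-a-time refill (illustrative sketch, not the libvpx reader):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Refill an 8-byte window from a byte stream with a whole-word read; the
  // last refill of a real stream can reach up to 7 bytes beyond the coded
  // data, which is what the zero-padding after vp8_stop_encode() /
  // vpx_stop_encode() covers.
  static uint64_t Refill(const uint8_t *buf, size_t pos) {
    uint64_t window = 0;
    std::memcpy(&window, buf + pos, sizeof(window));  // word-sized read
    return window;
  }

  int main() {
    uint8_t buffer[16] = { 0xAB, 0xCD };  // 2 meaningful bytes...
    // ...but the refill reads 8, so define the tail, mirroring the tests.
    std::memset(buffer + 2, 0, sizeof(uint64_t) - 1);
    std::printf("window = %016llx\n", (unsigned long long)Refill(buffer, 0));
    return 0;
  }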
+ for (int i = 0; i < 3; ++i) { + bits_total_[i] = 0; + } + denoiser_offon_test_ = 0; + denoiser_offon_period_ = -1; + frame_parallel_decoding_mode_ = 1; + use_roi_ = false; + } + + // + // Frame flags and layer id for temporal layers. + // + + // For two layers, test pattern is: + // 1 3 + // 0 2 ..... + // For three layers, test pattern is: + // 1 3 5 7 + // 2 6 + // 0 4 .... + // LAST is always update on base/layer 0, GOLDEN is updated on layer 1. + // For this 3 layer example, the 2nd enhancement layer (layer 2) updates + // the altref frame. + static int GetFrameFlags(int frame_num, int num_temp_layers) { + int frame_flags = 0; + if (num_temp_layers == 2) { + if (frame_num % 2 == 0) { + // Layer 0: predict from L and ARF, update L. + frame_flags = + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + } else { + // Layer 1: predict from L, G and ARF, and update G. + frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ENTROPY; + } + } else if (num_temp_layers == 3) { + if (frame_num % 4 == 0) { + // Layer 0: predict from L and ARF; update L. + frame_flags = + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; + } else if ((frame_num - 2) % 4 == 0) { + // Layer 1: predict from L, G, ARF; update G. + frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + } else if ((frame_num - 1) % 2 == 0) { + // Layer 2: predict from L, G, ARF; update ARF. + frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST; + } + } + return frame_flags; + } + + static int SetLayerId(int frame_num, int num_temp_layers) { + int layer_id = 0; + if (num_temp_layers == 2) { + if (frame_num % 2 == 0) { + layer_id = 0; + } else { + layer_id = 1; + } + } else if (num_temp_layers == 3) { + if (frame_num % 4 == 0) { + layer_id = 0; + } else if ((frame_num - 2) % 4 == 0) { + layer_id = 1; + } else if ((frame_num - 1) % 2 == 0) { + layer_id = 2; + } + } + return layer_id; + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); + encoder->Control(VP9E_SET_AQ_MODE, aq_mode_); + encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_); + } + + if (denoiser_offon_test_) { + ASSERT_GT(denoiser_offon_period_, 0) + << "denoiser_offon_period_ is not positive."; + if ((video->frame() + 1) % denoiser_offon_period_ == 0) { + // Flip denoiser_on_ periodically + denoiser_on_ ^= 1; + } + } + + encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_); + encoder->Control(VP9E_SET_TILE_COLUMNS, get_msb(cfg_.g_threads)); + encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, + frame_parallel_decoding_mode_); + + if (use_roi_) { + encoder->Control(VP9E_SET_ROI_MAP, &roi_); + encoder->Control(VP9E_SET_AQ_MODE, 0); + } + + if (cfg_.ts_number_layers > 1) { + if (video->frame() == 0) { + encoder->Control(VP9E_SET_SVC, 1); + } + vpx_svc_layer_id_t layer_id; + layer_id.spatial_layer_id = 0; + frame_flags_ = GetFrameFlags(video->frame(), cfg_.ts_number_layers); + layer_id.temporal_layer_id = + SetLayerId(video->frame(), cfg_.ts_number_layers); + layer_id.temporal_layer_id_per_spatial[0] = + SetLayerId(video->frame(), cfg_.ts_number_layers); + encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id); + } + const vpx_rational_t tb = video->timebase(); + timebase_ = static_cast(tb.num) / tb.den; + duration_ = 0; + } + + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + // Time since last timestamp = duration. 
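GetFrameFlags() and SetLayerId() above hard-code the standard temporal cadences, which is where the (4, 2, 1) rate decimation used later in the temporal-layer tests comes from. Printing two periods of the three-layer pattern makes the cadence visible (sketch reusing the same modulo logic):

  #include <cstdio>

  // Same branch structure as SetLayerId() above, three-layer case.
  static int LayerId3(int frame) {
    if (frame % 4 == 0) return 0;        // base layer, quarter rate
    if ((frame - 2) % 4 == 0) return 1;  // middle layer, half rate
    return 2;                            // top layer, full rate
  }

  int main() {
    // Two periods: expected pattern 0 2 1 2 0 2 1 2.
    for (int f = 0; f < 8; ++f) std::printf("%d ", LayerId3(f));
    std::printf("\n");
    return 0;
  }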
+ vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; + + if (duration > 1) { + // If first drop not set and we have a drop set it to this time. + if (!first_drop_) first_drop_ = last_pts_ + 1; + // Update the number of frame drops. + num_drops_ += static_cast(duration - 1); + // Update counter for total number of frames (#frames input to encoder). + // Needed for setting the proper layer_id below. + tot_frame_number_ += static_cast(duration - 1); + } + + int layer = SetLayerId(tot_frame_number_, cfg_.ts_number_layers); + + // Add to the buffer the bits we'd expect from a constant bitrate server. + bits_in_buffer_model_ += static_cast( + duration * timebase_ * cfg_.rc_target_bitrate * 1000); + + // Buffer should not go negative. + ASSERT_GE(bits_in_buffer_model_, 0) + << "Buffer Underrun at frame " << pkt->data.frame.pts; + + const size_t frame_size_in_bits = pkt->data.frame.sz * 8; + + // Update the total encoded bits. For temporal layers, update the cumulative + // encoded bits per layer. + for (int i = layer; i < static_cast(cfg_.ts_number_layers); ++i) { + bits_total_[i] += frame_size_in_bits; + } + + // Update the most recent pts. + last_pts_ = pkt->data.frame.pts; + ++frame_number_; + ++tot_frame_number_; + } + + virtual void EndPassHook(void) { + for (int layer = 0; layer < static_cast(cfg_.ts_number_layers); + ++layer) { + duration_ = (last_pts_ + 1) * timebase_; + if (bits_total_[layer]) { + // Effective file datarate: + effective_datarate_[layer] = (bits_total_[layer] / 1000.0) / duration_; + } + } + } + + vpx_codec_pts_t last_pts_; + double timebase_; + int tune_content_; + int frame_number_; // Counter for number of non-dropped/encoded frames. + int tot_frame_number_; // Counter for total number of input frames. + int64_t bits_total_[3]; + double duration_; + double effective_datarate_[3]; + int set_cpu_used_; + int64_t bits_in_buffer_model_; + vpx_codec_pts_t first_drop_; + int num_drops_; + int aq_mode_; + int denoiser_on_; + int denoiser_offon_test_; + int denoiser_offon_period_; + int frame_parallel_decoding_mode_; + bool use_roi_; + vpx_roi_map_t roi_; +}; + +// Params: test mode, speed setting and index for bitrate array. +class DatarateTestVP9RealTimeMultiBR + : public DatarateTestVP9, + public ::libvpx_test::CodecTestWith2Params { + public: + DatarateTestVP9RealTimeMultiBR() : DatarateTestVP9(GET_PARAM(0)) {} + + protected: + virtual void SetUp() { + InitializeConfig(); + SetMode(::libvpx_test::kRealTime); + set_cpu_used_ = GET_PARAM(1); + ResetModel(); + } +}; + +// Params: speed setting and index for bitrate array. +class DatarateTestVP9LargeVBR + : public DatarateTestVP9, + public ::libvpx_test::CodecTestWith2Params { + public: + DatarateTestVP9LargeVBR() : DatarateTestVP9(GET_PARAM(0)) {} + + protected: + virtual void SetUp() { + InitializeConfig(); + SetMode(::libvpx_test::kRealTime); + set_cpu_used_ = GET_PARAM(1); + ResetModel(); + } +}; + +// Check basic rate targeting for VBR mode with 0 lag. 
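In the packet hook above, each frame's bits are credited to its own temporal layer and to every layer above it, since decoding layer j requires all layers at or below j; the per-layer datarates checked later are therefore cumulative. In miniature (sketch):

  #include <cstdio>

  int main() {
    // Frame sizes (bits) for one 4-frame period of a 3-layer encode,
    // layer pattern 0 2 1 2.
    const int layer_of[4] = { 0, 2, 1, 2 };
    const int bits_of[4] = { 4000, 1000, 2000, 1000 };
    long long bits_total[3] = { 0, 0, 0 };
    for (int f = 0; f < 4; ++f)
      for (int l = layer_of[f]; l < 3; ++l)  // credit this layer and above
        bits_total[l] += bits_of[f];
    // Layer 0 sees only base frames; layer 2 sees the whole stream.
    std::printf("cumulative bits: L0=%lld L1=%lld L2=%lld\n", bits_total[0],
                bits_total[1], bits_total[2]);  // 4000, 6000, 8000
    return 0;
  }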
+TEST_P(DatarateTestVP9LargeVBR, BasicRateTargetingVBRLagZero) { + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.g_error_resilient = 0; + cfg_.rc_end_usage = VPX_VBR; + cfg_.g_lag_in_frames = 0; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 300); + + const int bitrates[2] = { 400, 800 }; + const int bitrate_index = GET_PARAM(2); + cfg_.rc_target_bitrate = bitrates[bitrate_index]; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.36) + << " The datarate for the file is greater than target by too much!"; +} + +// Check basic rate targeting for VBR mode with non-zero lag. +TEST_P(DatarateTestVP9LargeVBR, BasicRateTargetingVBRLagNonZero) { + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.g_error_resilient = 0; + cfg_.rc_end_usage = VPX_VBR; + // For non-zero lag, rate control will work (be within bounds) for + // real-time mode. + if (deadline_ == VPX_DL_REALTIME) { + cfg_.g_lag_in_frames = 15; + } else { + cfg_.g_lag_in_frames = 0; + } + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 300); + const int bitrates[2] = { 400, 800 }; + const int bitrate_index = GET_PARAM(2); + cfg_.rc_target_bitrate = bitrates[bitrate_index]; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.35) + << " The datarate for the file is greater than target by too much!"; +} + +// Check basic rate targeting for VBR mode with non-zero lag, with +// frame_parallel_decoding_mode off. This enables the adapt_coeff/mode/mv probs +// since error_resilience is off. +TEST_P(DatarateTestVP9LargeVBR, BasicRateTargetingVBRLagNonZeroFrameParDecOff) { + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.g_error_resilient = 0; + cfg_.rc_end_usage = VPX_VBR; + // For non-zero lag, rate control will work (be within bounds) for + // real-time mode. + if (deadline_ == VPX_DL_REALTIME) { + cfg_.g_lag_in_frames = 15; + } else { + cfg_.g_lag_in_frames = 0; + } + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 300); + const int bitrates[2] = { 400, 800 }; + const int bitrate_index = GET_PARAM(2); + cfg_.rc_target_bitrate = bitrates[bitrate_index]; + ResetModel(); + frame_parallel_decoding_mode_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.35) + << " The datarate for the file is greater than target by too much!"; +} + +// Check basic rate targeting for CBR mode. 
+TEST_P(DatarateTestVP9RealTimeMultiBR, BasicRateTargeting) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + + ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + const int bitrates[4] = { 150, 350, 550, 750 }; + const int bitrate_index = GET_PARAM(2); + cfg_.rc_target_bitrate = bitrates[bitrate_index]; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) + << " The datarate for the file is greater than target by too much!"; +} + +// Check basic rate targeting for CBR mode, with frame_parallel_decoding_mode +// off( and error_resilience off). +TEST_P(DatarateTestVP9RealTimeMultiBR, BasicRateTargetingFrameParDecOff) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + + ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + const int bitrates[4] = { 150, 350, 550, 750 }; + const int bitrate_index = GET_PARAM(2); + cfg_.rc_target_bitrate = bitrates[bitrate_index]; + ResetModel(); + frame_parallel_decoding_mode_ = 0; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) + << " The datarate for the file is greater than target by too much!"; +} + +// Check basic rate targeting for CBR. +TEST_P(DatarateTestVP9RealTimeMultiBR, BasicRateTargeting444) { + ::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140); + + cfg_.g_profile = 1; + cfg_.g_timebase = video.timebase(); + + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + const int bitrates[4] = { 250, 450, 650, 850 }; + const int bitrate_index = GET_PARAM(2); + cfg_.rc_target_bitrate = bitrates[bitrate_index]; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(static_cast(cfg_.rc_target_bitrate), + effective_datarate_[0] * 0.80) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(static_cast(cfg_.rc_target_bitrate), + effective_datarate_[0] * 1.15) + << " The datarate for the file missed the target!" + << cfg_.rc_target_bitrate << " " << effective_datarate_; +} + +// Check that (1) the first dropped frame gets earlier and earlier +// as the drop frame threshold is increased, and (2) that the total number of +// frame drops does not decrease as we increase frame drop threshold. +// Use a lower qp-max to force some frame drops. 
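A unit note that makes the configurations above easier to read: the rc_buf_* fields are expressed in milliseconds, which is why ResetModel() can seed the bucket as rc_target_bitrate * rc_buf_initial_sz with no conversion factor, kilobits per second times milliseconds being bits. Worked numbers (sketch):

  #include <cstdio>

  int main() {
    const int target_kbps = 550;     // one of the CBR test bitrates
    const int buf_initial_ms = 500;  // rc_buf_initial_sz
    const int buf_full_ms = 1000;    // rc_buf_sz
    // kbit/s * ms == bits, so no unit conversion is needed.
    std::printf("initial buffer: %d bits\n", target_kbps * buf_initial_ms);
    std::printf("full buffer:    %d bits\n", target_kbps * buf_full_ms);
    // 275000 and 550000 bits, i.e. half a second and one second of video.
    return 0;
  }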
+TEST_P(DatarateTestVP9RealTimeMultiBR, ChangingDropFrameThresh) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_undershoot_pct = 20; + cfg_.rc_undershoot_pct = 20; + cfg_.rc_dropframe_thresh = 10; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 50; + cfg_.rc_end_usage = VPX_CBR; + cfg_.rc_target_bitrate = 200; + cfg_.g_lag_in_frames = 0; + // TODO(marpan): Investigate datarate target failures with a smaller keyframe + // interval (128). + cfg_.kf_max_dist = 9999; + + ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + + const int kDropFrameThreshTestStep = 30; + const int bitrates[2] = { 50, 150 }; + const int bitrate_index = GET_PARAM(2); + if (bitrate_index > 1) return; + cfg_.rc_target_bitrate = bitrates[bitrate_index]; + vpx_codec_pts_t last_drop = 140; + int last_num_drops = 0; + for (int i = 10; i < 100; i += kDropFrameThreshTestStep) { + cfg_.rc_dropframe_thresh = i; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.25) + << " The datarate for the file is greater than target by too much!"; + ASSERT_LE(first_drop_, last_drop) + << " The first dropped frame for drop_thresh " << i + << " > first dropped frame for drop_thresh " + << i - kDropFrameThreshTestStep; + ASSERT_GE(num_drops_, last_num_drops * 0.85) + << " The number of dropped frames for drop_thresh " << i + << " < number of dropped frames for drop_thresh " + << i - kDropFrameThreshTestStep; + last_drop = first_drop_; + last_num_drops = num_drops_; + } +} // namespace + +// Check basic rate targeting for 2 temporal layers. +TEST_P(DatarateTestVP9RealTimeMultiBR, BasicRateTargeting2TemporalLayers) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + + // 2 Temporal layers, no spatial layers: Framerate decimation (2, 1). + cfg_.ss_number_layers = 1; + cfg_.ts_number_layers = 2; + cfg_.ts_rate_decimator[0] = 2; + cfg_.ts_rate_decimator[1] = 1; + + cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + + ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + const int bitrates[4] = { 200, 400, 600, 800 }; + const int bitrate_index = GET_PARAM(2); + cfg_.rc_target_bitrate = bitrates[bitrate_index]; + ResetModel(); + // 60-40 bitrate allocation for 2 temporal layers. + cfg_.layer_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100; + cfg_.layer_target_bitrate[1] = cfg_.rc_target_bitrate; + aq_mode_ = 0; + if (deadline_ == VPX_DL_REALTIME) { + aq_mode_ = 3; + cfg_.g_error_resilient = 1; + } + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { + ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85) + << " The datarate for the file is lower than target by too much, " + "for layer: " + << j; + ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15) + << " The datarate for the file is greater than target by too much, " + "for layer: " + << j; + } +} + +// Check basic rate targeting for 3 temporal layers. 
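The layer_target_bitrate values in the test above are cumulative, mirroring the cumulative bit accounting: with a 60-40 split, layer 0 is set to 60% of the target and layer 1 to the full target. The same arithmetic for the 40-20-40 three-layer split used just below (sketch):

  #include <cstdio>

  int main() {
    const int target_kbps = 600;
    // 40-20-40 split, expressed cumulatively as in the tests:
    const int layer0 = 40 * target_kbps / 100;  // base only
    const int layer1 = 60 * target_kbps / 100;  // base + middle
    const int layer2 = target_kbps;             // full stream
    std::printf("cumulative targets: %d / %d / %d kbps\n", layer0, layer1,
                layer2);  // 240 / 360 / 600
    // Per-layer increments are the differences: 240, 120, 240 kbps.
    return 0;
  }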
+TEST_P(DatarateTestVP9RealTimeMultiBR, BasicRateTargeting3TemporalLayers) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + + // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1). + cfg_.ss_number_layers = 1; + cfg_.ts_number_layers = 3; + cfg_.ts_rate_decimator[0] = 4; + cfg_.ts_rate_decimator[1] = 2; + cfg_.ts_rate_decimator[2] = 1; + + cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + + ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + const int bitrates[4] = { 200, 400, 600, 800 }; + const int bitrate_index = GET_PARAM(2); + cfg_.rc_target_bitrate = bitrates[bitrate_index]; + ResetModel(); + // 40-20-40 bitrate allocation for 3 temporal layers. + cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100; + cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100; + cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate; + aq_mode_ = 0; + if (deadline_ == VPX_DL_REALTIME) { + aq_mode_ = 3; + cfg_.g_error_resilient = 1; + } + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { + // TODO(yaowu): Work out more stable rc control strategy and + // Adjust the thresholds to be tighter than .75. + ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.75) + << " The datarate for the file is lower than target by too much, " + "for layer: " + << j; + // TODO(yaowu): Work out more stable rc control strategy and + // Adjust the thresholds to be tighter than 1.25. + ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.25) + << " The datarate for the file is greater than target by too much, " + "for layer: " + << j; + } +} + +// Params: speed setting. +class DatarateTestVP9RealTime : public DatarateTestVP9, + public ::libvpx_test::CodecTestWithParam { + public: + DatarateTestVP9RealTime() : DatarateTestVP9(GET_PARAM(0)) {} + virtual ~DatarateTestVP9RealTime() {} + + protected: + virtual void SetUp() { + InitializeConfig(); + SetMode(::libvpx_test::kRealTime); + set_cpu_used_ = GET_PARAM(1); + ResetModel(); + } +}; + +// Check basic rate targeting for CBR mode, with 2 threads and dropped frames. +TEST_P(DatarateTestVP9RealTime, BasicRateTargetingDropFramesMultiThreads) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 30; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + // Encode using multiple threads. + cfg_.g_threads = 2; + + ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + cfg_.rc_target_bitrate = 200; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) + << " The datarate for the file is greater than target by too much!"; +} + +// Check basic rate targeting for 3 temporal layers, with frame dropping. +// Only for one (low) bitrate with lower max_quantizer, and somewhat higher +// frame drop threshold, to force frame dropping. 
+TEST_P(DatarateTestVP9RealTime, + BasicRateTargeting3TemporalLayersFrameDropping) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + // Set frame drop threshold and rc_max_quantizer to force some frame drops. + cfg_.rc_dropframe_thresh = 20; + cfg_.rc_max_quantizer = 45; + cfg_.rc_min_quantizer = 0; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + + // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1). + cfg_.ss_number_layers = 1; + cfg_.ts_number_layers = 3; + cfg_.ts_rate_decimator[0] = 4; + cfg_.ts_rate_decimator[1] = 2; + cfg_.ts_rate_decimator[2] = 1; + + cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + + ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + cfg_.rc_target_bitrate = 200; + ResetModel(); + // 40-20-40 bitrate allocation for 3 temporal layers. + cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100; + cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100; + cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate; + aq_mode_ = 0; + if (deadline_ == VPX_DL_REALTIME) { + aq_mode_ = 3; + cfg_.g_error_resilient = 1; + } + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { + ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85) + << " The datarate for the file is lower than target by too much, " + "for layer: " + << j; + ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.20) + << " The datarate for the file is greater than target by too much, " + "for layer: " + << j; + // Expect some frame drops in this test: for this 200 frames test, + // expect at least 10% and not more than 60% drops. + ASSERT_GE(num_drops_, 20); + ASSERT_LE(num_drops_, 280); + } +} + +// Check VP9 region of interest feature. +TEST_P(DatarateTestVP9RealTime, RegionOfInterest) { + if (deadline_ != VPX_DL_REALTIME || set_cpu_used_ < 5) return; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + + ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + + cfg_.rc_target_bitrate = 450; + cfg_.g_w = 640; + cfg_.g_h = 480; + + ResetModel(); + + // Set ROI parameters + use_roi_ = true; + memset(&roi_, 0, sizeof(roi_)); + + roi_.rows = (cfg_.g_h + 7) / 8; + roi_.cols = (cfg_.g_w + 7) / 8; + + roi_.delta_q[1] = -20; + roi_.delta_lf[1] = -20; + memset(roi_.ref_frame, -1, sizeof(roi_.ref_frame)); + roi_.ref_frame[1] = 1; + + // Use 2 states: 1 is center square, 0 is the rest. + roi_.roi_map = reinterpret_cast( + calloc(roi_.rows * roi_.cols, sizeof(*roi_.roi_map))); + ASSERT_TRUE(roi_.roi_map != NULL); + + for (unsigned int i = 0; i < roi_.rows; ++i) { + for (unsigned int j = 0; j < roi_.cols; ++j) { + if (i > (roi_.rows >> 2) && i < ((roi_.rows * 3) >> 2) && + j > (roi_.cols >> 2) && j < ((roi_.cols * 3) >> 2)) { + roi_.roi_map[i * roi_.cols + j] = 1; + } + } + } + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_[0] * 0.90) + << " The datarate for the file exceeds the target!"; + + ASSERT_LE(cfg_.rc_target_bitrate, effective_datarate_[0] * 1.4) + << " The datarate for the file missed the target!"; + + free(roi_.roi_map); +} + +// Params: test mode, speed setting and index for bitrate array. 
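The VP9 RegionOfInterest test above builds its two-segment map over 8x8 blocks (the VP8 variant earlier in this patch uses 16x16 macroblocks), marking the centre square as segment 1 so that it receives the delta_q and delta_lf bonuses. The map construction in isolation (sketch):

  #include <cstdio>
  #include <vector>

  int main() {
    // 640x480 at 8x8 granularity, as in the VP9 ROI test.
    const unsigned rows = (480 + 7) / 8, cols = (640 + 7) / 8;
    std::vector<unsigned char> roi_map(rows * cols, 0);
    // Segment 1 = the middle half of the frame in both dimensions.
    for (unsigned i = 0; i < rows; ++i)
      for (unsigned j = 0; j < cols; ++j)
        if (i > rows / 4 && i < 3 * rows / 4 && j > cols / 4 && j < 3 * cols / 4)
          roi_map[i * cols + j] = 1;
    unsigned marked = 0;
    for (unsigned char v : roi_map) marked += v;
    std::printf("%u of %u blocks in the ROI (~%u%%)\n", marked, rows * cols,
                100 * marked / (rows * cols));
    return 0;
  }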
+class DatarateTestVP9PostEncodeDrop + : public DatarateTestVP9, + public ::libvpx_test::CodecTestWithParam { + public: + DatarateTestVP9PostEncodeDrop() : DatarateTestVP9(GET_PARAM(0)) {} + + protected: + virtual void SetUp() { + InitializeConfig(); + SetMode(::libvpx_test::kRealTime); + set_cpu_used_ = GET_PARAM(1); + ResetModel(); + } +}; + +// Check basic rate targeting for CBR mode, with 2 threads and dropped frames. +TEST_P(DatarateTestVP9PostEncodeDrop, PostEncodeDropScreenContent) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 30; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + // Encode using multiple threads. + cfg_.g_threads = 2; + cfg_.g_error_resilient = 0; + tune_content_ = 1; + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 300); + cfg_.rc_target_bitrate = 300; + ResetModel(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) + << " The datarate for the file is greater than target by too much!"; +} + +#if CONFIG_VP9_TEMPORAL_DENOISING +// Params: speed setting. +class DatarateTestVP9RealTimeDenoiser : public DatarateTestVP9RealTime { + public: + virtual ~DatarateTestVP9RealTimeDenoiser() {} +}; + +// Check basic datarate targeting, for a single bitrate, when denoiser is on. +TEST_P(DatarateTestVP9RealTimeDenoiser, LowNoise) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + + ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + + // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), + // there is only one denoiser mode: denoiserYonly(which is 1), + // but may add more modes in the future. + cfg_.rc_target_bitrate = 400; + ResetModel(); + // Turn on the denoiser. + denoiser_on_ = 1; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) + << " The datarate for the file is greater than target by too much!"; +} + +// Check basic datarate targeting, for a single bitrate, when denoiser is on, +// for clip with high noise level. Use 2 threads. +TEST_P(DatarateTestVP9RealTimeDenoiser, HighNoise) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_threads = 2; + + ::libvpx_test::Y4mVideoSource video("noisy_clip_640_360.y4m", 0, 200); + + // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), + // there is only one denoiser mode: kDenoiserOnYOnly(which is 1), + // but may add more modes in the future. + cfg_.rc_target_bitrate = 1000; + ResetModel(); + // Turn on the denoiser. 
+// Check basic datarate targeting, for a single bitrate, when denoiser is on,
+// for clip with high noise level. Use 2 threads.
+TEST_P(DatarateTestVP9RealTimeDenoiser, HighNoise) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_min_quantizer = 2;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.g_threads = 2;
+
+  ::libvpx_test::Y4mVideoSource video("noisy_clip_640_360.y4m", 0, 200);
+
+  // For the temporal denoiser (CONFIG_VP9_TEMPORAL_DENOISING), there is
+  // currently only one denoiser mode, kDenoiserOnYOnly (which is 1); more
+  // modes may be added in the future.
+  cfg_.rc_target_bitrate = 1000;
+  ResetModel();
+  // Turn on the denoiser.
+  denoiser_on_ = 1;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
+      << " The datarate for the file is lower than target by too much!";
+  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
+      << " The datarate for the file is greater than target by too much!";
+}
+
+// Check basic datarate targeting, for a single bitrate, when denoiser is on,
+// for 1280x720 clip with 4 threads.
+TEST_P(DatarateTestVP9RealTimeDenoiser, 4threads) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_min_quantizer = 2;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.g_threads = 4;
+
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
+
+  // For the temporal denoiser (CONFIG_VP9_TEMPORAL_DENOISING), there is
+  // currently only one denoiser mode, kDenoiserOnYOnly (which is 1); more
+  // modes may be added in the future.
+  cfg_.rc_target_bitrate = 1000;
+  ResetModel();
+  // Turn on the denoiser.
+  denoiser_on_ = 1;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
+      << " The datarate for the file is lower than target by too much!";
+  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.29)
+      << " The datarate for the file is greater than target by too much!";
+}
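// Aside (editor's sketch, not part of the patch): DenoiserOffOn below drives
// the denoiser_offon_test_ / denoiser_offon_period_ knobs. One plausible
// toggling rule (the real hook lives in the shared datarate harness) flips
// VP9E_SET_NOISE_SENSITIVITY each time another period of frames completes:
static int DenoiserStateForFrame(unsigned int frame, unsigned int period,
                                 int initially_on) {
  // The number of completed periods determines how often the state has been
  // flipped; an odd flip count inverts the initial state.
  return initially_on ^ static_cast<int>(((frame + 1) / period) & 1);
}
// With a 400-frame clip and period 100 this exercises both the off->on and
// on->off transitions twice each.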
+// Check basic datarate targeting, for a single bitrate, when denoiser is off
+// and on.
+TEST_P(DatarateTestVP9RealTimeDenoiser, DenoiserOffOn) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_min_quantizer = 2;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+
+  // For the temporal denoiser (CONFIG_VP9_TEMPORAL_DENOISING), there is
+  // currently only one denoiser mode, kDenoiserOnYOnly (which is 1); more
+  // modes may be added in the future.
+  cfg_.rc_target_bitrate = 400;
+  ResetModel();
+  // The denoiser is off by default.
+  denoiser_on_ = 0;
+  // Set the off/on test flag.
+  denoiser_offon_test_ = 1;
+  denoiser_offon_period_ = 100;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
+      << " The datarate for the file is lower than target by too much!";
+  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
+      << " The datarate for the file is greater than target by too much!";
+}
+#endif  // CONFIG_VP9_TEMPORAL_DENOISING
+
+VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTimeMultiBR,
+                          ::testing::Range(5, 10), ::testing::Range(0, 4));
+
+VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeVBR, ::testing::Range(5, 9),
+                          ::testing::Range(0, 2));
+
+VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime, ::testing::Range(5, 10));
+
+VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9PostEncodeDrop,
+                          ::testing::Range(5, 6));
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTimeDenoiser,
+                          ::testing::Range(5, 10));
+#endif
+}  // namespace
diff --git a/media/libvpx/libvpx/test/vp9_denoiser_test.cc b/media/libvpx/libvpx/test/vp9_denoiser_test.cc
index 56ca257c5958..47fa587fca73 100644
--- a/media/libvpx/libvpx/test/vp9_denoiser_test.cc
+++ b/media/libvpx/libvpx/test/vp9_denoiser_test.cc
@@ -11,6 +11,7 @@
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
+#include <tuple>
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/acm_random.h"
@@ -35,7 +36,7 @@ typedef int (*Vp9DenoiserFilterFunc)(const uint8_t *sig, int sig_stride,
                                      uint8_t *avg, int avg_stride,
                                      int increase_denoising, BLOCK_SIZE bs,
                                      int motion_magnitude);
-typedef std::tr1::tuple<Vp9DenoiserFilterFunc, BLOCK_SIZE> VP9DenoiserTestParam;
+typedef std::tuple<Vp9DenoiserFilterFunc, BLOCK_SIZE> VP9DenoiserTestParam;
 
 class VP9DenoiserTest
     : public ::testing::Test,
@@ -99,7 +100,7 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
   }
 }
 
-using std::tr1::make_tuple;
+using std::make_tuple;
 
 // Test for all block size.
 #if HAVE_SSE2
diff --git a/media/libvpx/libvpx/test/vp9_encoder_parms_get_to_decoder.cc b/media/libvpx/libvpx/test/vp9_encoder_parms_get_to_decoder.cc
index 62e8dcb9b52b..fade08bbd475 100644
--- a/media/libvpx/libvpx/test/vp9_encoder_parms_get_to_decoder.cc
+++ b/media/libvpx/libvpx/test/vp9_encoder_parms_get_to_decoder.cc
@@ -8,6 +8,8 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <memory>
+
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
 #include "test/codec_factory.h"
@@ -74,7 +76,7 @@ class VpxEncoderParmsGetToDecoder
 
   virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                   ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
+    if (video->frame() == 0) {
       encoder->Control(VP9E_SET_COLOR_SPACE, encode_parms.cs);
       encoder->Control(VP9E_SET_COLOR_RANGE, encode_parms.color_range);
       encoder->Control(VP9E_SET_LOSSLESS, encode_parms.lossless);
@@ -138,7 +140,7 @@ class VpxEncoderParmsGetToDecoder
 
 TEST_P(VpxEncoderParmsGetToDecoder, BitstreamParms) {
   init_flags_ = VPX_CODEC_USE_PSNR;
 
-  testing::internal::scoped_ptr<libvpx_test::VideoSource> video(
+  std::unique_ptr<libvpx_test::VideoSource> video(
       new libvpx_test::Y4mVideoSource(test_video_.name, 0, test_video_.frames));
   ASSERT_TRUE(video.get() != NULL);
 
diff --git a/media/libvpx/libvpx/test/vp9_end_to_end_test.cc b/media/libvpx/libvpx/test/vp9_end_to_end_test.cc
index 955f567ce243..7cb716f22624 100644
--- a/media/libvpx/libvpx/test/vp9_end_to_end_test.cc
+++ b/media/libvpx/libvpx/test/vp9_end_to_end_test.cc
@@ -8,10 +8,13 @@
  * be found in the AUTHORS file in the root of the source tree.
 */
 
+#include <memory>
+
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
 #include "test/util.h"
 #include "test/y4m_video_source.h"
 #include "test/yuv_video_source.h"
@@ -21,14 +24,14 @@ namespace {
 const unsigned int kWidth = 160;
 const unsigned int kHeight = 90;
 const unsigned int kFramerate = 50;
-const unsigned int kFrames = 10;
+const unsigned int kFrames = 20;
 const int kBitrate = 500;
 // List of psnr thresholds for speed settings 0-7 and 5 encoding modes
 const double kPsnrThreshold[][5] = {
   { 36.0, 37.0, 37.0, 37.0, 37.0 }, { 35.0, 36.0, 36.0, 36.0, 36.0 },
   { 34.0, 35.0, 35.0, 35.0, 35.0 }, { 33.0, 34.0, 34.0, 34.0, 34.0 },
-  { 32.0, 33.0, 33.0, 33.0, 33.0 }, { 31.0, 32.0, 32.0, 32.0, 32.0 },
-  { 30.0, 31.0, 31.0, 31.0, 31.0 }, { 29.0, 30.0, 30.0, 30.0, 30.0 },
+  { 32.0, 33.0, 33.0, 33.0, 33.0 }, { 28.0, 32.0, 32.0, 32.0, 32.0 },
+  { 28.5, 31.0, 31.0, 31.0, 31.0 }, { 27.5, 30.0, 30.0, 30.0, 30.0 },
 };
 
 typedef struct {
@@ -45,13 +48,13 @@ const TestVideoParam kTestVectors[] = {
   { "park_joy_90p_8_444.y4m", 8, VPX_IMG_FMT_I444, VPX_BITS_8, 1 },
   { "park_joy_90p_8_440.yuv", 8, VPX_IMG_FMT_I440, VPX_BITS_8, 1 },
 #if CONFIG_VP9_HIGHBITDEPTH
-  { "park_joy_90p_10_420.y4m", 10, VPX_IMG_FMT_I42016, VPX_BITS_10, 2 },
-  { "park_joy_90p_10_422.y4m", 10, VPX_IMG_FMT_I42216, VPX_BITS_10, 3 },
-  { "park_joy_90p_10_444.y4m", 10, VPX_IMG_FMT_I44416, VPX_BITS_10, 3 },
+  { "park_joy_90p_10_420_20f.y4m", 10, VPX_IMG_FMT_I42016, VPX_BITS_10, 2 },
+  { "park_joy_90p_10_422_20f.y4m", 10, VPX_IMG_FMT_I42216, VPX_BITS_10, 3 },
+  { "park_joy_90p_10_444_20f.y4m", 10, VPX_IMG_FMT_I44416, VPX_BITS_10, 3 },
   { "park_joy_90p_10_440.yuv", 10, VPX_IMG_FMT_I44016, VPX_BITS_10, 3 },
-  { "park_joy_90p_12_420.y4m", 12, VPX_IMG_FMT_I42016, VPX_BITS_12, 2 },
-  { "park_joy_90p_12_422.y4m", 12, VPX_IMG_FMT_I42216, VPX_BITS_12, 3 },
-  { "park_joy_90p_12_444.y4m", 12, VPX_IMG_FMT_I44416, VPX_BITS_12, 3 },
+  { "park_joy_90p_12_420_20f.y4m", 12, VPX_IMG_FMT_I42016, VPX_BITS_12, 2 },
+  { "park_joy_90p_12_422_20f.y4m", 12, VPX_IMG_FMT_I42216, VPX_BITS_12, 3 },
+  { "park_joy_90p_12_444_20f.y4m", 12, VPX_IMG_FMT_I44416, VPX_BITS_12, 3 },
   { "park_joy_90p_12_440.yuv", 12, VPX_IMG_FMT_I44016, VPX_BITS_12, 3 },
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 };
@@ -59,11 +62,11 @@ const TestVideoParam kTestVectors[] = {
 // Encoding modes tested
 const libvpx_test::TestMode kEncodingModeVectors[] = {
   ::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
-  ::libvpx_test::kRealTime,
+  ::libvpx_test::kRealTime
 };
 
 // Speed settings tested
-const int kCpuUsedVectors[] = { 1, 2, 3, 5, 6 };
+const int kCpuUsedVectors[] = { 1, 2, 3, 5, 6, 7 };
 
 int is_extension_y4m(const char *filename) {
   const char *dot = strrchr(filename, '.');
@@ -74,6 +77,43 @@ int is_extension_y4m(const char *filename) {
   }
 }
 
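// Aside (editor's sketch, not part of the patch): kPsnrThreshold above is
// indexed by speed (rows, cpu-used 0-7) and encoding mode (columns), which is
// why the loosened high-speed rows accompany the new cpu-used 7 entry in
// kCpuUsedVectors. A hypothetical mirror of the harness's GetPsnrThreshold():
static double PsnrThresholdFor(int cpu_used, int mode_index) {
  return kPsnrThreshold[cpu_used][mode_index];  // row = speed, col = mode
}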
+class EndToEndTestAdaptiveRDThresh
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<int, int> {
+ protected:
+  EndToEndTestAdaptiveRDThresh()
+      : EncoderTest(GET_PARAM(0)), cpu_used_start_(GET_PARAM(1)),
+        cpu_used_end_(GET_PARAM(2)) {}
+
+  virtual ~EndToEndTestAdaptiveRDThresh() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libvpx_test::kRealTime);
+    cfg_.g_lag_in_frames = 0;
+    cfg_.rc_end_usage = VPX_CBR;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 600;
+    dec_cfg_.threads = 4;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      encoder->Control(VP8E_SET_CPUUSED, cpu_used_start_);
+      encoder->Control(VP9E_SET_ROW_MT, 1);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, 2);
+    }
+    if (video->frame() == 100)
+      encoder->Control(VP8E_SET_CPUUSED, cpu_used_end_);
+  }
+
+ private:
+  int cpu_used_start_;
+  int cpu_used_end_;
+};
+
 class EndToEndTestLarge
     : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWith3Params<libvpx_test::TestMode,
                                                  TestVideoParam, int> {
@@ ... @@ class EndToEndTestLarge
   virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                   ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
+    if (video->frame() == 0) {
       encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
       encoder->Control(VP9E_SET_TILE_COLUMNS, 4);
       encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
@@ -123,6 +166,9 @@ class EndToEndTestLarge
       encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
       encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
       encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+      } else {
+        encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
+        encoder->Control(VP9E_SET_AQ_MODE, cyclic_refresh_);
+      }
     }
   }
@@ -138,6 +184,8 @@ class EndToEndTestLarge
 
   TestVideoParam test_video_param_;
   int cpu_used_;
+  int cyclic_refresh_;
+  int denoiser_on_;
 
  private:
   double psnr_;
@@ -145,6 +193,50 @@ class EndToEndTestLarge
   libvpx_test::TestMode encoding_mode_;
 };
 
+#if CONFIG_VP9_DECODER
+// The test parameters control VP9D_SET_LOOP_FILTER_OPT and the number of
+// decoder threads.
+class EndToEndTestLoopFilterThreading
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<bool, int> {
+ protected:
+  EndToEndTestLoopFilterThreading()
+      : EncoderTest(GET_PARAM(0)), use_loop_filter_opt_(GET_PARAM(1)) {}
+
+  virtual ~EndToEndTestLoopFilterThreading() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libvpx_test::kRealTime);
+    cfg_.g_threads = 2;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.rc_target_bitrate = 500;
+    cfg_.rc_end_usage = VPX_CBR;
+    cfg_.kf_min_dist = 1;
+    cfg_.kf_max_dist = 1;
+    dec_cfg_.threads = GET_PARAM(2);
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      encoder->Control(VP8E_SET_CPUUSED, 8);
+    }
+    encoder->Control(VP9E_SET_TILE_COLUMNS, 4 - video->frame() % 5);
+  }
+
+  virtual void PreDecodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Decoder *decoder) {
+    if (video->frame() == 0) {
+      decoder->Control(VP9D_SET_LOOP_FILTER_OPT,
+                       use_loop_filter_opt_ ? 1 : 0);
+    }
+  }
+
+ private:
+  const bool use_loop_filter_opt_;
+};
+#endif  // CONFIG_VP9_DECODER
+
 TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) {
   cfg_.rc_target_bitrate = kBitrate;
   cfg_.g_error_resilient = 0;
@@ -154,7 +246,7 @@ TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) {
   init_flags_ = VPX_CODEC_USE_PSNR;
   if (cfg_.g_bit_depth > 8) init_flags_ |= VPX_CODEC_USE_HIGHBITDEPTH;
 
-  testing::internal::scoped_ptr<libvpx_test::VideoSource> video;
+  std::unique_ptr<libvpx_test::VideoSource> video;
   if (is_extension_y4m(test_video_param_.filename)) {
     video.reset(new libvpx_test::Y4mVideoSource(test_video_param_.filename, 0,
                                                 kFrames));
@@ -170,8 +262,63 @@ TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) {
   EXPECT_GT(psnr, GetPsnrThreshold());
 }
 
+TEST_P(EndToEndTestLarge, EndtoEndPSNRDenoiserAQTest) {
+  cfg_.rc_target_bitrate = kBitrate;
+  cfg_.g_error_resilient = 0;
+  cfg_.g_profile = test_video_param_.profile;
+  cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+  cfg_.g_bit_depth = test_video_param_.bit_depth;
+  init_flags_ = VPX_CODEC_USE_PSNR;
+  cyclic_refresh_ = 3;
+  denoiser_on_ = 1;
+  if (cfg_.g_bit_depth > 8) init_flags_ |= VPX_CODEC_USE_HIGHBITDEPTH;
+
+  std::unique_ptr<libvpx_test::VideoSource> video;
+  if (is_extension_y4m(test_video_param_.filename)) {
+    video.reset(new libvpx_test::Y4mVideoSource(test_video_param_.filename, 0,
+                                                kFrames));
+  } else {
+    video.reset(new libvpx_test::YUVVideoSource(
+        test_video_param_.filename, test_video_param_.fmt, kWidth, kHeight,
+        kFramerate, 1, 0, kFrames));
+  }
+  ASSERT_TRUE(video.get() != NULL);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  const double psnr = GetAveragePsnr();
+  EXPECT_GT(psnr, GetPsnrThreshold());
+}
+
+TEST_P(EndToEndTestAdaptiveRDThresh, EndtoEndAdaptiveRDThreshRowMT) {
+  cfg_.rc_target_bitrate = kBitrate;
+  cfg_.g_error_resilient = 0;
+  cfg_.g_threads = 2;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+#if CONFIG_VP9_DECODER
+TEST_P(EndToEndTestLoopFilterThreading, TileCountChange) {
+  ::libvpx_test::RandomVideoSource video;
+  video.SetSize(4096, 2160);
+  video.set_limit(10);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+#endif  // CONFIG_VP9_DECODER
+
 VP9_INSTANTIATE_TEST_CASE(EndToEndTestLarge,
                           ::testing::ValuesIn(kEncodingModeVectors),
                           ::testing::ValuesIn(kTestVectors),
                           ::testing::ValuesIn(kCpuUsedVectors));
+
+VP9_INSTANTIATE_TEST_CASE(EndToEndTestAdaptiveRDThresh,
+                          ::testing::Values(5, 6, 7), ::testing::Values(8, 9));
+
+#if CONFIG_VP9_DECODER
+VP9_INSTANTIATE_TEST_CASE(EndToEndTestLoopFilterThreading, ::testing::Bool(),
+                          ::testing::Range(2, 6));
+#endif  // CONFIG_VP9_DECODER
 }  // namespace
diff --git a/media/libvpx/libvpx/test/vp9_ethread_test.cc b/media/libvpx/libvpx/test/vp9_ethread_test.cc
index 6b7e512116c1..6de76e9e5554 100644
--- a/media/libvpx/libvpx/test/vp9_ethread_test.cc
+++ b/media/libvpx/libvpx/test/vp9_ethread_test.cc
@@ -387,7 +387,7 @@ TEST_P(VPxEncoderThreadTest, EncoderResultTest) {
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
   const double multi_thr_psnr = GetAveragePsnr();
 
-  EXPECT_NEAR(single_thr_psnr, multi_thr_psnr, 0.1);
+  EXPECT_NEAR(single_thr_psnr, multi_thr_psnr, 0.2);
 }
 
 INSTANTIATE_TEST_CASE_P(
@@ -409,7 +409,7 @@ INSTANTIATE_TEST_CASE_P(
     ::testing::Values(::libvpx_test::kTwoPassGood,
                       ::libvpx_test::kOnePassGood,
                       ::libvpx_test::kRealTime),
-    ::testing::Range(3, 9),    // cpu_used
+    ::testing::Range(3, 10),   // cpu_used
     ::testing::Range(0, 3),    // tile_columns
    ::testing::Range(2, 5)));  // threads
 
diff --git a/media/libvpx/libvpx/test/vp9_intrapred_test.cc
b/media/libvpx/libvpx/test/vp9_intrapred_test.cc index 39c5e79ebd93..58091f875b1a 100644 --- a/media/libvpx/libvpx/test/vp9_intrapred_test.cc +++ b/media/libvpx/libvpx/test/vp9_intrapred_test.cc @@ -130,6 +130,12 @@ TEST_P(VP9IntraPredTest, IntraPredTests) { RunTest(left_col, above_data, dst, ref_dst); } +// Instantiate a token test to avoid -Wuninitialized warnings when none of the +// other tests are enabled. +INSTANTIATE_TEST_CASE_P( + C, VP9IntraPredTest, + ::testing::Values(IntraPredParam(&vpx_d45_predictor_4x4_c, + &vpx_d45_predictor_4x4_c, 4, 8))); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, VP9IntraPredTest, @@ -378,58 +384,61 @@ INSTANTIATE_TEST_CASE_P( 8))); #endif // HAVE_MSA -#if HAVE_VSX -INSTANTIATE_TEST_CASE_P( - VSX, VP9IntraPredTest, - ::testing::Values( +// TODO(crbug.com/webm/1522): Fix test failures. +#if 0 IntraPredParam(&vpx_d45_predictor_8x8_vsx, &vpx_d45_predictor_8x8_c, 8, 8), - IntraPredParam(&vpx_d45_predictor_16x16_vsx, &vpx_d45_predictor_16x16_c, - 16, 8), - IntraPredParam(&vpx_d45_predictor_32x32_vsx, &vpx_d45_predictor_32x32_c, - 32, 8), IntraPredParam(&vpx_d63_predictor_8x8_vsx, &vpx_d63_predictor_8x8_c, 8, 8), - IntraPredParam(&vpx_d63_predictor_16x16_vsx, &vpx_d63_predictor_16x16_c, - 16, 8), - IntraPredParam(&vpx_d63_predictor_32x32_vsx, &vpx_d63_predictor_32x32_c, - 32, 8), - IntraPredParam(&vpx_dc_128_predictor_16x16_vsx, - &vpx_dc_128_predictor_16x16_c, 16, 8), - IntraPredParam(&vpx_dc_128_predictor_32x32_vsx, - &vpx_dc_128_predictor_32x32_c, 32, 8), - IntraPredParam(&vpx_dc_left_predictor_16x16_vsx, - &vpx_dc_left_predictor_16x16_c, 16, 8), - IntraPredParam(&vpx_dc_left_predictor_32x32_vsx, - &vpx_dc_left_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_predictor_8x8_vsx, &vpx_dc_predictor_8x8_c, 8, 8), - IntraPredParam(&vpx_dc_predictor_16x16_vsx, &vpx_dc_predictor_16x16_c, - 16, 8), - IntraPredParam(&vpx_dc_predictor_32x32_vsx, &vpx_dc_predictor_32x32_c, - 32, 8), - IntraPredParam(&vpx_dc_top_predictor_16x16_vsx, - &vpx_dc_top_predictor_16x16_c, 16, 8), - IntraPredParam(&vpx_dc_top_predictor_32x32_vsx, - &vpx_dc_top_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_h_predictor_4x4_vsx, &vpx_h_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_h_predictor_8x8_vsx, &vpx_h_predictor_8x8_c, 8, 8), - IntraPredParam(&vpx_h_predictor_16x16_vsx, &vpx_h_predictor_16x16_c, 16, - 8), - IntraPredParam(&vpx_h_predictor_32x32_vsx, &vpx_h_predictor_32x32_c, 32, - 8), IntraPredParam(&vpx_tm_predictor_4x4_vsx, &vpx_tm_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_tm_predictor_8x8_vsx, &vpx_tm_predictor_8x8_c, 8, 8), - IntraPredParam(&vpx_tm_predictor_16x16_vsx, &vpx_tm_predictor_16x16_c, - 16, 8), - IntraPredParam(&vpx_tm_predictor_32x32_vsx, &vpx_tm_predictor_32x32_c, - 32, 8), - IntraPredParam(&vpx_v_predictor_16x16_vsx, &vpx_v_predictor_16x16_c, 16, - 8), - IntraPredParam(&vpx_v_predictor_32x32_vsx, &vpx_v_predictor_32x32_c, 32, - 8))); +#endif + +#if HAVE_VSX +INSTANTIATE_TEST_CASE_P( + VSX, VP9IntraPredTest, + ::testing::Values(IntraPredParam(&vpx_d45_predictor_16x16_vsx, + &vpx_d45_predictor_16x16_c, 16, 8), + IntraPredParam(&vpx_d45_predictor_32x32_vsx, + &vpx_d45_predictor_32x32_c, 32, 8), + IntraPredParam(&vpx_d63_predictor_16x16_vsx, + &vpx_d63_predictor_16x16_c, 16, 8), + IntraPredParam(&vpx_d63_predictor_32x32_vsx, + &vpx_d63_predictor_32x32_c, 32, 8), + IntraPredParam(&vpx_dc_128_predictor_16x16_vsx, + &vpx_dc_128_predictor_16x16_c, 16, 8), + IntraPredParam(&vpx_dc_128_predictor_32x32_vsx, + &vpx_dc_128_predictor_32x32_c, 32, 8), + 
IntraPredParam(&vpx_dc_left_predictor_16x16_vsx, + &vpx_dc_left_predictor_16x16_c, 16, 8), + IntraPredParam(&vpx_dc_left_predictor_32x32_vsx, + &vpx_dc_left_predictor_32x32_c, 32, 8), + IntraPredParam(&vpx_dc_predictor_16x16_vsx, + &vpx_dc_predictor_16x16_c, 16, 8), + IntraPredParam(&vpx_dc_predictor_32x32_vsx, + &vpx_dc_predictor_32x32_c, 32, 8), + IntraPredParam(&vpx_dc_top_predictor_16x16_vsx, + &vpx_dc_top_predictor_16x16_c, 16, 8), + IntraPredParam(&vpx_dc_top_predictor_32x32_vsx, + &vpx_dc_top_predictor_32x32_c, 32, 8), + IntraPredParam(&vpx_h_predictor_16x16_vsx, + &vpx_h_predictor_16x16_c, 16, 8), + IntraPredParam(&vpx_h_predictor_32x32_vsx, + &vpx_h_predictor_32x32_c, 32, 8), + IntraPredParam(&vpx_tm_predictor_16x16_vsx, + &vpx_tm_predictor_16x16_c, 16, 8), + IntraPredParam(&vpx_tm_predictor_32x32_vsx, + &vpx_tm_predictor_32x32_c, 32, 8), + IntraPredParam(&vpx_v_predictor_16x16_vsx, + &vpx_v_predictor_16x16_c, 16, 8), + IntraPredParam(&vpx_v_predictor_32x32_vsx, + &vpx_v_predictor_32x32_c, 32, 8))); #endif // HAVE_VSX #if CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/libvpx/test/vp9_lossless_test.cc b/media/libvpx/libvpx/test/vp9_lossless_test.cc index 703b55e9bddd..5cf0a41da4d1 100644 --- a/media/libvpx/libvpx/test/vp9_lossless_test.cc +++ b/media/libvpx/libvpx/test/vp9_lossless_test.cc @@ -38,7 +38,7 @@ class LosslessTest virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { - if (video->frame() == 1) { + if (video->frame() == 0) { // Only call Control if quantizer > 0 to verify that using quantizer // alone will activate lossless if (cfg_.rc_max_quantizer > 0 || cfg_.rc_min_quantizer > 0) { diff --git a/media/libvpx/libvpx/test/vp9_motion_vector_test.cc b/media/libvpx/libvpx/test/vp9_motion_vector_test.cc index 1030204ae326..b556a1c37846 100644 --- a/media/libvpx/libvpx/test/vp9_motion_vector_test.cc +++ b/media/libvpx/libvpx/test/vp9_motion_vector_test.cc @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
 */
 
+#include <memory>
+
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
 #include "test/codec_factory.h"
@@ -22,7 +24,7 @@ namespace {
 // Encoding modes
 const libvpx_test::TestMode kEncodingModeVectors[] = {
   ::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
-  ::libvpx_test::kRealTime,
+  ::libvpx_test::kRealTime
 };
 
 // Encoding speeds
@@ -59,7 +61,7 @@ class MotionVectorTestLarge
 
   virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                   ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
+    if (video->frame() == 0) {
       encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
       encoder->Control(VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, mv_test_mode_);
       if (encoding_mode_ != ::libvpx_test::kRealTime) {
@@ -81,7 +83,7 @@ TEST_P(MotionVectorTestLarge, OverallTest) {
   cfg_.g_profile = 0;
   init_flags_ = VPX_CODEC_USE_PSNR;
 
-  testing::internal::scoped_ptr<libvpx_test::VideoSource> video;
+  std::unique_ptr<libvpx_test::VideoSource> video;
   video.reset(new libvpx_test::YUVVideoSource(
       "niklas_640_480_30.yuv", VPX_IMG_FMT_I420, 3840, 2160,  // 2048, 1080,
       30, 1, 0, 5));
diff --git a/media/libvpx/libvpx/test/vp9_quantize_test.cc b/media/libvpx/libvpx/test/vp9_quantize_test.cc
index b18d4522ceb6..69c2c5a0b9e8 100644
--- a/media/libvpx/libvpx/test/vp9_quantize_test.cc
+++ b/media/libvpx/libvpx/test/vp9_quantize_test.cc
@@ -11,6 +11,7 @@
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
+#include <tuple>
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
@@ -18,6 +19,7 @@
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
+#include "test/bench.h"
 #include "test/buffer.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
@@ -26,6 +28,7 @@
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/msvc.h"
 #include "vpx_ports/vpx_timer.h"
 
 using libvpx_test::ACMRandom;
@@ -41,8 +44,8 @@ typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
                              tran_low_t *dqcoeff, const int16_t *dequant,
                              uint16_t *eob, const int16_t *scan,
                              const int16_t *iscan);
-typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t, int, bool>
+typedef std::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t, int, bool>
     QuantizeParam;
 
 // Wrapper for FP version which does not use zbin or quant_shift.
@@ -67,11 +70,19 @@ void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
                scan, iscan);
 }
 
-class VP9QuantizeBase {
+class VP9QuantizeBase : public AbstractBench {
  public:
   VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
-      : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
+      : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp),
+        coeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 16)),
+        qcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)),
+        dqcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)) {
+    // TODO(jianj): SSSE3 and AVX2 tests fail on extreme values.
+#if HAVE_NEON
+    max_value_ = (1 << (7 + bit_depth_)) - 1;
+#else
     max_value_ = (1 << bit_depth_) - 1;
+#endif
     zbin_ptr_ =
         reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
     round_fp_ptr_ = reinterpret_cast<int16_t *>(
         vpx_memalign(16, 8 * sizeof(*round_fp_ptr_)));
@@ -86,6 +97,9 @@ class VP9QuantizeBase {
         vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_)));
     dequant_ptr_ = reinterpret_cast<int16_t *>(
         vpx_memalign(16, 8 * sizeof(*dequant_ptr_)));
+
+    r_ptr_ = (is_fp_) ? round_fp_ptr_ : round_ptr_;
+    q_ptr_ = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
   }
 
   ~VP9QuantizeBase() {
@@ -118,6 +132,15 @@ class VP9QuantizeBase {
   int max_value_;
   const int max_size_;
   const bool is_fp_;
+  Buffer<tran_low_t> coeff_;
+  Buffer<tran_low_t> qcoeff_;
+  Buffer<tran_low_t> dqcoeff_;
+  int16_t *r_ptr_;
+  int16_t *q_ptr_;
+  int count_;
+  int skip_block_;
+  const scan_order *scan_;
+  uint16_t eob_;
 };
 
 class VP9QuantizeTest : public VP9QuantizeBase,
@@ -128,21 +151,29 @@ class VP9QuantizeTest : public VP9QuantizeBase,
         quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
 
  protected:
+  virtual void Run();
   const QuantizeFunc quantize_op_;
   const QuantizeFunc ref_quantize_op_;
 };
 
+void VP9QuantizeTest::Run() {
+  quantize_op_(coeff_.TopLeftPixel(), count_, skip_block_, zbin_ptr_, r_ptr_,
+               q_ptr_, quant_shift_ptr_, qcoeff_.TopLeftPixel(),
+               dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, scan_->scan,
+               scan_->iscan);
+}
+
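// Aside (editor's sketch, not part of the patch): stripped of the SIMD
// detail, the fast-path ("FP") quantizer applies, per coefficient,
//   q = (clamp(|c| + round, INT16_MIN, INT16_MAX) * quant) >> 16
// with the sign restored afterwards and dq = q * dequant. In scalar form:
static int16_t QuantizeOneCoeffFP(int16_t c, int16_t round, int16_t quant) {
  const int sign = c < 0 ? -1 : 1;
  const int tmp = clamp(abs(c) + round, INT16_MIN, INT16_MAX);
  return static_cast<int16_t>(sign * ((tmp * quant) >> 16));
}
// The 32x32 variant below halves the rounding and dequant factors and shifts
// by 15 instead of 16, matching the extra scale of the 32x32 transform.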
 // This quantizer compares the AC coefficients to the quantization step size to
 // determine if further multiplication operations are needed.
 // Based on vp9_quantize_fp_sse2().
-void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                      int skip_block, const int16_t *round_ptr,
-                      const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
-                      tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
-                      uint16_t *eob_ptr, const int16_t *scan,
-                      const int16_t *iscan) {
+inline void quant_fp_nz(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                        int skip_block, const int16_t *round_ptr,
+                        const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+                        tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+                        uint16_t *eob_ptr, const int16_t *scan,
+                        const int16_t *iscan, int is_32x32) {
   int i, eob = -1;
-  const int thr = dequant_ptr[1] >> 1;
+  const int thr = dequant_ptr[1] >> (1 + is_32x32);
   (void)iscan;
   (void)skip_block;
   assert(!skip_block);
@@ -172,11 +203,27 @@ void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
       // If all of the AC coeffs in a row has magnitude less than the
       // quantization step_size/2, quantize to zero.
       if (nzflag_cnt < 16) {
-        int tmp =
-            clamp(abs_coeff[y] + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
-        tmp = (tmp * quant_ptr[rc != 0]) >> 16;
+        int tmp;
+        int _round;
+
+        if (is_32x32) {
+          _round = ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+        } else {
+          _round = round_ptr[rc != 0];
+        }
+        tmp = clamp(abs_coeff[y] + _round, INT16_MIN, INT16_MAX);
+        tmp = (tmp * quant_ptr[rc != 0]) >> (16 - is_32x32);
         qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
-        dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+
+        if (is_32x32) {
+          dqcoeff_ptr[rc] = static_cast<tran_low_t>(qcoeff_ptr[rc] *
+                                                    dequant_ptr[rc != 0] / 2);
+        } else {
+          dqcoeff_ptr[rc] =
+              static_cast<tran_low_t>(qcoeff_ptr[rc] * dequant_ptr[rc != 0]);
+        }
       } else {
         qcoeff_ptr[rc] = 0;
         dqcoeff_ptr[rc] = 0;
@@ -195,6 +242,26 @@ void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
   *eob_ptr = eob + 1;
 }
 
+void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                      int skip_block, const int16_t *round_ptr,
+                      const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+                      tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+                      uint16_t *eob_ptr, const int16_t *scan,
+                      const int16_t *iscan) {
+  quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr, qcoeff_ptr,
+              dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 0);
+}
+
+void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                            int skip_block, const int16_t *round_ptr,
+                            const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+                            tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+                            uint16_t *eob_ptr, const int16_t *scan,
+                            const int16_t *iscan) {
+  quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr, qcoeff_ptr,
+              dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 1);
+}
+
 void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
                           int16_t *quant, int16_t *quant_shift,
                           int16_t *dequant, int16_t *round_fp,
@@ -236,19 +303,17 @@ void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
 
 TEST_P(VP9QuantizeTest, OperationCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
-  ASSERT_TRUE(coeff.Init());
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(qcoeff.Init());
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(dqcoeff.Init());
+  ASSERT_TRUE(coeff_.Init());
+  ASSERT_TRUE(qcoeff_.Init());
+  ASSERT_TRUE(dqcoeff_.Init());
   Buffer<tran_low_t> ref_qcoeff =
       Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(ref_qcoeff.Init());
   Buffer<tran_low_t> ref_dqcoeff =
       Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(ref_dqcoeff.Init());
-  uint16_t eob, ref_eob;
+  uint16_t ref_eob = 0;
+  eob_ = 0;
 
   for (int i = 0; i < number_of_iterations; ++i) {
     // Test skip block for the first three iterations to catch all the different
@@ -261,33 +326,31 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
       sz = TX_32X32;
     }
     const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    const int count = (4 << sz) * (4 << sz);
-    coeff.Set(&rnd, -max_value_, max_value_);
+    scan_ = &vp9_scan_orders[sz][tx_type];
+    count_ = (4 << sz) * (4 << sz);
+    coeff_.Set(&rnd, -max_value_, max_value_);
     GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                          quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                          quant_fp_ptr_);
-    int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
-    int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
-    ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
-                     q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
-                     ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
-                     scan_order->scan, scan_order->iscan);
+    ref_quantize_op_(coeff_.TopLeftPixel(), count_, skip_block, zbin_ptr_,
+                     r_ptr_, q_ptr_, quant_shift_ptr_,
+                     ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
+                     dequant_ptr_, &ref_eob, scan_->scan, scan_->iscan);
     ASM_REGISTER_STATE_CHECK(quantize_op_(
-        coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
-        quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
-        dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
+        coeff_.TopLeftPixel(), count_, skip_block, zbin_ptr_, r_ptr_, q_ptr_,
+        quant_shift_ptr_, qcoeff_.TopLeftPixel(), dqcoeff_.TopLeftPixel(),
+        dequant_ptr_, &eob_, scan_->scan, scan_->iscan));
 
-    EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
-    EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
+    EXPECT_TRUE(qcoeff_.CheckValues(ref_qcoeff));
+    EXPECT_TRUE(dqcoeff_.CheckValues(ref_dqcoeff));
 
-    EXPECT_EQ(eob, ref_eob);
+    EXPECT_EQ(eob_, ref_eob);
 
     if (HasFailure()) {
       printf("Failure on iteration %d.\n", i);
-      qcoeff.PrintDifference(ref_qcoeff);
-      dqcoeff.PrintDifference(ref_dqcoeff);
+      qcoeff_.PrintDifference(ref_qcoeff);
+      dqcoeff_.PrintDifference(ref_dqcoeff);
       return;
     }
   }
 }
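// Aside (editor's sketch, not part of the patch): `eob` ("end of block") is
// one past the scan position of the last nonzero quantized coefficient, so
// an all-zero block reports 0 -- which is what EOBCheck below probes with
// its two random nonzero entries. A reference computation:
static uint16_t ComputeEob(const tran_low_t *qcoeff, const int16_t *scan,
                           int count) {
  int last_nz = -1;
  for (int i = 0; i < count; ++i) {
    if (qcoeff[scan[i]] != 0) last_nz = i;  // last nonzero in scan order
  }
  return static_cast<uint16_t>(last_nz + 1);
}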
@@ -295,22 +358,21 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
 
 TEST_P(VP9QuantizeTest, EOBCheck) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
-  ASSERT_TRUE(coeff.Init());
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(qcoeff.Init());
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(dqcoeff.Init());
+  ASSERT_TRUE(coeff_.Init());
+  ASSERT_TRUE(qcoeff_.Init());
+  ASSERT_TRUE(dqcoeff_.Init());
   Buffer<tran_low_t> ref_qcoeff =
       Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(ref_qcoeff.Init());
   Buffer<tran_low_t> ref_dqcoeff =
       Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
   ASSERT_TRUE(ref_dqcoeff.Init());
-  uint16_t eob, ref_eob;
+  uint16_t ref_eob = 0;
+  eob_ = 0;
+  const uint32_t max_index = max_size_ * max_size_ - 1;
 
   for (int i = 0; i < number_of_iterations; ++i) {
-    const int skip_block = 0;
+    skip_block_ = 0;
     TX_SIZE sz;
     if (max_size_ == 16) {
       sz = static_cast<TX_SIZE>(i % 3);  // TX_4X4, TX_8X8, TX_16X16
@@ -318,38 +380,36 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
       sz = TX_32X32;
     }
     const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    int count = (4 << sz) * (4 << sz);
+    scan_ = &vp9_scan_orders[sz][tx_type];
+    count_ = (4 << sz) * (4 << sz);
     // Two random entries
-    coeff.Set(0);
-    coeff.TopLeftPixel()[rnd(count)] =
+    coeff_.Set(0);
+    coeff_.TopLeftPixel()[rnd.RandRange(count_) & max_index] =
         static_cast<tran_low_t>(rnd.RandRange(max_value_ * 2)) - max_value_;
-    coeff.TopLeftPixel()[rnd(count)] =
+    coeff_.TopLeftPixel()[rnd.RandRange(count_) & max_index] =
         static_cast<tran_low_t>(rnd.RandRange(max_value_ * 2)) - max_value_;
     GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                          quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                          quant_fp_ptr_);
-    int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
-    int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
-    ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
-                     q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
-                     ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
-                     scan_order->scan, scan_order->iscan);
+    ref_quantize_op_(coeff_.TopLeftPixel(), count_, skip_block_, zbin_ptr_,
+                     r_ptr_, q_ptr_, quant_shift_ptr_,
+                     ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
+                     dequant_ptr_, &ref_eob, scan_->scan, scan_->iscan);
     ASM_REGISTER_STATE_CHECK(quantize_op_(
-        coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
-        quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
-        dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
+        coeff_.TopLeftPixel(), count_, skip_block_, zbin_ptr_, r_ptr_, q_ptr_,
+        quant_shift_ptr_, qcoeff_.TopLeftPixel(), dqcoeff_.TopLeftPixel(),
+        dequant_ptr_, &eob_, scan_->scan, scan_->iscan));
 
-    EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
-    EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
+    EXPECT_TRUE(qcoeff_.CheckValues(ref_qcoeff));
+    EXPECT_TRUE(dqcoeff_.CheckValues(ref_dqcoeff));
 
-    EXPECT_EQ(eob, ref_eob);
+    EXPECT_EQ(eob_, ref_eob);
 
     if (HasFailure()) {
       printf("Failure on iteration %d.\n", i);
-      qcoeff.PrintDifference(ref_qcoeff);
-      dqcoeff.PrintDifference(ref_dqcoeff);
+      qcoeff_.PrintDifference(ref_qcoeff);
+      dqcoeff_.PrintDifference(ref_dqcoeff);
       return;
     }
   }
 }
@@ -357,13 +417,9 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
 
 TEST_P(VP9QuantizeTest, DISABLED_Speed) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
-  ASSERT_TRUE(coeff.Init());
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(qcoeff.Init());
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(dqcoeff.Init());
-  uint16_t eob;
+  ASSERT_TRUE(coeff_.Init());
+  ASSERT_TRUE(qcoeff_.Init());
+  ASSERT_TRUE(dqcoeff_.Init());
 
   TX_SIZE starting_sz, ending_sz;
   if (max_size_ == 16) {
@@ -377,18 +433,16 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
   for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
     // zbin > coeff, zbin < coeff.
     for (int i = 0; i < 2; ++i) {
-      const int skip_block = 0;
+      skip_block_ = 0;
       // TX_TYPE defines the scan order. That is not relevant to the speed test.
       // Pick the first one.
       const TX_TYPE tx_type = DCT_DCT;
-      const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-      const int count = (4 << sz) * (4 << sz);
+      count_ = (4 << sz) * (4 << sz);
+      scan_ = &vp9_scan_orders[sz][tx_type];
 
       GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                            quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                            quant_fp_ptr_);
-      int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
-      int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
 
       if (i == 0) {
         // When |coeff values| are less than zbin the results are 0.
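// Aside (editor's sketch, not part of the patch): zbin ("zero bin") acts as
// a dead zone -- any coefficient whose magnitude stays below it is forced to
// zero with no multiply at all, which is the cheap path this speed run times:
static bool InZeroBin(int coeff, int zbin) { return abs(coeff) < zbin; }
// Hence the two timed scenarios: i == 0 sets zbin above every |coeff| (all
// bypass), i == 1 sets it below most of them (full calculations).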
@@ -399,40 +453,33 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
         threshold = 200;
       }
       for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold;
-      coeff.Set(&rnd, -99, 99);
+      coeff_.Set(&rnd, -99, 99);
     } else if (i == 1) {
       for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50;
-      coeff.Set(&rnd, -500, 500);
+      coeff_.Set(&rnd, -500, 500);
     }
 
-      vpx_usec_timer timer;
-      vpx_usec_timer_start(&timer);
-      for (int j = 0; j < 100000000 / count; ++j) {
-        quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
-                     q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
-                     dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
-                     scan_order->scan, scan_order->iscan);
-      }
-      vpx_usec_timer_mark(&timer);
-      const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
-      if (i == 0) printf("Bypass calculations.\n");
-      if (i == 1) printf("Full calculations.\n");
-      printf("Quantize %dx%d time: %5d ms\n", 4 << sz, 4 << sz,
-             elapsed_time / 1000);
+      RunNTimes(10000000 / count_);
+      const char *type =
+          (i == 0) ? "Bypass calculations " : "Full calculations ";
+      char block_size[16];
+      snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz);
+      char title[100];
+      snprintf(title, sizeof(title), "%25s %8s ", type, block_size);
+      PrintMedian(title);
     }
-    printf("\n");
   }
 }
 
-using std::tr1::make_tuple;
+using std::make_tuple;
 
 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
-// TODO(johannkoenig): Fix vpx_quantize_b_sse2 in highbitdepth builds.
-// make_tuple(&vpx_quantize_b_sse2, &vpx_highbd_quantize_b_c, VPX_BITS_8),
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9QuantizeTest,
     ::testing::Values(
+        make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c, VPX_BITS_8, 16,
+                   false),
         make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
                    VPX_BITS_8, 16, false),
         make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
@@ -457,51 +504,52 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SSE2
 
-#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
-#if ARCH_X86_64
+#if HAVE_SSSE3
+#if VPX_ARCH_X86_64
 INSTANTIATE_TEST_CASE_P(
     SSSE3, VP9QuantizeTest,
     ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
                                  VPX_BITS_8, 16, false),
-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
-                                 &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
-                                 16, true)));
-#else
-INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
-                        ::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
-                                                     &vpx_quantize_b_c,
-                                                     VPX_BITS_8, 16, false)));
-#endif
-
-#if ARCH_X86_64
-// TODO(johannkoenig): SSSE3 optimizations do not yet pass this test.
-INSTANTIATE_TEST_CASE_P(
-    DISABLED_SSSE3, VP9QuantizeTest,
-    ::testing::Values(make_tuple(&vpx_quantize_b_32x32_ssse3,
+                      make_tuple(&vpx_quantize_b_32x32_ssse3,
                                  &vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
                                  false),
+                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
+                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
+                                 16, true),
                       make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
-                                 &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
+                                 &QuantFPWrapper<quantize_fp_32x32_nz_c>,
                                  VPX_BITS_8, 32, true)));
-#endif  // ARCH_X86_64
-#endif  // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
-
-// TODO(johannkoenig): AVX optimizations do not yet pass the 32x32 test or
-// highbitdepth configurations.
-#if HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
+#else
 INSTANTIATE_TEST_CASE_P(
-    AVX, VP9QuantizeTest,
-    ::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c,
+    SSSE3, VP9QuantizeTest,
+    ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
                                  VPX_BITS_8, 16, false),
-                      // Even though SSSE3 and AVX do not match the reference
-                      // code, we can keep them in sync with each other.
-                      make_tuple(&vpx_quantize_b_32x32_avx,
-                                 &vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32,
+                      make_tuple(&vpx_quantize_b_32x32_ssse3,
+                                 &vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
                                  false)));
-#endif  // HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
-// TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
+#endif  // VPX_ARCH_X86_64
+#endif  // HAVE_SSSE3
+
+#if HAVE_AVX
+INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,
+                        ::testing::Values(make_tuple(&vpx_quantize_b_avx,
+                                                     &vpx_quantize_b_c,
+                                                     VPX_BITS_8, 16, false),
+                                          make_tuple(&vpx_quantize_b_32x32_avx,
+                                                     &vpx_quantize_b_32x32_c,
+                                                     VPX_BITS_8, 32, false)));
+#endif  // HAVE_AVX
+
+#if VPX_ARCH_X86_64 && HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(
+    AVX2, VP9QuantizeTest,
+    ::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
+                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
+                                 16, true)));
+#endif  // HAVE_AVX2
+
+#if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(
     NEON, VP9QuantizeTest,
     ::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c,
@@ -515,7 +563,23 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
                                  &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
                                  VPX_BITS_8, 32, true)));
-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_NEON
+
+#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    VSX, VP9QuantizeTest,
+    ::testing::Values(make_tuple(&vpx_quantize_b_vsx, &vpx_quantize_b_c,
+                                 VPX_BITS_8, 16, false),
+                      make_tuple(&vpx_quantize_b_32x32_vsx,
+                                 &vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
+                                 false),
+                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_vsx>,
+                                 &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
+                                 16, true),
+                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_vsx>,
+                                 &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
+                                 VPX_BITS_8, 32, true)));
+#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
 
 // Only useful to compare "Speed" test results.
 INSTANTIATE_TEST_CASE_P(
@@ -528,6 +592,9 @@ INSTANTIATE_TEST_CASE_P(
                    &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
         make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
                    &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
+        make_tuple(&QuantFPWrapper<quantize_fp_32x32_nz_c>,
+                   &QuantFPWrapper<quantize_fp_32x32_nz_c>, VPX_BITS_8, 32,
+                   true),
         make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
                    &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
                    true)));
diff --git a/media/libvpx/libvpx/test/vp9_scale_test.cc b/media/libvpx/libvpx/test/vp9_scale_test.cc
index 5d7d38e89adb..f3e7f0a0e2b8 100644
--- a/media/libvpx/libvpx/test/vp9_scale_test.cc
+++ b/media/libvpx/libvpx/test/vp9_scale_test.cc
@@ -47,7 +47,7 @@ class ScaleTest : public VpxScaleBase,
                       scale_fn_(&img_, &dst_img_, filter_type, phase_scaler));
   }
 
-  void RunTest() {
+  void RunTest(INTERP_FILTER filter_type) {
     static const int kNumSizesToTest = 20;
     static const int kNumScaleFactorsToTest = 4;
     static const int kSizesToTest[] = {
@@ -55,50 +55,48 @@ class ScaleTest : public VpxScaleBase,
       22, 24, 26, 28, 30, 32, 34, 68, 128, 134
     };
     static const int kScaleFactors[] = { 1, 2, 3, 4 };
-    for (INTERP_FILTER filter_type = 0; filter_type < 4; ++filter_type) {
-      for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) {
-        for (int h = 0; h < kNumSizesToTest; ++h) {
-          const int src_height = kSizesToTest[h];
-          for (int w = 0; w < kNumSizesToTest; ++w) {
-            const int src_width = kSizesToTest[w];
-            for (int sf_up_idx = 0; sf_up_idx < kNumScaleFactorsToTest;
-                 ++sf_up_idx) {
-              const int sf_up = kScaleFactors[sf_up_idx];
-              for (int sf_down_idx = 0; sf_down_idx < kNumScaleFactorsToTest;
-                   ++sf_down_idx) {
-                const int sf_down = kScaleFactors[sf_down_idx];
-                const int dst_width = src_width * sf_up / sf_down;
-                const int dst_height = src_height * sf_up / sf_down;
-                if (sf_up == sf_down && sf_up != 1) {
-                  continue;
-                }
-                // I420 frame width and height must be even.
- if (!dst_width || !dst_height || dst_width & 1 || - dst_height & 1) { - continue; - } - // vpx_convolve8_c() has restriction on the step which cannot - // exceed 64 (ratio 1 to 4). - if (src_width > 4 * dst_width || src_height > 4 * dst_height) { - continue; - } - ASSERT_NO_FATAL_FAILURE(ResetScaleImages( - src_width, src_height, dst_width, dst_height)); - ReferenceScaleFrame(filter_type, phase_scaler); - ScaleFrame(filter_type, phase_scaler); - if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc, - ref_img_.frame_size)) { - printf( - "filter_type = %d, phase_scaler = %d, src_width = %4d, " - "src_height = %4d, dst_width = %4d, dst_height = %4d, " - "scale factor = %d:%d\n", - filter_type, phase_scaler, src_width, src_height, - dst_width, dst_height, sf_down, sf_up); - PrintDiff(); - } - CompareImages(dst_img_); - DeallocScaleImages(); + for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) { + for (int h = 0; h < kNumSizesToTest; ++h) { + const int src_height = kSizesToTest[h]; + for (int w = 0; w < kNumSizesToTest; ++w) { + const int src_width = kSizesToTest[w]; + for (int sf_up_idx = 0; sf_up_idx < kNumScaleFactorsToTest; + ++sf_up_idx) { + const int sf_up = kScaleFactors[sf_up_idx]; + for (int sf_down_idx = 0; sf_down_idx < kNumScaleFactorsToTest; + ++sf_down_idx) { + const int sf_down = kScaleFactors[sf_down_idx]; + const int dst_width = src_width * sf_up / sf_down; + const int dst_height = src_height * sf_up / sf_down; + if (sf_up == sf_down && sf_up != 1) { + continue; } + // I420 frame width and height must be even. + if (!dst_width || !dst_height || dst_width & 1 || + dst_height & 1) { + continue; + } + // vpx_convolve8_c() has restriction on the step which cannot + // exceed 64 (ratio 1 to 4). + if (src_width > 4 * dst_width || src_height > 4 * dst_height) { + continue; + } + ASSERT_NO_FATAL_FAILURE(ResetScaleImages(src_width, src_height, + dst_width, dst_height)); + ReferenceScaleFrame(filter_type, phase_scaler); + ScaleFrame(filter_type, phase_scaler); + if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc, + ref_img_.frame_size)) { + printf( + "filter_type = %d, phase_scaler = %d, src_width = %4d, " + "src_height = %4d, dst_width = %4d, dst_height = %4d, " + "scale factor = %d:%d\n", + filter_type, phase_scaler, src_width, src_height, dst_width, + dst_height, sf_down, sf_up); + PrintDiff(); + } + CompareImages(dst_img_); + DeallocScaleImages(); } } } @@ -145,7 +143,10 @@ class ScaleTest : public VpxScaleBase, ScaleFrameFunc scale_fn_; }; -TEST_P(ScaleTest, ScaleFrame) { ASSERT_NO_FATAL_FAILURE(RunTest()); } +TEST_P(ScaleTest, ScaleFrame_EightTap) { RunTest(EIGHTTAP); } +TEST_P(ScaleTest, ScaleFrame_EightTapSmooth) { RunTest(EIGHTTAP_SMOOTH); } +TEST_P(ScaleTest, ScaleFrame_EightTapSharp) { RunTest(EIGHTTAP_SHARP); } +TEST_P(ScaleTest, ScaleFrame_Bilinear) { RunTest(BILINEAR); } TEST_P(ScaleTest, DISABLED_Speed) { static const int kCountSpeedTestBlock = 100; diff --git a/media/libvpx/libvpx/test/vp9_spatial_svc_encoder.sh b/media/libvpx/libvpx/test/vp9_spatial_svc_encoder.sh deleted file mode 100755 index 65031073f870..000000000000 --- a/media/libvpx/libvpx/test/vp9_spatial_svc_encoder.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/sh -## -## Copyright (c) 2014 The WebM project authors. All Rights Reserved. -## -## Use of this source code is governed by a BSD-style license -## that can be found in the LICENSE file in the root of the source -## tree. An additional intellectual property rights grant can be found -## in the file PATENTS. 
All contributing project authors may
-##  be found in the AUTHORS file in the root of the source tree.
-##
-##  This file tests the libvpx vp9_spatial_svc_encoder example. To add new
-##  tests to this file, do the following:
-##    1. Write a shell function (this is your test).
-##    2. Add the function to vp9_spatial_svc_tests (on a new line).
-##
-. $(dirname $0)/tools_common.sh
-
-# Environment check: $YUV_RAW_INPUT is required.
-vp9_spatial_svc_encoder_verify_environment() {
-  if [ ! -e "${YUV_RAW_INPUT}" ]; then
-    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
-    return 1
-  fi
-}
-
-# Runs vp9_spatial_svc_encoder. $1 is the test name.
-vp9_spatial_svc_encoder() {
-  local readonly \
-    encoder="${LIBVPX_BIN_PATH}/vp9_spatial_svc_encoder${VPX_TEST_EXE_SUFFIX}"
-  local readonly test_name="$1"
-  local readonly \
-    output_file="${VPX_TEST_OUTPUT_DIR}/vp9_ssvc_encoder${test_name}.ivf"
-  local readonly frames_to_encode=10
-  local readonly max_kf=9999
-
-  shift
-
-  if [ ! -x "${encoder}" ]; then
-    elog "${encoder} does not exist or is not executable."
-    return 1
-  fi
-
-  eval "${VPX_TEST_PREFIX}" "${encoder}" -w "${YUV_RAW_INPUT_WIDTH}" \
-    -h "${YUV_RAW_INPUT_HEIGHT}" -k "${max_kf}" -f "${frames_to_encode}" \
-    "$@" "${YUV_RAW_INPUT}" "${output_file}" ${devnull}
-
-  [ -e "${output_file}" ] || return 1
-}
-
-# Each test is run with layer count 1-$vp9_ssvc_test_layers.
-vp9_ssvc_test_layers=5
-
-vp9_spatial_svc() {
-  if [ "$(vp9_encode_available)" = "yes" ]; then
-    local readonly test_name="vp9_spatial_svc"
-    for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
-      vp9_spatial_svc_encoder "${test_name}" -sl ${layers}
-    done
-  fi
-}
-
-readonly vp9_spatial_svc_tests="DISABLED_vp9_spatial_svc_mode_i
-                                DISABLED_vp9_spatial_svc_mode_altip
-                                DISABLED_vp9_spatial_svc_mode_ip
-                                DISABLED_vp9_spatial_svc_mode_gf
-                                vp9_spatial_svc"
-
-if [ "$(vpx_config_option_enabled CONFIG_SPATIAL_SVC)" = "yes" ]; then
-  run_tests \
-    vp9_spatial_svc_encoder_verify_environment \
-    "${vp9_spatial_svc_tests}"
-fi
diff --git a/media/libvpx/libvpx/test/vp9_subtract_test.cc b/media/libvpx/libvpx/test/vp9_subtract_test.cc
index 62845ad61540..67e8de6c74be 100644
--- a/media/libvpx/libvpx/test/vp9_subtract_test.cc
+++ b/media/libvpx/libvpx/test/vp9_subtract_test.cc
@@ -14,9 +14,11 @@
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
+#include "test/bench.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "vp9/common/vp9_blockd.h"
+#include "vpx_ports/msvc.h"
 #include "vpx_mem/vpx_mem.h"
 
 typedef void (*SubtractFunc)(int rows, int cols, int16_t *diff_ptr,
@@ -26,62 +28,101 @@ typedef void (*SubtractFunc)(int rows, int cols, int16_t *diff_ptr,
 
 namespace vp9 {
 
-class VP9SubtractBlockTest : public ::testing::TestWithParam<SubtractFunc> {
+class VP9SubtractBlockTest : public AbstractBench,
+                             public ::testing::TestWithParam<SubtractFunc> {
  public:
   virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  virtual void Run() {
+    GetParam()(block_height_, block_width_, diff_, block_width_, src_,
+               block_width_, pred_, block_width_);
+  }
+
+  void SetupBlocks(BLOCK_SIZE bsize) {
+    block_width_ = 4 * num_4x4_blocks_wide_lookup[bsize];
+    block_height_ = 4 * num_4x4_blocks_high_lookup[bsize];
+    diff_ = reinterpret_cast<int16_t *>(
+        vpx_memalign(16, sizeof(*diff_) * block_width_ * block_height_ * 2));
+    pred_ = reinterpret_cast<uint8_t *>(
+        vpx_memalign(16, block_width_ * block_height_ * 2));
+    src_ = reinterpret_cast<uint8_t *>(
+        vpx_memalign(16, block_width_ * block_height_ * 2));
+  }
+
+  int block_width_;
+  int block_height_;
+  int16_t *diff_;
+  uint8_t *pred_;
+  uint8_t *src_;
 };
 
 using libvpx_test::ACMRandom;
 
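// Aside (editor's sketch, not part of the patch): vpx_subtract_block computes
// the elementwise prediction residual over a block, which is what
// SimpleSubtract below verifies and DISABLED_Speed times. The C reference
// behaviour reduces to:
static void SubtractBlockRef(int rows, int cols, int16_t *diff, int d_stride,
                             const uint8_t *src, int s_stride,
                             const uint8_t *pred, int p_stride) {
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      // residual = source pixel minus predicted pixel
      diff[r * d_stride + c] = src[r * s_stride + c] - pred[r * p_stride + c];
    }
  }
}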
+TEST_P(VP9SubtractBlockTest, DISABLED_Speed) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+  for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES;
+       bsize = static_cast<BLOCK_SIZE>(static_cast<int>(bsize) + 1)) {
+    SetupBlocks(bsize);
+
+    RunNTimes(100000000 / (block_height_ * block_width_));
+    char block_size[16];
+    snprintf(block_size, sizeof(block_size), "%dx%d", block_height_,
+             block_width_);
+    char title[100];
+    snprintf(title, sizeof(title), "%8s ", block_size);
+    PrintMedian(title);
+
+    vpx_free(diff_);
+    vpx_free(pred_);
+    vpx_free(src_);
+  }
+}
+
 TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
   ACMRandom rnd(ACMRandom::DeterministicSeed());
-  // FIXME(rbultje) split in its own file
 
   for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES;
       bsize = static_cast<BLOCK_SIZE>(static_cast<int>(bsize) + 1)) {
-    const int block_width = 4 * num_4x4_blocks_wide_lookup[bsize];
-    const int block_height = 4 * num_4x4_blocks_high_lookup[bsize];
-    int16_t *diff = reinterpret_cast<int16_t *>(
-        vpx_memalign(16, sizeof(*diff) * block_width * block_height * 2));
-    uint8_t *pred = reinterpret_cast<uint8_t *>(
-        vpx_memalign(16, block_width * block_height * 2));
-    uint8_t *src = reinterpret_cast<uint8_t *>(
-        vpx_memalign(16, block_width * block_height * 2));
+    SetupBlocks(bsize);
 
     for (int n = 0; n < 100; n++) {
-      for (int r = 0; r < block_height; ++r) {
-        for (int c = 0; c < block_width * 2; ++c) {
-          src[r * block_width * 2 + c] = rnd.Rand8();
-          pred[r * block_width * 2 + c] = rnd.Rand8();
+      for (int r = 0; r < block_height_; ++r) {
+        for (int c = 0; c < block_width_ * 2; ++c) {
+          src_[r * block_width_ * 2 + c] = rnd.Rand8();
+          pred_[r * block_width_ * 2 + c] = rnd.Rand8();
         }
       }
 
-      GetParam()(block_height, block_width, diff, block_width, src, block_width,
-                 pred, block_width);
+      GetParam()(block_height_, block_width_, diff_, block_width_, src_,
+                 block_width_, pred_, block_width_);
 
-      for (int r = 0; r < block_height; ++r) {
-        for (int c = 0; c < block_width; ++c) {
-          EXPECT_EQ(diff[r * block_width + c],
-                    (src[r * block_width + c] - pred[r * block_width + c]))
-              << "r = " << r << ", c = " << c << ", bs = " << bsize;
+      for (int r = 0; r < block_height_; ++r) {
+        for (int c = 0; c < block_width_; ++c) {
+          EXPECT_EQ(diff_[r * block_width_ + c],
+                    (src_[r * block_width_ + c] - pred_[r * block_width_ + c]))
+              << "r = " << r << ", c = " << c
+              << ", bs = " << static_cast<int>(bsize);
         }
       }
 
-      GetParam()(block_height, block_width, diff, block_width * 2, src,
-                 block_width * 2, pred, block_width * 2);
+      GetParam()(block_height_, block_width_, diff_, block_width_ * 2, src_,
+                 block_width_ * 2, pred_, block_width_ * 2);
 
-      for (int r = 0; r < block_height; ++r) {
-        for (int c = 0; c < block_width; ++c) {
-          EXPECT_EQ(
-              diff[r * block_width * 2 + c],
-              (src[r * block_width * 2 + c] - pred[r * block_width * 2 + c]))
-              << "r = " << r << ", c = " << c << ", bs = " << bsize;
+      for (int r = 0; r < block_height_; ++r) {
+        for (int c = 0; c < block_width_; ++c) {
+          EXPECT_EQ(diff_[r * block_width_ * 2 + c],
+                    (src_[r * block_width_ * 2 + c] -
+                     pred_[r * block_width_ * 2 + c]))
+              << "r = " << r << ", c = " << c
+              << ", bs = " << static_cast<int>(bsize);
         }
       }
     }
-    vpx_free(diff);
-    vpx_free(pred);
-    vpx_free(src);
+    vpx_free(diff_);
+    vpx_free(pred_);
+    vpx_free(src_);
   }
 }
 
@@ -106,4 +147,9 @@ INSTANTIATE_TEST_CASE_P(MMI, VP9SubtractBlockTest,
                         ::testing::Values(vpx_subtract_block_mmi));
 #endif
 
+#if HAVE_VSX
+INSTANTIATE_TEST_CASE_P(VSX, VP9SubtractBlockTest,
::testing::Values(vpx_subtract_block_vsx)); +#endif + } // namespace vp9 diff --git a/media/libvpx/libvpx/test/vp9_thread_test.cc b/media/libvpx/libvpx/test/vp9_thread_test.cc index 576f5e906b05..31b6fe57b4aa 100644 --- a/media/libvpx/libvpx/test/vp9_thread_test.cc +++ b/media/libvpx/libvpx/test/vp9_thread_test.cc @@ -147,7 +147,6 @@ TEST(VPxWorkerThreadTest, TestInterfaceAPI) { // ----------------------------------------------------------------------------- // Multi-threaded decode tests - #if CONFIG_WEBM_IO struct FileList { const char *name; @@ -197,6 +196,7 @@ void DecodeFiles(const FileList files[]) { // Note any worker that requires synchronization between other workers will // hang. namespace impl { +namespace { void Init(VPxWorker *const worker) { memset(worker, 0, sizeof(*worker)); } int Reset(VPxWorker *const /*worker*/) { return 1; } @@ -209,6 +209,7 @@ void Execute(VPxWorker *const worker) { void Launch(VPxWorker *const worker) { Execute(worker); } void End(VPxWorker *const /*worker*/) {} +} // namespace } // namespace impl TEST(VPxWorkerThreadTest, TestSerialInterface) { diff --git a/media/libvpx/libvpx/test/vpx_scale_test.cc b/media/libvpx/libvpx/test/vpx_scale_test.cc index ac75dceb2305..057a2a295f44 100644 --- a/media/libvpx/libvpx/test/vpx_scale_test.cc +++ b/media/libvpx/libvpx/test/vpx_scale_test.cc @@ -20,6 +20,15 @@ #include "vpx_scale/yv12config.h" namespace libvpx_test { +namespace { + +#if VPX_ARCH_ARM || (VPX_ARCH_MIPS && !HAVE_MIPS64) || VPX_ARCH_X86 +// Avoid OOM failures on 32-bit platforms. +const int kNumSizesToTest = 7; +#else +const int kNumSizesToTest = 8; +#endif +const int kSizesToTest[] = { 1, 15, 33, 145, 512, 1025, 3840, 16383 }; typedef void (*ExtendFrameBorderFunc)(YV12_BUFFER_CONFIG *ybf); typedef void (*CopyFrameFunc)(const YV12_BUFFER_CONFIG *src_ybf, @@ -37,13 +46,6 @@ class ExtendBorderTest void ExtendBorder() { ASM_REGISTER_STATE_CHECK(extend_fn_(&img_)); } void RunTest() { -#if ARCH_ARM - // Some arm devices OOM when trying to allocate the largest buffers. - static const int kNumSizesToTest = 6; -#else - static const int kNumSizesToTest = 7; -#endif - static const int kSizesToTest[] = { 1, 15, 33, 145, 512, 1025, 16383 }; for (int h = 0; h < kNumSizesToTest; ++h) { for (int w = 0; w < kNumSizesToTest; ++w) { ASSERT_NO_FATAL_FAILURE(ResetImages(kSizesToTest[w], kSizesToTest[h])); @@ -76,13 +78,6 @@ class CopyFrameTest : public VpxScaleBase, } void RunTest() { -#if ARCH_ARM - // Some arm devices OOM when trying to allocate the largest buffers. - static const int kNumSizesToTest = 6; -#else - static const int kNumSizesToTest = 7; -#endif - static const int kSizesToTest[] = { 1, 15, 33, 145, 512, 1025, 16383 }; for (int h = 0; h < kNumSizesToTest; ++h) { for (int w = 0; w < kNumSizesToTest; ++w) { ASSERT_NO_FATAL_FAILURE(ResetImages(kSizesToTest[w], kSizesToTest[h])); @@ -102,4 +97,5 @@ TEST_P(CopyFrameTest, CopyFrame) { ASSERT_NO_FATAL_FAILURE(RunTest()); } INSTANTIATE_TEST_CASE_P(C, CopyFrameTest, ::testing::Values(vp8_yv12_copy_frame_c)); +} // namespace } // namespace libvpx_test diff --git a/media/libvpx/libvpx/test/vpx_scale_test.h b/media/libvpx/libvpx/test/vpx_scale_test.h index dcbd02b91fa3..11c259ae807b 100644 --- a/media/libvpx/libvpx/test/vpx_scale_test.h +++ b/media/libvpx/libvpx/test/vpx_scale_test.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef TEST_VPX_SCALE_TEST_H_ -#define TEST_VPX_SCALE_TEST_H_ +#ifndef VPX_TEST_VPX_SCALE_TEST_H_ +#define VPX_TEST_VPX_SCALE_TEST_H_ #include "third_party/googletest/src/include/gtest/gtest.h" @@ -33,7 +33,8 @@ class VpxScaleBase { const int height) { memset(img, 0, sizeof(*img)); ASSERT_EQ( - 0, vp8_yv12_alloc_frame_buffer(img, width, height, VP8BORDERINPIXELS)); + 0, vp8_yv12_alloc_frame_buffer(img, width, height, VP8BORDERINPIXELS)) + << "for width: " << width << " height: " << height; memset(img->buffer_alloc, kBufFiller, img->frame_size); } @@ -197,4 +198,4 @@ class VpxScaleBase { } // namespace libvpx_test -#endif // TEST_VPX_SCALE_TEST_H_ +#endif // VPX_TEST_VPX_SCALE_TEST_H_ diff --git a/media/libvpx/libvpx/test/vpx_temporal_svc_encoder.sh b/media/libvpx/libvpx/test/vpx_temporal_svc_encoder.sh index 56a7902f4f2e..5e5bac8fa6ac 100755 --- a/media/libvpx/libvpx/test/vpx_temporal_svc_encoder.sh +++ b/media/libvpx/libvpx/test/vpx_temporal_svc_encoder.sh @@ -38,6 +38,7 @@ vpx_tsvc_encoder() { local output_file="${VPX_TEST_OUTPUT_DIR}/${output_file_base}" local timebase_num="1" local timebase_den="1000" + local timebase_den_y4m="30" local speed="6" local frame_drop_thresh="30" local max_threads="4" @@ -58,6 +59,12 @@ vpx_tsvc_encoder() { "${YUV_RAW_INPUT_HEIGHT}" "${timebase_num}" "${timebase_den}" \ "${speed}" "${frame_drop_thresh}" "${error_resilient}" "${threads}" \ "$@" ${devnull} + # Test for y4m input. + eval "${VPX_TEST_PREFIX}" "${encoder}" "${Y4M_720P_INPUT}" \ + "${output_file}" "${codec}" "${Y4M_720P_INPUT_WIDTH}" \ + "${Y4M_720P_INPUT_HEIGHT}" "${timebase_num}" "${timebase_den_y4m}" \ + "${speed}" "${frame_drop_thresh}" "${error_resilient}" "${threads}" \ + "$@" ${devnull} else eval "${VPX_TEST_PREFIX}" "${encoder}" "${YUV_RAW_INPUT}" \ "${output_file}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ @@ -85,7 +92,7 @@ files_exist() { vpx_tsvc_encoder_vp8_mode_0() { if [ "$(vp8_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp8_mode_0" + local output_basename="vpx_tsvc_encoder_vp8_mode_0" vpx_tsvc_encoder vp8 "${output_basename}" 0 200 || return 1 # Mode 0 produces 1 stream files_exist "${output_basename}" 1 || return 1 @@ -94,7 +101,7 @@ vpx_tsvc_encoder_vp8_mode_0() { vpx_tsvc_encoder_vp8_mode_1() { if [ "$(vp8_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp8_mode_1" + local output_basename="vpx_tsvc_encoder_vp8_mode_1" vpx_tsvc_encoder vp8 "${output_basename}" 1 200 400 || return 1 # Mode 1 produces 2 streams files_exist "${output_basename}" 2 || return 1 @@ -103,7 +110,7 @@ vpx_tsvc_encoder_vp8_mode_1() { vpx_tsvc_encoder_vp8_mode_2() { if [ "$(vp8_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp8_mode_2" + local output_basename="vpx_tsvc_encoder_vp8_mode_2" vpx_tsvc_encoder vp8 "${output_basename}" 2 200 400 || return 1 # Mode 2 produces 2 streams files_exist "${output_basename}" 2 || return 1 @@ -112,7 +119,7 @@ vpx_tsvc_encoder_vp8_mode_2() { vpx_tsvc_encoder_vp8_mode_3() { if [ "$(vp8_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp8_mode_3" + local output_basename="vpx_tsvc_encoder_vp8_mode_3" vpx_tsvc_encoder vp8 "${output_basename}" 3 200 400 600 || return 1 # Mode 3 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -121,7 +128,7 @@ vpx_tsvc_encoder_vp8_mode_3() { vpx_tsvc_encoder_vp8_mode_4() { if [ "$(vp8_encode_available)" = "yes" ]; then - local readonly 
output_basename="vpx_tsvc_encoder_vp8_mode_4" + local output_basename="vpx_tsvc_encoder_vp8_mode_4" vpx_tsvc_encoder vp8 "${output_basename}" 4 200 400 600 || return 1 # Mode 4 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -130,7 +137,7 @@ vpx_tsvc_encoder_vp8_mode_4() { vpx_tsvc_encoder_vp8_mode_5() { if [ "$(vp8_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp8_mode_5" + local output_basename="vpx_tsvc_encoder_vp8_mode_5" vpx_tsvc_encoder vp8 "${output_basename}" 5 200 400 600 || return 1 # Mode 5 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -139,7 +146,7 @@ vpx_tsvc_encoder_vp8_mode_5() { vpx_tsvc_encoder_vp8_mode_6() { if [ "$(vp8_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp8_mode_6" + local output_basename="vpx_tsvc_encoder_vp8_mode_6" vpx_tsvc_encoder vp8 "${output_basename}" 6 200 400 600 || return 1 # Mode 6 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -148,7 +155,7 @@ vpx_tsvc_encoder_vp8_mode_6() { vpx_tsvc_encoder_vp8_mode_7() { if [ "$(vp8_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp8_mode_7" + local output_basename="vpx_tsvc_encoder_vp8_mode_7" vpx_tsvc_encoder vp8 "${output_basename}" 7 200 400 600 800 1000 || return 1 # Mode 7 produces 5 streams files_exist "${output_basename}" 5 || return 1 @@ -157,7 +164,7 @@ vpx_tsvc_encoder_vp8_mode_7() { vpx_tsvc_encoder_vp8_mode_8() { if [ "$(vp8_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp8_mode_8" + local output_basename="vpx_tsvc_encoder_vp8_mode_8" vpx_tsvc_encoder vp8 "${output_basename}" 8 200 400 || return 1 # Mode 8 produces 2 streams files_exist "${output_basename}" 2 || return 1 @@ -166,7 +173,7 @@ vpx_tsvc_encoder_vp8_mode_8() { vpx_tsvc_encoder_vp8_mode_9() { if [ "$(vp8_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp8_mode_9" + local output_basename="vpx_tsvc_encoder_vp8_mode_9" vpx_tsvc_encoder vp8 "${output_basename}" 9 200 400 600 || return 1 # Mode 9 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -175,7 +182,7 @@ vpx_tsvc_encoder_vp8_mode_9() { vpx_tsvc_encoder_vp8_mode_10() { if [ "$(vp8_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp8_mode_10" + local output_basename="vpx_tsvc_encoder_vp8_mode_10" vpx_tsvc_encoder vp8 "${output_basename}" 10 200 400 600 || return 1 # Mode 10 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -184,7 +191,7 @@ vpx_tsvc_encoder_vp8_mode_10() { vpx_tsvc_encoder_vp8_mode_11() { if [ "$(vp8_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp8_mode_11" + local output_basename="vpx_tsvc_encoder_vp8_mode_11" vpx_tsvc_encoder vp8 "${output_basename}" 11 200 400 600 || return 1 # Mode 11 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -193,7 +200,7 @@ vpx_tsvc_encoder_vp8_mode_11() { vpx_tsvc_encoder_vp9_mode_0() { if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp9_mode_0" + local output_basename="vpx_tsvc_encoder_vp9_mode_0" vpx_tsvc_encoder vp9 "${output_basename}" 0 200 || return 1 # Mode 0 produces 1 stream files_exist "${output_basename}" 1 || return 1 @@ -202,7 +209,7 @@ vpx_tsvc_encoder_vp9_mode_0() { vpx_tsvc_encoder_vp9_mode_1() { if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly 
output_basename="vpx_tsvc_encoder_vp9_mode_1" + local output_basename="vpx_tsvc_encoder_vp9_mode_1" vpx_tsvc_encoder vp9 "${output_basename}" 1 200 400 || return 1 # Mode 1 produces 2 streams files_exist "${output_basename}" 2 || return 1 @@ -211,7 +218,7 @@ vpx_tsvc_encoder_vp9_mode_1() { vpx_tsvc_encoder_vp9_mode_2() { if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp9_mode_2" + local output_basename="vpx_tsvc_encoder_vp9_mode_2" vpx_tsvc_encoder vp9 "${output_basename}" 2 200 400 || return 1 # Mode 2 produces 2 streams files_exist "${output_basename}" 2 || return 1 @@ -220,7 +227,7 @@ vpx_tsvc_encoder_vp9_mode_2() { vpx_tsvc_encoder_vp9_mode_3() { if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp9_mode_3" + local output_basename="vpx_tsvc_encoder_vp9_mode_3" vpx_tsvc_encoder vp9 "${output_basename}" 3 200 400 600 || return 1 # Mode 3 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -229,7 +236,7 @@ vpx_tsvc_encoder_vp9_mode_3() { vpx_tsvc_encoder_vp9_mode_4() { if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp9_mode_4" + local output_basename="vpx_tsvc_encoder_vp9_mode_4" vpx_tsvc_encoder vp9 "${output_basename}" 4 200 400 600 || return 1 # Mode 4 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -238,7 +245,7 @@ vpx_tsvc_encoder_vp9_mode_4() { vpx_tsvc_encoder_vp9_mode_5() { if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp9_mode_5" + local output_basename="vpx_tsvc_encoder_vp9_mode_5" vpx_tsvc_encoder vp9 "${output_basename}" 5 200 400 600 || return 1 # Mode 5 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -247,7 +254,7 @@ vpx_tsvc_encoder_vp9_mode_5() { vpx_tsvc_encoder_vp9_mode_6() { if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp9_mode_6" + local output_basename="vpx_tsvc_encoder_vp9_mode_6" vpx_tsvc_encoder vp9 "${output_basename}" 6 200 400 600 || return 1 # Mode 6 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -256,7 +263,7 @@ vpx_tsvc_encoder_vp9_mode_6() { vpx_tsvc_encoder_vp9_mode_7() { if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp9_mode_7" + local output_basename="vpx_tsvc_encoder_vp9_mode_7" vpx_tsvc_encoder vp9 "${output_basename}" 7 200 400 600 800 1000 || return 1 # Mode 7 produces 5 streams files_exist "${output_basename}" 5 || return 1 @@ -265,7 +272,7 @@ vpx_tsvc_encoder_vp9_mode_7() { vpx_tsvc_encoder_vp9_mode_8() { if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp9_mode_8" + local output_basename="vpx_tsvc_encoder_vp9_mode_8" vpx_tsvc_encoder vp9 "${output_basename}" 8 200 400 || return 1 # Mode 8 produces 2 streams files_exist "${output_basename}" 2 || return 1 @@ -274,7 +281,7 @@ vpx_tsvc_encoder_vp9_mode_8() { vpx_tsvc_encoder_vp9_mode_9() { if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp9_mode_9" + local output_basename="vpx_tsvc_encoder_vp9_mode_9" vpx_tsvc_encoder vp9 "${output_basename}" 9 200 400 600 || return 1 # Mode 9 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -283,7 +290,7 @@ vpx_tsvc_encoder_vp9_mode_9() { vpx_tsvc_encoder_vp9_mode_10() { if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly 
output_basename="vpx_tsvc_encoder_vp9_mode_10" + local output_basename="vpx_tsvc_encoder_vp9_mode_10" vpx_tsvc_encoder vp9 "${output_basename}" 10 200 400 600 || return 1 # Mode 10 produces 3 streams files_exist "${output_basename}" 3 || return 1 @@ -292,7 +299,7 @@ vpx_tsvc_encoder_vp9_mode_10() { vpx_tsvc_encoder_vp9_mode_11() { if [ "$(vp9_encode_available)" = "yes" ]; then - local readonly output_basename="vpx_tsvc_encoder_vp9_mode_11" + local output_basename="vpx_tsvc_encoder_vp9_mode_11" vpx_tsvc_encoder vp9 "${output_basename}" 11 200 400 600 || return 1 # Mode 11 produces 3 streams files_exist "${output_basename}" 3 || return 1 diff --git a/media/libvpx/libvpx/test/vpxdec.sh b/media/libvpx/libvpx/test/vpxdec.sh index de51c8004ed7..044aa7e16d0c 100755 --- a/media/libvpx/libvpx/test/vpxdec.sh +++ b/media/libvpx/libvpx/test/vpxdec.sh @@ -18,7 +18,8 @@ vpxdec_verify_environment() { if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_WEBM_FILE}" ] || \ [ ! -e "${VP9_FPM_WEBM_FILE}" ] || \ - [ ! -e "${VP9_LT_50_FRAMES_WEBM_FILE}" ] ; then + [ ! -e "${VP9_LT_50_FRAMES_WEBM_FILE}" ] || \ + [ ! -e "${VP9_RAW_FILE}" ]; then elog "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi @@ -33,8 +34,8 @@ vpxdec_verify_environment() { # input file path and shifted away. All remaining parameters are passed through # to vpxdec. vpxdec_pipe() { - local readonly decoder="$(vpx_tool_path vpxdec)" - local readonly input="$1" + local decoder="$(vpx_tool_path vpxdec)" + local input="$1" shift cat "${input}" | eval "${VPX_TEST_PREFIX}" "${decoder}" - "$@" ${devnull} } @@ -43,8 +44,8 @@ vpxdec_pipe() { # the directory containing vpxdec. $1 one is used as the input file path and # shifted away. All remaining parameters are passed through to vpxdec. vpxdec() { - local readonly decoder="$(vpx_tool_path vpxdec)" - local readonly input="$1" + local decoder="$(vpx_tool_path vpxdec)" + local input="$1" shift eval "${VPX_TEST_PREFIX}" "${decoder}" "$input" "$@" ${devnull} } @@ -95,9 +96,9 @@ vpxdec_vp9_webm_less_than_50_frames() { # frames in actual webm_read_frame calls. if [ "$(vpxdec_can_decode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then - local readonly decoder="$(vpx_tool_path vpxdec)" - local readonly expected=10 - local readonly num_frames=$(${VPX_TEST_PREFIX} "${decoder}" \ + local decoder="$(vpx_tool_path vpxdec)" + local expected=10 + local num_frames=$(${VPX_TEST_PREFIX} "${decoder}" \ "${VP9_LT_50_FRAMES_WEBM_FILE}" --summary --noblit 2>&1 \ | awk '/^[0-9]+ decoded frames/ { print $1 }') if [ "$num_frames" -ne "$expected" ]; then @@ -107,10 +108,28 @@ vpxdec_vp9_webm_less_than_50_frames() { fi } +# Ensures VP9_RAW_FILE correctly produces 1 frame instead of causing a hang. +vpxdec_vp9_raw_file() { + # Ensure a raw file properly reports eof and doesn't cause a hang. 
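+# When /usr/bin/timeout is available it is used as a watchdog below, so a
+# regression back to the hang fails this test after 30 seconds instead of
+# blocking the whole test run.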
+ if [ "$(vpxdec_can_decode_vp9)" = "yes" ]; then + local decoder="$(vpx_tool_path vpxdec)" + local expected=1 + [ -x /usr/bin/timeout ] && local TIMEOUT="/usr/bin/timeout 30s" + local num_frames=$(${TIMEOUT} ${VPX_TEST_PREFIX} "${decoder}" \ + "${VP9_RAW_FILE}" --summary --noblit 2>&1 \ + | awk '/^[0-9]+ decoded frames/ { print $1 }') + if [ -z "$num_frames" ] || [ "$num_frames" -ne "$expected" ]; then + elog "Output frames ($num_frames) != expected ($expected)" + return 1 + fi + fi +} + vpxdec_tests="vpxdec_vp8_ivf vpxdec_vp8_ivf_pipe_input vpxdec_vp9_webm vpxdec_vp9_webm_frame_parallel - vpxdec_vp9_webm_less_than_50_frames" + vpxdec_vp9_webm_less_than_50_frames + vpxdec_vp9_raw_file" run_tests vpxdec_verify_environment "${vpxdec_tests}" diff --git a/media/libvpx/libvpx/test/vpxenc.sh b/media/libvpx/libvpx/test/vpxenc.sh index 0c160dafc037..f94e2e094a37 100755 --- a/media/libvpx/libvpx/test/vpxenc.sh +++ b/media/libvpx/libvpx/test/vpxenc.sh @@ -67,7 +67,7 @@ y4m_input_720p() { # Echo default vpxenc real time encoding params. $1 is the codec, which defaults # to vp8 if unspecified. vpxenc_rt_params() { - local readonly codec="${1:-vp8}" + local codec="${1:-vp8}" echo "--codec=${codec} --buf-initial-sz=500 --buf-optimal-sz=600 @@ -104,8 +104,8 @@ vpxenc_passes_param() { # input file path and shifted away. All remaining parameters are passed through # to vpxenc. vpxenc_pipe() { - local readonly encoder="$(vpx_tool_path vpxenc)" - local readonly input="$1" + local encoder="$(vpx_tool_path vpxenc)" + local input="$1" shift cat "${input}" | eval "${VPX_TEST_PREFIX}" "${encoder}" - \ --test-decode=fatal \ @@ -116,8 +116,8 @@ vpxenc_pipe() { # the directory containing vpxenc. $1 one is used as the input file path and # shifted away. All remaining parameters are passed through to vpxenc. 
vpxenc() { - local readonly encoder="$(vpx_tool_path vpxenc)" - local readonly input="$1" + local encoder="$(vpx_tool_path vpxenc)" + local input="$1" shift eval "${VPX_TEST_PREFIX}" "${encoder}" "${input}" \ --test-decode=fatal \ @@ -126,7 +126,7 @@ vpxenc() { vpxenc_vp8_ivf() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8.ivf" + local output="${VPX_TEST_OUTPUT_DIR}/vp8.ivf" vpxenc $(yuv_input_hantro_collage) \ --codec=vp8 \ --limit="${TEST_FRAMES}" \ @@ -143,7 +143,7 @@ vpxenc_vp8_ivf() { vpxenc_vp8_webm() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8.webm" + local output="${VPX_TEST_OUTPUT_DIR}/vp8.webm" vpxenc $(yuv_input_hantro_collage) \ --codec=vp8 \ --limit="${TEST_FRAMES}" \ @@ -159,7 +159,7 @@ vpxenc_vp8_webm() { vpxenc_vp8_webm_rt() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8_rt.webm" + local output="${VPX_TEST_OUTPUT_DIR}/vp8_rt.webm" vpxenc $(yuv_input_hantro_collage) \ $(vpxenc_rt_params vp8) \ --output="${output}" @@ -173,7 +173,7 @@ vpxenc_vp8_webm_rt() { vpxenc_vp8_webm_2pass() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8.webm" + local output="${VPX_TEST_OUTPUT_DIR}/vp8.webm" vpxenc $(yuv_input_hantro_collage) \ --codec=vp8 \ --limit="${TEST_FRAMES}" \ @@ -190,9 +190,9 @@ vpxenc_vp8_webm_2pass() { vpxenc_vp8_webm_lag10_frames20() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then - local readonly lag_total_frames=20 - local readonly lag_frames=10 - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8_lag10_frames20.webm" + local lag_total_frames=20 + local lag_frames=10 + local output="${VPX_TEST_OUTPUT_DIR}/vp8_lag10_frames20.webm" vpxenc $(yuv_input_hantro_collage) \ --codec=vp8 \ --limit="${lag_total_frames}" \ @@ -210,7 +210,7 @@ vpxenc_vp8_webm_lag10_frames20() { vpxenc_vp8_ivf_piped_input() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp8_piped_input.ivf" + local output="${VPX_TEST_OUTPUT_DIR}/vp8_piped_input.ivf" vpxenc_pipe $(yuv_input_hantro_collage) \ --codec=vp8 \ --limit="${TEST_FRAMES}" \ @@ -226,8 +226,8 @@ vpxenc_vp8_ivf_piped_input() { vpxenc_vp9_ivf() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9.ivf" - local readonly passes=$(vpxenc_passes_param) + local output="${VPX_TEST_OUTPUT_DIR}/vp9.ivf" + local passes=$(vpxenc_passes_param) vpxenc $(yuv_input_hantro_collage) \ --codec=vp9 \ --limit="${TEST_FRAMES}" \ @@ -245,8 +245,8 @@ vpxenc_vp9_ivf() { vpxenc_vp9_webm() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9.webm" - local readonly passes=$(vpxenc_passes_param) + local output="${VPX_TEST_OUTPUT_DIR}/vp9.webm" + local passes=$(vpxenc_passes_param) vpxenc $(yuv_input_hantro_collage) \ --codec=vp9 \ --limit="${TEST_FRAMES}" \ @@ -263,7 +263,7 @@ vpxenc_vp9_webm() { vpxenc_vp9_webm_rt() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_rt.webm" + local output="${VPX_TEST_OUTPUT_DIR}/vp9_rt.webm" vpxenc $(yuv_input_hantro_collage) \ $(vpxenc_rt_params vp9) \ --output="${output}" @@ -278,11 
+278,11 @@ vpxenc_vp9_webm_rt() { vpxenc_vp9_webm_rt_multithread_tiled() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_multithread_tiled.webm" - local readonly tilethread_min=2 - local readonly tilethread_max=4 - local readonly num_threads="$(seq ${tilethread_min} ${tilethread_max})" - local readonly num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})" + local output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_multithread_tiled.webm" + local tilethread_min=2 + local tilethread_max=4 + local num_threads="$(seq ${tilethread_min} ${tilethread_max})" + local num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})" for threads in ${num_threads}; do for tile_cols in ${num_tile_cols}; do @@ -291,26 +291,25 @@ vpxenc_vp9_webm_rt_multithread_tiled() { --threads=${threads} \ --tile-columns=${tile_cols} \ --output="${output}" + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + rm "${output}" done done - - if [ ! -e "${output}" ]; then - elog "Output file does not exist." - return 1 - fi - - rm "${output}" fi } vpxenc_vp9_webm_rt_multithread_tiled_frameparallel() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_mt_t_fp.webm" - local readonly tilethread_min=2 - local readonly tilethread_max=4 - local readonly num_threads="$(seq ${tilethread_min} ${tilethread_max})" - local readonly num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})" + local output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_mt_t_fp.webm" + local tilethread_min=2 + local tilethread_max=4 + local num_threads="$(seq ${tilethread_min} ${tilethread_max})" + local num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})" for threads in ${num_threads}; do for tile_cols in ${num_tile_cols}; do @@ -320,22 +319,20 @@ vpxenc_vp9_webm_rt_multithread_tiled_frameparallel() { --tile-columns=${tile_cols} \ --frame-parallel=1 \ --output="${output}" + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + rm "${output}" done done - - if [ ! -e "${output}" ]; then - elog "Output file does not exist." 
- return 1 - fi - - rm "${output}" fi } vpxenc_vp9_webm_2pass() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9.webm" + local output="${VPX_TEST_OUTPUT_DIR}/vp9.webm" vpxenc $(yuv_input_hantro_collage) \ --codec=vp9 \ --limit="${TEST_FRAMES}" \ @@ -351,8 +348,8 @@ vpxenc_vp9_webm_2pass() { vpxenc_vp9_ivf_lossless() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_lossless.ivf" - local readonly passes=$(vpxenc_passes_param) + local output="${VPX_TEST_OUTPUT_DIR}/vp9_lossless.ivf" + local passes=$(vpxenc_passes_param) vpxenc $(yuv_input_hantro_collage) \ --codec=vp9 \ --limit="${TEST_FRAMES}" \ @@ -370,8 +367,8 @@ vpxenc_vp9_ivf_lossless() { vpxenc_vp9_ivf_minq0_maxq0() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_lossless_minq0_maxq0.ivf" - local readonly passes=$(vpxenc_passes_param) + local output="${VPX_TEST_OUTPUT_DIR}/vp9_lossless_minq0_maxq0.ivf" + local passes=$(vpxenc_passes_param) vpxenc $(yuv_input_hantro_collage) \ --codec=vp9 \ --limit="${TEST_FRAMES}" \ @@ -391,10 +388,10 @@ vpxenc_vp9_ivf_minq0_maxq0() { vpxenc_vp9_webm_lag10_frames20() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then - local readonly lag_total_frames=20 - local readonly lag_frames=10 - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_lag10_frames20.webm" - local readonly passes=$(vpxenc_passes_param) + local lag_total_frames=20 + local lag_frames=10 + local output="${VPX_TEST_OUTPUT_DIR}/vp9_lag10_frames20.webm" + local passes=$(vpxenc_passes_param) vpxenc $(yuv_input_hantro_collage) \ --codec=vp9 \ --limit="${lag_total_frames}" \ @@ -414,8 +411,8 @@ vpxenc_vp9_webm_lag10_frames20() { vpxenc_vp9_webm_non_square_par() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then - local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_non_square_par.webm" - local readonly passes=$(vpxenc_passes_param) + local output="${VPX_TEST_OUTPUT_DIR}/vp9_non_square_par.webm" + local passes=$(vpxenc_passes_param) vpxenc $(y4m_input_non_square_par) \ --codec=vp9 \ --limit="${TEST_FRAMES}" \ @@ -429,6 +426,42 @@ vpxenc_vp9_webm_non_square_par() { fi } +vpxenc_vp9_webm_sharpness() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then + local sharpnesses="0 1 2 3 4 5 6 7" + local output="${VPX_TEST_OUTPUT_DIR}/vpxenc_vp9_webm_sharpness.ivf" + local last_size=0 + local this_size=0 + + for sharpness in ${sharpnesses}; do + + vpxenc $(yuv_input_hantro_collage) \ + --sharpness="${sharpness}" \ + --codec=vp9 \ + --limit=1 \ + --cpu-used=2 \ + --end-usage=q \ + --cq-level=40 \ + --output="${output}" \ + "${passes}" + + if [ ! -e "${output}" ]; then + elog "Output file does not exist." + return 1 + fi + + this_size=$(stat -c '%s' "${output}") + if [ "${this_size}" -lt "${last_size}" ]; then + elog "Higher sharpness value yielded lower file size." 
+ echo "${this_size}" " < " "${last_size}" + return 1 + fi + last_size="${this_size}" + + done + fi +} + vpxenc_tests="vpxenc_vp8_ivf vpxenc_vp8_webm vpxenc_vp8_webm_rt @@ -441,7 +474,9 @@ vpxenc_tests="vpxenc_vp8_ivf vpxenc_vp9_ivf_lossless vpxenc_vp9_ivf_minq0_maxq0 vpxenc_vp9_webm_lag10_frames20 - vpxenc_vp9_webm_non_square_par" + vpxenc_vp9_webm_non_square_par + vpxenc_vp9_webm_sharpness" + if [ "$(vpx_config_option_enabled CONFIG_REALTIME_ONLY)" != "yes" ]; then vpxenc_tests="$vpxenc_tests vpxenc_vp8_webm_2pass diff --git a/media/libvpx/libvpx/test/webm_video_source.h b/media/libvpx/libvpx/test/webm_video_source.h index 09c007a3f3f4..6f55f7db7c2a 100644 --- a/media/libvpx/libvpx/test/webm_video_source.h +++ b/media/libvpx/libvpx/test/webm_video_source.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TEST_WEBM_VIDEO_SOURCE_H_ -#define TEST_WEBM_VIDEO_SOURCE_H_ +#ifndef VPX_TEST_WEBM_VIDEO_SOURCE_H_ +#define VPX_TEST_WEBM_VIDEO_SOURCE_H_ #include #include #include @@ -90,4 +90,4 @@ class WebMVideoSource : public CompressedVideoSource { } // namespace libvpx_test -#endif // TEST_WEBM_VIDEO_SOURCE_H_ +#endif // VPX_TEST_WEBM_VIDEO_SOURCE_H_ diff --git a/media/libvpx/libvpx/test/y4m_test.cc b/media/libvpx/libvpx/test/y4m_test.cc index ced717a7c192..76d033d52a71 100644 --- a/media/libvpx/libvpx/test/y4m_test.cc +++ b/media/libvpx/libvpx/test/y4m_test.cc @@ -40,18 +40,18 @@ const Y4mTestParam kY4mTestVectors[] = { "284a47a47133b12884ec3a14e959a0b6" }, { "park_joy_90p_8_444.y4m", 8, VPX_IMG_FMT_I444, "90517ff33843d85de712fd4fe60dbed0" }, - { "park_joy_90p_10_420.y4m", 10, VPX_IMG_FMT_I42016, - "63f21f9f717d8b8631bd2288ee87137b" }, - { "park_joy_90p_10_422.y4m", 10, VPX_IMG_FMT_I42216, - "48ab51fb540aed07f7ff5af130c9b605" }, - { "park_joy_90p_10_444.y4m", 10, VPX_IMG_FMT_I44416, - "067bfd75aa85ff9bae91fa3e0edd1e3e" }, - { "park_joy_90p_12_420.y4m", 12, VPX_IMG_FMT_I42016, - "9e6d8f6508c6e55625f6b697bc461cef" }, - { "park_joy_90p_12_422.y4m", 12, VPX_IMG_FMT_I42216, - "b239c6b301c0b835485be349ca83a7e3" }, - { "park_joy_90p_12_444.y4m", 12, VPX_IMG_FMT_I44416, - "5a6481a550821dab6d0192f5c63845e9" }, + { "park_joy_90p_10_420_20f.y4m", 10, VPX_IMG_FMT_I42016, + "2f56ab9809269f074df7e3daf1ce0be6" }, + { "park_joy_90p_10_422_20f.y4m", 10, VPX_IMG_FMT_I42216, + "1b5c73d2e8e8c4e02dc4889ecac41c83" }, + { "park_joy_90p_10_444_20f.y4m", 10, VPX_IMG_FMT_I44416, + "ec4ab5be53195c5b838d1d19e1bc2674" }, + { "park_joy_90p_12_420_20f.y4m", 12, VPX_IMG_FMT_I42016, + "3370856c8ddebbd1f9bb2e66f97677f4" }, + { "park_joy_90p_12_422_20f.y4m", 12, VPX_IMG_FMT_I42216, + "4eab364318dd8201acbb182e43bd4966" }, + { "park_joy_90p_12_444_20f.y4m", 12, VPX_IMG_FMT_I44416, + "f189dfbbd92119fc8e5f211a550166be" }, }; static void write_image_file(const vpx_image_t *img, FILE *file) { diff --git a/media/libvpx/libvpx/test/y4m_video_source.h b/media/libvpx/libvpx/test/y4m_video_source.h index 1301f69703b1..89aa2a44fc9b 100644 --- a/media/libvpx/libvpx/test/y4m_video_source.h +++ b/media/libvpx/libvpx/test/y4m_video_source.h @@ -7,9 +7,10 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */
-#ifndef TEST_Y4M_VIDEO_SOURCE_H_
-#define TEST_Y4M_VIDEO_SOURCE_H_
+#ifndef VPX_TEST_Y4M_VIDEO_SOURCE_H_
+#define VPX_TEST_Y4M_VIDEO_SOURCE_H_
 #include <algorithm>
+#include <memory>
 #include <string>
 
 #include "test/video_source.h"
 
@@ -108,7 +109,7 @@ class Y4mVideoSource : public VideoSource {
   std::string file_name_;
   FILE *input_file_;
-  testing::internal::scoped_ptr<vpx_image_t> img_;
+  std::unique_ptr<vpx_image_t> img_;
   unsigned int start_;
   unsigned int limit_;
   unsigned int frame_;
@@ -119,4 +120,4 @@ class Y4mVideoSource : public VideoSource {
 
 }  // namespace libvpx_test
 
-#endif  // TEST_Y4M_VIDEO_SOURCE_H_
+#endif  // VPX_TEST_Y4M_VIDEO_SOURCE_H_
diff --git a/media/libvpx/libvpx/test/yuv_temporal_filter_test.cc b/media/libvpx/libvpx/test/yuv_temporal_filter_test.cc
new file mode 100644
index 000000000000..8f3c58b03834
--- /dev/null
+++ b/media/libvpx/libvpx/test/yuv_temporal_filter_test.cc
@@ -0,0 +1,708 @@
+/*
+ *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "test/acm_random.h"
+#include "test/buffer.h"
+#include "test/register_state_check.h"
+#include "vpx_ports/vpx_timer.h"
+
+namespace {
+
+using ::libvpx_test::ACMRandom;
+using ::libvpx_test::Buffer;
+
+typedef void (*YUVTemporalFilterFunc)(
+    const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre,
+    int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src,
+    int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre,
+    int uv_pre_stride, unsigned int block_width, unsigned int block_height,
+    int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32,
+    uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator,
+    uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count);
+
+struct TemporalFilterWithBd {
+  TemporalFilterWithBd(YUVTemporalFilterFunc func, int bitdepth)
+      : temporal_filter(func), bd(bitdepth) {}
+
+  YUVTemporalFilterFunc temporal_filter;
+  int bd;
+};
+
+std::ostream &operator<<(std::ostream &os, const TemporalFilterWithBd &tf) {
+  return os << "Bitdepth: " << tf.bd;
+}
+
+int GetFilterWeight(unsigned int row, unsigned int col,
+                    unsigned int block_height, unsigned int block_width,
+                    const int *const blk_fw, int use_32x32) {
+  if (use_32x32) {
+    return blk_fw[0];
+  }
+
+  return blk_fw[2 * (row >= block_height / 2) + (col >= block_width / 2)];
+}
+
+template <typename PixelType>
+int GetModIndex(int sum_dist, int index, int rounding, int strength,
+                int filter_weight) {
+  int mod = sum_dist * 3 / index;
+  mod += rounding;
+  mod >>= strength;
+
+  mod = VPXMIN(16, mod);
+
+  mod = 16 - mod;
+  mod *= filter_weight;
+
+  return mod;
+}
+
+template <>
+int GetModIndex<uint8_t>(int sum_dist, int index, int rounding, int strength,
+                         int filter_weight) {
+  unsigned int index_mult[14] = { 0,     0,     0,     0,     49152,
+                                  39322, 32768, 28087, 24576, 21846,
+                                  19661, 17874, 0,     15124 };
+
+  assert(index >= 0 && index <= 13);
+  assert(index_mult[index] != 0);
+
+  int mod = (clamp(sum_dist, 0, UINT16_MAX) * index_mult[index]) >> 16;
+  mod += rounding;
+  mod >>= strength;
+
+  mod = VPXMIN(16, mod);
+
+  mod = 16 - mod;
+  mod *= filter_weight;
+
+  return mod;
+}
+
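+// index_mult[i] approximates 3 / i in fixed point: 49152 = (3 << 16) / 4,
+// 32768 = (3 << 16) / 6, and so on, so the multiply-and-shift above stands in
+// for sum_dist * 3 / index without a division. The specialization below does
+// the same with 32 fractional bits (3221225472 = (3 << 32) / 4) to keep the
+// wider high bit-depth sums exact. Zero entries mark index values that never
+// occur; the asserts reject them.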
+template <>
+int GetModIndex<uint16_t>(int sum_dist, int index, int rounding, int strength,
+                          int filter_weight) {
+  int64_t index_mult[14] = { 0U,          0U,          0U,          0U,
+                             3221225472U, 2576980378U, 2147483648U, 1840700270U,
+                             1610612736U, 1431655766U, 1288490189U, 1171354718U,
+                             0U,          991146300U };
+
+  assert(index >= 0 && index <= 13);
+  assert(index_mult[index] != 0);
+
+  int mod = static_cast<int>((sum_dist * index_mult[index]) >> 32);
+  mod += rounding;
+  mod >>= strength;
+
+  mod = VPXMIN(16, mod);
+
+  mod = 16 - mod;
+  mod *= filter_weight;
+
+  return mod;
+}
+
+template <typename PixelType>
+void ApplyReferenceFilter(
+    const Buffer<PixelType> &y_src, const Buffer<PixelType> &y_pre,
+    const Buffer<PixelType> &u_src, const Buffer<PixelType> &v_src,
+    const Buffer<PixelType> &u_pre, const Buffer<PixelType> &v_pre,
+    unsigned int block_width, unsigned int block_height, int ss_x, int ss_y,
+    int strength, const int *const blk_fw, int use_32x32,
+    Buffer<uint32_t> *y_accumulator, Buffer<uint16_t> *y_counter,
+    Buffer<uint32_t> *u_accumulator, Buffer<uint16_t> *u_counter,
+    Buffer<uint32_t> *v_accumulator, Buffer<uint16_t> *v_counter) {
+  const PixelType *y_src_ptr = y_src.TopLeftPixel();
+  const PixelType *y_pre_ptr = y_pre.TopLeftPixel();
+  const PixelType *u_src_ptr = u_src.TopLeftPixel();
+  const PixelType *u_pre_ptr = u_pre.TopLeftPixel();
+  const PixelType *v_src_ptr = v_src.TopLeftPixel();
+  const PixelType *v_pre_ptr = v_pre.TopLeftPixel();
+
+  const int uv_block_width = block_width >> ss_x,
+            uv_block_height = block_height >> ss_y;
+  const int y_src_stride = y_src.stride(), y_pre_stride = y_pre.stride();
+  const int uv_src_stride = u_src.stride(), uv_pre_stride = u_pre.stride();
+  const int y_diff_stride = block_width, uv_diff_stride = uv_block_width;
+
+  Buffer<int> y_dif = Buffer<int>(block_width, block_height, 0);
+  Buffer<int> u_dif = Buffer<int>(uv_block_width, uv_block_height, 0);
+  Buffer<int> v_dif = Buffer<int>(uv_block_width, uv_block_height, 0);
+
+  ASSERT_TRUE(y_dif.Init());
+  ASSERT_TRUE(u_dif.Init());
+  ASSERT_TRUE(v_dif.Init());
+  y_dif.Set(0);
+  u_dif.Set(0);
+  v_dif.Set(0);
+
+  int *y_diff_ptr = y_dif.TopLeftPixel();
+  int *u_diff_ptr = u_dif.TopLeftPixel();
+  int *v_diff_ptr = v_dif.TopLeftPixel();
+
+  uint32_t *y_accum = y_accumulator->TopLeftPixel();
+  uint32_t *u_accum = u_accumulator->TopLeftPixel();
+  uint32_t *v_accum = v_accumulator->TopLeftPixel();
+  uint16_t *y_count = y_counter->TopLeftPixel();
+  uint16_t *u_count = u_counter->TopLeftPixel();
+  uint16_t *v_count = v_counter->TopLeftPixel();
+
+  const int y_accum_stride = y_accumulator->stride();
+  const int u_accum_stride = u_accumulator->stride();
+  const int v_accum_stride = v_accumulator->stride();
+  const int y_count_stride = y_counter->stride();
+  const int u_count_stride = u_counter->stride();
+  const int v_count_stride = v_counter->stride();
+
+  const int rounding = (1 << strength) >> 1;
+
+  // Get the square diffs
+  for (int row = 0; row < static_cast<int>(block_height); row++) {
+    for (int col = 0; col < static_cast<int>(block_width); col++) {
+      const int diff = y_src_ptr[row * y_src_stride + col] -
+                       y_pre_ptr[row * y_pre_stride + col];
+      y_diff_ptr[row * y_diff_stride + col] = diff * diff;
+    }
+  }
+
+  for (int row = 0; row < uv_block_height; row++) {
+    for (int col = 0; col < uv_block_width; col++) {
+      const int u_diff = u_src_ptr[row * uv_src_stride + col] -
+                         u_pre_ptr[row * uv_pre_stride + col];
+      const int v_diff = v_src_ptr[row * uv_src_stride + col] -
+                         v_pre_ptr[row * uv_pre_stride + col];
+      u_diff_ptr[row * uv_diff_stride + col] = u_diff * u_diff;
+      v_diff_ptr[row * uv_diff_stride + col] = v_diff * v_diff;
+    }
+  }
+
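+  // For every pixel, the loops below sum the squared differences over the
+  // surrounding 3x3 neighborhood plus the co-located samples of the other
+  // plane(s), convert the sum to a weight via GetModIndex, and accumulate
+  // weight * pixel and weight into the accumulator and count planes. A larger
+  // local error therefore means a smaller contribution from this frame.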
+  // Apply the filter to luma
+  for (int row = 0; row < static_cast<int>(block_height); row++) {
+    for (int col = 0; col < static_cast<int>(block_width); col++) {
+      const int uv_row = row >> ss_y;
+      const int uv_col = col >> ss_x;
+      const int filter_weight = GetFilterWeight(row, col, block_height,
+                                                block_width, blk_fw, use_32x32);
+
+      // First we get the modifier for the current y pixel
+      const int y_pixel = y_pre_ptr[row * y_pre_stride + col];
+      int y_num_used = 0;
+      int y_mod = 0;
+
+      // Sum the neighboring 3x3 y pixels
+      for (int row_step = -1; row_step <= 1; row_step++) {
+        for (int col_step = -1; col_step <= 1; col_step++) {
+          const int sub_row = row + row_step;
+          const int sub_col = col + col_step;
+
+          if (sub_row >= 0 && sub_row < static_cast<int>(block_height) &&
+              sub_col >= 0 && sub_col < static_cast<int>(block_width)) {
+            y_mod += y_diff_ptr[sub_row * y_diff_stride + sub_col];
+            y_num_used++;
+          }
+        }
+      }
+
+      // Sum the corresponding uv pixels to the current y modifier
+      // Note we are rounding down instead of rounding to the nearest pixel.
+      y_mod += u_diff_ptr[uv_row * uv_diff_stride + uv_col];
+      y_mod += v_diff_ptr[uv_row * uv_diff_stride + uv_col];
+
+      y_num_used += 2;
+
+      // Set the modifier
+      y_mod = GetModIndex<PixelType>(y_mod, y_num_used, rounding, strength,
+                                     filter_weight);
+
+      // Accumulate the result
+      y_count[row * y_count_stride + col] += y_mod;
+      y_accum[row * y_accum_stride + col] += y_mod * y_pixel;
+    }
+  }
+
+  // Apply the filter to chroma
+  for (int uv_row = 0; uv_row < uv_block_height; uv_row++) {
+    for (int uv_col = 0; uv_col < uv_block_width; uv_col++) {
+      const int y_row = uv_row << ss_y;
+      const int y_col = uv_col << ss_x;
+      const int filter_weight = GetFilterWeight(
+          uv_row, uv_col, uv_block_height, uv_block_width, blk_fw, use_32x32);
+
+      const int u_pixel = u_pre_ptr[uv_row * uv_pre_stride + uv_col];
+      const int v_pixel = v_pre_ptr[uv_row * uv_pre_stride + uv_col];
+
+      int uv_num_used = 0;
+      int u_mod = 0, v_mod = 0;
+
+      // Sum the neighboring 3x3 chroma pixels to the chroma modifier
+      for (int row_step = -1; row_step <= 1; row_step++) {
+        for (int col_step = -1; col_step <= 1; col_step++) {
+          const int sub_row = uv_row + row_step;
+          const int sub_col = uv_col + col_step;
+
+          if (sub_row >= 0 && sub_row < uv_block_height && sub_col >= 0 &&
+              sub_col < uv_block_width) {
+            u_mod += u_diff_ptr[sub_row * uv_diff_stride + sub_col];
+            v_mod += v_diff_ptr[sub_row * uv_diff_stride + sub_col];
+            uv_num_used++;
+          }
+        }
+      }
+
+      // Sum all the luma pixels associated with the current chroma pixel
+      for (int row_step = 0; row_step < 1 + ss_y; row_step++) {
+        for (int col_step = 0; col_step < 1 + ss_x; col_step++) {
+          const int sub_row = y_row + row_step;
+          const int sub_col = y_col + col_step;
+          const int y_diff = y_diff_ptr[sub_row * y_diff_stride + sub_col];
+
+          u_mod += y_diff;
+          v_mod += y_diff;
+          uv_num_used++;
+        }
+      }
+
+      // Set the modifier
+      u_mod = GetModIndex<PixelType>(u_mod, uv_num_used, rounding, strength,
+                                     filter_weight);
+      v_mod = GetModIndex<PixelType>(v_mod, uv_num_used, rounding, strength,
+                                     filter_weight);
+
+      // Accumulate the result
+      u_count[uv_row * u_count_stride + uv_col] += u_mod;
+      u_accum[uv_row * u_accum_stride + uv_col] += u_mod * u_pixel;
+      v_count[uv_row * v_count_stride + uv_col] += v_mod;
+      v_accum[uv_row * v_accum_stride + uv_col] += v_mod * v_pixel;
+    }
+  }
+}
+
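+// Test harness: runs the function under test against ApplyReferenceFilter
+// above and compares the resulting accumulator and count planes for all of
+// Y, U and V.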
+class YUVTemporalFilterTest
+    : public ::testing::TestWithParam<TemporalFilterWithBd> {
+ public:
+  virtual void SetUp() {
+    filter_func_ = GetParam().temporal_filter;
+    bd_ = GetParam().bd;
+    use_highbd_ = (bd_ != 8);
+
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    saturate_test_ = 0;
+    num_repeats_ = 10;
+
+    ASSERT_TRUE(bd_ == 8 || bd_ == 10 || bd_ == 12);
+  }
+
+ protected:
+  template <typename PixelType>
+  void CompareTestWithParam(int width, int height, int ss_x, int ss_y,
+                            int filter_strength, int use_32x32,
+                            const int *filter_weight);
+  template <typename PixelType>
+  void RunTestFilterWithParam(int width, int height, int ss_x, int ss_y,
+                              int filter_strength, int use_32x32,
+                              const int *filter_weight);
+  YUVTemporalFilterFunc filter_func_;
+  ACMRandom rnd_;
+  int saturate_test_;
+  int num_repeats_;
+  int use_highbd_;
+  int bd_;
+};
+
+template <typename PixelType>
+void YUVTemporalFilterTest::CompareTestWithParam(int width, int height,
+                                                 int ss_x, int ss_y,
+                                                 int filter_strength,
+                                                 int use_32x32,
+                                                 const int *filter_weight) {
+  const int uv_width = width >> ss_x, uv_height = height >> ss_y;
+
+  Buffer<PixelType> y_src = Buffer<PixelType>(width, height, 0);
+  Buffer<PixelType> y_pre = Buffer<PixelType>(width, height, 0);
+  Buffer<uint16_t> y_count_ref = Buffer<uint16_t>(width, height, 0);
+  Buffer<uint32_t> y_accum_ref = Buffer<uint32_t>(width, height, 0);
+  Buffer<uint16_t> y_count_tst = Buffer<uint16_t>(width, height, 0);
+  Buffer<uint32_t> y_accum_tst = Buffer<uint32_t>(width, height, 0);
+
+  Buffer<PixelType> u_src = Buffer<PixelType>(uv_width, uv_height, 0);
+  Buffer<PixelType> u_pre = Buffer<PixelType>(uv_width, uv_height, 0);
+  Buffer<uint16_t> u_count_ref = Buffer<uint16_t>(uv_width, uv_height, 0);
+  Buffer<uint32_t> u_accum_ref = Buffer<uint32_t>(uv_width, uv_height, 0);
+  Buffer<uint16_t> u_count_tst = Buffer<uint16_t>(uv_width, uv_height, 0);
+  Buffer<uint32_t> u_accum_tst = Buffer<uint32_t>(uv_width, uv_height, 0);
+
+  Buffer<PixelType> v_src = Buffer<PixelType>(uv_width, uv_height, 0);
+  Buffer<PixelType> v_pre = Buffer<PixelType>(uv_width, uv_height, 0);
+  Buffer<uint16_t> v_count_ref = Buffer<uint16_t>(uv_width, uv_height, 0);
+  Buffer<uint32_t> v_accum_ref = Buffer<uint32_t>(uv_width, uv_height, 0);
+  Buffer<uint16_t> v_count_tst = Buffer<uint16_t>(uv_width, uv_height, 0);
+  Buffer<uint32_t> v_accum_tst = Buffer<uint32_t>(uv_width, uv_height, 0);
+
+  ASSERT_TRUE(y_src.Init());
+  ASSERT_TRUE(y_pre.Init());
+  ASSERT_TRUE(y_count_ref.Init());
+  ASSERT_TRUE(y_accum_ref.Init());
+  ASSERT_TRUE(y_count_tst.Init());
+  ASSERT_TRUE(y_accum_tst.Init());
+  ASSERT_TRUE(u_src.Init());
+  ASSERT_TRUE(u_pre.Init());
+  ASSERT_TRUE(u_count_ref.Init());
+  ASSERT_TRUE(u_accum_ref.Init());
+  ASSERT_TRUE(u_count_tst.Init());
+  ASSERT_TRUE(u_accum_tst.Init());
+
+  ASSERT_TRUE(v_src.Init());
+  ASSERT_TRUE(v_pre.Init());
+  ASSERT_TRUE(v_count_ref.Init());
+  ASSERT_TRUE(v_accum_ref.Init());
+  ASSERT_TRUE(v_count_tst.Init());
+  ASSERT_TRUE(v_accum_tst.Init());
+
+  y_accum_ref.Set(0);
+  y_accum_tst.Set(0);
+  y_count_ref.Set(0);
+  y_count_tst.Set(0);
+  u_accum_ref.Set(0);
+  u_accum_tst.Set(0);
+  u_count_ref.Set(0);
+  u_count_tst.Set(0);
+  v_accum_ref.Set(0);
+  v_accum_tst.Set(0);
+  v_count_ref.Set(0);
+  v_count_tst.Set(0);
+
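+  // The accumulator and count buffers are cleared only once, so each repeat
+  // below keeps accumulating into them; the reference and test outputs must
+  // stay equal after every iteration, not just the first.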
+  for (int repeats = 0; repeats < num_repeats_; repeats++) {
+    if (saturate_test_) {
+      const int max_val = (1 << bd_) - 1;
+      y_src.Set(max_val);
+      y_pre.Set(0);
+      u_src.Set(max_val);
+      u_pre.Set(0);
+      v_src.Set(max_val);
+      v_pre.Set(0);
+    } else {
+      y_src.Set(&rnd_, 0, 7 << (bd_ - 8));
+      y_pre.Set(&rnd_, 0, 7 << (bd_ - 8));
+      u_src.Set(&rnd_, 0, 7 << (bd_ - 8));
+      u_pre.Set(&rnd_, 0, 7 << (bd_ - 8));
+      v_src.Set(&rnd_, 0, 7 << (bd_ - 8));
+      v_pre.Set(&rnd_, 0, 7 << (bd_ - 8));
+    }
+
+    ApplyReferenceFilter<PixelType>(
+        y_src, y_pre, u_src, v_src, u_pre, v_pre, width, height, ss_x, ss_y,
+        filter_strength, filter_weight, use_32x32, &y_accum_ref, &y_count_ref,
+        &u_accum_ref, &u_count_ref, &v_accum_ref, &v_count_ref);
+
+    ASM_REGISTER_STATE_CHECK(filter_func_(
+        reinterpret_cast<const uint8_t *>(y_src.TopLeftPixel()),
+        y_src.stride(),
+        reinterpret_cast<const uint8_t *>(y_pre.TopLeftPixel()),
+        y_pre.stride(),
+        reinterpret_cast<const uint8_t *>(u_src.TopLeftPixel()),
+        reinterpret_cast<const uint8_t *>(v_src.TopLeftPixel()),
+        u_src.stride(),
+        reinterpret_cast<const uint8_t *>(u_pre.TopLeftPixel()),
+        reinterpret_cast<const uint8_t *>(v_pre.TopLeftPixel()),
+        u_pre.stride(), width, height, ss_x, ss_y, filter_strength,
+        filter_weight, use_32x32, y_accum_tst.TopLeftPixel(),
+        y_count_tst.TopLeftPixel(), u_accum_tst.TopLeftPixel(),
+        u_count_tst.TopLeftPixel(), v_accum_tst.TopLeftPixel(),
+        v_count_tst.TopLeftPixel()));
+
+    EXPECT_TRUE(y_accum_tst.CheckValues(y_accum_ref));
+    EXPECT_TRUE(y_count_tst.CheckValues(y_count_ref));
+    EXPECT_TRUE(u_accum_tst.CheckValues(u_accum_ref));
+    EXPECT_TRUE(u_count_tst.CheckValues(u_count_ref));
+    EXPECT_TRUE(v_accum_tst.CheckValues(v_accum_ref));
+    EXPECT_TRUE(v_count_tst.CheckValues(v_count_ref));
+
+    if (HasFailure()) {
+      if (use_32x32) {
+        printf("SS_X: %d, SS_Y: %d, Strength: %d, Weight: %d\n", ss_x, ss_y,
+               filter_strength, *filter_weight);
+      } else {
+        printf("SS_X: %d, SS_Y: %d, Strength: %d, Weights: %d,%d,%d,%d\n",
+               ss_x, ss_y, filter_strength, filter_weight[0],
+               filter_weight[1], filter_weight[2], filter_weight[3]);
+      }
+      y_accum_tst.PrintDifference(y_accum_ref);
+      y_count_tst.PrintDifference(y_count_ref);
+      u_accum_tst.PrintDifference(u_accum_ref);
+      u_count_tst.PrintDifference(u_count_ref);
+      v_accum_tst.PrintDifference(v_accum_ref);
+      v_count_tst.PrintDifference(v_count_ref);
+
+      return;
+    }
+  }
+}
+
+template <typename PixelType>
+void YUVTemporalFilterTest::RunTestFilterWithParam(int width, int height,
+                                                   int ss_x, int ss_y,
+                                                   int filter_strength,
+                                                   int use_32x32,
+                                                   const int *filter_weight) {
+  const int uv_width = width >> ss_x, uv_height = height >> ss_y;
+
+  Buffer<PixelType> y_src = Buffer<PixelType>(width, height, 0);
+  Buffer<PixelType> y_pre = Buffer<PixelType>(width, height, 0);
+  Buffer<uint16_t> y_count = Buffer<uint16_t>(width, height, 0);
+  Buffer<uint32_t> y_accum = Buffer<uint32_t>(width, height, 0);
+
+  Buffer<PixelType> u_src = Buffer<PixelType>(uv_width, uv_height, 0);
+  Buffer<PixelType> u_pre = Buffer<PixelType>(uv_width, uv_height, 0);
+  Buffer<uint16_t> u_count = Buffer<uint16_t>(uv_width, uv_height, 0);
+  Buffer<uint32_t> u_accum = Buffer<uint32_t>(uv_width, uv_height, 0);
+
+  Buffer<PixelType> v_src = Buffer<PixelType>(uv_width, uv_height, 0);
+  Buffer<PixelType> v_pre = Buffer<PixelType>(uv_width, uv_height, 0);
+  Buffer<uint16_t> v_count = Buffer<uint16_t>(uv_width, uv_height, 0);
+  Buffer<uint32_t> v_accum = Buffer<uint32_t>(uv_width, uv_height, 0);
+
+  ASSERT_TRUE(y_src.Init());
+  ASSERT_TRUE(y_pre.Init());
+  ASSERT_TRUE(y_count.Init());
+  ASSERT_TRUE(y_accum.Init());
+
+  ASSERT_TRUE(u_src.Init());
+  ASSERT_TRUE(u_pre.Init());
+  ASSERT_TRUE(u_count.Init());
+  ASSERT_TRUE(u_accum.Init());
+
+  ASSERT_TRUE(v_src.Init());
+  ASSERT_TRUE(v_pre.Init());
+  ASSERT_TRUE(v_count.Init());
+  ASSERT_TRUE(v_accum.Init());
+
+  y_accum.Set(0);
+  y_count.Set(0);
+
+  u_accum.Set(0);
+  u_count.Set(0);
+
+  v_accum.Set(0);
+  v_count.Set(0);
+
+  y_src.Set(&rnd_, 0, 7 << (bd_ - 8));
+  y_pre.Set(&rnd_, 0, 7 << (bd_ - 8));
+  u_src.Set(&rnd_, 0, 7 << (bd_ - 8));
+  u_pre.Set(&rnd_, 0, 7 << (bd_ - 8));
+  v_src.Set(&rnd_, 0, 7 << (bd_ - 8));
+  v_pre.Set(&rnd_, 0, 7 << (bd_ - 8));
+
+  for (int repeats = 0; repeats < num_repeats_; repeats++) {
+    ASM_REGISTER_STATE_CHECK(filter_func_(
+        reinterpret_cast<const uint8_t *>(y_src.TopLeftPixel()),
+        y_src.stride(),
+        reinterpret_cast<const uint8_t *>(y_pre.TopLeftPixel()),
+        y_pre.stride(),
+        reinterpret_cast<const uint8_t *>(u_src.TopLeftPixel()),
+        reinterpret_cast<const uint8_t *>(v_src.TopLeftPixel()),
+        u_src.stride(),
+        reinterpret_cast<const uint8_t *>(u_pre.TopLeftPixel()),
+        reinterpret_cast<const uint8_t *>(v_pre.TopLeftPixel()),
+        u_pre.stride(), width, height, ss_x, ss_y, filter_strength,
+        filter_weight, use_32x32, y_accum.TopLeftPixel(),
+        y_count.TopLeftPixel(), u_accum.TopLeftPixel(),
+        u_count.TopLeftPixel(), v_accum.TopLeftPixel(),
+        v_count.TopLeftPixel()));
+  }
+}
+
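+// The tests below raise the strength by 2 * (bd_ - 8) at high bit depth:
+// squared pixel differences grow by 2^(2 * (bd_ - 8)), and GetModIndex
+// shifts the error sum right by strength, so the adjustment keeps the
+// computed weights in the same range as at 8 bits.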
+TEST_P(YUVTemporalFilterTest, Use32x32) {
+  const int width = 32, height = 32;
+  const int use_32x32 = 1;
+
+  for (int ss_x = 0; ss_x <= 1; ss_x++) {
+    for (int ss_y = 0; ss_y <= 1; ss_y++) {
+      for (int filter_strength = 0; filter_strength <= 6;
+           filter_strength += 2) {
+        for (int filter_weight = 0; filter_weight <= 2; filter_weight++) {
+          if (use_highbd_) {
+            const int adjusted_strength = filter_strength + 2 * (bd_ - 8);
+            CompareTestWithParam<uint16_t>(width, height, ss_x, ss_y,
+                                           adjusted_strength, use_32x32,
+                                           &filter_weight);
+          } else {
+            CompareTestWithParam<uint8_t>(width, height, ss_x, ss_y,
+                                          filter_strength, use_32x32,
+                                          &filter_weight);
+          }
+          ASSERT_FALSE(HasFailure());
+        }
+      }
+    }
+  }
+}
+
+TEST_P(YUVTemporalFilterTest, Use16x16) {
+  const int width = 32, height = 32;
+  const int use_32x32 = 0;
+
+  for (int ss_x = 0; ss_x <= 1; ss_x++) {
+    for (int ss_y = 0; ss_y <= 1; ss_y++) {
+      for (int filter_idx = 0; filter_idx < 3 * 3 * 3 * 3; filter_idx++) {
+        // Set up the filter: filter_idx is decoded as a base-3 number, one
+        // digit per 16x16 quadrant, covering all 3^4 = 81 combinations of
+        // the per-block weights {0, 1, 2}.
+        int filter_weight[4];
+        int filter_idx_cp = filter_idx;
+        for (int idx = 0; idx < 4; idx++) {
+          filter_weight[idx] = filter_idx_cp % 3;
+          filter_idx_cp /= 3;
+        }
+
+        // Test each parameter
+        for (int filter_strength = 0; filter_strength <= 6;
+             filter_strength += 2) {
+          if (use_highbd_) {
+            const int adjusted_strength = filter_strength + 2 * (bd_ - 8);
+            CompareTestWithParam<uint16_t>(width, height, ss_x, ss_y,
+                                           adjusted_strength, use_32x32,
+                                           filter_weight);
+          } else {
+            CompareTestWithParam<uint8_t>(width, height, ss_x, ss_y,
+                                          filter_strength, use_32x32,
+                                          filter_weight);
+          }
+
+          ASSERT_FALSE(HasFailure());
+        }
+      }
+    }
+  }
+}
+
+TEST_P(YUVTemporalFilterTest, SaturationTest) {
+  const int width = 32, height = 32;
+  const int use_32x32 = 1;
+  const int filter_weight = 1;
+  saturate_test_ = 1;
+
+  for (int ss_x = 0; ss_x <= 1; ss_x++) {
+    for (int ss_y = 0; ss_y <= 1; ss_y++) {
+      for (int filter_strength = 0; filter_strength <= 6;
+           filter_strength += 2) {
+        if (use_highbd_) {
+          const int adjusted_strength = filter_strength + 2 * (bd_ - 8);
+          CompareTestWithParam<uint16_t>(width, height, ss_x, ss_y,
+                                         adjusted_strength, use_32x32,
+                                         &filter_weight);
+        } else {
+          CompareTestWithParam<uint8_t>(width, height, ss_x, ss_y,
+                                        filter_strength, use_32x32,
+                                        &filter_weight);
+        }
+
+        ASSERT_FALSE(HasFailure());
+      }
+    }
+  }
+}
+
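+// Speed measurement only; excluded from normal runs by the DISABLED_ prefix
+// and run explicitly with --gtest_also_run_disabled_tests.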
+TEST_P(YUVTemporalFilterTest, DISABLED_Speed) {
+  const int width = 32, height = 32;
+  num_repeats_ = 1000;
+
+  for (int use_32x32 = 0; use_32x32 <= 1; use_32x32++) {
+    const int num_filter_weights = use_32x32 ? 3 : 3 * 3 * 3 * 3;
+    for (int ss_x = 0; ss_x <= 1; ss_x++) {
+      for (int ss_y = 0; ss_y <= 1; ss_y++) {
+        for (int filter_idx = 0; filter_idx < num_filter_weights;
+             filter_idx++) {
+          // Set up the filter
+          int filter_weight[4];
+          int filter_idx_cp = filter_idx;
+          for (int idx = 0; idx < 4; idx++) {
+            filter_weight[idx] = filter_idx_cp % 3;
+            filter_idx_cp /= 3;
+          }
+
+          // Test each parameter
+          for (int filter_strength = 0; filter_strength <= 6;
+               filter_strength += 2) {
+            vpx_usec_timer timer;
+            vpx_usec_timer_start(&timer);
+
+            if (use_highbd_) {
+              RunTestFilterWithParam<uint16_t>(width, height, ss_x, ss_y,
+                                               filter_strength, use_32x32,
+                                               filter_weight);
+            } else {
+              RunTestFilterWithParam<uint8_t>(width, height, ss_x, ss_y,
+                                              filter_strength, use_32x32,
+                                              filter_weight);
+            }
+
+            vpx_usec_timer_mark(&timer);
+            const int elapsed_time =
+                static_cast<int>(vpx_usec_timer_elapsed(&timer));
+
+            printf(
+                "Bitdepth: %d, Use 32X32: %d, SS_X: %d, SS_Y: %d, Weight Idx: "
+                "%d, Strength: %d, Time: %5d\n",
+                bd_, use_32x32, ss_x, ss_y, filter_idx, filter_strength,
+                elapsed_time);
+          }
+        }
+      }
+    }
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#define WRAP_HIGHBD_FUNC(func, bd)                                            \
+  void wrap_##func##_##bd(                                                    \
+      const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre,           \
+      int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src,           \
+      int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre,          \
+      int uv_pre_stride, unsigned int block_width, unsigned int block_height, \
+      int ss_x, int ss_y, int strength, const int *const blk_fw,              \
+      int use_32x32, uint32_t *y_accumulator, uint16_t *y_count,              \
+      uint32_t *u_accumulator, uint16_t *u_count, uint32_t *v_accumulator,    \
+      uint16_t *v_count) {                                                    \
+    func(reinterpret_cast<const uint16_t *>(y_src), y_src_stride,             \
+         reinterpret_cast<const uint16_t *>(y_pre), y_pre_stride,             \
+         reinterpret_cast<const uint16_t *>(u_src),                           \
+         reinterpret_cast<const uint16_t *>(v_src), uv_src_stride,            \
+         reinterpret_cast<const uint16_t *>(u_pre),                           \
+         reinterpret_cast<const uint16_t *>(v_pre), uv_pre_stride,            \
+         block_width, block_height, ss_x, ss_y, strength, blk_fw, use_32x32,  \
+         y_accumulator, y_count, u_accumulator, u_count, v_accumulator,       \
+         v_count);                                                            \
+  }
+
+WRAP_HIGHBD_FUNC(vp9_highbd_apply_temporal_filter_c, 10);
+WRAP_HIGHBD_FUNC(vp9_highbd_apply_temporal_filter_c, 12);
+
+INSTANTIATE_TEST_CASE_P(
+    C, YUVTemporalFilterTest,
+    ::testing::Values(
+        TemporalFilterWithBd(&wrap_vp9_highbd_apply_temporal_filter_c_10, 10),
+        TemporalFilterWithBd(&wrap_vp9_highbd_apply_temporal_filter_c_12,
+                             12)));
+
+#if HAVE_SSE4_1
+WRAP_HIGHBD_FUNC(vp9_highbd_apply_temporal_filter_sse4_1, 10);
+WRAP_HIGHBD_FUNC(vp9_highbd_apply_temporal_filter_sse4_1, 12);
+
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, YUVTemporalFilterTest,
+    ::testing::Values(
+        TemporalFilterWithBd(&wrap_vp9_highbd_apply_temporal_filter_sse4_1_10,
+                             10),
+        TemporalFilterWithBd(&wrap_vp9_highbd_apply_temporal_filter_sse4_1_12,
+                             12)));
+#endif  // HAVE_SSE4_1
+#else
+INSTANTIATE_TEST_CASE_P(
+    C, YUVTemporalFilterTest,
+    ::testing::Values(TemporalFilterWithBd(&vp9_apply_temporal_filter_c, 8)));
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(SSE4_1, YUVTemporalFilterTest,
+                        ::testing::Values(TemporalFilterWithBd(
+                            &vp9_apply_temporal_filter_sse4_1, 8)));
+#endif  // HAVE_SSE4_1
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}  // namespace
diff --git a/media/libvpx/libvpx/test/yuv_video_source.h b/media/libvpx/libvpx/test/yuv_video_source.h
index aee6b2ffbb87..020ce801d938 100644
--- a/media/libvpx/libvpx/test/yuv_video_source.h
+++ b/media/libvpx/libvpx/test/yuv_video_source.h
@@ -7,8 +7,8 @@
  * in the file PATENTS.
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TEST_YUV_VIDEO_SOURCE_H_ -#define TEST_YUV_VIDEO_SOURCE_H_ +#ifndef VPX_TEST_YUV_VIDEO_SOURCE_H_ +#define VPX_TEST_YUV_VIDEO_SOURCE_H_ #include #include @@ -122,4 +122,4 @@ class YUVVideoSource : public VideoSource { } // namespace libvpx_test -#endif // TEST_YUV_VIDEO_SOURCE_H_ +#endif // VPX_TEST_YUV_VIDEO_SOURCE_H_ diff --git a/media/libvpx/libvpx/third_party/googletest/README.libvpx b/media/libvpx/libvpx/third_party/googletest/README.libvpx index 2cd6910b4100..49005ddac98e 100644 --- a/media/libvpx/libvpx/third_party/googletest/README.libvpx +++ b/media/libvpx/libvpx/third_party/googletest/README.libvpx @@ -1,5 +1,5 @@ -URL: https://github.com/google/googletest -Version: 1.8.0 +URL: https://github.com/google/googletest.git +Version: release-1.8.1 License: BSD License File: LICENSE @@ -13,12 +13,16 @@ generation. Local Modifications: - Remove everything but: - googletest-release-1.8.0/googletest/ + googletest-release-1.8.1/googletest/ CHANGES CONTRIBUTORS include LICENSE README.md src -- Suppress unsigned overflow instrumentation in the LCG - https://github.com/google/googletest/pull/1066 + +- Make WithParamInterface::GetParam static in order to avoid + initialization issues + https://github.com/google/googletest/pull/1830 +- Use wcslen() instead of std::wcslen() + https://github.com/google/googletest/pull/1899 diff --git a/media/libvpx/libvpx/third_party/googletest/src/README.md b/media/libvpx/libvpx/third_party/googletest/src/README.md index edd4408054b3..e30fe8047127 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/README.md +++ b/media/libvpx/libvpx/third_party/googletest/src/README.md @@ -1,23 +1,21 @@ +### Generic Build Instructions -### Generic Build Instructions ### +#### Setup -#### Setup #### +To build Google Test and your tests that use it, you need to tell your build +system where to find its headers and source files. The exact way to do it +depends on which build system you use, and is usually straightforward. -To build Google Test and your tests that use it, you need to tell your -build system where to find its headers and source files. The exact -way to do it depends on which build system you use, and is usually -straightforward. +#### Build -#### Build #### - -Suppose you put Google Test in directory `${GTEST_DIR}`. To build it, -create a library build target (or a project as called by Visual Studio -and Xcode) to compile +Suppose you put Google Test in directory `${GTEST_DIR}`. To build it, create a +library build target (or a project as called by Visual Studio and Xcode) to +compile ${GTEST_DIR}/src/gtest-all.cc with `${GTEST_DIR}/include` in the system header search path and `${GTEST_DIR}` -in the normal header search path. Assuming a Linux-like system and gcc, +in the normal header search path. Assuming a Linux-like system and gcc, something like the following will do: g++ -isystem ${GTEST_DIR}/include -I${GTEST_DIR} \ @@ -26,136 +24,239 @@ something like the following will do: (We need `-pthread` as Google Test uses threads.) 
-Next, you should compile your test source file with
-`${GTEST_DIR}/include` in the system header search path, and link it
-with gtest and any other necessary libraries:
+Next, you should compile your test source file with `${GTEST_DIR}/include` in
+the system header search path, and link it with gtest and any other necessary
+libraries:
 
     g++ -isystem ${GTEST_DIR}/include -pthread path/to/your_test.cc libgtest.a \
         -o your_test
 
-As an example, the make/ directory contains a Makefile that you can
-use to build Google Test on systems where GNU make is available
-(e.g. Linux, Mac OS X, and Cygwin). It doesn't try to build Google
-Test's own tests. Instead, it just builds the Google Test library and
-a sample test. You can use it as a starting point for your own build
-script.
+As an example, the make/ directory contains a Makefile that you can use to build
+Google Test on systems where GNU make is available (e.g. Linux, Mac OS X, and
+Cygwin). It doesn't try to build Google Test's own tests. Instead, it just
+builds the Google Test library and a sample test. You can use it as a starting
+point for your own build script.
 
-If the default settings are correct for your environment, the
-following commands should succeed:
+If the default settings are correct for your environment, the following commands
+should succeed:
 
     cd ${GTEST_DIR}/make
     make
     ./sample1_unittest
 
-If you see errors, try to tweak the contents of `make/Makefile` to make
-them go away. There are instructions in `make/Makefile` on how to do
-it.
+If you see errors, try to tweak the contents of `make/Makefile` to make them go
+away. There are instructions in `make/Makefile` on how to do it.
 
-### Using CMake ###
+### Using CMake
 
 Google Test comes with a CMake build script (
-[CMakeLists.txt](CMakeLists.txt)) that can be used on a wide range of platforms ("C" stands for
-cross-platform.). If you don't have CMake installed already, you can
-download it for free from <http://www.cmake.org/>.
+[CMakeLists.txt](https://github.com/google/googletest/blob/master/CMakeLists.txt))
+that can be used on a wide range of platforms ("C" stands for cross-platform.).
+If you don't have CMake installed already, you can download it for free from
+<http://www.cmake.org/>.
 
-CMake works by generating native makefiles or build projects that can
-be used in the compiler environment of your choice. The typical
-workflow starts with:
+CMake works by generating native makefiles or build projects that can be used in
+the compiler environment of your choice. You can either build Google Test as a
+standalone project or it can be incorporated into an existing CMake build for
+another project.
+
+#### Standalone CMake Project
+
+When building Google Test as a standalone project, the typical workflow starts
+with:
 
     mkdir mybuild       # Create a directory to hold the build output.
     cd mybuild
     cmake ${GTEST_DIR}  # Generate native build scripts.
 
-If you want to build Google Test's samples, you should replace the
-last command with
+If you want to build Google Test's samples, you should replace the last command
+with
 
     cmake -Dgtest_build_samples=ON ${GTEST_DIR}
 
-If you are on a \*nix system, you should now see a Makefile in the
-current directory. Just type 'make' to build gtest.
+If you are on a \*nix system, you should now see a Makefile in the current
+directory. Just type 'make' to build gtest.
 
-If you use Windows and have Visual Studio installed, a `gtest.sln` file
-and several `.vcproj` files will be created. You can then build them
-using Visual Studio.
+If you use Windows and have Visual Studio installed, a `gtest.sln` file and +several `.vcproj` files will be created. You can then build them using Visual +Studio. On Mac OS X with Xcode installed, a `.xcodeproj` file will be generated. -### Legacy Build Scripts ### +#### Incorporating Into An Existing CMake Project + +If you want to use gtest in a project which already uses CMake, then a more +robust and flexible approach is to build gtest as part of that project directly. +This is done by making the GoogleTest source code available to the main build +and adding it using CMake's `add_subdirectory()` command. This has the +significant advantage that the same compiler and linker settings are used +between gtest and the rest of your project, so issues associated with using +incompatible libraries (eg debug/release), etc. are avoided. This is +particularly useful on Windows. Making GoogleTest's source code available to the +main build can be done a few different ways: + +* Download the GoogleTest source code manually and place it at a known + location. This is the least flexible approach and can make it more difficult + to use with continuous integration systems, etc. +* Embed the GoogleTest source code as a direct copy in the main project's + source tree. This is often the simplest approach, but is also the hardest to + keep up to date. Some organizations may not permit this method. +* Add GoogleTest as a git submodule or equivalent. This may not always be + possible or appropriate. Git submodules, for example, have their own set of + advantages and drawbacks. +* Use CMake to download GoogleTest as part of the build's configure step. This + is just a little more complex, but doesn't have the limitations of the other + methods. + +The last of the above methods is implemented with a small piece of CMake code in +a separate file (e.g. `CMakeLists.txt.in`) which is copied to the build area and +then invoked as a sub-build _during the CMake stage_. That directory is then +pulled into the main build with `add_subdirectory()`. For example: + +New file `CMakeLists.txt.in`: + + cmake_minimum_required(VERSION 2.8.2) + + project(googletest-download NONE) + + include(ExternalProject) + ExternalProject_Add(googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG master + SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-src" + BINARY_DIR "${CMAKE_BINARY_DIR}/googletest-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" + ) + +Existing build's `CMakeLists.txt`: + + # Download and unpack googletest at configure time + configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt) + execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download ) + if(result) + message(FATAL_ERROR "CMake step for googletest failed: ${result}") + endif() + execute_process(COMMAND ${CMAKE_COMMAND} --build . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download ) + if(result) + message(FATAL_ERROR "Build step for googletest failed: ${result}") + endif() + + # Prevent overriding the parent project's compiler/linker + # settings on Windows + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + + # Add googletest directly to our build. This defines + # the gtest and gtest_main targets. 
+    add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src
+                     ${CMAKE_BINARY_DIR}/googletest-build
+                     EXCLUDE_FROM_ALL)
+
+    # The gtest/gtest_main targets carry header search path
+    # dependencies automatically when using CMake 2.8.11 or
+    # later. Otherwise we have to add them here ourselves.
+    if (CMAKE_VERSION VERSION_LESS 2.8.11)
+      include_directories("${gtest_SOURCE_DIR}/include")
+    endif()
+
+    # Now simply link against gtest or gtest_main as needed. E.g.
+    add_executable(example example.cpp)
+    target_link_libraries(example gtest_main)
+    add_test(NAME example_test COMMAND example)
+
+Note that this approach requires CMake 2.8.2 or later due to its use of the
+`ExternalProject_Add()` command. The above technique is discussed in more detail
+in [this separate article](http://crascit.com/2015/07/25/cmake-gtest/) which
+also contains a link to a fully generalized implementation of the technique.
+
+##### Visual Studio Dynamic vs Static Runtimes
+
+By default, new Visual Studio projects link the C runtimes dynamically but
+Google Test links them statically. This will generate an error that looks
+something like the following:
+
+    gtest.lib(gtest-all.obj) : error LNK2038: mismatch detected for
+    'RuntimeLibrary': value 'MTd_StaticDebug' doesn't match value
+    'MDd_DynamicDebug' in main.obj
+
+Google Test already has a CMake option for this: `gtest_force_shared_crt`
+
+Enabling this option will make gtest link the runtimes dynamically too, and
+match the project in which it is included.
+
+### Legacy Build Scripts

Before settling on CMake, we have been providing hand-maintained build
-projects/scripts for Visual Studio, Xcode, and Autotools. While we
-continue to provide them for convenience, they are not actively
-maintained any more. We highly recommend that you follow the
-instructions in the previous two sections to integrate Google Test
-with your existing build system.
+projects/scripts for Visual Studio, Xcode, and Autotools. While we continue to
+provide them for convenience, they are not actively maintained any more. We
+highly recommend that you follow the instructions in the above sections to
+integrate Google Test with your existing build system.

If you still need to use the legacy build scripts, here's how:

-The msvc\ folder contains two solutions with Visual C++ projects.
-Open the `gtest.sln` or `gtest-md.sln` file using Visual Studio, and you
-are ready to build Google Test the same way you build any Visual
-Studio project. Files that have names ending with -md use DLL
-versions of Microsoft runtime libraries (the /MD or the /MDd compiler
-option). Files without that suffix use static versions of the runtime
-libraries (the /MT or the /MTd option). Please note that one must use
-the same option to compile both gtest and the test code. If you use
-Visual Studio 2005 or above, we recommend the -md version as /MD is
-the default for new projects in these versions of Visual Studio.
+The msvc\ folder contains two solutions with Visual C++ projects. Open the
+`gtest.sln` or `gtest-md.sln` file using Visual Studio, and you are ready to
+build Google Test the same way you build any Visual Studio project. Files that
+have names ending with -md use DLL versions of Microsoft runtime libraries (the
+/MD or the /MDd compiler option). Files without that suffix use static versions
+of the runtime libraries (the /MT or the /MTd option). Please note that one must
+use the same option to compile both gtest and the test code. If you use Visual
+Studio 2005 or above, we recommend the -md version as /MD is the default for new
+projects in these versions of Visual Studio.

-On Mac OS X, open the `gtest.xcodeproj` in the `xcode/` folder using
-Xcode. Build the "gtest" target. The universal binary framework will
-end up in your selected build directory (selected in the Xcode
-"Preferences..." -> "Building" pane and defaults to xcode/build).
-Alternatively, at the command line, enter:
+On Mac OS X, open the `gtest.xcodeproj` in the `xcode/` folder using Xcode.
+Build the "gtest" target. The universal binary framework will end up in your
+selected build directory (selected in the Xcode "Preferences..." -> "Building"
+pane and defaults to xcode/build). Alternatively, at the command line, enter:

    xcodebuild

-This will build the "Release" configuration of gtest.framework in your
-default build location. See the "xcodebuild" man page for more
-information about building different configurations and building in
-different locations.
+This will build the "Release" configuration of gtest.framework in your default
+build location. See the "xcodebuild" man page for more information about
+building different configurations and building in different locations.

-If you wish to use the Google Test Xcode project with Xcode 4.x and
-above, you need to either:
+If you wish to use the Google Test Xcode project with Xcode 4.x and above, you
+need to either:

- * update the SDK configuration options in xcode/Config/General.xconfig.
-   Comment options `SDKROOT`, `MACOS_DEPLOYMENT_TARGET`, and `GCC_VERSION`. If
-   you choose this route you lose the ability to target earlier versions
-   of MacOS X.
- * Install an SDK for an earlier version. This doesn't appear to be
-   supported by Apple, but has been reported to work
-   (http://stackoverflow.com/questions/5378518).
+* update the SDK configuration options in xcode/Config/General.xconfig.
+  Comment out the options `SDKROOT`, `MACOS_DEPLOYMENT_TARGET`, and
+  `GCC_VERSION`. If you choose this route you lose the ability to target
+  earlier versions of MacOS X.
+* Install an SDK for an earlier version. This doesn't appear to be supported
+  by Apple, but has been reported to work
+  (http://stackoverflow.com/questions/5378518).

-### Tweaking Google Test ###
+### Tweaking Google Test

-Google Test can be used in diverse environments. The default
-configuration may not work (or may not work well) out of the box in
-some environments. However, you can easily tweak Google Test by
-defining control macros on the compiler command line. Generally,
-these macros are named like `GTEST_XYZ` and you define them to either 1
-or 0 to enable or disable a certain feature.
+Google Test can be used in diverse environments. The default configuration may
+not work (or may not work well) out of the box in some environments. However,
+you can easily tweak Google Test by defining control macros on the compiler
+command line. Generally, these macros are named like `GTEST_XYZ` and you define
+them to either 1 or 0 to enable or disable a certain feature.

-We list the most frequently used macros below. For a complete list,
-see file [include/gtest/internal/gtest-port.h](include/gtest/internal/gtest-port.h).
+We list the most frequently used macros below. For a complete list, see file
+[include/gtest/internal/gtest-port.h](https://github.com/google/googletest/blob/master/include/gtest/internal/gtest-port.h).
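+For example, if your project is built without RTTI and you want to state that
+explicitly rather than rely on Google Test's auto-detection, you could add the
+`GTEST_HAS_RTTI` control macro to the compile line shown earlier (a sketch;
+adjust paths and flags to your setup):
+
+    g++ -isystem ${GTEST_DIR}/include -fno-rtti -DGTEST_HAS_RTTI=0 -pthread \
+        path/to/your_test.cc libgtest.a -o your_test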
-### Choosing a TR1 Tuple Library ### +### Choosing a TR1 Tuple Library -Some Google Test features require the C++ Technical Report 1 (TR1) -tuple library, which is not yet available with all compilers. The -good news is that Google Test implements a subset of TR1 tuple that's -enough for its own need, and will automatically use this when the -compiler doesn't provide TR1 tuple. +Some Google Test features require the C++ Technical Report 1 (TR1) tuple +library, which is not yet available with all compilers. The good news is that +Google Test implements a subset of TR1 tuple that's enough for its own need, and +will automatically use this when the compiler doesn't provide TR1 tuple. -Usually you don't need to care about which tuple library Google Test -uses. However, if your project already uses TR1 tuple, you need to -tell Google Test to use the same TR1 tuple library the rest of your -project uses, or the two tuple implementations will clash. To do -that, add +Usually you don't need to care about which tuple library Google Test uses. +However, if your project already uses TR1 tuple, you need to tell Google Test to +use the same TR1 tuple library the rest of your project uses, or the two tuple +implementations will clash. To do that, add -DGTEST_USE_OWN_TR1_TUPLE=0 -to the compiler flags while compiling Google Test and your tests. If -you want to force Google Test to use its own tuple library, just add +to the compiler flags while compiling Google Test and your tests. If you want to +force Google Test to use its own tuple library, just add -DGTEST_USE_OWN_TR1_TUPLE=1 @@ -167,15 +268,15 @@ If you don't want Google Test to use tuple at all, add and all features using tuple will be disabled. -### Multi-threaded Tests ### +### Multi-threaded Tests -Google Test is thread-safe where the pthread library is available. -After `#include "gtest/gtest.h"`, you can check the `GTEST_IS_THREADSAFE` -macro to see whether this is the case (yes if the macro is `#defined` to -1, no if it's undefined.). +Google Test is thread-safe where the pthread library is available. After +`#include "gtest/gtest.h"`, you can check the `GTEST_IS_THREADSAFE` macro to see +whether this is the case (yes if the macro is `#defined` to 1, no if it's +undefined.). -If Google Test doesn't correctly detect whether pthread is available -in your environment, you can force it with +If Google Test doesn't correctly detect whether pthread is available in your +environment, you can force it with -DGTEST_HAS_PTHREAD=1 @@ -183,26 +284,24 @@ or -DGTEST_HAS_PTHREAD=0 -When Google Test uses pthread, you may need to add flags to your -compiler and/or linker to select the pthread library, or you'll get -link errors. If you use the CMake script or the deprecated Autotools -script, this is taken care of for you. If you use your own build -script, you'll need to read your compiler and linker's manual to -figure out what flags to add. +When Google Test uses pthread, you may need to add flags to your compiler and/or +linker to select the pthread library, or you'll get link errors. If you use the +CMake script or the deprecated Autotools script, this is taken care of for you. +If you use your own build script, you'll need to read your compiler and linker's +manual to figure out what flags to add. -### As a Shared Library (DLL) ### +### As a Shared Library (DLL) -Google Test is compact, so most users can build and link it as a -static library for the simplicity. 
You can choose to use Google Test
-as a shared library (known as a DLL on Windows) if you prefer.
+Google Test is compact, so most users can build and link it as a static library
+for simplicity. You can choose to use Google Test as a shared library (known
+as a DLL on Windows) if you prefer.

To compile *gtest* as a shared library, add

    -DGTEST_CREATE_SHARED_LIBRARY=1

-to the compiler flags. You'll also need to tell the linker to produce
-a shared library instead - consult your linker's manual for how to do
-it.
+to the compiler flags. You'll also need to tell the linker to produce a shared
+library instead - consult your linker's manual for how to do it.

To compile your *tests* that use the gtest shared library, add
@@ -210,31 +309,28 @@ To compile your *tests* that use the gtest shared library, add

to the compiler flags.

-Note: while the above steps aren't technically necessary today when
-using some compilers (e.g. GCC), they may become necessary in the
-future, if we decide to improve the speed of loading the library (see
-<http://gcc.gnu.org/wiki/Visibility> for details). Therefore you are
-recommended to always add the above flags when using Google Test as a
-shared library. Otherwise a future release of Google Test may break
-your build script.
+Note: while the above steps aren't technically necessary today when using some
+compilers (e.g. GCC), they may become necessary in the future, if we decide to
+improve the speed of loading the library (see
+<http://gcc.gnu.org/wiki/Visibility> for details). Therefore you are recommended
+to always add the above flags when using Google Test as a shared library.
+Otherwise a future release of Google Test may break your build script.

-### Avoiding Macro Name Clashes ###
+### Avoiding Macro Name Clashes

-In C++, macros don't obey namespaces. Therefore two libraries that
-both define a macro of the same name will clash if you `#include` both
-definitions. In case a Google Test macro clashes with another
-library, you can force Google Test to rename its macro to avoid the
-conflict.
+In C++, macros don't obey namespaces. Therefore two libraries that both define a
+macro of the same name will clash if you `#include` both definitions. In case a
+Google Test macro clashes with another library, you can force Google Test to
+rename its macro to avoid the conflict.

-Specifically, if both Google Test and some other code define macro
-FOO, you can add
+Specifically, if both Google Test and some other code define macro FOO, you can
+add

    -DGTEST_DONT_DEFINE_FOO=1

-to the compiler flags to tell Google Test to change the macro's name
-from `FOO` to `GTEST_FOO`. Currently `FOO` can be `FAIL`, `SUCCEED`,
-or `TEST`. For example, with `-DGTEST_DONT_DEFINE_TEST=1`, you'll
-need to write
+to the compiler flags to tell Google Test to change the macro's name from `FOO`
+to `GTEST_FOO`. Currently `FOO` can be `FAIL`, `SUCCEED`, or `TEST`. For
+example, with `-DGTEST_DONT_DEFINE_TEST=1`, you'll need to write

    GTEST_TEST(SomeTest, DoesThis) { ... }
@@ -243,38 +339,3 @@ instead of

    TEST(SomeTest, DoesThis) { ... }

in order to define a test.
-
-## Developing Google Test ##
-
-This section discusses how to make your own changes to Google Test.
-
-### Testing Google Test Itself ###
-
-To make sure your changes work as intended and don't break existing
-functionality, you'll want to compile and run Google Test's own tests.
-For that you can use CMake:
-
-    mkdir mybuild
-    cd mybuild
-    cmake -Dgtest_build_tests=ON ${GTEST_DIR}
-
-Make sure you have Python installed, as some of Google Test's tests
-are written in Python. If the cmake command complains about not being
-able to find Python (`Could NOT find PythonInterp (missing:
-PYTHON_EXECUTABLE)`), try telling it explicitly where your Python
-executable can be found:
-
-    cmake -DPYTHON_EXECUTABLE=path/to/python -Dgtest_build_tests=ON ${GTEST_DIR}
-
-Next, you can build Google Test and all of its own tests. On \*nix,
-this is usually done by 'make'. To run the tests, do
-
-    make test
-
-All tests should pass.
-
-Normally you don't need to worry about regenerating the source files,
-unless you need to modify them. In that case, you should modify the
-corresponding .pump files instead and run the pump.py Python script to
-regenerate them. You can find pump.py in the [scripts/](scripts/) directory.
-Read the [Pump manual](docs/PumpManual.md) for how to use it.
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-death-test.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-death-test.h
index 957a69c6a9e7..20c54d869519 100644
--- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-death-test.h
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-death-test.h
@@ -26,14 +26,14 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
//
-// Author: wan@google.com (Zhanyong Wan)
-//
-// The Google C++ Testing Framework (Google Test)
+// The Google C++ Testing and Mocking Framework (Google Test)
//
// This header file defines the public API for death tests. It is
// #included by gtest.h so a user doesn't need to include this
// directly.
+// GOOGLETEST_CM0001 DO NOT DELETE

#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
@@ -99,10 +99,11 @@ GTEST_API_ bool InDeathTestChild();
//
// On the regular expressions used in death tests:
//
+// GOOGLETEST_CM0005 DO NOT DELETE
// On POSIX-compliant systems (*nix), we use the <regex.h> library,
// which uses the POSIX extended regex syntax.
//
-// On other platforms (e.g. Windows), we only support a simple regex
+// On other platforms (e.g. Windows or Mac), we only support a simple regex
// syntax implemented as part of Google Test. This limited
// implementation should be enough most of the time when writing
// death tests; though it lacks many features you can find in PCRE
@@ -160,7 +161,7 @@ GTEST_API_ bool InDeathTestChild();
// is rarely a problem as people usually don't put the test binary
// directory in PATH.
//
-// TODO(wan@google.com): make thread-safe death tests search the PATH.
+// FIXME: make thread-safe death tests search the PATH.

// Asserts that a given statement causes the program to exit, with an
// integer exit status that satisfies predicate, and emitting error output
@@ -198,9 +199,10 @@ class GTEST_API_ ExitedWithCode {
  const int exit_code_;
};

-# if !GTEST_OS_WINDOWS
+# if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
// Tests that an exit code describes an exit due to termination by a
// given signal.
+// GOOGLETEST_CM0006 DO NOT DELETE
class GTEST_API_ KilledBySignal {
 public:
  explicit KilledBySignal(int signum);
@@ -272,6 +274,54 @@ class GTEST_API_ KilledBySignal {
# endif  // NDEBUG for EXPECT_DEBUG_DEATH
#endif  // GTEST_HAS_DEATH_TEST

+// This macro is used for implementing macros such as
+// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
+// death tests are not supported.
Those macros must compile on such systems +// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on +// systems that support death tests. This allows one to write such a macro +// on a system that does not support death tests and be sure that it will +// compile on a death-test supporting system. It is exposed publicly so that +// systems that have death-tests with stricter requirements than +// GTEST_HAS_DEATH_TEST can write their own equivalent of +// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED. +// +// Parameters: +// statement - A statement that a macro such as EXPECT_DEATH would test +// for program termination. This macro has to make sure this +// statement is compiled but not executed, to ensure that +// EXPECT_DEATH_IF_SUPPORTED compiles with a certain +// parameter iff EXPECT_DEATH compiles with it. +// regex - A regex that a macro such as EXPECT_DEATH would use to test +// the output of statement. This parameter has to be +// compiled but not evaluated by this macro, to ensure that +// this macro only accepts expressions that a macro such as +// EXPECT_DEATH would accept. +// terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED +// and a return statement for ASSERT_DEATH_IF_SUPPORTED. +// This ensures that ASSERT_DEATH_IF_SUPPORTED will not +// compile inside functions where ASSERT_DEATH doesn't +// compile. +// +// The branch that has an always false condition is used to ensure that +// statement and regex are compiled (and thus syntactically correct) but +// never executed. The unreachable code macro protects the terminator +// statement from generating an 'unreachable code' warning in case +// statement unconditionally returns or throws. The Message constructor at +// the end allows the syntax of streaming additional messages into the +// macro, for compilational compatibility with EXPECT_DEATH/ASSERT_DEATH. +# define GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, terminator) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + GTEST_LOG_(WARNING) \ + << "Death tests are not supported on this platform.\n" \ + << "Statement '" #statement "' cannot be verified."; \ + } else if (::testing::internal::AlwaysFalse()) { \ + ::testing::internal::RE::PartialMatch(".*", (regex)); \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + terminator; \ + } else \ + ::testing::Message() + // EXPECT_DEATH_IF_SUPPORTED(statement, regex) and // ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if // death tests are supported; otherwise they just issue a warning. 
This is @@ -284,9 +334,9 @@ class GTEST_API_ KilledBySignal { ASSERT_DEATH(statement, regex) #else # define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ - GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, ) + GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, ) # define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \ - GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, return) + GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, return) #endif } // namespace testing diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-message.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-message.h index fe879bca7927..5ca041614cb3 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-message.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-message.h @@ -26,10 +26,9 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // -// Author: wan@google.com (Zhanyong Wan) -// -// The Google C++ Testing Framework (Google Test) +// The Google C++ Testing and Mocking Framework (Google Test) // // This header file defines the Message class. // @@ -43,6 +42,8 @@ // to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user // program! +// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ #define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ @@ -50,6 +51,9 @@ #include "gtest/internal/gtest-port.h" +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ +/* class A needs to have dll-interface to be used by clients of class B */) + // Ensures that there is at least one operator<< in the global namespace. // See Message& operator<<(...) below for why. void operator<<(const testing::internal::Secret&, int); @@ -196,7 +200,6 @@ class GTEST_API_ Message { std::string GetString() const; private: - #if GTEST_OS_SYMBIAN // These are needed as the Nokia Symbian Compiler cannot decide between // const T& and const T* in a function template. The Nokia compiler _can_ @@ -247,4 +250,6 @@ std::string StreamableToString(const T& streamable) { } // namespace internal } // namespace testing +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 + #endif // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-param-test.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-param-test.h index 038f9ba79eb3..3e95e4390e00 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-param-test.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-param-test.h @@ -31,13 +31,12 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Authors: vladl@google.com (Vlad Losev) -// // Macros and functions for implementing parameterized tests -// in Google C++ Testing Framework (Google Test) +// in Google C++ Testing and Mocking Framework (Google Test) // // This file is generated by a SCRIPT. DO NOT EDIT BY HAND! // +// GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ #define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ @@ -79,7 +78,7 @@ TEST_P(FooTest, HasBlahBlah) { // Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test // case with any set of parameters you want. Google Test defines a number // of functions for generating test parameters. 
They return what we call -// (surprise!) parameter generators. Here is a summary of them, which +// (surprise!) parameter generators. Here is a summary of them, which // are all in the testing namespace: // // @@ -185,15 +184,10 @@ TEST_P(DerivedTest, DoesBlah) { # include #endif -// scripts/fuse_gtest.py depends on gtest's own header being #included -// *unconditionally*. Therefore these #includes cannot be moved -// inside #if GTEST_HAS_PARAM_TEST. #include "gtest/internal/gtest-internal.h" #include "gtest/internal/gtest-param-util.h" #include "gtest/internal/gtest-param-util-generated.h" -#if GTEST_HAS_PARAM_TEST - namespace testing { // Functions producing parameter generators. @@ -273,7 +267,7 @@ internal::ParamGenerator Range(T start, T end) { // each with C-string values of "foo", "bar", and "baz": // // const char* strings[] = {"foo", "bar", "baz"}; -// INSTANTIATE_TEST_CASE_P(StringSequence, SrtingTest, ValuesIn(strings)); +// INSTANTIATE_TEST_CASE_P(StringSequence, StringTest, ValuesIn(strings)); // // This instantiates tests from test case StlStringTest // each with STL strings with values "a" and "b": @@ -1375,8 +1369,6 @@ internal::CartesianProductHolder10AddTestPattern(\ - #test_case_name, \ - #test_name, \ + GTEST_STRINGIFY_(test_case_name), \ + GTEST_STRINGIFY_(test_name), \ new ::testing::internal::TestMetaFactory< \ GTEST_TEST_CLASS_NAME_(\ test_case_name, test_name)>()); \ @@ -1412,21 +1404,21 @@ internal::CartesianProductHolder10, and return std::string. // // testing::PrintToStringParamName is a builtin test suffix generator that -// returns the value of testing::PrintToString(GetParam()). It does not work -// for std::string or C strings. +// returns the value of testing::PrintToString(GetParam()). // // Note: test names must be non-empty, unique, and may only contain ASCII -// alphanumeric characters or underscore. +// alphanumeric characters or underscore. Because PrintToString adds quotes +// to std::string and C strings, it won't work for these types. # define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator, ...) \ - ::testing::internal::ParamGenerator \ + static ::testing::internal::ParamGenerator \ gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \ - ::std::string gtest_##prefix##test_case_name##_EvalGenerateName_( \ + static ::std::string gtest_##prefix##test_case_name##_EvalGenerateName_( \ const ::testing::TestParamInfo& info) { \ return ::testing::internal::GetParamNameGen \ (__VA_ARGS__)(info); \ } \ - int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \ + static int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \ ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \ GetTestCasePatternHolder(\ #test_case_name, \ @@ -1439,6 +1431,4 @@ internal::CartesianProductHolder10 #endif -// scripts/fuse_gtest.py depends on gtest's own header being #included -// *unconditionally*. Therefore these #includes cannot be moved -// inside #if GTEST_HAS_PARAM_TEST. #include "gtest/internal/gtest-internal.h" #include "gtest/internal/gtest-param-util.h" #include "gtest/internal/gtest-param-util-generated.h" -#if GTEST_HAS_PARAM_TEST - namespace testing { // Functions producing parameter generators. 
@@ -272,7 +266,7 @@ internal::ParamGenerator Range(T start, T end) { // each with C-string values of "foo", "bar", and "baz": // // const char* strings[] = {"foo", "bar", "baz"}; -// INSTANTIATE_TEST_CASE_P(StringSequence, SrtingTest, ValuesIn(strings)); +// INSTANTIATE_TEST_CASE_P(StringSequence, StringTest, ValuesIn(strings)); // // This instantiates tests from test case StlStringTest // each with STL strings with values "a" and "b": @@ -441,8 +435,6 @@ internal::CartesianProductHolder$i<$for j, [[Generator$j]]> Combine( ]] # endif // GTEST_HAS_COMBINE - - # define TEST_P(test_case_name, test_name) \ class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \ : public test_case_name { \ @@ -456,8 +448,8 @@ internal::CartesianProductHolder$i<$for j, [[Generator$j]]> Combine( #test_case_name, \ ::testing::internal::CodeLocation(\ __FILE__, __LINE__))->AddTestPattern(\ - #test_case_name, \ - #test_name, \ + GTEST_STRINGIFY_(test_case_name), \ + GTEST_STRINGIFY_(test_name), \ new ::testing::internal::TestMetaFactory< \ GTEST_TEST_CLASS_NAME_(\ test_case_name, test_name)>()); \ @@ -485,14 +477,14 @@ internal::CartesianProductHolder$i<$for j, [[Generator$j]]> Combine( // to std::string and C strings, it won't work for these types. # define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator, ...) \ - ::testing::internal::ParamGenerator \ + static ::testing::internal::ParamGenerator \ gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \ - ::std::string gtest_##prefix##test_case_name##_EvalGenerateName_( \ + static ::std::string gtest_##prefix##test_case_name##_EvalGenerateName_( \ const ::testing::TestParamInfo& info) { \ return ::testing::internal::GetParamNameGen \ (__VA_ARGS__)(info); \ } \ - int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \ + static int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \ ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \ GetTestCasePatternHolder(\ #test_case_name, \ @@ -505,6 +497,4 @@ internal::CartesianProductHolder$i<$for j, [[Generator$j]]> Combine( } // namespace testing -#endif // GTEST_HAS_PARAM_TEST - #endif // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-printers.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-printers.h index 8a33164cb38a..51865f84e6f6 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-printers.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-printers.h @@ -26,10 +26,9 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) -// Google Test - The Google C++ Testing Framework + +// Google Test - The Google C++ Testing and Mocking Framework // // This file implements a universal value printer that can print a // value of any type T: @@ -46,6 +45,10 @@ // 2. operator<<(ostream&, const T&) defined in either foo or the // global namespace. // +// However if T is an STL-style container then it is printed element-wise +// unless foo::PrintTo(const T&, ostream*) is defined. Note that +// operator<<() is ignored for container types. +// // If none of the above is defined, it will print the debug string of // the value if it is a protocol buffer, or print the raw bytes in the // value otherwise. 
@@ -92,6 +95,8 @@ // being defined as many user-defined container types don't have // value_type. +// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ #define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ @@ -107,6 +112,12 @@ # include #endif +#if GTEST_HAS_ABSL +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "absl/types/variant.h" +#endif // GTEST_HAS_ABSL + namespace testing { // Definitions in the 'internal' and 'internal2' name spaces are @@ -125,7 +136,11 @@ enum TypeKind { kProtobuf, // a protobuf type kConvertibleToInteger, // a type implicitly convertible to BiggestInt // (e.g. a named or unnamed enum type) - kOtherType // anything else +#if GTEST_HAS_ABSL + kConvertibleToStringView, // a type implicitly convertible to + // absl::string_view +#endif + kOtherType // anything else }; // TypeWithoutFormatter::PrintValue(value, os) is called @@ -137,7 +152,8 @@ class TypeWithoutFormatter { public: // This default version is called when kTypeKind is kOtherType. static void PrintValue(const T& value, ::std::ostream* os) { - PrintBytesInObjectTo(reinterpret_cast(&value), + PrintBytesInObjectTo(static_cast( + reinterpret_cast(&value)), sizeof(value), os); } }; @@ -151,10 +167,10 @@ template class TypeWithoutFormatter { public: static void PrintValue(const T& value, ::std::ostream* os) { - const ::testing::internal::string short_str = value.ShortDebugString(); - const ::testing::internal::string pretty_str = - short_str.length() <= kProtobufOneLinerMaxLength ? - short_str : ("\n" + value.DebugString()); + std::string pretty_str = value.ShortDebugString(); + if (pretty_str.length() > kProtobufOneLinerMaxLength) { + pretty_str = "\n" + value.DebugString(); + } *os << ("<" + pretty_str + ">"); } }; @@ -175,6 +191,19 @@ class TypeWithoutFormatter { } }; +#if GTEST_HAS_ABSL +template +class TypeWithoutFormatter { + public: + // Since T has neither operator<< nor PrintTo() but can be implicitly + // converted to absl::string_view, we print it as a absl::string_view. + // + // Note: the implementation is further below, as it depends on + // internal::PrintTo symbol which is defined later in the file. + static void PrintValue(const T& value, ::std::ostream* os); +}; +#endif + // Prints the given value to the given ostream. If the value is a // protocol message, its debug string is printed; if it's an enum or // of a type implicitly convertible to BiggestInt, it's printed as an @@ -202,10 +231,19 @@ class TypeWithoutFormatter { template ::std::basic_ostream& operator<<( ::std::basic_ostream& os, const T& x) { - TypeWithoutFormatter::value ? kProtobuf : - internal::ImplicitlyConvertible::value ? - kConvertibleToInteger : kOtherType)>::PrintValue(x, &os); + TypeWithoutFormatter::value + ? kProtobuf + : internal::ImplicitlyConvertible< + const T&, internal::BiggestInt>::value + ? kConvertibleToInteger + : +#if GTEST_HAS_ABSL + internal::ImplicitlyConvertible< + const T&, absl::string_view>::value + ? kConvertibleToStringView + : +#endif + kOtherType)>::PrintValue(x, &os); return os; } @@ -364,11 +402,18 @@ class UniversalPrinter; template void UniversalPrint(const T& value, ::std::ostream* os); +enum DefaultPrinterType { + kPrintContainer, + kPrintPointer, + kPrintFunctionPointer, + kPrintOther, +}; +template struct WrapPrinterType {}; + // Used to print an STL-style container when the user doesn't define // a PrintTo() for it. 
template -void DefaultPrintTo(IsContainer /* dummy */, - false_type /* is not a pointer */, +void DefaultPrintTo(WrapPrinterType /* dummy */, const C& container, ::std::ostream* os) { const size_t kMaxCount = 32; // The maximum number of elements to print. *os << '{'; @@ -401,40 +446,34 @@ void DefaultPrintTo(IsContainer /* dummy */, // implementation-defined. Therefore they will be printed as raw // bytes.) template -void DefaultPrintTo(IsNotContainer /* dummy */, - true_type /* is a pointer */, +void DefaultPrintTo(WrapPrinterType /* dummy */, T* p, ::std::ostream* os) { if (p == NULL) { *os << "NULL"; } else { - // C++ doesn't allow casting from a function pointer to any object - // pointer. - // - // IsTrue() silences warnings: "Condition is always true", - // "unreachable code". - if (IsTrue(ImplicitlyConvertible::value)) { - // T is not a function type. We just call << to print p, - // relying on ADL to pick up user-defined << for their pointer - // types, if any. - *os << p; - } else { - // T is a function type, so '*os << p' doesn't do what we want - // (it just prints p as bool). We want to print p as a const - // void*. However, we cannot cast it to const void* directly, - // even using reinterpret_cast, as earlier versions of gcc - // (e.g. 3.4.5) cannot compile the cast when p is a function - // pointer. Casting to UInt64 first solves the problem. - *os << reinterpret_cast( - reinterpret_cast(p)); - } + // T is not a function type. We just call << to print p, + // relying on ADL to pick up user-defined << for their pointer + // types, if any. + *os << p; + } +} +template +void DefaultPrintTo(WrapPrinterType /* dummy */, + T* p, ::std::ostream* os) { + if (p == NULL) { + *os << "NULL"; + } else { + // T is a function type, so '*os << p' doesn't do what we want + // (it just prints p as bool). We want to print p as a const + // void*. + *os << reinterpret_cast(p); } } // Used to print a non-container, non-pointer value when the user // doesn't define PrintTo() for it. template -void DefaultPrintTo(IsNotContainer /* dummy */, - false_type /* is not a pointer */, +void DefaultPrintTo(WrapPrinterType /* dummy */, const T& value, ::std::ostream* os) { ::testing_internal::DefaultPrintNonContainerTo(value, os); } @@ -452,11 +491,8 @@ void DefaultPrintTo(IsNotContainer /* dummy */, // wants). template void PrintTo(const T& value, ::std::ostream* os) { - // DefaultPrintTo() is overloaded. The type of its first two - // arguments determine which version will be picked. If T is an - // STL-style container, the version for container will be called; if - // T is a pointer, the pointer version will be called; otherwise the - // generic version will be called. + // DefaultPrintTo() is overloaded. The type of its first argument + // determines which version will be picked. // // Note that we check for container types here, prior to we check // for protocol message types in our operator<<. The rationale is: @@ -468,13 +504,27 @@ void PrintTo(const T& value, ::std::ostream* os) { // elements; therefore we check for container types here to ensure // that our format is used. 
// - // The second argument of DefaultPrintTo() is needed to bypass a bug - // in Symbian's C++ compiler that prevents it from picking the right - // overload between: - // - // PrintTo(const T& x, ...); - // PrintTo(T* x, ...); - DefaultPrintTo(IsContainerTest(0), is_pointer(), value, os); + // Note that MSVC and clang-cl do allow an implicit conversion from + // pointer-to-function to pointer-to-object, but clang-cl warns on it. + // So don't use ImplicitlyConvertible if it can be helped since it will + // cause this warning, and use a separate overload of DefaultPrintTo for + // function pointers so that the `*os << p` in the object pointer overload + // doesn't cause that warning either. + DefaultPrintTo( + WrapPrinterType < + (sizeof(IsContainerTest(0)) == sizeof(IsContainer)) && + !IsRecursiveContainer::value + ? kPrintContainer + : !is_pointer::value + ? kPrintOther +#if GTEST_LANG_CXX11 + : std::is_function::type>::value +#else + : !internal::ImplicitlyConvertible::value +#endif + ? kPrintFunctionPointer + : kPrintPointer > (), + value, os); } // The following list of PrintTo() overloads tells @@ -581,6 +631,17 @@ inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) { } #endif // GTEST_HAS_STD_WSTRING +#if GTEST_HAS_ABSL +// Overload for absl::string_view. +inline void PrintTo(absl::string_view sp, ::std::ostream* os) { + PrintTo(::std::string(sp), os); +} +#endif // GTEST_HAS_ABSL + +#if GTEST_LANG_CXX11 +inline void PrintTo(std::nullptr_t, ::std::ostream* os) { *os << "(nullptr)"; } +#endif // GTEST_LANG_CXX11 + #if GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_ // Helper function for printing a tuple. T must be instantiated with // a tuple type. @@ -710,6 +771,48 @@ class UniversalPrinter { GTEST_DISABLE_MSC_WARNINGS_POP_() }; +#if GTEST_HAS_ABSL + +// Printer for absl::optional + +template +class UniversalPrinter<::absl::optional> { + public: + static void Print(const ::absl::optional& value, ::std::ostream* os) { + *os << '('; + if (!value) { + *os << "nullopt"; + } else { + UniversalPrint(*value, os); + } + *os << ')'; + } +}; + +// Printer for absl::variant + +template +class UniversalPrinter<::absl::variant> { + public: + static void Print(const ::absl::variant& value, ::std::ostream* os) { + *os << '('; + absl::visit(Visitor{os}, value); + *os << ')'; + } + + private: + struct Visitor { + template + void operator()(const U& u) const { + *os << "'" << GetTypeName() << "' with value "; + UniversalPrint(u, os); + } + ::std::ostream* os; + }; +}; + +#endif // GTEST_HAS_ABSL + // UniversalPrintArray(begin, len, os) prints an array of 'len' // elements, starting at address 'begin'. template @@ -723,7 +826,7 @@ void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) { // If the array has more than kThreshold elements, we'll have to // omit some details by printing only the first and the last // kChunkSize elements. - // TODO(wan@google.com): let the user control the threshold using a flag. + // FIXME: let the user control the threshold using a flag. 
if (len <= kThreshold) { PrintRawArrayTo(begin, len, os); } else { @@ -805,7 +908,7 @@ class UniversalTersePrinter { if (str == NULL) { *os << "NULL"; } else { - UniversalPrint(string(str), os); + UniversalPrint(std::string(str), os); } } }; @@ -856,7 +959,7 @@ void UniversalPrint(const T& value, ::std::ostream* os) { UniversalPrinter::Print(value, os); } -typedef ::std::vector Strings; +typedef ::std::vector< ::std::string> Strings; // TuplePolicy must provide: // - tuple_size @@ -875,12 +978,13 @@ struct TuplePolicy { static const size_t tuple_size = ::std::tr1::tuple_size::value; template - struct tuple_element : ::std::tr1::tuple_element {}; + struct tuple_element : ::std::tr1::tuple_element(I), Tuple> { + }; template - static typename AddReference< - const typename ::std::tr1::tuple_element::type>::type get( - const Tuple& tuple) { + static typename AddReference(I), Tuple>::type>::type + get(const Tuple& tuple) { return ::std::tr1::get(tuple); } }; @@ -976,6 +1080,16 @@ Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) { } // namespace internal +#if GTEST_HAS_ABSL +namespace internal2 { +template +void TypeWithoutFormatter::PrintValue( + const T& value, ::std::ostream* os) { + internal::PrintTo(absl::string_view(value), os); +} +} // namespace internal2 +#endif + template ::std::string PrintToString(const T& value) { ::std::stringstream ss; diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-spi.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-spi.h index f63fa9a1b2a8..1e8983938ea2 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-spi.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-spi.h @@ -26,17 +26,21 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) + // // Utilities for testing Google Test itself and code that uses Google Test // (e.g. frameworks built on top of Google Test). +// GOOGLETEST_CM0004 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_ #define GTEST_INCLUDE_GTEST_GTEST_SPI_H_ #include "gtest/gtest.h" +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ +/* class A needs to have dll-interface to be used by clients of class B */) + namespace testing { // This helper class can be used to mock out Google Test failure reporting @@ -97,13 +101,12 @@ class GTEST_API_ SingleFailureChecker { public: // The constructor remembers the arguments. SingleFailureChecker(const TestPartResultArray* results, - TestPartResult::Type type, - const string& substr); + TestPartResult::Type type, const std::string& substr); ~SingleFailureChecker(); private: const TestPartResultArray* const results_; const TestPartResult::Type type_; - const string substr_; + const std::string substr_; GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker); }; @@ -112,6 +115,8 @@ class GTEST_API_ SingleFailureChecker { } // namespace testing +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 + // A set of macros for testing Google Test assertions or code that's expected // to generate Google Test fatal failures. 
It verifies that the given // statement will cause exactly one fatal Google Test failure with 'substr' diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-test-part.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-test-part.h index 77eb844839d8..1c7b89e08796 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-test-part.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-test-part.h @@ -27,8 +27,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Author: mheule@google.com (Markus Heule) -// +// GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ #define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ @@ -38,6 +37,9 @@ #include "gtest/internal/gtest-internal.h" #include "gtest/internal/gtest-string.h" +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ +/* class A needs to have dll-interface to be used by clients of class B */) + namespace testing { // A copyable object representing the result of a test part (i.e. an @@ -143,7 +145,7 @@ class GTEST_API_ TestPartResultArray { }; // This interface knows how to report a test part result. -class TestPartResultReporterInterface { +class GTEST_API_ TestPartResultReporterInterface { public: virtual ~TestPartResultReporterInterface() {} @@ -176,4 +178,6 @@ class GTEST_API_ HasNewFatalFailureHelper } // namespace testing +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 + #endif // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-typed-test.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-typed-test.h index 5f69d5678eaf..74bce46bdc50 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-typed-test.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest-typed-test.h @@ -26,8 +26,9 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) + + +// GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ #define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ @@ -82,6 +83,24 @@ TYPED_TEST(FooTest, DoesBlah) { TYPED_TEST(FooTest, HasPropertyA) { ... } +// TYPED_TEST_CASE takes an optional third argument which allows to specify a +// class that generates custom test name suffixes based on the type. This should +// be a class which has a static template function GetName(int index) returning +// a string for each type. The provided integer index equals the index of the +// type in the provided type list. In many cases the index can be ignored. 
+// +// For example: +// class MyTypeNames { +// public: +// template +// static std::string GetName(int) { +// if (std::is_same()) return "char"; +// if (std::is_same()) return "int"; +// if (std::is_same()) return "unsignedInt"; +// } +// }; +// TYPED_TEST_CASE(FooTest, MyTypes, MyTypeNames); + #endif // 0 // Type-parameterized tests are abstract test patterns parameterized @@ -143,6 +162,11 @@ INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes); // If the type list contains only one type, you can write that type // directly without Types<...>: // INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int); +// +// Similar to the optional argument of TYPED_TEST_CASE above, +// INSTANTIATE_TEST_CASE_P takes an optional fourth argument which allows to +// generate custom names. +// INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes, MyTypeNames); #endif // 0 @@ -159,32 +183,46 @@ INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes); // given test case. # define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_ +// Expands to the name of the typedef for the NameGenerator, responsible for +// creating the suffixes of the name. +#define GTEST_NAME_GENERATOR_(TestCaseName) \ + gtest_type_params_##TestCaseName##_NameGenerator + // The 'Types' template argument below must have spaces around it // since some compilers may choke on '>>' when passing a template // instance (e.g. Types) -# define TYPED_TEST_CASE(CaseName, Types) \ - typedef ::testing::internal::TypeList< Types >::type \ - GTEST_TYPE_PARAMS_(CaseName) +# define TYPED_TEST_CASE(CaseName, Types, ...) \ + typedef ::testing::internal::TypeList< Types >::type GTEST_TYPE_PARAMS_( \ + CaseName); \ + typedef ::testing::internal::NameGeneratorSelector<__VA_ARGS__>::type \ + GTEST_NAME_GENERATOR_(CaseName) -# define TYPED_TEST(CaseName, TestName) \ - template \ - class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \ - : public CaseName { \ - private: \ - typedef CaseName TestFixture; \ - typedef gtest_TypeParam_ TypeParam; \ - virtual void TestBody(); \ - }; \ - bool gtest_##CaseName##_##TestName##_registered_ GTEST_ATTRIBUTE_UNUSED_ = \ - ::testing::internal::TypeParameterizedTest< \ - CaseName, \ - ::testing::internal::TemplateSel< \ - GTEST_TEST_CLASS_NAME_(CaseName, TestName)>, \ - GTEST_TYPE_PARAMS_(CaseName)>::Register(\ - "", ::testing::internal::CodeLocation(__FILE__, __LINE__), \ - #CaseName, #TestName, 0); \ - template \ - void GTEST_TEST_CLASS_NAME_(CaseName, TestName)::TestBody() +# define TYPED_TEST(CaseName, TestName) \ + template \ + class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \ + : public CaseName { \ + private: \ + typedef CaseName TestFixture; \ + typedef gtest_TypeParam_ TypeParam; \ + virtual void TestBody(); \ + }; \ + static bool gtest_##CaseName##_##TestName##_registered_ \ + GTEST_ATTRIBUTE_UNUSED_ = \ + ::testing::internal::TypeParameterizedTest< \ + CaseName, \ + ::testing::internal::TemplateSel, \ + GTEST_TYPE_PARAMS_( \ + CaseName)>::Register("", \ + ::testing::internal::CodeLocation( \ + __FILE__, __LINE__), \ + #CaseName, #TestName, 0, \ + ::testing::internal::GenerateNames< \ + GTEST_NAME_GENERATOR_(CaseName), \ + GTEST_TYPE_PARAMS_(CaseName)>()); \ + template \ + void GTEST_TEST_CLASS_NAME_(CaseName, \ + TestName)::TestBody() #endif // GTEST_HAS_TYPED_TEST @@ -241,22 +279,27 @@ INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes); namespace GTEST_CASE_NAMESPACE_(CaseName) { \ typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \ } \ - static const char* const 
GTEST_REGISTERED_TEST_NAMES_(CaseName) = \ - GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames(\ - __FILE__, __LINE__, #__VA_ARGS__) + static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) \ + GTEST_ATTRIBUTE_UNUSED_ = \ + GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames( \ + __FILE__, __LINE__, #__VA_ARGS__) // The 'Types' template argument below must have spaces around it // since some compilers may choke on '>>' when passing a template // instance (e.g. Types) -# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types) \ - bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \ - ::testing::internal::TypeParameterizedTestCase::type>::Register(\ - #Prefix, \ - ::testing::internal::CodeLocation(__FILE__, __LINE__), \ - >EST_TYPED_TEST_CASE_P_STATE_(CaseName), \ - #CaseName, GTEST_REGISTERED_TEST_NAMES_(CaseName)) +# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types, ...) \ + static bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \ + ::testing::internal::TypeParameterizedTestCase< \ + CaseName, GTEST_CASE_NAMESPACE_(CaseName)::gtest_AllTests_, \ + ::testing::internal::TypeList< Types >::type>:: \ + Register(#Prefix, \ + ::testing::internal::CodeLocation(__FILE__, __LINE__), \ + >EST_TYPED_TEST_CASE_P_STATE_(CaseName), #CaseName, \ + GTEST_REGISTERED_TEST_NAMES_(CaseName), \ + ::testing::internal::GenerateNames< \ + ::testing::internal::NameGeneratorSelector< \ + __VA_ARGS__>::type, \ + ::testing::internal::TypeList< Types >::type>()) #endif // GTEST_HAS_TYPED_TEST_P diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest.h index f846c5bd6696..3b4bb1ee902e 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest.h @@ -26,10 +26,9 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // -// Author: wan@google.com (Zhanyong Wan) -// -// The Google C++ Testing Framework (Google Test) +// The Google C++ Testing and Mocking Framework (Google Test) // // This header file defines the public API for Google Test. It should be // included by any test program that uses Google Test. @@ -48,6 +47,8 @@ // registration from Barthelemy Dagenais' (barthelemy@prologique.com) // easyUnit framework. +// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_GTEST_H_ #define GTEST_INCLUDE_GTEST_GTEST_H_ @@ -65,6 +66,9 @@ #include "gtest/gtest-test-part.h" #include "gtest/gtest-typed-test.h" +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ +/* class A needs to have dll-interface to be used by clients of class B */) + // Depending on the platform, different string classes are available. // On Linux, in addition to ::std::string, Google also makes use of // class ::string, which has the same interface as ::std::string, but @@ -82,6 +86,15 @@ namespace testing { +// Silence C4100 (unreferenced formal parameter) and 4805 +// unsafe mix of type 'const int' and type 'const bool' +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable:4805) +# pragma warning(disable:4100) +#endif + + // Declares the flags. // This flag temporary enables the disabled tests. @@ -103,6 +116,10 @@ GTEST_DECLARE_string_(color); // the tests to run. If the filter is not given all tests are executed. 
GTEST_DECLARE_string_(filter); +// This flag controls whether Google Test installs a signal handler that dumps +// debugging information when fatal signals are raised. +GTEST_DECLARE_bool_(install_failure_signal_handler); + // This flag causes the Google Test to list tests. None of the tests listed // are actually run if the flag is provided. GTEST_DECLARE_bool_(list_tests); @@ -115,6 +132,9 @@ GTEST_DECLARE_string_(output); // test. GTEST_DECLARE_bool_(print_time); +// This flags control whether Google Test prints UTF8 characters as text. +GTEST_DECLARE_bool_(print_utf8); + // This flag specifies the random number seed. GTEST_DECLARE_int32_(random_seed); @@ -135,7 +155,7 @@ GTEST_DECLARE_int32_(stack_trace_depth); // When this flag is specified, a failed assertion will throw an // exception if exceptions are enabled, or exit the program with a -// non-zero code otherwise. +// non-zero code otherwise. For use with an external test framework. GTEST_DECLARE_bool_(throw_on_failure); // When this flag is set with a "host:port" string, on supported @@ -143,6 +163,10 @@ GTEST_DECLARE_bool_(throw_on_failure); // the specified host machine. GTEST_DECLARE_string_(stream_result_to); +#if GTEST_USE_OWN_FLAGFILE_FLAG_ +GTEST_DECLARE_string_(flagfile); +#endif // GTEST_USE_OWN_FLAGFILE_FLAG_ + // The upper limit for valid stack trace depths. const int kMaxStackTraceDepth = 100; @@ -160,6 +184,7 @@ class TestEventListenersAccessor; class TestEventRepeater; class UnitTestRecordPropertyTestHelper; class WindowsDeathTest; +class FuchsiaDeathTest; class UnitTestImpl* GetUnitTestImpl(); void ReportFailureInUnknownLocation(TestPartResult::Type result_type, const std::string& message); @@ -259,7 +284,9 @@ class GTEST_API_ AssertionResult { // Used in EXPECT_TRUE/FALSE(assertion_result). AssertionResult(const AssertionResult& other); +#if defined(_MSC_VER) && _MSC_VER < 1910 GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 /* forcing value to bool */) +#endif // Used in the EXPECT_TRUE/FALSE(bool_expression). // @@ -276,7 +303,9 @@ class GTEST_API_ AssertionResult { /*enabler*/ = NULL) : success_(success) {} +#if defined(_MSC_VER) && _MSC_VER < 1910 GTEST_DISABLE_MSC_WARNINGS_POP_() +#endif // Assignment operator. AssertionResult& operator=(AssertionResult other) { @@ -297,7 +326,7 @@ class GTEST_API_ AssertionResult { const char* message() const { return message_.get() != NULL ? message_->c_str() : ""; } - // TODO(vladl@google.com): Remove this after making sure no clients use it. + // FIXME: Remove this after making sure no clients use it. // Deprecated; please use message() instead. const char* failure_message() const { return message(); } @@ -345,6 +374,15 @@ GTEST_API_ AssertionResult AssertionFailure(); // Deprecated; use AssertionFailure() << msg. GTEST_API_ AssertionResult AssertionFailure(const Message& msg); +} // namespace testing + +// Includes the auto-generated header that implements a family of generic +// predicate assertion macros. This include comes late because it relies on +// APIs declared above. +#include "gtest/gtest_pred_impl.h" + +namespace testing { + // The abstract class that all tests inherit from. // // In Google Test, a unit test program contains one or many TestCases, and @@ -355,7 +393,7 @@ GTEST_API_ AssertionResult AssertionFailure(const Message& msg); // this for you. // // The only time you derive from Test is when defining a test fixture -// to be used a TEST_F. For example: +// to be used in a TEST_F. 
For example: // // class FooTest : public testing::Test { // protected: @@ -550,9 +588,8 @@ class GTEST_API_ TestResult { // Returns the elapsed time, in milliseconds. TimeInMillis elapsed_time() const { return elapsed_time_; } - // Returns the i-th test part result among all the results. i can range - // from 0 to test_property_count() - 1. If i is not in that range, aborts - // the program. + // Returns the i-th test part result among all the results. i can range from 0 + // to total_part_count() - 1. If i is not in that range, aborts the program. const TestPartResult& GetTestPartResult(int i) const; // Returns the i-th test property. i can range from 0 to @@ -569,6 +606,7 @@ class GTEST_API_ TestResult { friend class internal::TestResultAccessor; friend class internal::UnitTestImpl; friend class internal::WindowsDeathTest; + friend class internal::FuchsiaDeathTest; // Gets the vector of TestPartResults. const std::vector& test_part_results() const { @@ -594,7 +632,7 @@ class GTEST_API_ TestResult { // Adds a failure if the key is a reserved attribute of Google Test // testcase tags. Returns true if the property is valid. - // TODO(russr): Validate attribute names are legal and human readable. + // FIXME: Validate attribute names are legal and human readable. static bool ValidateTestProperty(const std::string& xml_element, const TestProperty& test_property); @@ -675,6 +713,9 @@ class GTEST_API_ TestInfo { // Returns the line where this test is defined. int line() const { return location_.line; } + // Return true if this test should not be run because it's in another shard. + bool is_in_another_shard() const { return is_in_another_shard_; } + // Returns true if this test should run, that is if the test is not // disabled (or it is disabled but the also_run_disabled_tests flag has // been specified) and its full name matches the user-specified filter. @@ -695,10 +736,9 @@ class GTEST_API_ TestInfo { // Returns true iff this test will appear in the XML report. bool is_reportable() const { - // For now, the XML report includes all tests matching the filter. - // In the future, we may trim tests that are excluded because of - // sharding. - return matches_filter_; + // The XML report includes tests matching the filter, excluding those + // run in other shards. + return matches_filter_ && !is_in_another_shard_; } // Returns the result of the test. @@ -762,6 +802,7 @@ class GTEST_API_ TestInfo { bool is_disabled_; // True iff this test is disabled bool matches_filter_; // True if this test matches the // user-specified filter. + bool is_in_another_shard_; // Will be run in another shard. internal::TestFactoryBase* const factory_; // The factory that creates // the test object @@ -986,6 +1027,18 @@ class Environment { virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; } }; +#if GTEST_HAS_EXCEPTIONS + +// Exception which can be thrown from TestEventListener::OnTestPartResult. +class GTEST_API_ AssertionException + : public internal::GoogleTestFailureException { + public: + explicit AssertionException(const TestPartResult& result) + : GoogleTestFailureException(result) {} +}; + +#endif // GTEST_HAS_EXCEPTIONS + // The interface for tracing execution of tests. The methods are organized in // the order the corresponding events are fired. class TestEventListener { @@ -1014,6 +1067,8 @@ class TestEventListener { virtual void OnTestStart(const TestInfo& test_info) = 0; // Fired after a failed assertion or a SUCCEED() invocation. 
+ // If you want to throw an exception from this function to skip to the next + // TEST, it must be AssertionException defined above, or inherited from it. virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0; // Fired after the test ends. @@ -1180,14 +1235,12 @@ class GTEST_API_ UnitTest { // Returns the random seed used at the start of the current test run. int random_seed() const; -#if GTEST_HAS_PARAM_TEST // Returns the ParameterizedTestCaseRegistry object used to keep track of // value-parameterized tests and instantiate and register them. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. internal::ParameterizedTestCaseRegistry& parameterized_test_registry() GTEST_LOCK_EXCLUDED_(mutex_); -#endif // GTEST_HAS_PARAM_TEST // Gets the number of successful test cases. int successful_test_case_count() const; @@ -1287,11 +1340,11 @@ class GTEST_API_ UnitTest { internal::UnitTestImpl* impl() { return impl_; } const internal::UnitTestImpl* impl() const { return impl_; } - // These classes and funcions are friends as they need to access private + // These classes and functions are friends as they need to access private // members of UnitTest. + friend class ScopedTrace; friend class Test; friend class internal::AssertHelper; - friend class internal::ScopedTrace; friend class internal::StreamingListenerTest; friend class internal::UnitTestRecordPropertyTestHelper; friend Environment* AddGlobalTestEnvironment(Environment* env); @@ -1388,11 +1441,9 @@ AssertionResult CmpHelperEQ(const char* lhs_expression, const char* rhs_expression, const T1& lhs, const T2& rhs) { -GTEST_DISABLE_MSC_WARNINGS_PUSH_(4389 /* signed/unsigned mismatch */) if (lhs == rhs) { return AssertionSuccess(); } -GTEST_DISABLE_MSC_WARNINGS_POP_() return CmpHelperEQFailure(lhs_expression, rhs_expression, lhs, rhs); } @@ -1706,7 +1757,6 @@ class GTEST_API_ AssertHelper { } // namespace internal -#if GTEST_HAS_PARAM_TEST // The pure interface class that all value-parameterized tests inherit from. // A value-parameterized class must inherit from both ::testing::Test and // ::testing::WithParamInterface. In most cases that just means inheriting @@ -1748,11 +1798,8 @@ class WithParamInterface { virtual ~WithParamInterface() {} // The current parameter value. Is also available in the test fixture's - // constructor. This member function is non-static, even though it only - // references static data, to reduce the opportunity for incorrect uses - // like writing 'WithParamInterface::GetParam()' for a test that - // uses a fixture whose parameter type is int. - const ParamType& GetParam() const { + // constructor. + static const ParamType& GetParam() { GTEST_CHECK_(parameter_ != NULL) << "GetParam() can only be called inside a value-parameterized test " << "-- did you intend to write TEST_P instead of TEST_F?"; @@ -1783,8 +1830,6 @@ template class TestWithParam : public Test, public WithParamInterface { }; -#endif // GTEST_HAS_PARAM_TEST - // Macros for indicating success/failure in test code. // ADD_FAILURE unconditionally adds a failure to the current test. @@ -1857,22 +1902,18 @@ class TestWithParam : public Test, public WithParamInterface { // AssertionResult. For more information on how to use AssertionResult with // these macros see comments on that class. 
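Before the boolean macro definitions that follow, a minimal sketch of the AssertionResult pattern the comment above describes; the IsEven helper is illustrative only and is not part of this patch or of libvpx:

#include "gtest/gtest.h"

// Illustrative predicate: returns a self-describing result so that a
// failing EXPECT_TRUE/EXPECT_FALSE prints a useful explanation.
::testing::AssertionResult IsEven(int n) {
  if (n % 2 == 0) return ::testing::AssertionSuccess() << n << " is even";
  return ::testing::AssertionFailure() << n << " is odd";
}

TEST(AssertionResultSketch, PrintsExplanationOnFailure) {
  EXPECT_TRUE(IsEven(4));   // Passes.
  EXPECT_FALSE(IsEven(5));  // Passes; IsEven(5) carries the message "5 is odd".
  // EXPECT_TRUE(IsEven(3)) would fail and report "3 is odd".
}
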
#define EXPECT_TRUE(condition) \ - GTEST_TEST_BOOLEAN_((condition), #condition, false, true, \ + GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \ GTEST_NONFATAL_FAILURE_) #define EXPECT_FALSE(condition) \ GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \ GTEST_NONFATAL_FAILURE_) #define ASSERT_TRUE(condition) \ - GTEST_TEST_BOOLEAN_((condition), #condition, false, true, \ + GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \ GTEST_FATAL_FAILURE_) #define ASSERT_FALSE(condition) \ GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \ GTEST_FATAL_FAILURE_) -// Includes the auto-generated header that implements a family of -// generic predicate assertion macros. -#include "gtest/gtest_pred_impl.h" - // Macros for testing equalities and inequalities. // // * {ASSERT|EXPECT}_EQ(v1, v2): Tests that v1 == v2 @@ -1914,8 +1955,8 @@ class TestWithParam : public Test, public WithParamInterface { // // Examples: // -// EXPECT_NE(5, Foo()); -// EXPECT_EQ(NULL, a_pointer); +// EXPECT_NE(Foo(), 5); +// EXPECT_EQ(a_pointer, NULL); // ASSERT_LT(i, array_size); // ASSERT_GT(records.size(), 0) << "There is no record left."; @@ -2101,6 +2142,57 @@ GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2, #define EXPECT_NO_FATAL_FAILURE(statement) \ GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_NONFATAL_FAILURE_) +// Causes a trace (including the given source file path and line number, +// and the given message) to be included in every test failure message generated +// by code in the scope of the lifetime of an instance of this class. The effect +// is undone with the destruction of the instance. +// +// The message argument can be anything streamable to std::ostream. +// +// Example: +// testing::ScopedTrace trace("file.cc", 123, "message"); +// +class GTEST_API_ ScopedTrace { + public: + // The c'tor pushes the given source file location and message onto + // a trace stack maintained by Google Test. + + // Template version. Uses Message() to convert the values into strings. + // Slow, but flexible. + template + ScopedTrace(const char* file, int line, const T& message) { + PushTrace(file, line, (Message() << message).GetString()); + } + + // Optimize for some known types. + ScopedTrace(const char* file, int line, const char* message) { + PushTrace(file, line, message ? message : "(null)"); + } + +#if GTEST_HAS_GLOBAL_STRING + ScopedTrace(const char* file, int line, const ::string& message) { + PushTrace(file, line, message); + } +#endif + + ScopedTrace(const char* file, int line, const std::string& message) { + PushTrace(file, line, message); + } + + // The d'tor pops the info pushed by the c'tor. + // + // Note that the d'tor is not virtual in order to be efficient. + // Don't inherit from ScopedTrace! + ~ScopedTrace(); + + private: + void PushTrace(const char* file, int line, std::string message); + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace); +} GTEST_ATTRIBUTE_UNUSED_; // A ScopedTrace object does its job in its + // c'tor and d'tor. Therefore it doesn't + // need to be used otherwise. + // Causes a trace (including the source file path, the current line // number, and the given message) to be included in every test failure // message generated by code in the current scope. The effect is @@ -2112,9 +2204,14 @@ GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2, // of the dummy variable name, thus allowing multiple SCOPED_TRACE()s // to appear in the same block - as long as they are on different // lines. 
+//
+// Each thread maintains its own stack of traces; therefore, a
+// SCOPED_TRACE() correctly affects only the assertions in its own thread.
 #define SCOPED_TRACE(message) \
-  ::testing::internal::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\
-    __FILE__, __LINE__, ::testing::Message() << (message))
+  ::testing::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\
+    __FILE__, __LINE__, (message))
+

 // Compile-time assertion for type equality.
 // StaticAssertTypeEq<type1, type2>() compiles iff type1 and type2 are
@@ -2194,7 +2291,7 @@ bool StaticAssertTypeEq() {
 // name of the test within the test case.
 //
 // A test fixture class must be declared earlier. The user should put
-// his test code between braces after using this macro. Example:
+// the test code between braces after using this macro. Example:
 //
 //   class FooTest : public testing::Test {
 //    protected:
@@ -2209,14 +2306,22 @@ bool StaticAssertTypeEq() {
 //   }
 //
 //   TEST_F(FooTest, ReturnsElementCountCorrectly) {
-//     EXPECT_EQ(0, a_.size());
-//     EXPECT_EQ(1, b_.size());
+//     EXPECT_EQ(a_.size(), 0);
+//     EXPECT_EQ(b_.size(), 1);
 //   }

 #define TEST_F(test_fixture, test_name)\
   GTEST_TEST_(test_fixture, test_name, test_fixture, \
               ::testing::internal::GetTypeId<test_fixture>())

+// Returns a path to a temporary directory.
+// Tries to determine an appropriate directory for the platform.
+GTEST_API_ std::string TempDir();
+
+#ifdef _MSC_VER
+# pragma warning(pop)
+#endif
+
 }  // namespace testing

 // Use this function in main() to run all tests. It returns 0 if all
@@ -2233,4 +2338,6 @@ inline int RUN_ALL_TESTS() {
   return ::testing::UnitTest::GetInstance()->Run();
 }

+GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4251
+
 #endif  // GTEST_INCLUDE_GTEST_GTEST_H_
diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_pred_impl.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_pred_impl.h
index 30ae712f50ed..0c1105cb8eb2 100644
--- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_pred_impl.h
+++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_pred_impl.h
@@ -27,18 +27,19 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-// This file is AUTOMATICALLY GENERATED on 10/31/2011 by command
+// This file is AUTOMATICALLY GENERATED on 01/02/2018 by command
 // 'gen_gtest_pred_impl.py 5'. DO NOT EDIT BY HAND!
 //
 // Implements a family of generic predicate assertion macros.

+// GOOGLETEST_CM0001 DO NOT DELETE
+
 #ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
 #define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_

-// Makes sure this header is not included before gtest.h.
-#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
-# error Do not include gtest_pred_impl.h directly. Include gtest.h instead.
-#endif  // GTEST_INCLUDE_GTEST_GTEST_H_
+#include "gtest/gtest.h"
+
+namespace testing {

 // This header implements a family of generic predicate assertion
 // macros:
@@ -66,8 +67,6 @@
 // We also define the EXPECT_* variations.
 //
 // For now we only support predicates whose arity is at most 5.
-// Please email googletestframework@googlegroups.com if you need
-// support for higher arities.

 // GTEST_ASSERT_ is the basic statement to which all of the assertions
 // in this file reduce. Don't use this in your code.
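The SCOPED_TRACE rework above (a public ::testing::ScopedTrace, the macro no longer wrapping the message in Message(), and per-thread trace stacks) is easiest to see in use. A minimal sketch; the CheckSquare helper is hypothetical and not part of this patch or of libvpx:

#include "gtest/gtest.h"

// Hypothetical helper, for illustration only.
static void CheckSquare(int input, int expected) {
  SCOPED_TRACE(input);  // Streamed into any failure reported below this line.
  EXPECT_EQ(input * input, expected);
}

TEST(ScopedTraceSketch, AnnotatesFailures) {
  CheckSquare(2, 4);
  CheckSquare(3, 9);
  // The now-public object form added by this update also works:
  ::testing::ScopedTrace trace(__FILE__, __LINE__, "outer context");
  CheckSquare(4, 16);
}
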
@@ -355,4 +354,6 @@ AssertionResult AssertPred5Helper(const char* pred_text, +} // namespace testing + #endif // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_prod.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_prod.h index da80ddc6c70e..e651671ebde8 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_prod.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/gtest_prod.h @@ -26,10 +26,10 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // -// Author: wan@google.com (Zhanyong Wan) -// -// Google C++ Testing Framework definitions useful in production code. +// Google C++ Testing and Mocking Framework definitions useful in production code. +// GOOGLETEST_CM0003 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_ #define GTEST_INCLUDE_GTEST_GTEST_PROD_H_ @@ -40,17 +40,20 @@ // // class MyClass { // private: -// void MyMethod(); -// FRIEND_TEST(MyClassTest, MyMethod); +// void PrivateMethod(); +// FRIEND_TEST(MyClassTest, PrivateMethodWorks); // }; // // class MyClassTest : public testing::Test { // // ... // }; // -// TEST_F(MyClassTest, MyMethod) { -// // Can call MyClass::MyMethod() here. +// TEST_F(MyClassTest, PrivateMethodWorks) { +// // Can call MyClass::PrivateMethod() here. // } +// +// Note: The test class must be in the same namespace as the class being tested. +// For example, putting MyClassTest in an anonymous namespace will not work. #define FRIEND_TEST(test_case_name, test_name)\ friend class test_case_name##_##test_name##_Test diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/README.md b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/README.md new file mode 100644 index 000000000000..ff391fb4e2b5 --- /dev/null +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/README.md @@ -0,0 +1,56 @@ +# Customization Points + +The custom directory is an injection point for custom user configurations. + +## Header `gtest.h` + +### The following macros can be defined: + +* `GTEST_OS_STACK_TRACE_GETTER_` - The name of an implementation of + `OsStackTraceGetterInterface`. +* `GTEST_CUSTOM_TEMPDIR_FUNCTION_` - An override for `testing::TempDir()`. See + `testing::TempDir` for semantics and signature. + +## Header `gtest-port.h` + +The following macros can be defined: + +### Flag related macros: + +* `GTEST_FLAG(flag_name)` +* `GTEST_USE_OWN_FLAGFILE_FLAG_` - Define to 0 when the system provides its + own flagfile flag parsing. +* `GTEST_DECLARE_bool_(name)` +* `GTEST_DECLARE_int32_(name)` +* `GTEST_DECLARE_string_(name)` +* `GTEST_DEFINE_bool_(name, default_val, doc)` +* `GTEST_DEFINE_int32_(name, default_val, doc)` +* `GTEST_DEFINE_string_(name, default_val, doc)` + +### Logging: + +* `GTEST_LOG_(severity)` +* `GTEST_CHECK_(condition)` +* Functions `LogToStderr()` and `FlushInfoLog()` have to be provided too. + +### Threading: + +* `GTEST_HAS_NOTIFICATION_` - Enabled if Notification is already provided. +* `GTEST_HAS_MUTEX_AND_THREAD_LOCAL_` - Enabled if `Mutex` and `ThreadLocal` + are already provided. 
Must also provide `GTEST_DECLARE_STATIC_MUTEX_(mutex)` + and `GTEST_DEFINE_STATIC_MUTEX_(mutex)` +* `GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)` +* `GTEST_LOCK_EXCLUDED_(locks)` + +### Underlying library support features + +* `GTEST_HAS_CXXABI_H_` + +### Exporting API symbols: + +* `GTEST_API_` - Specifier for exported symbols. + +## Header `gtest-printers.h` + +* See documentation at `gtest/gtest-printers.h` for details on how to define a + custom printer. diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-port.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-port.h index 7e744bd3bb38..cd85d956d2dc 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-port.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-port.h @@ -27,39 +27,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Injection point for custom user configurations. -// The following macros can be defined: -// -// Flag related macros: -// GTEST_FLAG(flag_name) -// GTEST_USE_OWN_FLAGFILE_FLAG_ - Define to 0 when the system provides its -// own flagfile flag parsing. -// GTEST_DECLARE_bool_(name) -// GTEST_DECLARE_int32_(name) -// GTEST_DECLARE_string_(name) -// GTEST_DEFINE_bool_(name, default_val, doc) -// GTEST_DEFINE_int32_(name, default_val, doc) -// GTEST_DEFINE_string_(name, default_val, doc) -// -// Test filtering: -// GTEST_TEST_FILTER_ENV_VAR_ - The name of an environment variable that -// will be used if --GTEST_FLAG(test_filter) -// is not provided. -// -// Logging: -// GTEST_LOG_(severity) -// GTEST_CHECK_(condition) -// Functions LogToStderr() and FlushInfoLog() have to be provided too. -// -// Threading: -// GTEST_HAS_NOTIFICATION_ - Enabled if Notification is already provided. -// GTEST_HAS_MUTEX_AND_THREAD_LOCAL_ - Enabled if Mutex and ThreadLocal are -// already provided. -// Must also provide GTEST_DECLARE_STATIC_MUTEX_(mutex) and -// GTEST_DEFINE_STATIC_MUTEX_(mutex) -// -// GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks) -// GTEST_LOCK_EXCLUDED_(locks) +// Injection point for custom user configurations. See README for details // // ** Custom implementation starts here ** diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-printers.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-printers.h index 60c1ea050b61..eb4467abcabe 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-printers.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest-printers.h @@ -31,8 +31,8 @@ // installation of gTest. // It will be included from gtest-printers.h and the overrides in this file // will be visible to everyone. -// See documentation at gtest/gtest-printers.h for details on how to define a -// custom printer. +// +// Injection point for custom user configurations. 
See README for details // // ** Custom implementation starts here ** diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest.h index c27412a89818..4c8e07be23f1 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/custom/gtest.h @@ -27,11 +27,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Injection point for custom user configurations. -// The following macros can be defined: -// -// GTEST_OS_STACK_TRACE_GETTER_ - The name of an implementation of -// OsStackTraceGetterInterface. +// Injection point for custom user configurations. See README for details // // ** Custom implementation starts here ** diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-death-test-internal.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-death-test-internal.h index 2b3a78f5bf86..0a9b42c8a572 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-death-test-internal.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-death-test-internal.h @@ -27,12 +27,11 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) -// -// The Google C++ Testing Framework (Google Test) +// The Google C++ Testing and Mocking Framework (Google Test) // // This header file defines internal utilities needed for implementing // death tests. They are subject to change without notice. +// GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ @@ -53,6 +52,9 @@ const char kInternalRunDeathTestFlag[] = "internal_run_death_test"; #if GTEST_HAS_DEATH_TEST +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ +/* class A needs to have dll-interface to be used by clients of class B */) + // DeathTest is a class that hides much of the complexity of the // GTEST_DEATH_TEST_ macro. It is abstract; its static Create method // returns a concrete class that depends on the prevailing death test @@ -136,6 +138,8 @@ class GTEST_API_ DeathTest { GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest); }; +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 + // Factory interface for death tests. May be mocked out for testing. class DeathTestFactory { public: @@ -218,14 +222,18 @@ GTEST_API_ bool ExitedUnsuccessfully(int exit_status); // can be streamed. // This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in -// NDEBUG mode. In this case we need the statements to be executed, the regex is -// ignored, and the macro must accept a streamed message even though the message -// is never printed. -# define GTEST_EXECUTE_STATEMENT_(statement, regex) \ - GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ - if (::testing::internal::AlwaysTrue()) { \ - GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ - } else \ +// NDEBUG mode. In this case we need the statements to be executed and the macro +// must accept a streamed message even though the message is never printed. 
+// The regex object is not evaluated, but it is used to prevent "unused"
+// warnings and to avoid an expression that doesn't compile in debug mode.
+#define GTEST_EXECUTE_STATEMENT_(statement, regex) \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
+  if (::testing::internal::AlwaysTrue()) { \
+    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
+  } else if (!::testing::internal::AlwaysTrue()) { \
+    const ::testing::internal::RE& gtest_regex = (regex); \
+    static_cast<void>(gtest_regex); \
+  } else \
+    ::testing::Message()

 // A class representing the parsed contents of the
@@ -264,53 +272,6 @@ class InternalRunDeathTestFlag {
 // the flag is specified; otherwise returns NULL.
 InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();

-#else  // GTEST_HAS_DEATH_TEST
-
-// This macro is used for implementing macros such as
-// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
-// death tests are not supported. Those macros must compile on such systems
-// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on
-// systems that support death tests. This allows one to write such a macro
-// on a system that does not support death tests and be sure that it will
-// compile on a death-test supporting system.
-//
-// Parameters:
-//   statement -  A statement that a macro such as EXPECT_DEATH would test
-//                for program termination. This macro has to make sure this
-//                statement is compiled but not executed, to ensure that
-//                EXPECT_DEATH_IF_SUPPORTED compiles with a certain
-//                parameter iff EXPECT_DEATH compiles with it.
-//   regex     -  A regex that a macro such as EXPECT_DEATH would use to test
-//                the output of statement. This parameter has to be
-//                compiled but not evaluated by this macro, to ensure that
-//                this macro only accepts expressions that a macro such as
-//                EXPECT_DEATH would accept.
-//   terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED
-//                and a return statement for ASSERT_DEATH_IF_SUPPORTED.
-//                This ensures that ASSERT_DEATH_IF_SUPPORTED will not
-//                compile inside functions where ASSERT_DEATH doesn't
-//                compile.
-//
-// The branch that has an always false condition is used to ensure that
-// statement and regex are compiled (and thus syntactically correct) but
-// never executed. The unreachable code macro protects the terminator
-// statement from generating an 'unreachable code' warning in case
-// statement unconditionally returns or throws. The Message constructor at
-// the end allows the syntax of streaming additional messages into the
-// macro, for compilational compatibility with EXPECT_DEATH/ASSERT_DEATH.
-# define GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, terminator) \ - GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ - if (::testing::internal::AlwaysTrue()) { \ - GTEST_LOG_(WARNING) \ - << "Death tests are not supported on this platform.\n" \ - << "Statement '" #statement "' cannot be verified."; \ - } else if (::testing::internal::AlwaysFalse()) { \ - ::testing::internal::RE::PartialMatch(".*", (regex)); \ - GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ - terminator; \ - } else \ - ::testing::Message() - #endif // GTEST_HAS_DEATH_TEST } // namespace internal diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-filepath.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-filepath.h index 7a13b4b0de60..ae38d95bf844 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-filepath.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-filepath.h @@ -27,21 +27,24 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Author: keith.ray@gmail.com (Keith Ray) -// // Google Test filepath utilities // // This header file declares classes and functions used internally by // Google Test. They are subject to change without notice. // -// This file is #included in . +// This file is #included in gtest/internal/gtest-internal.h. // Do not include this header file separately! +// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ #include "gtest/internal/gtest-string.h" +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ +/* class A needs to have dll-interface to be used by clients of class B */) + namespace testing { namespace internal { @@ -203,4 +206,6 @@ class GTEST_API_ FilePath { } // namespace internal } // namespace testing +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 + #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-internal.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-internal.h index ebd1cf615de0..b762f61fc53c 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-internal.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-internal.h @@ -27,13 +27,13 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) -// -// The Google C++ Testing Framework (Google Test) +// The Google C++ Testing and Mocking Framework (Google Test) // // This header file declares functions and macros used internally by // Google Test. They are subject to change without notice. 
+// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ @@ -61,8 +61,8 @@ #include #include "gtest/gtest-message.h" -#include "gtest/internal/gtest-string.h" #include "gtest/internal/gtest-filepath.h" +#include "gtest/internal/gtest-string.h" #include "gtest/internal/gtest-type-util.h" // Due to C++ preprocessor weirdness, we need double indirection to @@ -76,6 +76,9 @@ #define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar) #define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar +// Stringifies its argument. +#define GTEST_STRINGIFY_(name) #name + class ProtocolMessage; namespace proto2 { class Message; } @@ -96,7 +99,6 @@ template namespace internal { struct TraceInfo; // Information about a trace point. -class ScopedTrace; // Implements scoped trace. class TestInfoImpl; // Opaque implementation of TestInfo class UnitTestImpl; // Opaque implementation of UnitTest @@ -139,6 +141,9 @@ GTEST_API_ std::string AppendUserMessage( #if GTEST_HAS_EXCEPTIONS +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4275 \ +/* an exported class was derived from a class that was not exported */) + // This exception is thrown by (and only by) a failed Google Test // assertion when GTEST_FLAG(throw_on_failure) is true (if exceptions // are enabled). We derive it from std::runtime_error, which is for @@ -150,32 +155,15 @@ class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error { explicit GoogleTestFailureException(const TestPartResult& failure); }; +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4275 + #endif // GTEST_HAS_EXCEPTIONS -// A helper class for creating scoped traces in user programs. -class GTEST_API_ ScopedTrace { - public: - // The c'tor pushes the given source file location and message onto - // a trace stack maintained by Google Test. - ScopedTrace(const char* file, int line, const Message& message); - - // The d'tor pops the info pushed by the c'tor. - // - // Note that the d'tor is not virtual in order to be efficient. - // Don't inherit from ScopedTrace! - ~ScopedTrace(); - - private: - GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace); -} GTEST_ATTRIBUTE_UNUSED_; // A ScopedTrace object does its job in its - // c'tor and d'tor. Therefore it doesn't - // need to be used otherwise. - namespace edit_distance { // Returns the optimal edits to go from 'left' to 'right'. // All edits cost the same, with replace having lower priority than // add/remove. -// Simple implementation of the Wagner–Fischer algorithm. +// Simple implementation of the Wagner-Fischer algorithm. // See http://en.wikipedia.org/wiki/Wagner-Fischer_algorithm enum EditType { kMatch, kAdd, kRemove, kReplace }; GTEST_API_ std::vector CalculateOptimalEdits( @@ -502,9 +490,10 @@ typedef void (*SetUpTestCaseFunc)(); typedef void (*TearDownTestCaseFunc)(); struct CodeLocation { - CodeLocation(const string& a_file, int a_line) : file(a_file), line(a_line) {} + CodeLocation(const std::string& a_file, int a_line) + : file(a_file), line(a_line) {} - string file; + std::string file; int line; }; @@ -544,6 +533,9 @@ GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr); #if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ +/* class A needs to have dll-interface to be used by clients of class B */) + // State of the definition of a type-parameterized test case. 
class GTEST_API_ TypedTestCasePState { public: @@ -589,6 +581,8 @@ class GTEST_API_ TypedTestCasePState { RegisteredTestsMap registered_tests_; }; +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 + // Skips to the first non-space char after the first comma in 'str'; // returns NULL if no comma is found in 'str'. inline const char* SkipComma(const char* str) { @@ -612,6 +606,37 @@ inline std::string GetPrefixUntilComma(const char* str) { void SplitString(const ::std::string& str, char delimiter, ::std::vector< ::std::string>* dest); +// The default argument to the template below for the case when the user does +// not provide a name generator. +struct DefaultNameGenerator { + template + static std::string GetName(int i) { + return StreamableToString(i); + } +}; + +template +struct NameGeneratorSelector { + typedef Provided type; +}; + +template +void GenerateNamesRecursively(Types0, std::vector*, int) {} + +template +void GenerateNamesRecursively(Types, std::vector* result, int i) { + result->push_back(NameGenerator::template GetName(i)); + GenerateNamesRecursively(typename Types::Tail(), result, + i + 1); +} + +template +std::vector GenerateNames() { + std::vector result; + GenerateNamesRecursively(Types(), &result, 0); + return result; +} + // TypeParameterizedTest::Register() // registers a list of type-parameterized tests with Google Test. The // return value is insignificant - we just need to return something @@ -626,10 +651,10 @@ class TypeParameterizedTest { // specified in INSTANTIATE_TYPED_TEST_CASE_P(Prefix, TestCase, // Types). Valid values for 'index' are [0, N - 1] where N is the // length of Types. - static bool Register(const char* prefix, - CodeLocation code_location, - const char* case_name, const char* test_names, - int index) { + static bool Register(const char* prefix, const CodeLocation& code_location, + const char* case_name, const char* test_names, int index, + const std::vector& type_names = + GenerateNames()) { typedef typename Types::Head Type; typedef Fixture FixtureClass; typedef typename GTEST_BIND_(TestSel, Type) TestClass; @@ -637,20 +662,23 @@ class TypeParameterizedTest { // First, registers the first type-parameterized test in the type // list. MakeAndRegisterTestInfo( - (std::string(prefix) + (prefix[0] == '\0' ? "" : "/") + case_name + "/" - + StreamableToString(index)).c_str(), + (std::string(prefix) + (prefix[0] == '\0' ? "" : "/") + case_name + + "/" + type_names[index]) + .c_str(), StripTrailingSpaces(GetPrefixUntilComma(test_names)).c_str(), GetTypeName().c_str(), NULL, // No value parameter. - code_location, - GetTypeId(), - TestClass::SetUpTestCase, - TestClass::TearDownTestCase, - new TestFactoryImpl); + code_location, GetTypeId(), TestClass::SetUpTestCase, + TestClass::TearDownTestCase, new TestFactoryImpl); // Next, recurses (at compile time) with the tail of the type list. 
- return TypeParameterizedTest - ::Register(prefix, code_location, case_name, test_names, index + 1); + return TypeParameterizedTest::Register(prefix, + code_location, + case_name, + test_names, + index + 1, + type_names); } }; @@ -658,9 +686,11 @@ class TypeParameterizedTest { template class TypeParameterizedTest { public: - static bool Register(const char* /*prefix*/, CodeLocation, + static bool Register(const char* /*prefix*/, const CodeLocation&, const char* /*case_name*/, const char* /*test_names*/, - int /*index*/) { + int /*index*/, + const std::vector& = + std::vector() /*type_names*/) { return true; } }; @@ -673,8 +703,10 @@ template class TypeParameterizedTestCase { public: static bool Register(const char* prefix, CodeLocation code_location, - const TypedTestCasePState* state, - const char* case_name, const char* test_names) { + const TypedTestCasePState* state, const char* case_name, + const char* test_names, + const std::vector& type_names = + GenerateNames()) { std::string test_name = StripTrailingSpaces( GetPrefixUntilComma(test_names)); if (!state->TestExists(test_name)) { @@ -691,12 +723,14 @@ class TypeParameterizedTestCase { // First, register the first test in 'Test' for each type in 'Types'. TypeParameterizedTest::Register( - prefix, test_location, case_name, test_names, 0); + prefix, test_location, case_name, test_names, 0, type_names); // Next, recurses (at compile time) with the tail of the test list. - return TypeParameterizedTestCase - ::Register(prefix, code_location, state, - case_name, SkipComma(test_names)); + return TypeParameterizedTestCase::Register(prefix, code_location, + state, case_name, + SkipComma(test_names), + type_names); } }; @@ -704,9 +738,11 @@ class TypeParameterizedTestCase { template class TypeParameterizedTestCase { public: - static bool Register(const char* /*prefix*/, CodeLocation, + static bool Register(const char* /*prefix*/, const CodeLocation&, const TypedTestCasePState* /*state*/, - const char* /*case_name*/, const char* /*test_names*/) { + const char* /*case_name*/, const char* /*test_names*/, + const std::vector& = + std::vector() /*type_names*/) { return true; } }; @@ -823,31 +859,6 @@ struct RemoveConst { #define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \ GTEST_REMOVE_CONST_(GTEST_REMOVE_REFERENCE_(T)) -// Adds reference to a type if it is not a reference type, -// otherwise leaves it unchanged. This is the same as -// tr1::add_reference, which is not widely available yet. -template -struct AddReference { typedef T& type; }; // NOLINT -template -struct AddReference { typedef T& type; }; // NOLINT - -// A handy wrapper around AddReference that works when the argument T -// depends on template parameters. -#define GTEST_ADD_REFERENCE_(T) \ - typename ::testing::internal::AddReference::type - -// Adds a reference to const on top of T as necessary. For example, -// it transforms -// -// char ==> const char& -// const char ==> const char& -// char& ==> const char& -// const char& ==> const char& -// -// The argument T must depend on some template parameters. -#define GTEST_REFERENCE_TO_CONST_(T) \ - GTEST_ADD_REFERENCE_(const GTEST_REMOVE_REFERENCE_(T)) - // ImplicitlyConvertible::value is a compile-time bool // constant that's true iff type From can be implicitly converted to // type To. @@ -917,8 +928,11 @@ struct IsAProtocolMessage // a container class by checking the type of IsContainerTest(0). // The value of the expression is insignificant. // -// Note that we look for both C::iterator and C::const_iterator. 
The -// reason is that C++ injects the name of a class as a member of the +// In C++11 mode we check the existence of a const_iterator and that an +// iterator is properly implemented for the container. +// +// For pre-C++11 that we look for both C::iterator and C::const_iterator. +// The reason is that C++ injects the name of a class as a member of the // class itself (e.g. you can refer to class iterator as either // 'iterator' or 'iterator::iterator'). If we look for C::iterator // only, for example, we would mistakenly think that a class named @@ -928,17 +942,96 @@ struct IsAProtocolMessage // IsContainerTest(typename C::const_iterator*) and // IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++. typedef int IsContainer; +#if GTEST_LANG_CXX11 +template ().begin()), + class = decltype(::std::declval().end()), + class = decltype(++::std::declval()), + class = decltype(*::std::declval()), + class = typename C::const_iterator> +IsContainer IsContainerTest(int /* dummy */) { + return 0; +} +#else template IsContainer IsContainerTest(int /* dummy */, typename C::iterator* /* it */ = NULL, typename C::const_iterator* /* const_it */ = NULL) { return 0; } +#endif // GTEST_LANG_CXX11 typedef char IsNotContainer; template IsNotContainer IsContainerTest(long /* dummy */) { return '\0'; } +// Trait to detect whether a type T is a hash table. +// The heuristic used is that the type contains an inner type `hasher` and does +// not contain an inner type `reverse_iterator`. +// If the container is iterable in reverse, then order might actually matter. +template +struct IsHashTable { + private: + template + static char test(typename U::hasher*, typename U::reverse_iterator*); + template + static int test(typename U::hasher*, ...); + template + static char test(...); + + public: + static const bool value = sizeof(test(0, 0)) == sizeof(int); +}; + +template +const bool IsHashTable::value; + +template +struct VoidT { + typedef void value_type; +}; + +template +struct HasValueType : false_type {}; +template +struct HasValueType > : true_type { +}; + +template (0)) == sizeof(IsContainer), + bool = HasValueType::value> +struct IsRecursiveContainerImpl; + +template +struct IsRecursiveContainerImpl : public false_type {}; + +// Since the IsRecursiveContainerImpl depends on the IsContainerTest we need to +// obey the same inconsistencies as the IsContainerTest, namely check if +// something is a container is relying on only const_iterator in C++11 and +// is relying on both const_iterator and iterator otherwise +template +struct IsRecursiveContainerImpl : public false_type {}; + +template +struct IsRecursiveContainerImpl { + #if GTEST_LANG_CXX11 + typedef typename IteratorTraits::value_type + value_type; +#else + typedef typename IteratorTraits::value_type value_type; +#endif + typedef is_same type; +}; + +// IsRecursiveContainer is a unary compile-time predicate that +// evaluates whether C is a recursive container type. A recursive container +// type is a container type whose value_type is equal to the container type +// itself. An example for a recursive container type is +// boost::filesystem::path, whose iterator has a value_type that is equal to +// boost::filesystem::path. +template +struct IsRecursiveContainer : public IsRecursiveContainerImpl::type {}; + // EnableIf::type is void when 'Cond' is true, and // undefined when 'Cond' is false. 
To use SFINAE to make a function // overload only apply when a particular expression is true, add @@ -1070,7 +1163,7 @@ class NativeArray { private: enum { kCheckTypeIsNotConstOrAReference = StaticAssertTypeEqHelper< - Element, GTEST_REMOVE_REFERENCE_AND_CONST_(Element)>::value, + Element, GTEST_REMOVE_REFERENCE_AND_CONST_(Element)>::value }; // Initializes this object with a copy of the input. @@ -1115,7 +1208,7 @@ class NativeArray { #define GTEST_SUCCESS_(message) \ GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess) -// Suppresses MSVC warnings 4072 (unreachable code) for the code following +// Suppress MSVC warning 4702 (unreachable code) for the code following // statement if it returns or throws (or doesn't return or throw in some // situations). #define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \ @@ -1235,4 +1328,3 @@ class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ - diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-linked_ptr.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-linked_ptr.h index 36029422174f..082b87289ae1 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-linked_ptr.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-linked_ptr.h @@ -27,8 +27,6 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Authors: Dan Egnor (egnor@google.com) -// // A "smart" pointer type with reference tracking. Every pointer to a // particular object is kept on a circular linked list. When the last pointer // to an object is destroyed or reassigned, the object is deleted. @@ -62,9 +60,11 @@ // raw pointer (e.g. via get()) concurrently, and // - it's safe to write to two linked_ptrs that point to the same // shared object concurrently. -// TODO(wan@google.com): rename this to safe_linked_ptr to avoid +// FIXME: rename this to safe_linked_ptr to avoid // confusion with normal linked_ptr. +// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util-generated.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util-generated.h index 4d1d81d20ffc..4fac8c02703d 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util-generated.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util-generated.h @@ -30,8 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: vladl@google.com (Vlad Losev) + // Type and function utilities for implementing parameterized tests. // This file is generated by a SCRIPT. DO NOT EDIT BY HAND! @@ -43,17 +42,14 @@ // by the maximum arity of the implementation of tuple which is // currently set at 10. 
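The rest of this generated header is the ValueArrayN family that backs ::testing::Values(); the hunks below add an explicit copy constructor to each class. A minimal sketch of the user-facing API those arrays serve, assuming a hypothetical ParitySketch fixture that is not part of this patch or of libvpx:

#include "gtest/gtest.h"

// Hypothetical value-parameterized fixture, for illustration only.
class ParitySketch : public ::testing::TestWithParam<int> {};

TEST_P(ParitySketch, DoubleIsEven) {
  EXPECT_EQ((GetParam() * 2) % 2, 0);  // GetParam() yields the current value.
}

// Values(1, 2, 3) materializes a ValueArray3<int, int, int>; the explicit
// copy constructors added below are part of the machinery that lets such
// arrays be passed around when the tests are instantiated.
INSTANTIATE_TEST_CASE_P(Small, ParitySketch, ::testing::Values(1, 2, 3));
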
+// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ -// scripts/fuse_gtest.py depends on gtest's own header being #included -// *unconditionally*. Therefore these #includes cannot be moved -// inside #if GTEST_HAS_PARAM_TEST. #include "gtest/internal/gtest-param-util.h" #include "gtest/internal/gtest-port.h" -#if GTEST_HAS_PARAM_TEST - namespace testing { // Forward declarations of ValuesIn(), which is implemented in @@ -84,6 +80,8 @@ class ValueArray1 { return ValuesIn(array); } + ValueArray1(const ValueArray1& other) : v1_(other.v1_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray1& other); @@ -102,6 +100,8 @@ class ValueArray2 { return ValuesIn(array); } + ValueArray2(const ValueArray2& other) : v1_(other.v1_), v2_(other.v2_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray2& other); @@ -122,6 +122,9 @@ class ValueArray3 { return ValuesIn(array); } + ValueArray3(const ValueArray3& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray3& other); @@ -144,6 +147,9 @@ class ValueArray4 { return ValuesIn(array); } + ValueArray4(const ValueArray4& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray4& other); @@ -167,6 +173,9 @@ class ValueArray5 { return ValuesIn(array); } + ValueArray5(const ValueArray5& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray5& other); @@ -193,6 +202,9 @@ class ValueArray6 { return ValuesIn(array); } + ValueArray6(const ValueArray6& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray6& other); @@ -220,6 +232,10 @@ class ValueArray7 { return ValuesIn(array); } + ValueArray7(const ValueArray7& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray7& other); @@ -249,6 +265,10 @@ class ValueArray8 { return ValuesIn(array); } + ValueArray8(const ValueArray8& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray8& other); @@ -280,6 +300,10 @@ class ValueArray9 { return ValuesIn(array); } + ValueArray9(const ValueArray9& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray9& other); @@ -312,6 +336,10 @@ class ValueArray10 { return ValuesIn(array); } + ValueArray10(const ValueArray10& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_) {} + private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray10& other); @@ -346,6 +374,11 @@ class ValueArray11 { return ValuesIn(array); } + ValueArray11(const ValueArray11& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray11& other); @@ -382,6 +415,11 @@ class ValueArray12 { return ValuesIn(array); } + ValueArray12(const ValueArray12& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray12& other); @@ -420,6 +458,11 @@ class ValueArray13 { return ValuesIn(array); } + ValueArray13(const ValueArray13& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray13& other); @@ -459,6 +502,11 @@ class ValueArray14 { return ValuesIn(array); } + ValueArray14(const ValueArray14& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray14& other); @@ -500,6 +548,12 @@ class ValueArray15 { return ValuesIn(array); } + ValueArray15(const ValueArray15& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray15& other); @@ -544,6 +598,12 @@ class ValueArray16 { return ValuesIn(array); } + ValueArray16(const ValueArray16& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray16& other); @@ -589,6 +649,12 @@ class ValueArray17 { return ValuesIn(array); } + ValueArray17(const ValueArray17& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_) {} + private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray17& other); @@ -636,6 +702,12 @@ class ValueArray18 { return ValuesIn(array); } + ValueArray18(const ValueArray18& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray18& other); @@ -684,6 +756,13 @@ class ValueArray19 { return ValuesIn(array); } + ValueArray19(const ValueArray19& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray19& other); @@ -734,6 +813,13 @@ class ValueArray20 { return ValuesIn(array); } + ValueArray20(const ValueArray20& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray20& other); @@ -787,6 +873,13 @@ class ValueArray21 { return ValuesIn(array); } + ValueArray21(const ValueArray21& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray21& other); @@ -841,6 +934,13 @@ class ValueArray22 { return ValuesIn(array); } + ValueArray22(const ValueArray22& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray22& other); @@ -897,6 +997,14 @@ class ValueArray23 { return ValuesIn(array); } + ValueArray23(const ValueArray23& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_) {} + private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray23& other); @@ -955,6 +1063,14 @@ class ValueArray24 { return ValuesIn(array); } + ValueArray24(const ValueArray24& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray24& other); @@ -1014,6 +1130,14 @@ class ValueArray25 { return ValuesIn(array); } + ValueArray25(const ValueArray25& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray25& other); @@ -1075,6 +1199,14 @@ class ValueArray26 { return ValuesIn(array); } + ValueArray26(const ValueArray26& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray26& other); @@ -1139,6 +1271,15 @@ class ValueArray27 { return ValuesIn(array); } + ValueArray27(const ValueArray27& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray27& other); @@ -1204,6 +1345,15 @@ class ValueArray28 { return ValuesIn(array); } + ValueArray28(const ValueArray28& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_) {} + private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray28& other); @@ -1270,6 +1420,15 @@ class ValueArray29 { return ValuesIn(array); } + ValueArray29(const ValueArray29& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray29& other); @@ -1339,6 +1498,15 @@ class ValueArray30 { return ValuesIn(array); } + ValueArray30(const ValueArray30& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray30& other); @@ -1410,6 +1578,16 @@ class ValueArray31 { return ValuesIn(array); } + ValueArray31(const ValueArray31& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray31& other); @@ -1482,6 +1660,16 @@ class ValueArray32 { return ValuesIn(array); } + ValueArray32(const ValueArray32& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_) {} + private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray32& other); @@ -1557,6 +1745,16 @@ class ValueArray33 { return ValuesIn(array); } + ValueArray33(const ValueArray33& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray33& other); @@ -1633,6 +1831,16 @@ class ValueArray34 { return ValuesIn(array); } + ValueArray34(const ValueArray34& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray34& other); @@ -1710,6 +1918,17 @@ class ValueArray35 { return ValuesIn(array); } + ValueArray35(const ValueArray35& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray35& other); @@ -1790,6 +2009,17 @@ class ValueArray36 { return ValuesIn(array); } + ValueArray36(const ValueArray36& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_) {} + private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray36& other); @@ -1872,6 +2102,17 @@ class ValueArray37 { return ValuesIn(array); } + ValueArray37(const ValueArray37& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray37& other); @@ -1955,6 +2196,17 @@ class ValueArray38 { return ValuesIn(array); } + ValueArray38(const ValueArray38& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray38& other); @@ -2040,6 +2292,18 @@ class ValueArray39 { return ValuesIn(array); } + ValueArray39(const ValueArray39& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), + v39_(other.v39_) {} + private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray39& other); @@ -2127,6 +2391,18 @@ class ValueArray40 { return ValuesIn(array); } + ValueArray40(const ValueArray40& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), + v39_(other.v39_), v40_(other.v40_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray40& other); @@ -2216,6 +2492,18 @@ class ValueArray41 { return ValuesIn(array); } + ValueArray41(const ValueArray41& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), + v39_(other.v39_), v40_(other.v40_), v41_(other.v41_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray41& other); @@ -2307,6 +2595,18 @@ class ValueArray42 { return ValuesIn(array); } + ValueArray42(const ValueArray42& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), + v39_(other.v39_), v40_(other.v40_), v41_(other.v41_), v42_(other.v42_) {} + private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray42& other); @@ -2399,6 +2699,19 @@ class ValueArray43 { return ValuesIn(array); } + ValueArray43(const ValueArray43& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), + v39_(other.v39_), v40_(other.v40_), v41_(other.v41_), v42_(other.v42_), + v43_(other.v43_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray43& other); @@ -2493,6 +2806,19 @@ class ValueArray44 { return ValuesIn(array); } + ValueArray44(const ValueArray44& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), + v39_(other.v39_), v40_(other.v40_), v41_(other.v41_), v42_(other.v42_), + v43_(other.v43_), v44_(other.v44_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray44& other); @@ -2589,6 +2915,19 @@ class ValueArray45 { return ValuesIn(array); } + ValueArray45(const ValueArray45& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), + v39_(other.v39_), v40_(other.v40_), v41_(other.v41_), v42_(other.v42_), + v43_(other.v43_), v44_(other.v44_), v45_(other.v45_) {} + private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray45& other); @@ -2687,6 +3026,19 @@ class ValueArray46 { return ValuesIn(array); } + ValueArray46(const ValueArray46& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), + v39_(other.v39_), v40_(other.v40_), v41_(other.v41_), v42_(other.v42_), + v43_(other.v43_), v44_(other.v44_), v45_(other.v45_), v46_(other.v46_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray46& other); @@ -2787,6 +3139,20 @@ class ValueArray47 { return ValuesIn(array); } + ValueArray47(const ValueArray47& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), + v39_(other.v39_), v40_(other.v40_), v41_(other.v41_), v42_(other.v42_), + v43_(other.v43_), v44_(other.v44_), v45_(other.v45_), v46_(other.v46_), + v47_(other.v47_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray47& other); @@ -2889,6 +3255,20 @@ class ValueArray48 { return ValuesIn(array); } + ValueArray48(const ValueArray48& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), + v39_(other.v39_), v40_(other.v40_), v41_(other.v41_), v42_(other.v42_), + v43_(other.v43_), v44_(other.v44_), v45_(other.v45_), v46_(other.v46_), + v47_(other.v47_), v48_(other.v48_) {} + private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray48& other); @@ -2992,6 +3372,20 @@ class ValueArray49 { return ValuesIn(array); } + ValueArray49(const ValueArray49& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), + v39_(other.v39_), v40_(other.v40_), v41_(other.v41_), v42_(other.v42_), + v43_(other.v43_), v44_(other.v44_), v45_(other.v45_), v46_(other.v46_), + v47_(other.v47_), v48_(other.v48_), v49_(other.v49_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray49& other); @@ -3096,6 +3490,20 @@ class ValueArray50 { return ValuesIn(array); } + ValueArray50(const ValueArray50& other) : v1_(other.v1_), v2_(other.v2_), + v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), + v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), + v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), + v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), + v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), + v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), + v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), + v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), + v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), + v39_(other.v39_), v40_(other.v40_), v41_(other.v41_), v42_(other.v42_), + v43_(other.v43_), v44_(other.v44_), v45_(other.v45_), v46_(other.v46_), + v47_(other.v47_), v48_(other.v48_), v49_(other.v49_), v50_(other.v50_) {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray50& other); @@ -3208,7 +3616,7 @@ class CartesianProductGenerator2 virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } - virtual const ParamType* Current() const { return &current_value_; } + virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. @@ -3240,7 +3648,7 @@ class CartesianProductGenerator2 void ComputeCurrentValue() { if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_); + current_value_.reset(new ParamType(*current1_, *current2_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the @@ -3262,7 +3670,7 @@ class CartesianProductGenerator2 const typename ParamGenerator::iterator begin2_; const typename ParamGenerator::iterator end2_; typename ParamGenerator::iterator current2_; - ParamType current_value_; + linked_ptr<ParamType> current_value_; }; // class CartesianProductGenerator2::Iterator // No implementation - assignment is unsupported.
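The copy constructors added to ValueArray24 through ValueArray50 above are spelled out because each class declares a private operator=, which makes the implicitly-declared copy constructor deprecated in C++11; testing::Values() returns these arrays by value, so they must stay copyable. A minimal usage sketch of the API they back (the fixture and values here are hypothetical, not part of this patch):

    #include "gtest/gtest.h"

    class BlockSizeTest : public ::testing::TestWithParam<int> {};

    TEST_P(BlockSizeTest, IsPositive) { EXPECT_GT(GetParam(), 0); }

    // Values(4, 8, 16) constructs a ValueArray3<int, int, int> internally;
    // the array is returned and copied by value, which is why the generated
    // classes need a usable copy constructor.
    INSTANTIATE_TEST_CASE_P(VpxBlockSizes, BlockSizeTest,
                            ::testing::Values(4, 8, 16));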
@@ -3331,7 +3739,7 @@ class CartesianProductGenerator3 virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } - virtual const ParamType* Current() const { return &current_value_; } + virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. @@ -3367,7 +3775,7 @@ class CartesianProductGenerator3 void ComputeCurrentValue() { if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_); + current_value_.reset(new ParamType(*current1_, *current2_, *current3_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the @@ -3393,7 +3801,7 @@ class CartesianProductGenerator3 const typename ParamGenerator::iterator begin3_; const typename ParamGenerator::iterator end3_; typename ParamGenerator::iterator current3_; - ParamType current_value_; + linked_ptr<ParamType> current_value_; }; // class CartesianProductGenerator3::Iterator // No implementation - assignment is unsupported. @@ -3472,7 +3880,7 @@ class CartesianProductGenerator4 virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } - virtual const ParamType* Current() const { return &current_value_; } + virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. @@ -3512,8 +3920,8 @@ class CartesianProductGenerator4 void ComputeCurrentValue() { if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, - *current4_); + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, + *current4_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the @@ -3543,7 +3951,7 @@ class CartesianProductGenerator4 const typename ParamGenerator::iterator begin4_; const typename ParamGenerator::iterator end4_; typename ParamGenerator::iterator current4_; - ParamType current_value_; + linked_ptr<ParamType> current_value_; }; // class CartesianProductGenerator4::Iterator // No implementation - assignment is unsupported. @@ -3630,7 +4038,7 @@ class CartesianProductGenerator5 virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } - virtual const ParamType* Current() const { return &current_value_; } + virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. @@ -3674,8 +4082,8 @@ class CartesianProductGenerator5 void ComputeCurrentValue() { if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, - *current4_, *current5_); + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the @@ -3709,7 +4117,7 @@ class CartesianProductGenerator5 const typename ParamGenerator::iterator begin5_; const typename ParamGenerator::iterator end5_; typename ParamGenerator::iterator current5_; - ParamType current_value_; + linked_ptr<ParamType> current_value_; }; // class CartesianProductGenerator5::Iterator // No implementation - assignment is unsupported.
@@ -3807,7 +4215,7 @@ class CartesianProductGenerator6 virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } - virtual const ParamType* Current() const { return &current_value_; } + virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. @@ -3855,8 +4263,8 @@ class CartesianProductGenerator6 void ComputeCurrentValue() { if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, - *current4_, *current5_, *current6_); + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the @@ -3894,7 +4302,7 @@ class CartesianProductGenerator6 const typename ParamGenerator::iterator begin6_; const typename ParamGenerator::iterator end6_; typename ParamGenerator::iterator current6_; - ParamType current_value_; + linked_ptr<ParamType> current_value_; }; // class CartesianProductGenerator6::Iterator // No implementation - assignment is unsupported. @@ -4001,7 +4409,7 @@ class CartesianProductGenerator7 virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } - virtual const ParamType* Current() const { return &current_value_; } + virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. @@ -4053,8 +4461,8 @@ class CartesianProductGenerator7 void ComputeCurrentValue() { if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, - *current4_, *current5_, *current6_, *current7_); + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the @@ -4096,7 +4504,7 @@ class CartesianProductGenerator7 const typename ParamGenerator::iterator begin7_; const typename ParamGenerator::iterator end7_; typename ParamGenerator::iterator current7_; - ParamType current_value_; + linked_ptr<ParamType> current_value_; }; // class CartesianProductGenerator7::Iterator // No implementation - assignment is unsupported. @@ -4214,7 +4622,7 @@ class CartesianProductGenerator8 virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } - virtual const ParamType* Current() const { return &current_value_; } + virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast.
@@ -4270,8 +4678,8 @@ class CartesianProductGenerator8 void ComputeCurrentValue() { if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, - *current4_, *current5_, *current6_, *current7_, *current8_); + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_, *current8_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the @@ -4317,7 +4725,7 @@ class CartesianProductGenerator8 const typename ParamGenerator::iterator begin8_; const typename ParamGenerator::iterator end8_; typename ParamGenerator::iterator current8_; - ParamType current_value_; + linked_ptr<ParamType> current_value_; }; // class CartesianProductGenerator8::Iterator // No implementation - assignment is unsupported. @@ -4443,7 +4851,7 @@ class CartesianProductGenerator9 virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } - virtual const ParamType* Current() const { return &current_value_; } + virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. @@ -4503,9 +4911,9 @@ class CartesianProductGenerator9 void ComputeCurrentValue() { if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, *current4_, *current5_, *current6_, *current7_, *current8_, - *current9_); + *current9_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the @@ -4555,7 +4963,7 @@ class CartesianProductGenerator9 const typename ParamGenerator::iterator begin9_; const typename ParamGenerator::iterator end9_; typename ParamGenerator::iterator current9_; - ParamType current_value_; + linked_ptr<ParamType> current_value_; }; // class CartesianProductGenerator9::Iterator // No implementation - assignment is unsupported. @@ -4690,7 +5098,7 @@ class CartesianProductGenerator10 virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } - virtual const ParamType* Current() const { return &current_value_; } + virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. @@ -4754,9 +5162,9 @@ class CartesianProductGenerator10 void ComputeCurrentValue() { if (!AtEnd()) - current_value_ = ParamType(*current1_, *current2_, *current3_, + current_value_.reset(new ParamType(*current1_, *current2_, *current3_, *current4_, *current5_, *current6_, *current7_, *current8_, - *current9_, *current10_); + *current9_, *current10_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the @@ -4810,7 +5218,7 @@ class CartesianProductGenerator10 const typename ParamGenerator::iterator begin10_; const typename ParamGenerator::iterator end10_; typename ParamGenerator::iterator current10_; - ParamType current_value_; + linked_ptr<ParamType> current_value_; }; // class CartesianProductGenerator10::Iterator // No implementation - assignment is unsupported.
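Across CartesianProductGenerator2 through CartesianProductGenerator10 the iterator's cached element changes from a by-value ParamType member to a lazily allocated linked_ptr<ParamType>, so the combined tuple type no longer has to be default-constructible or assignable. A sketch of the user-facing Combine() API these generators implement (fixture and values hypothetical, not from this patch):

    #include "gtest/gtest.h"

    class DimsTest
        : public ::testing::TestWithParam< ::testing::tuple<int, int> > {};

    TEST_P(DimsTest, AreaIsPositive) {
      EXPECT_GT(::testing::get<0>(GetParam()) *
                ::testing::get<1>(GetParam()), 0);
    }

    // Combine() yields the cartesian product (4,4), (4,8), (8,4), (8,8); each
    // tuple is built by CartesianProductGeneratorN::ComputeCurrentValue().
    INSTANTIATE_TEST_CASE_P(Sizes, DimsTest,
                            ::testing::Combine(::testing::Values(4, 8),
                                               ::testing::Values(4, 8)));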
@@ -5141,6 +5549,4 @@ CartesianProductHolder10(const Generator1& g1, const Generator2& g2, } // namespace internal } // namespace testing -#endif // GTEST_HAS_PARAM_TEST - #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util-generated.h.pump b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util-generated.h.pump index 5c7c47af0bb1..30dffe43c3c6 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util-generated.h.pump +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util-generated.h.pump @@ -29,8 +29,7 @@ $var maxtuple = 10 $$ Maximum number of Combine arguments we want to support. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: vladl@google.com (Vlad Losev) + // Type and function utilities for implementing parameterized tests. // This file is generated by a SCRIPT. DO NOT EDIT BY HAND! @@ -42,17 +41,14 @@ $var maxtuple = 10 $$ Maximum number of Combine arguments we want to support. // by the maximum arity of the implementation of tuple which is // currently set at $maxtuple. +// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ -// scripts/fuse_gtest.py depends on gtest's own header being #included -// *unconditionally*. Therefore these #includes cannot be moved -// inside #if GTEST_HAS_PARAM_TEST. #include "gtest/internal/gtest-param-util.h" #include "gtest/internal/gtest-port.h" -#if GTEST_HAS_PARAM_TEST - namespace testing { // Forward declarations of ValuesIn(), which is implemented in @@ -87,6 +83,8 @@ class ValueArray$i { return ValuesIn(array); } + ValueArray$i(const ValueArray$i& other) : $for j, [[v$(j)_(other.v$(j)_)]] {} + private: // No implementation - assignment is unsupported. void operator=(const ValueArray$i& other); @@ -165,7 +163,7 @@ $for k [[ virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } - virtual const ParamType* Current() const { return &current_value_; } + virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. @@ -197,7 +195,7 @@ $for k [[ void ComputeCurrentValue() { if (!AtEnd()) - current_value_ = ParamType($for j, [[*current$(j)_]]); + current_value_.reset(new ParamType($for j, [[*current$(j)_]])); } bool AtEnd() const { // We must report iterator past the end of the range when either of the @@ -222,7 +220,7 @@ $for j [[ typename ParamGenerator::iterator current$(j)_; ]] - ParamType current_value_; + linked_ptr<ParamType> current_value_; }; // class CartesianProductGenerator$i::Iterator // No implementation - assignment is unsupported.
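The hunk above removes the trailing #endif for GTEST_HAS_PARAM_TEST; upstream googletest dropped the macro entirely because every supported compiler now handles value-parameterized tests. Note the migration hazard for downstream code: with the macro no longer defined, an old-style guard evaluates to 0 and silently compiles the instantiation out. A hypothetical before/after sketch:

    // Before: guarded for compilers without param-test support. After this
    // update GTEST_HAS_PARAM_TEST is undefined, so this #if would silently
    // disable the instantiation rather than fail to build.
    #if GTEST_HAS_PARAM_TEST
    INSTANTIATE_TEST_CASE_P(All, BlockSizeTest, ::testing::Range(1, 5));
    #endif

    // After: instantiate unconditionally.
    INSTANTIATE_TEST_CASE_P(All, BlockSizeTest, ::testing::Range(1, 5));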
@@ -281,6 +279,4 @@ $for j [[ } // namespace internal } // namespace testing -#endif // GTEST_HAS_PARAM_TEST - #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util.h index 82cab9b0201b..d64f620c4c67 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-param-util.h @@ -26,11 +26,12 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: vladl@google.com (Vlad Losev) + // Type and function utilities for implementing parameterized tests. +// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ @@ -41,16 +42,11 @@ #include #include -// scripts/fuse_gtest.py depends on gtest's own header being #included -// *unconditionally*. Therefore these #includes cannot be moved -// inside #if GTEST_HAS_PARAM_TEST. #include "gtest/internal/gtest-internal.h" #include "gtest/internal/gtest-linked_ptr.h" #include "gtest/internal/gtest-port.h" #include "gtest/gtest-printers.h" -#if GTEST_HAS_PARAM_TEST - namespace testing { // Input to a parameterized test name generator, describing a test parameter. @@ -472,7 +468,7 @@ class ParameterizedTestCaseInfoBase { virtual ~ParameterizedTestCaseInfoBase() {} // Base part of test case name for display purposes. - virtual const string& GetTestCaseName() const = 0; + virtual const std::string& GetTestCaseName() const = 0; // Test case id to verify identity. virtual TypeId GetTestCaseTypeId() const = 0; // UnitTest class invokes this method to register tests in this @@ -511,7 +507,7 @@ class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase { : test_case_name_(name), code_location_(code_location) {} // Test case base name for display purposes. - virtual const string& GetTestCaseName() const { return test_case_name_; } + virtual const std::string& GetTestCaseName() const { return test_case_name_; } // Test case id to verify identity. virtual TypeId GetTestCaseTypeId() const { return GetTypeId(); } // TEST_P macro uses AddTestPattern() to record information @@ -529,11 +525,10 @@ class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase { } // INSTANTIATE_TEST_CASE_P macro uses AddGenerator() to record information // about a generator. - int AddTestCaseInstantiation(const string& instantiation_name, + int AddTestCaseInstantiation(const std::string& instantiation_name, GeneratorCreationFunc* func, ParamNameGeneratorFunc* name_func, - const char* file, - int line) { + const char* file, int line) { instantiations_.push_back( InstantiationInfo(instantiation_name, func, name_func, file, line)); return 0; // Return value used only to run this method in namespace scope. 
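The .pump file whose diff ends above is the meta-template from which the generated header earlier in this patch is produced; a $for j, [[...]] directive expands its body once per value of j, comma-separated. For illustration, the new copy-constructor template line expands for $i = 2 to the same code seen in the generated header:

    ValueArray2(const ValueArray2& other) : v1_(other.v1_), v2_(other.v2_) {}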
@@ -550,13 +545,13 @@ class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase { for (typename InstantiationContainer::iterator gen_it = instantiations_.begin(); gen_it != instantiations_.end(); ++gen_it) { - const string& instantiation_name = gen_it->name; + const std::string& instantiation_name = gen_it->name; ParamGenerator generator((*gen_it->generator)()); ParamNameGeneratorFunc* name_func = gen_it->name_func; const char* file = gen_it->file; int line = gen_it->line; - string test_case_name; + std::string test_case_name; if ( !instantiation_name.empty() ) test_case_name = instantiation_name + "/"; test_case_name += test_info->test_case_base_name; @@ -609,8 +604,8 @@ class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase { test_base_name(a_test_base_name), test_meta_factory(a_test_meta_factory) {} - const string test_case_base_name; - const string test_base_name; + const std::string test_case_base_name; + const std::string test_base_name; const scoped_ptr > test_meta_factory; }; typedef ::std::vector > TestInfoContainer; @@ -651,7 +646,7 @@ class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase { return true; } - const string test_case_name_; + const std::string test_case_name_; CodeLocation code_location_; TestInfoContainer tests_; InstantiationContainer instantiations_; @@ -726,6 +721,4 @@ class ParameterizedTestCaseRegistry { } // namespace internal } // namespace testing -#endif // GTEST_HAS_PARAM_TEST - #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port-arch.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port-arch.h index 74ab949057c7..f83700e06d98 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port-arch.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port-arch.h @@ -27,7 +27,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// The Google C++ Testing Framework (Google Test) +// The Google C++ Testing and Mocking Framework (Google Test) // // This header file defines the GTEST_OS_* macro. // It is separate from gtest-port.h so that custom/gtest-port.h can include it. @@ -54,6 +54,9 @@ # define GTEST_OS_WINDOWS_PHONE 1 # elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) # define GTEST_OS_WINDOWS_RT 1 +# elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_TV_TITLE) +# define GTEST_OS_WINDOWS_PHONE 1 +# define GTEST_OS_WINDOWS_TV_TITLE 1 # else // WINAPI_FAMILY defined but no known partition matched. // Default to desktop. 
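The parameterized-test registration path in the gtest-param-util.h hunks above (AddTestPattern, AddTestCaseInstantiation, RegisterTests) now carries names as std::string and composes each registered test case name as "<instantiation name>/<test case base name>". The ParamNameGeneratorFunc it records comes from the optional fourth argument of INSTANTIATE_TEST_CASE_P; a sketch reusing the hypothetical fixture from the earlier example:

    // Yields names like "VpxBlockSizes/BlockSizeTest.IsPositive/Size8"
    // instead of the default trailing parameter index.
    std::string SizeName(const ::testing::TestParamInfo<int>& info) {
      return "Size" + ::testing::PrintToString(info.param);
    }

    INSTANTIATE_TEST_CASE_P(VpxBlockSizes, BlockSizeTest,
                            ::testing::Values(4, 8, 16), SizeName);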
@@ -69,6 +72,8 @@ # endif #elif defined __FreeBSD__ # define GTEST_OS_FREEBSD 1 +#elif defined __Fuchsia__ +# define GTEST_OS_FUCHSIA 1 #elif defined __linux__ # define GTEST_OS_LINUX 1 # if defined __ANDROID__ @@ -84,6 +89,8 @@ # define GTEST_OS_HPUX 1 #elif defined __native_client__ # define GTEST_OS_NACL 1 +#elif defined __NetBSD__ +# define GTEST_OS_NETBSD 1 #elif defined __OpenBSD__ # define GTEST_OS_OPENBSD 1 #elif defined __QNX__ diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port.h index da57e65d338b..786497d854c9 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-port.h @@ -27,8 +27,6 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Authors: wan@google.com (Zhanyong Wan) -// // Low-level types and utilities for porting Google Test to various // platforms. All macros ending with _ and symbols defined in an // internal namespace are subject to change without notice. Code @@ -40,6 +38,8 @@ // files are expected to #include this. Therefore, it cannot #include // any other Google Test header. +// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ @@ -73,11 +73,9 @@ // GTEST_HAS_EXCEPTIONS - Define it to 1/0 to indicate that exceptions // are enabled. // GTEST_HAS_GLOBAL_STRING - Define it to 1/0 to indicate that ::string -// is/isn't available (some systems define -// ::string, which is different to std::string). -// GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::string -// is/isn't available (some systems define -// ::wstring, which is different to std::wstring). +// is/isn't available +// GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::wstring +// is/isn't available // GTEST_HAS_POSIX_RE - Define it to 1/0 to indicate that POSIX regular // expressions are/aren't available. // GTEST_HAS_PTHREAD - Define it to 1/0 to indicate that @@ -109,6 +107,12 @@ // GTEST_CREATE_SHARED_LIBRARY // - Define to 1 when compiling Google Test itself // as a shared library. +// GTEST_DEFAULT_DEATH_TEST_STYLE +// - The default value of --gtest_death_test_style. +// The legacy default has been "fast" in the open +// source version since 2008. The recommended value +// is "threadsafe", and can be set in +// custom/gtest-port.h. // Platform-indicating macros // -------------------------- @@ -122,12 +126,14 @@ // GTEST_OS_AIX - IBM AIX // GTEST_OS_CYGWIN - Cygwin // GTEST_OS_FREEBSD - FreeBSD +// GTEST_OS_FUCHSIA - Fuchsia // GTEST_OS_HPUX - HP-UX // GTEST_OS_LINUX - Linux // GTEST_OS_LINUX_ANDROID - Google Android // GTEST_OS_MAC - Mac OS X // GTEST_OS_IOS - iOS // GTEST_OS_NACL - Google Native Client (NaCl) +// GTEST_OS_NETBSD - NetBSD // GTEST_OS_OPENBSD - OpenBSD // GTEST_OS_QNX - QNX // GTEST_OS_SOLARIS - Sun Solaris @@ -169,15 +175,15 @@ // GTEST_HAS_COMBINE - the Combine() function (for value-parameterized // tests) // GTEST_HAS_DEATH_TEST - death tests -// GTEST_HAS_PARAM_TEST - value-parameterized tests // GTEST_HAS_TYPED_TEST - typed tests // GTEST_HAS_TYPED_TEST_P - type-parameterized tests // GTEST_IS_THREADSAFE - Google Test is thread-safe. +// GOOGLETEST_CM0007 DO NOT DELETE // GTEST_USES_POSIX_RE - enhanced POSIX regex is used. 
Do not confuse with // GTEST_HAS_POSIX_RE (see above) which users can // define themselves. // GTEST_USES_SIMPLE_RE - our own simple regex is used; -// the above two are mutually exclusive. +// the above RE\b(s) are mutually exclusive. // GTEST_CAN_COMPARE_NULL - accepts untyped NULL in EXPECT_EQ(). // Misc public macros @@ -206,6 +212,7 @@ // // C++11 feature wrappers: // +// testing::internal::forward - portability wrapper for std::forward. // testing::internal::move - portability wrapper for std::move. // // Synchronization: @@ -222,10 +229,10 @@ // // Regular expressions: // RE - a simple regular expression class using the POSIX -// Extended Regular Expression syntax on UNIX-like -// platforms, or a reduced regular exception syntax on -// other platforms, including Windows. -// +// Extended Regular Expression syntax on UNIX-like platforms +// GOOGLETEST_CM0008 DO NOT DELETE +// or a reduced regular exception syntax on other +// platforms, including Windows. // Logging: // GTEST_LOG_() - logs messages at the specified severity level. // LogToStderr() - directs all log messages to stderr. @@ -271,10 +278,12 @@ # include #endif +// Brings in the definition of HAS_GLOBAL_STRING. This must be done +// BEFORE we test HAS_GLOBAL_STRING. +#include // NOLINT #include // NOLINT #include // NOLINT #include // NOLINT -#include // NOLINT #include #include // NOLINT @@ -306,7 +315,7 @@ // GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 4385) // /* code that triggers warnings C4800 and C4385 */ // GTEST_DISABLE_MSC_WARNINGS_POP_() -#if _MSC_VER >= 1500 +#if _MSC_VER >= 1400 # define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings) \ __pragma(warning(push)) \ __pragma(warning(disable: warnings)) @@ -318,12 +327,28 @@ # define GTEST_DISABLE_MSC_WARNINGS_POP_() #endif +// Clang on Windows does not understand MSVC's pragma warning. +// We need clang-specific way to disable function deprecation warning. +#ifdef __clang__ +# define GTEST_DISABLE_MSC_DEPRECATED_PUSH_() \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") \ + _Pragma("clang diagnostic ignored \"-Wdeprecated-implementations\"") +#define GTEST_DISABLE_MSC_DEPRECATED_POP_() \ + _Pragma("clang diagnostic pop") +#else +# define GTEST_DISABLE_MSC_DEPRECATED_PUSH_() \ + GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996) +# define GTEST_DISABLE_MSC_DEPRECATED_POP_() \ + GTEST_DISABLE_MSC_WARNINGS_POP_() +#endif + #ifndef GTEST_LANG_CXX11 // gcc and clang define __GXX_EXPERIMENTAL_CXX0X__ when // -std={c,gnu}++{0x,11} is passed. The C++11 standard specifies a // value for __cplusplus, and recent versions of clang, gcc, and // probably other compilers set that too in C++11 mode. -# if __GXX_EXPERIMENTAL_CXX0X__ || __cplusplus >= 201103L +# if __GXX_EXPERIMENTAL_CXX0X__ || __cplusplus >= 201103L || _MSC_VER >= 1900 // Compiling in at least C++11 mode. # define GTEST_LANG_CXX11 1 # else @@ -355,12 +380,16 @@ #if GTEST_STDLIB_CXX11 # define GTEST_HAS_STD_BEGIN_AND_END_ 1 # define GTEST_HAS_STD_FORWARD_LIST_ 1 -# define GTEST_HAS_STD_FUNCTION_ 1 +# if !defined(_MSC_VER) || (_MSC_FULL_VER >= 190023824) +// works only with VS2015U2 and better +# define GTEST_HAS_STD_FUNCTION_ 1 +# endif # define GTEST_HAS_STD_INITIALIZER_LIST_ 1 # define GTEST_HAS_STD_MOVE_ 1 -# define GTEST_HAS_STD_SHARED_PTR_ 1 -# define GTEST_HAS_STD_TYPE_TRAITS_ 1 # define GTEST_HAS_STD_UNIQUE_PTR_ 1 +# define GTEST_HAS_STD_SHARED_PTR_ 1 +# define GTEST_HAS_UNORDERED_MAP_ 1 +# define GTEST_HAS_UNORDERED_SET_ 1 #endif // C++11 specifies that provides std::tuple. 
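The GTEST_DISABLE_MSC_DEPRECATED_PUSH_/_POP_ pair added above exists because, as the diff's own comment notes, clang on Windows does not understand MSVC's #pragma warning, so the clang build needs _Pragma-based suppression instead. A usage sketch (the deprecated call is illustrative only):

    #include <string.h>

    void CopyLegacy(char (&buf)[64]) {
      GTEST_DISABLE_MSC_DEPRECATED_PUSH_()
      // Would warn as C4996 on MSVC or -Wdeprecated-declarations on clang
      // if strcpy is marked deprecated by the active CRT headers.
      strcpy(buf, "legacy");
      GTEST_DISABLE_MSC_DEPRECATED_POP_()
    }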
@@ -368,7 +397,8 @@ #if GTEST_LANG_CXX11 # define GTEST_HAS_STD_TUPLE_ 1 # if defined(__clang__) -// Inspired by http://clang.llvm.org/docs/LanguageExtensions.html#__has_include +// Inspired by +// https://clang.llvm.org/docs/LanguageExtensions.html#include-file-checking-macros # if defined(__has_include) && !__has_include() # undef GTEST_HAS_STD_TUPLE_ # endif @@ -380,7 +410,7 @@ # elif defined(__GLIBCXX__) // Inspired by boost/config/stdlib/libstdcpp3.hpp, // http://gcc.gnu.org/gcc-4.2/changes.html and -// http://gcc.gnu.org/onlinedocs/libstdc++/manual/bk01pt01ch01.html#manual.intro.status.standard.200x +// https://web.archive.org/web/20140227044429/gcc.gnu.org/onlinedocs/libstdc++/manual/bk01pt01ch01.html#manual.intro.status.standard.200x # if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2) # undef GTEST_HAS_STD_TUPLE_ # endif @@ -396,10 +426,16 @@ # include # endif // In order to avoid having to include , use forward declaration -// assuming CRITICAL_SECTION is a typedef of _RTL_CRITICAL_SECTION. +#if GTEST_OS_WINDOWS_MINGW && !defined(__MINGW64_VERSION_MAJOR) +// MinGW defined _CRITICAL_SECTION and _RTL_CRITICAL_SECTION as two +// separate (equivalent) structs, instead of using typedef +typedef struct _CRITICAL_SECTION GTEST_CRITICAL_SECTION; +#else +// Assume CRITICAL_SECTION is a typedef of _RTL_CRITICAL_SECTION. // This assumption is verified by // WindowsTypesTest.CRITICAL_SECTIONIs_RTL_CRITICAL_SECTION. -struct _RTL_CRITICAL_SECTION; +typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION; +#endif #else // This assumes that non-Windows OSes provide unistd.h. For OSes where this // is not the case, we need to include headers that provide the functions @@ -453,8 +489,11 @@ struct _RTL_CRITICAL_SECTION; #ifndef GTEST_HAS_EXCEPTIONS // The user didn't tell us whether exceptions are enabled, so we need // to figure it out. -# if defined(_MSC_VER) || defined(__BORLANDC__) -// MSVC's and C++Builder's implementations of the STL use the _HAS_EXCEPTIONS +# if defined(_MSC_VER) && defined(_CPPUNWIND) +// MSVC defines _CPPUNWIND to 1 iff exceptions are enabled. +# define GTEST_HAS_EXCEPTIONS 1 +# elif defined(__BORLANDC__) +// C++Builder's implementation of the STL uses the _HAS_EXCEPTIONS // macro to enable exceptions, so we'll do the same. // Assumes that exceptions are enabled by default. # ifndef _HAS_EXCEPTIONS @@ -498,21 +537,17 @@ struct _RTL_CRITICAL_SECTION; # define GTEST_HAS_STD_STRING 1 #elif !GTEST_HAS_STD_STRING // The user told us that ::std::string isn't available. -# error "Google Test cannot be used where ::std::string isn't available." +# error "::std::string isn't available." #endif // !defined(GTEST_HAS_STD_STRING) #ifndef GTEST_HAS_GLOBAL_STRING -// The user didn't tell us whether ::string is available, so we need -// to figure it out. - # define GTEST_HAS_GLOBAL_STRING 0 - #endif // GTEST_HAS_GLOBAL_STRING #ifndef GTEST_HAS_STD_WSTRING // The user didn't tell us whether ::std::wstring is available, so we need // to figure it out. -// TODO(wan@google.com): uses autoconf to detect whether ::std::wstring +// FIXME: uses autoconf to detect whether ::std::wstring // is available. // Cygwin 1.7 and below doesn't support ::std::wstring. @@ -600,8 +635,9 @@ struct _RTL_CRITICAL_SECTION; // // To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0 // to your compiler flags. 
-# define GTEST_HAS_PTHREAD (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX \ - || GTEST_OS_QNX || GTEST_OS_FREEBSD || GTEST_OS_NACL) +#define GTEST_HAS_PTHREAD \ + (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX || GTEST_OS_QNX || \ + GTEST_OS_FREEBSD || GTEST_OS_NACL || GTEST_OS_NETBSD || GTEST_OS_FUCHSIA) #endif // GTEST_HAS_PTHREAD #if GTEST_HAS_PTHREAD @@ -616,7 +652,7 @@ struct _RTL_CRITICAL_SECTION; // Determines if hash_map/hash_set are available. // Only used for testing against those containers. #if !defined(GTEST_HAS_HASH_MAP_) -# if _MSC_VER +# if defined(_MSC_VER) && (_MSC_VER < 1900) # define GTEST_HAS_HASH_MAP_ 1 // Indicates that hash_map is available. # define GTEST_HAS_HASH_SET_ 1 // Indicates that hash_set is available. # endif // _MSC_VER @@ -629,6 +665,14 @@ struct _RTL_CRITICAL_SECTION; # if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) // STLport, provided with the Android NDK, has neither or . # define GTEST_HAS_TR1_TUPLE 0 +# elif defined(_MSC_VER) && (_MSC_VER >= 1910) +// Prevent `warning C4996: 'std::tr1': warning STL4002: +// The non-Standard std::tr1 namespace and TR1-only machinery +// are deprecated and will be REMOVED.` +# define GTEST_HAS_TR1_TUPLE 0 +# elif GTEST_LANG_CXX11 && defined(_LIBCPP_VERSION) +// libc++ doesn't support TR1. +# define GTEST_HAS_TR1_TUPLE 0 # else // The user didn't tell us not to do it, so we assume it's OK. # define GTEST_HAS_TR1_TUPLE 1 @@ -638,6 +682,10 @@ struct _RTL_CRITICAL_SECTION; // Determines whether Google Test's own tr1 tuple implementation // should be used. #ifndef GTEST_USE_OWN_TR1_TUPLE +// We use our own tuple implementation on Symbian. +# if GTEST_OS_SYMBIAN +# define GTEST_USE_OWN_TR1_TUPLE 1 +# else // The user didn't tell us, so we need to figure it out. // We use our own TR1 tuple if we aren't sure the user has an @@ -651,7 +699,8 @@ struct _RTL_CRITICAL_SECTION; // support TR1 tuple. libc++ only provides std::tuple, in C++11 mode, // and it can be used with some compilers that define __GNUC__. # if (defined(__GNUC__) && !defined(__CUDACC__) && (GTEST_GCC_VER_ >= 40000) \ - && !GTEST_OS_QNX && !defined(_LIBCPP_VERSION)) || _MSC_VER >= 1600 + && !GTEST_OS_QNX && !defined(_LIBCPP_VERSION)) \ + || (_MSC_VER >= 1600 && _MSC_VER < 1900) # define GTEST_ENV_HAS_TR1_TUPLE_ 1 # endif @@ -667,12 +716,11 @@ struct _RTL_CRITICAL_SECTION; # else # define GTEST_USE_OWN_TR1_TUPLE 1 # endif - +# endif // GTEST_OS_SYMBIAN #endif // GTEST_USE_OWN_TR1_TUPLE -// To avoid conditional compilation everywhere, we make it -// gtest-port.h's responsibility to #include the header implementing -// tuple. +// To avoid conditional compilation we make it gtest-port.h's responsibility +// to #include the header implementing tuple. #if GTEST_HAS_STD_TUPLE_ # include // IWYU pragma: export # define GTEST_TUPLE_NAMESPACE_ ::std @@ -687,22 +735,6 @@ struct _RTL_CRITICAL_SECTION; # if GTEST_USE_OWN_TR1_TUPLE # include "gtest/internal/gtest-tuple.h" // IWYU pragma: export // NOLINT -# elif GTEST_ENV_HAS_STD_TUPLE_ -# include -// C++11 puts its tuple into the ::std namespace rather than -// ::std::tr1. gtest expects tuple to live in ::std::tr1, so put it there. -// This causes undefined behavior, but supported compilers react in -// the way we intend. 
-namespace std { -namespace tr1 { -using ::std::get; -using ::std::make_tuple; -using ::std::tuple; -using ::std::tuple_element; -using ::std::tuple_size; -} -} - # elif GTEST_OS_SYMBIAN // On Symbian, BOOST_HAS_TR1_TUPLE causes Boost's TR1 tuple library to @@ -727,20 +759,22 @@ using ::std::tuple_size; // Until version 4.3.2, gcc has a bug that causes , // which is #included by , to not compile when RTTI is // disabled. _TR1_FUNCTIONAL is the header guard for -// . Hence the following #define is a hack to prevent +// . Hence the following #define is used to prevent // from being included. # define _TR1_FUNCTIONAL 1 # include # undef _TR1_FUNCTIONAL // Allows the user to #include - // if he chooses to. + // if they choose to. # else # include // NOLINT # endif // !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302 -# else -// If the compiler is not GCC 4.0+, we assume the user is using a -// spec-conforming TR1 implementation. +// VS 2010 now has tr1 support. +# elif _MSC_VER >= 1600 # include // IWYU pragma: export // NOLINT + +# else // GTEST_USE_OWN_TR1_TUPLE +# include // IWYU pragma: export // NOLINT # endif // GTEST_USE_OWN_TR1_TUPLE #endif // GTEST_HAS_TR1_TUPLE @@ -754,8 +788,12 @@ using ::std::tuple_size; # if GTEST_OS_LINUX && !defined(__ia64__) # if GTEST_OS_LINUX_ANDROID -// On Android, clone() is only available on ARM starting with Gingerbread. -# if defined(__arm__) && __ANDROID_API__ >= 9 +// On Android, clone() became available at different API levels for each 32-bit +// architecture. +# if defined(__LP64__) || \ + (defined(__arm__) && __ANDROID_API__ >= 9) || \ + (defined(__mips__) && __ANDROID_API__ >= 12) || \ + (defined(__i386__) && __ANDROID_API__ >= 17) # define GTEST_HAS_CLONE 1 # else # define GTEST_HAS_CLONE 0 @@ -786,19 +824,15 @@ using ::std::tuple_size; // Google Test does not support death tests for VC 7.1 and earlier as // abort() in a VC 7.1 application compiled as GUI in debug config // pops up a dialog window that cannot be suppressed programmatically. -#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \ - (GTEST_OS_MAC && !GTEST_OS_IOS) || \ - (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER >= 1400) || \ +#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \ + (GTEST_OS_MAC && !GTEST_OS_IOS) || \ + (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER >= 1400) || \ GTEST_OS_WINDOWS_MINGW || GTEST_OS_AIX || GTEST_OS_HPUX || \ - GTEST_OS_OPENBSD || GTEST_OS_QNX || GTEST_OS_FREEBSD) + GTEST_OS_OPENBSD || GTEST_OS_QNX || GTEST_OS_FREEBSD || \ + GTEST_OS_NETBSD || GTEST_OS_FUCHSIA) # define GTEST_HAS_DEATH_TEST 1 #endif -// We don't support MSVC 7.1 with exceptions disabled now. Therefore -// all the compilers we care about are adequate for supporting -// value-parameterized tests. -#define GTEST_HAS_PARAM_TEST 1 - // Determines whether to support type-driven tests. // Typed tests need and variadic macros, which GCC, VC++ 8.0, @@ -813,7 +847,7 @@ using ::std::tuple_size; // value-parameterized tests are enabled. The implementation doesn't // work on Sun Studio since it doesn't understand templated conversion // operators. 
-#if GTEST_HAS_PARAM_TEST && GTEST_HAS_TR1_TUPLE && !defined(__SUNPRO_CC) +#if (GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_) && !defined(__SUNPRO_CC) # define GTEST_HAS_COMBINE 1 #endif @@ -864,15 +898,39 @@ using ::std::tuple_size; # define GTEST_ATTRIBUTE_UNUSED_ #endif +#if GTEST_LANG_CXX11 +# define GTEST_CXX11_EQUALS_DELETE_ = delete +#else // GTEST_LANG_CXX11 +# define GTEST_CXX11_EQUALS_DELETE_ +#endif // GTEST_LANG_CXX11 + +// Use this annotation before a function that takes a printf format string. +#if (defined(__GNUC__) || defined(__clang__)) && !defined(COMPILER_ICC) +# if defined(__MINGW_PRINTF_FORMAT) +// MinGW has two different printf implementations. Ensure the format macro +// matches the selected implementation. See +// https://sourceforge.net/p/mingw-w64/wiki2/gnu%20printf/. +# define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) \ + __attribute__((__format__(__MINGW_PRINTF_FORMAT, string_index, \ + first_to_check))) +# else +# define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) \ + __attribute__((__format__(__printf__, string_index, first_to_check))) +# endif +#else +# define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) +#endif + + // A macro to disallow operator= // This should be used in the private: declarations for a class. -#define GTEST_DISALLOW_ASSIGN_(type)\ - void operator=(type const &) +#define GTEST_DISALLOW_ASSIGN_(type) \ + void operator=(type const &) GTEST_CXX11_EQUALS_DELETE_ // A macro to disallow copy constructor and operator= // This should be used in the private: declarations for a class. -#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type)\ - type(type const &);\ +#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type) \ + type(type const &) GTEST_CXX11_EQUALS_DELETE_; \ GTEST_DISALLOW_ASSIGN_(type) // Tell the compiler to warn about unused return values for functions declared @@ -920,6 +978,11 @@ using ::std::tuple_size; #endif // GTEST_HAS_SEH +// GTEST_API_ qualifies all symbols that must be exported. The definitions below +// are guarded by #ifndef to give embedders a chance to define GTEST_API_ in +// gtest/internal/custom/gtest-port.h +#ifndef GTEST_API_ + #ifdef _MSC_VER # if GTEST_LINKED_AS_SHARED_LIBRARY # define GTEST_API_ __declspec(dllimport) @@ -928,11 +991,17 @@ using ::std::tuple_size; # endif #elif __GNUC__ >= 4 || defined(__clang__) # define GTEST_API_ __attribute__((visibility ("default"))) -#endif // _MSC_VER +#endif // _MSC_VER + +#endif // GTEST_API_ #ifndef GTEST_API_ # define GTEST_API_ -#endif +#endif // GTEST_API_ + +#ifndef GTEST_DEFAULT_DEATH_TEST_STYLE +# define GTEST_DEFAULT_DEATH_TEST_STYLE "fast" +#endif // GTEST_DEFAULT_DEATH_TEST_STYLE #ifdef __GNUC__ // Ask the compiler to never inline a given function. @@ -942,10 +1011,12 @@ using ::std::tuple_size; #endif // _LIBCPP_VERSION is defined by the libc++ library from the LLVM project. -#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION) -# define GTEST_HAS_CXXABI_H_ 1 -#else -# define GTEST_HAS_CXXABI_H_ 0 +#if !defined(GTEST_HAS_CXXABI_H_) +# if defined(__GLIBCXX__) || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) +# define GTEST_HAS_CXXABI_H_ 1 +# else +# define GTEST_HAS_CXXABI_H_ 0 +# endif #endif // A function level attribute to disable checking for use of uninitialized @@ -985,19 +1056,6 @@ using ::std::tuple_size; # define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ #endif // __clang__ -// A function level attribute to disable UndefinedBehaviorSanitizer's (defined) -// unsigned integer overflow instrumentation. 
-#if defined(__clang__) -# if defined(__has_attribute) && __has_attribute(no_sanitize) -# define GTEST_ATTRIBUTE_NO_SANITIZE_UNSIGNED_OVERFLOW_ \ - __attribute__((no_sanitize("unsigned-integer-overflow"))) -# else -# define GTEST_ATTRIBUTE_NO_SANITIZE_UNSIGNED_OVERFLOW_ -# endif // defined(__has_attribute) && __has_attribute(no_sanitize) -#else -# define GTEST_ATTRIBUTE_NO_SANITIZE_UNSIGNED_OVERFLOW_ -#endif // __clang__ - namespace testing { class Message; @@ -1101,6 +1159,16 @@ struct StaticAssertTypeEqHelper { enum { value = true }; }; +// Same as std::is_same<>. +template +struct IsSame { + enum { value = false }; +}; +template +struct IsSame { + enum { value = true }; +}; + // Evaluates to the number of elements in 'array'. #define GTEST_ARRAY_SIZE_(array) (sizeof(array) / sizeof(array[0])) @@ -1164,6 +1232,10 @@ class scoped_ptr { // Defines RE. +#if GTEST_USES_PCRE +// if used, PCRE is injected by custom/gtest-port.h +#elif GTEST_USES_POSIX_RE || GTEST_USES_SIMPLE_RE + // A simple C++ wrapper for . It uses the POSIX Extended // Regular Expression syntax. class GTEST_API_ RE { @@ -1175,11 +1247,11 @@ class GTEST_API_ RE { // Constructs an RE from a string. RE(const ::std::string& regex) { Init(regex.c_str()); } // NOLINT -#if GTEST_HAS_GLOBAL_STRING +# if GTEST_HAS_GLOBAL_STRING RE(const ::string& regex) { Init(regex.c_str()); } // NOLINT -#endif // GTEST_HAS_GLOBAL_STRING +# endif // GTEST_HAS_GLOBAL_STRING RE(const char* regex) { Init(regex); } // NOLINT ~RE(); @@ -1192,7 +1264,7 @@ class GTEST_API_ RE { // PartialMatch(str, re) returns true iff regular expression re // matches a substring of str (including str itself). // - // TODO(wan@google.com): make FullMatch() and PartialMatch() work + // FIXME: make FullMatch() and PartialMatch() work // when str contains NUL characters. static bool FullMatch(const ::std::string& str, const RE& re) { return FullMatch(str.c_str(), re); @@ -1201,7 +1273,7 @@ class GTEST_API_ RE { return PartialMatch(str.c_str(), re); } -#if GTEST_HAS_GLOBAL_STRING +# if GTEST_HAS_GLOBAL_STRING static bool FullMatch(const ::string& str, const RE& re) { return FullMatch(str.c_str(), re); @@ -1210,7 +1282,7 @@ class GTEST_API_ RE { return PartialMatch(str.c_str(), re); } -#endif // GTEST_HAS_GLOBAL_STRING +# endif // GTEST_HAS_GLOBAL_STRING static bool FullMatch(const char* str, const RE& re); static bool PartialMatch(const char* str, const RE& re); @@ -1219,25 +1291,27 @@ class GTEST_API_ RE { void Init(const char* regex); // We use a const char* instead of an std::string, as Google Test used to be - // used where std::string is not available. TODO(wan@google.com): change to + // used where std::string is not available. FIXME: change to // std::string. const char* pattern_; bool is_valid_; -#if GTEST_USES_POSIX_RE +# if GTEST_USES_POSIX_RE regex_t full_regex_; // For FullMatch(). regex_t partial_regex_; // For PartialMatch(). -#else // GTEST_USES_SIMPLE_RE +# else // GTEST_USES_SIMPLE_RE const char* full_pattern_; // For FullMatch(); -#endif +# endif GTEST_DISALLOW_ASSIGN_(RE); }; +#endif // GTEST_USES_PCRE + // Formats a source file path and a line number as they would appear // in an error message from the compiler used to compile this code. GTEST_API_ ::std::string FormatFileLocation(const char* file, int line); @@ -1323,13 +1397,59 @@ inline void FlushInfoLog() { fflush(NULL); } GTEST_LOG_(FATAL) << #posix_call << "failed with error " \ << gtest_error +// Adds reference to a type if it is not a reference type, +// otherwise leaves it unchanged. 
This is the same as +// tr1::add_reference, which is not widely available yet. +template +struct AddReference { typedef T& type; }; // NOLINT +template +struct AddReference { typedef T& type; }; // NOLINT + +// A handy wrapper around AddReference that works when the argument T +// depends on template parameters. +#define GTEST_ADD_REFERENCE_(T) \ + typename ::testing::internal::AddReference::type + +// Transforms "T" into "const T&" according to standard reference collapsing +// rules (this is only needed as a backport for C++98 compilers that do not +// support reference collapsing). Specifically, it transforms: +// +// char ==> const char& +// const char ==> const char& +// char& ==> char& +// const char& ==> const char& +// +// Note that the non-const reference will not have "const" added. This is +// standard, and necessary so that "T" can always bind to "const T&". +template +struct ConstRef { typedef const T& type; }; +template +struct ConstRef { typedef T& type; }; + +// The argument T must depend on some template parameters. +#define GTEST_REFERENCE_TO_CONST_(T) \ + typename ::testing::internal::ConstRef::type + #if GTEST_HAS_STD_MOVE_ +using std::forward; using std::move; + +template +struct RvalueRef { + typedef T&& type; +}; #else // GTEST_HAS_STD_MOVE_ template const T& move(const T& t) { return t; } +template +GTEST_ADD_REFERENCE_(T) forward(GTEST_ADD_REFERENCE_(T) t) { return t; } + +template +struct RvalueRef { + typedef const T& type; +}; #endif // GTEST_HAS_STD_MOVE_ // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. @@ -1430,10 +1550,6 @@ GTEST_API_ void CaptureStderr(); GTEST_API_ std::string GetCapturedStderr(); #endif // GTEST_HAS_STREAM_REDIRECTION - -// Returns a path to temporary directory. -GTEST_API_ std::string TempDir(); - // Returns the size (in bytes) of a file. GTEST_API_ size_t GetFileSize(FILE* file); @@ -1441,14 +1557,18 @@ GTEST_API_ size_t GetFileSize(FILE* file); GTEST_API_ std::string ReadEntireFile(FILE* file); // All command line arguments. -GTEST_API_ const ::std::vector& GetArgvs(); +GTEST_API_ std::vector GetArgvs(); #if GTEST_HAS_DEATH_TEST -const ::std::vector& GetInjectableArgvs(); -void SetInjectableArgvs(const ::std::vector* - new_argvs); - +std::vector GetInjectableArgvs(); +// Deprecated: pass the args vector by value instead. +void SetInjectableArgvs(const std::vector* new_argvs); +void SetInjectableArgvs(const std::vector& new_argvs); +#if GTEST_HAS_GLOBAL_STRING +void SetInjectableArgvs(const std::vector< ::string>& new_argvs); +#endif // GTEST_HAS_GLOBAL_STRING +void ClearInjectableArgvs(); #endif // GTEST_HAS_DEATH_TEST @@ -1698,7 +1818,7 @@ class GTEST_API_ Mutex { // Initializes owner_thread_id_ and critical_section_ in static mutexes. void ThreadSafeLazyInit(); - // Per http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx, + // Per https://blogs.msdn.microsoft.com/oldnewthing/20040223-00/?p=40503, // we assume that 0 is an invalid value for thread IDs. unsigned int owner_thread_id_; @@ -1706,7 +1826,7 @@ class GTEST_API_ Mutex { // by the linker. MutexType type_; long critical_section_init_phase_; // NOLINT - _RTL_CRITICAL_SECTION* critical_section_; + GTEST_CRITICAL_SECTION* critical_section_; GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex); }; @@ -1982,8 +2102,13 @@ class MutexBase { extern ::testing::internal::MutexBase mutex // Defines and statically (i.e. at link time) initializes a static mutex. 
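The collapse table documented for ConstRef above can be checked mechanically. A standalone verification, assuming a C++11 compiler purely for static_assert and std::is_same (ConstRef is restated from the hunk so the snippet compiles on its own):

#include <type_traits>

template <typename T> struct ConstRef { typedef const T& type; };
template <typename T> struct ConstRef<T&> { typedef T& type; };

static_assert(std::is_same<ConstRef<char>::type, const char&>::value, "");
static_assert(std::is_same<ConstRef<const char>::type, const char&>::value, "");
static_assert(std::is_same<ConstRef<char&>::type, char&>::value, "");
static_assert(std::is_same<ConstRef<const char&>::type, const char&>::value, "");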
-# define GTEST_DEFINE_STATIC_MUTEX_(mutex) \ - ::testing::internal::MutexBase mutex = { PTHREAD_MUTEX_INITIALIZER, false, pthread_t() } +// The initialization list here does not explicitly initialize each field, +// instead relying on default initialization for the unspecified fields. In +// particular, the owner_ field (a pthread_t) is not explicitly initialized. +// This allows initialization to work whether pthread_t is a scalar or struct. +// The flag -Wmissing-field-initializers must not be specified for this to work. +#define GTEST_DEFINE_STATIC_MUTEX_(mutex) \ + ::testing::internal::MutexBase mutex = {PTHREAD_MUTEX_INITIALIZER, false, 0} // The Mutex class can only be used for mutexes created at runtime. It // shares its API with MutexBase otherwise. @@ -2040,7 +2165,7 @@ extern "C" inline void DeleteThreadLocalValue(void* value_holder) { // Implements thread-local storage on pthreads-based systems. template -class ThreadLocal { +class GTEST_API_ ThreadLocal { public: ThreadLocal() : key_(CreateKey()), default_factory_(new DefaultValueHolderFactory()) {} @@ -2172,7 +2297,7 @@ class GTestMutexLock { typedef GTestMutexLock MutexLock; template -class ThreadLocal { +class GTEST_API_ ThreadLocal { public: ThreadLocal() : value_() {} explicit ThreadLocal(const T& value) : value_(value) {} @@ -2191,12 +2316,13 @@ class ThreadLocal { GTEST_API_ size_t GetThreadCount(); // Passing non-POD classes through ellipsis (...) crashes the ARM -// compiler and generates a warning in Sun Studio. The Nokia Symbian +// compiler and generates a warning in Sun Studio before 12u4. The Nokia Symbian // and the IBM XL C/C++ compiler try to instantiate a copy constructor // for objects passed through ellipsis (...), failing for uncopyable // objects. We define this to ensure that only POD is passed through // ellipsis on these systems. -#if defined(__SYMBIAN32__) || defined(__IBMCPP__) || defined(__SUNPRO_CC) +#if defined(__SYMBIAN32__) || defined(__IBMCPP__) || \ + (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5130) // We lose support for NULL detection where the compiler doesn't like // passing non-POD classes through ellipsis (...). # define GTEST_ELLIPSIS_NEEDS_POD_ 1 @@ -2222,6 +2348,13 @@ template const bool bool_constant::value; typedef bool_constant false_type; typedef bool_constant true_type; +template +struct is_same : public false_type {}; + +template +struct is_same : public true_type {}; + + template struct is_pointer : public false_type {}; @@ -2233,6 +2366,7 @@ struct IteratorTraits { typedef typename Iterator::value_type value_type; }; + template struct IteratorTraits { typedef T value_type; @@ -2364,7 +2498,7 @@ inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); } // Functions deprecated by MSVC 8.0. -GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996 /* deprecated function */) +GTEST_DISABLE_MSC_DEPRECATED_PUSH_() inline const char* StrNCpy(char* dest, const char* src, size_t n) { return strncpy(dest, src, n); @@ -2398,7 +2532,7 @@ inline int Close(int fd) { return close(fd); } inline const char* StrError(int errnum) { return strerror(errnum); } #endif inline const char* GetEnv(const char* name) { -#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE | GTEST_OS_WINDOWS_RT +#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT // We are on Windows CE, which has no environment variables. static_cast(name); // To prevent 'unused argument' warning. 
return NULL; @@ -2412,7 +2546,7 @@ inline const char* GetEnv(const char* name) { #endif } -GTEST_DISABLE_MSC_WARNINGS_POP_() +GTEST_DISABLE_MSC_DEPRECATED_POP_() #if GTEST_OS_WINDOWS_MOBILE // Windows CE has no C library. The abort() function is used in @@ -2528,15 +2662,15 @@ typedef TypeWithSize<8>::Int TimeInMillis; // Represents time in milliseconds. # define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name) # define GTEST_DECLARE_int32_(name) \ GTEST_API_ extern ::testing::internal::Int32 GTEST_FLAG(name) -#define GTEST_DECLARE_string_(name) \ +# define GTEST_DECLARE_string_(name) \ GTEST_API_ extern ::std::string GTEST_FLAG(name) // Macros for defining flags. -#define GTEST_DEFINE_bool_(name, default_val, doc) \ +# define GTEST_DEFINE_bool_(name, default_val, doc) \ GTEST_API_ bool GTEST_FLAG(name) = (default_val) -#define GTEST_DEFINE_int32_(name, default_val, doc) \ +# define GTEST_DEFINE_int32_(name, default_val, doc) \ GTEST_API_ ::testing::internal::Int32 GTEST_FLAG(name) = (default_val) -#define GTEST_DEFINE_string_(name, default_val, doc) \ +# define GTEST_DEFINE_string_(name, default_val, doc) \ GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val) #endif // !defined(GTEST_DECLARE_bool_) @@ -2550,7 +2684,7 @@ typedef TypeWithSize<8>::Int TimeInMillis; // Represents time in milliseconds. // Parses 'str' for a 32-bit signed integer. If successful, writes the result // to *value and returns true; otherwise leaves *value unchanged and returns // false. -// TODO(chandlerc): Find a better way to refactor flag and environment parsing +// FIXME: Find a better way to refactor flag and environment parsing // out of both gtest-port.cc and gtest.cc to avoid exporting this utility // function. bool ParseInt32(const Message& src_text, const char* str, Int32* value); @@ -2559,7 +2693,8 @@ bool ParseInt32(const Message& src_text, const char* str, Int32* value); // corresponding to the given Google Test flag. bool BoolFromGTestEnv(const char* flag, bool default_val); GTEST_API_ Int32 Int32FromGTestEnv(const char* flag, Int32 default_val); -std::string StringFromGTestEnv(const char* flag, const char* default_val); +std::string OutputFlagAlsoCheckEnvVar(); +const char* StringFromGTestEnv(const char* flag, const char* default_val); } // namespace internal } // namespace testing diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-string.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-string.h index 97f1a7fdd2c0..4c9b6262c3c1 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-string.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-string.h @@ -27,17 +27,17 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) -// -// The Google C++ Testing Framework (Google Test) +// The Google C++ Testing and Mocking Framework (Google Test) // // This header file declares the String class and functions used internally by // Google Test. They are subject to change without notice. They should not used // by code external to Google Test. // -// This header file is #included by . +// This header file is #included by gtest-internal.h. // It should not be #included by other files. 
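For orientation, the declare/define flag macros touched in the hunks above are used as a pair; my_feature below is a hypothetical flag name, not one this patch adds:

// In a header:
GTEST_DECLARE_bool_(my_feature);

// In exactly one .cc file:
GTEST_DEFINE_bool_(my_feature, false, "Enables the hypothetical feature.");

// At a use site:
//   if (GTEST_FLAG(my_feature)) { ... }
// The matching environment variable consulted by BoolFromGTestEnv would be
// GTEST_MY_FEATURE.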
+// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-tuple.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-tuple.h index e9b405340a85..78a3a6a01fac 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-tuple.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-tuple.h @@ -30,11 +30,12 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) + // Implements a subset of TR1 tuple needed by Google Test and Google Mock. +// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ @@ -42,7 +43,7 @@ // The compiler used in Symbian has a bug that prevents us from declaring the // tuple template as a friend (it complains that tuple is redefined). This -// hack bypasses the bug by declaring the members that should otherwise be +// bypasses the bug by declaring the members that should otherwise be // private as public. // Sun Studio versions < 12 also have the above bug. #if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590) diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-tuple.h.pump b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-tuple.h.pump index 429ddfeecaa7..bb626e049f08 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-tuple.h.pump +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-tuple.h.pump @@ -29,11 +29,12 @@ $$ This meta comment fixes auto-indentation in Emacs. }} // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) + // Implements a subset of TR1 tuple needed by Google Test and Google Mock. +// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ @@ -41,7 +42,7 @@ $$ This meta comment fixes auto-indentation in Emacs. }} // The compiler used in Symbian has a bug that prevents us from declaring the // tuple template as a friend (it complains that tuple is redefined). This -// hack bypasses the bug by declaring the members that should otherwise be +// bypasses the bug by declaring the members that should otherwise be // private as public. // Sun Studio versions < 12 also have the above bug. 
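The Symbian/Sun workaround referenced in both tuple headers follows one pattern: where the compiler cannot handle the friend declaration, the members that should be private are made public instead. A reduced model under the same compiler conditions (Box and DEMO_DECLARE_AS_FRIEND_ are illustrative, not gtest's actual macro):

#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590)
# define DEMO_DECLARE_AS_FRIEND_ public:
#else
# define DEMO_DECLARE_AS_FRIEND_ \
    template <typename U> friend class Box; \
   private:
#endif

template <typename T>
class Box {
 public:
  explicit Box(const T& v) : value_(v) {}

  DEMO_DECLARE_AS_FRIEND_
  T value_;  // friend-accessible on conforming compilers, public on buggy ones
};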
#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590) diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-type-util.h b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-type-util.h index e46f7cfcb483..28e411245361 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-type-util.h +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-type-util.h @@ -30,8 +30,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) + // Type utilities needed for implementing typed and type-parameterized // tests. This file is generated by a SCRIPT. DO NOT EDIT BY HAND! @@ -41,6 +40,8 @@ // Please contact googletestframework@googlegroups.com if you need // more. +// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ @@ -57,6 +58,22 @@ namespace testing { namespace internal { +// Canonicalizes a given name with respect to the Standard C++ Library. +// This handles removing the inline namespace within `std` that is +// used by various standard libraries (e.g., `std::__1`). Names outside +// of namespace std are returned unmodified. +inline std::string CanonicalizeForStdLibVersioning(std::string s) { + static const char prefix[] = "std::__"; + if (s.compare(0, strlen(prefix), prefix) == 0) { + std::string::size_type end = s.find("::", strlen(prefix)); + if (end != s.npos) { + // Erase everything between the initial `std` and the second `::`. + s.erase(strlen("std"), end - strlen("std")); + } + } + return s; +} + // GetTypeName() returns a human-readable name of type T. // NB: This function is also used in Google Mock, so don't move it inside of // the typed-test-only section below. @@ -75,7 +92,7 @@ std::string GetTypeName() { char* const readable_name = __cxa_demangle(name, 0, 0, &status); const std::string name_str(status == 0 ? readable_name : name); free(readable_name); - return name_str; + return CanonicalizeForStdLibVersioning(name_str); # else return name; # endif // GTEST_HAS_CXXABI_H_ || __HP_aCC diff --git a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-type-util.h.pump b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-type-util.h.pump index 251fdf025b2c..0001a5d39dfa 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-type-util.h.pump +++ b/media/libvpx/libvpx/third_party/googletest/src/include/gtest/internal/gtest-type-util.h.pump @@ -28,8 +28,7 @@ $var n = 50 $$ Maximum length of type lists we want to support. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) + // Type utilities needed for implementing typed and type-parameterized // tests. This file is generated by a SCRIPT. DO NOT EDIT BY HAND! @@ -39,6 +38,8 @@ $var n = 50 $$ Maximum length of type lists we want to support. // Please contact googletestframework@googlegroups.com if you need // more. 
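To make the new CanonicalizeForStdLibVersioning() concrete, here is a standalone run; the function body is copied from the hunk above, and main() is illustrative:

#include <cstring>
#include <iostream>
#include <string>

inline std::string CanonicalizeForStdLibVersioning(std::string s) {
  static const char prefix[] = "std::__";
  if (s.compare(0, strlen(prefix), prefix) == 0) {
    std::string::size_type end = s.find("::", strlen(prefix));
    if (end != s.npos) {
      // Erase everything between the initial `std` and the second `::`.
      s.erase(strlen("std"), end - strlen("std"));
    }
  }
  return s;
}

int main() {
  // libc++ puts standard types in the std::__1 inline namespace:
  std::cout << CanonicalizeForStdLibVersioning("std::__1::vector<int>") << "\n";
  // prints: std::vector<int>
  std::cout << CanonicalizeForStdLibVersioning("mylib::vector<int>") << "\n";
  // prints: mylib::vector<int> (names outside std are returned unmodified)
  return 0;
}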
+// GOOGLETEST_CM0001 DO NOT DELETE + #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ @@ -55,6 +56,22 @@ $var n = 50 $$ Maximum length of type lists we want to support. namespace testing { namespace internal { +// Canonicalizes a given name with respect to the Standard C++ Library. +// This handles removing the inline namespace within `std` that is +// used by various standard libraries (e.g., `std::__1`). Names outside +// of namespace std are returned unmodified. +inline std::string CanonicalizeForStdLibVersioning(std::string s) { + static const char prefix[] = "std::__"; + if (s.compare(0, strlen(prefix), prefix) == 0) { + std::string::size_type end = s.find("::", strlen(prefix)); + if (end != s.npos) { + // Erase everything between the initial `std` and the second `::`. + s.erase(strlen("std"), end - strlen("std")); + } + } + return s; +} + // GetTypeName() returns a human-readable name of type T. // NB: This function is also used in Google Mock, so don't move it inside of // the typed-test-only section below. @@ -73,7 +90,7 @@ std::string GetTypeName() { char* const readable_name = __cxa_demangle(name, 0, 0, &status); const std::string name_str(status == 0 ? readable_name : name); free(readable_name); - return name_str; + return CanonicalizeForStdLibVersioning(name_str); # else return name; # endif // GTEST_HAS_CXXABI_H_ || __HP_aCC diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-all.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-all.cc index 0a9cee522333..b217a18006b0 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-all.cc +++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-all.cc @@ -26,10 +26,9 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // -// Author: mheule@google.com (Markus Heule) -// -// Google C++ Testing Framework (Google Test) +// Google C++ Testing and Mocking Framework (Google Test) // // Sometimes it's desirable to build Google Test by compiling a single file. // This file serves this purpose. diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-death-test.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-death-test.cc index a01a36983087..09083551612e 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-death-test.cc +++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-death-test.cc @@ -26,8 +26,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan), vladl@google.com (Vlad Losev) + // // This file implements death tests. @@ -62,26 +61,30 @@ # include # endif // GTEST_OS_QNX +# if GTEST_OS_FUCHSIA +# include +# include +# include +# include +# include +# endif // GTEST_OS_FUCHSIA + #endif // GTEST_HAS_DEATH_TEST #include "gtest/gtest-message.h" #include "gtest/internal/gtest-string.h" - -// Indicates that this translation unit is part of Google Test's -// implementation. It must come before gtest-internal-inl.h is -// included, or there will be a compiler error. This trick exists to -// prevent the accidental inclusion of gtest-internal-inl.h in the -// user's code. 
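With the GTEST_IMPLEMENTATION_ guard gone, the single-file build that gtest-all.cc provides is nothing more than a sequence of #includes of the implementation files. Schematically (the exact list is abbreviated; consult gtest-all.cc itself for the full set):

// gtest-all.cc, in outline:
#include "gtest/gtest.h"
#include "src/gtest.cc"
#include "src/gtest-death-test.cc"
#include "src/gtest-filepath.cc"
#include "src/gtest-port.cc"
// ... the remaining src/gtest-*.cc files ...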
-#define GTEST_IMPLEMENTATION_ 1 #include "src/gtest-internal-inl.h" -#undef GTEST_IMPLEMENTATION_ namespace testing { // Constants. // The default death test style. -static const char kDefaultDeathTestStyle[] = "fast"; +// +// This is defined in internal/gtest-port.h as "fast", but can be overridden by +// a definition in internal/custom/gtest-port.h. The recommended value, which is +// used internally at Google, is "threadsafe". +static const char kDefaultDeathTestStyle[] = GTEST_DEFAULT_DEATH_TEST_STYLE; GTEST_DEFINE_string_( death_test_style, @@ -121,7 +124,7 @@ namespace internal { // Valid only for fast death tests. Indicates the code is running in the // child process of a fast style death test. -# if !GTEST_OS_WINDOWS +# if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA static bool g_in_fast_death_test_child = false; # endif @@ -131,10 +134,10 @@ static bool g_in_fast_death_test_child = false; // tests. IMPORTANT: This is an internal utility. Using it may break the // implementation of death tests. User code MUST NOT use it. bool InDeathTestChild() { -# if GTEST_OS_WINDOWS +# if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA - // On Windows, death tests are thread-safe regardless of the value of the - // death_test_style flag. + // On Windows and Fuchsia, death tests are thread-safe regardless of the value + // of the death_test_style flag. return !GTEST_FLAG(internal_run_death_test).empty(); # else @@ -154,7 +157,7 @@ ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) { // ExitedWithCode function-call operator. bool ExitedWithCode::operator()(int exit_status) const { -# if GTEST_OS_WINDOWS +# if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA return exit_status == exit_code_; @@ -162,10 +165,10 @@ bool ExitedWithCode::operator()(int exit_status) const { return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == exit_code_; -# endif // GTEST_OS_WINDOWS +# endif // GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA } -# if !GTEST_OS_WINDOWS +# if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA // KilledBySignal constructor. KilledBySignal::KilledBySignal(int signum) : signum_(signum) { } @@ -182,7 +185,7 @@ bool KilledBySignal::operator()(int exit_status) const { # endif // defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_) return WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum_; } -# endif // !GTEST_OS_WINDOWS +# endif // !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA namespace internal { @@ -193,7 +196,7 @@ namespace internal { static std::string ExitSummary(int exit_code) { Message m; -# if GTEST_OS_WINDOWS +# if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA m << "Exited with exit status " << exit_code; @@ -209,7 +212,7 @@ static std::string ExitSummary(int exit_code) { m << " (core dumped)"; } # endif -# endif // GTEST_OS_WINDOWS +# endif // GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA return m.GetString(); } @@ -220,7 +223,7 @@ bool ExitedUnsuccessfully(int exit_status) { return !ExitedWithCode(0)(exit_status); } -# if !GTEST_OS_WINDOWS +# if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA // Generates a textual failure message when a death test finds more than // one thread running, or cannot determine the number of threads, prior // to executing the given statement. It is the responsibility of the @@ -229,13 +232,19 @@ static std::string DeathTestThreadWarning(size_t thread_count) { Message msg; msg << "Death tests use fork(), which is unsafe particularly" << " in a threaded context. 
For this test, " << GTEST_NAME_ << " "; - if (thread_count == 0) + if (thread_count == 0) { msg << "couldn't detect the number of threads."; - else + } else { msg << "detected " << thread_count << " threads."; + } + msg << " See " + "https://github.com/google/googletest/blob/master/googletest/docs/" + "advanced.md#death-tests-and-threads" + << " for more explanation and suggested solutions, especially if" + << " this is the last message you see before your test times out."; return msg.GetString(); } -# endif // !GTEST_OS_WINDOWS +# endif // !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA // Flag characters for reporting a death test that did not die. static const char kDeathTestLived = 'L'; @@ -243,6 +252,13 @@ static const char kDeathTestReturned = 'R'; static const char kDeathTestThrew = 'T'; static const char kDeathTestInternalError = 'I'; +#if GTEST_OS_FUCHSIA + +// File descriptor used for the pipe in the child process. +static const int kFuchsiaReadPipeFd = 3; + +#endif + // An enumeration describing all of the possible ways that a death test can // conclude. DIED means that the process died while executing the test // code; LIVED means that process lived beyond the end of the test code; @@ -250,7 +266,7 @@ static const char kDeathTestInternalError = 'I'; // statement, which is not allowed; THREW means that the test statement // returned control by throwing an exception. IN_PROGRESS means the test // has not yet concluded. -// TODO(vladl@google.com): Unify names and possibly values for +// FIXME: Unify names and possibly values for // AbortReason, DeathTestOutcome, and flag characters above. enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW }; @@ -259,7 +275,7 @@ enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW }; // message is propagated back to the parent process. Otherwise, the // message is simply printed to stderr. In either case, the program // then exits with status 1. -void DeathTestAbort(const std::string& message) { +static void DeathTestAbort(const std::string& message) { // On a POSIX system, this function may be called from a threadsafe-style // death test child process, which operates on a very small stack. Use // the heap for any additional non-minuscule memory requirements. @@ -563,7 +579,12 @@ bool DeathTestImpl::Passed(bool status_ok) { break; case DIED: if (status_ok) { +# if GTEST_USES_PCRE + // PCRE regexes support embedded NULs. + const bool matched = RE::PartialMatch(error_message, *regex()); +# else const bool matched = RE::PartialMatch(error_message.c_str(), *regex()); +# endif // GTEST_USES_PCRE if (matched) { success = true; } else { @@ -779,7 +800,200 @@ DeathTest::TestRole WindowsDeathTest::AssumeRole() { set_spawned(true); return OVERSEE_TEST; } -# else // We are not on Windows. + +# elif GTEST_OS_FUCHSIA + +class FuchsiaDeathTest : public DeathTestImpl { + public: + FuchsiaDeathTest(const char* a_statement, + const RE* a_regex, + const char* file, + int line) + : DeathTestImpl(a_statement, a_regex), file_(file), line_(line) {} + virtual ~FuchsiaDeathTest() { + zx_status_t status = zx_handle_close(child_process_); + GTEST_DEATH_TEST_CHECK_(status == ZX_OK); + status = zx_handle_close(port_); + GTEST_DEATH_TEST_CHECK_(status == ZX_OK); + } + + // All of these virtual functions are inherited from DeathTest. + virtual int Wait(); + virtual TestRole AssumeRole(); + + private: + // The name of the file in which the death test is located. + const char* const file_; + // The line number on which the death test is located. 
+ const int line_; + + zx_handle_t child_process_ = ZX_HANDLE_INVALID; + zx_handle_t port_ = ZX_HANDLE_INVALID; +}; + +// Utility class for accumulating command-line arguments. +class Arguments { + public: + Arguments() { + args_.push_back(NULL); + } + + ~Arguments() { + for (std::vector::iterator i = args_.begin(); i != args_.end(); + ++i) { + free(*i); + } + } + void AddArgument(const char* argument) { + args_.insert(args_.end() - 1, posix::StrDup(argument)); + } + + template + void AddArguments(const ::std::vector& arguments) { + for (typename ::std::vector::const_iterator i = arguments.begin(); + i != arguments.end(); + ++i) { + args_.insert(args_.end() - 1, posix::StrDup(i->c_str())); + } + } + char* const* Argv() { + return &args_[0]; + } + + int size() { + return args_.size() - 1; + } + + private: + std::vector args_; +}; + +// Waits for the child in a death test to exit, returning its exit +// status, or 0 if no child process exists. As a side effect, sets the +// outcome data member. +int FuchsiaDeathTest::Wait() { + if (!spawned()) + return 0; + + // Register to wait for the child process to terminate. + zx_status_t status_zx; + status_zx = zx_object_wait_async(child_process_, + port_, + 0 /* key */, + ZX_PROCESS_TERMINATED, + ZX_WAIT_ASYNC_ONCE); + GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK); + + // Wait for it to terminate, or an exception to be received. + zx_port_packet_t packet; + status_zx = zx_port_wait(port_, ZX_TIME_INFINITE, &packet); + GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK); + + if (ZX_PKT_IS_EXCEPTION(packet.type)) { + // Process encountered an exception. Kill it directly rather than letting + // other handlers process the event. + status_zx = zx_task_kill(child_process_); + GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK); + + // Now wait for |child_process_| to terminate. + zx_signals_t signals = 0; + status_zx = zx_object_wait_one( + child_process_, ZX_PROCESS_TERMINATED, ZX_TIME_INFINITE, &signals); + GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK); + GTEST_DEATH_TEST_CHECK_(signals & ZX_PROCESS_TERMINATED); + } else { + // Process terminated. + GTEST_DEATH_TEST_CHECK_(ZX_PKT_IS_SIGNAL_ONE(packet.type)); + GTEST_DEATH_TEST_CHECK_(packet.signal.observed & ZX_PROCESS_TERMINATED); + } + + ReadAndInterpretStatusByte(); + + zx_info_process_t buffer; + status_zx = zx_object_get_info( + child_process_, + ZX_INFO_PROCESS, + &buffer, + sizeof(buffer), + nullptr, + nullptr); + GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK); + + GTEST_DEATH_TEST_CHECK_(buffer.exited); + set_status(buffer.return_code); + return status(); +} + +// The AssumeRole process for a Fuchsia death test. It creates a child +// process with the same executable as the current process to run the +// death test. The child process is given the --gtest_filter and +// --gtest_internal_run_death_test flags such that it knows to run the +// current death test only. +DeathTest::TestRole FuchsiaDeathTest::AssumeRole() { + const UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const TestInfo* const info = impl->current_test_info(); + const int death_test_index = info->result()->death_test_count(); + + if (flag != NULL) { + // ParseInternalRunDeathTestFlag() has performed all the necessary + // processing. + set_write_fd(kFuchsiaReadPipeFd); + return EXECUTE_TEST; + } + + CaptureStderr(); + // Flush the log buffers since the log streams are shared with the child. + FlushInfoLog(); + + // Build the child process command line. 
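A usage sketch for the Arguments helper above; the path and filter are made up, and the invariant being maintained is that args_ always ends with the NULL terminator that exec/spawn-style APIs require:

Arguments args;
args.AddArgument("/pkg/bin/my_test");               // argv[0], illustrative
args.AddArgument("--gtest_filter=MyDeathTest.*");
char* const* argv = args.Argv();  // argv[args.size()] is NULL
// argv can now be handed to fdio_spawn_etc(), as AssumeRole() does below.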
+ const std::string filter_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" + + info->test_case_name() + "." + info->name(); + const std::string internal_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "=" + + file_ + "|" + + StreamableToString(line_) + "|" + + StreamableToString(death_test_index); + Arguments args; + args.AddArguments(GetInjectableArgvs()); + args.AddArgument(filter_flag.c_str()); + args.AddArgument(internal_flag.c_str()); + + // Build the pipe for communication with the child. + zx_status_t status; + zx_handle_t child_pipe_handle; + uint32_t type; + status = fdio_pipe_half(&child_pipe_handle, &type); + GTEST_DEATH_TEST_CHECK_(status >= 0); + set_read_fd(status); + + // Set the pipe handle for the child. + fdio_spawn_action_t add_handle_action = {}; + add_handle_action.action = FDIO_SPAWN_ACTION_ADD_HANDLE; + add_handle_action.h.id = PA_HND(type, kFuchsiaReadPipeFd); + add_handle_action.h.handle = child_pipe_handle; + + // Spawn the child process. + status = fdio_spawn_etc(ZX_HANDLE_INVALID, FDIO_SPAWN_CLONE_ALL, + args.Argv()[0], args.Argv(), nullptr, 1, + &add_handle_action, &child_process_, nullptr); + GTEST_DEATH_TEST_CHECK_(status == ZX_OK); + + // Create an exception port and attach it to the |child_process_|, to allow + // us to suppress the system default exception handler from firing. + status = zx_port_create(0, &port_); + GTEST_DEATH_TEST_CHECK_(status == ZX_OK); + status = zx_task_bind_exception_port( + child_process_, port_, 0 /* key */, 0 /*options */); + GTEST_DEATH_TEST_CHECK_(status == ZX_OK); + + set_spawned(true); + return OVERSEE_TEST; +} + +#else // We are neither on Windows, nor on Fuchsia. // ForkingDeathTest provides implementations for most of the abstract // methods of the DeathTest interface. Only the AssumeRole method is @@ -883,11 +1097,10 @@ class ExecDeathTest : public ForkingDeathTest { ForkingDeathTest(a_statement, a_regex), file_(file), line_(line) { } virtual TestRole AssumeRole(); private: - static ::std::vector - GetArgvsForDeathTestChildProcess() { - ::std::vector args = GetInjectableArgvs(); + static ::std::vector GetArgvsForDeathTestChildProcess() { + ::std::vector args = GetInjectableArgvs(); # if defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_) - ::std::vector extra_args = + ::std::vector extra_args = GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_(); args.insert(args.end(), extra_args.begin(), extra_args.end()); # endif // defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_) @@ -986,6 +1199,7 @@ static int ExecDeathTestChildMain(void* child_arg) { } # endif // !GTEST_OS_QNX +# if GTEST_HAS_CLONE // Two utility routines that together determine the direction the stack // grows. // This could be accomplished more elegantly by a single recursive @@ -995,20 +1209,22 @@ static int ExecDeathTestChildMain(void* child_arg) { // GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining // StackLowerThanAddress into StackGrowsDown, which then doesn't give // correct answer. -void StackLowerThanAddress(const void* ptr, bool* result) GTEST_NO_INLINE_; -void StackLowerThanAddress(const void* ptr, bool* result) { +static void StackLowerThanAddress(const void* ptr, + bool* result) GTEST_NO_INLINE_; +static void StackLowerThanAddress(const void* ptr, bool* result) { int dummy; *result = (&dummy < ptr); } // Make sure AddressSanitizer does not tamper with the stack here. 
GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ -bool StackGrowsDown() { +static bool StackGrowsDown() { int dummy; bool result; StackLowerThanAddress(&dummy, &result); return result; } +# endif // GTEST_HAS_CLONE // Spawns a child process with the same executable as the current process in // a thread-safe manner and instructs it to run the death test. The @@ -1200,6 +1416,13 @@ bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex, *test = new WindowsDeathTest(statement, regex, file, line); } +# elif GTEST_OS_FUCHSIA + + if (GTEST_FLAG(death_test_style) == "threadsafe" || + GTEST_FLAG(death_test_style) == "fast") { + *test = new FuchsiaDeathTest(statement, regex, file, line); + } + # else if (GTEST_FLAG(death_test_style) == "threadsafe") { @@ -1224,7 +1447,7 @@ bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex, // Recreates the pipe and event handles from the provided parameters, // signals the event, and returns a file descriptor wrapped around the pipe // handle. This function is called in the child process only. -int GetStatusFileDescriptor(unsigned int parent_process_id, +static int GetStatusFileDescriptor(unsigned int parent_process_id, size_t write_handle_as_size_t, size_t event_handle_as_size_t) { AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE, @@ -1235,7 +1458,7 @@ int GetStatusFileDescriptor(unsigned int parent_process_id, StreamableToString(parent_process_id)); } - // TODO(vladl@google.com): Replace the following check with a + // FIXME: Replace the following check with a // compile-time assertion when available. GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t)); @@ -1243,7 +1466,7 @@ int GetStatusFileDescriptor(unsigned int parent_process_id, reinterpret_cast(write_handle_as_size_t); HANDLE dup_write_handle; - // The newly initialized handle is accessible only in in the parent + // The newly initialized handle is accessible only in the parent // process. To obtain one accessible within the child, we need to use // DuplicateHandle. if (!::DuplicateHandle(parent_process_handle.Get(), write_handle, @@ -1320,6 +1543,16 @@ InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() { write_fd = GetStatusFileDescriptor(parent_process_id, write_handle_as_size_t, event_handle_as_size_t); + +# elif GTEST_OS_FUCHSIA + + if (fields.size() != 3 + || !ParseNaturalNumber(fields[1], &line) + || !ParseNaturalNumber(fields[2], &index)) { + DeathTestAbort("Bad --gtest_internal_run_death_test flag: " + + GTEST_FLAG(internal_run_death_test)); + } + # else if (fields.size() != 4 diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-filepath.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-filepath.cc index 0292dc11957e..a7e65c082a75 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-filepath.cc +++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-filepath.cc @@ -26,14 +26,12 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Authors: keith.ray@gmail.com (Keith Ray) -#include "gtest/gtest-message.h" #include "gtest/internal/gtest-filepath.h" -#include "gtest/internal/gtest-port.h" #include +#include "gtest/internal/gtest-port.h" +#include "gtest/gtest-message.h" #if GTEST_OS_WINDOWS_MOBILE # include @@ -48,6 +46,8 @@ # include // Some Linux distributions define PATH_MAX here. 
#endif // GTEST_OS_WINDOWS_MOBILE +#include "gtest/internal/gtest-string.h" + #if GTEST_OS_WINDOWS # define GTEST_PATH_MAX_ _MAX_PATH #elif defined(PATH_MAX) @@ -58,8 +58,6 @@ # define GTEST_PATH_MAX_ _POSIX_PATH_MAX #endif // GTEST_OS_WINDOWS -#include "gtest/internal/gtest-string.h" - namespace testing { namespace internal { @@ -130,7 +128,7 @@ FilePath FilePath::RemoveExtension(const char* extension) const { return *this; } -// Returns a pointer to the last occurence of a valid path separator in +// Returns a pointer to the last occurrence of a valid path separator in // the FilePath. On Windows, for example, both '/' and '\' are valid path // separators. Returns NULL if no path separator was found. const char* FilePath::FindLastPathSeparator() const { @@ -252,7 +250,7 @@ bool FilePath::DirectoryExists() const { // root directory per disk drive.) bool FilePath::IsRootDirectory() const { #if GTEST_OS_WINDOWS - // TODO(wan@google.com): on Windows a network share like + // FIXME: on Windows a network share like // \\server\share can be a root directory, although it cannot be the // current directory. Handle this properly. return pathname_.length() == 3 && IsAbsolutePath(); @@ -352,7 +350,7 @@ FilePath FilePath::RemoveTrailingPathSeparator() const { // Removes any redundant separators that might be in the pathname. // For example, "bar///foo" becomes "bar/foo". Does not eliminate other // redundancies that might be in a pathname involving "." or "..". -// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share). +// FIXME: handle Windows network shares (e.g. \\server\share). void FilePath::Normalize() { if (pathname_.c_str() == NULL) { pathname_ = ""; diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-internal-inl.h b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-internal-inl.h index ed8a682a964f..479004149b48 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-internal-inl.h +++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-internal-inl.h @@ -27,24 +27,13 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// Utility functions and classes used by the Google C++ testing framework. -// -// Author: wan@google.com (Zhanyong Wan) -// +// Utility functions and classes used by the Google C++ testing framework.// // This file contains purely Google Test's internal implementation. Please // DO NOT #INCLUDE IT IN A USER PROGRAM. #ifndef GTEST_SRC_GTEST_INTERNAL_INL_H_ #define GTEST_SRC_GTEST_INTERNAL_INL_H_ -// GTEST_IMPLEMENTATION_ is defined to 1 iff the current translation unit is -// part of Google Test's implementation; otherwise it's undefined. -#if !GTEST_IMPLEMENTATION_ -// If this file is included from the user's code, just say no. -# error "gtest-internal-inl.h is part of Google Test's internal implementation." -# error "It must not be included except by Google Test itself." -#endif // GTEST_IMPLEMENTATION_ - #ifndef _WIN32_WCE # include #endif // !_WIN32_WCE @@ -67,9 +56,12 @@ # include // NOLINT #endif // GTEST_OS_WINDOWS -#include "gtest/gtest.h" // NOLINT +#include "gtest/gtest.h" #include "gtest/gtest-spi.h" +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ +/* class A needs to have dll-interface to be used by clients of class B */) + namespace testing { // Declares the flags. 
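The separator collapsing performed by Normalize() in the filepath hunk above can be modeled in a few lines. This is an illustration for POSIX-style paths, not gtest's implementation; like the original, it leaves "." and ".." segments untouched:

#include <string>

static std::string CollapseSeparators(const std::string& path) {
  std::string out;
  for (std::string::size_type i = 0; i < path.size(); ++i) {
    // Copy every character, but skip a '/' that directly follows another '/'.
    if (path[i] == '/' && !out.empty() && out[out.size() - 1] == '/') continue;
    out += path[i];
  }
  return out;
}

// CollapseSeparators("bar///foo") == "bar/foo"
// CollapseSeparators("./bar")     == "./bar"  (dot segments are not resolved)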
@@ -94,6 +86,7 @@ const char kFilterFlag[] = "filter"; const char kListTestsFlag[] = "list_tests"; const char kOutputFlag[] = "output"; const char kPrintTimeFlag[] = "print_time"; +const char kPrintUTF8Flag[] = "print_utf8"; const char kRandomSeedFlag[] = "random_seed"; const char kRepeatFlag[] = "repeat"; const char kShuffleFlag[] = "shuffle"; @@ -174,6 +167,7 @@ class GTestFlagSaver { list_tests_ = GTEST_FLAG(list_tests); output_ = GTEST_FLAG(output); print_time_ = GTEST_FLAG(print_time); + print_utf8_ = GTEST_FLAG(print_utf8); random_seed_ = GTEST_FLAG(random_seed); repeat_ = GTEST_FLAG(repeat); shuffle_ = GTEST_FLAG(shuffle); @@ -195,6 +189,7 @@ class GTestFlagSaver { GTEST_FLAG(list_tests) = list_tests_; GTEST_FLAG(output) = output_; GTEST_FLAG(print_time) = print_time_; + GTEST_FLAG(print_utf8) = print_utf8_; GTEST_FLAG(random_seed) = random_seed_; GTEST_FLAG(repeat) = repeat_; GTEST_FLAG(shuffle) = shuffle_; @@ -216,6 +211,7 @@ class GTestFlagSaver { bool list_tests_; std::string output_; bool print_time_; + bool print_utf8_; internal::Int32 random_seed_; internal::Int32 repeat_; bool shuffle_; @@ -426,7 +422,7 @@ class OsStackTraceGetterInterface { // in the trace. // skip_count - the number of top frames to be skipped; doesn't count // against max_depth. - virtual string CurrentStackTrace(int max_depth, int skip_count) = 0; + virtual std::string CurrentStackTrace(int max_depth, int skip_count) = 0; // UponLeavingGTest() should be called immediately before Google Test calls // user code. It saves some information about the current stack that @@ -446,10 +442,20 @@ class OsStackTraceGetter : public OsStackTraceGetterInterface { public: OsStackTraceGetter() {} - virtual string CurrentStackTrace(int max_depth, int skip_count); + virtual std::string CurrentStackTrace(int max_depth, int skip_count); virtual void UponLeavingGTest(); private: +#if GTEST_HAS_ABSL + Mutex mutex_; // Protects all internal state. + + // We save the stack frame below the frame that calls user code. + // We do this because the address of the frame immediately below + // the user code changes between the call to UponLeavingGTest() + // and any calls to the stack trace code from within the user code. + void* caller_frame_ = nullptr; +#endif // GTEST_HAS_ABSL + GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetter); }; @@ -664,13 +670,11 @@ class GTEST_API_ UnitTestImpl { tear_down_tc)->AddTestInfo(test_info); } -#if GTEST_HAS_PARAM_TEST // Returns ParameterizedTestCaseRegistry object used to keep track of // value-parameterized tests and instantiate and register them. internal::ParameterizedTestCaseRegistry& parameterized_test_registry() { return parameterized_test_registry_; } -#endif // GTEST_HAS_PARAM_TEST // Sets the TestCase object for the test that's currently running. void set_current_test_case(TestCase* a_current_test_case) { @@ -845,14 +849,12 @@ class GTEST_API_ UnitTestImpl { // shuffled order. std::vector test_case_indices_; -#if GTEST_HAS_PARAM_TEST // ParameterizedTestRegistry object used to register value-parameterized // tests. internal::ParameterizedTestCaseRegistry parameterized_test_registry_; // Indicates whether RegisterParameterizedTests() has been called already. bool parameterized_tests_registered_; -#endif // GTEST_HAS_PARAM_TEST // Index of the last death test case registered. Initially -1. 
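GTestFlagSaver, which the hunks above extend with print_utf8_, is used RAII-style inside the framework; a reduced model of the pattern (GTestFlagSaver is internal API, so this is purely illustrative):

{
  testing::internal::GTestFlagSaver saver;  // snapshots every GTEST_FLAG
  testing::GTEST_FLAG(print_time) = false;  // code may now scribble on flags
  testing::GTEST_FLAG(repeat) = 3;
}  // ~GTestFlagSaver() restores print_time, repeat, print_utf8, and the rest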
int last_death_test_case_; @@ -992,7 +994,7 @@ bool ParseNaturalNumber(const ::std::string& str, Integer* number) { const bool parse_success = *end == '\0' && errno == 0; - // TODO(vladl@google.com): Convert this to compile time assertion when it is + // FIXME: Convert this to compile time assertion when it is // available. GTEST_CHECK_(sizeof(Integer) <= sizeof(parsed)); @@ -1032,7 +1034,7 @@ class TestResultAccessor { #if GTEST_CAN_STREAM_RESULTS_ // Streams test results to the given port on the given host machine. -class GTEST_API_ StreamingListener : public EmptyTestEventListener { +class StreamingListener : public EmptyTestEventListener { public: // Abstract base class for writing strings to a socket. class AbstractSocketWriter { @@ -1040,21 +1042,19 @@ class GTEST_API_ StreamingListener : public EmptyTestEventListener { virtual ~AbstractSocketWriter() {} // Sends a string to the socket. - virtual void Send(const string& message) = 0; + virtual void Send(const std::string& message) = 0; // Closes the socket. virtual void CloseConnection() {} // Sends a string and a newline to the socket. - void SendLn(const string& message) { - Send(message + "\n"); - } + void SendLn(const std::string& message) { Send(message + "\n"); } }; // Concrete class for actually writing strings to a socket. class SocketWriter : public AbstractSocketWriter { public: - SocketWriter(const string& host, const string& port) + SocketWriter(const std::string& host, const std::string& port) : sockfd_(-1), host_name_(host), port_num_(port) { MakeConnection(); } @@ -1065,7 +1065,7 @@ class GTEST_API_ StreamingListener : public EmptyTestEventListener { } // Sends a string to the socket. - virtual void Send(const string& message) { + virtual void Send(const std::string& message) { GTEST_CHECK_(sockfd_ != -1) << "Send() can be called only when there is a connection."; @@ -1091,17 +1091,19 @@ class GTEST_API_ StreamingListener : public EmptyTestEventListener { } int sockfd_; // socket file descriptor - const string host_name_; - const string port_num_; + const std::string host_name_; + const std::string port_num_; GTEST_DISALLOW_COPY_AND_ASSIGN_(SocketWriter); }; // class SocketWriter // Escapes '=', '&', '%', and '\n' characters in str as "%xx". - static string UrlEncode(const char* str); + static std::string UrlEncode(const char* str); - StreamingListener(const string& host, const string& port) - : socket_writer_(new SocketWriter(host, port)) { Start(); } + StreamingListener(const std::string& host, const std::string& port) + : socket_writer_(new SocketWriter(host, port)) { + Start(); + } explicit StreamingListener(AbstractSocketWriter* socket_writer) : socket_writer_(socket_writer) { Start(); } @@ -1162,13 +1164,13 @@ class GTEST_API_ StreamingListener : public EmptyTestEventListener { private: // Sends the given message and a newline to the socket. - void SendLn(const string& message) { socket_writer_->SendLn(message); } + void SendLn(const std::string& message) { socket_writer_->SendLn(message); } // Called at the start of streaming to notify the receiver what // protocol we are using. void Start() { SendLn("gtest_streaming_protocol_version=1.0"); } - string FormatBool(bool value) { return value ? "1" : "0"; } + std::string FormatBool(bool value) { return value ? 
"1" : "0"; } const scoped_ptr socket_writer_; @@ -1180,4 +1182,6 @@ class GTEST_API_ StreamingListener : public EmptyTestEventListener { } // namespace internal } // namespace testing +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 + #endif // GTEST_SRC_GTEST_INTERNAL_INL_H_ diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-port.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-port.cc index e5bf3dd2be4b..fecb5d11c212 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-port.cc +++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-port.cc @@ -26,8 +26,7 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) + #include "gtest/internal/gtest-port.h" @@ -63,19 +62,16 @@ # include #endif // GTEST_OS_AIX +#if GTEST_OS_FUCHSIA +# include +# include +#endif // GTEST_OS_FUCHSIA + #include "gtest/gtest-spi.h" #include "gtest/gtest-message.h" #include "gtest/internal/gtest-internal.h" #include "gtest/internal/gtest-string.h" - -// Indicates that this translation unit is part of Google Test's -// implementation. It must come before gtest-internal-inl.h is -// included, or there will be a compiler error. This trick exists to -// prevent the accidental inclusion of gtest-internal-inl.h in the -// user's code. -#define GTEST_IMPLEMENTATION_ 1 #include "src/gtest-internal-inl.h" -#undef GTEST_IMPLEMENTATION_ namespace testing { namespace internal { @@ -93,7 +89,7 @@ const int kStdErrFileno = STDERR_FILENO; namespace { template -T ReadProcFileField(const string& filename, int field) { +T ReadProcFileField(const std::string& filename, int field) { std::string dummy; std::ifstream file(filename.c_str()); while (field-- > 0) { @@ -107,7 +103,7 @@ T ReadProcFileField(const string& filename, int field) { // Returns the number of active threads, or 0 when there is an error. size_t GetThreadCount() { - const string filename = + const std::string filename = (Message() << "/proc/" << getpid() << "/stat").GetString(); return ReadProcFileField(filename, 19); } @@ -164,6 +160,25 @@ size_t GetThreadCount() { } } +#elif GTEST_OS_FUCHSIA + +size_t GetThreadCount() { + int dummy_buffer; + size_t avail; + zx_status_t status = zx_object_get_info( + zx_process_self(), + ZX_INFO_PROCESS_THREADS, + &dummy_buffer, + 0, + nullptr, + &avail); + if (status == ZX_OK) { + return avail; + } else { + return 0; + } +} + #else size_t GetThreadCount() { @@ -246,9 +261,9 @@ Mutex::Mutex() Mutex::~Mutex() { // Static mutexes are leaked intentionally. It is not thread-safe to try // to clean them up. - // TODO(yukawa): Switch to Slim Reader/Writer (SRW) Locks, which requires + // FIXME: Switch to Slim Reader/Writer (SRW) Locks, which requires // nothing to clean it up but is available only on Vista and later. - // http://msdn.microsoft.com/en-us/library/windows/desktop/aa904937.aspx + // https://docs.microsoft.com/en-us/windows/desktop/Sync/slim-reader-writer--srw--locks if (type_ == kDynamic) { ::DeleteCriticalSection(critical_section_); delete critical_section_; @@ -279,6 +294,43 @@ void Mutex::AssertHeld() { << "The current thread is not holding the mutex @" << this; } +namespace { + +// Use the RAII idiom to flag mem allocs that are intentionally never +// deallocated. 
The motivation is to silence the false positive mem leaks +// that are reported by the debug version of MS's CRT which can only detect +// if an alloc is missing a matching deallocation. +// Example: +// MemoryIsNotDeallocated memory_is_not_deallocated; +// critical_section_ = new CRITICAL_SECTION; +// +class MemoryIsNotDeallocated +{ + public: + MemoryIsNotDeallocated() : old_crtdbg_flag_(0) { +#ifdef _MSC_VER + old_crtdbg_flag_ = _CrtSetDbgFlag(_CRTDBG_REPORT_FLAG); + // Set heap allocation block type to _IGNORE_BLOCK so that MS debug CRT + // doesn't report mem leak if there's no matching deallocation. + _CrtSetDbgFlag(old_crtdbg_flag_ & ~_CRTDBG_ALLOC_MEM_DF); +#endif // _MSC_VER + } + + ~MemoryIsNotDeallocated() { +#ifdef _MSC_VER + // Restore the original _CRTDBG_ALLOC_MEM_DF flag + _CrtSetDbgFlag(old_crtdbg_flag_); +#endif // _MSC_VER + } + + private: + int old_crtdbg_flag_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(MemoryIsNotDeallocated); +}; + +} // namespace + // Initializes owner_thread_id_ and critical_section_ in static mutexes. void Mutex::ThreadSafeLazyInit() { // Dynamic mutexes are initialized in the constructor. @@ -289,7 +341,11 @@ void Mutex::ThreadSafeLazyInit() { // If critical_section_init_phase_ was 0 before the exchange, we // are the first to test it and need to perform the initialization. owner_thread_id_ = 0; - critical_section_ = new CRITICAL_SECTION; + { + // Use RAII to flag that following mem alloc is never deallocated. + MemoryIsNotDeallocated memory_is_not_deallocated; + critical_section_ = new CRITICAL_SECTION; + } ::InitializeCriticalSection(critical_section_); // Updates the critical_section_init_phase_ to 2 to signal // initialization complete. @@ -328,7 +384,7 @@ class ThreadWithParamSupport : public ThreadWithParamBase { Notification* thread_can_start) { ThreadMainParam* param = new ThreadMainParam(runnable, thread_can_start); DWORD thread_id; - // TODO(yukawa): Consider to use _beginthreadex instead. + // FIXME: Consider to use _beginthreadex instead. HANDLE thread_handle = ::CreateThread( NULL, // Default security. 0, // Default stack size. @@ -496,7 +552,7 @@ class ThreadLocalRegistryImpl { FALSE, thread_id); GTEST_CHECK_(thread != NULL); - // We need to to pass a valid thread ID pointer into CreateThread for it + // We need to pass a valid thread ID pointer into CreateThread for it // to work correctly under Win98. DWORD watcher_thread_id; HANDLE watcher_thread = ::CreateThread( @@ -531,7 +587,8 @@ class ThreadLocalRegistryImpl { // Returns map of thread local instances. static ThreadIdToThreadLocals* GetThreadLocalsMapLocked() { mutex_.AssertHeld(); - static ThreadIdToThreadLocals* map = new ThreadIdToThreadLocals; + MemoryIsNotDeallocated memory_is_not_deallocated; + static ThreadIdToThreadLocals* map = new ThreadIdToThreadLocals(); return map; } @@ -671,7 +728,7 @@ bool AtomMatchesChar(bool escaped, char pattern_char, char ch) { } // Helper function used by ValidateRegex() to format error messages. -std::string FormatRegexSyntaxError(const char* regex, int index) { +static std::string FormatRegexSyntaxError(const char* regex, int index) { return (Message() << "Syntax error at index " << index << " in simple regular expression \"" << regex << "\": ").GetString(); } @@ -680,7 +737,7 @@ std::string FormatRegexSyntaxError(const char* regex, int index) { // otherwise returns true. 
 bool ValidateRegex(const char* regex) {
   if (regex == NULL) {
-    // TODO(wan@google.com): fix the source file location in the
+    // FIXME: fix the source file location in the
     // assertion failures to match where the regex is used in user
     // code.
     ADD_FAILURE() << "NULL is not a valid simple regular expression.";
@@ -923,9 +980,10 @@ GTestLog::~GTestLog() {
     posix::Abort();
   }
 }
+
 // Disable Microsoft deprecation warnings for POSIX functions called from
 // this class (creat, dup, dup2, and close)
-GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996)
+GTEST_DISABLE_MSC_DEPRECATED_PUSH_()
 
 #if GTEST_HAS_STREAM_REDIRECTION
 
@@ -1009,13 +1067,14 @@ class CapturedStream {
   GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream);
 };
 
-GTEST_DISABLE_MSC_WARNINGS_POP_()
+GTEST_DISABLE_MSC_DEPRECATED_POP_()
 
 static CapturedStream* g_captured_stderr = NULL;
 static CapturedStream* g_captured_stdout = NULL;
 
 // Starts capturing an output stream (stdout/stderr).
-void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
+static void CaptureStream(int fd, const char* stream_name,
+                          CapturedStream** stream) {
   if (*stream != NULL) {
     GTEST_LOG_(FATAL) << "Only one " << stream_name
                       << " capturer can exist at a time.";
@@ -1024,7 +1083,7 @@ void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
 }
 
 // Stops capturing the output stream and returns the captured string.
-std::string GetCapturedStream(CapturedStream** captured_stream) {
+static std::string GetCapturedStream(CapturedStream** captured_stream) {
   const std::string content = (*captured_stream)->GetCapturedString();
 
   delete *captured_stream;
@@ -1055,23 +1114,9 @@ std::string GetCapturedStderr() {
 
 #endif  // GTEST_HAS_STREAM_REDIRECTION
 
-std::string TempDir() {
-#if GTEST_OS_WINDOWS_MOBILE
-  return "\\temp\\";
-#elif GTEST_OS_WINDOWS
-  const char* temp_dir = posix::GetEnv("TEMP");
-  if (temp_dir == NULL || temp_dir[0] == '\0')
-    return "\\temp\\";
-  else if (temp_dir[strlen(temp_dir) - 1] == '\\')
-    return temp_dir;
-  else
-    return std::string(temp_dir) + "\\";
-#elif GTEST_OS_LINUX_ANDROID
-  return "/sdcard/";
-#else
-  return "/tmp/";
-#endif  // GTEST_OS_WINDOWS_MOBILE
-}
+
+
+
 
 size_t GetFileSize(FILE* file) {
   fseek(file, 0, SEEK_END);
@@ -1101,22 +1146,36 @@ std::string ReadEntireFile(FILE* file) {
 }
 
 #if GTEST_HAS_DEATH_TEST
+static const std::vector<std::string>* g_injected_test_argvs = NULL;  // Owned.
 
-static const ::std::vector<testing::internal::string>* g_injected_test_argvs =
-    NULL;  // Owned.
-
-void SetInjectableArgvs(const ::std::vector<testing::internal::string>* argvs) {
-  if (g_injected_test_argvs != argvs)
-    delete g_injected_test_argvs;
-  g_injected_test_argvs = argvs;
-}
-
-const ::std::vector<testing::internal::string>& GetInjectableArgvs() {
+std::vector<std::string> GetInjectableArgvs() {
   if (g_injected_test_argvs != NULL) {
     return *g_injected_test_argvs;
   }
   return GetArgvs();
 }
+
+void SetInjectableArgvs(const std::vector<std::string>* new_argvs) {
+  if (g_injected_test_argvs != new_argvs) delete g_injected_test_argvs;
+  g_injected_test_argvs = new_argvs;
+}
+
+void SetInjectableArgvs(const std::vector<std::string>& new_argvs) {
+  SetInjectableArgvs(
+      new std::vector<std::string>(new_argvs.begin(), new_argvs.end()));
+}
+
+#if GTEST_HAS_GLOBAL_STRING
+void SetInjectableArgvs(const std::vector< ::string>& new_argvs) {
+  SetInjectableArgvs(
+      new std::vector<std::string>(new_argvs.begin(), new_argvs.end()));
+}
+#endif  // GTEST_HAS_GLOBAL_STRING
+
+void ClearInjectableArgvs() {
+  delete g_injected_test_argvs;
+  g_injected_test_argvs = NULL;
+}
 #endif  // GTEST_HAS_DEATH_TEST
 
 #if GTEST_OS_WINDOWS_MOBILE
@@ -1191,11 +1250,12 @@ bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
 bool BoolFromGTestEnv(const char* flag, bool default_value) {
 #if defined(GTEST_GET_BOOL_FROM_ENV_)
   return GTEST_GET_BOOL_FROM_ENV_(flag, default_value);
-#endif  // defined(GTEST_GET_BOOL_FROM_ENV_)
+#else
   const std::string env_var = FlagToEnvVar(flag);
   const char* const string_value = posix::GetEnv(env_var.c_str());
   return string_value == NULL ?
       default_value : strcmp(string_value, "0") != 0;
+#endif  // defined(GTEST_GET_BOOL_FROM_ENV_)
 }
 
 // Reads and returns a 32-bit integer stored in the environment
@@ -1204,7 +1264,7 @@ bool BoolFromGTestEnv(const char* flag, bool default_value) {
 Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
 #if defined(GTEST_GET_INT32_FROM_ENV_)
   return GTEST_GET_INT32_FROM_ENV_(flag, default_value);
-#endif  // defined(GTEST_GET_INT32_FROM_ENV_)
+#else
   const std::string env_var = FlagToEnvVar(flag);
   const char* const string_value = posix::GetEnv(env_var.c_str());
   if (string_value == NULL) {
@@ -1222,37 +1282,36 @@ Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
   }
 
   return result;
+#endif  // defined(GTEST_GET_INT32_FROM_ENV_)
+}
+
+// As a special case for the 'output' flag, if GTEST_OUTPUT is not
+// set, we look for XML_OUTPUT_FILE, which is set by the Bazel build
+// system.  The value of XML_OUTPUT_FILE is a filename without the
+// "xml:" prefix of GTEST_OUTPUT.
+// Note that this is meant to be called at the call site so it does
+// not check that the flag is 'output'
+// In essence this checks an env variable called XML_OUTPUT_FILE
+// and if it is set we prepend "xml:" to its value, if it not set we return ""
+std::string OutputFlagAlsoCheckEnvVar(){
+  std::string default_value_for_output_flag = "";
+  const char* xml_output_file_env = posix::GetEnv("XML_OUTPUT_FILE");
+  if (NULL != xml_output_file_env) {
+    default_value_for_output_flag = std::string("xml:") + xml_output_file_env;
+  }
+  return default_value_for_output_flag;
+}
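// Illustrative sketch (not from the patch): the resolution behavior the
// helpers above implement.  The function names below are hypothetical and
// are not part of the gtest API.
#include <stdlib.h>
#include <string.h>
#include <string>

// A flag named "foo" is read from the env var GTEST_FOO (via FlagToEnvVar);
// any value other than "0" counts as true.
static bool BoolFlagFromEnvSketch(const char* env_var, bool default_value) {
  const char* const value = getenv(env_var);  // e.g. getenv("GTEST_PRINT_TIME")
  return value == NULL ? default_value : strcmp(value, "0") != 0;
}

// For the output flag only: fall back to Bazel's XML_OUTPUT_FILE, prefixed
// with "xml:" so it parses like an ordinary GTEST_OUTPUT value.
static std::string OutputFlagDefaultSketch() {
  const char* const bazel_out = getenv("XML_OUTPUT_FILE");
  return bazel_out == NULL ? std::string() : std::string("xml:") + bazel_out;
}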
 // Reads and returns the string environment variable corresponding to
 // the given flag; if it's not set, returns default_value.
-std::string StringFromGTestEnv(const char* flag, const char* default_value) {
+const char* StringFromGTestEnv(const char* flag, const char* default_value) {
 #if defined(GTEST_GET_STRING_FROM_ENV_)
   return GTEST_GET_STRING_FROM_ENV_(flag, default_value);
-#endif  // defined(GTEST_GET_STRING_FROM_ENV_)
+#else
   const std::string env_var = FlagToEnvVar(flag);
-  const char* value = posix::GetEnv(env_var.c_str());
-  if (value != NULL) {
-    return value;
-  }
-
-  // As a special case for the 'output' flag, if GTEST_OUTPUT is not
-  // set, we look for XML_OUTPUT_FILE, which is set by the Bazel build
-  // system.  The value of XML_OUTPUT_FILE is a filename without the
-  // "xml:" prefix of GTEST_OUTPUT.
-  //
-  // The net priority order after flag processing is thus:
-  //   --gtest_output command line flag
-  //   GTEST_OUTPUT environment variable
-  //   XML_OUTPUT_FILE environment variable
-  //   'default_value'
-  if (strcmp(flag, "output") == 0) {
-    value = posix::GetEnv("XML_OUTPUT_FILE");
-    if (value != NULL) {
-      return std::string("xml:") + value;
-    }
-  }
-  return default_value;
+  const char* const value = posix::GetEnv(env_var.c_str());
+  return value == NULL ? default_value : value;
+#endif  // defined(GTEST_GET_STRING_FROM_ENV_)
 }
 
 }  // namespace internal
diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-printers.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-printers.cc
index a2df412f8a20..b5022549f902 100644
--- a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-printers.cc
+++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-printers.cc
@@ -26,10 +26,9 @@
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: wan@google.com (Zhanyong Wan)
-// Google Test - The Google C++ Testing Framework
+
+// Google Test - The Google C++ Testing and Mocking Framework
 //
 // This file implements a universal value printer that can print a
 // value of any type T:
@@ -43,12 +42,13 @@
 // defines Foo.
 
 #include "gtest/gtest-printers.h"
-#include
 #include
+#include
 #include
 #include   // NOLINT
 #include
 #include "gtest/internal/gtest-port.h"
+#include "src/gtest-internal-inl.h"
 
 namespace testing {
 
@@ -89,7 +89,7 @@ void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count,
   // If the object size is bigger than kThreshold, we'll have to omit
   // some details by printing only the first and the last kChunkSize
   // bytes.
-  // TODO(wan): let the user control the threshold using a flag.
+  // FIXME: let the user control the threshold using a flag.
   if (count < kThreshold) {
     PrintByteSegmentInObjectTo(obj_bytes, 0, count, os);
   } else {
@@ -123,7 +123,7 @@ namespace internal {
 // Depending on the value of a char (or wchar_t), we print it in one
 // of three formats:
 //   - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
-//   - as a hexidecimal escape sequence (e.g. '\x7F'), or
+//   - as a hexadecimal escape sequence (e.g. '\x7F'), or
 //   - as a special escape sequence (e.g. '\r', '\n').
 enum CharFormat {
   kAsIs,
@@ -180,7 +183,10 @@ static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
     *os << static_cast<char>(c);
     return kAsIs;
   } else {
-    *os << "\\x" + String::FormatHexInt(static_cast<UnsignedChar>(c));
+    ostream::fmtflags flags = os->flags();
+    *os << "\\x" << std::hex << std::uppercase
+        << static_cast<int>(static_cast<UnsignedChar>(c));
+    os->flags(flags);
     return kHexEscape;
   }
 }
@@ -227,7 +230,7 @@ void PrintCharAndCodeTo(Char c, ostream* os) {
     return;
   *os << " (" << static_cast<int>(c);
 
-  // For more convenience, we print c's code again in hexidecimal,
+  // For more convenience, we print c's code again in hexadecimal,
   // unless c was already printed in the form '\x##' or the code is in
   // [1, 9].
   if (format == kHexEscape || (1 <= c && c <= 9)) {
@@ -259,11 +262,12 @@ template <typename CharType>
 GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
 GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
 GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
-static void PrintCharsAsStringTo(
+static CharFormat PrintCharsAsStringTo(
     const CharType* begin, size_t len, ostream* os) {
   const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\"";
   *os << kQuoteBegin;
   bool is_previous_hex = false;
+  CharFormat print_format = kAsIs;
   for (size_t index = 0; index < len; ++index) {
     const CharType cur = begin[index];
     if (is_previous_hex && IsXDigit(cur)) {
@@ -273,8 +277,13 @@ static void PrintCharsAsStringTo(
       *os << "\" " << kQuoteBegin;
     }
     is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape;
+    // Remember if any characters required hex escaping.
+    if (is_previous_hex) {
+      print_format = kHexEscape;
+    }
   }
   *os << "\"";
+  return print_format;
 }
 
 // Prints a (const) char/wchar_t array of 'len' elements, starting at address
@@ -339,20 +348,95 @@ void PrintTo(const wchar_t* s, ostream* os) {
     *os << "NULL";
   } else {
     *os << ImplicitCast_<const void*>(s) << " pointing to ";
-    PrintCharsAsStringTo(s, std::wcslen(s), os);
+    PrintCharsAsStringTo(s, wcslen(s), os);
   }
 }
 #endif  // wchar_t is native
 
+namespace {
+
+bool ContainsUnprintableControlCodes(const char* str, size_t length) {
+  const unsigned char *s = reinterpret_cast<const unsigned char*>(str);
+
+  for (size_t i = 0; i < length; i++) {
+    unsigned char ch = *s++;
+    if (std::iscntrl(ch)) {
+      switch (ch) {
+        case '\t':
+        case '\n':
+        case '\r':
+          break;
+        default:
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool IsUTF8TrailByte(unsigned char t) { return 0x80 <= t && t<= 0xbf; }
+
+bool IsValidUTF8(const char* str, size_t length) {
+  const unsigned char *s = reinterpret_cast<const unsigned char*>(str);
+
+  for (size_t i = 0; i < length;) {
+    unsigned char lead = s[i++];
+
+    if (lead <= 0x7f) {
+      continue;  // single-byte character (ASCII) 0..7F
+    }
+    if (lead < 0xc2) {
+      return false;  // trail byte or non-shortest form
+    } else if (lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i])) {
+      ++i;  // 2-byte character
+    } else if (0xe0 <= lead && lead <= 0xef && (i + 2) <= length &&
+               IsUTF8TrailByte(s[i]) &&
+               IsUTF8TrailByte(s[i + 1]) &&
+               // check for non-shortest form and surrogate
+               (lead != 0xe0 || s[i] >= 0xa0) &&
+               (lead != 0xed || s[i] < 0xa0)) {
+      i += 2;  // 3-byte character
+    } else if (0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length &&
+               IsUTF8TrailByte(s[i]) &&
+               IsUTF8TrailByte(s[i + 1]) &&
+               IsUTF8TrailByte(s[i + 2]) &&
+               // check for non-shortest form
+               (lead != 0xf0 || s[i] >= 0x90) &&
+               (lead != 0xf4 || s[i] < 0x90)) {
+      i += 3;  // 4-byte character
+    } else {
+      return false;
+    }
+  }
+  return true;
+}
+
+void ConditionalPrintAsText(const char* str, size_t length, ostream* os) {
+  if (!ContainsUnprintableControlCodes(str, length) &&
+      IsValidUTF8(str, length)) {
+ *os << "\n As Text: \"" << str << "\""; + } +} + +} // anonymous namespace + // Prints a ::string object. #if GTEST_HAS_GLOBAL_STRING void PrintStringTo(const ::string& s, ostream* os) { - PrintCharsAsStringTo(s.data(), s.size(), os); + if (PrintCharsAsStringTo(s.data(), s.size(), os) == kHexEscape) { + if (GTEST_FLAG(print_utf8)) { + ConditionalPrintAsText(s.data(), s.size(), os); + } + } } #endif // GTEST_HAS_GLOBAL_STRING void PrintStringTo(const ::std::string& s, ostream* os) { - PrintCharsAsStringTo(s.data(), s.size(), os); + if (PrintCharsAsStringTo(s.data(), s.size(), os) == kHexEscape) { + if (GTEST_FLAG(print_utf8)) { + ConditionalPrintAsText(s.data(), s.size(), os); + } + } } // Prints a ::wstring object. diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-test-part.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-test-part.cc index fb0e35425e1e..c88860d92385 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-test-part.cc +++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-test-part.cc @@ -26,21 +26,12 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // -// Author: mheule@google.com (Markus Heule) -// -// The Google C++ Testing Framework (Google Test) +// The Google C++ Testing and Mocking Framework (Google Test) #include "gtest/gtest-test-part.h" - -// Indicates that this translation unit is part of Google Test's -// implementation. It must come before gtest-internal-inl.h is -// included, or there will be a compiler error. This trick exists to -// prevent the accidental inclusion of gtest-internal-inl.h in the -// user's code. -#define GTEST_IMPLEMENTATION_ 1 #include "src/gtest-internal-inl.h" -#undef GTEST_IMPLEMENTATION_ namespace testing { diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-typed-test.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-typed-test.cc index df1eef4754ef..1dc2ad38bab9 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/src/gtest-typed-test.cc +++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest-typed-test.cc @@ -26,10 +26,10 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author: wan@google.com (Zhanyong Wan) + #include "gtest/gtest-typed-test.h" + #include "gtest/gtest.h" namespace testing { diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest.cc index 5a8932c73e36..96b07c68abb0 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/src/gtest.cc +++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest.cc @@ -26,10 +26,9 @@ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ // -// Author: wan@google.com (Zhanyong Wan) -// -// The Google C++ Testing Framework (Google Test) +// The Google C++ Testing and Mocking Framework (Google Test) #include "gtest/gtest.h" #include "gtest/internal/custom/gtest.h" @@ -55,7 +54,7 @@ #if GTEST_OS_LINUX -// TODO(kenton@google.com): Use autoconf to detect availability of +// FIXME: Use autoconf to detect availability of // gettimeofday(). # define GTEST_HAS_GETTIMEOFDAY_ 1 @@ -94,9 +93,9 @@ # if GTEST_OS_WINDOWS_MINGW // MinGW has gettimeofday() but not _ftime64(). -// TODO(kenton@google.com): Use autoconf to detect availability of +// FIXME: Use autoconf to detect availability of // gettimeofday(). -// TODO(kenton@google.com): There are other ways to get the time on +// FIXME: There are other ways to get the time on // Windows, like GetTickCount() or GetSystemTimeAsFileTime(). MinGW // supports these. consider using them instead. # define GTEST_HAS_GETTIMEOFDAY_ 1 @@ -111,7 +110,7 @@ #else // Assume other platforms have gettimeofday(). -// TODO(kenton@google.com): Use autoconf to detect availability of +// FIXME: Use autoconf to detect availability of // gettimeofday(). # define GTEST_HAS_GETTIMEOFDAY_ 1 @@ -133,19 +132,25 @@ # include // NOLINT #endif -// Indicates that this translation unit is part of Google Test's -// implementation. It must come before gtest-internal-inl.h is -// included, or there will be a compiler error. This trick is to -// prevent a user from accidentally including gtest-internal-inl.h in -// his code. -#define GTEST_IMPLEMENTATION_ 1 #include "src/gtest-internal-inl.h" -#undef GTEST_IMPLEMENTATION_ #if GTEST_OS_WINDOWS # define vsnprintf _vsnprintf #endif // GTEST_OS_WINDOWS +#if GTEST_OS_MAC +#ifndef GTEST_OS_IOS +#include +#endif +#endif + +#if GTEST_HAS_ABSL +#include "absl/debugging/failure_signal_handler.h" +#include "absl/debugging/stacktrace.h" +#include "absl/debugging/symbolize.h" +#include "absl/strings/str_cat.h" +#endif // GTEST_HAS_ABSL + namespace testing { using internal::CountIf; @@ -167,8 +172,10 @@ static const char kDeathTestCaseFilter[] = "*DeathTest:*DeathTest/*"; // A test filter that matches everything. static const char kUniversalFilter[] = "*"; -// The default output file for XML output. -static const char kDefaultOutputFile[] = "test_detail.xml"; +// The default output format. +static const char kDefaultOutputFormat[] = "xml"; +// The default output file. +static const char kDefaultOutputFile[] = "test_detail"; // The environment variable name for the test shard index. static const char kTestShardIndex[] = "GTEST_SHARD_INDEX"; @@ -187,15 +194,31 @@ const char kStackTraceMarker[] = "\nStack trace:\n"; // specified on the command line. bool g_help_flag = false; +// Utilty function to Open File for Writing +static FILE* OpenFileForWriting(const std::string& output_file) { + FILE* fileout = NULL; + FilePath output_file_path(output_file); + FilePath output_dir(output_file_path.RemoveFileName()); + + if (output_dir.CreateDirectoriesRecursively()) { + fileout = posix::FOpen(output_file.c_str(), "w"); + } + if (fileout == NULL) { + GTEST_LOG_(FATAL) << "Unable to open file \"" << output_file << "\""; + } + return fileout; +} + } // namespace internal +// Bazel passes in the argument to '--test_filter' via the TESTBRIDGE_TEST_ONLY +// environment variable. 
 static const char* GetDefaultFilter() {
-#ifdef GTEST_TEST_FILTER_ENV_VAR_
-  const char* const testbridge_test_only = getenv(GTEST_TEST_FILTER_ENV_VAR_);
+  const char* const testbridge_test_only =
+      internal::posix::GetEnv("TESTBRIDGE_TEST_ONLY");
   if (testbridge_test_only != NULL) {
     return testbridge_test_only;
   }
-#endif  // GTEST_TEST_FILTER_ENV_VAR_
   return kUniversalFilter;
 }
 
@@ -232,15 +255,28 @@ GTEST_DEFINE_string_(
     "exclude). A test is run if it matches one of the positive "
     "patterns and does not match any of the negative patterns.");
 
+GTEST_DEFINE_bool_(
+    install_failure_signal_handler,
+    internal::BoolFromGTestEnv("install_failure_signal_handler", false),
+    "If true and supported on the current platform, " GTEST_NAME_ " should "
+    "install a signal handler that dumps debugging information when fatal "
+    "signals are raised.");
+
 GTEST_DEFINE_bool_(list_tests, false,
                    "List all tests without running them.");
 
+// The net priority order after flag processing is thus:
+//   --gtest_output command line flag
+//   GTEST_OUTPUT environment variable
+//   XML_OUTPUT_FILE environment variable
+//   ''
 GTEST_DEFINE_string_(
     output,
-    internal::StringFromGTestEnv("output", ""),
-    "A format (currently must be \"xml\"), optionally followed "
-    "by a colon and an output file name or directory. A directory "
-    "is indicated by a trailing pathname separator. "
+    internal::StringFromGTestEnv("output",
+      internal::OutputFlagAlsoCheckEnvVar().c_str()),
+    "A format (defaults to \"xml\" but can be specified to be \"json\"), "
+    "optionally followed by a colon and an output file name or directory. "
+    "A directory is indicated by a trailing pathname separator. "
     "Examples: \"xml:filename.xml\", \"xml::directoryname/\". "
    "If a directory is specified, output files will be created "
    "within that directory, with file-names based on the test "
@@ -253,6 +289,12 @@ GTEST_DEFINE_bool_(
     "True iff " GTEST_NAME_
     " should display elapsed time in text output.");
 
+GTEST_DEFINE_bool_(
+    print_utf8,
+    internal::BoolFromGTestEnv("print_utf8", true),
+    "True iff " GTEST_NAME_
+    " prints UTF8 characters as text.");
+
 GTEST_DEFINE_int32_(
     random_seed,
     internal::Int32FromGTestEnv("random_seed", 0),
@@ -294,7 +336,7 @@ GTEST_DEFINE_bool_(
     internal::BoolFromGTestEnv("throw_on_failure", false),
     "When this flag is specified, a failed assertion will throw an exception "
     "if exceptions are enabled or exit the program with a non-zero code "
-    "otherwise.");
+    "otherwise. For use with an external test framework.");
 
 #if GTEST_USE_OWN_FLAGFILE_FLAG_
 GTEST_DEFINE_string_(
@@ -308,10 +350,10 @@ namespace internal {
 // Generates a random number from [0, range), using a Linear
 // Congruential Generator (LCG). Crashes if 'range' is 0 or greater
 // than kMaxRange.
-GTEST_ATTRIBUTE_NO_SANITIZE_UNSIGNED_OVERFLOW_
 UInt32 Random::Generate(UInt32 range) {
   // These constants are the same as are used in glibc's rand(3).
-  state_ = (1103515245U*state_ + 12345U) % kMaxRange;
+  // Use wider types than necessary to prevent unsigned overflow diagnostics.
+  state_ = static_cast<UInt32>(1103515245ULL*state_ + 12345U) % kMaxRange;
 
   GTEST_CHECK_(range > 0)
       << "Cannot generate a number in the range [0, 0).";
@@ -385,12 +427,15 @@ void AssertHelper::operator=(const Message& message) const {
 GTEST_API_ GTEST_DEFINE_STATIC_MUTEX_(g_linked_ptr_mutex);
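// Illustrative sketch (not from the patch): the LCG step Random::Generate()
// above performs, in isolation.  kMaxRange is 1u << 31 (gtest-internal.h),
// so the recurrence is state' = (1103515245 * state + 12345) mod 2^31;
// a seed of 1 steps to 1103527590, matching glibc's rand(3) constants.
static unsigned int LcgNextStateSketch(unsigned int state) {
  return static_cast<unsigned int>(1103515245ULL * state + 12345U) %
         (1U << 31);
}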
 // A copy of all command line arguments.  Set by InitGoogleTest().
-::std::vector<testing::internal::string> g_argvs;
+static ::std::vector<std::string> g_argvs;
 
-const ::std::vector<testing::internal::string>& GetArgvs() {
+::std::vector<std::string> GetArgvs() {
 #if defined(GTEST_CUSTOM_GET_ARGVS_)
-  return GTEST_CUSTOM_GET_ARGVS_();
-#else  // defined(GTEST_CUSTOM_GET_ARGVS_)
+  // GTEST_CUSTOM_GET_ARGVS_() may return a container of std::string or
+  // ::string. This code converts it to the appropriate type.
+  const auto& custom = GTEST_CUSTOM_GET_ARGVS_();
+  return ::std::vector<std::string>(custom.begin(), custom.end());
+#else   // defined(GTEST_CUSTOM_GET_ARGVS_)
   return g_argvs;
 #endif  // defined(GTEST_CUSTOM_GET_ARGVS_)
 }
@@ -414,8 +459,6 @@ FilePath GetCurrentExecutableName() {
 // Returns the output format, or "" for normal printed output.
 std::string UnitTestOptions::GetOutputFormat() {
   const char* const gtest_output_flag = GTEST_FLAG(output).c_str();
-  if (gtest_output_flag == NULL) return std::string("");
-
   const char* const colon = strchr(gtest_output_flag, ':');
   return (colon == NULL) ?
       std::string(gtest_output_flag) :
@@ -426,19 +469,22 @@
 // was explicitly specified.
 std::string UnitTestOptions::GetAbsolutePathToOutputFile() {
   const char* const gtest_output_flag = GTEST_FLAG(output).c_str();
-  if (gtest_output_flag == NULL)
-    return "";
+
+  std::string format = GetOutputFormat();
+  if (format.empty())
+    format = std::string(kDefaultOutputFormat);
 
   const char* const colon = strchr(gtest_output_flag, ':');
   if (colon == NULL)
-    return internal::FilePath::ConcatPaths(
+    return internal::FilePath::MakeFileName(
         internal::FilePath(
             UnitTest::GetInstance()->original_working_dir()),
-        internal::FilePath(kDefaultOutputFile)).string();
+        internal::FilePath(kDefaultOutputFile), 0,
+        format.c_str()).string();
 
   internal::FilePath output_name(colon + 1);
   if (!output_name.IsAbsolutePath())
-    // TODO(wan@google.com): on Windows \some\path is not an absolute
+    // FIXME: on Windows \some\path is not an absolute
     // path (as its meaning depends on the current drive), yet the
     // following logic for turning it into an absolute path is wrong.
    // Fix it.
@@ -629,12 +675,12 @@ extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId();
 // This predicate-formatter checks that 'results' contains a test part
 // failure of the given type and that the failure message contains the
 // given substring.
-AssertionResult HasOneFailure(const char* /* results_expr */,
-                              const char* /* type_expr */,
-                              const char* /* substr_expr */,
-                              const TestPartResultArray& results,
-                              TestPartResult::Type type,
-                              const string& substr) {
+static AssertionResult HasOneFailure(const char* /* results_expr */,
+                                     const char* /* type_expr */,
+                                     const char* /* substr_expr */,
+                                     const TestPartResultArray& results,
+                                     TestPartResult::Type type,
+                                     const std::string& substr) {
   const std::string expected(type == TestPartResult::kFatalFailure ?
                              "1 fatal failure" :
                              "1 non-fatal failure");
@@ -668,13 +714,10 @@ AssertionResult HasOneFailure(const char* /* results_expr */,
 // The constructor of SingleFailureChecker remembers where to look up
 // test part results, what type of failure we expect, and what
 // substring the failure message should contain.
-SingleFailureChecker:: SingleFailureChecker( - const TestPartResultArray* results, - TestPartResult::Type type, - const string& substr) - : results_(results), - type_(type), - substr_(substr) {} +SingleFailureChecker::SingleFailureChecker(const TestPartResultArray* results, + TestPartResult::Type type, + const std::string& substr) + : results_(results), type_(type), substr_(substr) {} // The destructor of SingleFailureChecker verifies that the given // TestPartResultArray contains exactly one failure that has the given @@ -815,7 +858,7 @@ TimeInMillis GetTimeInMillis() { SYSTEMTIME now_systime; FILETIME now_filetime; ULARGE_INTEGER now_int64; - // TODO(kenton@google.com): Shouldn't this just use + // FIXME: Shouldn't this just use // GetSystemTimeAsFileTime()? GetSystemTime(&now_systime); if (SystemTimeToFileTime(&now_systime, &now_filetime)) { @@ -831,11 +874,11 @@ TimeInMillis GetTimeInMillis() { // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996 // (deprecated function) there. - // TODO(kenton@google.com): Use GetTickCount()? Or use + // FIXME: Use GetTickCount()? Or use // SystemTimeToFileTime() - GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996) + GTEST_DISABLE_MSC_DEPRECATED_PUSH_() _ftime64(&now); - GTEST_DISABLE_MSC_WARNINGS_POP_() + GTEST_DISABLE_MSC_DEPRECATED_POP_() return static_cast(now.time) * 1000 + now.millitm; #elif GTEST_HAS_GETTIMEOFDAY_ @@ -1172,7 +1215,7 @@ class Hunk { // Print a unified diff header for one hunk. // The format is // "@@ -, +, @@" - // where the left/right parts are ommitted if unnecessary. + // where the left/right parts are omitted if unnecessary. void PrintHeader(std::ostream* ss) const { *ss << "@@ "; if (removes_) { @@ -1316,13 +1359,14 @@ AssertionResult EqFailure(const char* lhs_expression, const std::string& rhs_value, bool ignoring_case) { Message msg; - msg << " Expected: " << lhs_expression; + msg << "Expected equality of these values:"; + msg << "\n " << lhs_expression; if (lhs_value != lhs_expression) { - msg << "\n Which is: " << lhs_value; + msg << "\n Which is: " << lhs_value; } - msg << "\nTo be equal to: " << rhs_expression; + msg << "\n " << rhs_expression; if (rhs_value != rhs_expression) { - msg << "\n Which is: " << rhs_value; + msg << "\n Which is: " << rhs_value; } if (ignoring_case) { @@ -1369,7 +1413,7 @@ AssertionResult DoubleNearPredFormat(const char* expr1, const double diff = fabs(val1 - val2); if (diff <= abs_error) return AssertionSuccess(); - // TODO(wan): do not print the value of an expression if it's + // FIXME: do not print the value of an expression if it's // already a literal. return AssertionFailure() << "The difference between " << expr1 << " and " << expr2 @@ -1664,7 +1708,7 @@ namespace { AssertionResult HRESULTFailureHelper(const char* expr, const char* expected, long hr) { // NOLINT -# if GTEST_OS_WINDOWS_MOBILE +# if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_TV_TITLE // Windows CE doesn't support FormatMessage. const char error_text[] = ""; @@ -1721,7 +1765,7 @@ AssertionResult IsHRESULTFailure(const char* expr, long hr) { // NOLINT // Utility functions for encoding Unicode text (wide strings) in // UTF-8. 
-// A Unicode code-point can have upto 21 bits, and is encoded in UTF-8
+// A Unicode code-point can have up to 21 bits, and is encoded in UTF-8
 // like this:
 //
 // Code-point length   Encoding
@@ -1785,7 +1829,7 @@ std::string CodePointToUtf8(UInt32 code_point) {
   return str;
 }
 
-// The following two functions only make sense if the the system
+// The following two functions only make sense if the system
 // uses UTF-16 for wide string encoding. All supported systems
 // with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16.
 
@@ -2097,13 +2141,8 @@ static const char* const kReservedTestSuiteAttributes[] = {
 // The list of reserved attributes used in the <testcase> element of XML output.
 static const char* const kReservedTestCaseAttributes[] = {
-    "classname",
-    "name",
-    "status",
-    "time",
-    "type_param",
-    "value_param"
-};
+    "classname", "name", "status", "time",
+    "type_param", "value_param", "file", "line"};
 
 template <int kSize>
 std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
@@ -2139,8 +2178,9 @@ static std::string FormatWordList(const std::vector<std::string>& words) {
   return word_list.GetString();
 }
 
-bool ValidateTestPropertyName(const std::string& property_name,
-                              const std::vector<std::string>& reserved_names) {
+static bool ValidateTestPropertyName(
+    const std::string& property_name,
+    const std::vector<std::string>& reserved_names) {
   if (std::find(reserved_names.begin(), reserved_names.end(), property_name) !=
           reserved_names.end()) {
     ADD_FAILURE() << "Reserved key used in RecordProperty(): " << property_name
@@ -2437,6 +2477,8 @@ Result HandleExceptionsInMethodIfSupported(
 #if GTEST_HAS_EXCEPTIONS
   try {
     return HandleSehExceptionsInMethodIfSupported(object, method, location);
+  } catch (const AssertionException&) {  // NOLINT
+    // This failure was reported already.
   } catch (const internal::GoogleTestFailureException&) {  // NOLINT
     // This exception type can only be thrown by a failed Google
     // Test assertion with the intention of letting another testing
@@ -2558,7 +2600,6 @@ TestInfo* MakeAndRegisterTestInfo(
   return test_info;
 }
 
-#if GTEST_HAS_PARAM_TEST
 void ReportInvalidTestCaseType(const char* test_case_name,
                                CodeLocation code_location) {
   Message errors;
@@ -2572,13 +2613,10 @@ void ReportInvalidTestCaseType(const char* test_case_name,
       << "probably rename one of the classes to put the tests into different\n"
       << "test cases.";
 
-  fprintf(stderr, "%s %s",
-          FormatFileLocation(code_location.file.c_str(),
-                             code_location.line).c_str(),
-          errors.GetString().c_str());
+  GTEST_LOG_(ERROR) << FormatFileLocation(code_location.file.c_str(),
+                                          code_location.line)
+                    << " " << errors.GetString();
 }
-#endif  // GTEST_HAS_PARAM_TEST
-
 }  // namespace internal
 
 namespace {
@@ -2616,12 +2654,10 @@ namespace internal {
 // and INSTANTIATE_TEST_CASE_P into regular tests and registers those.
 // This will be done just once during the program runtime.
 void UnitTestImpl::RegisterParameterizedTests() {
-#if GTEST_HAS_PARAM_TEST
   if (!parameterized_tests_registered_) {
    parameterized_test_registry_.RegisterTests();
    parameterized_tests_registered_ = true;
   }
-#endif
 }
 
 }  // namespace internal
@@ -2649,18 +2685,18 @@ void TestInfo::Run() {
           factory_, &internal::TestFactoryBase::CreateTest,
           "the test fixture's constructor");
 
-  // Runs the test only if the test object was created and its
-  // constructor didn't generate a fatal failure.
-  if ((test != NULL) && !Test::HasFatalFailure()) {
+  // Runs the test if the constructor didn't generate a fatal failure.
+ // Note that the object will not be null + if (!Test::HasFatalFailure()) { // This doesn't throw as all user code that can throw are wrapped into // exception handling code. test->Run(); } - // Deletes the test object. - impl->os_stack_trace_getter()->UponLeavingGTest(); - internal::HandleExceptionsInMethodIfSupported( - test, &Test::DeleteSelf_, "the test fixture's destructor"); + // Deletes the test object. + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + test, &Test::DeleteSelf_, "the test fixture's destructor"); result_.set_elapsed_time(internal::GetTimeInMillis() - start); @@ -2886,10 +2922,10 @@ enum GTestColor { }; #if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE && \ - !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT + !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT && !GTEST_OS_WINDOWS_MINGW // Returns the character attribute for the given color. -WORD GetColorAttribute(GTestColor color) { +static WORD GetColorAttribute(GTestColor color) { switch (color) { case COLOR_RED: return FOREGROUND_RED; case COLOR_GREEN: return FOREGROUND_GREEN; @@ -2898,11 +2934,42 @@ WORD GetColorAttribute(GTestColor color) { } } +static int GetBitOffset(WORD color_mask) { + if (color_mask == 0) return 0; + + int bitOffset = 0; + while ((color_mask & 1) == 0) { + color_mask >>= 1; + ++bitOffset; + } + return bitOffset; +} + +static WORD GetNewColor(GTestColor color, WORD old_color_attrs) { + // Let's reuse the BG + static const WORD background_mask = BACKGROUND_BLUE | BACKGROUND_GREEN | + BACKGROUND_RED | BACKGROUND_INTENSITY; + static const WORD foreground_mask = FOREGROUND_BLUE | FOREGROUND_GREEN | + FOREGROUND_RED | FOREGROUND_INTENSITY; + const WORD existing_bg = old_color_attrs & background_mask; + + WORD new_color = + GetColorAttribute(color) | existing_bg | FOREGROUND_INTENSITY; + static const int bg_bitOffset = GetBitOffset(background_mask); + static const int fg_bitOffset = GetBitOffset(foreground_mask); + + if (((new_color & background_mask) >> bg_bitOffset) == + ((new_color & foreground_mask) >> fg_bitOffset)) { + new_color ^= FOREGROUND_INTENSITY; // invert intensity + } + return new_color; +} + #else // Returns the ANSI color code for the given color. COLOR_DEFAULT is // an invalid input. -const char* GetAnsiColorCode(GTestColor color) { +static const char* GetAnsiColorCode(GTestColor color) { switch (color) { case COLOR_RED: return "1"; case COLOR_GREEN: return "2"; @@ -2918,7 +2985,7 @@ bool ShouldUseColor(bool stdout_is_tty) { const char* const gtest_color = GTEST_FLAG(color).c_str(); if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) { -#if GTEST_OS_WINDOWS +#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW // On Windows the TERM variable is usually not set, but the // console there does support colors. return stdout_is_tty; @@ -2954,7 +3021,7 @@ bool ShouldUseColor(bool stdout_is_tty) { // cannot simply emit special characters and have the terminal change colors. // This routine must actually emit the characters rather than return a string // that would be colored when printed, as can be done on Linux. -void ColoredPrintf(GTestColor color, const char* fmt, ...) { +static void ColoredPrintf(GTestColor color, const char* fmt, ...) { va_list args; va_start(args, fmt); @@ -2975,20 +3042,21 @@ void ColoredPrintf(GTestColor color, const char* fmt, ...) 
 {
 }
 
 #if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE && \
-    !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
+    !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT && !GTEST_OS_WINDOWS_MINGW
   const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
 
   // Gets the current text color.
   CONSOLE_SCREEN_BUFFER_INFO buffer_info;
   GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
   const WORD old_color_attrs = buffer_info.wAttributes;
+  const WORD new_color = GetNewColor(color, old_color_attrs);
 
   // We need to flush the stream buffers into the console before each
   // SetConsoleTextAttribute call lest it affect the text that is already
   // printed but has not yet reached the console.
   fflush(stdout);
-  SetConsoleTextAttribute(stdout_handle,
-                          GetColorAttribute(color) | FOREGROUND_INTENSITY);
+  SetConsoleTextAttribute(stdout_handle, new_color);
+
   vprintf(fmt, args);
 
   fflush(stdout);
@@ -3002,12 +3070,12 @@ void ColoredPrintf(GTestColor color, const char* fmt, ...) {
   va_end(args);
 }
 
-// Text printed in Google Test's text output and --gunit_list_tests
+// Text printed in Google Test's text output and --gtest_list_tests
 // output to label the type parameter and value parameter for a test.
 static const char kTypeParamLabel[] = "TypeParam";
 static const char kValueParamLabel[] = "GetParam()";
 
-void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
+static void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
   const char* const type_param = test_info.type_param();
   const char* const value_param = test_info.value_param();
 
@@ -3278,7 +3346,7 @@ void TestEventRepeater::Append(TestEventListener *listener) {
   listeners_.push_back(listener);
 }
 
-// TODO(vladl@google.com): Factor the search functionality into Vector::Find.
+// FIXME: Factor the search functionality into Vector::Find.
 TestEventListener* TestEventRepeater::Release(TestEventListener *listener) {
   for (size_t i = 0; i < listeners_.size(); ++i) {
     if (listeners_[i] == listener) {
@@ -3352,6 +3420,11 @@ class XmlUnitTestResultPrinter : public EmptyTestEventListener {
   explicit XmlUnitTestResultPrinter(const char* output_file);
 
   virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
+  void ListTestsMatchingFilter(const std::vector<TestCase*>& test_cases);
+
+  // Prints an XML summary of all unit tests.
+  static void PrintXmlTestsList(std::ostream* stream,
+                                const std::vector<TestCase*>& test_cases);
 
  private:
   // Is c a whitespace character that is normalized to a space character
@@ -3413,6 +3486,11 @@ class XmlUnitTestResultPrinter : public EmptyTestEventListener {
   // to delimit this attribute from prior attributes.
   static std::string TestPropertiesAsXmlAttributes(const TestResult& result);
 
+  // Streams an XML representation of the test properties of a TestResult
+  // object.
+  static void OutputXmlTestProperties(std::ostream* stream,
+                                      const TestResult& result);
+
   // The output file.
   const std::string output_file_;
 
@@ -3422,46 +3500,30 @@ class XmlUnitTestResultPrinter : public EmptyTestEventListener {
 // Creates a new XmlUnitTestResultPrinter.
 XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file)
     : output_file_(output_file) {
-  if (output_file_.c_str() == NULL || output_file_.empty()) {
-    fprintf(stderr, "XML output file may not be null\n");
-    fflush(stderr);
-    exit(EXIT_FAILURE);
+  if (output_file_.empty()) {
+    GTEST_LOG_(FATAL) << "XML output file may not be null";
   }
 }
 
 // Called after the unit test ends.
void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, int /*iteration*/) { - FILE* xmlout = NULL; - FilePath output_file(output_file_); - FilePath output_dir(output_file.RemoveFileName()); - - if (output_dir.CreateDirectoriesRecursively()) { - xmlout = posix::FOpen(output_file_.c_str(), "w"); - } - if (xmlout == NULL) { - // TODO(wan): report the reason of the failure. - // - // We don't do it for now as: - // - // 1. There is no urgent need for it. - // 2. It's a bit involved to make the errno variable thread-safe on - // all three operating systems (Linux, Windows, and Mac OS). - // 3. To interpret the meaning of errno in a thread-safe way, - // we need the strerror_r() function, which is not available on - // Windows. - fprintf(stderr, - "Unable to open file \"%s\"\n", - output_file_.c_str()); - fflush(stderr); - exit(EXIT_FAILURE); - } + FILE* xmlout = OpenFileForWriting(output_file_); std::stringstream stream; PrintXmlUnitTest(&stream, unit_test); fprintf(xmlout, "%s", StringStreamToString(&stream).c_str()); fclose(xmlout); } +void XmlUnitTestResultPrinter::ListTestsMatchingFilter( + const std::vector& test_cases) { + FILE* xmlout = OpenFileForWriting(output_file_); + std::stringstream stream; + PrintXmlTestsList(&stream, test_cases); + fprintf(xmlout, "%s", StringStreamToString(&stream).c_str()); + fclose(xmlout); +} + // Returns an XML-escaped copy of the input string str. If is_attribute // is true, the text is meant to appear as an attribute value, and // normalizable whitespace is preserved by replacing it with character @@ -3472,7 +3534,7 @@ void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, // module will consist of ordinary English text. // If this module is ever modified to produce version 1.1 XML output, // most invalid characters can be retained using character references. -// TODO(wan): It might be nice to have a minimally invasive, human-readable +// FIXME: It might be nice to have a minimally invasive, human-readable // escaping scheme for invalid characters, rather than dropping them. std::string XmlUnitTestResultPrinter::EscapeXml( const std::string& str, bool is_attribute) { @@ -3533,6 +3595,7 @@ std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters( // The following routines generate an XML representation of a UnitTest // object. +// GOOGLETEST_CM0009 DO NOT DELETE // // This is how Google Test concepts map to the DTD: // @@ -3622,13 +3685,17 @@ void XmlUnitTestResultPrinter::OutputXmlAttribute( } // Prints an XML representation of a TestInfo object. -// TODO(wan): There is also value in printing properties with the plain printer. +// FIXME: There is also value in printing properties with the plain printer. void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream, const char* test_case_name, const TestInfo& test_info) { const TestResult& result = *test_info.result(); const std::string kTestcase = "testcase"; + if (test_info.is_in_another_shard()) { + return; + } + *stream << " \n"; + return; + } OutputXmlAttribute(stream, kTestcase, "status", test_info.should_run() ? 
"run" : "notrun"); OutputXmlAttribute(stream, kTestcase, "time", FormatTimeInMillisAsSeconds(result.elapsed_time())); OutputXmlAttribute(stream, kTestcase, "classname", test_case_name); - *stream << TestPropertiesAsXmlAttributes(result); int failures = 0; for (int i = 0; i < result.total_part_count(); ++i) { @@ -3654,22 +3727,28 @@ void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream, if (++failures == 1) { *stream << ">\n"; } - const string location = internal::FormatCompilerIndependentFileLocation( - part.file_name(), part.line_number()); - const string summary = location + "\n" + part.summary(); + const std::string location = + internal::FormatCompilerIndependentFileLocation(part.file_name(), + part.line_number()); + const std::string summary = location + "\n" + part.summary(); *stream << " "; - const string detail = location + "\n" + part.message(); + const std::string detail = location + "\n" + part.message(); OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str()); *stream << "\n"; } } - if (failures == 0) + if (failures == 0 && result.test_property_count() == 0) { *stream << " />\n"; - else + } else { + if (failures == 0) { + *stream << ">\n"; + } + OutputXmlTestProperties(stream, result); *stream << " \n"; + } } // Prints an XML representation of a TestCase object @@ -3680,17 +3759,18 @@ void XmlUnitTestResultPrinter::PrintXmlTestCase(std::ostream* stream, OutputXmlAttribute(stream, kTestsuite, "name", test_case.name()); OutputXmlAttribute(stream, kTestsuite, "tests", StreamableToString(test_case.reportable_test_count())); - OutputXmlAttribute(stream, kTestsuite, "failures", - StreamableToString(test_case.failed_test_count())); - OutputXmlAttribute( - stream, kTestsuite, "disabled", - StreamableToString(test_case.reportable_disabled_test_count())); - OutputXmlAttribute(stream, kTestsuite, "errors", "0"); - OutputXmlAttribute(stream, kTestsuite, "time", - FormatTimeInMillisAsSeconds(test_case.elapsed_time())); - *stream << TestPropertiesAsXmlAttributes(test_case.ad_hoc_test_result()) - << ">\n"; - + if (!GTEST_FLAG(list_tests)) { + OutputXmlAttribute(stream, kTestsuite, "failures", + StreamableToString(test_case.failed_test_count())); + OutputXmlAttribute( + stream, kTestsuite, "disabled", + StreamableToString(test_case.reportable_disabled_test_count())); + OutputXmlAttribute(stream, kTestsuite, "errors", "0"); + OutputXmlAttribute(stream, kTestsuite, "time", + FormatTimeInMillisAsSeconds(test_case.elapsed_time())); + *stream << TestPropertiesAsXmlAttributes(test_case.ad_hoc_test_result()); + } + *stream << ">\n"; for (int i = 0; i < test_case.total_test_count(); ++i) { if (test_case.GetTestInfo(i)->is_reportable()) OutputXmlTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i)); @@ -3724,7 +3804,6 @@ void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream, OutputXmlAttribute(stream, kTestsuites, "random_seed", StreamableToString(unit_test.random_seed())); } - *stream << TestPropertiesAsXmlAttributes(unit_test.ad_hoc_test_result()); OutputXmlAttribute(stream, kTestsuites, "name", "AllTests"); @@ -3737,6 +3816,28 @@ void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream, *stream << "\n"; } +void XmlUnitTestResultPrinter::PrintXmlTestsList( + std::ostream* stream, const std::vector& test_cases) { + const std::string kTestsuites = "testsuites"; + + *stream << "\n"; + *stream << "<" << kTestsuites; + + int total_tests = 0; + for (size_t i = 0; i < test_cases.size(); ++i) { + total_tests += 
test_cases[i]->total_test_count(); + } + OutputXmlAttribute(stream, kTestsuites, "tests", + StreamableToString(total_tests)); + OutputXmlAttribute(stream, kTestsuites, "name", "AllTests"); + *stream << ">\n"; + + for (size_t i = 0; i < test_cases.size(); ++i) { + PrintXmlTestCase(stream, *test_cases[i]); + } + *stream << "\n"; +} + // Produces a string representing the test properties in a result as space // delimited XML attributes based on the property key="value" pairs. std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes( @@ -3750,8 +3851,390 @@ std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes( return attributes.GetString(); } +void XmlUnitTestResultPrinter::OutputXmlTestProperties( + std::ostream* stream, const TestResult& result) { + const std::string kProperties = "properties"; + const std::string kProperty = "property"; + + if (result.test_property_count() <= 0) { + return; + } + + *stream << "<" << kProperties << ">\n"; + for (int i = 0; i < result.test_property_count(); ++i) { + const TestProperty& property = result.GetTestProperty(i); + *stream << "<" << kProperty; + *stream << " name=\"" << EscapeXmlAttribute(property.key()) << "\""; + *stream << " value=\"" << EscapeXmlAttribute(property.value()) << "\""; + *stream << "/>\n"; + } + *stream << "\n"; +} + // End XmlUnitTestResultPrinter +// This class generates an JSON output file. +class JsonUnitTestResultPrinter : public EmptyTestEventListener { + public: + explicit JsonUnitTestResultPrinter(const char* output_file); + + virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); + + // Prints an JSON summary of all unit tests. + static void PrintJsonTestList(::std::ostream* stream, + const std::vector& test_cases); + + private: + // Returns an JSON-escaped copy of the input string str. + static std::string EscapeJson(const std::string& str); + + //// Verifies that the given attribute belongs to the given element and + //// streams the attribute as JSON. + static void OutputJsonKey(std::ostream* stream, + const std::string& element_name, + const std::string& name, + const std::string& value, + const std::string& indent, + bool comma = true); + static void OutputJsonKey(std::ostream* stream, + const std::string& element_name, + const std::string& name, + int value, + const std::string& indent, + bool comma = true); + + // Streams a JSON representation of a TestInfo object. + static void OutputJsonTestInfo(::std::ostream* stream, + const char* test_case_name, + const TestInfo& test_info); + + // Prints a JSON representation of a TestCase object + static void PrintJsonTestCase(::std::ostream* stream, + const TestCase& test_case); + + // Prints a JSON summary of unit_test to output stream out. + static void PrintJsonUnitTest(::std::ostream* stream, + const UnitTest& unit_test); + + // Produces a string representing the test properties in a result as + // a JSON dictionary. + static std::string TestPropertiesAsJson(const TestResult& result, + const std::string& indent); + + // The output file. + const std::string output_file_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(JsonUnitTestResultPrinter); +}; + +// Creates a new JsonUnitTestResultPrinter. 
+JsonUnitTestResultPrinter::JsonUnitTestResultPrinter(const char* output_file) + : output_file_(output_file) { + if (output_file_.empty()) { + GTEST_LOG_(FATAL) << "JSON output file may not be null"; + } +} + +void JsonUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, + int /*iteration*/) { + FILE* jsonout = OpenFileForWriting(output_file_); + std::stringstream stream; + PrintJsonUnitTest(&stream, unit_test); + fprintf(jsonout, "%s", StringStreamToString(&stream).c_str()); + fclose(jsonout); +} + +// Returns an JSON-escaped copy of the input string str. +std::string JsonUnitTestResultPrinter::EscapeJson(const std::string& str) { + Message m; + + for (size_t i = 0; i < str.size(); ++i) { + const char ch = str[i]; + switch (ch) { + case '\\': + case '"': + case '/': + m << '\\' << ch; + break; + case '\b': + m << "\\b"; + break; + case '\t': + m << "\\t"; + break; + case '\n': + m << "\\n"; + break; + case '\f': + m << "\\f"; + break; + case '\r': + m << "\\r"; + break; + default: + if (ch < ' ') { + m << "\\u00" << String::FormatByte(static_cast(ch)); + } else { + m << ch; + } + break; + } + } + + return m.GetString(); +} + +// The following routines generate an JSON representation of a UnitTest +// object. + +// Formats the given time in milliseconds as seconds. +static std::string FormatTimeInMillisAsDuration(TimeInMillis ms) { + ::std::stringstream ss; + ss << (static_cast(ms) * 1e-3) << "s"; + return ss.str(); +} + +// Converts the given epoch time in milliseconds to a date string in the +// RFC3339 format, without the timezone information. +static std::string FormatEpochTimeInMillisAsRFC3339(TimeInMillis ms) { + struct tm time_struct; + if (!PortableLocaltime(static_cast(ms / 1000), &time_struct)) + return ""; + // YYYY-MM-DDThh:mm:ss + return StreamableToString(time_struct.tm_year + 1900) + "-" + + String::FormatIntWidth2(time_struct.tm_mon + 1) + "-" + + String::FormatIntWidth2(time_struct.tm_mday) + "T" + + String::FormatIntWidth2(time_struct.tm_hour) + ":" + + String::FormatIntWidth2(time_struct.tm_min) + ":" + + String::FormatIntWidth2(time_struct.tm_sec) + "Z"; +} + +static inline std::string Indent(int width) { + return std::string(width, ' '); +} + +void JsonUnitTestResultPrinter::OutputJsonKey( + std::ostream* stream, + const std::string& element_name, + const std::string& name, + const std::string& value, + const std::string& indent, + bool comma) { + const std::vector& allowed_names = + GetReservedAttributesForElement(element_name); + + GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) != + allowed_names.end()) + << "Key \"" << name << "\" is not allowed for value \"" << element_name + << "\"."; + + *stream << indent << "\"" << name << "\": \"" << EscapeJson(value) << "\""; + if (comma) + *stream << ",\n"; +} + +void JsonUnitTestResultPrinter::OutputJsonKey( + std::ostream* stream, + const std::string& element_name, + const std::string& name, + int value, + const std::string& indent, + bool comma) { + const std::vector& allowed_names = + GetReservedAttributesForElement(element_name); + + GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) != + allowed_names.end()) + << "Key \"" << name << "\" is not allowed for value \"" << element_name + << "\"."; + + *stream << indent << "\"" << name << "\": " << StreamableToString(value); + if (comma) + *stream << ",\n"; +} + +// Prints a JSON representation of a TestInfo object. 
+void JsonUnitTestResultPrinter::OutputJsonTestInfo(::std::ostream* stream, + const char* test_case_name, + const TestInfo& test_info) { + const TestResult& result = *test_info.result(); + const std::string kTestcase = "testcase"; + const std::string kIndent = Indent(10); + + *stream << Indent(8) << "{\n"; + OutputJsonKey(stream, kTestcase, "name", test_info.name(), kIndent); + + if (test_info.value_param() != NULL) { + OutputJsonKey(stream, kTestcase, "value_param", + test_info.value_param(), kIndent); + } + if (test_info.type_param() != NULL) { + OutputJsonKey(stream, kTestcase, "type_param", test_info.type_param(), + kIndent); + } + if (GTEST_FLAG(list_tests)) { + OutputJsonKey(stream, kTestcase, "file", test_info.file(), kIndent); + OutputJsonKey(stream, kTestcase, "line", test_info.line(), kIndent, false); + *stream << "\n" << Indent(8) << "}"; + return; + } + + OutputJsonKey(stream, kTestcase, "status", + test_info.should_run() ? "RUN" : "NOTRUN", kIndent); + OutputJsonKey(stream, kTestcase, "time", + FormatTimeInMillisAsDuration(result.elapsed_time()), kIndent); + OutputJsonKey(stream, kTestcase, "classname", test_case_name, kIndent, false); + *stream << TestPropertiesAsJson(result, kIndent); + + int failures = 0; + for (int i = 0; i < result.total_part_count(); ++i) { + const TestPartResult& part = result.GetTestPartResult(i); + if (part.failed()) { + *stream << ",\n"; + if (++failures == 1) { + *stream << kIndent << "\"" << "failures" << "\": [\n"; + } + const std::string location = + internal::FormatCompilerIndependentFileLocation(part.file_name(), + part.line_number()); + const std::string message = EscapeJson(location + "\n" + part.message()); + *stream << kIndent << " {\n" + << kIndent << " \"failure\": \"" << message << "\",\n" + << kIndent << " \"type\": \"\"\n" + << kIndent << " }"; + } + } + + if (failures > 0) + *stream << "\n" << kIndent << "]"; + *stream << "\n" << Indent(8) << "}"; +} + +// Prints an JSON representation of a TestCase object +void JsonUnitTestResultPrinter::PrintJsonTestCase(std::ostream* stream, + const TestCase& test_case) { + const std::string kTestsuite = "testsuite"; + const std::string kIndent = Indent(6); + + *stream << Indent(4) << "{\n"; + OutputJsonKey(stream, kTestsuite, "name", test_case.name(), kIndent); + OutputJsonKey(stream, kTestsuite, "tests", test_case.reportable_test_count(), + kIndent); + if (!GTEST_FLAG(list_tests)) { + OutputJsonKey(stream, kTestsuite, "failures", test_case.failed_test_count(), + kIndent); + OutputJsonKey(stream, kTestsuite, "disabled", + test_case.reportable_disabled_test_count(), kIndent); + OutputJsonKey(stream, kTestsuite, "errors", 0, kIndent); + OutputJsonKey(stream, kTestsuite, "time", + FormatTimeInMillisAsDuration(test_case.elapsed_time()), + kIndent, false); + *stream << TestPropertiesAsJson(test_case.ad_hoc_test_result(), kIndent) + << ",\n"; + } + + *stream << kIndent << "\"" << kTestsuite << "\": [\n"; + + bool comma = false; + for (int i = 0; i < test_case.total_test_count(); ++i) { + if (test_case.GetTestInfo(i)->is_reportable()) { + if (comma) { + *stream << ",\n"; + } else { + comma = true; + } + OutputJsonTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i)); + } + } + *stream << "\n" << kIndent << "]\n" << Indent(4) << "}"; +} + +// Prints a JSON summary of unit_test to output stream out. 
+void JsonUnitTestResultPrinter::PrintJsonUnitTest(std::ostream* stream, + const UnitTest& unit_test) { + const std::string kTestsuites = "testsuites"; + const std::string kIndent = Indent(2); + *stream << "{\n"; + + OutputJsonKey(stream, kTestsuites, "tests", unit_test.reportable_test_count(), + kIndent); + OutputJsonKey(stream, kTestsuites, "failures", unit_test.failed_test_count(), + kIndent); + OutputJsonKey(stream, kTestsuites, "disabled", + unit_test.reportable_disabled_test_count(), kIndent); + OutputJsonKey(stream, kTestsuites, "errors", 0, kIndent); + if (GTEST_FLAG(shuffle)) { + OutputJsonKey(stream, kTestsuites, "random_seed", unit_test.random_seed(), + kIndent); + } + OutputJsonKey(stream, kTestsuites, "timestamp", + FormatEpochTimeInMillisAsRFC3339(unit_test.start_timestamp()), + kIndent); + OutputJsonKey(stream, kTestsuites, "time", + FormatTimeInMillisAsDuration(unit_test.elapsed_time()), kIndent, + false); + + *stream << TestPropertiesAsJson(unit_test.ad_hoc_test_result(), kIndent) + << ",\n"; + + OutputJsonKey(stream, kTestsuites, "name", "AllTests", kIndent); + *stream << kIndent << "\"" << kTestsuites << "\": [\n"; + + bool comma = false; + for (int i = 0; i < unit_test.total_test_case_count(); ++i) { + if (unit_test.GetTestCase(i)->reportable_test_count() > 0) { + if (comma) { + *stream << ",\n"; + } else { + comma = true; + } + PrintJsonTestCase(stream, *unit_test.GetTestCase(i)); + } + } + + *stream << "\n" << kIndent << "]\n" << "}\n"; +} + +void JsonUnitTestResultPrinter::PrintJsonTestList( + std::ostream* stream, const std::vector& test_cases) { + const std::string kTestsuites = "testsuites"; + const std::string kIndent = Indent(2); + *stream << "{\n"; + int total_tests = 0; + for (size_t i = 0; i < test_cases.size(); ++i) { + total_tests += test_cases[i]->total_test_count(); + } + OutputJsonKey(stream, kTestsuites, "tests", total_tests, kIndent); + + OutputJsonKey(stream, kTestsuites, "name", "AllTests", kIndent); + *stream << kIndent << "\"" << kTestsuites << "\": [\n"; + + for (size_t i = 0; i < test_cases.size(); ++i) { + if (i != 0) { + *stream << ",\n"; + } + PrintJsonTestCase(stream, *test_cases[i]); + } + + *stream << "\n" + << kIndent << "]\n" + << "}\n"; +} +// Produces a string representing the test properties in a result as +// a JSON dictionary. +std::string JsonUnitTestResultPrinter::TestPropertiesAsJson( + const TestResult& result, const std::string& indent) { + Message attributes; + for (int i = 0; i < result.test_property_count(); ++i) { + const TestProperty& property = result.GetTestProperty(i); + attributes << ",\n" << indent << "\"" << property.key() << "\": " + << "\"" << EscapeJson(property.value()) << "\""; + } + return attributes.GetString(); +} + +// End JsonUnitTestResultPrinter + #if GTEST_CAN_STREAM_RESULTS_ // Checks if str contains '=', '&', '%' or '\n' characters. If yes, @@ -3759,8 +4242,8 @@ std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes( // example, replaces "=" with "%3D". This algorithm is O(strlen(str)) // in both time and space -- important as the input str may contain an // arbitrarily long test failure message and stack trace. 
-string StreamingListener::UrlEncode(const char* str) {
-  string result;
+std::string StreamingListener::UrlEncode(const char* str) {
+  std::string result;
   result.reserve(strlen(str) + 1);
   for (char ch = *str; ch != '\0'; ch = *++str) {
     switch (ch) {
@@ -3822,47 +4305,82 @@ void StreamingListener::SocketWriter::MakeConnection() {
 // End of class Streaming Listener
 #endif  // GTEST_CAN_STREAM_RESULTS__

-// Class ScopedTrace
-
-// Pushes the given source file location and message onto a per-thread
-// trace stack maintained by Google Test.
-ScopedTrace::ScopedTrace(const char* file, int line, const Message& message)
-    GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) {
-  TraceInfo trace;
-  trace.file = file;
-  trace.line = line;
-  trace.message = message.GetString();
-
-  UnitTest::GetInstance()->PushGTestTrace(trace);
-}
-
-// Pops the info pushed by the c'tor.
-ScopedTrace::~ScopedTrace()
-    GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) {
-  UnitTest::GetInstance()->PopGTestTrace();
-}
-
-
 // class OsStackTraceGetter

 const char* const OsStackTraceGetterInterface::kElidedFramesMarker =
     "... " GTEST_NAME_ " internal frames ...";

-string OsStackTraceGetter::CurrentStackTrace(int /*max_depth*/,
-                                             int /*skip_count*/) {
+std::string OsStackTraceGetter::CurrentStackTrace(int max_depth, int skip_count)
+    GTEST_LOCK_EXCLUDED_(mutex_) {
+#if GTEST_HAS_ABSL
+  std::string result;
+
+  if (max_depth <= 0) {
+    return result;
+  }
+
+  max_depth = std::min(max_depth, kMaxStackTraceDepth);
+
+  std::vector<void*> raw_stack(max_depth);
+  // Skips the frames requested by the caller, plus this function.
+  const int raw_stack_size =
+      absl::GetStackTrace(&raw_stack[0], max_depth, skip_count + 1);
+
+  void* caller_frame = nullptr;
+  {
+    MutexLock lock(&mutex_);
+    caller_frame = caller_frame_;
+  }
+
+  for (int i = 0; i < raw_stack_size; ++i) {
+    if (raw_stack[i] == caller_frame &&
+        !GTEST_FLAG(show_internal_stack_frames)) {
+      // Add a marker to the trace and stop adding frames.
+      absl::StrAppend(&result, kElidedFramesMarker, "\n");
+      break;
+    }
+
+    char tmp[1024];
+    const char* symbol = "(unknown)";
+    if (absl::Symbolize(raw_stack[i], tmp, sizeof(tmp))) {
+      symbol = tmp;
+    }
+
+    char line[1024];
+    snprintf(line, sizeof(line), "  %p: %s\n", raw_stack[i], symbol);
+    result += line;
+  }
+
+  return result;
+
+#else  // !GTEST_HAS_ABSL
+  static_cast<void>(max_depth);
+  static_cast<void>(skip_count);
   return "";
+#endif  // GTEST_HAS_ABSL
 }

-void OsStackTraceGetter::UponLeavingGTest() {}
+void OsStackTraceGetter::UponLeavingGTest() GTEST_LOCK_EXCLUDED_(mutex_) {
+#if GTEST_HAS_ABSL
+  void* caller_frame = nullptr;
+  if (absl::GetStackTrace(&caller_frame, 1, 3) <= 0) {
+    caller_frame = nullptr;
+  }
+
+  MutexLock lock(&mutex_);
+  caller_frame_ = caller_frame;
+#endif  // GTEST_HAS_ABSL
+}

 // A helper class that creates the premature-exit file in its
 // constructor and deletes the file in its destructor.
 class ScopedPrematureExitFile {
  public:
   explicit ScopedPrematureExitFile(const char* premature_exit_filepath)
-      : premature_exit_filepath_(premature_exit_filepath) {
+      : premature_exit_filepath_(premature_exit_filepath ?
+                                 premature_exit_filepath : "") {
     // If a path to the premature-exit file is specified...
-    if (premature_exit_filepath != NULL && *premature_exit_filepath != '\0') {
+    if (!premature_exit_filepath_.empty()) {
       // create the file with a single "0" character in it. I/O
       // errors are ignored as there's nothing better we can do and we
       // don't want to fail the test because of this.
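The ScopedPrematureExitFile handshake above and below is what lets an outer harness tell a crash from a clean exit: when the TEST_PREMATURE_EXIT_FILE environment variable names a path, Google Test creates that file as the program starts and removes it again on orderly shutdown. A minimal harness-side check might look like this (a sketch; the function name is invented):

    #include <sys/stat.h>

    // Returns true if the test binary died before Google Test could clean
    // up, i.e. the premature-exit file it created at startup is still
    // present after the child process has terminated.
    bool TestRunEndedPrematurely(const char* premature_exit_path) {
      struct stat sbuf;
      return stat(premature_exit_path, &sbuf) == 0;
    }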
@@ -3873,13 +4391,18 @@ class ScopedPrematureExitFile { } ~ScopedPrematureExitFile() { - if (premature_exit_filepath_ != NULL && *premature_exit_filepath_ != '\0') { - remove(premature_exit_filepath_); + if (!premature_exit_filepath_.empty()) { + int retval = remove(premature_exit_filepath_.c_str()); + if (retval) { + GTEST_LOG_(ERROR) << "Failed to remove premature exit filepath \"" + << premature_exit_filepath_ << "\" with error " + << retval; + } } } private: - const char* const premature_exit_filepath_; + const std::string premature_exit_filepath_; GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedPrematureExitFile); }; @@ -4149,6 +4672,11 @@ void UnitTest::AddTestPartResult( // when a failure happens and both the --gtest_break_on_failure and // the --gtest_catch_exceptions flags are specified. DebugBreak(); +#elif (!defined(__native_client__)) && \ + ((defined(__clang__) || defined(__GNUC__)) && \ + (defined(__x86_64__) || defined(__i386__))) + // with clang/gcc we can achieve the same effect on x86 by invoking int3 + asm("int3"); #else // Dereference NULL through a volatile pointer to prevent the compiler // from removing. We use this rather than abort() or __builtin_trap() for @@ -4216,7 +4744,7 @@ int UnitTest::Run() { // used for the duration of the program. impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions)); -#if GTEST_HAS_SEH +#if GTEST_OS_WINDOWS // Either the user wants Google Test to catch exceptions thrown by the // tests or this is executing in the context of death test child // process. In either case the user does not want to see pop-up dialogs @@ -4245,7 +4773,7 @@ int UnitTest::Run() { // VC++ doesn't define _set_abort_behavior() prior to the version 8.0. // Users of prior VC versions shall suffer the agony and pain of // clicking through the countless debug dialogs. - // TODO(vladl@google.com): find a way to suppress the abort dialog() in the + // FIXME: find a way to suppress the abort dialog() in the // debug mode when compiled with VC 7.1 or lower. if (!GTEST_FLAG(break_on_failure)) _set_abort_behavior( @@ -4253,7 +4781,7 @@ int UnitTest::Run() { _WRITE_ABORT_MSG | _CALL_REPORTFAULT); // pop-up window, core dump. # endif } -#endif // GTEST_HAS_SEH +#endif // GTEST_OS_WINDOWS return internal::HandleExceptionsInMethodIfSupported( impl(), @@ -4286,7 +4814,6 @@ const TestInfo* UnitTest::current_test_info() const // Returns the random seed used at the start of the current test run. int UnitTest::random_seed() const { return impl_->random_seed(); } -#if GTEST_HAS_PARAM_TEST // Returns ParameterizedTestCaseRegistry object used to keep track of // value-parameterized tests and instantiate and register them. internal::ParameterizedTestCaseRegistry& @@ -4294,7 +4821,6 @@ internal::ParameterizedTestCaseRegistry& GTEST_LOCK_EXCLUDED_(mutex_) { return impl_->parameterized_test_registry(); } -#endif // GTEST_HAS_PARAM_TEST // Creates an empty UnitTest. 
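The asm("int3") path added above gives --gtest_break_on_failure on x86 Linux and macOS (clang/gcc) the behavior that DebugBreak() already provides on Windows: the process traps at the failing assertion instead of running on, which is most useful with a debugger attached, e.g. (using libvpx's own test binary) gdb --args ./test_libvpx --gtest_break_on_failure.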
 UnitTest::UnitTest() {
@@ -4333,10 +4859,8 @@ UnitTestImpl::UnitTestImpl(UnitTest* parent)
           &default_global_test_part_result_reporter_),
       per_thread_test_part_result_reporter_(
           &default_per_thread_test_part_result_reporter_),
-#if GTEST_HAS_PARAM_TEST
       parameterized_test_registry_(),
       parameterized_tests_registered_(false),
-#endif  // GTEST_HAS_PARAM_TEST
       last_death_test_case_(-1),
       current_test_case_(NULL),
       current_test_info_(NULL),
@@ -4403,10 +4927,12 @@ void UnitTestImpl::ConfigureXmlOutput() {
   if (output_format == "xml") {
     listeners()->SetDefaultXmlGenerator(new XmlUnitTestResultPrinter(
        UnitTestOptions::GetAbsolutePathToOutputFile().c_str()));
+  } else if (output_format == "json") {
+    listeners()->SetDefaultXmlGenerator(new JsonUnitTestResultPrinter(
+        UnitTestOptions::GetAbsolutePathToOutputFile().c_str()));
   } else if (output_format != "") {
-    printf("WARNING: unrecognized output format \"%s\" ignored.\n",
-           output_format.c_str());
-    fflush(stdout);
+    GTEST_LOG_(WARNING) << "WARNING: unrecognized output format \""
+                        << output_format << "\" ignored.";
   }
 }
@@ -4421,9 +4947,8 @@ void UnitTestImpl::ConfigureStreamingOutput() {
     listeners()->Append(new StreamingListener(target.substr(0, pos),
                                               target.substr(pos+1)));
   } else {
-    printf("WARNING: unrecognized streaming target \"%s\" ignored.\n",
-           target.c_str());
-    fflush(stdout);
+    GTEST_LOG_(WARNING) << "unrecognized streaming target \"" << target
+                        << "\" ignored.";
   }
 }
 }
@@ -4462,6 +4987,13 @@ void UnitTestImpl::PostFlagParsingInit() {
   // Configures listeners for streaming test results to the specified server.
   ConfigureStreamingOutput();
 #endif  // GTEST_CAN_STREAM_RESULTS_
+
+#if GTEST_HAS_ABSL
+  if (GTEST_FLAG(install_failure_signal_handler)) {
+    absl::FailureSignalHandlerOptions options;
+    absl::InstallFailureSignalHandler(options);
+  }
+#endif  // GTEST_HAS_ABSL
 }
 }
@@ -4505,11 +5037,11 @@ TestCase* UnitTestImpl::GetTestCase(const char* test_case_name,
                                     Test::SetUpTestCaseFunc set_up_tc,
                                     Test::TearDownTestCaseFunc tear_down_tc) {
   // Can we find a TestCase with the given name?
-  const std::vector<TestCase*>::const_iterator test_case =
-      std::find_if(test_cases_.begin(), test_cases_.end(),
+  const std::vector<TestCase*>::const_reverse_iterator test_case =
+      std::find_if(test_cases_.rbegin(), test_cases_.rend(),
                    TestCaseNameIs(test_case_name));
-  if (test_case != test_cases_.end())
+  if (test_case != test_cases_.rend())
     return *test_case;
   // No. Let's create one.
@@ -4550,13 +5082,8 @@ static void TearDownEnvironment(Environment* env) { env->TearDown(); }
 // All other functions called from RunAllTests() may safely assume that
 // parameterized tests are ready to be counted and run.
 bool UnitTestImpl::RunAllTests() {
-  // Makes sure InitGoogleTest() was called.
-  if (!GTestIsInitialized()) {
-    printf("%s",
-           "\nThis test program did NOT call ::testing::InitGoogleTest "
-           "before calling RUN_ALL_TESTS(). Please fix it.\n");
-    return false;
-  }
+  // True iff Google Test is initialized before RUN_ALL_TESTS() is called.
+  const bool gtest_is_initialized_before_run_all_tests = GTestIsInitialized();

   // Do not run any test if the --help flag was specified.
   if (g_help_flag)
@@ -4684,6 +5211,20 @@ bool UnitTestImpl::RunAllTests() {

   repeater->OnTestProgramEnd(*parent_);

+  if (!gtest_is_initialized_before_run_all_tests) {
+    ColoredPrintf(
+        COLOR_RED,
+        "\nIMPORTANT NOTICE - DO NOT IGNORE:\n"
+        "This test program did NOT call " GTEST_INIT_GOOGLE_TEST_NAME_
+        "() before calling RUN_ALL_TESTS(). This is INVALID. Soon " GTEST_NAME_
+        " will start to enforce the valid usage. "
+        "Please fix it ASAP, or IT WILL START TO FAIL.\n");  // NOLINT
+#if GTEST_FOR_GOOGLE_
+    ColoredPrintf(COLOR_RED,
+                  "For more details, see http://wiki/Main/ValidGUnitMain.\n");
+#endif  // GTEST_FOR_GOOGLE_
+  }
+
   return !failed;
 }
@@ -4785,8 +5326,8 @@ bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) {
 // each TestCase and TestInfo object.
 // If shard_tests == true, further filters tests based on sharding
 // variables in the environment - see
-// http://code.google.com/p/googletest/wiki/GoogleTestAdvancedGuide.
-// Returns the number of tests that should run.
+// https://github.com/google/googletest/blob/master/googletest/docs/advanced.md
+// . Returns the number of tests that should run.
 int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
   const Int32 total_shards = shard_tests == HONOR_SHARDING_PROTOCOL ?
       Int32FromEnvOrDie(kTestTotalShards, -1) : -1;
@@ -4825,10 +5366,11 @@ int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
          (GTEST_FLAG(also_run_disabled_tests) || !is_disabled) &&
          matches_filter;

-      const bool is_selected = is_runnable &&
-          (shard_tests == IGNORE_SHARDING_PROTOCOL ||
-           ShouldRunTestOnShard(total_shards, shard_index,
-                                num_runnable_tests));
+      const bool is_in_another_shard =
+          shard_tests != IGNORE_SHARDING_PROTOCOL &&
+          !ShouldRunTestOnShard(total_shards, shard_index, num_runnable_tests);
+      test_info->is_in_another_shard_ = is_in_another_shard;
+      const bool is_selected = is_runnable && !is_in_another_shard;

       num_runnable_tests += is_runnable;
       num_selected_tests += is_selected;
@@ -4898,6 +5440,23 @@ void UnitTestImpl::ListTestsMatchingFilter() {
     }
   }
   fflush(stdout);
+  const std::string& output_format = UnitTestOptions::GetOutputFormat();
+  if (output_format == "xml" || output_format == "json") {
+    FILE* fileout = OpenFileForWriting(
+        UnitTestOptions::GetAbsolutePathToOutputFile().c_str());
+    std::stringstream stream;
+    if (output_format == "xml") {
+      XmlUnitTestResultPrinter(
+          UnitTestOptions::GetAbsolutePathToOutputFile().c_str())
+          .PrintXmlTestsList(&stream, test_cases_);
+    } else if (output_format == "json") {
+      JsonUnitTestResultPrinter(
+          UnitTestOptions::GetAbsolutePathToOutputFile().c_str())
+          .PrintJsonTestList(&stream, test_cases_);
+    }
+    fprintf(fileout, "%s", StringStreamToString(&stream).c_str());
+    fclose(fileout);
+  }
 }

 // Sets the OS stack trace getter.
@@ -4928,11 +5487,15 @@ OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() {
   return os_stack_trace_getter_;
 }

-// Returns the TestResult for the test that's currently running, or
-// the TestResult for the ad hoc test if no test is running.
+// Returns the most specific TestResult currently running.
 TestResult* UnitTestImpl::current_test_result() {
-  return current_test_info_ ?
-      &(current_test_info_->result_) : &ad_hoc_test_result_;
+  if (current_test_info_ != NULL) {
+    return &current_test_info_->result_;
+  }
+  if (current_test_case_ != NULL) {
+    return &current_test_case_->ad_hoc_test_result_;
+  }
+  return &ad_hoc_test_result_;
 }

 // Shuffles all test cases, and the tests within each test case,
@@ -5013,9 +5576,8 @@ bool SkipPrefix(const char* prefix, const char** pstr) {
 // part can be omitted.
 //
 // Returns the value of the flag, or NULL if the parsing failed.
-const char* ParseFlagValue(const char* str,
-                           const char* flag,
-                           bool def_optional) {
+static const char* ParseFlagValue(const char* str, const char* flag,
                                  bool def_optional) {
   // str and flag must not be NULL.
   if (str == NULL || flag == NULL) return NULL;
@@ -5051,7 +5613,7 @@ const char* ParseFlagValue(const char* str,
 //
 // On success, stores the value of the flag in *value, and returns
 // true. On failure, returns false without changing *value.
-bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
+static bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
   // Gets the value of the flag as a string.
   const char* const value_str = ParseFlagValue(str, flag, true);
@@ -5085,7 +5647,8 @@ bool ParseInt32Flag(const char* str, const char* flag, Int32* value) {
 //
 // On success, stores the value of the flag in *value, and returns
 // true. On failure, returns false without changing *value.
-bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
+template <typename String>
+static bool ParseStringFlag(const char* str, const char* flag, String* value) {
   // Gets the value of the flag as a string.
   const char* const value_str = ParseFlagValue(str, flag, false);
@@ -5121,7 +5684,7 @@ static bool HasGoogleTestFlagPrefix(const char* str) {
 // @Y changes the color to yellow.
 // @D changes to the default terminal text color.
 //
-// TODO(wan@google.com): Write tests for this once we add stdout
+// FIXME: Write tests for this once we add stdout
 // capturing to Google Test.
 static void PrintColorEncoded(const char* str) {
   GTestColor color = COLOR_DEFAULT;  // The current color.
@@ -5187,24 +5750,25 @@ static const char kColorEncodedHelpMessage[] =
 "      Enable/disable colored output. The default is @Gauto@D.\n"
 "  -@G-" GTEST_FLAG_PREFIX_ "print_time=0@D\n"
 "      Don't print the elapsed time of each test.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "output=xml@Y[@G:@YDIRECTORY_PATH@G"
+"  @G--" GTEST_FLAG_PREFIX_ "output=@Y(@Gjson@Y|@Gxml@Y)[@G:@YDIRECTORY_PATH@G"
     GTEST_PATH_SEP_ "@Y|@G:@YFILE_PATH]@D\n"
-"      Generate an XML report in the given directory or with the given file\n"
-"      name. @YFILE_PATH@D defaults to @Gtest_details.xml@D.\n"
-#if GTEST_CAN_STREAM_RESULTS_
+"      Generate a JSON or XML report in the given directory or with the given\n"
+"      file name. @YFILE_PATH@D defaults to @Gtest_details.xml@D.\n"
+# if GTEST_CAN_STREAM_RESULTS_
 "  @G--" GTEST_FLAG_PREFIX_ "stream_result_to=@YHOST@G:@YPORT@D\n"
 "      Stream test results to the given server.\n"
-#endif  // GTEST_CAN_STREAM_RESULTS_
+# endif  // GTEST_CAN_STREAM_RESULTS_
 "\n"
 "Assertion Behavior:\n"
-#if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
+# if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
 "  @G--" GTEST_FLAG_PREFIX_ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n"
 "      Set the default death test style.\n"
-#endif  // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
+# endif  // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
 "  @G--" GTEST_FLAG_PREFIX_ "break_on_failure@D\n"
 "      Turn assertion failures into debugger break-points.\n"
 "  @G--" GTEST_FLAG_PREFIX_ "throw_on_failure@D\n"
-"      Turn assertion failures into C++ exceptions.\n"
+"      Turn assertion failures into C++ exceptions for use by an external\n"
+"      test framework.\n"
 "  @G--" GTEST_FLAG_PREFIX_ "catch_exceptions=0@D\n"
 "      Do not report exceptions as test failures. Instead, allow them\n"
 "      to crash the program or throw a pop-up (on Windows).\n"
@@ -5221,7 +5785,7 @@ static const char kColorEncodedHelpMessage[] =
 "(not one in your own code or tests), please report it to\n"
 "@G<" GTEST_DEV_EMAIL_ ">@D.\n";

-bool ParseGoogleTestFlag(const char* const arg) {
+static bool ParseGoogleTestFlag(const char* const arg) {
   return ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag,
                        &GTEST_FLAG(also_run_disabled_tests)) ||
       ParseBoolFlag(arg, kBreakOnFailureFlag,
@@ -5239,6 +5803,7 @@ bool ParseGoogleTestFlag(const char* const arg) {
       ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
       ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
       ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
+      ParseBoolFlag(arg, kPrintUTF8Flag, &GTEST_FLAG(print_utf8)) ||
       ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
       ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
       ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||
@@ -5251,14 +5816,11 @@ bool ParseGoogleTestFlag(const char* const arg) {
 }

 #if GTEST_USE_OWN_FLAGFILE_FLAG_
-void LoadFlagsFromFile(const std::string& path) {
+static void LoadFlagsFromFile(const std::string& path) {
   FILE* flagfile = posix::FOpen(path.c_str(), "r");
   if (!flagfile) {
-    fprintf(stderr,
-            "Unable to open file \"%s\"\n",
-            GTEST_FLAG(flagfile).c_str());
-    fflush(stderr);
-    exit(EXIT_FAILURE);
+    GTEST_LOG_(FATAL) << "Unable to open file \"" << GTEST_FLAG(flagfile)
+                      << "\"";
   }
   std::string contents(ReadEntireFile(flagfile));
   posix::FClose(flagfile);
@@ -5332,6 +5894,17 @@ void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
 // other parts of Google Test.
 void ParseGoogleTestFlagsOnly(int* argc, char** argv) {
   ParseGoogleTestFlagsOnlyImpl(argc, argv);
+
+  // Fix the value of *_NSGetArgc() on macOS, but iff
+  // *_NSGetArgv() == argv
+  // Only applicable to char** version of argv
+#if GTEST_OS_MAC
+#ifndef GTEST_OS_IOS
+  if (*_NSGetArgv() == argv) {
+    *_NSGetArgc() = *argc;
+  }
+#endif
+#endif
 }
 void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) {
   ParseGoogleTestFlagsOnlyImpl(argc, argv);
@@ -5353,6 +5926,10 @@ void InitGoogleTestImpl(int* argc, CharType** argv) {
     g_argvs.push_back(StreamableToString(argv[i]));
   }

+#if GTEST_HAS_ABSL
+  absl::InitializeSymbolizer(g_argvs[0].c_str());
+#endif  // GTEST_HAS_ABSL
+
   ParseGoogleTestFlagsOnly(argc, argv);
   GetUnitTestImpl()->PostFlagParsingInit();
 }
@@ -5386,4 +5963,45 @@ void InitGoogleTest(int* argc, wchar_t** argv) {
 #endif  // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
 }

+std::string TempDir() {
+#if defined(GTEST_CUSTOM_TEMPDIR_FUNCTION_)
+  return GTEST_CUSTOM_TEMPDIR_FUNCTION_();
+#endif
+
+#if GTEST_OS_WINDOWS_MOBILE
+  return "\\temp\\";
+#elif GTEST_OS_WINDOWS
+  const char* temp_dir = internal::posix::GetEnv("TEMP");
+  if (temp_dir == NULL || temp_dir[0] == '\0')
+    return "\\temp\\";
+  else if (temp_dir[strlen(temp_dir) - 1] == '\\')
+    return temp_dir;
+  else
+    return std::string(temp_dir) + "\\";
+#elif GTEST_OS_LINUX_ANDROID
+  return "/sdcard/";
+#else
+  return "/tmp/";
+#endif  // GTEST_OS_WINDOWS_MOBILE
+}
+
+// Class ScopedTrace
+
+// Pushes the given source file location and message onto a per-thread
+// trace stack maintained by Google Test.
+void ScopedTrace::PushTrace(const char* file, int line, std::string message) {
+  internal::TraceInfo trace;
+  trace.file = file;
+  trace.line = line;
+  trace.message.swap(message);
+
+  UnitTest::GetInstance()->PushGTestTrace(trace);
+}
+
+// Pops the info pushed by the c'tor.
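TempDir() above is part of the public testing API, and every branch returns a path that already ends in a separator, so a file name can be appended directly. A hypothetical test using it:

    #include <cstdio>
    #include <string>

    #include "gtest/gtest.h"

    TEST(TempDirSketch, WritesScratchFile) {
      // TempDir() ends with a path separator ("/tmp/", "\\temp\\", ...).
      const std::string path = testing::TempDir() + "scratch.bin";
      FILE* f = fopen(path.c_str(), "wb");
      ASSERT_TRUE(f != NULL);
      fputc(0, f);
      fclose(f);
      remove(path.c_str());  // tidy up the scratch file
    }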
+ScopedTrace::~ScopedTrace() + GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) { + UnitTest::GetInstance()->PopGTestTrace(); +} + } // namespace testing diff --git a/media/libvpx/libvpx/third_party/googletest/src/src/gtest_main.cc b/media/libvpx/libvpx/third_party/googletest/src/src/gtest_main.cc index f30282255233..2113f621e654 100644 --- a/media/libvpx/libvpx/third_party/googletest/src/src/gtest_main.cc +++ b/media/libvpx/libvpx/third_party/googletest/src/src/gtest_main.cc @@ -28,11 +28,10 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include - #include "gtest/gtest.h" GTEST_API_ int main(int argc, char **argv) { - printf("Running main() from gtest_main.cc\n"); + printf("Running main() from %s\n", __FILE__); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } diff --git a/media/libvpx/libvpx/third_party/libwebm/AUTHORS.TXT b/media/libvpx/libvpx/third_party/libwebm/AUTHORS.TXT deleted file mode 100644 index 8ab6f794c734..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/AUTHORS.TXT +++ /dev/null @@ -1,4 +0,0 @@ -# Names should be added to this file like so: -# Name or Organization - -Google Inc. diff --git a/media/libvpx/libvpx/third_party/libwebm/Android.mk b/media/libvpx/libvpx/third_party/libwebm/Android.mk deleted file mode 100644 index 8149a083f4fa..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/Android.mk +++ /dev/null @@ -1,17 +0,0 @@ -LOCAL_PATH:= $(call my-dir) - -include $(CLEAR_VARS) -LOCAL_MODULE:= libwebm -LOCAL_CPPFLAGS:=-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -Wno-extern-c-compat -LOCAL_C_INCLUDES:= $(LOCAL_PATH) -LOCAL_EXPORT_C_INCLUDES:= $(LOCAL_PATH) - -LOCAL_SRC_FILES:= common/file_util.cc \ - common/hdr_util.cc \ - mkvparser/mkvparser.cc \ - mkvparser/mkvreader.cc \ - mkvmuxer/mkvmuxer.cc \ - mkvmuxer/mkvmuxerutil.cc \ - mkvmuxer/mkvwriter.cc -include $(BUILD_STATIC_LIBRARY) diff --git a/media/libvpx/libvpx/third_party/libwebm/PATENTS.TXT b/media/libvpx/libvpx/third_party/libwebm/PATENTS.TXT deleted file mode 100644 index caedf607e95a..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/PATENTS.TXT +++ /dev/null @@ -1,23 +0,0 @@ -Additional IP Rights Grant (Patents) ------------------------------------- - -"These implementations" means the copyrightable works that implement the WebM -codecs distributed by Google as part of the WebM Project. - -Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge, -royalty-free, irrevocable (except as stated in this section) patent license to -make, have made, use, offer to sell, sell, import, transfer, and otherwise -run, modify and propagate the contents of these implementations of WebM, where -such license applies only to those patent claims, both currently owned by -Google and acquired in the future, licensable by Google that are necessarily -infringed by these implementations of WebM. This grant does not include claims -that would be infringed only as a consequence of further modification of these -implementations. 
If you or your agent or exclusive licensee institute or order -or agree to the institution of patent litigation or any other patent -enforcement activity against any entity (including a cross-claim or -counterclaim in a lawsuit) alleging that any of these implementations of WebM -or any code incorporated within any of these implementations of WebM -constitute direct or contributory patent infringement, or inducement of -patent infringement, then any patent rights granted to you under this License -for these implementations of WebM shall terminate as of the date such -litigation is filed. diff --git a/media/libvpx/libvpx/third_party/libwebm/README.libvpx b/media/libvpx/libvpx/third_party/libwebm/README.libvpx deleted file mode 100644 index ebb5ff2f4d71..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/README.libvpx +++ /dev/null @@ -1,10 +0,0 @@ -URL: https://chromium.googlesource.com/webm/libwebm -Version: 0ae757087f5e6eb01dfea16cc09205b2425cfb74 -License: BSD -License File: LICENSE.txt - -Description: -libwebm is used to handle WebM container I/O. - -Local Changes: -* diff --git a/media/libvpx/libvpx/third_party/libwebm/common/file_util.cc b/media/libvpx/libvpx/third_party/libwebm/common/file_util.cc deleted file mode 100644 index 6dab146dd988..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/common/file_util.cc +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) 2016 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -#include "common/file_util.h" - -#include -#ifndef _MSC_VER -#include // close() -#endif - -#include -#include -#include -#include -#include - -namespace libwebm { - -std::string GetTempFileName() { -#if !defined _MSC_VER && !defined __MINGW32__ - std::string temp_file_name_template_str = - std::string(std::getenv("TEST_TMPDIR") ? std::getenv("TEST_TMPDIR") : - ".") + - "/libwebm_temp.XXXXXX"; - char* temp_file_name_template = - new char[temp_file_name_template_str.length() + 1]; - memset(temp_file_name_template, 0, temp_file_name_template_str.length() + 1); - temp_file_name_template_str.copy(temp_file_name_template, - temp_file_name_template_str.length(), 0); - int fd = mkstemp(temp_file_name_template); - std::string temp_file_name = - (fd != -1) ? 
std::string(temp_file_name_template) : std::string(); - delete[] temp_file_name_template; - if (fd != -1) { - close(fd); - } - return temp_file_name; -#else - char tmp_file_name[_MAX_PATH]; - errno_t err = tmpnam_s(tmp_file_name); - if (err == 0) { - return std::string(tmp_file_name); - } - return std::string(); -#endif -} - -uint64_t GetFileSize(const std::string& file_name) { - uint64_t file_size = 0; -#ifndef _MSC_VER - struct stat st; - st.st_size = 0; - if (stat(file_name.c_str(), &st) == 0) { -#else - struct _stat st; - st.st_size = 0; - if (_stat(file_name.c_str(), &st) == 0) { -#endif - file_size = st.st_size; - } - return file_size; -} - -TempFileDeleter::TempFileDeleter() { file_name_ = GetTempFileName(); } - -TempFileDeleter::~TempFileDeleter() { - std::ifstream file(file_name_.c_str()); - if (file.good()) { - file.close(); - std::remove(file_name_.c_str()); - } -} - -} // namespace libwebm diff --git a/media/libvpx/libvpx/third_party/libwebm/common/file_util.h b/media/libvpx/libvpx/third_party/libwebm/common/file_util.h deleted file mode 100644 index 0e71eac11e46..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/common/file_util.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2016 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -#ifndef LIBWEBM_COMMON_FILE_UTIL_H_ -#define LIBWEBM_COMMON_FILE_UTIL_H_ - -#include - -#include - -#include "mkvmuxer/mkvmuxertypes.h" // LIBWEBM_DISALLOW_COPY_AND_ASSIGN() - -namespace libwebm { - -// Returns a temporary file name. -std::string GetTempFileName(); - -// Returns size of file specified by |file_name|, or 0 upon failure. -uint64_t GetFileSize(const std::string& file_name); - -// Manages life of temporary file specified at time of construction. Deletes -// file upon destruction. -class TempFileDeleter { - public: - TempFileDeleter(); - explicit TempFileDeleter(std::string file_name) : file_name_(file_name) {} - ~TempFileDeleter(); - const std::string& name() const { return file_name_; } - - private: - std::string file_name_; - LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TempFileDeleter); -}; - -} // namespace libwebm - -#endif // LIBWEBM_COMMON_FILE_UTIL_H_ \ No newline at end of file diff --git a/media/libvpx/libvpx/third_party/libwebm/common/hdr_util.cc b/media/libvpx/libvpx/third_party/libwebm/common/hdr_util.cc deleted file mode 100644 index e1618ce75a78..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/common/hdr_util.cc +++ /dev/null @@ -1,220 +0,0 @@ -// Copyright (c) 2016 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
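Although this vendored copy of hdr_util.cc is deleted by the update, its ParseVpxCodecPrivate routine below remains a readable specification of the VP9 CodecPrivate layout: a sequence of (ID, length, value) triples in which IDs 1 through 4 carry profile, level, bit depth, and chroma subsampling, each with a one-byte value. A hypothetical call with an invented two-field blob (profile 2, 10-bit):

    #include <cstdint>
    #include <cstdio>

    #include "common/hdr_util.h"

    int main() {
      // (ID, length, value) triples: profile = 2, bit depth = 10.
      const uint8_t private_data[] = {0x01, 0x01, 0x02,
                                      0x03, 0x01, 0x0a};
      libwebm::Vp9CodecFeatures features;
      if (libwebm::ParseVpxCodecPrivate(
              private_data, static_cast<int32_t>(sizeof(private_data)),
              &features)) {
        // Fields absent from the blob keep Vp9CodecFeatures::kValueNotPresent.
        printf("profile %d, bit depth %d\n", features.profile,
               features.bit_depth);
      }
      return 0;
    }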
-#include "hdr_util.h" - -#include -#include -#include - -#include "mkvparser/mkvparser.h" - -namespace libwebm { -const int Vp9CodecFeatures::kValueNotPresent = INT_MAX; - -bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc, - PrimaryChromaticityPtr* muxer_pc) { - muxer_pc->reset(new (std::nothrow) - mkvmuxer::PrimaryChromaticity(parser_pc.x, parser_pc.y)); - if (!muxer_pc->get()) - return false; - return true; -} - -bool MasteringMetadataValuePresent(double value) { - return value != mkvparser::MasteringMetadata::kValueNotPresent; -} - -bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm, - mkvmuxer::MasteringMetadata* muxer_mm) { - if (MasteringMetadataValuePresent(parser_mm.luminance_max)) - muxer_mm->set_luminance_max(parser_mm.luminance_max); - if (MasteringMetadataValuePresent(parser_mm.luminance_min)) - muxer_mm->set_luminance_min(parser_mm.luminance_min); - - PrimaryChromaticityPtr r_ptr(NULL); - PrimaryChromaticityPtr g_ptr(NULL); - PrimaryChromaticityPtr b_ptr(NULL); - PrimaryChromaticityPtr wp_ptr(NULL); - - if (parser_mm.r) { - if (!CopyPrimaryChromaticity(*parser_mm.r, &r_ptr)) - return false; - } - if (parser_mm.g) { - if (!CopyPrimaryChromaticity(*parser_mm.g, &g_ptr)) - return false; - } - if (parser_mm.b) { - if (!CopyPrimaryChromaticity(*parser_mm.b, &b_ptr)) - return false; - } - if (parser_mm.white_point) { - if (!CopyPrimaryChromaticity(*parser_mm.white_point, &wp_ptr)) - return false; - } - - if (!muxer_mm->SetChromaticity(r_ptr.get(), g_ptr.get(), b_ptr.get(), - wp_ptr.get())) { - return false; - } - - return true; -} - -bool ColourValuePresent(long long value) { - return value != mkvparser::Colour::kValueNotPresent; -} - -bool CopyColour(const mkvparser::Colour& parser_colour, - mkvmuxer::Colour* muxer_colour) { - if (!muxer_colour) - return false; - - if (ColourValuePresent(parser_colour.matrix_coefficients)) - muxer_colour->set_matrix_coefficients(parser_colour.matrix_coefficients); - if (ColourValuePresent(parser_colour.bits_per_channel)) - muxer_colour->set_bits_per_channel(parser_colour.bits_per_channel); - if (ColourValuePresent(parser_colour.chroma_subsampling_horz)) { - muxer_colour->set_chroma_subsampling_horz( - parser_colour.chroma_subsampling_horz); - } - if (ColourValuePresent(parser_colour.chroma_subsampling_vert)) { - muxer_colour->set_chroma_subsampling_vert( - parser_colour.chroma_subsampling_vert); - } - if (ColourValuePresent(parser_colour.cb_subsampling_horz)) - muxer_colour->set_cb_subsampling_horz(parser_colour.cb_subsampling_horz); - if (ColourValuePresent(parser_colour.cb_subsampling_vert)) - muxer_colour->set_cb_subsampling_vert(parser_colour.cb_subsampling_vert); - if (ColourValuePresent(parser_colour.chroma_siting_horz)) - muxer_colour->set_chroma_siting_horz(parser_colour.chroma_siting_horz); - if (ColourValuePresent(parser_colour.chroma_siting_vert)) - muxer_colour->set_chroma_siting_vert(parser_colour.chroma_siting_vert); - if (ColourValuePresent(parser_colour.range)) - muxer_colour->set_range(parser_colour.range); - if (ColourValuePresent(parser_colour.transfer_characteristics)) { - muxer_colour->set_transfer_characteristics( - parser_colour.transfer_characteristics); - } - if (ColourValuePresent(parser_colour.primaries)) - muxer_colour->set_primaries(parser_colour.primaries); - if (ColourValuePresent(parser_colour.max_cll)) - muxer_colour->set_max_cll(parser_colour.max_cll); - if (ColourValuePresent(parser_colour.max_fall)) - muxer_colour->set_max_fall(parser_colour.max_fall); - - if 
(parser_colour.mastering_metadata) { - mkvmuxer::MasteringMetadata muxer_mm; - if (!CopyMasteringMetadata(*parser_colour.mastering_metadata, &muxer_mm)) - return false; - if (!muxer_colour->SetMasteringMetadata(muxer_mm)) - return false; - } - return true; -} - -// Format of VPx private data: -// -// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -// | ID Byte | Length | | -// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | -// | | -// : Bytes 1..Length of Codec Feature : -// | | -// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -// -// ID Byte Format -// ID byte is an unsigned byte. -// 0 1 2 3 4 5 6 7 -// +-+-+-+-+-+-+-+-+ -// |X| ID | -// +-+-+-+-+-+-+-+-+ -// -// The X bit is reserved. -// -// See the following link for more information: -// http://www.webmproject.org/vp9/profiles/ -bool ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length, - Vp9CodecFeatures* features) { - const int kVpxCodecPrivateMinLength = 3; - if (!private_data || !features || length < kVpxCodecPrivateMinLength) - return false; - - const uint8_t kVp9ProfileId = 1; - const uint8_t kVp9LevelId = 2; - const uint8_t kVp9BitDepthId = 3; - const uint8_t kVp9ChromaSubsamplingId = 4; - const int kVpxFeatureLength = 1; - int offset = 0; - - // Set features to not set. - features->profile = Vp9CodecFeatures::kValueNotPresent; - features->level = Vp9CodecFeatures::kValueNotPresent; - features->bit_depth = Vp9CodecFeatures::kValueNotPresent; - features->chroma_subsampling = Vp9CodecFeatures::kValueNotPresent; - do { - const uint8_t id_byte = private_data[offset++]; - const uint8_t length_byte = private_data[offset++]; - if (length_byte != kVpxFeatureLength) - return false; - if (id_byte == kVp9ProfileId) { - const int priv_profile = static_cast(private_data[offset++]); - if (priv_profile < 0 || priv_profile > 3) - return false; - if (features->profile != Vp9CodecFeatures::kValueNotPresent && - features->profile != priv_profile) { - return false; - } - features->profile = priv_profile; - } else if (id_byte == kVp9LevelId) { - const int priv_level = static_cast(private_data[offset++]); - - const int kNumLevels = 14; - const int levels[kNumLevels] = {10, 11, 20, 21, 30, 31, 40, - 41, 50, 51, 52, 60, 61, 62}; - - for (int i = 0; i < kNumLevels; ++i) { - if (priv_level == levels[i]) { - if (features->level != Vp9CodecFeatures::kValueNotPresent && - features->level != priv_level) { - return false; - } - features->level = priv_level; - break; - } - } - if (features->level == Vp9CodecFeatures::kValueNotPresent) - return false; - } else if (id_byte == kVp9BitDepthId) { - const int priv_profile = static_cast(private_data[offset++]); - if (priv_profile != 8 && priv_profile != 10 && priv_profile != 12) - return false; - if (features->bit_depth != Vp9CodecFeatures::kValueNotPresent && - features->bit_depth != priv_profile) { - return false; - } - features->bit_depth = priv_profile; - } else if (id_byte == kVp9ChromaSubsamplingId) { - const int priv_profile = static_cast(private_data[offset++]); - if (priv_profile != 0 && priv_profile != 2 && priv_profile != 3) - return false; - if (features->chroma_subsampling != Vp9CodecFeatures::kValueNotPresent && - features->chroma_subsampling != priv_profile) { - return false; - } - features->chroma_subsampling = priv_profile; - } else { - // Invalid ID. 
- return false; - } - } while (offset + kVpxCodecPrivateMinLength <= length); - - return true; -} -} // namespace libwebm diff --git a/media/libvpx/libvpx/third_party/libwebm/common/hdr_util.h b/media/libvpx/libvpx/third_party/libwebm/common/hdr_util.h deleted file mode 100644 index 3ef5388fd038..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/common/hdr_util.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2016 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -#ifndef LIBWEBM_COMMON_HDR_UTIL_H_ -#define LIBWEBM_COMMON_HDR_UTIL_H_ - -#include - -#include - -#include "mkvmuxer/mkvmuxer.h" - -namespace mkvparser { -struct Colour; -struct MasteringMetadata; -struct PrimaryChromaticity; -} // namespace mkvparser - -namespace libwebm { -// Utility types and functions for working with the Colour element and its -// children. Copiers return true upon success. Presence functions return true -// when the specified element is present. - -// TODO(tomfinegan): These should be moved to libwebm_utils once c++11 is -// required by libwebm. - -// Features of the VP9 codec that may be set in the CodecPrivate of a VP9 video -// stream. A value of kValueNotPresent represents that the value was not set in -// the CodecPrivate. -struct Vp9CodecFeatures { - static const int kValueNotPresent; - - Vp9CodecFeatures() - : profile(kValueNotPresent), - level(kValueNotPresent), - bit_depth(kValueNotPresent), - chroma_subsampling(kValueNotPresent) {} - ~Vp9CodecFeatures() {} - - int profile; - int level; - int bit_depth; - int chroma_subsampling; -}; - -// disable deprecation warnings for auto_ptr -#if defined(__GNUC__) && __GNUC__ >= 5 -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif -typedef std::auto_ptr PrimaryChromaticityPtr; -#if defined(__GNUC__) && __GNUC__ >= 5 -#pragma GCC diagnostic pop -#endif - -bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc, - PrimaryChromaticityPtr* muxer_pc); - -bool MasteringMetadataValuePresent(double value); - -bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm, - mkvmuxer::MasteringMetadata* muxer_mm); - -bool ColourValuePresent(long long value); - -bool CopyColour(const mkvparser::Colour& parser_colour, - mkvmuxer::Colour* muxer_colour); - -// Returns true if |features| is set to one or more valid values. -bool ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length, - Vp9CodecFeatures* features); - -} // namespace libwebm - -#endif // LIBWEBM_COMMON_HDR_UTIL_H_ diff --git a/media/libvpx/libvpx/third_party/libwebm/common/webmids.h b/media/libvpx/libvpx/third_party/libwebm/common/webmids.h deleted file mode 100644 index 89d722a71bcb..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/common/webmids.h +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
- -#ifndef COMMON_WEBMIDS_H_ -#define COMMON_WEBMIDS_H_ - -namespace libwebm { - -enum MkvId { - kMkvEBML = 0x1A45DFA3, - kMkvEBMLVersion = 0x4286, - kMkvEBMLReadVersion = 0x42F7, - kMkvEBMLMaxIDLength = 0x42F2, - kMkvEBMLMaxSizeLength = 0x42F3, - kMkvDocType = 0x4282, - kMkvDocTypeVersion = 0x4287, - kMkvDocTypeReadVersion = 0x4285, - kMkvVoid = 0xEC, - kMkvSignatureSlot = 0x1B538667, - kMkvSignatureAlgo = 0x7E8A, - kMkvSignatureHash = 0x7E9A, - kMkvSignaturePublicKey = 0x7EA5, - kMkvSignature = 0x7EB5, - kMkvSignatureElements = 0x7E5B, - kMkvSignatureElementList = 0x7E7B, - kMkvSignedElement = 0x6532, - // segment - kMkvSegment = 0x18538067, - // Meta Seek Information - kMkvSeekHead = 0x114D9B74, - kMkvSeek = 0x4DBB, - kMkvSeekID = 0x53AB, - kMkvSeekPosition = 0x53AC, - // Segment Information - kMkvInfo = 0x1549A966, - kMkvTimecodeScale = 0x2AD7B1, - kMkvDuration = 0x4489, - kMkvDateUTC = 0x4461, - kMkvTitle = 0x7BA9, - kMkvMuxingApp = 0x4D80, - kMkvWritingApp = 0x5741, - // Cluster - kMkvCluster = 0x1F43B675, - kMkvTimecode = 0xE7, - kMkvPrevSize = 0xAB, - kMkvBlockGroup = 0xA0, - kMkvBlock = 0xA1, - kMkvBlockDuration = 0x9B, - kMkvReferenceBlock = 0xFB, - kMkvLaceNumber = 0xCC, - kMkvSimpleBlock = 0xA3, - kMkvBlockAdditions = 0x75A1, - kMkvBlockMore = 0xA6, - kMkvBlockAddID = 0xEE, - kMkvBlockAdditional = 0xA5, - kMkvDiscardPadding = 0x75A2, - // Track - kMkvTracks = 0x1654AE6B, - kMkvTrackEntry = 0xAE, - kMkvTrackNumber = 0xD7, - kMkvTrackUID = 0x73C5, - kMkvTrackType = 0x83, - kMkvFlagEnabled = 0xB9, - kMkvFlagDefault = 0x88, - kMkvFlagForced = 0x55AA, - kMkvFlagLacing = 0x9C, - kMkvDefaultDuration = 0x23E383, - kMkvMaxBlockAdditionID = 0x55EE, - kMkvName = 0x536E, - kMkvLanguage = 0x22B59C, - kMkvCodecID = 0x86, - kMkvCodecPrivate = 0x63A2, - kMkvCodecName = 0x258688, - kMkvCodecDelay = 0x56AA, - kMkvSeekPreRoll = 0x56BB, - // video - kMkvVideo = 0xE0, - kMkvFlagInterlaced = 0x9A, - kMkvStereoMode = 0x53B8, - kMkvAlphaMode = 0x53C0, - kMkvPixelWidth = 0xB0, - kMkvPixelHeight = 0xBA, - kMkvPixelCropBottom = 0x54AA, - kMkvPixelCropTop = 0x54BB, - kMkvPixelCropLeft = 0x54CC, - kMkvPixelCropRight = 0x54DD, - kMkvDisplayWidth = 0x54B0, - kMkvDisplayHeight = 0x54BA, - kMkvDisplayUnit = 0x54B2, - kMkvAspectRatioType = 0x54B3, - kMkvFrameRate = 0x2383E3, - // end video - // colour - kMkvColour = 0x55B0, - kMkvMatrixCoefficients = 0x55B1, - kMkvBitsPerChannel = 0x55B2, - kMkvChromaSubsamplingHorz = 0x55B3, - kMkvChromaSubsamplingVert = 0x55B4, - kMkvCbSubsamplingHorz = 0x55B5, - kMkvCbSubsamplingVert = 0x55B6, - kMkvChromaSitingHorz = 0x55B7, - kMkvChromaSitingVert = 0x55B8, - kMkvRange = 0x55B9, - kMkvTransferCharacteristics = 0x55BA, - kMkvPrimaries = 0x55BB, - kMkvMaxCLL = 0x55BC, - kMkvMaxFALL = 0x55BD, - // mastering metadata - kMkvMasteringMetadata = 0x55D0, - kMkvPrimaryRChromaticityX = 0x55D1, - kMkvPrimaryRChromaticityY = 0x55D2, - kMkvPrimaryGChromaticityX = 0x55D3, - kMkvPrimaryGChromaticityY = 0x55D4, - kMkvPrimaryBChromaticityX = 0x55D5, - kMkvPrimaryBChromaticityY = 0x55D6, - kMkvWhitePointChromaticityX = 0x55D7, - kMkvWhitePointChromaticityY = 0x55D8, - kMkvLuminanceMax = 0x55D9, - kMkvLuminanceMin = 0x55DA, - // end mastering metadata - // end colour - // projection - kMkvProjection = 0x7670, - kMkvProjectionType = 0x7671, - kMkvProjectionPrivate = 0x7672, - kMkvProjectionPoseYaw = 0x7673, - kMkvProjectionPosePitch = 0x7674, - kMkvProjectionPoseRoll = 0x7675, - // end projection - // audio - kMkvAudio = 0xE1, - kMkvSamplingFrequency = 0xB5, - kMkvOutputSamplingFrequency = 
0x78B5, - kMkvChannels = 0x9F, - kMkvBitDepth = 0x6264, - // end audio - // ContentEncodings - kMkvContentEncodings = 0x6D80, - kMkvContentEncoding = 0x6240, - kMkvContentEncodingOrder = 0x5031, - kMkvContentEncodingScope = 0x5032, - kMkvContentEncodingType = 0x5033, - kMkvContentCompression = 0x5034, - kMkvContentCompAlgo = 0x4254, - kMkvContentCompSettings = 0x4255, - kMkvContentEncryption = 0x5035, - kMkvContentEncAlgo = 0x47E1, - kMkvContentEncKeyID = 0x47E2, - kMkvContentSignature = 0x47E3, - kMkvContentSigKeyID = 0x47E4, - kMkvContentSigAlgo = 0x47E5, - kMkvContentSigHashAlgo = 0x47E6, - kMkvContentEncAESSettings = 0x47E7, - kMkvAESSettingsCipherMode = 0x47E8, - kMkvAESSettingsCipherInitData = 0x47E9, - // end ContentEncodings - // Cueing Data - kMkvCues = 0x1C53BB6B, - kMkvCuePoint = 0xBB, - kMkvCueTime = 0xB3, - kMkvCueTrackPositions = 0xB7, - kMkvCueTrack = 0xF7, - kMkvCueClusterPosition = 0xF1, - kMkvCueBlockNumber = 0x5378, - // Chapters - kMkvChapters = 0x1043A770, - kMkvEditionEntry = 0x45B9, - kMkvChapterAtom = 0xB6, - kMkvChapterUID = 0x73C4, - kMkvChapterStringUID = 0x5654, - kMkvChapterTimeStart = 0x91, - kMkvChapterTimeEnd = 0x92, - kMkvChapterDisplay = 0x80, - kMkvChapString = 0x85, - kMkvChapLanguage = 0x437C, - kMkvChapCountry = 0x437E, - // Tags - kMkvTags = 0x1254C367, - kMkvTag = 0x7373, - kMkvSimpleTag = 0x67C8, - kMkvTagName = 0x45A3, - kMkvTagString = 0x4487 -}; - -} // namespace libwebm - -#endif // COMMON_WEBMIDS_H_ diff --git a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc b/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc deleted file mode 100644 index 15b9a908d8a2..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc +++ /dev/null @@ -1,4196 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. - -#include "mkvmuxer/mkvmuxer.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/webmids.h" -#include "mkvmuxer/mkvmuxerutil.h" -#include "mkvmuxer/mkvwriter.h" -#include "mkvparser/mkvparser.h" - -// disable deprecation warnings for auto_ptr -#if defined(__GNUC__) && __GNUC__ >= 5 -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - -namespace mkvmuxer { - -const float PrimaryChromaticity::kChromaticityMin = 0.0f; -const float PrimaryChromaticity::kChromaticityMax = 1.0f; -const float MasteringMetadata::kMinLuminance = 0.0f; -const float MasteringMetadata::kMinLuminanceMax = 999.99f; -const float MasteringMetadata::kMaxLuminanceMax = 9999.99f; -const float MasteringMetadata::kValueNotPresent = FLT_MAX; -const uint64_t Colour::kValueNotPresent = UINT64_MAX; - -namespace { - -const char kDocTypeWebm[] = "webm"; -const char kDocTypeMatroska[] = "matroska"; - -// Deallocate the string designated by |dst|, and then copy the |src| -// string to |dst|. The caller owns both the |src| string and the -// |dst| copy (hence the caller is responsible for eventually -// deallocating the strings, either directly, or indirectly via -// StrCpy). Returns true if the source string was successfully copied -// to the destination. 
-bool StrCpy(const char* src, char** dst_ptr) { - if (dst_ptr == NULL) - return false; - - char*& dst = *dst_ptr; - - delete[] dst; - dst = NULL; - - if (src == NULL) - return true; - - const size_t size = strlen(src) + 1; - - dst = new (std::nothrow) char[size]; // NOLINT - if (dst == NULL) - return false; - - strcpy(dst, src); // NOLINT - return true; -} - -typedef std::auto_ptr PrimaryChromaticityPtr; -bool CopyChromaticity(const PrimaryChromaticity* src, - PrimaryChromaticityPtr* dst) { - if (!dst) - return false; - - dst->reset(new (std::nothrow) PrimaryChromaticity(src->x(), src->y())); - if (!dst->get()) - return false; - - return true; -} - -} // namespace - -/////////////////////////////////////////////////////////////// -// -// IMkvWriter Class - -IMkvWriter::IMkvWriter() {} - -IMkvWriter::~IMkvWriter() {} - -bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version, - const char* const doc_type) { - // Level 0 - uint64_t size = - EbmlElementSize(libwebm::kMkvEBMLVersion, static_cast(1)); - size += EbmlElementSize(libwebm::kMkvEBMLReadVersion, static_cast(1)); - size += EbmlElementSize(libwebm::kMkvEBMLMaxIDLength, static_cast(4)); - size += - EbmlElementSize(libwebm::kMkvEBMLMaxSizeLength, static_cast(8)); - size += EbmlElementSize(libwebm::kMkvDocType, doc_type); - size += EbmlElementSize(libwebm::kMkvDocTypeVersion, - static_cast(doc_type_version)); - size += - EbmlElementSize(libwebm::kMkvDocTypeReadVersion, static_cast(2)); - - if (!WriteEbmlMasterElement(writer, libwebm::kMkvEBML, size)) - return false; - if (!WriteEbmlElement(writer, libwebm::kMkvEBMLVersion, - static_cast(1))) { - return false; - } - if (!WriteEbmlElement(writer, libwebm::kMkvEBMLReadVersion, - static_cast(1))) { - return false; - } - if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxIDLength, - static_cast(4))) { - return false; - } - if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxSizeLength, - static_cast(8))) { - return false; - } - if (!WriteEbmlElement(writer, libwebm::kMkvDocType, doc_type)) - return false; - if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeVersion, - static_cast(doc_type_version))) { - return false; - } - if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeReadVersion, - static_cast(2))) { - return false; - } - - return true; -} - -bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version) { - return WriteEbmlHeader(writer, doc_type_version, kDocTypeWebm); -} - -bool WriteEbmlHeader(IMkvWriter* writer) { - return WriteEbmlHeader(writer, mkvmuxer::Segment::kDefaultDocTypeVersion); -} - -bool ChunkedCopy(mkvparser::IMkvReader* source, mkvmuxer::IMkvWriter* dst, - int64_t start, int64_t size) { - // TODO(vigneshv): Check if this is a reasonable value. - const uint32_t kBufSize = 2048; - uint8_t* buf = new uint8_t[kBufSize]; - int64_t offset = start; - while (size > 0) { - const int64_t read_len = (size > kBufSize) ? 
kBufSize : size; - if (source->Read(offset, static_cast(read_len), buf)) - return false; - dst->Write(buf, static_cast(read_len)); - offset += read_len; - size -= read_len; - } - delete[] buf; - return true; -} - -/////////////////////////////////////////////////////////////// -// -// Frame Class - -Frame::Frame() - : add_id_(0), - additional_(NULL), - additional_length_(0), - duration_(0), - duration_set_(false), - frame_(NULL), - is_key_(false), - length_(0), - track_number_(0), - timestamp_(0), - discard_padding_(0), - reference_block_timestamp_(0), - reference_block_timestamp_set_(false) {} - -Frame::~Frame() { - delete[] frame_; - delete[] additional_; -} - -bool Frame::CopyFrom(const Frame& frame) { - delete[] frame_; - frame_ = NULL; - length_ = 0; - if (frame.length() > 0 && frame.frame() != NULL && - !Init(frame.frame(), frame.length())) { - return false; - } - add_id_ = 0; - delete[] additional_; - additional_ = NULL; - additional_length_ = 0; - if (frame.additional_length() > 0 && frame.additional() != NULL && - !AddAdditionalData(frame.additional(), frame.additional_length(), - frame.add_id())) { - return false; - } - duration_ = frame.duration(); - duration_set_ = frame.duration_set(); - is_key_ = frame.is_key(); - track_number_ = frame.track_number(); - timestamp_ = frame.timestamp(); - discard_padding_ = frame.discard_padding(); - reference_block_timestamp_ = frame.reference_block_timestamp(); - reference_block_timestamp_set_ = frame.reference_block_timestamp_set(); - return true; -} - -bool Frame::Init(const uint8_t* frame, uint64_t length) { - uint8_t* const data = - new (std::nothrow) uint8_t[static_cast(length)]; // NOLINT - if (!data) - return false; - - delete[] frame_; - frame_ = data; - length_ = length; - - memcpy(frame_, frame, static_cast(length_)); - return true; -} - -bool Frame::AddAdditionalData(const uint8_t* additional, uint64_t length, - uint64_t add_id) { - uint8_t* const data = - new (std::nothrow) uint8_t[static_cast(length)]; // NOLINT - if (!data) - return false; - - delete[] additional_; - additional_ = data; - additional_length_ = length; - add_id_ = add_id; - - memcpy(additional_, additional, static_cast(additional_length_)); - return true; -} - -bool Frame::IsValid() const { - if (length_ == 0 || !frame_) { - return false; - } - if ((additional_length_ != 0 && !additional_) || - (additional_ != NULL && additional_length_ == 0)) { - return false; - } - if (track_number_ == 0 || track_number_ > kMaxTrackNumber) { - return false; - } - if (!CanBeSimpleBlock() && !is_key_ && !reference_block_timestamp_set_) { - return false; - } - return true; -} - -bool Frame::CanBeSimpleBlock() const { - return additional_ == NULL && discard_padding_ == 0 && duration_ == 0; -} - -void Frame::set_duration(uint64_t duration) { - duration_ = duration; - duration_set_ = true; -} - -void Frame::set_reference_block_timestamp(int64_t reference_block_timestamp) { - reference_block_timestamp_ = reference_block_timestamp; - reference_block_timestamp_set_ = true; -} - -/////////////////////////////////////////////////////////////// -// -// CuePoint Class - -CuePoint::CuePoint() - : time_(0), - track_(0), - cluster_pos_(0), - block_number_(1), - output_block_number_(true) {} - -CuePoint::~CuePoint() {} - -bool CuePoint::Write(IMkvWriter* writer) const { - if (!writer || track_ < 1 || cluster_pos_ < 1) - return false; - - uint64_t size = EbmlElementSize(libwebm::kMkvCueClusterPosition, - static_cast(cluster_pos_)); - size += EbmlElementSize(libwebm::kMkvCueTrack, 
static_cast(track_)); - if (output_block_number_ && block_number_ > 1) - size += EbmlElementSize(libwebm::kMkvCueBlockNumber, - static_cast(block_number_)); - const uint64_t track_pos_size = - EbmlMasterElementSize(libwebm::kMkvCueTrackPositions, size) + size; - const uint64_t payload_size = - EbmlElementSize(libwebm::kMkvCueTime, static_cast(time_)) + - track_pos_size; - - if (!WriteEbmlMasterElement(writer, libwebm::kMkvCuePoint, payload_size)) - return false; - - const int64_t payload_position = writer->Position(); - if (payload_position < 0) - return false; - - if (!WriteEbmlElement(writer, libwebm::kMkvCueTime, - static_cast(time_))) { - return false; - } - - if (!WriteEbmlMasterElement(writer, libwebm::kMkvCueTrackPositions, size)) - return false; - if (!WriteEbmlElement(writer, libwebm::kMkvCueTrack, - static_cast(track_))) { - return false; - } - if (!WriteEbmlElement(writer, libwebm::kMkvCueClusterPosition, - static_cast(cluster_pos_))) { - return false; - } - if (output_block_number_ && block_number_ > 1) { - if (!WriteEbmlElement(writer, libwebm::kMkvCueBlockNumber, - static_cast(block_number_))) { - return false; - } - } - - const int64_t stop_position = writer->Position(); - if (stop_position < 0) - return false; - - if (stop_position - payload_position != static_cast(payload_size)) - return false; - - return true; -} - -uint64_t CuePoint::PayloadSize() const { - uint64_t size = EbmlElementSize(libwebm::kMkvCueClusterPosition, - static_cast(cluster_pos_)); - size += EbmlElementSize(libwebm::kMkvCueTrack, static_cast(track_)); - if (output_block_number_ && block_number_ > 1) - size += EbmlElementSize(libwebm::kMkvCueBlockNumber, - static_cast(block_number_)); - const uint64_t track_pos_size = - EbmlMasterElementSize(libwebm::kMkvCueTrackPositions, size) + size; - const uint64_t payload_size = - EbmlElementSize(libwebm::kMkvCueTime, static_cast(time_)) + - track_pos_size; - - return payload_size; -} - -uint64_t CuePoint::Size() const { - const uint64_t payload_size = PayloadSize(); - return EbmlMasterElementSize(libwebm::kMkvCuePoint, payload_size) + - payload_size; -} - -/////////////////////////////////////////////////////////////// -// -// Cues Class - -Cues::Cues() - : cue_entries_capacity_(0), - cue_entries_size_(0), - cue_entries_(NULL), - output_block_number_(true) {} - -Cues::~Cues() { - if (cue_entries_) { - for (int32_t i = 0; i < cue_entries_size_; ++i) { - CuePoint* const cue = cue_entries_[i]; - delete cue; - } - delete[] cue_entries_; - } -} - -bool Cues::AddCue(CuePoint* cue) { - if (!cue) - return false; - - if ((cue_entries_size_ + 1) > cue_entries_capacity_) { - // Add more CuePoints. - const int32_t new_capacity = - (!cue_entries_capacity_) ? 
2 : cue_entries_capacity_ * 2; - - if (new_capacity < 1) - return false; - - CuePoint** const cues = - new (std::nothrow) CuePoint*[new_capacity]; // NOLINT - if (!cues) - return false; - - for (int32_t i = 0; i < cue_entries_size_; ++i) { - cues[i] = cue_entries_[i]; - } - - delete[] cue_entries_; - - cue_entries_ = cues; - cue_entries_capacity_ = new_capacity; - } - - cue->set_output_block_number(output_block_number_); - cue_entries_[cue_entries_size_++] = cue; - return true; -} - -CuePoint* Cues::GetCueByIndex(int32_t index) const { - if (cue_entries_ == NULL) - return NULL; - - if (index >= cue_entries_size_) - return NULL; - - return cue_entries_[index]; -} - -uint64_t Cues::Size() { - uint64_t size = 0; - for (int32_t i = 0; i < cue_entries_size_; ++i) - size += GetCueByIndex(i)->Size(); - size += EbmlMasterElementSize(libwebm::kMkvCues, size); - return size; -} - -bool Cues::Write(IMkvWriter* writer) const { - if (!writer) - return false; - - uint64_t size = 0; - for (int32_t i = 0; i < cue_entries_size_; ++i) { - const CuePoint* const cue = GetCueByIndex(i); - - if (!cue) - return false; - - size += cue->Size(); - } - - if (!WriteEbmlMasterElement(writer, libwebm::kMkvCues, size)) - return false; - - const int64_t payload_position = writer->Position(); - if (payload_position < 0) - return false; - - for (int32_t i = 0; i < cue_entries_size_; ++i) { - const CuePoint* const cue = GetCueByIndex(i); - - if (!cue->Write(writer)) - return false; - } - - const int64_t stop_position = writer->Position(); - if (stop_position < 0) - return false; - - if (stop_position - payload_position != static_cast(size)) - return false; - - return true; -} - -/////////////////////////////////////////////////////////////// -// -// ContentEncAESSettings Class - -ContentEncAESSettings::ContentEncAESSettings() : cipher_mode_(kCTR) {} - -uint64_t ContentEncAESSettings::Size() const { - const uint64_t payload = PayloadSize(); - const uint64_t size = - EbmlMasterElementSize(libwebm::kMkvContentEncAESSettings, payload) + - payload; - return size; -} - -bool ContentEncAESSettings::Write(IMkvWriter* writer) const { - const uint64_t payload = PayloadSize(); - - if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncAESSettings, - payload)) - return false; - - const int64_t payload_position = writer->Position(); - if (payload_position < 0) - return false; - - if (!WriteEbmlElement(writer, libwebm::kMkvAESSettingsCipherMode, - static_cast(cipher_mode_))) { - return false; - } - - const int64_t stop_position = writer->Position(); - if (stop_position < 0 || - stop_position - payload_position != static_cast(payload)) - return false; - - return true; -} - -uint64_t ContentEncAESSettings::PayloadSize() const { - uint64_t size = EbmlElementSize(libwebm::kMkvAESSettingsCipherMode, - static_cast(cipher_mode_)); - return size; -} - -/////////////////////////////////////////////////////////////// -// -// ContentEncoding Class - -ContentEncoding::ContentEncoding() - : enc_algo_(5), - enc_key_id_(NULL), - encoding_order_(0), - encoding_scope_(1), - encoding_type_(1), - enc_key_id_length_(0) {} - -ContentEncoding::~ContentEncoding() { delete[] enc_key_id_; } - -bool ContentEncoding::SetEncryptionID(const uint8_t* id, uint64_t length) { - if (!id || length < 1) - return false; - - delete[] enc_key_id_; - - enc_key_id_ = - new (std::nothrow) uint8_t[static_cast(length)]; // NOLINT - if (!enc_key_id_) - return false; - - memcpy(enc_key_id_, id, static_cast(length)); - enc_key_id_length_ = length; - - return true; -} - -uint64_t 
-uint64_t ContentEncoding::Size() const {
-  const uint64_t encryption_size = EncryptionSize();
-  const uint64_t encoding_size = EncodingSize(0, encryption_size);
-  const uint64_t encodings_size =
-      EbmlMasterElementSize(libwebm::kMkvContentEncoding, encoding_size) +
-      encoding_size;
-
-  return encodings_size;
-}
-
-bool ContentEncoding::Write(IMkvWriter* writer) const {
-  const uint64_t encryption_size = EncryptionSize();
-  const uint64_t encoding_size = EncodingSize(0, encryption_size);
-  const uint64_t size =
-      EbmlMasterElementSize(libwebm::kMkvContentEncoding, encoding_size) +
-      encoding_size;
-
-  const int64_t payload_position = writer->Position();
-  if (payload_position < 0)
-    return false;
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncoding,
-                              encoding_size))
-    return false;
-  if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingOrder,
-                        static_cast<uint64_t>(encoding_order_)))
-    return false;
-  if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingScope,
-                        static_cast<uint64_t>(encoding_scope_)))
-    return false;
-  if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingType,
-                        static_cast<uint64_t>(encoding_type_)))
-    return false;
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncryption,
-                              encryption_size))
-    return false;
-  if (!WriteEbmlElement(writer, libwebm::kMkvContentEncAlgo,
-                        static_cast<uint64_t>(enc_algo_))) {
-    return false;
-  }
-  if (!WriteEbmlElement(writer, libwebm::kMkvContentEncKeyID, enc_key_id_,
-                        enc_key_id_length_))
-    return false;
-
-  if (!enc_aes_settings_.Write(writer))
-    return false;
-
-  const int64_t stop_position = writer->Position();
-  if (stop_position < 0 ||
-      stop_position - payload_position != static_cast<int64_t>(size))
-    return false;
-
-  return true;
-}
-
-uint64_t ContentEncoding::EncodingSize(uint64_t compresion_size,
-                                       uint64_t encryption_size) const {
-  // TODO(fgalligan): Add support for compression settings.
-  if (compresion_size != 0)
-    return 0;
-
-  uint64_t encoding_size = 0;
-
-  if (encryption_size > 0) {
-    encoding_size +=
-        EbmlMasterElementSize(libwebm::kMkvContentEncryption, encryption_size) +
-        encryption_size;
-  }
-  encoding_size += EbmlElementSize(libwebm::kMkvContentEncodingType,
-                                   static_cast<uint64_t>(encoding_type_));
-  encoding_size += EbmlElementSize(libwebm::kMkvContentEncodingScope,
-                                   static_cast<uint64_t>(encoding_scope_));
-  encoding_size += EbmlElementSize(libwebm::kMkvContentEncodingOrder,
-                                   static_cast<uint64_t>(encoding_order_));
-
-  return encoding_size;
-}
-
-uint64_t ContentEncoding::EncryptionSize() const {
-  const uint64_t aes_size = enc_aes_settings_.Size();
-
-  uint64_t encryption_size = EbmlElementSize(libwebm::kMkvContentEncKeyID,
-                                             enc_key_id_, enc_key_id_length_);
-  encryption_size += EbmlElementSize(libwebm::kMkvContentEncAlgo,
-                                     static_cast<uint64_t>(enc_algo_));
-
-  return encryption_size + aes_size;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Track Class
-
-Track::Track(unsigned int* seed)
-    : codec_id_(NULL),
-      codec_private_(NULL),
-      language_(NULL),
-      max_block_additional_id_(0),
-      name_(NULL),
-      number_(0),
-      type_(0),
-      uid_(MakeUID(seed)),
-      codec_delay_(0),
-      seek_pre_roll_(0),
-      default_duration_(0),
-      codec_private_length_(0),
-      content_encoding_entries_(NULL),
-      content_encoding_entries_size_(0) {}
-
-Track::~Track() {
-  delete[] codec_id_;
-  delete[] codec_private_;
-  delete[] language_;
-  delete[] name_;
-
-  if (content_encoding_entries_) {
-    for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
-      ContentEncoding* const encoding = content_encoding_entries_[i];
-      delete encoding;
-    }
-    delete[] content_encoding_entries_;
-  }
-}
-
-bool Track::AddContentEncoding() {
-  const uint32_t count = content_encoding_entries_size_ + 1;
-
-  ContentEncoding** const content_encoding_entries =
-      new (std::nothrow) ContentEncoding*[count];  // NOLINT
-  if (!content_encoding_entries)
-    return false;
-
-  ContentEncoding* const content_encoding =
-      new (std::nothrow) ContentEncoding();  // NOLINT
-  if (!content_encoding) {
-    delete[] content_encoding_entries;
-    return false;
-  }
-
-  for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
-    content_encoding_entries[i] = content_encoding_entries_[i];
-  }
-
-  delete[] content_encoding_entries_;
-
-  content_encoding_entries_ = content_encoding_entries;
-  content_encoding_entries_[content_encoding_entries_size_] = content_encoding;
-  content_encoding_entries_size_ = count;
-  return true;
-}
-
-ContentEncoding* Track::GetContentEncodingByIndex(uint32_t index) const {
-  if (content_encoding_entries_ == NULL)
-    return NULL;
-
-  if (index >= content_encoding_entries_size_)
-    return NULL;
-
-  return content_encoding_entries_[index];
-}
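A track's content-encoding entries are created through the grow-by-one array above; a caller then fetches the new entry and configures it. A hedged usage sketch against the removed API, where |track| and the key bytes are placeholders and the track is assumed to have no prior encodings (so the new entry is at index 0):

    // Sketch: attach an encryption key ID to an existing mkvmuxer::Track.
    bool AttachKeyId(mkvmuxer::Track* track,
                     const uint8_t* key_id, uint64_t length) {
      if (!track->AddContentEncoding())  // appends one ContentEncoding entry
        return false;
      mkvmuxer::ContentEncoding* const encoding =
          track->GetContentEncodingByIndex(0);
      return encoding != NULL && encoding->SetEncryptionID(key_id, length);
    }
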
-
-uint64_t Track::PayloadSize() const {
-  uint64_t size =
-      EbmlElementSize(libwebm::kMkvTrackNumber, static_cast<uint64_t>(number_));
-  size += EbmlElementSize(libwebm::kMkvTrackUID, static_cast<uint64_t>(uid_));
-  size += EbmlElementSize(libwebm::kMkvTrackType, static_cast<uint64_t>(type_));
-  if (codec_id_)
-    size += EbmlElementSize(libwebm::kMkvCodecID, codec_id_);
-  if (codec_private_)
-    size += EbmlElementSize(libwebm::kMkvCodecPrivate, codec_private_,
-                            codec_private_length_);
-  if (language_)
-    size += EbmlElementSize(libwebm::kMkvLanguage, language_);
-  if (name_)
-    size += EbmlElementSize(libwebm::kMkvName, name_);
-  if (max_block_additional_id_) {
-    size += EbmlElementSize(libwebm::kMkvMaxBlockAdditionID,
-                            static_cast<uint64_t>(max_block_additional_id_));
-  }
-  if (codec_delay_) {
-    size += EbmlElementSize(libwebm::kMkvCodecDelay,
-                            static_cast<uint64_t>(codec_delay_));
-  }
-  if (seek_pre_roll_) {
-    size += EbmlElementSize(libwebm::kMkvSeekPreRoll,
-                            static_cast<uint64_t>(seek_pre_roll_));
-  }
-  if (default_duration_) {
-    size += EbmlElementSize(libwebm::kMkvDefaultDuration,
-                            static_cast<uint64_t>(default_duration_));
-  }
-
-  if (content_encoding_entries_size_ > 0) {
-    uint64_t content_encodings_size = 0;
-    for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
-      ContentEncoding* const encoding = content_encoding_entries_[i];
-      content_encodings_size += encoding->Size();
-    }
-
-    size += EbmlMasterElementSize(libwebm::kMkvContentEncodings,
-                                  content_encodings_size) +
-            content_encodings_size;
-  }
-
-  return size;
-}
-
-uint64_t Track::Size() const {
-  uint64_t size = PayloadSize();
-  size += EbmlMasterElementSize(libwebm::kMkvTrackEntry, size);
-  return size;
-}
-
-bool Track::Write(IMkvWriter* writer) const {
-  if (!writer)
-    return false;
-
-  // mandatory elements without a default value.
-  if (!type_ || !codec_id_)
-    return false;
-
-  // |size| may be bigger than what is written out in this function because
-  // derived classes may write out more data in the Track element.
-  const uint64_t payload_size = PayloadSize();
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvTrackEntry, payload_size))
-    return false;
-
-  uint64_t size =
-      EbmlElementSize(libwebm::kMkvTrackNumber, static_cast<uint64_t>(number_));
-  size += EbmlElementSize(libwebm::kMkvTrackUID, static_cast<uint64_t>(uid_));
-  size += EbmlElementSize(libwebm::kMkvTrackType, static_cast<uint64_t>(type_));
-  if (codec_id_)
-    size += EbmlElementSize(libwebm::kMkvCodecID, codec_id_);
-  if (codec_private_)
-    size += EbmlElementSize(libwebm::kMkvCodecPrivate, codec_private_,
-                            static_cast<uint64_t>(codec_private_length_));
-  if (language_)
-    size += EbmlElementSize(libwebm::kMkvLanguage, language_);
-  if (name_)
-    size += EbmlElementSize(libwebm::kMkvName, name_);
-  if (max_block_additional_id_)
-    size += EbmlElementSize(libwebm::kMkvMaxBlockAdditionID,
-                            static_cast<uint64_t>(max_block_additional_id_));
-  if (codec_delay_)
-    size += EbmlElementSize(libwebm::kMkvCodecDelay,
-                            static_cast<uint64_t>(codec_delay_));
-  if (seek_pre_roll_)
-    size += EbmlElementSize(libwebm::kMkvSeekPreRoll,
-                            static_cast<uint64_t>(seek_pre_roll_));
-  if (default_duration_)
-    size += EbmlElementSize(libwebm::kMkvDefaultDuration,
-                            static_cast<uint64_t>(default_duration_));
-
-  const int64_t payload_position = writer->Position();
-  if (payload_position < 0)
-    return false;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvTrackNumber,
-                        static_cast<uint64_t>(number_)))
-    return false;
-  if (!WriteEbmlElement(writer, libwebm::kMkvTrackUID,
-                        static_cast<uint64_t>(uid_)))
-    return false;
-  if (!WriteEbmlElement(writer, libwebm::kMkvTrackType,
-                        static_cast<uint64_t>(type_)))
-    return false;
-  if (max_block_additional_id_) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvMaxBlockAdditionID,
-                          static_cast<uint64_t>(max_block_additional_id_))) {
-      return false;
-    }
-  }
-  if (codec_delay_) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvCodecDelay,
-                          static_cast<uint64_t>(codec_delay_)))
-      return false;
-  }
-  if (seek_pre_roll_) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvSeekPreRoll,
-                          static_cast<uint64_t>(seek_pre_roll_)))
-      return false;
-  }
-  if (default_duration_) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvDefaultDuration,
-                          static_cast<uint64_t>(default_duration_)))
-      return false;
-  }
-  if (codec_id_) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvCodecID, codec_id_))
-      return false;
-  }
-  if (codec_private_) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvCodecPrivate, codec_private_,
-                          static_cast<uint64_t>(codec_private_length_)))
-      return false;
-  }
-  if (language_) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvLanguage, language_))
-      return false;
-  }
-  if (name_) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvName, name_))
-      return false;
-  }
-
-  int64_t stop_position = writer->Position();
-  if (stop_position < 0 ||
-      stop_position - payload_position != static_cast<int64_t>(size))
-    return false;
-
-  if (content_encoding_entries_size_ > 0) {
-    uint64_t content_encodings_size = 0;
-    for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
-      ContentEncoding* const encoding = content_encoding_entries_[i];
-      content_encodings_size += encoding->Size();
-    }
-
-    if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncodings,
-                                content_encodings_size))
-      return false;
-
-    for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
-      ContentEncoding* const encoding = content_encoding_entries_[i];
-      if (!encoding->Write(writer))
-        return false;
-    }
-  }
-
-  stop_position = writer->Position();
-  if (stop_position < 0)
-    return false;
-  return true;
-}
-
-bool Track::SetCodecPrivate(const uint8_t* codec_private, uint64_t length) {
-  if (!codec_private || length < 1)
-    return false;
-
-  delete[] codec_private_;
-
-  codec_private_ =
-      new (std::nothrow) uint8_t[static_cast<size_t>(length)];  // NOLINT
-  if (!codec_private_)
-    return false;
-
-  memcpy(codec_private_, codec_private, static_cast<size_t>(length));
-  codec_private_length_ = length;
-
-  return true;
-}
-
-void Track::set_codec_id(const char* codec_id) {
-  if (codec_id) {
-    delete[] codec_id_;
-
-    const size_t length = strlen(codec_id) + 1;
-    codec_id_ = new (std::nothrow) char[length];  // NOLINT
-    if (codec_id_) {
-#ifdef _MSC_VER
-      strcpy_s(codec_id_, length, codec_id);
-#else
-      strcpy(codec_id_, codec_id);
-#endif
-    }
-  }
-}
-
-// TODO(fgalligan): Vet the language parameter.
-void Track::set_language(const char* language) {
-  if (language) {
-    delete[] language_;
-
-    const size_t length = strlen(language) + 1;
-    language_ = new (std::nothrow) char[length];  // NOLINT
-    if (language_) {
-#ifdef _MSC_VER
-      strcpy_s(language_, length, language);
-#else
-      strcpy(language_, language);
-#endif
-    }
-  }
-}
-
-void Track::set_name(const char* name) {
-  if (name) {
-    delete[] name_;
-
-    const size_t length = strlen(name) + 1;
-    name_ = new (std::nothrow) char[length];  // NOLINT
-    if (name_) {
-#ifdef _MSC_VER
-      strcpy_s(name_, length, name);
-#else
-      strcpy(name_, name);
-#endif
-    }
-  }
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Colour and its child elements
-
-uint64_t PrimaryChromaticity::PrimaryChromaticitySize(
-    libwebm::MkvId x_id, libwebm::MkvId y_id) const {
-  return EbmlElementSize(x_id, x_) + EbmlElementSize(y_id, y_);
-}
-
-bool PrimaryChromaticity::Write(IMkvWriter* writer, libwebm::MkvId x_id,
-                                libwebm::MkvId y_id) const {
-  if (!Valid()) {
-    return false;
-  }
-  return WriteEbmlElement(writer, x_id, x_) &&
-         WriteEbmlElement(writer, y_id, y_);
-}
-
-bool PrimaryChromaticity::Valid() const {
-  return (x_ >= kChromaticityMin && x_ <= kChromaticityMax &&
-          y_ >= kChromaticityMin && y_ <= kChromaticityMax);
-}
-
-uint64_t MasteringMetadata::MasteringMetadataSize() const {
-  uint64_t size = PayloadSize();
-
-  if (size > 0)
-    size += EbmlMasterElementSize(libwebm::kMkvMasteringMetadata, size);
-
-  return size;
-}
-
-bool MasteringMetadata::Valid() const {
-  if (luminance_min_ != kValueNotPresent) {
-    if (luminance_min_ < kMinLuminance || luminance_min_ > kMinLuminanceMax ||
-        luminance_min_ > luminance_max_) {
-      return false;
-    }
-  }
-  if (luminance_max_ != kValueNotPresent) {
-    if (luminance_max_ < kMinLuminance || luminance_max_ > kMaxLuminanceMax ||
-        luminance_max_ < luminance_min_) {
-      return false;
-    }
-  }
-  if (r_ && !r_->Valid())
-    return false;
-  if (g_ && !g_->Valid())
-    return false;
-  if (b_ && !b_->Valid())
-    return false;
-  if (white_point_ && !white_point_->Valid())
-    return false;
-
-  return true;
-}
-
-bool MasteringMetadata::Write(IMkvWriter* writer) const {
-  const uint64_t size = PayloadSize();
-
-  // Don't write an empty element.
-  if (size == 0)
-    return true;
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvMasteringMetadata, size))
-    return false;
-  if (luminance_max_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvLuminanceMax, luminance_max_)) {
-    return false;
-  }
-  if (luminance_min_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvLuminanceMin, luminance_min_)) {
-    return false;
-  }
-  if (r_ &&
-      !r_->Write(writer, libwebm::kMkvPrimaryRChromaticityX,
-                 libwebm::kMkvPrimaryRChromaticityY)) {
-    return false;
-  }
-  if (g_ &&
-      !g_->Write(writer, libwebm::kMkvPrimaryGChromaticityX,
-                 libwebm::kMkvPrimaryGChromaticityY)) {
-    return false;
-  }
-  if (b_ &&
-      !b_->Write(writer, libwebm::kMkvPrimaryBChromaticityX,
-                 libwebm::kMkvPrimaryBChromaticityY)) {
-    return false;
-  }
-  if (white_point_ &&
-      !white_point_->Write(writer, libwebm::kMkvWhitePointChromaticityX,
-                           libwebm::kMkvWhitePointChromaticityY)) {
-    return false;
-  }
-
-  return true;
-}
-
-bool MasteringMetadata::SetChromaticity(
-    const PrimaryChromaticity* r, const PrimaryChromaticity* g,
-    const PrimaryChromaticity* b, const PrimaryChromaticity* white_point) {
-  PrimaryChromaticityPtr r_ptr(NULL);
-  if (r) {
-    if (!CopyChromaticity(r, &r_ptr))
-      return false;
-  }
-  PrimaryChromaticityPtr g_ptr(NULL);
-  if (g) {
-    if (!CopyChromaticity(g, &g_ptr))
-      return false;
-  }
-  PrimaryChromaticityPtr b_ptr(NULL);
-  if (b) {
-    if (!CopyChromaticity(b, &b_ptr))
-      return false;
-  }
-  PrimaryChromaticityPtr wp_ptr(NULL);
-  if (white_point) {
-    if (!CopyChromaticity(white_point, &wp_ptr))
-      return false;
-  }
-
-  r_ = r_ptr.release();
-  g_ = g_ptr.release();
-  b_ = b_ptr.release();
-  white_point_ = wp_ptr.release();
-  return true;
-}
-
-uint64_t MasteringMetadata::PayloadSize() const {
-  uint64_t size = 0;
-
-  if (luminance_max_ != kValueNotPresent)
-    size += EbmlElementSize(libwebm::kMkvLuminanceMax, luminance_max_);
-  if (luminance_min_ != kValueNotPresent)
-    size += EbmlElementSize(libwebm::kMkvLuminanceMin, luminance_min_);
-
-  if (r_) {
-    size += r_->PrimaryChromaticitySize(libwebm::kMkvPrimaryRChromaticityX,
-                                        libwebm::kMkvPrimaryRChromaticityY);
-  }
-  if (g_) {
-    size += g_->PrimaryChromaticitySize(libwebm::kMkvPrimaryGChromaticityX,
-                                        libwebm::kMkvPrimaryGChromaticityY);
-  }
-  if (b_) {
-    size += b_->PrimaryChromaticitySize(libwebm::kMkvPrimaryBChromaticityX,
-                                        libwebm::kMkvPrimaryBChromaticityY);
-  }
-  if (white_point_) {
-    size += white_point_->PrimaryChromaticitySize(
-        libwebm::kMkvWhitePointChromaticityX,
-        libwebm::kMkvWhitePointChromaticityY);
-  }
-
-  return size;
-}
-
-uint64_t Colour::ColourSize() const {
-  uint64_t size = PayloadSize();
-
-  if (size > 0)
-    size += EbmlMasterElementSize(libwebm::kMkvColour, size);
-
-  return size;
-}
-
-bool Colour::Valid() const {
-  if (mastering_metadata_ && !mastering_metadata_->Valid())
-    return false;
-  if (matrix_coefficients_ != kValueNotPresent &&
-      !IsMatrixCoefficientsValueValid(matrix_coefficients_)) {
-    return false;
-  }
-  if (chroma_siting_horz_ != kValueNotPresent &&
-      !IsChromaSitingHorzValueValid(chroma_siting_horz_)) {
-    return false;
-  }
-  if (chroma_siting_vert_ != kValueNotPresent &&
-      !IsChromaSitingVertValueValid(chroma_siting_vert_)) {
-    return false;
-  }
-  if (range_ != kValueNotPresent && !IsColourRangeValueValid(range_))
-    return false;
-  if (transfer_characteristics_ != kValueNotPresent &&
-      !IsTransferCharacteristicsValueValid(transfer_characteristics_)) {
-    return false;
-  }
-  if (primaries_ != kValueNotPresent && !IsPrimariesValueValid(primaries_))
-    return false;
-
-  return true;
-}
-
-bool Colour::Write(IMkvWriter* writer) const {
-  const uint64_t size = PayloadSize();
-
-  // Don't write an empty element.
-  if (size == 0)
-    return true;
-
-  // Don't write an invalid element.
-  if (!Valid())
-    return false;
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvColour, size))
-    return false;
-
-  if (matrix_coefficients_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvMatrixCoefficients,
-                        static_cast<uint64_t>(matrix_coefficients_))) {
-    return false;
-  }
-  if (bits_per_channel_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvBitsPerChannel,
-                        static_cast<uint64_t>(bits_per_channel_))) {
-    return false;
-  }
-  if (chroma_subsampling_horz_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvChromaSubsamplingHorz,
-                        static_cast<uint64_t>(chroma_subsampling_horz_))) {
-    return false;
-  }
-  if (chroma_subsampling_vert_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvChromaSubsamplingVert,
-                        static_cast<uint64_t>(chroma_subsampling_vert_))) {
-    return false;
-  }
-
-  if (cb_subsampling_horz_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvCbSubsamplingHorz,
-                        static_cast<uint64_t>(cb_subsampling_horz_))) {
-    return false;
-  }
-  if (cb_subsampling_vert_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvCbSubsamplingVert,
-                        static_cast<uint64_t>(cb_subsampling_vert_))) {
-    return false;
-  }
-  if (chroma_siting_horz_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvChromaSitingHorz,
-                        static_cast<uint64_t>(chroma_siting_horz_))) {
-    return false;
-  }
-  if (chroma_siting_vert_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvChromaSitingVert,
-                        static_cast<uint64_t>(chroma_siting_vert_))) {
-    return false;
-  }
-  if (range_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvRange,
-                        static_cast<uint64_t>(range_))) {
-    return false;
-  }
-  if (transfer_characteristics_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvTransferCharacteristics,
-                        static_cast<uint64_t>(transfer_characteristics_))) {
-    return false;
-  }
-  if (primaries_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvPrimaries,
-                        static_cast<uint64_t>(primaries_))) {
-    return false;
-  }
-  if (max_cll_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvMaxCLL,
-                        static_cast<uint64_t>(max_cll_))) {
-    return false;
-  }
-  if (max_fall_ != kValueNotPresent &&
-      !WriteEbmlElement(writer, libwebm::kMkvMaxFALL,
-                        static_cast<uint64_t>(max_fall_))) {
-    return false;
-  }
-
-  if (mastering_metadata_ && !mastering_metadata_->Write(writer))
-    return false;
-
-  return true;
-}
-
-bool Colour::SetMasteringMetadata(const MasteringMetadata& mastering_metadata) {
-  std::auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
-  if (!mm_ptr.get())
-    return false;
-
-  mm_ptr->set_luminance_max(mastering_metadata.luminance_max());
-  mm_ptr->set_luminance_min(mastering_metadata.luminance_min());
-
-  if (!mm_ptr->SetChromaticity(mastering_metadata.r(), mastering_metadata.g(),
-                               mastering_metadata.b(),
-                               mastering_metadata.white_point())) {
-    return false;
-  }
-
-  delete mastering_metadata_;
-  mastering_metadata_ = mm_ptr.release();
-  return true;
-}
-
-uint64_t Colour::PayloadSize() const {
-  uint64_t size = 0;
-
-  if (matrix_coefficients_ != kValueNotPresent) {
-    size += EbmlElementSize(libwebm::kMkvMatrixCoefficients,
-                            static_cast<uint64_t>(matrix_coefficients_));
-  }
-  if (bits_per_channel_ != kValueNotPresent) {
-    size += EbmlElementSize(libwebm::kMkvBitsPerChannel,
-                            static_cast<uint64_t>(bits_per_channel_));
-  }
-  if (chroma_subsampling_horz_ != kValueNotPresent) {
-    size += EbmlElementSize(libwebm::kMkvChromaSubsamplingHorz,
-                            static_cast<uint64_t>(chroma_subsampling_horz_));
-  }
-  if (chroma_subsampling_vert_ != kValueNotPresent) {
-    size += EbmlElementSize(libwebm::kMkvChromaSubsamplingVert,
-                            static_cast<uint64_t>(chroma_subsampling_vert_));
-  }
-  if (cb_subsampling_horz_ != kValueNotPresent) {
-    size += EbmlElementSize(libwebm::kMkvCbSubsamplingHorz,
-                            static_cast<uint64_t>(cb_subsampling_horz_));
-  }
-  if (cb_subsampling_vert_ != kValueNotPresent) {
-    size += EbmlElementSize(libwebm::kMkvCbSubsamplingVert,
-                            static_cast<uint64_t>(cb_subsampling_vert_));
-  }
-  if (chroma_siting_horz_ != kValueNotPresent) {
-    size += EbmlElementSize(libwebm::kMkvChromaSitingHorz,
-                            static_cast<uint64_t>(chroma_siting_horz_));
-  }
-  if (chroma_siting_vert_ != kValueNotPresent) {
-    size += EbmlElementSize(libwebm::kMkvChromaSitingVert,
-                            static_cast<uint64_t>(chroma_siting_vert_));
-  }
-  if (range_ != kValueNotPresent) {
-    size += EbmlElementSize(libwebm::kMkvRange, static_cast<uint64_t>(range_));
-  }
-  if (transfer_characteristics_ != kValueNotPresent) {
-    size += EbmlElementSize(libwebm::kMkvTransferCharacteristics,
-                            static_cast<uint64_t>(transfer_characteristics_));
-  }
-  if (primaries_ != kValueNotPresent) {
-    size += EbmlElementSize(libwebm::kMkvPrimaries,
-                            static_cast<uint64_t>(primaries_));
-  }
-  if (max_cll_ != kValueNotPresent) {
-    size += EbmlElementSize(libwebm::kMkvMaxCLL, static_cast<uint64_t>(max_cll_));
-  }
-  if (max_fall_ != kValueNotPresent) {
-    size +=
-        EbmlElementSize(libwebm::kMkvMaxFALL, static_cast<uint64_t>(max_fall_));
-  }
-
-  if (mastering_metadata_)
-    size += mastering_metadata_->MasteringMetadataSize();
-
-  return size;
-}
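Colour and MasteringMetadata gate every optional field on a kValueNotPresent sentinel, so PayloadSize() and Write() must agree exactly on which fields exist or the stop_position check fails. The pattern in miniature; the sentinel value here is chosen for illustration, not taken from libwebm:

    #include <cstdint>

    const uint64_t kNotPresent = ~UINT64_C(0);  // illustrative sentinel

    struct OptionalUint {
      uint64_t value = kNotPresent;
      // Size() and Write() must branch on present() identically, exactly
      // as Colour::PayloadSize() and Colour::Write() do above.
      bool present() const { return value != kNotPresent; }
    };
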
-
-///////////////////////////////////////////////////////////////
-//
-// Projection element
-
-uint64_t Projection::ProjectionSize() const {
-  uint64_t size = PayloadSize();
-
-  if (size > 0)
-    size += EbmlMasterElementSize(libwebm::kMkvProjection, size);
-
-  return size;
-}
-
-bool Projection::Write(IMkvWriter* writer) const {
-  const uint64_t size = PayloadSize();
-
-  // Don't write an empty element.
-  if (size == 0)
-    return true;
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvProjection, size))
-    return false;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvProjectionType,
-                        static_cast<uint64_t>(type_))) {
-    return false;
-  }
-
-  if (private_data_length_ > 0 && private_data_ != NULL &&
-      !WriteEbmlElement(writer, libwebm::kMkvProjectionPrivate, private_data_,
-                        private_data_length_)) {
-    return false;
-  }
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvProjectionPoseYaw, pose_yaw_))
-    return false;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvProjectionPosePitch,
-                        pose_pitch_)) {
-    return false;
-  }
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvProjectionPoseRoll, pose_roll_)) {
-    return false;
-  }
-
-  return true;
-}
-
-bool Projection::SetProjectionPrivate(const uint8_t* data,
-                                      uint64_t data_length) {
-  if (data == NULL || data_length == 0) {
-    return false;
-  }
-
-  if (data_length != static_cast<size_t>(data_length)) {
-    return false;
-  }
-
-  uint8_t* new_private_data =
-      new (std::nothrow) uint8_t[static_cast<size_t>(data_length)];
-  if (new_private_data == NULL) {
-    return false;
-  }
-
-  delete[] private_data_;
-  private_data_ = new_private_data;
-  private_data_length_ = data_length;
-  memcpy(private_data_, data, static_cast<size_t>(data_length));
-
-  return true;
-}
-
-uint64_t Projection::PayloadSize() const {
-  uint64_t size =
-      EbmlElementSize(libwebm::kMkvProjection, static_cast<uint64_t>(type_));
-
-  if (private_data_length_ > 0 && private_data_ != NULL) {
-    size += EbmlElementSize(libwebm::kMkvProjectionPrivate, private_data_,
-                            private_data_length_);
-  }
-
-  size += EbmlElementSize(libwebm::kMkvProjectionPoseYaw, pose_yaw_);
-  size += EbmlElementSize(libwebm::kMkvProjectionPosePitch, pose_pitch_);
-  size += EbmlElementSize(libwebm::kMkvProjectionPoseRoll, pose_roll_);
-
-  return size;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// VideoTrack Class
-
-VideoTrack::VideoTrack(unsigned int* seed)
-    : Track(seed),
-      display_height_(0),
-      display_width_(0),
-      pixel_height_(0),
-      pixel_width_(0),
-      crop_left_(0),
-      crop_right_(0),
-      crop_top_(0),
-      crop_bottom_(0),
-      frame_rate_(0.0),
-      height_(0),
-      stereo_mode_(0),
-      alpha_mode_(0),
-      width_(0),
-      colour_(NULL),
-      projection_(NULL) {}
-
-VideoTrack::~VideoTrack() {
-  delete colour_;
-  delete projection_;
-}
-
-bool VideoTrack::SetStereoMode(uint64_t stereo_mode) {
-  if (stereo_mode != kMono && stereo_mode != kSideBySideLeftIsFirst &&
-      stereo_mode != kTopBottomRightIsFirst &&
-      stereo_mode != kTopBottomLeftIsFirst &&
-      stereo_mode != kSideBySideRightIsFirst)
-    return false;
-
-  stereo_mode_ = stereo_mode;
-  return true;
-}
-
-bool VideoTrack::SetAlphaMode(uint64_t alpha_mode) {
-  if (alpha_mode != kNoAlpha && alpha_mode != kAlpha)
-    return false;
-
-  alpha_mode_ = alpha_mode;
-  return true;
-}
-
-uint64_t VideoTrack::PayloadSize() const {
-  const uint64_t parent_size = Track::PayloadSize();
-
-  uint64_t size = VideoPayloadSize();
-  size += EbmlMasterElementSize(libwebm::kMkvVideo, size);
-
-  return parent_size + size;
-}
-
-bool VideoTrack::Write(IMkvWriter* writer) const {
-  if (!Track::Write(writer))
-    return false;
-
-  const uint64_t size = VideoPayloadSize();
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvVideo, size))
-    return false;
-
-  const int64_t payload_position = writer->Position();
-  if (payload_position < 0)
-    return false;
-
-  if (!WriteEbmlElement(
-          writer, libwebm::kMkvPixelWidth,
-          static_cast<uint64_t>((pixel_width_ > 0) ? pixel_width_ : width_)))
-    return false;
-  if (!WriteEbmlElement(
-          writer, libwebm::kMkvPixelHeight,
-          static_cast<uint64_t>((pixel_height_ > 0) ? pixel_height_ : height_)))
-    return false;
-  if (display_width_ > 0) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvDisplayWidth,
-                          static_cast<uint64_t>(display_width_)))
-      return false;
-  }
-  if (display_height_ > 0) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvDisplayHeight,
-                          static_cast<uint64_t>(display_height_)))
-      return false;
-  }
-  if (crop_left_ > 0) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropLeft,
-                          static_cast<uint64_t>(crop_left_)))
-      return false;
-  }
-  if (crop_right_ > 0) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropRight,
-                          static_cast<uint64_t>(crop_right_)))
-      return false;
-  }
-  if (crop_top_ > 0) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropTop,
-                          static_cast<uint64_t>(crop_top_)))
-      return false;
-  }
-  if (crop_bottom_ > 0) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropBottom,
-                          static_cast<uint64_t>(crop_bottom_)))
-      return false;
-  }
-  if (stereo_mode_ > kMono) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvStereoMode,
-                          static_cast<uint64_t>(stereo_mode_)))
-      return false;
-  }
-  if (alpha_mode_ > kNoAlpha) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvAlphaMode,
-                          static_cast<uint64_t>(alpha_mode_)))
-      return false;
-  }
-  if (frame_rate_ > 0.0) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvFrameRate,
-                          static_cast<float>(frame_rate_))) {
-      return false;
-    }
-  }
-  if (colour_) {
-    if (!colour_->Write(writer))
-      return false;
-  }
-  if (projection_) {
-    if (!projection_->Write(writer))
-      return false;
-  }
-
-  const int64_t stop_position = writer->Position();
-  if (stop_position < 0 ||
-      stop_position - payload_position != static_cast<int64_t>(size)) {
-    return false;
-  }
-
-  return true;
-}
-
-bool VideoTrack::SetColour(const Colour& colour) {
-  std::auto_ptr<Colour> colour_ptr(new Colour());
-  if (!colour_ptr.get())
-    return false;
-
-  if (colour.mastering_metadata()) {
-    if (!colour_ptr->SetMasteringMetadata(*colour.mastering_metadata()))
-      return false;
-  }
-
-  colour_ptr->set_matrix_coefficients(colour.matrix_coefficients());
-  colour_ptr->set_bits_per_channel(colour.bits_per_channel());
-  colour_ptr->set_chroma_subsampling_horz(colour.chroma_subsampling_horz());
-  colour_ptr->set_chroma_subsampling_vert(colour.chroma_subsampling_vert());
-  colour_ptr->set_cb_subsampling_horz(colour.cb_subsampling_horz());
-  colour_ptr->set_cb_subsampling_vert(colour.cb_subsampling_vert());
-  colour_ptr->set_chroma_siting_horz(colour.chroma_siting_horz());
-  colour_ptr->set_chroma_siting_vert(colour.chroma_siting_vert());
-  colour_ptr->set_range(colour.range());
-  colour_ptr->set_transfer_characteristics(colour.transfer_characteristics());
-  colour_ptr->set_primaries(colour.primaries());
-  colour_ptr->set_max_cll(colour.max_cll());
-  colour_ptr->set_max_fall(colour.max_fall());
-  delete colour_;
-  colour_ = colour_ptr.release();
-  return true;
-}
-
-bool VideoTrack::SetProjection(const Projection& projection) {
-  std::auto_ptr<Projection> projection_ptr(new Projection());
-  if (!projection_ptr.get())
-    return false;
-
-  if (projection.private_data()) {
-    if (!projection_ptr->SetProjectionPrivate(
-            projection.private_data(), projection.private_data_length())) {
-      return false;
-    }
-  }
-
-  projection_ptr->set_type(projection.type());
-  projection_ptr->set_pose_yaw(projection.pose_yaw());
-  projection_ptr->set_pose_pitch(projection.pose_pitch());
-  projection_ptr->set_pose_roll(projection.pose_roll());
-  delete projection_;
-  projection_ = projection_ptr.release();
-  return true;
-}
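SetColour() and SetProjection() deep-copy their argument through std::auto_ptr (this tree predates C++11), so the caller's object can live on the stack and be discarded afterwards. A hedged usage sketch; |video_track| is a placeholder and the field values are illustrative:

    mkvmuxer::Colour colour;
    colour.set_primaries(9);                  // e.g. BT.2020 primaries
    colour.set_transfer_characteristics(16);  // e.g. PQ transfer
    if (!video_track->SetColour(colour)) {    // deep copy; |colour| may now die
      // handle the error
    }
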
-
-uint64_t VideoTrack::VideoPayloadSize() const {
-  uint64_t size = EbmlElementSize(
-      libwebm::kMkvPixelWidth,
-      static_cast<uint64_t>((pixel_width_ > 0) ? pixel_width_ : width_));
-  size += EbmlElementSize(
-      libwebm::kMkvPixelHeight,
-      static_cast<uint64_t>((pixel_height_ > 0) ? pixel_height_ : height_));
-  if (display_width_ > 0)
-    size += EbmlElementSize(libwebm::kMkvDisplayWidth,
-                            static_cast<uint64_t>(display_width_));
-  if (display_height_ > 0)
-    size += EbmlElementSize(libwebm::kMkvDisplayHeight,
-                            static_cast<uint64_t>(display_height_));
-  if (crop_left_ > 0)
-    size += EbmlElementSize(libwebm::kMkvPixelCropLeft,
-                            static_cast<uint64_t>(crop_left_));
-  if (crop_right_ > 0)
-    size += EbmlElementSize(libwebm::kMkvPixelCropRight,
-                            static_cast<uint64_t>(crop_right_));
-  if (crop_top_ > 0)
-    size += EbmlElementSize(libwebm::kMkvPixelCropTop,
-                            static_cast<uint64_t>(crop_top_));
-  if (crop_bottom_ > 0)
-    size += EbmlElementSize(libwebm::kMkvPixelCropBottom,
-                            static_cast<uint64_t>(crop_bottom_));
-  if (stereo_mode_ > kMono)
-    size += EbmlElementSize(libwebm::kMkvStereoMode,
-                            static_cast<uint64_t>(stereo_mode_));
-  if (alpha_mode_ > kNoAlpha)
-    size += EbmlElementSize(libwebm::kMkvAlphaMode,
-                            static_cast<uint64_t>(alpha_mode_));
-  if (frame_rate_ > 0.0)
-    size += EbmlElementSize(libwebm::kMkvFrameRate,
-                            static_cast<float>(frame_rate_));
-  if (colour_)
-    size += colour_->ColourSize();
-  if (projection_)
-    size += projection_->ProjectionSize();
-
-  return size;
-}
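The AudioTrack class that follows stores just three audio settings; bit depth is optional and only serialized when non-zero. A typical configuration, sketched against the removed setters (|audio| is a placeholder for a track obtained from the owning Segment, and the simple set_* accessors are assumed from mkvmuxer.h):

    mkvmuxer::AudioTrack* const audio = /* from the owning Segment */;
    audio->set_codec_id(mkvmuxer::Tracks::kOpusCodecId);  // constant below
    audio->set_sample_rate(48000.0);  // stored as kMkvSamplingFrequency (float)
    audio->set_channels(2);
    audio->set_bit_depth(16);         // optional: skipped when left at 0
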
-
-///////////////////////////////////////////////////////////////
-//
-// AudioTrack Class
-
-AudioTrack::AudioTrack(unsigned int* seed)
-    : Track(seed), bit_depth_(0), channels_(1), sample_rate_(0.0) {}
-
-AudioTrack::~AudioTrack() {}
-
-uint64_t AudioTrack::PayloadSize() const {
-  const uint64_t parent_size = Track::PayloadSize();
-
-  uint64_t size = EbmlElementSize(libwebm::kMkvSamplingFrequency,
-                                  static_cast<float>(sample_rate_));
-  size +=
-      EbmlElementSize(libwebm::kMkvChannels, static_cast<uint64_t>(channels_));
-  if (bit_depth_ > 0)
-    size +=
-        EbmlElementSize(libwebm::kMkvBitDepth, static_cast<uint64_t>(bit_depth_));
-  size += EbmlMasterElementSize(libwebm::kMkvAudio, size);
-
-  return parent_size + size;
-}
-
-bool AudioTrack::Write(IMkvWriter* writer) const {
-  if (!Track::Write(writer))
-    return false;
-
-  // Calculate AudioSettings size.
-  uint64_t size = EbmlElementSize(libwebm::kMkvSamplingFrequency,
-                                  static_cast<float>(sample_rate_));
-  size +=
-      EbmlElementSize(libwebm::kMkvChannels, static_cast<uint64_t>(channels_));
-  if (bit_depth_ > 0)
-    size +=
-        EbmlElementSize(libwebm::kMkvBitDepth, static_cast<uint64_t>(bit_depth_));
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvAudio, size))
-    return false;
-
-  const int64_t payload_position = writer->Position();
-  if (payload_position < 0)
-    return false;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvSamplingFrequency,
-                        static_cast<float>(sample_rate_)))
-    return false;
-  if (!WriteEbmlElement(writer, libwebm::kMkvChannels,
-                        static_cast<uint64_t>(channels_)))
-    return false;
-  if (bit_depth_ > 0)
-    if (!WriteEbmlElement(writer, libwebm::kMkvBitDepth,
-                          static_cast<uint64_t>(bit_depth_)))
-      return false;
-
-  const int64_t stop_position = writer->Position();
-  if (stop_position < 0 ||
-      stop_position - payload_position != static_cast<int64_t>(size))
-    return false;
-
-  return true;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Tracks Class
-
-const char Tracks::kOpusCodecId[] = "A_OPUS";
-const char Tracks::kVorbisCodecId[] = "A_VORBIS";
-const char Tracks::kVp8CodecId[] = "V_VP8";
-const char Tracks::kVp9CodecId[] = "V_VP9";
-const char Tracks::kVp10CodecId[] = "V_VP10";
-const char Tracks::kWebVttCaptionsId[] = "D_WEBVTT/CAPTIONS";
-const char Tracks::kWebVttDescriptionsId[] = "D_WEBVTT/DESCRIPTIONS";
-const char Tracks::kWebVttMetadataId[] = "D_WEBVTT/METADATA";
-const char Tracks::kWebVttSubtitlesId[] = "D_WEBVTT/SUBTITLES";
-
-Tracks::Tracks()
-    : track_entries_(NULL), track_entries_size_(0), wrote_tracks_(false) {}
-
-Tracks::~Tracks() {
-  if (track_entries_) {
-    for (uint32_t i = 0; i < track_entries_size_; ++i) {
-      Track* const track = track_entries_[i];
-      delete track;
-    }
-    delete[] track_entries_;
-  }
-}
-
-bool Tracks::AddTrack(Track* track, int32_t number) {
-  if (number < 0 || wrote_tracks_)
-    return false;
-
-  // This muxer only supports track numbers in the range [1, 126], in
-  // order to be able (to use Matroska integer representation) to
-  // serialize the block header (of which the track number is a part)
-  // for a frame using exactly 4 bytes.
-
-  if (number > 0x7E)
-    return false;
-
-  uint32_t track_num = number;
-
-  if (track_num > 0) {
-    // Check to make sure a track does not already have |track_num|.
-    for (uint32_t i = 0; i < track_entries_size_; ++i) {
-      if (track_entries_[i]->number() == track_num)
-        return false;
-    }
-  }
-
-  const uint32_t count = track_entries_size_ + 1;
-
-  Track** const track_entries = new (std::nothrow) Track*[count];  // NOLINT
-  if (!track_entries)
-    return false;
-
-  for (uint32_t i = 0; i < track_entries_size_; ++i) {
-    track_entries[i] = track_entries_[i];
-  }
-
-  delete[] track_entries_;
-
-  // Find the lowest available track number > 0.
-  if (track_num == 0) {
-    track_num = count;
-
-    // Check to make sure a track does not already have |track_num|.
-    bool exit = false;
-    do {
-      exit = true;
-      for (uint32_t i = 0; i < track_entries_size_; ++i) {
-        if (track_entries[i]->number() == track_num) {
-          track_num++;
-          exit = false;
-          break;
-        }
-      }
-    } while (!exit);
-  }
-  track->set_number(track_num);
-
-  track_entries_ = track_entries;
-  track_entries_[track_entries_size_] = track;
-  track_entries_size_ = count;
-  return true;
-}
-
-const Track* Tracks::GetTrackByIndex(uint32_t index) const {
-  if (track_entries_ == NULL)
-    return NULL;
-
-  if (index >= track_entries_size_)
-    return NULL;
-
-  return track_entries_[index];
-}
-
-Track* Tracks::GetTrackByNumber(uint64_t track_number) const {
-  const int32_t count = track_entries_size();
-  for (int32_t i = 0; i < count; ++i) {
-    if (track_entries_[i]->number() == track_number)
-      return track_entries_[i];
-  }
-
-  return NULL;
-}
-
-bool Tracks::TrackIsAudio(uint64_t track_number) const {
-  const Track* const track = GetTrackByNumber(track_number);
-
-  if (track->type() == kAudio)
-    return true;
-
-  return false;
-}
-
-bool Tracks::TrackIsVideo(uint64_t track_number) const {
-  const Track* const track = GetTrackByNumber(track_number);
-
-  if (track->type() == kVideo)
-    return true;
-
-  return false;
-}
-
-bool Tracks::Write(IMkvWriter* writer) const {
-  uint64_t size = 0;
-  const int32_t count = track_entries_size();
-  for (int32_t i = 0; i < count; ++i) {
-    const Track* const track = GetTrackByIndex(i);
-
-    if (!track)
-      return false;
-
-    size += track->Size();
-  }
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvTracks, size))
-    return false;
-
-  const int64_t payload_position = writer->Position();
-  if (payload_position < 0)
-    return false;
-
-  for (int32_t i = 0; i < count; ++i) {
-    const Track* const track = GetTrackByIndex(i);
-    if (!track->Write(writer))
-      return false;
-  }
-
-  const int64_t stop_position = writer->Position();
-  if (stop_position < 0 ||
-      stop_position - payload_position != static_cast<int64_t>(size))
-    return false;
-
-  wrote_tracks_ = true;
-  return true;
-}
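AddTrack() caps explicit numbers at 0x7E and, when handed number 0, linearly probes for the lowest unused value, so a block header can always encode the track number in a single vint byte. The observable contract, as a sketch; |tracks| and the Track pointers are placeholders:

    #include <cassert>

    assert(tracks.AddTrack(track_a, 0));     // auto-assigned: lowest free (1)
    assert(tracks.AddTrack(track_b, 5));     // explicit number accepted
    assert(!tracks.AddTrack(track_c, 5));    // duplicate number rejected
    assert(!tracks.AddTrack(track_d, 127));  // > 0x7E rejected
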
-
-///////////////////////////////////////////////////////////////
-//
-// Chapter Class
-
-bool Chapter::set_id(const char* id) { return StrCpy(id, &id_); }
-
-void Chapter::set_time(const Segment& segment, uint64_t start_ns,
-                       uint64_t end_ns) {
-  const SegmentInfo* const info = segment.GetSegmentInfo();
-  const uint64_t timecode_scale = info->timecode_scale();
-  start_timecode_ = start_ns / timecode_scale;
-  end_timecode_ = end_ns / timecode_scale;
-}
-
-bool Chapter::add_string(const char* title, const char* language,
-                         const char* country) {
-  if (!ExpandDisplaysArray())
-    return false;
-
-  Display& d = displays_[displays_count_++];
-  d.Init();
-
-  if (!d.set_title(title))
-    return false;
-
-  if (!d.set_language(language))
-    return false;
-
-  if (!d.set_country(country))
-    return false;
-
-  return true;
-}
-
-Chapter::Chapter() {
-  // This ctor only constructs the object. Proper initialization is
-  // done in Init() (called in Chapters::AddChapter()). The only
-  // reason we bother implementing this ctor is because we had to
-  // declare it as private (along with the dtor), in order to prevent
-  // clients from creating Chapter instances (a privilege we grant
-  // only to the Chapters class). Doing no initialization here also
-  // means that creating arrays of chapter objects is more efficient,
-  // because we only initialize each new chapter object as it becomes
-  // active on the array.
-}
-
-Chapter::~Chapter() {}
-
-void Chapter::Init(unsigned int* seed) {
-  id_ = NULL;
-  start_timecode_ = 0;
-  end_timecode_ = 0;
-  displays_ = NULL;
-  displays_size_ = 0;
-  displays_count_ = 0;
-  uid_ = MakeUID(seed);
-}
-
-void Chapter::ShallowCopy(Chapter* dst) const {
-  dst->id_ = id_;
-  dst->start_timecode_ = start_timecode_;
-  dst->end_timecode_ = end_timecode_;
-  dst->uid_ = uid_;
-  dst->displays_ = displays_;
-  dst->displays_size_ = displays_size_;
-  dst->displays_count_ = displays_count_;
-}
-
-void Chapter::Clear() {
-  StrCpy(NULL, &id_);
-
-  while (displays_count_ > 0) {
-    Display& d = displays_[--displays_count_];
-    d.Clear();
-  }
-
-  delete[] displays_;
-  displays_ = NULL;
-
-  displays_size_ = 0;
-}
-
-bool Chapter::ExpandDisplaysArray() {
-  if (displays_size_ > displays_count_)
-    return true;  // nothing to do yet
-
-  const int size = (displays_size_ == 0) ? 1 : 2 * displays_size_;
-
-  Display* const displays = new (std::nothrow) Display[size];  // NOLINT
-  if (displays == NULL)
-    return false;
-
-  for (int idx = 0; idx < displays_count_; ++idx) {
-    displays[idx] = displays_[idx];  // shallow copy
-  }
-
-  delete[] displays_;
-
-  displays_ = displays;
-  displays_size_ = size;
-
-  return true;
-}
-
-uint64_t Chapter::WriteAtom(IMkvWriter* writer) const {
-  uint64_t payload_size =
-      EbmlElementSize(libwebm::kMkvChapterStringUID, id_) +
-      EbmlElementSize(libwebm::kMkvChapterUID, static_cast<uint64_t>(uid_)) +
-      EbmlElementSize(libwebm::kMkvChapterTimeStart,
-                      static_cast<uint64_t>(start_timecode_)) +
-      EbmlElementSize(libwebm::kMkvChapterTimeEnd,
-                      static_cast<uint64_t>(end_timecode_));
-
-  for (int idx = 0; idx < displays_count_; ++idx) {
-    const Display& d = displays_[idx];
-    payload_size += d.WriteDisplay(NULL);
-  }
-
-  const uint64_t atom_size =
-      EbmlMasterElementSize(libwebm::kMkvChapterAtom, payload_size) +
-      payload_size;
-
-  if (writer == NULL)
-    return atom_size;
-
-  const int64_t start = writer->Position();
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapterAtom, payload_size))
-    return 0;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvChapterStringUID, id_))
-    return 0;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvChapterUID,
-                        static_cast<uint64_t>(uid_)))
-    return 0;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeStart,
-                        static_cast<uint64_t>(start_timecode_)))
-    return 0;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeEnd,
-                        static_cast<uint64_t>(end_timecode_)))
-    return 0;
-
-  for (int idx = 0; idx < displays_count_; ++idx) {
-    const Display& d = displays_[idx];
-
-    if (!d.WriteDisplay(writer))
-      return 0;
-  }
-
-  const int64_t stop = writer->Position();
-
-  if (stop >= start && uint64_t(stop - start) != atom_size)
-    return 0;
-
-  return atom_size;
-}
-
-void Chapter::Display::Init() {
-  title_ = NULL;
-  language_ = NULL;
-  country_ = NULL;
-}
-
-void Chapter::Display::Clear() {
-  StrCpy(NULL, &title_);
-  StrCpy(NULL, &language_);
-  StrCpy(NULL, &country_);
-}
-
-bool Chapter::Display::set_title(const char* title) {
-  return StrCpy(title, &title_);
-}
-
-bool Chapter::Display::set_language(const char* language) {
-  return StrCpy(language, &language_);
-}
-
-bool Chapter::Display::set_country(const char* country) {
-  return StrCpy(country, &country_);
-}
-
-uint64_t Chapter::Display::WriteDisplay(IMkvWriter* writer) const {
-  uint64_t payload_size = EbmlElementSize(libwebm::kMkvChapString, title_);
-
-  if (language_)
-    payload_size += EbmlElementSize(libwebm::kMkvChapLanguage, language_);
-
-  if (country_)
-    payload_size += EbmlElementSize(libwebm::kMkvChapCountry, country_);
-
-  const uint64_t display_size =
-      EbmlMasterElementSize(libwebm::kMkvChapterDisplay, payload_size) +
-      payload_size;
-
-  if (writer == NULL)
-    return display_size;
-
-  const int64_t start = writer->Position();
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapterDisplay,
-                              payload_size))
-    return 0;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvChapString, title_))
-    return 0;
-
-  if (language_) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvChapLanguage, language_))
-      return 0;
-  }
-
-  if (country_) {
-    if (!WriteEbmlElement(writer, libwebm::kMkvChapCountry, country_))
-      return 0;
-  }
-
-  const int64_t stop = writer->Position();
-
-  if (stop >= start && uint64_t(stop - start) != display_size)
-    return 0;
-
-  return display_size;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Chapters Class
-
-Chapters::Chapters() : chapters_size_(0), chapters_count_(0), chapters_(NULL) {}
-
-Chapters::~Chapters() {
-  while (chapters_count_ > 0) {
-    Chapter& chapter = chapters_[--chapters_count_];
-    chapter.Clear();
-  }
-
-  delete[] chapters_;
-  chapters_ = NULL;
-}
-
-int Chapters::Count() const { return chapters_count_; }
-
-Chapter* Chapters::AddChapter(unsigned int* seed) {
-  if (!ExpandChaptersArray())
-    return NULL;
-
-  Chapter& chapter = chapters_[chapters_count_++];
-  chapter.Init(seed);
-
-  return &chapter;
-}
-
-bool Chapters::Write(IMkvWriter* writer) const {
-  if (writer == NULL)
-    return false;
-
-  const uint64_t payload_size = WriteEdition(NULL);  // return size only
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapters, payload_size))
-    return false;
-
-  const int64_t start = writer->Position();
-
-  if (WriteEdition(writer) == 0)  // error
-    return false;
-
-  const int64_t stop = writer->Position();
-
-  if (stop >= start && uint64_t(stop - start) != payload_size)
-    return false;
-
-  return true;
-}
-
-bool Chapters::ExpandChaptersArray() {
-  if (chapters_size_ > chapters_count_)
-    return true;  // nothing to do yet
-
-  const int size = (chapters_size_ == 0) ? 1 : 2 * chapters_size_;
-
-  Chapter* const chapters = new (std::nothrow) Chapter[size];  // NOLINT
-  if (chapters == NULL)
-    return false;
-
-  for (int idx = 0; idx < chapters_count_; ++idx) {
-    const Chapter& src = chapters_[idx];
-    Chapter* const dst = chapters + idx;
-    src.ShallowCopy(dst);
-  }
-
-  delete[] chapters_;
-
-  chapters_ = chapters;
-  chapters_size_ = size;
-
-  return true;
-}
-
-uint64_t Chapters::WriteEdition(IMkvWriter* writer) const {
-  uint64_t payload_size = 0;
-
-  for (int idx = 0; idx < chapters_count_; ++idx) {
-    const Chapter& chapter = chapters_[idx];
-    payload_size += chapter.WriteAtom(NULL);
-  }
-
-  const uint64_t edition_size =
-      EbmlMasterElementSize(libwebm::kMkvEditionEntry, payload_size) +
-      payload_size;
-
-  if (writer == NULL)  // return size only
-    return edition_size;
-
-  const int64_t start = writer->Position();
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvEditionEntry, payload_size))
-    return 0;  // error
-
-  for (int idx = 0; idx < chapters_count_; ++idx) {
-    const Chapter& chapter = chapters_[idx];
-
-    const uint64_t chapter_size = chapter.WriteAtom(writer);
-    if (chapter_size == 0)  // error
-      return 0;
-  }
-
-  const int64_t stop = writer->Position();
-
-  if (stop >= start && uint64_t(stop - start) != edition_size)
-    return 0;
-
-  return edition_size;
-}
-
-// Tag Class
-
-bool Tag::add_simple_tag(const char* tag_name, const char* tag_string) {
-  if (!ExpandSimpleTagsArray())
-    return false;
-
-  SimpleTag& st = simple_tags_[simple_tags_count_++];
-  st.Init();
-
-  if (!st.set_tag_name(tag_name))
-    return false;
-
-  if (!st.set_tag_string(tag_string))
-    return false;
-
-  return true;
-}
-
-Tag::Tag() {
-  simple_tags_ = NULL;
-  simple_tags_size_ = 0;
-  simple_tags_count_ = 0;
-}
-
-Tag::~Tag() {}
-
-void Tag::ShallowCopy(Tag* dst) const {
-  dst->simple_tags_ = simple_tags_;
-  dst->simple_tags_size_ = simple_tags_size_;
-  dst->simple_tags_count_ = simple_tags_count_;
-}
-
-void Tag::Clear() {
-  while (simple_tags_count_ > 0) {
-    SimpleTag& st = simple_tags_[--simple_tags_count_];
-    st.Clear();
-  }
-
-  delete[] simple_tags_;
-  simple_tags_ = NULL;
-
-  simple_tags_size_ = 0;
-}
-
-bool Tag::ExpandSimpleTagsArray() {
-  if (simple_tags_size_ > simple_tags_count_)
-    return true;  // nothing to do yet
-
-  const int size = (simple_tags_size_ == 0) ? 1 : 2 * simple_tags_size_;
-
-  SimpleTag* const simple_tags = new (std::nothrow) SimpleTag[size];  // NOLINT
-  if (simple_tags == NULL)
-    return false;
-
-  for (int idx = 0; idx < simple_tags_count_; ++idx) {
-    simple_tags[idx] = simple_tags_[idx];  // shallow copy
-  }
-
-  delete[] simple_tags_;
-
-  simple_tags_ = simple_tags;
-  simple_tags_size_ = size;
-
-  return true;
-}
-
-uint64_t Tag::Write(IMkvWriter* writer) const {
-  uint64_t payload_size = 0;
-
-  for (int idx = 0; idx < simple_tags_count_; ++idx) {
-    const SimpleTag& st = simple_tags_[idx];
-    payload_size += st.Write(NULL);
-  }
-
-  const uint64_t tag_size =
-      EbmlMasterElementSize(libwebm::kMkvTag, payload_size) + payload_size;
-
-  if (writer == NULL)
-    return tag_size;
-
-  const int64_t start = writer->Position();
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvTag, payload_size))
-    return 0;
-
-  for (int idx = 0; idx < simple_tags_count_; ++idx) {
-    const SimpleTag& st = simple_tags_[idx];
-
-    if (!st.Write(writer))
-      return 0;
-  }
-
-  const int64_t stop = writer->Position();
-
-  if (stop >= start && uint64_t(stop - start) != tag_size)
-    return 0;
-
-  return tag_size;
-}
-
-// Tag::SimpleTag
-
-void Tag::SimpleTag::Init() {
-  tag_name_ = NULL;
-  tag_string_ = NULL;
-}
-
-void Tag::SimpleTag::Clear() {
-  StrCpy(NULL, &tag_name_);
-  StrCpy(NULL, &tag_string_);
-}
-
-bool Tag::SimpleTag::set_tag_name(const char* tag_name) {
-  return StrCpy(tag_name, &tag_name_);
-}
-
-bool Tag::SimpleTag::set_tag_string(const char* tag_string) {
-  return StrCpy(tag_string, &tag_string_);
-}
-
-uint64_t Tag::SimpleTag::Write(IMkvWriter* writer) const {
-  uint64_t payload_size = EbmlElementSize(libwebm::kMkvTagName, tag_name_);
-
-  payload_size += EbmlElementSize(libwebm::kMkvTagString, tag_string_);
-
-  const uint64_t simple_tag_size =
-      EbmlMasterElementSize(libwebm::kMkvSimpleTag, payload_size) +
-      payload_size;
-
-  if (writer == NULL)
-    return simple_tag_size;
-
-  const int64_t start = writer->Position();
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvSimpleTag, payload_size))
-    return 0;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvTagName, tag_name_))
-    return 0;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvTagString, tag_string_))
-    return 0;
-
-  const int64_t stop = writer->Position();
-
-  if (stop >= start && uint64_t(stop - start) != simple_tag_size)
-    return 0;
-
-  return simple_tag_size;
-}
-
-// Tags Class
-
-Tags::Tags() : tags_size_(0), tags_count_(0), tags_(NULL) {}
-
-Tags::~Tags() {
-  while (tags_count_ > 0) {
-    Tag& tag = tags_[--tags_count_];
-    tag.Clear();
-  }
-
-  delete[] tags_;
-  tags_ = NULL;
-}
-
-int Tags::Count() const { return tags_count_; }
-
-Tag* Tags::AddTag() {
-  if (!ExpandTagsArray())
-    return NULL;
-
-  Tag& tag = tags_[tags_count_++];
-
-  return &tag;
-}
-
-bool Tags::Write(IMkvWriter* writer) const {
-  if (writer == NULL)
-    return false;
-
-  uint64_t payload_size = 0;
-
-  for (int idx = 0; idx < tags_count_; ++idx) {
-    const Tag& tag = tags_[idx];
-    payload_size += tag.Write(NULL);
-  }
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvTags, payload_size))
-    return false;
-
-  const int64_t start = writer->Position();
-
-  for (int idx = 0; idx < tags_count_; ++idx) {
-    const Tag& tag = tags_[idx];
-
-    const uint64_t tag_size = tag.Write(writer);
-    if (tag_size == 0)  // error
-      return 0;
-  }
-
-  const int64_t stop = writer->Position();
-
-  if (stop >= start && uint64_t(stop - start) != payload_size)
-    return false;
-
-  return true;
-}
-
-bool Tags::ExpandTagsArray() {
-  if (tags_size_ > tags_count_)
-    return true;  // nothing to do yet
-
-  const int size = (tags_size_ == 0) ? 1 : 2 * tags_size_;
-
-  Tag* const tags = new (std::nothrow) Tag[size];  // NOLINT
-  if (tags == NULL)
-    return false;
-
-  for (int idx = 0; idx < tags_count_; ++idx) {
-    const Tag& src = tags_[idx];
-    Tag* const dst = tags + idx;
-    src.ShallowCopy(dst);
-  }
-
-  delete[] tags_;
-
-  tags_ = tags;
-  tags_size_ = size;
-
-  return true;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// Cluster class
-
-Cluster::Cluster(uint64_t timecode, int64_t cues_pos, uint64_t timecode_scale,
-                 bool write_last_frame_with_duration, bool fixed_size_timecode)
-    : blocks_added_(0),
-      finalized_(false),
-      fixed_size_timecode_(fixed_size_timecode),
-      header_written_(false),
-      payload_size_(0),
-      position_for_cues_(cues_pos),
-      size_position_(-1),
-      timecode_(timecode),
-      timecode_scale_(timecode_scale),
-      write_last_frame_with_duration_(write_last_frame_with_duration),
-      writer_(NULL) {}
-
-Cluster::~Cluster() {
-  // Delete any stored frames that are left behind. This will happen if the
-  // Cluster was not Finalized for whatever reason.
-  while (!stored_frames_.empty()) {
-    while (!stored_frames_.begin()->second.empty()) {
-      delete stored_frames_.begin()->second.front();
-      stored_frames_.begin()->second.pop_front();
-    }
-    stored_frames_.erase(stored_frames_.begin()->first);
-  }
-}
-
-bool Cluster::Init(IMkvWriter* ptr_writer) {
-  if (!ptr_writer) {
-    return false;
-  }
-  writer_ = ptr_writer;
-  return true;
-}
-
-bool Cluster::AddFrame(const Frame* const frame) {
-  return QueueOrWriteFrame(frame);
-}
-
-bool Cluster::AddFrame(const uint8_t* data, uint64_t length,
-                       uint64_t track_number, uint64_t abs_timecode,
-                       bool is_key) {
-  Frame frame;
-  if (!frame.Init(data, length))
-    return false;
-  frame.set_track_number(track_number);
-  frame.set_timestamp(abs_timecode);
-  frame.set_is_key(is_key);
-  return QueueOrWriteFrame(&frame);
-}
-
-bool Cluster::AddFrameWithAdditional(const uint8_t* data, uint64_t length,
-                                     const uint8_t* additional,
-                                     uint64_t additional_length,
-                                     uint64_t add_id, uint64_t track_number,
-                                     uint64_t abs_timecode, bool is_key) {
-  if (!additional || additional_length == 0) {
-    return false;
-  }
-  Frame frame;
-  if (!frame.Init(data, length) ||
-      !frame.AddAdditionalData(additional, additional_length, add_id)) {
-    return false;
-  }
-  frame.set_track_number(track_number);
-  frame.set_timestamp(abs_timecode);
-  frame.set_is_key(is_key);
-  return QueueOrWriteFrame(&frame);
-}
-
-bool Cluster::AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length,
-                                         int64_t discard_padding,
-                                         uint64_t track_number,
-                                         uint64_t abs_timecode, bool is_key) {
-  Frame frame;
-  if (!frame.Init(data, length))
-    return false;
-  frame.set_discard_padding(discard_padding);
-  frame.set_track_number(track_number);
-  frame.set_timestamp(abs_timecode);
-  frame.set_is_key(is_key);
-  return QueueOrWriteFrame(&frame);
-}
-
-bool Cluster::AddMetadata(const uint8_t* data, uint64_t length,
-                          uint64_t track_number, uint64_t abs_timecode,
-                          uint64_t duration_timecode) {
-  Frame frame;
-  if (!frame.Init(data, length))
-    return false;
-  frame.set_track_number(track_number);
-  frame.set_timestamp(abs_timecode);
-  frame.set_duration(duration_timecode);
-  frame.set_is_key(true);  // All metadata blocks are keyframes.
-  return QueueOrWriteFrame(&frame);
-}
-
-void Cluster::AddPayloadSize(uint64_t size) { payload_size_ += size; }
-
-bool Cluster::Finalize() {
-  return !write_last_frame_with_duration_ && Finalize(false, 0);
-}
-
-bool Cluster::Finalize(bool set_last_frame_duration, uint64_t duration) {
-  if (!writer_ || finalized_)
-    return false;
-
-  if (write_last_frame_with_duration_) {
-    // Write out held back Frames. This essentially performs a k-way merge
-    // across all tracks in the increasing order of timestamps.
-    while (!stored_frames_.empty()) {
-      Frame* frame = stored_frames_.begin()->second.front();
-
-      // Get the next frame to write (frame with least timestamp across all
-      // tracks).
-      for (FrameMapIterator frames_iterator = ++stored_frames_.begin();
-           frames_iterator != stored_frames_.end(); ++frames_iterator) {
-        if (frames_iterator->second.front()->timestamp() < frame->timestamp()) {
-          frame = frames_iterator->second.front();
-        }
-      }
-
-      // Set the duration if it's the last frame for the track.
-      if (set_last_frame_duration &&
-          stored_frames_[frame->track_number()].size() == 1 &&
-          !frame->duration_set()) {
-        frame->set_duration(duration - frame->timestamp());
-        if (!frame->is_key() && !frame->reference_block_timestamp_set()) {
-          frame->set_reference_block_timestamp(
-              last_block_timestamp_[frame->track_number()]);
-        }
-      }
-
-      // Write the frame and remove it from |stored_frames_|.
-      const bool wrote_frame = DoWriteFrame(frame);
-      stored_frames_[frame->track_number()].pop_front();
-      if (stored_frames_[frame->track_number()].empty()) {
-        stored_frames_.erase(frame->track_number());
-      }
-      delete frame;
-      if (!wrote_frame)
-        return false;
-    }
-  }
-
-  if (size_position_ == -1)
-    return false;
-
-  if (writer_->Seekable()) {
-    const int64_t pos = writer_->Position();
-
-    if (writer_->Position(size_position_))
-      return false;
-
-    if (WriteUIntSize(writer_, payload_size(), 8))
-      return false;
-
-    if (writer_->Position(pos))
-      return false;
-  }
-
-  finalized_ = true;
-
-  return true;
-}
-
-uint64_t Cluster::Size() const {
-  const uint64_t element_size =
-      EbmlMasterElementSize(libwebm::kMkvCluster, 0xFFFFFFFFFFFFFFFFULL) +
-      payload_size_;
-  return element_size;
-}
-
-bool Cluster::PreWriteBlock() {
-  if (finalized_)
-    return false;
-
-  if (!header_written_) {
-    if (!WriteClusterHeader())
-      return false;
-  }
-
-  return true;
-}
-
-void Cluster::PostWriteBlock(uint64_t element_size) {
-  AddPayloadSize(element_size);
-  ++blocks_added_;
-}
-
-int64_t Cluster::GetRelativeTimecode(int64_t abs_timecode) const {
-  const int64_t cluster_timecode = this->Cluster::timecode();
-  const int64_t rel_timecode =
-      static_cast<int64_t>(abs_timecode) - cluster_timecode;
-
-  if (rel_timecode < 0 || rel_timecode > kMaxBlockTimecode)
-    return -1;
-
-  return rel_timecode;
-}
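Relative timecodes must land in [0, kMaxBlockTimecode], since a block stores only a signed 16-bit offset from its cluster's timecode; GetRelativeTimecode() signals anything else with -1. A runnable mirror of that arithmetic, with the kMaxBlockTimecode value of 0x7FFF assumed rather than quoted from libwebm:

    #include <cstdint>
    #include <cstdio>

    const int64_t kMaxBlockTimecode = 0x7FFF;  // assumed 15-bit positive range

    int64_t RelativeTimecode(int64_t cluster_tc, int64_t abs_tc) {
      const int64_t rel = abs_tc - cluster_tc;
      return (rel < 0 || rel > kMaxBlockTimecode) ? -1 : rel;
    }

    int main() {
      std::printf("%lld\n", (long long)RelativeTimecode(120000, 120500)); // 500
      std::printf("%lld\n", (long long)RelativeTimecode(120000, 119000)); // -1
      return 0;
    }
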
-
-bool Cluster::DoWriteFrame(const Frame* const frame) {
-  if (!frame || !frame->IsValid())
-    return false;
-
-  if (!PreWriteBlock())
-    return false;
-
-  const uint64_t element_size = WriteFrame(writer_, frame, this);
-  if (element_size == 0)
-    return false;
-
-  PostWriteBlock(element_size);
-  last_block_timestamp_[frame->track_number()] = frame->timestamp();
-  return true;
-}
-
-bool Cluster::QueueOrWriteFrame(const Frame* const frame) {
-  if (!frame || !frame->IsValid())
-    return false;
-
-  // If |write_last_frame_with_duration_| is not set, then write the frame
-  // right away.
-  if (!write_last_frame_with_duration_) {
-    return DoWriteFrame(frame);
-  }
-
-  // Queue the current frame.
-  uint64_t track_number = frame->track_number();
-  Frame* const frame_to_store = new Frame();
-  frame_to_store->CopyFrom(*frame);
-  stored_frames_[track_number].push_back(frame_to_store);
-
-  // Iterate through all queued frames in the current track except the last one
-  // and write each if it is okay to do so, i.e. no other track has a held-back
-  // frame with timestamp <= the timestamp of the frame in question.
-  std::vector<std::list<Frame*>::iterator> frames_to_erase;
-  for (std::list<Frame*>::iterator
-           current_track_iterator = stored_frames_[track_number].begin(),
-           end = --stored_frames_[track_number].end();
-       current_track_iterator != end; ++current_track_iterator) {
-    const Frame* const frame_to_write = *current_track_iterator;
-    bool okay_to_write = true;
-    for (FrameMapIterator track_iterator = stored_frames_.begin();
-         track_iterator != stored_frames_.end(); ++track_iterator) {
-      if (track_iterator->first == track_number) {
-        continue;
-      }
-      if (track_iterator->second.front()->timestamp() <
-          frame_to_write->timestamp()) {
-        okay_to_write = false;
-        break;
-      }
-    }
-    if (okay_to_write) {
-      const bool wrote_frame = DoWriteFrame(frame_to_write);
-      delete frame_to_write;
-      if (!wrote_frame)
-        return false;
-      frames_to_erase.push_back(current_track_iterator);
-    } else {
-      break;
-    }
-  }
-  for (std::vector<std::list<Frame*>::iterator>::iterator iterator =
-           frames_to_erase.begin();
-       iterator != frames_to_erase.end(); ++iterator) {
-    stored_frames_[track_number].erase(*iterator);
-  }
-  return true;
-}
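WriteClusterHeader() below reserves an 8-byte "unknown size" field because the cluster's extent is not known until Finalize(), which seeks back and patches in the real payload size. The reserve-then-patch idiom in miniature, using plain stdio rather than the IMkvWriter interface and without EBML coding:

    #include <cstdint>
    #include <cstdio>

    int main() {
      std::FILE* f = std::fopen("demo.bin", "wb+");
      if (!f) return 1;
      const long size_pos = std::ftell(f);   // remember where the size lives
      uint64_t size_field = ~UINT64_C(0);    // placeholder, like EBML "unknown"
      std::fwrite(&size_field, sizeof size_field, 1, f);
      std::fputs("payload of as-yet-unknown length", f);
      const long end_pos = std::ftell(f);
      size_field = end_pos - size_pos - sizeof size_field;
      std::fseek(f, size_pos, SEEK_SET);              // seek back...
      std::fwrite(&size_field, sizeof size_field, 1, f);  // ...and patch
      std::fclose(f);
      return 0;
    }
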
-
-bool Cluster::WriteClusterHeader() {
-  if (finalized_)
-    return false;
-
-  if (WriteID(writer_, libwebm::kMkvCluster))
-    return false;
-
-  // Save for later.
-  size_position_ = writer_->Position();
-
-  // Write "unknown" (EBML coded -1) as cluster size value. We need to write 8
-  // bytes because we do not know how big our cluster will be.
-  if (SerializeInt(writer_, kEbmlUnknownValue, 8))
-    return false;
-
-  if (!WriteEbmlElement(writer_, libwebm::kMkvTimecode, timecode(),
-                        fixed_size_timecode_ ? 8 : 0)) {
-    return false;
-  }
-  AddPayloadSize(EbmlElementSize(libwebm::kMkvTimecode, timecode(),
-                                 fixed_size_timecode_ ? 8 : 0));
-  header_written_ = true;
-
-  return true;
-}
-
-///////////////////////////////////////////////////////////////
-//
-// SeekHead Class
-
-SeekHead::SeekHead() : start_pos_(0ULL) {
-  for (int32_t i = 0; i < kSeekEntryCount; ++i) {
-    seek_entry_id_[i] = 0;
-    seek_entry_pos_[i] = 0;
-  }
-}
-
-SeekHead::~SeekHead() {}
-
-bool SeekHead::Finalize(IMkvWriter* writer) const {
-  if (writer->Seekable()) {
-    if (start_pos_ == -1)
-      return false;
-
-    uint64_t payload_size = 0;
-    uint64_t entry_size[kSeekEntryCount];
-
-    for (int32_t i = 0; i < kSeekEntryCount; ++i) {
-      if (seek_entry_id_[i] != 0) {
-        entry_size[i] = EbmlElementSize(
-            libwebm::kMkvSeekID, static_cast<uint64_t>(seek_entry_id_[i]));
-        entry_size[i] += EbmlElementSize(
-            libwebm::kMkvSeekPosition,
-            static_cast<uint64_t>(seek_entry_pos_[i]));
-
-        payload_size +=
-            EbmlMasterElementSize(libwebm::kMkvSeek, entry_size[i]) +
-            entry_size[i];
-      }
-    }
-
-    // No SeekHead elements
-    if (payload_size == 0)
-      return true;
-
-    const int64_t pos = writer->Position();
-    if (writer->Position(start_pos_))
-      return false;
-
-    if (!WriteEbmlMasterElement(writer, libwebm::kMkvSeekHead, payload_size))
-      return false;
-
-    for (int32_t i = 0; i < kSeekEntryCount; ++i) {
-      if (seek_entry_id_[i] != 0) {
-        if (!WriteEbmlMasterElement(writer, libwebm::kMkvSeek, entry_size[i]))
-          return false;
-
-        if (!WriteEbmlElement(writer, libwebm::kMkvSeekID,
-                              static_cast<uint64_t>(seek_entry_id_[i])))
-          return false;
-
-        if (!WriteEbmlElement(writer, libwebm::kMkvSeekPosition,
-                              static_cast<uint64_t>(seek_entry_pos_[i])))
-          return false;
-      }
-    }
-
-    const uint64_t total_entry_size = kSeekEntryCount * MaxEntrySize();
-    const uint64_t total_size =
-        EbmlMasterElementSize(libwebm::kMkvSeekHead, total_entry_size) +
-        total_entry_size;
-    const int64_t size_left = total_size - (writer->Position() - start_pos_);
-
-    const uint64_t bytes_written = WriteVoidElement(writer, size_left);
-    if (!bytes_written)
-      return false;
-
-    if (writer->Position(pos))
-      return false;
-  }
-
-  return true;
-}
-
-bool SeekHead::Write(IMkvWriter* writer) {
-  const uint64_t entry_size = kSeekEntryCount * MaxEntrySize();
-  const uint64_t size =
-      EbmlMasterElementSize(libwebm::kMkvSeekHead, entry_size);
-
-  start_pos_ = writer->Position();
-
-  const uint64_t bytes_written = WriteVoidElement(writer, size + entry_size);
-  if (!bytes_written)
-    return false;
-
-  return true;
-}
-
-bool SeekHead::AddSeekEntry(uint32_t id, uint64_t pos) {
-  for (int32_t i = 0; i < kSeekEntryCount; ++i) {
-    if (seek_entry_id_[i] == 0) {
-      seek_entry_id_[i] = id;
-      seek_entry_pos_[i] = pos;
-      return true;
-    }
-  }
-  return false;
-}
-
-uint32_t SeekHead::GetId(int index) const {
-  if (index < 0 || index >= kSeekEntryCount)
-    return UINT_MAX;
-  return seek_entry_id_[index];
-}
-
-uint64_t SeekHead::GetPosition(int index) const {
-  if (index < 0 || index >= kSeekEntryCount)
-    return ULLONG_MAX;
-  return seek_entry_pos_[index];
-}
-
-bool SeekHead::SetSeekEntry(int index, uint32_t id, uint64_t position) {
-  if (index < 0 || index >= kSeekEntryCount)
-    return false;
-  seek_entry_id_[index] = id;
-  seek_entry_pos_[index] = position;
-  return true;
-}
-
-uint64_t SeekHead::MaxEntrySize() const {
-  const uint64_t max_entry_payload_size =
-      EbmlElementSize(libwebm::kMkvSeekID,
-                      static_cast<uint64_t>(UINT64_C(0xffffffff))) +
-      EbmlElementSize(libwebm::kMkvSeekPosition,
-                      static_cast<uint64_t>(UINT64_C(0xffffffffffffffff)));
-  const uint64_t max_entry_size =
-      EbmlMasterElementSize(libwebm::kMkvSeek, max_entry_payload_size) +
-      max_entry_payload_size;
-
-  return max_entry_size;
-}
-
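Both the cluster-size fix-up in Cluster::Finalize and the SeekHead above follow the same idiom: reserve space while writing forward, then seek back and overwrite it once the real value is known. A minimal in-memory sketch of that reserve-then-backpatch pattern (BufferWriter is a hypothetical stand-in for a seekable IMkvWriter):

```cpp
#include <cstdint>
#include <vector>

// Hypothetical minimal writer: an in-memory buffer with a movable position.
struct BufferWriter {
  std::vector<uint8_t> buf;
  size_t pos = 0;
  void Write(const uint8_t* p, size_t n) {
    if (pos + n > buf.size())
      buf.resize(pos + n);
    for (size_t i = 0; i < n; ++i)
      buf[pos + i] = p[i];
    pos += n;
  }
};

// Reserve |n| placeholder bytes now; return where they start.
size_t Reserve(BufferWriter* w, size_t n) {
  const size_t start = w->pos;
  const std::vector<uint8_t> zeros(n, 0);
  w->Write(zeros.data(), n);
  return start;
}

// Later, seek back and fill in the real bytes, then restore the position.
void Backpatch(BufferWriter* w, size_t at, const uint8_t* p, size_t n) {
  const size_t saved = w->pos;
  w->pos = at;
  w->Write(p, n);
  w->pos = saved;
}
```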
-///////////////////////////////////////////////////////////////
-//
-// SegmentInfo Class
-
-SegmentInfo::SegmentInfo()
-    : duration_(-1.0),
-      muxing_app_(NULL),
-      timecode_scale_(1000000ULL),
-      writing_app_(NULL),
-      date_utc_(LLONG_MIN),
-      duration_pos_(-1) {}
-
-SegmentInfo::~SegmentInfo() {
-  delete[] muxing_app_;
-  delete[] writing_app_;
-}
-
-bool SegmentInfo::Init() {
-  int32_t major;
-  int32_t minor;
-  int32_t build;
-  int32_t revision;
-  GetVersion(&major, &minor, &build, &revision);
-  char temp[256];
-#ifdef _MSC_VER
-  sprintf_s(temp, sizeof(temp) / sizeof(temp[0]), "libwebm-%d.%d.%d.%d", major,
-            minor, build, revision);
-#else
-  snprintf(temp, sizeof(temp) / sizeof(temp[0]), "libwebm-%d.%d.%d.%d", major,
-           minor, build, revision);
-#endif
-
-  const size_t app_len = strlen(temp) + 1;
-
-  delete[] muxing_app_;
-
-  muxing_app_ = new (std::nothrow) char[app_len];  // NOLINT
-  if (!muxing_app_)
-    return false;
-
-#ifdef _MSC_VER
-  strcpy_s(muxing_app_, app_len, temp);
-#else
-  strcpy(muxing_app_, temp);
-#endif
-
-  set_writing_app(temp);
-  if (!writing_app_)
-    return false;
-  return true;
-}
-
-bool SegmentInfo::Finalize(IMkvWriter* writer) const {
-  if (!writer)
-    return false;
-
-  if (duration_ > 0.0) {
-    if (writer->Seekable()) {
-      if (duration_pos_ == -1)
-        return false;
-
-      const int64_t pos = writer->Position();
-
-      if (writer->Position(duration_pos_))
-        return false;
-
-      if (!WriteEbmlElement(writer, libwebm::kMkvDuration,
-                            static_cast<float>(duration_)))
-        return false;
-
-      if (writer->Position(pos))
-        return false;
-    }
-  }
-
-  return true;
-}
-
-bool SegmentInfo::Write(IMkvWriter* writer) {
-  if (!writer || !muxing_app_ || !writing_app_)
-    return false;
-
-  uint64_t size = EbmlElementSize(libwebm::kMkvTimecodeScale,
-                                  static_cast<uint64_t>(timecode_scale_));
-  if (duration_ > 0.0)
-    size +=
-        EbmlElementSize(libwebm::kMkvDuration, static_cast<float>(duration_));
-  if (date_utc_ != LLONG_MIN)
-    size += EbmlDateElementSize(libwebm::kMkvDateUTC);
-  size += EbmlElementSize(libwebm::kMkvMuxingApp, muxing_app_);
-  size += EbmlElementSize(libwebm::kMkvWritingApp, writing_app_);
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvInfo, size))
-    return false;
-
-  const int64_t payload_position = writer->Position();
-  if (payload_position < 0)
-    return false;
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvTimecodeScale,
-                        static_cast<uint64_t>(timecode_scale_)))
-    return false;
-
-  if (duration_ > 0.0) {
-    // Save for later
-    duration_pos_ = writer->Position();
-
-    if (!WriteEbmlElement(writer, libwebm::kMkvDuration,
-                          static_cast<float>(duration_)))
-      return false;
-  }
-
-  if (date_utc_ != LLONG_MIN)
-    WriteEbmlDateElement(writer, libwebm::kMkvDateUTC, date_utc_);
-
-  if (!WriteEbmlElement(writer, libwebm::kMkvMuxingApp, muxing_app_))
-    return false;
-  if (!WriteEbmlElement(writer, libwebm::kMkvWritingApp, writing_app_))
-    return false;
-
-  const int64_t stop_position = writer->Position();
-  if (stop_position < 0 ||
-      stop_position - payload_position != static_cast<int64_t>(size))
-    return false;
-
-  return true;
-}
-
-void SegmentInfo::set_muxing_app(const char* app) {
-  if (app) {
-    const size_t length = strlen(app) + 1;
-    char* temp_str = new (std::nothrow) char[length];  // NOLINT
-    if (!temp_str)
-      return;
-
-#ifdef _MSC_VER
-    strcpy_s(temp_str, length, app);
-#else
-    strcpy(temp_str, app);
-#endif
-
-    delete[] muxing_app_;
-    muxing_app_ = temp_str;
-  }
-}
-
-void SegmentInfo::set_writing_app(const char* app) {
-  if (app) {
-    const size_t length = strlen(app) + 1;
-    char* temp_str = new (std::nothrow) char[length];  // NOLINT
-    if (!temp_str)
-      return;
-
-#ifdef _MSC_VER
-    strcpy_s(temp_str, length, app);
-#else
-    strcpy(temp_str, app);
-#endif
-
-    delete[] writing_app_;
-    writing_app_ = temp_str;
-  }
-}
-
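SegmentInfo::Write above precomputes the payload size from the EBML size of every child element, writes the children, and then verifies that exactly that many bytes were emitted. A reduced sketch of that precompute-then-verify pattern over plain byte vectors (ChildSize and WriteWithSizeCheck are hypothetical stand-ins for the EbmlElementSize/WriteEbmlElement pairs):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical stand-in: in mkvmuxer each child's size comes from
// EbmlElementSize() and its bytes from WriteEbmlElement().
uint64_t ChildSize(const std::vector<uint8_t>& child) { return child.size(); }

bool WriteWithSizeCheck(std::vector<uint8_t>* out,
                        const std::vector<std::vector<uint8_t> >& children) {
  uint64_t size = 0;
  for (size_t i = 0; i < children.size(); ++i)
    size += ChildSize(children[i]);  // precompute the payload size

  const uint64_t payload_position = out->size();
  for (size_t i = 0; i < children.size(); ++i)
    out->insert(out->end(), children[i].begin(), children[i].end());

  // Mirrors the stop_position - payload_position check in SegmentInfo::Write.
  return out->size() - payload_position == size;
}
```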
-///////////////////////////////////////////////////////////////
-//
-// Segment Class
-
-Segment::Segment()
-    : chunk_count_(0),
-      chunk_name_(NULL),
-      chunk_writer_cluster_(NULL),
-      chunk_writer_cues_(NULL),
-      chunk_writer_header_(NULL),
-      chunking_(false),
-      chunking_base_name_(NULL),
-      cluster_list_(NULL),
-      cluster_list_capacity_(0),
-      cluster_list_size_(0),
-      cues_position_(kAfterClusters),
-      cues_track_(0),
-      force_new_cluster_(false),
-      frames_(NULL),
-      frames_capacity_(0),
-      frames_size_(0),
-      has_video_(false),
-      header_written_(false),
-      last_block_duration_(0),
-      last_timestamp_(0),
-      max_cluster_duration_(kDefaultMaxClusterDuration),
-      max_cluster_size_(0),
-      mode_(kFile),
-      new_cuepoint_(false),
-      output_cues_(true),
-      accurate_cluster_duration_(false),
-      fixed_size_cluster_timecode_(false),
-      estimate_file_duration_(false),
-      payload_pos_(0),
-      size_position_(0),
-      doc_type_version_(kDefaultDocTypeVersion),
-      doc_type_version_written_(0),
-      duration_(0.0),
-      writer_cluster_(NULL),
-      writer_cues_(NULL),
-      writer_header_(NULL) {
-  const time_t curr_time = time(NULL);
-  seed_ = static_cast<unsigned int>(curr_time);
-#ifdef _WIN32
-  srand(seed_);
-#endif
-}
-
-Segment::~Segment() {
-  if (cluster_list_) {
-    for (int32_t i = 0; i < cluster_list_size_; ++i) {
-      Cluster* const cluster = cluster_list_[i];
-      delete cluster;
-    }
-    delete[] cluster_list_;
-  }
-
-  if (frames_) {
-    for (int32_t i = 0; i < frames_size_; ++i) {
-      Frame* const frame = frames_[i];
-      delete frame;
-    }
-    delete[] frames_;
-  }
-
-  delete[] chunk_name_;
-  delete[] chunking_base_name_;
-
-  if (chunk_writer_cluster_) {
-    chunk_writer_cluster_->Close();
-    delete chunk_writer_cluster_;
-  }
-  if (chunk_writer_cues_) {
-    chunk_writer_cues_->Close();
-    delete chunk_writer_cues_;
-  }
-  if (chunk_writer_header_) {
-    chunk_writer_header_->Close();
-    delete chunk_writer_header_;
-  }
-}
-
-void Segment::MoveCuesBeforeClustersHelper(uint64_t diff, int32_t index,
-                                           uint64_t* cues_size) {
-  CuePoint* const cue_point = cues_.GetCueByIndex(index);
-  if (cue_point == NULL)
-    return;
-  const uint64_t old_cue_point_size = cue_point->Size();
-  const uint64_t cluster_pos = cue_point->cluster_pos() + diff;
-  cue_point->set_cluster_pos(cluster_pos);  // update the new cluster position
-  // New size of the cue is computed as follows
-  //  Let a = current sum of size of all CuePoints
-  //  Let b = Increase in Cue Point's size due to this iteration
-  //  Let c = Increase in size of Cues Element's length due to this iteration
-  //  (This is computed as CodedSize(a + b) - CodedSize(a))
-  //  Let d = b + c. Now d is the |diff| passed to the next recursive call.
-  //  Let e = a + b. Now e is the |cues_size| passed to the next recursive
-  //  call.
- const uint64_t cue_point_size_diff = cue_point->Size() - old_cue_point_size; - const uint64_t cue_size_diff = - GetCodedUIntSize(*cues_size + cue_point_size_diff) - - GetCodedUIntSize(*cues_size); - *cues_size += cue_point_size_diff; - diff = cue_size_diff + cue_point_size_diff; - if (diff > 0) { - for (int32_t i = 0; i < cues_.cue_entries_size(); ++i) { - MoveCuesBeforeClustersHelper(diff, i, cues_size); - } - } -} - -void Segment::MoveCuesBeforeClusters() { - const uint64_t current_cue_size = cues_.Size(); - uint64_t cue_size = 0; - for (int32_t i = 0; i < cues_.cue_entries_size(); ++i) - cue_size += cues_.GetCueByIndex(i)->Size(); - for (int32_t i = 0; i < cues_.cue_entries_size(); ++i) - MoveCuesBeforeClustersHelper(current_cue_size, i, &cue_size); - - // Adjust the Seek Entry to reflect the change in position - // of Cluster and Cues - int32_t cluster_index = 0; - int32_t cues_index = 0; - for (int32_t i = 0; i < SeekHead::kSeekEntryCount; ++i) { - if (seek_head_.GetId(i) == libwebm::kMkvCluster) - cluster_index = i; - if (seek_head_.GetId(i) == libwebm::kMkvCues) - cues_index = i; - } - seek_head_.SetSeekEntry(cues_index, libwebm::kMkvCues, - seek_head_.GetPosition(cluster_index)); - seek_head_.SetSeekEntry(cluster_index, libwebm::kMkvCluster, - cues_.Size() + seek_head_.GetPosition(cues_index)); -} - -bool Segment::Init(IMkvWriter* ptr_writer) { - if (!ptr_writer) { - return false; - } - writer_cluster_ = ptr_writer; - writer_cues_ = ptr_writer; - writer_header_ = ptr_writer; - memset(&track_frames_written_, 0, - sizeof(track_frames_written_[0]) * kMaxTrackNumber); - memset(&last_track_timestamp_, 0, - sizeof(last_track_timestamp_[0]) * kMaxTrackNumber); - return segment_info_.Init(); -} - -bool Segment::CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader, - IMkvWriter* writer) { - if (!writer->Seekable() || chunking_) - return false; - const int64_t cluster_offset = - cluster_list_[0]->size_position() - GetUIntSize(libwebm::kMkvCluster); - - // Copy the headers. - if (!ChunkedCopy(reader, writer, 0, cluster_offset)) - return false; - - // Recompute cue positions and seek entries. - MoveCuesBeforeClusters(); - - // Write cues and seek entries. - // TODO(vigneshv): As of now, it's safe to call seek_head_.Finalize() for the - // second time with a different writer object. But the name Finalize() doesn't - // indicate something we want to call more than once. So consider renaming it - // to write() or some such. - if (!cues_.Write(writer) || !seek_head_.Finalize(writer)) - return false; - - // Copy the Clusters. - if (!ChunkedCopy(reader, writer, cluster_offset, - cluster_end_offset_ - cluster_offset)) - return false; - - // Update the Segment size in case the Cues size has changed. - const int64_t pos = writer->Position(); - const int64_t segment_size = writer->Position() - payload_pos_; - if (writer->Position(size_position_) || - WriteUIntSize(writer, segment_size, 8) || writer->Position(pos)) - return false; - return true; -} - -bool Segment::Finalize() { - if (WriteFramesAll() < 0) - return false; - - // In kLive mode, call Cluster::Finalize only if |accurate_cluster_duration_| - // is set. In all other modes, always call Cluster::Finalize. - if ((mode_ == kLive ? 
accurate_cluster_duration_ : true) &&
-      cluster_list_size_ > 0) {
-    // Update last cluster's size
-    Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1];
-
-    // For the last frame of the last Cluster, we don't write it as a
-    // BlockGroup with Duration unless the frame itself has duration set
-    // explicitly.
-    if (!old_cluster || !old_cluster->Finalize(false, 0))
-      return false;
-  }
-
-  if (mode_ == kFile) {
-    if (chunking_ && chunk_writer_cluster_) {
-      chunk_writer_cluster_->Close();
-      chunk_count_++;
-    }
-
-    double duration =
-        (static_cast<double>(last_timestamp_) + last_block_duration_) /
-        segment_info_.timecode_scale();
-    if (duration_ > 0.0) {
-      duration = duration_;
-    } else {
-      if (last_block_duration_ == 0 && estimate_file_duration_) {
-        const int num_tracks = static_cast<int>(tracks_.track_entries_size());
-        for (int i = 0; i < num_tracks; ++i) {
-          if (track_frames_written_[i] < 2)
-            continue;
-
-          // Estimate the duration for the last block of a Track.
-          const double nano_per_frame =
-              static_cast<double>(last_track_timestamp_[i]) /
-              (track_frames_written_[i] - 1);
-          const double track_duration =
-              (last_track_timestamp_[i] + nano_per_frame) /
-              segment_info_.timecode_scale();
-          if (track_duration > duration)
-            duration = track_duration;
-        }
-      }
-    }
-    segment_info_.set_duration(duration);
-    if (!segment_info_.Finalize(writer_header_))
-      return false;
-
-    if (output_cues_)
-      if (!seek_head_.AddSeekEntry(libwebm::kMkvCues, MaxOffset()))
-        return false;
-
-    if (chunking_) {
-      if (!chunk_writer_cues_)
-        return false;
-
-      char* name = NULL;
-      if (!UpdateChunkName("cues", &name))
-        return false;
-
-      const bool cues_open = chunk_writer_cues_->Open(name);
-      delete[] name;
-      if (!cues_open)
-        return false;
-    }
-
-    cluster_end_offset_ = writer_cluster_->Position();
-
-    // Write the seek headers and cues
-    if (output_cues_)
-      if (!cues_.Write(writer_cues_))
-        return false;
-
-    if (!seek_head_.Finalize(writer_header_))
-      return false;
-
-    if (writer_header_->Seekable()) {
-      if (size_position_ == -1)
-        return false;
-
-      const int64_t segment_size = MaxOffset();
-      if (segment_size < 1)
-        return false;
-
-      const int64_t pos = writer_header_->Position();
-      UpdateDocTypeVersion();
-      if (doc_type_version_ != doc_type_version_written_) {
-        if (writer_header_->Position(0))
-          return false;
-
-        const char* const doc_type =
-            DocTypeIsWebm() ? kDocTypeWebm : kDocTypeMatroska;
-        if (!WriteEbmlHeader(writer_header_, doc_type_version_, doc_type))
-          return false;
-        if (writer_header_->Position() != ebml_header_size_)
-          return false;
-
-        doc_type_version_written_ = doc_type_version_;
-      }
-
-      if (writer_header_->Position(size_position_))
-        return false;
-
-      if (WriteUIntSize(writer_header_, segment_size, 8))
-        return false;
-
-      if (writer_header_->Position(pos))
-        return false;
-    }
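When estimate_file_duration_ is set and the last block carries no explicit duration, the code above approximates the tail frame's duration by the track's mean inter-frame spacing: with N frames written and the last timestamp t, the spacing is t / (N - 1). A worked sketch of that arithmetic (names are illustrative):

```cpp
#include <cstdint>

// Mirrors the estimate in Segment::Finalize: assume the final frame lasts as
// long as the track's average inter-frame spacing.
double EstimateTrackDuration(uint64_t last_timestamp_ns,
                             uint64_t frames_written,
                             uint64_t timecode_scale_ns) {
  if (frames_written < 2)
    return 0.0;  // not enough samples to estimate a frame spacing
  const double ns_per_frame =
      static_cast<double>(last_timestamp_ns) / (frames_written - 1);
  return (last_timestamp_ns + ns_per_frame) / timecode_scale_ns;
}

// e.g. 30 frames with the last at 966,666,666 ns gives a spacing of
// ~33.3 ms, so the estimate is ~1000 timecode units (1 s at the default
// 1 ms scale).
```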
-    if (chunking_) {
-      // Do not close any writers until the segment size has been written,
-      // otherwise the size may be off.
-      if (!chunk_writer_cues_ || !chunk_writer_header_)
-        return false;
-
-      chunk_writer_cues_->Close();
-      chunk_writer_header_->Close();
-    }
-  }
-
-  return true;
-}
-
-Track* Segment::AddTrack(int32_t number) {
-  Track* const track = new (std::nothrow) Track(&seed_);  // NOLINT
-
-  if (!track)
-    return NULL;
-
-  if (!tracks_.AddTrack(track, number)) {
-    delete track;
-    return NULL;
-  }
-
-  return track;
-}
-
-Chapter* Segment::AddChapter() { return chapters_.AddChapter(&seed_); }
-
-Tag* Segment::AddTag() { return tags_.AddTag(); }
-
-uint64_t Segment::AddVideoTrack(int32_t width, int32_t height,
-                                int32_t number) {
-  VideoTrack* const track = new (std::nothrow) VideoTrack(&seed_);  // NOLINT
-  if (!track)
-    return 0;
-
-  track->set_type(Tracks::kVideo);
-  track->set_codec_id(Tracks::kVp8CodecId);
-  track->set_width(width);
-  track->set_height(height);
-
-  if (!tracks_.AddTrack(track, number)) {
-    delete track;
-    return 0;
-  }
-  has_video_ = true;
-
-  return track->number();
-}
-
-bool Segment::AddCuePoint(uint64_t timestamp, uint64_t track) {
-  if (cluster_list_size_ < 1)
-    return false;
-
-  const Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
-  if (!cluster)
-    return false;
-
-  CuePoint* const cue = new (std::nothrow) CuePoint();  // NOLINT
-  if (!cue)
-    return false;
-
-  cue->set_time(timestamp / segment_info_.timecode_scale());
-  cue->set_block_number(cluster->blocks_added());
-  cue->set_cluster_pos(cluster->position_for_cues());
-  cue->set_track(track);
-  if (!cues_.AddCue(cue)) {
-    delete cue;
-    return false;
-  }
-
-  new_cuepoint_ = false;
-  return true;
-}
-
-uint64_t Segment::AddAudioTrack(int32_t sample_rate, int32_t channels,
-                                int32_t number) {
-  AudioTrack* const track = new (std::nothrow) AudioTrack(&seed_);  // NOLINT
-  if (!track)
-    return 0;
-
-  track->set_type(Tracks::kAudio);
-  track->set_codec_id(Tracks::kVorbisCodecId);
-  track->set_sample_rate(sample_rate);
-  track->set_channels(channels);
-
-  if (!tracks_.AddTrack(track, number)) {
-    delete track;
-    return 0;
-  }
-
-  return track->number();
-}
-
-bool Segment::AddFrame(const uint8_t* data, uint64_t length,
-                       uint64_t track_number, uint64_t timestamp,
-                       bool is_key) {
-  if (!data)
-    return false;
-
-  Frame frame;
-  if (!frame.Init(data, length))
-    return false;
-  frame.set_track_number(track_number);
-  frame.set_timestamp(timestamp);
-  frame.set_is_key(is_key);
-  return AddGenericFrame(&frame);
-}
-
-bool Segment::AddFrameWithAdditional(const uint8_t* data, uint64_t length,
-                                     const uint8_t* additional,
-                                     uint64_t additional_length,
-                                     uint64_t add_id, uint64_t track_number,
-                                     uint64_t timestamp, bool is_key) {
-  if (!data || !additional)
-    return false;
-
-  Frame frame;
-  if (!frame.Init(data, length) ||
-      !frame.AddAdditionalData(additional, additional_length, add_id)) {
-    return false;
-  }
-  frame.set_track_number(track_number);
-  frame.set_timestamp(timestamp);
-  frame.set_is_key(is_key);
-  return AddGenericFrame(&frame);
-}
-
-bool Segment::AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length,
-                                         int64_t discard_padding,
-                                         uint64_t track_number,
-                                         uint64_t timestamp, bool is_key) {
-  if (!data)
-    return false;
-
-  Frame frame;
-  if (!frame.Init(data, length))
-    return false;
-  frame.set_discard_padding(discard_padding);
-  frame.set_track_number(track_number);
-  frame.set_timestamp(timestamp);
-  frame.set_is_key(is_key);
-  return AddGenericFrame(&frame);
-}
-
-bool Segment::AddMetadata(const uint8_t* data, uint64_t length,
-                          uint64_t track_number, uint64_t timestamp_ns,
-                          uint64_t duration_ns) {
-  if
(!data) - return false; - - Frame frame; - if (!frame.Init(data, length)) - return false; - frame.set_track_number(track_number); - frame.set_timestamp(timestamp_ns); - frame.set_duration(duration_ns); - frame.set_is_key(true); // All metadata blocks are keyframes. - return AddGenericFrame(&frame); -} - -bool Segment::AddGenericFrame(const Frame* frame) { - if (!frame) - return false; - - if (!CheckHeaderInfo()) - return false; - - // Check for non-monotonically increasing timestamps. - if (frame->timestamp() < last_timestamp_) - return false; - - // Check if the track number is valid. - if (!tracks_.GetTrackByNumber(frame->track_number())) - return false; - - if (frame->discard_padding() != 0) - doc_type_version_ = 4; - - if (cluster_list_size_ > 0) { - const uint64_t timecode_scale = segment_info_.timecode_scale(); - const uint64_t frame_timecode = frame->timestamp() / timecode_scale; - - const Cluster* const last_cluster = cluster_list_[cluster_list_size_ - 1]; - const uint64_t last_cluster_timecode = last_cluster->timecode(); - - const uint64_t rel_timecode = frame_timecode - last_cluster_timecode; - if (rel_timecode > kMaxBlockTimecode) { - force_new_cluster_ = true; - } - } - - // If the segment has a video track hold onto audio frames to make sure the - // audio that is associated with the start time of a video key-frame is - // muxed into the same cluster. - if (has_video_ && tracks_.TrackIsAudio(frame->track_number()) && - !force_new_cluster_) { - Frame* const new_frame = new (std::nothrow) Frame(); - if (!new_frame || !new_frame->CopyFrom(*frame)) { - delete new_frame; - return false; - } - if (!QueueFrame(new_frame)) { - delete new_frame; - return false; - } - track_frames_written_[frame->track_number() - 1]++; - return true; - } - - if (!DoNewClusterProcessing(frame->track_number(), frame->timestamp(), - frame->is_key())) { - return false; - } - - if (cluster_list_size_ < 1) - return false; - - Cluster* const cluster = cluster_list_[cluster_list_size_ - 1]; - if (!cluster) - return false; - - // If the Frame is not a SimpleBlock, then set the reference_block_timestamp - // if it is not set already. 
- bool frame_created = false; - if (!frame->CanBeSimpleBlock() && !frame->is_key() && - !frame->reference_block_timestamp_set()) { - Frame* const new_frame = new (std::nothrow) Frame(); - if (!new_frame || !new_frame->CopyFrom(*frame)) { - delete new_frame; - return false; - } - new_frame->set_reference_block_timestamp( - last_track_timestamp_[frame->track_number() - 1]); - frame = new_frame; - frame_created = true; - } - - if (!cluster->AddFrame(frame)) - return false; - - if (new_cuepoint_ && cues_track_ == frame->track_number()) { - if (!AddCuePoint(frame->timestamp(), cues_track_)) - return false; - } - - last_timestamp_ = frame->timestamp(); - last_track_timestamp_[frame->track_number() - 1] = frame->timestamp(); - last_block_duration_ = frame->duration(); - track_frames_written_[frame->track_number() - 1]++; - - if (frame_created) - delete frame; - return true; -} - -void Segment::OutputCues(bool output_cues) { output_cues_ = output_cues; } - -void Segment::AccurateClusterDuration(bool accurate_cluster_duration) { - accurate_cluster_duration_ = accurate_cluster_duration; -} - -void Segment::UseFixedSizeClusterTimecode(bool fixed_size_cluster_timecode) { - fixed_size_cluster_timecode_ = fixed_size_cluster_timecode; -} - -bool Segment::SetChunking(bool chunking, const char* filename) { - if (chunk_count_ > 0) - return false; - - if (chunking) { - if (!filename) - return false; - - // Check if we are being set to what is already set. - if (chunking_ && !strcmp(filename, chunking_base_name_)) - return true; - - const size_t name_length = strlen(filename) + 1; - char* const temp = new (std::nothrow) char[name_length]; // NOLINT - if (!temp) - return false; - -#ifdef _MSC_VER - strcpy_s(temp, name_length, filename); -#else - strcpy(temp, filename); -#endif - - delete[] chunking_base_name_; - chunking_base_name_ = temp; - - if (!UpdateChunkName("chk", &chunk_name_)) - return false; - - if (!chunk_writer_cluster_) { - chunk_writer_cluster_ = new (std::nothrow) MkvWriter(); // NOLINT - if (!chunk_writer_cluster_) - return false; - } - - if (!chunk_writer_cues_) { - chunk_writer_cues_ = new (std::nothrow) MkvWriter(); // NOLINT - if (!chunk_writer_cues_) - return false; - } - - if (!chunk_writer_header_) { - chunk_writer_header_ = new (std::nothrow) MkvWriter(); // NOLINT - if (!chunk_writer_header_) - return false; - } - - if (!chunk_writer_cluster_->Open(chunk_name_)) - return false; - - const size_t header_length = strlen(filename) + strlen(".hdr") + 1; - char* const header = new (std::nothrow) char[header_length]; // NOLINT - if (!header) - return false; - -#ifdef _MSC_VER - strcpy_s(header, header_length - strlen(".hdr"), chunking_base_name_); - strcat_s(header, header_length, ".hdr"); -#else - strcpy(header, chunking_base_name_); - strcat(header, ".hdr"); -#endif - if (!chunk_writer_header_->Open(header)) { - delete[] header; - return false; - } - - writer_cluster_ = chunk_writer_cluster_; - writer_cues_ = chunk_writer_cues_; - writer_header_ = chunk_writer_header_; - - delete[] header; - } - - chunking_ = chunking; - - return true; -} - -bool Segment::CuesTrack(uint64_t track_number) { - const Track* const track = GetTrackByNumber(track_number); - if (!track) - return false; - - cues_track_ = track_number; - return true; -} - -void Segment::ForceNewClusterOnNextFrame() { force_new_cluster_ = true; } - -Track* Segment::GetTrackByNumber(uint64_t track_number) const { - return tracks_.GetTrackByNumber(track_number); -} - -bool Segment::WriteSegmentHeader() { - UpdateDocTypeVersion(); - - 
const char* const doc_type =
-      DocTypeIsWebm() ? kDocTypeWebm : kDocTypeMatroska;
-  if (!WriteEbmlHeader(writer_header_, doc_type_version_, doc_type))
-    return false;
-  doc_type_version_written_ = doc_type_version_;
-  ebml_header_size_ = static_cast<int32_t>(writer_header_->Position());
-
-  // Write "unknown" (-1) as segment size value. If mode is kFile, Segment
-  // will write over this size value when the file is finalized.
-  if (WriteID(writer_header_, libwebm::kMkvSegment))
-    return false;
-
-  // Save for later.
-  size_position_ = writer_header_->Position();
-
-  // Write "unknown" (EBML coded -1) as segment size value. We need to write 8
-  // bytes because if we are going to overwrite the segment size later we do
-  // not know how big our segment will be.
-  if (SerializeInt(writer_header_, kEbmlUnknownValue, 8))
-    return false;
-
-  payload_pos_ = writer_header_->Position();
-
-  if (mode_ == kFile && writer_header_->Seekable()) {
-    // Set the duration > 0.0 so SegmentInfo will write out the duration. When
-    // the muxer is done writing we will set the correct duration and have
-    // SegmentInfo update it.
-    segment_info_.set_duration(1.0);
-
-    if (!seek_head_.Write(writer_header_))
-      return false;
-  }
-
-  if (!seek_head_.AddSeekEntry(libwebm::kMkvInfo, MaxOffset()))
-    return false;
-  if (!segment_info_.Write(writer_header_))
-    return false;
-
-  if (!seek_head_.AddSeekEntry(libwebm::kMkvTracks, MaxOffset()))
-    return false;
-  if (!tracks_.Write(writer_header_))
-    return false;
-
-  if (chapters_.Count() > 0) {
-    if (!seek_head_.AddSeekEntry(libwebm::kMkvChapters, MaxOffset()))
-      return false;
-    if (!chapters_.Write(writer_header_))
-      return false;
-  }
-
-  if (tags_.Count() > 0) {
-    if (!seek_head_.AddSeekEntry(libwebm::kMkvTags, MaxOffset()))
-      return false;
-    if (!tags_.Write(writer_header_))
-      return false;
-  }
-
-  if (chunking_ && (mode_ == kLive || !writer_header_->Seekable())) {
-    if (!chunk_writer_header_)
-      return false;
-
-    chunk_writer_header_->Close();
-  }
-
-  header_written_ = true;
-
-  return true;
-}
-
-// Here we are testing whether to create a new cluster, given a frame
-// having time frame_timestamp_ns.
-//
-int Segment::TestFrame(uint64_t track_number, uint64_t frame_timestamp_ns,
-                       bool is_key) const {
-  if (force_new_cluster_)
-    return 1;
-
-  // If no clusters have been created yet, then create a new cluster
-  // and write this frame immediately, in the new cluster. This path
-  // should only be followed once, the first time we attempt to write
-  // a frame.
-
-  if (cluster_list_size_ <= 0)
-    return 1;
-
-  // There exists at least one cluster. We must compare the frame to
-  // the last cluster, in order to determine whether the frame is
-  // written to the existing cluster, or whether a new cluster should
-  // be created.
-
-  const uint64_t timecode_scale = segment_info_.timecode_scale();
-  const uint64_t frame_timecode = frame_timestamp_ns / timecode_scale;
-
-  const Cluster* const last_cluster = cluster_list_[cluster_list_size_ - 1];
-  const uint64_t last_cluster_timecode = last_cluster->timecode();
-
-  // For completeness we test for the case when the frame's timecode
-  // is less than the cluster's timecode. Although in principle that
-  // is allowed, this muxer doesn't actually write clusters like that,
-  // so this indicates a bug somewhere in our algorithm.
- - if (frame_timecode < last_cluster_timecode) // should never happen - return -1; - - // If the frame has a timestamp significantly larger than the last - // cluster (in Matroska, cluster-relative timestamps are serialized - // using a 16-bit signed integer), then we cannot write this frame - // to that cluster, and so we must create a new cluster. - - const int64_t delta_timecode = frame_timecode - last_cluster_timecode; - - if (delta_timecode > kMaxBlockTimecode) - return 2; - - // We decide to create a new cluster when we have a video keyframe. - // This will flush queued (audio) frames, and write the keyframe - // immediately, in the newly-created cluster. - - if (is_key && tracks_.TrackIsVideo(track_number)) - return 1; - - // Create a new cluster if we have accumulated too many frames - // already, where "too many" is defined as "the total time of frames - // in the cluster exceeds a threshold". - - const uint64_t delta_ns = delta_timecode * timecode_scale; - - if (max_cluster_duration_ > 0 && delta_ns >= max_cluster_duration_) - return 1; - - // This is similar to the case above, with the difference that a new - // cluster is created when the size of the current cluster exceeds a - // threshold. - - const uint64_t cluster_size = last_cluster->payload_size(); - - if (max_cluster_size_ > 0 && cluster_size >= max_cluster_size_) - return 1; - - // There's no need to create a new cluster, so emit this frame now. - - return 0; -} - -bool Segment::MakeNewCluster(uint64_t frame_timestamp_ns) { - const int32_t new_size = cluster_list_size_ + 1; - - if (new_size > cluster_list_capacity_) { - // Add more clusters. - const int32_t new_capacity = - (cluster_list_capacity_ <= 0) ? 1 : cluster_list_capacity_ * 2; - Cluster** const clusters = - new (std::nothrow) Cluster*[new_capacity]; // NOLINT - if (!clusters) - return false; - - for (int32_t i = 0; i < cluster_list_size_; ++i) { - clusters[i] = cluster_list_[i]; - } - - delete[] cluster_list_; - - cluster_list_ = clusters; - cluster_list_capacity_ = new_capacity; - } - - if (!WriteFramesLessThan(frame_timestamp_ns)) - return false; - - if (cluster_list_size_ > 0) { - // Update old cluster's size - Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1]; - - if (!old_cluster || !old_cluster->Finalize(true, frame_timestamp_ns)) - return false; - } - - if (output_cues_) - new_cuepoint_ = true; - - if (chunking_ && cluster_list_size_ > 0) { - chunk_writer_cluster_->Close(); - chunk_count_++; - - if (!UpdateChunkName("chk", &chunk_name_)) - return false; - if (!chunk_writer_cluster_->Open(chunk_name_)) - return false; - } - - const uint64_t timecode_scale = segment_info_.timecode_scale(); - const uint64_t frame_timecode = frame_timestamp_ns / timecode_scale; - - uint64_t cluster_timecode = frame_timecode; - - if (frames_size_ > 0) { - const Frame* const f = frames_[0]; // earliest queued frame - const uint64_t ns = f->timestamp(); - const uint64_t tc = ns / timecode_scale; - - if (tc < cluster_timecode) - cluster_timecode = tc; - } - - Cluster*& cluster = cluster_list_[cluster_list_size_]; - const int64_t offset = MaxOffset(); - cluster = new (std::nothrow) - Cluster(cluster_timecode, offset, segment_info_.timecode_scale(), - accurate_cluster_duration_, fixed_size_cluster_timecode_); - if (!cluster) - return false; - - if (!cluster->Init(writer_cluster_)) - return false; - - cluster_list_size_ = new_size; - return true; -} - -bool Segment::DoNewClusterProcessing(uint64_t track_number, - uint64_t frame_timestamp_ns, bool is_key) { - for 
(;;) { - // Based on the characteristics of the current frame and current - // cluster, decide whether to create a new cluster. - const int result = TestFrame(track_number, frame_timestamp_ns, is_key); - if (result < 0) // error - return false; - - // Always set force_new_cluster_ to false after TestFrame. - force_new_cluster_ = false; - - // A non-zero result means create a new cluster. - if (result > 0 && !MakeNewCluster(frame_timestamp_ns)) - return false; - - // Write queued (audio) frames. - const int frame_count = WriteFramesAll(); - if (frame_count < 0) // error - return false; - - // Write the current frame to the current cluster (if TestFrame - // returns 0) or to a newly created cluster (TestFrame returns 1). - if (result <= 1) - return true; - - // TestFrame returned 2, which means there was a large time - // difference between the cluster and the frame itself. Do the - // test again, comparing the frame to the new cluster. - } -} - -bool Segment::CheckHeaderInfo() { - if (!header_written_) { - if (!WriteSegmentHeader()) - return false; - - if (!seek_head_.AddSeekEntry(libwebm::kMkvCluster, MaxOffset())) - return false; - - if (output_cues_ && cues_track_ == 0) { - // Check for a video track - for (uint32_t i = 0; i < tracks_.track_entries_size(); ++i) { - const Track* const track = tracks_.GetTrackByIndex(i); - if (!track) - return false; - - if (tracks_.TrackIsVideo(track->number())) { - cues_track_ = track->number(); - break; - } - } - - // Set first track found - if (cues_track_ == 0) { - const Track* const track = tracks_.GetTrackByIndex(0); - if (!track) - return false; - - cues_track_ = track->number(); - } - } - } - return true; -} - -void Segment::UpdateDocTypeVersion() { - for (uint32_t index = 0; index < tracks_.track_entries_size(); ++index) { - const Track* track = tracks_.GetTrackByIndex(index); - if (track == NULL) - break; - if ((track->codec_delay() || track->seek_pre_roll()) && - doc_type_version_ < 4) { - doc_type_version_ = 4; - break; - } - } -} - -bool Segment::UpdateChunkName(const char* ext, char** name) const { - if (!name || !ext) - return false; - - char ext_chk[64]; -#ifdef _MSC_VER - sprintf_s(ext_chk, sizeof(ext_chk), "_%06d.%s", chunk_count_, ext); -#else - snprintf(ext_chk, sizeof(ext_chk), "_%06d.%s", chunk_count_, ext); -#endif - - const size_t length = strlen(chunking_base_name_) + strlen(ext_chk) + 1; - char* const str = new (std::nothrow) char[length]; // NOLINT - if (!str) - return false; - -#ifdef _MSC_VER - strcpy_s(str, length - strlen(ext_chk), chunking_base_name_); - strcat_s(str, length, ext_chk); -#else - strcpy(str, chunking_base_name_); - strcat(str, ext_chk); -#endif - - delete[] * name; - *name = str; - - return true; -} - -int64_t Segment::MaxOffset() { - if (!writer_header_) - return -1; - - int64_t offset = writer_header_->Position() - payload_pos_; - - if (chunking_) { - for (int32_t i = 0; i < cluster_list_size_; ++i) { - Cluster* const cluster = cluster_list_[i]; - offset += cluster->Size(); - } - - if (writer_cues_) - offset += writer_cues_->Position(); - } - - return offset; -} - -bool Segment::QueueFrame(Frame* frame) { - const int32_t new_size = frames_size_ + 1; - - if (new_size > frames_capacity_) { - // Add more frames. - const int32_t new_capacity = (!frames_capacity_) ? 
2 : frames_capacity_ * 2;
-
-    if (new_capacity < 1)
-      return false;
-
-    Frame** const frames = new (std::nothrow) Frame*[new_capacity];  // NOLINT
-    if (!frames)
-      return false;
-
-    for (int32_t i = 0; i < frames_size_; ++i) {
-      frames[i] = frames_[i];
-    }
-
-    delete[] frames_;
-    frames_ = frames;
-    frames_capacity_ = new_capacity;
-  }
-
-  frames_[frames_size_++] = frame;
-
-  return true;
-}
-
-int Segment::WriteFramesAll() {
-  if (frames_ == NULL)
-    return 0;
-
-  if (cluster_list_size_ < 1)
-    return -1;
-
-  Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
-
-  if (!cluster)
-    return -1;
-
-  for (int32_t i = 0; i < frames_size_; ++i) {
-    Frame*& frame = frames_[i];
-    // TODO(jzern/vigneshv): using Segment::AddGenericFrame here would limit
-    // the places where |doc_type_version_| needs to be updated.
-    if (frame->discard_padding() != 0)
-      doc_type_version_ = 4;
-    if (!cluster->AddFrame(frame))
-      return -1;
-
-    if (new_cuepoint_ && cues_track_ == frame->track_number()) {
-      if (!AddCuePoint(frame->timestamp(), cues_track_))
-        return -1;
-    }
-
-    if (frame->timestamp() > last_timestamp_) {
-      last_timestamp_ = frame->timestamp();
-      last_track_timestamp_[frame->track_number() - 1] = frame->timestamp();
-    }
-
-    delete frame;
-    frame = NULL;
-  }
-
-  const int result = frames_size_;
-  frames_size_ = 0;
-
-  return result;
-}
-
-bool Segment::WriteFramesLessThan(uint64_t timestamp) {
-  // Check |cluster_list_size_| to see if this is the first cluster. If it is
-  // the first cluster the audio frames that are less than the first video
-  // timestamp will be written in a later step.
-  if (frames_size_ > 0 && cluster_list_size_ > 0) {
-    if (!frames_)
-      return false;
-
-    Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
-    if (!cluster)
-      return false;
-
-    int32_t shift_left = 0;
-
-    // TODO(fgalligan): Change this to use the durations of frames instead of
-    // the next frame's start time if the duration is accurate.
-    for (int32_t i = 1; i < frames_size_; ++i) {
-      const Frame* const frame_curr = frames_[i];
-
-      if (frame_curr->timestamp() > timestamp)
-        break;
-
-      const Frame* const frame_prev = frames_[i - 1];
-      if (frame_prev->discard_padding() != 0)
-        doc_type_version_ = 4;
-      if (!cluster->AddFrame(frame_prev))
-        return false;
-
-      if (new_cuepoint_ && cues_track_ == frame_prev->track_number()) {
-        if (!AddCuePoint(frame_prev->timestamp(), cues_track_))
-          return false;
-      }
-
-      ++shift_left;
-      if (frame_prev->timestamp() > last_timestamp_) {
-        last_timestamp_ = frame_prev->timestamp();
-        last_track_timestamp_[frame_prev->track_number() - 1] =
-            frame_prev->timestamp();
-      }
-
-      delete frame_prev;
-    }
-
-    if (shift_left > 0) {
-      if (shift_left >= frames_size_)
-        return false;
-
-      const int32_t new_frames_size = frames_size_ - shift_left;
-      for (int32_t i = 0; i < new_frames_size; ++i) {
-        frames_[i] = frames_[i + shift_left];
-      }
-
-      frames_size_ = new_frames_size;
-    }
-  }
-
-  return true;
-}
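WriteFramesLessThan above drains queued audio frames up to the new cluster's start time and then shifts the survivors to the front of the array. A compact sketch of the same drain-and-shift logic over a plain vector, with timestamps standing in for Frames (FlushQueuedBefore is a hypothetical name):

```cpp
#include <cstdint>
#include <vector>

// Flush queue[i - 1] while queue[i] is still at or before |cutoff|, so the
// first frame reaching past the cutoff (and its predecessor) stays queued,
// as in Segment::WriteFramesLessThan.
void FlushQueuedBefore(std::vector<uint64_t>* queue, uint64_t cutoff,
                       std::vector<uint64_t>* written) {
  size_t shift_left = 0;
  for (size_t i = 1; i < queue->size(); ++i) {
    if ((*queue)[i] > cutoff)
      break;
    written->push_back((*queue)[i - 1]);  // "write" the earlier frame
    ++shift_left;
  }
  queue->erase(queue->begin(), queue->begin() + shift_left);
}

// With queue = {10, 20, 30, 40} and cutoff = 30, frames 10 and 20 are
// written and {30, 40} remain queued.
```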
-
-bool Segment::DocTypeIsWebm() const {
-  const int kNumCodecIds = 9;
-
-  // TODO(vigneshv): Tweak .clang-format.
-  const char* kWebmCodecIds[kNumCodecIds] = {
-      Tracks::kOpusCodecId,          Tracks::kVorbisCodecId,
-      Tracks::kVp8CodecId,           Tracks::kVp9CodecId,
-      Tracks::kVp10CodecId,          Tracks::kWebVttCaptionsId,
-      Tracks::kWebVttDescriptionsId, Tracks::kWebVttMetadataId,
-      Tracks::kWebVttSubtitlesId};
-
-  const int num_tracks = static_cast<int>(tracks_.track_entries_size());
-  for (int track_index = 0; track_index < num_tracks; ++track_index) {
-    const Track* const track = tracks_.GetTrackByIndex(track_index);
-    const std::string codec_id = track->codec_id();
-
-    bool id_is_webm = false;
-    for (int id_index = 0; id_index < kNumCodecIds; ++id_index) {
-      if (codec_id == kWebmCodecIds[id_index]) {
-        id_is_webm = true;
-        break;
-      }
-    }
-
-    if (!id_is_webm)
-      return false;
-  }
-
-  return true;
-}
-
-}  // namespace mkvmuxer
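DocTypeIsWebm above downgrades the DocType to Matroska as soon as any track's codec id falls outside the WebM set. A self-contained sketch of the same whitelist test; the id strings here are an abbreviated, illustrative subset of the Tracks::k*CodecId constants:

```cpp
#include <cstddef>
#include <cstring>

// Illustrative subset of the WebM codec ids checked above.
bool IsWebmCodecId(const char* codec_id) {
  static const char* const kIds[] = {"A_OPUS", "A_VORBIS", "V_VP8", "V_VP9"};
  for (size_t i = 0; i < sizeof(kIds) / sizeof(kIds[0]); ++i) {
    if (strcmp(codec_id, kIds[i]) == 0)
      return true;
  }
  return false;
}

// A segment is written with the "webm" DocType only if IsWebmCodecId holds
// for every track; otherwise "matroska" is used.
```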
diff --git a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h b/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h
deleted file mode 100644
index 46b0029dc470..000000000000
--- a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h
+++ /dev/null
@@ -1,1921 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS.  All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-#ifndef MKVMUXER_MKVMUXER_H_
-#define MKVMUXER_MKVMUXER_H_
-
-#include <stdint.h>
-
-#include <cstddef>
-#include <list>
-#include <map>
-
-#include "common/webmids.h"
-#include "mkvmuxer/mkvmuxertypes.h"
-
-// For a description of the WebM elements see
-// http://www.webmproject.org/code/specs/container/.
-
-namespace mkvparser {
-class IMkvReader;
-}  // namespace mkvparser
-
-namespace mkvmuxer {
-
-class MkvWriter;
-class Segment;
-
-const uint64_t kMaxTrackNumber = 126;
-
-///////////////////////////////////////////////////////////////
-// Interface used by the mkvmuxer to write out the Mkv data.
-class IMkvWriter {
- public:
-  // Writes out |len| bytes of |buf|. Returns 0 on success.
-  virtual int32 Write(const void* buf, uint32 len) = 0;
-
-  // Returns the offset of the output position from the beginning of the
-  // output.
-  virtual int64 Position() const = 0;
-
-  // Set the current File position. Returns 0 on success.
-  virtual int32 Position(int64 position) = 0;
-
-  // Returns true if the writer is seekable.
-  virtual bool Seekable() const = 0;
-
-  // Element start notification. Called whenever an element identifier is about
-  // to be written to the stream. |element_id| is the element identifier, and
-  // |position| is the location in the WebM stream where the first octet of the
-  // element identifier will be written.
-  // Note: the |MkvId| enumeration in webmids.hpp defines element values.
-  virtual void ElementStartNotify(uint64 element_id, int64 position) = 0;
-
- protected:
-  IMkvWriter();
-  virtual ~IMkvWriter();
-
- private:
-  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(IMkvWriter);
-};
-
-// Writes out the EBML header for a WebM file, but allows caller to specify
-// DocType. This function must be called before any other libwebm writing
-// functions are called.
-bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version,
-                     const char* const doc_type);
-
-// Writes out the EBML header for a WebM file. This function must be called
-// before any other libwebm writing functions are called.
-bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version);
-
-// Deprecated. Writes out EBML header with doc_type_version as
-// kDefaultDocTypeVersion. Exists for backward compatibility.
-bool WriteEbmlHeader(IMkvWriter* writer);
-
-// Copies in Chunk from source to destination between the given byte positions
-bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst, int64_t start,
-                 int64_t size);
-
-///////////////////////////////////////////////////////////////
-// Class to hold data that will be written to a block.
-class Frame {
- public:
-  Frame();
-  ~Frame();
-
-  // Sets this frame's contents based on |frame|. Returns true on success. On
-  // failure, this frame's existing contents may be lost.
-  bool CopyFrom(const Frame& frame);
-
-  // Copies |frame| data into |frame_|. Returns true on success.
-  bool Init(const uint8_t* frame, uint64_t length);
-
-  // Copies |additional| data into |additional_|. Returns true on success.
-  bool AddAdditionalData(const uint8_t* additional, uint64_t length,
-                         uint64_t add_id);
-
-  // Returns true if the frame has valid parameters.
-  bool IsValid() const;
-
-  // Returns true if the frame can be written as a SimpleBlock based on current
-  // parameters.
-  bool CanBeSimpleBlock() const;
-
-  uint64_t add_id() const { return add_id_; }
-  const uint8_t* additional() const { return additional_; }
-  uint64_t additional_length() const { return additional_length_; }
-  void set_duration(uint64_t duration);
-  uint64_t duration() const { return duration_; }
-  bool duration_set() const { return duration_set_; }
-  const uint8_t* frame() const { return frame_; }
-  void set_is_key(bool key) { is_key_ = key; }
-  bool is_key() const { return is_key_; }
-  uint64_t length() const { return length_; }
-  void set_track_number(uint64_t track_number) { track_number_ = track_number; }
-  uint64_t track_number() const { return track_number_; }
-  void set_timestamp(uint64_t timestamp) { timestamp_ = timestamp; }
-  uint64_t timestamp() const { return timestamp_; }
-  void set_discard_padding(int64_t discard_padding) {
-    discard_padding_ = discard_padding;
-  }
-  int64_t discard_padding() const { return discard_padding_; }
-  void set_reference_block_timestamp(int64_t reference_block_timestamp);
-  int64_t reference_block_timestamp() const {
-    return reference_block_timestamp_;
-  }
-  bool reference_block_timestamp_set() const {
-    return reference_block_timestamp_set_;
-  }
-
- private:
-  // Id of the Additional data.
-  uint64_t add_id_;
-
-  // Pointer to additional data. Owned by this class.
-  uint8_t* additional_;
-
-  // Length of the additional data.
-  uint64_t additional_length_;
-
-  // Duration of the frame in nanoseconds.
-  uint64_t duration_;
-
-  // Flag indicating that |duration_| has been set. Setting duration causes the
-  // frame to be written out as a Block with BlockDuration instead of as a
-  // SimpleBlock.
-  bool duration_set_;
-
-  // Pointer to the data. Owned by this class.
-  uint8_t* frame_;
-
-  // Flag telling if the data should set the key flag of a block.
-  bool is_key_;
-
-  // Length of the data.
-  uint64_t length_;
-
-  // Mkv track number the data is associated with.
-  uint64_t track_number_;
-
-  // Timestamp of the data in nanoseconds.
-  uint64_t timestamp_;
-
-  // Discard padding for the frame.
-  int64_t discard_padding_;
-
-  // Reference block timestamp.
-  int64_t reference_block_timestamp_;
-
-  // Flag indicating if |reference_block_timestamp_| has been set.
- bool reference_block_timestamp_set_; - - LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Frame); -}; - -/////////////////////////////////////////////////////////////// -// Class to hold one cue point in a Cues element. -class CuePoint { - public: - CuePoint(); - ~CuePoint(); - - // Returns the size in bytes for the entire CuePoint element. - uint64_t Size() const; - - // Output the CuePoint element to the writer. Returns true on success. - bool Write(IMkvWriter* writer) const; - - void set_time(uint64_t time) { time_ = time; } - uint64_t time() const { return time_; } - void set_track(uint64_t track) { track_ = track; } - uint64_t track() const { return track_; } - void set_cluster_pos(uint64_t cluster_pos) { cluster_pos_ = cluster_pos; } - uint64_t cluster_pos() const { return cluster_pos_; } - void set_block_number(uint64_t block_number) { block_number_ = block_number; } - uint64_t block_number() const { return block_number_; } - void set_output_block_number(bool output_block_number) { - output_block_number_ = output_block_number; - } - bool output_block_number() const { return output_block_number_; } - - private: - // Returns the size in bytes for the payload of the CuePoint element. - uint64_t PayloadSize() const; - - // Absolute timecode according to the segment time base. - uint64_t time_; - - // The Track element associated with the CuePoint. - uint64_t track_; - - // The position of the Cluster containing the Block. - uint64_t cluster_pos_; - - // Number of the Block within the Cluster, starting from 1. - uint64_t block_number_; - - // If true the muxer will write out the block number for the cue if the - // block number is different than the default of 1. Default is set to true. - bool output_block_number_; - - LIBWEBM_DISALLOW_COPY_AND_ASSIGN(CuePoint); -}; - -/////////////////////////////////////////////////////////////// -// Cues element. -class Cues { - public: - Cues(); - ~Cues(); - - // Adds a cue point to the Cues element. Returns true on success. - bool AddCue(CuePoint* cue); - - // Returns the cue point by index. Returns NULL if there is no cue point - // match. - CuePoint* GetCueByIndex(int32_t index) const; - - // Returns the total size of the Cues element - uint64_t Size(); - - // Output the Cues element to the writer. Returns true on success. - bool Write(IMkvWriter* writer) const; - - int32_t cue_entries_size() const { return cue_entries_size_; } - void set_output_block_number(bool output_block_number) { - output_block_number_ = output_block_number; - } - bool output_block_number() const { return output_block_number_; } - - private: - // Number of allocated elements in |cue_entries_|. - int32_t cue_entries_capacity_; - - // Number of CuePoints in |cue_entries_|. - int32_t cue_entries_size_; - - // CuePoint list. - CuePoint** cue_entries_; - - // If true the muxer will write out the block number for the cue if the - // block number is different than the default of 1. Default is set to true. - bool output_block_number_; - - LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Cues); -}; - -/////////////////////////////////////////////////////////////// -// ContentEncAESSettings element -class ContentEncAESSettings { - public: - enum { kCTR = 1 }; - - ContentEncAESSettings(); - ~ContentEncAESSettings() {} - - // Returns the size in bytes for the ContentEncAESSettings element. - uint64_t Size() const; - - // Writes out the ContentEncAESSettings element to |writer|. Returns true on - // success. 
-  bool Write(IMkvWriter* writer) const;
-
-  uint64_t cipher_mode() const { return cipher_mode_; }
-
- private:
-  // Returns the size in bytes for the payload of the ContentEncAESSettings
-  // element.
-  uint64_t PayloadSize() const;
-
-  // Sub elements
-  uint64_t cipher_mode_;
-
-  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncAESSettings);
-};
-
-///////////////////////////////////////////////////////////////
-// ContentEncoding element
-// Elements used to describe if the track data has been encrypted or
-// compressed with zlib or header stripping.
-// Currently only whole frames can be encrypted with AES. This dictates that
-// ContentEncodingOrder will be 0, ContentEncodingScope will be 1,
-// ContentEncodingType will be 1, and ContentEncAlgo will be 5.
-class ContentEncoding {
- public:
-  ContentEncoding();
-  ~ContentEncoding();
-
-  // Sets the content encryption id. Copies |length| bytes from |id| to
-  // |enc_key_id_|. Returns true on success.
-  bool SetEncryptionID(const uint8_t* id, uint64_t length);
-
-  // Returns the size in bytes for the ContentEncoding element.
-  uint64_t Size() const;
-
-  // Writes out the ContentEncoding element to |writer|. Returns true on
-  // success.
-  bool Write(IMkvWriter* writer) const;
-
-  uint64_t enc_algo() const { return enc_algo_; }
-  uint64_t encoding_order() const { return encoding_order_; }
-  uint64_t encoding_scope() const { return encoding_scope_; }
-  uint64_t encoding_type() const { return encoding_type_; }
-  ContentEncAESSettings* enc_aes_settings() { return &enc_aes_settings_; }
-
- private:
-  // Returns the size in bytes for the encoding elements.
-  uint64_t EncodingSize(uint64_t compression_size,
-                        uint64_t encryption_size) const;
-
-  // Returns the size in bytes for the encryption elements.
-  uint64_t EncryptionSize() const;
-
-  // Track element names
-  uint64_t enc_algo_;
-  uint8_t* enc_key_id_;
-  uint64_t encoding_order_;
-  uint64_t encoding_scope_;
-  uint64_t encoding_type_;
-
-  // ContentEncAESSettings element.
-  ContentEncAESSettings enc_aes_settings_;
-
-  // Size of the ContentEncKeyID data in bytes.
-  uint64_t enc_key_id_length_;
-
-  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding);
-};
-
-///////////////////////////////////////////////////////////////
-// Colour element.
-class PrimaryChromaticity {
- public:
-  static const float kChromaticityMin;
-  static const float kChromaticityMax;
-
-  PrimaryChromaticity(float x_val, float y_val) : x_(x_val), y_(y_val) {}
-  PrimaryChromaticity() : x_(0), y_(0) {}
-  ~PrimaryChromaticity() {}
-
-  // Returns sum of |x_id| and |y_id| element id sizes and payload sizes.
-  uint64_t PrimaryChromaticitySize(libwebm::MkvId x_id,
-                                   libwebm::MkvId y_id) const;
-  bool Valid() const;
-  bool Write(IMkvWriter* writer, libwebm::MkvId x_id,
-             libwebm::MkvId y_id) const;
-
-  float x() const { return x_; }
-  void set_x(float new_x) { x_ = new_x; }
-  float y() const { return y_; }
-  void set_y(float new_y) { y_ = new_y; }
-
- private:
-  float x_;
-  float y_;
-};
-
-class MasteringMetadata {
- public:
-  static const float kValueNotPresent;
-  static const float kMinLuminance;
-  static const float kMinLuminanceMax;
-  static const float kMaxLuminanceMax;
-
-  MasteringMetadata()
-      : luminance_max_(kValueNotPresent),
-        luminance_min_(kValueNotPresent),
-        r_(NULL),
-        g_(NULL),
-        b_(NULL),
-        white_point_(NULL) {}
-  ~MasteringMetadata() {
-    delete r_;
-    delete g_;
-    delete b_;
-    delete white_point_;
-  }
-
-  // Returns total size of the MasteringMetadata element.
- uint64_t MasteringMetadataSize() const; - bool Valid() const; - bool Write(IMkvWriter* writer) const; - - // Copies non-null chromaticity. - bool SetChromaticity(const PrimaryChromaticity* r, - const PrimaryChromaticity* g, - const PrimaryChromaticity* b, - const PrimaryChromaticity* white_point); - const PrimaryChromaticity* r() const { return r_; } - const PrimaryChromaticity* g() const { return g_; } - const PrimaryChromaticity* b() const { return b_; } - const PrimaryChromaticity* white_point() const { return white_point_; } - - float luminance_max() const { return luminance_max_; } - void set_luminance_max(float luminance_max) { - luminance_max_ = luminance_max; - } - float luminance_min() const { return luminance_min_; } - void set_luminance_min(float luminance_min) { - luminance_min_ = luminance_min; - } - - private: - // Returns size of MasteringMetadata child elements. - uint64_t PayloadSize() const; - - float luminance_max_; - float luminance_min_; - PrimaryChromaticity* r_; - PrimaryChromaticity* g_; - PrimaryChromaticity* b_; - PrimaryChromaticity* white_point_; -}; - -class Colour { - public: - enum MatrixCoefficients { - kGbr = 0, - kBt709 = 1, - kUnspecifiedMc = 2, - kReserved = 3, - kFcc = 4, - kBt470bg = 5, - kSmpte170MMc = 6, - kSmpte240MMc = 7, - kYcocg = 8, - kBt2020NonConstantLuminance = 9, - kBt2020ConstantLuminance = 10, - }; - enum ChromaSitingHorz { - kUnspecifiedCsh = 0, - kLeftCollocated = 1, - kHalfCsh = 2, - }; - enum ChromaSitingVert { - kUnspecifiedCsv = 0, - kTopCollocated = 1, - kHalfCsv = 2, - }; - enum Range { - kUnspecifiedCr = 0, - kBroadcastRange = 1, - kFullRange = 2, - kMcTcDefined = 3, // Defined by MatrixCoefficients/TransferCharacteristics. - }; - enum TransferCharacteristics { - kIturBt709Tc = 1, - kUnspecifiedTc = 2, - kReservedTc = 3, - kGamma22Curve = 4, - kGamma28Curve = 5, - kSmpte170MTc = 6, - kSmpte240MTc = 7, - kLinear = 8, - kLog = 9, - kLogSqrt = 10, - kIec6196624 = 11, - kIturBt1361ExtendedColourGamut = 12, - kIec6196621 = 13, - kIturBt202010bit = 14, - kIturBt202012bit = 15, - kSmpteSt2084 = 16, - kSmpteSt4281Tc = 17, - kAribStdB67Hlg = 18, - }; - enum Primaries { - kReservedP0 = 0, - kIturBt709P = 1, - kUnspecifiedP = 2, - kReservedP3 = 3, - kIturBt470M = 4, - kIturBt470Bg = 5, - kSmpte170MP = 6, - kSmpte240MP = 7, - kFilm = 8, - kIturBt2020 = 9, - kSmpteSt4281P = 10, - kJedecP22Phosphors = 22, - }; - static const uint64_t kValueNotPresent; - Colour() - : matrix_coefficients_(kValueNotPresent), - bits_per_channel_(kValueNotPresent), - chroma_subsampling_horz_(kValueNotPresent), - chroma_subsampling_vert_(kValueNotPresent), - cb_subsampling_horz_(kValueNotPresent), - cb_subsampling_vert_(kValueNotPresent), - chroma_siting_horz_(kValueNotPresent), - chroma_siting_vert_(kValueNotPresent), - range_(kValueNotPresent), - transfer_characteristics_(kValueNotPresent), - primaries_(kValueNotPresent), - max_cll_(kValueNotPresent), - max_fall_(kValueNotPresent), - mastering_metadata_(NULL) {} - ~Colour() { delete mastering_metadata_; } - - // Returns total size of the Colour element. - uint64_t ColourSize() const; - bool Valid() const; - bool Write(IMkvWriter* writer) const; - - // Deep copies |mastering_metadata|. 
- bool SetMasteringMetadata(const MasteringMetadata& mastering_metadata); - - const MasteringMetadata* mastering_metadata() const { - return mastering_metadata_; - } - - uint64_t matrix_coefficients() const { return matrix_coefficients_; } - void set_matrix_coefficients(uint64_t matrix_coefficients) { - matrix_coefficients_ = matrix_coefficients; - } - uint64_t bits_per_channel() const { return bits_per_channel_; } - void set_bits_per_channel(uint64_t bits_per_channel) { - bits_per_channel_ = bits_per_channel; - } - uint64_t chroma_subsampling_horz() const { return chroma_subsampling_horz_; } - void set_chroma_subsampling_horz(uint64_t chroma_subsampling_horz) { - chroma_subsampling_horz_ = chroma_subsampling_horz; - } - uint64_t chroma_subsampling_vert() const { return chroma_subsampling_vert_; } - void set_chroma_subsampling_vert(uint64_t chroma_subsampling_vert) { - chroma_subsampling_vert_ = chroma_subsampling_vert; - } - uint64_t cb_subsampling_horz() const { return cb_subsampling_horz_; } - void set_cb_subsampling_horz(uint64_t cb_subsampling_horz) { - cb_subsampling_horz_ = cb_subsampling_horz; - } - uint64_t cb_subsampling_vert() const { return cb_subsampling_vert_; } - void set_cb_subsampling_vert(uint64_t cb_subsampling_vert) { - cb_subsampling_vert_ = cb_subsampling_vert; - } - uint64_t chroma_siting_horz() const { return chroma_siting_horz_; } - void set_chroma_siting_horz(uint64_t chroma_siting_horz) { - chroma_siting_horz_ = chroma_siting_horz; - } - uint64_t chroma_siting_vert() const { return chroma_siting_vert_; } - void set_chroma_siting_vert(uint64_t chroma_siting_vert) { - chroma_siting_vert_ = chroma_siting_vert; - } - uint64_t range() const { return range_; } - void set_range(uint64_t range) { range_ = range; } - uint64_t transfer_characteristics() const { - return transfer_characteristics_; - } - void set_transfer_characteristics(uint64_t transfer_characteristics) { - transfer_characteristics_ = transfer_characteristics; - } - uint64_t primaries() const { return primaries_; } - void set_primaries(uint64_t primaries) { primaries_ = primaries; } - uint64_t max_cll() const { return max_cll_; } - void set_max_cll(uint64_t max_cll) { max_cll_ = max_cll; } - uint64_t max_fall() const { return max_fall_; } - void set_max_fall(uint64_t max_fall) { max_fall_ = max_fall; } - - private: - // Returns size of Colour child elements. - uint64_t PayloadSize() const; - - uint64_t matrix_coefficients_; - uint64_t bits_per_channel_; - uint64_t chroma_subsampling_horz_; - uint64_t chroma_subsampling_vert_; - uint64_t cb_subsampling_horz_; - uint64_t cb_subsampling_vert_; - uint64_t chroma_siting_horz_; - uint64_t chroma_siting_vert_; - uint64_t range_; - uint64_t transfer_characteristics_; - uint64_t primaries_; - uint64_t max_cll_; - uint64_t max_fall_; - - MasteringMetadata* mastering_metadata_; -}; - -/////////////////////////////////////////////////////////////// -// Projection element. 
-class Projection { - public: - enum ProjectionType { - kTypeNotPresent = -1, - kRectangular = 0, - kEquirectangular = 1, - kCubeMap = 2, - kMesh = 3, - }; - static const uint64_t kValueNotPresent; - Projection() - : type_(kRectangular), - pose_yaw_(0.0), - pose_pitch_(0.0), - pose_roll_(0.0), - private_data_(NULL), - private_data_length_(0) {} - ~Projection() { delete[] private_data_; } - - uint64_t ProjectionSize() const; - bool Write(IMkvWriter* writer) const; - - bool SetProjectionPrivate(const uint8_t* private_data, - uint64_t private_data_length); - - ProjectionType type() const { return type_; } - void set_type(ProjectionType type) { type_ = type; } - float pose_yaw() const { return pose_yaw_; } - void set_pose_yaw(float pose_yaw) { pose_yaw_ = pose_yaw; } - float pose_pitch() const { return pose_pitch_; } - void set_pose_pitch(float pose_pitch) { pose_pitch_ = pose_pitch; } - float pose_roll() const { return pose_roll_; } - void set_pose_roll(float pose_roll) { pose_roll_ = pose_roll; } - uint8_t* private_data() const { return private_data_; } - uint64_t private_data_length() const { return private_data_length_; } - - private: - // Returns size of VideoProjection child elements. - uint64_t PayloadSize() const; - - ProjectionType type_; - float pose_yaw_; - float pose_pitch_; - float pose_roll_; - uint8_t* private_data_; - uint64_t private_data_length_; -}; - -/////////////////////////////////////////////////////////////// -// Track element. -class Track { - public: - // The |seed| parameter is used to synthesize a UID for the track. - explicit Track(unsigned int* seed); - virtual ~Track(); - - // Adds a ContentEncoding element to the Track. Returns true on success. - virtual bool AddContentEncoding(); - - // Returns the ContentEncoding by index. Returns NULL if there is no - // ContentEncoding match. - ContentEncoding* GetContentEncodingByIndex(uint32_t index) const; - - // Returns the size in bytes for the payload of the Track element. - virtual uint64_t PayloadSize() const; - - // Returns the size in bytes of the Track element. - virtual uint64_t Size() const; - - // Output the Track element to the writer. Returns true on success. - virtual bool Write(IMkvWriter* writer) const; - - // Sets the CodecPrivate element of the Track element. Copies |length| - // bytes from |codec_private| to |codec_private_|. Returns true on success. 
- bool SetCodecPrivate(const uint8_t* codec_private, uint64_t length); - - void set_codec_id(const char* codec_id); - const char* codec_id() const { return codec_id_; } - const uint8_t* codec_private() const { return codec_private_; } - void set_language(const char* language); - const char* language() const { return language_; } - void set_max_block_additional_id(uint64_t max_block_additional_id) { - max_block_additional_id_ = max_block_additional_id; - } - uint64_t max_block_additional_id() const { return max_block_additional_id_; } - void set_name(const char* name); - const char* name() const { return name_; } - void set_number(uint64_t number) { number_ = number; } - uint64_t number() const { return number_; } - void set_type(uint64_t type) { type_ = type; } - uint64_t type() const { return type_; } - void set_uid(uint64_t uid) { uid_ = uid; } - uint64_t uid() const { return uid_; } - void set_codec_delay(uint64_t codec_delay) { codec_delay_ = codec_delay; } - uint64_t codec_delay() const { return codec_delay_; } - void set_seek_pre_roll(uint64_t seek_pre_roll) { - seek_pre_roll_ = seek_pre_roll; - } - uint64_t seek_pre_roll() const { return seek_pre_roll_; } - void set_default_duration(uint64_t default_duration) { - default_duration_ = default_duration; - } - uint64_t default_duration() const { return default_duration_; } - - uint64_t codec_private_length() const { return codec_private_length_; } - uint32_t content_encoding_entries_size() const { - return content_encoding_entries_size_; - } - - private: - // Track element names. - char* codec_id_; - uint8_t* codec_private_; - char* language_; - uint64_t max_block_additional_id_; - char* name_; - uint64_t number_; - uint64_t type_; - uint64_t uid_; - uint64_t codec_delay_; - uint64_t seek_pre_roll_; - uint64_t default_duration_; - - // Size of the CodecPrivate data in bytes. - uint64_t codec_private_length_; - - // ContentEncoding element list. - ContentEncoding** content_encoding_entries_; - - // Number of ContentEncoding elements added. - uint32_t content_encoding_entries_size_; - - LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Track); -}; - -/////////////////////////////////////////////////////////////// -// Track that has video specific elements. -class VideoTrack : public Track { - public: - // Supported modes for stereo 3D. - enum StereoMode { - kMono = 0, - kSideBySideLeftIsFirst = 1, - kTopBottomRightIsFirst = 2, - kTopBottomLeftIsFirst = 3, - kSideBySideRightIsFirst = 11 - }; - - enum AlphaMode { kNoAlpha = 0, kAlpha = 1 }; - - // The |seed| parameter is used to synthesize a UID for the track. - explicit VideoTrack(unsigned int* seed); - virtual ~VideoTrack(); - - // Returns the size in bytes for the payload of the Track element plus the - // video specific elements. - virtual uint64_t PayloadSize() const; - - // Output the VideoTrack element to the writer. Returns true on success. - virtual bool Write(IMkvWriter* writer) const; - - // Sets the video's stereo mode. Returns true on success. - bool SetStereoMode(uint64_t stereo_mode); - - // Sets the video's alpha mode. Returns true on success. 
- bool SetAlphaMode(uint64_t alpha_mode); - - void set_display_height(uint64_t height) { display_height_ = height; } - uint64_t display_height() const { return display_height_; } - void set_display_width(uint64_t width) { display_width_ = width; } - uint64_t display_width() const { return display_width_; } - void set_pixel_height(uint64_t height) { pixel_height_ = height; } - uint64_t pixel_height() const { return pixel_height_; } - void set_pixel_width(uint64_t width) { pixel_width_ = width; } - uint64_t pixel_width() const { return pixel_width_; } - - void set_crop_left(uint64_t crop_left) { crop_left_ = crop_left; } - uint64_t crop_left() const { return crop_left_; } - void set_crop_right(uint64_t crop_right) { crop_right_ = crop_right; } - uint64_t crop_right() const { return crop_right_; } - void set_crop_top(uint64_t crop_top) { crop_top_ = crop_top; } - uint64_t crop_top() const { return crop_top_; } - void set_crop_bottom(uint64_t crop_bottom) { crop_bottom_ = crop_bottom; } - uint64_t crop_bottom() const { return crop_bottom_; } - - void set_frame_rate(double frame_rate) { frame_rate_ = frame_rate; } - double frame_rate() const { return frame_rate_; } - void set_height(uint64_t height) { height_ = height; } - uint64_t height() const { return height_; } - uint64_t stereo_mode() { return stereo_mode_; } - uint64_t alpha_mode() { return alpha_mode_; } - void set_width(uint64_t width) { width_ = width; } - uint64_t width() const { return width_; } - - Colour* colour() { return colour_; } - - // Deep copies |colour|. - bool SetColour(const Colour& colour); - - Projection* projection() { return projection_; } - - // Deep copies |projection|. - bool SetProjection(const Projection& projection); - - private: - // Returns the size in bytes of the Video element. - uint64_t VideoPayloadSize() const; - - // Video track element names. - uint64_t display_height_; - uint64_t display_width_; - uint64_t pixel_height_; - uint64_t pixel_width_; - uint64_t crop_left_; - uint64_t crop_right_; - uint64_t crop_top_; - uint64_t crop_bottom_; - double frame_rate_; - uint64_t height_; - uint64_t stereo_mode_; - uint64_t alpha_mode_; - uint64_t width_; - - Colour* colour_; - Projection* projection_; - - LIBWEBM_DISALLOW_COPY_AND_ASSIGN(VideoTrack); -}; - -/////////////////////////////////////////////////////////////// -// Track that has audio specific elements. -class AudioTrack : public Track { - public: - // The |seed| parameter is used to synthesize a UID for the track. - explicit AudioTrack(unsigned int* seed); - virtual ~AudioTrack(); - - // Returns the size in bytes for the payload of the Track element plus the - // audio specific elements. - virtual uint64_t PayloadSize() const; - - // Output the AudioTrack element to the writer. Returns true on success. - virtual bool Write(IMkvWriter* writer) const; - - void set_bit_depth(uint64_t bit_depth) { bit_depth_ = bit_depth; } - uint64_t bit_depth() const { return bit_depth_; } - void set_channels(uint64_t channels) { channels_ = channels; } - uint64_t channels() const { return channels_; } - void set_sample_rate(double sample_rate) { sample_rate_ = sample_rate; } - double sample_rate() const { return sample_rate_; } - - private: - // Audio track element names. - uint64_t bit_depth_; - uint64_t channels_; - double sample_rate_; - - LIBWEBM_DISALLOW_COPY_AND_ASSIGN(AudioTrack); -}; - -/////////////////////////////////////////////////////////////// -// Tracks element -class Tracks { - public: - // Audio and video type defined by the Matroska specs. 
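A hedged sketch of how the Tracks container below is typically populated through the Segment API declared later in this header; the resolution, sample rate, and explicit codec-ID overrides are illustrative values, and passing 0 as the track number lets the muxer choose one:

#include <cstdint>
#include "mkvmuxer/mkvmuxer.h"

uint64_t AddDefaultTracks(mkvmuxer::Segment* segment) {
  const uint64_t video = segment->AddVideoTrack(1280, 720, /*number=*/0);
  const uint64_t audio = segment->AddAudioTrack(48000, 2, /*number=*/0);
  if (!video || !audio) return 0;

  // The Track handles can be fetched back to override defaults such as the
  // codec ID, using the constants declared just below.
  mkvmuxer::Track* const vt = segment->GetTrackByNumber(video);
  if (vt) vt->set_codec_id(mkvmuxer::Tracks::kVp9CodecId);
  mkvmuxer::Track* const at = segment->GetTrackByNumber(audio);
  if (at) at->set_codec_id(mkvmuxer::Tracks::kOpusCodecId);
  return video;
}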
- enum { kVideo = 0x1, kAudio = 0x2 }; - - static const char kOpusCodecId[]; - static const char kVorbisCodecId[]; - static const char kVp8CodecId[]; - static const char kVp9CodecId[]; - static const char kVp10CodecId[]; - static const char kWebVttCaptionsId[]; - static const char kWebVttDescriptionsId[]; - static const char kWebVttMetadataId[]; - static const char kWebVttSubtitlesId[]; - - Tracks(); - ~Tracks(); - - // Adds a Track element to the Tracks object. |track| will be owned and - // deleted by the Tracks object. Returns true on success. |number| is the - // number to use for the track. |number| must be >= 0. If |number| == 0 - // then the muxer will decide on the track number. - bool AddTrack(Track* track, int32_t number); - - // Returns the track by index. Returns NULL if there is no track match. - const Track* GetTrackByIndex(uint32_t idx) const; - - // Search the Tracks and return the track that matches |tn|. Returns NULL - // if there is no track match. - Track* GetTrackByNumber(uint64_t track_number) const; - - // Returns true if the track number is an audio track. - bool TrackIsAudio(uint64_t track_number) const; - - // Returns true if the track number is a video track. - bool TrackIsVideo(uint64_t track_number) const; - - // Output the Tracks element to the writer. Returns true on success. - bool Write(IMkvWriter* writer) const; - - uint32_t track_entries_size() const { return track_entries_size_; } - - private: - // Track element list. - Track** track_entries_; - - // Number of Track elements added. - uint32_t track_entries_size_; - - // Whether or not Tracks element has already been written via IMkvWriter. - mutable bool wrote_tracks_; - - LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tracks); -}; - -/////////////////////////////////////////////////////////////// -// Chapter element -// -class Chapter { - public: - // Set the identifier for this chapter. (This corresponds to the - // Cue Identifier line in WebVTT.) - // TODO(matthewjheaney): the actual serialization of this item in - // MKV is pending. - bool set_id(const char* id); - - // Converts the nanosecond start and stop times of this chapter to - // their corresponding timecode values, and stores them that way. - void set_time(const Segment& segment, uint64_t start_time_ns, - uint64_t end_time_ns); - - // Sets the uid for this chapter. Primarily used to enable - // deterministic output from the muxer. - void set_uid(const uint64_t uid) { uid_ = uid; } - - // Add a title string to this chapter, per the semantics described - // here: - // http://www.matroska.org/technical/specs/index.html - // - // The title ("chapter string") is a UTF-8 string. - // - // The language has ISO 639-2 representation, described here: - // http://www.loc.gov/standards/iso639-2/englangn.html - // http://www.loc.gov/standards/iso639-2/php/English_list.php - // If you specify NULL as the language value, this implies - // English ("eng"). - // - // The country value corresponds to the codes listed here: - // http://www.iana.org/domains/root/db/ - // - // The function returns false if the string could not be allocated. - bool add_string(const char* title, const char* language, const char* country); - - private: - friend class Chapters; - - // For storage of chapter titles that differ by language. - class Display { - public: - // Establish representation invariant for new Display object. - void Init(); - - // Reclaim resources, in anticipation of destruction. - void Clear(); - - // Copies the title to the |title_| member. Returns false on - // error. 
- bool set_title(const char* title); - - // Copies the language to the |language_| member. Returns false - // on error. - bool set_language(const char* language); - - // Copies the country to the |country_| member. Returns false on - // error. - bool set_country(const char* country); - - // If |writer| is non-NULL, serialize the Display sub-element of - // the Atom into the stream. Returns the Display element size on - // success, 0 if error. - uint64_t WriteDisplay(IMkvWriter* writer) const; - - private: - char* title_; - char* language_; - char* country_; - }; - - Chapter(); - ~Chapter(); - - // Establish the representation invariant for a newly-created - // Chapter object. The |seed| parameter is used to create the UID - // for this chapter atom. - void Init(unsigned int* seed); - - // Copies this Chapter object to a different one. This is used when - // expanding a plain array of Chapter objects (see Chapters). - void ShallowCopy(Chapter* dst) const; - - // Reclaim resources used by this Chapter object, pending its - // destruction. - void Clear(); - - // If there is no storage remaining on the |displays_| array for a - // new display object, creates a new, longer array and copies the - // existing Display objects to the new array. Returns false if the - // array cannot be expanded. - bool ExpandDisplaysArray(); - - // If |writer| is non-NULL, serialize the Atom sub-element into the - // stream. Returns the total size of the element on success, 0 if - // error. - uint64_t WriteAtom(IMkvWriter* writer) const; - - // The string identifier for this chapter (corresponds to WebVTT cue - // identifier). - char* id_; - - // Start timecode of the chapter. - uint64_t start_timecode_; - - // Stop timecode of the chapter. - uint64_t end_timecode_; - - // The binary identifier for this chapter. - uint64_t uid_; - - // The Atom element can contain multiple Display sub-elements, as - // the same logical title can be rendered in different languages. - Display* displays_; - - // The physical length (total size) of the |displays_| array. - int displays_size_; - - // The logical length (number of active elements) on the |displays_| - // array. - int displays_count_; - - LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Chapter); -}; - -/////////////////////////////////////////////////////////////// -// Chapters element -// -class Chapters { - public: - Chapters(); - ~Chapters(); - - Chapter* AddChapter(unsigned int* seed); - - // Returns the number of chapters that have been added. - int Count() const; - - // Output the Chapters element to the writer. Returns true on success. - bool Write(IMkvWriter* writer) const; - - private: - // Expands the chapters_ array if there is not enough space to contain - // another chapter object. Returns true on success. - bool ExpandChaptersArray(); - - // If |writer| is non-NULL, serialize the Edition sub-element of the - // Chapters element into the stream. Returns the Edition element - // size on success, 0 if error. - uint64_t WriteEdition(IMkvWriter* writer) const; - - // Total length of the chapters_ array. - int chapters_size_; - - // Number of active chapters on the chapters_ array. - int chapters_count_; - - // Array for storage of chapter objects. 
-  Chapter* chapters_;
-
-  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Chapters);
-};
-
-///////////////////////////////////////////////////////////////
-// Tag element
-//
-class Tag {
- public:
-  bool add_simple_tag(const char* tag_name, const char* tag_string);
-
- private:
-  // Tags calls Clear and the destructor of Tag
-  friend class Tags;
-
-  // For storage of simple tags
-  class SimpleTag {
-   public:
-    // Establish representation invariant for new SimpleTag object.
-    void Init();
-
-    // Reclaim resources, in anticipation of destruction.
-    void Clear();
-
-    // Copies the tag name to the |tag_name_| member. Returns false on
-    // error.
-    bool set_tag_name(const char* tag_name);
-
-    // Copies the tag string to the |tag_string_| member. Returns false
-    // on error.
-    bool set_tag_string(const char* tag_string);
-
-    // If |writer| is non-NULL, serialize the SimpleTag sub-element of
-    // the Tag into the stream. Returns the SimpleTag element size on
-    // success, 0 if error.
-    uint64_t Write(IMkvWriter* writer) const;
-
-   private:
-    char* tag_name_;
-    char* tag_string_;
-  };
-
-  Tag();
-  ~Tag();
-
-  // Copies this Tag object to a different one. This is used when
-  // expanding a plain array of Tag objects (see Tags).
-  void ShallowCopy(Tag* dst) const;
-
-  // Reclaim resources used by this Tag object, pending its
-  // destruction.
-  void Clear();
-
-  // If there is no storage remaining on the |simple_tags_| array for a
-  // new simple tag object, creates a new, longer array and copies the
-  // existing SimpleTag objects to the new array. Returns false if the
-  // array cannot be expanded.
-  bool ExpandSimpleTagsArray();
-
-  // If |writer| is non-NULL, serialize the Tag sub-element into the
-  // stream. Returns the total size of the element on success, 0 if
-  // error.
-  uint64_t Write(IMkvWriter* writer) const;
-
-  // The Tag element can contain multiple SimpleTag sub-elements
-  SimpleTag* simple_tags_;
-
-  // The physical length (total size) of the |simple_tags_| array.
-  int simple_tags_size_;
-
-  // The logical length (number of active elements) on the |simple_tags_|
-  // array.
-  int simple_tags_count_;
-
-  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tag);
-};
-
-///////////////////////////////////////////////////////////////
-// Tags element
-//
-class Tags {
- public:
-  Tags();
-  ~Tags();
-
-  Tag* AddTag();
-
-  // Returns the number of tags that have been added.
-  int Count() const;
-
-  // Output the Tags element to the writer. Returns true on success.
-  bool Write(IMkvWriter* writer) const;
-
- private:
-  // Expands the tags_ array if there is not enough space to contain
-  // another tag object. Returns true on success.
-  bool ExpandTagsArray();
-
-  // Total length of the tags_ array.
-  int tags_size_;
-
-  // Number of active tags on the tags_ array.
-  int tags_count_;
-
-  // Array for storage of tag objects.
-  Tag* tags_;
-
-  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tags);
-};
-
-///////////////////////////////////////////////////////////////
-// Cluster element
-//
-// Notes:
-// |Init| must be called before any other method in this class.
-class Cluster {
- public:
-  // |timecode| is the absolute timecode of the cluster. |cues_pos| is the
-  // position for the cluster within the segment that should be written in
-  // the cues element. |timecode_scale| is the timecode scale of the segment.
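To make the comment above concrete: the |timecode| parameter is expressed in timecode units, not nanoseconds, so callers divide by the segment's timecode scale first. A trivial helper sketch, noting that the common scale of 1000000 (one unit per millisecond) is Matroska convention rather than anything fixed by this header:

#include <cstdint>

// Illustrative only: converts an absolute nanosecond timestamp into the
// timecode units expected by the |timecode| parameter below.
uint64_t NanosecondsToTimecode(uint64_t timestamp_ns, uint64_t timecode_scale) {
  return timestamp_ns / timecode_scale;  // truncating integer division
}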
- Cluster(uint64_t timecode, int64_t cues_pos, uint64_t timecode_scale, - bool write_last_frame_with_duration = false, - bool fixed_size_timecode = false); - ~Cluster(); - - bool Init(IMkvWriter* ptr_writer); - - // Adds a frame to be output in the file. The frame is written out through - // |writer_| if successful. Returns true on success. - bool AddFrame(const Frame* frame); - - // Adds a frame to be output in the file. The frame is written out through - // |writer_| if successful. Returns true on success. - // Inputs: - // data: Pointer to the data - // length: Length of the data - // track_number: Track to add the data to. Value returned by Add track - // functions. The range of allowed values is [1, 126]. - // timecode: Absolute (not relative to cluster) timestamp of the - // frame, expressed in timecode units. - // is_key: Flag telling whether or not this frame is a key frame. - bool AddFrame(const uint8_t* data, uint64_t length, uint64_t track_number, - uint64_t timecode, // timecode units (absolute) - bool is_key); - - // Adds a frame to be output in the file. The frame is written out through - // |writer_| if successful. Returns true on success. - // Inputs: - // data: Pointer to the data - // length: Length of the data - // additional: Pointer to the additional data - // additional_length: Length of the additional data - // add_id: Value of BlockAddID element - // track_number: Track to add the data to. Value returned by Add track - // functions. The range of allowed values is [1, 126]. - // abs_timecode: Absolute (not relative to cluster) timestamp of the - // frame, expressed in timecode units. - // is_key: Flag telling whether or not this frame is a key frame. - bool AddFrameWithAdditional(const uint8_t* data, uint64_t length, - const uint8_t* additional, - uint64_t additional_length, uint64_t add_id, - uint64_t track_number, uint64_t abs_timecode, - bool is_key); - - // Adds a frame to be output in the file. The frame is written out through - // |writer_| if successful. Returns true on success. - // Inputs: - // data: Pointer to the data. - // length: Length of the data. - // discard_padding: DiscardPadding element value. - // track_number: Track to add the data to. Value returned by Add track - // functions. The range of allowed values is [1, 126]. - // abs_timecode: Absolute (not relative to cluster) timestamp of the - // frame, expressed in timecode units. - // is_key: Flag telling whether or not this frame is a key frame. - bool AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length, - int64_t discard_padding, - uint64_t track_number, uint64_t abs_timecode, - bool is_key); - - // Writes a frame of metadata to the output medium; returns true on - // success. - // Inputs: - // data: Pointer to the data - // length: Length of the data - // track_number: Track to add the data to. Value returned by Add track - // functions. The range of allowed values is [1, 126]. - // timecode: Absolute (not relative to cluster) timestamp of the - // metadata frame, expressed in timecode units. - // duration: Duration of metadata frame, in timecode units. - // - // The metadata frame is written as a block group, with a duration - // sub-element but no reference time sub-elements (indicating that - // it is considered a keyframe, per Matroska semantics). - bool AddMetadata(const uint8_t* data, uint64_t length, uint64_t track_number, - uint64_t timecode, uint64_t duration); - - // Increments the size of the cluster's data in bytes. 
-  void AddPayloadSize(uint64_t size);
-
-  // Closes the cluster so no more data can be written to it. Will update the
-  // cluster's size if |writer_| is seekable. Returns true on success. This
-  // variant of Finalize() fails when |write_last_frame_with_duration_| is set
-  // to true.
-  bool Finalize();
-
-  // Closes the cluster so no more data can be written to it. Will update the
-  // cluster's size if |writer_| is seekable. Returns true on success.
-  // Inputs:
-  //   set_last_frame_duration: Boolean indicating whether or not the duration
-  //                            of the last frame should be set. If set to
-  //                            false, the |duration| value is ignored and
-  //                            |write_last_frame_with_duration_| will not be
-  //                            honored.
-  //   duration: Duration of the Cluster in timecode scale.
-  bool Finalize(bool set_last_frame_duration, uint64_t duration);
-
-  // Returns the size in bytes for the entire Cluster element.
-  uint64_t Size() const;
-
-  // Given |abs_timecode|, calculates timecode relative to most recent timecode.
-  // Returns -1 on failure, or a relative timecode.
-  int64_t GetRelativeTimecode(int64_t abs_timecode) const;
-
-  int64_t size_position() const { return size_position_; }
-  int32_t blocks_added() const { return blocks_added_; }
-  uint64_t payload_size() const { return payload_size_; }
-  int64_t position_for_cues() const { return position_for_cues_; }
-  uint64_t timecode() const { return timecode_; }
-  uint64_t timecode_scale() const { return timecode_scale_; }
-  void set_write_last_frame_with_duration(bool write_last_frame_with_duration) {
-    write_last_frame_with_duration_ = write_last_frame_with_duration;
-  }
-  bool write_last_frame_with_duration() const {
-    return write_last_frame_with_duration_;
-  }
-
- private:
-  // Iterator type for the |stored_frames_| map.
-  typedef std::map<uint64_t, std::list<Frame*> >::iterator FrameMapIterator;
-
-  // Utility method that confirms that blocks can still be added, and that the
-  // cluster header has been written. Used by |DoWriteFrame*|. Returns true
-  // when successful.
-  bool PreWriteBlock();
-
-  // Utility method used by the |DoWriteFrame*| methods that handles the book
-  // keeping required after each block is written.
-  void PostWriteBlock(uint64_t element_size);
-
-  // Does some verification and calls WriteFrame.
-  bool DoWriteFrame(const Frame* const frame);
-
-  // Either holds back the given frame, or writes it out depending on whether or
-  // not |write_last_frame_with_duration_| is set.
-  bool QueueOrWriteFrame(const Frame* const frame);
-
-  // Outputs the Cluster header to |writer_|. Returns true on success.
-  bool WriteClusterHeader();
-
-  // Number of blocks added to the cluster.
-  int32_t blocks_added_;
-
-  // Flag telling if the cluster has been closed.
-  bool finalized_;
-
-  // Flag indicating whether the cluster's timecode will always be written out
-  // using 8 bytes.
-  bool fixed_size_timecode_;
-
-  // Flag telling if the cluster's header has been written.
-  bool header_written_;
-
-  // The size of the cluster elements in bytes.
-  uint64_t payload_size_;
-
-  // The file position used for cue points.
-  const int64_t position_for_cues_;
-
-  // The file position of the cluster's size element.
-  int64_t size_position_;
-
-  // The absolute timecode of the cluster.
-  const uint64_t timecode_;
-
-  // The timecode scale of the Segment containing the cluster.
-  const uint64_t timecode_scale_;
-
-  // Flag indicating whether the last frame of the cluster should be written as
-  // a Block with Duration. If set to true, then it will result in holding back
-  // of frames and the parameterized version of Finalize() must be called to
-  // finish writing the Cluster.
-  bool write_last_frame_with_duration_;
-
-  // Map used to hold back frames, if required. Track number is the key.
-  std::map<uint64_t, std::list<Frame*> > stored_frames_;
-
-  // Map from track number to the timestamp of the last block written for that
-  // track.
-  std::map<uint64_t, uint64_t> last_block_timestamp_;
-
-  // Pointer to the writer object. Not owned by this class.
-  IMkvWriter* writer_;
-
-  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Cluster);
-};
-
-///////////////////////////////////////////////////////////////
-// SeekHead element
-class SeekHead {
- public:
-  SeekHead();
-  ~SeekHead();
-
-  // TODO(fgalligan): Change this to reserve a certain size. Then check how
-  // big the seek entry to be added is as not every seek entry will be the
-  // maximum size it could be.
-  // Adds a seek entry to be written out when the element is finalized. |id|
-  // must be the coded mkv element id. |pos| is the file position of the
-  // element. Returns true on success.
-  bool AddSeekEntry(uint32_t id, uint64_t pos);
-
-  // Writes out SeekHead and SeekEntry elements. Returns true on success.
-  bool Finalize(IMkvWriter* writer) const;
-
-  // Returns the id of the Seek Entry at the given index. Returns -1 if index is
-  // out of range.
-  uint32_t GetId(int index) const;
-
-  // Returns the position of the Seek Entry at the given index. Returns -1 if
-  // index is out of range.
-  uint64_t GetPosition(int index) const;
-
-  // Sets the Seek Entry id and position at given index.
-  // Returns true on success.
-  bool SetSeekEntry(int index, uint32_t id, uint64_t position);
-
-  // Reserves space by writing out a Void element which will be updated with
-  // a SeekHead element later. Returns true on success.
-  bool Write(IMkvWriter* writer);
-
-  // We are going to put a cap on the number of Seek Entries.
-  const static int32_t kSeekEntryCount = 5;
-
- private:
-  // Returns the maximum size in bytes of one seek entry.
-  uint64_t MaxEntrySize() const;
-
-  // Seek entry id element list.
-  uint32_t seek_entry_id_[kSeekEntryCount];
-
-  // Seek entry pos element list.
-  uint64_t seek_entry_pos_[kSeekEntryCount];
-
-  // The file position of SeekHead element.
-  int64_t start_pos_;
-
-  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SeekHead);
-};
-
-///////////////////////////////////////////////////////////////
-// Segment Information element
-class SegmentInfo {
- public:
-  SegmentInfo();
-  ~SegmentInfo();
-
-  // Will update the duration if |duration_| is > 0.0. Returns true on success.
-  bool Finalize(IMkvWriter* writer) const;
-
-  // Sets |muxing_app_| and |writing_app_|.
-  bool Init();
-
-  // Output the Segment Information element to the writer. Returns true on
-  // success.
-  bool Write(IMkvWriter* writer);
-
-  void set_duration(double duration) { duration_ = duration; }
-  double duration() const { return duration_; }
-  void set_muxing_app(const char* app);
-  const char* muxing_app() const { return muxing_app_; }
-  void set_timecode_scale(uint64_t scale) { timecode_scale_ = scale; }
-  uint64_t timecode_scale() const { return timecode_scale_; }
-  void set_writing_app(const char* app);
-  const char* writing_app() const { return writing_app_; }
-  void set_date_utc(int64_t date_utc) { date_utc_ = date_utc; }
-  int64_t date_utc() const { return date_utc_; }
-
- private:
-  // Segment Information element names.
-  // Initially set to -1 to signify that a duration has not been set and should
-  // not be written out.
- double duration_; - // Set to libwebm-%d.%d.%d.%d, major, minor, build, revision. - char* muxing_app_; - uint64_t timecode_scale_; - // Initially set to libwebm-%d.%d.%d.%d, major, minor, build, revision. - char* writing_app_; - // LLONG_MIN when DateUTC is not set. - int64_t date_utc_; - - // The file position of the duration element. - int64_t duration_pos_; - - LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SegmentInfo); -}; - -/////////////////////////////////////////////////////////////// -// This class represents the main segment in a WebM file. Currently only -// supports one Segment element. -// -// Notes: -// |Init| must be called before any other method in this class. -class Segment { - public: - enum Mode { kLive = 0x1, kFile = 0x2 }; - - enum CuesPosition { - kAfterClusters = 0x0, // Position Cues after Clusters - Default - kBeforeClusters = 0x1 // Position Cues before Clusters - }; - - static const uint32_t kDefaultDocTypeVersion = 4; - static const uint64_t kDefaultMaxClusterDuration = 30000000000ULL; - - Segment(); - ~Segment(); - - // Initializes |SegmentInfo| and returns result. Always returns false when - // |ptr_writer| is NULL. - bool Init(IMkvWriter* ptr_writer); - - // Adds a generic track to the segment. Returns the newly-allocated - // track object (which is owned by the segment) on success, NULL on - // error. |number| is the number to use for the track. |number| - // must be >= 0. If |number| == 0 then the muxer will decide on the - // track number. - Track* AddTrack(int32_t number); - - // Adds a Vorbis audio track to the segment. Returns the number of the track - // on success, 0 on error. |number| is the number to use for the audio track. - // |number| must be >= 0. If |number| == 0 then the muxer will decide on - // the track number. - uint64_t AddAudioTrack(int32_t sample_rate, int32_t channels, int32_t number); - - // Adds an empty chapter to the chapters of this segment. Returns - // non-NULL on success. After adding the chapter, the caller should - // populate its fields via the Chapter member functions. - Chapter* AddChapter(); - - // Adds an empty tag to the tags of this segment. Returns - // non-NULL on success. After adding the tag, the caller should - // populate its fields via the Tag member functions. - Tag* AddTag(); - - // Adds a cue point to the Cues element. |timestamp| is the time in - // nanoseconds of the cue's time. |track| is the Track of the Cue. This - // function must be called after AddFrame to calculate the correct - // BlockNumber for the CuePoint. Returns true on success. - bool AddCuePoint(uint64_t timestamp, uint64_t track); - - // Adds a frame to be output in the file. Returns true on success. - // Inputs: - // data: Pointer to the data - // length: Length of the data - // track_number: Track to add the data to. Value returned by Add track - // functions. - // timestamp: Timestamp of the frame in nanoseconds from 0. - // is_key: Flag telling whether or not this frame is a key frame. - bool AddFrame(const uint8_t* data, uint64_t length, uint64_t track_number, - uint64_t timestamp_ns, bool is_key); - - // Writes a frame of metadata to the output medium; returns true on - // success. - // Inputs: - // data: Pointer to the data - // length: Length of the data - // track_number: Track to add the data to. Value returned by Add track - // functions. - // timecode: Absolute timestamp of the metadata frame, expressed - // in nanosecond units. - // duration: Duration of metadata frame, in nanosecond units. 
-  //
-  // The metadata frame is written as a block group, with a duration
-  // sub-element but no reference time sub-elements (indicating that
-  // it is considered a keyframe, per Matroska semantics).
-  bool AddMetadata(const uint8_t* data, uint64_t length, uint64_t track_number,
-                   uint64_t timestamp_ns, uint64_t duration_ns);
-
-  // Writes a frame with additional data to the output medium; returns true on
-  // success.
-  // Inputs:
-  //   data: Pointer to the data.
-  //   length: Length of the data.
-  //   additional: Pointer to additional data.
-  //   additional_length: Length of additional data.
-  //   add_id: Additional ID which identifies the type of additional data.
-  //   track_number: Track to add the data to. Value returned by Add track
-  //                 functions.
-  //   timestamp: Absolute timestamp of the frame, expressed in nanosecond
-  //              units.
-  //   is_key: Flag telling whether or not this frame is a key frame.
-  bool AddFrameWithAdditional(const uint8_t* data, uint64_t length,
-                              const uint8_t* additional,
-                              uint64_t additional_length, uint64_t add_id,
-                              uint64_t track_number, uint64_t timestamp,
-                              bool is_key);
-
-  // Writes a frame with DiscardPadding to the output medium; returns true on
-  // success.
-  // Inputs:
-  //   data: Pointer to the data.
-  //   length: Length of the data.
-  //   discard_padding: DiscardPadding element value.
-  //   track_number: Track to add the data to. Value returned by Add track
-  //                 functions.
-  //   timestamp: Absolute timestamp of the frame, expressed in nanosecond
-  //              units.
-  //   is_key: Flag telling whether or not this frame is a key frame.
-  bool AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length,
-                                  int64_t discard_padding,
-                                  uint64_t track_number, uint64_t timestamp,
-                                  bool is_key);
-
-  // Writes a Frame to the output medium. Chooses the correct way of writing
-  // the frame (Block vs SimpleBlock) based on the parameters passed.
-  // Inputs:
-  //   frame: frame object
-  bool AddGenericFrame(const Frame* frame);
-
-  // Adds a VP8 video track to the segment. Returns the number of the track on
-  // success, 0 on error. |number| is the number to use for the video track.
-  // |number| must be >= 0. If |number| == 0 then the muxer will decide on
-  // the track number.
-  uint64_t AddVideoTrack(int32_t width, int32_t height, int32_t number);
-
-  // This function must be called after Finalize() if you need a copy of the
-  // output with Cues written before the Clusters. It will return false if the
-  // writer is not seekable or if chunking is set to true.
-  // Input parameters:
-  // reader - an IMkvReader object created with the same underlying file of the
-  //          current writer object. Make sure to close the existing writer
-  //          object before creating this so that all the data is properly
-  //          flushed and available for reading.
-  // writer - an IMkvWriter object pointing to a *different* file than the one
-  //          pointed by the current writer object. This file will contain the
-  //          Cues element before the Clusters.
-  bool CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader,
-                                     IMkvWriter* writer);
-
-  // Sets which track to use for the Cues element. Must have added the track
-  // before calling this function. Returns true on success. |track_number| is
-  // returned by the Add track functions.
-  bool CuesTrack(uint64_t track_number);
-
-  // This will force the muxer to create a new Cluster when the next frame is
-  // added.
-  void ForceNewClusterOnNextFrame();
-
-  // Writes out any frames that have not been written out. Finalizes the last
-  // cluster. May update the size and duration of the segment. May output the
-  // Cues element. May finalize the SeekHead element. Returns true on success.
-  bool Finalize();
-
-  // Returns the Cues object.
-  Cues* GetCues() { return &cues_; }
-
-  // Returns the Segment Information object.
-  const SegmentInfo* GetSegmentInfo() const { return &segment_info_; }
-  SegmentInfo* GetSegmentInfo() { return &segment_info_; }
-
-  // Search the Tracks and return the track that matches |track_number|.
-  // Returns NULL if there is no track match.
-  Track* GetTrackByNumber(uint64_t track_number) const;
-
-  // Toggles whether to output a cues element.
-  void OutputCues(bool output_cues);
-
-  // Toggles whether to write the last frame in each Cluster with Duration.
-  void AccurateClusterDuration(bool accurate_cluster_duration);
-
-  // Toggles whether to write the Cluster Timecode using exactly 8 bytes.
-  void UseFixedSizeClusterTimecode(bool fixed_size_cluster_timecode);
-
-  // Sets if the muxer will output files in chunks or not. |chunking| is a
-  // flag telling whether or not to turn on chunking. |filename| is the base
-  // filename for the chunk files. The header chunk file will be named
-  // |filename|.hdr and the data chunks will be named
-  // |filename|_XXXXXX.chk. Chunking implies that the muxer will be writing
-  // to files so the muxer will use the default MkvWriter class to control
-  // what data is written to what files. Returns true on success.
-  // TODO: Should we change the IMkvWriter Interface to add Open and Close?
-  // That will force the interface to be dependent on files.
-  bool SetChunking(bool chunking, const char* filename);
-
-  bool chunking() const { return chunking_; }
-  uint64_t cues_track() const { return cues_track_; }
-  void set_max_cluster_duration(uint64_t max_cluster_duration) {
-    max_cluster_duration_ = max_cluster_duration;
-  }
-  uint64_t max_cluster_duration() const { return max_cluster_duration_; }
-  void set_max_cluster_size(uint64_t max_cluster_size) {
-    max_cluster_size_ = max_cluster_size;
-  }
-  uint64_t max_cluster_size() const { return max_cluster_size_; }
-  void set_mode(Mode mode) { mode_ = mode; }
-  Mode mode() const { return mode_; }
-  CuesPosition cues_position() const { return cues_position_; }
-  bool output_cues() const { return output_cues_; }
-  void set_estimate_file_duration(bool estimate_duration) {
-    estimate_file_duration_ = estimate_duration;
-  }
-  bool estimate_file_duration() const { return estimate_file_duration_; }
-  const SegmentInfo* segment_info() const { return &segment_info_; }
-  void set_duration(double duration) { duration_ = duration; }
-  double duration() const { return duration_; }
-
-  // Returns true when codec IDs are valid for WebM.
-  bool DocTypeIsWebm() const;
-
- private:
-  // Checks if header information has been output and initialized. If not it
-  // will output the Segment element and initialize the SeekHead element and
-  // Cues elements.
-  bool CheckHeaderInfo();
-
-  // Sets |doc_type_version_| based on the current element requirements.
-  void UpdateDocTypeVersion();
-
-  // Sets |name| according to how many chunks have been written. |ext| is the
-  // file extension. |name| must be deleted by the calling app. Returns true
-  // on success.
-  bool UpdateChunkName(const char* ext, char** name) const;
-
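A minimal end-to-end sketch of the public Segment API above (Init(), track setup, AddFrame(), Finalize()); illustrative only, with error handling trimmed. The file name, resolution, and single-frame input are placeholders, and MkvWriter is the file-backed IMkvWriter implementation that ships with libwebm:

#include <cstdint>
#include "mkvmuxer/mkvmuxer.h"
#include "mkvmuxer/mkvwriter.h"

bool MuxOneFrame(const uint8_t* frame_data, uint64_t frame_size) {
  mkvmuxer::MkvWriter writer;
  if (!writer.Open("out.webm")) return false;

  mkvmuxer::Segment segment;
  if (!segment.Init(&writer)) return false;
  segment.set_mode(mkvmuxer::Segment::kFile);  // the default; explicit here

  const uint64_t track = segment.AddVideoTrack(640, 360, /*number=*/0);
  if (!track) return false;

  // Timestamps are nanoseconds from 0; the muxer converts to timecode units.
  if (!segment.AddFrame(frame_data, frame_size, track,
                        /*timestamp_ns=*/0, /*is_key=*/true)) {
    return false;
  }

  const bool ok = segment.Finalize();
  writer.Close();
  return ok;
}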
-  // Returns the maximum offset within the segment's payload. When chunking
-  // this function is needed to determine offsets of elements within the
-  // chunked files. Returns -1 on error.
-  int64_t MaxOffset();
-
-  // Adds the frame to our frame array.
-  bool QueueFrame(Frame* frame);
-
-  // Output all frames that are queued. Returns -1 on error, otherwise
-  // it returns the number of frames written.
-  int WriteFramesAll();
-
-  // Output all frames that are queued that have an end time that is less
-  // than |timestamp|. Returns true on success and if there are no frames
-  // queued.
-  bool WriteFramesLessThan(uint64_t timestamp);
-
-  // Outputs the segment header, Segment Information element, SeekHead element,
-  // and Tracks element to |writer_|.
-  bool WriteSegmentHeader();
-
-  // Given a frame with the specified timestamp (nanosecond units) and
-  // keyframe status, determine whether a new cluster should be
-  // created, before writing enqueued frames and the frame itself. The
-  // function returns one of the following values:
-  //   -1 = error: an out-of-order frame was detected
-  //   0 = do not create a new cluster, and write frame to the existing cluster
-  //   1 = create a new cluster, and write frame to that new cluster
-  //   2 = create a new cluster, and re-run test
-  int TestFrame(uint64_t track_num, uint64_t timestamp_ns, bool key) const;
-
-  // Create a new cluster, using the earlier of the first enqueued
-  // frame, or the indicated time. Returns true on success.
-  bool MakeNewCluster(uint64_t timestamp_ns);
-
-  // Checks whether a new cluster needs to be created, and if so
-  // creates a new cluster. Returns false if creation of a new cluster
-  // was necessary but creation was not successful.
-  bool DoNewClusterProcessing(uint64_t track_num, uint64_t timestamp_ns,
-                              bool key);
-
-  // Adjusts Cue Point values (to place Cues before Clusters) so that they
-  // reflect the correct offsets.
-  void MoveCuesBeforeClusters();
-
-  // This function recursively computes the correct cluster offsets (this is
-  // done to move the Cues before Clusters). It recursively updates the change
-  // in size (which indicates a change in cluster offset) until no sizes change.
-  // Parameters:
-  // diff - indicates the difference in size of the Cues element that needs to
-  //        be accounted for.
-  // index - index in the list of Cues which is currently being adjusted.
-  // cue_size - sum of size of all the CuePoint elements.
-  void MoveCuesBeforeClustersHelper(uint64_t diff, int index,
-                                    uint64_t* cue_size);
-
-  // Seeds the random number generator used to make UIDs.
-  unsigned int seed_;
-
-  // WebM elements
-  Cues cues_;
-  SeekHead seek_head_;
-  SegmentInfo segment_info_;
-  Tracks tracks_;
-  Chapters chapters_;
-  Tags tags_;
-
-  // Number of chunks written.
-  int chunk_count_;
-
-  // Current chunk filename.
-  char* chunk_name_;
-
-  // Default MkvWriter object created by this class used for writing clusters
-  // out in separate files.
-  MkvWriter* chunk_writer_cluster_;
-
-  // Default MkvWriter object created by this class used for writing Cues
-  // element out to a file.
-  MkvWriter* chunk_writer_cues_;
-
-  // Default MkvWriter object created by this class used for writing the
-  // Matroska header out to a file.
-  MkvWriter* chunk_writer_header_;
-
-  // Flag telling whether or not the muxer is chunking output to multiple
-  // files.
-  bool chunking_;
-
-  // Base filename for the chunked files.
-  char* chunking_base_name_;
-
-  // File position offset where the Clusters end.
-  int64_t cluster_end_offset_;
-
-  // List of clusters.
-  Cluster** cluster_list_;
-
-  // Number of cluster pointers allocated in the cluster list.
- int32_t cluster_list_capacity_; - - // Number of clusters in the cluster list. - int32_t cluster_list_size_; - - // Indicates whether Cues should be written before or after Clusters - CuesPosition cues_position_; - - // Track number that is associated with the cues element for this segment. - uint64_t cues_track_; - - // Tells the muxer to force a new cluster on the next Block. - bool force_new_cluster_; - - // List of stored audio frames. These variables are used to store frames so - // the muxer can follow the guideline "Audio blocks that contain the video - // key frame's timecode should be in the same cluster as the video key frame - // block." - Frame** frames_; - - // Number of frame pointers allocated in the frame list. - int32_t frames_capacity_; - - // Number of frames in the frame list. - int32_t frames_size_; - - // Flag telling if a video track has been added to the segment. - bool has_video_; - - // Flag telling if the segment's header has been written. - bool header_written_; - - // Duration of the last block in nanoseconds. - uint64_t last_block_duration_; - - // Last timestamp in nanoseconds added to a cluster. - uint64_t last_timestamp_; - - // Last timestamp in nanoseconds by track number added to a cluster. - uint64_t last_track_timestamp_[kMaxTrackNumber]; - - // Number of frames written per track. - uint64_t track_frames_written_[kMaxTrackNumber]; - - // Maximum time in nanoseconds for a cluster duration. This variable is a - // guideline and some clusters may have a longer duration. Default is 30 - // seconds. - uint64_t max_cluster_duration_; - - // Maximum size in bytes for a cluster. This variable is a guideline and - // some clusters may have a larger size. Default is 0 which signifies that - // the muxer will decide the size. - uint64_t max_cluster_size_; - - // The mode that segment is in. If set to |kLive| the writer must not - // seek backwards. - Mode mode_; - - // Flag telling the muxer that a new cue point should be added. - bool new_cuepoint_; - - // TODO(fgalligan): Should we add support for more than one Cues element? - // Flag whether or not the muxer should output a Cues element. - bool output_cues_; - - // Flag whether or not the last frame in each Cluster will have a Duration - // element in it. - bool accurate_cluster_duration_; - - // Flag whether or not to write the Cluster Timecode using exactly 8 bytes. - bool fixed_size_cluster_timecode_; - - // Flag whether or not to estimate the file duration. - bool estimate_file_duration_; - - // The size of the EBML header, used to validate the header if - // WriteEbmlHeader() is called more than once. - int32_t ebml_header_size_; - - // The file position of the segment's payload. - int64_t payload_pos_; - - // The file position of the element's size. - int64_t size_position_; - - // Current DocTypeVersion (|doc_type_version_|) and that written in - // WriteSegmentHeader(). - // WriteEbmlHeader() will be called from Finalize() if |doc_type_version_| - // differs from |doc_type_version_written_|. - uint32_t doc_type_version_; - uint32_t doc_type_version_written_; - - // If |duration_| is > 0, then explicitly set the duration of the segment. - double duration_; - - // Pointer to the writer objects. Not owned by this class. 
-  IMkvWriter* writer_cluster_;
-  IMkvWriter* writer_cues_;
-  IMkvWriter* writer_header_;
-
-  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Segment);
-};
-
-}  // namespace mkvmuxer
-
-#endif  // MKVMUXER_MKVMUXER_H_
diff --git a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxertypes.h b/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxertypes.h
deleted file mode 100644
index e5db121605f6..000000000000
--- a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxertypes.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-#ifndef MKVMUXER_MKVMUXERTYPES_H_
-#define MKVMUXER_MKVMUXERTYPES_H_
-
-namespace mkvmuxer {
-typedef unsigned char uint8;
-typedef short int16;
-typedef int int32;
-typedef unsigned int uint32;
-typedef long long int64;
-typedef unsigned long long uint64;
-}  // namespace mkvmuxer
-
-// Copied from Chromium basictypes.h
-// A macro to disallow the copy constructor and operator= functions
-// This should be used in the private: declarations for a class
-#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \
-  TypeName(const TypeName&); \
-  void operator=(const TypeName&)
-
-#endif  // MKVMUXER_MKVMUXERTYPES_H_
diff --git a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc b/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
deleted file mode 100644
index 355d4e22b3c6..000000000000
--- a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
+++ /dev/null
@@ -1,744 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-#include "mkvmuxer/mkvmuxerutil.h"
-
-#ifdef __ANDROID__
-#include <fcntl.h>
-#include <unistd.h>
-#endif
-
-#include <cassert>
-#include <cmath>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-#include <new>
-
-#include "common/webmids.h"
-#include "mkvmuxer/mkvmuxer.h"
-#include "mkvmuxer/mkvwriter.h"
-
-namespace mkvmuxer {
-
-namespace {
-
-// Date elements are always 8 octets in size.
-const int kDateElementSize = 8;
-
-uint64 WriteBlock(IMkvWriter* writer, const Frame* const frame, int64 timecode,
-                  uint64 timecode_scale) {
-  uint64 block_additional_elem_size = 0;
-  uint64 block_addid_elem_size = 0;
-  uint64 block_more_payload_size = 0;
-  uint64 block_more_elem_size = 0;
-  uint64 block_additions_payload_size = 0;
-  uint64 block_additions_elem_size = 0;
-  if (frame->additional()) {
-    block_additional_elem_size =
-        EbmlElementSize(libwebm::kMkvBlockAdditional, frame->additional(),
-                        frame->additional_length());
-    block_addid_elem_size = EbmlElementSize(
-        libwebm::kMkvBlockAddID, static_cast<uint64>(frame->add_id()));
-
-    block_more_payload_size =
-        block_addid_elem_size + block_additional_elem_size;
-    block_more_elem_size =
-        EbmlMasterElementSize(libwebm::kMkvBlockMore, block_more_payload_size) +
-        block_more_payload_size;
-    block_additions_payload_size = block_more_elem_size;
-    block_additions_elem_size =
-        EbmlMasterElementSize(libwebm::kMkvBlockAdditions,
-                              block_additions_payload_size) +
-        block_additions_payload_size;
-  }
-
-  uint64 discard_padding_elem_size = 0;
-  if (frame->discard_padding() != 0) {
-    discard_padding_elem_size =
-        EbmlElementSize(libwebm::kMkvDiscardPadding,
-                        static_cast<int64>(frame->discard_padding()));
-  }
-
-  const uint64 reference_block_timestamp =
-      frame->reference_block_timestamp() / timecode_scale;
-  uint64 reference_block_elem_size = 0;
-  if (!frame->is_key()) {
-    reference_block_elem_size =
-        EbmlElementSize(libwebm::kMkvReferenceBlock, reference_block_timestamp);
-  }
-
-  const uint64 duration = frame->duration() / timecode_scale;
-  uint64 block_duration_elem_size = 0;
-  if (duration > 0)
-    block_duration_elem_size =
-        EbmlElementSize(libwebm::kMkvBlockDuration, duration);
-
-  const uint64 block_payload_size = 4 + frame->length();
-  const uint64 block_elem_size =
-      EbmlMasterElementSize(libwebm::kMkvBlock, block_payload_size) +
-      block_payload_size;
-
-  const uint64 block_group_payload_size =
-      block_elem_size + block_additions_elem_size + block_duration_elem_size +
-      discard_padding_elem_size + reference_block_elem_size;
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockGroup,
-                              block_group_payload_size)) {
-    return 0;
-  }
-
-  if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlock, block_payload_size))
-    return 0;
-
-  if (WriteUInt(writer, frame->track_number()))
-    return 0;
-
-  if (SerializeInt(writer, timecode, 2))
-    return 0;
-
-  // For a Block, flags is always 0.
-  if (SerializeInt(writer, 0, 1))
-    return 0;
-
-  if (writer->Write(frame->frame(), static_cast<uint32>(frame->length())))
-    return 0;
-
-  if (frame->additional()) {
-    if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockAdditions,
-                                block_additions_payload_size)) {
-      return 0;
-    }
-
-    if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockMore,
-                                block_more_payload_size))
-      return 0;
-
-    if (!WriteEbmlElement(writer, libwebm::kMkvBlockAddID,
-                          static_cast<uint64>(frame->add_id())))
-      return 0;
-
-    if (!WriteEbmlElement(writer, libwebm::kMkvBlockAdditional,
-                          frame->additional(), frame->additional_length())) {
-      return 0;
-    }
-  }
-
-  if (frame->discard_padding() != 0 &&
-      !WriteEbmlElement(writer, libwebm::kMkvDiscardPadding,
-                        static_cast<int64>(frame->discard_padding()))) {
-    return 0;
-  }
-
-  if (!frame->is_key() &&
-      !WriteEbmlElement(writer, libwebm::kMkvReferenceBlock,
-                        reference_block_timestamp)) {
-    return 0;
-  }
-
-  if (duration > 0 &&
-      !WriteEbmlElement(writer, libwebm::kMkvBlockDuration, duration)) {
-    return 0;
-  }
-  return EbmlMasterElementSize(libwebm::kMkvBlockGroup,
-                               block_group_payload_size) +
-         block_group_payload_size;
-}
-
-uint64 WriteSimpleBlock(IMkvWriter* writer, const Frame* const frame,
-                        int64 timecode) {
-  if (WriteID(writer, libwebm::kMkvSimpleBlock))
-    return 0;
-
-  const int32 size = static_cast<int32>(frame->length()) + 4;
-  if (WriteUInt(writer, size))
-    return 0;
-
-  if (WriteUInt(writer, static_cast<uint64>(frame->track_number())))
-    return 0;
-
-  if (SerializeInt(writer, timecode, 2))
-    return 0;
-
-  uint64 flags = 0;
-  if (frame->is_key())
-    flags |= 0x80;
-
-  if (SerializeInt(writer, flags, 1))
-    return 0;
-
-  if (writer->Write(frame->frame(), static_cast<uint32>(frame->length())))
-    return 0;
-
-  return GetUIntSize(libwebm::kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 +
-         frame->length();
-}
-
-}  // namespace
-
-int32 GetCodedUIntSize(uint64 value) {
-  if (value < 0x000000000000007FULL)
-    return 1;
-  else if (value < 0x0000000000003FFFULL)
-    return 2;
-  else if (value < 0x00000000001FFFFFULL)
-    return 3;
-  else if (value < 0x000000000FFFFFFFULL)
-    return 4;
-  else if (value < 0x00000007FFFFFFFFULL)
-    return 5;
-  else if (value < 0x000003FFFFFFFFFFULL)
-    return 6;
-  else if (value < 0x0001FFFFFFFFFFFFULL)
-    return 7;
-  return 8;
-}
-
-int32 GetUIntSize(uint64 value) {
-  if (value < 0x0000000000000100ULL)
-    return 1;
-  else if (value < 0x0000000000010000ULL)
-    return 2;
-  else if (value < 0x0000000001000000ULL)
-    return 3;
-  else if (value < 0x0000000100000000ULL)
-    return 4;
-  else if (value < 0x0000010000000000ULL)
-    return 5;
-  else if (value < 0x0001000000000000ULL)
-    return 6;
-  else if (value < 0x0100000000000000ULL)
-    return 7;
-  return 8;
-}
-
-int32 GetIntSize(int64 value) {
-  // Doubling the requested value ensures positive values with their high bit
-  // set are written with 0-padding to avoid flipping the signedness.
-  const uint64 v = (value < 0) ? value ^ -1LL : value;
-  return GetUIntSize(2 * v);
-}
-
-uint64 EbmlMasterElementSize(uint64 type, uint64 value) {
-  // Size of EBML ID
-  int32 ebml_size = GetUIntSize(type);
-
-  // Datasize
-  ebml_size += GetCodedUIntSize(value);
-
-  return ebml_size;
-}
-
-uint64 EbmlElementSize(uint64 type, int64 value) {
-  // Size of EBML ID
-  int32 ebml_size = GetUIntSize(type);
-
-  // Datasize
-  ebml_size += GetIntSize(value);
-
-  // Size of Datasize
-  ebml_size++;
-
-  return ebml_size;
-}
-
-uint64 EbmlElementSize(uint64 type, uint64 value) {
-  return EbmlElementSize(type, value, 0);
-}
-
-uint64 EbmlElementSize(uint64 type, uint64 value, uint64 fixed_size) {
-  // Size of EBML ID
-  uint64 ebml_size = GetUIntSize(type);
-
-  // Datasize
-  ebml_size += (fixed_size > 0) ? fixed_size : GetUIntSize(value);
-
-  // Size of Datasize
-  ebml_size++;
-
-  return ebml_size;
-}
-
-uint64 EbmlElementSize(uint64 type, float /* value */) {
-  // Size of EBML ID
-  uint64 ebml_size = GetUIntSize(type);
-
-  // Datasize
-  ebml_size += sizeof(float);
-
-  // Size of Datasize
-  ebml_size++;
-
-  return ebml_size;
-}
-
-uint64 EbmlElementSize(uint64 type, const char* value) {
-  if (!value)
-    return 0;
-
-  // Size of EBML ID
-  uint64 ebml_size = GetUIntSize(type);
-
-  // Datasize
-  ebml_size += strlen(value);
-
-  // Size of Datasize
-  ebml_size += GetCodedUIntSize(strlen(value));
-
-  return ebml_size;
-}
-
-uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size) {
-  if (!value)
-    return 0;
-
-  // Size of EBML ID
-  uint64 ebml_size = GetUIntSize(type);
-
-  // Datasize
-  ebml_size += size;
-
-  // Size of Datasize
-  ebml_size += GetCodedUIntSize(size);
-
-  return ebml_size;
-}
-
-uint64 EbmlDateElementSize(uint64 type) {
-  // Size of EBML ID
-  uint64 ebml_size = GetUIntSize(type);
-
-  // Datasize
-  ebml_size += kDateElementSize;
-
-  // Size of Datasize
-  ebml_size++;
-
-  return ebml_size;
-}
-
-int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) {
-  if (!writer || size < 1 || size > 8)
-    return -1;
-
-  for (int32 i = 1; i <= size; ++i) {
-    const int32 byte_count = size - i;
-    const int32 bit_count = byte_count * 8;
-
-    const int64 bb = value >> bit_count;
-    const uint8 b = static_cast<uint8>(bb);
-
-    const int32 status = writer->Write(&b, 1);
-
-    if (status < 0)
-      return status;
-  }
-
-  return 0;
-}
-
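SerializeFloat() below extracts the IEEE-754 bit pattern of a float through a union so it can be written big-endian byte by byte, avoiding a strict-aliasing violation. The same result is commonly obtained with memcpy, shown here as an illustrative stand-alone sketch (not part of the original file):

#include <cstdint>
#include <cstring>

// Aliasing-safe alternative to the union approach: memcpy is permitted to
// copy object representations between unrelated types.
uint32_t FloatBits(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return bits;
}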
-int32 SerializeFloat(IMkvWriter* writer, float f) { - if (!writer) - return -1; - - assert(sizeof(uint32) == sizeof(float)); - // This union is merely used to avoid a reinterpret_cast from float& to - // uint32& which will result in violation of strict aliasing. - union U32 { - uint32 u32; - float f; - } value; - value.f = f; - - for (int32 i = 1; i <= 4; ++i) { - const int32 byte_count = 4 - i; - const int32 bit_count = byte_count * 8; - - const uint8 byte = static_cast<uint8>(value.u32 >> bit_count); - - const int32 status = writer->Write(&byte, 1); - - if (status < 0) - return status; - } - - return 0; -} - -int32 WriteUInt(IMkvWriter* writer, uint64 value) { - if (!writer) - return -1; - - int32 size = GetCodedUIntSize(value); - - return WriteUIntSize(writer, value, size); -} - -int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) { - if (!writer || size < 0 || size > 8) - return -1; - - if (size > 0) { - const uint64 bit = 1LL << (size * 7); - - if (value > (bit - 2)) - return -1; - - value |= bit; - } else { - size = 1; - int64 bit; - - for (;;) { - bit = 1LL << (size * 7); - const uint64 max = bit - 2; - - if (value <= max) - break; - - ++size; - } - - if (size > 8) - return false; - - value |= bit; - } - - return SerializeInt(writer, value, size); -} - -int32 WriteID(IMkvWriter* writer, uint64 type) { - if (!writer) - return -1; - - writer->ElementStartNotify(type, writer->Position()); - - const int32 size = GetUIntSize(type); - - return SerializeInt(writer, type, size); -} - -bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 type, uint64 size) { - if (!writer) - return false; - - if (WriteID(writer, type)) - return false; - - if (WriteUInt(writer, size)) - return false; - - return true; -} - -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value) { - return WriteEbmlElement(writer, type, value, 0); -} - -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value, - uint64 fixed_size) { - if (!writer) - return false; - - if (WriteID(writer, type)) - return false; - - uint64 size = GetUIntSize(value); - if (fixed_size > 0) { - if (size > fixed_size) - return false; - size = fixed_size; - } - if (WriteUInt(writer, size)) - return false; - - if (SerializeInt(writer, value, static_cast<int32>(size))) - return false; - - return true; -} - -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value) { - if (!writer) - return false; - - if (WriteID(writer, type)) - return 0; - - const uint64 size = GetIntSize(value); - if (WriteUInt(writer, size)) - return false; - - if (SerializeInt(writer, value, static_cast<int32>(size))) - return false; - - return true; -} - -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value) { - if (!writer) - return false; - - if (WriteID(writer, type)) - return false; - - if (WriteUInt(writer, 4)) - return false; - - if (SerializeFloat(writer, value)) - return false; - - return true; -} - -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value) { - if (!writer || !value) - return false; - - if (WriteID(writer, type)) - return false; - - const uint64 length = strlen(value); - if (WriteUInt(writer, length)) - return false; - - if (writer->Write(value, static_cast<uint32>(length))) - return false; - - return true; -} - -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value, - uint64 size) { - if (!writer || !value || size < 1) - return false; - - if (WriteID(writer, type)) - return false; - - if (WriteUInt(writer, size)) - return false; - - if (writer->Write(value, static_cast<uint32>(size))) - return false; - - return true; -} -
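As a concrete trace of the unsigned overload above, assume the one-byte Matroska Timecode ID, 0xE7, chosen purely for illustration:

//   WriteEbmlElement(writer, 0xE7 /* Timecode */, 0) emits three bytes:
//     0xE7  element ID                 (WriteID)
//     0x81  payload size 1, EBML-coded (WriteUInt)
//     0x00  the value itself           (SerializeInt)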
-bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value) { - if (!writer) - return false; - - if (WriteID(writer, type)) - return false; - - if (WriteUInt(writer, kDateElementSize)) - return false; - - if (SerializeInt(writer, value, kDateElementSize)) - return false; - - return true; -} - -uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame, - Cluster* cluster) { - if (!writer || !frame || !frame->IsValid() || !cluster || - !cluster->timecode_scale()) - return 0; - - // Technically the timecode for a block can be less than the - // timecode for the cluster itself (remember that block timecode - // is a signed, 16-bit integer). However, as a simplification we - // only permit non-negative cluster-relative timecodes for blocks. - const int64 relative_timecode = cluster->GetRelativeTimecode( - frame->timestamp() / cluster->timecode_scale()); - if (relative_timecode < 0 || relative_timecode > kMaxBlockTimecode) - return 0; - - return frame->CanBeSimpleBlock() ? - WriteSimpleBlock(writer, frame, relative_timecode) : - WriteBlock(writer, frame, relative_timecode, - cluster->timecode_scale()); -} - -uint64 WriteVoidElement(IMkvWriter* writer, uint64 size) { - if (!writer) - return false; - - // Subtract one for the void ID and the coded size. - uint64 void_entry_size = size - 1 - GetCodedUIntSize(size - 1); - uint64 void_size = EbmlMasterElementSize(libwebm::kMkvVoid, void_entry_size) + - void_entry_size; - - if (void_size != size) - return 0; - - const int64 payload_position = writer->Position(); - if (payload_position < 0) - return 0; - - if (WriteID(writer, libwebm::kMkvVoid)) - return 0; - - if (WriteUInt(writer, void_entry_size)) - return 0; - - const uint8 value = 0; - for (int32 i = 0; i < static_cast<int32>(void_entry_size); ++i) { - if (writer->Write(&value, 1)) - return 0; - } - - const int64 stop_position = writer->Position(); - if (stop_position < 0 || - stop_position - payload_position != static_cast<int64>(void_size)) - return 0; - - return void_size; -} - -void GetVersion(int32* major, int32* minor, int32* build, int32* revision) { - *major = 0; - *minor = 2; - *build = 1; - *revision = 0; -} - -uint64 MakeUID(unsigned int* seed) { - uint64 uid = 0; - -#ifdef __MINGW32__ - srand(*seed); -#endif - - for (int i = 0; i < 7; ++i) { // avoid problems with 8-byte values - uid <<= 8; - -// TODO(fgalligan): Move random number generation to platform specific code.
-#ifdef _MSC_VER - (void)seed; - const int32 nn = rand(); -#elif __ANDROID__ - (void)seed; - int32 temp_num = 1; - int fd = open("/dev/urandom", O_RDONLY); - if (fd != -1) { - read(fd, &temp_num, sizeof(temp_num)); - close(fd); - } - const int32 nn = temp_num; -#elif defined __MINGW32__ - const int32 nn = rand(); -#else - const int32 nn = rand_r(seed); -#endif - const int32 n = 0xFF & (nn >> 4); // throw away low-order bits - - uid |= n; - } - - return uid; -} - -bool IsMatrixCoefficientsValueValid(uint64_t value) { - switch (value) { - case mkvmuxer::Colour::kGbr: - case mkvmuxer::Colour::kBt709: - case mkvmuxer::Colour::kUnspecifiedMc: - case mkvmuxer::Colour::kReserved: - case mkvmuxer::Colour::kFcc: - case mkvmuxer::Colour::kBt470bg: - case mkvmuxer::Colour::kSmpte170MMc: - case mkvmuxer::Colour::kSmpte240MMc: - case mkvmuxer::Colour::kYcocg: - case mkvmuxer::Colour::kBt2020NonConstantLuminance: - case mkvmuxer::Colour::kBt2020ConstantLuminance: - return true; - } - return false; -} - -bool IsChromaSitingHorzValueValid(uint64_t value) { - switch (value) { - case mkvmuxer::Colour::kUnspecifiedCsh: - case mkvmuxer::Colour::kLeftCollocated: - case mkvmuxer::Colour::kHalfCsh: - return true; - } - return false; -} - -bool IsChromaSitingVertValueValid(uint64_t value) { - switch (value) { - case mkvmuxer::Colour::kUnspecifiedCsv: - case mkvmuxer::Colour::kTopCollocated: - case mkvmuxer::Colour::kHalfCsv: - return true; - } - return false; -} - -bool IsColourRangeValueValid(uint64_t value) { - switch (value) { - case mkvmuxer::Colour::kUnspecifiedCr: - case mkvmuxer::Colour::kBroadcastRange: - case mkvmuxer::Colour::kFullRange: - case mkvmuxer::Colour::kMcTcDefined: - return true; - } - return false; -} - -bool IsTransferCharacteristicsValueValid(uint64_t value) { - switch (value) { - case mkvmuxer::Colour::kIturBt709Tc: - case mkvmuxer::Colour::kUnspecifiedTc: - case mkvmuxer::Colour::kReservedTc: - case mkvmuxer::Colour::kGamma22Curve: - case mkvmuxer::Colour::kGamma28Curve: - case mkvmuxer::Colour::kSmpte170MTc: - case mkvmuxer::Colour::kSmpte240MTc: - case mkvmuxer::Colour::kLinear: - case mkvmuxer::Colour::kLog: - case mkvmuxer::Colour::kLogSqrt: - case mkvmuxer::Colour::kIec6196624: - case mkvmuxer::Colour::kIturBt1361ExtendedColourGamut: - case mkvmuxer::Colour::kIec6196621: - case mkvmuxer::Colour::kIturBt202010bit: - case mkvmuxer::Colour::kIturBt202012bit: - case mkvmuxer::Colour::kSmpteSt2084: - case mkvmuxer::Colour::kSmpteSt4281Tc: - case mkvmuxer::Colour::kAribStdB67Hlg: - return true; - } - return false; -} - -bool IsPrimariesValueValid(uint64_t value) { - switch (value) { - case mkvmuxer::Colour::kReservedP0: - case mkvmuxer::Colour::kIturBt709P: - case mkvmuxer::Colour::kUnspecifiedP: - case mkvmuxer::Colour::kReservedP3: - case mkvmuxer::Colour::kIturBt470M: - case mkvmuxer::Colour::kIturBt470Bg: - case mkvmuxer::Colour::kSmpte170MP: - case mkvmuxer::Colour::kSmpte240MP: - case mkvmuxer::Colour::kFilm: - case mkvmuxer::Colour::kIturBt2020: - case mkvmuxer::Colour::kSmpteSt4281P: - case mkvmuxer::Colour::kJedecP22Phosphors: - return true; - } - return false; -} - -} // namespace mkvmuxer
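The void_size != size guard in WriteVoidElement() above is not dead code: void_entry_size is derived from GetCodedUIntSize(size - 1) before the header bytes are subtracted, so a few total sizes cannot be hit exactly. A worked trace, assuming the standard one-byte Matroska Void ID (0xEC):

//   size = 10:  void_entry_size = 10 - 1 - 1 = 8
//               void_size = 1 (ID) + 1 (coded length) + 8 = 10   -> written
//   size = 129: void_entry_size = 129 - 1 - 2 = 126, yet 126 codes in one
//               byte, so void_size = 1 + 1 + 126 = 128 != 129    -> returns 0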
diff --git a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h b/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h deleted file mode 100644 index 132388da5995..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -#ifndef MKVMUXER_MKVMUXERUTIL_H_ -#define MKVMUXER_MKVMUXERUTIL_H_ - -#include "mkvmuxertypes.h" - -#include "stdint.h" - -namespace mkvmuxer { -class Cluster; -class Frame; -class IMkvWriter; - -// TODO(tomfinegan): mkvmuxer:: integer types continue to be used here because -// changing them causes pain for downstream projects. It would be nice if a -// solution that allows removal of the mkvmuxer:: integer types while avoiding -// pain for downstream users of libwebm could be found. Considering that -// mkvmuxerutil.{cc,h} are really, for the great majority of cases, EBML size -// calculation and writer functions, perhaps a more EBML focused utility would -// be the way to go as a first step. - -const uint64 kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL; -const int64 kMaxBlockTimecode = 0x07FFFLL; - -// Writes out |value| in Big Endian order. Returns 0 on success. -int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size); - -// Returns the size in bytes of the element. -int32 GetUIntSize(uint64 value); -int32 GetIntSize(int64 value); -int32 GetCodedUIntSize(uint64 value); -uint64 EbmlMasterElementSize(uint64 type, uint64 value); -uint64 EbmlElementSize(uint64 type, int64 value); -uint64 EbmlElementSize(uint64 type, uint64 value); -uint64 EbmlElementSize(uint64 type, float value); -uint64 EbmlElementSize(uint64 type, const char* value); -uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size); -uint64 EbmlDateElementSize(uint64 type); - -// Returns the size in bytes of the element assuming that the element was -// written using |fixed_size| bytes. If |fixed_size| is set to zero, then it -// computes the necessary number of bytes based on |value|. -uint64 EbmlElementSize(uint64 type, uint64 value, uint64 fixed_size); - -// Creates an EBML coded number from |value| and writes it out. The size of -// the coded number is determined by the value of |value|. |value| must not -// be in a coded form. Returns 0 on success. -int32 WriteUInt(IMkvWriter* writer, uint64 value); - -// Creates an EBML coded number from |value| and writes it out. The size of -// the coded number is determined by the value of |size|. |value| must not -// be in a coded form. Returns 0 on success. -int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size); - -// Output an Mkv master element. Returns true if the element was written. -bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 value, uint64 size); - -// Outputs an Mkv ID, calls |IMkvWriter::ElementStartNotify|, and passes the -// ID to |SerializeInt|. Returns 0 on success. -int32 WriteID(IMkvWriter* writer, uint64 type); - -// Output an Mkv non-master element. Returns true if the element was written. -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value); -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value); -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value); -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value); -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value, - uint64 size); -bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value); - -// Output an Mkv non-master element using fixed size.
The element will be -// written out using exactly |fixed_size| bytes. If |fixed_size| is set to zero -// then it computes the necessary number of bytes based on |value|. Returns true -// if the element was written. -bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value, - uint64 fixed_size); - -// Output an Mkv Frame. It decides the correct element to write (Block vs -// SimpleBlock) based on the parameters of the Frame. -uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame, - Cluster* cluster); - -// Output a void element. |size| must be the entire size in bytes that will be -// void. The function will calculate the size of the void header and subtract -// it from |size|. -uint64 WriteVoidElement(IMkvWriter* writer, uint64 size); - -// Returns the version number of the muxer in |major|, |minor|, |build|, -// and |revision|. -void GetVersion(int32* major, int32* minor, int32* build, int32* revision); - -// Returns a random number to be used for UID, using |seed| to seed -// the random-number generator (see POSIX rand_r() for semantics). -uint64 MakeUID(unsigned int* seed); - -// Colour field validation helpers. All return true when |value| is valid. -bool IsMatrixCoefficientsValueValid(uint64_t value); -bool IsChromaSitingHorzValueValid(uint64_t value); -bool IsChromaSitingVertValueValid(uint64_t value); -bool IsColourRangeValueValid(uint64_t value); -bool IsTransferCharacteristicsValueValid(uint64_t value); -bool IsPrimariesValueValid(uint64_t value); - -} // namespace mkvmuxer - -#endif // MKVMUXER_MKVMUXERUTIL_H_
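A pattern implied by this header's split between the EbmlElementSize() and WriteEbml*() families: callers price out a master element's payload before emitting its header, because the coded length field precedes the children on the wire. A hedged sketch of that call sequence (the kMkv* IDs come from common/webmids.h; the surrounding muxer bookkeeping is elided):

//   uint64 payload = EbmlElementSize(libwebm::kMkvTimecode, timecode)
//                    /* + sizes of the cluster's other children */;
//   WriteEbmlMasterElement(writer, libwebm::kMkvCluster, payload);
//   WriteEbmlElement(writer, libwebm::kMkvTimecode, timecode);
//   /* ...write the children whose sizes were counted above... */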
diff --git a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc b/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc deleted file mode 100644 index 84655d802a80..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. - -#include "mkvmuxer/mkvwriter.h" - -#include <sys/types.h> - -#ifdef _MSC_VER -#include <share.h> // for _SH_DENYWR -#endif - -namespace mkvmuxer { - -MkvWriter::MkvWriter() : file_(NULL), writer_owns_file_(true) {} - -MkvWriter::MkvWriter(FILE* fp) : file_(fp), writer_owns_file_(false) {} - -MkvWriter::~MkvWriter() { Close(); } - -int32 MkvWriter::Write(const void* buffer, uint32 length) { - if (!file_) - return -1; - - if (length == 0) - return 0; - - if (buffer == NULL) - return -1; - - const size_t bytes_written = fwrite(buffer, 1, length, file_); - - return (bytes_written == length) ? 0 : -1; -} - -bool MkvWriter::Open(const char* filename) { - if (filename == NULL) - return false; - - if (file_) - return false; - -#ifdef _MSC_VER - file_ = _fsopen(filename, "wb", _SH_DENYWR); -#else - file_ = fopen(filename, "wb"); -#endif - if (file_ == NULL) - return false; - return true; -} - -void MkvWriter::Close() { - if (file_ && writer_owns_file_) { - fclose(file_); - } - file_ = NULL; -} - -int64 MkvWriter::Position() const { - if (!file_) - return 0; - -#ifdef _MSC_VER - return _ftelli64(file_); -#else - return ftell(file_); -#endif -} - -int32 MkvWriter::Position(int64 position) { - if (!file_) - return -1; - -#ifdef _MSC_VER - return _fseeki64(file_, position, SEEK_SET); -#else - return fseeko(file_, static_cast<off_t>(position), SEEK_SET); -#endif -} - -bool MkvWriter::Seekable() const { return true; } - -void MkvWriter::ElementStartNotify(uint64, int64) {} - -} // namespace mkvmuxer diff --git a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.h b/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.h deleted file mode 100644 index 4227c63748aa..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. - -#ifndef MKVMUXER_MKVWRITER_H_ -#define MKVMUXER_MKVWRITER_H_ - -#include <stdio.h> - -#include "mkvmuxer/mkvmuxer.h" -#include "mkvmuxer/mkvmuxertypes.h" - -namespace mkvmuxer { - -// Default implementation of the IMkvWriter interface on Windows. -class MkvWriter : public IMkvWriter { - public: - MkvWriter(); - explicit MkvWriter(FILE* fp); - virtual ~MkvWriter(); - - // IMkvWriter interface - virtual int64 Position() const; - virtual int32 Position(int64 position); - virtual bool Seekable() const; - virtual int32 Write(const void* buffer, uint32 length); - virtual void ElementStartNotify(uint64 element_id, int64 position); - - // Creates and opens a file for writing. |filename| is the name of the file - // to open. This function will overwrite the contents of |filename|. Returns - // true on success. - bool Open(const char* filename); - - // Closes an opened file. - void Close(); - - private: - // File handle to output file. - FILE* file_; - bool writer_owns_file_; - - LIBWEBM_DISALLOW_COPY_AND_ASSIGN(MkvWriter); -}; - -} // namespace mkvmuxer - -#endif // MKVMUXER_MKVWRITER_H_
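A short usage sketch for the writer deleted above (the output path is a placeholder, and error handling is reduced to the status codes the class already returns):

#include "mkvmuxer/mkvwriter.h"

int WriteEbmlMagic() {
  mkvmuxer::MkvWriter writer;
  if (!writer.Open("out.webm"))  // overwrites any existing file
    return -1;
  static const unsigned char kEbmlId[4] = {0x1A, 0x45, 0xDF, 0xA3};
  const mkvmuxer::int32 status = writer.Write(kEbmlId, 4);  // 0 on success
  writer.Close();
  return status;
}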
-#include "mkvparser/mkvparser.h" - -#if defined(_MSC_VER) && _MSC_VER < 1800 -#include // _isnan() / _finite() -#define MSC_COMPAT -#endif - -#include -#include -#include -#include -#include -#include -#include - -#include "common/webmids.h" - -// disable deprecation warnings for auto_ptr -#if defined(__GNUC__) && __GNUC__ >= 5 -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - -namespace mkvparser { -const float MasteringMetadata::kValueNotPresent = FLT_MAX; -const long long Colour::kValueNotPresent = LLONG_MAX; -const float Projection::kValueNotPresent = FLT_MAX; - -#ifdef MSC_COMPAT -inline bool isnan(double val) { return !!_isnan(val); } -inline bool isinf(double val) { return !_finite(val); } -#else -inline bool isnan(double val) { return std::isnan(val); } -inline bool isinf(double val) { return std::isinf(val); } -#endif // MSC_COMPAT - -IMkvReader::~IMkvReader() {} - -template -Type* SafeArrayAlloc(unsigned long long num_elements, - unsigned long long element_size) { - if (num_elements == 0 || element_size == 0) - return NULL; - - const size_t kMaxAllocSize = 0x80000000; // 2GiB - const unsigned long long num_bytes = num_elements * element_size; - if (element_size > (kMaxAllocSize / num_elements)) - return NULL; - if (num_bytes != static_cast(num_bytes)) - return NULL; - - return new (std::nothrow) Type[static_cast(num_bytes)]; -} - -void GetVersion(int& major, int& minor, int& build, int& revision) { - major = 1; - minor = 0; - build = 0; - revision = 30; -} - -long long ReadUInt(IMkvReader* pReader, long long pos, long& len) { - if (!pReader || pos < 0) - return E_FILE_FORMAT_INVALID; - - len = 1; - unsigned char b; - int status = pReader->Read(pos, 1, &b); - - if (status < 0) // error or underflow - return status; - - if (status > 0) // interpreted as "underflow" - return E_BUFFER_NOT_FULL; - - if (b == 0) // we can't handle u-int values larger than 8 bytes - return E_FILE_FORMAT_INVALID; - - unsigned char m = 0x80; - - while (!(b & m)) { - m >>= 1; - ++len; - } - - long long result = b & (~m); - ++pos; - - for (int i = 1; i < len; ++i) { - status = pReader->Read(pos, 1, &b); - - if (status < 0) { - len = 1; - return status; - } - - if (status > 0) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result <<= 8; - result |= b; - - ++pos; - } - - return result; -} - -// Reads an EBML ID and returns it. -// An ID must at least 1 byte long, cannot exceed 4, and its value must be -// greater than 0. -// See known EBML values and EBMLMaxIDLength: -// http://www.matroska.org/technical/specs/index.html -// Returns the ID, or a value less than 0 to report an error while reading the -// ID. -long long ReadID(IMkvReader* pReader, long long pos, long& len) { - if (pReader == NULL || pos < 0) - return E_FILE_FORMAT_INVALID; - - // Read the first byte. The length in bytes of the ID is determined by - // finding the first set bit in the first byte of the ID. - unsigned char temp_byte = 0; - int read_status = pReader->Read(pos, 1, &temp_byte); - - if (read_status < 0) - return E_FILE_FORMAT_INVALID; - else if (read_status > 0) // No data to read. - return E_BUFFER_NOT_FULL; - - if (temp_byte == 0) // ID length > 8 bytes; invalid file. - return E_FILE_FORMAT_INVALID; - - int bit_pos = 0; - const int kMaxIdLengthInBytes = 4; - const int kCheckByte = 0x80; - - // Find the first bit that's set. 
-// Reads an EBML ID and returns it. -// An ID must be at least 1 byte long, cannot exceed 4, and its value must be -// greater than 0. -// See known EBML values and EBMLMaxIDLength: -// http://www.matroska.org/technical/specs/index.html -// Returns the ID, or a value less than 0 to report an error while reading the -// ID. -long long ReadID(IMkvReader* pReader, long long pos, long& len) { - if (pReader == NULL || pos < 0) - return E_FILE_FORMAT_INVALID; - - // Read the first byte. The length in bytes of the ID is determined by - // finding the first set bit in the first byte of the ID. - unsigned char temp_byte = 0; - int read_status = pReader->Read(pos, 1, &temp_byte); - - if (read_status < 0) - return E_FILE_FORMAT_INVALID; - else if (read_status > 0) // No data to read. - return E_BUFFER_NOT_FULL; - - if (temp_byte == 0) // ID length > 8 bytes; invalid file. - return E_FILE_FORMAT_INVALID; - - int bit_pos = 0; - const int kMaxIdLengthInBytes = 4; - const int kCheckByte = 0x80; - - // Find the first bit that's set. - bool found_bit = false; - for (; bit_pos < kMaxIdLengthInBytes; ++bit_pos) { - if ((kCheckByte >> bit_pos) & temp_byte) { - found_bit = true; - break; - } - } - - if (!found_bit) { - // The value is too large to be a valid ID. - return E_FILE_FORMAT_INVALID; - } - - // Read the remaining bytes of the ID (if any). - const int id_length = bit_pos + 1; - long long ebml_id = temp_byte; - for (int i = 1; i < id_length; ++i) { - ebml_id <<= 8; - read_status = pReader->Read(pos + i, 1, &temp_byte); - - if (read_status < 0) - return E_FILE_FORMAT_INVALID; - else if (read_status > 0) - return E_BUFFER_NOT_FULL; - - ebml_id |= temp_byte; - } - - len = id_length; - return ebml_id; -} - -long long GetUIntLength(IMkvReader* pReader, long long pos, long& len) { - if (!pReader || pos < 0) - return E_FILE_FORMAT_INVALID; - - long long total, available; - - int status = pReader->Length(&total, &available); - if (status < 0 || (total >= 0 && available > total)) - return E_FILE_FORMAT_INVALID; - - len = 1; - - if (pos >= available) - return pos; // too few bytes available - - unsigned char b; - - status = pReader->Read(pos, 1, &b); - - if (status != 0) - return status; - - if (b == 0) // we can't handle u-int values larger than 8 bytes - return E_FILE_FORMAT_INVALID; - - unsigned char m = 0x80; - - while (!(b & m)) { - m >>= 1; - ++len; - } - - return 0; // success -} - -// TODO(vigneshv): This function assumes that unsigned values never have their -// high bit set. -long long UnserializeUInt(IMkvReader* pReader, long long pos, long long size) { - if (!pReader || pos < 0 || (size <= 0) || (size > 8)) - return E_FILE_FORMAT_INVALID; - - long long result = 0; - - for (long long i = 0; i < size; ++i) { - unsigned char b; - - const long status = pReader->Read(pos, 1, &b); - - if (status < 0) - return status; - - result <<= 8; - result |= b; - - ++pos; - } - - return result; -} - -long UnserializeFloat(IMkvReader* pReader, long long pos, long long size_, - double& result) { - if (!pReader || pos < 0 || ((size_ != 4) && (size_ != 8))) - return E_FILE_FORMAT_INVALID; - - const long size = static_cast<long>(size_); - - unsigned char buf[8]; - - const int status = pReader->Read(pos, size, buf); - - if (status < 0) // error - return status; - - if (size == 4) { - union { - float f; - unsigned long ff; - }; - - ff = 0; - - for (int i = 0;;) { - ff |= buf[i]; - - if (++i >= 4) - break; - - ff <<= 8; - } - - result = f; - } else { - union { - double d; - unsigned long long dd; - }; - - dd = 0; - - for (int i = 0;;) { - dd |= buf[i]; - - if (++i >= 8) - break; - - dd <<= 8; - } - - result = d; - } - - if (mkvparser::isinf(result) || mkvparser::isnan(result)) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -long UnserializeInt(IMkvReader* pReader, long long pos, long long size, - long long& result_ref) { - if (!pReader || pos < 0 || size < 1 || size > 8) - return E_FILE_FORMAT_INVALID; - - signed char first_byte = 0; - const long status = pReader->Read(pos, 1, (unsigned char*)&first_byte); - - if (status < 0) - return status; - - unsigned long long result = first_byte; - ++pos; - - for (long i = 1; i < size; ++i) { - unsigned char b; - - const long status = pReader->Read(pos, 1, &b); - - if (status < 0) - return status; - - result <<= 8; - result |= b; - - ++pos; - } - - result_ref = static_cast<long long>(result); - return 0; -} - -long UnserializeString(IMkvReader* pReader, long long pos, long long size, - char*& str) { - delete[] str; - str = NULL; - - if (size >= LONG_MAX || size < 0) - return E_FILE_FORMAT_INVALID; - - // +1 for
'\0' terminator - const long required_size = static_cast<long>(size) + 1; - - str = SafeArrayAlloc<char>(1, required_size); - if (str == NULL) - return E_FILE_FORMAT_INVALID; - - unsigned char* const buf = reinterpret_cast<unsigned char*>(str); - - const long status = pReader->Read(pos, static_cast<long>(size), buf); - - if (status) { - delete[] str; - str = NULL; - - return status; - } - - str[required_size - 1] = '\0'; - return 0; -} - -long ParseElementHeader(IMkvReader* pReader, long long& pos, long long stop, - long long& id, long long& size) { - if (stop >= 0 && pos >= stop) - return E_FILE_FORMAT_INVALID; - - long len; - - id = ReadID(pReader, pos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume id - - if (stop >= 0 && pos >= stop) - return E_FILE_FORMAT_INVALID; - - size = ReadUInt(pReader, pos, len); - - if (size < 0 || len < 1 || len > 8) { - // Invalid: Negative payload size, negative or 0 length integer, or integer - // larger than 64 bits (libwebm cannot handle them). - return E_FILE_FORMAT_INVALID; - } - - // Avoid rolling over pos when very close to LLONG_MAX. - const unsigned long long rollover_check = - static_cast<unsigned long long>(pos) + len; - if (rollover_check > LLONG_MAX) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume length of size - - // pos now designates payload - - if (stop >= 0 && pos > stop) - return E_FILE_FORMAT_INVALID; - - return 0; // success -} - -bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id, - long long& val) { - if (!pReader || pos < 0) - return false; - - long long total = 0; - long long available = 0; - - const long status = pReader->Length(&total, &available); - if (status < 0 || (total >= 0 && available > total)) - return false; - - long len = 0; - - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (available - pos) > len) - return false; - - if (static_cast<unsigned long>(id) != expected_id) - return false; - - pos += len; // consume id - - const long long size = ReadUInt(pReader, pos, len); - if (size < 0 || size > 8 || len < 1 || len > 8 || (available - pos) > len) - return false; - - pos += len; // consume length of size of payload - - val = UnserializeUInt(pReader, pos, size); - if (val < 0) - return false; - - pos += size; // consume size of payload - - return true; -} - -bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id, - unsigned char*& buf, size_t& buflen) { - if (!pReader || pos < 0) - return false; - - long long total = 0; - long long available = 0; - - long status = pReader->Length(&total, &available); - if (status < 0 || (total >= 0 && available > total)) - return false; - - long len = 0; - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (available - pos) > len) - return false; - - if (static_cast<unsigned long>(id) != expected_id) - return false; - - pos += len; // consume id - - const long long size = ReadUInt(pReader, pos, len); - if (size < 0 || len <= 0 || len > 8 || (available - pos) > len) - return false; - - unsigned long long rollover_check = - static_cast<unsigned long long>(pos) + len; - if (rollover_check > LLONG_MAX) - return false; - - pos += len; // consume length of size of payload - - rollover_check = static_cast<unsigned long long>(pos) + size; - if (rollover_check > LLONG_MAX) - return false; - - if ((pos + size) > available) - return false; - - if (size >= LONG_MAX) - return false; - - const long buflen_ = static_cast<long>(size); - - buf = SafeArrayAlloc<unsigned char>(1, buflen_); - if (!buf) - return false; - - status = pReader->Read(pos, buflen_, buf); - if (status != 0) - return false; - - buflen = buflen_; - - pos += size;
// consume size of payload - return true; -} - -EBMLHeader::EBMLHeader() : m_docType(NULL) { Init(); } - -EBMLHeader::~EBMLHeader() { delete[] m_docType; } - -void EBMLHeader::Init() { - m_version = 1; - m_readVersion = 1; - m_maxIdLength = 4; - m_maxSizeLength = 8; - - if (m_docType) { - delete[] m_docType; - m_docType = NULL; - } - - m_docTypeVersion = 1; - m_docTypeReadVersion = 1; -} - -long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { - if (!pReader) - return E_FILE_FORMAT_INVALID; - - long long total, available; - - long status = pReader->Length(&total, &available); - - if (status < 0) // error - return status; - - pos = 0; - - // Scan until we find what looks like the first byte of the EBML header. - const long long kMaxScanBytes = (available >= 1024) ? 1024 : available; - const unsigned char kEbmlByte0 = 0x1A; - unsigned char scan_byte = 0; - - while (pos < kMaxScanBytes) { - status = pReader->Read(pos, 1, &scan_byte); - - if (status < 0) // error - return status; - else if (status > 0) - return E_BUFFER_NOT_FULL; - - if (scan_byte == kEbmlByte0) - break; - - ++pos; - } - - long len = 0; - const long long ebml_id = ReadID(pReader, pos, len); - - if (ebml_id == E_BUFFER_NOT_FULL) - return E_BUFFER_NOT_FULL; - - if (len != 4 || ebml_id != libwebm::kMkvEBML) - return E_FILE_FORMAT_INVALID; - - // Move read pos forward to the EBML header size field. - pos += 4; - - // Read length of size field. - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return E_FILE_FORMAT_INVALID; - else if (result > 0) // need more data - return E_BUFFER_NOT_FULL; - - if (len < 1 || len > 8) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && ((total - pos) < len)) - return E_FILE_FORMAT_INVALID; - - if ((available - pos) < len) - return pos + len; // try again later - - // Read the EBML header size. 
- result = ReadUInt(pReader, pos, len); - - if (result < 0) // error - return result; - - pos += len; // consume size field - - // pos now designates start of payload - - if ((total >= 0) && ((total - pos) < result)) - return E_FILE_FORMAT_INVALID; - - if ((available - pos) < result) - return pos + result; - - const long long end = pos + result; - - Init(); - - while (pos < end) { - long long id, size; - - status = ParseElementHeader(pReader, pos, end, id, size); - - if (status < 0) // error - return status; - - if (size == 0) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvEBMLVersion) { - m_version = UnserializeUInt(pReader, pos, size); - - if (m_version <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvEBMLReadVersion) { - m_readVersion = UnserializeUInt(pReader, pos, size); - - if (m_readVersion <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvEBMLMaxIDLength) { - m_maxIdLength = UnserializeUInt(pReader, pos, size); - - if (m_maxIdLength <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvEBMLMaxSizeLength) { - m_maxSizeLength = UnserializeUInt(pReader, pos, size); - - if (m_maxSizeLength <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDocType) { - if (m_docType) - return E_FILE_FORMAT_INVALID; - - status = UnserializeString(pReader, pos, size, m_docType); - - if (status) // error - return status; - } else if (id == libwebm::kMkvDocTypeVersion) { - m_docTypeVersion = UnserializeUInt(pReader, pos, size); - - if (m_docTypeVersion <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDocTypeReadVersion) { - m_docTypeReadVersion = UnserializeUInt(pReader, pos, size); - - if (m_docTypeReadVersion <= 0) - return E_FILE_FORMAT_INVALID; - } - - pos += size; - } - - if (pos != end) - return E_FILE_FORMAT_INVALID; - - // Make sure DocType, DocTypeReadVersion, and DocTypeVersion are valid. - if (m_docType == NULL || m_docTypeReadVersion <= 0 || m_docTypeVersion <= 0) - return E_FILE_FORMAT_INVALID; - - // Make sure EBMLMaxIDLength and EBMLMaxSizeLength are valid. 
- if (m_maxIdLength <= 0 || m_maxIdLength > 4 || m_maxSizeLength <= 0 || - m_maxSizeLength > 8) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -Segment::Segment(IMkvReader* pReader, long long elem_start, - // long long elem_size, - long long start, long long size) - : m_pReader(pReader), - m_element_start(elem_start), - // m_element_size(elem_size), - m_start(start), - m_size(size), - m_pos(start), - m_pUnknownSize(0), - m_pSeekHead(NULL), - m_pInfo(NULL), - m_pTracks(NULL), - m_pCues(NULL), - m_pChapters(NULL), - m_pTags(NULL), - m_clusters(NULL), - m_clusterCount(0), - m_clusterPreloadCount(0), - m_clusterSize(0) {} - -Segment::~Segment() { - const long count = m_clusterCount + m_clusterPreloadCount; - - Cluster** i = m_clusters; - Cluster** j = m_clusters + count; - - while (i != j) { - Cluster* const p = *i++; - delete p; - } - - delete[] m_clusters; - - delete m_pTracks; - delete m_pInfo; - delete m_pCues; - delete m_pChapters; - delete m_pTags; - delete m_pSeekHead; -} - -long long Segment::CreateInstance(IMkvReader* pReader, long long pos, - Segment*& pSegment) { - if (pReader == NULL || pos < 0) - return E_PARSE_FAILED; - - pSegment = NULL; - - long long total, available; - - const long status = pReader->Length(&total, &available); - - if (status < 0) // error - return status; - - if (available < 0) - return -1; - - if ((total >= 0) && (available > total)) - return -1; - - // I would assume that in practice this loop would execute - // exactly once, but we allow for other elements (e.g. Void) - // to immediately follow the EBML header. This is fine for - // the source filter case (since the entire file is available), - // but in the splitter case over a network we should probably - // just give up early. We could for example decide only to - // execute this loop a maximum of, say, 10 times. - // TODO: - // There is an implied "give up early" by only parsing up - // to the available limit. We do that, but only if the - // total file size is unknown. We could decide to always - // use what's available as our limit (irrespective of whether - // we happen to know the total file length). This would have - // as its sense "parse this much of the file before giving up", - // which is a slightly different sense from "try to parse up to - // 10 EBML elements before giving up". - - for (;;) { - if ((total >= 0) && (pos >= total)) - return E_FILE_FORMAT_INVALID; - - // Read ID - long len; - long long result = GetUIntLength(pReader, pos, len); - - if (result) // error, or too few available bytes - return result; - - if ((total >= 0) && ((pos + len) > total)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > available) - return pos + len; - - const long long idpos = pos; - const long long id = ReadID(pReader, pos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID - - // Read Size - - result = GetUIntLength(pReader, pos, len); - - if (result) // error, or too few available bytes - return result; - - if ((total >= 0) && ((pos + len) > total)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > available) - return pos + len; - - long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return size; - - pos += len; // consume length of size of element - - // Pos now points to start of payload - - // Handle "unknown size" for live streaming of webm files.
- const long long unknown_size = (1LL << (7 * len)) - 1; - - if (id == libwebm::kMkvSegment) { - if (size == unknown_size) - size = -1; - - else if (total < 0) - size = -1; - - else if ((pos + size) > total) - size = -1; - - pSegment = new (std::nothrow) Segment(pReader, idpos, pos, size); - if (pSegment == NULL) - return E_PARSE_FAILED; - - return 0; // success - } - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && ((pos + size) > total)) - return E_FILE_FORMAT_INVALID; - - if ((pos + size) > available) - return pos + size; - - pos += size; // consume payload - } -} - -long long Segment::ParseHeaders() { - // Outermost (level 0) segment object has been constructed, - // and pos designates start of payload. We need to find the - // inner (level 1) elements. - long long total, available; - - const int status = m_pReader->Length(&total, &available); - - if (status < 0) // error - return status; - - if (total > 0 && available > total) - return E_FILE_FORMAT_INVALID; - - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - if ((segment_stop >= 0 && total >= 0 && segment_stop > total) || - (segment_stop >= 0 && m_pos > segment_stop)) { - return E_FILE_FORMAT_INVALID; - } - - for (;;) { - if ((total >= 0) && (m_pos >= total)) - break; - - if ((segment_stop >= 0) && (m_pos >= segment_stop)) - break; - - long long pos = m_pos; - const long long element_start = pos; - - // Avoid rolling over pos when very close to LLONG_MAX. - unsigned long long rollover_check = pos + 1ULL; - if (rollover_check > LLONG_MAX) - return E_FILE_FORMAT_INVALID; - - if ((pos + 1) > available) - return (pos + 1); - - long len; - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return result; - - if (result > 0) { - // MkvReader doesn't have enough data to satisfy this read attempt. - return (pos + 1); - } - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > available) - return pos + len; - - const long long idpos = pos; - const long long id = ReadID(m_pReader, idpos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvCluster) - break; - - pos += len; // consume ID - - if ((pos + 1) > available) - return (pos + 1); - - // Read Size - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return result; - - if (result > 0) { - // MkvReader doesn't have enough data to satisfy this read attempt. - return (pos + 1); - } - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > available) - return pos + len; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0 || len < 1 || len > 8) { - // TODO(tomfinegan): ReadUInt should return an error when len is < 1 or - // len > 8 is true instead of checking this _everywhere_. - return size; - } - - pos += len; // consume length of size of element - - // Avoid rolling over pos when very close to LLONG_MAX. - rollover_check = static_cast(pos) + size; - if (rollover_check > LLONG_MAX) - return E_FILE_FORMAT_INVALID; - - const long long element_size = size + pos - element_start; - - // Pos now points to start of payload - - if ((segment_stop >= 0) && ((pos + size) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - // We read EBML elements either in total or nothing at all. 
- - if ((pos + size) > available) - return pos + size; - - if (id == libwebm::kMkvInfo) { - if (m_pInfo) - return E_FILE_FORMAT_INVALID; - - m_pInfo = new (std::nothrow) - SegmentInfo(this, pos, size, element_start, element_size); - - if (m_pInfo == NULL) - return -1; - - const long status = m_pInfo->Parse(); - - if (status) - return status; - } else if (id == libwebm::kMkvTracks) { - if (m_pTracks) - return E_FILE_FORMAT_INVALID; - - m_pTracks = new (std::nothrow) - Tracks(this, pos, size, element_start, element_size); - - if (m_pTracks == NULL) - return -1; - - const long status = m_pTracks->Parse(); - - if (status) - return status; - } else if (id == libwebm::kMkvCues) { - if (m_pCues == NULL) { - m_pCues = new (std::nothrow) - Cues(this, pos, size, element_start, element_size); - - if (m_pCues == NULL) - return -1; - } - } else if (id == libwebm::kMkvSeekHead) { - if (m_pSeekHead == NULL) { - m_pSeekHead = new (std::nothrow) - SeekHead(this, pos, size, element_start, element_size); - - if (m_pSeekHead == NULL) - return -1; - - const long status = m_pSeekHead->Parse(); - - if (status) - return status; - } - } else if (id == libwebm::kMkvChapters) { - if (m_pChapters == NULL) { - m_pChapters = new (std::nothrow) - Chapters(this, pos, size, element_start, element_size); - - if (m_pChapters == NULL) - return -1; - - const long status = m_pChapters->Parse(); - - if (status) - return status; - } - } else if (id == libwebm::kMkvTags) { - if (m_pTags == NULL) { - m_pTags = new (std::nothrow) - Tags(this, pos, size, element_start, element_size); - - if (m_pTags == NULL) - return -1; - - const long status = m_pTags->Parse(); - - if (status) - return status; - } - } - - m_pos = pos + size; // consume payload - } - - if (segment_stop >= 0 && m_pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - if (m_pInfo == NULL) // TODO: liberalize this behavior - return E_FILE_FORMAT_INVALID; - - if (m_pTracks == NULL) - return E_FILE_FORMAT_INVALID; - - return 0; // success -} - -long Segment::LoadCluster(long long& pos, long& len) { - for (;;) { - const long result = DoLoadCluster(pos, len); - - if (result <= 1) - return result; - } -} - -long Segment::DoLoadCluster(long long& pos, long& len) { - if (m_pos < 0) - return DoLoadClusterUnknownSize(pos, len); - - long long total, avail; - - long status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - if (total >= 0 && avail > total) - return E_FILE_FORMAT_INVALID; - - const long long segment_stop = (m_size < 0) ? 
-1 : m_start + m_size; - - long long cluster_off = -1; // offset relative to start of segment - long long cluster_size = -1; // size of cluster payload - - for (;;) { - if ((total >= 0) && (m_pos >= total)) - return 1; // no more clusters - - if ((segment_stop >= 0) && (m_pos >= segment_stop)) - return 1; // no more clusters - - pos = m_pos; - - // Read ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long idpos = pos; - const long long id = ReadID(m_pReader, idpos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - pos += len; // consume length of size of element - - // pos now points to start of payload - - if (size == 0) { - // Missing element payload: move on. - m_pos = pos; - continue; - } - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if ((segment_stop >= 0) && (size != unknown_size) && - ((pos + size) > segment_stop)) { - return E_FILE_FORMAT_INVALID; - } - - if (id == libwebm::kMkvCues) { - if (size == unknown_size) { - // Cues element of unknown size: Not supported. - return E_FILE_FORMAT_INVALID; - } - - if (m_pCues == NULL) { - const long long element_size = (pos - idpos) + size; - - m_pCues = new (std::nothrow) Cues(this, pos, size, idpos, element_size); - if (m_pCues == NULL) - return -1; - } - - m_pos = pos + size; // consume payload - continue; - } - - if (id != libwebm::kMkvCluster) { - // Besides the Segment, Libwebm allows only cluster elements of unknown - // size. Fail the parse upon encountering a non-cluster element reporting - // unknown size. - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - m_pos = pos + size; // consume payload - continue; - } - - // We have a cluster. - - cluster_off = idpos - m_start; // relative pos - - if (size != unknown_size) - cluster_size = size; - - break; - } - - if (cluster_off < 0) { - // No cluster, die. - return E_FILE_FORMAT_INVALID; - } - - long long pos_; - long len_; - - status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_); - - if (status < 0) { // error, or underflow - pos = pos_; - len = len_; - - return status; - } - - // status == 0 means "no block entries found" - // status > 0 means "found at least one block entry" - - // TODO: - // The issue here is that the segment increments its own - // pos ptr past the most recent cluster parsed, and then - // starts from there to parse the next cluster. If we - // don't know the size of the current cluster, then we - // must either parse its payload (as we do below), looking - // for the cluster (or cues) ID to terminate the parse. - // This isn't really what we want: rather, we really need - a way to create the curr cluster object immediately.
- // The pity is that cluster::parse can determine its own - // boundary, and we largely duplicate that same logic here. - // - // Maybe we need to get rid of our look-ahead preloading - // in source::parse??? - // - // As we're parsing the blocks in the curr cluster - //(in cluster::parse), we should have some way to signal - // to the segment that we have determined the boundary, - // so it can adjust its own segment::m_pos member. - // - // The problem is that we're asserting in asyncreadinit, - // because we adjust the pos down to the curr seek pos, - // and the resulting adjusted len is > 2GB. I'm suspicious - // that this is even correct, but even if it is, we can't - // be loading that much data in the cache anyway. - - const long idx = m_clusterCount; - - if (m_clusterPreloadCount > 0) { - if (idx >= m_clusterSize) - return E_FILE_FORMAT_INVALID; - - Cluster* const pCluster = m_clusters[idx]; - if (pCluster == NULL || pCluster->m_index >= 0) - return E_FILE_FORMAT_INVALID; - - const long long off = pCluster->GetPosition(); - if (off < 0) - return E_FILE_FORMAT_INVALID; - - if (off == cluster_off) { // preloaded already - if (status == 0) // no entries found - return E_FILE_FORMAT_INVALID; - - if (cluster_size >= 0) - pos += cluster_size; - else { - const long long element_size = pCluster->GetElementSize(); - - if (element_size <= 0) - return E_FILE_FORMAT_INVALID; // TODO: handle this case - - pos = pCluster->m_element_start + element_size; - } - - pCluster->m_index = idx; // move from preloaded to loaded - ++m_clusterCount; - --m_clusterPreloadCount; - - m_pos = pos; // consume payload - if (segment_stop >= 0 && m_pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - return 0; // success - } - } - - if (status == 0) { // no entries found - if (cluster_size >= 0) - pos += cluster_size; - - if ((total >= 0) && (pos >= total)) { - m_pos = total; - return 1; // no more clusters - } - - if ((segment_stop >= 0) && (pos >= segment_stop)) { - m_pos = segment_stop; - return 1; // no more clusters - } - - m_pos = pos; - return 2; // try again - } - - // status > 0 means we have an entry - - Cluster* const pCluster = Cluster::Create(this, idx, cluster_off); - if (pCluster == NULL) - return -1; - - if (!AppendCluster(pCluster)) { - delete pCluster; - return -1; - } - - if (cluster_size >= 0) { - pos += cluster_size; - - m_pos = pos; - - if (segment_stop > 0 && m_pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - return 0; - } - - m_pUnknownSize = pCluster; - m_pos = -pos; - - return 0; // partial success, since we have a new cluster - - // status == 0 means "no block entries found" - // pos designates start of payload - // m_pos has NOT been adjusted yet (in case we need to come back here) -} - -long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) { - if (m_pos >= 0 || m_pUnknownSize == NULL) - return E_PARSE_FAILED; - - const long status = m_pUnknownSize->Parse(pos, len); - - if (status < 0) // error or underflow - return status; - - if (status == 0) // parsed a block - return 2; // continue parsing - - const long long start = m_pUnknownSize->m_element_start; - const long long size = m_pUnknownSize->GetElementSize(); - - if (size < 0) - return E_FILE_FORMAT_INVALID; - - pos = start + size; - m_pos = pos; - - m_pUnknownSize = 0; - - return 2; // continue parsing -} - -bool Segment::AppendCluster(Cluster* pCluster) { - if (pCluster == NULL || pCluster->m_index < 0) - return false; - - const long count = m_clusterCount + m_clusterPreloadCount; - - long& size = m_clusterSize; 
- const long idx = pCluster->m_index; - - if (size < count || idx != m_clusterCount) - return false; - - if (count >= size) { - const long n = (size <= 0) ? 2048 : 2 * size; - - Cluster** const qq = new (std::nothrow) Cluster*[n]; - if (qq == NULL) - return false; - - Cluster** q = qq; - Cluster** p = m_clusters; - Cluster** const pp = p + count; - - while (p != pp) - *q++ = *p++; - - delete[] m_clusters; - - m_clusters = qq; - size = n; - } - - if (m_clusterPreloadCount > 0) { - Cluster** const p = m_clusters + m_clusterCount; - if (*p == NULL || (*p)->m_index >= 0) - return false; - - Cluster** q = p + m_clusterPreloadCount; - if (q >= (m_clusters + size)) - return false; - - for (;;) { - Cluster** const qq = q - 1; - if ((*qq)->m_index >= 0) - return false; - - *q = *qq; - q = qq; - - if (q == p) - break; - } - } - - m_clusters[idx] = pCluster; - ++m_clusterCount; - return true; -} - -bool Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) { - if (pCluster == NULL || pCluster->m_index >= 0 || idx < m_clusterCount) - return false; - - const long count = m_clusterCount + m_clusterPreloadCount; - - long& size = m_clusterSize; - if (size < count) - return false; - - if (count >= size) { - const long n = (size <= 0) ? 2048 : 2 * size; - - Cluster** const qq = new (std::nothrow) Cluster*[n]; - if (qq == NULL) - return false; - Cluster** q = qq; - - Cluster** p = m_clusters; - Cluster** const pp = p + count; - - while (p != pp) - *q++ = *p++; - - delete[] m_clusters; - - m_clusters = qq; - size = n; - } - - if (m_clusters == NULL) - return false; - - Cluster** const p = m_clusters + idx; - - Cluster** q = m_clusters + count; - if (q < p || q >= (m_clusters + size)) - return false; - - while (q > p) { - Cluster** const qq = q - 1; - - if ((*qq)->m_index >= 0) - return false; - - *q = *qq; - q = qq; - } - - m_clusters[idx] = pCluster; - ++m_clusterPreloadCount; - return true; -} - -long Segment::Load() { - if (m_clusters != NULL || m_clusterSize != 0 || m_clusterCount != 0) - return E_PARSE_FAILED; - - // Outermost (level 0) segment object has been constructed, - // and pos designates start of payload. We need to find the - // inner (level 1) elements. 
- - const long long header_status = ParseHeaders(); - - if (header_status < 0) // error - return static_cast<long>(header_status); - - if (header_status > 0) // underflow - return E_BUFFER_NOT_FULL; - - if (m_pInfo == NULL || m_pTracks == NULL) - return E_FILE_FORMAT_INVALID; - - for (;;) { - const long status = LoadCluster(); - - if (status < 0) // error - return status; - - if (status >= 1) // no more clusters - return 0; - } -} - -SeekHead::Entry::Entry() : id(0), pos(0), element_start(0), element_size(0) {} - -SeekHead::SeekHead(Segment* pSegment, long long start, long long size_, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(start), - m_size(size_), - m_element_start(element_start), - m_element_size(element_size), - m_entries(0), - m_entry_count(0), - m_void_elements(0), - m_void_element_count(0) {} - -SeekHead::~SeekHead() { - delete[] m_entries; - delete[] m_void_elements; -} - -long SeekHead::Parse() { - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = m_start; - const long long stop = m_start + m_size; - - // first count the seek head entries - - int entry_count = 0; - int void_element_count = 0; - - while (pos < stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvSeek) - ++entry_count; - else if (id == libwebm::kMkvVoid) - ++void_element_count; - - pos += size; // consume payload - - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - if (entry_count > 0) { - m_entries = new (std::nothrow) Entry[entry_count]; - - if (m_entries == NULL) - return -1; - } - - if (void_element_count > 0) { - m_void_elements = new (std::nothrow) VoidElement[void_element_count]; - - if (m_void_elements == NULL) - return -1; - } - - // now parse the entries and void elements - - Entry* pEntry = m_entries; - VoidElement* pVoidElement = m_void_elements; - - pos = m_start; - - while (pos < stop) { - const long long idpos = pos; - - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvSeek && entry_count > 0) { - if (ParseEntry(pReader, pos, size, pEntry)) { - Entry& e = *pEntry++; - - e.element_start = idpos; - e.element_size = (pos + size) - idpos; - } - } else if (id == libwebm::kMkvVoid && void_element_count > 0) { - VoidElement& e = *pVoidElement++; - - e.element_start = idpos; - e.element_size = (pos + size) - idpos; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries); - assert(count_ >= 0); - assert(count_ <= entry_count); - - m_entry_count = static_cast<int>(count_); - - count_ = ptrdiff_t(pVoidElement - m_void_elements); - assert(count_ >= 0); - assert(count_ <= void_element_count); - - m_void_element_count = static_cast<int>(count_); - - return 0; -} - -int SeekHead::GetCount() const { return m_entry_count; } - -const SeekHead::Entry* SeekHead::GetEntry(int idx) const { - if (idx < 0) - return 0; - - if (idx >= m_entry_count) - return 0; - - return m_entries + idx; -} - -int SeekHead::GetVoidElementCount() const { return m_void_element_count; } - -const SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const { - if (idx < 0) - return 0; - - if (idx >= m_void_element_count) - return 0; - - return
m_void_elements + idx; -} - -long Segment::ParseCues(long long off, long long& pos, long& len) { - if (m_pCues) - return 0; // success - - if (off < 0) - return -1; - - long long total, avail; - - const int status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - pos = m_start + off; - - if ((total < 0) || (pos >= total)) - return 1; // don't bother parsing cues - - const long long element_start = pos; - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // underflow (weird) - { - len = 1; - return E_BUFFER_NOT_FULL; - } - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long idpos = pos; - - const long long id = ReadID(m_pReader, idpos, len); - - if (id != libwebm::kMkvCues) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID - assert((segment_stop < 0) || (pos <= segment_stop)); - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // underflow (weird) - { - len = 1; - return E_BUFFER_NOT_FULL; - } - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - if (size == 0) // weird, although technically not illegal - return 1; // done - - pos += len; // consume length of size of element - assert((segment_stop < 0) || (pos <= segment_stop)); - - // Pos now points to start of payload - - const long long element_stop = pos + size; - - if ((segment_stop >= 0) && (element_stop > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && (element_stop > total)) - return 1; // don't bother parsing anymore - - len = static_cast<long>(size); - - if (element_stop > avail) - return E_BUFFER_NOT_FULL; - - const long long element_size = element_stop - element_start; - - m_pCues = - new (std::nothrow) Cues(this, pos, size, element_start, element_size); - if (m_pCues == NULL) - return -1; - - return 0; // success -} - -bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_, - Entry* pEntry) { - if (size_ <= 0) - return false; - - long long pos = start; - const long long stop = start + size_; - - long len; - - // parse the container for the level-1 element ID - - const long long seekIdId = ReadID(pReader, pos, len); - if (seekIdId < 0) - return false; - - if (seekIdId != libwebm::kMkvSeekID) - return false; - - if ((pos + len) > stop) - return false; - - pos += len; // consume SeekID id - - const long long seekIdSize = ReadUInt(pReader, pos, len); - - if (seekIdSize <= 0) - return false; - - if ((pos + len) > stop) - return false; - - pos += len; // consume size of field - - if ((pos + seekIdSize) > stop) - return false; - - pEntry->id = ReadID(pReader, pos, len); // payload - - if (pEntry->id <= 0) - return false; - - if (len != seekIdSize) - return false; - - pos += seekIdSize; // consume SeekID payload - - const long long seekPosId = ReadID(pReader, pos, len); - - if (seekPosId !=
libwebm::kMkvSeekPosition) - return false; - - if ((pos + len) > stop) - return false; - - pos += len; // consume id - - const long long seekPosSize = ReadUInt(pReader, pos, len); - - if (seekPosSize <= 0) - return false; - - if ((pos + len) > stop) - return false; - - pos += len; // consume size - - if ((pos + seekPosSize) > stop) - return false; - - pEntry->pos = UnserializeUInt(pReader, pos, seekPosSize); - - if (pEntry->pos < 0) - return false; - - pos += seekPosSize; // consume payload - - if (pos != stop) - return false; - - return true; -} - -Cues::Cues(Segment* pSegment, long long start_, long long size_, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(start_), - m_size(size_), - m_element_start(element_start), - m_element_size(element_size), - m_cue_points(NULL), - m_count(0), - m_preload_count(0), - m_pos(start_) {} - -Cues::~Cues() { - const long n = m_count + m_preload_count; - - CuePoint** p = m_cue_points; - CuePoint** const q = p + n; - - while (p != q) { - CuePoint* const pCP = *p++; - assert(pCP); - - delete pCP; - } - - delete[] m_cue_points; -} - -long Cues::GetCount() const { - if (m_cue_points == NULL) - return -1; - - return m_count; // TODO: really ignore preload count? -} - -bool Cues::DoneParsing() const { - const long long stop = m_start + m_size; - return (m_pos >= stop); -} - -bool Cues::Init() const { - if (m_cue_points) - return true; - - if (m_count != 0 || m_preload_count != 0) - return false; - - IMkvReader* const pReader = m_pSegment->m_pReader; - - const long long stop = m_start + m_size; - long long pos = m_start; - - long cue_points_size = 0; - - while (pos < stop) { - const long long idpos = pos; - - long len; - - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (pos + len) > stop) { - return false; - } - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - if (size < 0 || (pos + len > stop)) { - return false; - } - - pos += len; // consume Size field - if (pos + size > stop) { - return false; - } - - if (id == libwebm::kMkvCuePoint) { - if (!PreloadCuePoint(cue_points_size, idpos)) - return false; - } - - pos += size; // skip payload - } - return true; -} - -bool Cues::PreloadCuePoint(long& cue_points_size, long long pos) const { - if (m_count != 0) - return false; - - if (m_preload_count >= cue_points_size) { - const long n = (cue_points_size <= 0) ? 
2048 : 2 * cue_points_size; - - CuePoint** const qq = new (std::nothrow) CuePoint*[n]; - if (qq == NULL) - return false; - - CuePoint** q = qq; // beginning of target - - CuePoint** p = m_cue_points; // beginning of source - CuePoint** const pp = p + m_preload_count; // end of source - - while (p != pp) - *q++ = *p++; - - delete[] m_cue_points; - - m_cue_points = qq; - cue_points_size = n; - } - - CuePoint* const pCP = new (std::nothrow) CuePoint(m_preload_count, pos); - if (pCP == NULL) - return false; - - m_cue_points[m_preload_count++] = pCP; - return true; -} - -bool Cues::LoadCuePoint() const { - const long long stop = m_start + m_size; - - if (m_pos >= stop) - return false; // nothing else to do - - if (!Init()) { - m_pos = stop; - return false; - } - - IMkvReader* const pReader = m_pSegment->m_pReader; - - while (m_pos < stop) { - const long long idpos = m_pos; - - long len; - - const long long id = ReadID(pReader, m_pos, len); - if (id < 0 || (m_pos + len) > stop) - return false; - - m_pos += len; // consume ID - - const long long size = ReadUInt(pReader, m_pos, len); - if (size < 0 || (m_pos + len) > stop) - return false; - - m_pos += len; // consume Size field - if ((m_pos + size) > stop) - return false; - - if (id != libwebm::kMkvCuePoint) { - m_pos += size; // consume payload - if (m_pos > stop) - return false; - - continue; - } - - if (m_preload_count < 1) - return false; - - CuePoint* const pCP = m_cue_points[m_count]; - if (!pCP || (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos))) - return false; - - if (!pCP->Load(pReader)) { - m_pos = stop; - return false; - } - ++m_count; - --m_preload_count; - - m_pos += size; // consume payload - if (m_pos > stop) - return false; - - return true; // yes, we loaded a cue point - } - - return false; // no, we did not load a cue point -} - -bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP, - const CuePoint::TrackPosition*& pTP) const { - if (time_ns < 0 || pTrack == NULL || m_cue_points == NULL || m_count == 0) - return false; - - CuePoint** const ii = m_cue_points; - CuePoint** i = ii; - - CuePoint** const jj = ii + m_count; - CuePoint** j = jj; - - pCP = *i; - if (pCP == NULL) - return false; - - if (time_ns <= pCP->GetTime(m_pSegment)) { - pTP = pCP->Find(pTrack); - return (pTP != NULL); - } - - while (i < j) { - // INVARIANT: - //[ii, i) <= time_ns - //[i, j) ? - //[j, jj) > time_ns - - CuePoint** const k = i + (j - i) / 2; - if (k >= jj) - return false; - - CuePoint* const pCP = *k; - if (pCP == NULL) - return false; - - const long long t = pCP->GetTime(m_pSegment); - - if (t <= time_ns) - i = k + 1; - else - j = k; - - if (i > j) - return false; - } - - if (i != j || i > jj || i <= ii) - return false; - - pCP = *--i; - - if (pCP == NULL || pCP->GetTime(m_pSegment) > time_ns) - return false; - - // TODO: here and elsewhere, it's probably not correct to search - // for the cue point with this time, and then search for a matching - // track. In principle, the matching track could be on some earlier - // cue point, and with our current algorithm, we'd miss it. To make - // this bullet-proof, we'd need to create a secondary structure, - // with a list of cue points that apply to a track, and then search - // that track-based structure for a matching cue point. 
- - pTP = pCP->Find(pTrack); - return (pTP != NULL); -} - -const CuePoint* Cues::GetFirst() const { - if (m_cue_points == NULL || m_count == 0) - return NULL; - - CuePoint* const* const pp = m_cue_points; - if (pp == NULL) - return NULL; - - CuePoint* const pCP = pp[0]; - if (pCP == NULL || pCP->GetTimeCode() < 0) - return NULL; - - return pCP; -} - -const CuePoint* Cues::GetLast() const { - if (m_cue_points == NULL || m_count <= 0) - return NULL; - - const long index = m_count - 1; - - CuePoint* const* const pp = m_cue_points; - if (pp == NULL) - return NULL; - - CuePoint* const pCP = pp[index]; - if (pCP == NULL || pCP->GetTimeCode() < 0) - return NULL; - - return pCP; -} - -const CuePoint* Cues::GetNext(const CuePoint* pCurr) const { - if (pCurr == NULL || pCurr->GetTimeCode() < 0 || m_cue_points == NULL || - m_count < 1) { - return NULL; - } - - long index = pCurr->m_index; - if (index >= m_count) - return NULL; - - CuePoint* const* const pp = m_cue_points; - if (pp == NULL || pp[index] != pCurr) - return NULL; - - ++index; - - if (index >= m_count) - return NULL; - - CuePoint* const pNext = pp[index]; - - if (pNext == NULL || pNext->GetTimeCode() < 0) - return NULL; - - return pNext; -} - -const BlockEntry* Cues::GetBlock(const CuePoint* pCP, - const CuePoint::TrackPosition* pTP) const { - if (pCP == NULL || pTP == NULL) - return NULL; - - return m_pSegment->GetBlock(*pCP, *pTP); -} - -const BlockEntry* Segment::GetBlock(const CuePoint& cp, - const CuePoint::TrackPosition& tp) { - Cluster** const ii = m_clusters; - Cluster** i = ii; - - const long count = m_clusterCount + m_clusterPreloadCount; - - Cluster** const jj = ii + count; - Cluster** j = jj; - - while (i < j) { - // INVARIANT: - //[ii, i) < pTP->m_pos - //[i, j) ? - //[j, jj) > pTP->m_pos - - Cluster** const k = i + (j - i) / 2; - assert(k < jj); - - Cluster* const pCluster = *k; - assert(pCluster); - - // const long long pos_ = pCluster->m_pos; - // assert(pos_); - // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1); - - const long long pos = pCluster->GetPosition(); - assert(pos >= 0); - - if (pos < tp.m_pos) - i = k + 1; - else if (pos > tp.m_pos) - j = k; - else - return pCluster->GetEntry(cp, tp); - } - - assert(i == j); - // assert(Cluster::HasBlockEntries(this, tp.m_pos)); - - Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos); //, -1); - if (pCluster == NULL) - return NULL; - - const ptrdiff_t idx = i - m_clusters; - - if (!PreloadCluster(pCluster, idx)) { - delete pCluster; - return NULL; - } - assert(m_clusters); - assert(m_clusterPreloadCount > 0); - assert(m_clusters[idx] == pCluster); - - return pCluster->GetEntry(cp, tp); -} - -const Cluster* Segment::FindOrPreloadCluster(long long requested_pos) { - if (requested_pos < 0) - return 0; - - Cluster** const ii = m_clusters; - Cluster** i = ii; - - const long count = m_clusterCount + m_clusterPreloadCount; - - Cluster** const jj = ii + count; - Cluster** j = jj; - - while (i < j) { - // INVARIANT: - //[ii, i) < pTP->m_pos - //[i, j) ? - //[j, jj) > pTP->m_pos - - Cluster** const k = i + (j - i) / 2; - assert(k < jj); - - Cluster* const pCluster = *k; - assert(pCluster); - - // const long long pos_ = pCluster->m_pos; - // assert(pos_); - // const long long pos = pos_ * ((pos_ < 0) ? 
-1 : 1); - - const long long pos = pCluster->GetPosition(); - assert(pos >= 0); - - if (pos < requested_pos) - i = k + 1; - else if (pos > requested_pos) - j = k; - else - return pCluster; - } - - assert(i == j); - // assert(Cluster::HasBlockEntries(this, tp.m_pos)); - - Cluster* const pCluster = Cluster::Create(this, -1, requested_pos); - if (pCluster == NULL) - return NULL; - - const ptrdiff_t idx = i - m_clusters; - - if (!PreloadCluster(pCluster, idx)) { - delete pCluster; - return NULL; - } - assert(m_clusters); - assert(m_clusterPreloadCount > 0); - assert(m_clusters[idx] == pCluster); - - return pCluster; -} - -CuePoint::CuePoint(long idx, long long pos) - : m_element_start(0), - m_element_size(0), - m_index(idx), - m_timecode(-1 * pos), - m_track_positions(NULL), - m_track_positions_count(0) { - assert(pos > 0); -} - -CuePoint::~CuePoint() { delete[] m_track_positions; } - -bool CuePoint::Load(IMkvReader* pReader) { - // odbgstream os; - // os << "CuePoint::Load(begin): timecode=" << m_timecode << endl; - - if (m_timecode >= 0) // already loaded - return true; - - assert(m_track_positions == NULL); - assert(m_track_positions_count == 0); - - long long pos_ = -m_timecode; - const long long element_start = pos_; - - long long stop; - - { - long len; - - const long long id = ReadID(pReader, pos_, len); - if (id != libwebm::kMkvCuePoint) - return false; - - pos_ += len; // consume ID - - const long long size = ReadUInt(pReader, pos_, len); - assert(size >= 0); - - pos_ += len; // consume Size field - // pos_ now points to start of payload - - stop = pos_ + size; - } - - const long long element_size = stop - element_start; - - long long pos = pos_; - - // First count number of track positions - - while (pos < stop) { - long len; - - const long long id = ReadID(pReader, pos, len); - if ((id < 0) || (pos + len > stop)) { - return false; - } - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - if ((size < 0) || (pos + len > stop)) { - return false; - } - - pos += len; // consume Size field - if ((pos + size) > stop) { - return false; - } - - if (id == libwebm::kMkvCueTime) - m_timecode = UnserializeUInt(pReader, pos, size); - - else if (id == libwebm::kMkvCueTrackPositions) - ++m_track_positions_count; - - pos += size; // consume payload - } - - if (m_timecode < 0 || m_track_positions_count <= 0) { - return false; - } - - // os << "CuePoint::Load(cont'd): idpos=" << idpos - // << " timecode=" << m_timecode - // << endl; - - m_track_positions = new (std::nothrow) TrackPosition[m_track_positions_count]; - if (m_track_positions == NULL) - return false; - - // Now parse track positions - - TrackPosition* p = m_track_positions; - pos = pos_; - - while (pos < stop) { - long len; - - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (pos + len) > stop) - return false; - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - assert(size >= 0); - assert((pos + len) <= stop); - - pos += len; // consume Size field - assert((pos + size) <= stop); - - if (id == libwebm::kMkvCueTrackPositions) { - TrackPosition& tp = *p++; - if (!tp.Parse(pReader, pos, size)) { - return false; - } - } - - pos += size; // consume payload - if (pos > stop) - return false; - } - - assert(size_t(p - m_track_positions) == m_track_positions_count); - - m_element_start = element_start; - m_element_size = element_size; - - return true; -} - -bool CuePoint::TrackPosition::Parse(IMkvReader* pReader, long long start_, - long long size_) { - const long 
long stop = start_ + size_; - long long pos = start_; - - m_track = -1; - m_pos = -1; - m_block = 1; // default - - while (pos < stop) { - long len; - - const long long id = ReadID(pReader, pos, len); - if ((id < 0) || ((pos + len) > stop)) { - return false; - } - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - if ((size < 0) || ((pos + len) > stop)) { - return false; - } - - pos += len; // consume Size field - if ((pos + size) > stop) { - return false; - } - - if (id == libwebm::kMkvCueTrack) - m_track = UnserializeUInt(pReader, pos, size); - else if (id == libwebm::kMkvCueClusterPosition) - m_pos = UnserializeUInt(pReader, pos, size); - else if (id == libwebm::kMkvCueBlockNumber) - m_block = UnserializeUInt(pReader, pos, size); - - pos += size; // consume payload - } - - if ((m_pos < 0) || (m_track <= 0)) { - return false; - } - - return true; -} - -const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const { - if (pTrack == NULL) { - return NULL; - } - - const long long n = pTrack->GetNumber(); - - const TrackPosition* i = m_track_positions; - const TrackPosition* const j = i + m_track_positions_count; - - while (i != j) { - const TrackPosition& p = *i++; - - if (p.m_track == n) - return &p; - } - - return NULL; // no matching track number found -} - -long long CuePoint::GetTimeCode() const { return m_timecode; } - -long long CuePoint::GetTime(const Segment* pSegment) const { - assert(pSegment); - assert(m_timecode >= 0); - - const SegmentInfo* const pInfo = pSegment->GetInfo(); - assert(pInfo); - - const long long scale = pInfo->GetTimeCodeScale(); - assert(scale >= 1); - - const long long time = scale * m_timecode; - - return time; -} - -bool Segment::DoneParsing() const { - if (m_size < 0) { - long long total, avail; - - const int status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return true; // must assume done - - if (total < 0) - return false; // assume live stream - - return (m_pos >= total); - } - - const long long stop = m_start + m_size; - - return (m_pos >= stop); -} - -const Cluster* Segment::GetFirst() const { - if ((m_clusters == NULL) || (m_clusterCount <= 0)) - return &m_eos; - - Cluster* const pCluster = m_clusters[0]; - assert(pCluster); - - return pCluster; -} - -const Cluster* Segment::GetLast() const { - if ((m_clusters == NULL) || (m_clusterCount <= 0)) - return &m_eos; - - const long idx = m_clusterCount - 1; - - Cluster* const pCluster = m_clusters[idx]; - assert(pCluster); - - return pCluster; -} - -unsigned long Segment::GetCount() const { return m_clusterCount; } - -const Cluster* Segment::GetNext(const Cluster* pCurr) { - assert(pCurr); - assert(pCurr != &m_eos); - assert(m_clusters); - - long idx = pCurr->m_index; - - if (idx >= 0) { - assert(m_clusterCount > 0); - assert(idx < m_clusterCount); - assert(pCurr == m_clusters[idx]); - - ++idx; - - if (idx >= m_clusterCount) - return &m_eos; // caller will LoadCluster as desired - - Cluster* const pNext = m_clusters[idx]; - assert(pNext); - assert(pNext->m_index >= 0); - assert(pNext->m_index == idx); - - return pNext; - } - - assert(m_clusterPreloadCount > 0); - - long long pos = pCurr->m_element_start; - - assert(m_size >= 0); // TODO - const long long stop = m_start + m_size; // end of segment - - { - long len; - - long long result = GetUIntLength(m_pReader, pos, len); - assert(result == 0); - assert((pos + len) <= stop); // TODO - if (result != 0) - return NULL; - - const long long id = ReadID(m_pReader, pos, len); - if (id != 
libwebm::kMkvCluster) - return NULL; - - pos += len; // consume ID - - // Read Size - result = GetUIntLength(m_pReader, pos, len); - assert(result == 0); // TODO - assert((pos + len) <= stop); // TODO - - const long long size = ReadUInt(m_pReader, pos, len); - assert(size > 0); // TODO - // assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); - - pos += len; // consume length of size of element - assert((pos + size) <= stop); // TODO - - // Pos now points to start of payload - - pos += size; // consume payload - } - - long long off_next = 0; - - while (pos < stop) { - long len; - - long long result = GetUIntLength(m_pReader, pos, len); - assert(result == 0); - assert((pos + len) <= stop); // TODO - if (result != 0) - return NULL; - - const long long idpos = pos; // pos of next (potential) cluster - - const long long id = ReadID(m_pReader, idpos, len); - if (id < 0) - return NULL; - - pos += len; // consume ID - - // Read Size - result = GetUIntLength(m_pReader, pos, len); - assert(result == 0); // TODO - assert((pos + len) <= stop); // TODO - - const long long size = ReadUInt(m_pReader, pos, len); - assert(size >= 0); // TODO - - pos += len; // consume length of size of element - assert((pos + size) <= stop); // TODO - - // Pos now points to start of payload - - if (size == 0) // weird - continue; - - if (id == libwebm::kMkvCluster) { - const long long off_next_ = idpos - m_start; - - long long pos_; - long len_; - - const long status = Cluster::HasBlockEntries(this, off_next_, pos_, len_); - - assert(status >= 0); - - if (status > 0) { - off_next = off_next_; - break; - } - } - - pos += size; // consume payload - } - - if (off_next <= 0) - return 0; - - Cluster** const ii = m_clusters + m_clusterCount; - Cluster** i = ii; - - Cluster** const jj = ii + m_clusterPreloadCount; - Cluster** j = jj; - - while (i < j) { - // INVARIANT: - //[0, i) < pos_next - //[i, j) ? - //[j, jj) > pos_next - - Cluster** const k = i + (j - i) / 2; - assert(k < jj); - - Cluster* const pNext = *k; - assert(pNext); - assert(pNext->m_index < 0); - - // const long long pos_ = pNext->m_pos; - // assert(pos_); - // pos = pos_ * ((pos_ < 0) ? 
-1 : 1); - - pos = pNext->GetPosition(); - - if (pos < off_next) - i = k + 1; - else if (pos > off_next) - j = k; - else - return pNext; - } - - assert(i == j); - - Cluster* const pNext = Cluster::Create(this, -1, off_next); - if (pNext == NULL) - return NULL; - - const ptrdiff_t idx_next = i - m_clusters; // insertion position - - if (!PreloadCluster(pNext, idx_next)) { - delete pNext; - return NULL; - } - assert(m_clusters); - assert(idx_next < m_clusterSize); - assert(m_clusters[idx_next] == pNext); - - return pNext; -} - -long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult, - long long& pos, long& len) { - assert(pCurr); - assert(!pCurr->EOS()); - assert(m_clusters); - - pResult = 0; - - if (pCurr->m_index >= 0) { // loaded (not merely preloaded) - assert(m_clusters[pCurr->m_index] == pCurr); - - const long next_idx = pCurr->m_index + 1; - - if (next_idx < m_clusterCount) { - pResult = m_clusters[next_idx]; - return 0; // success - } - - // curr cluster is last among loaded - - const long result = LoadCluster(pos, len); - - if (result < 0) // error or underflow - return result; - - if (result > 0) // no more clusters - { - // pResult = &m_eos; - return 1; - } - - pResult = GetLast(); - return 0; // success - } - - assert(m_pos > 0); - - long long total, avail; - - long status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - // interrogate curr cluster - - pos = pCurr->m_element_start; - - if (pCurr->m_element_size >= 0) - pos += pCurr->m_element_size; - else { - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadUInt(m_pReader, pos, len); - - if (id != libwebm::kMkvCluster) - return -1; - - pos += len; // consume ID - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - pos += len; // consume size field - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) // TODO: should never happen - return E_FILE_FORMAT_INVALID; // TODO: resolve this - - // assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); - - if ((segment_stop >= 0) && ((pos + size) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - // Pos now points to start of payload - - pos += size; // consume payload (that is, the current cluster) - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - // By consuming the payload, we are assuming that the curr - // cluster isn't interesting. That is, we don't bother checking - // whether the payload of the curr cluster is less than what - // happens to be available (obtained via IMkvReader::Length).
- - // Presumably the caller has already dispensed with the current - // cluster, and really does want the next cluster. - } - - // pos now points to just beyond the last fully-loaded cluster - - for (;;) { - const long status = DoParseNext(pResult, pos, len); - - if (status <= 1) - return status; - } -} - -long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { - long long total, avail; - - long status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - // Parse next cluster. This is strictly a parsing activity. - // Creation of a new cluster object happens later, after the - // parsing is done. - - long long off_next = 0; - long long cluster_size = -1; - - for (;;) { - if ((total >= 0) && (pos >= total)) - return 1; // EOF - - if ((segment_stop >= 0) && (pos >= segment_stop)) - return 1; // EOF - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long idpos = pos; // absolute - const long long idoff = pos - m_start; // relative - - const long long id = ReadID(m_pReader, idpos, len); // absolute - - if (id < 0) // error - return static_cast<long>(id); - - if (id == 0) // weird - return -1; // generic error - - pos += len; // consume ID - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - pos += len; // consume length of size of element - - // Pos now points to start of payload - - if (size == 0) // weird - continue; - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if ((segment_stop >= 0) && (size != unknown_size) && - ((pos + size) > segment_stop)) { - return E_FILE_FORMAT_INVALID; - } - - if (id == libwebm::kMkvCues) { - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - const long long element_stop = pos + size; - - if ((segment_stop >= 0) && (element_stop > segment_stop)) - return E_FILE_FORMAT_INVALID; - - const long long element_start = idpos; - const long long element_size = element_stop - element_start; - - if (m_pCues == NULL) { - m_pCues = new (std::nothrow) - Cues(this, pos, size, element_start, element_size); - if (m_pCues == NULL) - return false; - } - - pos += size; // consume payload - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - continue; - } - - if (id != libwebm::kMkvCluster) { // not a Cluster ID - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - pos += size; // consume payload - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - continue; - } - - // We have a cluster.
- off_next = idoff; - - if (size != unknown_size) - cluster_size = size; - - break; - } - - assert(off_next > 0); // have cluster - - // We have parsed the next cluster. - // We have not created a cluster object yet. What we need - // to do now is determine whether it has already be preloaded - //(in which case, an object for this cluster has already been - // created), and if not, create a new cluster object. - - Cluster** const ii = m_clusters + m_clusterCount; - Cluster** i = ii; - - Cluster** const jj = ii + m_clusterPreloadCount; - Cluster** j = jj; - - while (i < j) { - // INVARIANT: - //[0, i) < pos_next - //[i, j) ? - //[j, jj) > pos_next - - Cluster** const k = i + (j - i) / 2; - assert(k < jj); - - const Cluster* const pNext = *k; - assert(pNext); - assert(pNext->m_index < 0); - - pos = pNext->GetPosition(); - assert(pos >= 0); - - if (pos < off_next) - i = k + 1; - else if (pos > off_next) - j = k; - else { - pResult = pNext; - return 0; // success - } - } - - assert(i == j); - - long long pos_; - long len_; - - status = Cluster::HasBlockEntries(this, off_next, pos_, len_); - - if (status < 0) { // error or underflow - pos = pos_; - len = len_; - - return status; - } - - if (status > 0) { // means "found at least one block entry" - Cluster* const pNext = Cluster::Create(this, - -1, // preloaded - off_next); - if (pNext == NULL) - return -1; - - const ptrdiff_t idx_next = i - m_clusters; // insertion position - - if (!PreloadCluster(pNext, idx_next)) { - delete pNext; - return -1; - } - assert(m_clusters); - assert(idx_next < m_clusterSize); - assert(m_clusters[idx_next] == pNext); - - pResult = pNext; - return 0; // success - } - - // status == 0 means "no block entries found" - - if (cluster_size < 0) { // unknown size - const long long payload_pos = pos; // absolute pos of cluster payload - - for (;;) { // determine cluster size - if ((total >= 0) && (pos >= total)) - break; - - if ((segment_stop >= 0) && (pos >= segment_stop)) - break; // no more clusters - - // Read ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long idpos = pos; - const long long id = ReadID(m_pReader, idpos, len); - - if (id < 0) // error (or underflow) - return static_cast<long>(id); - - // This is the distinguished set of ID's we use to determine - // that we have exhausted the sub-element's inside the cluster - // whose ID we parsed earlier.
- - if (id == libwebm::kMkvCluster || id == libwebm::kMkvCues) - break; - - pos += len; // consume ID (of sub-element) - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - pos += len; // consume size field of element - - // pos now points to start of sub-element's payload - - if (size == 0) // weird - continue; - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; // not allowed for sub-elements - - if ((segment_stop >= 0) && ((pos + size) > segment_stop)) // weird - return E_FILE_FORMAT_INVALID; - - pos += size; // consume payload of sub-element - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - } // determine cluster size - - cluster_size = pos - payload_pos; - assert(cluster_size >= 0); // TODO: handle cluster_size = 0 - - pos = payload_pos; // reset and re-parse original cluster - } - - pos += cluster_size; // consume payload - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - return 2; // try to find a cluster that follows next -} - -const Cluster* Segment::FindCluster(long long time_ns) const { - if ((m_clusters == NULL) || (m_clusterCount <= 0)) - return &m_eos; - - { - Cluster* const pCluster = m_clusters[0]; - assert(pCluster); - assert(pCluster->m_index == 0); - - if (time_ns <= pCluster->GetTime()) - return pCluster; - } - - // Binary search of cluster array - - long i = 0; - long j = m_clusterCount; - - while (i < j) { - // INVARIANT: - //[0, i) <= time_ns - //[i, j) ?
- //[j, m_clusterCount) > time_ns - - const long k = i + (j - i) / 2; - assert(k < m_clusterCount); - - Cluster* const pCluster = m_clusters[k]; - assert(pCluster); - assert(pCluster->m_index == k); - - const long long t = pCluster->GetTime(); - - if (t <= time_ns) - i = k + 1; - else - j = k; - - assert(i <= j); - } - - assert(i == j); - assert(i > 0); - assert(i <= m_clusterCount); - - const long k = i - 1; - - Cluster* const pCluster = m_clusters[k]; - assert(pCluster); - assert(pCluster->m_index == k); - assert(pCluster->GetTime() <= time_ns); - - return pCluster; -} - -const Tracks* Segment::GetTracks() const { return m_pTracks; } -const SegmentInfo* Segment::GetInfo() const { return m_pInfo; } -const Cues* Segment::GetCues() const { return m_pCues; } -const Chapters* Segment::GetChapters() const { return m_pChapters; } -const Tags* Segment::GetTags() const { return m_pTags; } -const SeekHead* Segment::GetSeekHead() const { return m_pSeekHead; } - -long long Segment::GetDuration() const { - assert(m_pInfo); - return m_pInfo->GetDuration(); -} - -Chapters::Chapters(Segment* pSegment, long long payload_start, - long long payload_size, long long element_start, - long long element_size) - : m_pSegment(pSegment), - m_start(payload_start), - m_size(payload_size), - m_element_start(element_start), - m_element_size(element_size), - m_editions(NULL), - m_editions_size(0), - m_editions_count(0) {} - -Chapters::~Chapters() { - while (m_editions_count > 0) { - Edition& e = m_editions[--m_editions_count]; - e.Clear(); - } - delete[] m_editions; -} - -long Chapters::Parse() { - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = m_start; // payload start - const long long stop = pos + m_size; // payload stop - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // weird - continue; - - if (id == libwebm::kMkvEditionEntry) { - status = ParseEdition(pos, size); - - if (status < 0) // error - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -int Chapters::GetEditionCount() const { return m_editions_count; } - -const Chapters::Edition* Chapters::GetEdition(int idx) const { - if (idx < 0) - return NULL; - - if (idx >= m_editions_count) - return NULL; - - return m_editions + idx; -} - -bool Chapters::ExpandEditionsArray() { - if (m_editions_size > m_editions_count) - return true; // nothing else to do - - const int size = (m_editions_size == 0) ? 
1 : 2 * m_editions_size; - - Edition* const editions = new (std::nothrow) Edition[size]; - - if (editions == NULL) - return false; - - for (int idx = 0; idx < m_editions_count; ++idx) { - m_editions[idx].ShallowCopy(editions[idx]); - } - - delete[] m_editions; - m_editions = editions; - - m_editions_size = size; - return true; -} - -long Chapters::ParseEdition(long long pos, long long size) { - if (!ExpandEditionsArray()) - return -1; - - Edition& e = m_editions[m_editions_count++]; - e.Init(); - - return e.Parse(m_pSegment->m_pReader, pos, size); -} - -Chapters::Edition::Edition() {} - -Chapters::Edition::~Edition() {} - -int Chapters::Edition::GetAtomCount() const { return m_atoms_count; } - -const Chapters::Atom* Chapters::Edition::GetAtom(int index) const { - if (index < 0) - return NULL; - - if (index >= m_atoms_count) - return NULL; - - return m_atoms + index; -} - -void Chapters::Edition::Init() { - m_atoms = NULL; - m_atoms_size = 0; - m_atoms_count = 0; -} - -void Chapters::Edition::ShallowCopy(Edition& rhs) const { - rhs.m_atoms = m_atoms; - rhs.m_atoms_size = m_atoms_size; - rhs.m_atoms_count = m_atoms_count; -} - -void Chapters::Edition::Clear() { - while (m_atoms_count > 0) { - Atom& a = m_atoms[--m_atoms_count]; - a.Clear(); - } - - delete[] m_atoms; - m_atoms = NULL; - - m_atoms_size = 0; -} - -long Chapters::Edition::Parse(IMkvReader* pReader, long long pos, - long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) - continue; - - if (id == libwebm::kMkvChapterAtom) { - status = ParseAtom(pReader, pos, size); - - if (status < 0) // error - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -long Chapters::Edition::ParseAtom(IMkvReader* pReader, long long pos, - long long size) { - if (!ExpandAtomsArray()) - return -1; - - Atom& a = m_atoms[m_atoms_count++]; - a.Init(); - - return a.Parse(pReader, pos, size); -} - -bool Chapters::Edition::ExpandAtomsArray() { - if (m_atoms_size > m_atoms_count) - return true; // nothing else to do - - const int size = (m_atoms_size == 0) ? 
1 : 2 * m_atoms_size; - - Atom* const atoms = new (std::nothrow) Atom[size]; - - if (atoms == NULL) - return false; - - for (int idx = 0; idx < m_atoms_count; ++idx) { - m_atoms[idx].ShallowCopy(atoms[idx]); - } - - delete[] m_atoms; - m_atoms = atoms; - - m_atoms_size = size; - return true; -} - -Chapters::Atom::Atom() {} - -Chapters::Atom::~Atom() {} - -unsigned long long Chapters::Atom::GetUID() const { return m_uid; } - -const char* Chapters::Atom::GetStringUID() const { return m_string_uid; } - -long long Chapters::Atom::GetStartTimecode() const { return m_start_timecode; } - -long long Chapters::Atom::GetStopTimecode() const { return m_stop_timecode; } - -long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const { - return GetTime(pChapters, m_start_timecode); -} - -long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const { - return GetTime(pChapters, m_stop_timecode); -} - -int Chapters::Atom::GetDisplayCount() const { return m_displays_count; } - -const Chapters::Display* Chapters::Atom::GetDisplay(int index) const { - if (index < 0) - return NULL; - - if (index >= m_displays_count) - return NULL; - - return m_displays + index; -} - -void Chapters::Atom::Init() { - m_string_uid = NULL; - m_uid = 0; - m_start_timecode = -1; - m_stop_timecode = -1; - - m_displays = NULL; - m_displays_size = 0; - m_displays_count = 0; -} - -void Chapters::Atom::ShallowCopy(Atom& rhs) const { - rhs.m_string_uid = m_string_uid; - rhs.m_uid = m_uid; - rhs.m_start_timecode = m_start_timecode; - rhs.m_stop_timecode = m_stop_timecode; - - rhs.m_displays = m_displays; - rhs.m_displays_size = m_displays_size; - rhs.m_displays_count = m_displays_count; -} - -void Chapters::Atom::Clear() { - delete[] m_string_uid; - m_string_uid = NULL; - - while (m_displays_count > 0) { - Display& d = m_displays[--m_displays_count]; - d.Clear(); - } - - delete[] m_displays; - m_displays = NULL; - - m_displays_size = 0; -} - -long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // 0 length payload, skip. 
- continue; - - if (id == libwebm::kMkvChapterDisplay) { - status = ParseDisplay(pReader, pos, size); - - if (status < 0) // error - return status; - } else if (id == libwebm::kMkvChapterStringUID) { - status = UnserializeString(pReader, pos, size, m_string_uid); - - if (status < 0) // error - return status; - } else if (id == libwebm::kMkvChapterUID) { - long long val; - status = UnserializeInt(pReader, pos, size, val); - - if (status < 0) // error - return status; - - m_uid = static_cast<unsigned long long>(val); - } else if (id == libwebm::kMkvChapterTimeStart) { - const long long val = UnserializeUInt(pReader, pos, size); - - if (val < 0) // error - return static_cast<long>(val); - - m_start_timecode = val; - } else if (id == libwebm::kMkvChapterTimeEnd) { - const long long val = UnserializeUInt(pReader, pos, size); - - if (val < 0) // error - return static_cast<long>(val); - - m_stop_timecode = val; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -long long Chapters::Atom::GetTime(const Chapters* pChapters, - long long timecode) { - if (pChapters == NULL) - return -1; - - Segment* const pSegment = pChapters->m_pSegment; - - if (pSegment == NULL) // weird - return -1; - - const SegmentInfo* const pInfo = pSegment->GetInfo(); - - if (pInfo == NULL) - return -1; - - const long long timecode_scale = pInfo->GetTimeCodeScale(); - - if (timecode_scale < 1) // weird - return -1; - - if (timecode < 0) - return -1; - - const long long result = timecode_scale * timecode; - - return result; -} - -long Chapters::Atom::ParseDisplay(IMkvReader* pReader, long long pos, - long long size) { - if (!ExpandDisplaysArray()) - return -1; - - Display& d = m_displays[m_displays_count++]; - d.Init(); - - return d.Parse(pReader, pos, size); -} - -bool Chapters::Atom::ExpandDisplaysArray() { - if (m_displays_size > m_displays_count) - return true; // nothing else to do - - const int size = (m_displays_size == 0) ? 1 : 2 * m_displays_size; - - Display* const displays = new (std::nothrow) Display[size]; - - if (displays == NULL) - return false; - - for (int idx = 0; idx < m_displays_count; ++idx) { - m_displays[idx].ShallowCopy(displays[idx]); - } - - delete[] m_displays; - m_displays = displays; - - m_displays_size = size; - return true; -} - -Chapters::Display::Display() {} - -Chapters::Display::~Display() {} - -const char* Chapters::Display::GetString() const { return m_string; } - -const char* Chapters::Display::GetLanguage() const { return m_language; } - -const char* Chapters::Display::GetCountry() const { return m_country; } - -void Chapters::Display::Init() { - m_string = NULL; - m_language = NULL; - m_country = NULL; -} - -void Chapters::Display::ShallowCopy(Display& rhs) const { - rhs.m_string = m_string; - rhs.m_language = m_language; - rhs.m_country = m_country; -} - -void Chapters::Display::Clear() { - delete[] m_string; - m_string = NULL; - - delete[] m_language; - m_language = NULL; - - delete[] m_country; - m_country = NULL; -} - -long Chapters::Display::Parse(IMkvReader* pReader, long long pos, - long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // No payload.
- continue; - - if (id == libwebm::kMkvChapString) { - status = UnserializeString(pReader, pos, size, m_string); - - if (status) - return status; - } else if (id == libwebm::kMkvChapLanguage) { - status = UnserializeString(pReader, pos, size, m_language); - - if (status) - return status; - } else if (id == libwebm::kMkvChapCountry) { - status = UnserializeString(pReader, pos, size, m_country); - - if (status) - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -Tags::Tags(Segment* pSegment, long long payload_start, long long payload_size, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(payload_start), - m_size(payload_size), - m_element_start(element_start), - m_element_size(element_size), - m_tags(NULL), - m_tags_size(0), - m_tags_count(0) {} - -Tags::~Tags() { - while (m_tags_count > 0) { - Tag& t = m_tags[--m_tags_count]; - t.Clear(); - } - delete[] m_tags; -} - -long Tags::Parse() { - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = m_start; // payload start - const long long stop = pos + m_size; // payload stop - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) - return status; - - if (size == 0) // 0 length tag, read another - continue; - - if (id == libwebm::kMkvTag) { - status = ParseTag(pos, size); - - if (status < 0) - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -int Tags::GetTagCount() const { return m_tags_count; } - -const Tags::Tag* Tags::GetTag(int idx) const { - if (idx < 0) - return NULL; - - if (idx >= m_tags_count) - return NULL; - - return m_tags + idx; -} - -bool Tags::ExpandTagsArray() { - if (m_tags_size > m_tags_count) - return true; // nothing else to do - - const int size = (m_tags_size == 0) ? 
1 : 2 * m_tags_size; - - Tag* const tags = new (std::nothrow) Tag[size]; - - if (tags == NULL) - return false; - - for (int idx = 0; idx < m_tags_count; ++idx) { - m_tags[idx].ShallowCopy(tags[idx]); - } - - delete[] m_tags; - m_tags = tags; - - m_tags_size = size; - return true; -} - -long Tags::ParseTag(long long pos, long long size) { - if (!ExpandTagsArray()) - return -1; - - Tag& t = m_tags[m_tags_count++]; - t.Init(); - - return t.Parse(m_pSegment->m_pReader, pos, size); -} - -Tags::Tag::Tag() {} - -Tags::Tag::~Tag() {} - -int Tags::Tag::GetSimpleTagCount() const { return m_simple_tags_count; } - -const Tags::SimpleTag* Tags::Tag::GetSimpleTag(int index) const { - if (index < 0) - return NULL; - - if (index >= m_simple_tags_count) - return NULL; - - return m_simple_tags + index; -} - -void Tags::Tag::Init() { - m_simple_tags = NULL; - m_simple_tags_size = 0; - m_simple_tags_count = 0; -} - -void Tags::Tag::ShallowCopy(Tag& rhs) const { - rhs.m_simple_tags = m_simple_tags; - rhs.m_simple_tags_size = m_simple_tags_size; - rhs.m_simple_tags_count = m_simple_tags_count; -} - -void Tags::Tag::Clear() { - while (m_simple_tags_count > 0) { - SimpleTag& d = m_simple_tags[--m_simple_tags_count]; - d.Clear(); - } - - delete[] m_simple_tags; - m_simple_tags = NULL; - - m_simple_tags_size = 0; -} - -long Tags::Tag::Parse(IMkvReader* pReader, long long pos, long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) - return status; - - if (size == 0) // 0 length tag, read another - continue; - - if (id == libwebm::kMkvSimpleTag) { - status = ParseSimpleTag(pReader, pos, size); - - if (status < 0) - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -long Tags::Tag::ParseSimpleTag(IMkvReader* pReader, long long pos, - long long size) { - if (!ExpandSimpleTagsArray()) - return -1; - - SimpleTag& st = m_simple_tags[m_simple_tags_count++]; - st.Init(); - - return st.Parse(pReader, pos, size); -} - -bool Tags::Tag::ExpandSimpleTagsArray() { - if (m_simple_tags_size > m_simple_tags_count) - return true; // nothing else to do - - const int size = (m_simple_tags_size == 0) ? 
1 : 2 * m_simple_tags_size; - - SimpleTag* const displays = new (std::nothrow) SimpleTag[size]; - - if (displays == NULL) - return false; - - for (int idx = 0; idx < m_simple_tags_count; ++idx) { - m_simple_tags[idx].ShallowCopy(displays[idx]); - } - - delete[] m_simple_tags; - m_simple_tags = displays; - - m_simple_tags_size = size; - return true; -} - -Tags::SimpleTag::SimpleTag() {} - -Tags::SimpleTag::~SimpleTag() {} - -const char* Tags::SimpleTag::GetTagName() const { return m_tag_name; } - -const char* Tags::SimpleTag::GetTagString() const { return m_tag_string; } - -void Tags::SimpleTag::Init() { - m_tag_name = NULL; - m_tag_string = NULL; -} - -void Tags::SimpleTag::ShallowCopy(SimpleTag& rhs) const { - rhs.m_tag_name = m_tag_name; - rhs.m_tag_string = m_tag_string; -} - -void Tags::SimpleTag::Clear() { - delete[] m_tag_name; - m_tag_name = NULL; - - delete[] m_tag_string; - m_tag_string = NULL; -} - -long Tags::SimpleTag::Parse(IMkvReader* pReader, long long pos, - long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // weird - continue; - - if (id == libwebm::kMkvTagName) { - status = UnserializeString(pReader, pos, size, m_tag_name); - - if (status) - return status; - } else if (id == libwebm::kMkvTagString) { - status = UnserializeString(pReader, pos, size, m_tag_string); - - if (status) - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -SegmentInfo::SegmentInfo(Segment* pSegment, long long start, long long size_, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(start), - m_size(size_), - m_element_start(element_start), - m_element_size(element_size), - m_pMuxingAppAsUTF8(NULL), - m_pWritingAppAsUTF8(NULL), - m_pTitleAsUTF8(NULL) {} - -SegmentInfo::~SegmentInfo() { - delete[] m_pMuxingAppAsUTF8; - m_pMuxingAppAsUTF8 = NULL; - - delete[] m_pWritingAppAsUTF8; - m_pWritingAppAsUTF8 = NULL; - - delete[] m_pTitleAsUTF8; - m_pTitleAsUTF8 = NULL; -} - -long SegmentInfo::Parse() { - assert(m_pMuxingAppAsUTF8 == NULL); - assert(m_pWritingAppAsUTF8 == NULL); - assert(m_pTitleAsUTF8 == NULL); - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = m_start; - const long long stop = m_start + m_size; - - m_timecodeScale = 1000000; - m_duration = -1; - - while (pos < stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvTimecodeScale) { - m_timecodeScale = UnserializeUInt(pReader, pos, size); - - if (m_timecodeScale <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDuration) { - const long status = UnserializeFloat(pReader, pos, size, m_duration); - - if (status < 0) - return status; - - if (m_duration < 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvMuxingApp) { - const long status = - UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8); - - if (status) - return status; - } else if (id == libwebm::kMkvWritingApp) { - const long status = - UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8); - - if (status) - return status; - } else if (id == libwebm::kMkvTitle) { - const long status = UnserializeString(pReader, pos, size, m_pTitleAsUTF8); - - if (status) - return status; - 
} - - pos += size; - - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - const double rollover_check = m_duration * m_timecodeScale; - if (rollover_check > static_cast<double>(LLONG_MAX)) - return E_FILE_FORMAT_INVALID; - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -long long SegmentInfo::GetTimeCodeScale() const { return m_timecodeScale; } - -long long SegmentInfo::GetDuration() const { - if (m_duration < 0) - return -1; - - assert(m_timecodeScale >= 1); - - const double dd = double(m_duration) * double(m_timecodeScale); - const long long d = static_cast<long long>(dd); - - return d; -} - -const char* SegmentInfo::GetMuxingAppAsUTF8() const { - return m_pMuxingAppAsUTF8; -} - -const char* SegmentInfo::GetWritingAppAsUTF8() const { - return m_pWritingAppAsUTF8; -} - -const char* SegmentInfo::GetTitleAsUTF8() const { return m_pTitleAsUTF8; } - -/////////////////////////////////////////////////////////////// -// ContentEncoding element -ContentEncoding::ContentCompression::ContentCompression() - : algo(0), settings(NULL), settings_len(0) {} - -ContentEncoding::ContentCompression::~ContentCompression() { - delete[] settings; -} - -ContentEncoding::ContentEncryption::ContentEncryption() - : algo(0), - key_id(NULL), - key_id_len(0), - signature(NULL), - signature_len(0), - sig_key_id(NULL), - sig_key_id_len(0), - sig_algo(0), - sig_hash_algo(0) {} - -ContentEncoding::ContentEncryption::~ContentEncryption() { - delete[] key_id; - delete[] signature; - delete[] sig_key_id; -} - -ContentEncoding::ContentEncoding() - : compression_entries_(NULL), - compression_entries_end_(NULL), - encryption_entries_(NULL), - encryption_entries_end_(NULL), - encoding_order_(0), - encoding_scope_(1), - encoding_type_(0) {} - -ContentEncoding::~ContentEncoding() { - ContentCompression** comp_i = compression_entries_; - ContentCompression** const comp_j = compression_entries_end_; - - while (comp_i != comp_j) { - ContentCompression* const comp = *comp_i++; - delete comp; - } - - delete[] compression_entries_; - - ContentEncryption** enc_i = encryption_entries_; - ContentEncryption** const enc_j = encryption_entries_end_; - - while (enc_i != enc_j) { - ContentEncryption* const enc = *enc_i++; - delete enc; - } - - delete[] encryption_entries_; -} - -const ContentEncoding::ContentCompression* -ContentEncoding::GetCompressionByIndex(unsigned long idx) const { - const ptrdiff_t count = compression_entries_end_ - compression_entries_; - assert(count >= 0); - - if (idx >= static_cast<unsigned long>(count)) - return NULL; - - return compression_entries_[idx]; -} - -unsigned long ContentEncoding::GetCompressionCount() const { - const ptrdiff_t count = compression_entries_end_ - compression_entries_; - assert(count >= 0); - - return static_cast<unsigned long>(count); -} - -const ContentEncoding::ContentEncryption* ContentEncoding::GetEncryptionByIndex( - unsigned long idx) const { - const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; - assert(count >= 0); - - if (idx >= static_cast<unsigned long>(count)) - return NULL; - - return encryption_entries_[idx]; -} - -unsigned long ContentEncoding::GetEncryptionCount() const { - const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; - assert(count >= 0); - - return static_cast<unsigned long>(count); -} - -long ContentEncoding::ParseContentEncAESSettingsEntry( - long long start, long long size, IMkvReader* pReader, - ContentEncAESSettings* aes) { - assert(pReader); - assert(aes); - - long long pos = start; - const long long stop = start + size; - - while (pos < stop) { - long long id, size; -
const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvAESSettingsCipherMode) { - aes->cipher_mode = UnserializeUInt(pReader, pos, size); - if (aes->cipher_mode != 1) - return E_FILE_FORMAT_INVALID; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - return 0; -} - -long ContentEncoding::ParseContentEncodingEntry(long long start, long long size, - IMkvReader* pReader) { - assert(pReader); - - long long pos = start; - const long long stop = start + size; - - // Count ContentCompression and ContentEncryption elements. - int compression_count = 0; - int encryption_count = 0; - - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvContentCompression) - ++compression_count; - - if (id == libwebm::kMkvContentEncryption) - ++encryption_count; - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (compression_count <= 0 && encryption_count <= 0) - return -1; - - if (compression_count > 0) { - compression_entries_ = - new (std::nothrow) ContentCompression*[compression_count]; - if (!compression_entries_) - return -1; - compression_entries_end_ = compression_entries_; - } - - if (encryption_count > 0) { - encryption_entries_ = - new (std::nothrow) ContentEncryption*[encryption_count]; - if (!encryption_entries_) { - delete[] compression_entries_; - return -1; - } - encryption_entries_end_ = encryption_entries_; - } - - pos = start; - while (pos < stop) { - long long id, size; - long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvContentEncodingOrder) { - encoding_order_ = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvContentEncodingScope) { - encoding_scope_ = UnserializeUInt(pReader, pos, size); - if (encoding_scope_ < 1) - return -1; - } else if (id == libwebm::kMkvContentEncodingType) { - encoding_type_ = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvContentCompression) { - ContentCompression* const compression = - new (std::nothrow) ContentCompression(); - if (!compression) - return -1; - - status = ParseCompressionEntry(pos, size, pReader, compression); - if (status) { - delete compression; - return status; - } - *compression_entries_end_++ = compression; - } else if (id == libwebm::kMkvContentEncryption) { - ContentEncryption* const encryption = - new (std::nothrow) ContentEncryption(); - if (!encryption) - return -1; - - status = ParseEncryptionEntry(pos, size, pReader, encryption); - if (status) { - delete encryption; - return status; - } - *encryption_entries_end_++ = encryption; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -long ContentEncoding::ParseCompressionEntry(long long start, long long size, - IMkvReader* pReader, - ContentCompression* compression) { - assert(pReader); - assert(compression); - - long long pos = start; - const long long stop = start + size; - - bool valid = false; - - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvContentCompAlgo) { - long long algo = UnserializeUInt(pReader, pos, 
size); - if (algo < 0) - return E_FILE_FORMAT_INVALID; - compression->algo = algo; - valid = true; - } else if (id == libwebm::kMkvContentCompSettings) { - if (size <= 0) - return E_FILE_FORMAT_INVALID; - - const size_t buflen = static_cast(size); - unsigned char* buf = SafeArrayAlloc(1, buflen); - if (buf == NULL) - return -1; - - const int read_status = - pReader->Read(pos, static_cast(buflen), buf); - if (read_status) { - delete[] buf; - return status; - } - - compression->settings = buf; - compression->settings_len = buflen; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - // ContentCompAlgo is mandatory - if (!valid) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -long ContentEncoding::ParseEncryptionEntry(long long start, long long size, - IMkvReader* pReader, - ContentEncryption* encryption) { - assert(pReader); - assert(encryption); - - long long pos = start; - const long long stop = start + size; - - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvContentEncAlgo) { - encryption->algo = UnserializeUInt(pReader, pos, size); - if (encryption->algo != 5) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvContentEncKeyID) { - delete[] encryption->key_id; - encryption->key_id = NULL; - encryption->key_id_len = 0; - - if (size <= 0) - return E_FILE_FORMAT_INVALID; - - const size_t buflen = static_cast(size); - unsigned char* buf = SafeArrayAlloc(1, buflen); - if (buf == NULL) - return -1; - - const int read_status = - pReader->Read(pos, static_cast(buflen), buf); - if (read_status) { - delete[] buf; - return status; - } - - encryption->key_id = buf; - encryption->key_id_len = buflen; - } else if (id == libwebm::kMkvContentSignature) { - delete[] encryption->signature; - encryption->signature = NULL; - encryption->signature_len = 0; - - if (size <= 0) - return E_FILE_FORMAT_INVALID; - - const size_t buflen = static_cast(size); - unsigned char* buf = SafeArrayAlloc(1, buflen); - if (buf == NULL) - return -1; - - const int read_status = - pReader->Read(pos, static_cast(buflen), buf); - if (read_status) { - delete[] buf; - return status; - } - - encryption->signature = buf; - encryption->signature_len = buflen; - } else if (id == libwebm::kMkvContentSigKeyID) { - delete[] encryption->sig_key_id; - encryption->sig_key_id = NULL; - encryption->sig_key_id_len = 0; - - if (size <= 0) - return E_FILE_FORMAT_INVALID; - - const size_t buflen = static_cast(size); - unsigned char* buf = SafeArrayAlloc(1, buflen); - if (buf == NULL) - return -1; - - const int read_status = - pReader->Read(pos, static_cast(buflen), buf); - if (read_status) { - delete[] buf; - return status; - } - - encryption->sig_key_id = buf; - encryption->sig_key_id_len = buflen; - } else if (id == libwebm::kMkvContentSigAlgo) { - encryption->sig_algo = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvContentSigHashAlgo) { - encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvContentEncAESSettings) { - const long status = ParseContentEncAESSettingsEntry( - pos, size, pReader, &encryption->aes_settings); - if (status) - return status; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - return 0; -} - -Track::Track(Segment* pSegment, long long element_start, long long element_size) - : m_pSegment(pSegment), - 
-Track::Track(Segment* pSegment, long long element_start, long long element_size)
-    : m_pSegment(pSegment),
-      m_element_start(element_start),
-      m_element_size(element_size),
-      content_encoding_entries_(NULL),
-      content_encoding_entries_end_(NULL) {}
-
-Track::~Track() {
-  Info& info = const_cast<Info&>(m_info);
-  info.Clear();
-
-  ContentEncoding** i = content_encoding_entries_;
-  ContentEncoding** const j = content_encoding_entries_end_;
-
-  while (i != j) {
-    ContentEncoding* const encoding = *i++;
-    delete encoding;
-  }
-
-  delete[] content_encoding_entries_;
-}
-
-long Track::Create(Segment* pSegment, const Info& info, long long element_start,
-                   long long element_size, Track*& pResult) {
-  if (pResult)
-    return -1;
-
-  Track* const pTrack =
-      new (std::nothrow) Track(pSegment, element_start, element_size);
-
-  if (pTrack == NULL)
-    return -1;  // generic error
-
-  const int status = info.Copy(pTrack->m_info);
-
-  if (status) {  // error
-    delete pTrack;
-    return status;
-  }
-
-  pResult = pTrack;
-  return 0;  // success
-}
-
-Track::Info::Info()
-    : uid(0),
-      defaultDuration(0),
-      codecDelay(0),
-      seekPreRoll(0),
-      nameAsUTF8(NULL),
-      language(NULL),
-      codecId(NULL),
-      codecNameAsUTF8(NULL),
-      codecPrivate(NULL),
-      codecPrivateSize(0),
-      lacing(false) {}
-
-Track::Info::~Info() { Clear(); }
-
-void Track::Info::Clear() {
-  delete[] nameAsUTF8;
-  nameAsUTF8 = NULL;
-
-  delete[] language;
-  language = NULL;
-
-  delete[] codecId;
-  codecId = NULL;
-
-  delete[] codecPrivate;
-  codecPrivate = NULL;
-  codecPrivateSize = 0;
-
-  delete[] codecNameAsUTF8;
-  codecNameAsUTF8 = NULL;
-}
-
-int Track::Info::CopyStr(char* Info::*str, Info& dst_) const {
-  if (str == static_cast<char* Info::*>(NULL))
-    return -1;
-
-  char*& dst = dst_.*str;
-
-  if (dst)  // should be NULL already
-    return -1;
-
-  const char* const src = this->*str;
-
-  if (src == NULL)
-    return 0;
-
-  const size_t len = strlen(src);
-
-  dst = SafeArrayAlloc<char>(1, len + 1);
-
-  if (dst == NULL)
-    return -1;
-
-  strcpy(dst, src);
-
-  return 0;
-}
-
-int Track::Info::Copy(Info& dst) const {
-  if (&dst == this)
-    return 0;
-
-  dst.type = type;
-  dst.number = number;
-  dst.defaultDuration = defaultDuration;
-  dst.codecDelay = codecDelay;
-  dst.seekPreRoll = seekPreRoll;
-  dst.uid = uid;
-  dst.lacing = lacing;
-  dst.settings = settings;
-
-  // We now copy the string member variables from src to dst.
-  // This involves memory allocation so in principle the operation
-  // can fail (indeed, that's why we have Info::Copy), so we must
-  // report this to the caller. An error return from this function
-  // therefore implies that the copy was only partially successful.
- - if (int status = CopyStr(&Info::nameAsUTF8, dst)) - return status; - - if (int status = CopyStr(&Info::language, dst)) - return status; - - if (int status = CopyStr(&Info::codecId, dst)) - return status; - - if (int status = CopyStr(&Info::codecNameAsUTF8, dst)) - return status; - - if (codecPrivateSize > 0) { - if (codecPrivate == NULL) - return -1; - - if (dst.codecPrivate) - return -1; - - if (dst.codecPrivateSize != 0) - return -1; - - dst.codecPrivate = SafeArrayAlloc(1, codecPrivateSize); - - if (dst.codecPrivate == NULL) - return -1; - - memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize); - dst.codecPrivateSize = codecPrivateSize; - } - - return 0; -} - -const BlockEntry* Track::GetEOS() const { return &m_eos; } - -long Track::GetType() const { return m_info.type; } - -long Track::GetNumber() const { return m_info.number; } - -unsigned long long Track::GetUid() const { return m_info.uid; } - -const char* Track::GetNameAsUTF8() const { return m_info.nameAsUTF8; } - -const char* Track::GetLanguage() const { return m_info.language; } - -const char* Track::GetCodecNameAsUTF8() const { return m_info.codecNameAsUTF8; } - -const char* Track::GetCodecId() const { return m_info.codecId; } - -const unsigned char* Track::GetCodecPrivate(size_t& size) const { - size = m_info.codecPrivateSize; - return m_info.codecPrivate; -} - -bool Track::GetLacing() const { return m_info.lacing; } - -unsigned long long Track::GetDefaultDuration() const { - return m_info.defaultDuration; -} - -unsigned long long Track::GetCodecDelay() const { return m_info.codecDelay; } - -unsigned long long Track::GetSeekPreRoll() const { return m_info.seekPreRoll; } - -long Track::GetFirst(const BlockEntry*& pBlockEntry) const { - const Cluster* pCluster = m_pSegment->GetFirst(); - - for (int i = 0;;) { - if (pCluster == NULL) { - pBlockEntry = GetEOS(); - return 1; - } - - if (pCluster->EOS()) { - if (m_pSegment->DoneParsing()) { - pBlockEntry = GetEOS(); - return 1; - } - - pBlockEntry = 0; - return E_BUFFER_NOT_FULL; - } - - long status = pCluster->GetFirst(pBlockEntry); - - if (status < 0) // error - return status; - - if (pBlockEntry == 0) { // empty cluster - pCluster = m_pSegment->GetNext(pCluster); - continue; - } - - for (;;) { - const Block* const pBlock = pBlockEntry->GetBlock(); - assert(pBlock); - - const long long tn = pBlock->GetTrackNumber(); - - if ((tn == m_info.number) && VetEntry(pBlockEntry)) - return 0; - - const BlockEntry* pNextEntry; - - status = pCluster->GetNext(pBlockEntry, pNextEntry); - - if (status < 0) // error - return status; - - if (pNextEntry == 0) - break; - - pBlockEntry = pNextEntry; - } - - ++i; - - if (i >= 100) - break; - - pCluster = m_pSegment->GetNext(pCluster); - } - - // NOTE: if we get here, it means that we didn't find a block with - // a matching track number. We interpret that as an error (which - // might be too conservative). - - pBlockEntry = GetEOS(); // so we can return a non-NULL value - return 1; -} - -long Track::GetNext(const BlockEntry* pCurrEntry, - const BlockEntry*& pNextEntry) const { - assert(pCurrEntry); - assert(!pCurrEntry->EOS()); //? 
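// [Editor's sketch] Track::Info::CopyStr above takes a `char* Info::*`, a
// pointer-to-data-member, so one helper can deep-copy whichever string field
// it is aimed at, as the four CopyStr calls show. A self-contained
// illustration of the idiom; S and its fields are hypothetical, not libwebm
// types:
#include <cstdio>
#include <cstdlib>
#include <cstring>

struct S {
  char* name;
  char* language;
};

// Deep-copies the field selected by `member` from src to dst.
int CopyField(char* S::*member, const S& src, S& dst) {
  const char* const from = src.*member;
  if (from == NULL) {
    dst.*member = NULL;
    return 0;
  }
  char* const to = static_cast<char*>(std::malloc(std::strlen(from) + 1));
  if (to == NULL)
    return -1;
  std::strcpy(to, from);
  dst.*member = to;
  return 0;
}

int main() {
  char n[] = "track one", l[] = "eng";
  S src = {n, l}, dst = {NULL, NULL};
  CopyField(&S::name, src, dst);      // the same helper serves
  CopyField(&S::language, src, dst);  // every string member
  std::printf("%s %s\n", dst.name, dst.language);
  std::free(dst.name);
  std::free(dst.language);
  return 0;
}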
- - const Block* const pCurrBlock = pCurrEntry->GetBlock(); - assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number); - if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number) - return -1; - - const Cluster* pCluster = pCurrEntry->GetCluster(); - assert(pCluster); - assert(!pCluster->EOS()); - - long status = pCluster->GetNext(pCurrEntry, pNextEntry); - - if (status < 0) // error - return status; - - for (int i = 0;;) { - while (pNextEntry) { - const Block* const pNextBlock = pNextEntry->GetBlock(); - assert(pNextBlock); - - if (pNextBlock->GetTrackNumber() == m_info.number) - return 0; - - pCurrEntry = pNextEntry; - - status = pCluster->GetNext(pCurrEntry, pNextEntry); - - if (status < 0) // error - return status; - } - - pCluster = m_pSegment->GetNext(pCluster); - - if (pCluster == NULL) { - pNextEntry = GetEOS(); - return 1; - } - - if (pCluster->EOS()) { - if (m_pSegment->DoneParsing()) { - pNextEntry = GetEOS(); - return 1; - } - - // TODO: there is a potential O(n^2) problem here: we tell the - // caller to (pre)load another cluster, which he does, but then he - // calls GetNext again, which repeats the same search. This is - // a pathological case, since the only way it can happen is if - // there exists a long sequence of clusters none of which contain a - // block from this track. One way around this problem is for the - // caller to be smarter when he loads another cluster: don't call - // us back until you have a cluster that contains a block from this - // track. (Of course, that's not cheap either, since our caller - // would have to scan the each cluster as it's loaded, so that - // would just push back the problem.) - - pNextEntry = NULL; - return E_BUFFER_NOT_FULL; - } - - status = pCluster->GetFirst(pNextEntry); - - if (status < 0) // error - return status; - - if (pNextEntry == NULL) // empty cluster - continue; - - ++i; - - if (i >= 100) - break; - } - - // NOTE: if we get here, it means that we didn't find a block with - // a matching track number after lots of searching, so we give - // up trying. - - pNextEntry = GetEOS(); // so we can return a non-NULL value - return 1; -} - -bool Track::VetEntry(const BlockEntry* pBlockEntry) const { - assert(pBlockEntry); - const Block* const pBlock = pBlockEntry->GetBlock(); - assert(pBlock); - assert(pBlock->GetTrackNumber() == m_info.number); - if (!pBlock || pBlock->GetTrackNumber() != m_info.number) - return false; - - // This function is used during a seek to determine whether the - // frame is a valid seek target. This default function simply - // returns true, which means all frames are valid seek targets. - // It gets overridden by the VideoTrack class, because only video - // keyframes can be used as seek target. 
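// [Editor's sketch] Track::GetNext above returns E_BUFFER_NOT_FULL when the
// next block lives in a cluster that has not been (pre)loaded yet; the TODO
// explains why a naive caller can repeat work. A typical caller loop,
// sketched under the assumption that the segment exposes a cluster-loading
// call along the lines of Segment::LoadCluster(pos, len) -- check the real
// mkvparser header for the exact signature and error constants:
#include "mkvparser/mkvparser.h"  // header path varies by libwebm version

const mkvparser::BlockEntry* NextBlock(mkvparser::Segment* segment,
                                       const mkvparser::Track* track,
                                       const mkvparser::BlockEntry* current) {
  const mkvparser::BlockEntry* next = NULL;
  for (;;) {
    const long status = track->GetNext(current, next);
    if (status >= 0)
      return next;  // 0: found a block, 1: end-of-stream sentinel
    if (status != mkvparser::E_BUFFER_NOT_FULL)
      return NULL;  // hard parse/read error
    long long pos;
    long len;
    if (segment->LoadCluster(pos, len) < 0)  // pull one more cluster in
      return NULL;
  }
}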
- - return true; -} - -long Track::Seek(long long time_ns, const BlockEntry*& pResult) const { - const long status = GetFirst(pResult); - - if (status < 0) // buffer underflow, etc - return status; - - assert(pResult); - - if (pResult->EOS()) - return 0; - - const Cluster* pCluster = pResult->GetCluster(); - assert(pCluster); - assert(pCluster->GetIndex() >= 0); - - if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) - return 0; - - Cluster** const clusters = m_pSegment->m_clusters; - assert(clusters); - - const long count = m_pSegment->GetCount(); // loaded only, not preloaded - assert(count > 0); - - Cluster** const i = clusters + pCluster->GetIndex(); - assert(i); - assert(*i == pCluster); - assert(pCluster->GetTime() <= time_ns); - - Cluster** const j = clusters + count; - - Cluster** lo = i; - Cluster** hi = j; - - while (lo < hi) { - // INVARIANT: - //[i, lo) <= time_ns - //[lo, hi) ? - //[hi, j) > time_ns - - Cluster** const mid = lo + (hi - lo) / 2; - assert(mid < hi); - - pCluster = *mid; - assert(pCluster); - assert(pCluster->GetIndex() >= 0); - assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); - - const long long t = pCluster->GetTime(); - - if (t <= time_ns) - lo = mid + 1; - else - hi = mid; - - assert(lo <= hi); - } - - assert(lo == hi); - assert(lo > i); - assert(lo <= j); - - while (lo > i) { - pCluster = *--lo; - assert(pCluster); - assert(pCluster->GetTime() <= time_ns); - - pResult = pCluster->GetEntry(this); - - if ((pResult != 0) && !pResult->EOS()) - return 0; - - // landed on empty cluster (no entries) - } - - pResult = GetEOS(); // weird - return 0; -} - -const ContentEncoding* Track::GetContentEncodingByIndex( - unsigned long idx) const { - const ptrdiff_t count = - content_encoding_entries_end_ - content_encoding_entries_; - assert(count >= 0); - - if (idx >= static_cast(count)) - return NULL; - - return content_encoding_entries_[idx]; -} - -unsigned long Track::GetContentEncodingCount() const { - const ptrdiff_t count = - content_encoding_entries_end_ - content_encoding_entries_; - assert(count >= 0); - - return static_cast(count); -} - -long Track::ParseContentEncodingsEntry(long long start, long long size) { - IMkvReader* const pReader = m_pSegment->m_pReader; - assert(pReader); - - long long pos = start; - const long long stop = start + size; - - // Count ContentEncoding elements. 
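// [Editor's sketch] Track::Seek above narrows the cluster range with a
// binary search that maintains the invariant spelled out in its comment:
// clusters in [i, lo) start at or before time_ns, clusters in [hi, j) start
// after it, and [lo, hi) is still unclassified. The same search over a plain
// array of cluster start times, as a self-contained sketch:
#include <cassert>

// Returns the index of the last start time <= time_ns, or -1 if none.
long FindCluster(const long long* start_times, long count, long long time_ns) {
  long lo = 0;
  long hi = count;
  while (lo < hi) {
    // INVARIANT: [0, lo) <= time_ns, [hi, count) > time_ns.
    const long mid = lo + (hi - lo) / 2;
    if (start_times[mid] <= time_ns)
      lo = mid + 1;
    else
      hi = mid;
  }
  assert(lo == hi);
  return lo - 1;  // lo is the first entry strictly greater than time_ns
}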
- int count = 0; - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - // pos now designates start of element - if (id == libwebm::kMkvContentEncoding) - ++count; - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (count <= 0) - return -1; - - content_encoding_entries_ = new (std::nothrow) ContentEncoding*[count]; - if (!content_encoding_entries_) - return -1; - - content_encoding_entries_end_ = content_encoding_entries_; - - pos = start; - while (pos < stop) { - long long id, size; - long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - // pos now designates start of element - if (id == libwebm::kMkvContentEncoding) { - ContentEncoding* const content_encoding = - new (std::nothrow) ContentEncoding(); - if (!content_encoding) - return -1; - - status = content_encoding->ParseContentEncodingEntry(pos, size, pReader); - if (status) { - delete content_encoding; - return status; - } - - *content_encoding_entries_end_++ = content_encoding; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -Track::EOSBlock::EOSBlock() : BlockEntry(NULL, LONG_MIN) {} - -BlockEntry::Kind Track::EOSBlock::GetKind() const { return kBlockEOS; } - -const Block* Track::EOSBlock::GetBlock() const { return NULL; } - -bool PrimaryChromaticity::Parse(IMkvReader* reader, long long read_pos, - long long value_size, bool is_x, - PrimaryChromaticity** chromaticity) { - if (!reader) - return false; - - if (!*chromaticity) - *chromaticity = new PrimaryChromaticity(); - - if (!*chromaticity) - return false; - - PrimaryChromaticity* pc = *chromaticity; - float* value = is_x ? &pc->x : &pc->y; - - double parser_value = 0; - const long long parse_status = - UnserializeFloat(reader, read_pos, value_size, parser_value); - - // Valid range is [0, 1]. Make sure the double is representable as a float - // before casting. 
-  if (parse_status < 0 || parser_value < 0.0 || parser_value > 1.0 ||
-      (parser_value > 0.0 && parser_value < FLT_MIN))
-    return false;
-
-  *value = static_cast<float>(parser_value);
-
-  return true;
-}
-
-bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
-                              long long mm_size, MasteringMetadata** mm) {
-  if (!reader || *mm)
-    return false;
-
-  std::auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
-  if (!mm_ptr.get())
-    return false;
-
-  const long long mm_end = mm_start + mm_size;
-  long long read_pos = mm_start;
-
-  while (read_pos < mm_end) {
-    long long child_id = 0;
-    long long child_size = 0;
-
-    const long long status =
-        ParseElementHeader(reader, read_pos, mm_end, child_id, child_size);
-    if (status < 0)
-      return false;
-
-    if (child_id == libwebm::kMkvLuminanceMax) {
-      double value = 0;
-      const long long value_parse_status =
-          UnserializeFloat(reader, read_pos, child_size, value);
-      mm_ptr->luminance_max = static_cast<float>(value);
-      if (value_parse_status < 0 || mm_ptr->luminance_max < 0.0 ||
-          mm_ptr->luminance_max > 9999.99) {
-        return false;
-      }
-    } else if (child_id == libwebm::kMkvLuminanceMin) {
-      double value = 0;
-      const long long value_parse_status =
-          UnserializeFloat(reader, read_pos, child_size, value);
-      mm_ptr->luminance_min = static_cast<float>(value);
-      if (value_parse_status < 0 || mm_ptr->luminance_min < 0.0 ||
-          mm_ptr->luminance_min > 999.9999) {
-        return false;
-      }
-    } else {
-      bool is_x = false;
-      PrimaryChromaticity** chromaticity;
-      switch (child_id) {
-        case libwebm::kMkvPrimaryRChromaticityX:
-        case libwebm::kMkvPrimaryRChromaticityY:
-          is_x = child_id == libwebm::kMkvPrimaryRChromaticityX;
-          chromaticity = &mm_ptr->r;
-          break;
-        case libwebm::kMkvPrimaryGChromaticityX:
-        case libwebm::kMkvPrimaryGChromaticityY:
-          is_x = child_id == libwebm::kMkvPrimaryGChromaticityX;
-          chromaticity = &mm_ptr->g;
-          break;
-        case libwebm::kMkvPrimaryBChromaticityX:
-        case libwebm::kMkvPrimaryBChromaticityY:
-          is_x = child_id == libwebm::kMkvPrimaryBChromaticityX;
-          chromaticity = &mm_ptr->b;
-          break;
-        case libwebm::kMkvWhitePointChromaticityX:
-        case libwebm::kMkvWhitePointChromaticityY:
-          is_x = child_id == libwebm::kMkvWhitePointChromaticityX;
-          chromaticity = &mm_ptr->white_point;
-          break;
-        default:
-          return false;
-      }
-      const bool value_parse_status = PrimaryChromaticity::Parse(
-          reader, read_pos, child_size, is_x, chromaticity);
-      if (!value_parse_status)
-        return false;
-    }
-
-    read_pos += child_size;
-    if (read_pos > mm_end)
-      return false;
-  }
-
-  *mm = mm_ptr.release();
-  return true;
-}
-
-bool Colour::Parse(IMkvReader* reader, long long colour_start,
-                   long long colour_size, Colour** colour) {
-  if (!reader || *colour)
-    return false;
-
-  std::auto_ptr<Colour> colour_ptr(new Colour());
-  if (!colour_ptr.get())
-    return false;
-
-  const long long colour_end = colour_start + colour_size;
-  long long read_pos = colour_start;
-
-  while (read_pos < colour_end) {
-    long long child_id = 0;
-    long long child_size = 0;
-
-    const long status =
-        ParseElementHeader(reader, read_pos, colour_end, child_id, child_size);
-    if (status < 0)
-      return false;
-
-    if (child_id == libwebm::kMkvMatrixCoefficients) {
-      colour_ptr->matrix_coefficients =
-          UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->matrix_coefficients < 0)
-        return false;
-    } else if (child_id == libwebm::kMkvBitsPerChannel) {
-      colour_ptr->bits_per_channel =
-          UnserializeUInt(reader, read_pos, child_size);
-      if (colour_ptr->bits_per_channel < 0)
-        return false;
-    } else if (child_id ==
libwebm::kMkvChromaSubsamplingHorz) { - colour_ptr->chroma_subsampling_horz = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->chroma_subsampling_horz < 0) - return false; - } else if (child_id == libwebm::kMkvChromaSubsamplingVert) { - colour_ptr->chroma_subsampling_vert = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->chroma_subsampling_vert < 0) - return false; - } else if (child_id == libwebm::kMkvCbSubsamplingHorz) { - colour_ptr->cb_subsampling_horz = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->cb_subsampling_horz < 0) - return false; - } else if (child_id == libwebm::kMkvCbSubsamplingVert) { - colour_ptr->cb_subsampling_vert = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->cb_subsampling_vert < 0) - return false; - } else if (child_id == libwebm::kMkvChromaSitingHorz) { - colour_ptr->chroma_siting_horz = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->chroma_siting_horz < 0) - return false; - } else if (child_id == libwebm::kMkvChromaSitingVert) { - colour_ptr->chroma_siting_vert = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->chroma_siting_vert < 0) - return false; - } else if (child_id == libwebm::kMkvRange) { - colour_ptr->range = UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->range < 0) - return false; - } else if (child_id == libwebm::kMkvTransferCharacteristics) { - colour_ptr->transfer_characteristics = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->transfer_characteristics < 0) - return false; - } else if (child_id == libwebm::kMkvPrimaries) { - colour_ptr->primaries = UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->primaries < 0) - return false; - } else if (child_id == libwebm::kMkvMaxCLL) { - colour_ptr->max_cll = UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->max_cll < 0) - return false; - } else if (child_id == libwebm::kMkvMaxFALL) { - colour_ptr->max_fall = UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->max_fall < 0) - return false; - } else if (child_id == libwebm::kMkvMasteringMetadata) { - if (!MasteringMetadata::Parse(reader, read_pos, child_size, - &colour_ptr->mastering_metadata)) - return false; - } else { - return false; - } - - read_pos += child_size; - if (read_pos > colour_end) - return false; - } - *colour = colour_ptr.release(); - return true; -} - -bool Projection::Parse(IMkvReader* reader, long long start, long long size, - Projection** projection) { - if (!reader || *projection) - return false; - - std::auto_ptr projection_ptr(new Projection()); - if (!projection_ptr.get()) - return false; - - const long long end = start + size; - long long read_pos = start; - - while (read_pos < end) { - long long child_id = 0; - long long child_size = 0; - - const long long status = - ParseElementHeader(reader, read_pos, end, child_id, child_size); - if (status < 0) - return false; - - if (child_id == libwebm::kMkvProjectionType) { - long long projection_type = kTypeNotPresent; - projection_type = UnserializeUInt(reader, read_pos, child_size); - if (projection_type < 0) - return false; - - projection_ptr->type = static_cast(projection_type); - } else if (child_id == libwebm::kMkvProjectionPrivate) { - unsigned char* data = SafeArrayAlloc(1, child_size); - - if (data == NULL) - return false; - - const int status = - reader->Read(read_pos, static_cast(child_size), data); - - if (status) { - delete[] data; - return false; - } - - projection_ptr->private_data = 
data; - projection_ptr->private_data_length = static_cast(child_size); - } else { - double value = 0; - const long long value_parse_status = - UnserializeFloat(reader, read_pos, child_size, value); - // Make sure value is representable as a float before casting. - if (value_parse_status < 0 || value < -FLT_MAX || value > FLT_MAX || - (value > 0.0 && value < FLT_MIN)) { - return false; - } - - switch (child_id) { - case libwebm::kMkvProjectionPoseYaw: - projection_ptr->pose_yaw = static_cast(value); - break; - case libwebm::kMkvProjectionPosePitch: - projection_ptr->pose_pitch = static_cast(value); - break; - case libwebm::kMkvProjectionPoseRoll: - projection_ptr->pose_roll = static_cast(value); - break; - default: - return false; - } - } - - read_pos += child_size; - if (read_pos > end) - return false; - } - - *projection = projection_ptr.release(); - return true; -} - -VideoTrack::VideoTrack(Segment* pSegment, long long element_start, - long long element_size) - : Track(pSegment, element_start, element_size), - m_colour(NULL), - m_projection(NULL) {} - -VideoTrack::~VideoTrack() { - delete m_colour; - delete m_projection; -} - -long VideoTrack::Parse(Segment* pSegment, const Info& info, - long long element_start, long long element_size, - VideoTrack*& pResult) { - if (pResult) - return -1; - - if (info.type != Track::kVideo) - return -1; - - long long width = 0; - long long height = 0; - long long display_width = 0; - long long display_height = 0; - long long display_unit = 0; - long long stereo_mode = 0; - - double rate = 0.0; - - IMkvReader* const pReader = pSegment->m_pReader; - - const Settings& s = info.settings; - assert(s.start >= 0); - assert(s.size >= 0); - - long long pos = s.start; - assert(pos >= 0); - - const long long stop = pos + s.size; - - Colour* colour = NULL; - Projection* projection = NULL; - - while (pos < stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvPixelWidth) { - width = UnserializeUInt(pReader, pos, size); - - if (width <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvPixelHeight) { - height = UnserializeUInt(pReader, pos, size); - - if (height <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDisplayWidth) { - display_width = UnserializeUInt(pReader, pos, size); - - if (display_width <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDisplayHeight) { - display_height = UnserializeUInt(pReader, pos, size); - - if (display_height <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDisplayUnit) { - display_unit = UnserializeUInt(pReader, pos, size); - - if (display_unit < 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvStereoMode) { - stereo_mode = UnserializeUInt(pReader, pos, size); - - if (stereo_mode < 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvFrameRate) { - const long status = UnserializeFloat(pReader, pos, size, rate); - - if (status < 0) - return status; - - if (rate <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvColour) { - if (!Colour::Parse(pReader, pos, size, &colour)) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvProjection) { - if (!Projection::Parse(pReader, pos, size, &projection)) - return E_FILE_FORMAT_INVALID; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - 
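// [Editor's sketch] Both PrimaryChromaticity::Parse and the projection-pose
// branch above read an EBML float into a double and only then narrow it, so
// they must reject values a float cannot represent: magnitude above FLT_MAX,
// or positive but below the smallest normal float, FLT_MIN. That guard in
// isolation:
#include <cfloat>

bool NarrowToFloat(double in, float* out) {
  if (in < -FLT_MAX || in > FLT_MAX)
    return false;  // would overflow a float
  if (in > 0.0 && in < FLT_MIN)
    return false;  // would underflow to a subnormal or zero
  *out = static_cast<float>(in);
  return true;
}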
VideoTrack* const pTrack = - new (std::nothrow) VideoTrack(pSegment, element_start, element_size); - - if (pTrack == NULL) - return -1; // generic error - - const int status = info.Copy(pTrack->m_info); - - if (status) { // error - delete pTrack; - return status; - } - - pTrack->m_width = width; - pTrack->m_height = height; - pTrack->m_display_width = display_width; - pTrack->m_display_height = display_height; - pTrack->m_display_unit = display_unit; - pTrack->m_stereo_mode = stereo_mode; - pTrack->m_rate = rate; - pTrack->m_colour = colour; - pTrack->m_projection = projection; - - pResult = pTrack; - return 0; // success -} - -bool VideoTrack::VetEntry(const BlockEntry* pBlockEntry) const { - return Track::VetEntry(pBlockEntry) && pBlockEntry->GetBlock()->IsKey(); -} - -long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const { - const long status = GetFirst(pResult); - - if (status < 0) // buffer underflow, etc - return status; - - assert(pResult); - - if (pResult->EOS()) - return 0; - - const Cluster* pCluster = pResult->GetCluster(); - assert(pCluster); - assert(pCluster->GetIndex() >= 0); - - if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) - return 0; - - Cluster** const clusters = m_pSegment->m_clusters; - assert(clusters); - - const long count = m_pSegment->GetCount(); // loaded only, not pre-loaded - assert(count > 0); - - Cluster** const i = clusters + pCluster->GetIndex(); - assert(i); - assert(*i == pCluster); - assert(pCluster->GetTime() <= time_ns); - - Cluster** const j = clusters + count; - - Cluster** lo = i; - Cluster** hi = j; - - while (lo < hi) { - // INVARIANT: - //[i, lo) <= time_ns - //[lo, hi) ? - //[hi, j) > time_ns - - Cluster** const mid = lo + (hi - lo) / 2; - assert(mid < hi); - - pCluster = *mid; - assert(pCluster); - assert(pCluster->GetIndex() >= 0); - assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); - - const long long t = pCluster->GetTime(); - - if (t <= time_ns) - lo = mid + 1; - else - hi = mid; - - assert(lo <= hi); - } - - assert(lo == hi); - assert(lo > i); - assert(lo <= j); - - pCluster = *--lo; - assert(pCluster); - assert(pCluster->GetTime() <= time_ns); - - pResult = pCluster->GetEntry(this, time_ns); - - if ((pResult != 0) && !pResult->EOS()) // found a keyframe - return 0; - - while (lo != i) { - pCluster = *--lo; - assert(pCluster); - assert(pCluster->GetTime() <= time_ns); - - pResult = pCluster->GetEntry(this, time_ns); - - if ((pResult != 0) && !pResult->EOS()) - return 0; - } - - // weird: we're on the first cluster, but no keyframe found - // should never happen but we must return something anyway - - pResult = GetEOS(); - return 0; -} - -Colour* VideoTrack::GetColour() const { return m_colour; } - -Projection* VideoTrack::GetProjection() const { return m_projection; } - -long long VideoTrack::GetWidth() const { return m_width; } - -long long VideoTrack::GetHeight() const { return m_height; } - -long long VideoTrack::GetDisplayWidth() const { - return m_display_width > 0 ? m_display_width : GetWidth(); -} - -long long VideoTrack::GetDisplayHeight() const { - return m_display_height > 0 ? 
m_display_height : GetHeight(); -} - -long long VideoTrack::GetDisplayUnit() const { return m_display_unit; } - -long long VideoTrack::GetStereoMode() const { return m_stereo_mode; } - -double VideoTrack::GetFrameRate() const { return m_rate; } - -AudioTrack::AudioTrack(Segment* pSegment, long long element_start, - long long element_size) - : Track(pSegment, element_start, element_size) {} - -long AudioTrack::Parse(Segment* pSegment, const Info& info, - long long element_start, long long element_size, - AudioTrack*& pResult) { - if (pResult) - return -1; - - if (info.type != Track::kAudio) - return -1; - - IMkvReader* const pReader = pSegment->m_pReader; - - const Settings& s = info.settings; - assert(s.start >= 0); - assert(s.size >= 0); - - long long pos = s.start; - assert(pos >= 0); - - const long long stop = pos + s.size; - - double rate = 8000.0; // MKV default - long long channels = 1; - long long bit_depth = 0; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvSamplingFrequency) { - status = UnserializeFloat(pReader, pos, size, rate); - - if (status < 0) - return status; - - if (rate <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvChannels) { - channels = UnserializeUInt(pReader, pos, size); - - if (channels <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvBitDepth) { - bit_depth = UnserializeUInt(pReader, pos, size); - - if (bit_depth <= 0) - return E_FILE_FORMAT_INVALID; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - AudioTrack* const pTrack = - new (std::nothrow) AudioTrack(pSegment, element_start, element_size); - - if (pTrack == NULL) - return -1; // generic error - - const int status = info.Copy(pTrack->m_info); - - if (status) { - delete pTrack; - return status; - } - - pTrack->m_rate = rate; - pTrack->m_channels = channels; - pTrack->m_bitDepth = bit_depth; - - pResult = pTrack; - return 0; // success -} - -double AudioTrack::GetSamplingRate() const { return m_rate; } - -long long AudioTrack::GetChannels() const { return m_channels; } - -long long AudioTrack::GetBitDepth() const { return m_bitDepth; } - -Tracks::Tracks(Segment* pSegment, long long start, long long size_, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(start), - m_size(size_), - m_element_start(element_start), - m_element_size(element_size), - m_trackEntries(NULL), - m_trackEntriesEnd(NULL) {} - -long Tracks::Parse() { - assert(m_trackEntries == NULL); - assert(m_trackEntriesEnd == NULL); - - const long long stop = m_start + m_size; - IMkvReader* const pReader = m_pSegment->m_pReader; - - int count = 0; - long long pos = m_start; - - while (pos < stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // weird - continue; - - if (id == libwebm::kMkvTrackEntry) - ++count; - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - if (count <= 0) - return 0; // success - - m_trackEntries = new (std::nothrow) Track*[count]; - - if (m_trackEntries == NULL) - return -1; - - m_trackEntriesEnd = m_trackEntries; - - pos = m_start; - - while (pos < stop) { - const long long element_start = pos; - - 
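// [Editor's sketch] AudioTrack::Parse above seeds its locals before the loop
// so that Matroska defaults apply when the child elements are absent:
// SamplingFrequency defaults to 8000 Hz and Channels to 1, while a BitDepth
// of 0 simply means "not specified". The same convention, spelled out:
struct AudioDefaults {
  double rate;
  long long channels;
  long long bit_depth;
};

AudioDefaults MakeAudioDefaults() {
  AudioDefaults d;
  d.rate = 8000.0;  // MKV default when SamplingFrequency is omitted
  d.channels = 1;   // MKV default when Channels is omitted
  d.bit_depth = 0;  // 0 = element not present
  return d;
}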
long long id, payload_size; - - const long status = - ParseElementHeader(pReader, pos, stop, id, payload_size); - - if (status < 0) // error - return status; - - if (payload_size == 0) // weird - continue; - - const long long payload_stop = pos + payload_size; - assert(payload_stop <= stop); // checked in ParseElement - - const long long element_size = payload_stop - element_start; - - if (id == libwebm::kMkvTrackEntry) { - Track*& pTrack = *m_trackEntriesEnd; - pTrack = NULL; - - const long status = ParseTrackEntry(pos, payload_size, element_start, - element_size, pTrack); - if (status) - return status; - - if (pTrack) - ++m_trackEntriesEnd; - } - - pos = payload_stop; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - return 0; // success -} - -unsigned long Tracks::GetTracksCount() const { - const ptrdiff_t result = m_trackEntriesEnd - m_trackEntries; - assert(result >= 0); - - return static_cast(result); -} - -long Tracks::ParseTrackEntry(long long track_start, long long track_size, - long long element_start, long long element_size, - Track*& pResult) const { - if (pResult) - return -1; - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = track_start; - const long long track_stop = track_start + track_size; - - Track::Info info; - - info.type = 0; - info.number = 0; - info.uid = 0; - info.defaultDuration = 0; - - Track::Settings v; - v.start = -1; - v.size = -1; - - Track::Settings a; - a.start = -1; - a.size = -1; - - Track::Settings e; // content_encodings_settings; - e.start = -1; - e.size = -1; - - long long lacing = 1; // default is true - - while (pos < track_stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, track_stop, id, size); - - if (status < 0) // error - return status; - - if (size < 0) - return E_FILE_FORMAT_INVALID; - - const long long start = pos; - - if (id == libwebm::kMkvVideo) { - v.start = start; - v.size = size; - } else if (id == libwebm::kMkvAudio) { - a.start = start; - a.size = size; - } else if (id == libwebm::kMkvContentEncodings) { - e.start = start; - e.size = size; - } else if (id == libwebm::kMkvTrackUID) { - if (size > 8) - return E_FILE_FORMAT_INVALID; - - info.uid = 0; - - long long pos_ = start; - const long long pos_end = start + size; - - while (pos_ != pos_end) { - unsigned char b; - - const int status = pReader->Read(pos_, 1, &b); - - if (status) - return status; - - info.uid <<= 8; - info.uid |= b; - - ++pos_; - } - } else if (id == libwebm::kMkvTrackNumber) { - const long long num = UnserializeUInt(pReader, pos, size); - - if ((num <= 0) || (num > 127)) - return E_FILE_FORMAT_INVALID; - - info.number = static_cast(num); - } else if (id == libwebm::kMkvTrackType) { - const long long type = UnserializeUInt(pReader, pos, size); - - if ((type <= 0) || (type > 254)) - return E_FILE_FORMAT_INVALID; - - info.type = static_cast(type); - } else if (id == libwebm::kMkvName) { - const long status = - UnserializeString(pReader, pos, size, info.nameAsUTF8); - - if (status) - return status; - } else if (id == libwebm::kMkvLanguage) { - const long status = UnserializeString(pReader, pos, size, info.language); - - if (status) - return status; - } else if (id == libwebm::kMkvDefaultDuration) { - const long long duration = UnserializeUInt(pReader, pos, size); - - if (duration < 0) - return E_FILE_FORMAT_INVALID; - - info.defaultDuration = static_cast(duration); - } else if (id == libwebm::kMkvCodecID) { - const long status = 
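// [Editor's sketch] The TrackUID branch above reads up to eight bytes one at
// a time and folds them in big-endian order with `uid = (uid << 8) | b`. The
// same accumulation against an in-memory buffer, as a standalone sketch:
#include <cstdio>

unsigned long long ReadUIntBE(const unsigned char* p, int n) {
  unsigned long long v = 0;
  for (int i = 0; i < n; ++i) {
    v <<= 8;    // make room for the next byte
    v |= p[i];  // most significant byte arrives first
  }
  return v;
}

int main() {
  const unsigned char uid[] = {0x01, 0x02, 0x03, 0x04};
  std::printf("0x%llx\n", ReadUIntBE(uid, 4));  // prints 0x1020304
  return 0;
}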
UnserializeString(pReader, pos, size, info.codecId); - - if (status) - return status; - } else if (id == libwebm::kMkvFlagLacing) { - lacing = UnserializeUInt(pReader, pos, size); - - if ((lacing < 0) || (lacing > 1)) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvCodecPrivate) { - delete[] info.codecPrivate; - info.codecPrivate = NULL; - info.codecPrivateSize = 0; - - const size_t buflen = static_cast(size); - - if (buflen) { - unsigned char* buf = SafeArrayAlloc(1, buflen); - - if (buf == NULL) - return -1; - - const int status = pReader->Read(pos, static_cast(buflen), buf); - - if (status) { - delete[] buf; - return status; - } - - info.codecPrivate = buf; - info.codecPrivateSize = buflen; - } - } else if (id == libwebm::kMkvCodecName) { - const long status = - UnserializeString(pReader, pos, size, info.codecNameAsUTF8); - - if (status) - return status; - } else if (id == libwebm::kMkvCodecDelay) { - info.codecDelay = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvSeekPreRoll) { - info.seekPreRoll = UnserializeUInt(pReader, pos, size); - } - - pos += size; // consume payload - if (pos > track_stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != track_stop) - return E_FILE_FORMAT_INVALID; - - if (info.number <= 0) // not specified - return E_FILE_FORMAT_INVALID; - - if (GetTrackByNumber(info.number)) - return E_FILE_FORMAT_INVALID; - - if (info.type <= 0) // not specified - return E_FILE_FORMAT_INVALID; - - info.lacing = (lacing > 0) ? true : false; - - if (info.type == Track::kVideo) { - if (v.start < 0) - return E_FILE_FORMAT_INVALID; - - if (a.start >= 0) - return E_FILE_FORMAT_INVALID; - - info.settings = v; - - VideoTrack* pTrack = NULL; - - const long status = VideoTrack::Parse(m_pSegment, info, element_start, - element_size, pTrack); - - if (status) - return status; - - pResult = pTrack; - assert(pResult); - - if (e.start >= 0) - pResult->ParseContentEncodingsEntry(e.start, e.size); - } else if (info.type == Track::kAudio) { - if (a.start < 0) - return E_FILE_FORMAT_INVALID; - - if (v.start >= 0) - return E_FILE_FORMAT_INVALID; - - info.settings = a; - - AudioTrack* pTrack = NULL; - - const long status = AudioTrack::Parse(m_pSegment, info, element_start, - element_size, pTrack); - - if (status) - return status; - - pResult = pTrack; - assert(pResult); - - if (e.start >= 0) - pResult->ParseContentEncodingsEntry(e.start, e.size); - } else { - // neither video nor audio - probably metadata or subtitles - - if (a.start >= 0) - return E_FILE_FORMAT_INVALID; - - if (v.start >= 0) - return E_FILE_FORMAT_INVALID; - - if (info.type == Track::kMetadata && e.start >= 0) - return E_FILE_FORMAT_INVALID; - - info.settings.start = -1; - info.settings.size = 0; - - Track* pTrack = NULL; - - const long status = - Track::Create(m_pSegment, info, element_start, element_size, pTrack); - - if (status) - return status; - - pResult = pTrack; - assert(pResult); - } - - return 0; // success -} - -Tracks::~Tracks() { - Track** i = m_trackEntries; - Track** const j = m_trackEntriesEnd; - - while (i != j) { - Track* const pTrack = *i++; - delete pTrack; - } - - delete[] m_trackEntries; -} - -const Track* Tracks::GetTrackByNumber(long tn) const { - if (tn < 0) - return NULL; - - Track** i = m_trackEntries; - Track** const j = m_trackEntriesEnd; - - while (i != j) { - Track* const pTrack = *i++; - - if (pTrack == NULL) - continue; - - if (tn == pTrack->GetNumber()) - return pTrack; - } - - return NULL; // not found -} - -const Track* Tracks::GetTrackByIndex(unsigned 
long idx) const { - const ptrdiff_t count = m_trackEntriesEnd - m_trackEntries; - - if (idx >= static_cast(count)) - return NULL; - - return m_trackEntries[idx]; -} - -long Cluster::Load(long long& pos, long& len) const { - if (m_pSegment == NULL) - return E_PARSE_FAILED; - - if (m_timecode >= 0) // at least partially loaded - return 0; - - if (m_pos != m_element_start || m_element_size >= 0) - return E_PARSE_FAILED; - - IMkvReader* const pReader = m_pSegment->m_pReader; - long long total, avail; - const int status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - if (total >= 0 && (avail > total || m_pos > total)) - return E_FILE_FORMAT_INVALID; - - pos = m_pos; - - long long cluster_size = -1; - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error or underflow - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id_ = ReadID(pReader, pos, len); - - if (id_ < 0) // error - return static_cast(id_); - - if (id_ != libwebm::kMkvCluster) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume id - - // read cluster size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast(cluster_size); - - if (size == 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume length of size of element - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size != unknown_size) - cluster_size = size; - - // pos points to start of payload - long long timecode = -1; - long long new_pos = -1; - bool bBlock = false; - - long long cluster_stop = (cluster_size < 0) ? -1 : pos + cluster_size; - - for (;;) { - if ((cluster_stop >= 0) && (pos >= cluster_stop)) - break; - - // Parse ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) // error - return static_cast(id); - - if (id == 0) - return E_FILE_FORMAT_INVALID; - - // This is the distinguished set of ID's we use to determine - // that we have exhausted the sub-element's inside the cluster - // whose ID we parsed earlier. 
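// [Editor's sketch] Cluster::Load above computes `unknown_size` as
// (1LL << (7 * len)) - 1: an EBML size field of len bytes carries 7 * len
// usable bits, and the all-ones pattern is reserved to mean "size unknown"
// (common for live-streamed clusters). Worked out:
#include <cstdio>

long long EbmlUnknownSize(int len) {
  return (1LL << (7 * len)) - 1;  // all 7 * len payload bits set
}

int main() {
  // 1-byte size field: 0xFF encodes 127 == unknown; 2 bytes: 16383; etc.
  for (int len = 1; len <= 4; ++len)
    std::printf("len=%d unknown_size=%lld\n", len, EbmlUnknownSize(len));
  return 0;
}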
- - if (id == libwebm::kMkvCluster) - break; - - if (id == libwebm::kMkvCues) - break; - - pos += len; // consume ID field - - // Parse Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume size field - - if ((cluster_stop >= 0) && (pos > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - // pos now points to start of payload - - if (size == 0) - continue; - - if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvTimecode) { - len = static_cast(size); - - if ((pos + size) > avail) - return E_BUFFER_NOT_FULL; - - timecode = UnserializeUInt(pReader, pos, size); - - if (timecode < 0) // error (or underflow) - return static_cast(timecode); - - new_pos = pos + size; - - if (bBlock) - break; - } else if (id == libwebm::kMkvBlockGroup) { - bBlock = true; - break; - } else if (id == libwebm::kMkvSimpleBlock) { - bBlock = true; - break; - } - - pos += size; // consume payload - if (cluster_stop >= 0 && pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - } - - if (cluster_stop >= 0 && pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - - if (timecode < 0) // no timecode found - return E_FILE_FORMAT_INVALID; - - if (!bBlock) - return E_FILE_FORMAT_INVALID; - - m_pos = new_pos; // designates position just beyond timecode payload - m_timecode = timecode; // m_timecode >= 0 means we're partially loaded - - if (cluster_size >= 0) - m_element_size = cluster_stop - m_element_start; - - return 0; -} - -long Cluster::Parse(long long& pos, long& len) const { - long status = Load(pos, len); - - if (status < 0) - return status; - - if (m_pos < m_element_start || m_timecode < 0) - return E_PARSE_FAILED; - - const long long cluster_stop = - (m_element_size < 0) ? 
-1 : m_element_start + m_element_size; - - if ((cluster_stop >= 0) && (m_pos >= cluster_stop)) - return 1; // nothing else to do - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long total, avail; - - status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - if (total >= 0 && avail > total) - return E_FILE_FORMAT_INVALID; - - pos = m_pos; - - for (;;) { - if ((cluster_stop >= 0) && (pos >= cluster_stop)) - break; - - if ((total >= 0) && (pos >= total)) { - if (m_element_size < 0) - m_element_size = pos - m_element_start; - - break; - } - - // Parse ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - // This is the distinguished set of ID's we use to determine - // that we have exhausted the sub-element's inside the cluster - // whose ID we parsed earlier. - - if ((id == libwebm::kMkvCluster) || (id == libwebm::kMkvCues)) { - if (m_element_size < 0) - m_element_size = pos - m_element_start; - - break; - } - - pos += len; // consume ID field - - // Parse Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume size field - - if ((cluster_stop >= 0) && (pos > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - // pos now points to start of payload - - if (size == 0) - continue; - - // const long long block_start = pos; - const long long block_stop = pos + size; - - if (cluster_stop >= 0) { - if (block_stop > cluster_stop) { - if (id == libwebm::kMkvBlockGroup || id == libwebm::kMkvSimpleBlock) { - return E_FILE_FORMAT_INVALID; - } - - pos = cluster_stop; - break; - } - } else if ((total >= 0) && (block_stop > total)) { - m_element_size = total - m_element_start; - pos = total; - break; - } else if (block_stop > avail) { - len = static_cast(size); - return E_BUFFER_NOT_FULL; - } - - Cluster* const this_ = const_cast(this); - - if (id == libwebm::kMkvBlockGroup) - return this_->ParseBlockGroup(size, pos, len); - - if (id == libwebm::kMkvSimpleBlock) - return this_->ParseSimpleBlock(size, pos, len); - - pos += size; // consume payload - if (cluster_stop >= 0 && pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - } - - if (m_element_size < 1) - return E_FILE_FORMAT_INVALID; - - m_pos = pos; - if (cluster_stop >= 0 && m_pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - - if (m_entries_count > 0) { - const long idx = m_entries_count - 1; - - const BlockEntry* const pLast = m_entries[idx]; - if (pLast == NULL) - return E_PARSE_FAILED; - - const Block* const pBlock = pLast->GetBlock(); - if (pBlock == NULL) - return E_PARSE_FAILED; - - const 
long long start = pBlock->m_start; - - if ((total >= 0) && (start > total)) - return E_PARSE_FAILED; // defend against trucated stream - - const long long size = pBlock->m_size; - - const long long stop = start + size; - if (cluster_stop >= 0 && stop > cluster_stop) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && (stop > total)) - return E_PARSE_FAILED; // defend against trucated stream - } - - return 1; // no more entries -} - -long Cluster::ParseSimpleBlock(long long block_size, long long& pos, - long& len) { - const long long block_start = pos; - const long long block_stop = pos + block_size; - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long total, avail; - - long status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - // parse track number - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((pos + len) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long track = ReadUInt(pReader, pos, len); - - if (track < 0) // error - return static_cast(track); - - if (track == 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume track number - - if ((pos + 2) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + 2) > avail) { - len = 2; - return E_BUFFER_NOT_FULL; - } - - pos += 2; // consume timecode - - if ((pos + 1) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - unsigned char flags; - - status = pReader->Read(pos, 1, &flags); - - if (status < 0) { // error or underflow - len = 1; - return status; - } - - ++pos; // consume flags byte - assert(pos <= avail); - - if (pos >= block_stop) - return E_FILE_FORMAT_INVALID; - - const int lacing = int(flags & 0x06) >> 1; - - if ((lacing != 0) && (block_stop > avail)) { - len = static_cast(block_stop - pos); - return E_BUFFER_NOT_FULL; - } - - status = CreateBlock(libwebm::kMkvSimpleBlock, block_start, block_size, - 0); // DiscardPadding - - if (status != 0) - return status; - - m_pos = block_stop; - - return 0; // success -} - -long Cluster::ParseBlockGroup(long long payload_size, long long& pos, - long& len) { - const long long payload_start = pos; - const long long payload_stop = pos + payload_size; - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long total, avail; - - long status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - if ((total >= 0) && (payload_stop > total)) - return E_FILE_FORMAT_INVALID; - - if (payload_stop > avail) { - len = static_cast(payload_size); - return E_BUFFER_NOT_FULL; - } - - long long discard_padding = 0; - - while (pos < payload_stop) { - // parse sub-block element ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((pos + len) > payload_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) // error - return static_cast(id); - - if (id == 0) // not a valid ID - 
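// [Editor's sketch] ParseSimpleBlock above (and the Block branch of
// ParseBlockGroup that follows) steps over the same block header: an
// EBML-coded track number, a 2-byte signed timecode relative to the cluster,
// then one flags byte whose bits 1-2 ((flags & 0x06) >> 1) select the lacing
// mode. A sketch of that layout for the common 1-byte track-number case;
// multi-byte track numbers are deliberately not handled here:
#include <cstdio>

struct BlockHeader {
  long long track;
  short rel_timecode;  // signed, cluster-relative ticks
  int lacing;          // 0 none, 1 Xiph, 2 fixed-size, 3 EBML
};

bool ParseBlockHeader(const unsigned char* p, BlockHeader* out) {
  if ((p[0] & 0x80) == 0)
    return false;            // track number longer than one EBML byte
  out->track = p[0] & 0x7F;  // strip the EBML length-marker bit
  out->rel_timecode = static_cast<short>((p[1] << 8) | p[2]);
  out->lacing = (p[3] & 0x06) >> 1;
  return true;
}

int main() {
  const unsigned char hdr[] = {0x81, 0x00, 0x40, 0x06};  // track 1, t = +64
  BlockHeader h;
  if (ParseBlockHeader(hdr, &h))
    std::printf("track=%lld t=%d lacing=%d\n", h.track, h.rel_timecode,
                h.lacing);
  return 0;
}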
return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID field - - // Parse Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((pos + len) > payload_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - pos += len; // consume size field - - // pos now points to start of sub-block group payload - - if (pos > payload_stop) - return E_FILE_FORMAT_INVALID; - - if (size == 0) // weird - continue; - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvDiscardPadding) { - status = UnserializeInt(pReader, pos, size, discard_padding); - - if (status < 0) // error - return status; - } - - if (id != libwebm::kMkvBlock) { - pos += size; // consume sub-part of block group - - if (pos > payload_stop) - return E_FILE_FORMAT_INVALID; - - continue; - } - - const long long block_stop = pos + size; - - if (block_stop > payload_stop) - return E_FILE_FORMAT_INVALID; - - // parse track number - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((pos + len) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long track = ReadUInt(pReader, pos, len); - - if (track < 0) // error - return static_cast(track); - - if (track == 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume track number - - if ((pos + 2) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + 2) > avail) { - len = 2; - return E_BUFFER_NOT_FULL; - } - - pos += 2; // consume timecode - - if ((pos + 1) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - unsigned char flags; - - status = pReader->Read(pos, 1, &flags); - - if (status < 0) { // error or underflow - len = 1; - return status; - } - - ++pos; // consume flags byte - assert(pos <= avail); - - if (pos >= block_stop) - return E_FILE_FORMAT_INVALID; - - const int lacing = int(flags & 0x06) >> 1; - - if ((lacing != 0) && (block_stop > avail)) { - len = static_cast(block_stop - pos); - return E_BUFFER_NOT_FULL; - } - - pos = block_stop; // consume block-part of block group - if (pos > payload_stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != payload_stop) - return E_FILE_FORMAT_INVALID; - - status = CreateBlock(libwebm::kMkvBlockGroup, payload_start, payload_size, - discard_padding); - if (status != 0) - return status; - - m_pos = payload_stop; - - return 0; // success -} - -long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const { - assert(m_pos >= m_element_start); - - pEntry = NULL; - - if (index < 0) - return -1; // generic error - - if (m_entries_count < 0) - return E_BUFFER_NOT_FULL; - - assert(m_entries); - assert(m_entries_size > 0); - assert(m_entries_count <= m_entries_size); - - if (index < m_entries_count) { - pEntry = m_entries[index]; - assert(pEntry); - - return 1; // found entry - } - - if (m_element_size < 0) // we don't know cluster end yet - return E_BUFFER_NOT_FULL; // 
underflow - - const long long element_stop = m_element_start + m_element_size; - - if (m_pos >= element_stop) - return 0; // nothing left to parse - - return E_BUFFER_NOT_FULL; // underflow, since more remains to be parsed -} - -Cluster* Cluster::Create(Segment* pSegment, long idx, long long off) { - if (!pSegment || off < 0) - return NULL; - - const long long element_start = pSegment->m_start + off; - - Cluster* const pCluster = - new (std::nothrow) Cluster(pSegment, idx, element_start); - - return pCluster; -} - -Cluster::Cluster() - : m_pSegment(NULL), - m_element_start(0), - m_index(0), - m_pos(0), - m_element_size(0), - m_timecode(0), - m_entries(NULL), - m_entries_size(0), - m_entries_count(0) // means "no entries" -{} - -Cluster::Cluster(Segment* pSegment, long idx, long long element_start - /* long long element_size */) - : m_pSegment(pSegment), - m_element_start(element_start), - m_index(idx), - m_pos(element_start), - m_element_size(-1 /* element_size */), - m_timecode(-1), - m_entries(NULL), - m_entries_size(0), - m_entries_count(-1) // means "has not been parsed yet" -{} - -Cluster::~Cluster() { - if (m_entries_count <= 0) { - delete[] m_entries; - return; - } - - BlockEntry** i = m_entries; - BlockEntry** const j = m_entries + m_entries_count; - - while (i != j) { - BlockEntry* p = *i++; - assert(p); - - delete p; - } - - delete[] m_entries; -} - -bool Cluster::EOS() const { return (m_pSegment == NULL); } - -long Cluster::GetIndex() const { return m_index; } - -long long Cluster::GetPosition() const { - const long long pos = m_element_start - m_pSegment->m_start; - assert(pos >= 0); - - return pos; -} - -long long Cluster::GetElementSize() const { return m_element_size; } - -long Cluster::HasBlockEntries( - const Segment* pSegment, - long long off, // relative to start of segment payload - long long& pos, long& len) { - assert(pSegment); - assert(off >= 0); // relative to segment - - IMkvReader* const pReader = pSegment->m_pReader; - - long long total, avail; - - long status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - pos = pSegment->m_start + off; // absolute - - if ((total >= 0) && (pos >= total)) - return 0; // we don't even have a complete cluster - - const long long segment_stop = - (pSegment->m_size < 0) ? 
-1 : pSegment->m_start + pSegment->m_size; - - long long cluster_stop = -1; // interpreted later to mean "unknown size" - - { - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // need more data - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && ((pos + len) > total)) - return 0; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) // error - return static_cast(id); - - if (id != libwebm::kMkvCluster) - return E_PARSE_FAILED; - - pos += len; // consume Cluster ID field - - // read size field - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && ((pos + len) > total)) - return 0; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast(size); - - if (size == 0) - return 0; // cluster does not have entries - - pos += len; // consume size field - - // pos now points to start of payload - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size != unknown_size) { - cluster_stop = pos + size; - assert(cluster_stop >= 0); - - if ((segment_stop >= 0) && (cluster_stop > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && (cluster_stop > total)) - // return E_FILE_FORMAT_INVALID; //too conservative - return 0; // cluster does not have any entries - } - } - - for (;;) { - if ((cluster_stop >= 0) && (pos >= cluster_stop)) - return 0; // no entries detected - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast(result); - - if (result > 0) // need more data - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) // error - return static_cast(id); - - // This is the distinguished set of ID's we use to determine - // that we have exhausted the sub-element's inside the cluster - // whose ID we parsed earlier. 
- - if (id == libwebm::kMkvCluster) - return 0; // no entries found - - if (id == libwebm::kMkvCues) - return 0; // no entries found - - pos += len; // consume id field - - if ((cluster_stop >= 0) && (pos >= cluster_stop)) - return E_FILE_FORMAT_INVALID; - - // read size field - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // underflow - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - pos += len; // consume size field - - // pos now points to start of payload - - if ((cluster_stop >= 0) && (pos > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if (size == 0) // weird - continue; - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; // not supported inside cluster - - if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvBlockGroup) - return 1; // have at least one entry - - if (id == libwebm::kMkvSimpleBlock) - return 1; // have at least one entry - - pos += size; // consume payload - if (cluster_stop >= 0 && pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - } -} - -long long Cluster::GetTimeCode() const { - long long pos; - long len; - - const long status = Load(pos, len); - - if (status < 0) // error - return status; - - return m_timecode; -} - -long long Cluster::GetTime() const { - const long long tc = GetTimeCode(); - - if (tc < 0) - return tc; - - const SegmentInfo* const pInfo = m_pSegment->GetInfo(); - assert(pInfo); - - const long long scale = pInfo->GetTimeCodeScale(); - assert(scale >= 1); - - const long long t = m_timecode * scale; - - return t; -} - -long long Cluster::GetFirstTime() const { - const BlockEntry* pEntry; - - const long status = GetFirst(pEntry); - - if (status < 0) // error - return status; - - if (pEntry == NULL) // empty cluster - return GetTime(); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - return pBlock->GetTime(this); -} - -long long Cluster::GetLastTime() const { - const BlockEntry* pEntry; - - const long status = GetLast(pEntry); - - if (status < 0) // error - return status; - - if (pEntry == NULL) // empty cluster - return GetTime(); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - return pBlock->GetTime(this); -} - -long Cluster::CreateBlock(long long id, - long long pos, // absolute pos of payload - long long size, long long discard_padding) { - if (id != libwebm::kMkvBlockGroup && id != libwebm::kMkvSimpleBlock) - return E_PARSE_FAILED; - - if (m_entries_count < 0) { // haven't parsed anything yet - assert(m_entries == NULL); - assert(m_entries_size == 0); - - m_entries_size = 1024; - m_entries = new (std::nothrow) BlockEntry*[m_entries_size]; - if (m_entries == NULL) - return -1; - - m_entries_count = 0; - } else { - assert(m_entries); - assert(m_entries_size > 0); - assert(m_entries_count <= m_entries_size); - - if (m_entries_count >= m_entries_size) { - const long entries_size = 2 * m_entries_size; - - BlockEntry** const entries = new (std::nothrow) BlockEntry*[entries_size]; - if (entries == NULL) - return -1; - - BlockEntry** src = m_entries; - BlockEntry** const src_end
= src + m_entries_count; - - BlockEntry** dst = entries; - - while (src != src_end) - *dst++ = *src++; - - delete[] m_entries; - - m_entries = entries; - m_entries_size = entries_size; - } - } - - if (id == libwebm::kMkvBlockGroup) - return CreateBlockGroup(pos, size, discard_padding); - else - return CreateSimpleBlock(pos, size); -} - -long Cluster::CreateBlockGroup(long long start_offset, long long size, - long long discard_padding) { - assert(m_entries); - assert(m_entries_size > 0); - assert(m_entries_count >= 0); - assert(m_entries_count < m_entries_size); - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = start_offset; - const long long stop = start_offset + size; - - // For WebM files, there is a bias towards previous reference times - //(in order to support alt-ref frames, which refer back to the previous - // keyframe). Normally a 0 value is not possible, but here we tenatively - // allow 0 as the value of a reference frame, with the interpretation - // that this is a "previous" reference time. - - long long prev = 1; // nonce - long long next = 0; // nonce - long long duration = -1; // really, this is unsigned - - long long bpos = -1; - long long bsize = -1; - - while (pos < stop) { - long len; - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (pos + len) > stop) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - assert(size >= 0); // TODO - assert((pos + len) <= stop); - - pos += len; // consume size - - if (id == libwebm::kMkvBlock) { - if (bpos < 0) { // Block ID - bpos = pos; - bsize = size; - } - } else if (id == libwebm::kMkvBlockDuration) { - if (size > 8) - return E_FILE_FORMAT_INVALID; - - duration = UnserializeUInt(pReader, pos, size); - - if (duration < 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvReferenceBlock) { - if (size > 8 || size <= 0) - return E_FILE_FORMAT_INVALID; - const long size_ = static_cast<long>(size); - - long long time; - - long status = UnserializeInt(pReader, pos, size_, time); - assert(status == 0); - if (status != 0) - return -1; - - if (time <= 0) // see note above - prev = time; - else - next = time; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - if (bpos < 0) - return E_FILE_FORMAT_INVALID; - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - assert(bsize >= 0); - - const long idx = m_entries_count; - - BlockEntry** const ppEntry = m_entries + idx; - BlockEntry*& pEntry = *ppEntry; - - pEntry = new (std::nothrow) - BlockGroup(this, idx, bpos, bsize, prev, next, duration, discard_padding); - - if (pEntry == NULL) - return -1; // generic error - - BlockGroup* const p = static_cast<BlockGroup*>(pEntry); - - const long status = p->Parse(); - - if (status == 0) { // success - ++m_entries_count; - return 0; - } - - delete pEntry; - pEntry = 0; - - return status; -} - -long Cluster::CreateSimpleBlock(long long st, long long sz) { - assert(m_entries); - assert(m_entries_size > 0); - assert(m_entries_count >= 0); - assert(m_entries_count < m_entries_size); - - const long idx = m_entries_count; - - BlockEntry** const ppEntry = m_entries + idx; - BlockEntry*& pEntry = *ppEntry; - - pEntry = new (std::nothrow) SimpleBlock(this, idx, st, sz); - - if (pEntry == NULL) - return -1; // generic error - - SimpleBlock* const p = static_cast<SimpleBlock*>(pEntry); - - const long status = p->Parse(); - - if (status == 0) { - ++m_entries_count; - return 0; - } - - delete pEntry; - pEntry = 0; - - return status; -} -
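The reference scan in CreateBlockGroup above feeds the keyframe decision that BlockGroup::Parse makes further down: prev starts at the nonce 1 ("no backward reference seen") and next at the nonce 0, a ReferenceBlock time at or below zero is recorded as a backward reference (the WebM alt-ref bias described in the comment), and a positive time as a forward one. Distilled into a sketch with hypothetical helper names:

    // time <= 0 points backwards (possibly to an alt-ref/previous
    // keyframe); a positive time points forwards.
    void ClassifyReference(long long time, long long& prev, long long& next) {
      if (time <= 0)
        prev = time;
      else
        next = time;
    }

    // BlockGroup::Parse then marks the block as a key frame only when
    // neither kind of reference was seen, i.e. both nonces survived:
    bool IsKeyBlockGroup(long long prev, long long next) {
      return (prev > 0) && (next <= 0);
    }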
-long Cluster::GetFirst(const BlockEntry*& pFirst) const { - if (m_entries_count <= 0) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) { // error - pFirst = NULL; - return status; - } - - if (m_entries_count <= 0) { // empty cluster - pFirst = NULL; - return 0; - } - } - - assert(m_entries); - - pFirst = m_entries[0]; - assert(pFirst); - - return 0; // success -} - -long Cluster::GetLast(const BlockEntry*& pLast) const { - for (;;) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) { // error - pLast = NULL; - return status; - } - - if (status > 0) // no new block - break; - } - - if (m_entries_count <= 0) { - pLast = NULL; - return 0; - } - - assert(m_entries); - - const long idx = m_entries_count - 1; - - pLast = m_entries[idx]; - assert(pLast); - - return 0; -} - -long Cluster::GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const { - assert(pCurr); - assert(m_entries); - assert(m_entries_count > 0); - - size_t idx = pCurr->GetIndex(); - assert(idx < size_t(m_entries_count)); - assert(m_entries[idx] == pCurr); - - ++idx; - - if (idx >= size_t(m_entries_count)) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) { // error - pNext = NULL; - return status; - } - - if (status > 0) { - pNext = NULL; - return 0; - } - - assert(m_entries); - assert(m_entries_count > 0); - assert(idx < size_t(m_entries_count)); - } - - pNext = m_entries[idx]; - assert(pNext); - - return 0; -} - -long Cluster::GetEntryCount() const { return m_entries_count; } - -const BlockEntry* Cluster::GetEntry(const Track* pTrack, - long long time_ns) const { - assert(pTrack); - - if (m_pSegment == NULL) // this is the special EOS cluster - return pTrack->GetEOS(); - - const BlockEntry* pResult = pTrack->GetEOS(); - - long index = 0; - - for (;;) { - if (index >= m_entries_count) { - long long pos; - long len; - - const long status = Parse(pos, len); - assert(status >= 0); - - if (status > 0) // completely parsed, and no more entries - return pResult; - - if (status < 0) // should never happen - return 0; - - assert(m_entries); - assert(index < m_entries_count); - } - - const BlockEntry* const pEntry = m_entries[index]; - assert(pEntry); - assert(!pEntry->EOS()); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - if (pBlock->GetTrackNumber() != pTrack->GetNumber()) { - ++index; - continue; - } - - if (pTrack->VetEntry(pEntry)) { - if (time_ns < 0) // just want first candidate block - return pEntry; - - const long long ns = pBlock->GetTime(this); - - if (ns > time_ns) - return pResult; - - pResult = pEntry; // have a candidate - } else if (time_ns >= 0) { - const long long ns = pBlock->GetTime(this); - - if (ns > time_ns) - return pResult; - } - - ++index; - } -} - -const BlockEntry* Cluster::GetEntry(const CuePoint& cp, - const CuePoint::TrackPosition& tp) const { - assert(m_pSegment); - const long long tc = cp.GetTimeCode(); - - if (tp.m_block > 0) { - const long block = static_cast<long>(tp.m_block); - const long index = block - 1; - - while (index >= m_entries_count) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) // TODO: can this happen?
- return NULL; - - if (status > 0) // nothing remains to be parsed - return NULL; - } - - const BlockEntry* const pEntry = m_entries[index]; - assert(pEntry); - assert(!pEntry->EOS()); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - if ((pBlock->GetTrackNumber() == tp.m_track) && - (pBlock->GetTimeCode(this) == tc)) { - return pEntry; - } - } - - long index = 0; - - for (;;) { - if (index >= m_entries_count) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) // TODO: can this happen? - return NULL; - - if (status > 0) // nothing remains to be parsed - return NULL; - - assert(m_entries); - assert(index < m_entries_count); - } - - const BlockEntry* const pEntry = m_entries[index]; - assert(pEntry); - assert(!pEntry->EOS()); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - if (pBlock->GetTrackNumber() != tp.m_track) { - ++index; - continue; - } - - const long long tc_ = pBlock->GetTimeCode(this); - - if (tc_ < tc) { - ++index; - continue; - } - - if (tc_ > tc) - return NULL; - - const Tracks* const pTracks = m_pSegment->GetTracks(); - assert(pTracks); - - const long tn = static_cast<long>(tp.m_track); - const Track* const pTrack = pTracks->GetTrackByNumber(tn); - - if (pTrack == NULL) - return NULL; - - const long long type = pTrack->GetType(); - - if (type == 2) // audio - return pEntry; - - if (type != 1) // not video - return NULL; - - if (!pBlock->IsKey()) - return NULL; - - return pEntry; - } -} - -BlockEntry::BlockEntry(Cluster* p, long idx) : m_pCluster(p), m_index(idx) {} -BlockEntry::~BlockEntry() {} -const Cluster* BlockEntry::GetCluster() const { return m_pCluster; } -long BlockEntry::GetIndex() const { return m_index; } - -SimpleBlock::SimpleBlock(Cluster* pCluster, long idx, long long start, - long long size) - : BlockEntry(pCluster, idx), m_block(start, size, 0) {} - -long SimpleBlock::Parse() { return m_block.Parse(m_pCluster); } -BlockEntry::Kind SimpleBlock::GetKind() const { return kBlockSimple; } -const Block* SimpleBlock::GetBlock() const { return &m_block; } - -BlockGroup::BlockGroup(Cluster* pCluster, long idx, long long block_start, - long long block_size, long long prev, long long next, - long long duration, long long discard_padding) - : BlockEntry(pCluster, idx), - m_block(block_start, block_size, discard_padding), - m_prev(prev), - m_next(next), - m_duration(duration) {} - -long BlockGroup::Parse() { - const long status = m_block.Parse(m_pCluster); - - if (status) - return status; - - m_block.SetKey((m_prev > 0) && (m_next <= 0)); - - return 0; -} - -BlockEntry::Kind BlockGroup::GetKind() const { return kBlockGroup; } -const Block* BlockGroup::GetBlock() const { return &m_block; } -long long BlockGroup::GetPrevTimeCode() const { return m_prev; } -long long BlockGroup::GetNextTimeCode() const { return m_next; } -long long BlockGroup::GetDurationTimeCode() const { return m_duration; } - -Block::Block(long long start, long long size_, long long discard_padding) - : m_start(start), - m_size(size_), - m_track(0), - m_timecode(-1), - m_flags(0), - m_frames(NULL), - m_frame_count(-1), - m_discard_padding(discard_padding) {} - -Block::~Block() { delete[] m_frames; } - -long Block::Parse(const Cluster* pCluster) { - if (pCluster == NULL) - return -1; - - if (pCluster->m_pSegment == NULL) - return -1; - - assert(m_start >= 0); - assert(m_size >= 0); - assert(m_track <= 0); - assert(m_frames == NULL); - assert(m_frame_count <= 0); - - long long pos = m_start; - const long long stop = m_start
+ m_size; - - long len; - - IMkvReader* const pReader = pCluster->m_pSegment->m_pReader; - - m_track = ReadUInt(pReader, pos, len); - - if (m_track <= 0) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > stop) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume track number - - if ((stop - pos) < 2) - return E_FILE_FORMAT_INVALID; - - long status; - long long value; - - status = UnserializeInt(pReader, pos, 2, value); - - if (status) - return E_FILE_FORMAT_INVALID; - - if (value < SHRT_MIN) - return E_FILE_FORMAT_INVALID; - - if (value > SHRT_MAX) - return E_FILE_FORMAT_INVALID; - - m_timecode = static_cast<short>(value); - - pos += 2; - - if ((stop - pos) <= 0) - return E_FILE_FORMAT_INVALID; - - status = pReader->Read(pos, 1, &m_flags); - - if (status) - return E_FILE_FORMAT_INVALID; - - const int lacing = int(m_flags & 0x06) >> 1; - - ++pos; // consume flags byte - - if (lacing == 0) { // no lacing - if (pos > stop) - return E_FILE_FORMAT_INVALID; - - m_frame_count = 1; - m_frames = new (std::nothrow) Frame[m_frame_count]; - if (m_frames == NULL) - return -1; - - Frame& f = m_frames[0]; - f.pos = pos; - - const long long frame_size = stop - pos; - - if (frame_size > LONG_MAX || frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - f.len = static_cast<long>(frame_size); - - return 0; // success - } - - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - unsigned char biased_count; - - status = pReader->Read(pos, 1, &biased_count); - - if (status) - return E_FILE_FORMAT_INVALID; - - ++pos; // consume frame count - if (pos > stop) - return E_FILE_FORMAT_INVALID; - - m_frame_count = int(biased_count) + 1; - - m_frames = new (std::nothrow) Frame[m_frame_count]; - if (m_frames == NULL) - return -1; - - if (!m_frames) - return E_FILE_FORMAT_INVALID; - - if (lacing == 1) { // Xiph - Frame* pf = m_frames; - Frame* const pf_end = pf + m_frame_count; - - long long size = 0; - int frame_count = m_frame_count; - - while (frame_count > 1) { - long frame_size = 0; - - for (;;) { - unsigned char val; - - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - status = pReader->Read(pos, 1, &val); - - if (status) - return E_FILE_FORMAT_INVALID; - - ++pos; // consume xiph size byte - - frame_size += val; - - if (val < 255) - break; - } - - Frame& f = *pf++; - assert(pf < pf_end); - if (pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - f.pos = 0; // patch later - - if (frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - f.len = frame_size; - size += frame_size; // contribution of this frame - - --frame_count; - } - - if (pf >= pf_end || pos > stop) - return E_FILE_FORMAT_INVALID; - - { - Frame& f = *pf++; - - if (pf != pf_end) - return E_FILE_FORMAT_INVALID; - - f.pos = 0; // patch later - - const long long total_size = stop - pos; - - if (total_size < size) - return E_FILE_FORMAT_INVALID; - - const long long frame_size = total_size - size; - - if (frame_size > LONG_MAX || frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - f.len = static_cast<long>(frame_size); - } - - pf = m_frames; - while (pf != pf_end) { - Frame& f = *pf++; - assert((pos + f.len) <= stop); - - if ((pos + f.len) > stop) - return E_FILE_FORMAT_INVALID; - - f.pos = pos; - pos += f.len; - } - - assert(pos == stop); - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - } else if (lacing == 2) { // fixed-size lacing - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - const long long total_size = stop - pos; - - if ((total_size % m_frame_count) != 0) - return E_FILE_FORMAT_INVALID; - - const long long frame_size = total_size / m_frame_count; - -
if (frame_size > LONG_MAX || frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - Frame* pf = m_frames; - Frame* const pf_end = pf + m_frame_count; - - while (pf != pf_end) { - assert((pos + frame_size) <= stop); - if ((pos + frame_size) > stop) - return E_FILE_FORMAT_INVALID; - - Frame& f = *pf++; - - f.pos = pos; - f.len = static_cast<long>(frame_size); - - pos += frame_size; - } - - assert(pos == stop); - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - } else { - assert(lacing == 3); // EBML lacing - - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - long long size = 0; - int frame_count = m_frame_count; - - long long frame_size = ReadUInt(pReader, pos, len); - - if (frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - if (frame_size > LONG_MAX) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > stop) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume length of size of first frame - - if ((pos + frame_size) > stop) - return E_FILE_FORMAT_INVALID; - - Frame* pf = m_frames; - Frame* const pf_end = pf + m_frame_count; - - { - Frame& curr = *pf; - - curr.pos = 0; // patch later - - curr.len = static_cast<long>(frame_size); - size += curr.len; // contribution of this frame - } - - --frame_count; - - while (frame_count > 1) { - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - assert(pf < pf_end); - if (pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - const Frame& prev = *pf++; - assert(prev.len == frame_size); - if (prev.len != frame_size) - return E_FILE_FORMAT_INVALID; - - assert(pf < pf_end); - if (pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - Frame& curr = *pf; - - curr.pos = 0; // patch later - - const long long delta_size_ = ReadUInt(pReader, pos, len); - - if (delta_size_ < 0) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > stop) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume length of (delta) size - if (pos > stop) - return E_FILE_FORMAT_INVALID; - - const long exp = 7 * len - 1; - const long long bias = (1LL << exp) - 1LL; - const long long delta_size = delta_size_ - bias; - - frame_size += delta_size; - - if (frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - if (frame_size > LONG_MAX) - return E_FILE_FORMAT_INVALID; - - curr.len = static_cast<long>(frame_size); - size += curr.len; // contribution of this frame - - --frame_count; - } - - // parse last frame - if (frame_count > 0) { - if (pos > stop || pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - const Frame& prev = *pf++; - assert(prev.len == frame_size); - if (prev.len != frame_size) - return E_FILE_FORMAT_INVALID; - - if (pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - Frame& curr = *pf++; - if (pf != pf_end) - return E_FILE_FORMAT_INVALID; - - curr.pos = 0; // patch later - - const long long total_size = stop - pos; - - if (total_size < size) - return E_FILE_FORMAT_INVALID; - - frame_size = total_size - size; - - if (frame_size > LONG_MAX || frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - curr.len = static_cast<long>(frame_size); - } - - pf = m_frames; - while (pf != pf_end) { - Frame& f = *pf++; - if ((pos + f.len) > stop) - return E_FILE_FORMAT_INVALID; - - f.pos = pos; - pos += f.len; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - } - - return 0; // success -} - -long long Block::GetTimeCode(const Cluster* pCluster) const { - if (pCluster == 0) - return m_timecode; - - const long long tc0 = pCluster->GetTimeCode(); - assert(tc0 >= 0); - - const long long tc = tc0 + m_timecode; - - return tc; // unscaled timecode units -} - -long long Block::GetTime(const Cluster* pCluster) const
{ - assert(pCluster); - - const long long tc = GetTimeCode(pCluster); - - const Segment* const pSegment = pCluster->m_pSegment; - const SegmentInfo* const pInfo = pSegment->GetInfo(); - assert(pInfo); - - const long long scale = pInfo->GetTimeCodeScale(); - assert(scale >= 1); - - const long long ns = tc * scale; - - return ns; -} - -long long Block::GetTrackNumber() const { return m_track; } - -bool Block::IsKey() const { - return ((m_flags & static_cast<unsigned char>(1 << 7)) != 0); -} - -void Block::SetKey(bool bKey) { - if (bKey) - m_flags |= static_cast<unsigned char>(1 << 7); - else - m_flags &= 0x7F; -} - -bool Block::IsInvisible() const { return bool(int(m_flags & 0x08) != 0); } - -Block::Lacing Block::GetLacing() const { - const int value = int(m_flags & 0x06) >> 1; - return static_cast<Lacing>(value); -} - -int Block::GetFrameCount() const { return m_frame_count; } - -const Block::Frame& Block::GetFrame(int idx) const { - assert(idx >= 0); - assert(idx < m_frame_count); - - const Frame& f = m_frames[idx]; - assert(f.pos > 0); - assert(f.len > 0); - - return f; -} - -long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const { - assert(pReader); - assert(buf); - - const long status = pReader->Read(pos, len, buf); - return status; -} - -long long Block::GetDiscardPadding() const { return m_discard_padding; } - -} // namespace mkvparser diff --git a/media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvparser.h b/media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvparser.h deleted file mode 100644 index 26c2b7e5ebf5..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvparser.h +++ /dev/null @@ -1,1145 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree.
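One recap before the header: Block::Parse above distinguishes three lacing layouts. Xiph lace sizes are a running sum of bytes in which 255 means "continue"; fixed lacing divides the payload evenly; EBML lacing stores the first size as a varint and each later size as an unsigned varint from which a bias is subtracted to recover a signed delta, which is what the exp/bias arithmetic computes. The delta rule alone, as a hypothetical helper with the same formula:

    // bias = 2^(7*len - 1) - 1 recenters the unsigned varint; e.g. for a
    // one-byte lace (len == 1) the bias is 63, so a raw value of 63
    // encodes a delta of zero (same size as the previous frame).
    long long EbmlLaceDelta(long long raw, long len) {
      const long long bias = (1LL << (7 * len - 1)) - 1LL;
      return raw - bias;
    }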
-#ifndef MKVPARSER_MKVPARSER_H_ -#define MKVPARSER_MKVPARSER_H_ - -#include <cstddef> - -namespace mkvparser { - -const int E_PARSE_FAILED = -1; -const int E_FILE_FORMAT_INVALID = -2; -const int E_BUFFER_NOT_FULL = -3; - -class IMkvReader { - public: - virtual int Read(long long pos, long len, unsigned char* buf) = 0; - virtual int Length(long long* total, long long* available) = 0; - - protected: - virtual ~IMkvReader(); -}; - -template <typename Type> -Type* SafeArrayAlloc(unsigned long long num_elements, - unsigned long long element_size); -long long GetUIntLength(IMkvReader*, long long, long&); -long long ReadUInt(IMkvReader*, long long, long&); -long long ReadID(IMkvReader* pReader, long long pos, long& len); -long long UnserializeUInt(IMkvReader*, long long pos, long long size); - -long UnserializeFloat(IMkvReader*, long long pos, long long size, double&); -long UnserializeInt(IMkvReader*, long long pos, long long size, - long long& result); - -long UnserializeString(IMkvReader*, long long pos, long long size, char*& str); - -long ParseElementHeader(IMkvReader* pReader, - long long& pos, // consume id and size fields - long long stop, // if you know size of element's parent - long long& id, long long& size); - -bool Match(IMkvReader*, long long&, unsigned long, long long&); -bool Match(IMkvReader*, long long&, unsigned long, unsigned char*&, size_t&); - -void GetVersion(int& major, int& minor, int& build, int& revision); - -struct EBMLHeader { - EBMLHeader(); - ~EBMLHeader(); - long long m_version; - long long m_readVersion; - long long m_maxIdLength; - long long m_maxSizeLength; - char* m_docType; - long long m_docTypeVersion; - long long m_docTypeReadVersion; - - long long Parse(IMkvReader*, long long&); - void Init(); -}; - -class Segment; -class Track; -class Cluster; - -class Block { - Block(const Block&); - Block& operator=(const Block&); - - public: - const long long m_start; - const long long m_size; - - Block(long long start, long long size, long long discard_padding); - ~Block(); - - long Parse(const Cluster*); - - long long GetTrackNumber() const; - long long GetTimeCode(const Cluster*) const; // absolute, but not scaled - long long GetTime(const Cluster*) const; // absolute, and scaled (ns) - bool IsKey() const; - void SetKey(bool); - bool IsInvisible() const; - - enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml }; - Lacing GetLacing() const; - - int GetFrameCount() const; // to index frames: [0, count) - - struct Frame { - long long pos; // absolute offset - long len; - - long Read(IMkvReader*, unsigned char*) const; - }; - - const Frame& GetFrame(int frame_index) const; - - long long GetDiscardPadding() const; - - private: - long long m_track; // Track::Number() - short m_timecode; // relative to cluster - unsigned char m_flags; - - Frame* m_frames; - int m_frame_count; - - protected: - const long long m_discard_padding; -}; - -class BlockEntry { - BlockEntry(const BlockEntry&); - BlockEntry& operator=(const BlockEntry&); - - protected: - BlockEntry(Cluster*, long index); - - public: - virtual ~BlockEntry(); - - bool EOS() const { return (GetKind() == kBlockEOS); } - const Cluster* GetCluster() const; - long GetIndex() const; - virtual const Block* GetBlock() const = 0; - - enum Kind { kBlockEOS, kBlockSimple, kBlockGroup }; - virtual Kind GetKind() const = 0; - - protected: - Cluster* const m_pCluster; - const long m_index; -}; - -class SimpleBlock : public BlockEntry { - SimpleBlock(const SimpleBlock&); - SimpleBlock& operator=(const SimpleBlock&); - - public: -
SimpleBlock(Cluster*, long index, long long start, long long size); - long Parse(); - - Kind GetKind() const; - const Block* GetBlock() const; - - protected: - Block m_block; -}; - -class BlockGroup : public BlockEntry { - BlockGroup(const BlockGroup&); - BlockGroup& operator=(const BlockGroup&); - - public: - BlockGroup(Cluster*, long index, - long long block_start, // absolute pos of block's payload - long long block_size, // size of block's payload - long long prev, long long next, long long duration, - long long discard_padding); - - long Parse(); - - Kind GetKind() const; - const Block* GetBlock() const; - - long long GetPrevTimeCode() const; // relative to block's time - long long GetNextTimeCode() const; // as above - long long GetDurationTimeCode() const; - - private: - Block m_block; - const long long m_prev; - const long long m_next; - const long long m_duration; -}; - -/////////////////////////////////////////////////////////////// -// ContentEncoding element -// Elements used to describe if the track data has been encrypted or -// compressed with zlib or header stripping. -class ContentEncoding { - public: - enum { kCTR = 1 }; - - ContentEncoding(); - ~ContentEncoding(); - - // ContentCompression element names - struct ContentCompression { - ContentCompression(); - ~ContentCompression(); - - unsigned long long algo; - unsigned char* settings; - long long settings_len; - }; - - // ContentEncAESSettings element names - struct ContentEncAESSettings { - ContentEncAESSettings() : cipher_mode(kCTR) {} - ~ContentEncAESSettings() {} - - unsigned long long cipher_mode; - }; - - // ContentEncryption element names - struct ContentEncryption { - ContentEncryption(); - ~ContentEncryption(); - - unsigned long long algo; - unsigned char* key_id; - long long key_id_len; - unsigned char* signature; - long long signature_len; - unsigned char* sig_key_id; - long long sig_key_id_len; - unsigned long long sig_algo; - unsigned long long sig_hash_algo; - - ContentEncAESSettings aes_settings; - }; - - // Returns ContentCompression represented by |idx|. Returns NULL if |idx| - // is out of bounds. - const ContentCompression* GetCompressionByIndex(unsigned long idx) const; - - // Returns number of ContentCompression elements in this ContentEncoding - // element. - unsigned long GetCompressionCount() const; - - // Parses the ContentCompression element from |pReader|. |start| is the - // starting offset of the ContentCompression payload. |size| is the size in - // bytes of the ContentCompression payload. |compression| is where the parsed - // values will be stored. - long ParseCompressionEntry(long long start, long long size, - IMkvReader* pReader, - ContentCompression* compression); - - // Returns ContentEncryption represented by |idx|. Returns NULL if |idx| - // is out of bounds. - const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const; - - // Returns number of ContentEncryption elements in this ContentEncoding - // element. - unsigned long GetEncryptionCount() const; - - // Parses the ContentEncAESSettings element from |pReader|. |start| is the - // starting offset of the ContentEncAESSettings payload. |size| is the - // size in bytes of the ContentEncAESSettings payload. |encryption| is - // where the parsed values will be stored. - long ParseContentEncAESSettingsEntry(long long start, long long size, - IMkvReader* pReader, - ContentEncAESSettings* aes); - - // Parses the ContentEncoding element from |pReader|. |start| is the - // starting offset of the ContentEncoding payload.
|size| is the size in - // bytes of the ContentEncoding payload. Returns true on success. - long ParseContentEncodingEntry(long long start, long long size, - IMkvReader* pReader); - - // Parses the ContentEncryption element from |pReader|. |start| is the - // starting offset of the ContentEncryption payload. |size| is the size in - // bytes of the ContentEncryption payload. |encryption| is where the parsed - // values will be stored. - long ParseEncryptionEntry(long long start, long long size, - IMkvReader* pReader, ContentEncryption* encryption); - - unsigned long long encoding_order() const { return encoding_order_; } - unsigned long long encoding_scope() const { return encoding_scope_; } - unsigned long long encoding_type() const { return encoding_type_; } - - private: - // Member variables for list of ContentCompression elements. - ContentCompression** compression_entries_; - ContentCompression** compression_entries_end_; - - // Member variables for list of ContentEncryption elements. - ContentEncryption** encryption_entries_; - ContentEncryption** encryption_entries_end_; - - // ContentEncoding element names - unsigned long long encoding_order_; - unsigned long long encoding_scope_; - unsigned long long encoding_type_; - - // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding); - ContentEncoding(const ContentEncoding&); - ContentEncoding& operator=(const ContentEncoding&); -}; - -class Track { - Track(const Track&); - Track& operator=(const Track&); - - public: - class Info; - static long Create(Segment*, const Info&, long long element_start, - long long element_size, Track*&); - - enum Type { kVideo = 1, kAudio = 2, kSubtitle = 0x11, kMetadata = 0x21 }; - - Segment* const m_pSegment; - const long long m_element_start; - const long long m_element_size; - virtual ~Track(); - - long GetType() const; - long GetNumber() const; - unsigned long long GetUid() const; - const char* GetNameAsUTF8() const; - const char* GetLanguage() const; - const char* GetCodecNameAsUTF8() const; - const char* GetCodecId() const; - const unsigned char* GetCodecPrivate(size_t&) const; - bool GetLacing() const; - unsigned long long GetDefaultDuration() const; - unsigned long long GetCodecDelay() const; - unsigned long long GetSeekPreRoll() const; - - const BlockEntry* GetEOS() const; - - struct Settings { - long long start; - long long size; - }; - - class Info { - public: - Info(); - ~Info(); - int Copy(Info&) const; - void Clear(); - long type; - long number; - unsigned long long uid; - unsigned long long defaultDuration; - unsigned long long codecDelay; - unsigned long long seekPreRoll; - char* nameAsUTF8; - char* language; - char* codecId; - char* codecNameAsUTF8; - unsigned char* codecPrivate; - size_t codecPrivateSize; - bool lacing; - Settings settings; - - private: - Info(const Info&); - Info& operator=(const Info&); - int CopyStr(char* Info::*str, Info&) const; - }; - - long GetFirst(const BlockEntry*&) const; - long GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const; - virtual bool VetEntry(const BlockEntry*) const; - virtual long Seek(long long time_ns, const BlockEntry*&) const; - - const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const; - unsigned long GetContentEncodingCount() const; - - long ParseContentEncodingsEntry(long long start, long long size); - - protected: - Track(Segment*, long long element_start, long long element_size); - - Info m_info; - - class EOSBlock : public BlockEntry { - public: - EOSBlock(); - - Kind GetKind() const; - const Block* GetBlock()
const; - }; - - EOSBlock m_eos; - - private: - ContentEncoding** content_encoding_entries_; - ContentEncoding** content_encoding_entries_end_; -}; - -struct PrimaryChromaticity { - PrimaryChromaticity() : x(0), y(0) {} - ~PrimaryChromaticity() {} - static bool Parse(IMkvReader* reader, long long read_pos, - long long value_size, bool is_x, - PrimaryChromaticity** chromaticity); - float x; - float y; -}; - -struct MasteringMetadata { - static const float kValueNotPresent; - - MasteringMetadata() - : r(NULL), - g(NULL), - b(NULL), - white_point(NULL), - luminance_max(kValueNotPresent), - luminance_min(kValueNotPresent) {} - ~MasteringMetadata() { - delete r; - delete g; - delete b; - delete white_point; - } - - static bool Parse(IMkvReader* reader, long long element_start, - long long element_size, - MasteringMetadata** mastering_metadata); - - PrimaryChromaticity* r; - PrimaryChromaticity* g; - PrimaryChromaticity* b; - PrimaryChromaticity* white_point; - float luminance_max; - float luminance_min; -}; - -struct Colour { - static const long long kValueNotPresent; - - // Unless otherwise noted all values assigned upon construction are the - // equivalent of unspecified/default. - Colour() - : matrix_coefficients(kValueNotPresent), - bits_per_channel(kValueNotPresent), - chroma_subsampling_horz(kValueNotPresent), - chroma_subsampling_vert(kValueNotPresent), - cb_subsampling_horz(kValueNotPresent), - cb_subsampling_vert(kValueNotPresent), - chroma_siting_horz(kValueNotPresent), - chroma_siting_vert(kValueNotPresent), - range(kValueNotPresent), - transfer_characteristics(kValueNotPresent), - primaries(kValueNotPresent), - max_cll(kValueNotPresent), - max_fall(kValueNotPresent), - mastering_metadata(NULL) {} - ~Colour() { - delete mastering_metadata; - mastering_metadata = NULL; - } - - static bool Parse(IMkvReader* reader, long long element_start, - long long element_size, Colour** colour); - - long long matrix_coefficients; - long long bits_per_channel; - long long chroma_subsampling_horz; - long long chroma_subsampling_vert; - long long cb_subsampling_horz; - long long cb_subsampling_vert; - long long chroma_siting_horz; - long long chroma_siting_vert; - long long range; - long long transfer_characteristics; - long long primaries; - long long max_cll; - long long max_fall; - - MasteringMetadata* mastering_metadata; -}; - -struct Projection { - enum ProjectionType { - kTypeNotPresent = -1, - kRectangular = 0, - kEquirectangular = 1, - kCubeMap = 2, - kMesh = 3, - }; - static const float kValueNotPresent; - Projection() - : type(kTypeNotPresent), - private_data(NULL), - private_data_length(0), - pose_yaw(kValueNotPresent), - pose_pitch(kValueNotPresent), - pose_roll(kValueNotPresent) {} - ~Projection() { delete[] private_data; } - static bool Parse(IMkvReader* reader, long long element_start, - long long element_size, Projection** projection); - - ProjectionType type; - unsigned char* private_data; - size_t private_data_length; - float pose_yaw; - float pose_pitch; - float pose_roll; -}; - -class VideoTrack : public Track { - VideoTrack(const VideoTrack&); - VideoTrack& operator=(const VideoTrack&); - - VideoTrack(Segment*, long long element_start, long long element_size); - - public: - virtual ~VideoTrack(); - static long Parse(Segment*, const Info&, long long element_start, - long long element_size, VideoTrack*&); - - long long GetWidth() const; - long long GetHeight() const; - long long GetDisplayWidth() const; - long long GetDisplayHeight() const; - long long GetDisplayUnit() const; - long
long GetStereoMode() const; - double GetFrameRate() const; - - bool VetEntry(const BlockEntry*) const; - long Seek(long long time_ns, const BlockEntry*&) const; - - Colour* GetColour() const; - - Projection* GetProjection() const; - - private: - long long m_width; - long long m_height; - long long m_display_width; - long long m_display_height; - long long m_display_unit; - long long m_stereo_mode; - - double m_rate; - - Colour* m_colour; - Projection* m_projection; -}; - -class AudioTrack : public Track { - AudioTrack(const AudioTrack&); - AudioTrack& operator=(const AudioTrack&); - - AudioTrack(Segment*, long long element_start, long long element_size); - - public: - static long Parse(Segment*, const Info&, long long element_start, - long long element_size, AudioTrack*&); - - double GetSamplingRate() const; - long long GetChannels() const; - long long GetBitDepth() const; - - private: - double m_rate; - long long m_channels; - long long m_bitDepth; -}; - -class Tracks { - Tracks(const Tracks&); - Tracks& operator=(const Tracks&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - Tracks(Segment*, long long start, long long size, long long element_start, - long long element_size); - - ~Tracks(); - - long Parse(); - - unsigned long GetTracksCount() const; - - const Track* GetTrackByNumber(long tn) const; - const Track* GetTrackByIndex(unsigned long idx) const; - - private: - Track** m_trackEntries; - Track** m_trackEntriesEnd; - - long ParseTrackEntry(long long payload_start, long long payload_size, - long long element_start, long long element_size, - Track*&) const; -}; - -class Chapters { - Chapters(const Chapters&); - Chapters& operator=(const Chapters&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - Chapters(Segment*, long long payload_start, long long payload_size, - long long element_start, long long element_size); - - ~Chapters(); - - long Parse(); - - class Atom; - class Edition; - - class Display { - friend class Atom; - Display(); - Display(const Display&); - ~Display(); - Display& operator=(const Display&); - - public: - const char* GetString() const; - const char* GetLanguage() const; - const char* GetCountry() const; - - private: - void Init(); - void ShallowCopy(Display&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - - char* m_string; - char* m_language; - char* m_country; - }; - - class Atom { - friend class Edition; - Atom(); - Atom(const Atom&); - ~Atom(); - Atom& operator=(const Atom&); - - public: - unsigned long long GetUID() const; - const char* GetStringUID() const; - - long long GetStartTimecode() const; - long long GetStopTimecode() const; - - long long GetStartTime(const Chapters*) const; - long long GetStopTime(const Chapters*) const; - - int GetDisplayCount() const; - const Display* GetDisplay(int index) const; - - private: - void Init(); - void ShallowCopy(Atom&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - static long long GetTime(const Chapters*, long long timecode); - - long ParseDisplay(IMkvReader*, long long pos, long long size); - bool ExpandDisplaysArray(); - - char* m_string_uid; - unsigned long long m_uid; - long long m_start_timecode; - long long m_stop_timecode; - - Display* m_displays; - int m_displays_size; - int m_displays_count; - }; - - 
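The Display and Atom accessors above, together with Edition (declared next), are all that is needed to walk a chapter tree. A usage sketch, assuming a Chapters* obtained from Segment::GetChapters and a hypothetical dump routine:

    #include <cstdio>

    void DumpAtom(const mkvparser::Chapters* chapters,
                  const mkvparser::Chapters::Atom* atom) {
      const long long start_ns = atom->GetStartTime(chapters);  // scaled to ns
      for (int i = 0; i < atom->GetDisplayCount(); ++i) {
        const mkvparser::Chapters::Display* const d = atom->GetDisplay(i);
        const char* const title = d->GetString();   // may be NULL
        const char* const lang = d->GetLanguage();  // may be NULL
        printf("%lld ns: %s [%s]\n", start_ns,
               title ? title : "(untitled)", lang ? lang : "und");
      }
    }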
class Edition { - friend class Chapters; - Edition(); - Edition(const Edition&); - ~Edition(); - Edition& operator=(const Edition&); - - public: - int GetAtomCount() const; - const Atom* GetAtom(int index) const; - - private: - void Init(); - void ShallowCopy(Edition&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - - long ParseAtom(IMkvReader*, long long pos, long long size); - bool ExpandAtomsArray(); - - Atom* m_atoms; - int m_atoms_size; - int m_atoms_count; - }; - - int GetEditionCount() const; - const Edition* GetEdition(int index) const; - - private: - long ParseEdition(long long pos, long long size); - bool ExpandEditionsArray(); - - Edition* m_editions; - int m_editions_size; - int m_editions_count; -}; - -class Tags { - Tags(const Tags&); - Tags& operator=(const Tags&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - Tags(Segment*, long long payload_start, long long payload_size, - long long element_start, long long element_size); - - ~Tags(); - - long Parse(); - - class Tag; - class SimpleTag; - - class SimpleTag { - friend class Tag; - SimpleTag(); - SimpleTag(const SimpleTag&); - ~SimpleTag(); - SimpleTag& operator=(const SimpleTag&); - - public: - const char* GetTagName() const; - const char* GetTagString() const; - - private: - void Init(); - void ShallowCopy(SimpleTag&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - - char* m_tag_name; - char* m_tag_string; - }; - - class Tag { - friend class Tags; - Tag(); - Tag(const Tag&); - ~Tag(); - Tag& operator=(const Tag&); - - public: - int GetSimpleTagCount() const; - const SimpleTag* GetSimpleTag(int index) const; - - private: - void Init(); - void ShallowCopy(Tag&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - - long ParseSimpleTag(IMkvReader*, long long pos, long long size); - bool ExpandSimpleTagsArray(); - - SimpleTag* m_simple_tags; - int m_simple_tags_size; - int m_simple_tags_count; - }; - - int GetTagCount() const; - const Tag* GetTag(int index) const; - - private: - long ParseTag(long long pos, long long size); - bool ExpandTagsArray(); - - Tag* m_tags; - int m_tags_size; - int m_tags_count; -}; - -class SegmentInfo { - SegmentInfo(const SegmentInfo&); - SegmentInfo& operator=(const SegmentInfo&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - SegmentInfo(Segment*, long long start, long long size, - long long element_start, long long element_size); - - ~SegmentInfo(); - - long Parse(); - - long long GetTimeCodeScale() const; - long long GetDuration() const; // scaled - const char* GetMuxingAppAsUTF8() const; - const char* GetWritingAppAsUTF8() const; - const char* GetTitleAsUTF8() const; - - private: - long long m_timecodeScale; - double m_duration; - char* m_pMuxingAppAsUTF8; - char* m_pWritingAppAsUTF8; - char* m_pTitleAsUTF8; -}; - -class SeekHead { - SeekHead(const SeekHead&); - SeekHead& operator=(const SeekHead&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - SeekHead(Segment*, long long start, long long size, long long element_start, - long long element_size); - - ~SeekHead(); - - long Parse(); - - struct Entry { - Entry(); - - // the SeekHead entry 
payload - long long id; - long long pos; - - // absolute pos of SeekEntry ID - long long element_start; - - // SeekEntry ID size + size size + payload - long long element_size; - }; - - int GetCount() const; - const Entry* GetEntry(int idx) const; - - struct VoidElement { - // absolute pos of Void ID - long long element_start; - - // ID size + size size + payload size - long long element_size; - }; - - int GetVoidElementCount() const; - const VoidElement* GetVoidElement(int idx) const; - - private: - Entry* m_entries; - int m_entry_count; - - VoidElement* m_void_elements; - int m_void_element_count; - - static bool ParseEntry(IMkvReader*, - long long pos, // payload - long long size, Entry*); -}; - -class Cues; -class CuePoint { - friend class Cues; - - CuePoint(long, long long); - ~CuePoint(); - - CuePoint(const CuePoint&); - CuePoint& operator=(const CuePoint&); - - public: - long long m_element_start; - long long m_element_size; - - bool Load(IMkvReader*); - - long long GetTimeCode() const; // absolute but unscaled - long long GetTime(const Segment*) const; // absolute and scaled (ns units) - - struct TrackPosition { - long long m_track; - long long m_pos; // of cluster - long long m_block; - // codec_state //defaults to 0 - // reference = clusters containing req'd referenced blocks - // reftime = timecode of the referenced block - - bool Parse(IMkvReader*, long long, long long); - }; - - const TrackPosition* Find(const Track*) const; - - private: - const long m_index; - long long m_timecode; - TrackPosition* m_track_positions; - size_t m_track_positions_count; -}; - -class Cues { - friend class Segment; - - Cues(Segment*, long long start, long long size, long long element_start, - long long element_size); - ~Cues(); - - Cues(const Cues&); - Cues& operator=(const Cues&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - bool Find( // lower bound of time_ns - long long time_ns, const Track*, const CuePoint*&, - const CuePoint::TrackPosition*&) const; - - const CuePoint* GetFirst() const; - const CuePoint* GetLast() const; - const CuePoint* GetNext(const CuePoint*) const; - - const BlockEntry* GetBlock(const CuePoint*, - const CuePoint::TrackPosition*) const; - - bool LoadCuePoint() const; - long GetCount() const; // loaded only - // long GetTotal() const; //loaded + preloaded - bool DoneParsing() const; - - private: - bool Init() const; - bool PreloadCuePoint(long&, long long) const; - - mutable CuePoint** m_cue_points; - mutable long m_count; - mutable long m_preload_count; - mutable long long m_pos; -}; - -class Cluster { - friend class Segment; - - Cluster(const Cluster&); - Cluster& operator=(const Cluster&); - - public: - Segment* const m_pSegment; - - public: - static Cluster* Create(Segment*, - long index, // index in segment - long long off); // offset relative to segment - // long long element_size); - - Cluster(); // EndOfStream - ~Cluster(); - - bool EOS() const; - - long long GetTimeCode() const; // absolute, but not scaled - long long GetTime() const; // absolute, and scaled (nanosecond units) - long long GetFirstTime() const; // time (ns) of first (earliest) block - long long GetLastTime() const; // time (ns) of last (latest) block - - long GetFirst(const BlockEntry*&) const; - long GetLast(const BlockEntry*&) const; - long GetNext(const BlockEntry* curr, const BlockEntry*& next) const; - - const BlockEntry* GetEntry(const Track*, long long ns = -1) const; - 
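The cue-point overload of GetEntry, continued just below, is the consumer of the Cues::Find/GetBlock pair declared above. A seeking sketch using only APIs from this header; segment, video_track and time_ns are assumed to exist in the caller:

    const mkvparser::Cues* const cues = segment->GetCues();
    while (cues && !cues->DoneParsing())
      cues->LoadCuePoint();  // cue points are parsed incrementally

    const mkvparser::CuePoint* cue;
    const mkvparser::CuePoint::TrackPosition* tp;
    if (cues && cues->Find(time_ns, video_track, cue, tp)) {
      // Find is a lower bound: the cue at or before time_ns for the track.
      const mkvparser::BlockEntry* const entry = cues->GetBlock(cue, tp);
      // entry, when non-NULL, names the block to start decoding from.
    }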
const BlockEntry* GetEntry(const CuePoint&, - const CuePoint::TrackPosition&) const; - // const BlockEntry* GetMaxKey(const VideoTrack*) const; - - // static bool HasBlockEntries(const Segment*, long long); - - static long HasBlockEntries(const Segment*, long long idoff, long long& pos, - long& size); - - long GetEntryCount() const; - - long Load(long long& pos, long& size) const; - - long Parse(long long& pos, long& size) const; - long GetEntry(long index, const mkvparser::BlockEntry*&) const; - - protected: - Cluster(Segment*, long index, long long element_start); - // long long element_size); - - public: - const long long m_element_start; - long long GetPosition() const; // offset relative to segment - - long GetIndex() const; - long long GetElementSize() const; - // long long GetPayloadSize() const; - - // long long Unparsed() const; - - private: - long m_index; - mutable long long m_pos; - // mutable long long m_size; - mutable long long m_element_size; - mutable long long m_timecode; - mutable BlockEntry** m_entries; - mutable long m_entries_size; - mutable long m_entries_count; - - long ParseSimpleBlock(long long, long long&, long&); - long ParseBlockGroup(long long, long long&, long&); - - long CreateBlock(long long id, long long pos, long long size, - long long discard_padding); - long CreateBlockGroup(long long start_offset, long long size, - long long discard_padding); - long CreateSimpleBlock(long long, long long); -}; - -class Segment { - friend class Cues; - friend class Track; - friend class VideoTrack; - - Segment(const Segment&); - Segment& operator=(const Segment&); - - private: - Segment(IMkvReader*, long long elem_start, - // long long elem_size, - long long pos, long long size); - - public: - IMkvReader* const m_pReader; - const long long m_element_start; - // const long long m_element_size; - const long long m_start; // posn of segment payload - const long long m_size; // size of segment payload - Cluster m_eos; // TODO: make private? 
- - static long long CreateInstance(IMkvReader*, long long, Segment*&); - ~Segment(); - - long Load(); // loads headers and all clusters - - // for incremental loading - // long long Unparsed() const; - bool DoneParsing() const; - long long ParseHeaders(); // stops when first cluster is found - // long FindNextCluster(long long& pos, long& size) const; - long LoadCluster(long long& pos, long& size); // load one cluster - long LoadCluster(); - - long ParseNext(const Cluster* pCurr, const Cluster*& pNext, long long& pos, - long& size); - - const SeekHead* GetSeekHead() const; - const Tracks* GetTracks() const; - const SegmentInfo* GetInfo() const; - const Cues* GetCues() const; - const Chapters* GetChapters() const; - const Tags* GetTags() const; - - long long GetDuration() const; - - unsigned long GetCount() const; - const Cluster* GetFirst() const; - const Cluster* GetLast() const; - const Cluster* GetNext(const Cluster*); - - const Cluster* FindCluster(long long time_nanoseconds) const; - // const BlockEntry* Seek(long long time_nanoseconds, const Track*) const; - - const Cluster* FindOrPreloadCluster(long long pos); - - long ParseCues(long long cues_off, // offset relative to start of segment - long long& parse_pos, long& parse_len); - - private: - long long m_pos; // absolute file posn; what has been consumed so far - Cluster* m_pUnknownSize; - - SeekHead* m_pSeekHead; - SegmentInfo* m_pInfo; - Tracks* m_pTracks; - Cues* m_pCues; - Chapters* m_pChapters; - Tags* m_pTags; - Cluster** m_clusters; - long m_clusterCount; // number of entries for which m_index >= 0 - long m_clusterPreloadCount; // number of entries for which m_index < 0 - long m_clusterSize; // array size - - long DoLoadCluster(long long&, long&); - long DoLoadClusterUnknownSize(long long&, long&); - long DoParseNext(const Cluster*&, long long&, long&); - - bool AppendCluster(Cluster*); - bool PreloadCluster(Cluster*, ptrdiff_t); - - // void ParseSeekHead(long long pos, long long size); - // void ParseSeekEntry(long long pos, long long size); - // void ParseCues(long long); - - const BlockEntry* GetBlock(const CuePoint&, const CuePoint::TrackPosition&); -}; - -} // namespace mkvparser - -inline long mkvparser::Segment::LoadCluster() { - long long pos; - long size; - - return LoadCluster(pos, size); -} - -#endif // MKVPARSER_MKVPARSER_H_ diff --git a/media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvreader.cc b/media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvreader.cc deleted file mode 100644 index 23d68f508919..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvreader.cc +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright (c) 2010 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
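The MkvReader defined below is the stock IMkvReader that client code usually starts from. The typical bootstrap against the header above, sketched with error handling trimmed and a caller-supplied path:

    mkvparser::MkvReader reader;
    if (reader.Open(path))  // returns 0 on success
      return;

    long long pos = 0;
    mkvparser::EBMLHeader ebml_header;
    if (ebml_header.Parse(&reader, pos) < 0)  // leaves pos at the Segment
      return;

    mkvparser::Segment* segment = NULL;
    if (mkvparser::Segment::CreateInstance(&reader, pos, segment) != 0)
      return;

    if (segment->Load() < 0) {  // parses headers and all clusters
      delete segment;
      return;
    }

    const mkvparser::SegmentInfo* const info = segment->GetInfo();
    const long long duration_ns = info ? info->GetDuration() : -1;  // scaled
    delete segment;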
-#include "mkvparser/mkvreader.h" - -#include - -#include - -namespace mkvparser { - -MkvReader::MkvReader() : m_file(NULL), reader_owns_file_(true) {} - -MkvReader::MkvReader(FILE* fp) : m_file(fp), reader_owns_file_(false) { - GetFileSize(); -} - -MkvReader::~MkvReader() { - if (reader_owns_file_) - Close(); - m_file = NULL; -} - -int MkvReader::Open(const char* fileName) { - if (fileName == NULL) - return -1; - - if (m_file) - return -1; - -#ifdef _MSC_VER - const errno_t e = fopen_s(&m_file, fileName, "rb"); - - if (e) - return -1; // error -#else - m_file = fopen(fileName, "rb"); - - if (m_file == NULL) - return -1; -#endif - return !GetFileSize(); -} - -bool MkvReader::GetFileSize() { - if (m_file == NULL) - return false; -#ifdef _MSC_VER - int status = _fseeki64(m_file, 0L, SEEK_END); - - if (status) - return false; // error - - m_length = _ftelli64(m_file); -#else - fseek(m_file, 0L, SEEK_END); - m_length = ftell(m_file); -#endif - assert(m_length >= 0); - - if (m_length < 0) - return false; - -#ifdef _MSC_VER - status = _fseeki64(m_file, 0L, SEEK_SET); - - if (status) - return false; // error -#else - fseek(m_file, 0L, SEEK_SET); -#endif - - return true; -} - -void MkvReader::Close() { - if (m_file != NULL) { - fclose(m_file); - m_file = NULL; - } -} - -int MkvReader::Length(long long* total, long long* available) { - if (m_file == NULL) - return -1; - - if (total) - *total = m_length; - - if (available) - *available = m_length; - - return 0; -} - -int MkvReader::Read(long long offset, long len, unsigned char* buffer) { - if (m_file == NULL) - return -1; - - if (offset < 0) - return -1; - - if (len < 0) - return -1; - - if (len == 0) - return 0; - - if (offset >= m_length) - return -1; - -#ifdef _MSC_VER - const int status = _fseeki64(m_file, offset, SEEK_SET); - - if (status) - return -1; // error -#else - fseeko(m_file, static_cast(offset), SEEK_SET); -#endif - - const size_t size = fread(buffer, 1, len, m_file); - - if (size < size_t(len)) - return -1; // error - - return 0; // success -} - -} // namespace mkvparser diff --git a/media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvreader.h b/media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvreader.h deleted file mode 100644 index 9831ecf64586..000000000000 --- a/media/libvpx/libvpx/third_party/libwebm/mkvparser/mkvreader.h +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2010 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -#ifndef MKVPARSER_MKVREADER_H_ -#define MKVPARSER_MKVREADER_H_ - -#include - -#include "mkvparser/mkvparser.h" - -namespace mkvparser { - -class MkvReader : public IMkvReader { - public: - MkvReader(); - explicit MkvReader(FILE* fp); - virtual ~MkvReader(); - - int Open(const char*); - void Close(); - - virtual int Read(long long position, long length, unsigned char* buffer); - virtual int Length(long long* total, long long* available); - - private: - MkvReader(const MkvReader&); - MkvReader& operator=(const MkvReader&); - - // Determines the size of the file. This is called either by the constructor - // or by the Open function depending on file ownership. Returns true on - // success. 
- bool GetFileSize(); - - long long m_length; - FILE* m_file; - bool reader_owns_file_; -}; - -} // namespace mkvparser - -#endif // MKVPARSER_MKVREADER_H_ diff --git a/media/libvpx/libvpx/third_party/libwebm/LICENSE.TXT b/media/libvpx/libvpx/third_party/libyuv/LICENSE similarity index 95% rename from media/libvpx/libvpx/third_party/libwebm/LICENSE.TXT rename to media/libvpx/libvpx/third_party/libyuv/LICENSE index 7a6f99547d4d..c911747a6b53 100644 --- a/media/libvpx/libvpx/third_party/libwebm/LICENSE.TXT +++ b/media/libvpx/libvpx/third_party/libyuv/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2010, Google Inc. All rights reserved. +Copyright 2011 The LibYuv Project Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -27,4 +27,3 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/media/libvpx/libvpx/third_party/libyuv/README.libvpx b/media/libvpx/libvpx/third_party/libyuv/README.libvpx index 485f79c0ff78..9519dc4beed8 100644 --- a/media/libvpx/libvpx/third_party/libyuv/README.libvpx +++ b/media/libvpx/libvpx/third_party/libyuv/README.libvpx @@ -1,6 +1,6 @@ Name: libyuv URL: https://chromium.googlesource.com/libyuv/libyuv -Version: de944ed8c74909ea6fbd743a22efe1e55e851b83 +Version: a37e7bfece9e0676ae90a1700b0ec85b0f4f22a1 License: BSD License File: LICENSE @@ -8,15 +8,16 @@ Description: libyuv is an open source project that includes YUV conversion and scaling functionality. -The optimized scaler in libyuv is used in multiple resolution encoder example, -which down-samples the original input video (f.g. 1280x720) a number of times -in order to encode multiple resolution bit streams. +The optimized scaler in libyuv is used in the multiple resolution encoder +example which down-samples the original input video (f.g. 1280x720) a number of +times in order to encode multiple resolution bit streams. Local Modifications: -rm -rf .gitignore .gn AUTHORS Android.mk BUILD.gn CMakeLists.txt DEPS LICENSE \ - LICENSE_THIRD_PARTY OWNERS PATENTS PRESUBMIT.py README.chromium README.md \ - all.gyp build_overrides/ chromium/ codereview.settings docs/ \ - download_vs_toolchain.py gyp_libyuv gyp_libyuv.py include/libyuv.h \ - include/libyuv/compare_row.h libyuv.gyp libyuv.gypi libyuv_nacl.gyp \ - libyuv_test.gyp linux.mk public.mk setup_links.py sync_chromium.py \ - third_party/ tools/ unit_test/ util/ winarm.mk +Disable ARGBToRGB24Row_AVX512VBMI due to build failure on Mac. +rm libyuv/include/libyuv.h libyuv/include/libyuv/compare_row.h +mv libyuv/include tmp/ +mv libyuv/source tmp/ +mv libyuv/LICENSE tmp/ +rm -rf libyuv + +mv tmp/* third_party/libyuv/ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/basic_types.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/basic_types.h index 54a2181430c1..01d9dfc77362 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/basic_types.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/basic_types.h @@ -8,82 +8,36 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_ #define INCLUDE_LIBYUV_BASIC_TYPES_H_ -#include <stddef.h> // for NULL, size_t +#include <stddef.h> // For size_t and NULL + +#if !defined(INT_TYPES_DEFINED) && !defined(GG_LONGLONG) +#define INT_TYPES_DEFINED #if defined(_MSC_VER) && (_MSC_VER < 1600) #include <sys/types.h> // for uintptr_t on x86 +typedef unsigned __int64 uint64_t; +typedef __int64 int64_t; +typedef unsigned int uint32_t; +typedef int int32_t; +typedef unsigned short uint16_t; +typedef short int16_t; +typedef unsigned char uint8_t; +typedef signed char int8_t; #else -#include <stdint.h> // for uintptr_t -#endif - -#ifndef GG_LONGLONG -#ifndef INT_TYPES_DEFINED -#define INT_TYPES_DEFINED -#ifdef COMPILER_MSVC -typedef unsigned __int64 uint64; -typedef __int64 int64; -#ifndef INT64_C -#define INT64_C(x) x ## I64 -#endif -#ifndef UINT64_C -#define UINT64_C(x) x ## UI64 -#endif -#define INT64_F "I64" -#else // COMPILER_MSVC -#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) -typedef unsigned long uint64; // NOLINT -typedef long int64; // NOLINT -#ifndef INT64_C -#define INT64_C(x) x ## L -#endif -#ifndef UINT64_C -#define UINT64_C(x) x ## UL -#endif -#define INT64_F "l" -#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) -typedef unsigned long long uint64; // NOLINT -typedef long long int64; // NOLINT -#ifndef INT64_C -#define INT64_C(x) x ## LL -#endif -#ifndef UINT64_C -#define UINT64_C(x) x ## ULL -#endif -#define INT64_F "ll" -#endif // __LP64__ -#endif // COMPILER_MSVC -typedef unsigned int uint32; -typedef int int32; -typedef unsigned short uint16; // NOLINT -typedef short int16; // NOLINT -typedef unsigned char uint8; -typedef signed char int8; +#include <stdint.h> // for uintptr_t and C99 types +#endif // defined(_MSC_VER) && (_MSC_VER < 1600) +typedef uint64_t uint64; +typedef int64_t int64; +typedef uint32_t uint32; +typedef int32_t int32; +typedef uint16_t uint16; +typedef int16_t int16; +typedef uint8_t uint8; +typedef int8_t int8; #endif // INT_TYPES_DEFINED -#endif // GG_LONGLONG - -// Detect compiler is for x86 or x64. -#if defined(__x86_64__) || defined(_M_X64) || \ - defined(__i386__) || defined(_M_IX86) -#define CPU_X86 1 -#endif -// Detect compiler is for ARM. -#if defined(__arm__) || defined(_M_ARM) -#define CPU_ARM 1 -#endif - -#ifndef ALIGNP -#ifdef __cplusplus -#define ALIGNP(p, t) \ - (reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \ - ((t) - 1)) & ~((t) - 1)))) -#else -#define ALIGNP(p, t) \ - ((uint8*)((((uintptr_t)(p) + ((t) - 1)) & ~((t) - 1)))) /* NOLINT */ -#endif -#endif #if !defined(LIBYUV_API) #if defined(_WIN32) || defined(__CYGWIN__) @@ -95,24 +49,17 @@ typedef signed char int8; #define LIBYUV_API #endif // LIBYUV_BUILDING_SHARED_LIBRARY #elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \ - (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \ - defined(LIBYUV_USING_SHARED_LIBRARY)) -#define LIBYUV_API __attribute__ ((visibility ("default"))) + (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \ + defined(LIBYUV_USING_SHARED_LIBRARY)) +#define LIBYUV_API __attribute__((visibility("default"))) #else #define LIBYUV_API #endif // __GNUC__ #endif // LIBYUV_API +// TODO(fbarchard): Remove bool macros. #define LIBYUV_BOOL int #define LIBYUV_FALSE 0 #define LIBYUV_TRUE 1 -// Visual C x86 or GCC little endian.
-#if defined(__x86_64__) || defined(_M_X64) || \ - defined(__i386__) || defined(_M_IX86) || \ - defined(__arm__) || defined(_M_ARM) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -#define LIBYUV_LITTLE_ENDIAN -#endif - -#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_ NOLINT +#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/compare.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/compare.h index 08b2bb2ecf4a..3353ad71c68a 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/compare.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/compare.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_COMPARE_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_COMPARE_H_ #define INCLUDE_LIBYUV_COMPARE_H_ #include "libyuv/basic_types.h" @@ -20,59 +20,92 @@ extern "C" { // Compute a hash for specified memory. Seed of 5381 recommended. LIBYUV_API -uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed); +uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed); + +// Hamming Distance +LIBYUV_API +uint64_t ComputeHammingDistance(const uint8_t* src_a, + const uint8_t* src_b, + int count); // Scan an opaque argb image and return fourcc based on alpha offset. // Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown. LIBYUV_API -uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height); +uint32_t ARGBDetect(const uint8_t* argb, + int stride_argb, + int width, + int height); // Sum Square Error - used to compute Mean Square Error or PSNR. LIBYUV_API -uint64 ComputeSumSquareError(const uint8* src_a, - const uint8* src_b, int count); +uint64_t ComputeSumSquareError(const uint8_t* src_a, + const uint8_t* src_b, + int count); LIBYUV_API -uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height); +uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a, + int stride_a, + const uint8_t* src_b, + int stride_b, + int width, + int height); static const int kMaxPsnr = 128; LIBYUV_API -double SumSquareErrorToPsnr(uint64 sse, uint64 count); +double SumSquareErrorToPsnr(uint64_t sse, uint64_t count); LIBYUV_API -double CalcFramePsnr(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height); +double CalcFramePsnr(const uint8_t* src_a, + int stride_a, + const uint8_t* src_b, + int stride_b, + int width, + int height); LIBYUV_API -double I420Psnr(const uint8* src_y_a, int stride_y_a, - const uint8* src_u_a, int stride_u_a, - const uint8* src_v_a, int stride_v_a, - const uint8* src_y_b, int stride_y_b, - const uint8* src_u_b, int stride_u_b, - const uint8* src_v_b, int stride_v_b, - int width, int height); +double I420Psnr(const uint8_t* src_y_a, + int stride_y_a, + const uint8_t* src_u_a, + int stride_u_a, + const uint8_t* src_v_a, + int stride_v_a, + const uint8_t* src_y_b, + int stride_y_b, + const uint8_t* src_u_b, + int stride_u_b, + const uint8_t* src_v_b, + int stride_v_b, + int width, + int height); LIBYUV_API -double CalcFrameSsim(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height); +double CalcFrameSsim(const uint8_t* src_a, + int stride_a, + const uint8_t* src_b, + int stride_b, + int width, + int height); LIBYUV_API -double I420Ssim(const uint8* src_y_a, int stride_y_a, - const uint8* src_u_a, int stride_u_a, - const uint8* src_v_a, int stride_v_a, - const uint8* src_y_b, int 
stride_y_b, - const uint8* src_u_b, int stride_u_b, - const uint8* src_v_b, int stride_v_b, - int width, int height); +double I420Ssim(const uint8_t* src_y_a, + int stride_y_a, + const uint8_t* src_u_a, + int stride_u_a, + const uint8_t* src_v_a, + int stride_v_a, + const uint8_t* src_y_b, + int stride_y_b, + const uint8_t* src_u_b, + int stride_u_b, + const uint8_t* src_v_b, + int stride_v_b, + int width, + int height); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_COMPARE_H_ NOLINT +#endif // INCLUDE_LIBYUV_COMPARE_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert.h index fcfcf544e128..d12ef24f7997 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_CONVERT_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_CONVERT_H_ #define INCLUDE_LIBYUV_CONVERT_H_ #include "libyuv/basic_types.h" @@ -16,8 +16,8 @@ #include "libyuv/rotate.h" // For enum RotationMode. // TODO(fbarchard): fix WebRTC source to include following libyuv headers: -#include "libyuv/convert_argb.h" // For WebRTC I420ToARGB. b/620 -#include "libyuv/convert_from.h" // For WebRTC ConvertFromI420. b/620 +#include "libyuv/convert_argb.h" // For WebRTC I420ToARGB. b/620 +#include "libyuv/convert_from.h" // For WebRTC ConvertFromI420. b/620 #include "libyuv/planar_functions.h" // For WebRTC I420Rect, CopyPlane. b/618 #ifdef __cplusplus @@ -27,195 +27,335 @@ extern "C" { // Convert I444 to I420. LIBYUV_API -int I444ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I444ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert I422 to I420. LIBYUV_API -int I422ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I411 to I420. -LIBYUV_API -int I411ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I422ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Copy I420 to I420. 
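As a usage sketch for the PSNR helpers in the compare.h hunk above (the helper name and tight packing are illustrative, not part of the patch): SumSquareErrorToPsnr computes, in effect, 10 * log10(count * 255^2 / sse), returning kMaxPsnr (128) for identical frames.

#include "libyuv/compare.h"

// Hypothetical helper: PSNR between two equally sized 8-bit planes.
double PlanePsnr(const uint8_t* a, const uint8_t* b, int width, int height) {
  const int count = width * height;
  const uint64_t sse = libyuv::ComputeSumSquareError(a, b, count);
  return libyuv::SumSquareErrorToPsnr(sse, count);  // 128 when sse == 0
}

The I420Psnr and I420Ssim entry points extend the same idea to full three-plane frames. The convert.h hunk continues below with the I420Copy alias.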
#define I420ToI420 I420Copy LIBYUV_API -int I420Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I420Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Copy I010 to I010 +#define I010ToI010 I010Copy +#define H010ToH010 I010Copy +LIBYUV_API +int I010Copy(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert 10 bit YUV to 8 bit +#define H010ToH420 I010ToI420 +LIBYUV_API +int I010ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert I400 (grey) to I420. LIBYUV_API -int I400ToI420(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I400ToI420(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); #define J400ToJ420 I400ToI420 // Convert NV12 to I420. LIBYUV_API -int NV12ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int NV12ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert NV21 to I420. LIBYUV_API -int NV21ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_vu, int src_stride_vu, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int NV21ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert YUY2 to I420. LIBYUV_API -int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int YUY2ToI420(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert UYVY to I420. 
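UYVYToI420, declared next, covers packed 4:2:2 input; for the semi-planar NV12/NV21 paths above, here is a hedged sketch of unpacking a tightly packed NV12 frame into I420 under the new uint8_t signatures (helper name and vector storage are illustrative):

#include <vector>
#include "libyuv/convert.h"

// Hypothetical helper: split NV12 (full-res Y plane plus interleaved,
// half-height UV plane) into three I420 planes. Strides assume tightly
// packed rows and even dimensions; real capture buffers may pad rows.
bool Nv12ToI420Frame(const uint8_t* src_y, const uint8_t* src_uv,
                     int width, int height,
                     std::vector<uint8_t>* y,
                     std::vector<uint8_t>* u,
                     std::vector<uint8_t>* v) {
  y->resize(width * height);
  u->resize((width / 2) * (height / 2));
  v->resize((width / 2) * (height / 2));
  return libyuv::NV12ToI420(src_y, width, src_uv, width,
                            y->data(), width,
                            u->data(), width / 2,
                            v->data(), width / 2,
                            width, height) == 0;
}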
LIBYUV_API -int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int UYVYToI420(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert M420 to I420. LIBYUV_API -int M420ToI420(const uint8* src_m420, int src_stride_m420, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int M420ToI420(const uint8_t* src_m420, + int src_stride_m420, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert Android420 to I420. LIBYUV_API -int Android420ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - int pixel_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int Android420ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // ARGB little endian (bgra in memory) to I420. LIBYUV_API -int ARGBToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToI420(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // BGRA little endian (argb in memory) to I420. LIBYUV_API -int BGRAToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int BGRAToI420(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // ABGR little endian (rgba in memory) to I420. LIBYUV_API -int ABGRToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ABGRToI420(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // RGBA little endian (abgr in memory) to I420. LIBYUV_API -int RGBAToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int RGBAToI420(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // RGB little endian (bgr in memory) to I420. 
LIBYUV_API -int RGB24ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int RGB24ToI420(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // RGB big endian (rgb in memory) to I420. LIBYUV_API -int RAWToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int RAWToI420(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // RGB16 (RGBP fourcc) little endian to I420. LIBYUV_API -int RGB565ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int RGB565ToI420(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // RGB15 (RGBO fourcc) little endian to I420. LIBYUV_API -int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGB1555ToI420(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // RGB12 (R444 fourcc) little endian to I420. LIBYUV_API -int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGB4444ToI420(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); #ifdef HAVE_JPEG // src_width/height provided by capture. // dst_width/height for clipping determine final size. LIBYUV_API -int MJPGToI420(const uint8* sample, size_t sample_size, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_width, int src_height, - int dst_width, int dst_height); +int MJPGToI420(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int src_width, + int src_height, + int dst_width, + int dst_height); // Query size of MJPG in pixels. LIBYUV_API -int MJPGSize(const uint8* sample, size_t sample_size, - int* width, int* height); +int MJPGSize(const uint8_t* sample, + size_t sample_size, + int* width, + int* height); #endif // Convert camera sample to I420 with cropping, rotation and vertical flip. @@ -238,22 +378,29 @@ int MJPGSize(const uint8* sample, size_t sample_size, // Must be less than or equal to src_width/src_height // Cropping parameters are pre-rotation. // "rotation" can be 0, 90, 180 or 270. -// "format" is a fourcc. ie 'I420', 'YUY2' +// "fourcc" is a fourcc. ie 'I420', 'YUY2' // Returns 0 for successful; -1 for invalid parameter. Non-zero for failure. 
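A sketch of calling the ConvertToI420 entry point declared immediately below, using the renamed sample/sample_size/fourcc parameters (dimensions and the helper name are illustrative; kRotate90 comes from rotate.h, which convert.h already includes, and FOURCC_YUY2 from video_common.h):

#include "libyuv/convert.h"
#include "libyuv/video_common.h"  // for FOURCC_YUY2

// Hypothetical helper: a 640x480 YUY2 capture rotated 90 degrees into a
// 480x640 I420 frame. Crop parameters are pre-rotation, per the comment
// above, so the crop here covers the full 640x480 source.
int CaptureToI420(const uint8_t* sample, size_t sample_size,
                  uint8_t* dst_y, uint8_t* dst_u, uint8_t* dst_v) {
  return libyuv::ConvertToI420(sample, sample_size,
                               dst_y, 480,  // strides use the rotated width
                               dst_u, 240,
                               dst_v, 240,
                               0, 0,        // crop_x, crop_y
                               640, 480,    // src_width, src_height
                               640, 480,    // crop_width, crop_height
                               libyuv::kRotate90, libyuv::FOURCC_YUY2);
}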
LIBYUV_API -int ConvertToI420(const uint8* src_frame, size_t src_size, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int crop_x, int crop_y, - int src_width, int src_height, - int crop_width, int crop_height, +int ConvertToI420(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int crop_x, + int crop_y, + int src_width, + int src_height, + int crop_width, + int crop_height, enum RotationMode rotation, - uint32 format); + uint32_t fourcc); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_CONVERT_H_ NOLINT +#endif // INCLUDE_LIBYUV_CONVERT_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_argb.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_argb.h index 19672f326906..ab772b6c3233 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_argb.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_argb.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_ #define INCLUDE_LIBYUV_CONVERT_ARGB_H_ #include "libyuv/basic_types.h" @@ -30,258 +30,621 @@ extern "C" { // Copy ARGB to ARGB. LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBCopy(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert I420 to ARGB. LIBYUV_API -int I420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Duplicate prototype for function in convert_from.h for remoting. LIBYUV_API -int I420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert I010 to ARGB. +LIBYUV_API +int I010ToARGB(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I010 to ARGB. +LIBYUV_API +int I010ToARGB(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I010 to ABGR. +LIBYUV_API +int I010ToABGR(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert H010 to ARGB. 
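The H010 variant declared next applies BT.709 coefficients to the same layout. For the new 10-bit I010 path above, a sketch (helper name illustrative; note the source pointers are uint16_t*, so their strides count 16-bit elements rather than bytes):

#include <vector>
#include "libyuv/convert_argb.h"

// Hypothetical helper: expand a 10-bit I010 frame (10 significant bits
// per uint16_t sample) down to 8-bit ARGB for display.
bool I010FrameToArgb(const uint16_t* y, const uint16_t* u, const uint16_t* v,
                     int width, int height, std::vector<uint8_t>* argb) {
  argb->resize(static_cast<size_t>(width) * height * 4);
  return libyuv::I010ToARGB(y, width,       // strides in uint16_t units
                            u, width / 2,
                            v, width / 2,
                            argb->data(), width * 4,  // dst stride in bytes
                            width, height) == 0;
}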
+LIBYUV_API +int H010ToARGB(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert H010 to ABGR. +LIBYUV_API +int H010ToABGR(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); // Convert I422 to ARGB. LIBYUV_API -int I422ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert I444 to ARGB. LIBYUV_API -int I444ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert J444 to ARGB. LIBYUV_API -int J444ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int J444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert I444 to ABGR. LIBYUV_API -int I444ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); - -// Convert I411 to ARGB. -LIBYUV_API -int I411ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); // Convert I420 with Alpha to preattenuated ARGB. LIBYUV_API -int I420AlphaToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - const uint8* src_a, int src_stride_a, - uint8* dst_argb, int dst_stride_argb, - int width, int height, int attenuate); +int I420AlphaToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + int attenuate); // Convert I420 with Alpha to preattenuated ABGR. 
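A sketch for I420AlphaToARGB above; the ABGR variant declared next differs only in channel order. Passing attenuate = 1 premultiplies R, G and B by A, which is what most compositors expect (helper name and tight packing are illustrative):

#include "libyuv/convert_argb.h"

// Hypothetical helper: I420 plus a full-resolution alpha plane to
// premultiplied ARGB, all planes tightly packed.
int AlphaFrameToArgb(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                     const uint8_t* a, int width, int height, uint8_t* argb) {
  return libyuv::I420AlphaToARGB(y, width,
                                 u, width / 2,
                                 v, width / 2,
                                 a, width,  // alpha plane is full resolution
                                 argb, width * 4,
                                 width, height,
                                 /*attenuate=*/1);
}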
LIBYUV_API -int I420AlphaToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - const uint8* src_a, int src_stride_a, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height, int attenuate); +int I420AlphaToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height, + int attenuate); // Convert I400 (grey) to ARGB. Reverse of ARGBToI400. LIBYUV_API -int I400ToARGB(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I400ToARGB(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert J400 (jpeg grey) to ARGB. LIBYUV_API -int J400ToARGB(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int J400ToARGB(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Alias. #define YToARGB I400ToARGB // Convert NV12 to ARGB. LIBYUV_API -int NV12ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int NV12ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert NV21 to ARGB. LIBYUV_API -int NV21ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_vu, int src_stride_vu, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int NV21ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert NV12 to ABGR. +int NV12ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert NV21 to ABGR. +LIBYUV_API +int NV21ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert NV12 to RGB24. +LIBYUV_API +int NV12ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +// Convert NV21 to RGB24. +LIBYUV_API +int NV21ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); // Convert M420 to ARGB. LIBYUV_API -int M420ToARGB(const uint8* src_m420, int src_stride_m420, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int M420ToARGB(const uint8_t* src_m420, + int src_stride_m420, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert YUY2 to ARGB. LIBYUV_API -int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int YUY2ToARGB(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert UYVY to ARGB. 
LIBYUV_API -int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int UYVYToARGB(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert J420 to ARGB. LIBYUV_API -int J420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int J420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert J422 to ARGB. LIBYUV_API -int J422ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int J422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert J420 to ABGR. LIBYUV_API -int J420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); +int J420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); // Convert J422 to ABGR. LIBYUV_API -int J422ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); +int J422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); // Convert H420 to ARGB. LIBYUV_API -int H420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int H420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert H422 to ARGB. LIBYUV_API -int H422ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int H422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert H420 to ABGR. LIBYUV_API -int H420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); +int H420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); // Convert H422 to ABGR. 
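H422ToABGR, declared next, completes this family. As a reading aid: the J-prefixed converters assume full-range BT.601 (JPEG) YUV, the H-prefixed ones BT.709 (HD), and the plain I-prefixed ones studio-range BT.601. A sketch picking the BT.709 path (helper name illustrative):

#include "libyuv/convert_argb.h"

// Hypothetical helper: decoder output tagged as BT.709 goes through
// H420ToARGB rather than I420ToARGB to get the matching color matrix.
int HdFrameToArgb(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                  int width, int height, uint8_t* argb) {
  return libyuv::H420ToARGB(y, width, u, width / 2, v, width / 2,
                            argb, width * 4, width, height);
}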
LIBYUV_API -int H422ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); +int H422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert H010 to ARGB. +LIBYUV_API +int H010ToARGB(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I010 to AR30. +LIBYUV_API +int I010ToAR30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + +// Convert H010 to AR30. +LIBYUV_API +int H010ToAR30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + +// Convert I010 to AB30. +LIBYUV_API +int I010ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); + +// Convert H010 to AB30. +LIBYUV_API +int H010ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); // BGRA little endian (argb in memory) to ARGB. LIBYUV_API -int BGRAToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int BGRAToARGB(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // ABGR little endian (rgba in memory) to ARGB. LIBYUV_API -int ABGRToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ABGRToARGB(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // RGBA little endian (abgr in memory) to ARGB. LIBYUV_API -int RGBAToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int RGBAToARGB(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Deprecated function name. #define BG24ToARGB RGB24ToARGB // RGB little endian (bgr in memory) to ARGB. LIBYUV_API -int RGB24ToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int RGB24ToARGB(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // RGB big endian (rgb in memory) to ARGB. LIBYUV_API -int RAWToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int RAWToARGB(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // RGB16 (RGBP fourcc) little endian to ARGB. 
LIBYUV_API -int RGB565ToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int RGB565ToARGB(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // RGB15 (RGBO fourcc) little endian to ARGB. LIBYUV_API -int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGB1555ToARGB(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // RGB12 (R444 fourcc) little endian to ARGB. LIBYUV_API -int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGB4444ToARGB(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Aliases +#define AB30ToARGB AR30ToABGR +#define AB30ToABGR AR30ToARGB +#define AB30ToAR30 AR30ToAB30 + +// Convert AR30 To ARGB. +LIBYUV_API +int AR30ToARGB(const uint8_t* src_ar30, + int src_stride_ar30, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert AR30 To ABGR. +LIBYUV_API +int AR30ToABGR(const uint8_t* src_ar30, + int src_stride_ar30, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert AR30 To AB30. +LIBYUV_API +int AR30ToAB30(const uint8_t* src_ar30, + int src_stride_ar30, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); #ifdef HAVE_JPEG // src_width/height provided by capture // dst_width/height for clipping determine final size. LIBYUV_API -int MJPGToARGB(const uint8* sample, size_t sample_size, - uint8* dst_argb, int dst_stride_argb, - int src_width, int src_height, - int dst_width, int dst_height); +int MJPGToARGB(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_argb, + int dst_stride_argb, + int src_width, + int src_height, + int dst_width, + int dst_height); #endif +// Convert Android420 to ARGB. +LIBYUV_API +int Android420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert Android420 to ABGR. +LIBYUV_API +int Android420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + // Convert camera sample to ARGB with cropping, rotation and vertical flip. -// "src_size" is needed to parse MJPG. +// "sample_size" is needed to parse MJPG. // "dst_stride_argb" number of bytes in a row of the dst_argb plane. // Normally this would be the same as dst_width, with recommended alignment // to 16 bytes for better efficiency. @@ -300,20 +663,25 @@ int MJPGToARGB(const uint8* sample, size_t sample_size, // Must be less than or equal to src_width/src_height // Cropping parameters are pre-rotation. // "rotation" can be 0, 90, 180 or 270. -// "format" is a fourcc. ie 'I420', 'YUY2' +// "fourcc" is a fourcc. ie 'I420', 'YUY2' // Returns 0 for successful; -1 for invalid parameter. Non-zero for failure. 
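ConvertToARGB, declared just below, mirrors ConvertToI420 with the same renamed sample/sample_size/fourcc parameters. For the Android420 entry points added above, src_pixel_stride_uv is what lets one signature cover Android's YUV_420_888: a pixel stride of 1 means planar (I420-like) chroma, 2 means interleaved (NV12/NV21-like) chroma. A sketch (helper name illustrative):

#include "libyuv/convert_argb.h"

// Hypothetical helper: wrap an android.media.Image-style frame, passing
// the per-plane row strides and the UV pixel stride straight through.
int Android420FrameToArgb(const uint8_t* y, int y_stride,
                          const uint8_t* u, int u_stride,
                          const uint8_t* v, int v_stride,
                          int uv_pixel_stride,
                          int width, int height, uint8_t* argb) {
  return libyuv::Android420ToARGB(y, y_stride, u, u_stride, v, v_stride,
                                  uv_pixel_stride,
                                  argb, width * 4, width, height);
}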
LIBYUV_API -int ConvertToARGB(const uint8* src_frame, size_t src_size, - uint8* dst_argb, int dst_stride_argb, - int crop_x, int crop_y, - int src_width, int src_height, - int crop_width, int crop_height, +int ConvertToARGB(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_argb, + int dst_stride_argb, + int crop_x, + int crop_y, + int src_width, + int src_height, + int crop_width, + int crop_height, enum RotationMode rotation, - uint32 format); + uint32_t fourcc); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_ NOLINT +#endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from.h index 39e1578a0e33..5cd8a4bfc04c 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_ #define INCLUDE_LIBYUV_CONVERT_FROM_H_ #include "libyuv/basic_types.h" @@ -21,159 +21,322 @@ extern "C" { // See Also convert.h for conversions from formats to I420. -// I420Copy in convert to I420ToI420. +// Convert 8 bit YUV to 10 bit. +#define H420ToH010 I420ToI010 +int I420ToI010(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); LIBYUV_API -int I420ToI422(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I420ToI422(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); LIBYUV_API -int I420ToI444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -LIBYUV_API -int I420ToI411(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I420ToI444(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21. 
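I400Copy, declared next, handles the grey-plane case. For the new 8-to-10-bit widening path above, a sketch (helper name illustrative; the destination pointers are uint16_t*, so their strides count 16-bit elements, and the 8-bit samples are scaled up into the 10-bit range):

#include "libyuv/convert_from.h"

// Hypothetical helper: widen tightly packed 8-bit I420 into I010.
int WidenI420ToI010(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                    int width, int height,
                    uint16_t* y10, uint16_t* u10, uint16_t* v10) {
  return libyuv::I420ToI010(y, width, u, width / 2, v, width / 2,
                            y10, width, u10, width / 2, v10, width / 2,
                            width, height);
}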
LIBYUV_API -int I400Copy(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); +int I400Copy(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); LIBYUV_API -int I420ToNV12(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); +int I420ToNV12(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); LIBYUV_API -int I420ToNV21(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); +int I420ToNV21(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); LIBYUV_API -int I420ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); +int I420ToYUY2(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height); LIBYUV_API -int I420ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); +int I420ToUYVY(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uyvy, + int dst_stride_uyvy, + int width, + int height); LIBYUV_API -int I420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); LIBYUV_API -int I420ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I420ToBGRA(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_bgra, + int dst_stride_bgra, + int width, + int height); LIBYUV_API -int I420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); LIBYUV_API -int I420ToRGBA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgba, int 
dst_stride_rgba, - int width, int height); +int I420ToRGBA(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height); LIBYUV_API -int I420ToRGB24(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); +int I420ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); LIBYUV_API -int I420ToRAW(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); +int I420ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); LIBYUV_API -int I420ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); +int H420ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +LIBYUV_API +int H420ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); + +LIBYUV_API +int I420ToRGB565(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height); + +LIBYUV_API +int I422ToRGB565(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height); // Convert I420 To RGB565 with 4x4 dither matrix (16 bytes). // Values in dither matrix from 0 to 7 recommended. // The order of the dither matrix is first byte is upper left. 
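A sketch for I420ToRGB565Dither, declared just below (helper and matrix names are illustrative). One reasonable 16-byte matrix is the standard 4x4 Bayer pattern halved into the recommended 0..7 range:

#include "libyuv/convert_from.h"

// Hypothetical ordered-dither matrix: 4x4 Bayer, scaled to values 0..7,
// row-major from the upper left as the comment above specifies.
static const uint8_t kBayer4x4[16] = {
    0, 4, 1, 5,
    6, 2, 7, 3,
    1, 5, 0, 4,
    7, 3, 6, 2,
};

int I420ToDitheredRgb565(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                         int width, int height, uint8_t* rgb565) {
  return libyuv::I420ToRGB565Dither(y, width, u, width / 2, v, width / 2,
                                    rgb565, width * 2,  // 2 bytes per pixel
                                    kBayer4x4, width, height);
}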
LIBYUV_API -int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - const uint8* dither4x4, int width, int height); +int I420ToRGB565Dither(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + const uint8_t* dither4x4, + int width, + int height); LIBYUV_API -int I420ToARGB1555(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); +int I420ToARGB1555(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb1555, + int dst_stride_argb1555, + int width, + int height); LIBYUV_API -int I420ToARGB4444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); +int I420ToARGB4444(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb4444, + int dst_stride_argb4444, + int width, + int height); + +// Convert I420 to AR30. +LIBYUV_API +int I420ToAR30(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + +// Convert H420 to AR30. +LIBYUV_API +int H420ToAR30(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); // Convert I420 to specified format. // "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the // buffer has contiguous rows. Can be negative. A multiple of 16 is optimal. LIBYUV_API -int ConvertFromI420(const uint8* y, int y_stride, - const uint8* u, int u_stride, - const uint8* v, int v_stride, - uint8* dst_sample, int dst_sample_stride, - int width, int height, - uint32 format); +int ConvertFromI420(const uint8_t* y, + int y_stride, + const uint8_t* u, + int u_stride, + const uint8_t* v, + int v_stride, + uint8_t* dst_sample, + int dst_sample_stride, + int width, + int height, + uint32_t fourcc); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_ NOLINT +#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from_argb.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from_argb.h index 1df53200ddee..05c815a093e8 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from_argb.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/convert_from_argb.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ #define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ #include "libyuv/basic_types.h" @@ -21,170 +21,267 @@ extern "C" { // Copy ARGB to ARGB. 
#define ARGBToARGB ARGBCopy LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBCopy(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert ARGB To BGRA. LIBYUV_API -int ARGBToBGRA(const uint8* src_argb, int src_stride_argb, - uint8* dst_bgra, int dst_stride_bgra, - int width, int height); +int ARGBToBGRA(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_bgra, + int dst_stride_bgra, + int width, + int height); // Convert ARGB To ABGR. LIBYUV_API -int ARGBToABGR(const uint8* src_argb, int src_stride_argb, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); +int ARGBToABGR(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); // Convert ARGB To RGBA. LIBYUV_API -int ARGBToRGBA(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height); +int ARGBToRGBA(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height); + +// Aliases +#define ARGBToAB30 ABGRToAR30 +#define ABGRToAB30 ARGBToAR30 + +// Convert ABGR To AR30. +LIBYUV_API +int ABGRToAR30(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + +// Convert ARGB To AR30. +LIBYUV_API +int ARGBToAR30(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); // Convert ARGB To RGB24. LIBYUV_API -int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height); +int ARGBToRGB24(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); // Convert ARGB To RAW. LIBYUV_API -int ARGBToRAW(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb, int dst_stride_rgb, - int width, int height); +int ARGBToRAW(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); // Convert ARGB To RGB565. LIBYUV_API -int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height); +int ARGBToRGB565(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height); // Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes). // Values in dither matrix from 0 to 7 recommended. // The order of the dither matrix is first byte is upper left. // TODO(fbarchard): Consider pointer to 2d array for dither4x4. -// const uint8(*dither)[4][4]; +// const uint8_t(*dither)[4][4]; LIBYUV_API -int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb565, int dst_stride_rgb565, - const uint8* dither4x4, int width, int height); +int ARGBToRGB565Dither(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + const uint8_t* dither4x4, + int width, + int height); // Convert ARGB To ARGB1555. LIBYUV_API -int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb1555, int dst_stride_argb1555, - int width, int height); +int ARGBToARGB1555(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb1555, + int dst_stride_argb1555, + int width, + int height); // Convert ARGB To ARGB4444. 
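ARGBToARGB4444, declared next, packs 4 bits per channel in the same style. For ARGBToRGB565 above, each output pixel keeps the 5/6/5 most significant bits of R, G and B, so destination rows are width * 2 bytes. A sketch (helper name illustrative):

#include "libyuv/convert_from_argb.h"

// Hypothetical helper: pack 32-bit ARGB down to 16-bit RGB565 for a
// small-framebuffer display; both buffers tightly packed.
int ArgbToRgb565(const uint8_t* argb, int width, int height,
                 uint8_t* rgb565) {
  return libyuv::ARGBToRGB565(argb, width * 4, rgb565, width * 2,
                              width, height);
}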
LIBYUV_API -int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb4444, int dst_stride_argb4444, - int width, int height); +int ARGBToARGB4444(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb4444, + int dst_stride_argb4444, + int width, + int height); // Convert ARGB To I444. LIBYUV_API -int ARGBToI444(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToI444(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert ARGB To I422. LIBYUV_API -int ARGBToI422(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToI422(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert ARGB To I420. (also in convert.h) LIBYUV_API -int ARGBToI420(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToI420(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert ARGB to J420. (JPeg full range I420). LIBYUV_API -int ARGBToJ420(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToJ420(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert ARGB to J422. LIBYUV_API -int ARGBToJ422(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I411. -LIBYUV_API -int ARGBToI411(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToJ422(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert ARGB to J400. (JPeg full range). LIBYUV_API -int ARGBToJ400(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - int width, int height); +int ARGBToJ400(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height); // Convert ARGB to I400. LIBYUV_API -int ARGBToI400(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height); +int ARGBToI400(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); // Convert ARGB to G. 
(Reverse of J400toARGB, which replicates G back to ARGB) LIBYUV_API -int ARGBToG(const uint8* src_argb, int src_stride_argb, - uint8* dst_g, int dst_stride_g, - int width, int height); +int ARGBToG(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_g, + int dst_stride_g, + int width, + int height); // Convert ARGB To NV12. LIBYUV_API -int ARGBToNV12(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); +int ARGBToNV12(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); // Convert ARGB To NV21. LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); +int ARGBToNV21(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); // Convert ARGB To NV21. LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); +int ARGBToNV21(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); // Convert ARGB To YUY2. LIBYUV_API -int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, - uint8* dst_yuy2, int dst_stride_yuy2, - int width, int height); +int ARGBToYUY2(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height); // Convert ARGB To UYVY. LIBYUV_API -int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, - uint8* dst_uyvy, int dst_stride_uyvy, - int width, int height); +int ARGBToUYVY(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_uyvy, + int dst_stride_uyvy, + int width, + int height); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ NOLINT +#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/cpu_id.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/cpu_id.h index dfb7445e2f13..0229cb5e7369 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/cpu_id.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/cpu_id.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_CPU_ID_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_CPU_ID_H_ #define INCLUDE_LIBYUV_CPU_ID_H_ #include "libyuv/basic_types.h" @@ -31,50 +31,89 @@ static const int kCpuHasX86 = 0x10; static const int kCpuHasSSE2 = 0x20; static const int kCpuHasSSSE3 = 0x40; static const int kCpuHasSSE41 = 0x80; -static const int kCpuHasSSE42 = 0x100; +static const int kCpuHasSSE42 = 0x100; // unused at this time. static const int kCpuHasAVX = 0x200; static const int kCpuHasAVX2 = 0x400; static const int kCpuHasERMS = 0x800; static const int kCpuHasFMA3 = 0x1000; -static const int kCpuHasAVX3 = 0x2000; -// 0x2000, 0x4000, 0x8000 reserved for future X86 flags. 
+static const int kCpuHasF16C = 0x2000; +static const int kCpuHasGFNI = 0x4000; +static const int kCpuHasAVX512BW = 0x8000; +static const int kCpuHasAVX512VL = 0x10000; +static const int kCpuHasAVX512VBMI = 0x20000; +static const int kCpuHasAVX512VBMI2 = 0x40000; +static const int kCpuHasAVX512VBITALG = 0x80000; +static const int kCpuHasAVX512VPOPCNTDQ = 0x100000; // These flags are only valid on MIPS processors. -static const int kCpuHasMIPS = 0x10000; -static const int kCpuHasDSPR2 = 0x20000; +static const int kCpuHasMIPS = 0x200000; +static const int kCpuHasMSA = 0x400000; -// Internal function used to auto-init. +// Optional init function. TestCpuFlag does an auto-init. +// Returns cpu_info flags. LIBYUV_API int InitCpuFlags(void); +// Detect CPU has SSE2 etc. +// Test_flag parameter should be one of kCpuHas constants above. +// Returns non-zero if instruction set is detected +static __inline int TestCpuFlag(int test_flag) { + LIBYUV_API extern int cpu_info_; +#ifdef __ATOMIC_RELAXED + int cpu_info = __atomic_load_n(&cpu_info_, __ATOMIC_RELAXED); +#else + int cpu_info = cpu_info_; +#endif + return (!cpu_info ? InitCpuFlags() : cpu_info) & test_flag; +} + // Internal function for parsing /proc/cpuinfo. LIBYUV_API int ArmCpuCaps(const char* cpuinfo_name); -// Detect CPU has SSE2 etc. -// Test_flag parameter should be one of kCpuHas constants above. -// returns non-zero if instruction set is detected -static __inline int TestCpuFlag(int test_flag) { - LIBYUV_API extern int cpu_info_; - return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag; -} - // For testing, allow CPU flags to be disabled. // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. // MaskCpuFlags(-1) to enable all cpu specific optimizations. // MaskCpuFlags(1) to disable all cpu specific optimizations. +// MaskCpuFlags(0) to reset state so next call will auto init. +// Returns cpu_info flags. LIBYUV_API -void MaskCpuFlags(int enable_flags); +int MaskCpuFlags(int enable_flags); + +// Sets the CPU flags to |cpu_flags|, bypassing the detection code. |cpu_flags| +// should be a valid combination of the kCpuHas constants above and include +// kCpuInitialized. Use this method when running in a sandboxed process where +// the detection code might fail (as it might access /proc/cpuinfo). In such +// cases the cpu_info can be obtained from a non sandboxed process by calling +// InitCpuFlags() and passed to the sandboxed process (via command line +// parameters, IPC...) which can then call this method to initialize the CPU +// flags. +// Notes: +// - when specifying 0 for |cpu_flags|, the auto initialization is enabled +// again. +// - enabling CPU features that are not supported by the CPU will result in +// undefined behavior. +// TODO(fbarchard): consider writing a helper function that translates from +// other library CPU info to libyuv CPU info and add a .md doc that explains +// CPU detection. +static __inline void SetCpuFlags(int cpu_flags) { + LIBYUV_API extern int cpu_info_; +#ifdef __ATOMIC_RELAXED + __atomic_store_n(&cpu_info_, cpu_flags, __ATOMIC_RELAXED); +#else + cpu_info_ = cpu_flags; +#endif +} // Low level cpuid for X86. Returns zeros on other CPUs. // eax is the info type that you want. // ecx is typically the cpu number, and should normally be zero. 
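The sandbox workflow described above, sketched end to end; the function names and the IPC transport are assumptions, and only the libyuv calls come from this header:

#include "libyuv/cpu_id.h"

// Broker (non-sandboxed) process: run normal detection, which may read
// /proc/cpuinfo, then ship the resulting flags to the sandboxed child.
int DetectCpuFlagsForChild(void) {
  return InitCpuFlags();
}

// Sandboxed process: install the flags received over IPC (or argv) so
// detection is bypassed. Per the comment above, the value should include
// kCpuInitialized.
void InstallCpuFlags(int cpu_flags) {
  SetCpuFlags(cpu_flags);
  if (TestCpuFlag(kCpuHasSSSE3)) {
    // SSSE3 code paths will now be selected without touching /proc.
  }
}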
 LIBYUV_API
-void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info);
+void CpuId(int info_eax, int info_ecx, int* cpu_info);
 
 #ifdef __cplusplus
 } // extern "C"
 } // namespace libyuv
 #endif
 
-#endif // INCLUDE_LIBYUV_CPU_ID_H_ NOLINT
+#endif // INCLUDE_LIBYUV_CPU_ID_H_
diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/macros_msa.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/macros_msa.h
new file mode 100644
index 000000000000..bba0e8aedac2
--- /dev/null
+++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/macros_msa.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright 2016 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_MACROS_MSA_H_
+#define INCLUDE_LIBYUV_MACROS_MSA_H_
+
+#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
+#include <msa.h>
+#include <stdint.h>
+
+#if (__mips_isa_rev >= 6)
+#define LW(psrc) \
+  ({ \
+    const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
+    uint32_t val_m; \
+    asm volatile("lw %[val_m], %[psrc_lw_m] \n" \
+                 : [val_m] "=r"(val_m) \
+                 : [psrc_lw_m] "m"(*psrc_lw_m)); \
+    val_m; \
+  })
+
+#if (__mips == 64)
+#define LD(psrc) \
+  ({ \
+    const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
+    uint64_t val_m = 0; \
+    asm volatile("ld %[val_m], %[psrc_ld_m] \n" \
+                 : [val_m] "=r"(val_m) \
+                 : [psrc_ld_m] "m"(*psrc_ld_m)); \
+    val_m; \
+  })
+#else // !(__mips == 64)
+#define LD(psrc) \
+  ({ \
+    const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
+    uint32_t val0_m, val1_m; \
+    uint64_t val_m = 0; \
+    val0_m = LW(psrc_ld_m); \
+    val1_m = LW(psrc_ld_m + 4); \
+    val_m = (uint64_t)(val1_m); /* NOLINT */ \
+    val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
+    val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \
+    val_m; \
+  })
+#endif // (__mips == 64)
+
+#define SW(val, pdst) \
+  ({ \
+    uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
+    uint32_t val_m = (val); \
+    asm volatile("sw %[val_m], %[pdst_sw_m] \n" \
+                 : [pdst_sw_m] "=m"(*pdst_sw_m) \
+                 : [val_m] "r"(val_m)); \
+  })
+
+#if (__mips == 64)
+#define SD(val, pdst) \
+  ({ \
+    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
+    uint64_t val_m = (val); \
+    asm volatile("sd %[val_m], %[pdst_sd_m] \n" \
+                 : [pdst_sd_m] "=m"(*pdst_sd_m) \
+                 : [val_m] "r"(val_m)); \
+  })
+#else // !(__mips == 64)
+#define SD(val, pdst) \
+  ({ \
+    uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
+    uint32_t val0_m, val1_m; \
+    val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
+    val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
+    SW(val0_m, pdst_sd_m); \
+    SW(val1_m, pdst_sd_m + 4); \
+  })
+#endif // !(__mips == 64)
+#else // !(__mips_isa_rev >= 6)
+#define LW(psrc) \
+  ({ \
+    const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
+    uint32_t val_m; \
+    asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \
+                 : [val_m] "=r"(val_m) \
+                 : [psrc_lw_m] "m"(*psrc_lw_m)); \
+    val_m; \
+  })
+
+#if (__mips == 64)
+#define LD(psrc) \
+  ({ \
+    const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
+    uint64_t val_m = 0; \
+    asm volatile("uld %[val_m], %[psrc_ld_m] \n" \
+                 : [val_m] "=r"(val_m) \
+                 : [psrc_ld_m] "m"(*psrc_ld_m)); \
+    val_m; \
+  })
+#else // !(__mips == 64)
+#define LD(psrc) \
+  ({ \
+    const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
+
uint32_t val0_m, val1_m; \ + uint64_t val_m = 0; \ + val0_m = LW(psrc_ld_m); \ + val1_m = LW(psrc_ld_m + 4); \ + val_m = (uint64_t)(val1_m); /* NOLINT */ \ + val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ + val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \ + val_m; \ + }) +#endif // (__mips == 64) + +#define SW(val, pdst) \ + ({ \ + uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \ + uint32_t val_m = (val); \ + asm volatile("usw %[val_m], %[pdst_sw_m] \n" \ + : [pdst_sw_m] "=m"(*pdst_sw_m) \ + : [val_m] "r"(val_m)); \ + }) + +#define SD(val, pdst) \ + ({ \ + uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ + uint32_t val0_m, val1_m; \ + val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ + val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ + SW(val0_m, pdst_sd_m); \ + SW(val1_m, pdst_sd_m + 4); \ + }) +#endif // (__mips_isa_rev >= 6) + +// TODO(fbarchard): Consider removing __VAR_ARGS versions. +#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ +#define LD_UB(...) LD_B(const v16u8, __VA_ARGS__) + +#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ +#define ST_UB(...) ST_B(v16u8, __VA_ARGS__) + +#define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ +#define ST_UH(...) ST_H(v8u16, __VA_ARGS__) + +/* Description : Load two vectors with 16 'byte' sized elements + Arguments : Inputs - psrc, stride + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Load 16 byte elements in 'out0' from (psrc) + Load 16 byte elements in 'out1' from (psrc + stride) +*/ +#define LD_B2(RTYPE, psrc, stride, out0, out1) \ + { \ + out0 = LD_B(RTYPE, (psrc)); \ + out1 = LD_B(RTYPE, (psrc) + stride); \ + } +#define LD_UB2(...) LD_B2(const v16u8, __VA_ARGS__) + +#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \ + { \ + LD_B2(RTYPE, (psrc), stride, out0, out1); \ + LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \ + } +#define LD_UB4(...) LD_B4(const v16u8, __VA_ARGS__) + +/* Description : Store two vectors with stride each having 16 'byte' sized + elements + Arguments : Inputs - in0, in1, pdst, stride + Details : Store 16 byte elements from 'in0' to (pdst) + Store 16 byte elements from 'in1' to (pdst + stride) +*/ +#define ST_B2(RTYPE, in0, in1, pdst, stride) \ + { \ + ST_B(RTYPE, in0, (pdst)); \ + ST_B(RTYPE, in1, (pdst) + stride); \ + } +#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) + +#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \ + { \ + ST_B2(RTYPE, in0, in1, (pdst), stride); \ + ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ + } +#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) + +/* Description : Store vectors of 8 halfword elements with stride + Arguments : Inputs - in0, in1, pdst, stride + Details : Store 8 halfword elements from 'in0' to (pdst) + Store 8 halfword elements from 'in1' to (pdst + stride) +*/ +#define ST_H2(RTYPE, in0, in1, pdst, stride) \ + { \ + ST_H(RTYPE, in0, (pdst)); \ + ST_H(RTYPE, in1, (pdst) + stride); \ + } +#define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__) + +// TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly. 
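A rough usage sketch for the load/store macros defined so far; it builds only for MIPS targets with MSA enabled, and the helper name and stride handling are illustrative assumptions:

#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include <stdint.h>
#include "libyuv/macros_msa.h"

// Copy four 16-byte rows using the vector load/store macros above.
static void CopyFourRows16(const uint8_t* src, int src_stride,
                           uint8_t* dst, int dst_stride) {
  v16u8 r0, r1, r2, r3;
  LD_UB4(src, src_stride, r0, r1, r2, r3);  // rows src, src+s, src+2s, src+3s
  ST_UB4(r0, r1, r2, r3, dst, dst_stride);  // stored with the same layout
}
#endif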
+/* Description : Shuffle byte vector elements as per mask vector + Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Byte elements from 'in0' & 'in1' are copied selectively to + 'out0' as per control vector 'mask0' +*/ +#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ + { \ + out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \ + out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \ + } +#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) + +/* Description : Interleave both left and right half of input vectors + Arguments : Inputs - in0, in1 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Right half of byte elements from 'in0' and 'in1' are + interleaved and written to 'out0' +*/ +#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ + { \ + out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ + out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ + } +#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) + +#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ + +#endif // INCLUDE_LIBYUV_MACROS_MSA_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/mjpeg_decoder.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/mjpeg_decoder.h index 8423121d11ec..275f8d4c1853 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/mjpeg_decoder.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/mjpeg_decoder.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_ #define INCLUDE_LIBYUV_MJPEG_DECODER_H_ #include "libyuv/basic_types.h" @@ -26,25 +26,24 @@ namespace libyuv { extern "C" { #endif -LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size); +LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size); #ifdef __cplusplus } // extern "C" #endif -static const uint32 kUnknownDataSize = 0xFFFFFFFF; +static const uint32_t kUnknownDataSize = 0xFFFFFFFF; enum JpegSubsamplingType { kJpegYuv420, kJpegYuv422, - kJpegYuv411, kJpegYuv444, kJpegYuv400, kJpegUnknown }; struct Buffer { - const uint8* data; + const uint8_t* data; int len; }; @@ -66,7 +65,7 @@ struct SetJmpErrorMgr; class LIBYUV_API MJpegDecoder { public: typedef void (*CallbackFunction)(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows); @@ -86,7 +85,7 @@ class LIBYUV_API MJpegDecoder { // If return value is LIBYUV_TRUE, then the values for all the following // getters are populated. // src_len is the size of the compressed mjpeg frame in bytes. - LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len); + LIBYUV_BOOL LoadFrame(const uint8_t* src, size_t src_len); // Returns width of the last loaded frame in pixels. int GetWidth(); @@ -139,18 +138,22 @@ class LIBYUV_API MJpegDecoder { // at least GetComponentSize(i). The pointers in planes are incremented // to point to after the end of the written data. // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded. - LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height); + LIBYUV_BOOL DecodeToBuffers(uint8_t** planes, int dst_width, int dst_height); // Decodes the entire image and passes the data via repeated calls to a // callback function. Each call will get the data for a whole number of // image scanlines. // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded. 
- LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque, - int dst_width, int dst_height); + LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, + void* opaque, + int dst_width, + int dst_height); // The helper function which recognizes the jpeg sub-sampling type. static JpegSubsamplingType JpegSubsamplingTypeHelper( - int* subsample_x, int* subsample_y, int number_of_components); + int* subsample_x, + int* subsample_y, + int number_of_components); private: void AllocOutputBuffers(int num_outbufs); @@ -159,7 +162,7 @@ class LIBYUV_API MJpegDecoder { LIBYUV_BOOL StartDecode(); LIBYUV_BOOL FinishDecode(); - void SetScanlinePointers(uint8** data); + void SetScanlinePointers(uint8_t** data); LIBYUV_BOOL DecodeImcuRow(); int GetComponentScanlinePadding(int component); @@ -178,15 +181,15 @@ class LIBYUV_API MJpegDecoder { // Temporaries used to point to scanline outputs. int num_outbufs_; // Outermost size of all arrays below. - uint8*** scanlines_; + uint8_t*** scanlines_; int* scanlines_sizes_; // Temporary buffer used for decoding when we can't decode directly to the // output buffers. Large enough for just one iMCU row. - uint8** databuf_; + uint8_t** databuf_; int* databuf_strides_; }; } // namespace libyuv #endif // __cplusplus -#endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_ NOLINT +#endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/planar_functions.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/planar_functions.h index 9662516c573a..91137baba254 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/planar_functions.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/planar_functions.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ #define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ #include "libyuv/basic_types.h" @@ -22,449 +22,10 @@ namespace libyuv { extern "C" { #endif -// Copy a plane of data. -LIBYUV_API -void CopyPlane(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); - -LIBYUV_API -void CopyPlane_16(const uint16* src_y, int src_stride_y, - uint16* dst_y, int dst_stride_y, - int width, int height); - -// Set a plane of data to a 32 bit value. -LIBYUV_API -void SetPlane(uint8* dst_y, int dst_stride_y, - int width, int height, - uint32 value); - -// Split interleaved UV plane into separate U and V planes. -LIBYUV_API -void SplitUVPlane(const uint8* src_uv, int src_stride_uv, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Merge separate U and V planes into one interleaved UV plane. -LIBYUV_API -void MergeUVPlane(const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_uv, int dst_stride_uv, - int width, int height); - -// Copy I400. Supports inverting. -LIBYUV_API -int I400ToI400(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); - -#define J400ToJ400 I400ToI400 - -// Copy I422 to I422. -#define I422ToI422 I422Copy -LIBYUV_API -int I422Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Copy I444 to I444. 
-#define I444ToI444 I444Copy -LIBYUV_API -int I444Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert YUY2 to I422. -LIBYUV_API -int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert UYVY to I422. -LIBYUV_API -int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -LIBYUV_API -int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); - -LIBYUV_API -int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); - -// Convert I420 to I400. (calls CopyPlane ignoring u/v). -LIBYUV_API -int I420ToI400(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Alias -#define J420ToJ400 I420ToI400 -#define I420ToI420Mirror I420Mirror - -// I420 mirror. -LIBYUV_API -int I420Mirror(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Alias -#define I400ToI400Mirror I400Mirror - -// I400 mirror. A single plane is mirrored horizontally. -// Pass negative height to achieve 180 degree rotation. -LIBYUV_API -int I400Mirror(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Alias -#define ARGBToARGBMirror ARGBMirror - -// ARGB mirror. -LIBYUV_API -int ARGBMirror(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert NV12 to RGB565. -LIBYUV_API -int NV12ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height); - -// I422ToARGB is in convert_argb.h -// Convert I422 to BGRA. -LIBYUV_API -int I422ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_bgra, int dst_stride_bgra, - int width, int height); - -// Convert I422 to ABGR. -LIBYUV_API -int I422ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); - -// Convert I422 to RGBA. -LIBYUV_API -int I422ToRGBA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height); - -// Alias -#define RGB24ToRAW RAWToRGB24 - -LIBYUV_API -int RAWToRGB24(const uint8* src_raw, int src_stride_raw, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height); - -// Draw a rectangle into I420. 
-LIBYUV_API -int I420Rect(uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int x, int y, int width, int height, - int value_y, int value_u, int value_v); - -// Draw a rectangle into ARGB. -LIBYUV_API -int ARGBRect(uint8* dst_argb, int dst_stride_argb, - int x, int y, int width, int height, uint32 value); - -// Convert ARGB to gray scale ARGB. -LIBYUV_API -int ARGBGrayTo(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Make a rectangle of ARGB gray scale. -LIBYUV_API -int ARGBGray(uint8* dst_argb, int dst_stride_argb, - int x, int y, int width, int height); - -// Make a rectangle of ARGB Sepia tone. -LIBYUV_API -int ARGBSepia(uint8* dst_argb, int dst_stride_argb, - int x, int y, int width, int height); - -// Apply a matrix rotation to each ARGB pixel. -// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2. -// The first 4 coefficients apply to B, G, R, A and produce B of the output. -// The next 4 coefficients apply to B, G, R, A and produce G of the output. -// The next 4 coefficients apply to B, G, R, A and produce R of the output. -// The last 4 coefficients apply to B, G, R, A and produce A of the output. -LIBYUV_API -int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const int8* matrix_argb, - int width, int height); - -// Deprecated. Use ARGBColorMatrix instead. -// Apply a matrix rotation to each ARGB pixel. -// matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1. -// The first 4 coefficients apply to B, G, R, A and produce B of the output. -// The next 4 coefficients apply to B, G, R, A and produce G of the output. -// The last 4 coefficients apply to B, G, R, A and produce R of the output. -LIBYUV_API -int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb, - const int8* matrix_rgb, - int x, int y, int width, int height); - -// Apply a color table each ARGB pixel. -// Table contains 256 ARGB values. -LIBYUV_API -int ARGBColorTable(uint8* dst_argb, int dst_stride_argb, - const uint8* table_argb, - int x, int y, int width, int height); - -// Apply a color table each ARGB pixel but preserve destination alpha. -// Table contains 256 ARGB values. -LIBYUV_API -int RGBColorTable(uint8* dst_argb, int dst_stride_argb, - const uint8* table_argb, - int x, int y, int width, int height); - -// Apply a luma/color table each ARGB pixel but preserve destination alpha. -// Table contains 32768 values indexed by [Y][C] where 7 it 7 bit luma from -// RGB (YJ style) and C is an 8 bit color component (R, G or B). -LIBYUV_API -int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const uint8* luma_rgb_table, - int width, int height); - -// Apply a 3 term polynomial to ARGB values. -// poly points to a 4x4 matrix. The first row is constants. The 2nd row is -// coefficients for b, g, r and a. The 3rd row is coefficients for b squared, -// g squared, r squared and a squared. The 4rd row is coefficients for b to -// the 3, g to the 3, r to the 3 and a to the 3. The values are summed and -// result clamped to 0 to 255. -// A polynomial approximation can be dirived using software such as 'R'. - -LIBYUV_API -int ARGBPolynomial(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const float* poly, - int width, int height); - -// Quantize a rectangle of ARGB. Alpha unaffected. 
-// scale is a 16 bit fractional fixed point scaler between 0 and 65535. -// interval_size should be a value between 1 and 255. -// interval_offset should be a value between 0 and 255. -LIBYUV_API -int ARGBQuantize(uint8* dst_argb, int dst_stride_argb, - int scale, int interval_size, int interval_offset, - int x, int y, int width, int height); - -// Copy ARGB to ARGB. -LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Copy Alpha channel of ARGB to alpha of ARGB. -LIBYUV_API -int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Extract the alpha channel from ARGB. -LIBYUV_API -int ARGBExtractAlpha(const uint8* src_argb, int src_stride_argb, - uint8* dst_a, int dst_stride_a, - int width, int height); - -// Copy Y channel to Alpha of ARGB. -LIBYUV_API -int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width); - -// Get function to Alpha Blend ARGB pixels and store to destination. -LIBYUV_API -ARGBBlendRow GetARGBBlend(); - -// Alpha Blend ARGB images and store to destination. -// Source is pre-multiplied by alpha using ARGBAttenuate. -// Alpha of destination is set to 255. -LIBYUV_API -int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Alpha Blend plane and store to destination. -// Source is not pre-multiplied by alpha. -LIBYUV_API -int BlendPlane(const uint8* src_y0, int src_stride_y0, - const uint8* src_y1, int src_stride_y1, - const uint8* alpha, int alpha_stride, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Alpha Blend YUV images and store to destination. -// Source is not pre-multiplied by alpha. -// Alpha is full width x height and subsampled to half size to apply to UV. -LIBYUV_API -int I420Blend(const uint8* src_y0, int src_stride_y0, - const uint8* src_u0, int src_stride_u0, - const uint8* src_v0, int src_stride_v0, - const uint8* src_y1, int src_stride_y1, - const uint8* src_u1, int src_stride_u1, - const uint8* src_v1, int src_stride_v1, - const uint8* alpha, int alpha_stride, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255. -LIBYUV_API -int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Add ARGB image with ARGB image. Saturates to 255. -LIBYUV_API -int ARGBAdd(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0. -LIBYUV_API -int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert I422 to YUY2. 
-LIBYUV_API -int I422ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -// Convert I422 to UYVY. -LIBYUV_API -int I422ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -// Convert unattentuated ARGB to preattenuated ARGB. -LIBYUV_API -int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert preattentuated ARGB to unattenuated ARGB. -LIBYUV_API -int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Internal function - do not call directly. -// Computes table of cumulative sum for image where the value is the sum -// of all values above and to the left of the entry. Used by ARGBBlur. -LIBYUV_API -int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, - int32* dst_cumsum, int dst_stride32_cumsum, - int width, int height); - -// Blur ARGB image. -// dst_cumsum table of width * (height + 1) * 16 bytes aligned to -// 16 byte boundary. -// dst_stride32_cumsum is number of ints in a row (width * 4). -// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5. -// Blur is optimized for radius of 5 (11x11) or less. -LIBYUV_API -int ARGBBlur(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int32* dst_cumsum, int dst_stride32_cumsum, - int width, int height, int radius); - -// Multiply ARGB image by ARGB value. -LIBYUV_API -int ARGBShade(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height, uint32 value); - -// Interpolate between two images using specified amount of interpolation -// (0 to 255) and store to destination. -// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0 -// and 255 means 1% src0 and 99% src1. -LIBYUV_API -int InterpolatePlane(const uint8* src0, int src_stride0, - const uint8* src1, int src_stride1, - uint8* dst, int dst_stride, - int width, int height, int interpolation); - -// Interpolate between two ARGB images using specified amount of interpolation -// Internally calls InterpolatePlane with width * 4 (bpp). -LIBYUV_API -int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height, int interpolation); - -// Interpolate between two YUV images using specified amount of interpolation -// Internally calls InterpolatePlane on each plane where the U and V planes -// are half width and half height. 
-LIBYUV_API -int I420Interpolate(const uint8* src0_y, int src0_stride_y, - const uint8* src0_u, int src0_stride_u, - const uint8* src0_v, int src0_stride_v, - const uint8* src1_y, int src1_stride_y, - const uint8* src1_u, int src1_stride_u, - const uint8* src1_v, int src1_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height, int interpolation); - -#if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) +// TODO(fbarchard): Move cpu macros to row.h +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 @@ -479,43 +40,808 @@ int I420Interpolate(const uint8* src0_y, int src0_stride_y, #define HAS_ARGBAFFINEROW_SSE2 #endif +// Copy a plane of data. +LIBYUV_API +void CopyPlane(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); + +LIBYUV_API +void CopyPlane_16(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int width, + int height); + +LIBYUV_API +void Convert16To8Plane(const uint16_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int scale, // 16384 for 10 bits + int width, + int height); + +LIBYUV_API +void Convert8To16Plane(const uint8_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int scale, // 1024 for 10 bits + int width, + int height); + +// Set a plane of data to a 32 bit value. +LIBYUV_API +void SetPlane(uint8_t* dst_y, + int dst_stride_y, + int width, + int height, + uint32_t value); + +// Split interleaved UV plane into separate U and V planes. +LIBYUV_API +void SplitUVPlane(const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Merge separate U and V planes into one interleaved UV plane. +LIBYUV_API +void MergeUVPlane(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Split interleaved RGB plane into separate R, G and B planes. +LIBYUV_API +void SplitRGBPlane(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height); + +// Merge separate R, G and B planes into one interleaved RGB plane. +LIBYUV_API +void MergeRGBPlane(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + uint8_t* dst_rgb, + int dst_stride_rgb, + int width, + int height); + +// Copy I400. Supports inverting. +LIBYUV_API +int I400ToI400(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); + +#define J400ToJ400 I400ToI400 + +// Copy I422 to I422. +#define I422ToI422 I422Copy +LIBYUV_API +int I422Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Copy I444 to I444. 
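The scale arguments on the Convert16To8Plane / Convert8To16Plane pair added above are easy to get wrong, so a hedged round-trip sketch for 10-bit luma; packed planes with stride equal to width are assumed, as is libyuv's element-count stride convention for 16-bit planes:

#include <stdint.h>
#include "libyuv/planar_functions.h"

void TenBitRoundTrip(const uint16_t* src10, uint8_t* tmp8,
                     uint16_t* dst10, int width, int height) {
  // 16384 narrows 10-bit samples (0..1023) to 8 bits, per the comment above.
  Convert16To8Plane(src10, width, tmp8, width, 16384, width, height);
  // 1024 widens 8-bit samples back to the 10-bit range.
  Convert8To16Plane(tmp8, width, dst10, width, 1024, width, height);
}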
+#define I444ToI444 I444Copy +LIBYUV_API +int I444Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert YUY2 to I422. +LIBYUV_API +int YUY2ToI422(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert UYVY to I422. +LIBYUV_API +int UYVYToI422(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +LIBYUV_API +int YUY2ToNV12(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +LIBYUV_API +int UYVYToNV12(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +LIBYUV_API +int YUY2ToY(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); + +// Convert I420 to I400. (calls CopyPlane ignoring u/v). +LIBYUV_API +int I420ToI400(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); + +// Alias +#define J420ToJ400 I420ToI400 +#define I420ToI420Mirror I420Mirror + +// I420 mirror. +LIBYUV_API +int I420Mirror(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Alias +#define I400ToI400Mirror I400Mirror + +// I400 mirror. A single plane is mirrored horizontally. +// Pass negative height to achieve 180 degree rotation. +LIBYUV_API +int I400Mirror(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); + +// Alias +#define ARGBToARGBMirror ARGBMirror + +// ARGB mirror. +LIBYUV_API +int ARGBMirror(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert NV12 to RGB565. +LIBYUV_API +int NV12ToRGB565(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height); + +// I422ToARGB is in convert_argb.h +// Convert I422 to BGRA. +LIBYUV_API +int I422ToBGRA(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_bgra, + int dst_stride_bgra, + int width, + int height); + +// Convert I422 to ABGR. +LIBYUV_API +int I422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert I422 to RGBA. 
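The negative-height convention called out above gives a 180 degree rotation for free; a small sketch with I400Mirror (the wrapper name is illustrative):

#include <stdint.h>
#include "libyuv/planar_functions.h"

// Mirror horizontally and flip vertically in one call: libyuv treats a
// negative height as reading the source bottom-up, so this is a 180
// degree rotation of a single gray plane.
int Rotate180Gray(const uint8_t* src, int stride, uint8_t* dst,
                  int width, int height) {
  return I400Mirror(src, stride, dst, stride, width, -height);
}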
+LIBYUV_API +int I422ToRGBA(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height); + +// Alias +#define RGB24ToRAW RAWToRGB24 + +LIBYUV_API +int RAWToRGB24(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +// Draw a rectangle into I420. +LIBYUV_API +int I420Rect(uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int x, + int y, + int width, + int height, + int value_y, + int value_u, + int value_v); + +// Draw a rectangle into ARGB. +LIBYUV_API +int ARGBRect(uint8_t* dst_argb, + int dst_stride_argb, + int dst_x, + int dst_y, + int width, + int height, + uint32_t value); + +// Convert ARGB to gray scale ARGB. +LIBYUV_API +int ARGBGrayTo(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Make a rectangle of ARGB gray scale. +LIBYUV_API +int ARGBGray(uint8_t* dst_argb, + int dst_stride_argb, + int dst_x, + int dst_y, + int width, + int height); + +// Make a rectangle of ARGB Sepia tone. +LIBYUV_API +int ARGBSepia(uint8_t* dst_argb, + int dst_stride_argb, + int dst_x, + int dst_y, + int width, + int height); + +// Apply a matrix rotation to each ARGB pixel. +// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2. +// The first 4 coefficients apply to B, G, R, A and produce B of the output. +// The next 4 coefficients apply to B, G, R, A and produce G of the output. +// The next 4 coefficients apply to B, G, R, A and produce R of the output. +// The last 4 coefficients apply to B, G, R, A and produce A of the output. +LIBYUV_API +int ARGBColorMatrix(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + const int8_t* matrix_argb, + int width, + int height); + +// Deprecated. Use ARGBColorMatrix instead. +// Apply a matrix rotation to each ARGB pixel. +// matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1. +// The first 4 coefficients apply to B, G, R, A and produce B of the output. +// The next 4 coefficients apply to B, G, R, A and produce G of the output. +// The last 4 coefficients apply to B, G, R, A and produce R of the output. +LIBYUV_API +int RGBColorMatrix(uint8_t* dst_argb, + int dst_stride_argb, + const int8_t* matrix_rgb, + int dst_x, + int dst_y, + int width, + int height); + +// Apply a color table each ARGB pixel. +// Table contains 256 ARGB values. +LIBYUV_API +int ARGBColorTable(uint8_t* dst_argb, + int dst_stride_argb, + const uint8_t* table_argb, + int dst_x, + int dst_y, + int width, + int height); + +// Apply a color table each ARGB pixel but preserve destination alpha. +// Table contains 256 ARGB values. +LIBYUV_API +int RGBColorTable(uint8_t* dst_argb, + int dst_stride_argb, + const uint8_t* table_argb, + int dst_x, + int dst_y, + int width, + int height); + +// Apply a luma/color table each ARGB pixel but preserve destination alpha. +// Table contains 32768 values indexed by [Y][C] where 7 it 7 bit luma from +// RGB (YJ style) and C is an 8 bit color component (R, G or B). +LIBYUV_API +int ARGBLumaColorTable(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + const uint8_t* luma, + int width, + int height); + +// Apply a 3 term polynomial to ARGB values. +// poly points to a 4x4 matrix. 
The first row is constants. The 2nd row is +// coefficients for b, g, r and a. The 3rd row is coefficients for b squared, +// g squared, r squared and a squared. The 4rd row is coefficients for b to +// the 3, g to the 3, r to the 3 and a to the 3. The values are summed and +// result clamped to 0 to 255. +// A polynomial approximation can be dirived using software such as 'R'. + +LIBYUV_API +int ARGBPolynomial(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + const float* poly, + int width, + int height); + +// Convert plane of 16 bit shorts to half floats. +// Source values are multiplied by scale before storing as half float. +LIBYUV_API +int HalfFloatPlane(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + float scale, + int width, + int height); + +// Convert a buffer of bytes to floats, scale the values and store as floats. +LIBYUV_API +int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width); + +// Quantize a rectangle of ARGB. Alpha unaffected. +// scale is a 16 bit fractional fixed point scaler between 0 and 65535. +// interval_size should be a value between 1 and 255. +// interval_offset should be a value between 0 and 255. +LIBYUV_API +int ARGBQuantize(uint8_t* dst_argb, + int dst_stride_argb, + int scale, + int interval_size, + int interval_offset, + int dst_x, + int dst_y, + int width, + int height); + +// Copy ARGB to ARGB. +LIBYUV_API +int ARGBCopy(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Copy Alpha channel of ARGB to alpha of ARGB. +LIBYUV_API +int ARGBCopyAlpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Extract the alpha channel from ARGB. +LIBYUV_API +int ARGBExtractAlpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_a, + int dst_stride_a, + int width, + int height); + +// Copy Y channel to Alpha of ARGB. +LIBYUV_API +int ARGBCopyYToAlpha(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +typedef void (*ARGBBlendRow)(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); + +// Get function to Alpha Blend ARGB pixels and store to destination. +LIBYUV_API +ARGBBlendRow GetARGBBlend(); + +// Alpha Blend ARGB images and store to destination. +// Source is pre-multiplied by alpha using ARGBAttenuate. +// Alpha of destination is set to 255. +LIBYUV_API +int ARGBBlend(const uint8_t* src_argb0, + int src_stride_argb0, + const uint8_t* src_argb1, + int src_stride_argb1, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Alpha Blend plane and store to destination. +// Source is not pre-multiplied by alpha. +LIBYUV_API +int BlendPlane(const uint8_t* src_y0, + int src_stride_y0, + const uint8_t* src_y1, + int src_stride_y1, + const uint8_t* alpha, + int alpha_stride, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); + +// Alpha Blend YUV images and store to destination. +// Source is not pre-multiplied by alpha. +// Alpha is full width x height and subsampled to half size to apply to UV. 
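To make the 4x4 poly layout described above concrete, a sketch of an identity polynomial: four rows of four floats (constants, then linear, squared, and cubed terms), each row ordered B, G, R, A. With only the linear row set to 1, every channel passes through unchanged.

#include <stdint.h>
#include "libyuv/planar_functions.h"

static const float kIdentityPoly[16] = {
    0.f, 0.f, 0.f, 0.f,  // row 1: constants
    1.f, 1.f, 1.f, 1.f,  // row 2: b, g, r, a linear coefficients
    0.f, 0.f, 0.f, 0.f,  // row 3: squared terms
    0.f, 0.f, 0.f, 0.f,  // row 4: cubed terms
};

int ApplyIdentityPoly(const uint8_t* src_argb, uint8_t* dst_argb,
                      int width, int height) {
  return ARGBPolynomial(src_argb, width * 4, dst_argb, width * 4,
                        kIdentityPoly, width, height);
}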
+LIBYUV_API +int I420Blend(const uint8_t* src_y0, + int src_stride_y0, + const uint8_t* src_u0, + int src_stride_u0, + const uint8_t* src_v0, + int src_stride_v0, + const uint8_t* src_y1, + int src_stride_y1, + const uint8_t* src_u1, + int src_stride_u1, + const uint8_t* src_v1, + int src_stride_v1, + const uint8_t* alpha, + int alpha_stride, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255. +LIBYUV_API +int ARGBMultiply(const uint8_t* src_argb0, + int src_stride_argb0, + const uint8_t* src_argb1, + int src_stride_argb1, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Add ARGB image with ARGB image. Saturates to 255. +LIBYUV_API +int ARGBAdd(const uint8_t* src_argb0, + int src_stride_argb0, + const uint8_t* src_argb1, + int src_stride_argb1, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0. +LIBYUV_API +int ARGBSubtract(const uint8_t* src_argb0, + int src_stride_argb0, + const uint8_t* src_argb1, + int src_stride_argb1, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I422 to YUY2. +LIBYUV_API +int I422ToYUY2(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height); + +// Convert I422 to UYVY. +LIBYUV_API +int I422ToUYVY(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uyvy, + int dst_stride_uyvy, + int width, + int height); + +// Convert unattentuated ARGB to preattenuated ARGB. +LIBYUV_API +int ARGBAttenuate(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert preattentuated ARGB to unattenuated ARGB. +LIBYUV_API +int ARGBUnattenuate(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Internal function - do not call directly. +// Computes table of cumulative sum for image where the value is the sum +// of all values above and to the left of the entry. Used by ARGBBlur. +LIBYUV_API +int ARGBComputeCumulativeSum(const uint8_t* src_argb, + int src_stride_argb, + int32_t* dst_cumsum, + int dst_stride32_cumsum, + int width, + int height); + +// Blur ARGB image. +// dst_cumsum table of width * (height + 1) * 16 bytes aligned to +// 16 byte boundary. +// dst_stride32_cumsum is number of ints in a row (width * 4). +// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5. +// Blur is optimized for radius of 5 (11x11) or less. +LIBYUV_API +int ARGBBlur(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int32_t* dst_cumsum, + int dst_stride32_cumsum, + int width, + int height, + int radius); + +// Multiply ARGB image by ARGB value. +LIBYUV_API +int ARGBShade(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + uint32_t value); + +// Interpolate between two images using specified amount of interpolation +// (0 to 255) and store to destination. +// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0 +// and 255 means 1% src0 and 99% src1. 
+LIBYUV_API +int InterpolatePlane(const uint8_t* src0, + int src_stride0, + const uint8_t* src1, + int src_stride1, + uint8_t* dst, + int dst_stride, + int width, + int height, + int interpolation); + +// Interpolate between two ARGB images using specified amount of interpolation +// Internally calls InterpolatePlane with width * 4 (bpp). +LIBYUV_API +int ARGBInterpolate(const uint8_t* src_argb0, + int src_stride_argb0, + const uint8_t* src_argb1, + int src_stride_argb1, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + int interpolation); + +// Interpolate between two YUV images using specified amount of interpolation +// Internally calls InterpolatePlane on each plane where the U and V planes +// are half width and half height. +LIBYUV_API +int I420Interpolate(const uint8_t* src0_y, + int src0_stride_y, + const uint8_t* src0_u, + int src0_stride_u, + const uint8_t* src0_v, + int src0_stride_v, + const uint8_t* src1_y, + int src1_stride_y, + const uint8_t* src1_u, + int src1_stride_u, + const uint8_t* src1_v, + int src1_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + int interpolation); + // Row function for copying pixels from a source with a slope to a row // of destination. Useful for scaling, rotation, mirror, texture mapping. LIBYUV_API -void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width); +void ARGBAffineRow_C(const uint8_t* src_argb, + int src_argb_stride, + uint8_t* dst_argb, + const float* uv_dudv, + int width); +// TODO(fbarchard): Move ARGBAffineRow_SSE2 to row.h LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width); +void ARGBAffineRow_SSE2(const uint8_t* src_argb, + int src_argb_stride, + uint8_t* dst_argb, + const float* uv_dudv, + int width); // Shuffle ARGB channel order. e.g. BGRA to ARGB. // shuffler is 16 bytes and must be aligned. LIBYUV_API -int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_argb, int dst_stride_argb, - const uint8* shuffler, int width, int height); +int ARGBShuffle(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_argb, + int dst_stride_argb, + const uint8_t* shuffler, + int width, + int height); // Sobel ARGB effect with planar output. LIBYUV_API -int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height); +int ARGBSobelToPlane(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); // Sobel ARGB effect. LIBYUV_API -int ARGBSobel(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBSobel(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB. 
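Given the interpolation fraction documented above, an even cross-fade of two ARGB frames is just interpolation 128; a hedged sketch assuming packed buffers:

#include <stdint.h>
#include "libyuv/planar_functions.h"

// Blend two same-sized ARGB frames roughly 50/50 (0 = all src0,
// 255 = almost all src1, so 128 is close to an even mix).
int CrossFadeHalf(const uint8_t* src0, const uint8_t* src1, uint8_t* dst,
                  int width, int height) {
  return ARGBInterpolate(src0, width * 4, src1, width * 4, dst, width * 4,
                         width, height, 128);
}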
LIBYUV_API -int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBSobelXY(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ NOLINT +#endif // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate.h index 8af60b89550e..76b692be8b0b 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_ROTATE_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_ROTATE_H_ #define INCLUDE_LIBYUV_ROTATE_H_ #include "libyuv/basic_types.h" @@ -20,8 +20,8 @@ extern "C" { // Supported rotation. typedef enum RotationMode { - kRotate0 = 0, // No rotation. - kRotate90 = 90, // Rotate 90 degrees clockwise. + kRotate0 = 0, // No rotation. + kRotate90 = 90, // Rotate 90 degrees clockwise. kRotate180 = 180, // Rotate 180 degrees. kRotate270 = 270, // Rotate 270 degrees clockwise. @@ -33,85 +33,132 @@ typedef enum RotationMode { // Rotate I420 frame. LIBYUV_API -int I420Rotate(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_width, int src_height, enum RotationMode mode); +int I420Rotate(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); // Rotate NV12 input and store in I420. LIBYUV_API -int NV12ToI420Rotate(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_width, int src_height, enum RotationMode mode); +int NV12ToI420Rotate(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); // Rotate a plane by 0, 90, 180, or 270. LIBYUV_API -int RotatePlane(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int src_width, int src_height, enum RotationMode mode); +int RotatePlane(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height, + enum RotationMode mode); // Rotate planes by 90, 180, 270. Deprecated. 
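A sketch of the stride bookkeeping for the I420Rotate declaration above: width and height describe the source, while destination strides follow the post-rotation geometry (the helper name and packed strides are assumptions):

#include <stdint.h>
#include "libyuv/rotate.h"

int RotateI420By90(const uint8_t* src_y, const uint8_t* src_u,
                   const uint8_t* src_v, int width, int height,
                   uint8_t* dst_y, uint8_t* dst_u, uint8_t* dst_v) {
  int half = (width + 1) / 2;      // source chroma stride
  int dst_w = height;              // width and height swap after 90 degrees
  int dst_half = (dst_w + 1) / 2;  // rotated chroma stride
  return I420Rotate(src_y, width, src_u, half, src_v, half,
                    dst_y, dst_w, dst_u, dst_half, dst_v, dst_half,
                    width, height, kRotate90);
}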
LIBYUV_API -void RotatePlane90(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); +void RotatePlane90(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height); LIBYUV_API -void RotatePlane180(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); +void RotatePlane180(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height); LIBYUV_API -void RotatePlane270(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); +void RotatePlane270(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height); LIBYUV_API -void RotateUV90(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); +void RotateUV90(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height); // Rotations for when U and V are interleaved. // These functions take one input pointer and // split the data into two buffers while // rotating them. Deprecated. LIBYUV_API -void RotateUV180(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); +void RotateUV180(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height); LIBYUV_API -void RotateUV270(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); +void RotateUV270(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height); // The 90 and 270 functions are based on transposes. // Doing a transpose with reversing the read/write // order will result in a rotation by +- 90 degrees. // Deprecated. LIBYUV_API -void TransposePlane(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); +void TransposePlane(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height); LIBYUV_API -void TransposeUV(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); +void TransposeUV(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_ROTATE_H_ NOLINT +#endif // INCLUDE_LIBYUV_ROTATE_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_argb.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_argb.h index 660ff5573ec5..20432949ab42 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_argb.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_argb.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_ #define INCLUDE_LIBYUV_ROTATE_ARGB_H_ #include "libyuv/basic_types.h" @@ -21,13 +21,17 @@ extern "C" { // Rotate ARGB frame LIBYUV_API -int ARGBRotate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int src_width, int src_height, enum RotationMode mode); +int ARGBRotate(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int src_width, + int src_height, + enum RotationMode mode); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_ NOLINT +#endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_row.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_row.h index ebc487f9abf4..5edc0fcf13a1 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_row.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/rotate_row.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_ #define INCLUDE_LIBYUV_ROTATE_ROW_H_ #include "libyuv/basic_types.h" @@ -18,10 +18,14 @@ namespace libyuv { extern "C" { #endif -#if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) @@ -29,93 +33,162 @@ extern "C" { #endif #endif // The following are available for Visual C and clangcl 32 bit: -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) #define HAS_TRANSPOSEWX8_SSSE3 #define HAS_TRANSPOSEUVWX8_SSE2 #endif -// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__))) +// The following are available for GCC 32 or 64 bit: +#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__)) #define HAS_TRANSPOSEWX8_SSSE3 #endif -// The following are available for 64 bit GCC but not NaCL: -#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \ - defined(__x86_64__) +// The following are available for 64 bit GCC: +#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__) #define HAS_TRANSPOSEWX8_FAST_SSSE3 #define HAS_TRANSPOSEUVWX8_SSE2 #endif -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ +#if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_TRANSPOSEWX8_NEON #define HAS_TRANSPOSEUVWX8_NEON #endif -#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ - defined(__mips__) && \ - defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_TRANSPOSEWX8_DSPR2 -#define HAS_TRANSPOSEUVWX8_DSPR2 -#endif // defined(__mips__) +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#define HAS_TRANSPOSEWX16_MSA +#define HAS_TRANSPOSEUVWX16_MSA +#endif -void TransposeWxH_C(const uint8* src, int src_stride, - uint8* dst, 
int dst_stride, int width, int height); +void TransposeWxH_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height); -void TransposeWx8_C(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_NEON(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); +void TransposeWx8_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx16_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx8_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx8_SSSE3(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx8_Fast_SSSE3(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx16_MSA(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); -void TransposeWx8_Any_NEON(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_Any_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); +void TransposeWx8_Any_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx8_Any_SSSE3(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx16_Any_MSA(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); -void TransposeUVWxH_C(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); +void TransposeUVWxH_C(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height); -void TransposeUVWx8_C(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_SSE2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_NEON(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_DSPR2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); +void TransposeUVWx8_C(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx16_C(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx8_SSE2(const uint8_t* src, + int 
src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx8_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx16_MSA(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); -void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_Any_DSPR2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); +void TransposeUVWx8_Any_SSE2(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx8_Any_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx16_Any_MSA(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_ROTATE_ROW_H_ NOLINT +#endif // INCLUDE_LIBYUV_ROTATE_ROW_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/row.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/row.h index 013a7e53e326..65ef448b8ce8 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/row.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/row.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_ROW_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_ROW_H_ #define INCLUDE_LIBYUV_ROW_H_ #include // For malloc. @@ -20,41 +20,20 @@ namespace libyuv { extern "C" { #endif -#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1))) - -#ifdef __cplusplus -#define align_buffer_64(var, size) \ - uint8* var##_mem = reinterpret_cast(malloc((size) + 63)); \ - uint8* var = reinterpret_cast \ - ((reinterpret_cast(var##_mem) + 63) & ~63) -#else -#define align_buffer_64(var, size) \ - uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \ - uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ -#endif - -#define free_aligned_buffer_64(var) \ - free(var##_mem); \ - var = 0 - -#if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif -// True if compiling for SSSE3 as a requirement. -#if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3)) -#define LIBYUV_SSSE3_ONLY -#endif - -#if defined(__native_client__) -#define LIBYUV_DISABLE_NEON -#endif // clang >= 3.5.0 required for Arm64. 
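rotate.h's comment earlier in this patch notes that the 90- and 270-degree paths are transposes with a reversed read/write order. A plain-C reference sketch of that identity (Rotate90Ref is a hypothetical illustration, not a libyuv symbol):

#include <stdint.h>

/* A 90-degree clockwise rotation writes source row y into destination
   column (height - 1 - y): a transpose plus a reversed order. */
static void Rotate90Ref(const uint8_t* src, int src_stride,
                        uint8_t* dst, int dst_stride,
                        int width, int height) {
  int x, y;
  for (y = 0; y < height; ++y) {
    for (x = 0; x < width; ++x) {
      dst[x * dst_stride + (height - 1 - y)] = src[y * src_stride + x];
    }
  }
}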
#if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON) #if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5)) @@ -76,9 +55,19 @@ extern "C" { #endif // clang >= 3.4 #endif // __clang__ +// clang >= 6.0.0 required for AVX512. +// TODO(fbarchard): fix xcode 9 ios b/789. +#if 0 // Build fails in libvpx on Mac +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) +#if (__clang_major__ >= 7) && !defined(__APPLE_EMBEDDED_SIMULATOR__) +#define CLANG_HAS_AVX512 1 +#endif // clang >= 7 +#endif // __clang__ +#endif // 0 + // Visual C 2012 required for AVX2. -#if defined(_M_IX86) && !defined(__clang__) && \ - defined(_MSC_VER) && _MSC_VER >= 1700 +#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \ + _MSC_VER >= 1700 #define VISUALC_HAS_AVX2 1 #endif // VisualStudio >= 2012 @@ -90,8 +79,8 @@ extern "C" { #define HAS_ABGRTOYROW_SSSE3 #define HAS_ARGB1555TOARGBROW_SSE2 #define HAS_ARGB4444TOARGBROW_SSE2 +#define HAS_ARGBEXTRACTALPHAROW_SSE2 #define HAS_ARGBSETROW_X86 -#define HAS_ARGBSHUFFLEROW_SSE2 #define HAS_ARGBSHUFFLEROW_SSSE3 #define HAS_ARGBTOARGB1555ROW_SSE2 #define HAS_ARGBTOARGB4444ROW_SSE2 @@ -104,12 +93,12 @@ extern "C" { #define HAS_ARGBTOUVROW_SSSE3 #define HAS_ARGBTOYJROW_SSSE3 #define HAS_ARGBTOYROW_SSSE3 -#define HAS_ARGBEXTRACTALPHAROW_SSE2 #define HAS_BGRATOUVROW_SSSE3 #define HAS_BGRATOYROW_SSSE3 #define HAS_COPYROW_ERMS #define HAS_COPYROW_SSE2 #define HAS_H422TOARGBROW_SSSE3 +#define HAS_HALFFLOATROW_SSE2 #define HAS_I400TOARGBROW_SSE2 #define HAS_I422TOARGB1555ROW_SSSE3 #define HAS_I422TOARGB4444ROW_SSSE3 @@ -126,8 +115,10 @@ extern "C" { #define HAS_MIRRORROW_SSSE3 #define HAS_MIRRORUVROW_SSSE3 #define HAS_NV12TOARGBROW_SSSE3 +#define HAS_NV12TORGB24ROW_SSSE3 #define HAS_NV12TORGB565ROW_SSSE3 #define HAS_NV21TOARGBROW_SSSE3 +#define HAS_NV21TORGB24ROW_SSSE3 #define HAS_RAWTOARGBROW_SSSE3 #define HAS_RAWTORGB24ROW_SSSE3 #define HAS_RAWTOYROW_SSSE3 @@ -180,11 +171,8 @@ extern "C" { // The following functions fail on gcc/clang 32 bit with fpic and framepointer. // caveat: clangcl uses row_win.cc which works. -#if defined(NDEBUG) || !(defined(_DEBUG) && defined(__i386__)) || \ - !defined(__i386__) || defined(_MSC_VER) -// TODO(fbarchard): fix build error on x86 debug -// https://code.google.com/p/libyuv/issues/detail?id=524 -#define HAS_I411TOARGBROW_SSSE3 +#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \ + defined(_MSC_VER) // TODO(fbarchard): fix build error on android_full_debug=1 // https://code.google.com/p/libyuv/issues/detail?id=517 #define HAS_I422ALPHATOARGBROW_SSSE3 @@ -193,11 +181,12 @@ extern "C" { // The following are available on all x86 platforms, but // require VS2012, clang 3.4 or gcc 4.7. -// The code supports NaCL but requires a new compiler and validator. 
-#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \ - defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \ + defined(GCC_HAS_AVX2)) #define HAS_ARGBCOPYALPHAROW_AVX2 #define HAS_ARGBCOPYYTOALPHAROW_AVX2 +#define HAS_ARGBEXTRACTALPHAROW_AVX2 #define HAS_ARGBMIRRORROW_AVX2 #define HAS_ARGBPOLYNOMIALROW_AVX2 #define HAS_ARGBSHUFFLEROW_AVX2 @@ -208,13 +197,9 @@ extern "C" { #define HAS_ARGBTOYROW_AVX2 #define HAS_COPYROW_AVX #define HAS_H422TOARGBROW_AVX2 +#define HAS_HALFFLOATROW_AVX2 +// #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast #define HAS_I400TOARGBROW_AVX2 -#if !(defined(_DEBUG) && defined(__i386__)) -// TODO(fbarchard): fix build error on android_full_debug=1 -// https://code.google.com/p/libyuv/issues/detail?id=517 -#define HAS_I422ALPHATOARGBROW_AVX2 -#endif -#define HAS_I411TOARGBROW_AVX2 #define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGBROW_AVX2 @@ -227,8 +212,10 @@ extern "C" { #define HAS_MERGEUVROW_AVX2 #define HAS_MIRRORROW_AVX2 #define HAS_NV12TOARGBROW_AVX2 +#define HAS_NV12TORGB24ROW_AVX2 #define HAS_NV12TORGB565ROW_AVX2 #define HAS_NV21TOARGBROW_AVX2 +#define HAS_NV21TORGB24ROW_AVX2 #define HAS_SPLITUVROW_AVX2 #define HAS_UYVYTOARGBROW_AVX2 #define HAS_UYVYTOUV422ROW_AVX2 @@ -246,11 +233,18 @@ extern "C" { #define HAS_ARGBSUBTRACTROW_AVX2 #define HAS_ARGBUNATTENUATEROW_AVX2 #define HAS_BLENDPLANEROW_AVX2 + +#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \ + defined(_MSC_VER) +// TODO(fbarchard): fix build error on android_full_debug=1 +// https://code.google.com/p/libyuv/issues/detail?id=517 +#define HAS_I422ALPHATOARGBROW_AVX2 +#endif #endif // The following are available for AVX2 Visual C and clangcl 32 bit: // TODO(fbarchard): Port to gcc. -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \ (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2)) #define HAS_ARGB1555TOARGBROW_AVX2 #define HAS_ARGB4444TOARGBROW_AVX2 @@ -268,6 +262,51 @@ extern "C" { #define HAS_I422TOARGBROW_SSSE3 #endif +// The following are available for gcc/clang x86 platforms: +// TODO(fbarchard): Port to Visual C +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) +#define HAS_ABGRTOAR30ROW_SSSE3 +#define HAS_ARGBTOAR30ROW_SSSE3 +#define HAS_CONVERT16TO8ROW_SSSE3 +#define HAS_CONVERT8TO16ROW_SSE2 +// I210 is for H010. 2 = 422. I for 601 vs H for 709. 
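The HAS_* blocks throughout this header only decide at compile time whether a given SIMD row variant is built; the matching run-time choice is made per call with libyuv's CPU detection. A hedged sketch of that two-level dispatch pattern, using TestCpuFlag() and kCpuHasSSSE3 from libyuv/cpu_id.h (FooRow_* and HAS_FOOROW_SSSE3 are hypothetical stand-ins):

#include <stdint.h>
#include "libyuv/cpu_id.h" /* TestCpuFlag(), kCpuHasSSSE3 */

/* Hypothetical pair standing in for any HAS_*-gated row function. */
static void FooRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) dst[i] = src[i];
}
#if defined(HAS_FOOROW_SSSE3)
void FooRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
#endif

void FooPlaneRow(const uint8_t* src, uint8_t* dst, int width) {
  void (*row)(const uint8_t*, uint8_t*, int) = FooRow_C;
#if defined(HAS_FOOROW_SSSE3)
  /* The HAS_ macro gates compilation; the CPU flag gates use. */
  if (TestCpuFlag(kCpuHasSSSE3)) {
    row = FooRow_SSSE3;
  }
#endif
  row(src, dst, width);
}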
+#define HAS_I210TOAR30ROW_SSSE3 +#define HAS_I210TOARGBROW_SSSE3 +#define HAS_I422TOAR30ROW_SSSE3 +#define HAS_MERGERGBROW_SSSE3 +#define HAS_SPLITRGBROW_SSSE3 +#endif + +// The following are available for AVX2 gcc/clang x86 platforms: +// TODO(fbarchard): Port to Visual C +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \ + (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) +#define HAS_ABGRTOAR30ROW_AVX2 +#define HAS_ARGBTOAR30ROW_AVX2 +#define HAS_ARGBTORAWROW_AVX2 +#define HAS_ARGBTORGB24ROW_AVX2 +#define HAS_CONVERT16TO8ROW_AVX2 +#define HAS_CONVERT8TO16ROW_AVX2 +#define HAS_I210TOAR30ROW_AVX2 +#define HAS_I210TOARGBROW_AVX2 +#define HAS_I422TOAR30ROW_AVX2 +#define HAS_I422TOUYVYROW_AVX2 +#define HAS_I422TOYUY2ROW_AVX2 +#define HAS_MERGEUVROW_16_AVX2 +#define HAS_MULTIPLYROW_16_AVX2 +#endif + +// The following are available for AVX512 clang x86 platforms: +// TODO(fbarchard): Port to GCC and Visual C +// TODO(fbarchard): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI. Issue libyuv:789 +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \ + (defined(CLANG_HAS_AVX512)) +#define HAS_ARGBTORGB24ROW_AVX512VBMI +#endif + // The following are available on Neon platforms: #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) @@ -279,6 +318,7 @@ extern "C" { #define HAS_ARGB4444TOARGBROW_NEON #define HAS_ARGB4444TOUVROW_NEON #define HAS_ARGB4444TOYROW_NEON +#define HAS_ARGBEXTRACTALPHAROW_NEON #define HAS_ARGBSETROW_NEON #define HAS_ARGBTOARGB1555ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON @@ -286,18 +326,17 @@ extern "C" { #define HAS_ARGBTORGB24ROW_NEON #define HAS_ARGBTORGB565DITHERROW_NEON #define HAS_ARGBTORGB565ROW_NEON -#define HAS_ARGBTOUV411ROW_NEON #define HAS_ARGBTOUV444ROW_NEON #define HAS_ARGBTOUVJROW_NEON #define HAS_ARGBTOUVROW_NEON #define HAS_ARGBTOYJROW_NEON #define HAS_ARGBTOYROW_NEON -#define HAS_ARGBEXTRACTALPHAROW_NEON #define HAS_BGRATOUVROW_NEON #define HAS_BGRATOYROW_NEON +#define HAS_BYTETOFLOATROW_NEON #define HAS_COPYROW_NEON +#define HAS_HALFFLOATROW_NEON #define HAS_I400TOARGBROW_NEON -#define HAS_I411TOARGBROW_NEON #define HAS_I422ALPHATOARGBROW_NEON #define HAS_I422TOARGB1555ROW_NEON #define HAS_I422TOARGB4444ROW_NEON @@ -313,8 +352,10 @@ extern "C" { #define HAS_MIRRORROW_NEON #define HAS_MIRRORUVROW_NEON #define HAS_NV12TOARGBROW_NEON +#define HAS_NV12TORGB24ROW_NEON #define HAS_NV12TORGB565ROW_NEON #define HAS_NV21TOARGBROW_NEON +#define HAS_NV21TORGB24ROW_NEON #define HAS_RAWTOARGBROW_NEON #define HAS_RAWTORGB24ROW_NEON #define HAS_RAWTOUVROW_NEON @@ -328,6 +369,7 @@ extern "C" { #define HAS_RGBATOUVROW_NEON #define HAS_RGBATOYROW_NEON #define HAS_SETROW_NEON +#define HAS_SPLITRGBROW_NEON #define HAS_SPLITUVROW_NEON #define HAS_UYVYTOARGBROW_NEON #define HAS_UYVYTOUV422ROW_NEON @@ -359,17 +401,87 @@ extern "C" { #define HAS_SOBELYROW_NEON #endif -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \ - (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6) -#define HAS_COPYROW_MIPS -#if defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_I422TOARGBROW_DSPR2 -#define HAS_INTERPOLATEROW_DSPR2 -#define HAS_MIRRORROW_DSPR2 -#define HAS_MIRRORUVROW_DSPR2 -#define HAS_SPLITUVROW_DSPR2 +// The following are available on AArch64 platforms: +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) +#define HAS_SCALESUMSAMPLES_NEON 
#endif +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#define HAS_ABGRTOUVROW_MSA +#define HAS_ABGRTOYROW_MSA +#define HAS_ARGB1555TOARGBROW_MSA +#define HAS_ARGB1555TOUVROW_MSA +#define HAS_ARGB1555TOYROW_MSA +#define HAS_ARGB4444TOARGBROW_MSA +#define HAS_ARGBADDROW_MSA +#define HAS_ARGBATTENUATEROW_MSA +#define HAS_ARGBBLENDROW_MSA +#define HAS_ARGBCOLORMATRIXROW_MSA +#define HAS_ARGBEXTRACTALPHAROW_MSA +#define HAS_ARGBGRAYROW_MSA +#define HAS_ARGBMIRRORROW_MSA +#define HAS_ARGBMULTIPLYROW_MSA +#define HAS_ARGBQUANTIZEROW_MSA +#define HAS_ARGBSEPIAROW_MSA +#define HAS_ARGBSETROW_MSA +#define HAS_ARGBSHADEROW_MSA +#define HAS_ARGBSHUFFLEROW_MSA +#define HAS_ARGBSUBTRACTROW_MSA +#define HAS_ARGBTOARGB1555ROW_MSA +#define HAS_ARGBTOARGB4444ROW_MSA +#define HAS_ARGBTORAWROW_MSA +#define HAS_ARGBTORGB24ROW_MSA +#define HAS_ARGBTORGB565DITHERROW_MSA +#define HAS_ARGBTORGB565ROW_MSA +#define HAS_ARGBTOUV444ROW_MSA +#define HAS_ARGBTOUVJROW_MSA +#define HAS_ARGBTOUVROW_MSA +#define HAS_ARGBTOYJROW_MSA +#define HAS_ARGBTOYROW_MSA +#define HAS_BGRATOUVROW_MSA +#define HAS_BGRATOYROW_MSA +#define HAS_HALFFLOATROW_MSA +#define HAS_I400TOARGBROW_MSA +#define HAS_I422ALPHATOARGBROW_MSA +#define HAS_I422TOARGBROW_MSA +#define HAS_I422TORGB24ROW_MSA +#define HAS_I422TORGBAROW_MSA +#define HAS_I422TOUYVYROW_MSA +#define HAS_I422TOYUY2ROW_MSA +#define HAS_I444TOARGBROW_MSA +#define HAS_INTERPOLATEROW_MSA +#define HAS_J400TOARGBROW_MSA +#define HAS_MERGEUVROW_MSA +#define HAS_MIRRORROW_MSA +#define HAS_MIRRORUVROW_MSA +#define HAS_NV12TOARGBROW_MSA +#define HAS_NV12TORGB565ROW_MSA +#define HAS_NV21TOARGBROW_MSA +#define HAS_RAWTOARGBROW_MSA +#define HAS_RAWTORGB24ROW_MSA +#define HAS_RAWTOUVROW_MSA +#define HAS_RAWTOYROW_MSA +#define HAS_RGB24TOARGBROW_MSA +#define HAS_RGB24TOUVROW_MSA +#define HAS_RGB24TOYROW_MSA +#define HAS_RGB565TOARGBROW_MSA +#define HAS_RGB565TOUVROW_MSA +#define HAS_RGB565TOYROW_MSA +#define HAS_RGBATOUVROW_MSA +#define HAS_RGBATOYROW_MSA +#define HAS_SETROW_MSA +#define HAS_SOBELROW_MSA +#define HAS_SOBELTOPLANEROW_MSA +#define HAS_SOBELXROW_MSA +#define HAS_SOBELXYROW_MSA +#define HAS_SOBELYROW_MSA +#define HAS_SPLITUVROW_MSA +#define HAS_UYVYTOARGBROW_MSA +#define HAS_UYVYTOUVROW_MSA +#define HAS_UYVYTOYROW_MSA +#define HAS_YUY2TOARGBROW_MSA +#define HAS_YUY2TOUV422ROW_MSA +#define HAS_YUY2TOUVROW_MSA +#define HAS_YUY2TOYROW_MSA #endif #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) @@ -378,18 +490,18 @@ extern "C" { #else #define SIMD_ALIGNED(var) __declspec(align(16)) var #endif -typedef __declspec(align(16)) int16 vec16[8]; -typedef __declspec(align(16)) int32 vec32[4]; -typedef __declspec(align(16)) int8 vec8[16]; -typedef __declspec(align(16)) uint16 uvec16[8]; -typedef __declspec(align(16)) uint32 uvec32[4]; -typedef __declspec(align(16)) uint8 uvec8[16]; -typedef __declspec(align(32)) int16 lvec16[16]; -typedef __declspec(align(32)) int32 lvec32[8]; -typedef __declspec(align(32)) int8 lvec8[32]; -typedef __declspec(align(32)) uint16 ulvec16[16]; -typedef __declspec(align(32)) uint32 ulvec32[8]; -typedef __declspec(align(32)) uint8 ulvec8[32]; +typedef __declspec(align(16)) int16_t vec16[8]; +typedef __declspec(align(16)) int32_t vec32[4]; +typedef __declspec(align(16)) int8_t vec8[16]; +typedef __declspec(align(16)) uint16_t uvec16[8]; +typedef __declspec(align(16)) uint32_t uvec32[4]; +typedef __declspec(align(16)) uint8_t uvec8[16]; +typedef __declspec(align(32)) int16_t lvec16[16]; +typedef __declspec(align(32)) int32_t lvec32[8]; 
+typedef __declspec(align(32)) int8_t lvec8[32]; +typedef __declspec(align(32)) uint16_t ulvec16[16]; +typedef __declspec(align(32)) uint32_t ulvec32[8]; +typedef __declspec(align(32)) uint8_t ulvec8[32]; #elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__)) // Caveat GCC 4.2 to 4.7 have a known issue using vectors with const. #if defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2) @@ -397,32 +509,32 @@ typedef __declspec(align(32)) uint8 ulvec8[32]; #else #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) #endif -typedef int16 __attribute__((vector_size(16))) vec16; -typedef int32 __attribute__((vector_size(16))) vec32; -typedef int8 __attribute__((vector_size(16))) vec8; -typedef uint16 __attribute__((vector_size(16))) uvec16; -typedef uint32 __attribute__((vector_size(16))) uvec32; -typedef uint8 __attribute__((vector_size(16))) uvec8; -typedef int16 __attribute__((vector_size(32))) lvec16; -typedef int32 __attribute__((vector_size(32))) lvec32; -typedef int8 __attribute__((vector_size(32))) lvec8; -typedef uint16 __attribute__((vector_size(32))) ulvec16; -typedef uint32 __attribute__((vector_size(32))) ulvec32; -typedef uint8 __attribute__((vector_size(32))) ulvec8; +typedef int16_t __attribute__((vector_size(16))) vec16; +typedef int32_t __attribute__((vector_size(16))) vec32; +typedef int8_t __attribute__((vector_size(16))) vec8; +typedef uint16_t __attribute__((vector_size(16))) uvec16; +typedef uint32_t __attribute__((vector_size(16))) uvec32; +typedef uint8_t __attribute__((vector_size(16))) uvec8; +typedef int16_t __attribute__((vector_size(32))) lvec16; +typedef int32_t __attribute__((vector_size(32))) lvec32; +typedef int8_t __attribute__((vector_size(32))) lvec8; +typedef uint16_t __attribute__((vector_size(32))) ulvec16; +typedef uint32_t __attribute__((vector_size(32))) ulvec32; +typedef uint8_t __attribute__((vector_size(32))) ulvec8; #else #define SIMD_ALIGNED(var) var -typedef int16 vec16[8]; -typedef int32 vec32[4]; -typedef int8 vec8[16]; -typedef uint16 uvec16[8]; -typedef uint32 uvec32[4]; -typedef uint8 uvec8[16]; -typedef int16 lvec16[16]; -typedef int32 lvec32[8]; -typedef int8 lvec8[32]; -typedef uint16 ulvec16[16]; -typedef uint32 ulvec32[8]; -typedef uint8 ulvec8[32]; +typedef int16_t vec16[8]; +typedef int32_t vec32[4]; +typedef int8_t vec8[16]; +typedef uint16_t uvec16[8]; +typedef uint32_t uvec32[4]; +typedef uint8_t uvec8[16]; +typedef int16_t lvec16[16]; +typedef int32_t lvec32[8]; +typedef int8_t lvec8[32]; +typedef uint16_t ulvec16[16]; +typedef uint32_t ulvec32[8]; +typedef uint8_t ulvec8[32]; #endif #if defined(__aarch64__) @@ -446,23 +558,23 @@ struct YuvConstants { #else // This struct is for Intel color conversion. 
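The KUVTOB..KYTORGB constants defined a few lines below are byte offsets into the Intel-layout struct YuvConstants that the assembly row code indexes directly; each table is 32 bytes (32 x int8_t or 16 x int16_t), hence the 0/32/64/96/128/160/192 progression. A purely illustrative compile-time cross-check, assuming C11 _Static_assert and the struct definition that follows:

#include <stddef.h>

/* Illustrative only: verify the assembly offsets match the layout. */
_Static_assert(offsetof(struct YuvConstants, kUVToB) == 0, "KUVTOB");
_Static_assert(offsetof(struct YuvConstants, kUVToG) == 32, "KUVTOG");
_Static_assert(offsetof(struct YuvConstants, kUVToR) == 64, "KUVTOR");
_Static_assert(offsetof(struct YuvConstants, kUVBiasB) == 96, "KUVBIASB");
_Static_assert(offsetof(struct YuvConstants, kUVBiasG) == 128, "KUVBIASG");
_Static_assert(offsetof(struct YuvConstants, kUVBiasR) == 160, "KUVBIASR");
_Static_assert(offsetof(struct YuvConstants, kYToRgb) == 192, "KYTORGB");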
struct YuvConstants { - int8 kUVToB[32]; - int8 kUVToG[32]; - int8 kUVToR[32]; - int16 kUVBiasB[16]; - int16 kUVBiasG[16]; - int16 kUVBiasR[16]; - int16 kYToRgb[16]; + int8_t kUVToB[32]; + int8_t kUVToG[32]; + int8_t kUVToR[32]; + int16_t kUVBiasB[16]; + int16_t kUVBiasG[16]; + int16_t kUVBiasR[16]; + int16_t kYToRgb[16]; }; // Offsets into YuvConstants structure -#define KUVTOB 0 -#define KUVTOG 32 -#define KUVTOR 64 +#define KUVTOB 0 +#define KUVTOG 32 +#define KUVTOR 64 #define KUVBIASB 96 #define KUVBIASG 128 #define KUVBIASR 160 -#define KYTORGB 192 +#define KYTORGB 192 #endif // Conversion matrix for YUV to RGB @@ -475,6 +587,16 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants); // BT.601 extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants); // JPeg extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 +#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1))) + +#define align_buffer_64(var, size) \ + uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \ + uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ + +#define free_aligned_buffer_64(var) \ + free(var##_mem); \ + var = 0 + #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP #else @@ -487,1458 +609,2863 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 #else #define LABELALIGN #endif -#if defined(__native_client__) && defined(__x86_64__) -// r14 is used for MEMOP macros. -#define NACL_R14 "r14", -#define BUNDLELOCK ".bundle_lock\n" -#define BUNDLEUNLOCK ".bundle_unlock\n" -#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" -#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")" -#define MEMLEA(offset, base) #offset "(%q" #base ")" -#define MEMLEA3(offset, index, scale) \ - #offset "(,%q" #index "," #scale ")" -#define MEMLEA4(offset, base, index, scale) \ - #offset "(%q" #base ",%q" #index "," #scale ")" -#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15" -#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15" -#define MEMOPREG(opcode, offset, base, index, scale, reg) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " (%%r15,%%r14),%%" #reg "\n" \ - BUNDLEUNLOCK -#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " %%" #reg ",(%%r15,%%r14)\n" \ - BUNDLEUNLOCK -#define MEMOPARG(opcode, offset, base, index, scale, arg) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " (%%r15,%%r14),%" #arg "\n" \ - BUNDLEUNLOCK -#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" \ - BUNDLEUNLOCK -#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #op " $" #sel ",%%" #reg ",(%%r15,%%r14)\n" \ - BUNDLEUNLOCK -#else // defined(__native_client__) && defined(__x86_64__) -#define NACL_R14 -#define BUNDLEALIGN -#define MEMACCESS(base) "(%" #base ")" -#define MEMACCESS2(offset, base) #offset "(%" #base ")" -#define MEMLEA(offset, base) #offset "(%" #base ")" -#define MEMLEA3(offset, index, scale) \ - #offset "(,%" #index "," #scale ")" -#define MEMLEA4(offset, base, index, scale) \ - #offset "(%" #base ",%" #index "," 
#scale ")" -#define MEMMOVESTRING(s, d) -#define MEMSTORESTRING(reg, d) -#define MEMOPREG(opcode, offset, base, index, scale, reg) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n" -#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ - #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n" -#define MEMOPARG(opcode, offset, base, index, scale, arg) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n" -#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg1 ",%%" \ - #reg2 "\n" -#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \ - #op " $" #sel ",%%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n" -#endif // defined(__native_client__) && defined(__x86_64__) -#if defined(__arm__) || defined(__aarch64__) -#undef MEMACCESS -#if defined(__native_client__) -#define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n" -#else -#define MEMACCESS(base) -#endif +// Intel Code Analizer markers. Insert IACA_START IACA_END around code to be +// measured and then run with iaca -64 libyuv_unittest. +// IACA_ASM_START amd IACA_ASM_END are equivalents that can be used within +// inline assembly blocks. +// example of iaca: +// ~/iaca-lin64/bin/iaca.sh -64 -analysis LATENCY out/Release/libyuv_unittest + +#if defined(__x86_64__) || defined(__i386__) + +#define IACA_ASM_START \ + ".byte 0x0F, 0x0B\n" \ + " movl $111, %%ebx\n" \ + ".byte 0x64, 0x67, 0x90\n" + +#define IACA_ASM_END \ + " movl $222, %%ebx\n" \ + ".byte 0x64, 0x67, 0x90\n" \ + ".byte 0x0F, 0x0B\n" + +#define IACA_SSC_MARK(MARK_ID) \ + __asm__ __volatile__("\n\t movl $" #MARK_ID \ + ", %%ebx" \ + "\n\t .byte 0x64, 0x67, 0x90" \ + : \ + : \ + : "memory"); + +#define IACA_UD_BYTES __asm__ __volatile__("\n\t .byte 0x0F, 0x0B"); + +#else /* Visual C */ +#define IACA_UD_BYTES \ + { __asm _emit 0x0F __asm _emit 0x0B } + +#define IACA_SSC_MARK(x) \ + { __asm mov ebx, x __asm _emit 0x64 __asm _emit 0x67 __asm _emit 0x90 } + +#define IACA_VC64_START __writegsbyte(111, 111); +#define IACA_VC64_END __writegsbyte(222, 222); #endif -void I444ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +#define IACA_START \ + { \ + IACA_UD_BYTES \ + IACA_SSC_MARK(111) \ + } +#define IACA_END \ + { \ + IACA_SSC_MARK(222) \ + IACA_UD_BYTES \ + } + +void I444ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void I422AlphaToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I411ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const 
uint8* src_v, - uint8* dst_argb, +void I422ToRGBARow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - const struct YuvConstants* yuvconstants, - int width); -void I422ToRGB24Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void NV12ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_NEON(const uint8* src_yuy2, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void UYVYToARGBRow_NEON(const uint8* src_uyvy, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); - -void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width); -void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int width); -void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, +void NV12ToRGB24Row_NEON(const uint8_t* src_y, + const 
uint8_t* src_uv, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, int width); -void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, +void NV21ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, int width); -void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width); -void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width); -void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width); -void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width); -void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width); -void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width); -void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width); -void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width); -void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width); -void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int width); -void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width); -void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width); -void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width); -void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width); -void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int width); -void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width); -void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, 
int width); -void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int width); -void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width); -void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, - int width); -void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, - int width); - -void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_Any_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width); -void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width); -void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width); -void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width); -void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width); -void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555, - int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width); -void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444, - int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_C(const uint8* 
src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width); -void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width); -void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width); -void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width); -void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width); - -void ARGBToUV444Row_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); - -void ARGBToUV444Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV411Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); - -void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); -void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); -void MirrorRow_NEON(const uint8* src, uint8* dst, int width); -void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width); -void MirrorRow_C(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width); - -void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v, +void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I444ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + +void I422ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGBARow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB24Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB565Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB4444Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* 
src_v, + uint8_t* dst_argb4444, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB1555Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_MSA(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_MSA(const uint8_t* src_uyvy, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, int width); -void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); -void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width); - -void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); -void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, +void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); +void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width); +void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width); +void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToUV444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_NEON(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* 
dst_v, - int width); -void SplitUVRow_Any_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, +void ARGBToUV444Row_MSA(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_MSA(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_NEON(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToUVRow_NEON(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_NEON(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGBAToUVRow_NEON(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_NEON(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_NEON(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_u, + uint8_t* dst_v, int width); +void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGBAToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_MSA(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width); +void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width); +void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_y, + int width); +void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_y, + int width); +void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width); +void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* 
dst_y, int width);
+void ARGBToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ARGBToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void BGRAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void ABGRToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGBAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGB24ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RAWToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);
+void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
+void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
+void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
+void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYRow_Any_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
+void RAWToYRow_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
+void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB565ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGB1555ToYRow_Any_NEON(const uint8_t* src_ptr,
+                             uint8_t* dst_ptr,
+                             int width);
+void ARGB4444ToYRow_Any_NEON(const uint8_t* src_ptr,
+                             uint8_t* dst_ptr,
+                             int width);
+void BGRAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ABGRToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGBAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYJRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB24ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB565ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
-void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+void ARGBToUVRow_AVX2(const uint8_t* src_argb0,
+                      int src_stride_argb,
+                      uint8_t* dst_u,
+                      uint8_t* dst_v,
+                      int width);
+void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
+                       int src_stride_argb,
+                       uint8_t* dst_u,
+                       uint8_t* dst_v,
+                       int width);
+void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,
+                       int src_stride_argb,
+                       uint8_t* dst_u,
+                       uint8_t* dst_v,
+                       int width);
+void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
+                        int src_stride_argb,
+                        uint8_t* dst_u,
+                        uint8_t* dst_v,
+                        int width);
+void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
+                       int src_stride_bgra,
+                       uint8_t* dst_u,
+                       uint8_t* dst_v,
+                       int width);
+void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
+                       int src_stride_abgr,
+                       uint8_t* dst_u,
+                       uint8_t* dst_v,
+                       int width);
+void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,
+                       int src_stride_rgba,
+                       uint8_t* dst_u,
+                       uint8_t* dst_v,
+                       int width);
+void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr,
+                          int src_stride_ptr,
+                          uint8_t* dst_u,
+                          uint8_t* dst_v,
+                          int width);
+void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr,
+                           int src_stride_ptr,
+                           uint8_t* dst_u,
+                           uint8_t* dst_v,
+                           int width);
+void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr,
+                           int src_stride_ptr,
+                           uint8_t* dst_u,
+                           uint8_t* dst_v,
+                           int width);
+void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr,
+                            int src_stride_ptr,
+                            uint8_t* dst_u,
+                            uint8_t* dst_v,
+                            int width);
+void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
+                           int src_stride_ptr,
+                           uint8_t* dst_u,
+                           uint8_t* dst_v,
+                           int width);
+void ABGRToUVRow_Any_SSSE3(const uint8_t* src_ptr,
+                           int src_stride_ptr,
+                           uint8_t* dst_u,
+                           uint8_t* dst_v,
+                           int width);
+void RGBAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
+                           int src_stride_ptr,
+                           uint8_t* dst_u,
+                           uint8_t* dst_v,
+                           int width);
+void ARGBToUV444Row_Any_NEON(const uint8_t* src_ptr,
+                             uint8_t* dst_u,
+                             uint8_t* dst_v,
+                             int width);
+void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr,
+                          int src_stride_ptr,
+                          uint8_t* dst_u,
+                          uint8_t* dst_v,
+                          int width);
+void ARGBToUV444Row_Any_MSA(const uint8_t* src_ptr,
+                            uint8_t* dst_u,
+                            uint8_t* dst_v,
+                            int width);
+void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr,
+                         int src_stride_ptr,
+                         uint8_t* dst_u,
+                         uint8_t* dst_v,
+                         int width);
+void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr,
+                           int src_stride_ptr,
+                           uint8_t* dst_u,
+                           uint8_t* dst_v,
+                           int width);
+void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr,
+                          int src_stride_ptr,
+                          uint8_t* dst_u,
+                          uint8_t* dst_v,
+                          int width);
+void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr,
+                          int src_stride_ptr,
+                          uint8_t* dst_u,
+                          uint8_t* dst_v,
+                          int width);
+void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr,
+                          int src_stride_ptr,
+                          uint8_t* dst_u,
+                          uint8_t* dst_v,
+                          int width);
+void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr,
+                           int src_stride_ptr,
+                           uint8_t* dst_u,
+                           uint8_t* dst_v,
+                           int width);
+void RAWToUVRow_Any_NEON(const uint8_t* src_ptr,
+                         int src_stride_ptr,
+                         uint8_t* dst_u,
+                         uint8_t* dst_v,
+                         int width);
+void RGB565ToUVRow_Any_NEON(const uint8_t* src_ptr,
+                            int src_stride_ptr,
+                            uint8_t* dst_u,
+                            uint8_t* dst_v,
+                            int width);
+void ARGB1555ToUVRow_Any_NEON(const uint8_t* src_ptr,
+                              int src_stride_ptr,
+                              uint8_t* dst_u,
+                              uint8_t* dst_v,
+                              int width);
+void ARGB4444ToUVRow_Any_NEON(const uint8_t* src_ptr,
+                              int src_stride_ptr,
+                              uint8_t* dst_u,
+                              uint8_t* dst_v,
+                              int width);
+void ARGBToUVJRow_Any_MSA(const uint8_t* src_ptr,
+                          int src_stride_ptr,
+                          uint8_t* dst_u,
+                          uint8_t* dst_v,
+                          int width);
+void BGRAToUVRow_Any_MSA(const uint8_t* src_ptr,
+                         int src_stride_ptr,
+                         uint8_t* dst_u,
+                         uint8_t* dst_v,
+                         int width);
+void ABGRToUVRow_Any_MSA(const uint8_t* src_ptr,
+                         int src_stride_ptr,
+                         uint8_t* dst_u,
+                         uint8_t* dst_v,
+                         int width);
+void RGBAToUVRow_Any_MSA(const uint8_t* src_ptr,
+                         int src_stride_ptr,
+                         uint8_t* dst_u,
+                         uint8_t* dst_v,
+                         int width);
+void RGB24ToUVRow_Any_MSA(const uint8_t* src_ptr,
+                          int src_stride_ptr,
+                          uint8_t* dst_u,
+                          uint8_t* dst_v,
+                          int width);
+void RAWToUVRow_Any_MSA(const uint8_t* src_ptr,
+                        int src_stride_ptr,
+                        uint8_t* dst_u,
+                        uint8_t* dst_v,
+                        int width);
+void RGB565ToUVRow_Any_MSA(const uint8_t* src_ptr,
+                           int src_stride_ptr,
+                           uint8_t* dst_u,
+                           uint8_t* dst_v,
+                           int width);
+void ARGB1555ToUVRow_Any_MSA(const uint8_t* src_ptr,
+                             int src_stride_ptr,
+                             uint8_t* dst_u,
+                             uint8_t* dst_v,
+                             int width);
+void ARGBToUVRow_C(const uint8_t* src_rgb0,
+                   int src_stride_rgb,
+                   uint8_t* dst_u,
+                   uint8_t* dst_v,
+                   int width);
+void ARGBToUVJRow_C(const uint8_t* src_rgb0,
+                    int src_stride_rgb,
+                    uint8_t* dst_u,
+                    uint8_t* dst_v,
+                    int width);
+void ARGBToUVRow_C(const uint8_t* src_rgb0,
+                   int src_stride_rgb,
+                   uint8_t* dst_u,
+                   uint8_t* dst_v,
+                   int width);
+void ARGBToUVJRow_C(const uint8_t* src_rgb0,
+                    int src_stride_rgb,
+                    uint8_t* dst_u,
+                    uint8_t* dst_v,
+                    int width);
+void BGRAToUVRow_C(const uint8_t* src_rgb0,
+                   int src_stride_rgb,
+                   uint8_t* dst_u,
+                   uint8_t* dst_v,
+                   int width);
+void ABGRToUVRow_C(const uint8_t* src_rgb0,
+                   int src_stride_rgb,
+                   uint8_t* dst_u,
+                   uint8_t* dst_v,
+                   int width);
+void RGBAToUVRow_C(const uint8_t* src_rgb0,
+                   int src_stride_rgb,
+                   uint8_t* dst_u,
+                   uint8_t* dst_v,
+                   int width);
+void RGB24ToUVRow_C(const uint8_t* src_rgb0,
+                    int src_stride_rgb,
+                    uint8_t* dst_u,
+                    uint8_t* dst_v,
+                    int width);
+void RAWToUVRow_C(const uint8_t* src_rgb0,
+                  int src_stride_rgb,
+                  uint8_t* dst_u,
+                  uint8_t* dst_v,
                  int width);
-void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+void RGB565ToUVRow_C(const uint8_t* src_rgb565,
+                     int src_stride_rgb565,
+                     uint8_t* dst_u,
+                     uint8_t* dst_v,
                     int width);
-void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
+                       int src_stride_argb1555,
+                       uint8_t* dst_u,
+                       uint8_t* dst_v,
+                       int width);
+void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
+                       int src_stride_argb4444,
+                       uint8_t* dst_u,
+                       uint8_t* dst_v,
+                       int width);
+
+void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
+                          uint8_t* dst_u,
+                          uint8_t* dst_v,
+                          int width);
+void ARGBToUV444Row_Any_SSSE3(const uint8_t* src_ptr,
+                              uint8_t* dst_u,
+                              uint8_t* dst_v,
+                              int width);
+
+void ARGBToUV444Row_C(const uint8_t* src_argb,
+                      uint8_t* dst_u,
+                      uint8_t* dst_v,
+                      int width);
+
+void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+
+void MirrorUVRow_SSSE3(const uint8_t* src,
+                       uint8_t* dst_u,
+                       uint8_t* dst_v,
+                       int width);
+void MirrorUVRow_NEON(const uint8_t* src_uv,
+                      uint8_t* dst_u,
+                      uint8_t* dst_v,
+                      int width);
+void MirrorUVRow_MSA(const uint8_t* src_uv,
+                     uint8_t* dst_u,
+                     uint8_t* dst_v,
                     int width);
-void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+void MirrorUVRow_C(const uint8_t* src_uv,
+                   uint8_t* dst_u,
+                   uint8_t* dst_v,
+                   int width);
+
+void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void ARGBMirrorRow_Any_SSE2(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+
+void SplitUVRow_C(const uint8_t* src_uv,
+                  uint8_t* dst_u,
+                  uint8_t* dst_v,
+                  int width);
+void SplitUVRow_SSE2(const uint8_t* src_uv,
+                     uint8_t* dst_u,
+                     uint8_t* dst_v,
                     int width);
-void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+void SplitUVRow_AVX2(const uint8_t* src_uv,
+                     uint8_t* dst_u,
+                     uint8_t* dst_v,
+                     int width);
+void SplitUVRow_NEON(const uint8_t* src_uv,
+                     uint8_t* dst_u,
+                     uint8_t* dst_v,
+                     int width);
+void SplitUVRow_MSA(const uint8_t* src_uv,
+                    uint8_t* dst_u,
+                    uint8_t* dst_v,
+                    int width);
+void SplitUVRow_Any_SSE2(const uint8_t* src_ptr,
+                         uint8_t* dst_u,
+                         uint8_t* dst_v,
                         int width);
-void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+void SplitUVRow_Any_AVX2(const uint8_t* src_ptr,
+                         uint8_t* dst_u,
+                         uint8_t* dst_v,
                         int width);
-void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+void SplitUVRow_Any_NEON(const uint8_t* src_ptr,
+                         uint8_t* dst_u,
+                         uint8_t* dst_v,
                         int width);
+void SplitUVRow_Any_MSA(const uint8_t* src_ptr,
+                        uint8_t* dst_u,
+                        uint8_t* dst_v,
+                        int width);
-void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
-void CopyRow_AVX(const uint8* src, uint8* dst, int count);
-void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
-void CopyRow_NEON(const uint8* src, uint8* dst, int count);
-void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
-void CopyRow_C(const uint8* src, uint8* dst, int count);
-void CopyRow_Any_SSE2(const uint8* src, uint8* dst, int count);
-void CopyRow_Any_AVX(const uint8* src, uint8* dst, int count);
-void CopyRow_Any_NEON(const uint8* src, uint8* dst, int count);
+void MergeUVRow_C(const uint8_t* src_u,
+                  const uint8_t* src_v,
+                  uint8_t* dst_uv,
+                  int width);
+void MergeUVRow_SSE2(const uint8_t* src_u,
+                     const uint8_t* src_v,
+                     uint8_t* dst_uv,
+                     int width);
+void MergeUVRow_AVX2(const uint8_t* src_u,
+                     const uint8_t* src_v,
+                     uint8_t* dst_uv,
+                     int width);
+void MergeUVRow_NEON(const uint8_t* src_u,
+                     const uint8_t* src_v,
+                     uint8_t* dst_uv,
+                     int width);
+void MergeUVRow_MSA(const uint8_t* src_u,
+                    const uint8_t* src_v,
+                    uint8_t* dst_uv,
+                    int width);
+void MergeUVRow_Any_SSE2(const uint8_t* y_buf,
+                         const uint8_t* uv_buf,
+                         uint8_t* dst_ptr,
+                         int width);
+void MergeUVRow_Any_AVX2(const uint8_t* y_buf,
+                         const uint8_t* uv_buf,
+                         uint8_t* dst_ptr,
+                         int width);
+void MergeUVRow_Any_NEON(const uint8_t* y_buf,
+                         const uint8_t* uv_buf,
+                         uint8_t* dst_ptr,
+                         int width);
+void MergeUVRow_Any_MSA(const uint8_t* y_buf,
+                        const uint8_t* uv_buf,
+                        uint8_t* dst_ptr,
+                        int width);
-void CopyRow_16_C(const uint16* src, uint16* dst, int count);
+void SplitRGBRow_C(const uint8_t* src_rgb,
+                   uint8_t* dst_r,
+                   uint8_t* dst_g,
+                   uint8_t* dst_b,
+                   int width);
+void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
+                       uint8_t* dst_r,
+                       uint8_t* dst_g,
+                       uint8_t* dst_b,
+                       int width);
+void SplitRGBRow_NEON(const uint8_t* src_rgb,
+                      uint8_t* dst_r,
+                      uint8_t* dst_g,
+                      uint8_t* dst_b,
+                      int width);
+void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr,
+                           uint8_t* dst_r,
+                           uint8_t* dst_g,
+                           uint8_t* dst_b,
+                           int width);
+void SplitRGBRow_Any_NEON(const uint8_t* src_ptr,
+                          uint8_t* dst_r,
+                          uint8_t* dst_g,
+                          uint8_t* dst_b,
+                          int width);
-void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
+void MergeRGBRow_C(const uint8_t* src_r,
+                   const uint8_t* src_g,
+                   const uint8_t* src_b,
+                   uint8_t* dst_rgb,
+                   int width);
+void MergeRGBRow_SSSE3(const uint8_t* src_r,
+                       const uint8_t* src_g,
+                       const uint8_t* src_b,
+                       uint8_t* dst_rgb,
+                       int width);
+void MergeRGBRow_NEON(const uint8_t* src_r,
+                      const uint8_t* src_g,
+                      const uint8_t* src_b,
+                      uint8_t* dst_rgb,
+                      int width);
+void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf,
+                           const uint8_t* u_buf,
+                           const uint8_t* v_buf,
+                           uint8_t* dst_ptr,
+                           int width);
+void MergeRGBRow_Any_NEON(const uint8_t* src_r,
+                          const uint8_t* src_g,
+                          const uint8_t* src_b,
+                          uint8_t* dst_rgb,
+                          int width);
+
+void MergeUVRow_16_C(const uint16_t* src_u,
+                     const uint16_t* src_v,
+                     uint16_t* dst_uv,
+                     int scale, /* 64 for 10 bit */
+                     int width);
+void MergeUVRow_16_AVX2(const uint16_t* src_u,
+                        const uint16_t* src_v,
+                        uint16_t* dst_uv,
+                        int scale,
+                        int width);
+
+void MultiplyRow_16_AVX2(const uint16_t* src_y,
+                         uint16_t* dst_y,
+                         int scale,
+                         int width);
+void MultiplyRow_16_C(const uint16_t* src_y,
+                      uint16_t* dst_y,
+                      int scale,
+                      int width);
+
+void Convert8To16Row_C(const uint8_t* src_y,
+                       uint16_t* dst_y,
+                       int scale,
+                       int width);
+void Convert8To16Row_SSE2(const uint8_t* src_y,
+                          uint16_t* dst_y,
+                          int scale,
+                          int width);
+void Convert8To16Row_AVX2(const uint8_t* src_y,
+                          uint16_t* dst_y,
+                          int scale,
+                          int width);
+void Convert8To16Row_Any_SSE2(const uint8_t* src_ptr,
+                              uint16_t* dst_ptr,
+                              int scale,
+                              int width);
+void Convert8To16Row_Any_AVX2(const uint8_t* src_ptr,
+                              uint16_t* dst_ptr,
+                              int scale,
+                              int width);
+
+void Convert16To8Row_C(const uint16_t* src_y,
+                       uint8_t* dst_y,
+                       int scale,
+                       int width);
+void Convert16To8Row_SSSE3(const uint16_t* src_y,
+                           uint8_t* dst_y,
+                           int scale,
+                           int width);
+void Convert16To8Row_AVX2(const uint16_t* src_y,
+                          uint8_t* dst_y,
+                          int scale,
+                          int width);
+void Convert16To8Row_Any_SSSE3(const uint16_t* src_ptr,
+                               uint8_t* dst_ptr,
+                               int scale,
                               int width);
-void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
+void Convert16To8Row_Any_AVX2(const uint16_t* src_ptr,
+                              uint8_t* dst_ptr,
+                              int scale,
+                              int width);
+
+void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width);
+void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width);
+void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width);
+void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count);
+void CopyRow_C(const uint8_t* src, uint8_t* dst, int count);
+void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void CopyRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+
+void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count);
+
+void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_ptr,
+                               uint8_t* dst_ptr,
+                               int width);
+void ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_ptr,
+                               uint8_t* dst_ptr,
                               int width);
-void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width);
-void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width);
-void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width);
-void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_a,
+void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width);
+void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
+                              uint8_t* dst_a,
+                              int width);
+void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb,
+                              uint8_t* dst_a,
+                              int width);
+void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
+                              uint8_t* dst_a,
+                              int width);
+void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
+                             uint8_t* dst_a,
+                             int width);
+void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr,
+                                  uint8_t* dst_ptr,
                                  int width);
-void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb, uint8* dst_a,
+void ARGBExtractAlphaRow_Any_AVX2(const uint8_t* src_ptr,
+                                  uint8_t* dst_ptr,
+                                  int width);
+void ARGBExtractAlphaRow_Any_NEON(const uint8_t* src_ptr,
+                                  uint8_t* dst_ptr,
+                                  int width);
+void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_ptr,
+                                 uint8_t* dst_ptr,
+                                 int width);
+
+void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr,
+                                  uint8_t* dst_ptr,
+                                  int width);
+void ARGBCopyYToAlphaRow_Any_AVX2(const uint8_t* src_ptr,
+                                  uint8_t* dst_ptr,
                                  int width);
-void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
-void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-void ARGBCopyYToAlphaRow_Any_SSE2(const uint8* src_y, uint8* dst_argb,
-                                  int width);
-void ARGBCopyYToAlphaRow_Any_AVX2(const uint8* src_y, uint8* dst_argb,
-                                  int width);
+void SetRow_C(uint8_t* dst, uint8_t v8, int width);
+void SetRow_MSA(uint8_t* dst, uint8_t v8, int width);
+void SetRow_X86(uint8_t* dst, uint8_t v8, int width);
+void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width);
+void SetRow_NEON(uint8_t* dst, uint8_t v8, int width);
+void SetRow_Any_X86(uint8_t* dst_ptr, uint8_t v32, int width);
+void SetRow_Any_NEON(uint8_t* dst_ptr, uint8_t v32, int width);
-void SetRow_C(uint8* dst, uint8 v8, int count);
-void SetRow_X86(uint8* dst, uint8 v8, int count);
-void SetRow_ERMS(uint8* dst, uint8 v8, int count);
-void SetRow_NEON(uint8* dst, uint8 v8, int count);
-void SetRow_Any_X86(uint8* dst, uint8 v8, int count);
-void SetRow_Any_NEON(uint8* dst, uint8 v8, int count);
-
-void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int count);
-void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count);
-void ARGBSetRow_NEON(uint8* dst_argb, uint32 v32, int count);
-void ARGBSetRow_Any_NEON(uint8* dst_argb, uint32 v32, int count);
+void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width);
+void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width);
+void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width);
+void ARGBSetRow_Any_NEON(uint8_t* dst_ptr, uint32_t v32, int width);
+void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width);
+void ARGBSetRow_Any_MSA(uint8_t* dst_ptr, uint32_t v32, int width);
 // ARGBShufflers for BGRAToARGB etc.
-void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
-                      const uint8* shuffler, int width);
-void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
-                         const uint8* shuffler, int width);
-void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                          const uint8* shuffler, int width);
-void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
-                         const uint8* shuffler, int width);
-void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
-                         const uint8* shuffler, int width);
-void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
-                             const uint8* shuffler, int width);
-void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                              const uint8* shuffler, int width);
-void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
-                             const uint8* shuffler, int width);
-void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
-                             const uint8* shuffler, int width);
-
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width);
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width);
-void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width);
-void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int width);
-void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
-                            int width);
-void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
-                            int width);
-void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int width);
-void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
-                            int width);
-void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
-                            int width);
-
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width);
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width);
-void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width);
-void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
-                            int width);
-void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
-                            int width);
-void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width);
-void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width);
-void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width);
-void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width);
-void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb,
+void ARGBShuffleRow_C(const uint8_t* src_argb,
+                      uint8_t* dst_argb,
+                      const uint8_t* shuffler,
+                      int width);
+void ARGBShuffleRow_SSSE3(const uint8_t* src_argb,
+                          uint8_t* dst_argb,
+                          const uint8_t* shuffler,
+                          int width);
+void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
+                         uint8_t* dst_argb,
+                         const uint8_t* shuffler,
+                         int width);
+void ARGBShuffleRow_NEON(const uint8_t* src_argb,
+                         uint8_t* dst_argb,
+                         const uint8_t* shuffler,
+                         int width);
+void ARGBShuffleRow_MSA(const uint8_t* src_argb,
+                        uint8_t* dst_argb,
+                        const uint8_t* shuffler,
+                        int width);
+void ARGBShuffleRow_Any_SSSE3(const uint8_t* src_ptr,
+                              uint8_t* dst_ptr,
+                              const uint8_t* param,
                              int width);
-void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int width);
-void RAWToRGB24Row_Any_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width);
-
-void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
-                              int width);
-void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
-                                int width);
-void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
-                                int width);
-void RGB565ToARGBRow_Any_AVX2(const uint8* src_rgb565, uint8* dst_argb,
-                              int width);
-void ARGB1555ToARGBRow_Any_AVX2(const uint8* src_argb1555, uint8* dst_argb,
-                                int width);
-void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444, uint8* dst_argb,
-                                int width);
-
-void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb,
+void ARGBShuffleRow_Any_AVX2(const uint8_t* src_ptr,
+                             uint8_t* dst_ptr,
+                             const uint8_t* param,
                             int width);
-void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int width);
-void RAWToRGB24Row_Any_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
-void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
+void ARGBShuffleRow_Any_NEON(const uint8_t* src_ptr,
+                             uint8_t* dst_ptr,
+                             const uint8_t* param,
+                             int width);
+void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            const uint8_t* param,
+                            int width);
+
+void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
+                          uint8_t* dst_argb,
+                          int width);
+void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565,
+                          uint8_t* dst_argb,
+                          int width);
+void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555,
+                            uint8_t* dst_argb,
+                            int width);
+void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444,
+                            uint8_t* dst_argb,
+                            int width);
+
+void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
+                         uint8_t* dst_argb,
+                         int width);
+void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
+void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
+                          uint8_t* dst_argb,
+                          int width);
+void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
+                         uint8_t* dst_argb,
+                         int width);
+void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
+                            uint8_t* dst_argb,
+                            int width);
+void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555,
+                           uint8_t* dst_argb,
+                           int width);
+void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
+                            uint8_t* dst_argb,
+                            int width);
+void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444,
+                           uint8_t* dst_argb,
+                           int width);
+void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
+void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RGB565ToARGBRow_C(const uint8_t* src_rgb565, uint8_t* dst_argb, int width);
+void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
+                         uint8_t* dst_argb,
+                         int width);
+void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
+                         uint8_t* dst_argb,
+                         int width);
+void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width);
+void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width);
+void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
+void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width);
+
+void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
+                              uint8_t* dst_ptr,
                              int width);
-void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
+void RAWToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_ptr,
+                             uint8_t* dst_ptr,
+                             int width);
+
+void RGB565ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+                              uint8_t* dst_ptr,
+                              int width);
+void ARGB1555ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+                                uint8_t* dst_ptr,
                                int width);
-void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
+void ARGB4444ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+                                uint8_t* dst_ptr,
+                                int width);
+void RGB565ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+                              uint8_t* dst_ptr,
+                              int width);
+void ARGB1555ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+                                uint8_t* dst_ptr,
+                                int width);
+void ARGB4444ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+                                uint8_t* dst_ptr,
                                int width);
-void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
+void RGB24ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+                             uint8_t* dst_ptr,
+                             int width);
+void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void RAWToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void RGB565ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+                              uint8_t* dst_ptr,
+                              int width);
+void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+                             uint8_t* dst_ptr,
+                             int width);
+void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+                                uint8_t* dst_ptr,
+                                int width);
+void ARGB1555ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+                               uint8_t* dst_ptr,
+                               int width);
+void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+                                uint8_t* dst_ptr,
+                                int width);
-void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
-                             const uint32 dither4, int width);
-void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
-                                const uint32 dither4, int width);
-void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
-                                const uint32 dither4, int width);
+void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr,
+                               uint8_t* dst_ptr,
+                               int width);
-void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width);
+void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
-                                const uint32 dither4, int width);
+void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToRGB24Row_AVX2(const uint8_t* src, uint8_t* dst, int width);
-void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
-void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
+void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width);
-void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
+void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
+                             uint8_t* dst_rgb,
+                             const uint32_t dither4,
+                             int width);
+void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
+                                uint8_t* dst,
+                                const uint32_t dither4,
+                                int width);
+void ARGBToRGB565DitherRow_AVX2(const uint8_t* src,
+                                uint8_t* dst,
+                                const uint32_t dither4,
+                                int width);
-void I444ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_argb,
+void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb,
+                            uint8_t* dst_rgb,
+                            int width);
+void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb,
+                            uint8_t* dst_rgb,
+                            int width);
+void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width);
+void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width);
+
+void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
+                         uint8_t* dst_rgb24,
+                         int width);
+void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width);
+void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
+                          uint8_t* dst_rgb565,
+                          int width);
+void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
+                            uint8_t* dst_argb1555,
+                            int width);
+void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
+                            uint8_t* dst_argb4444,
+                            int width);
+void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
+                                uint8_t* dst_rgb,
+                                const uint32_t dither4,
+                                int width);
+void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_MSA(const uint8_t* src_argb,
+                           uint8_t* dst_rgb,
+                           int width);
+void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
+                           uint8_t* dst_rgb,
+                           int width);
+void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
+                               uint8_t* dst_rgb,
+                               const uint32_t dither4,
+                               int width);
+
+void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width);
+void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
+
+void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void J400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void J400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void J400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+
+void I444ToARGBRow_C(const uint8_t* src_y,
+                     const uint8_t* src_u,
+                     const uint8_t* src_v,
+                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width);
-void I422ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_argb,
+void I422ToARGBRow_C(const uint8_t* src_y,
+                     const uint8_t* src_u,
+                     const uint8_t* src_v,
+                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width);
-void I422ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_argb,
+void I422ToAR30Row_C(const uint8_t* src_y,
+                     const uint8_t* src_u,
+                     const uint8_t* src_v,
+                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width);
-void I422AlphaToARGBRow_C(const uint8* y_buf,
-                          const uint8* u_buf,
-                          const uint8* v_buf,
-                          const uint8* a_buf,
-                          uint8* dst_argb,
+void I210ToAR30Row_C(const uint16_t* src_y,
+                     const uint16_t* src_u,
+                     const uint16_t* src_v,
+                     uint8_t* rgb_buf,
+                     const struct YuvConstants* yuvconstants,
+                     int width);
+void I210ToARGBRow_C(const uint16_t* src_y,
+                     const uint16_t* src_u,
+                     const uint16_t* src_v,
+                     uint8_t* rgb_buf,
+                     const struct YuvConstants* yuvconstants,
+                     int width);
+void I422AlphaToARGBRow_C(const uint8_t* src_y,
+                          const uint8_t* src_u,
+                          const uint8_t* src_v,
+                          const uint8_t* src_a,
+                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width);
-void I411ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_argb,
+void NV12ToARGBRow_C(const uint8_t* src_y,
+                     const uint8_t* src_uv,
+                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width);
-void NV12ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_uv,
-                     uint8* dst_argb,
-                     const struct YuvConstants* yuvconstants,
-                     int width);
-void NV12ToRGB565Row_C(const uint8* src_y,
-                       const uint8* src_uv,
-                       uint8* dst_argb,
+void NV12ToRGB565Row_C(const uint8_t* src_y,
+                       const uint8_t* src_uv,
+                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width);
-void NV21ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_uv,
-                     uint8* dst_argb,
+void NV21ToARGBRow_C(const uint8_t* src_y,
+                     const uint8_t* src_vu,
+                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width);
-void YUY2ToARGBRow_C(const uint8* src_yuy2,
-                     uint8* dst_argb,
-                     const struct YuvConstants* yuvconstants,
-                     int width);
-void UYVYToARGBRow_C(const uint8* src_uyvy,
-                     uint8* dst_argb,
-                     const struct YuvConstants* yuvconstants,
-                     int width);
-void I422ToRGBARow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_rgba,
-                     const struct YuvConstants* yuvconstants,
-                     int width);
-void I422ToRGB24Row_C(const uint8* src_y,
-                      const uint8* src_u,
-                      const uint8* src_v,
-                      uint8* dst_rgb24,
+void NV12ToRGB24Row_C(const uint8_t* src_y,
+                      const uint8_t* src_uv,
+                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width);
-void I422ToARGB4444Row_C(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb4444,
+void NV21ToRGB24Row_C(const uint8_t* src_y,
+                      const uint8_t* src_vu,
+                      uint8_t* rgb_buf,
+                      const struct YuvConstants* yuvconstants,
+                      int width);
+void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
+                     uint8_t* rgb_buf,
+                     const struct YuvConstants* yuvconstants,
+                     int width);
+void UYVYToARGBRow_C(const uint8_t* src_uyvy,
+                     uint8_t* rgb_buf,
+                     const struct YuvConstants* yuvconstants,
+                     int width);
+void I422ToRGBARow_C(const uint8_t* src_y,
+                     const uint8_t* src_u,
+                     const uint8_t* src_v,
+                     uint8_t* rgb_buf,
+                     const struct YuvConstants* yuvconstants,
+                     int width);
+void I422ToRGB24Row_C(const uint8_t* src_y,
+                      const uint8_t* src_u,
+                      const uint8_t* src_v,
+                      uint8_t* rgb_buf,
+                      const struct YuvConstants* yuvconstants,
+                      int width);
+void I422ToARGB4444Row_C(const uint8_t* src_y,
+                         const uint8_t* src_u,
+                         const uint8_t* src_v,
+                         uint8_t* dst_argb4444,
                         const struct YuvConstants* yuvconstants,
                         int width);
-void I422ToARGB1555Row_C(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb4444,
+void I422ToARGB1555Row_C(const uint8_t* src_y,
+                         const uint8_t* src_u,
+                         const uint8_t* src_v,
+                         uint8_t* dst_argb1555,
                         const struct YuvConstants* yuvconstants,
                         int width);
-void I422ToRGB565Row_C(const uint8* src_y,
-                       const uint8* src_u,
-                       const uint8* src_v,
-                       uint8* dst_rgb565,
+void I422ToRGB565Row_C(const uint8_t* src_y,
+                       const uint8_t* src_u,
+                       const uint8_t* src_v,
+                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width);
-void I422ToARGBRow_AVX2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
+void I422ToARGBRow_AVX2(const uint8_t* y_buf,
+                        const uint8_t* u_buf,
+                        const uint8_t* v_buf,
+                        uint8_t* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width);
-void I422ToARGBRow_AVX2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
+void I422ToRGBARow_AVX2(const uint8_t* y_buf,
+                        const uint8_t* u_buf,
+                        const uint8_t* v_buf,
+                        uint8_t* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width);
-void I422ToRGBARow_AVX2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
-                        const struct YuvConstants* yuvconstants,
-                        int width);
-void I444ToARGBRow_SSSE3(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb,
+void I444ToARGBRow_SSSE3(const uint8_t* y_buf,
+                         const uint8_t* u_buf,
+                         const uint8_t* v_buf,
+                         uint8_t* dst_argb,
                         const struct YuvConstants* yuvconstants,
                         int width);
-void I444ToARGBRow_AVX2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
+void I444ToARGBRow_AVX2(const uint8_t* y_buf,
+                        const uint8_t* u_buf,
+                        const uint8_t* v_buf,
+                        uint8_t* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width);
-void I444ToARGBRow_SSSE3(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb,
+void I444ToARGBRow_SSSE3(const uint8_t* y_buf,
+                         const uint8_t* u_buf,
+                         const uint8_t* v_buf,
+                         uint8_t* dst_argb,
                         const struct YuvConstants* yuvconstants,
                         int width);
-void I444ToARGBRow_AVX2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
+void I444ToARGBRow_AVX2(const uint8_t* y_buf,
+                        const uint8_t* u_buf,
+                        const uint8_t* v_buf,
+                        uint8_t* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width);
-void I422ToARGBRow_SSSE3(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb,
+void I422ToARGBRow_SSSE3(const uint8_t* y_buf,
+                         const uint8_t* u_buf,
+                         const uint8_t* v_buf,
+                         uint8_t* dst_argb,
                         const struct YuvConstants* yuvconstants,
                         int width);
-void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              const uint8* a_buf,
-                              uint8* dst_argb,
+
+void I422ToAR30Row_SSSE3(const uint8_t* y_buf,
+                         const uint8_t* u_buf,
+                         const uint8_t* v_buf,
+                         uint8_t* dst_ar30,
+                         const struct YuvConstants* yuvconstants,
+                         int width);
+void I210ToAR30Row_SSSE3(const uint16_t* y_buf,
+                         const uint16_t* u_buf,
+                         const uint16_t* v_buf,
+                         uint8_t* dst_ar30,
+                         const struct YuvConstants* yuvconstants,
+                         int width);
+void I210ToARGBRow_SSSE3(const uint16_t* y_buf,
+                         const uint16_t* u_buf,
+                         const uint16_t* v_buf,
+                         uint8_t* dst_argb,
+                         const struct YuvConstants* yuvconstants,
+                         int width);
+void I422ToAR30Row_AVX2(const uint8_t* y_buf,
+                        const uint8_t* u_buf,
+                        const uint8_t* v_buf,
+                        uint8_t* dst_ar30,
+                        const struct YuvConstants* yuvconstants,
+                        int width);
+void I210ToARGBRow_AVX2(const uint16_t* y_buf,
+                        const uint16_t* u_buf,
+                        const uint16_t* v_buf,
+                        uint8_t* dst_argb,
+                        const struct YuvConstants* yuvconstants,
+                        int width);
+void I210ToAR30Row_AVX2(const uint16_t* y_buf,
+                        const uint16_t* u_buf,
+                        const uint16_t* v_buf,
+                        uint8_t* dst_ar30,
+                        const struct YuvConstants* yuvconstants,
+                        int width);
+void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
+                              const uint8_t* u_buf,
+                              const uint8_t* v_buf,
+                              const uint8_t* a_buf,
+                              uint8_t* dst_argb,
                              const struct YuvConstants* yuvconstants,
                              int width);
-void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
-                             const uint8* u_buf,
-                             const uint8* v_buf,
-                             const uint8* a_buf,
-                             uint8* dst_argb,
+void I422AlphaToARGBRow_AVX2(const uint8_t* y_buf,
+                             const uint8_t* u_buf,
+                             const uint8_t* v_buf,
+                             const uint8_t* a_buf,
+                             uint8_t* dst_argb,
                             const struct YuvConstants* yuvconstants,
                             int width);
-void I422ToARGBRow_SSSE3(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb,
+void NV12ToARGBRow_SSSE3(const uint8_t* y_buf,
+                         const uint8_t* uv_buf,
+                         uint8_t* dst_argb,
                         const struct YuvConstants* yuvconstants,
                         int width);
-void I411ToARGBRow_SSSE3(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb,
-                         const struct YuvConstants* yuvconstants,
-                         int width);
-void I411ToARGBRow_AVX2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
+void NV12ToARGBRow_AVX2(const uint8_t* y_buf,
+                        const uint8_t* uv_buf,
+                        uint8_t* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width);
-void NV12ToARGBRow_SSSE3(const uint8* src_y,
-                         const uint8* src_uv,
-                         uint8* dst_argb,
-                         const struct YuvConstants* yuvconstants,
-                         int width);
-void NV12ToARGBRow_AVX2(const uint8* src_y,
-                        const uint8* src_uv,
-                        uint8* dst_argb,
-                        const struct YuvConstants* yuvconstants,
-                        int width);
-void NV12ToRGB565Row_SSSE3(const uint8* src_y,
-                           const uint8* src_uv,
-                           uint8* dst_argb,
+void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
+                          const uint8_t* src_uv,
+                          uint8_t* dst_rgb24,
+                          const struct YuvConstants* yuvconstants,
+                          int width);
+void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
+                          const uint8_t* src_vu,
+                          uint8_t* dst_rgb24,
+                          const struct YuvConstants* yuvconstants,
+                          int width);
+void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
+                           const uint8_t* src_uv,
+                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width);
-void NV12ToRGB565Row_AVX2(const uint8* src_y,
-                          const uint8* src_uv,
-                          uint8* dst_argb,
+void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
+                         const uint8_t* src_uv,
+                         uint8_t* dst_rgb24,
+                         const struct YuvConstants* yuvconstants,
+                         int width);
+void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
+                         const uint8_t* src_vu,
+                         uint8_t* dst_rgb24,
+                         const struct YuvConstants* yuvconstants,
+                         int width);
+void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
+                          const uint8_t* src_uv,
+                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width);
-void NV21ToARGBRow_SSSE3(const uint8* src_y,
-                         const uint8* src_uv,
-                         uint8* dst_argb,
+void NV21ToARGBRow_SSSE3(const uint8_t* y_buf,
+                         const uint8_t* vu_buf,
+                         uint8_t* dst_argb,
                         const struct YuvConstants* yuvconstants,
                         int width);
-void NV21ToARGBRow_AVX2(const uint8* src_y,
-                        const uint8* src_uv,
-                        uint8* dst_argb,
+void NV21ToARGBRow_AVX2(const uint8_t* y_buf,
+                        const uint8_t* vu_buf,
+                        uint8_t* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width);
-void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
-                         uint8* dst_argb,
+void YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf,
+                         uint8_t* dst_argb,
                         const struct YuvConstants* yuvconstants,
                         int width);
-void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
-                         uint8* dst_argb,
+void UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf,
+                         uint8_t* dst_argb,
                         const struct YuvConstants* yuvconstants,
                         int width);
-void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
-                        uint8* dst_argb,
+void YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf,
+                        uint8_t* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width);
-void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
-                        uint8* dst_argb,
+void UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
+                        uint8_t* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width);
-void I422ToRGBARow_SSSE3(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_rgba,
+void I422ToRGBARow_SSSE3(const uint8_t* y_buf,
+                         const uint8_t* u_buf,
+                         const uint8_t* v_buf,
+                         uint8_t* dst_rgba,
                         const struct YuvConstants* yuvconstants,
                         int width);
-void I422ToARGB4444Row_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_argb,
+void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
+                             const uint8_t* src_u,
+                             const uint8_t* src_v,
+                             uint8_t* dst_argb4444,
                             const struct YuvConstants* yuvconstants,
                             int width);
-void I422ToARGB4444Row_AVX2(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
+void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
+                            const uint8_t* src_u,
+                            const uint8_t* src_v,
+                            uint8_t* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width);
-void I422ToARGB1555Row_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_argb,
+void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
+                             const uint8_t* src_u,
+                             const uint8_t* src_v,
+                             uint8_t* dst_argb1555,
                             const struct YuvConstants* yuvconstants,
                             int width);
-void I422ToARGB1555Row_AVX2(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
+void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
+                            const uint8_t* src_u,
+                            const uint8_t* src_v,
+                            uint8_t* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width);
-void I422ToRGB565Row_SSSE3(const uint8* src_y,
-                           const uint8* src_u,
-                           const uint8* src_v,
-                           uint8* dst_argb,
+void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
+                           const uint8_t* src_u,
+                           const uint8_t* src_v,
+                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width);
-void I422ToRGB565Row_AVX2(const uint8* src_y,
-                          const uint8* src_u,
-                          const uint8* src_v,
-                          uint8* dst_argb,
+void I422ToRGB565Row_AVX2(const uint8_t* src_y,
+                          const uint8_t* src_u,
+                          const uint8_t* src_v,
+                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width);
-void I422ToRGB24Row_SSSE3(const uint8* src_y,
-                          const uint8* src_u,
-                          const uint8* src_v,
-                          uint8* dst_rgb24,
+void I422ToRGB24Row_SSSE3(const uint8_t* y_buf,
+                          const uint8_t* u_buf,
+                          const uint8_t* v_buf,
+                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width);
-void I422ToRGB24Row_AVX2(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_rgb24,
+void I422ToRGB24Row_AVX2(const uint8_t* src_y,
+                         const uint8_t* src_u,
+                         const uint8_t* src_v,
+                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width);
-void I422ToARGBRow_Any_AVX2(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
+void I422ToARGBRow_Any_AVX2(const uint8_t* y_buf,
+                            const uint8_t* u_buf,
+                            const uint8_t* v_buf,
+                            uint8_t* dst_ptr,
                            const struct YuvConstants* yuvconstants,
                            int width);
-void I422ToRGBARow_Any_AVX2(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
+void I422ToRGBARow_Any_AVX2(const uint8_t* y_buf,
+                            const uint8_t* u_buf,
+                            const uint8_t* v_buf,
+                            uint8_t* dst_ptr,
                            const struct YuvConstants* yuvconstants,
                            int width);
-void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_argb,
+void I444ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+                             const uint8_t* u_buf,
+                             const uint8_t* v_buf,
+                             uint8_t* dst_ptr,
                             const struct YuvConstants* yuvconstants,
                             int width);
-void I444ToARGBRow_Any_AVX2(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
+void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf,
+                            const uint8_t* u_buf,
+                            const uint8_t* v_buf,
+                            uint8_t* dst_ptr,
                            const struct YuvConstants* yuvconstants,
                            int width);
-void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_argb,
+void I422ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+                             const uint8_t* u_buf,
+                             const uint8_t* v_buf,
+                             uint8_t* dst_ptr,
                             const struct YuvConstants* yuvconstants,
                             int width);
-void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf,
-                                  const uint8* u_buf,
-                                  const uint8* v_buf,
-                                  const uint8* a_buf,
-                                  uint8* dst_argb,
+void I422ToAR30Row_Any_SSSE3(const uint8_t* y_buf,
+                             const uint8_t* u_buf,
+                             const uint8_t* v_buf,
+                             uint8_t* dst_ptr,
+                             const struct YuvConstants* yuvconstants,
+                             int width);
+void I210ToAR30Row_Any_SSSE3(const uint16_t* y_buf,
+                             const uint16_t* u_buf,
+                             const uint16_t* v_buf,
+                             uint8_t* dst_ptr,
+                             const struct YuvConstants* yuvconstants,
+                             int width);
+void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
+                             const uint16_t* u_buf,
+                             const uint16_t* v_buf,
+                             uint8_t* dst_ptr,
+                             const struct YuvConstants* yuvconstants,
+                             int width);
+void I422ToAR30Row_Any_AVX2(const uint8_t* y_buf,
+                            const uint8_t* u_buf,
+                            const uint8_t* v_buf,
+                            uint8_t* dst_ptr,
+                            const struct YuvConstants* yuvconstants,
+                            int width);
+void I210ToARGBRow_Any_AVX2(const uint16_t* y_buf,
+                            const uint16_t* u_buf,
+                            const uint16_t* v_buf,
+                            uint8_t* dst_ptr,
+                            const struct YuvConstants* yuvconstants,
+                            int width);
+void I210ToAR30Row_Any_AVX2(const uint16_t* y_buf,
+                            const uint16_t* u_buf,
+                            const uint16_t* v_buf,
+                            uint8_t* dst_ptr,
+                            const struct YuvConstants* yuvconstants,
+                            int width);
+void I422AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+                                  const uint8_t* u_buf,
+                                  const uint8_t* v_buf,
+                                  const uint8_t* a_buf,
+                                  uint8_t* dst_ptr,
                                  const struct YuvConstants* yuvconstants,
                                  int width);
-void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf,
-                                 const uint8* u_buf,
-                                 const uint8* v_buf,
-                                 const uint8* a_buf,
-                                 uint8* dst_argb,
+void I422AlphaToARGBRow_Any_AVX2(const uint8_t* y_buf,
+                                 const uint8_t* u_buf,
+                                 const uint8_t* v_buf,
+                                 const uint8_t* a_buf,
+                                 uint8_t* dst_ptr,
                                 const struct YuvConstants* yuvconstants,
                                 int width);
-void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_argb,
+void NV12ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+                             const uint8_t* uv_buf,
+                             uint8_t* dst_ptr,
                             const struct YuvConstants* yuvconstants,
                             int width);
-void I411ToARGBRow_Any_AVX2(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
+void NV12ToARGBRow_Any_AVX2(const uint8_t* y_buf,
+                            const uint8_t* uv_buf,
+                            uint8_t* dst_ptr,
                            const struct YuvConstants* yuvconstants,
                            int width);
-void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_uv,
-                             uint8* dst_argb,
+void NV21ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
+                             const uint8_t* uv_buf,
+                             uint8_t* dst_ptr,
                             const struct YuvConstants* yuvconstants,
                             int width);
-void NV12ToARGBRow_Any_AVX2(const uint8* src_y,
-                            const uint8* src_uv,
-                            uint8* dst_argb,
+void NV21ToARGBRow_Any_AVX2(const uint8_t* y_buf,
+                            const uint8_t* uv_buf,
+                            uint8_t* dst_ptr,
                            const struct YuvConstants* yuvconstants,
                            int width);
-void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_vu,
-                             uint8* dst_argb,
+void NV12ToRGB24Row_Any_SSSE3(const uint8_t* y_buf,
+                              const uint8_t* uv_buf,
+                              uint8_t* dst_ptr,
+                              const struct YuvConstants* yuvconstants,
+                              int width);
+void NV21ToRGB24Row_Any_SSSE3(const uint8_t* y_buf,
+                              const uint8_t* uv_buf,
+                              uint8_t* dst_ptr,
+                              const struct YuvConstants* yuvconstants,
+                              int width);
+void NV12ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
+                             const uint8_t* uv_buf,
+                             uint8_t* dst_ptr,
                             const struct YuvConstants* yuvconstants,
                             int width);
-void NV21ToARGBRow_Any_AVX2(const uint8* src_y,
-                            const uint8* src_vu,
-                            uint8* dst_argb,
-                            const struct YuvConstants* yuvconstants,
-                            int width);
-void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
-                               const uint8* src_uv,
-                               uint8* dst_argb,
+void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
+                             const uint8_t* uv_buf,
+                             uint8_t* dst_ptr,
+                             const struct YuvConstants* yuvconstants,
+                             int width);
+void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
+                               const uint8_t* uv_buf,
+                               uint8_t* dst_ptr,
                               const struct YuvConstants* yuvconstants,
                               int width);
-void NV12ToRGB565Row_Any_AVX2(const uint8* src_y,
-                              const uint8* src_uv,
-                              uint8* dst_argb,
+void NV12ToRGB565Row_Any_AVX2(const uint8_t* y_buf,
+                              const uint8_t* uv_buf,
+                              uint8_t* dst_ptr,
                              const struct YuvConstants* yuvconstants,
                              int width);
-void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
-                             uint8* dst_argb,
+void YUY2ToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
+                             uint8_t* dst_ptr,
                             const struct YuvConstants* yuvconstants,
                             int width);
-void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
-                             uint8* dst_argb,
+void UYVYToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
+                             uint8_t* dst_ptr,
                             const struct YuvConstants* yuvconstants,
                             int width);
-void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2,
-                            uint8* dst_argb,
+void YUY2ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
                            const struct YuvConstants* yuvconstants,
                            int width);
-void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy,
-                            uint8* dst_argb,
+void UYVYToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
                            const struct YuvConstants* yuvconstants,
                            int width);
-void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_rgba,
+void I422ToRGBARow_Any_SSSE3(const uint8_t* y_buf,
+                             const uint8_t* u_buf,
+                             const uint8_t* v_buf,
+                             uint8_t* dst_ptr,
                             const struct YuvConstants* yuvconstants,
                             int width);
-void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
-                                 const uint8* src_u,
-                                 const uint8* src_v,
-                                 uint8* dst_rgba,
+void I422ToARGB4444Row_Any_SSSE3(const uint8_t* y_buf,
+                                 const uint8_t* u_buf,
+                                 const uint8_t* v_buf,
+                                 uint8_t* dst_ptr,
                                 const struct YuvConstants* yuvconstants,
                                 int width);
-void I422ToARGB4444Row_Any_AVX2(const uint8* src_y,
-                                const uint8* src_u,
-                                const uint8* src_v,
-                                uint8* dst_rgba,
+void I422ToARGB4444Row_Any_AVX2(const uint8_t* y_buf,
+                                const uint8_t* u_buf,
+                                const uint8_t* v_buf,
+                                uint8_t* dst_ptr,
                                const struct YuvConstants* yuvconstants,
                                int width);
-void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
-                                 const uint8* src_u,
-                                 const uint8* src_v,
-                                 uint8* dst_rgba,
+void I422ToARGB1555Row_Any_SSSE3(const uint8_t* y_buf,
+                                 const uint8_t* u_buf,
+                                 const uint8_t* v_buf,
+                                 uint8_t* dst_ptr,
                                 const struct YuvConstants* yuvconstants,
                                 int width);
-void I422ToARGB1555Row_Any_AVX2(const uint8* src_y,
-                                const uint8* src_u,
-                                const uint8* src_v,
-                                uint8* dst_rgba,
+void I422ToARGB1555Row_Any_AVX2(const uint8_t* y_buf,
+                                const uint8_t* u_buf,
+                                const uint8_t* v_buf,
+                                uint8_t* dst_ptr,
                                const struct YuvConstants* yuvconstants,
                                int width);
-void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
-                               const uint8* src_u,
-                               const uint8* src_v,
-                               uint8* dst_rgba,
+void I422ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
+                               const uint8_t* u_buf,
+                               const uint8_t* v_buf,
+                               uint8_t* dst_ptr,
                               const struct YuvConstants* yuvconstants,
                               int width);
-void I422ToRGB565Row_Any_AVX2(const uint8* src_y,
-                              const uint8* src_u,
-                              const uint8* src_v,
-                              uint8* dst_rgba,
+void I422ToRGB565Row_Any_AVX2(const uint8_t* y_buf,
+                              const uint8_t* u_buf,
+                              const uint8_t* v_buf,
+                              uint8_t* dst_ptr,
                              const struct YuvConstants* yuvconstants,
                              int width);
-void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
-                              const uint8* src_u,
-                              const uint8* src_v,
-                              uint8* dst_argb,
+void I422ToRGB24Row_Any_SSSE3(const uint8_t* y_buf,
+                              const uint8_t* u_buf,
+                              const uint8_t* v_buf,
+                              uint8_t* dst_ptr,
                              const struct YuvConstants* yuvconstants,
                              int width);
-void I422ToRGB24Row_Any_AVX2(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_argb,
+void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
+                             const uint8_t* u_buf,
+                             const uint8_t* v_buf,
+                             uint8_t* dst_ptr,
                             const struct YuvConstants* yuvconstants,
                             int width);
-void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
+void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width);
+void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
+void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
+void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
+                            uint8_t* dst_ptr,
+                            int width);
+void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
 // ARGB preattenuated alpha blend.
-void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
-                        uint8* dst_argb, int width);
-void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
-                       uint8* dst_argb, int width);
-void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
-                    uint8* dst_argb, int width);
+void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
+                        const uint8_t* src_argb1,
+                        uint8_t* dst_argb,
+                        int width);
+void ARGBBlendRow_NEON(const uint8_t* src_argb0,
+                       const uint8_t* src_argb1,
+                       uint8_t* dst_argb,
+                       int width);
+void ARGBBlendRow_MSA(const uint8_t* src_argb0,
+                      const uint8_t* src_argb1,
+                      uint8_t* dst_argb,
+                      int width);
+void ARGBBlendRow_C(const uint8_t* src_argb0,
+                    const uint8_t* src_argb1,
+                    uint8_t* dst_argb,
+                    int width);
 // Unattenuated planar alpha blend.
-void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
-                         const uint8* alpha, uint8* dst, int width);
-void BlendPlaneRow_Any_SSSE3(const uint8* src0, const uint8* src1,
-                             const uint8* alpha, uint8* dst, int width);
-void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1,
-                        const uint8* alpha, uint8* dst, int width);
-void BlendPlaneRow_Any_AVX2(const uint8* src0, const uint8* src1,
-                            const uint8* alpha, uint8* dst, int width);
-void BlendPlaneRow_C(const uint8* src0, const uint8* src1,
-                     const uint8* alpha, uint8* dst, int width);
+void BlendPlaneRow_SSSE3(const uint8_t* src0,
+                         const uint8_t* src1,
+                         const uint8_t* alpha,
+                         uint8_t* dst,
+                         int width);
+void BlendPlaneRow_Any_SSSE3(const uint8_t* y_buf,
+                             const uint8_t* u_buf,
+                             const uint8_t* v_buf,
+                             uint8_t* dst_ptr,
+                             int width);
+void BlendPlaneRow_AVX2(const uint8_t* src0,
+                        const uint8_t* src1,
+                        const uint8_t* alpha,
+                        uint8_t* dst,
+                        int width);
+void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf,
+                            const uint8_t* u_buf,
+                            const uint8_t* v_buf,
+                            uint8_t* dst_ptr,
+                            int width);
+void BlendPlaneRow_C(const uint8_t* src0,
+                     const uint8_t* src1,
+                     const uint8_t* alpha,
+                     uint8_t* dst,
+                     int width);
 // ARGB multiply images. Same API as Blend, but these require
 // pointer and width alignment for SSE2.
-void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); +void ARGBMultiplyRow_C(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBMultiplyRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBMultiplyRow_MSA(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); // ARGB add images. -void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); +void ARGBAddRow_C(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBAddRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBAddRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBAddRow_MSA(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); // ARGB subtract images. 
Same API as Blend, but these require // pointer and width alignment for SSE2. -void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); +void ARGBSubtractRow_C(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBSubtractRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBSubtractRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBSubtractRow_MSA(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); -void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, +void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRGB565Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, +void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); +void ABGRToAR30Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB24Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRGB24Row_Any_AVX512VBMI(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); +void ARGBToRGB565DitherRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); -void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width); -void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, - const 
uint32 dither4, int width); +void ARGBToRGB565Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB4444Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ABGRToAR30Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToAR30Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); -void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, +void ARGBToRGB24Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB565Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, +void ARGBToARGB4444Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); +void ARGBToRGB565DitherRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); +void ARGBToRGB24Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB565Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB4444Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); -void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, - int width); -void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, - int width); -void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width); - -void I444ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const uint8* src_a, - uint8* dst_argb, +void I422AlphaToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I411ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGBARow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void 
I422ToRGBARow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToRGB24Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB24Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGB4444Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGB1555Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB565Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToRGB24Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, +void I444ToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGBARow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + 
uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void YUY2ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); + +void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void I422ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, +void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width); +void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_C(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_C(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr, + 
int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_SSE2(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_NEON(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_MSA(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); -void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_NEON(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_C(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2, - 
uint8* dst_u, uint8* dst_v, int width); -void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_NEON(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); +void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_C(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_C(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToUVRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToUVRow_Any_SSE2(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); -void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_C(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_Any_AVX2(const uint8* 
src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); - -void I422ToYUY2Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); +void I422ToYUY2Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, + int width); +void I422ToUYVYRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, + int width); +void I422ToYUY2Row_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); +void I422ToUYVYRow_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width); +void I422ToYUY2Row_Any_SSE2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToUYVYRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToYUY2Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); +void I422ToUYVYRow_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width); +void I422ToYUY2Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToUYVYRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToYUY2Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); +void I422ToUYVYRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width); +void I422ToYUY2Row_Any_NEON(const uint8_t* y_buf, + 
const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToUYVYRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToYUY2Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); +void I422ToUYVYRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width); +void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); // Effects related row functions. -void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, - int width); -void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, +void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, +void ARGBAttenuateRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, +void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); +void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); // Inverse table for unattenuate, shared by C and SSE2. 
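fixed_invtbl8, declared just below, caches a fixed-point reciprocal per alpha value so the unattenuate rows can turn the per-pixel divide color * 255 / alpha into a multiply and a shift. Its exact scaling is internal to libyuv, so this sketch builds its own 16.16-style table to show the idea (all names here are illustrative):

#include <stdint.h>

/* Illustrative: undo premultiplied alpha with a reciprocal lookup table.
 * inv_tbl[a] holds 65536 * 255 / a, so color * 255 / a becomes
 * (color * inv_tbl[a]) >> 16, one multiply instead of a divide. */
static uint32_t inv_tbl[256];

static void InitInvTable(void) {
  inv_tbl[0] = 0;  /* alpha 0: leave color at 0 rather than divide by zero */
  for (int a = 1; a < 256; ++a) {
    inv_tbl[a] = (65536u * 255u) / (uint32_t)a;
  }
}

static uint8_t Clamp255(uint32_t v) { return v > 255 ? 255 : (uint8_t)v; }

static void UnattenuateRowSketch(const uint8_t* src_argb, uint8_t* dst_argb,
                                 int width) {
  for (int i = 0; i < width; ++i) {
    uint32_t ia = inv_tbl[src_argb[3]];
    dst_argb[0] = Clamp255((src_argb[0] * ia) >> 16);
    dst_argb[1] = Clamp255((src_argb[1] * ia) >> 16);
    dst_argb[2] = Clamp255((src_argb[2] * ia) >> 16);
    dst_argb[3] = src_argb[3];  /* alpha passes through unchanged */
    src_argb += 4;
    dst_argb += 4;
  }
}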
-extern const uint32 fixed_invtbl8[256]; -void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, +extern const uint32_t fixed_invtbl8[256]; +void ARGBUnattenuateRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBUnattenuateRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, +void ARGBUnattenuateRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); +void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); -void ARGBSepiaRow_C(uint8* dst_argb, int width); -void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width); -void ARGBSepiaRow_NEON(uint8* dst_argb, int width); +void ARGBSepiaRow_C(uint8_t* dst_argb, int width); +void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width); +void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width); +void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width); -void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width); -void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width); -void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width); +void ARGBColorMatrixRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width); +void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width); +void ARGBColorMatrixRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width); +void ARGBColorMatrixRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width); -void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width); -void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width); +void ARGBColorTableRow_C(uint8_t* dst_argb, + const uint8_t* table_argb, + int width); +void ARGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, + int width); -void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width); -void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width); +void RGBColorTableRow_C(uint8_t* dst_argb, + const uint8_t* table_argb, + int width); +void RGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, + int width); -void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width); -void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width); -void ARGBQuantizeRow_NEON(uint8* 
dst_argb, int scale, int interval_size, - int interval_offset, int width); +void ARGBQuantizeRow_C(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width); +void ARGBQuantizeRow_SSE2(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width); +void ARGBQuantizeRow_NEON(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width); +void ARGBQuantizeRow_MSA(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width); -void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value); -void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value); -void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value); +void ARGBShadeRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value); +void ARGBShadeRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value); +void ARGBShadeRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value); +void ARGBShadeRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value); // Used for blur. -void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, int count); -void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width); +void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, + const int32_t* botleft, + int width, + int area, + uint8_t* dst, + int count); +void ComputeCumulativeSumRow_SSE2(const uint8_t* row, + int32_t* cumsum, + const int32_t* previous_cumsum, + int width); -void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, int count); -void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width); +void CumulativeSumToAverageRow_C(const int32_t* tl, + const int32_t* bl, + int w, + int area, + uint8_t* dst, + int count); +void ComputeCumulativeSumRow_C(const uint8_t* row, + int32_t* cumsum, + const int32_t* previous_cumsum, + int width); LIBYUV_API -void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width); +void ARGBAffineRow_C(const uint8_t* src_argb, + int src_argb_stride, + uint8_t* dst_argb, + const float* uv_dudv, + int width); LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width); +void ARGBAffineRow_SSE2(const uint8_t* src_argb, + int src_argb_stride, + uint8_t* dst_argb, + const float* src_dudv, + int width); // Used for I420Scale, ARGBScale, and ARGBInterpolate. 
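The InterpolateRow family declared next is the workhorse behind all three of those: it blends a row with the row src_stride bytes below it using an 8-bit fraction, where 0 keeps the first row and larger fractions weight the second. A minimal C sketch of that contract (the rounding constant is an assumption):

#include <stddef.h>
#include <stdint.h>

/* Illustrative InterpolateRow contract: dst = src * (256 - f) + next * f in
 * 8.8 fixed point, where next is the row src_stride bytes below src and
 * f = source_y_fraction in 0..255. The +128 rounding is an assumption. */
static void InterpolateRowSketch(uint8_t* dst_ptr, const uint8_t* src_ptr,
                                 ptrdiff_t src_stride, int width,
                                 int source_y_fraction) {
  const uint8_t* src_next = src_ptr + src_stride;
  const int f1 = source_y_fraction;
  const int f0 = 256 - f1;
  for (int i = 0; i < width; ++i) {
    dst_ptr[i] = (uint8_t)((src_ptr[i] * f0 + src_next[i] * f1 + 128) >> 8);
  }
}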
-void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, - int width, int source_y_fraction); -void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void InterpolateRow_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); +void InterpolateRow_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, int source_y_fraction); -void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void InterpolateRow_AVX2(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, int source_y_fraction); -void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void InterpolateRow_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, int source_y_fraction); -void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void InterpolateRow_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); +void InterpolateRow_Any_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride_ptr, + int width, int source_y_fraction); -void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void InterpolateRow_Any_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride_ptr, + int width, int source_y_fraction); -void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void InterpolateRow_Any_AVX2(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride_ptr, + int width, int source_y_fraction); -void InterpolateRow_Any_DSPR2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); +void InterpolateRow_Any_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride_ptr, + int width, + int source_y_fraction); -void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr, - ptrdiff_t src_stride_ptr, - int width, int source_y_fraction); +void InterpolateRow_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); // Sobel images. 
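The SobelX rows below take three consecutive luma rows and evaluate the horizontal Sobel kernel [-1 0 1; -2 0 2; -1 0 1]; SobelY does the transpose with two rows, and SobelRow combines the two magnitudes into ARGB. A hedged C sketch of the X direction (the name is illustrative, and it assumes the caller keeps two readable columns past width, with edge handling done outside the row function):

#include <stdint.h>
#include <stdlib.h>

/* Illustrative Sobel X response: convolve the rows above/center/below with
 * [-1 0 1; -2 0 2; -1 0 1] and clamp the magnitude to 255. */
static void SobelXRowSketch(const uint8_t* src_y0, const uint8_t* src_y1,
                            const uint8_t* src_y2, uint8_t* dst_sobelx,
                            int width) {
  for (int i = 0; i < width; ++i) {
    int a = src_y0[i] - src_y0[i + 2];
    int b = src_y1[i] - src_y1[i + 2];
    int c = src_y2[i] - src_y2[i + 2];
    int sobel = abs(a + 2 * b + c);
    dst_sobelx[i] = (uint8_t)(sobel > 255 ? 255 : sobel);
  }
}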
-void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, - uint8* dst_sobelx, int width); -void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width); -void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width); -void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width); -void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width); -void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width); -void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelXYRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelXYRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); - -void ARGBPolynomialRow_C(const uint8* src_argb, - uint8* dst_argb, const float* poly, +void SobelXRow_C(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int width); +void SobelXRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int width); +void SobelXRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int width); +void SobelXRow_MSA(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int width); +void SobelYRow_C(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width); +void SobelYRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width); +void SobelYRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width); +void SobelYRow_MSA(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width); +void SobelRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); 
+void SobelRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelToPlaneRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width); +void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width); +void SobelToPlaneRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width); +void SobelToPlaneRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, int width); -void ARGBPolynomialRow_SSE2(const uint8* src_argb, - uint8* dst_argb, const float* poly, +void SobelXYRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelXYRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelXYRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelXYRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelToPlaneRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelToPlaneRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelToPlaneRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelXYRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelXYRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelXYRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); + +void ARGBPolynomialRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + const float* poly, + int width); +void ARGBPolynomialRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, + const float* poly, int width); -void ARGBPolynomialRow_AVX2(const uint8* src_argb, - uint8* dst_argb, const float* poly, +void ARGBPolynomialRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + const float* poly, int width); -void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, - const uint8* luma, uint32 lumacoeff); -void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, +// Scale and convert to half float. 
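The half-float rows declared next scale each 16-bit sample by a float and store the result as an IEEE binary16 value. A hedged sketch of that conversion for the normal range only (denormals, NaN and rounding are ignored here; the F16C variants do this in hardware, and libyuv's software kernels use other tricks):

#include <stdint.h>
#include <string.h>

/* Illustrative only: convert a float in the normal half range to binary16
 * bits by rebiasing the exponent and truncating the mantissa. */
static uint16_t FloatToHalfSketch(float f) {
  uint32_t bits;
  memcpy(&bits, &f, sizeof(bits));
  uint32_t sign = (bits >> 16) & 0x8000u;
  int32_t exp = (int32_t)((bits >> 23) & 0xff) - 127 + 15; /* rebias 8->5 */
  uint32_t mant = (bits >> 13) & 0x3ffu;                   /* top 10 bits */
  if (exp <= 0) return (uint16_t)sign;                     /* flush to zero */
  if (exp >= 31) return (uint16_t)(sign | 0x7c00u);        /* clamp to inf */
  return (uint16_t)(sign | ((uint32_t)exp << 10) | mant);
}

/* Sketch of the row contract: scale each sample, store half-float bits. */
static void HalfFloatRowSketch(const uint16_t* src, uint16_t* dst,
                               float scale, int width) {
  for (int i = 0; i < width; ++i) {
    dst[i] = FloatToHalfSketch((float)src[i] * scale);
  }
}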
+void HalfFloatRow_C(const uint16_t* src, uint16_t* dst, float scale, int width); +void HalfFloatRow_SSE2(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_SSE2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, + int width); +void HalfFloatRow_AVX2(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, + int width); +void HalfFloatRow_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloat1Row_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloat1Row_Any_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_NEON(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, + int width); +void HalfFloat1Row_NEON(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloat1Row_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, + int width); +void HalfFloatRow_MSA(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_MSA(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, + int width); +void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width); +void ByteToFloatRow_NEON(const uint8_t* src, + float* dst, + float scale, + int width); +void ByteToFloatRow_Any_NEON(const uint8_t* src_ptr, + float* dst_ptr, + float param, + int width); + +void ARGBLumaColorTableRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + const uint8_t* luma, + uint32_t lumacoeff); +void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - const uint8* luma, uint32 lumacoeff); + const uint8_t* luma, + uint32_t lumacoeff); + +float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width); +float ScaleMaxSamples_NEON(const float* src, + float* dst, + float scale, + int width); +float ScaleSumSamples_C(const float* src, float* dst, float scale, int width); +float ScaleSumSamples_NEON(const float* src, + float* dst, + float scale, + int width); +void ScaleSamples_C(const float* src, float* dst, float scale, int width); +void ScaleSamples_NEON(const float* src, float* dst, float scale, int width); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_ROW_H_ NOLINT +#endif // INCLUDE_LIBYUV_ROW_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale.h index 102158d1ab28..b937d348cabc 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_SCALE_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_SCALE_H_ #define INCLUDE_LIBYUV_SCALE_H_ #include "libyuv/basic_types.h" @@ -20,25 +20,33 @@ extern "C" { // Supported filtering. typedef enum FilterMode { - kFilterNone = 0, // Point sample; Fastest. - kFilterLinear = 1, // Filter horizontally only. + kFilterNone = 0, // Point sample; Fastest. + kFilterLinear = 1, // Filter horizontally only. kFilterBilinear = 2, // Faster than box, but lower quality scaling down. 
- kFilterBox = 3 // Highest quality. + kFilterBox = 3 // Highest quality. } FilterModeEnum; // Scale a YUV plane. LIBYUV_API -void ScalePlane(const uint8* src, int src_stride, - int src_width, int src_height, - uint8* dst, int dst_stride, - int dst_width, int dst_height, +void ScalePlane(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, enum FilterMode filtering); LIBYUV_API -void ScalePlane_16(const uint16* src, int src_stride, - int src_width, int src_height, - uint16* dst, int dst_stride, - int dst_width, int dst_height, +void ScalePlane_16(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, enum FilterMode filtering); // Scales a YUV 4:2:0 image from the src width and height to the @@ -52,44 +60,64 @@ void ScalePlane_16(const uint16* src, int src_stride, // Returns 0 if successful. LIBYUV_API -int I420Scale(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - int src_width, int src_height, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int dst_width, int dst_height, +int I420Scale(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, enum FilterMode filtering); LIBYUV_API -int I420Scale_16(const uint16* src_y, int src_stride_y, - const uint16* src_u, int src_stride_u, - const uint16* src_v, int src_stride_v, - int src_width, int src_height, - uint16* dst_y, int dst_stride_y, - uint16* dst_u, int dst_stride_u, - uint16* dst_v, int dst_stride_v, - int dst_width, int dst_height, +int I420Scale_16(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, enum FilterMode filtering); #ifdef __cplusplus // Legacy API. Deprecated. LIBYUV_API -int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, - int src_stride_y, int src_stride_u, int src_stride_v, - int src_width, int src_height, - uint8* dst_y, uint8* dst_u, uint8* dst_v, - int dst_stride_y, int dst_stride_u, int dst_stride_v, - int dst_width, int dst_height, +int Scale(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + int src_stride_y, + int src_stride_u, + int src_stride_v, + int src_width, + int src_height, + uint8_t* dst_y, + uint8_t* dst_u, + uint8_t* dst_v, + int dst_stride_y, + int dst_stride_u, + int dst_stride_v, + int dst_width, + int dst_height, LIBYUV_BOOL interpolate); -// Legacy API. Deprecated. -LIBYUV_API -int ScaleOffset(const uint8* src_i420, int src_width, int src_height, - uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset, - LIBYUV_BOOL interpolate); - // For testing, allow disabling of specialized scalers. 
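For reference, a usage sketch of the I420Scale prototype above: halving a 1280x720 frame with bilinear filtering. The dimensions, strides and function name are illustrative, and tightly packed planes (stride equal to plane width) are assumed:

#include <stdint.h>
#include "libyuv/scale.h"

/* Illustrative call: downscale an I420 frame from 1280x720 to 640x360.
 * Chroma planes are half the luma width in I420, hence the sw/2 strides. */
int ScaleFrameExample(const uint8_t* src_y, const uint8_t* src_u,
                      const uint8_t* src_v, uint8_t* dst_y,
                      uint8_t* dst_u, uint8_t* dst_v) {
  const int sw = 1280, sh = 720, dw = 640, dh = 360;
  return I420Scale(src_y, sw, src_u, sw / 2, src_v, sw / 2, sw, sh,
                   dst_y, dw, dst_u, dw / 2, dst_v, dw / 2, dw, dh,
                   kFilterBilinear);  /* returns 0 on success */
}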
LIBYUV_API void SetUseReferenceImpl(LIBYUV_BOOL use); @@ -100,4 +128,4 @@ void SetUseReferenceImpl(LIBYUV_BOOL use); } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_SCALE_H_ NOLINT +#endif // INCLUDE_LIBYUV_SCALE_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_argb.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_argb.h index b56cf5209937..7641f18e3416 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_argb.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_argb.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_ #define INCLUDE_LIBYUV_SCALE_ARGB_H_ #include "libyuv/basic_types.h" @@ -20,32 +20,52 @@ extern "C" { #endif LIBYUV_API -int ARGBScale(const uint8* src_argb, int src_stride_argb, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, +int ARGBScale(const uint8_t* src_argb, + int src_stride_argb, + int src_width, + int src_height, + uint8_t* dst_argb, + int dst_stride_argb, + int dst_width, + int dst_height, enum FilterMode filtering); // Clipped scale takes destination rectangle coordinates for clip values. LIBYUV_API -int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, +int ARGBScaleClip(const uint8_t* src_argb, + int src_stride_argb, + int src_width, + int src_height, + uint8_t* dst_argb, + int dst_stride_argb, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, enum FilterMode filtering); // Scale with YUV conversion to ARGB and clipping. LIBYUV_API -int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint32 src_fourcc, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - uint32 dst_fourcc, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, +int YUVToARGBScaleClip(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint32_t src_fourcc, + int src_width, + int src_height, + uint8_t* dst_argb, + int dst_stride_argb, + uint32_t dst_fourcc, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, enum FilterMode filtering); #ifdef __cplusplus @@ -53,4 +73,4 @@ int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y, } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_SCALE_ARGB_H_ NOLINT +#endif // INCLUDE_LIBYUV_SCALE_ARGB_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_row.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_row.h index df699e6c2282..7194ba09f842 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_row.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/scale_row.h @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_ #define INCLUDE_LIBYUV_SCALE_ROW_H_ #include "libyuv/basic_types.h" @@ -19,17 +19,20 @@ namespace libyuv { extern "C" { #endif -#if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif - // GCC >= 4.7.0 required for AVX2. #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) @@ -45,8 +48,8 @@ extern "C" { #endif // __clang__ // Visual C 2012 required for AVX2. -#if defined(_M_IX86) && !defined(__clang__) && \ - defined(_MSC_VER) && _MSC_VER >= 1700 +#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \ + _MSC_VER >= 1700 #define VISUALC_HAS_AVX2 1 #endif // VisualStudio >= 2012 @@ -72,15 +75,16 @@ extern "C" { // The following are available on all x86 platforms, but // require VS2012, clang 3.4 or gcc 4.7. // The code supports NaCL but requires a new compiler and validator. -#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \ - defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \ + defined(GCC_HAS_AVX2)) #define HAS_SCALEADDROW_AVX2 #define HAS_SCALEROWDOWN2_AVX2 #define HAS_SCALEROWDOWN4_AVX2 #endif // The following are available on Neon platforms: -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ +#if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_SCALEARGBCOLS_NEON #define HAS_SCALEARGBROWDOWN2_NEON @@ -93,33 +97,51 @@ extern "C" { #define HAS_SCALEARGBFILTERCOLS_NEON #endif -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ - defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_SCALEROWDOWN2_DSPR2 -#define HAS_SCALEROWDOWN4_DSPR2 -#define HAS_SCALEROWDOWN34_DSPR2 -#define HAS_SCALEROWDOWN38_DSPR2 +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#define HAS_SCALEADDROW_MSA +#define HAS_SCALEARGBCOLS_MSA +#define HAS_SCALEARGBFILTERCOLS_MSA +#define HAS_SCALEARGBROWDOWN2_MSA +#define HAS_SCALEARGBROWDOWNEVEN_MSA +#define HAS_SCALEFILTERCOLS_MSA +#define HAS_SCALEROWDOWN2_MSA +#define HAS_SCALEROWDOWN34_MSA +#define HAS_SCALEROWDOWN38_MSA +#define HAS_SCALEROWDOWN4_MSA #endif // Scale ARGB vertically with bilinear interpolation. 
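ScalePlaneVertical, declared next, steps a 16.16 fixed-point source coordinate y by dy per output row and blends the two nearest source rows. A hedged sketch of that loop for one plane (the name is illustrative; the real implementation dispatches to the InterpolateRow variants from row.h, and src_height >= 2 is assumed here):

#include <stdint.h>

/* Illustrative vertical bilinear scale: for each output row, take the
 * source row at y>>16 and blend toward the next row with the fractional
 * part of y. Clamps at the bottom edge so row1 stays in range. */
static void ScaleVerticalSketch(const uint8_t* src, int src_stride,
                                int src_height, uint8_t* dst, int dst_stride,
                                int row_bytes, int dst_height,
                                int y, int dy) { /* y, dy: 16.16 fixed point */
  for (int j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
    if (yi > src_height - 2) yi = src_height - 2; /* keep yi + 1 valid */
    const uint8_t* row0 = src + yi * src_stride;
    const uint8_t* row1 = row0 + src_stride;
    int f = (y >> 8) & 255; /* top 8 bits of the fraction */
    for (int i = 0; i < row_bytes; ++i) {
      dst[i] = (uint8_t)((row0[i] * (256 - f) + row1[i] * f + 128) >> 8);
    }
    dst += dst_stride;
    y += dy;
  }
}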
void ScalePlaneVertical(int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int y, int dy, - int bpp, enum FilterMode filtering); + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int y, + int dy, + int bpp, + enum FilterMode filtering); void ScalePlaneVertical_16(int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint16* src_argb, uint16* dst_argb, - int x, int y, int dy, - int wpp, enum FilterMode filtering); + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_argb, + uint16_t* dst_argb, + int x, + int y, + int dy, + int wpp, + enum FilterMode filtering); // Simplify the filtering based on scale factors. -enum FilterMode ScaleFilterReduce(int src_width, int src_height, - int dst_width, int dst_height, +enum FilterMode ScaleFilterReduce(int src_width, + int src_height, + int dst_width, + int dst_height, enum FilterMode filtering); // Divide num by div and return as 16.16 fixed point result. @@ -137,367 +159,786 @@ int FixedDiv1_X86(int num, int div); #endif // Compute slope values for stepping. -void ScaleSlope(int src_width, int src_height, - int dst_width, int dst_height, +void ScaleSlope(int src_width, + int src_height, + int dst_width, + int dst_height, enum FilterMode filtering, - int* x, int* y, int* dx, int* dy); + int* x, + int* y, + int* dx, + int* dy); -void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* d, int dst_width); -void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* d, int dst_width); -void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx); -void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int, int); -void ScaleColsUp2_16_C(uint16* 
dst_ptr, const uint16* src_ptr, - int dst_width, int, int); -void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx); -void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx); -void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown38_3_Box_C(const uint8* src_ptr, +void ScaleRowDown2_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown2Linear_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown2Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr, - ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width); -void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width); -void ScaleARGBRowDown2_C(const uint8* src_argb, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown4_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown4_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown4Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown4Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown34_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown34_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Linear_C(const uint8* src_argb, + uint16_t* dst, + int dst_width); +void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width); +void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride, + uint16_t* d, + int dst_width); +void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width); +void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* d, + int dst_width); +void ScaleCols_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void 
ScaleCols_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleColsUp2_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int, + int); +void ScaleColsUp2_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + int dst_width, + int, + int); +void ScaleFilterCols_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleFilterCols_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleFilterCols64_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x32, + int dx); +void ScaleFilterCols64_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + int dst_width, + int x32, + int dx); +void ScaleRowDown38_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown38_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + int dst_width); +void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleAddRow_16_C(const uint16_t* src_ptr, + uint32_t* dst_ptr, + int src_width); +void ScaleARGBRowDown2_C(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Box_C(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEven_C(const uint8_t* src_argb, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_C(const uint8* src_argb, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int, int); -void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); + uint8_t* dst_argb, + int dst_width); +void ScaleARGBCols_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBCols64_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x32, + int dx); +void ScaleARGBColsUp2_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int, + int); +void ScaleARGBFilterCols_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols64_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x32, + int dx); // Specialized scalers for x86. 
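The column scalers above share one convention: x is the 16.16 fixed-point source column and dx the per-output-pixel step, with the FilterCols variants blending the two neighbouring columns. A hedged C sketch of that blend, before the SIMD specializations (the real row functions differ in rounding and tail handling):

  static void ScaleFilterColsSketch(uint8_t* dst, const uint8_t* src,
                                    int dst_width, int x, int dx) {
    int j;
    for (j = 0; j < dst_width; ++j) {
      int xi = x >> 16;    /* integer source column */
      int fx = x & 0xffff; /* 16-bit fraction toward column xi + 1 */
      dst[j] = (uint8_t)((src[xi] * (65536 - fx) + src[xi + 1] * fx) >> 16);
      x += dx;
    }
  }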
-void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown2_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); -void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void 
ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Linear_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Odd_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Linear_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Odd_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); -void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t 
src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_3_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + uint8_t* dst_ptr, + int dst_width); -void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width); - -void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); +void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleAddRow_Any_SSE2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width); +void ScaleAddRow_Any_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width); +void ScaleFilterCols_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleColsUp2_SSE2(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); // ARGB Column functions -void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); +void ScaleARGBCols_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols_Any_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleARGBCols_Any_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols_MSA(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBCols_MSA(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols_Any_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleARGBCols_Any_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); // ARGB Row functions -void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t 
src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleARGBRowDown2_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2_Any_SSE2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Linear_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Box_Any_SSE2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Linear_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Linear_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); -void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, uint8* dst_argb, int dst_width); -void 
ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, +void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEven_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEvenBox_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEven_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEvenBox_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_ptr, + int dst_width); // ScaleRowDown2Box also used by planar functions // NEON downscalers with interpolation. // Note - not static due to reuse in convert for 444 to 420. 
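For orientation: the plain RowDown2 variants point-sample every other pixel, Linear blends within one row, and Box averages the full 2x2 source block. Per output byte, the Box reduction in the C reference amounts to:

  /* s = adjacent pair in the top row, t = the pair below it. */
  dst[x] = (uint8_t)((s[0] + s[1] + t[0] + t[1] + 2) >> 2);

where the + 2 makes the >> 2 round to nearest; the NEON declarations below are vectorized versions of these same reductions.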
-void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); +void ScaleRowDown2_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); -void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown4_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); // Down scale from 4 to 3 pixels. Use the neon multilane read/write // to load up the every 4th pixel into a 4 different registers. // Point samples 32 pixels to 24 pixels. -void ScaleRowDown34_NEON(const uint8* src_ptr, +void ScaleRowDown34_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + uint8_t* dst_ptr, + int dst_width); // 32 -> 12 -void ScaleRowDown38_NEON(const uint8* src_ptr, +void ScaleRowDown38_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + uint8_t* dst_ptr, + int dst_width); // 32x3 -> 12x1 -void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, +void ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + uint8_t* dst_ptr, + int dst_width); // 32x2 -> 12x1 -void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, +void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + uint8_t* dst_ptr, + int dst_width); -void ScaleRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown2_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* 
dst_ptr, + int dst_width); +void ScaleRowDown2Linear_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Odd_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); // 32 -> 12 -void ScaleRowDown38_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown38_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); // 32x3 -> 12x1 -void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown38_3_Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); // 32x2 -> 12x1 -void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown38_2_Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); -void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width); +void ScaleAddRow_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleAddRow_Any_NEON(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width); -void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); +void ScaleFilterCols_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); -void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); +void ScaleFilterCols_Any_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); -void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown2_MSA(const uint8_t* 
src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Linear_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown4_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown4Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown38_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown38_2_Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleFilterCols_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleRowDown34_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width); +void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width); + +void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Linear_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_3_Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleAddRow_Any_MSA(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width); +void ScaleFilterCols_Any_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleRowDown34_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_SCALE_ROW_H_ NOLINT +#endif // INCLUDE_LIBYUV_SCALE_ROW_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/version.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/version.h index 0fbdc022d579..7022785d8cad 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/version.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/version.h @@ -8,9 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1616 +#define LIBYUV_VERSION 1711 -#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT +#endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/video_common.h b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/video_common.h index ad934e424190..bcef378b5a4e 100644 --- a/media/libvpx/libvpx/third_party/libyuv/include/libyuv/video_common.h +++ b/media/libvpx/libvpx/third_party/libyuv/include/libyuv/video_common.h @@ -10,7 +10,7 @@ // Common definitions for video, including fourcc and VideoFormat. -#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ // NOLINT +#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ #define INCLUDE_LIBYUV_VIDEO_COMMON_H_ #include "libyuv/basic_types.h" @@ -28,13 +28,13 @@ extern "C" { // Needs to be a macro otherwise the OS X compiler complains when the kFormat* // constants are used in a switch. #ifdef __cplusplus -#define FOURCC(a, b, c, d) ( \ - (static_cast(a)) | (static_cast(b) << 8) | \ - (static_cast(c) << 16) | (static_cast(d) << 24)) +#define FOURCC(a, b, c, d) \ + ((static_cast(a)) | (static_cast(b) << 8) | \ + (static_cast(c) << 16) | (static_cast(d) << 24)) #else -#define FOURCC(a, b, c, d) ( \ - ((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \ - ((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */ +#define FOURCC(a, b, c, d) \ + (((uint32_t)(a)) | ((uint32_t)(b) << 8) | /* NOLINT */ \ + ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) /* NOLINT */ #endif // Some pages discussing FourCC codes: @@ -53,38 +53,33 @@ enum FourCC { FOURCC_I420 = FOURCC('I', '4', '2', '0'), FOURCC_I422 = FOURCC('I', '4', '2', '2'), FOURCC_I444 = FOURCC('I', '4', '4', '4'), - FOURCC_I411 = FOURCC('I', '4', '1', '1'), FOURCC_I400 = FOURCC('I', '4', '0', '0'), FOURCC_NV21 = FOURCC('N', 'V', '2', '1'), FOURCC_NV12 = FOURCC('N', 'V', '1', '2'), FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'), FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'), + FOURCC_H010 = FOURCC('H', '0', '1', '0'), // unofficial fourcc. 10 bit lsb - // 2 Secondary YUV formats: row biplanar. + // 1 Secondary YUV format: row biplanar. FOURCC_M420 = FOURCC('M', '4', '2', '0'), - FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated. - // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp. + // 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'), FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'), + FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010. + FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit FOURCC_24BG = FOURCC('2', '4', 'B', 'G'), - FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), + FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'), FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE. FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE. FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE. - // 4 Secondary RGB formats: 4 Bayer Patterns. deprecated. - FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'), - FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'), - FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'), - FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'), - // 1 Primary Compressed YUV format. FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), - // 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. + // 7 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. 
FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'), FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'), FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), @@ -112,7 +107,13 @@ enum FourCC { FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP. FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO. - // 1 Auxiliary compressed YUV format set aside for capturer. + // deprecated formats. Not supported, but defined for backward compatibility. + FOURCC_I411 = FOURCC('I', '4', '1', '1'), + FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), + FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'), + FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'), + FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'), + FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'), FOURCC_H264 = FOURCC('H', '2', '6', '4'), // Match any fourcc. @@ -136,8 +137,10 @@ enum FourCCBpp { FOURCC_BPP_BGRA = 32, FOURCC_BPP_ABGR = 32, FOURCC_BPP_RGBA = 32, + FOURCC_BPP_AR30 = 32, + FOURCC_BPP_AB30 = 32, FOURCC_BPP_24BG = 24, - FOURCC_BPP_RAW = 24, + FOURCC_BPP_RAW = 24, FOURCC_BPP_RGBP = 16, FOURCC_BPP_RGBO = 16, FOURCC_BPP_R444 = 16, @@ -152,6 +155,7 @@ enum FourCCBpp { FOURCC_BPP_J420 = 12, FOURCC_BPP_J400 = 8, FOURCC_BPP_H420 = 12, + FOURCC_BPP_H010 = 24, FOURCC_BPP_MJPG = 0, // 0 means unknown. FOURCC_BPP_H264 = 0, FOURCC_BPP_IYUV = 12, @@ -170,15 +174,15 @@ enum FourCCBpp { FOURCC_BPP_CM24 = 24, // Match any fourcc. - FOURCC_BPP_ANY = 0, // 0 means unknown. + FOURCC_BPP_ANY = 0, // 0 means unknown. }; // Converts fourcc aliases into canonical ones. -LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc); +LIBYUV_API uint32_t CanonicalFourCC(uint32_t fourcc); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_ NOLINT +#endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_ diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare.cc index e3846bdfdd11..50e3abd0556e 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/compare.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/compare.cc @@ -29,10 +29,10 @@ extern "C" { // hash seed of 5381 recommended. LIBYUV_API -uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { +uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) { const int kBlockSize = 1 << 15; // 32768; int remainder; - uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = + uint32_t (*HashDjb2_SSE)(const uint8_t* src, int count, uint32_t seed) = HashDjb2_C; #if defined(HAS_HASHDJB2_SSE41) if (TestCpuFlag(kCpuHasSSE41)) { @@ -45,25 +45,25 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { } #endif - while (count >= (uint64)(kBlockSize)) { + while (count >= (uint64_t)(kBlockSize)) { seed = HashDjb2_SSE(src, kBlockSize, seed); src += kBlockSize; count -= kBlockSize; } - remainder = (int)(count) & ~15; + remainder = (int)count & ~15; if (remainder) { seed = HashDjb2_SSE(src, remainder, seed); src += remainder; count -= remainder; } - remainder = (int)(count) & 15; + remainder = (int)count & 15; if (remainder) { seed = HashDjb2_C(src, remainder, seed); } return seed; } -static uint32 ARGBDetectRow_C(const uint8* argb, int width) { +static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) { int x; for (x = 0; x < width - 1; x += 2) { if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB. @@ -94,8 +94,11 @@ static uint32 ARGBDetectRow_C(const uint8* argb, int width) { // Scan an opaque argb image and return fourcc based on alpha offset. // Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown. 
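A minimal usage sketch for the detector declared below (frame, width, and height are illustrative names, and the stride assumes tightly packed 4-byte pixels):

  uint32_t fourcc = ARGBDetect(frame, width * 4, width, height);
  if (fourcc == 0) {
    /* No byte position held a uniform alpha of 255, so ARGB vs. BGRA
       byte order could not be inferred. */
  }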
LIBYUV_API -uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) { - uint32 fourcc = 0; +uint32_t ARGBDetect(const uint8_t* argb, + int stride_argb, + int width, + int height) { + uint32_t fourcc = 0; int h; // Coalesce rows. @@ -111,19 +114,80 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) { return fourcc; } +// NEON version accumulates in 16 bit shorts which overflow at 65536 bytes. +// So actual maximum is 1 less loop, which is 64436 - 32 bytes. + +LIBYUV_API +uint64_t ComputeHammingDistance(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + const int kBlockSize = 1 << 15; // 32768; + const int kSimdSize = 64; + // SIMD for multiple of 64, and C for remainder + int remainder = count & (kBlockSize - 1) & ~(kSimdSize - 1); + uint64_t diff = 0; + int i; + uint32_t (*HammingDistance)(const uint8_t* src_a, const uint8_t* src_b, + int count) = HammingDistance_C; +#if defined(HAS_HAMMINGDISTANCE_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + HammingDistance = HammingDistance_NEON; + } +#endif +#if defined(HAS_HAMMINGDISTANCE_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + HammingDistance = HammingDistance_SSSE3; + } +#endif +#if defined(HAS_HAMMINGDISTANCE_SSE42) + if (TestCpuFlag(kCpuHasSSE42)) { + HammingDistance = HammingDistance_SSE42; + } +#endif +#if defined(HAS_HAMMINGDISTANCE_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + HammingDistance = HammingDistance_AVX2; + } +#endif +#if defined(HAS_HAMMINGDISTANCE_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + HammingDistance = HammingDistance_MSA; + } +#endif +#ifdef _OPENMP +#pragma omp parallel for reduction(+ : diff) +#endif + for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { + diff += HammingDistance(src_a + i, src_b + i, kBlockSize); + } + src_a += count & ~(kBlockSize - 1); + src_b += count & ~(kBlockSize - 1); + if (remainder) { + diff += HammingDistance(src_a, src_b, remainder); + src_a += remainder; + src_b += remainder; + } + remainder = count & (kSimdSize - 1); + if (remainder) { + diff += HammingDistance_C(src_a, src_b, remainder); + } + return diff; +} + // TODO(fbarchard): Refactor into row function. LIBYUV_API -uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, - int count) { +uint64_t ComputeSumSquareError(const uint8_t* src_a, + const uint8_t* src_b, + int count) { // SumSquareError returns values 0 to 65535 for each squared difference. - // Up to 65536 of those can be summed and remain within a uint32. - // After each block of 65536 pixels, accumulate into a uint64. + // Up to 65536 of those can be summed and remain within a uint32_t. + // After each block of 65536 pixels, accumulate into a uint64_t. 
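// Worked check of the bound above: one squared byte difference is at most
// 255 * 255 = 65025, and a 65536-pixel block therefore sums to at most
// 65536 * 65025 = 4261478400, under the uint32_t limit of 4294967295,
// so per-block accumulation cannot overflow.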
const int kBlockSize = 65536; int remainder = count & (kBlockSize - 1) & ~31; - uint64 sse = 0; + uint64_t sse = 0; int i; - uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = - SumSquareError_C; + uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b, + int count) = SumSquareError_C; #if defined(HAS_SUMSQUAREERROR_NEON) if (TestCpuFlag(kCpuHasNEON)) { SumSquareError = SumSquareError_NEON; @@ -141,8 +205,13 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, SumSquareError = SumSquareError_AVX2; } #endif +#if defined(HAS_SUMSQUAREERROR_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + SumSquareError = SumSquareError_MSA; + } +#endif #ifdef _OPENMP -#pragma omp parallel for reduction(+: sse) +#pragma omp parallel for reduction(+ : sse) #endif for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { sse += SumSquareError(src_a + i, src_b + i, kBlockSize); @@ -162,14 +231,16 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, } LIBYUV_API -uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height) { - uint64 sse = 0; +uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a, + int stride_a, + const uint8_t* src_b, + int stride_b, + int width, + int height) { + uint64_t sse = 0; int h; // Coalesce rows. - if (stride_a == width && - stride_b == width) { + if (stride_a == width && stride_b == width) { width *= height; height = 1; stride_a = stride_b = 0; @@ -183,66 +254,76 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, } LIBYUV_API -double SumSquareErrorToPsnr(uint64 sse, uint64 count) { +double SumSquareErrorToPsnr(uint64_t sse, uint64_t count) { double psnr; if (sse > 0) { - double mse = (double)(count) / (double)(sse); + double mse = (double)count / (double)sse; psnr = 10.0 * log10(255.0 * 255.0 * mse); } else { - psnr = kMaxPsnr; // Limit to prevent divide by 0 + psnr = kMaxPsnr; // Limit to prevent divide by 0 } - if (psnr > kMaxPsnr) + if (psnr > kMaxPsnr) { psnr = kMaxPsnr; + } return psnr; } LIBYUV_API -double CalcFramePsnr(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height) { - const uint64 samples = width * height; - const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a, - src_b, stride_b, - width, height); +double CalcFramePsnr(const uint8_t* src_a, + int stride_a, + const uint8_t* src_b, + int stride_b, + int width, + int height) { + const uint64_t samples = (uint64_t)width * (uint64_t)height; + const uint64_t sse = ComputeSumSquareErrorPlane(src_a, stride_a, src_b, + stride_b, width, height); return SumSquareErrorToPsnr(sse, samples); } LIBYUV_API -double I420Psnr(const uint8* src_y_a, int stride_y_a, - const uint8* src_u_a, int stride_u_a, - const uint8* src_v_a, int stride_v_a, - const uint8* src_y_b, int stride_y_b, - const uint8* src_u_b, int stride_u_b, - const uint8* src_v_b, int stride_v_b, - int width, int height) { - const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a, - src_y_b, stride_y_b, - width, height); +double I420Psnr(const uint8_t* src_y_a, + int stride_y_a, + const uint8_t* src_u_a, + int stride_u_a, + const uint8_t* src_v_a, + int stride_v_a, + const uint8_t* src_y_b, + int stride_y_b, + const uint8_t* src_u_b, + int stride_u_b, + const uint8_t* src_v_b, + int stride_v_b, + int width, + int height) { + const uint64_t sse_y = ComputeSumSquareErrorPlane( + src_y_a, stride_y_a, src_y_b, stride_y_b, width, height); const int 
width_uv = (width + 1) >> 1; const int height_uv = (height + 1) >> 1; - const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a, - src_u_b, stride_u_b, - width_uv, height_uv); - const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a, - src_v_b, stride_v_b, - width_uv, height_uv); - const uint64 samples = width * height + 2 * (width_uv * height_uv); - const uint64 sse = sse_y + sse_u + sse_v; + const uint64_t sse_u = ComputeSumSquareErrorPlane( + src_u_a, stride_u_a, src_u_b, stride_u_b, width_uv, height_uv); + const uint64_t sse_v = ComputeSumSquareErrorPlane( + src_v_a, stride_v_a, src_v_b, stride_v_b, width_uv, height_uv); + const uint64_t samples = (uint64_t)width * (uint64_t)height + + 2 * ((uint64_t)width_uv * (uint64_t)height_uv); + const uint64_t sse = sse_y + sse_u + sse_v; return SumSquareErrorToPsnr(sse, samples); } -static const int64 cc1 = 26634; // (64^2*(.01*255)^2 -static const int64 cc2 = 239708; // (64^2*(.03*255)^2 +static const int64_t cc1 = 26634; // (64^2*(.01*255)^2 +static const int64_t cc2 = 239708; // (64^2*(.03*255)^2 -static double Ssim8x8_C(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b) { - int64 sum_a = 0; - int64 sum_b = 0; - int64 sum_sq_a = 0; - int64 sum_sq_b = 0; - int64 sum_axb = 0; +static double Ssim8x8_C(const uint8_t* src_a, + int stride_a, + const uint8_t* src_b, + int stride_b) { + int64_t sum_a = 0; + int64_t sum_b = 0; + int64_t sum_sq_a = 0; + int64_t sum_sq_b = 0; + int64_t sum_axb = 0; int i; for (i = 0; i < 8; ++i) { @@ -260,22 +341,22 @@ static double Ssim8x8_C(const uint8* src_a, int stride_a, } { - const int64 count = 64; + const int64_t count = 64; // scale the constants by number of pixels - const int64 c1 = (cc1 * count * count) >> 12; - const int64 c2 = (cc2 * count * count) >> 12; + const int64_t c1 = (cc1 * count * count) >> 12; + const int64_t c2 = (cc2 * count * count) >> 12; - const int64 sum_a_x_sum_b = sum_a * sum_b; + const int64_t sum_a_x_sum_b = sum_a * sum_b; - const int64 ssim_n = (2 * sum_a_x_sum_b + c1) * - (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2); + const int64_t ssim_n = (2 * sum_a_x_sum_b + c1) * + (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2); - const int64 sum_a_sq = sum_a*sum_a; - const int64 sum_b_sq = sum_b*sum_b; + const int64_t sum_a_sq = sum_a * sum_a; + const int64_t sum_b_sq = sum_b * sum_b; - const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) * - (count * sum_sq_a - sum_a_sq + - count * sum_sq_b - sum_b_sq + c2); + const int64_t ssim_d = + (sum_a_sq + sum_b_sq + c1) * + (count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2); if (ssim_d == 0.0) { return DBL_MAX; @@ -288,13 +369,16 @@ static double Ssim8x8_C(const uint8* src_a, int stride_a, // on the 4x4 pixel grid. Such arrangement allows the windows to overlap // block boundaries to penalize blocking artifacts. 
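For reference, the ssim_n / ssim_d ratio computed above is the standard SSIM expression

  ((2*mu_a*mu_b + C1) * (2*cov_ab + C2)) /
  ((mu_a^2 + mu_b^2 + C1) * (var_a + var_b + C2))

with every factor scaled by count^2 (count = 64) so the arithmetic stays in int64_t: sum_a = count * mu_a, count * sum_axb - sum_a * sum_b = count^2 * cov_ab, and the >> 12 in c1 and c2 divides out the 64^2 baked into cc1 and cc2, leaving the usual (0.01 * 255)^2 and (0.03 * 255)^2 constants scaled by count^2. The common count^4 cancels in the ratio.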
LIBYUV_API -double CalcFrameSsim(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height) { +double CalcFrameSsim(const uint8_t* src_a, + int stride_a, + const uint8_t* src_b, + int stride_b, + int width, + int height) { int samples = 0; double ssim_total = 0; - double (*Ssim8x8)(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b) = Ssim8x8_C; + double (*Ssim8x8)(const uint8_t* src_a, int stride_a, const uint8_t* src_b, + int stride_b) = Ssim8x8_C; // sample point start with each 4x4 location int i; @@ -314,22 +398,27 @@ double CalcFrameSsim(const uint8* src_a, int stride_a, } LIBYUV_API -double I420Ssim(const uint8* src_y_a, int stride_y_a, - const uint8* src_u_a, int stride_u_a, - const uint8* src_v_a, int stride_v_a, - const uint8* src_y_b, int stride_y_b, - const uint8* src_u_b, int stride_u_b, - const uint8* src_v_b, int stride_v_b, - int width, int height) { - const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a, - src_y_b, stride_y_b, width, height); +double I420Ssim(const uint8_t* src_y_a, + int stride_y_a, + const uint8_t* src_u_a, + int stride_u_a, + const uint8_t* src_v_a, + int stride_v_a, + const uint8_t* src_y_b, + int stride_y_b, + const uint8_t* src_u_b, + int stride_u_b, + const uint8_t* src_v_b, + int stride_v_b, + int width, + int height) { + const double ssim_y = + CalcFrameSsim(src_y_a, stride_y_a, src_y_b, stride_y_b, width, height); const int width_uv = (width + 1) >> 1; const int height_uv = (height + 1) >> 1; - const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a, - src_u_b, stride_u_b, + const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a, src_u_b, stride_u_b, width_uv, height_uv); - const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a, - src_v_b, stride_v_b, + const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a, src_v_b, stride_v_b, width_uv, height_uv); return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v); } diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare_common.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare_common.cc index 42fc58935432..d4b170ad9862 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/compare_common.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/compare_common.cc @@ -17,20 +17,80 @@ namespace libyuv { extern "C" { #endif -uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse = 0u; +#if ORIGINAL_OPT +uint32_t HammingDistance_C1(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; + + int i; + for (i = 0; i < count; ++i) { + int x = src_a[i] ^ src_b[i]; + if (x & 1) + ++diff; + if (x & 2) + ++diff; + if (x & 4) + ++diff; + if (x & 8) + ++diff; + if (x & 16) + ++diff; + if (x & 32) + ++diff; + if (x & 64) + ++diff; + if (x & 128) + ++diff; + } + return diff; +} +#endif + +// Hakmem method for hamming distance. 
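// The masks below implement a parallel bit count: x - ((x >> 1) & 0x55555555)
// leaves a 2-bit count per bit pair, the 0x33333333 step adds adjacent pairs
// into per-nibble counts, and (u + (u >> 4)) & 0x0f0f0f0f followed by the
// multiply by 0x01010101 and >> 24 sums the per-byte counts into the top
// byte. Worked on one byte, x = 0xff: 0xff - 0x55 = 0xaa;
// ((0xaa >> 2) & 0x33) + (0xaa & 0x33) = 0x22 + 0x22 = 0x44;
// (0x44 + 0x04) & 0x0f = 8, the expected popcount of 0xff.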
+uint32_t HammingDistance_C(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; + + int i; + for (i = 0; i < count - 3; i += 4) { + uint32_t x = *((const uint32_t*)src_a) ^ *((const uint32_t*)src_b); + uint32_t u = x - ((x >> 1) & 0x55555555); + u = ((u >> 2) & 0x33333333) + (u & 0x33333333); + diff += ((((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24); + src_a += 4; + src_b += 4; + } + + for (; i < count; ++i) { + uint32_t x = *src_a ^ *src_b; + uint32_t u = x - ((x >> 1) & 0x55); + u = ((u >> 2) & 0x33) + (u & 0x33); + diff += (u + (u >> 4)) & 0x0f; + src_a += 1; + src_b += 1; + } + + return diff; +} + +uint32_t SumSquareError_C(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t sse = 0u; int i; for (i = 0; i < count; ++i) { int diff = src_a[i] - src_b[i]; - sse += (uint32)(diff * diff); + sse += (uint32_t)(diff * diff); } return sse; } // hash seed of 5381 recommended. // Internal C version of HashDjb2 with int sized count for efficiency. -uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) { - uint32 hash = seed; +uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed) { + uint32_t hash = seed; int i; for (i = 0; i < count; ++i) { hash += (hash << 5) + src[i]; diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare_gcc.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare_gcc.cc index 1b83edb166a1..676527c1b1b3 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/compare_gcc.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/compare_gcc.cc @@ -22,124 +22,334 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse; - asm volatile ( - "pxor %%xmm0,%%xmm0 \n" - "pxor %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "lea " MEMLEA(0x10, 0) ",%0 \n" - "movdqu " MEMACCESS(1) ",%%xmm2 \n" - "lea " MEMLEA(0x10, 1) ",%1 \n" - "movdqa %%xmm1,%%xmm3 \n" - "psubusb %%xmm2,%%xmm1 \n" - "psubusb %%xmm3,%%xmm2 \n" - "por %%xmm2,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "punpckhbw %%xmm5,%%xmm2 \n" - "pmaddwd %%xmm1,%%xmm1 \n" - "pmaddwd %%xmm2,%%xmm2 \n" - "paddd %%xmm1,%%xmm0 \n" - "paddd %%xmm2,%%xmm0 \n" - "sub $0x10,%2 \n" - "jg 1b \n" +#if defined(__x86_64__) +uint32_t HammingDistance_SSE42(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint64_t diff = 0u; - "pshufd $0xee,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "pshufd $0x1,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "movd %%xmm0,%3 \n" + asm volatile( + "xor %3,%3 \n" + "xor %%r8,%%r8 \n" + "xor %%r9,%%r9 \n" + "xor %%r10,%%r10 \n" - : "+r"(src_a), // %0 - "+r"(src_b), // %1 - "+r"(count), // %2 - "=g"(sse) // %3 - :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + // Process 32 bytes per loop. 
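// Four independent xor + popcnt chains accumulate into %3, r8, r9, and
// r10 and are only folded together after the loop, so successive popcnt
// results do not serialize on a single destination register.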
+ LABELALIGN + "1: \n" + "mov (%0),%%rcx \n" + "mov 0x8(%0),%%rdx \n" + "xor (%1),%%rcx \n" + "xor 0x8(%1),%%rdx \n" + "popcnt %%rcx,%%rcx \n" + "popcnt %%rdx,%%rdx \n" + "mov 0x10(%0),%%rsi \n" + "mov 0x18(%0),%%rdi \n" + "xor 0x10(%1),%%rsi \n" + "xor 0x18(%1),%%rdi \n" + "popcnt %%rsi,%%rsi \n" + "popcnt %%rdi,%%rdi \n" + "add $0x20,%0 \n" + "add $0x20,%1 \n" + "add %%rcx,%3 \n" + "add %%rdx,%%r8 \n" + "add %%rsi,%%r9 \n" + "add %%rdi,%%r10 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + + "add %%r8, %3 \n" + "add %%r9, %3 \n" + "add %%r10, %3 \n" + : "+r"(src_a), // %0 + "+r"(src_b), // %1 + "+r"(count), // %2 + "=r"(diff) // %3 + : + : "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10"); + + return static_cast<uint32_t>(diff); +} +#else +uint32_t HammingDistance_SSE42(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; + + asm volatile( + // Process 16 bytes per loop. + LABELALIGN + "1: \n" + "mov (%0),%%ecx \n" + "mov 0x4(%0),%%edx \n" + "xor (%1),%%ecx \n" + "xor 0x4(%1),%%edx \n" + "popcnt %%ecx,%%ecx \n" + "add %%ecx,%3 \n" + "popcnt %%edx,%%edx \n" + "add %%edx,%3 \n" + "mov 0x8(%0),%%ecx \n" + "mov 0xc(%0),%%edx \n" + "xor 0x8(%1),%%ecx \n" + "xor 0xc(%1),%%edx \n" + "popcnt %%ecx,%%ecx \n" + "add %%ecx,%3 \n" + "popcnt %%edx,%%edx \n" + "add %%edx,%3 \n" + "add $0x10,%0 \n" + "add $0x10,%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_a), // %0 + "+r"(src_b), // %1 + "+r"(count), // %2 + "+r"(diff) // %3 + : + : "memory", "cc", "ecx", "edx"); + + return diff; +} +#endif + +static const vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15}; +static const vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; + +uint32_t HammingDistance_SSSE3(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; + + asm volatile( + "movdqa %4,%%xmm2 \n" + "movdqa %5,%%xmm3 \n" + "pxor %%xmm0,%%xmm0 \n" + "pxor %%xmm1,%%xmm1 \n" + "sub %0,%1 \n" + + LABELALIGN + "1: \n" + "movdqa (%0),%%xmm4 \n" + "movdqa 0x10(%0), %%xmm5 \n" + "pxor (%0,%1), %%xmm4 \n" + "movdqa %%xmm4,%%xmm6 \n" + "pand %%xmm2,%%xmm6 \n" + "psrlw $0x4,%%xmm4 \n" + "movdqa %%xmm3,%%xmm7 \n" + "pshufb %%xmm6,%%xmm7 \n" + "pand %%xmm2,%%xmm4 \n" + "movdqa %%xmm3,%%xmm6 \n" + "pshufb %%xmm4,%%xmm6 \n" + "paddb %%xmm7,%%xmm6 \n" + "pxor 0x10(%0,%1),%%xmm5 \n" + "add $0x20,%0 \n" + "movdqa %%xmm5,%%xmm4 \n" + "pand %%xmm2,%%xmm5 \n" + "psrlw $0x4,%%xmm4 \n" + "movdqa %%xmm3,%%xmm7 \n" + "pshufb %%xmm5,%%xmm7 \n" + "pand %%xmm2,%%xmm4 \n" + "movdqa %%xmm3,%%xmm5 \n" + "pshufb %%xmm4,%%xmm5 \n" + "paddb %%xmm7,%%xmm5 \n" + "paddb %%xmm5,%%xmm6 \n" + "psadbw %%xmm1,%%xmm6 \n" + "paddd %%xmm6,%%xmm0 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + + "pshufd $0xaa,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "movd %%xmm0, %3 \n" + : "+r"(src_a), // %0 + "+r"(src_b), // %1 + "+r"(count), // %2 + "=r"(diff) // %3 + : "m"(kNibbleMask), // %4 + "m"(kBitCount) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); + + return diff; +} + +#ifdef HAS_HAMMINGDISTANCE_AVX2 +uint32_t HammingDistance_AVX2(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; + + asm volatile( + "vbroadcastf128 %4,%%ymm2 \n" + "vbroadcastf128 %5,%%ymm3 \n" + "vpxor %%ymm0,%%ymm0,%%ymm0 \n" + "vpxor %%ymm1,%%ymm1,%%ymm1 \n" + "sub %0,%1 \n" + + LABELALIGN + "1: \n" + "vmovdqa (%0),%%ymm4 \n" + "vmovdqa 0x20(%0), %%ymm5 \n" + "vpxor (%0,%1), %%ymm4, %%ymm4 \n" + "vpand %%ymm2,%%ymm4,%%ymm6 \n" + "vpsrlw
$0x4,%%ymm4,%%ymm4 \n" + "vpshufb %%ymm6,%%ymm3,%%ymm6 \n" + "vpand %%ymm2,%%ymm4,%%ymm4 \n" + "vpshufb %%ymm4,%%ymm3,%%ymm4 \n" + "vpaddb %%ymm4,%%ymm6,%%ymm6 \n" + "vpxor 0x20(%0,%1),%%ymm5,%%ymm4 \n" + "add $0x40,%0 \n" + "vpand %%ymm2,%%ymm4,%%ymm5 \n" + "vpsrlw $0x4,%%ymm4,%%ymm4 \n" + "vpshufb %%ymm5,%%ymm3,%%ymm5 \n" + "vpand %%ymm2,%%ymm4,%%ymm4 \n" + "vpshufb %%ymm4,%%ymm3,%%ymm4 \n" + "vpaddb %%ymm5,%%ymm4,%%ymm4 \n" + "vpaddb %%ymm6,%%ymm4,%%ymm4 \n" + "vpsadbw %%ymm1,%%ymm4,%%ymm4 \n" + "vpaddd %%ymm0,%%ymm4,%%ymm0 \n" + "sub $0x40,%2 \n" + "jg 1b \n" + + "vpermq $0xb1,%%ymm0,%%ymm1 \n" + "vpaddd %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xaa,%%ymm0,%%ymm1 \n" + "vpaddd %%ymm1,%%ymm0,%%ymm0 \n" + "vmovd %%xmm0, %3 \n" + "vzeroupper \n" + : "+r"(src_a), // %0 + "+r"(src_b), // %1 + "+r"(count), // %2 + "=r"(diff) // %3 + : "m"(kNibbleMask), // %4 + "m"(kBitCount) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); + + return diff; +} +#endif // HAS_HAMMINGDISTANCE_AVX2 + +uint32_t SumSquareError_SSE2(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t sse; + asm volatile( + "pxor %%xmm0,%%xmm0 \n" + "pxor %%xmm5,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "movdqu (%1),%%xmm2 \n" + "lea 0x10(%1),%1 \n" + "movdqa %%xmm1,%%xmm3 \n" + "psubusb %%xmm2,%%xmm1 \n" + "psubusb %%xmm3,%%xmm2 \n" + "por %%xmm2,%%xmm1 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpckhbw %%xmm5,%%xmm2 \n" + "pmaddwd %%xmm1,%%xmm1 \n" + "pmaddwd %%xmm2,%%xmm2 \n" + "paddd %%xmm1,%%xmm0 \n" + "paddd %%xmm2,%%xmm0 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + + "pshufd $0xee,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "pshufd $0x1,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "movd %%xmm0,%3 \n" + + : "+r"(src_a), // %0 + "+r"(src_b), // %1 + "+r"(count), // %2 + "=g"(sse) // %3 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); return sse; } -static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 -static uvec32 kHashMul0 = { - 0x0c3525e1, // 33 ^ 15 - 0xa3476dc1, // 33 ^ 14 - 0x3b4039a1, // 33 ^ 13 - 0x4f5f0981, // 33 ^ 12 +static const uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16 +static const uvec32 kHashMul0 = { + 0x0c3525e1, // 33 ^ 15 + 0xa3476dc1, // 33 ^ 14 + 0x3b4039a1, // 33 ^ 13 + 0x4f5f0981, // 33 ^ 12 }; -static uvec32 kHashMul1 = { - 0x30f35d61, // 33 ^ 11 - 0x855cb541, // 33 ^ 10 - 0x040a9121, // 33 ^ 9 - 0x747c7101, // 33 ^ 8 +static const uvec32 kHashMul1 = { + 0x30f35d61, // 33 ^ 11 + 0x855cb541, // 33 ^ 10 + 0x040a9121, // 33 ^ 9 + 0x747c7101, // 33 ^ 8 }; -static uvec32 kHashMul2 = { - 0xec41d4e1, // 33 ^ 7 - 0x4cfa3cc1, // 33 ^ 6 - 0x025528a1, // 33 ^ 5 - 0x00121881, // 33 ^ 4 +static const uvec32 kHashMul2 = { + 0xec41d4e1, // 33 ^ 7 + 0x4cfa3cc1, // 33 ^ 6 + 0x025528a1, // 33 ^ 5 + 0x00121881, // 33 ^ 4 }; -static uvec32 kHashMul3 = { - 0x00008c61, // 33 ^ 3 - 0x00000441, // 33 ^ 2 - 0x00000021, // 33 ^ 1 - 0x00000001, // 33 ^ 0 +static const uvec32 kHashMul3 = { + 0x00008c61, // 33 ^ 3 + 0x00000441, // 33 ^ 2 + 0x00000021, // 33 ^ 1 + 0x00000001, // 33 ^ 0 }; -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { - uint32 hash; - asm volatile ( - "movd %2,%%xmm0 \n" - "pxor %%xmm7,%%xmm7 \n" - "movdqa %4,%%xmm6 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "lea " MEMLEA(0x10, 0) ",%0 \n" - "pmulld %%xmm6,%%xmm0 \n" - "movdqa %5,%%xmm5 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm7,%%xmm2 \n" - "movdqa %%xmm2,%%xmm3 
\n" - "punpcklwd %%xmm7,%%xmm3 \n" - "pmulld %%xmm5,%%xmm3 \n" - "movdqa %6,%%xmm5 \n" - "movdqa %%xmm2,%%xmm4 \n" - "punpckhwd %%xmm7,%%xmm4 \n" - "pmulld %%xmm5,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "punpckhbw %%xmm7,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklwd %%xmm7,%%xmm2 \n" - "pmulld %%xmm5,%%xmm2 \n" - "movdqa %8,%%xmm5 \n" - "punpckhwd %%xmm7,%%xmm1 \n" - "pmulld %%xmm5,%%xmm1 \n" - "paddd %%xmm4,%%xmm3 \n" - "paddd %%xmm2,%%xmm1 \n" - "paddd %%xmm3,%%xmm1 \n" - "pshufd $0xe,%%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm1 \n" - "pshufd $0x1,%%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "sub $0x10,%1 \n" - "jg 1b \n" - "movd %%xmm0,%3 \n" - : "+r"(src), // %0 - "+r"(count), // %1 - "+rm"(seed), // %2 - "=g"(hash) // %3 - : "m"(kHash16x33), // %4 - "m"(kHashMul0), // %5 - "m"(kHashMul1), // %6 - "m"(kHashMul2), // %7 - "m"(kHashMul3) // %8 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); +uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) { + uint32_t hash; + asm volatile( + "movd %2,%%xmm0 \n" + "pxor %%xmm7,%%xmm7 \n" + "movdqa %4,%%xmm6 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "pmulld %%xmm6,%%xmm0 \n" + "movdqa %5,%%xmm5 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm7,%%xmm2 \n" + "movdqa %%xmm2,%%xmm3 \n" + "punpcklwd %%xmm7,%%xmm3 \n" + "pmulld %%xmm5,%%xmm3 \n" + "movdqa %6,%%xmm5 \n" + "movdqa %%xmm2,%%xmm4 \n" + "punpckhwd %%xmm7,%%xmm4 \n" + "pmulld %%xmm5,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "punpckhbw %%xmm7,%%xmm1 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklwd %%xmm7,%%xmm2 \n" + "pmulld %%xmm5,%%xmm2 \n" + "movdqa %8,%%xmm5 \n" + "punpckhwd %%xmm7,%%xmm1 \n" + "pmulld %%xmm5,%%xmm1 \n" + "paddd %%xmm4,%%xmm3 \n" + "paddd %%xmm2,%%xmm1 \n" + "paddd %%xmm3,%%xmm1 \n" + "pshufd $0xe,%%xmm1,%%xmm2 \n" + "paddd %%xmm2,%%xmm1 \n" + "pshufd $0x1,%%xmm1,%%xmm2 \n" + "paddd %%xmm2,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "sub $0x10,%1 \n" + "jg 1b \n" + "movd %%xmm0,%3 \n" + : "+r"(src), // %0 + "+r"(count), // %1 + "+rm"(seed), // %2 + "=g"(hash) // %3 + : "m"(kHash16x33), // %4 + "m"(kHashMul0), // %5 + "m"(kHashMul1), // %6 + "m"(kHashMul2), // %7 + "m"(kHashMul3) // %8 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); return hash; } #endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) @@ -148,4 +358,3 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { } // extern "C" } // namespace libyuv #endif - diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare_msa.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare_msa.cc new file mode 100644 index 000000000000..0b807d37bee3 --- /dev/null +++ b/media/libvpx/libvpx/third_party/libyuv/source/compare_msa.cc @@ -0,0 +1,97 @@ +/* + * Copyright 2017 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "libyuv/basic_types.h" + +#include "libyuv/compare_row.h" +#include "libyuv/row.h" + +// This module is for GCC MSA +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#include "libyuv/macros_msa.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +uint32_t HammingDistance_MSA(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; + int i; + v16u8 src0, src1, src2, src3; + v2i64 vec0 = {0}, vec1 = {0}; + + for (i = 0; i < count; i += 32) { + src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0); + src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16); + src0 ^= src2; + src1 ^= src3; + vec0 += __msa_pcnt_d((v2i64)src0); + vec1 += __msa_pcnt_d((v2i64)src1); + src_a += 32; + src_b += 32; + } + + vec0 += vec1; + diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0); + diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2); + return diff; +} + +uint32_t SumSquareError_MSA(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t sse = 0u; + int i; + v16u8 src0, src1, src2, src3; + v8i16 vec0, vec1, vec2, vec3; + v4i32 reg0 = {0}, reg1 = {0}, reg2 = {0}, reg3 = {0}; + v2i64 tmp0; + + for (i = 0; i < count; i += 32) { + src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0); + src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16); + vec0 = (v8i16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); + vec1 = (v8i16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); + vec2 = (v8i16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); + vec3 = (v8i16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); + vec0 = __msa_hsub_u_h((v16u8)vec0, (v16u8)vec0); + vec1 = __msa_hsub_u_h((v16u8)vec1, (v16u8)vec1); + vec2 = __msa_hsub_u_h((v16u8)vec2, (v16u8)vec2); + vec3 = __msa_hsub_u_h((v16u8)vec3, (v16u8)vec3); + reg0 = __msa_dpadd_s_w(reg0, vec0, vec0); + reg1 = __msa_dpadd_s_w(reg1, vec1, vec1); + reg2 = __msa_dpadd_s_w(reg2, vec2, vec2); + reg3 = __msa_dpadd_s_w(reg3, vec3, vec3); + src_a += 32; + src_b += 32; + } + + reg0 += reg1; + reg2 += reg3; + reg0 += reg2; + tmp0 = __msa_hadd_s_d(reg0, reg0); + sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0); + sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2); + return sse; +} + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + +#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare_neon.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare_neon.cc index 49aa3b4eefec..2a2181e0cb37 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/compare_neon.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/compare_neon.cc @@ -21,40 +21,70 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ !defined(__aarch64__) -uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { - volatile uint32 sse; - asm volatile ( - "vmov.u8 q8, #0 \n" - "vmov.u8 q10, #0 \n" - "vmov.u8 q9, #0 \n" - "vmov.u8 q11, #0 \n" +// 256 bits at a time +// uses short accumulator which restricts count to 131 KB +uint32_t HammingDistance_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff; - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" - MEMACCESS(1) - "vld1.8 {q1}, [%1]! 
\n" - "subs %2, %2, #16 \n" - "vsubl.u8 q2, d0, d2 \n" - "vsubl.u8 q3, d1, d3 \n" - "vmlal.s16 q8, d4, d4 \n" - "vmlal.s16 q9, d6, d6 \n" - "vmlal.s16 q10, d5, d5 \n" - "vmlal.s16 q11, d7, d7 \n" - "bgt 1b \n" + asm volatile( + "vmov.u16 q4, #0 \n" // accumulator - "vadd.u32 q8, q8, q9 \n" - "vadd.u32 q10, q10, q11 \n" - "vadd.u32 q11, q8, q10 \n" - "vpaddl.u32 q1, q11 \n" - "vadd.u64 d0, d2, d3 \n" - "vmov.32 %3, d0[0] \n" - : "+r"(src_a), - "+r"(src_b), - "+r"(count), - "=r"(sse) - : - : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); + "1: \n" + "vld1.8 {q0, q1}, [%0]! \n" + "vld1.8 {q2, q3}, [%1]! \n" + "veor.32 q0, q0, q2 \n" + "veor.32 q1, q1, q3 \n" + "vcnt.i8 q0, q0 \n" + "vcnt.i8 q1, q1 \n" + "subs %2, %2, #32 \n" + "vadd.u8 q0, q0, q1 \n" // 16 byte counts + "vpadal.u8 q4, q0 \n" // 8 shorts + "bgt 1b \n" + + "vpaddl.u16 q0, q4 \n" // 4 ints + "vpadd.u32 d0, d0, d1 \n" + "vpadd.u32 d0, d0, d0 \n" + "vmov.32 %3, d0[0] \n" + + : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff) + : + : "cc", "q0", "q1", "q2", "q3", "q4"); + return diff; +} + +uint32_t SumSquareError_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t sse; + asm volatile( + "vmov.u8 q8, #0 \n" + "vmov.u8 q10, #0 \n" + "vmov.u8 q9, #0 \n" + "vmov.u8 q11, #0 \n" + + "1: \n" + "vld1.8 {q0}, [%0]! \n" + "vld1.8 {q1}, [%1]! \n" + "subs %2, %2, #16 \n" + "vsubl.u8 q2, d0, d2 \n" + "vsubl.u8 q3, d1, d3 \n" + "vmlal.s16 q8, d4, d4 \n" + "vmlal.s16 q9, d6, d6 \n" + "vmlal.s16 q10, d5, d5 \n" + "vmlal.s16 q11, d7, d7 \n" + "bgt 1b \n" + + "vadd.u32 q8, q8, q9 \n" + "vadd.u32 q10, q10, q11 \n" + "vadd.u32 q11, q8, q10 \n" + "vpaddl.u32 q1, q11 \n" + "vadd.u64 d0, d2, d3 \n" + "vmov.32 %3, d0[0] \n" + : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse) + : + : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); return sse; } diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare_neon64.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare_neon64.cc index f9c7df98c89d..6e8f672ab73a 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/compare_neon64.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/compare_neon64.cc @@ -20,39 +20,65 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) -uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { - volatile uint32 sse; - asm volatile ( - "eor v16.16b, v16.16b, v16.16b \n" - "eor v18.16b, v18.16b, v18.16b \n" - "eor v17.16b, v17.16b, v17.16b \n" - "eor v19.16b, v19.16b, v19.16b \n" +// 256 bits at a time +// uses short accumulator which restricts count to 131 KB +uint32_t HammingDistance_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff; + asm volatile( + "movi v4.8h, #0 \n" - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" - MEMACCESS(1) - "ld1 {v1.16b}, [%1], #16 \n" - "subs %w2, %w2, #16 \n" - "usubl v2.8h, v0.8b, v1.8b \n" - "usubl2 v3.8h, v0.16b, v1.16b \n" - "smlal v16.4s, v2.4h, v2.4h \n" - "smlal v17.4s, v3.4h, v3.4h \n" - "smlal2 v18.4s, v2.8h, v2.8h \n" - "smlal2 v19.4s, v3.8h, v3.8h \n" - "b.gt 1b \n" + "1: \n" + "ld1 {v0.16b, v1.16b}, [%0], #32 \n" + "ld1 {v2.16b, v3.16b}, [%1], #32 \n" + "eor v0.16b, v0.16b, v2.16b \n" + "eor v1.16b, v1.16b, v3.16b \n" + "cnt v0.16b, v0.16b \n" + "cnt v1.16b, v1.16b \n" + "subs %w2, %w2, #32 \n" + "add v0.16b, v0.16b, v1.16b \n" + "uadalp v4.8h, v0.16b \n" + "b.gt 1b \n" - "add v16.4s, v16.4s, v17.4s \n" - "add v18.4s, v18.4s, v19.4s \n" - "add v19.4s, v16.4s, v18.4s \n" - 
"addv s0, v19.4s \n" - "fmov %w3, s0 \n" - : "+r"(src_a), - "+r"(src_b), - "+r"(count), - "=r"(sse) - : - : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); + "uaddlv s4, v4.8h \n" + "fmov %w3, s4 \n" + : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff) + : + : "cc", "v0", "v1", "v2", "v3", "v4"); + return diff; +} + +uint32_t SumSquareError_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t sse; + asm volatile( + "eor v16.16b, v16.16b, v16.16b \n" + "eor v18.16b, v18.16b, v18.16b \n" + "eor v17.16b, v17.16b, v17.16b \n" + "eor v19.16b, v19.16b, v19.16b \n" + + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" + "ld1 {v1.16b}, [%1], #16 \n" + "subs %w2, %w2, #16 \n" + "usubl v2.8h, v0.8b, v1.8b \n" + "usubl2 v3.8h, v0.16b, v1.16b \n" + "smlal v16.4s, v2.4h, v2.4h \n" + "smlal v17.4s, v3.4h, v3.4h \n" + "smlal2 v18.4s, v2.8h, v2.8h \n" + "smlal2 v19.4s, v3.8h, v3.8h \n" + "b.gt 1b \n" + + "add v16.4s, v16.4s, v17.4s \n" + "add v18.4s, v18.4s, v19.4s \n" + "add v19.4s, v16.4s, v18.4s \n" + "addv s0, v19.4s \n" + "fmov %w3, s0 \n" + : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse) + : + : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); return sse; } diff --git a/media/libvpx/libvpx/third_party/libyuv/source/compare_win.cc b/media/libvpx/libvpx/third_party/libyuv/source/compare_win.cc index dc86fe25b19d..d57d3d9d1c8e 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/compare_win.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/compare_win.cc @@ -13,20 +13,39 @@ #include "libyuv/compare_row.h" #include "libyuv/row.h" +#if defined(_MSC_VER) +#include // For __popcnt +#endif + #ifdef __cplusplus namespace libyuv { extern "C" { #endif // This module is for 32 bit Visual C x86 and clangcl -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) -__declspec(naked) -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { +uint32_t HammingDistance_SSE42(const uint8_t* src_a, + const uint8_t* src_b, + int count) { + uint32_t diff = 0u; + + int i; + for (i = 0; i < count - 3; i += 4) { + uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT + src_a += 4; + src_b += 4; + diff += __popcnt(x); + } + return diff; +} + +__declspec(naked) uint32_t + SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) { __asm { - mov eax, [esp + 4] // src_a - mov edx, [esp + 8] // src_b - mov ecx, [esp + 12] // count + mov eax, [esp + 4] // src_a + mov edx, [esp + 8] // src_b + mov ecx, [esp + 12] // count pxor xmm0, xmm0 pxor xmm5, xmm5 @@ -61,13 +80,13 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { // Visual C 2012 required for AVX2. #if _MSC_VER >= 1700 // C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. 
-#pragma warning(disable: 4752) -__declspec(naked) -uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { +#pragma warning(disable : 4752) +__declspec(naked) uint32_t + SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) { __asm { - mov eax, [esp + 4] // src_a - mov edx, [esp + 8] // src_b - mov ecx, [esp + 12] // count + mov eax, [esp + 4] // src_a + mov edx, [esp + 8] // src_b + mov ecx, [esp + 12] // count vpxor ymm0, ymm0, ymm0 // sum vpxor ymm5, ymm5, ymm5 // constant 0 for unpck sub edx, eax @@ -101,65 +120,65 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { } #endif // _MSC_VER >= 1700 -uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 +uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16 uvec32 kHashMul0 = { - 0x0c3525e1, // 33 ^ 15 - 0xa3476dc1, // 33 ^ 14 - 0x3b4039a1, // 33 ^ 13 - 0x4f5f0981, // 33 ^ 12 + 0x0c3525e1, // 33 ^ 15 + 0xa3476dc1, // 33 ^ 14 + 0x3b4039a1, // 33 ^ 13 + 0x4f5f0981, // 33 ^ 12 }; uvec32 kHashMul1 = { - 0x30f35d61, // 33 ^ 11 - 0x855cb541, // 33 ^ 10 - 0x040a9121, // 33 ^ 9 - 0x747c7101, // 33 ^ 8 + 0x30f35d61, // 33 ^ 11 + 0x855cb541, // 33 ^ 10 + 0x040a9121, // 33 ^ 9 + 0x747c7101, // 33 ^ 8 }; uvec32 kHashMul2 = { - 0xec41d4e1, // 33 ^ 7 - 0x4cfa3cc1, // 33 ^ 6 - 0x025528a1, // 33 ^ 5 - 0x00121881, // 33 ^ 4 + 0xec41d4e1, // 33 ^ 7 + 0x4cfa3cc1, // 33 ^ 6 + 0x025528a1, // 33 ^ 5 + 0x00121881, // 33 ^ 4 }; uvec32 kHashMul3 = { - 0x00008c61, // 33 ^ 3 - 0x00000441, // 33 ^ 2 - 0x00000021, // 33 ^ 1 - 0x00000001, // 33 ^ 0 + 0x00008c61, // 33 ^ 3 + 0x00000441, // 33 ^ 2 + 0x00000021, // 33 ^ 1 + 0x00000001, // 33 ^ 0 }; -__declspec(naked) -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { +__declspec(naked) uint32_t + HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) { __asm { - mov eax, [esp + 4] // src - mov ecx, [esp + 8] // count + mov eax, [esp + 4] // src + mov ecx, [esp + 8] // count movd xmm0, [esp + 12] // seed - pxor xmm7, xmm7 // constant 0 for unpck + pxor xmm7, xmm7 // constant 0 for unpck movdqa xmm6, xmmword ptr kHash16x33 wloop: - movdqu xmm1, [eax] // src[0-15] + movdqu xmm1, [eax] // src[0-15] lea eax, [eax + 16] - pmulld xmm0, xmm6 // hash *= 33 ^ 16 + pmulld xmm0, xmm6 // hash *= 33 ^ 16 movdqa xmm5, xmmword ptr kHashMul0 movdqa xmm2, xmm1 - punpcklbw xmm2, xmm7 // src[0-7] + punpcklbw xmm2, xmm7 // src[0-7] movdqa xmm3, xmm2 - punpcklwd xmm3, xmm7 // src[0-3] + punpcklwd xmm3, xmm7 // src[0-3] pmulld xmm3, xmm5 movdqa xmm5, xmmword ptr kHashMul1 movdqa xmm4, xmm2 - punpckhwd xmm4, xmm7 // src[4-7] + punpckhwd xmm4, xmm7 // src[4-7] pmulld xmm4, xmm5 movdqa xmm5, xmmword ptr kHashMul2 - punpckhbw xmm1, xmm7 // src[8-15] + punpckhbw xmm1, xmm7 // src[8-15] movdqa xmm2, xmm1 - punpcklwd xmm2, xmm7 // src[8-11] + punpcklwd xmm2, xmm7 // src[8-11] pmulld xmm2, xmm5 movdqa xmm5, xmmword ptr kHashMul3 - punpckhwd xmm1, xmm7 // src[12-15] + punpckhwd xmm1, xmm7 // src[12-15] pmulld xmm1, xmm5 - paddd xmm3, xmm4 // add 16 results + paddd xmm3, xmm4 // add 16 results paddd xmm1, xmm2 paddd xmm1, xmm3 @@ -171,18 +190,18 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { sub ecx, 16 jg wloop - movd eax, xmm0 // return hash + movd eax, xmm0 // return hash ret } } // Visual C 2012 required for AVX2. 
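// Editor's note: _MSC_VER 1700 corresponds to Visual Studio 2012, the first
// MSVC release whose compiler and inline assembler accept the VEX-encoded
// AVX2 instructions used in the guarded functions below.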
#if _MSC_VER >= 1700 -__declspec(naked) -uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { +__declspec(naked) uint32_t + HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed) { __asm { - mov eax, [esp + 4] // src - mov ecx, [esp + 8] // count + mov eax, [esp + 4] // src + mov ecx, [esp + 8] // count vmovd xmm0, [esp + 12] // seed wloop: @@ -196,7 +215,7 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { vpmulld xmm2, xmm2, xmmword ptr kHashMul2 lea eax, [eax + 16] vpmulld xmm1, xmm1, xmmword ptr kHashMul3 - vpaddd xmm3, xmm3, xmm4 // add 16 results + vpaddd xmm3, xmm3, xmm4 // add 16 results vpaddd xmm1, xmm1, xmm2 vpaddd xmm1, xmm1, xmm3 vpshufd xmm2, xmm1, 0x0e // upper 2 dwords @@ -207,7 +226,7 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { sub ecx, 16 jg wloop - vmovd eax, xmm0 // return hash + vmovd eax, xmm0 // return hash vzeroupper ret } diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert.cc index a33742d24d90..375cc732c1d7 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/convert.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/convert.cc @@ -14,8 +14,8 @@ #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" -#include "libyuv/scale.h" // For ScalePlane() #include "libyuv/row.h" +#include "libyuv/scale.h" // For ScalePlane() #ifdef __cplusplus namespace libyuv { @@ -28,14 +28,22 @@ static __inline int Abs(int v) { } // Any I4xx To I420 format with mirroring. -static int I4xxToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_y_width, int src_y_height, - int src_uv_width, int src_uv_height) { +static int I4xxToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int src_y_width, + int src_y_height, + int src_uv_width, + int src_uv_height) { const int dst_y_width = Abs(src_y_width); const int dst_y_height = Abs(src_y_height); const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); @@ -44,35 +52,37 @@ static int I4xxToI420(const uint8* src_y, int src_stride_y, return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, - dst_y, dst_stride_y, dst_y_width, dst_y_height, - kFilterBilinear); + ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, + dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); } - ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, - dst_u, dst_stride_u, dst_uv_width, dst_uv_height, - kFilterBilinear); - ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, - dst_v, dst_stride_v, dst_uv_width, dst_uv_height, - kFilterBilinear); + ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, + dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); + ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, + dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); return 0; } -// Copy I420 with optional flipping +// Copy I420 with optional flipping. // TODO(fbarchard): Use Scale plane which supports mirroring, but ensure // is does row coalescing. 
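// Editor's note: "row coalescing" in the TODO above (and in X420ToI420
// further down) means that when every stride equals the row width, the rows
// are contiguous in memory, so the whole plane can be handled as one long
// row. A minimal sketch of the idea, with a hypothetical helper name:
static void CoalesceRows_Sketch(int* width, int* height, int stride) {
  if (stride == *width) {  // rows are contiguous in memory
    *width *= *height;     // treat the plane as a single long row
    *height = 1;
  }
}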
LIBYUV_API -int I420Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int I420Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!src_u || !src_v || - !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -96,79 +106,152 @@ int I420Copy(const uint8* src_y, int src_stride_y, return 0; } +// Copy I010 with optional flipping. +LIBYUV_API +int I010Copy(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + if (dst_y) { + CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + // Copy UV planes. + CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); + CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); + return 0; +} + +// Convert 10 bit YUV to 8 bit. +LIBYUV_API +int I010ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + // Convert Y plane. + Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, 16384, width, + height); + // Convert UV planes. 
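+  // Editor's note (editor's reading, not stated in this patch): the 16384
+  // scale passed to Convert16To8Plane is 16.16 fixed point, so
+  // v * 16384 >> 16 == v >> 2, mapping 10-bit samples (0..1023) onto
+  // 8-bit samples (0..255).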
+ Convert16To8Plane(src_u, src_stride_u, dst_u, dst_stride_u, 16384, halfwidth, + halfheight); + Convert16To8Plane(src_v, src_stride_v, dst_v, dst_stride_v, 16384, halfwidth, + halfheight); + return 0; +} + // 422 chroma is 1/2 width, 1x height // 420 chroma is 1/2 width, 1/2 height LIBYUV_API -int I422ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int I422ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { const int src_uv_width = SUBSAMPLE(width, 1, 1); - return I4xxToI420(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - src_uv_width, height); + return I4xxToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height, src_uv_width, height); } // 444 chroma is 1x width, 1x height // 420 chroma is 1/2 width, 1/2 height LIBYUV_API -int I444ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - return I4xxToI420(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - width, height); -} - -// 411 chroma is 1/4 width, 1x height -// 420 chroma is 1/2 width, 1/2 height -LIBYUV_API -int I411ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - const int src_uv_width = SUBSAMPLE(width, 3, 2); - return I4xxToI420(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - src_uv_width, height); +int I444ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return I4xxToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height, width, height); } // I400 is greyscale typically used in MJPG LIBYUV_API -int I400ToI420(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int I400ToI420(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!dst_u || !dst_v || - width <= 0 || height == 0) { + if (!dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative 
height means invert the image. @@ -186,11 +269,15 @@ int I400ToI420(const uint8* src_y, int src_stride_y, return 0; } -static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1, - uint8* dst, int dst_stride, - int width, int height) { +static void CopyPlane2(const uint8_t* src, + int src_stride_0, + int src_stride_1, + uint8_t* dst, + int dst_stride, + int width, + int height) { int y; - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; + void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; @@ -211,11 +298,6 @@ static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1, CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif // Copy plane for (y = 0; y < height - 1; y += 2) { @@ -238,17 +320,22 @@ static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1, // src_stride_m420 is row planar. Normally this will be the width in pixels. // The UV plane is half width, but 2 values, so src_stride_m420 applies to // this as well as the two Y planes. -static int X420ToI420(const uint8* src_y, - int src_stride_y0, int src_stride_y1, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +static int X420ToI420(const uint8_t* src_y, + int src_stride_y0, + int src_stride_y1, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!src_uv || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_uv || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -265,16 +352,14 @@ static int X420ToI420(const uint8* src_y, dst_stride_v = -dst_stride_v; } // Coalesce rows. - if (src_stride_y0 == width && - src_stride_y1 == width && + if (src_stride_y0 == width && src_stride_y1 == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y0 = src_stride_y1 = dst_stride_y = 0; } // Coalesce rows. - if (src_stride_uv == halfwidth * 2 && - dst_stride_u == halfwidth && + if (src_stride_uv == halfwidth * 2 && dst_stride_u == halfwidth && dst_stride_v == halfwidth) { halfwidth *= halfheight; halfheight = 1; @@ -299,63 +384,78 @@ static int X420ToI420(const uint8* src_y, // Convert NV12 to I420. 
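// Editor's note: NV12 is a full-size Y plane followed by one half-size plane
// of interleaved chroma pairs (U0 V0 U1 V1 ...); NV21 swaps the pair order,
// which is why NV21ToI420 below simply passes dst_v and dst_u to X420ToI420
// in reverse. A minimal scalar sketch of de-interleaving one chroma row
// (hypothetical helper, not the libyuv kernel):
static void SplitUVRow_Sketch(const uint8_t* src_uv, uint8_t* dst_u,
                              uint8_t* dst_v, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_u[x] = src_uv[2 * x + 0];  // even bytes carry U
    dst_v[x] = src_uv[2 * x + 1];  // odd bytes carry V
  }
}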
LIBYUV_API -int NV12ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - return X420ToI420(src_y, src_stride_y, src_stride_y, - src_uv, src_stride_uv, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height); +int NV12ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return X420ToI420(src_y, src_stride_y, src_stride_y, src_uv, src_stride_uv, + dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, width, height); } // Convert NV21 to I420. Same as NV12 but u and v pointers swapped. LIBYUV_API -int NV21ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_vu, int src_stride_vu, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - return X420ToI420(src_y, src_stride_y, src_stride_y, - src_vu, src_stride_vu, - dst_y, dst_stride_y, - dst_v, dst_stride_v, - dst_u, dst_stride_u, - width, height); +int NV21ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return X420ToI420(src_y, src_stride_y, src_stride_y, src_vu, src_stride_vu, + dst_y, dst_stride_y, dst_v, dst_stride_v, dst_u, + dst_stride_u, width, height); } // Convert M420 to I420. LIBYUV_API -int M420ToI420(const uint8* src_m420, int src_stride_m420, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int M420ToI420(const uint8_t* src_m420, + int src_stride_m420, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2, - src_m420 + src_stride_m420 * 2, src_stride_m420 * 3, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, + src_m420 + src_stride_m420 * 2, src_stride_m420 * 3, dst_y, + dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, width, height); } // Convert YUY2 to I420. LIBYUV_API -int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int YUY2ToI420(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; - void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_u, uint8* dst_v, int width) = YUY2ToUVRow_C; - void (*YUY2ToYRow)(const uint8* src_yuy2, - uint8* dst_y, int width) = YUY2ToYRow_C; + void (*YUY2ToUVRow)(const uint8_t* src_yuy2, int src_stride_yuy2, + uint8_t* dst_u, uint8_t* dst_v, int width) = + YUY2ToUVRow_C; + void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = + YUY2ToYRow_C; // Negative height means invert the image. 
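  // Editor's note: the negative-height convention used by every converter in
  // this file flips the image vertically by pointing each source pointer at
  // its last row and negating the stride, so the row loop below walks the
  // picture bottom-up without further special cases.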
if (height < 0) { height = -height; @@ -392,6 +492,16 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, } } #endif +#if defined(HAS_YUY2TOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + YUY2ToYRow = YUY2ToYRow_Any_MSA; + YUY2ToUVRow = YUY2ToUVRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + YUY2ToYRow = YUY2ToYRow_MSA; + YUY2ToUVRow = YUY2ToUVRow_MSA; + } + } +#endif for (y = 0; y < height - 1; y += 2) { YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width); @@ -411,16 +521,22 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, // Convert UYVY to I420. LIBYUV_API -int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int UYVYToI420(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; - void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_u, uint8* dst_v, int width) = UYVYToUVRow_C; - void (*UYVYToYRow)(const uint8* src_uyvy, - uint8* dst_y, int width) = UYVYToYRow_C; + void (*UYVYToUVRow)(const uint8_t* src_uyvy, int src_stride_uyvy, + uint8_t* dst_u, uint8_t* dst_v, int width) = + UYVYToUVRow_C; + void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) = + UYVYToYRow_C; // Negative height means invert the image. if (height < 0) { height = -height; @@ -457,6 +573,16 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, } } #endif +#if defined(HAS_UYVYTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + UYVYToYRow = UYVYToYRow_Any_MSA; + UYVYToUVRow = UYVYToUVRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + UYVYToYRow = UYVYToYRow_MSA; + UYVYToUVRow = UYVYToUVRow_MSA; + } + } +#endif for (y = 0; y < height - 1; y += 2) { UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width); @@ -476,19 +602,23 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, // Convert ARGB to I420. LIBYUV_API -int ARGBToI420(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int ARGBToI420(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; - if (!src_argb || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
@@ -533,6 +663,22 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYRow = ARGBToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_MSA; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToUVRow = ARGBToUVRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_MSA; + } + } +#endif for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width); @@ -552,19 +698,23 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, // Convert BGRA to I420. LIBYUV_API -int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int BGRAToI420(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; - void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C; - void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int width) = + void (*BGRAToUVRow)(const uint8_t* src_bgra0, int src_stride_bgra, + uint8_t* dst_u, uint8_t* dst_v, int width) = + BGRAToUVRow_C; + void (*BGRAToYRow)(const uint8_t* src_bgra, uint8_t* dst_y, int width) = BGRAToYRow_C; - if (!src_bgra || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_bgra || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -592,12 +742,28 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, } #endif #if defined(HAS_BGRATOUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - BGRAToUVRow = BGRAToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - BGRAToUVRow = BGRAToUVRow_NEON; - } + if (TestCpuFlag(kCpuHasNEON)) { + BGRAToUVRow = BGRAToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + BGRAToUVRow = BGRAToUVRow_NEON; } + } +#endif +#if defined(HAS_BGRATOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + BGRAToYRow = BGRAToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + BGRAToYRow = BGRAToYRow_MSA; + } + } +#endif +#if defined(HAS_BGRATOUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + BGRAToUVRow = BGRAToUVRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + BGRAToUVRow = BGRAToUVRow_MSA; + } + } #endif for (y = 0; y < height - 1; y += 2) { @@ -618,19 +784,23 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, // Convert ABGR to I420. 
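// Editor's note: every converter in this file selects its row kernels with
// the same pattern: start from the portable _C function, upgrade to the
// "_Any_" SIMD variant when the CPU flag is set (it handles ragged widths
// with a scalar tail), and to the fully aligned variant when the width is a
// multiple of the vector width. A minimal sketch of that selection, with
// hypothetical names; TestCpuFlag and IS_ALIGNED are the real helpers used
// throughout this file:
typedef void (*RowFn_Sketch)(const uint8_t* src, uint8_t* dst, int width);
static RowFn_Sketch ChooseRowFn_Sketch(RowFn_Sketch c_fn,
                                       RowFn_Sketch any_simd_fn,
                                       RowFn_Sketch aligned_simd_fn,
                                       int cpu_flag, int width) {
  RowFn_Sketch fn = c_fn;  // portable fallback
  if (TestCpuFlag(cpu_flag)) {
    fn = any_simd_fn;      // any width
    if (IS_ALIGNED(width, 16)) {
      fn = aligned_simd_fn;  // full vectors only
    }
  }
  return fn;
}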
LIBYUV_API -int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int ABGRToI420(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; - void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C; - void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int width) = + void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ABGRToUVRow_C; + void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) = ABGRToYRow_C; - if (!src_abgr || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -665,6 +835,22 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, } } #endif +#if defined(HAS_ABGRTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ABGRToYRow = ABGRToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_MSA; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ABGRToUVRow = ABGRToUVRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ABGRToUVRow = ABGRToUVRow_MSA; + } + } +#endif for (y = 0; y < height - 1; y += 2) { ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width); @@ -684,19 +870,23 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, // Convert RGBA to I420. LIBYUV_API -int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int RGBAToI420(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; - void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C; - void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int width) = + void (*RGBAToUVRow)(const uint8_t* src_rgba0, int src_stride_rgba, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RGBAToUVRow_C; + void (*RGBAToYRow)(const uint8_t* src_rgba, uint8_t* dst_y, int width) = RGBAToYRow_C; - if (!src_rgba || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_rgba || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -731,6 +921,22 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, } } #endif +#if defined(HAS_RGBATOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RGBAToYRow = RGBAToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RGBAToYRow = RGBAToYRow_MSA; + } + } +#endif +#if defined(HAS_RGBATOUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RGBAToUVRow = RGBAToUVRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RGBAToUVRow = RGBAToUVRow_MSA; + } + } +#endif for (y = 0; y < height - 1; y += 2) { RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width); @@ -750,27 +956,33 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, // Convert RGB24 to I420. 
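// Editor's note: the RGB24/RAW/RGB565/ARGB1555/ARGB4444 converters that
// follow have two build-time paths: NEON and MSA provide direct RGB-to-Y/UV
// row kernels, while other platforms first expand two rows into a
// 32-byte-aligned temporary ARGB buffer (align_buffer_64) and reuse the ARGB
// row kernels.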
LIBYUV_API -int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int RGB24ToI420(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; -#if defined(HAS_RGB24TOYROW_NEON) - void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C; - void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int width) = +#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) + void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RGB24ToUVRow_C; + void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) = RGB24ToYRow_C; #else - void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = + void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RGB24ToARGBRow_C; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif - if (!src_rgb24 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -792,6 +1004,15 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, } } } +#elif defined(HAS_RGB24TOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RGB24ToUVRow = RGB24ToUVRow_Any_MSA; + RGB24ToYRow = RGB24ToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RGB24ToYRow = RGB24ToYRow_MSA; + RGB24ToUVRow = RGB24ToUVRow_MSA; + } + } // Other platforms do intermediate conversion from RGB24 to ARGB. #else #if defined(HAS_RGB24TOARGBROW_SSSE3) @@ -822,14 +1043,17 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, } } #endif +#endif + { +#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { -#if defined(HAS_RGB24TOYROW_NEON) +#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); RGB24ToYRow(src_rgb24, dst_y, width); RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); @@ -846,7 +1070,7 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, dst_v += dst_stride_v; } if (height & 1) { -#if defined(HAS_RGB24TOYROW_NEON) +#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width); RGB24ToYRow(src_rgb24, dst_y, width); #else @@ -855,36 +1079,41 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, ARGBToYRow(row, dst_y, width); #endif } -#if !defined(HAS_RGB24TOYROW_NEON) +#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) free_aligned_buffer_64(row); - } #endif + } return 0; } // Convert RAW to I420. 
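// Editor's note (editor's understanding of the naming, not stated in this
// patch): RAW and RGB24 are both 3 bytes per pixel and differ only in the
// channel order in memory, which is why RAWToI420 mirrors RGB24ToI420 above
// line for line.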
LIBYUV_API -int RAWToI420(const uint8* src_raw, int src_stride_raw, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int RAWToI420(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; -#if defined(HAS_RAWTOYROW_NEON) - void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width) = RAWToUVRow_C; - void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int width) = +#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) + void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, + uint8_t* dst_v, int width) = RAWToUVRow_C; + void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = RAWToYRow_C; #else - void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif - if (!src_raw || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -906,6 +1135,15 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, } } } +#elif defined(HAS_RAWTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RAWToUVRow = RAWToUVRow_Any_MSA; + RAWToYRow = RAWToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RAWToYRow = RAWToYRow_MSA; + RAWToUVRow = RAWToUVRow_MSA; + } + } // Other platforms do intermediate conversion from RAW to ARGB. #else #if defined(HAS_RAWTOARGBROW_SSSE3) @@ -936,14 +1174,17 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, } } #endif +#endif + { +#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { -#if defined(HAS_RAWTOYROW_NEON) +#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width); RAWToYRow(src_raw, dst_y, width); RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); @@ -960,7 +1201,7 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, dst_v += dst_stride_v; } if (height & 1) { -#if defined(HAS_RAWTOYROW_NEON) +#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) RAWToUVRow(src_raw, 0, dst_u, dst_v, width); RAWToYRow(src_raw, dst_y, width); #else @@ -969,36 +1210,42 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, ARGBToYRow(row, dst_y, width); #endif } -#if !defined(HAS_RAWTOYROW_NEON) +#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) free_aligned_buffer_64(row); - } #endif + } return 0; } // Convert RGB565 to I420. 
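// Editor's note: RGB565 packs a pixel into 16 bits as 5 bits blue, 6 bits
// green, 5 bits red (low to high). A minimal sketch of unpacking one pixel
// to 8-bit channels, replicating the top bits into the low bits as the ARGB
// row converters do (hypothetical helper, not the libyuv kernel):
static void RGB565ToBGR_Sketch(uint16_t px, uint8_t* b, uint8_t* g,
                               uint8_t* r) {
  uint8_t b5 = px & 0x1f;
  uint8_t g6 = (px >> 5) & 0x3f;
  uint8_t r5 = (px >> 11) & 0x1f;
  *b = (uint8_t)((b5 << 3) | (b5 >> 2));  // 5 -> 8 bits
  *g = (uint8_t)((g6 << 2) | (g6 >> 4));  // 6 -> 8 bits
  *r = (uint8_t)((r5 << 3) | (r5 >> 2));  // 5 -> 8 bits
}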
LIBYUV_API -int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int RGB565ToI420(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; -#if defined(HAS_RGB565TOYROW_NEON) - void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width) = RGB565ToUVRow_C; - void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int width) = +#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) + void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RGB565ToUVRow_C; + void (*RGB565ToYRow)(const uint8_t* src_rgb565, uint8_t* dst_y, int width) = RGB565ToYRow_C; #else - void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = - RGB565ToARGBRow_C; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*RGB565ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = RGB565ToARGBRow_C; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif - if (!src_rgb565 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_rgb565 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1020,6 +1267,15 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, } } } +#elif defined(HAS_RGB565TOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RGB565ToUVRow = RGB565ToUVRow_Any_MSA; + RGB565ToYRow = RGB565ToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RGB565ToYRow = RGB565ToYRow_MSA; + RGB565ToUVRow = RGB565ToUVRow_MSA; + } + } // Other platforms do intermediate conversion from RGB565 to ARGB. #else #if defined(HAS_RGB565TOARGBROW_SSE2) @@ -1057,15 +1313,16 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, ARGBToYRow = ARGBToYRow_AVX2; } } +#endif #endif { +#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) // Allocate 2 rows of ARGB. 
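    // Editor's note: (width * 4 + 31) & ~31 rounds one ARGB row up to a
    // multiple of 32 bytes so both temporary rows stay SIMD-aligned; e.g.
    // width 100 gives 400 bytes, rounded up to 416.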
const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif - for (y = 0; y < height - 1; y += 2) { -#if defined(HAS_RGB565TOYROW_NEON) +#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width); RGB565ToYRow(src_rgb565, dst_y, width); RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); @@ -1082,7 +1339,7 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, dst_v += dst_stride_v; } if (height & 1) { -#if defined(HAS_RGB565TOYROW_NEON) +#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width); RGB565ToYRow(src_rgb565, dst_y, width); #else @@ -1091,36 +1348,43 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, ARGBToYRow(row, dst_y, width); #endif } -#if !defined(HAS_RGB565TOYROW_NEON) +#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) free_aligned_buffer_64(row); - } #endif + } return 0; } // Convert ARGB1555 to I420. LIBYUV_API -int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int ARGB1555ToI420(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; -#if defined(HAS_ARGB1555TOYROW_NEON) - void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C; - void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int width) = - ARGB1555ToYRow_C; +#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) + void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGB1555ToUVRow_C; + void (*ARGB1555ToYRow)(const uint8_t* src_argb1555, uint8_t* dst_y, + int width) = ARGB1555ToYRow_C; #else - void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = - ARGB1555ToARGBRow_C; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGB1555ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = ARGB1555ToARGBRow_C; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif - if (!src_argb1555 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_argb1555 || !dst_y || !dst_u || !dst_v || width <= 0 || + height == 0) { return -1; } // Negative height means invert the image. @@ -1142,6 +1406,15 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, } } } +#elif defined(HAS_ARGB1555TOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGB1555ToUVRow = ARGB1555ToUVRow_Any_MSA; + ARGB1555ToYRow = ARGB1555ToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGB1555ToYRow = ARGB1555ToYRow_MSA; + ARGB1555ToUVRow = ARGB1555ToUVRow_MSA; + } + } // Other platforms do intermediate conversion from ARGB1555 to ARGB. 
#else #if defined(HAS_ARGB1555TOARGBROW_SSE2) @@ -1179,15 +1452,17 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, ARGBToYRow = ARGBToYRow_AVX2; } } +#endif #endif { +#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { -#if defined(HAS_ARGB1555TOYROW_NEON) +#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width); ARGB1555ToYRow(src_argb1555, dst_y, width); ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, @@ -1206,7 +1481,7 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, dst_v += dst_stride_v; } if (height & 1) { -#if defined(HAS_ARGB1555TOYROW_NEON) +#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width); ARGB1555ToYRow(src_argb1555, dst_y, width); #else @@ -1215,36 +1490,43 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, ARGBToYRow(row, dst_y, width); #endif } -#if !defined(HAS_ARGB1555TOYROW_NEON) +#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) free_aligned_buffer_64(row); - } #endif + } return 0; } // Convert ARGB4444 to I420. LIBYUV_API -int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int ARGB4444ToI420(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; #if defined(HAS_ARGB4444TOYROW_NEON) - void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C; - void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int width) = - ARGB4444ToYRow_C; + void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGB4444ToUVRow_C; + void (*ARGB4444ToYRow)(const uint8_t* src_argb4444, uint8_t* dst_y, + int width) = ARGB4444ToYRow_C; #else - void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = - ARGB4444ToARGBRow_C; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGB4444ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = ARGB4444ToARGBRow_C; + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif - if (!src_argb4444 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_argb4444 || !dst_y || !dst_u || !dst_v || width <= 0 || + height == 0) { return -1; } // Negative height means invert the image. 
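RGB565, ARGB1555 and ARGB4444 all pack a pixel into 16 bits, which is why the converters above either use a dedicated 16-bit row kernel or first widen to ARGB. Widening replicates the high bits into the low bits so that full-scale values map to 255. A scalar sketch of the three unpackers, assuming the little-endian field order the _C row kernels use (blue in the low bits); function names are illustrative:

#include <stdint.h>

/* RGB565: rrrrrggggggbbbbb -> 8 bits per channel by bit replication. */
void Unpack565(uint16_t p, uint8_t* b, uint8_t* g, uint8_t* r) {
  uint8_t b5 = p & 0x1f, g6 = (p >> 5) & 0x3f, r5 = (p >> 11) & 0x1f;
  *b = (uint8_t)((b5 << 3) | (b5 >> 2));
  *g = (uint8_t)((g6 << 2) | (g6 >> 4));
  *r = (uint8_t)((r5 << 3) | (r5 >> 2));
}

/* ARGB1555: a rrrrr ggggg bbbbb; 1-bit alpha becomes 0 or 255. */
void Unpack1555(uint16_t p, uint8_t* b, uint8_t* g, uint8_t* r, uint8_t* a) {
  uint8_t b5 = p & 0x1f, g5 = (p >> 5) & 0x1f, r5 = (p >> 10) & 0x1f;
  *b = (uint8_t)((b5 << 3) | (b5 >> 2));
  *g = (uint8_t)((g5 << 3) | (g5 >> 2));
  *r = (uint8_t)((r5 << 3) | (r5 >> 2));
  *a = (p & 0x8000) ? 255 : 0;
}

/* ARGB4444: aaaa rrrr gggg bbbb; multiplying a nibble by 0x11 replicates it. */
void Unpack4444(uint16_t p, uint8_t* b, uint8_t* g, uint8_t* r, uint8_t* a) {
  *b = (uint8_t)((p & 0x0f) * 0x11);
  *g = (uint8_t)(((p >> 4) & 0x0f) * 0x11);
  *r = (uint8_t)(((p >> 8) & 0x0f) * 0x11);
  *a = (uint8_t)(((p >> 12) & 0x0f) * 0x11);
}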
@@ -1284,6 +1566,14 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, } } #endif +#if defined(HAS_ARGB4444TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_MSA; + } + } +#endif #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; @@ -1304,7 +1594,22 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, } } #endif +#if defined(HAS_ARGBTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToUVRow = ARGBToUVRow_Any_MSA; + ARGBToYRow = ARGBToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_MSA; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_MSA; + } + } + } +#endif +#endif + { +#if !defined(HAS_ARGB4444TOYROW_NEON) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); @@ -1341,13 +1646,15 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, } #if !defined(HAS_ARGB4444TOYROW_NEON) free_aligned_buffer_64(row); - } #endif + } return 0; } -static void SplitPixels(const uint8* src_u, int src_pixel_stride_uv, - uint8* dst_u, int width) { +static void SplitPixels(const uint8_t* src_u, + int src_pixel_stride_uv, + uint8_t* dst_u, + int width) { int i; for (i = 0; i < width; ++i) { *dst_u = *src_u; @@ -1358,21 +1665,26 @@ static void SplitPixels(const uint8* src_u, int src_pixel_stride_uv, // Convert Android420 to I420. LIBYUV_API -int Android420ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, +int Android420ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, int src_pixel_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; - const int vu_off = src_v - src_u; + const ptrdiff_t vu_off = src_v - src_u; int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!src_u || !src_v || - !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
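Android420ToI420 above classifies the chroma layout from the distance between the U and V pointers, and the diff widens vu_off from int to ptrdiff_t: the difference of two pointers has type ptrdiff_t, and storing it in an int can truncate on 64-bit targets when the planes come from separate allocations. A sketch of the detection logic under those assumptions (ClassifyUV and the Layout enum are hypothetical; strictly, subtracting unrelated pointers is undefined in C, so this mirrors the library's pragmatic usage):

#include <stdint.h>
#include <stddef.h>

typedef enum { kLayoutI420, kLayoutNV12, kLayoutNV21, kLayoutOther } Layout;

/* Classify an Android YUV_420_888-style buffer from its UV geometry. */
Layout ClassifyUV(const uint8_t* src_u, int src_stride_u,
                  const uint8_t* src_v, int src_stride_v,
                  int src_pixel_stride_uv) {
  const ptrdiff_t vu_off = src_v - src_u;  /* keep the full pointer range */
  if (src_pixel_stride_uv == 1) {
    return kLayoutI420;                    /* planar U and V: CopyPlane */
  }
  if (src_pixel_stride_uv == 2 && src_stride_u == src_stride_v) {
    if (vu_off == 1) return kLayoutNV12;   /* U first, V interleaved after */
    if (vu_off == -1) return kLayoutNV21;  /* V first */
  }
  return kLayoutOther;                     /* fall back to SplitPixels() */
}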
@@ -1396,15 +1708,16 @@ int Android420ToI420(const uint8* src_y, int src_stride_y, CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); return 0; - // Split UV planes - NV21 - } else if (src_pixel_stride_uv == 2 && vu_off == -1 && - src_stride_u == src_stride_v) { + // Split UV planes - NV21 + } + if (src_pixel_stride_uv == 2 && vu_off == -1 && + src_stride_u == src_stride_v) { SplitUVPlane(src_v, src_stride_v, dst_v, dst_stride_v, dst_u, dst_stride_u, halfwidth, halfheight); return 0; - // Split UV planes - NV12 - } else if (src_pixel_stride_uv == 2 && vu_off == 1 && - src_stride_u == src_stride_v) { + // Split UV planes - NV12 + } + if (src_pixel_stride_uv == 2 && vu_off == 1 && src_stride_u == src_stride_v) { SplitUVPlane(src_u, src_stride_u, dst_u, dst_stride_u, dst_v, dst_stride_v, halfwidth, halfheight); return 0; diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert_argb.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert_argb.cc index fb9582d627ef..f2fe474f704a 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/convert_argb.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/convert_argb.cc @@ -26,11 +26,13 @@ extern "C" { // Copy ARGB with optional flipping LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_argb || !dst_argb || - width <= 0 || height == 0) { +int ARGBCopy(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -40,27 +42,29 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } - CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb, - width * 4, height); + CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width * 4, + height); return 0; } -// Convert I422 to ARGB with matrix -static int I420ToARGBMatrix(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, +// Convert I420 to ARGB with matrix +static int I420ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, const struct YuvConstants* yuvconstants, - int width, int height) { + int width, + int height) { int y; - void (*I422ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = I422ToARGBRow_C; - if (!src_y || !src_u || !src_v || !dst_argb || - width <= 0 || height == 0) { + void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToARGBRow_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
@@ -93,13 +97,12 @@ static int I420ToARGBMatrix(const uint8* src_y, int src_stride_y, } } #endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_DSPR2; +#if defined(HAS_I422TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToARGBRow = I422ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I422ToARGBRow = I422ToARGBRow_MSA; + } } #endif @@ -117,111 +120,130 @@ static int I420ToARGBMatrix(const uint8* src_y, int src_stride_y, // Convert I420 to ARGB. LIBYUV_API -int I420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return I420ToARGBMatrix(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_argb, dst_stride_argb, - &kYuvI601Constants, - width, height); +int I420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvI601Constants, width, height); } // Convert I420 to ABGR. LIBYUV_API -int I420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height) { - return I420ToARGBMatrix(src_y, src_stride_y, - src_v, src_stride_v, // Swap U and V - src_u, src_stride_u, - dst_abgr, dst_stride_abgr, +int I420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I420ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuI601Constants, // Use Yvu matrix width, height); } // Convert J420 to ARGB. LIBYUV_API -int J420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return I420ToARGBMatrix(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_argb, dst_stride_argb, - &kYuvJPEGConstants, - width, height); +int J420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvJPEGConstants, width, height); } // Convert J420 to ABGR. 
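All of the I420To{ARGB,ABGR} entry points above are now thin wrappers over I420ToARGBMatrix: the colour standard (BT.601, BT.709, JPEG full range) is just a constants table, and ABGR output reuses the ARGB kernel by swapping the U and V plane arguments and passing the mirrored YVU table so red and blue land in the opposite byte lanes. A compilable sketch of that wrapper shape; MyYuvConstants, MyConvertMatrix, MyToARGB, MyToABGR, kMyYuv601 and kMyYvu601 are hypothetical stand-ins:

#include <stdint.h>

struct MyYuvConstants { int id; };                   /* like YuvConstants */
static const struct MyYuvConstants kMyYuv601 = {0};  /* kYuvI601Constants */
static const struct MyYuvConstants kMyYvu601 = {1};  /* kYvuI601Constants */

/* The one real worker, analogous to I420ToARGBMatrix(); the conversion
   body is elided because only the argument plumbing matters here. */
static int MyConvertMatrix(const uint8_t* y, int sy, const uint8_t* u, int su,
                           const uint8_t* v, int sv, uint8_t* out, int so,
                           const struct MyYuvConstants* m, int w, int h) {
  if (!y || !u || !v || !out || w <= 0 || h == 0) return -1;
  (void)sy; (void)su; (void)sv; (void)so; (void)m;
  return 0;
}

/* ARGB: U and V in order, normal YUV table. */
int MyToARGB(const uint8_t* y, int sy, const uint8_t* u, int su,
             const uint8_t* v, int sv, uint8_t* out, int so, int w, int h) {
  return MyConvertMatrix(y, sy, u, su, v, sv, out, so, &kMyYuv601, w, h);
}

/* ABGR: same kernel, U and V swapped, mirrored (YVU) table, so the
   red and blue lanes trade places in the output. */
int MyToABGR(const uint8_t* y, int sy, const uint8_t* u, int su,
             const uint8_t* v, int sv, uint8_t* out, int so, int w, int h) {
  return MyConvertMatrix(y, sy, v, sv, u, su, out, so, &kMyYvu601, w, h);
}

This is why the diff can delete so much duplicated body text: only the matrix worker carries the CPU dispatch and the row loop.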
LIBYUV_API -int J420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height) { - return I420ToARGBMatrix(src_y, src_stride_y, - src_v, src_stride_v, // Swap U and V - src_u, src_stride_u, - dst_abgr, dst_stride_abgr, +int J420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I420ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuJPEGConstants, // Use Yvu matrix width, height); } // Convert H420 to ARGB. LIBYUV_API -int H420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return I420ToARGBMatrix(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_argb, dst_stride_argb, - &kYuvH709Constants, - width, height); +int H420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvH709Constants, width, height); } // Convert H420 to ABGR. LIBYUV_API -int H420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height) { - return I420ToARGBMatrix(src_y, src_stride_y, - src_v, src_stride_v, // Swap U and V - src_u, src_stride_u, - dst_abgr, dst_stride_abgr, +int H420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I420ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuH709Constants, // Use Yvu matrix width, height); } // Convert I422 to ARGB with matrix -static int I422ToARGBMatrix(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, +static int I422ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, const struct YuvConstants* yuvconstants, - int width, int height) { + int width, + int height) { int y; - void (*I422ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = I422ToARGBRow_C; - if (!src_y || !src_u || !src_v || - !dst_argb || - width <= 0 || height == 0) { + void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToARGBRow_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
@@ -231,10 +253,8 @@ static int I422ToARGBMatrix(const uint8* src_y, int src_stride_y, dst_stride_argb = -dst_stride_argb; } // Coalesce rows. - if (src_stride_y == width && - src_stride_u * 2 == width && - src_stride_v * 2 == width && - dst_stride_argb == width * 4) { + if (src_stride_y == width && src_stride_u * 2 == width && + src_stride_v * 2 == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; @@ -263,13 +283,12 @@ static int I422ToARGBMatrix(const uint8* src_y, int src_stride_y, } } #endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_DSPR2; +#if defined(HAS_I422TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToARGBRow = I422ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I422ToARGBRow = I422ToARGBRow_MSA; + } } #endif @@ -285,111 +304,380 @@ static int I422ToARGBMatrix(const uint8* src_y, int src_stride_y, // Convert I422 to ARGB. LIBYUV_API -int I422ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return I422ToARGBMatrix(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_argb, dst_stride_argb, - &kYuvI601Constants, - width, height); +int I422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I422ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvI601Constants, width, height); } // Convert I422 to ABGR. LIBYUV_API -int I422ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height) { - return I422ToARGBMatrix(src_y, src_stride_y, - src_v, src_stride_v, // Swap U and V - src_u, src_stride_u, - dst_abgr, dst_stride_abgr, +int I422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I422ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuI601Constants, // Use Yvu matrix width, height); } // Convert J422 to ARGB. 
LIBYUV_API -int J422ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return I422ToARGBMatrix(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_argb, dst_stride_argb, - &kYuvJPEGConstants, - width, height); +int J422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I422ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvJPEGConstants, width, height); } // Convert J422 to ABGR. LIBYUV_API -int J422ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height) { - return I422ToARGBMatrix(src_y, src_stride_y, - src_v, src_stride_v, // Swap U and V - src_u, src_stride_u, - dst_abgr, dst_stride_abgr, +int J422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I422ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuJPEGConstants, // Use Yvu matrix width, height); } // Convert H422 to ARGB. LIBYUV_API -int H422ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return I422ToARGBMatrix(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_argb, dst_stride_argb, - &kYuvH709Constants, - width, height); +int H422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I422ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvH709Constants, width, height); } // Convert H422 to ABGR. LIBYUV_API -int H422ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height) { - return I422ToARGBMatrix(src_y, src_stride_y, - src_v, src_stride_v, // Swap U and V - src_u, src_stride_u, - dst_abgr, dst_stride_abgr, +int H422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I422ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, + &kYvuH709Constants, // Use Yvu matrix + width, height); +} + +// Convert 10 bit YUV to ARGB with matrix +// TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to +// multiply 10 bit yuv into high bits to allow any number of bits. 
+static int I010ToAR30Matrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I210ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf, + const uint16_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I210ToAR30Row_C; + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; + dst_stride_ar30 = -dst_stride_ar30; + } +#if defined(HAS_I210TOAR30ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I210ToAR30Row = I210ToAR30Row_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I210ToAR30Row = I210ToAR30Row_SSSE3; + } + } +#endif +#if defined(HAS_I210TOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I210ToAR30Row = I210ToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I210ToAR30Row = I210ToAR30Row_AVX2; + } + } +#endif + for (y = 0; y < height; ++y) { + I210ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + +// Convert I010 to AR30. +LIBYUV_API +int I010ToAR30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height) { + return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_ar30, dst_stride_ar30, + &kYuvI601Constants, width, height); +} + +// Convert H010 to AR30. +LIBYUV_API +int H010ToAR30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height) { + return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_ar30, dst_stride_ar30, + &kYuvH709Constants, width, height); +} + +// Convert I010 to AB30. +LIBYUV_API +int I010ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height) { + return I010ToAR30Matrix(src_y, src_stride_y, src_v, src_stride_v, src_u, + src_stride_u, dst_ab30, dst_stride_ab30, + &kYvuI601Constants, width, height); +} + +// Convert H010 to AB30. 
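The new I010ToAR30 paths above write AR30 pixels. Assuming the layout used by libyuv's AR30 row kernels (a little-endian 32-bit word with 10 bits each of B, G, R from low to high and a 2-bit alpha on top, so AB30 simply swaps the R and B fields), packing looks roughly like this; PackAR30 and Widen8To10 are illustrative helpers:

#include <stdint.h>

/* Pack 10-bit channels into AR30: B in bits 0..9, G in 10..19,
   R in 20..29, 2-bit alpha in 30..31. */
uint32_t PackAR30(uint32_t b10, uint32_t g10, uint32_t r10, uint32_t a2) {
  return b10 | (g10 << 10) | (r10 << 20) | (a2 << 30);
}

/* Widen an 8-bit value to 10 bits by replicating the top bits,
   so 0xff maps to 0x3ff. */
uint32_t Widen8To10(uint8_t v) {
  return ((uint32_t)v << 2) | (v >> 6);
}

I010 sources are already 10-bit, so the I210ToAR30Row kernels named in the diff can shift the converted values straight into place; there is no source alpha, so it is set to fully opaque (3).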
+LIBYUV_API +int H010ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height) { + return I010ToAR30Matrix(src_y, src_stride_y, src_v, src_stride_v, src_u, + src_stride_u, dst_ab30, dst_stride_ab30, + &kYvuH709Constants, width, height); +} + +// Convert 10 bit YUV to ARGB with matrix +static int I010ToARGBMatrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I210ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf, + const uint16_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I210ToARGBRow_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } +#if defined(HAS_I210TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I210ToARGBRow = I210ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I210ToARGBRow = I210ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I210TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I210ToARGBRow = I210ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I210ToARGBRow = I210ToARGBRow_AVX2; + } + } +#endif + for (y = 0; y < height; ++y) { + I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + +// Convert I010 to ARGB. +LIBYUV_API +int I010ToARGB(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvI601Constants, width, height); +} + +// Convert I010 to ABGR. +LIBYUV_API +int I010ToABGR(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I010ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, + &kYvuI601Constants, // Use Yvu matrix + width, height); +} + +// Convert H010 to ARGB. +LIBYUV_API +int H010ToARGB(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvH709Constants, width, height); +} + +// Convert H010 to ABGR. 
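Note the row loop shared by I010ToAR30Matrix and I010ToARGBMatrix above: because I010 is 4:2:0, one chroma row serves two luma rows, and the "if (y & 1)" step advances the chroma pointers only on odd rows. The same walking pattern in isolation, with an illustrative name:

#include <stdint.h>

/* Walk a 4:2:0 image: chroma advances every second luma row. */
void WalkRows420(const uint8_t* src_y, int stride_y,
                 const uint8_t* src_u, int stride_u,
                 const uint8_t* src_v, int stride_v, int height) {
  int y;
  for (y = 0; y < height; ++y) {
    /* ...convert one row from src_y/src_u/src_v here... */
    src_y += stride_y;
    if (y & 1) {          /* odd luma row done: move to next chroma row */
      src_u += stride_u;
      src_v += stride_v;
    }
  }
}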
+LIBYUV_API +int H010ToABGR(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I010ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuH709Constants, // Use Yvu matrix width, height); } // Convert I444 to ARGB with matrix -static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, +static int I444ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, const struct YuvConstants* yuvconstants, - int width, int height) { + int width, + int height) { int y; - void (*I444ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = I444ToARGBRow_C; - if (!src_y || !src_u || !src_v || - !dst_argb || - width <= 0 || height == 0) { + void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I444ToARGBRow_C; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -399,9 +687,7 @@ static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y, dst_stride_argb = -dst_stride_argb; } // Coalesce rows. - if (src_stride_y == width && - src_stride_u == width && - src_stride_v == width && + if (src_stride_y == width && src_stride_u == width && src_stride_v == width && dst_stride_argb == width * 4) { width *= height; height = 1; @@ -431,6 +717,14 @@ static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I444TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I444ToARGBRow = I444ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I444ToARGBRow = I444ToARGBRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); @@ -444,138 +738,81 @@ static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y, // Convert I444 to ARGB. LIBYUV_API -int I444ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return I444ToARGBMatrix(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_argb, dst_stride_argb, - &kYuvI601Constants, - width, height); +int I444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvI601Constants, width, height); } // Convert I444 to ABGR. 
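The "Coalesce rows." blocks these hunks reformat are a small but effective optimization: when every plane's stride equals its visible row width, the frame is contiguous in memory, so it can be processed as a single row of width*height pixels and the per-row dispatch cost is paid once. A sketch over a single plane, assuming width*height fits in an int as the library does; CopyCoalesced is an illustrative name:

#include <stdint.h>
#include <string.h>

void CopyCoalesced(const uint8_t* src, int src_stride,
                   uint8_t* dst, int dst_stride, int width, int height) {
  int y;
  /* Coalesce rows: contiguous planes collapse to one long row. */
  if (src_stride == width && dst_stride == width) {
    width *= height;
    height = 1;
    src_stride = dst_stride = 0;
  }
  for (y = 0; y < height; ++y) {
    memcpy(dst, src, (size_t)width);
    src += src_stride;
    dst += dst_stride;
  }
}

I444ToARGBMatrix above applies the same idea across four strides at once, which is why its condition also checks dst_stride_argb == width * 4.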
LIBYUV_API -int I444ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height) { - return I444ToARGBMatrix(src_y, src_stride_y, - src_v, src_stride_v, // Swap U and V - src_u, src_stride_u, - dst_abgr, dst_stride_abgr, +int I444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return I444ToARGBMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuI601Constants, // Use Yvu matrix width, height); } // Convert J444 to ARGB. LIBYUV_API -int J444ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return I444ToARGBMatrix(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_argb, dst_stride_argb, - &kYuvJPEGConstants, - width, height); -} - -// Convert I411 to ARGB. -LIBYUV_API -int I411ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - int y; - void (*I411ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = I411ToARGBRow_C; - if (!src_y || !src_u || !src_v || - !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - // Coalesce rows. - if (src_stride_y == width && - src_stride_u * 4 == width && - src_stride_v * 4 == width && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; - } -#if defined(HAS_I411TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - I411ToARGBRow = I411ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I411ToARGBRow = I411ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_I411TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - I411ToARGBRow = I411ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I411ToARGBRow = I411ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I411TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I411ToARGBRow = I411ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I411ToARGBRow = I411ToARGBRow_NEON; - } - } -#endif - - for (y = 0; y < height; ++y) { - I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvI601Constants, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - } - return 0; +int J444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + &kYuvJPEGConstants, width, height); } // Convert I420 with Alpha to preattenuated ARGB. 
-static int I420AlphaToARGBMatrix(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - const uint8* src_a, int src_stride_a, - uint8* dst_argb, int dst_stride_argb, +static int I420AlphaToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, const struct YuvConstants* yuvconstants, - int width, int height, int attenuate) { + int width, + int height, + int attenuate) { int y; - void (*I422AlphaToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, + void (*I422AlphaToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, const uint8_t* a_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) = I422AlphaToARGBRow_C; - void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, + void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - if (!src_y || !src_u || !src_v || !dst_argb || - width <= 0 || height == 0) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -608,13 +845,12 @@ static int I420AlphaToARGBMatrix(const uint8* src_y, int src_stride_y, } } #endif -#if defined(HAS_I422ALPHATOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422AlphaToARGBRow = I422AlphaToARGBRow_DSPR2; +#if defined(HAS_I422ALPHATOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422AlphaToARGBRow = I422AlphaToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I422AlphaToARGBRow = I422AlphaToARGBRow_MSA; + } } #endif #if defined(HAS_ARGBATTENUATEROW_SSSE3) @@ -641,6 +877,14 @@ static int I420AlphaToARGBMatrix(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_ARGBATTENUATEROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants, @@ -661,49 +905,59 @@ static int I420AlphaToARGBMatrix(const uint8* src_y, int src_stride_y, // Convert I420 with Alpha to ARGB. 
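I420AlphaToARGBMatrix above optionally premultiplies ("attenuates") each output pixel by its alpha through an ARGBAttenuateRow kernel, for which this diff adds an MSA variant. The textbook per-pixel operation is sketched below; libyuv's actual kernels may round slightly differently, so treat this as an approximation, and AttenuateRow is an illustrative name:

#include <stdint.h>

/* Premultiply B, G, R by alpha in place; A itself is unchanged.
   argb points at width pixels in B,G,R,A byte order. */
void AttenuateRow(uint8_t* argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t a = argb[3];
    argb[0] = (uint8_t)((argb[0] * a + 127) / 255); /* B */
    argb[1] = (uint8_t)((argb[1] * a + 127) / 255); /* G */
    argb[2] = (uint8_t)((argb[2] * a + 127) / 255); /* R */
    argb += 4;
  }
}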
LIBYUV_API -int I420AlphaToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - const uint8* src_a, int src_stride_a, - uint8* dst_argb, int dst_stride_argb, - int width, int height, int attenuate) { - return I420AlphaToARGBMatrix(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - src_a, src_stride_a, - dst_argb, dst_stride_argb, - &kYuvI601Constants, - width, height, attenuate); +int I420AlphaToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + int attenuate) { + return I420AlphaToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, src_a, src_stride_a, dst_argb, + dst_stride_argb, &kYuvI601Constants, width, + height, attenuate); } // Convert I420 with Alpha to ABGR. LIBYUV_API -int I420AlphaToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - const uint8* src_a, int src_stride_a, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height, int attenuate) { - return I420AlphaToARGBMatrix(src_y, src_stride_y, - src_v, src_stride_v, // Swap U and V - src_u, src_stride_u, - src_a, src_stride_a, - dst_abgr, dst_stride_abgr, - &kYvuI601Constants, // Use Yvu matrix - width, height, attenuate); +int I420AlphaToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height, + int attenuate) { + return I420AlphaToARGBMatrix( + src_y, src_stride_y, src_v, src_stride_v, // Swap U and V + src_u, src_stride_u, src_a, src_stride_a, dst_abgr, dst_stride_abgr, + &kYvuI601Constants, // Use Yvu matrix + width, height, attenuate); } // Convert I400 to ARGB. LIBYUV_API -int I400ToARGB(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int I400ToARGB(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*I400ToARGBRow)(const uint8* y_buf, - uint8* rgb_buf, - int width) = I400ToARGBRow_C; - if (!src_y || !dst_argb || - width <= 0 || height == 0) { + void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf, int width) = + I400ToARGBRow_C; + if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -713,8 +967,7 @@ int I400ToARGB(const uint8* src_y, int src_stride_y, dst_stride_argb = -dst_stride_argb; } // Coalesce rows. - if (src_stride_y == width && - dst_stride_argb == width * 4) { + if (src_stride_y == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = dst_stride_argb = 0; @@ -743,6 +996,14 @@ int I400ToARGB(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I400TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I400ToARGBRow = I400ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + I400ToARGBRow = I400ToARGBRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { I400ToARGBRow(src_y, dst_argb, width); @@ -754,14 +1015,16 @@ int I400ToARGB(const uint8* src_y, int src_stride_y, // Convert J400 to ARGB. 
LIBYUV_API -int J400ToARGB(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int J400ToARGB(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) = + void (*J400ToARGBRow)(const uint8_t* src_y, uint8_t* dst_argb, int width) = J400ToARGBRow_C; - if (!src_y || !dst_argb || - width <= 0 || height == 0) { + if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -771,8 +1034,7 @@ int J400ToARGB(const uint8* src_y, int src_stride_y, src_stride_y = -src_stride_y; } // Coalesce rows. - if (src_stride_y == width && - dst_stride_argb == width * 4) { + if (src_stride_y == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = dst_stride_argb = 0; @@ -800,6 +1062,14 @@ int J400ToARGB(const uint8* src_y, int src_stride_y, J400ToARGBRow = J400ToARGBRow_NEON; } } +#endif +#if defined(HAS_J400TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + J400ToARGBRow = J400ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + J400ToARGBRow = J400ToARGBRow_MSA; + } + } #endif for (y = 0; y < height; ++y) { J400ToARGBRow(src_y, dst_argb, width); @@ -810,85 +1080,89 @@ int J400ToARGB(const uint8* src_y, int src_stride_y, } // Shuffle table for converting BGRA to ARGB. -static uvec8 kShuffleMaskBGRAToARGB = { - 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u -}; +static const uvec8 kShuffleMaskBGRAToARGB = { + 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u}; // Shuffle table for converting ABGR to ARGB. -static uvec8 kShuffleMaskABGRToARGB = { - 2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u -}; +static const uvec8 kShuffleMaskABGRToARGB = { + 2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u}; // Shuffle table for converting RGBA to ARGB. -static uvec8 kShuffleMaskRGBAToARGB = { - 1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u -}; +static const uvec8 kShuffleMaskRGBAToARGB = { + 1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u}; // Convert BGRA to ARGB. LIBYUV_API -int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return ARGBShuffle(src_bgra, src_stride_bgra, - dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskBGRAToARGB), - width, height); +int BGRAToARGB(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, + (const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height); } // Convert ARGB to BGRA (same as BGRAToARGB). LIBYUV_API -int ARGBToBGRA(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return ARGBShuffle(src_bgra, src_stride_bgra, - dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskBGRAToARGB), - width, height); +int ARGBToBGRA(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, + (const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height); } // Convert ABGR to ARGB. 
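The kShuffleMask tables above (made const by this diff, so they can live in read-only data) drive ARGBShuffle: each output byte within a 16-byte group is fetched from the source offset named by the mask, which is essentially what pshufb-style byte-shuffle instructions do per lane. A scalar equivalent under the same 16-byte granularity, assuming width is a multiple of 4 pixels for brevity; ShuffleRow is an illustrative name and the mask values are copied from the BGRA table in the diff:

#include <stdint.h>

static const uint8_t kMaskBGRAToARGB[16] = {3,  2,  1, 0, 7,  6,  5,  4,
                                            11, 10, 9, 8, 15, 14, 13, 12};

/* Reorder bytes within each 16-byte (4 pixel) group per the mask. */
void ShuffleRow(const uint8_t* src, uint8_t* dst,
                const uint8_t mask[16], int groups) {
  int i, b;
  for (i = 0; i < groups; ++i) {  /* groups = width / 4 */
    for (b = 0; b < 16; ++b) {
      dst[b] = src[mask[b]];
    }
    src += 16;
    dst += 16;
  }
}

Calling ShuffleRow(src_bgra, dst_argb, kMaskBGRAToARGB, width / 4) performs the same reordering BGRAToARGB requests from ARGBShuffle above, one row at a time.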
LIBYUV_API -int ABGRToARGB(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return ARGBShuffle(src_abgr, src_stride_abgr, - dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskABGRToARGB), - width, height); +int ABGRToARGB(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, + (const uint8_t*)(&kShuffleMaskABGRToARGB), width, height); } // Convert ARGB to ABGR (same as ABGRToARGB). LIBYUV_API -int ARGBToABGR(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return ARGBShuffle(src_abgr, src_stride_abgr, - dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskABGRToARGB), - width, height); +int ARGBToABGR(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, + (const uint8_t*)(&kShuffleMaskABGRToARGB), width, height); } // Convert RGBA to ARGB. LIBYUV_API -int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return ARGBShuffle(src_rgba, src_stride_rgba, - dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskRGBAToARGB), - width, height); +int RGBAToARGB(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return ARGBShuffle(src_rgba, src_stride_rgba, dst_argb, dst_stride_argb, + (const uint8_t*)(&kShuffleMaskRGBAToARGB), width, height); } // Convert RGB24 to ARGB. LIBYUV_API -int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int RGB24ToARGB(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = + void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RGB24ToARGBRow_C; - if (!src_rgb24 || !dst_argb || - width <= 0 || height == 0) { + if (!src_rgb24 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -898,8 +1172,7 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, src_stride_rgb24 = -src_stride_rgb24; } // Coalesce rows. - if (src_stride_rgb24 == width * 3 && - dst_stride_argb == width * 4) { + if (src_stride_rgb24 == width * 3 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_rgb24 = dst_stride_argb = 0; @@ -920,6 +1193,14 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, } } #endif +#if defined(HAS_RGB24TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { RGB24ToARGBRow(src_rgb24, dst_argb, width); @@ -931,14 +1212,16 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, // Convert RAW to ARGB.
LIBYUV_API -int RAWToARGB(const uint8* src_raw, int src_stride_raw, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int RAWToARGB(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) = + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; - if (!src_raw || !dst_argb || - width <= 0 || height == 0) { + if (!src_raw || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -948,8 +1231,7 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw, src_stride_raw = -src_stride_raw; } // Coalesce rows. - if (src_stride_raw == width * 3 && - dst_stride_argb == width * 4) { + if (src_stride_raw == width * 3 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_raw = dst_stride_argb = 0; @@ -970,6 +1252,14 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw, } } #endif +#if defined(HAS_RAWTOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RAWToARGBRow = RAWToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { RAWToARGBRow(src_raw, dst_argb, width); @@ -981,14 +1271,16 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw, // Convert RGB565 to ARGB. LIBYUV_API -int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int RGB565ToARGB(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) = - RGB565ToARGBRow_C; - if (!src_rgb565 || !dst_argb || - width <= 0 || height == 0) { + void (*RGB565ToARGBRow)(const uint8_t* src_rgb565, uint8_t* dst_argb, + int width) = RGB565ToARGBRow_C; + if (!src_rgb565 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -998,8 +1290,7 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, src_stride_rgb565 = -src_stride_rgb565; } // Coalesce rows. - if (src_stride_rgb565 == width * 2 && - dst_stride_argb == width * 4) { + if (src_stride_rgb565 == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_rgb565 = dst_stride_argb = 0; @@ -1028,6 +1319,14 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, } } #endif +#if defined(HAS_RGB565TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RGB565ToARGBRow = RGB565ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RGB565ToARGBRow = RGB565ToARGBRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { RGB565ToARGBRow(src_rgb565, dst_argb, width); @@ -1039,14 +1338,16 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, // Convert ARGB1555 to ARGB. 
LIBYUV_API -int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int ARGB1555ToARGB(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb, - int width) = ARGB1555ToARGBRow_C; - if (!src_argb1555 || !dst_argb || - width <= 0 || height == 0) { + void (*ARGB1555ToARGBRow)(const uint8_t* src_argb1555, uint8_t* dst_argb, + int width) = ARGB1555ToARGBRow_C; + if (!src_argb1555 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1056,8 +1357,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, src_stride_argb1555 = -src_stride_argb1555; } // Coalesce rows. - if (src_stride_argb1555 == width * 2 && - dst_stride_argb == width * 4) { + if (src_stride_argb1555 == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb1555 = dst_stride_argb = 0; @@ -1086,6 +1386,14 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, } } #endif +#if defined(HAS_ARGB1555TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGB1555ToARGBRow = ARGB1555ToARGBRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { ARGB1555ToARGBRow(src_argb1555, dst_argb, width); @@ -1097,14 +1405,16 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, // Convert ARGB4444 to ARGB. LIBYUV_API -int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int ARGB4444ToARGB(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb, - int width) = ARGB4444ToARGBRow_C; - if (!src_argb4444 || !dst_argb || - width <= 0 || height == 0) { + void (*ARGB4444ToARGBRow)(const uint8_t* src_argb4444, uint8_t* dst_argb, + int width) = ARGB4444ToARGBRow_C; + if (!src_argb4444 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1114,8 +1424,7 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444, src_stride_argb4444 = -src_stride_argb4444; } // Coalesce rows. - if (src_stride_argb4444 == width * 2 && - dst_stride_argb == width * 4) { + if (src_stride_argb4444 == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb4444 = dst_stride_argb = 0; @@ -1144,6 +1453,14 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444, } } #endif +#if defined(HAS_ARGB4444TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { ARGB4444ToARGBRow(src_argb4444, dst_argb, width); @@ -1153,20 +1470,117 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444, return 0; } -// Convert NV12 to ARGB. +// Convert AR30 to ARGB. 
LIBYUV_API -int NV12ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int AR30ToARGB(const uint8_t* src_ar30, + int src_stride_ar30, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*NV12ToARGBRow)(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = NV12ToARGBRow_C; - if (!src_y || !src_uv || !dst_argb || - width <= 0 || height == 0) { + if (!src_ar30 || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_ar30 = src_ar30 + (height - 1) * src_stride_ar30; + src_stride_ar30 = -src_stride_ar30; + } + // Coalesce rows. + if (src_stride_ar30 == width * 4 && dst_stride_argb == width * 4) { + width *= height; + height = 1; + src_stride_ar30 = dst_stride_argb = 0; + } + for (y = 0; y < height; ++y) { + AR30ToARGBRow_C(src_ar30, dst_argb, width); + src_ar30 += src_stride_ar30; + dst_argb += dst_stride_argb; + } + return 0; +} + +// Convert AR30 to ABGR. +LIBYUV_API +int AR30ToABGR(const uint8_t* src_ar30, + int src_stride_ar30, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + int y; + if (!src_ar30 || !dst_abgr || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_ar30 = src_ar30 + (height - 1) * src_stride_ar30; + src_stride_ar30 = -src_stride_ar30; + } + // Coalesce rows. + if (src_stride_ar30 == width * 4 && dst_stride_abgr == width * 4) { + width *= height; + height = 1; + src_stride_ar30 = dst_stride_abgr = 0; + } + for (y = 0; y < height; ++y) { + AR30ToABGRRow_C(src_ar30, dst_abgr, width); + src_ar30 += src_stride_ar30; + dst_abgr += dst_stride_abgr; + } + return 0; +} + +// Convert AR30 to AB30. +LIBYUV_API +int AR30ToAB30(const uint8_t* src_ar30, + int src_stride_ar30, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height) { + int y; + if (!src_ar30 || !dst_ab30 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_ar30 = src_ar30 + (height - 1) * src_stride_ar30; + src_stride_ar30 = -src_stride_ar30; + } + // Coalesce rows. + if (src_stride_ar30 == width * 4 && dst_stride_ab30 == width * 4) { + width *= height; + height = 1; + src_stride_ar30 = dst_stride_ab30 = 0; + } + for (y = 0; y < height; ++y) { + AR30ToAB30Row_C(src_ar30, dst_ab30, width); + src_ar30 += src_stride_ar30; + dst_ab30 += dst_stride_ab30; + } + return 0; +} + +// Convert NV12 to ARGB with matrix +static int NV12ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*NV12ToARGBRow)( + const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; + if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
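The new AR30 converters above have no SIMD paths yet; they call the _C row kernels directly in the loop. Under the field layout assumed earlier (B, G, R in rising 10-bit fields, 2-bit alpha on top), narrowing 10 bits to 8 keeps the top 8 bits of each field, and the 2-bit alpha is widened by replication. A scalar approximation of what AR30ToARGBRow_C does; the sketch name and exact rounding are assumptions:

#include <stdint.h>
#include <string.h>

/* Convert one row of AR30 to 8-bit ARGB (B,G,R,A byte order). */
void AR30ToARGBRowSketch(const uint8_t* src_ar30, uint8_t* dst_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t p;
    memcpy(&p, src_ar30, 4);                   /* little-endian pixel */
    dst_argb[0] = (uint8_t)((p >> 2) & 0xff);  /* B: top 8 of 10 bits */
    dst_argb[1] = (uint8_t)((p >> 12) & 0xff); /* G */
    dst_argb[2] = (uint8_t)((p >> 22) & 0xff); /* R */
    dst_argb[3] = (uint8_t)((p >> 30) * 0x55); /* A: 0..3 -> 0..255 */
    src_ar30 += 4;
    dst_argb += 4;
  }
}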
@@ -1199,9 +1613,17 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_NV12TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + NV12ToARGBRow = NV12ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + NV12ToARGBRow = NV12ToARGBRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { - NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width); + NV12ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { @@ -1211,20 +1633,21 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y, return 0; } -// Convert NV21 to ARGB. -LIBYUV_API -int NV21ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +// Convert NV21 to ARGB with matrix +static int NV21ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { int y; - void (*NV21ToARGBRow)(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = NV21ToARGBRow_C; - if (!src_y || !src_uv || !dst_argb || - width <= 0 || height == 0) { + void (*NV21ToARGBRow)( + const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C; + if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1257,11 +1680,136 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_NV21TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + NV21ToARGBRow = NV21ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + NV21ToARGBRow = NV21ToARGBRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { - NV21ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width); + NV21ToARGBRow(src_y, src_vu, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; + if (y & 1) { + src_vu += src_stride_vu; + } + } + return 0; +} + +// Convert NV12 to ARGB. +LIBYUV_API +int NV12ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return NV12ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, + dst_stride_argb, &kYuvI601Constants, width, height); +} + +// Convert NV21 to ARGB. +LIBYUV_API +int NV21ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return NV21ToARGBMatrix(src_y, src_stride_y, src_vu, src_stride_vu, dst_argb, + dst_stride_argb, &kYuvI601Constants, width, height); +} + +// Convert NV12 to ABGR. +// To output ABGR instead of ARGB swap the UV and use a mirrored yuv matrix. +// To swap the UV use NV12 instead of NV21. +LIBYUV_API +int NV12ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return NV21ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_abgr, + dst_stride_abgr, &kYvuI601Constants, width, height); +} + +// Convert NV21 to ABGR.
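The ABGR wrappers rely on a symmetry instead of dedicated row kernels: ABGR is ARGB with R and B exchanged, and that exchange falls out of feeding the chroma planes in swapped order through a matrix whose chroma coefficients are mirrored (the kYvu* tables). A single-pixel BT.601 sketch of why this works; full-range coefficients are used for brevity, whereas the real kYuv*/kYvu* constants also fold in the studio-swing luma scale:

    #include <stdint.h>

    static uint8_t clamp8(float x) { return x < 0 ? 0 : x > 255 ? 255 : (uint8_t)x; }

    /* k = {rv, gu, gv, bu}: the four chroma gains of the conversion matrix. */
    static const float kYuv601[4] = {1.402f, 0.344f, 0.714f, 1.772f};
    static const float kYvu601[4] = {1.772f, 0.714f, 0.344f, 1.402f}; /* mirrored */

    static void OnePixel(uint8_t y, uint8_t u, uint8_t v,
                         const float k[4], uint8_t out[3]) {
      out[0] = clamp8(y + k[0] * (v - 128));                     /* "R" slot */
      out[1] = clamp8(y - k[1] * (u - 128) - k[2] * (v - 128));  /* "G" slot */
      out[2] = clamp8(y + k[3] * (u - 128));                     /* "B" slot */
    }

    /* OnePixel(y, u, v, kYuv601, p) leaves R,G,B in p[0..2];
     * OnePixel(y, v, u, kYvu601, p) leaves B,G,R in the same slots,
     * i.e. the buffer comes out ABGR-ordered rather than ARGB-ordered. */

That is why NV12ToABGR above dispatches to NV21ToARGBMatrix with kYvuI601Constants, and why the NV21ToABGR wrapper that follows dispatches to NV12ToARGBMatrix.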
+LIBYUV_API +int NV21ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return NV12ToARGBMatrix(src_y, src_stride_y, src_vu, src_stride_vu, dst_abgr, + dst_stride_abgr, &kYvuI601Constants, width, height); +} + +// TODO(fbarchard): Consider SSSE3 2 step conversion. +// Convert NV12 to RGB24 with matrix +static int NV12ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*NV12ToRGB24Row)( + const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = NV12ToRGB24Row_C; + if (!src_y || !src_uv || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_NV12TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + NV12ToRGB24Row = NV12ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + NV12ToRGB24Row = NV12ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_NV12TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + NV12ToRGB24Row = NV12ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + NV12ToRGB24Row = NV12ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_NV12TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + NV12ToRGB24Row = NV12ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + NV12ToRGB24Row = NV12ToRGB24Row_AVX2; + } + } +#endif + + for (y = 0; y < height; ++y) { + NV12ToRGB24Row(src_y, src_uv, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; if (y & 1) { src_uv += src_stride_uv; } @@ -1269,19 +1817,109 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y, return 0; } +// Convert NV21 to RGB24 with matrix +static int NV21ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*NV21ToRGB24Row)( + const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = NV21ToRGB24Row_C; + if (!src_y || !src_vu || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; + dst_stride_rgb24 = -dst_stride_rgb24; + } +#if defined(HAS_NV21TORGB24ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + NV21ToRGB24Row = NV21ToRGB24Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + NV21ToRGB24Row = NV21ToRGB24Row_NEON; + } + } +#endif +#if defined(HAS_NV21TORGB24ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + NV21ToRGB24Row = NV21ToRGB24Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + NV21ToRGB24Row = NV21ToRGB24Row_SSSE3; + } + } +#endif +#if defined(HAS_NV21TORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + NV21ToRGB24Row = NV21ToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + NV21ToRGB24Row = NV21ToRGB24Row_AVX2; + } + } +#endif + + for (y = 0; y < height; ++y) { + NV21ToRGB24Row(src_y, src_vu, dst_rgb24, yuvconstants, width); + dst_rgb24 += dst_stride_rgb24; + src_y += src_stride_y; + if (y & 1) { + src_vu += src_stride_vu; + } + } + return 0; +} + +// TODO(fbarchard): NV12ToRAW can be implemented by mirrored matrix. +// Convert NV12 to RGB24. +LIBYUV_API +int NV12ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + return NV12ToRGB24Matrix(src_y, src_stride_y, src_uv, src_stride_uv, + dst_rgb24, dst_stride_rgb24, &kYuvI601Constants, + width, height); +} + +// Convert NV21 to RGB24. +LIBYUV_API +int NV21ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + return NV21ToRGB24Matrix(src_y, src_stride_y, src_vu, src_stride_vu, + dst_rgb24, dst_stride_rgb24, &kYuvI601Constants, + width, height); +} + // Convert M420 to ARGB. LIBYUV_API -int M420ToARGB(const uint8* src_m420, int src_stride_m420, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int M420ToARGB(const uint8_t* src_m420, + int src_stride_m420, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*NV12ToARGBRow)(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = NV12ToARGBRow_C; - if (!src_m420 || !dst_argb || - width <= 0 || height == 0) { + void (*NV12ToARGBRow)( + const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; + if (!src_m420 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1314,6 +1952,14 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420, } } #endif +#if defined(HAS_NV12TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + NV12ToARGBRow = NV12ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + NV12ToARGBRow = NV12ToARGBRow_MSA; + } + } +#endif for (y = 0; y < height - 1; y += 2) { NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, @@ -1332,17 +1978,17 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420, // Convert YUY2 to ARGB. 
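Nearly every converter in this file, including the YUY2 one announced above, picks its row kernel with the same three-step ladder: start from the portable C function, upgrade to the _Any_ SIMD wrapper when the CPU flag is set, and upgrade again to the full-vector kernel when the width divides evenly. In outline, with RowFunc and the Row_* names standing in for whichever kernel family a converter uses:

    RowFunc row = Row_C;            /* always-correct scalar fallback */
    #if defined(HAS_ROW_NEON)
    if (TestCpuFlag(kCpuHasNEON)) {
      row = Row_Any_NEON;           /* any width; handles the ragged tail */
      if (IS_ALIGNED(width, 8)) {
        row = Row_NEON;             /* whole vectors only; fastest path */
      }
    }
    #endif

The MSA blocks this patch adds are one more rung on the same ladder, gated on kCpuHasMSA for MIPS SIMD.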
LIBYUV_API -int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int YUY2ToARGB(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*YUY2ToARGBRow)(const uint8* src_yuy2, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) = + void (*YUY2ToARGBRow)(const uint8_t* src_yuy2, uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, int width) = YUY2ToARGBRow_C; - if (!src_yuy2 || !dst_argb || - width <= 0 || height == 0) { + if (!src_yuy2 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1352,8 +1998,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, src_stride_yuy2 = -src_stride_yuy2; } // Coalesce rows. - if (src_stride_yuy2 == width * 2 && - dst_stride_argb == width * 4) { + if (src_stride_yuy2 == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_yuy2 = dst_stride_argb = 0; @@ -1381,6 +2026,14 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, YUY2ToARGBRow = YUY2ToARGBRow_NEON; } } +#endif +#if defined(HAS_YUY2TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + YUY2ToARGBRow = YUY2ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + YUY2ToARGBRow = YUY2ToARGBRow_MSA; + } + } #endif for (y = 0; y < height; ++y) { YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvI601Constants, width); @@ -1392,17 +2045,17 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, // Convert UYVY to ARGB. LIBYUV_API -int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int UYVYToARGB(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*UYVYToARGBRow)(const uint8* src_uyvy, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) = + void (*UYVYToARGBRow)(const uint8_t* src_uyvy, uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, int width) = UYVYToARGBRow_C; - if (!src_uyvy || !dst_argb || - width <= 0 || height == 0) { + if (!src_uyvy || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1412,8 +2065,7 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, src_stride_uyvy = -src_stride_uyvy; } // Coalesce rows. - if (src_stride_uyvy == width * 2 && - dst_stride_argb == width * 4) { + if (src_stride_uyvy == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_uyvy = dst_stride_argb = 0; @@ -1441,6 +2093,14 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, UYVYToARGBRow = UYVYToARGBRow_NEON; } } +#endif +#if defined(HAS_UYVYTOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + UYVYToARGBRow = UYVYToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + UYVYToARGBRow = UYVYToARGBRow_MSA; + } + } #endif for (y = 0; y < height; ++y) { UYVYToARGBRow(src_uyvy, dst_argb, &kYuvI601Constants, width); @@ -1449,6 +2109,121 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, } return 0; } +static void WeavePixels(const uint8_t* src_u, + const uint8_t* src_v, + int src_pixel_stride_uv, + uint8_t* dst_uv, + int width) { + int i; + for (i = 0; i < width; ++i) { + dst_uv[0] = *src_u; + dst_uv[1] = *src_v; + dst_uv += 2; + src_u += src_pixel_stride_uv; + src_v += src_pixel_stride_uv; + } +} + +// Convert Android420 to ARGB. 
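Android420ToARGBMatrix, defined next, accepts the android.media.Image-style plane description (separate U and V pointers plus a chroma pixel stride) and recognizes the three common layouts before paying for the WeavePixels fallback. The detection amounts to the following sketch, where the enum and helper are illustrative rather than libyuv API:

    #include <stddef.h>
    #include <stdint.h>

    typedef enum { kLayoutI420, kLayoutNV12, kLayoutNV21, kLayoutOther } Layout;

    static Layout Classify(const uint8_t* u, const uint8_t* v,
                           int pixel_stride_uv, int stride_u, int stride_v) {
      ptrdiff_t vu_off = v - u;
      if (pixel_stride_uv == 1) return kLayoutI420;   /* planar U and V */
      if (pixel_stride_uv == 2 && stride_u == stride_v) {
        if (vu_off == 1) return kLayoutNV12;          /* interleaved, U first */
        if (vu_off == -1) return kLayoutNV21;         /* interleaved, V first */
      }
      return kLayoutOther;  /* weave U and V into a temporary NV12 plane */
    }

Only the last case allocates: a halfwidth * 2 * halfheight buffer that WeavePixels fills row by row before the NV12 path runs.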
+LIBYUV_API +int Android420ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + uint8_t* dst_uv; + const ptrdiff_t vu_off = src_v - src_u; + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } + + // I420 + if (src_pixel_stride_uv == 1) { + return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_argb, dst_stride_argb, + yuvconstants, width, height); + // NV21 + } + if (src_pixel_stride_uv == 2 && vu_off == -1 && + src_stride_u == src_stride_v) { + return NV21ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, dst_argb, + dst_stride_argb, yuvconstants, width, height); + // NV12 + } + if (src_pixel_stride_uv == 2 && vu_off == 1 && src_stride_u == src_stride_v) { + return NV12ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, dst_argb, + dst_stride_argb, yuvconstants, width, height); + } + + // General case fallback creates NV12 + align_buffer_64(plane_uv, halfwidth * 2 * halfheight); + dst_uv = plane_uv; + for (y = 0; y < halfheight; ++y) { + WeavePixels(src_u, src_v, src_pixel_stride_uv, dst_uv, halfwidth); + src_u += src_stride_u; + src_v += src_stride_v; + dst_uv += halfwidth * 2; + } + NV12ToARGBMatrix(src_y, src_stride_y, plane_uv, halfwidth * 2, dst_argb, + dst_stride_argb, yuvconstants, width, height); + free_aligned_buffer_64(plane_uv); + return 0; +} + +// Convert Android420 to ARGB. +LIBYUV_API +int Android420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + return Android420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, src_pixel_stride_uv, dst_argb, + dst_stride_argb, &kYuvI601Constants, width, + height); +} + +// Convert Android420 to ABGR. 
+LIBYUV_API +int Android420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height) { + return Android420ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, src_u, + src_stride_u, src_pixel_stride_uv, dst_abgr, + dst_stride_abgr, &kYvuI601Constants, width, + height); +} #ifdef __cplusplus } // extern "C" diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert_from.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert_from.cc index 3b2dca8163a1..6fa253237ee6 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/convert_from.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/convert_from.cc @@ -15,9 +15,9 @@ #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" +#include "libyuv/row.h" #include "libyuv/scale.h" // For ScalePlane() #include "libyuv/video_common.h" -#include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { @@ -30,109 +30,144 @@ static __inline int Abs(int v) { } // I420 To any I4xx YUV format with mirroring. -static int I420ToI4xx(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_y_width, int src_y_height, - int dst_uv_width, int dst_uv_height) { +static int I420ToI4xx(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int src_y_width, + int src_y_height, + int dst_uv_width, + int dst_uv_height) { const int dst_y_width = Abs(src_y_width); const int dst_y_height = Abs(src_y_height); const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1); const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1); - if (src_y_width == 0 || src_y_height == 0 || - dst_uv_width <= 0 || dst_uv_height <= 0) { + if (src_y_width == 0 || src_y_height == 0 || dst_uv_width <= 0 || + dst_uv_height <= 0) { return -1; } if (dst_y) { - ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, - dst_y, dst_stride_y, dst_y_width, dst_y_height, - kFilterBilinear); + ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, + dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); } - ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, - dst_u, dst_stride_u, dst_uv_width, dst_uv_height, - kFilterBilinear); - ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, - dst_v, dst_stride_v, dst_uv_width, dst_uv_height, - kFilterBilinear); + ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, + dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); + ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, + dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); + return 0; +} + +// Convert 8 bit YUV to 10 bit. 
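I420ToI010, declared next, widens each plane with Convert8To16Plane and a scale of 1024 (1 << 10). The C row code of this version effectively replicates each byte before shifting, so the scale selects the output depth while full scale still maps to full scale. A one-sample sketch with a hypothetical helper name:

    #include <stdint.h>

    static uint16_t Widen8To10(uint8_t v) {
      int scale = 1024;                     /* 1 << 10 for 10-bit output */
      scale *= 0x0101;                      /* replicate the byte: v * 257 */
      return (uint16_t)((v * scale) >> 16); /* 0 -> 0, 255 -> 1023 */
    }

A plain shift left by 2 would top out at 1020; replicating the high bits into the low bits makes 255 land on 1023 exactly.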
+LIBYUV_API +int I420ToI010(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + // Convert Y plane. + Convert8To16Plane(src_y, src_stride_y, dst_y, dst_stride_y, 1024, width, + height); + // Convert UV planes. + Convert8To16Plane(src_u, src_stride_u, dst_u, dst_stride_u, 1024, halfwidth, + halfheight); + Convert8To16Plane(src_v, src_stride_v, dst_v, dst_stride_v, 1024, halfwidth, + halfheight); return 0; } // 420 chroma is 1/2 width, 1/2 height // 422 chroma is 1/2 width, 1x height LIBYUV_API -int I420ToI422(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int I420ToI422(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { const int dst_uv_width = (Abs(width) + 1) >> 1; const int dst_uv_height = Abs(height); - return I420ToI4xx(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - dst_uv_width, dst_uv_height); + return I420ToI4xx(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height, dst_uv_width, + dst_uv_height); } // 420 chroma is 1/2 width, 1/2 height // 444 chroma is 1x width, 1x height LIBYUV_API -int I420ToI444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int I420ToI444(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { const int dst_uv_width = Abs(width); const int dst_uv_height = Abs(height); - return I420ToI4xx(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - dst_uv_width, dst_uv_height); -} - -// 420 chroma is 1/2 width, 1/2 height -// 411 chroma is 1/4 width, 1x height -LIBYUV_API -int I420ToI411(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, 
- int width, int height) { - const int dst_uv_width = (Abs(width) + 3) >> 2; - const int dst_uv_height = Abs(height); - return I420ToI4xx(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - dst_uv_width, dst_uv_height); + return I420ToI4xx(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height, dst_uv_width, + dst_uv_height); } // Copy to I400. Source can be I420,422,444,400,NV12,NV21 LIBYUV_API -int I400Copy(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height) { - if (!src_y || !dst_y || - width <= 0 || height == 0) { +int I400Copy(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height) { + if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -146,17 +181,21 @@ int I400Copy(const uint8* src_y, int src_stride_y, } LIBYUV_API -int I422ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_yuy2, int dst_stride_yuy2, - int width, int height) { +int I422ToYUY2(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height) { int y; - void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_yuy2, int width) = + void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_yuy2, int width) = I422ToYUY2Row_C; - if (!src_y || !src_u || !src_v || !dst_yuy2 || - width <= 0 || height == 0) { + if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -166,10 +205,8 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y, dst_stride_yuy2 = -dst_stride_yuy2; } // Coalesce rows. 
- if (src_stride_y == width && - src_stride_u * 2 == width && - src_stride_v * 2 == width && - dst_stride_yuy2 == width * 2) { + if (src_stride_y == width && src_stride_u * 2 == width && + src_stride_v * 2 == width && dst_stride_yuy2 == width * 2) { width *= height; height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0; @@ -182,6 +219,14 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TOYUY2ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToYUY2Row = I422ToYUY2Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToYUY2Row = I422ToYUY2Row_AVX2; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToYUY2Row = I422ToYUY2Row_Any_NEON; @@ -202,17 +247,21 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y, } LIBYUV_API -int I420ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_yuy2, int dst_stride_yuy2, - int width, int height) { +int I420ToYUY2(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height) { int y; - void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_yuy2, int width) = + void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_yuy2, int width) = I422ToYUY2Row_C; - if (!src_y || !src_u || !src_v || !dst_yuy2 || - width <= 0 || height == 0) { + if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -229,6 +278,14 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TOYUY2ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToYUY2Row = I422ToYUY2Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToYUY2Row = I422ToYUY2Row_AVX2; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToYUY2Row = I422ToYUY2Row_Any_NEON; @@ -237,6 +294,14 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TOYUY2ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToYUY2Row = I422ToYUY2Row_Any_MSA; + if (IS_ALIGNED(width, 32)) { + I422ToYUY2Row = I422ToYUY2Row_MSA; + } + } +#endif for (y = 0; y < height - 1; y += 2) { I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); @@ -254,17 +319,21 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y, } LIBYUV_API -int I422ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_uyvy, int dst_stride_uyvy, - int width, int height) { +int I422ToUYVY(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uyvy, + int dst_stride_uyvy, + int width, + int height) { int y; - void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_uyvy, int width) = + void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_uyvy, int width) = I422ToUYVYRow_C; - if (!src_y || !src_u || !src_v || !dst_uyvy || - width <= 0 || height == 0) { + if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
@@ -274,10 +343,8 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y, dst_stride_uyvy = -dst_stride_uyvy; } // Coalesce rows. - if (src_stride_y == width && - src_stride_u * 2 == width && - src_stride_v * 2 == width && - dst_stride_uyvy == width * 2) { + if (src_stride_y == width && src_stride_u * 2 == width && + src_stride_v * 2 == width && dst_stride_uyvy == width * 2) { width *= height; height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0; @@ -290,6 +357,14 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TOUYVYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToUYVYRow = I422ToUYVYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_AVX2; + } + } +#endif #if defined(HAS_I422TOUYVYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToUYVYRow = I422ToUYVYRow_Any_NEON; @@ -298,6 +373,14 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TOUYVYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToUYVYRow = I422ToUYVYRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); @@ -310,17 +393,21 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y, } LIBYUV_API -int I420ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_uyvy, int dst_stride_uyvy, - int width, int height) { +int I420ToUYVY(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uyvy, + int dst_stride_uyvy, + int width, + int height) { int y; - void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_uyvy, int width) = + void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_uyvy, int width) = I422ToUYVYRow_C; - if (!src_y || !src_u || !src_v || !dst_uyvy || - width <= 0 || height == 0) { + if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -337,6 +424,14 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TOUYVYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToUYVYRow = I422ToUYVYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_AVX2; + } + } +#endif #if defined(HAS_I422TOUYVYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToUYVYRow = I422ToUYVYRow_Any_NEON; @@ -345,6 +440,14 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TOUYVYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToUYVYRow = I422ToUYVYRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_MSA; + } + } +#endif for (y = 0; y < height - 1; y += 2) { I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); @@ -363,14 +466,20 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y, // TODO(fbarchard): test negative height for invert. 
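The negative-height convention the TODO refers to is uniform across the library: a caller passes a negative height to request a vertically flipped result, and each converter implements it by retargeting the pointer at the last row and negating the stride so the row loop walks upward. The idiom, extracted:

    if (height < 0) {
      height = -height;
      src = src + (height - 1) * src_stride;  /* start at the bottom row */
      src_stride = -src_stride;               /* each += moves one row up */
    }

Converters that read two source rows per pass (the 420 cases) apply the same trick to the destination instead, as NV12ToRGB24Matrix earlier in this patch does.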
LIBYUV_API -int I420ToNV12(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_y || !dst_uv || - width <= 0 || height == 0) { +int I420ToNV12(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + if (!src_y || !src_u || !src_v || !dst_y || !dst_uv || width <= 0 || + height == 0) { return -1; } int halfwidth = (width + 1) / 2; @@ -378,44 +487,47 @@ int I420ToNV12(const uint8* src_y, int src_stride_y, if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } - MergeUVPlane(src_u, src_stride_u, - src_v, src_stride_v, - dst_uv, dst_stride_uv, + MergeUVPlane(src_u, src_stride_u, src_v, src_stride_v, dst_uv, dst_stride_uv, halfwidth, halfheight); return 0; } LIBYUV_API -int I420ToNV21(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height) { - return I420ToNV12(src_y, src_stride_y, - src_v, src_stride_v, - src_u, src_stride_u, - dst_y, dst_stride_y, - dst_vu, dst_stride_vu, +int I420ToNV21(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + return I420ToNV12(src_y, src_stride_y, src_v, src_stride_v, src_u, + src_stride_u, dst_y, dst_stride_y, dst_vu, dst_stride_vu, width, height); } // Convert I422 to RGBA with matrix -static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgba, int dst_stride_rgba, +static int I420ToRGBAMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgba, + int dst_stride_rgba, const struct YuvConstants* yuvconstants, - int width, int height) { + int width, + int height) { int y; - void (*I422ToRGBARow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = I422ToRGBARow_C; - if (!src_y || !src_u || !src_v || !dst_rgba || - width <= 0 || height == 0) { + void (*I422ToRGBARow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToRGBARow_C; + if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
@@ -448,13 +560,12 @@ static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y, } } #endif -#if defined(HAS_I422TORGBAROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) { - I422ToRGBARow = I422ToRGBARow_DSPR2; +#if defined(HAS_I422TORGBAROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToRGBARow = I422ToRGBARow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I422ToRGBARow = I422ToRGBARow_MSA; + } } #endif @@ -472,50 +583,58 @@ static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y, // Convert I420 to RGBA. LIBYUV_API -int I420ToRGBA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height) { - return I420ToRGBAMatrix(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_rgba, dst_stride_rgba, - &kYuvI601Constants, - width, height); +int I420ToRGBA(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height) { + return I420ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgba, dst_stride_rgba, + &kYuvI601Constants, width, height); } // Convert I420 to BGRA. LIBYUV_API -int I420ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_bgra, int dst_stride_bgra, - int width, int height) { - return I420ToRGBAMatrix(src_y, src_stride_y, - src_v, src_stride_v, // Swap U and V - src_u, src_stride_u, - dst_bgra, dst_stride_bgra, +int I420ToBGRA(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_bgra, + int dst_stride_bgra, + int width, + int height) { + return I420ToRGBAMatrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_bgra, dst_stride_bgra, &kYvuI601Constants, // Use Yvu matrix width, height); } // Convert I420 to RGB24 with matrix -static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgb24, int dst_stride_rgb24, +static int I420ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, const struct YuvConstants* yuvconstants, - int width, int height) { + int width, + int height) { int y; - void (*I422ToRGB24Row)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = I422ToRGB24Row_C; - if (!src_y || !src_u || !src_v || !dst_rgb24 || - width <= 0 || height == 0) { + void (*I422ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToRGB24Row_C; + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
@@ -548,6 +667,14 @@ static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToRGB24Row = I422ToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 16)) { + I422ToRGB24Row = I422ToRGB24Row_MSA; + } + } +#endif for (y = 0; y < height; ++y) { I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); @@ -563,50 +690,95 @@ static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y, // Convert I420 to RGB24. LIBYUV_API -int I420ToRGB24(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height) { - return I420ToRGB24Matrix(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_rgb24, dst_stride_rgb24, - &kYuvI601Constants, - width, height); +int I420ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + &kYuvI601Constants, width, height); } // Convert I420 to RAW. LIBYUV_API -int I420ToRAW(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_raw, int dst_stride_raw, - int width, int height) { - return I420ToRGB24Matrix(src_y, src_stride_y, - src_v, src_stride_v, // Swap U and V - src_u, src_stride_u, - dst_raw, dst_stride_raw, +int I420ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height) { + return I420ToRGB24Matrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_raw, dst_stride_raw, &kYvuI601Constants, // Use Yvu matrix width, height); } +// Convert H420 to RGB24. +LIBYUV_API +int H420ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { + return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_rgb24, dst_stride_rgb24, + &kYuvH709Constants, width, height); +} + +// Convert H420 to RAW. +LIBYUV_API +int H420ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height) { + return I420ToRGB24Matrix(src_y, src_stride_y, src_v, + src_stride_v, // Swap U and V + src_u, src_stride_u, dst_raw, dst_stride_raw, + &kYvuH709Constants, // Use Yvu matrix + width, height); +} + // Convert I420 to ARGB1555. 
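The converters from here down target the packed 16-bit formats. Read as 16-bit values, the channel layouts are: RGB565 rrrrrggg gggbbbbb, ARGB1555 arrrrrgg gggbbbbb, and ARGB4444 aaaarrrr ggggbbbb. A minimal packing sketch for the 565 case:

    #include <stdint.h>

    static uint16_t PackRGB565(uint8_t r, uint8_t g, uint8_t b) {
      return (uint16_t)(((r >> 3) << 11) |  /* 5 bits of red */
                        ((g >> 2) << 5) |   /* 6 bits of green */
                        (b >> 3));          /* 5 bits of blue */
    }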
LIBYUV_API -int I420ToARGB1555(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb1555, int dst_stride_argb1555, - int width, int height) { +int I420ToARGB1555(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb1555, + int dst_stride_argb1555, + int width, + int height) { int y; - void (*I422ToARGB1555Row)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, + void (*I422ToARGB1555Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGB1555Row_C; - if (!src_y || !src_u || !src_v || !dst_argb1555 || - width <= 0 || height == 0) { + if (!src_y || !src_u || !src_v || !dst_argb1555 || width <= 0 || + height == 0) { return -1; } // Negative height means invert the image. @@ -639,6 +811,14 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TOARGB1555ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I422ToARGB1555Row = I422ToARGB1555Row_MSA; + } + } +#endif for (y = 0; y < height; ++y) { I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants, @@ -653,23 +833,25 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, return 0; } - // Convert I420 to ARGB4444. LIBYUV_API -int I420ToARGB4444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb4444, int dst_stride_argb4444, - int width, int height) { +int I420ToARGB4444(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb4444, + int dst_stride_argb4444, + int width, + int height) { int y; - void (*I422ToARGB4444Row)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, + void (*I422ToARGB4444Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGB4444Row_C; - if (!src_y || !src_u || !src_v || !dst_argb4444 || - width <= 0 || height == 0) { + if (!src_y || !src_u || !src_v || !dst_argb4444 || width <= 0 || + height == 0) { return -1; } // Negative height means invert the image. @@ -702,6 +884,14 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TOARGB4444ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I422ToARGB4444Row = I422ToARGB4444Row_MSA; + } + } +#endif for (y = 0; y < height; ++y) { I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants, @@ -718,20 +908,22 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, // Convert I420 to RGB565. 
LIBYUV_API -int I420ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height) { +int I420ToRGB565(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height) { int y; - void (*I422ToRGB565Row)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = I422ToRGB565Row_C; - if (!src_y || !src_u || !src_v || !dst_rgb565 || - width <= 0 || height == 0) { + void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToRGB565Row_C; + if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -764,6 +956,14 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, } } #endif +#if defined(HAS_I422TORGB565ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToRGB565Row = I422ToRGB565Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I422ToRGB565Row = I422ToRGB565Row_MSA; + } + } +#endif for (y = 0; y < height; ++y) { I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width); @@ -777,32 +977,102 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, return 0; } +// Convert I422 to RGB565. +LIBYUV_API +int I422ToRGB565(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height) { + int y; + void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToRGB565Row_C; + if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; + dst_stride_rgb565 = -dst_stride_rgb565; + } +#if defined(HAS_I422TORGB565ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I422ToRGB565Row = I422ToRGB565Row_SSSE3; + } + } +#endif +#if defined(HAS_I422TORGB565ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToRGB565Row = I422ToRGB565Row_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I422ToRGB565Row = I422ToRGB565Row_AVX2; + } + } +#endif +#if defined(HAS_I422TORGB565ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I422ToRGB565Row = I422ToRGB565Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I422ToRGB565Row = I422ToRGB565Row_NEON; + } + } +#endif +#if defined(HAS_I422TORGB565ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToRGB565Row = I422ToRGB565Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I422ToRGB565Row = I422ToRGB565Row_MSA; + } + } +#endif + + for (y = 0; y < height; ++y) { + I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width); + dst_rgb565 += dst_stride_rgb565; + src_y += src_stride_y; + src_u += src_stride_u; + src_v += src_stride_v; + } + return 0; +} + // Ordered 8x8 dither for 888 to 565. Values from 0 to 7. 
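I420ToRGB565Dither, defined after the table below, adds a tiny per-pixel bias before the 888-to-565 truncation so quantization error becomes high-frequency noise rather than visible banding. Each output row receives one 4-byte row of the 4x4 table, passed as a uint32_t: ((y & 3) << 2) selects the table row, and the row kernel extracts one byte per pixel from it using the pixel's x & 3. Per pixel the effect is roughly this sketch (the real kernels do the equivalent clamped add on whole vectors):

    #include <stdint.h>

    static uint16_t Dither565(uint8_t r, uint8_t g, uint8_t b, uint8_t d) {
      int rd = r + d > 255 ? 255 : r + d;  /* clamped add of dither 0..7 */
      int gd = g + d > 255 ? 255 : g + d;
      int bd = b + d > 255 ? 255 : b + d;
      return (uint16_t)(((rd >> 3) << 11) | ((gd >> 2) << 5) | (bd >> 3));
    }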
-static const uint8 kDither565_4x4[16] = { - 0, 4, 1, 5, - 6, 2, 7, 3, - 1, 5, 0, 4, - 7, 3, 6, 2, +static const uint8_t kDither565_4x4[16] = { + 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2, }; // Convert I420 to RGB565 with dithering. LIBYUV_API -int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgb565, int dst_stride_rgb565, - const uint8* dither4x4, int width, int height) { +int I420ToRGB565Dither(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + const uint8_t* dither4x4, + int width, + int height) { int y; - void (*I422ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) = I422ToARGBRow_C; - void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width) = ARGBToRGB565DitherRow_C; - if (!src_y || !src_u || !src_v || !dst_rgb565 || - width <= 0 || height == 0) { + void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToARGBRow_C; + void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, + const uint32_t dither4, int width) = + ARGBToRGB565DitherRow_C; + if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -838,12 +1108,12 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, } } #endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) { - I422ToARGBRow = I422ToARGBRow_DSPR2; +#if defined(HAS_I422TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToARGBRow = I422ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I422ToARGBRow = I422ToARGBRow_MSA; + } } #endif #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) @@ -869,6 +1139,14 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON; } } +#endif +#if defined(HAS_ARGBTORGB565DITHERROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MSA; + } + } #endif { // Allocate a row of argb. 
@@ -876,7 +1154,8 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width); ARGBToRGB565DitherRow(row_argb, dst_rgb565, - *(uint32*)(dither4x4 + ((y & 3) << 2)), width); + *(const uint32_t*)(dither4x4 + ((y & 3) << 2)), + width); dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; if (y & 1) { @@ -889,220 +1168,254 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, return 0; } +// Convert I420 to AR30 with matrix +static int I420ToAR30Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height) { + int y; + void (*I422ToAR30Row)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToAR30Row_C; + + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; + dst_stride_ar30 = -dst_stride_ar30; + } + +#if defined(HAS_I422TOAR30ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I422ToAR30Row = I422ToAR30Row_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I422ToAR30Row = I422ToAR30Row_SSSE3; + } + } +#endif +#if defined(HAS_I422TOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToAR30Row = I422ToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I422ToAR30Row = I422ToAR30Row_AVX2; + } + } +#endif + + for (y = 0; y < height; ++y) { + I422ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width); + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + +// Convert I420 to AR30. +LIBYUV_API +int I420ToAR30(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height) { + return I420ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_ar30, dst_stride_ar30, + &kYuvI601Constants, width, height); +} + +// Convert H420 to AR30. 
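AR30 packs 2 bits of alpha and 10 bits per color channel into a little-endian 32-bit word with blue in the low bits, the 10-bit analogue of ARGB's byte order. A packing sketch, assuming the inputs are already reduced to their bit widths:

    #include <stdint.h>

    static uint32_t PackAR30(uint32_t a2, uint32_t r10,
                             uint32_t g10, uint32_t b10) {
      return (a2 << 30) | (r10 << 20) | (g10 << 10) | b10;
    }

The H420 flavor declared next is the same conversion with the BT.709 constant tables in place of the BT.601 ones.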
+LIBYUV_API
+int H420ToAR30(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_u,
+               int src_stride_u,
+               const uint8_t* src_v,
+               int src_stride_v,
+               uint8_t* dst_ar30,
+               int dst_stride_ar30,
+               int width,
+               int height) {
+  return I420ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+                          src_stride_v, dst_ar30, dst_stride_ar30,
+                          &kYvuH709Constants, width, height);
+}
+
 // Convert I420 to specified format
 LIBYUV_API
-int ConvertFromI420(const uint8* y, int y_stride,
-                    const uint8* u, int u_stride,
-                    const uint8* v, int v_stride,
-                    uint8* dst_sample, int dst_sample_stride,
-                    int width, int height,
-                    uint32 fourcc) {
-  uint32 format = CanonicalFourCC(fourcc);
+int ConvertFromI420(const uint8_t* y,
+                    int y_stride,
+                    const uint8_t* u,
+                    int u_stride,
+                    const uint8_t* v,
+                    int v_stride,
+                    uint8_t* dst_sample,
+                    int dst_sample_stride,
+                    int width,
+                    int height,
+                    uint32_t fourcc) {
+  uint32_t format = CanonicalFourCC(fourcc);
   int r = 0;
-  if (!y || !u|| !v || !dst_sample ||
-      width <= 0 || height == 0) {
+  if (!y || !u || !v || !dst_sample || width <= 0 || height == 0) {
     return -1;
   }
   switch (format) {
    // Single plane formats
    case FOURCC_YUY2:
-      r = I420ToYUY2(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 2,
-                     width, height);
+      r = I420ToYUY2(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 2, width,
+                     height);
      break;
    case FOURCC_UYVY:
-      r = I420ToUYVY(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 2,
-                     width, height);
+      r = I420ToUYVY(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 2, width,
+                     height);
      break;
    case FOURCC_RGBP:
-      r = I420ToRGB565(y, y_stride,
-                       u, u_stride,
-                       v, v_stride,
-                       dst_sample,
-                       dst_sample_stride ? dst_sample_stride : width * 2,
-                       width, height);
+      r = I420ToRGB565(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                       dst_sample_stride ? dst_sample_stride : width * 2, width,
+                       height);
      break;
    case FOURCC_RGBO:
-      r = I420ToARGB1555(y, y_stride,
-                         u, u_stride,
-                         v, v_stride,
-                         dst_sample,
+      r = I420ToARGB1555(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                          dst_sample_stride ? dst_sample_stride : width * 2,
                          width, height);
      break;
    case FOURCC_R444:
-      r = I420ToARGB4444(y, y_stride,
-                         u, u_stride,
-                         v, v_stride,
-                         dst_sample,
+      r = I420ToARGB4444(y, y_stride, u, u_stride, v, v_stride, dst_sample,
                          dst_sample_stride ? dst_sample_stride : width * 2,
                          width, height);
      break;
    case FOURCC_24BG:
-      r = I420ToRGB24(y, y_stride,
-                      u, u_stride,
-                      v, v_stride,
-                      dst_sample,
-                      dst_sample_stride ? dst_sample_stride : width * 3,
-                      width, height);
+      r = I420ToRGB24(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                      dst_sample_stride ? dst_sample_stride : width * 3, width,
+                      height);
      break;
    case FOURCC_RAW:
-      r = I420ToRAW(y, y_stride,
-                    u, u_stride,
-                    v, v_stride,
-                    dst_sample,
-                    dst_sample_stride ? dst_sample_stride : width * 3,
-                    width, height);
+      r = I420ToRAW(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                    dst_sample_stride ? dst_sample_stride : width * 3, width,
+                    height);
      break;
    case FOURCC_ARGB:
-      r = I420ToARGB(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 4,
-                     width, height);
+      r = I420ToARGB(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 4, width,
+                     height);
      break;
    case FOURCC_BGRA:
-      r = I420ToBGRA(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 4,
-                     width, height);
+      r = I420ToBGRA(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 4, width,
+                     height);
      break;
    case FOURCC_ABGR:
-      r = I420ToABGR(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 4,
-                     width, height);
+      r = I420ToABGR(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 4, width,
+                     height);
      break;
    case FOURCC_RGBA:
-      r = I420ToRGBA(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 4,
-                     width, height);
+      r = I420ToRGBA(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 4, width,
+                     height);
+      break;
+    case FOURCC_AR30:
+      r = I420ToAR30(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width * 4, width,
+                     height);
      break;
    case FOURCC_I400:
-      r = I400Copy(y, y_stride,
-                   dst_sample,
-                   dst_sample_stride ? dst_sample_stride : width,
-                   width, height);
+      r = I400Copy(y, y_stride, dst_sample,
+                   dst_sample_stride ? dst_sample_stride : width, width,
+                   height);
      break;
    case FOURCC_NV12: {
-      uint8* dst_uv = dst_sample + width * height;
-      r = I420ToNV12(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width,
-                     dst_uv,
-                     dst_sample_stride ? dst_sample_stride : width,
-                     width, height);
+      uint8_t* dst_uv = dst_sample + width * height;
+      r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width, dst_uv,
+                     dst_sample_stride ? dst_sample_stride : width, width,
+                     height);
      break;
    }
    case FOURCC_NV21: {
-      uint8* dst_vu = dst_sample + width * height;
-      r = I420ToNV21(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width,
-                     dst_vu,
-                     dst_sample_stride ? dst_sample_stride : width,
-                     width, height);
+      uint8_t* dst_vu = dst_sample + width * height;
+      r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                     dst_sample_stride ? dst_sample_stride : width, dst_vu,
+                     dst_sample_stride ? dst_sample_stride : width, width,
+                     height);
      break;
    }
    // TODO(fbarchard): Add M420.
    // Triplanar formats
-    // TODO(fbarchard): halfstride instead of halfwidth
    case FOURCC_I420:
    case FOURCC_YV12: {
-      int halfwidth = (width + 1) / 2;
+      dst_sample_stride = dst_sample_stride ? dst_sample_stride : width;
+      int halfstride = (dst_sample_stride + 1) / 2;
      int halfheight = (height + 1) / 2;
-      uint8* dst_u;
-      uint8* dst_v;
+      uint8_t* dst_u;
+      uint8_t* dst_v;
      if (format == FOURCC_YV12) {
-        dst_v = dst_sample + width * height;
-        dst_u = dst_v + halfwidth * halfheight;
+        dst_v = dst_sample + dst_sample_stride * height;
+        dst_u = dst_v + halfstride * halfheight;
      } else {
-        dst_u = dst_sample + width * height;
-        dst_v = dst_u + halfwidth * halfheight;
+        dst_u = dst_sample + dst_sample_stride * height;
+        dst_v = dst_u + halfstride * halfheight;
      }
-      r = I420Copy(y, y_stride,
-                   u, u_stride,
-                   v, v_stride,
-                   dst_sample, width,
-                   dst_u, halfwidth,
-                   dst_v, halfwidth,
+      r = I420Copy(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                   dst_sample_stride, dst_u, halfstride, dst_v, halfstride,
                    width, height);
      break;
    }
    case FOURCC_I422:
    case FOURCC_YV16: {
-      int halfwidth = (width + 1) / 2;
-      uint8* dst_u;
-      uint8* dst_v;
+      dst_sample_stride = dst_sample_stride ? dst_sample_stride : width;
+      int halfstride = (dst_sample_stride + 1) / 2;
+      uint8_t* dst_u;
+      uint8_t* dst_v;
      if (format == FOURCC_YV16) {
-        dst_v = dst_sample + width * height;
-        dst_u = dst_v + halfwidth * height;
+        dst_v = dst_sample + dst_sample_stride * height;
+        dst_u = dst_v + halfstride * height;
      } else {
-        dst_u = dst_sample + width * height;
-        dst_v = dst_u + halfwidth * height;
+        dst_u = dst_sample + dst_sample_stride * height;
+        dst_v = dst_u + halfstride * height;
      }
-      r = I420ToI422(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample, width,
-                     dst_u, halfwidth,
-                     dst_v, halfwidth,
+      r = I420ToI422(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                     dst_sample_stride, dst_u, halfstride, dst_v, halfstride,
                      width, height);
      break;
    }
    case FOURCC_I444:
    case FOURCC_YV24: {
-      uint8* dst_u;
-      uint8* dst_v;
+      dst_sample_stride = dst_sample_stride ? dst_sample_stride : width;
+      uint8_t* dst_u;
+      uint8_t* dst_v;
      if (format == FOURCC_YV24) {
-        dst_v = dst_sample + width * height;
-        dst_u = dst_v + width * height;
+        dst_v = dst_sample + dst_sample_stride * height;
+        dst_u = dst_v + dst_sample_stride * height;
      } else {
-        dst_u = dst_sample + width * height;
-        dst_v = dst_u + width * height;
+        dst_u = dst_sample + dst_sample_stride * height;
+        dst_v = dst_u + dst_sample_stride * height;
      }
-      r = I420ToI444(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample, width,
-                     dst_u, width,
-                     dst_v, width,
-                     width, height);
+      r = I420ToI444(y, y_stride, u, u_stride, v, v_stride, dst_sample,
+                     dst_sample_stride, dst_u, dst_sample_stride, dst_v,
+                     dst_sample_stride, width, height);
      break;
    }
-    case FOURCC_I411: {
-      int quarterwidth = (width + 3) / 4;
-      uint8* dst_u = dst_sample + width * height;
-      uint8* dst_v = dst_u + quarterwidth * height;
-      r = I420ToI411(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample, width,
-                     dst_u, quarterwidth,
-                     dst_v, quarterwidth,
-                     width, height);
-      break;
-    }
-
    // Formats not supported - MJPG, biplanar, some rgb formats.
    default:
      return -1;  // unknown fourcc - return failure code.
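Note on the hunk above: the triplanar outputs (FOURCC_I420/YV12, I422/YV16, I444/YV24) now honor a caller-supplied dst_sample_stride, deriving the chroma stride (halfstride) from it instead of from width, and a FOURCC_AR30 case is new (AR30 packs 2 bits of alpha plus three 10-bit channels into 4 bytes, hence the width * 4 default stride). In every case a dst_sample_stride of 0 still selects the tightly packed default. A minimal usage sketch follows; the libyuv/convert_from.h header path and the helper name are assumptions for illustration, and the 3/2 buffer sizing assumes even dimensions:

  #include <stdint.h>
  #include <stdlib.h>
  #include "libyuv/convert_from.h" /* assumed location of ConvertFromI420 */

  /* Hypothetical helper: repack a tightly packed I420 frame as NV12. */
  static int RepackI420AsNV12(const uint8_t* y, const uint8_t* u,
                              const uint8_t* v, int width, int height,
                              uint8_t** out_nv12) {
    /* NV12 = width*height Y plane + width*(height/2) interleaved UV plane. */
    uint8_t* dst = (uint8_t*)malloc((size_t)width * height * 3 / 2);
    if (!dst) {
      return -1;
    }
    /* Passing 0 selects the `dst_sample_stride ? ... : width` fallback. */
    int r = ConvertFromI420(y, width, u, (width + 1) / 2, v, (width + 1) / 2,
                            dst, 0, width, height, FOURCC_NV12);
    if (r) {
      free(dst);
      return r;
    }
    *out_nv12 = dst;
    return 0;
  }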
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert_from_argb.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert_from_argb.cc index 2a8682b7eb42..c8d91252e9b5 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/convert_from_argb.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/convert_from_argb.cc @@ -22,16 +22,21 @@ extern "C" { // ARGB little endian (bgra in memory) to I444 LIBYUV_API -int ARGBToI444(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int ARGBToI444(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; - void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) = ARGBToUV444Row_C; + void (*ARGBToUV444Row)(const uint8_t* src_argb, uint8_t* dst_u, + uint8_t* dst_v, int width) = ARGBToUV444Row_C; if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } @@ -41,20 +46,18 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_y == width && - dst_stride_u == width && - dst_stride_v == width) { + if (src_stride_argb == width * 4 && dst_stride_y == width && + dst_stride_u == width && dst_stride_v == width) { width *= height; height = 1; src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; } #if defined(HAS_ARGBTOUV444ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUV444Row = ARGBToUV444Row_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUV444Row = ARGBToUV444Row_SSSE3; + } } #endif #if defined(HAS_ARGBTOUV444ROW_NEON) @@ -65,6 +68,14 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOUV444ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToUV444Row = ARGBToUV444Row_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToUV444Row = ARGBToUV444Row_MSA; + } + } +#endif #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYRow = ARGBToYRow_Any_SSSE3; @@ -89,6 +100,14 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYRow = ARGBToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToUV444Row(src_argb, dst_u, dst_v, width); @@ -103,19 +122,23 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb, // ARGB little endian (bgra in memory) to I422 LIBYUV_API -int ARGBToI422(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int ARGBToI422(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = 
ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; - if (!src_argb || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -125,10 +148,8 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_y == width && - dst_stride_u * 2 == width && - dst_stride_v * 2 == width) { + if (src_stride_argb == width * 4 && dst_stride_y == width && + dst_stride_u * 2 == width && dst_stride_v * 2 == width) { width *= height; height = 1; src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; @@ -170,6 +191,23 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, } #endif +#if defined(HAS_ARGBTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYRow = ARGBToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_MSA; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToUVRow = ARGBToUVRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_MSA; + } + } +#endif + for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); ARGBToYRow(src_argb, dst_y, width); @@ -181,95 +219,25 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, return 0; } -// ARGB little endian (bgra in memory) to I411 LIBYUV_API -int ARGBToI411(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - int y; - void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) = ARGBToUV411Row_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = - ARGBToYRow_C; - if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_y == width && - dst_stride_u * 4 == width && - dst_stride_v * 4 == width) { - width *= height; - height = 1; - src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; - } -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYRow = ARGBToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYRow = ARGBToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUV411ROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUV411Row = ARGBToUV411Row_Any_NEON; - if (IS_ALIGNED(width, 32)) { - ARGBToUV411Row = ARGBToUV411Row_NEON; - } - } -#endif - - for (y = 0; y < height; ++y) { - ARGBToUV411Row(src_argb, dst_u, dst_v, width); - ARGBToYRow(src_argb, dst_y, width); - src_argb += src_stride_argb; - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - return 0; -} - -LIBYUV_API -int ARGBToNV12(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height) { +int ARGBToNV12(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { int y; int halfwidth = (width + 1) >> 1; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; - void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) = MergeUVRow_C; - if (!src_argb || - !dst_y || !dst_uv || - width <= 0 || height == 0) { + void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, + uint8_t* dst_uv, int width) = MergeUVRow_C; + if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -314,6 +282,22 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYRow = ARGBToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_MSA; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToUVRow = ARGBToUVRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_MSA; + } + } +#endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; @@ -337,11 +321,19 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, MergeUVRow_ = MergeUVRow_NEON; } } +#endif +#if defined(HAS_MERGEUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + MergeUVRow_ = MergeUVRow_Any_MSA; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow_ = MergeUVRow_MSA; + } + } #endif { // Allocate a rows of uv. 
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8* row_v = row_u + ((halfwidth + 31) & ~31); + uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); @@ -364,21 +356,24 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, // Same as NV12 but U and V swapped. LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height) { +int ARGBToNV21(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { int y; int halfwidth = (width + 1) >> 1; - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; - void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) = MergeUVRow_C; - if (!src_argb || - !dst_y || !dst_uv || - width <= 0 || height == 0) { + void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, + uint8_t* dst_vu, int width) = MergeUVRow_C; + if (!src_argb || !dst_y || !dst_vu || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -423,6 +418,22 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYRow = ARGBToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_MSA; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToUVRow = ARGBToUVRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_MSA; + } + } +#endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; @@ -446,24 +457,32 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, MergeUVRow_ = MergeUVRow_NEON; } } +#endif +#if defined(HAS_MERGEUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + MergeUVRow_ = MergeUVRow_Any_MSA; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow_ = MergeUVRow_MSA; + } + } #endif { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8* row_v = row_u + ((halfwidth + 31) & ~31); + uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_uv, halfwidth); + MergeUVRow_(row_v, row_u, dst_vu, halfwidth); ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); src_argb += src_stride_argb * 2; dst_y += dst_stride_y * 2; - dst_uv += dst_stride_uv; + dst_vu += dst_stride_vu; } if (height & 1) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_uv, halfwidth); + MergeUVRow_(row_v, row_u, dst_vu, halfwidth); ARGBToYRow(src_argb, dst_y, width); } free_aligned_buffer_64(row_u); @@ -473,19 +492,23 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, // Convert ARGB to YUY2. 
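Throughout these hunks, the new MSA (MIPS SIMD Architecture) blocks follow the same runtime-dispatch pattern the file already uses for SSSE3, AVX2, and NEON: start from the portable _C row function, switch to the any-width _Any_ variant once TestCpuFlag() reports the feature, and switch again to the full-vector kernel only when the row width satisfies IS_ALIGNED(). A condensed sketch of that pattern; MyRow_C, MyRow_Any_MSA, MyRow_MSA, and HAS_MYROW_MSA are placeholders, not libyuv symbols:

  void (*MyRow)(const uint8_t* src, uint8_t* dst, int width) = MyRow_C;
  #if defined(HAS_MYROW_MSA)
    if (TestCpuFlag(kCpuHasMSA)) {
      MyRow = MyRow_Any_MSA; /* any width: vector body plus scalar tail */
      if (IS_ALIGNED(width, 16)) {
        MyRow = MyRow_MSA; /* full-vector path, requires width % 16 == 0 */
      }
    }
  #endif
    /* Every row then runs through the one selected pointer. */
    for (y = 0; y < height; ++y) {
      MyRow(src, dst, width);
      src += src_stride;
      dst += dst_stride;
    }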
LIBYUV_API -int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, - uint8* dst_yuy2, int dst_stride_yuy2, - int width, int height) { +int ARGBToYUY2(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height) { int y; - void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; - void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C; + void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_yuy2, int width) = + I422ToYUY2Row_C; - if (!src_argb || !dst_yuy2 || - width <= 0 || height == 0) { + if (!src_argb || !dst_yuy2 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -495,8 +518,7 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, dst_stride_yuy2 = -dst_stride_yuy2; } // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_yuy2 == width * 2) { + if (src_stride_argb == width * 4 && dst_stride_yuy2 == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_yuy2 = 0; @@ -537,6 +559,22 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYRow = ARGBToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_MSA; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToUVRow = ARGBToUVRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_MSA; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; @@ -545,6 +583,14 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_I422TOYUY2ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToYUY2Row = I422ToYUY2Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToYUY2Row = I422ToYUY2Row_AVX2; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToYUY2Row = I422ToYUY2Row_Any_NEON; @@ -553,12 +599,20 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_I422TOYUY2ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToYUY2Row = I422ToYUY2Row_Any_MSA; + if (IS_ALIGNED(width, 32)) { + I422ToYUY2Row = I422ToYUY2Row_MSA; + } + } +#endif { // Allocate a rows of yuv. align_buffer_64(row_y, ((width + 63) & ~63) * 2); - uint8* row_u = row_y + ((width + 63) & ~63); - uint8* row_v = row_u + ((width + 63) & ~63) / 2; + uint8_t* row_u = row_y + ((width + 63) & ~63); + uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); @@ -575,19 +629,23 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, // Convert ARGB to UYVY. 
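For reference on the two packed 4:2:2 formats converted here: ARGBToYUY2 above and ARGBToUYVY below emit the same payload in different byte orders, two pixels per four bytes, which is why both subsample chroma into half-width row_u/row_v scratch buffers and use width * 2 output strides. A sketch of the macropixel layout (helper name hypothetical):

  /* Two pixels sharing one U/V pair occupy four bytes. */
  static void PackMacropixel(uint8_t y0, uint8_t y1, uint8_t u, uint8_t v,
                             uint8_t yuy2[4], uint8_t uyvy[4]) {
    yuy2[0] = y0; yuy2[1] = u;  yuy2[2] = y1; yuy2[3] = v;  /* Y0 U Y1 V */
    uyvy[0] = u;  uyvy[1] = y0; uyvy[2] = v;  uyvy[3] = y1; /* U Y0 V Y1 */
  }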
LIBYUV_API -int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, - uint8* dst_uyvy, int dst_stride_uyvy, - int width, int height) { +int ARGBToUYVY(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_uyvy, + int dst_stride_uyvy, + int width, + int height) { int y; - void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVRow_C; + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; - void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C; + void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_uyvy, int width) = + I422ToUYVYRow_C; - if (!src_argb || !dst_uyvy || - width <= 0 || height == 0) { + if (!src_argb || !dst_uyvy || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -597,8 +655,7 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, dst_stride_uyvy = -dst_stride_uyvy; } // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_uyvy == width * 2) { + if (src_stride_argb == width * 4 && dst_stride_uyvy == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_uyvy = 0; @@ -639,6 +696,22 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYRow = ARGBToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_MSA; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToUVRow = ARGBToUVRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_MSA; + } + } +#endif #if defined(HAS_I422TOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; @@ -647,6 +720,14 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_I422TOUYVYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToUYVYRow = I422ToUYVYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_AVX2; + } + } +#endif #if defined(HAS_I422TOUYVYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToUYVYRow = I422ToUYVYRow_Any_NEON; @@ -655,12 +736,20 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_I422TOUYVYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToUYVYRow = I422ToUYVYRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_MSA; + } + } +#endif { // Allocate a rows of yuv. align_buffer_64(row_y, ((width + 63) & ~63) * 2); - uint8* row_u = row_y + ((width + 63) & ~63); - uint8* row_v = row_u + ((width + 63) & ~63) / 2; + uint8_t* row_u = row_y + ((width + 63) & ~63); + uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); @@ -677,11 +766,14 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, // Convert ARGB to I400. 
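A note on the "Coalesce rows." blocks these hunks reflow (their logic is unchanged): when every stride equals its row's byte width, the frame is contiguous in memory, so it can be handed to the row function as one long row, removing the per-row loop overhead. The shape of the trick, as it appears in ARGBToI400 below:

  if (src_stride_argb == width * 4 && dst_stride_y == width) {
    width *= height; /* the whole frame becomes a single row */
    height = 1;
    src_stride_argb = dst_stride_y = 0; /* strides unused for one row */
  }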
LIBYUV_API -int ARGBToI400(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height) { +int ARGBToI400(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height) { int y; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; if (!src_argb || !dst_y || width <= 0 || height == 0) { return -1; @@ -692,8 +784,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_y == width) { + if (src_stride_argb == width * 4 && dst_stride_y == width) { width *= height; height = 1; src_stride_argb = dst_stride_y = 0; @@ -722,6 +813,14 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYRow = ARGBToYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToYRow(src_argb, dst_y, width); @@ -732,28 +831,31 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, } // Shuffle table for converting ARGB to RGBA. -static uvec8 kShuffleMaskARGBToRGBA = { - 3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u -}; +static const uvec8 kShuffleMaskARGBToRGBA = { + 3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u}; // Convert ARGB to RGBA. LIBYUV_API -int ARGBToRGBA(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height) { - return ARGBShuffle(src_argb, src_stride_argb, - dst_rgba, dst_stride_rgba, - (const uint8*)(&kShuffleMaskARGBToRGBA), - width, height); +int ARGBToRGBA(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height) { + return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba, + (const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height); } // Convert ARGB To RGB24. LIBYUV_API -int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height) { +int ARGBToRGB24(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height) { int y; - void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int width) = + void (*ARGBToRGB24Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToRGB24Row_C; if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) { return -1; @@ -764,8 +866,7 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_rgb24 == width * 3) { + if (src_stride_argb == width * 4 && dst_stride_rgb24 == width * 3) { width *= height; height = 1; src_stride_argb = dst_stride_rgb24 = 0; @@ -778,6 +879,22 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTORGB24ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToRGB24Row = ARGBToRGB24Row_AVX2; + } + } +#endif +#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) + if (TestCpuFlag(kCpuHasAVX512VBMI)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX512VBMI; + if (IS_ALIGNED(width, 32)) { + ARGBToRGB24Row = ARGBToRGB24Row_AVX512VBMI; + } + } +#endif #if defined(HAS_ARGBTORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON; @@ -786,6 +903,14 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTORGB24ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToRGB24Row = ARGBToRGB24Row_MSA; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToRGB24Row(src_argb, dst_rgb24, width); @@ -797,11 +922,14 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, // Convert ARGB To RAW. LIBYUV_API -int ARGBToRAW(const uint8* src_argb, int src_stride_argb, - uint8* dst_raw, int dst_stride_raw, - int width, int height) { +int ARGBToRAW(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height) { int y; - void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int width) = + void (*ARGBToRAWRow)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToRAWRow_C; if (!src_argb || !dst_raw || width <= 0 || height == 0) { return -1; @@ -812,8 +940,7 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_raw == width * 3) { + if (src_stride_argb == width * 4 && dst_stride_raw == width * 3) { width *= height; height = 1; src_stride_argb = dst_stride_raw = 0; @@ -826,6 +953,14 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTORAWROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToRAWRow = ARGBToRAWRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToRAWRow = ARGBToRAWRow_AVX2; + } + } +#endif #if defined(HAS_ARGBTORAWROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToRAWRow = ARGBToRAWRow_Any_NEON; @@ -834,6 +969,14 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTORAWROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToRAWRow = ARGBToRAWRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToRAWRow = ARGBToRAWRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToRAWRow(src_argb, dst_raw, width); @@ -844,21 +987,23 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb, } // Ordered 8x8 dither for 888 to 565. Values from 0 to 7. -static const uint8 kDither565_4x4[16] = { - 0, 4, 1, 5, - 6, 2, 7, 3, - 1, 5, 0, 4, - 7, 3, 6, 2, +static const uint8_t kDither565_4x4[16] = { + 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2, }; // Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes). 
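On the dither path that follows: kDither565_4x4 above is consumed four bytes per output row; ((y & 3) << 2) selects row y & 3 of the 4x4 matrix, and the row function adds those 0..7 offsets to each channel before truncating to 5/6/5 bits, trading ordered noise for smoother gradients. A scalar sketch of the per-pixel packing; the clamping behavior is assumed to match the _C row function:

  static uint16_t Pack565WithDither(uint8_t b, uint8_t g, uint8_t r,
                                    uint8_t dither /* 0..7 */) {
    int bd = b + dither, gd = g + dither, rd = r + dither;
    if (bd > 255) bd = 255;
    if (gd > 255) gd = 255;
    if (rd > 255) rd = 255;
    /* RGB565: blue in bits 0..4, green in 5..10, red in 11..15. */
    return (uint16_t)((bd >> 3) | ((gd >> 2) << 5) | ((rd >> 3) << 11));
  }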
LIBYUV_API -int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb565, int dst_stride_rgb565, - const uint8* dither4x4, int width, int height) { +int ARGBToRGB565Dither(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + const uint8_t* dither4x4, + int width, + int height) { int y; - void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width) = ARGBToRGB565DitherRow_C; + void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, + const uint32_t dither4, int width) = + ARGBToRGB565DitherRow_C; if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -894,9 +1039,19 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTORGB565DITHERROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MSA; + } + } +#endif + for (y = 0; y < height; ++y) { ARGBToRGB565DitherRow(src_argb, dst_rgb565, - *(uint32*)(dither4x4 + ((y & 3) << 2)), width); + *(const uint32_t*)(dither4x4 + ((y & 3) << 2)), + width); src_argb += src_stride_argb; dst_rgb565 += dst_stride_rgb565; } @@ -906,12 +1061,15 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, // Convert ARGB To RGB565. // TODO(fbarchard): Consider using dither function low level with zeros. LIBYUV_API -int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height) { +int ARGBToRGB565(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height) { int y; - void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int width) = - ARGBToRGB565Row_C; + void (*ARGBToRGB565Row)(const uint8_t* src_argb, uint8_t* dst_rgb, + int width) = ARGBToRGB565Row_C; if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -921,8 +1079,7 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_rgb565 == width * 2) { + if (src_stride_argb == width * 4 && dst_stride_rgb565 == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_rgb565 = 0; @@ -951,6 +1108,14 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTORGB565ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToRGB565Row = ARGBToRGB565Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + ARGBToRGB565Row = ARGBToRGB565Row_MSA; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToRGB565Row(src_argb, dst_rgb565, width); @@ -962,12 +1127,15 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, // Convert ARGB To ARGB1555. 
LIBYUV_API -int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb1555, int dst_stride_argb1555, - int width, int height) { +int ARGBToARGB1555(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb1555, + int dst_stride_argb1555, + int width, + int height) { int y; - void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int width) = - ARGBToARGB1555Row_C; + void (*ARGBToARGB1555Row)(const uint8_t* src_argb, uint8_t* dst_rgb, + int width) = ARGBToARGB1555Row_C; if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) { return -1; } @@ -977,8 +1145,7 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_argb1555 == width * 2) { + if (src_stride_argb == width * 4 && dst_stride_argb1555 == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_argb1555 = 0; @@ -1007,6 +1174,14 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOARGB1555ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_MSA; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToARGB1555Row(src_argb, dst_argb1555, width); @@ -1018,12 +1193,15 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, // Convert ARGB To ARGB4444. LIBYUV_API -int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb4444, int dst_stride_argb4444, - int width, int height) { +int ARGBToARGB4444(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb4444, + int dst_stride_argb4444, + int width, + int height) { int y; - void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int width) = - ARGBToARGB4444Row_C; + void (*ARGBToARGB4444Row)(const uint8_t* src_argb, uint8_t* dst_rgb, + int width) = ARGBToARGB4444Row_C; if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) { return -1; } @@ -1033,8 +1211,7 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_argb4444 == width * 2) { + if (src_stride_argb == width * 4 && dst_stride_argb4444 == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_argb4444 = 0; @@ -1063,6 +1240,14 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOARGB4444ROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_Any_MSA; + if (IS_ALIGNED(width, 8)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_MSA; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToARGB4444Row(src_argb, dst_argb4444, width); @@ -1072,21 +1257,123 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, return 0; } +// Convert ABGR To AR30. +LIBYUV_API +int ABGRToAR30(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height) { + int y; + void (*ABGRToAR30Row)(const uint8_t* src_abgr, uint8_t* dst_rgb, int width) = + ABGRToAR30Row_C; + if (!src_abgr || !dst_ar30 || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; + } + // Coalesce rows. 
+ if (src_stride_abgr == width * 4 && dst_stride_ar30 == width * 4) { + width *= height; + height = 1; + src_stride_abgr = dst_stride_ar30 = 0; + } +#if defined(HAS_ABGRTOAR30ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToAR30Row = ABGRToAR30Row_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ABGRToAR30Row = ABGRToAR30Row_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToAR30Row = ABGRToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ABGRToAR30Row = ABGRToAR30Row_AVX2; + } + } +#endif + for (y = 0; y < height; ++y) { + ABGRToAR30Row(src_abgr, dst_ar30, width); + src_abgr += src_stride_abgr; + dst_ar30 += dst_stride_ar30; + } + return 0; +} + +// Convert ARGB To AR30. +LIBYUV_API +int ARGBToAR30(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height) { + int y; + void (*ARGBToAR30Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = + ARGBToAR30Row_C; + if (!src_argb || !dst_ar30 || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_ar30 == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_ar30 = 0; + } +#if defined(HAS_ARGBTOAR30ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToAR30Row = ARGBToAR30Row_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBToAR30Row = ARGBToAR30Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToAR30Row = ARGBToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBToAR30Row = ARGBToAR30Row_AVX2; + } + } +#endif + for (y = 0; y < height; ++y) { + ARGBToAR30Row(src_argb, dst_ar30, width); + src_argb += src_stride_argb; + dst_ar30 += dst_stride_ar30; + } + return 0; +} + // Convert ARGB to J420. (JPeg full range I420). LIBYUV_API -int ARGBToJ420(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int ARGBToJ420(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; - void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C; - void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) = + void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVJRow_C; + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; - if (!src_argb || - !dst_yj || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
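The "Negative height means invert the image." block that closes the hunk above is the same convention every converter in this file uses: a negative height requests a vertical flip, implemented by starting at the last source row and walking upward with a negated stride, exactly as the new ABGRToAR30/ARGBToAR30 functions do:

  if (height < 0) {
    height = -height;
    src = src + (height - 1) * src_stride; /* point at the bottom row */
    src_stride = -src_stride;              /* step upward through the frame */
  }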
@@ -1129,6 +1416,22 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYJRow = ARGBToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_MSA; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToUVJRow = ARGBToUVJRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_MSA; + } + } +#endif for (y = 0; y < height - 1; y += 2) { ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width); @@ -1148,19 +1451,23 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb, // Convert ARGB to J422. (JPeg full range I422). LIBYUV_API -int ARGBToJ422(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int ARGBToJ422(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; - void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C; - void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) = + void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVJRow_C; + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; - if (!src_argb || - !dst_yj || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1170,10 +1477,8 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_yj == width && - dst_stride_u * 2 == width && - dst_stride_v * 2 == width) { + if (src_stride_argb == width * 4 && dst_stride_yj == width && + dst_stride_u * 2 == width && dst_stride_v * 2 == width) { width *= height; height = 1; src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0; @@ -1212,6 +1517,22 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYJRow = ARGBToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_MSA; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToUVJRow = ARGBToUVJRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); @@ -1226,11 +1547,14 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb, // Convert ARGB to J400. LIBYUV_API -int ARGBToJ400(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - int width, int height) { +int ARGBToJ400(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height) { int y; - void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) = + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; if (!src_argb || !dst_yj || width <= 0 || height == 0) { return -1; @@ -1241,8 +1565,7 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_yj == width) { + if (src_stride_argb == width * 4 && dst_stride_yj == width) { width *= height; height = 1; src_stride_argb = dst_stride_yj = 0; @@ -1271,6 +1594,14 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYJRow = ARGBToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { ARGBToYJRow(src_argb, dst_yj, width); diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert_jpeg.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert_jpeg.cc index 90f550a26adc..ae3cc18cd247 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/convert_jpeg.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/convert_jpeg.cc @@ -22,28 +22,24 @@ extern "C" { #ifdef HAVE_JPEG struct I420Buffers { - uint8* y; + uint8_t* y; int y_stride; - uint8* u; + uint8_t* u; int u_stride; - uint8* v; + uint8_t* v; int v_stride; int w; int h; }; static void JpegCopyI420(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { I420Buffers* dest = (I420Buffers*)(opaque); - I420Copy(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->y, dest->y_stride, - dest->u, dest->u_stride, - dest->v, dest->v_stride, - dest->w, rows); + I420Copy(data[0], strides[0], data[1], strides[1], data[2], strides[2], + dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v, + dest->v_stride, dest->w, rows); dest->y += rows * dest->y_stride; dest->u += ((rows + 1) >> 1) * dest->u_stride; dest->v += ((rows + 1) >> 1) * dest->v_stride; @@ -51,17 +47,13 @@ static void JpegCopyI420(void* opaque, } static void JpegI422ToI420(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { I420Buffers* dest = (I420Buffers*)(opaque); - I422ToI420(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->y, dest->y_stride, - dest->u, dest->u_stride, - dest->v, dest->v_stride, - dest->w, rows); + I422ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2], + dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v, + dest->v_stride, dest->w, rows); dest->y += rows * dest->y_stride; dest->u += ((rows + 1) >> 1) * dest->u_stride; dest->v += ((rows + 1) >> 1) * dest->v_stride; @@ -69,35 +61,13 @@ static void JpegI422ToI420(void* opaque, } static void JpegI444ToI420(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { I420Buffers* dest = (I420Buffers*)(opaque); - I444ToI420(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->y, dest->y_stride, - dest->u, dest->u_stride, - dest->v, dest->v_stride, - dest->w, rows); - dest->y += rows * dest->y_stride; - dest->u += ((rows + 1) >> 1) * dest->u_stride; - dest->v += ((rows + 1) >> 1) * dest->v_stride; - dest->h -= rows; -} - -static void JpegI411ToI420(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - I420Buffers* dest = (I420Buffers*)(opaque); - I411ToI420(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->y, dest->y_stride, - dest->u, dest->u_stride, - dest->v, dest->v_stride, - dest->w, rows); + I444ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2], + dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v, + dest->v_stride, dest->w, rows); dest->y += rows * dest->y_stride; dest->u += ((rows 
+ 1) >> 1) * dest->u_stride; dest->v += ((rows + 1) >> 1) * dest->v_stride; @@ -105,15 +75,12 @@ static void JpegI411ToI420(void* opaque, } static void JpegI400ToI420(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { I420Buffers* dest = (I420Buffers*)(opaque); - I400ToI420(data[0], strides[0], - dest->y, dest->y_stride, - dest->u, dest->u_stride, - dest->v, dest->v_stride, - dest->w, rows); + I400ToI420(data[0], strides[0], dest->y, dest->y_stride, dest->u, + dest->u_stride, dest->v, dest->v_stride, dest->w, rows); dest->y += rows * dest->y_stride; dest->u += ((rows + 1) >> 1) * dest->u_stride; dest->v += ((rows + 1) >> 1) * dest->v_stride; @@ -122,8 +89,10 @@ static void JpegI400ToI420(void* opaque, // Query size of MJPG in pixels. LIBYUV_API -int MJPGSize(const uint8* sample, size_t sample_size, - int* width, int* height) { +int MJPGSize(const uint8_t* sample, + size_t sample_size, + int* width, + int* height) { MJpegDecoder mjpeg_decoder; LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); if (ret) { @@ -135,15 +104,21 @@ int MJPGSize(const uint8* sample, size_t sample_size, } // MJPG (Motion JPeg) to I420 -// TODO(fbarchard): review w and h requirement. dw and dh may be enough. +// TODO(fbarchard): review src_width and src_height requirement. dst_width and +// dst_height may be enough. LIBYUV_API -int MJPGToI420(const uint8* sample, +int MJPGToI420(const uint8_t* sample, size_t sample_size, - uint8* y, int y_stride, - uint8* u, int u_stride, - uint8* v, int v_stride, - int w, int h, - int dw, int dh) { + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int src_width, + int src_height, + int dst_width, + int dst_height) { if (sample_size == kUnknownDataSize) { // ERROR: MJPEG frame size unknown return -1; @@ -152,17 +127,17 @@ int MJPGToI420(const uint8* sample, // TODO(fbarchard): Port MJpeg to C. 
MJpegDecoder mjpeg_decoder; LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); - if (ret && (mjpeg_decoder.GetWidth() != w || - mjpeg_decoder.GetHeight() != h)) { + if (ret && (mjpeg_decoder.GetWidth() != src_width || + mjpeg_decoder.GetHeight() != src_height)) { // ERROR: MJPEG frame has unexpected dimensions mjpeg_decoder.UnloadFrame(); return 1; // runtime failure } if (ret) { - I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh }; + I420Buffers bufs = {dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, dst_width, dst_height}; // YUV420 - if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && + if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && mjpeg_decoder.GetNumComponents() == 3 && mjpeg_decoder.GetVertSampFactor(0) == 2 && mjpeg_decoder.GetHorizSampFactor(0) == 2 && @@ -170,8 +145,9 @@ int MJPGToI420(const uint8* sample, mjpeg_decoder.GetHorizSampFactor(1) == 1 && mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dw, dh); - // YUV422 + ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dst_width, + dst_height); + // YUV422 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && mjpeg_decoder.GetNumComponents() == 3 && @@ -181,8 +157,9 @@ int MJPGToI420(const uint8* sample, mjpeg_decoder.GetHorizSampFactor(1) == 1 && mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dw, dh); - // YUV444 + ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dst_width, + dst_height); + // YUV444 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && mjpeg_decoder.GetNumComponents() == 3 && @@ -192,28 +169,19 @@ int MJPGToI420(const uint8* sample, mjpeg_decoder.GetHorizSampFactor(1) == 1 && mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh); - // YUV411 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 4 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh); - // YUV400 + ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dst_width, + dst_height); + // YUV400 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceGrayscale && mjpeg_decoder.GetNumComponents() == 1 && mjpeg_decoder.GetVertSampFactor(0) == 1 && mjpeg_decoder.GetHorizSampFactor(0) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh); + ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dst_width, + dst_height); } else { // TODO(fbarchard): Implement conversion for any other colorspace/sample - // factors that occur in practice. 411 is supported by libjpeg + // factors that occur in practice. 
// ERROR: Unable to convert MJPEG frame because format is not supported mjpeg_decoder.UnloadFrame(); return 1; @@ -224,88 +192,67 @@ int MJPGToI420(const uint8* sample, #ifdef HAVE_JPEG struct ARGBBuffers { - uint8* argb; + uint8_t* argb; int argb_stride; int w; int h; }; static void JpegI420ToARGB(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { + const uint8_t* const* data, + const int* strides, + int rows) { ARGBBuffers* dest = (ARGBBuffers*)(opaque); - I420ToARGB(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->argb, dest->argb_stride, - dest->w, rows); + I420ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2], + dest->argb, dest->argb_stride, dest->w, rows); dest->argb += rows * dest->argb_stride; dest->h -= rows; } static void JpegI422ToARGB(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { ARGBBuffers* dest = (ARGBBuffers*)(opaque); - I422ToARGB(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->argb, dest->argb_stride, - dest->w, rows); + I422ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2], + dest->argb, dest->argb_stride, dest->w, rows); dest->argb += rows * dest->argb_stride; dest->h -= rows; } static void JpegI444ToARGB(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { ARGBBuffers* dest = (ARGBBuffers*)(opaque); - I444ToARGB(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->argb, dest->argb_stride, - dest->w, rows); - dest->argb += rows * dest->argb_stride; - dest->h -= rows; -} - -static void JpegI411ToARGB(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - ARGBBuffers* dest = (ARGBBuffers*)(opaque); - I411ToARGB(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->argb, dest->argb_stride, - dest->w, rows); + I444ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2], + dest->argb, dest->argb_stride, dest->w, rows); dest->argb += rows * dest->argb_stride; dest->h -= rows; } static void JpegI400ToARGB(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows) { ARGBBuffers* dest = (ARGBBuffers*)(opaque); - I400ToARGB(data[0], strides[0], - dest->argb, dest->argb_stride, - dest->w, rows); + I400ToARGB(data[0], strides[0], dest->argb, dest->argb_stride, dest->w, rows); dest->argb += rows * dest->argb_stride; dest->h -= rows; } // MJPG (Motion JPeg) to ARGB -// TODO(fbarchard): review w and h requirement. dw and dh may be enough. +// TODO(fbarchard): review src_width and src_height requirement. dst_width and +// dst_height may be enough. LIBYUV_API -int MJPGToARGB(const uint8* sample, +int MJPGToARGB(const uint8_t* sample, size_t sample_size, - uint8* argb, int argb_stride, - int w, int h, - int dw, int dh) { + uint8_t* dst_argb, + int dst_stride_argb, + int src_width, + int src_height, + int dst_width, + int dst_height) { if (sample_size == kUnknownDataSize) { // ERROR: MJPEG frame size unknown return -1; @@ -314,17 +261,16 @@ int MJPGToARGB(const uint8* sample, // TODO(fbarchard): Port MJpeg to C. 
MJpegDecoder mjpeg_decoder; LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); - if (ret && (mjpeg_decoder.GetWidth() != w || - mjpeg_decoder.GetHeight() != h)) { + if (ret && (mjpeg_decoder.GetWidth() != src_width || + mjpeg_decoder.GetHeight() != src_height)) { // ERROR: MJPEG frame has unexpected dimensions mjpeg_decoder.UnloadFrame(); return 1; // runtime failure } if (ret) { - ARGBBuffers bufs = { argb, argb_stride, dw, dh }; + ARGBBuffers bufs = {dst_argb, dst_stride_argb, dst_width, dst_height}; // YUV420 - if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && + if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && mjpeg_decoder.GetNumComponents() == 3 && mjpeg_decoder.GetVertSampFactor(0) == 2 && mjpeg_decoder.GetHorizSampFactor(0) == 2 && @@ -332,8 +278,9 @@ int MJPGToARGB(const uint8* sample, mjpeg_decoder.GetHorizSampFactor(1) == 1 && mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dw, dh); - // YUV422 + ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dst_width, + dst_height); + // YUV422 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && mjpeg_decoder.GetNumComponents() == 3 && @@ -343,8 +290,9 @@ int MJPGToARGB(const uint8* sample, mjpeg_decoder.GetHorizSampFactor(1) == 1 && mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dw, dh); - // YUV444 + ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dst_width, + dst_height); + // YUV444 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && mjpeg_decoder.GetNumComponents() == 3 && @@ -354,28 +302,19 @@ int MJPGToARGB(const uint8* sample, mjpeg_decoder.GetHorizSampFactor(1) == 1 && mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh); - // YUV411 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 4 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToARGB, &bufs, dw, dh); - // YUV400 + ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dst_width, + dst_height); + // YUV400 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceGrayscale && mjpeg_decoder.GetNumComponents() == 1 && mjpeg_decoder.GetVertSampFactor(0) == 1 && mjpeg_decoder.GetHorizSampFactor(0) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dw, dh); + ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dst_width, + dst_height); } else { // TODO(fbarchard): Implement conversion for any other colorspace/sample - // factors that occur in practice. 411 is supported by libjpeg + // factors that occur in practice. 
// ERROR: Unable to convert MJPEG frame because format is not supported mjpeg_decoder.UnloadFrame(); return 1; diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert_to_argb.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert_to_argb.cc index aecdc80fde29..67484522c0f7 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/convert_to_argb.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/convert_to_argb.cc @@ -28,36 +28,50 @@ extern "C" { // src_height is used to compute location of planes, and indicate inversion // sample_size is measured in bytes and is the size of the frame. // With MJPEG it is the compressed size of the frame. + +// TODO(fbarchard): Add the following: +// H010ToARGB +// H420ToARGB +// H422ToARGB +// I010ToARGB +// J400ToARGB +// J422ToARGB +// J444ToARGB + LIBYUV_API -int ConvertToARGB(const uint8* sample, size_t sample_size, - uint8* crop_argb, int argb_stride, - int crop_x, int crop_y, - int src_width, int src_height, - int crop_width, int crop_height, +int ConvertToARGB(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_argb, + int dst_stride_argb, + int crop_x, + int crop_y, + int src_width, + int src_height, + int crop_width, + int crop_height, enum RotationMode rotation, - uint32 fourcc) { - uint32 format = CanonicalFourCC(fourcc); + uint32_t fourcc) { + uint32_t format = CanonicalFourCC(fourcc); int aligned_src_width = (src_width + 1) & ~1; - const uint8* src; - const uint8* src_uv; + const uint8_t* src; + const uint8_t* src_uv; int abs_src_height = (src_height < 0) ? -src_height : src_height; int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height; int r = 0; // One pass rotation is available for some formats. For the rest, convert - // to I420 (with optional vertical flipping) into a temporary I420 buffer, - // and then rotate the I420 to the final destination buffer. - // For in-place conversion, if destination crop_argb is same as source sample, + // to ARGB (with optional vertical flipping) into a temporary ARGB buffer, + // and then rotate the ARGB to the final destination buffer. + // For in-place conversion, if destination dst_argb is same as source sample, // also enable temporary buffer. - LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) || - crop_argb == sample; - uint8* dest_argb = crop_argb; - int dest_argb_stride = argb_stride; - uint8* rotate_buffer = NULL; + LIBYUV_BOOL need_buf = + (rotation && format != FOURCC_ARGB) || dst_argb == sample; + uint8_t* dest_argb = dst_argb; + int dest_dst_stride_argb = dst_stride_argb; + uint8_t* rotate_buffer = NULL; int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; - if (crop_argb == NULL || sample == NULL || - src_width <= 0 || crop_width <= 0 || + if (dst_argb == NULL || sample == NULL || src_width <= 0 || crop_width <= 0 || src_height == 0 || crop_height == 0) { return -1; } @@ -67,187 +81,174 @@ int ConvertToARGB(const uint8* sample, size_t sample_size, if (need_buf) { int argb_size = crop_width * 4 * abs_crop_height; - rotate_buffer = (uint8*)malloc(argb_size); + rotate_buffer = (uint8_t*)malloc(argb_size); /* NOLINT */ if (!rotate_buffer) { return 1; // Out of memory runtime error. 
} - crop_argb = rotate_buffer; - argb_stride = crop_width * 4; + dst_argb = rotate_buffer; + dst_stride_argb = crop_width * 4; } switch (format) { // Single plane formats case FOURCC_YUY2: src = sample + (aligned_src_width * crop_y + crop_x) * 2; - r = YUY2ToARGB(src, aligned_src_width * 2, - crop_argb, argb_stride, + r = YUY2ToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_UYVY: src = sample + (aligned_src_width * crop_y + crop_x) * 2; - r = UYVYToARGB(src, aligned_src_width * 2, - crop_argb, argb_stride, + r = UYVYToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_24BG: src = sample + (src_width * crop_y + crop_x) * 3; - r = RGB24ToARGB(src, src_width * 3, - crop_argb, argb_stride, - crop_width, inv_crop_height); + r = RGB24ToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width, + inv_crop_height); break; case FOURCC_RAW: src = sample + (src_width * crop_y + crop_x) * 3; - r = RAWToARGB(src, src_width * 3, - crop_argb, argb_stride, - crop_width, inv_crop_height); + r = RAWToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width, + inv_crop_height); break; case FOURCC_ARGB: - src = sample + (src_width * crop_y + crop_x) * 4; - r = ARGBToARGB(src, src_width * 4, - crop_argb, argb_stride, - crop_width, inv_crop_height); + if (!need_buf && !rotation) { + src = sample + (src_width * crop_y + crop_x) * 4; + r = ARGBToARGB(src, src_width * 4, dst_argb, dst_stride_argb, + crop_width, inv_crop_height); + } break; case FOURCC_BGRA: src = sample + (src_width * crop_y + crop_x) * 4; - r = BGRAToARGB(src, src_width * 4, - crop_argb, argb_stride, - crop_width, inv_crop_height); + r = BGRAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, + inv_crop_height); break; case FOURCC_ABGR: src = sample + (src_width * crop_y + crop_x) * 4; - r = ABGRToARGB(src, src_width * 4, - crop_argb, argb_stride, - crop_width, inv_crop_height); + r = ABGRToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, + inv_crop_height); break; case FOURCC_RGBA: src = sample + (src_width * crop_y + crop_x) * 4; - r = RGBAToARGB(src, src_width * 4, - crop_argb, argb_stride, - crop_width, inv_crop_height); + r = RGBAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, + inv_crop_height); + break; + case FOURCC_AR30: + src = sample + (src_width * crop_y + crop_x) * 4; + r = AR30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, + inv_crop_height); + break; + case FOURCC_AB30: + src = sample + (src_width * crop_y + crop_x) * 4; + r = AB30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, + inv_crop_height); break; case FOURCC_RGBP: src = sample + (src_width * crop_y + crop_x) * 2; - r = RGB565ToARGB(src, src_width * 2, - crop_argb, argb_stride, + r = RGB565ToARGB(src, src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_RGBO: src = sample + (src_width * crop_y + crop_x) * 2; - r = ARGB1555ToARGB(src, src_width * 2, - crop_argb, argb_stride, + r = ARGB1555ToARGB(src, src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_R444: src = sample + (src_width * crop_y + crop_x) * 2; - r = ARGB4444ToARGB(src, src_width * 2, - crop_argb, argb_stride, + r = ARGB4444ToARGB(src, src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_I400: src = sample + src_width * crop_y + crop_x; - r = I400ToARGB(src, src_width, - crop_argb, 
argb_stride, - crop_width, inv_crop_height); + r = I400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width, + inv_crop_height); break; // Biplanar formats case FOURCC_NV12: src = sample + (src_width * crop_y + crop_x); - src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x; - r = NV12ToARGB(src, src_width, - src_uv, aligned_src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); + src_uv = sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x; + r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb, + dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_NV21: src = sample + (src_width * crop_y + crop_x); - src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x; + src_uv = sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x; // Call NV12 but with u and v parameters swapped. - r = NV21ToARGB(src, src_width, - src_uv, aligned_src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); + r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb, + dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_M420: src = sample + (src_width * crop_y) * 12 / 8 + crop_x; - r = M420ToARGB(src, src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); + r = M420ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width, + inv_crop_height); break; + // Triplanar formats case FOURCC_I420: case FOURCC_YV12: { - const uint8* src_y = sample + (src_width * crop_y + crop_x); - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + (src_width * crop_y + crop_x); + const uint8_t* src_u; + const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; if (format == FOURCC_YV12) { src_v = sample + src_width * abs_src_height + - (halfwidth * crop_y + crop_x) / 2; + (halfwidth * crop_y + crop_x) / 2; src_u = sample + src_width * abs_src_height + - halfwidth * (halfheight + crop_y / 2) + crop_x / 2; + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; } else { src_u = sample + src_width * abs_src_height + - (halfwidth * crop_y + crop_x) / 2; + (halfwidth * crop_y + crop_x) / 2; src_v = sample + src_width * abs_src_height + - halfwidth * (halfheight + crop_y / 2) + crop_x / 2; + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; } - r = I420ToARGB(src_y, src_width, - src_u, halfwidth, - src_v, halfwidth, - crop_argb, argb_stride, - crop_width, inv_crop_height); + r = I420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, + dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } case FOURCC_J420: { - const uint8* src_y = sample + (src_width * crop_y + crop_x); - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + (src_width * crop_y + crop_x); + const uint8_t* src_u; + const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; src_u = sample + src_width * abs_src_height + - (halfwidth * crop_y + crop_x) / 2; + (halfwidth * crop_y + crop_x) / 2; src_v = sample + src_width * abs_src_height + - halfwidth * (halfheight + crop_y / 2) + crop_x / 2; - r = J420ToARGB(src_y, src_width, - src_u, halfwidth, - src_v, halfwidth, - crop_argb, argb_stride, - crop_width, inv_crop_height); + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; + r = J420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, + dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } case FOURCC_I422: case FOURCC_YV16: { - const uint8* src_y 
= sample + src_width * crop_y + crop_x; - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; if (format == FOURCC_YV16) { - src_v = sample + src_width * abs_src_height + - halfwidth * crop_y + crop_x / 2; + src_v = sample + src_width * abs_src_height + halfwidth * crop_y + + crop_x / 2; src_u = sample + src_width * abs_src_height + - halfwidth * (abs_src_height + crop_y) + crop_x / 2; + halfwidth * (abs_src_height + crop_y) + crop_x / 2; } else { - src_u = sample + src_width * abs_src_height + - halfwidth * crop_y + crop_x / 2; + src_u = sample + src_width * abs_src_height + halfwidth * crop_y + + crop_x / 2; src_v = sample + src_width * abs_src_height + - halfwidth * (abs_src_height + crop_y) + crop_x / 2; + halfwidth * (abs_src_height + crop_y) + crop_x / 2; } - r = I422ToARGB(src_y, src_width, - src_u, halfwidth, - src_v, halfwidth, - crop_argb, argb_stride, - crop_width, inv_crop_height); + r = I422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, + dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } case FOURCC_I444: case FOURCC_YV24: { - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; if (format == FOURCC_YV24) { src_v = sample + src_width * (abs_src_height + crop_y) + crop_x; src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; @@ -255,32 +256,14 @@ int ConvertToARGB(const uint8* sample, size_t sample_size, src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; } - r = I444ToARGB(src_y, src_width, - src_u, src_width, - src_v, src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - } - case FOURCC_I411: { - int quarterwidth = (src_width + 3) / 4; - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u = sample + src_width * abs_src_height + - quarterwidth * crop_y + crop_x / 4; - const uint8* src_v = sample + src_width * abs_src_height + - quarterwidth * (abs_src_height + crop_y) + crop_x / 4; - r = I411ToARGB(src_y, src_width, - src_u, quarterwidth, - src_v, quarterwidth, - crop_argb, argb_stride, - crop_width, inv_crop_height); + r = I444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width, + dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } #ifdef HAVE_JPEG case FOURCC_MJPG: - r = MJPGToARGB(sample, sample_size, - crop_argb, argb_stride, - src_width, abs_src_height, crop_width, inv_crop_height); + r = MJPGToARGB(sample, sample_size, dst_argb, dst_stride_argb, src_width, + abs_src_height, crop_width, inv_crop_height); break; #endif default: @@ -289,11 +272,14 @@ int ConvertToARGB(const uint8* sample, size_t sample_size, if (need_buf) { if (!r) { - r = ARGBRotate(crop_argb, argb_stride, - dest_argb, dest_argb_stride, + r = ARGBRotate(dst_argb, dst_stride_argb, dest_argb, dest_dst_stride_argb, crop_width, abs_crop_height, rotation); } free(rotate_buffer); + } else if (rotation) { + src = sample + (src_width * crop_y + crop_x) * 4; + r = ARGBRotate(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, + inv_crop_height, rotation); } return r; diff --git a/media/libvpx/libvpx/third_party/libyuv/source/convert_to_i420.cc b/media/libvpx/libvpx/third_party/libyuv/source/convert_to_i420.cc index 
e5f307c44643..df08309f9baa 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/convert_to_i420.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/convert_to_i420.cc @@ -25,251 +25,216 @@ extern "C" { // sample_size is measured in bytes and is the size of the frame. // With MJPEG it is the compressed size of the frame. LIBYUV_API -int ConvertToI420(const uint8* sample, +int ConvertToI420(const uint8_t* sample, size_t sample_size, - uint8* y, int y_stride, - uint8* u, int u_stride, - uint8* v, int v_stride, - int crop_x, int crop_y, - int src_width, int src_height, - int crop_width, int crop_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int crop_x, + int crop_y, + int src_width, + int src_height, + int crop_width, + int crop_height, enum RotationMode rotation, - uint32 fourcc) { - uint32 format = CanonicalFourCC(fourcc); + uint32_t fourcc) { + uint32_t format = CanonicalFourCC(fourcc); int aligned_src_width = (src_width + 1) & ~1; - const uint8* src; - const uint8* src_uv; + const uint8_t* src; + const uint8_t* src_uv; const int abs_src_height = (src_height < 0) ? -src_height : src_height; // TODO(nisse): Why allow crop_height < 0? const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; int r = 0; - LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 && - format != FOURCC_NV12 && format != FOURCC_NV21 && - format != FOURCC_YV12) || y == sample; - uint8* tmp_y = y; - uint8* tmp_u = u; - uint8* tmp_v = v; - int tmp_y_stride = y_stride; - int tmp_u_stride = u_stride; - int tmp_v_stride = v_stride; - uint8* rotate_buffer = NULL; + LIBYUV_BOOL need_buf = + (rotation && format != FOURCC_I420 && format != FOURCC_NV12 && + format != FOURCC_NV21 && format != FOURCC_YV12) || + dst_y == sample; + uint8_t* tmp_y = dst_y; + uint8_t* tmp_u = dst_u; + uint8_t* tmp_v = dst_v; + int tmp_y_stride = dst_stride_y; + int tmp_u_stride = dst_stride_u; + int tmp_v_stride = dst_stride_v; + uint8_t* rotate_buffer = NULL; const int inv_crop_height = (src_height < 0) ? -abs_crop_height : abs_crop_height; - if (!y || !u || !v || !sample || - src_width <= 0 || crop_width <= 0 || - src_height == 0 || crop_height == 0) { + if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 || + crop_width <= 0 || src_height == 0 || crop_height == 0) { return -1; } // One pass rotation is available for some formats. For the rest, convert // to I420 (with optional vertical flipping) into a temporary I420 buffer, // and then rotate the I420 to the final destination buffer. - // For in-place conversion, if destination y is same as source sample, + // For in-place conversion, if destination dst_y is same as source sample, // also enable temporary buffer. if (need_buf) { int y_size = crop_width * abs_crop_height; int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2); - rotate_buffer = (uint8*)malloc(y_size + uv_size * 2); + rotate_buffer = (uint8_t*)malloc(y_size + uv_size * 2); /* NOLINT */ if (!rotate_buffer) { return 1; // Out of memory runtime error. 
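// [Editor's note -- worked example, not part of the upstream patch.] The
// single allocation above packs all three I420 planes into one buffer; the
// lines that follow carve it up. For a 17x11 crop:
//   y_size  = 17 * 11                         = 187
//   uv_size = ((17 + 1) / 2) * ((11 + 1) / 2) = 9 * 6 = 54
//             (chroma planes are half size per axis, rounded up)
//   total   = 187 + 2 * 54                    = 295 bytes
// giving dst_y at offset 0, dst_u at 187, dst_v at 241, with
// dst_stride_y = 17 and dst_stride_u = dst_stride_v = 9.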
} - y = rotate_buffer; - u = y + y_size; - v = u + uv_size; - y_stride = crop_width; - u_stride = v_stride = ((crop_width + 1) / 2); + dst_y = rotate_buffer; + dst_u = dst_y + y_size; + dst_v = dst_u + uv_size; + dst_stride_y = crop_width; + dst_stride_u = dst_stride_v = ((crop_width + 1) / 2); } switch (format) { // Single plane formats case FOURCC_YUY2: src = sample + (aligned_src_width * crop_y + crop_x) * 2; - r = YUY2ToI420(src, aligned_src_width * 2, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = YUY2ToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_UYVY: src = sample + (aligned_src_width * crop_y + crop_x) * 2; - r = UYVYToI420(src, aligned_src_width * 2, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = UYVYToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_RGBP: src = sample + (src_width * crop_y + crop_x) * 2; - r = RGB565ToI420(src, src_width * 2, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = RGB565ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_RGBO: src = sample + (src_width * crop_y + crop_x) * 2; - r = ARGB1555ToI420(src, src_width * 2, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = ARGB1555ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_R444: src = sample + (src_width * crop_y + crop_x) * 2; - r = ARGB4444ToI420(src, src_width * 2, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = ARGB4444ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_24BG: src = sample + (src_width * crop_y + crop_x) * 3; - r = RGB24ToI420(src, src_width * 3, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = RGB24ToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_RAW: src = sample + (src_width * crop_y + crop_x) * 3; - r = RAWToI420(src, src_width * 3, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = RAWToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_ARGB: src = sample + (src_width * crop_y + crop_x) * 4; - r = ARGBToI420(src, src_width * 4, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = ARGBToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_BGRA: src = sample + (src_width * crop_y + crop_x) * 4; - r = BGRAToI420(src, src_width * 4, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = BGRAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_ABGR: src = sample + (src_width * crop_y + crop_x) * 4; - r = ABGRToI420(src, src_width * 4, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = ABGRToI420(src, src_width * 4, dst_y, dst_stride_y, 
dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; case FOURCC_RGBA: src = sample + (src_width * crop_y + crop_x) * 4; - r = RGBAToI420(src, src_width * 4, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = RGBAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, crop_width, + inv_crop_height); break; + // TODO(fbarchard): Add AR30 and AB30 case FOURCC_I400: src = sample + src_width * crop_y + crop_x; - r = I400ToI420(src, src_width, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = I400ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, crop_width, inv_crop_height); break; // Biplanar formats case FOURCC_NV12: src = sample + (src_width * crop_y + crop_x); - src_uv = sample + (src_width * src_height) + - ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2); - r = NV12ToI420Rotate(src, src_width, - src_uv, aligned_src_width, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height, rotation); + src_uv = sample + (src_width * abs_src_height) + + ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2); + r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y, + dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, crop_width, inv_crop_height, rotation); break; case FOURCC_NV21: src = sample + (src_width * crop_y + crop_x); - src_uv = sample + (src_width * src_height) + - ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2); - // Call NV12 but with u and v parameters swapped. - r = NV12ToI420Rotate(src, src_width, - src_uv, aligned_src_width, - y, y_stride, - v, v_stride, - u, u_stride, - crop_width, inv_crop_height, rotation); + src_uv = sample + (src_width * abs_src_height) + + ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2); + // Call NV12 but with dst_u and dst_v parameters swapped. 
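// [Editor's note -- illustration, not part of the upstream patch.] NV12
// interleaves chroma as U0 V0 U1 V1 ..., while NV21 stores V0 U0 V1 U1 ....
// Deinterleaving NV21 with the NV12 routine therefore only requires swapping
// the two destination planes, which is exactly what the call below does:
//   NV12: NV12ToI420Rotate(..., dst_u, dst_stride_u, dst_v, dst_stride_v, ...)
//   NV21: NV12ToI420Rotate(..., dst_v, dst_stride_v, dst_u, dst_stride_u, ...)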
+ r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y, + dst_stride_y, dst_v, dst_stride_v, dst_u, + dst_stride_u, crop_width, inv_crop_height, rotation); break; case FOURCC_M420: src = sample + (src_width * crop_y) * 12 / 8 + crop_x; - r = M420ToI420(src, src_width, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = M420ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, crop_width, inv_crop_height); break; // Triplanar formats case FOURCC_I420: case FOURCC_YV12: { - const uint8* src_y = sample + (src_width * crop_y + crop_x); - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + (src_width * crop_y + crop_x); + const uint8_t* src_u; + const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; if (format == FOURCC_YV12) { src_v = sample + src_width * abs_src_height + - (halfwidth * crop_y + crop_x) / 2; + (halfwidth * crop_y + crop_x) / 2; src_u = sample + src_width * abs_src_height + - halfwidth * (halfheight + crop_y / 2) + crop_x / 2; + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; } else { src_u = sample + src_width * abs_src_height + - (halfwidth * crop_y + crop_x) / 2; + (halfwidth * crop_y + crop_x) / 2; src_v = sample + src_width * abs_src_height + - halfwidth * (halfheight + crop_y / 2) + crop_x / 2; + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; } - r = I420Rotate(src_y, src_width, - src_u, halfwidth, - src_v, halfwidth, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height, rotation); + r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth, + dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, crop_width, inv_crop_height, rotation); break; } case FOURCC_I422: case FOURCC_YV16: { - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; if (format == FOURCC_YV16) { - src_v = sample + src_width * abs_src_height + - halfwidth * crop_y + crop_x / 2; + src_v = sample + src_width * abs_src_height + halfwidth * crop_y + + crop_x / 2; src_u = sample + src_width * abs_src_height + - halfwidth * (abs_src_height + crop_y) + crop_x / 2; + halfwidth * (abs_src_height + crop_y) + crop_x / 2; } else { - src_u = sample + src_width * abs_src_height + - halfwidth * crop_y + crop_x / 2; + src_u = sample + src_width * abs_src_height + halfwidth * crop_y + + crop_x / 2; src_v = sample + src_width * abs_src_height + - halfwidth * (abs_src_height + crop_y) + crop_x / 2; + halfwidth * (abs_src_height + crop_y) + crop_x / 2; } - r = I422ToI420(src_y, src_width, - src_u, halfwidth, - src_v, halfwidth, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth, + dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, crop_width, inv_crop_height); break; } case FOURCC_I444: case FOURCC_YV24: { - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u; - const uint8* src_v; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; if (format == FOURCC_YV24) { src_v = sample + src_width * (abs_src_height + crop_y) + crop_x; src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; @@ -277,38 +242,16 @@ int 
ConvertToI420(const uint8* sample, src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; } - r = I444ToI420(src_y, src_width, - src_u, src_width, - src_v, src_width, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - } - case FOURCC_I411: { - int quarterwidth = (src_width + 3) / 4; - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u = sample + src_width * abs_src_height + - quarterwidth * crop_y + crop_x / 4; - const uint8* src_v = sample + src_width * abs_src_height + - quarterwidth * (abs_src_height + crop_y) + crop_x / 4; - r = I411ToI420(src_y, src_width, - src_u, quarterwidth, - src_v, quarterwidth, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); + r = I444ToI420(src_y, src_width, src_u, src_width, src_v, src_width, + dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, crop_width, inv_crop_height); break; } #ifdef HAVE_JPEG case FOURCC_MJPG: - r = MJPGToI420(sample, sample_size, - y, y_stride, - u, u_stride, - v, v_stride, - src_width, abs_src_height, crop_width, inv_crop_height); + r = MJPGToI420(sample, sample_size, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, src_width, + abs_src_height, crop_width, inv_crop_height); break; #endif default: @@ -317,13 +260,10 @@ int ConvertToI420(const uint8* sample, if (need_buf) { if (!r) { - r = I420Rotate(y, y_stride, - u, u_stride, - v, v_stride, - tmp_y, tmp_y_stride, - tmp_u, tmp_u_stride, - tmp_v, tmp_v_stride, - crop_width, abs_crop_height, rotation); + r = I420Rotate(dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, tmp_y, tmp_y_stride, tmp_u, tmp_u_stride, + tmp_v, tmp_v_stride, crop_width, abs_crop_height, + rotation); } free(rotate_buffer); } diff --git a/media/libvpx/libvpx/third_party/libyuv/source/cpu_id.cc b/media/libvpx/libvpx/third_party/libyuv/source/cpu_id.cc index 84927ebc3e2b..31e24b6739b6 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/cpu_id.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/cpu_id.cc @@ -13,22 +13,16 @@ #if defined(_MSC_VER) #include <intrin.h> // For __cpuidex() #endif -#if !defined(__pnacl__) && !defined(__CLR_VER) && \ +#if !defined(__pnacl__) && !defined(__CLR_VER) && \ !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \ defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) #include <immintrin.h> // For _xgetbv() #endif -#if !defined(__native_client__) -#include <stdlib.h> // For getenv() -#endif - // For ArmCpuCaps() but unittested on all platforms #include <stdio.h> #include <string.h> -#include "libyuv/basic_types.h" // For CPU_X86 - #ifdef __cplusplus namespace libyuv { extern "C" { @@ -43,16 +37,20 @@ extern "C" { #define SAFEBUFFERS #endif +// cpu_info_ variable for SIMD instruction sets detected. +LIBYUV_API int cpu_info_ = 0; + +// TODO(fbarchard): Consider using int for cpuid so casting is not needed. // Low level cpuid for X86. -#if (defined(_M_IX86) || defined(_M_X64) || \ - defined(__i386__) || defined(__x86_64__)) && \ +#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ + defined(__x86_64__)) && \ !defined(__pnacl__) && !defined(__CLR_VER) LIBYUV_API -void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { +void CpuId(int info_eax, int info_ecx, int* cpu_info) { #if defined(_MSC_VER) // Visual C version uses intrinsic or inline x86 assembly. 
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) - __cpuidex((int*)(cpu_info), info_eax, info_ecx); + __cpuidex(cpu_info, info_eax, info_ecx); #elif defined(_M_IX86) __asm { mov eax, info_eax @@ -66,26 +64,26 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { } #else // Visual C but not x86 if (info_ecx == 0) { - __cpuid((int*)(cpu_info), info_eax); + __cpuid(cpu_info, info_eax); } else { - cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0; + cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0u; } #endif // GCC version uses inline x86 assembly. #else // defined(_MSC_VER) - uint32 info_ebx, info_edx; - asm volatile ( -#if defined( __i386__) && defined(__PIC__) - // Preserve ebx for fpic 32 bit. - "mov %%ebx, %%edi \n" - "cpuid \n" - "xchg %%edi, %%ebx \n" - : "=D" (info_ebx), + int info_ebx, info_edx; + asm volatile( +#if defined(__i386__) && defined(__PIC__) + // Preserve ebx for fpic 32 bit. + "mov %%ebx, %%edi \n" + "cpuid \n" + "xchg %%edi, %%ebx \n" + : "=D"(info_ebx), #else - "cpuid \n" - : "=b" (info_ebx), + "cpuid \n" + : "=b"(info_ebx), #endif // defined( __i386__) && defined(__PIC__) - "+a" (info_eax), "+c" (info_ecx), "=d" (info_edx)); + "+a"(info_eax), "+c"(info_ecx), "=d"(info_edx)); cpu_info[0] = info_eax; cpu_info[1] = info_ebx; cpu_info[2] = info_ecx; @@ -94,7 +92,9 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { } #else // (defined(_M_IX86) || defined(_M_X64) ... LIBYUV_API -void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) { +void CpuId(int eax, int ecx, int* cpu_info) { + (void)eax; + (void)ecx; cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; } #endif @@ -111,20 +111,22 @@ void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) { #if defined(_M_IX86) && (_MSC_VER < 1900) #pragma optimize("g", off) #endif -#if (defined(_M_IX86) || defined(_M_X64) || \ - defined(__i386__) || defined(__x86_64__)) && \ +#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ + defined(__x86_64__)) && \ !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) -#define HAS_XGETBV // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. int GetXCR0() { - uint32 xcr0 = 0u; + int xcr0 = 0; #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) - xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. + xcr0 = (int)_xgetbv(0); // VS2010 SP1 required. NOLINT #elif defined(__i386__) || defined(__x86_64__) - asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); + asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0) : "c"(0) : "%edx"); #endif // defined(__i386__) || defined(__x86_64__) return xcr0; } +#else +// xgetbv unavailable to query for OSSave support. Return 0. +#define GetXCR0() 0 #endif // defined(_M_IX86) || defined(_M_X64) .. // Return optimization to previous setting. #if defined(_M_IX86) && (_MSC_VER < 1900) @@ -133,8 +135,7 @@ int GetXCR0() { // based on libvpx arm_cpudetect.c // For Arm, but public to allow testing on any CPU -LIBYUV_API SAFEBUFFERS -int ArmCpuCaps(const char* cpuinfo_name) { +LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) { char cpuinfo_line[512]; FILE* f = fopen(cpuinfo_name, "r"); if (!f) { @@ -151,7 +152,7 @@ int ArmCpuCaps(const char* cpuinfo_name) { } // aarch64 uses asimd for Neon. p = strstr(cpuinfo_line, " asimd"); - if (p && (p[6] == ' ' || p[6] == '\n')) { + if (p) { fclose(f); return kCpuHasNEON; } @@ -161,103 +162,78 @@ int ArmCpuCaps(const char* cpuinfo_name) { return 0; } -// CPU detect function for SIMD instruction sets. 
-LIBYUV_API -int cpu_info_ = 0; // cpu_info is not initialized yet. - -// Test environment variable for disabling CPU features. Any non-zero value -// to disable. Zero ignored to make it easy to set the variable on/off. -#if !defined(__native_client__) && !defined(_M_ARM) - -static LIBYUV_BOOL TestEnv(const char* name) { - const char* var = getenv(name); - if (var) { - if (var[0] != '0') { - return LIBYUV_TRUE; +// TODO(fbarchard): Consider read_msa_ir(). +// TODO(fbarchard): Add unittest. +LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name, + const char ase[]) { + char cpuinfo_line[512]; + FILE* f = fopen(cpuinfo_name, "r"); + if (!f) { + // ase enabled if /proc/cpuinfo is unavailable. + if (strcmp(ase, " msa") == 0) { + return kCpuHasMSA; + } + return 0; + } + while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) { + if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) { + char* p = strstr(cpuinfo_line, ase); + if (p) { + fclose(f); + if (strcmp(ase, " msa") == 0) { + return kCpuHasMSA; + } + return 0; + } } } - return LIBYUV_FALSE; + fclose(f); + return 0; } -#else // nacl does not support getenv(). -static LIBYUV_BOOL TestEnv(const char*) { - return LIBYUV_FALSE; -} -#endif -LIBYUV_API SAFEBUFFERS -int InitCpuFlags(void) { - // TODO(fbarchard): swap kCpuInit logic so 0 means uninitialized. +static SAFEBUFFERS int GetCpuFlags(void) { int cpu_info = 0; -#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86) - uint32 cpu_info0[4] = { 0, 0, 0, 0 }; - uint32 cpu_info1[4] = { 0, 0, 0, 0 }; - uint32 cpu_info7[4] = { 0, 0, 0, 0 }; +#if !defined(__pnacl__) && !defined(__CLR_VER) && \ + (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ + defined(_M_IX86)) + int cpu_info0[4] = {0, 0, 0, 0}; + int cpu_info1[4] = {0, 0, 0, 0}; + int cpu_info7[4] = {0, 0, 0, 0}; CpuId(0, 0, cpu_info0); CpuId(1, 0, cpu_info1); if (cpu_info0[0] >= 7) { CpuId(7, 0, cpu_info7); } - cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | + cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | - ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) | - ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | - kCpuHasX86; + ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0); -#ifdef HAS_XGETBV - // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv + // AVX requires OS saves YMM registers. if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers - cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX; + cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | + ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | + ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0); // Detect AVX512bw if ((GetXCR0() & 0xe0) == 0xe0) { - cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0; + cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0; + cpu_info |= (cpu_info7[1] & 0x80000000) ? kCpuHasAVX512VL : 0; + cpu_info |= (cpu_info7[2] & 0x00000002) ? kCpuHasAVX512VBMI : 0; + cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0; + cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0; + cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0; + cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0; } } #endif - - // Environment variable overrides for testing. 
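// [Editor's note -- illustration, not part of the upstream patch.] The XCR0
// tests above verify that the OS actually saves the wider register state on
// context switch, not merely that the CPU supports the instructions:
//   (cpu_info1[2] & 0x1c000000) == 0x1c000000 -> CPUID.1:ECX bits 26-28:
//                                                XSAVE, OSXSAVE and AVX
//   (GetXCR0() & 0x06) == 0x06                -> XCR0 bits 1-2: XMM and YMM
//                                                state enabled (AVX usable)
//   (GetXCR0() & 0xe0) == 0xe0                -> XCR0 bits 5-7: opmask,
//                                                ZMM_Hi256 and Hi16_ZMM state
//                                                enabled (AVX-512 usable)
// A CPU can report AVX in CPUID while the OS leaves XCR0 clear, so both
// halves of each check are required.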
- if (TestEnv("LIBYUV_DISABLE_X86")) { - cpu_info &= ~kCpuHasX86; - } - if (TestEnv("LIBYUV_DISABLE_SSE2")) { - cpu_info &= ~kCpuHasSSE2; - } - if (TestEnv("LIBYUV_DISABLE_SSSE3")) { - cpu_info &= ~kCpuHasSSSE3; - } - if (TestEnv("LIBYUV_DISABLE_SSE41")) { - cpu_info &= ~kCpuHasSSE41; - } - if (TestEnv("LIBYUV_DISABLE_SSE42")) { - cpu_info &= ~kCpuHasSSE42; - } - if (TestEnv("LIBYUV_DISABLE_AVX")) { - cpu_info &= ~kCpuHasAVX; - } - if (TestEnv("LIBYUV_DISABLE_AVX2")) { - cpu_info &= ~kCpuHasAVX2; - } - if (TestEnv("LIBYUV_DISABLE_ERMS")) { - cpu_info &= ~kCpuHasERMS; - } - if (TestEnv("LIBYUV_DISABLE_FMA3")) { - cpu_info &= ~kCpuHasFMA3; - } - if (TestEnv("LIBYUV_DISABLE_AVX3")) { - cpu_info &= ~kCpuHasAVX3; - } -#endif #if defined(__mips__) && defined(__linux__) -#if defined(__mips_dspr2) - cpu_info |= kCpuHasDSPR2; +#if defined(__mips_msa) + cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa"); #endif cpu_info |= kCpuHasMIPS; - if (getenv("LIBYUV_DISABLE_DSPR2")) { - cpu_info &= ~kCpuHasDSPR2; - } #endif #if defined(__arm__) || defined(__aarch64__) // gcc -mfpu=neon defines __ARM_NEON__ @@ -276,22 +252,22 @@ int InitCpuFlags(void) { cpu_info = ArmCpuCaps("/proc/cpuinfo"); #endif cpu_info |= kCpuHasARM; - if (TestEnv("LIBYUV_DISABLE_NEON")) { - cpu_info &= ~kCpuHasNEON; - } #endif // __arm__ - if (TestEnv("LIBYUV_DISABLE_ASM")) { - cpu_info = 0; - } - cpu_info |= kCpuInitialized; - cpu_info_ = cpu_info; + cpu_info |= kCpuInitialized; return cpu_info; } // Note that use of this function is not thread safe. LIBYUV_API -void MaskCpuFlags(int enable_flags) { - cpu_info_ = InitCpuFlags() & enable_flags; +int MaskCpuFlags(int enable_flags) { + int cpu_info = GetCpuFlags() & enable_flags; + SetCpuFlags(cpu_info); + return cpu_info; +} + +LIBYUV_API +int InitCpuFlags(void) { + return MaskCpuFlags(-1); } #ifdef __cplusplus diff --git a/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_decoder.cc b/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_decoder.cc index 22025ad04ae7..eaf2530130b2 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_decoder.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_decoder.cc @@ -21,7 +21,7 @@ #if defined(_MSC_VER) // disable warning 4324: structure was padded due to __declspec(align()) -#pragma warning(disable:4324) +#pragma warning(disable : 4324) #endif #endif @@ -102,7 +102,7 @@ MJpegDecoder::~MJpegDecoder() { DestroyOutputBuffers(); } -LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) { +LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) { if (!ValidateJpeg(src, src_len)) { return LIBYUV_FALSE; } @@ -129,7 +129,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) { if (scanlines_[i]) { delete scanlines_[i]; } - scanlines_[i] = new uint8* [scanlines_size]; + scanlines_[i] = new uint8_t*[scanlines_size]; scanlines_sizes_[i] = scanlines_size; } @@ -145,7 +145,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) { if (databuf_[i]) { delete databuf_[i]; } - databuf_[i] = new uint8[databuf_size]; + databuf_[i] = new uint8_t[databuf_size]; databuf_strides_[i] = databuf_stride; } @@ -195,13 +195,11 @@ int MJpegDecoder::GetVertSampFactor(int component) { } int MJpegDecoder::GetHorizSubSampFactor(int component) { - return decompress_struct_->max_h_samp_factor / - GetHorizSampFactor(component); + return decompress_struct_->max_h_samp_factor / GetHorizSampFactor(component); } int MJpegDecoder::GetVertSubSampFactor(int component) { - return 
decompress_struct_->max_v_samp_factor / - GetVertSampFactor(component); + return decompress_struct_->max_v_samp_factor / GetVertSampFactor(component); } int MJpegDecoder::GetImageScanlinesPerImcuRow() { @@ -245,10 +243,10 @@ LIBYUV_BOOL MJpegDecoder::UnloadFrame() { } // TODO(fbarchard): Allow rectangle to be specified: x, y, width, height. -LIBYUV_BOOL MJpegDecoder::DecodeToBuffers( - uint8** planes, int dst_width, int dst_height) { - if (dst_width != GetWidth() || - dst_height > GetHeight()) { +LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(uint8_t** planes, + int dst_width, + int dst_height) { + if (dst_width != GetWidth() || dst_height > GetHeight()) { // ERROR: Bad dimensions return LIBYUV_FALSE; } @@ -289,14 +287,13 @@ LIBYUV_BOOL MJpegDecoder::DecodeToBuffers( for (int i = 0; i < num_outbufs_; ++i) { // TODO(fbarchard): Compute skip to avoid this assert(skip % GetVertSubSampFactor(i) == 0); - int rows_to_skip = - DivideAndRoundDown(skip, GetVertSubSampFactor(i)); - int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) - - rows_to_skip; + int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i)); + int scanlines_to_copy = + GetComponentScanlinesPerImcuRow(i) - rows_to_skip; int data_to_skip = rows_to_skip * GetComponentStride(i); - CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i), - planes[i], GetComponentWidth(i), - GetComponentWidth(i), scanlines_to_copy); + CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i), planes[i], + GetComponentWidth(i), GetComponentWidth(i), + scanlines_to_copy); planes[i] += scanlines_to_copy * GetComponentWidth(i); } lines_left -= (GetImageScanlinesPerImcuRow() - skip); @@ -305,16 +302,15 @@ LIBYUV_BOOL MJpegDecoder::DecodeToBuffers( // Read full MCUs but cropped horizontally for (; lines_left > GetImageScanlinesPerImcuRow(); - lines_left -= GetImageScanlinesPerImcuRow()) { + lines_left -= GetImageScanlinesPerImcuRow()) { if (!DecodeImcuRow()) { FinishDecode(); return LIBYUV_FALSE; } for (int i = 0; i < num_outbufs_; ++i) { int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i); - CopyPlane(databuf_[i], GetComponentStride(i), - planes[i], GetComponentWidth(i), - GetComponentWidth(i), scanlines_to_copy); + CopyPlane(databuf_[i], GetComponentStride(i), planes[i], + GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy); planes[i] += scanlines_to_copy * GetComponentWidth(i); } } @@ -328,19 +324,19 @@ LIBYUV_BOOL MJpegDecoder::DecodeToBuffers( for (int i = 0; i < num_outbufs_; ++i) { int scanlines_to_copy = DivideAndRoundUp(lines_left, GetVertSubSampFactor(i)); - CopyPlane(databuf_[i], GetComponentStride(i), - planes[i], GetComponentWidth(i), - GetComponentWidth(i), scanlines_to_copy); + CopyPlane(databuf_[i], GetComponentStride(i), planes[i], + GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy); planes[i] += scanlines_to_copy * GetComponentWidth(i); } } return FinishDecode(); } -LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque, - int dst_width, int dst_height) { - if (dst_width != GetWidth() || - dst_height > GetHeight()) { +LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, + void* opaque, + int dst_width, + int dst_height) { + if (dst_width != GetWidth() || dst_height > GetHeight()) { // ERROR: Bad dimensions return LIBYUV_FALSE; } @@ -395,7 +391,7 @@ LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque, } // Read full MCUs until we get to the crop point. 
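// [Editor's note -- worked example, not part of the upstream patch.] The
// vertical-crop arithmetic in DecodeToBuffers above, for a YUV420 frame
// (16 image scanlines per iMCU row) when 24 image rows must be skipped
// before the region of interest:
//   whole iMCU rows decoded and discarded: 24 / 16 = 1   (rows 0-15)
//   skip within the next decoded iMCU row: 24 % 16 = 8 image rows
//   luma,   vert subsample 1: rows_to_skip = 8 / 1 = 8 scanlines
//   chroma, vert subsample 2: rows_to_skip = 8 / 2 = 4 scanlines
// The assert(skip % GetVertSubSampFactor(i) == 0) guards that division.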
for (; lines_left >= GetImageScanlinesPerImcuRow(); - lines_left -= GetImageScanlinesPerImcuRow()) { + lines_left -= GetImageScanlinesPerImcuRow()) { if (!DecodeImcuRow()) { FinishDecode(); return LIBYUV_FALSE; @@ -435,22 +431,22 @@ void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT } void term_source(j_decompress_ptr cinfo) { - // Nothing to do. + (void)cinfo; // Nothing to do. } #ifdef HAVE_SETJMP void ErrorHandler(j_common_ptr cinfo) { - // This is called when a jpeglib command experiences an error. Unfortunately - // jpeglib's error handling model is not very flexible, because it expects the - // error handler to not return--i.e., it wants the program to terminate. To - // recover from errors we use setjmp() as shown in their example. setjmp() is - // C's implementation for the "call with current continuation" functionality - // seen in some functional programming languages. - // A formatted message can be output, but is unsafe for release. +// This is called when a jpeglib command experiences an error. Unfortunately +// jpeglib's error handling model is not very flexible, because it expects the +// error handler to not return--i.e., it wants the program to terminate. To +// recover from errors we use setjmp() as shown in their example. setjmp() is +// C's implementation for the "call with current continuation" functionality +// seen in some functional programming languages. +// A formatted message can be output, but is unsafe for release. #ifdef DEBUG char buf[JMSG_LENGTH_MAX]; (*cinfo->err->format_message)(cinfo, buf); - // ERROR: Error in jpeglib: buf +// ERROR: Error in jpeglib: buf #endif SetJmpErrorMgr* mgr = reinterpret_cast<SetJmpErrorMgr*>(cinfo->err); @@ -459,8 +455,9 @@ void ErrorHandler(j_common_ptr cinfo) { longjmp(mgr->setjmp_buffer, 1); } +// Suppress fprintf warnings. void OutputHandler(j_common_ptr cinfo) { - // Suppress fprintf warnings. + (void)cinfo; } #endif // HAVE_SETJMP @@ -472,9 +469,9 @@ void MJpegDecoder::AllocOutputBuffers(int num_outbufs) { // it. 
DestroyOutputBuffers(); - scanlines_ = new uint8** [num_outbufs]; + scanlines_ = new uint8_t**[num_outbufs]; scanlines_sizes_ = new int[num_outbufs]; - databuf_ = new uint8* [num_outbufs]; + databuf_ = new uint8_t*[num_outbufs]; databuf_strides_ = new int[num_outbufs]; for (int i = 0; i < num_outbufs; ++i) { @@ -490,13 +487,13 @@ void MJpegDecoder::AllocOutputBuffers(int num_outbufs) { void MJpegDecoder::DestroyOutputBuffers() { for (int i = 0; i < num_outbufs_; ++i) { - delete [] scanlines_[i]; - delete [] databuf_[i]; + delete[] scanlines_[i]; + delete[] databuf_[i]; } - delete [] scanlines_; - delete [] databuf_; - delete [] scanlines_sizes_; - delete [] databuf_strides_; + delete[] scanlines_; + delete[] databuf_; + delete[] scanlines_sizes_; + delete[] databuf_strides_; scanlines_ = NULL; databuf_ = NULL; scanlines_sizes_ = NULL; @@ -530,9 +527,9 @@ LIBYUV_BOOL MJpegDecoder::FinishDecode() { return LIBYUV_TRUE; } -void MJpegDecoder::SetScanlinePointers(uint8** data) { +void MJpegDecoder::SetScanlinePointers(uint8_t** data) { for (int i = 0; i < num_outbufs_; ++i) { - uint8* data_i = data[i]; + uint8_t* data_i = data[i]; for (int j = 0; j < scanlines_sizes_[i]; ++j) { scanlines_[i][j] = data_i; data_i += GetComponentStride(i); @@ -542,26 +539,26 @@ void MJpegDecoder::SetScanlinePointers(uint8** data) { inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() { return (unsigned int)(GetImageScanlinesPerImcuRow()) == - jpeg_read_raw_data(decompress_struct_, - scanlines_, - GetImageScanlinesPerImcuRow()); + jpeg_read_raw_data(decompress_struct_, scanlines_, + GetImageScanlinesPerImcuRow()); } // The helper function which recognizes the jpeg sub-sampling type. JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper( - int* subsample_x, int* subsample_y, int number_of_components) { + int* subsample_x, + int* subsample_y, + int number_of_components) { if (number_of_components == 3) { // Color images. - if (subsample_x[0] == 1 && subsample_y[0] == 1 && - subsample_x[1] == 2 && subsample_y[1] == 2 && - subsample_x[2] == 2 && subsample_y[2] == 2) { + if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 && + subsample_y[1] == 2 && subsample_x[2] == 2 && subsample_y[2] == 2) { return kJpegYuv420; - } else if (subsample_x[0] == 1 && subsample_y[0] == 1 && - subsample_x[1] == 2 && subsample_y[1] == 1 && - subsample_x[2] == 2 && subsample_y[2] == 1) { + } + if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 && + subsample_y[1] == 1 && subsample_x[2] == 2 && subsample_y[2] == 1) { return kJpegYuv422; - } else if (subsample_x[0] == 1 && subsample_y[0] == 1 && - subsample_x[1] == 1 && subsample_y[1] == 1 && - subsample_x[2] == 1 && subsample_y[2] == 1) { + } + if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 1 && + subsample_y[1] == 1 && subsample_x[2] == 1 && subsample_y[2] == 1) { return kJpegYuv444; } } else if (number_of_components == 1) { // Grey-scale images. @@ -574,4 +571,3 @@ JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper( } // namespace libyuv #endif // HAVE_JPEG - diff --git a/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_validate.cc b/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_validate.cc index 9c4883204517..80c2cc0cb9be 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_validate.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/mjpeg_validate.cc @@ -18,13 +18,13 @@ extern "C" { #endif // Helper function to scan for EOI marker (0xff 0xd9). 
-static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) { +static LIBYUV_BOOL ScanEOI(const uint8_t* sample, size_t sample_size) { if (sample_size >= 2) { - const uint8* end = sample + sample_size - 1; - const uint8* it = sample; + const uint8_t* end = sample + sample_size - 1; + const uint8_t* it = sample; while (it < end) { // TODO(fbarchard): scan for 0xd9 instead. - it = static_cast<const uint8*>(memchr(it, 0xff, end - it)); + it = (const uint8_t*)(memchr(it, 0xff, end - it)); if (it == NULL) { break; } @@ -39,7 +39,7 @@ static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) { } // Helper function to validate the jpeg appears intact. -LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) { +LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size) { // Maximum size that ValidateJpeg will consider valid. const size_t kMaxJpegSize = 0x7fffffffull; const size_t kBackSearchSize = 1024; @@ -68,4 +68,3 @@ } // extern "C" } // namespace libyuv #endif - diff --git a/media/libvpx/libvpx/third_party/libyuv/source/planar_functions.cc b/media/libvpx/libvpx/third_party/libyuv/source/planar_functions.cc index a764f8da4721..5eae3f763a72 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/planar_functions.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/planar_functions.cc @@ -26,11 +26,14 @@ extern "C" { #endif // Copy a plane of data LIBYUV_API -void CopyPlane(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height) { +void CopyPlane(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height) { int y; - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; + void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; // Negative height means invert the image. if (height < 0) { height = -height; @@ -38,8 +41,7 @@ void CopyPlane(const uint8* src_y, int src_stride_y, dst_stride_y = -dst_stride_y; } // Coalesce rows. 
- if (src_stride_y == width && - dst_stride_y == width) { + if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; @@ -111,11 +112,6 @@ void CopyPlane_16(const uint16* src_y, int src_stride_y, CopyRow = CopyRow_16_NEON; } #endif -#if defined(HAS_COPYROW_16_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_16_MIPS; - } -#endif // Copy plane for (y = 0; y < height; ++y) { @@ -125,19 +121,124 @@ void CopyPlane_16(const uint16* src_y, int src_stride_y, } } +// Convert a plane of 16 bit data to 8 bit +LIBYUV_API +void Convert16To8Plane(const uint16_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int scale, // 16384 for 10 bits + int width, + int height) { + int y; + void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale, + int width) = Convert16To8Row_C; + + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + // Coalesce rows. + if (src_stride_y == width && dst_stride_y == width) { + width *= height; + height = 1; + src_stride_y = dst_stride_y = 0; + } +#if defined(HAS_CONVERT16TO8ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + Convert16To8Row = Convert16To8Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + Convert16To8Row = Convert16To8Row_SSSE3; + } + } +#endif +#if defined(HAS_CONVERT16TO8ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Convert16To8Row = Convert16To8Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + Convert16To8Row = Convert16To8Row_AVX2; + } + } +#endif + + // Convert plane + for (y = 0; y < height; ++y) { + Convert16To8Row(src_y, dst_y, scale, width); + src_y += src_stride_y; + dst_y += dst_stride_y; + } +} + +// Convert a plane of 8 bit data to 16 bit +LIBYUV_API +void Convert8To16Plane(const uint8_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int scale, // 16384 for 10 bits + int width, + int height) { + int y; + void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale, + int width) = Convert8To16Row_C; + + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + // Coalesce rows. + if (src_stride_y == width && dst_stride_y == width) { + width *= height; + height = 1; + src_stride_y = dst_stride_y = 0; + } +#if defined(HAS_CONVERT8TO16ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + Convert8To16Row = Convert8To16Row_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + Convert8To16Row = Convert8To16Row_SSE2; + } + } +#endif +#if defined(HAS_CONVERT8TO16ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Convert8To16Row = Convert8To16Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + Convert8To16Row = Convert8To16Row_AVX2; + } + } +#endif + + // Convert plane + for (y = 0; y < height; ++y) { + Convert8To16Row(src_y, dst_y, scale, width); + src_y += src_stride_y; + dst_y += dst_stride_y; + } +} + // Copy I422. 
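// [Editor's note -- illustration, not part of the upstream patch; the exact
// row kernels live in the row_* files.] The scale convention for the two
// plane converters above is roughly dst = (src * scale) >> 16 for
// Convert16To8Plane, so with scale = 16384 (the "10 bits" case named in the
// parameter comment):
//   1023 * 16384 = 16760832;  16760832 >> 16 = 255
// i.e. full-range 10-bit input maps onto full-range 8-bit output.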
LIBYUV_API -int I422Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +int I422Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int halfwidth = (width + 1) >> 1; - if (!src_u || !src_v || - !dst_u || !dst_v || - width <= 0 || height == 0) { + if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -161,16 +262,21 @@ int I422Copy(const uint8* src_y, int src_stride_y, // Copy I444. LIBYUV_API -int I444Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_u || !src_v || - !dst_u || !dst_v || - width <= 0 || height == 0) { +int I444Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -194,9 +300,12 @@ int I444Copy(const uint8* src_y, int src_stride_y, // Copy I400. LIBYUV_API -int I400ToI400(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height) { +int I400ToI400(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height) { if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } @@ -212,11 +321,20 @@ int I400ToI400(const uint8* src_y, int src_stride_y, // Convert I420 to I400. LIBYUV_API -int I420ToI400(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - int width, int height) { +int I420ToI400(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height) { + (void)src_u; + (void)src_stride_u; + (void)src_v; + (void)src_stride_v; if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } @@ -234,12 +352,16 @@ int I420ToI400(const uint8* src_y, int src_stride_y, // Support function for NV12 etc UV channels. // Width and height are plane sizes (typically half pixel width). LIBYUV_API -void SplitUVPlane(const uint8* src_uv, int src_stride_uv, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { +void SplitUVPlane(const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int y; - void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; // Negative height means invert the image. 
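// [Editor's note -- illustration, not part of the upstream patch.] The
// inversion idiom used just below recurs throughout this file: a negative
// height flips the image by pointing each destination plane at its last row
// and negating the stride, so the unchanged top-to-bottom row loop writes
// bottom-up. For an 8-row plane with stride 64:
//   dst += (8 - 1) * 64;  // start at row 7
//   stride = -64;         // each row step now moves one row up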
if (height < 0) { @@ -250,8 +372,7 @@ void SplitUVPlane(const uint8* src_uv, int src_stride_uv, dst_stride_v = -dst_stride_v; } // Coalesce rows. - if (src_stride_uv == width * 2 && - dst_stride_u == width && + if (src_stride_uv == width * 2 && dst_stride_u == width && dst_stride_v == width) { width *= height; height = 1; @@ -281,13 +402,11 @@ void SplitUVPlane(const uint8* src_uv, int src_stride_uv, } } #endif -#if defined(HAS_SPLITUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && - IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) && - IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) { - SplitUVRow = SplitUVRow_Any_DSPR2; - if (IS_ALIGNED(width, 16)) { - SplitUVRow = SplitUVRow_DSPR2; +#if defined(HAS_SPLITUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + SplitUVRow = SplitUVRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + SplitUVRow = SplitUVRow_MSA; } } #endif @@ -302,13 +421,17 @@ void SplitUVPlane(const uint8* src_uv, int src_stride_uv, } LIBYUV_API -void MergeUVPlane(const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_uv, int dst_stride_uv, - int width, int height) { +void MergeUVPlane(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { int y; - void (*MergeUVRow)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) = MergeUVRow_C; + void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, + uint8_t* dst_uv, int width) = MergeUVRow_C; // Coalesce rows. // Negative height means invert the image. if (height < 0) { @@ -317,8 +440,7 @@ void MergeUVPlane(const uint8* src_u, int src_stride_u, dst_stride_uv = -dst_stride_uv; } // Coalesce rows. - if (src_stride_u == width && - src_stride_v == width && + if (src_stride_u == width && src_stride_v == width && dst_stride_uv == width * 2) { width *= height; height = 1; @@ -348,6 +470,14 @@ void MergeUVPlane(const uint8* src_u, int src_stride_u, } } #endif +#if defined(HAS_MERGEUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + MergeUVRow = MergeUVRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + MergeUVRow = MergeUVRow_MSA; + } + } +#endif for (y = 0; y < height; ++y) { // Merge a row of U and V into a row of UV. @@ -358,12 +488,131 @@ void MergeUVPlane(const uint8* src_u, int src_stride_u, } } -// Mirror a plane of data. -void MirrorPlane(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height) { +// Support function for NV12 etc RGB channels. +// Width and height are plane sizes (typically half pixel width). +LIBYUV_API +void SplitRGBPlane(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height) { int y; - void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C; + void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, + uint8_t* dst_b, int width) = SplitRGBRow_C; + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_r = dst_r + (height - 1) * dst_stride_r; + dst_g = dst_g + (height - 1) * dst_stride_g; + dst_b = dst_b + (height - 1) * dst_stride_b; + dst_stride_r = -dst_stride_r; + dst_stride_g = -dst_stride_g; + dst_stride_b = -dst_stride_b; + } + // Coalesce rows. 
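// [Editor's note -- illustration, not part of the upstream patch.] The
// coalescing step that follows is a recurring libyuv trick: when every
// stride equals the row width (no padding between rows), the whole image is
// one contiguous run of bytes, so it can be processed as a single "row" of
// width * height pixels -- one pass through the SIMD row function instead of
// height passes. Zeroing the strides is then harmless because the row loop
// executes exactly once.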
+  if (src_stride_rgb == width * 3 && dst_stride_r == width &&
+      dst_stride_g == width && dst_stride_b == width) {
+    width *= height;
+    height = 1;
+    src_stride_rgb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
+  }
+#if defined(HAS_SPLITRGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    SplitRGBRow = SplitRGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      SplitRGBRow = SplitRGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_SPLITRGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    SplitRGBRow = SplitRGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      SplitRGBRow = SplitRGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    // Split a row of RGB into rows of R, G and B.
+    SplitRGBRow(src_rgb, dst_r, dst_g, dst_b, width);
+    dst_r += dst_stride_r;
+    dst_g += dst_stride_g;
+    dst_b += dst_stride_b;
+    src_rgb += src_stride_rgb;
+  }
+}
+
+LIBYUV_API
+void MergeRGBPlane(const uint8_t* src_r,
+                   int src_stride_r,
+                   const uint8_t* src_g,
+                   int src_stride_g,
+                   const uint8_t* src_b,
+                   int src_stride_b,
+                   uint8_t* dst_rgb,
+                   int dst_stride_rgb,
+                   int width,
+                   int height) {
+  int y;
+  void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
+                      const uint8_t* src_b, uint8_t* dst_rgb, int width) =
+      MergeRGBRow_C;
+  // Coalesce rows.
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
+    dst_stride_rgb = -dst_stride_rgb;
+  }
+  // Coalesce rows.
+  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
+      dst_stride_rgb == width * 3) {
+    width *= height;
+    height = 1;
+    src_stride_r = src_stride_g = src_stride_b = dst_stride_rgb = 0;
+  }
+#if defined(HAS_MERGERGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    MergeRGBRow = MergeRGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      MergeRGBRow = MergeRGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_MERGERGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    MergeRGBRow = MergeRGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      MergeRGBRow = MergeRGBRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    // Merge a row of R, G and B into a row of RGB.
+    MergeRGBRow(src_r, src_g, src_b, dst_rgb, width);
+    src_r += src_stride_r;
+    src_g += src_stride_g;
+    src_b += src_stride_b;
+    dst_rgb += dst_stride_rgb;
+  }
+}
+
+// Mirror a plane of data.
+void MirrorPlane(const uint8_t* src_y,
+                 int src_stride_y,
+                 uint8_t* dst_y,
+                 int dst_stride_y,
+                 int width,
+                 int height) {
+  int y;
+  void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;
@@ -394,12 +643,12 @@ void MirrorPlane(const uint8* src_y, int src_stride_y,
     }
   }
 #endif
-// TODO(fbarchard): Mirror on mips handle unaligned memory.
-#if defined(HAS_MIRRORROW_DSPR2)
-  if (TestCpuFlag(kCpuHasDSPR2) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
-    MirrorRow = MirrorRow_DSPR2;
+#if defined(HAS_MIRRORROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    MirrorRow = MirrorRow_Any_MSA;
+    if (IS_ALIGNED(width, 64)) {
+      MirrorRow = MirrorRow_MSA;
+    }
  }
 #endif
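All of the plane helpers in these hunks share the convention called out in their comments: a negative height inverts the image, implemented by pointing at the last row and negating the stride. A minimal caller-side sketch (illustrative buffer and values; assumes the libyuv public headers are on the include path):

    #include <stdint.h>
    #include "libyuv/planar_functions.h"

    int flip_gray_plane_example(void) {
      uint8_t src[8 * 8], dst[8 * 8];
      for (int i = 0; i < 64; ++i) src[i] = (uint8_t)i;
      // A height of -8 writes rows bottom-up, i.e. the copy is flipped
      // vertically; width and strides are unchanged.
      return I400ToI400(src, 8, dst, 8, 8, -8);
    }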
@@ -413,17 +662,24 @@ void MirrorPlane(const uint8* src_y, int src_stride_y,
 // Convert YUY2 to I422.
 LIBYUV_API
-int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
+int YUY2ToI422(const uint8_t* src_yuy2,
+               int src_stride_yuy2,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_u,
+               int dst_stride_u,
+               uint8_t* dst_v,
+               int dst_stride_v,
+               int width,
+               int height) {
   int y;
-  void (*YUY2ToUV422Row)(const uint8* src_yuy2,
-                         uint8* dst_u, uint8* dst_v, int width) =
-      YUY2ToUV422Row_C;
-  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
+  void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u,
+                         uint8_t* dst_v, int width) = YUY2ToUV422Row_C;
+  void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
       YUY2ToYRow_C;
+  if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+    return -1;
+  }
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;
@@ -431,10 +687,9 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
     src_stride_yuy2 = -src_stride_yuy2;
   }
   // Coalesce rows.
-  if (src_stride_yuy2 == width * 2 &&
-      dst_stride_y == width &&
-      dst_stride_u * 2 == width &&
-      dst_stride_v * 2 == width) {
+  if (src_stride_yuy2 == width * 2 && dst_stride_y == width &&
+      dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
+      width * height <= 32768) {
     width *= height;
     height = 1;
     src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
@@ -462,15 +717,23 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
 #if defined(HAS_YUY2TOYROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     YUY2ToYRow = YUY2ToYRow_Any_NEON;
-    if (width >= 16) {
-      YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
-    }
+    YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
     if (IS_ALIGNED(width, 16)) {
       YUY2ToYRow = YUY2ToYRow_NEON;
       YUY2ToUV422Row = YUY2ToUV422Row_NEON;
     }
   }
 #endif
+#if defined(HAS_YUY2TOYROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    YUY2ToYRow = YUY2ToYRow_Any_MSA;
+    YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA;
+    if (IS_ALIGNED(width, 32)) {
+      YUY2ToYRow = YUY2ToYRow_MSA;
+      YUY2ToUV422Row = YUY2ToUV422Row_MSA;
+    }
+  }
+#endif
   for (y = 0; y < height; ++y) {
     YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
@@ -485,17 +748,24 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
 // Convert UYVY to I422.
 LIBYUV_API
-int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
+int UYVYToI422(const uint8_t* src_uyvy,
+               int src_stride_uyvy,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_u,
+               int dst_stride_u,
+               uint8_t* dst_v,
+               int dst_stride_v,
+               int width,
+               int height) {
   int y;
-  void (*UYVYToUV422Row)(const uint8* src_uyvy,
-                         uint8* dst_u, uint8* dst_v, int width) =
-      UYVYToUV422Row_C;
-  void (*UYVYToYRow)(const uint8* src_uyvy,
-                     uint8* dst_y, int width) = UYVYToYRow_C;
+  void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u,
+                         uint8_t* dst_v, int width) = UYVYToUV422Row_C;
+  void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
+      UYVYToYRow_C;
+  if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+    return -1;
+  }
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;
@@ -503,10 +773,9 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
     src_stride_uyvy = -src_stride_uyvy;
   }
   // Coalesce rows.
-  if (src_stride_uyvy == width * 2 &&
-      dst_stride_y == width &&
-      dst_stride_u * 2 == width &&
-      dst_stride_v * 2 == width) {
+  if (src_stride_uyvy == width * 2 && dst_stride_y == width &&
+      dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
+      width * height <= 32768) {
     width *= height;
     height = 1;
     src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
@@ -534,15 +803,23 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
 #if defined(HAS_UYVYTOYROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     UYVYToYRow = UYVYToYRow_Any_NEON;
-    if (width >= 16) {
-      UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
-    }
+    UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
     if (IS_ALIGNED(width, 16)) {
       UYVYToYRow = UYVYToYRow_NEON;
       UYVYToUV422Row = UYVYToUV422Row_NEON;
     }
   }
 #endif
+#if defined(HAS_UYVYTOYROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    UYVYToYRow = UYVYToYRow_Any_MSA;
+    UYVYToUV422Row = UYVYToUV422Row_Any_MSA;
+    if (IS_ALIGNED(width, 32)) {
+      UYVYToYRow = UYVYToYRow_MSA;
+      UYVYToUV422Row = UYVYToUV422Row_MSA;
+    }
+  }
+#endif
   for (y = 0; y < height; ++y) {
     UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
@@ -555,13 +832,82 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
   return 0;
 }
+// Convert YUY2 to Y.
+LIBYUV_API
+int YUY2ToY(const uint8_t* src_yuy2,
+            int src_stride_yuy2,
+            uint8_t* dst_y,
+            int dst_stride_y,
+            int width,
+            int height) {
+  int y;
+  void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
+      YUY2ToYRow_C;
+  if (!src_yuy2 || !dst_y || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
+    src_stride_yuy2 = -src_stride_yuy2;
+  }
+  // Coalesce rows.
+  if (src_stride_yuy2 == width * 2 && dst_stride_y == width) {
+    width *= height;
+    height = 1;
+    src_stride_yuy2 = dst_stride_y = 0;
+  }
+#if defined(HAS_YUY2TOYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      YUY2ToYRow = YUY2ToYRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_YUY2TOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      YUY2ToYRow = YUY2ToYRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_YUY2TOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    YUY2ToYRow = YUY2ToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      YUY2ToYRow = YUY2ToYRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_YUY2TOYROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    YUY2ToYRow = YUY2ToYRow_Any_MSA;
+    if (IS_ALIGNED(width, 32)) {
+      YUY2ToYRow = YUY2ToYRow_MSA;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    YUY2ToYRow(src_yuy2, dst_y, width);
+    src_yuy2 += src_stride_yuy2;
+    dst_y += dst_stride_y;
+  }
+  return 0;
+}
+
 // Mirror I400 with optional flipping
 LIBYUV_API
-int I400Mirror(const uint8* src_y, int src_stride_y,
-               uint8* dst_y, int dst_stride_y,
-               int width, int height) {
-  if (!src_y || !dst_y ||
-      width <= 0 || height == 0) {
+int I400Mirror(const uint8_t* src_y,
+               int src_stride_y,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               int width,
+               int height) {
+  if (!src_y || !dst_y || width <= 0 || height == 0) {
     return -1;
   }
   // Negative height means invert the image.
@@ -577,17 +923,24 @@ int I400Mirror(const uint8* src_y, int src_stride_y,
 // Mirror I420 with optional flipping
 LIBYUV_API
-int I420Mirror(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
+int I420Mirror(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_u,
+               int src_stride_u,
+               const uint8_t* src_v,
+               int src_stride_v,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_u,
+               int dst_stride_u,
+               uint8_t* dst_v,
+               int dst_stride_v,
+               int width,
+               int height) {
   int halfwidth = (width + 1) >> 1;
   int halfheight = (height + 1) >> 1;
-  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
+  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || width <= 0 ||
+      height == 0) {
     return -1;
   }
   // Negative height means invert the image.
@@ -612,11 +965,14 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
 // ARGB mirror.
 LIBYUV_API
-int ARGBMirror(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
+int ARGBMirror(const uint8_t* src_argb,
+               int src_stride_argb,
+               uint8_t* dst_argb,
+               int dst_stride_argb,
+               int width,
+               int height) {
   int y;
-  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
+  void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
       ARGBMirrorRow_C;
   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
     return -1;
@@ -651,6 +1007,14 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb,
     }
   }
 #endif
+#if defined(HAS_ARGBMIRRORROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBMirrorRow = ARGBMirrorRow_MSA;
+    }
+  }
+#endif
   // Mirror plane
   for (y = 0; y < height; ++y) {
@@ -666,8 +1030,8 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb,
 // the same blend function for all pixels if possible.
 LIBYUV_API
 ARGBBlendRow GetARGBBlend() {
-  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
-                       uint8* dst_argb, int width) = ARGBBlendRow_C;
+  void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
+                       uint8_t* dst_argb, int width) = ARGBBlendRow_C;
 #if defined(HAS_ARGBBLENDROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ARGBBlendRow = ARGBBlendRow_SSSE3;
@@ -678,19 +1042,28 @@ ARGBBlendRow GetARGBBlend() {
   if (TestCpuFlag(kCpuHasNEON)) {
     ARGBBlendRow = ARGBBlendRow_NEON;
   }
+#endif
+#if defined(HAS_ARGBBLENDROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    ARGBBlendRow = ARGBBlendRow_MSA;
+  }
 #endif
   return ARGBBlendRow;
 }
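GetARGBBlend performs the CPU-flag dispatch once and hands back the selected row function, so a caller that blends many rows or frames can hoist detection out of its loops. A sketch of that usage (assumes packed rows, stride == width * 4, with the foreground premultiplied as the blend functions expect):

    #include <stddef.h>
    #include <stdint.h>
    #include "libyuv/planar_functions.h"

    void blend_frames(const uint8_t* fg, const uint8_t* bg, uint8_t* dst,
                      int width, int height) {
      ARGBBlendRow row = GetARGBBlend();  // dispatch resolved once, not per row
      for (int y = 0; y < height; ++y) {
        size_t off = (size_t)y * width * 4;
        row(fg + off, bg + off, dst + off, width);
      }
    }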
 // Alpha Blend 2 ARGB images and store to destination.
 LIBYUV_API
-int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
-              const uint8* src_argb1, int src_stride_argb1,
-              uint8* dst_argb, int dst_stride_argb,
-              int width, int height) {
+int ARGBBlend(const uint8_t* src_argb0,
+              int src_stride_argb0,
+              const uint8_t* src_argb1,
+              int src_stride_argb1,
+              uint8_t* dst_argb,
+              int dst_stride_argb,
+              int width,
+              int height) {
   int y;
-  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
-                       uint8* dst_argb, int width) = GetARGBBlend();
+  void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
+                       uint8_t* dst_argb, int width) = GetARGBBlend();
   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
     return -1;
   }
@@ -701,8 +1074,7 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
     dst_stride_argb = -dst_stride_argb;
   }
   // Coalesce rows.
-  if (src_stride_argb0 == width * 4 &&
-      src_stride_argb1 == width * 4 &&
+  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
       dst_stride_argb == width * 4) {
     width *= height;
     height = 1;
@@ -720,14 +1092,20 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
 // Alpha Blend plane and store to destination.
 LIBYUV_API
-int BlendPlane(const uint8* src_y0, int src_stride_y0,
-               const uint8* src_y1, int src_stride_y1,
-               const uint8* alpha, int alpha_stride,
-               uint8* dst_y, int dst_stride_y,
-               int width, int height) {
+int BlendPlane(const uint8_t* src_y0,
+               int src_stride_y0,
+               const uint8_t* src_y1,
+               int src_stride_y1,
+               const uint8_t* alpha,
+               int alpha_stride,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               int width,
+               int height) {
   int y;
-  void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
-                        const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
+  void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
+                        const uint8_t* alpha, uint8_t* dst, int width) =
+      BlendPlaneRow_C;
   if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
     return -1;
   }
@@ -739,10 +1117,8 @@ int BlendPlane(const uint8* src_y0, int src_stride_y0,
   }
   // Coalesce rows for Y plane.
-  if (src_stride_y0 == width &&
-      src_stride_y1 == width &&
-      alpha_stride == width &&
-      dst_stride_y == width) {
+  if (src_stride_y0 == width && src_stride_y1 == width &&
+      alpha_stride == width && dst_stride_y == width) {
     width *= height;
     height = 1;
     src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
   }
@@ -750,7 +1126,7 @@ int BlendPlane(const uint8* src_y0, int src_stride_y0,
 #if defined(HAS_BLENDPLANEROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
-    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
+    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
     if (IS_ALIGNED(width, 8)) {
       BlendPlaneRow = BlendPlaneRow_SSSE3;
     }
@@ -758,7 +1134,7 @@ int BlendPlane(const uint8* src_y0, int src_stride_y0,
 #endif
 #if defined(HAS_BLENDPLANEROW_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
-    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
+    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
     if (IS_ALIGNED(width, 32)) {
       BlendPlaneRow = BlendPlaneRow_AVX2;
     }
@@ -778,24 +1154,36 @@ int BlendPlane(const uint8* src_y0, int src_stride_y0,
 #define MAXTWIDTH 2048
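The recurring "Coalesce rows." blocks are a single optimization: when every stride equals the row width in bytes, the image is contiguous in memory, so it can be handed to the row kernel as one long row. Sketched in isolation (hypothetical helper; each function above inlines the same logic):

    // If both planes are contiguous, fold the whole image into one row so
    // the SIMD row function runs once with no per-row loop overhead.
    static void coalesce_rows(int* width, int* height,
                              int* src_stride, int* dst_stride,
                              int bytes_per_pixel) {
      if (*src_stride == *width * bytes_per_pixel &&
          *dst_stride == *width * bytes_per_pixel) {
        *width *= *height;
        *height = 1;
        *src_stride = *dst_stride = 0;  // strides no longer need to advance
      }
    }

The YUY2 and UYVY converters above additionally require width * height <= 32768 before folding.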
 // Alpha Blend YUV images and store to destination.
 LIBYUV_API
-int I420Blend(const uint8* src_y0, int src_stride_y0,
-              const uint8* src_u0, int src_stride_u0,
-              const uint8* src_v0, int src_stride_v0,
-              const uint8* src_y1, int src_stride_y1,
-              const uint8* src_u1, int src_stride_u1,
-              const uint8* src_v1, int src_stride_v1,
-              const uint8* alpha, int alpha_stride,
-              uint8* dst_y, int dst_stride_y,
-              uint8* dst_u, int dst_stride_u,
-              uint8* dst_v, int dst_stride_v,
-              int width, int height) {
+int I420Blend(const uint8_t* src_y0,
+              int src_stride_y0,
+              const uint8_t* src_u0,
+              int src_stride_u0,
+              const uint8_t* src_v0,
+              int src_stride_v0,
+              const uint8_t* src_y1,
+              int src_stride_y1,
+              const uint8_t* src_u1,
+              int src_stride_u1,
+              const uint8_t* src_v1,
+              int src_stride_v1,
+              const uint8_t* alpha,
+              int alpha_stride,
+              uint8_t* dst_y,
+              int dst_stride_y,
+              uint8_t* dst_u,
+              int dst_stride_u,
+              uint8_t* dst_v,
+              int dst_stride_v,
+              int width,
+              int height) {
   int y;
   // Half width/height for UV.
   int halfwidth = (width + 1) >> 1;
-  void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
-                        const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
-  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
+  void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
+                        const uint8_t* alpha, uint8_t* dst, int width) =
+      BlendPlaneRow_C;
+  void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
+                        uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
   if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
       !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
     return -1;
@@ -809,11 +1197,8 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
   }
   // Blend Y plane.
-  BlendPlane(src_y0, src_stride_y0,
-             src_y1, src_stride_y1,
-             alpha, alpha_stride,
-             dst_y, dst_stride_y,
-             width, height);
+  BlendPlane(src_y0, src_stride_y0, src_y1, src_stride_y1, alpha, alpha_stride,
+             dst_y, dst_stride_y, width, height);
 #if defined(HAS_BLENDPLANEROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
@@ -893,13 +1278,17 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
 // Multiply 2 ARGB images and store to destination.
 LIBYUV_API
-int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
-                 const uint8* src_argb1, int src_stride_argb1,
-                 uint8* dst_argb, int dst_stride_argb,
-                 int width, int height) {
+int ARGBMultiply(const uint8_t* src_argb0,
+                 int src_stride_argb0,
+                 const uint8_t* src_argb1,
+                 int src_stride_argb1,
+                 uint8_t* dst_argb,
+                 int dst_stride_argb,
+                 int width,
+                 int height) {
   int y;
-  void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
-                          int width) = ARGBMultiplyRow_C;
+  void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1,
+                          uint8_t* dst, int width) = ARGBMultiplyRow_C;
   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
     return -1;
   }
@@ -910,8 +1299,7 @@ int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
     dst_stride_argb = -dst_stride_argb;
   }
   // Coalesce rows.
-  if (src_stride_argb0 == width * 4 &&
-      src_stride_argb1 == width * 4 &&
+  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
       dst_stride_argb == width * 4) {
     width *= height;
     height = 1;
@@ -941,6 +1329,14 @@ int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
     }
   }
 #endif
+#if defined(HAS_ARGBMULTIPLYROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    ARGBMultiplyRow = ARGBMultiplyRow_Any_MSA;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBMultiplyRow = ARGBMultiplyRow_MSA;
+    }
+  }
+#endif
   // Multiply plane
   for (y = 0; y < height; ++y) {
@@ -954,12 +1350,16 @@ int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
 // Add 2 ARGB images and store to destination.
 LIBYUV_API
-int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
-            const uint8* src_argb1, int src_stride_argb1,
-            uint8* dst_argb, int dst_stride_argb,
-            int width, int height) {
+int ARGBAdd(const uint8_t* src_argb0,
+            int src_stride_argb0,
+            const uint8_t* src_argb1,
+            int src_stride_argb1,
+            uint8_t* dst_argb,
+            int dst_stride_argb,
+            int width,
+            int height) {
   int y;
-  void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
+  void (*ARGBAddRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst,
                      int width) = ARGBAddRow_C;
   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
     return -1;
@@ -971,8 +1371,7 @@ int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
     dst_stride_argb = -dst_stride_argb;
   }
   // Coalesce rows.
-  if (src_stride_argb0 == width * 4 &&
-      src_stride_argb1 == width * 4 &&
+  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
       dst_stride_argb == width * 4) {
     width *= height;
     height = 1;
@@ -1007,6 +1406,14 @@ int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
     }
   }
 #endif
+#if defined(HAS_ARGBADDROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    ARGBAddRow = ARGBAddRow_Any_MSA;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBAddRow = ARGBAddRow_MSA;
+    }
+  }
+#endif
   // Add plane
   for (y = 0; y < height; ++y) {
@@ -1020,13 +1427,17 @@ int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
 // Subtract 2 ARGB images and store to destination.
 LIBYUV_API
-int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
-                 const uint8* src_argb1, int src_stride_argb1,
-                 uint8* dst_argb, int dst_stride_argb,
-                 int width, int height) {
+int ARGBSubtract(const uint8_t* src_argb0,
+                 int src_stride_argb0,
+                 const uint8_t* src_argb1,
+                 int src_stride_argb1,
+                 uint8_t* dst_argb,
+                 int dst_stride_argb,
+                 int width,
+                 int height) {
   int y;
-  void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
-                          int width) = ARGBSubtractRow_C;
+  void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1,
+                          uint8_t* dst, int width) = ARGBSubtractRow_C;
   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
     return -1;
   }
@@ -1037,8 +1448,7 @@ int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
     dst_stride_argb = -dst_stride_argb;
   }
   // Coalesce rows.
-  if (src_stride_argb0 == width * 4 &&
-      src_stride_argb1 == width * 4 &&
+  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
       dst_stride_argb == width * 4) {
     width *= height;
     height = 1;
@@ -1068,6 +1478,14 @@ int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
     }
   }
 #endif
+#if defined(HAS_ARGBSUBTRACTROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    ARGBSubtractRow = ARGBSubtractRow_Any_MSA;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBSubtractRow = ARGBSubtractRow_MSA;
+    }
+  }
+#endif
   // Subtract plane
   for (y = 0; y < height; ++y) {
@@ -1079,21 +1497,23 @@ int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
   return 0;
 }
 // Convert I422 to RGBA with matrix
-static int I422ToRGBAMatrix(const uint8* src_y, int src_stride_y,
-                            const uint8* src_u, int src_stride_u,
-                            const uint8* src_v, int src_stride_v,
-                            uint8* dst_rgba, int dst_stride_rgba,
+static int I422ToRGBAMatrix(const uint8_t* src_y,
+                            int src_stride_y,
+                            const uint8_t* src_u,
+                            int src_stride_u,
+                            const uint8_t* src_v,
+                            int src_stride_v,
+                            uint8_t* dst_rgba,
+                            int dst_stride_rgba,
                             const struct YuvConstants* yuvconstants,
-                            int width, int height) {
+                            int width,
+                            int height) {
   int y;
-  void (*I422ToRGBARow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        const struct YuvConstants* yuvconstants,
-                        int width) = I422ToRGBARow_C;
-  if (!src_y || !src_u || !src_v || !dst_rgba ||
-      width <= 0 || height == 0) {
+  void (*I422ToRGBARow)(const uint8_t* y_buf, const uint8_t* u_buf,
+                        const uint8_t* v_buf, uint8_t* rgb_buf,
+                        const struct YuvConstants* yuvconstants, int width) =
+      I422ToRGBARow_C;
+  if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
     return -1;
   }
   // Negative height means invert the image.
@@ -1126,13 +1546,12 @@ static int I422ToRGBAMatrix(const uint8* src_y, int src_stride_y,
     }
   }
 #endif
-#if defined(HAS_I422TORGBAROW_DSPR2)
-  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
-    I422ToRGBARow = I422ToRGBARow_DSPR2;
+#if defined(HAS_I422TORGBAROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    I422ToRGBARow = I422ToRGBARow_Any_MSA;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToRGBARow = I422ToRGBARow_MSA;
+    }
   }
 #endif
@@ -1148,48 +1567,55 @@ static int I422ToRGBAMatrix(const uint8* src_y, int src_stride_y,
 // Convert I422 to RGBA.
 LIBYUV_API
-int I422ToRGBA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_rgba, int dst_stride_rgba,
-               int width, int height) {
-  return I422ToRGBAMatrix(src_y, src_stride_y,
-                          src_u, src_stride_u,
-                          src_v, src_stride_v,
-                          dst_rgba, dst_stride_rgba,
-                          &kYuvI601Constants,
-                          width, height);
+int I422ToRGBA(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_u,
+               int src_stride_u,
+               const uint8_t* src_v,
+               int src_stride_v,
+               uint8_t* dst_rgba,
+               int dst_stride_rgba,
+               int width,
+               int height) {
+  return I422ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
+                          src_stride_v, dst_rgba, dst_stride_rgba,
+                          &kYuvI601Constants, width, height);
 }
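The BGRA wrapper that follows adds no new kernel: it feeds the RGBA writer the V plane where U is expected (and vice versa) and selects the mirrored YVU constants, which exchanges the roles of the red and blue channels in the output. A caller-side sketch of the two public entry points (assumes the declarations in libyuv/convert_argb.h; one pixel for brevity):

    #include <stdint.h>
    #include "libyuv/convert_argb.h"

    void rgba_and_bgra_example(void) {
      const uint8_t y = 128, u = 128, v = 128;  // one mid-gray I422 pixel
      uint8_t rgba[4], bgra[4];
      I422ToRGBA(&y, 1, &u, 1, &v, 1, rgba, 4, 1, 1);
      I422ToBGRA(&y, 1, &u, 1, &v, 1, bgra, 4, 1, 1);
      // Same color in both buffers; the red/blue channel positions differ.
    }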
 // Convert I422 to BGRA.
 LIBYUV_API
-int I422ToBGRA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_bgra, int dst_stride_bgra,
-               int width, int height) {
-  return I422ToRGBAMatrix(src_y, src_stride_y,
-                          src_v, src_stride_v,  // Swap U and V
-                          src_u, src_stride_u,
-                          dst_bgra, dst_stride_bgra,
+int I422ToBGRA(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_u,
+               int src_stride_u,
+               const uint8_t* src_v,
+               int src_stride_v,
+               uint8_t* dst_bgra,
+               int dst_stride_bgra,
+               int width,
+               int height) {
+  return I422ToRGBAMatrix(src_y, src_stride_y, src_v,
+                          src_stride_v,  // Swap U and V
+                          src_u, src_stride_u, dst_bgra, dst_stride_bgra,
                           &kYvuI601Constants,  // Use Yvu matrix
                           width, height);
 }
 // Convert NV12 to RGB565.
 LIBYUV_API
-int NV12ToRGB565(const uint8* src_y, int src_stride_y,
-                 const uint8* src_uv, int src_stride_uv,
-                 uint8* dst_rgb565, int dst_stride_rgb565,
-                 int width, int height) {
+int NV12ToRGB565(const uint8_t* src_y,
+                 int src_stride_y,
+                 const uint8_t* src_uv,
+                 int src_stride_uv,
+                 uint8_t* dst_rgb565,
+                 int dst_stride_rgb565,
+                 int width,
+                 int height) {
   int y;
-  void (*NV12ToRGB565Row)(const uint8* y_buf,
-                          const uint8* uv_buf,
-                          uint8* rgb_buf,
-                          const struct YuvConstants* yuvconstants,
-                          int width) = NV12ToRGB565Row_C;
-  if (!src_y || !src_uv || !dst_rgb565 ||
-      width <= 0 || height == 0) {
+  void (*NV12ToRGB565Row)(
+      const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
+      const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C;
+  if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
     return -1;
   }
   // Negative height means invert the image.
@@ -1222,6 +1648,14 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
     }
   }
 #endif
+#if defined(HAS_NV12TORGB565ROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    NV12ToRGB565Row = NV12ToRGB565Row_Any_MSA;
+    if (IS_ALIGNED(width, 8)) {
+      NV12ToRGB565Row = NV12ToRGB565Row_MSA;
+    }
+  }
+#endif
   for (y = 0; y < height; ++y) {
     NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvI601Constants, width);
@@ -1236,14 +1670,16 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
 // Convert RAW to RGB24.
 LIBYUV_API
-int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
-               uint8* dst_rgb24, int dst_stride_rgb24,
-               int width, int height) {
+int RAWToRGB24(const uint8_t* src_raw,
+               int src_stride_raw,
+               uint8_t* dst_rgb24,
+               int dst_stride_rgb24,
+               int width,
+               int height) {
   int y;
-  void (*RAWToRGB24Row)(const uint8* src_rgb, uint8* dst_rgb24, int width) =
+  void (*RAWToRGB24Row)(const uint8_t* src_rgb, uint8_t* dst_rgb24, int width) =
       RAWToRGB24Row_C;
-  if (!src_raw || !dst_rgb24 ||
-      width <= 0 || height == 0) {
+  if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) {
     return -1;
   }
   // Negative height means invert the image.
@@ -1253,8 +1689,7 @@ int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
     src_stride_raw = -src_stride_raw;
   }
   // Coalesce rows.
-  if (src_stride_raw == width * 3 &&
-      dst_stride_rgb24 == width * 3) {
+  if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) {
     width *= height;
     height = 1;
     src_stride_raw = dst_stride_rgb24 = 0;
@@ -1275,6 +1710,14 @@ int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
     }
   }
 #endif
+#if defined(HAS_RAWTORGB24ROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    RAWToRGB24Row = RAWToRGB24Row_Any_MSA;
+    if (IS_ALIGNED(width, 16)) {
+      RAWToRGB24Row = RAWToRGB24Row_MSA;
+    }
+  }
+#endif
   for (y = 0; y < height; ++y) {
     RAWToRGB24Row(src_raw, dst_rgb24, width);
@@ -1285,11 +1728,13 @@ int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
 }
 LIBYUV_API
-void SetPlane(uint8* dst_y, int dst_stride_y,
-              int width, int height,
-              uint32 value) {
+void SetPlane(uint8_t* dst_y,
+              int dst_stride_y,
+              int width,
+              int height,
+              uint32_t value) {
   int y;
-  void (*SetRow)(uint8* dst, uint8 value, int width) = SetRow_C;
+  void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
   if (height < 0) {
     height = -height;
     dst_y = dst_y + (height - 1) * dst_stride_y;
@@ -1322,6 +1767,11 @@ void SetPlane(uint8* dst_y, int dst_stride_y,
     SetRow = SetRow_ERMS;
   }
 #endif
+#if defined(HAS_SETROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 16)) {
+    SetRow = SetRow_MSA;
+  }
+#endif
   // Set plane
   for (y = 0; y < height; ++y) {
@@ -1332,22 +1782,26 @@ void SetPlane(uint8* dst_y, int dst_stride_y,
 // Draw a rectangle into I420
 LIBYUV_API
-int I420Rect(uint8* dst_y, int dst_stride_y,
-             uint8* dst_u, int dst_stride_u,
-             uint8* dst_v, int dst_stride_v,
-             int x, int y,
-             int width, int height,
-             int value_y, int value_u, int value_v) {
+int I420Rect(uint8_t* dst_y,
+             int dst_stride_y,
+             uint8_t* dst_u,
+             int dst_stride_u,
+             uint8_t* dst_v,
+             int dst_stride_v,
+             int x,
+             int y,
+             int width,
+             int height,
+             int value_y,
+             int value_u,
+             int value_v) {
   int halfwidth = (width + 1) >> 1;
   int halfheight = (height + 1) >> 1;
-  uint8* start_y = dst_y + y * dst_stride_y + x;
-  uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
-  uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
-  if (!dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0 ||
-      x < 0 || y < 0 ||
-      value_y < 0 || value_y > 255 ||
-      value_u < 0 || value_u > 255 ||
+  uint8_t* start_y = dst_y + y * dst_stride_y + x;
+  uint8_t* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
+  uint8_t* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
+  if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 ||
+      y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 ||
       value_v < 0 || value_v > 255) {
     return -1;
   }
@@ -1360,15 +1814,17 @@ int I420Rect(uint8* dst_y, int dst_stride_y,
 // Draw a rectangle into ARGB
 LIBYUV_API
-int ARGBRect(uint8* dst_argb, int dst_stride_argb,
-             int dst_x, int dst_y,
-             int width, int height,
-             uint32 value) {
+int ARGBRect(uint8_t* dst_argb,
+             int dst_stride_argb,
+             int dst_x,
+             int dst_y,
+             int width,
+             int height,
+             uint32_t value) {
   int y;
-  void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int width) = ARGBSetRow_C;
-  if (!dst_argb ||
-      width <= 0 || height == 0 ||
-      dst_x < 0 || dst_y < 0) {
+  void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
+      ARGBSetRow_C;
+  if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
     return -1;
   }
   if (height < 0) {
@@ -1397,6 +1853,14 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
     ARGBSetRow = ARGBSetRow_X86;
   }
 #endif
+#if defined(HAS_ARGBSETROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    ARGBSetRow = ARGBSetRow_Any_MSA;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBSetRow = ARGBSetRow_MSA;
+    }
+  }
+#endif
   // Set plane
   for (y = 0; y < height; ++y) {
@@ -1420,11 +1884,14 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
 // f is foreground pixel premultiplied by alpha
 LIBYUV_API
-int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
-                  uint8* dst_argb, int dst_stride_argb,
-                  int width, int height) {
+int ARGBAttenuate(const uint8_t* src_argb,
+                  int src_stride_argb,
+                  uint8_t* dst_argb,
+                  int dst_stride_argb,
+                  int width,
+                  int height) {
   int y;
-  void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
+  void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                            int width) = ARGBAttenuateRow_C;
   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
     return -1;
@@ -1435,8 +1902,7 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
     src_stride_argb = -src_stride_argb;
  }
   // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
+  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
     width *= height;
     height = 1;
     src_stride_argb = dst_stride_argb = 0;
@@ -1465,6 +1931,14 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
     }
   }
 #endif
+#if defined(HAS_ARGBATTENUATEROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBAttenuateRow = ARGBAttenuateRow_MSA;
+    }
+  }
+#endif
   for (y = 0; y < height; ++y) {
     ARGBAttenuateRow(src_argb, dst_argb, width);
@@ -1476,11 +1950,14 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
 // Convert preattenuated ARGB to unattenuated ARGB.
 LIBYUV_API
-int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
-                    uint8* dst_argb, int dst_stride_argb,
-                    int width, int height) {
+int ARGBUnattenuate(const uint8_t* src_argb,
+                    int src_stride_argb,
+                    uint8_t* dst_argb,
+                    int dst_stride_argb,
+                    int width,
+                    int height) {
   int y;
-  void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
+  void (*ARGBUnattenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
                              int width) = ARGBUnattenuateRow_C;
   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
     return -1;
@@ -1491,8 +1968,7 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
     src_stride_argb = -src_stride_argb;
   }
   // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
+  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
     width *= height;
     height = 1;
     src_stride_argb = dst_stride_argb = 0;
@@ -1513,7 +1989,7 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
     }
   }
 #endif
-// TODO(fbarchard): Neon version.
+  // TODO(fbarchard): Neon version.
   for (y = 0; y < height; ++y) {
     ARGBUnattenuateRow(src_argb, dst_argb, width);
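ARGBAttenuate stores pixels premultiplied: each color channel is scaled by alpha / 255. A scalar sketch of the per-pixel math (illustrative rounding; the row kernels in this file use fixed-point equivalents):

    #include <stdint.h>

    static void attenuate_pixel(uint8_t bgra[4]) {
      const uint32_t a = bgra[3];
      bgra[0] = (uint8_t)((bgra[0] * a + 127) / 255);
      bgra[1] = (uint8_t)((bgra[1] * a + 127) / 255);
      bgra[2] = (uint8_t)((bgra[2] * a + 127) / 255);
    }

ARGBUnattenuate applies the inverse division, which cannot recover channel bits lost at low alpha, so an attenuate/unattenuate round trip is not exact.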
@@ -1525,12 +2001,15 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
 // Convert ARGB to Grayed ARGB.
 LIBYUV_API
-int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
+int ARGBGrayTo(const uint8_t* src_argb,
+               int src_stride_argb,
+               uint8_t* dst_argb,
+               int dst_stride_argb,
+               int width,
+               int height) {
   int y;
-  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
-                      int width) = ARGBGrayRow_C;
+  void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
+      ARGBGrayRow_C;
   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
     return -1;
   }
@@ -1540,8 +2019,7 @@ int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
     src_stride_argb = -src_stride_argb;
   }
   // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
+  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
     width *= height;
     height = 1;
     src_stride_argb = dst_stride_argb = 0;
@@ -1556,6 +2034,11 @@ int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
     ARGBGrayRow = ARGBGrayRow_NEON;
   }
 #endif
+#if defined(HAS_ARGBGRAYROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
+    ARGBGrayRow = ARGBGrayRow_MSA;
+  }
+#endif
   for (y = 0; y < height; ++y) {
     ARGBGrayRow(src_argb, dst_argb, width);
@@ -1567,13 +2050,16 @@ int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
 // Make a rectangle of ARGB gray scale.
 LIBYUV_API
-int ARGBGray(uint8* dst_argb, int dst_stride_argb,
-             int dst_x, int dst_y,
-             int width, int height) {
+int ARGBGray(uint8_t* dst_argb,
+             int dst_stride_argb,
+             int dst_x,
+             int dst_y,
+             int width,
+             int height) {
   int y;
-  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
-                      int width) = ARGBGrayRow_C;
-  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+  void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
+      ARGBGrayRow_C;
+  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
     return -1;
   }
@@ -1593,6 +2079,12 @@ int ARGBGray(uint8* dst_argb, int dst_stride_argb,
     ARGBGrayRow = ARGBGrayRow_NEON;
   }
 #endif
+#if defined(HAS_ARGBGRAYROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
+    ARGBGrayRow = ARGBGrayRow_MSA;
+  }
+#endif
+
   for (y = 0; y < height; ++y) {
     ARGBGrayRow(dst, dst, width);
     dst += dst_stride_argb;
@@ -1602,11 +2094,15 @@ int ARGBGray(uint8* dst_argb, int dst_stride_argb,
 // Make a rectangle of ARGB Sepia tone.
 LIBYUV_API
-int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
-              int dst_x, int dst_y, int width, int height) {
+int ARGBSepia(uint8_t* dst_argb,
+              int dst_stride_argb,
+              int dst_x,
+              int dst_y,
+              int width,
+              int height) {
   int y;
-  void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
-  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+  void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
+  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
     return -1;
   }
@@ -1626,6 +2122,12 @@ int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
     ARGBSepiaRow = ARGBSepiaRow_NEON;
   }
 #endif
+#if defined(HAS_ARGBSEPIAROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
+    ARGBSepiaRow = ARGBSepiaRow_MSA;
+  }
+#endif
+
   for (y = 0; y < height; ++y) {
     ARGBSepiaRow(dst, width);
     dst += dst_stride_argb;
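ARGBGray and ARGBSepia (like the rectangle functions above) edit a sub-rectangle in place by offsetting the base pointer to (dst_x, dst_y) once and then running the row kernel width pixels at a time, which is also why they reject negative offsets instead of clamping. The addressing is plain pointer arithmetic (sketch; 4 bytes per ARGB pixel):

    #include <stddef.h>
    #include <stdint.h>

    static inline uint8_t* argb_at(uint8_t* base, int stride_bytes,
                                   int x, int y) {
      return base + (size_t)y * stride_bytes + (size_t)x * 4;
    }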
@@ -1636,13 +2138,17 @@ int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
 // Apply a 4x4 matrix to each ARGB pixel.
 // Note: Normally for shading, but can be used to swizzle or invert.
 LIBYUV_API
-int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
-                    uint8* dst_argb, int dst_stride_argb,
-                    const int8* matrix_argb,
-                    int width, int height) {
+int ARGBColorMatrix(const uint8_t* src_argb,
+                    int src_stride_argb,
+                    uint8_t* dst_argb,
+                    int dst_stride_argb,
+                    const int8_t* matrix_argb,
+                    int width,
+                    int height) {
   int y;
-  void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
-                             const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
+  void (*ARGBColorMatrixRow)(const uint8_t* src_argb, uint8_t* dst_argb,
+                             const int8_t* matrix_argb, int width) =
+      ARGBColorMatrixRow_C;
   if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
     return -1;
   }
@@ -1652,8 +2158,7 @@ int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
     src_stride_argb = -src_stride_argb;
   }
   // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
+  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
     width *= height;
     height = 1;
     src_stride_argb = dst_stride_argb = 0;
@@ -1667,6 +2172,11 @@ int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
     ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
   }
+#endif
+#if defined(HAS_ARGBCOLORMATRIXROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
+    ARGBColorMatrixRow = ARGBColorMatrixRow_MSA;
+  }
 #endif
   for (y = 0; y < height; ++y) {
     ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
@@ -1679,13 +2189,17 @@ int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
 // Apply a 4x3 matrix to each ARGB pixel.
 // Deprecated.
 LIBYUV_API
-int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
-                   const int8* matrix_rgb,
-                   int dst_x, int dst_y, int width, int height) {
-  SIMD_ALIGNED(int8 matrix_argb[16]);
-  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
-  if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
-      dst_x < 0 || dst_y < 0) {
+int RGBColorMatrix(uint8_t* dst_argb,
+                   int dst_stride_argb,
+                   const int8_t* matrix_rgb,
+                   int dst_x,
+                   int dst_y,
+                   int width,
+                   int height) {
+  SIMD_ALIGNED(int8_t matrix_argb[16]);
+  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+  if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 ||
+      dst_y < 0) {
     return -1;
   }
@@ -1705,23 +2219,26 @@ int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
   matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
   matrix_argb[15] = 64;  // 1.0
-  return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
-                         dst, dst_stride_argb,
-                         &matrix_argb[0], width, height);
+  return ARGBColorMatrix((const uint8_t*)(dst), dst_stride_argb, dst,
+                         dst_stride_argb, &matrix_argb[0], width, height);
 }
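The color-matrix entries are signed fixed point in which 64 represents 1.0, as the RGBColorMatrix shim above shows when it stores 64 for unit alpha. Under that convention an identity matrix for ARGBColorMatrix is sketched below (assumption: one row of four coefficients per output channel, matching how the shim fills matrix_argb):

    #include <stdint.h>

    static const int8_t kIdentityMatrixARGB[16] = {
        64, 0,  0,  0,   // output channel 0 = 1.0 * input channel 0
        0,  64, 0,  0,
        0,  0,  64, 0,
        0,  0,  0,  64,  // alpha passes through unchanged
    };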
 // Apply a color table to each ARGB pixel.
 // Table contains 256 ARGB values.
 LIBYUV_API
-int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
-                   const uint8* table_argb,
-                   int dst_x, int dst_y, int width, int height) {
+int ARGBColorTable(uint8_t* dst_argb,
+                   int dst_stride_argb,
+                   const uint8_t* table_argb,
+                   int dst_x,
+                   int dst_y,
+                   int width,
+                   int height) {
   int y;
-  void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
+  void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
                             int width) = ARGBColorTableRow_C;
-  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
-  if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
-      dst_x < 0 || dst_y < 0) {
+  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+  if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
+      dst_y < 0) {
     return -1;
   }
   // Coalesce rows.
@@ -1745,15 +2262,19 @@ int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
 // Apply a color table to each ARGB pixel but preserve destination alpha.
 // Table contains 256 ARGB values.
 LIBYUV_API
-int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
-                  const uint8* table_argb,
-                  int dst_x, int dst_y, int width, int height) {
+int RGBColorTable(uint8_t* dst_argb,
+                  int dst_stride_argb,
+                  const uint8_t* table_argb,
+                  int dst_x,
+                  int dst_y,
+                  int width,
+                  int height) {
   int y;
-  void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
+  void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
                            int width) = RGBColorTableRow_C;
-  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
-  if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
-      dst_x < 0 || dst_y < 0) {
+  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+  if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
+      dst_y < 0) {
     return -1;
   }
   // Coalesce rows.
@@ -1784,13 +2305,19 @@ int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
 // Caveat - although SSE2 saturates, the C function does not and should be used
 // with care if doing anything but quantization.
 LIBYUV_API
-int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
-                 int scale, int interval_size, int interval_offset,
-                 int dst_x, int dst_y, int width, int height) {
+int ARGBQuantize(uint8_t* dst_argb,
+                 int dst_stride_argb,
+                 int scale,
+                 int interval_size,
+                 int interval_offset,
+                 int dst_x,
+                 int dst_y,
+                 int width,
+                 int height) {
   int y;
-  void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
+  void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
                           int interval_offset, int width) = ARGBQuantizeRow_C;
-  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
+  uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
       interval_size < 1 || interval_size > 255) {
     return -1;
@@ -1810,6 +2337,11 @@ int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
     ARGBQuantizeRow = ARGBQuantizeRow_NEON;
   }
+#endif
+#if defined(HAS_ARGBQUANTIZEROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
+    ARGBQuantizeRow = ARGBQuantizeRow_MSA;
+  }
 #endif
   for (y = 0; y < height; ++y) {
     ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
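ARGBQuantize posterizes in place: scale is intended to be a 16.16 fixed-point reciprocal of interval_size, so each channel becomes roughly floor(v / interval_size) * interval_size + interval_offset. A scalar sketch of one channel (assumption: this mirrors the C reference; note the saturation caveat above):

    #include <stdint.h>

    static inline uint8_t quantize_channel(uint8_t v, int scale,
                                           int interval_size,
                                           int interval_offset) {
      return (uint8_t)(((v * scale) >> 16) * interval_size + interval_offset);
    }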
@@ -1821,13 +2353,17 @@ int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
 // Computes table of cumulative sum for image where the value is the sum
 // of all values above and to the left of the entry. Used by ARGBBlur.
 LIBYUV_API
-int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
-                             int32* dst_cumsum, int dst_stride32_cumsum,
-                             int width, int height) {
+int ARGBComputeCumulativeSum(const uint8_t* src_argb,
+                             int src_stride_argb,
+                             int32_t* dst_cumsum,
+                             int dst_stride32_cumsum,
+                             int width,
+                             int height) {
   int y;
-  void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
-                                  const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
-  int32* previous_cumsum = dst_cumsum;
+  void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
+                                  const int32_t* previous_cumsum, int width) =
+      ComputeCumulativeSumRow_C;
+  int32_t* previous_cumsum = dst_cumsum;
   if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
     return -1;
   }
@@ -1851,18 +2387,25 @@ int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
 // as the buffer is treated as circular.
 LIBYUV_API
-int ARGBBlur(const uint8* src_argb, int src_stride_argb,
-             uint8* dst_argb, int dst_stride_argb,
-             int32* dst_cumsum, int dst_stride32_cumsum,
-             int width, int height, int radius) {
+int ARGBBlur(const uint8_t* src_argb,
+             int src_stride_argb,
+             uint8_t* dst_argb,
+             int dst_stride_argb,
+             int32_t* dst_cumsum,
+             int dst_stride32_cumsum,
+             int width,
+             int height,
+             int radius) {
   int y;
-  void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
-                                  const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
-  void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
-                                    int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
-  int32* cumsum_bot_row;
-  int32* max_cumsum_bot_row;
-  int32* cumsum_top_row;
+  void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
+                                  const int32_t* previous_cumsum, int width) =
+      ComputeCumulativeSumRow_C;
+  void (*CumulativeSumToAverageRow)(
+      const int32_t* topleft, const int32_t* botleft, int width, int area,
+      uint8_t* dst, int count) = CumulativeSumToAverageRow_C;
+  int32_t* cumsum_bot_row;
+  int32_t* max_cumsum_bot_row;
+  int32_t* cumsum_top_row;
   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
     return -1;
@@ -1889,9 +2432,8 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
 #endif
   // Compute enough CumulativeSum for first row to be blurred. After this
   // one row of CumulativeSum is updated at a time.
-  ARGBComputeCumulativeSum(src_argb, src_stride_argb,
-                           dst_cumsum, dst_stride32_cumsum,
-                           width, radius);
+  ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum,
+                           dst_stride32_cumsum, width, radius);
   src_argb = src_argb + radius * src_stride_argb;
   cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
@@ -1917,7 +2459,7 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
     // Increment cumsum_bot_row pointer with circular buffer wrap around and
     // then fill in a row of CumulativeSum.
     if ((y + radius) < height) {
-      const int32* prev_cumsum_bot_row = cumsum_bot_row;
+      const int32_t* prev_cumsum_bot_row = cumsum_bot_row;
       cumsum_bot_row += dst_stride32_cumsum;
       if (cumsum_bot_row >= max_cumsum_bot_row) {
         cumsum_bot_row = dst_cumsum;
@@ -1929,24 +2471,24 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
     // Left clipped.
     for (x = 0; x < radius + 1; ++x) {
-      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
-                                boxwidth, area, &dst_argb[x * 4], 1);
+      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
+                                &dst_argb[x * 4], 1);
       area += (bot_y - top_y);
       boxwidth += 4;
     }
     // Middle unclipped.
     n = (width - 1) - radius - x + 1;
-    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
-                              boxwidth, area, &dst_argb[x * 4], n);
+    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
+                              &dst_argb[x * 4], n);
     // Right clipped.
     for (x += n; x <= width - 1; ++x) {
       area -= (bot_y - top_y);
       boxwidth -= 4;
       CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
-                                cumsum_bot_row + (x - radius - 1) * 4,
-                                boxwidth, area, &dst_argb[x * 4], 1);
+                                cumsum_bot_row + (x - radius - 1) * 4, boxwidth,
+                                area, &dst_argb[x * 4], 1);
     }
     dst_argb += dst_stride_argb;
   }
@@ -1955,12 +2497,16 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
 // Multiply ARGB image by a specified ARGB value.
 LIBYUV_API
-int ARGBShade(const uint8* src_argb, int src_stride_argb,
-              uint8* dst_argb, int dst_stride_argb,
-              int width, int height, uint32 value) {
+int ARGBShade(const uint8_t* src_argb,
+              int src_stride_argb,
+              uint8_t* dst_argb,
+              int dst_stride_argb,
+              int width,
+              int height,
+              uint32_t value) {
   int y;
-  void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
-                       int width, uint32 value) = ARGBShadeRow_C;
+  void (*ARGBShadeRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width,
+                       uint32_t value) = ARGBShadeRow_C;
   if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
     return -1;
   }
@@ -1970,8 +2516,7 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
     src_stride_argb = -src_stride_argb;
   }
   // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
+  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
     width *= height;
     height = 1;
     src_stride_argb = dst_stride_argb = 0;
@@ -1986,6 +2531,11 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
     ARGBShadeRow = ARGBShadeRow_NEON;
   }
 #endif
+#if defined(HAS_ARGBSHADEROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 4)) {
+    ARGBShadeRow = ARGBShadeRow_MSA;
+  }
+#endif
   for (y = 0; y < height; ++y) {
     ARGBShadeRow(src_argb, dst_argb, width, value);
@@ -1997,12 +2547,17 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
 // Interpolate 2 planes by specified amount (0 to 255).
 LIBYUV_API
-int InterpolatePlane(const uint8* src0, int src_stride0,
-                     const uint8* src1, int src_stride1,
-                     uint8* dst, int dst_stride,
-                     int width, int height, int interpolation) {
+int InterpolatePlane(const uint8_t* src0,
+                     int src_stride0,
+                     const uint8_t* src1,
+                     int src_stride1,
+                     uint8_t* dst,
+                     int dst_stride,
+                     int width,
+                     int height,
+                     int interpolation) {
   int y;
-  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
+  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                          ptrdiff_t src_stride, int dst_width,
                          int source_y_fraction) = InterpolateRow_C;
   if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
@@ -2015,9 +2570,7 @@ int InterpolatePlane(const uint8* src0, int src_stride0,
     dst_stride = -dst_stride;
   }
   // Coalesce rows.
-  if (src_stride0 == width &&
-      src_stride1 == width &&
-      dst_stride == width) {
+  if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
     width *= height;
     height = 1;
     src_stride0 = src_stride1 = dst_stride = 0;
@@ -2046,13 +2599,12 @@ int InterpolatePlane(const uint8* src0, int src_stride0,
     }
   }
 #endif
-#if defined(HAS_INTERPOLATEROW_DSPR2)
-  if (TestCpuFlag(kCpuHasDSPR2) &&
-      IS_ALIGNED(src0, 4) && IS_ALIGNED(src_stride0, 4) &&
-      IS_ALIGNED(src1, 4) && IS_ALIGNED(src_stride1, 4) &&
-      IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4) &&
-      IS_ALIGNED(width, 4)) {
-    InterpolateRow = InterpolateRow_DSPR2;
+#if defined(HAS_INTERPOLATEROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    InterpolateRow = InterpolateRow_Any_MSA;
+    if (IS_ALIGNED(width, 32)) {
+      InterpolateRow = InterpolateRow_MSA;
+    }
   }
 #endif
@@ -2067,61 +2619,71 @@ int InterpolatePlane(const uint8* src0, int src_stride0,
 // Interpolate 2 ARGB images by specified amount (0 to 255).
 LIBYUV_API
-int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
-                    const uint8* src_argb1, int src_stride_argb1,
-                    uint8* dst_argb, int dst_stride_argb,
-                    int width, int height, int interpolation) {
-  return InterpolatePlane(src_argb0, src_stride_argb0,
-                          src_argb1, src_stride_argb1,
-                          dst_argb, dst_stride_argb,
+int ARGBInterpolate(const uint8_t* src_argb0,
+                    int src_stride_argb0,
+                    const uint8_t* src_argb1,
+                    int src_stride_argb1,
+                    uint8_t* dst_argb,
+                    int dst_stride_argb,
+                    int width,
+                    int height,
+                    int interpolation) {
+  return InterpolatePlane(src_argb0, src_stride_argb0, src_argb1,
+                          src_stride_argb1, dst_argb, dst_stride_argb,
                           width * 4, height, interpolation);
 }
 // Interpolate 2 YUV images by specified amount (0 to 255).
 LIBYUV_API
-int I420Interpolate(const uint8* src0_y, int src0_stride_y,
-                    const uint8* src0_u, int src0_stride_u,
-                    const uint8* src0_v, int src0_stride_v,
-                    const uint8* src1_y, int src1_stride_y,
-                    const uint8* src1_u, int src1_stride_u,
-                    const uint8* src1_v, int src1_stride_v,
-                    uint8* dst_y, int dst_stride_y,
-                    uint8* dst_u, int dst_stride_u,
-                    uint8* dst_v, int dst_stride_v,
-                    int width, int height, int interpolation) {
+int I420Interpolate(const uint8_t* src0_y,
+                    int src0_stride_y,
+                    const uint8_t* src0_u,
+                    int src0_stride_u,
+                    const uint8_t* src0_v,
+                    int src0_stride_v,
+                    const uint8_t* src1_y,
+                    int src1_stride_y,
+                    const uint8_t* src1_u,
+                    int src1_stride_u,
+                    const uint8_t* src1_v,
+                    int src1_stride_v,
+                    uint8_t* dst_y,
+                    int dst_stride_y,
+                    uint8_t* dst_u,
+                    int dst_stride_u,
+                    uint8_t* dst_v,
+                    int dst_stride_v,
+                    int width,
+                    int height,
+                    int interpolation) {
   int halfwidth = (width + 1) >> 1;
   int halfheight = (height + 1) >> 1;
-  if (!src0_y || !src0_u || !src0_v ||
-      !src1_y || !src1_u || !src1_v ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
+  if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v ||
+      !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
     return -1;
   }
-  InterpolatePlane(src0_y, src0_stride_y,
-                   src1_y, src1_stride_y,
-                   dst_y, dst_stride_y,
-                   width, height, interpolation);
-  InterpolatePlane(src0_u, src0_stride_u,
-                   src1_u, src1_stride_u,
-                   dst_u, dst_stride_u,
-                   halfwidth, halfheight, interpolation);
-  InterpolatePlane(src0_v, src0_stride_v,
-                   src1_v, src1_stride_v,
-                   dst_v, dst_stride_v,
-                   halfwidth, halfheight, interpolation);
+  InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y,
+                   dst_stride_y, width, height, interpolation);
+  InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u,
+                   dst_stride_u, halfwidth, halfheight, interpolation);
+  InterpolatePlane(src0_v, src0_stride_v, src1_v, src1_stride_v, dst_v,
+                   dst_stride_v, halfwidth, halfheight, interpolation);
   return 0;
 }
 // Shuffle ARGB channel order. e.g. BGRA to ARGB.
 LIBYUV_API
-int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
-                uint8* dst_argb, int dst_stride_argb,
-                const uint8* shuffler, int width, int height) {
+int ARGBShuffle(const uint8_t* src_bgra,
+                int src_stride_bgra,
+                uint8_t* dst_argb,
+                int dst_stride_argb,
+                const uint8_t* shuffler,
+                int width,
+                int height) {
   int y;
-  void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
-                         const uint8* shuffler, int width) = ARGBShuffleRow_C;
-  if (!src_bgra || !dst_argb ||
-      width <= 0 || height == 0) {
+  void (*ARGBShuffleRow)(const uint8_t* src_bgra, uint8_t* dst_argb,
+                         const uint8_t* shuffler, int width) = ARGBShuffleRow_C;
+  if (!src_bgra || !dst_argb || width <= 0 || height == 0) {
     return -1;
   }
   // Negative height means invert the image.
@@ -2131,20 +2693,11 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
     src_stride_bgra = -src_stride_bgra;
   }
   // Coalesce rows.
-  if (src_stride_bgra == width * 4 &&
-      dst_stride_argb == width * 4) {
+  if (src_stride_bgra == width * 4 && dst_stride_argb == width * 4) {
     width *= height;
     height = 1;
     src_stride_bgra = dst_stride_argb = 0;
   }
-#if defined(HAS_ARGBSHUFFLEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBShuffleRow = ARGBShuffleRow_SSE2;
-    }
-  }
-#endif
 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
@@ -2169,6 +2722,14 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
     }
   }
 #endif
+#if defined(HAS_ARGBSHUFFLEROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    ARGBShuffleRow = ARGBShuffleRow_Any_MSA;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBShuffleRow = ARGBShuffleRow_MSA;
+    }
+  }
+#endif
   for (y = 0; y < height; ++y) {
     ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
@@ -2179,28 +2740,32 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
 }
 // Sobel ARGB effect.
-static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
-                        uint8* dst_argb, int dst_stride_argb,
-                        int width, int height,
-                        void (*SobelRow)(const uint8* src_sobelx,
-                                         const uint8* src_sobely,
-                                         uint8* dst, int width)) {
+static int ARGBSobelize(const uint8_t* src_argb,
+                        int src_stride_argb,
+                        uint8_t* dst_argb,
+                        int dst_stride_argb,
+                        int width,
+                        int height,
+                        void (*SobelRow)(const uint8_t* src_sobelx,
+                                         const uint8_t* src_sobely,
+                                         uint8_t* dst,
+                                         int width)) {
   int y;
-  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) =
+  void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) =
       ARGBToYJRow_C;
-  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
-                    uint8* dst_sobely, int width) = SobelYRow_C;
-  void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
-                    const uint8* src_y2, uint8* dst_sobely, int width) =
+  void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1,
+                    uint8_t* dst_sobely, int width) = SobelYRow_C;
+  void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1,
+                    const uint8_t* src_y2, uint8_t* dst_sobely, int width) =
       SobelXRow_C;
   const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
-  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
+  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
     return -1;
   }
   // Negative height means invert the image.
if (height < 0) { height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } @@ -2228,6 +2793,14 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYJRow = ARGBToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_MSA; + } + } +#endif #if defined(HAS_SOBELYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { @@ -2239,6 +2812,11 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, SobelYRow = SobelYRow_NEON; } #endif +#if defined(HAS_SOBELYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + SobelYRow = SobelYRow_MSA; + } +#endif #if defined(HAS_SOBELXROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelXRow = SobelXRow_SSE2; @@ -2248,19 +2826,24 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, if (TestCpuFlag(kCpuHasNEON)) { SobelXRow = SobelXRow_NEON; } +#endif +#if defined(HAS_SOBELXROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + SobelXRow = SobelXRow_MSA; + } #endif { // 3 rows with edges before/after. const int kRowSize = (width + kEdge + 31) & ~31; align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); - uint8* row_sobelx = rows; - uint8* row_sobely = rows + kRowSize; - uint8* row_y = rows + kRowSize * 2; + uint8_t* row_sobelx = rows; + uint8_t* row_sobely = rows + kRowSize; + uint8_t* row_y = rows + kRowSize * 2; // Convert first row. - uint8* row_y0 = row_y + kEdge; - uint8* row_y1 = row_y0 + kRowSize; - uint8* row_y2 = row_y1 + kRowSize; + uint8_t* row_y0 = row_y + kEdge; + uint8_t* row_y1 = row_y0 + kRowSize; + uint8_t* row_y2 = row_y1 + kRowSize; ARGBToYJRow(src_argb, row_y0, width); row_y0[-1] = row_y0[0]; memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. @@ -2284,7 +2867,7 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, // Cycle thru circular queue of 3 row_y buffers. { - uint8* row_yt = row_y0; + uint8_t* row_yt = row_y0; row_y0 = row_y1; row_y1 = row_y2; row_y2 = row_yt; @@ -2299,11 +2882,14 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, // Sobel ARGB effect. LIBYUV_API -int ARGBSobel(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) = SobelRow_C; +int ARGBSobel(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + void (*SobelRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely, + uint8_t* dst_argb, int width) = SobelRow_C; #if defined(HAS_SOBELROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelRow = SobelRow_Any_SSE2; @@ -2319,6 +2905,14 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb, SobelRow = SobelRow_NEON; } } +#endif +#if defined(HAS_SOBELROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + SobelRow = SobelRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + SobelRow = SobelRow_MSA; + } + } #endif return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height, SobelRow); @@ -2326,11 +2920,14 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb, // Sobel ARGB effect with planar output. 
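// (Illustrative sketch, derived from the row functions above.) All three
// Sobel entry points share ARGBSobelize: it grays ARGB to luma with
// ARGBToYJRow, keeps a circular queue of three luma rows, derives absolute
// X and Y gradient rows with SobelXRow/SobelYRow, and hands both gradient
// rows to the supplied SobelRow to pack the output. The combine step is,
// in essence:
//
//   int s = src_sobelx[i] + src_sobely[i];  // gradients already absolute
//   uint8_t m = (s > 255) ? 255 : (uint8_t)s;
//   // ARGBSobel writes m to B, G and R with A = 255;
//   // ARGBSobelToPlane below writes m to a single Y plane.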
LIBYUV_API -int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height) { - void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_, int width) = SobelToPlaneRow_C; +int ARGBSobelToPlane(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height) { + void (*SobelToPlaneRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely, + uint8_t* dst_, int width) = SobelToPlaneRow_C; #if defined(HAS_SOBELTOPLANEROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelToPlaneRow = SobelToPlaneRow_Any_SSE2; @@ -2347,18 +2944,29 @@ int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb, } } #endif - return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, - width, height, SobelToPlaneRow); +#if defined(HAS_SOBELTOPLANEROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + SobelToPlaneRow = SobelToPlaneRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + SobelToPlaneRow = SobelToPlaneRow_MSA; + } + } +#endif + return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width, + height, SobelToPlaneRow); } // SobelXY ARGB effect. // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel. LIBYUV_API -int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) = SobelXYRow_C; +int ARGBSobelXY(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { + void (*SobelXYRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely, + uint8_t* dst_argb, int width) = SobelXYRow_C; #if defined(HAS_SOBELXYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelXYRow = SobelXYRow_Any_SSE2; @@ -2374,6 +2982,14 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, SobelXYRow = SobelXYRow_NEON; } } +#endif +#if defined(HAS_SOBELXYROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + SobelXYRow = SobelXYRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + SobelXYRow = SobelXYRow_MSA; + } + } #endif return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height, SobelXYRow); @@ -2381,26 +2997,27 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, // Apply a 4x4 polynomial to each ARGB pixel. LIBYUV_API -int ARGBPolynomial(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, +int ARGBPolynomial(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, const float* poly, - int width, int height) { + int width, + int height) { int y; - void (*ARGBPolynomialRow)(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width) = ARGBPolynomialRow_C; + void (*ARGBPolynomialRow)(const uint8_t* src_argb, uint8_t* dst_argb, + const float* poly, int width) = ARGBPolynomialRow_C; if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
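  // (Expanded note on the idiom that follows.) When every stride equals the
  // row width in bytes, rows are contiguous in memory and the image can be
  // treated as one long row: width becomes width * height, height becomes 1,
  // and the strides drop to 0, so the SIMD row function runs once over the
  // whole buffer instead of once per row.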
- if (src_stride_argb == width * 4 && - dst_stride_argb == width * 4) { + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; @@ -2425,28 +3042,132 @@ int ARGBPolynomial(const uint8* src_argb, int src_stride_argb, return 0; } +// Convert plane of 16 bit shorts to half floats. +// Source values are multiplied by scale before storing as half float. +LIBYUV_API +int HalfFloatPlane(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + float scale, + int width, + int height) { + int y; + void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale, + int width) = HalfFloatRow_C; + if (!src_y || !dst_y || width <= 0 || height == 0) { + return -1; + } + src_stride_y >>= 1; + dst_stride_y >>= 1; + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_stride_y = -src_stride_y; + } + // Coalesce rows. + if (src_stride_y == width && dst_stride_y == width) { + width *= height; + height = 1; + src_stride_y = dst_stride_y = 0; + } +#if defined(HAS_HALFFLOATROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + HalfFloatRow = HalfFloatRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + HalfFloatRow = HalfFloatRow_SSE2; + } + } +#endif +#if defined(HAS_HALFFLOATROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + HalfFloatRow = HalfFloatRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + HalfFloatRow = HalfFloatRow_AVX2; + } + } +#endif +#if defined(HAS_HALFFLOATROW_F16C) + if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) { + HalfFloatRow = + (scale == 1.0f) ? HalfFloat1Row_Any_F16C : HalfFloatRow_Any_F16C; + if (IS_ALIGNED(width, 16)) { + HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_F16C : HalfFloatRow_F16C; + } + } +#endif +#if defined(HAS_HALFFLOATROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + HalfFloatRow = + (scale == 1.0f) ? HalfFloat1Row_Any_NEON : HalfFloatRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_NEON : HalfFloatRow_NEON; + } + } +#endif +#if defined(HAS_HALFFLOATROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + HalfFloatRow = HalfFloatRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + HalfFloatRow = HalfFloatRow_MSA; + } + } +#endif + + for (y = 0; y < height; ++y) { + HalfFloatRow(src_y, dst_y, scale, width); + src_y += src_stride_y; + dst_y += dst_stride_y; + } + return 0; +} + +// Convert a buffer of bytes to floats, scale the values and store as floats. +LIBYUV_API +int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width) { + void (*ByteToFloatRow)(const uint8_t* src, float* dst, float scale, + int width) = ByteToFloatRow_C; + if (!src_y || !dst_y || width <= 0) { + return -1; + } +#if defined(HAS_BYTETOFLOATROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ByteToFloatRow = ByteToFloatRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ByteToFloatRow = ByteToFloatRow_NEON; + } + } +#endif + + ByteToFloatRow(src_y, dst_y, scale, width); + return 0; +} + // Apply a lumacolortable to each ARGB pixel. 
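// (Usage sketch for the HalfFloatPlane entry point added above; the scale
// value is an assumption for 10-bit content, not taken from this patch.)
// Strides are passed in bytes and halved internally, since the function
// indexes uint16_t samples:
//
//   HalfFloatPlane(src_y, width * 2,  // 10-bit samples held in uint16_t
//                  dst_y, width * 2,
//                  1.0f / 1023.0f,    // map [0, 1023] to [0.0, 1.0]
//                  width, height);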
LIBYUV_API -int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const uint8* luma, - int width, int height) { +int ARGBLumaColorTable(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + const uint8_t* luma, + int width, + int height) { int y; - void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb, - int width, const uint8* luma, const uint32 lumacoeff) = - ARGBLumaColorTableRow_C; + void (*ARGBLumaColorTableRow)( + const uint8_t* src_argb, uint8_t* dst_argb, int width, + const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C; if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_argb == width * 4) { + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; @@ -2467,12 +3188,15 @@ int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, // Copy Alpha from one ARGB image to another. LIBYUV_API -int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int ARGBCopyAlpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) = - ARGBCopyAlphaRow_C; + void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb, + int width) = ARGBCopyAlphaRow_C; if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -2483,8 +3207,7 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_argb == width * 4) { + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; @@ -2516,55 +3239,73 @@ int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, // Extract just the alpha channel from ARGB. LIBYUV_API -int ARGBExtractAlpha(const uint8* src_argb, int src_stride, - uint8* dst_a, int dst_stride, - int width, int height) { +int ARGBExtractAlpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_a, + int dst_stride_a, + int width, + int height) { if (!src_argb || !dst_a || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb += (height - 1) * src_stride; - src_stride = -src_stride; + src_argb += (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride == width * 4 && dst_stride == width) { + if (src_stride_argb == width * 4 && dst_stride_a == width) { width *= height; height = 1; - src_stride = dst_stride = 0; + src_stride_argb = dst_stride_a = 0; } - void (*ARGBExtractAlphaRow)(const uint8 *src_argb, uint8 *dst_a, int width) = - ARGBExtractAlphaRow_C; + void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a, + int width) = ARGBExtractAlphaRow_C; #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? 
ARGBExtractAlphaRow_SSE2 : ARGBExtractAlphaRow_Any_SSE2; } #endif +#if defined(HAS_ARGBEXTRACTALPHAROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2 + : ARGBExtractAlphaRow_Any_AVX2; + } +#endif #if defined(HAS_ARGBEXTRACTALPHAROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON : ARGBExtractAlphaRow_Any_NEON; } #endif +#if defined(HAS_ARGBEXTRACTALPHAROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA + : ARGBExtractAlphaRow_Any_MSA; + } +#endif for (int y = 0; y < height; ++y) { ARGBExtractAlphaRow(src_argb, dst_a, width); - src_argb += src_stride; - dst_a += dst_stride; + src_argb += src_stride_argb; + dst_a += dst_stride_a; } return 0; } // Copy a planar Y channel to the alpha channel of a destination ARGB image. LIBYUV_API -int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { +int ARGBCopyYToAlpha(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; - void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) = - ARGBCopyYToAlphaRow_C; + void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb, + int width) = ARGBCopyYToAlphaRow_C; if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } @@ -2575,8 +3316,7 @@ int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, src_stride_y = -src_stride_y; } // Coalesce rows. - if (src_stride_y == width && - dst_stride_argb == width * 4) { + if (src_stride_y == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = dst_stride_argb = 0; @@ -2610,20 +3350,22 @@ int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, // directly. A SplitUVRow_Odd function could copy the remaining chroma. LIBYUV_API -int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height) { +int YUY2ToNV12(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { int y; int halfwidth = (width + 1) >> 1; - void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; - void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, + void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - if (!src_yuy2 || - !dst_y || !dst_uv || - width <= 0 || height == 0) { + if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
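  // (Sketch of the strategy, inferred from the row functions used below.)
  // YUY2 packs two pixels as Y0 U Y1 V. Treating each row as byte pairs
  // lets SplitUVRow peel the even bytes (luma) apart from the odd bytes
  // (U and V, already interleaved in NV12 order), and InterpolateRow at
  // source_y_fraction 128 averages the chroma rows of each row pair to
  // produce the vertically subsampled UV plane.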
@@ -2656,6 +3398,14 @@ int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2, } } #endif +#if defined(HAS_SPLITUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + SplitUVRow = SplitUVRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + SplitUVRow = SplitUVRow_MSA; + } + } +#endif #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; @@ -2680,6 +3430,14 @@ int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2, } } #endif +#if defined(HAS_INTERPOLATEROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + InterpolateRow = InterpolateRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + InterpolateRow = InterpolateRow_MSA; + } + } +#endif { int awidth = halfwidth * 2; @@ -2708,20 +3466,22 @@ int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2, } LIBYUV_API -int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height) { +int UYVYToNV12(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { int y; int halfwidth = (width + 1) >> 1; - void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, + void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; - void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, + void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - if (!src_uyvy || - !dst_y || !dst_uv || - width <= 0 || height == 0) { + if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -2754,6 +3514,14 @@ int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy, } } #endif +#if defined(HAS_SPLITUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + SplitUVRow = SplitUVRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + SplitUVRow = SplitUVRow_MSA; + } + } +#endif #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; @@ -2778,6 +3546,14 @@ int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy, } } #endif +#if defined(HAS_INTERPOLATEROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + InterpolateRow = InterpolateRow_Any_MSA; + if (IS_ALIGNED(width, 32)) { + InterpolateRow = InterpolateRow_MSA; + } + } +#endif { int awidth = halfwidth * 2; diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate.cc index 01ea5c40744f..f2bed85b7552 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/rotate.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate.cc @@ -10,8 +10,8 @@ #include "libyuv/rotate.h" -#include "libyuv/cpu_id.h" #include "libyuv/convert.h" +#include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" #include "libyuv/rotate_row.h" #include "libyuv/row.h" @@ -22,12 +22,20 @@ extern "C" { #endif LIBYUV_API -void TransposePlane(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { +void TransposePlane(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height) { int i = height; - void (*TransposeWx8)(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width) = TransposeWx8_C; +#if defined(HAS_TRANSPOSEWX16_MSA) + void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst, + int dst_stride, int width) = TransposeWx16_C; 
+#else + void (*TransposeWx8)(const uint8_t* src, int src_stride, uint8_t* dst, + int dst_stride, int width) = TransposeWx8_C; +#endif #if defined(HAS_TRANSPOSEWX8_NEON) if (TestCpuFlag(kCpuHasNEON)) { TransposeWx8 = TransposeWx8_NEON; @@ -49,24 +57,32 @@ void TransposePlane(const uint8* src, int src_stride, } } #endif -#if defined(HAS_TRANSPOSEWX8_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - if (IS_ALIGNED(width, 4) && - IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) { - TransposeWx8 = TransposeWx8_Fast_DSPR2; - } else { - TransposeWx8 = TransposeWx8_DSPR2; +#if defined(HAS_TRANSPOSEWX16_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + TransposeWx16 = TransposeWx16_Any_MSA; + if (IS_ALIGNED(width, 16)) { + TransposeWx16 = TransposeWx16_MSA; } } #endif +#if defined(HAS_TRANSPOSEWX16_MSA) + // Work across the source in 16x16 tiles + while (i >= 16) { + TransposeWx16(src, src_stride, dst, dst_stride, width); + src += 16 * src_stride; // Go down 16 rows. + dst += 16; // Move over 16 columns. + i -= 16; + } +#else // Work across the source in 8x8 tiles while (i >= 8) { TransposeWx8(src, src_stride, dst, dst_stride, width); - src += 8 * src_stride; // Go down 8 rows. - dst += 8; // Move over 8 columns. + src += 8 * src_stride; // Go down 8 rows. + dst += 8; // Move over 8 columns. i -= 8; } +#endif if (i > 0) { TransposeWxH_C(src, src_stride, dst, dst_stride, width, i); @@ -74,9 +90,12 @@ void TransposePlane(const uint8* src, int src_stride, } LIBYUV_API -void RotatePlane90(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { +void RotatePlane90(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height) { // Rotate by 90 is a transpose with the source read // from bottom to top. So set the source pointer to the end // of the buffer and flip the sign of the source stride. @@ -86,9 +105,12 @@ void RotatePlane90(const uint8* src, int src_stride, } LIBYUV_API -void RotatePlane270(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { +void RotatePlane270(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height) { // Rotate by 270 is a transpose with the destination written // from bottom to top. So set the destination pointer to the end // of the buffer and flip the sign of the destination stride. @@ -98,17 +120,20 @@ void RotatePlane270(const uint8* src, int src_stride, } LIBYUV_API -void RotatePlane180(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { +void RotatePlane180(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height) { // Swap first and last row and mirror the content. Uses a temporary row. 
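  // (Sketch of the loop below.) A 180-degree rotation is a horizontal
  // mirror combined with a vertical flip, done in row pairs through one
  // scratch row so the top/bottom swap never loses data:
  //
  //   MirrorRow(src, row, width);      // top row, mirrored, into scratch
  //   MirrorRow(src_bot, dst, width);  // bottom row, mirrored, into top
  //   CopyRow(row, dst_bot, width);    // scratch row into bottom slot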
  align_buffer_64(row, width);
-  const uint8* src_bot = src + src_stride * (height - 1);
-  uint8* dst_bot = dst + dst_stride * (height - 1);
+  const uint8_t* src_bot = src + src_stride * (height - 1);
+  uint8_t* dst_bot = dst + dst_stride * (height - 1);
   int half_height = (height + 1) >> 1;
   int y;
-  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
-  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
+  void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
+  void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
 #if defined(HAS_MIRRORROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     MirrorRow = MirrorRow_Any_NEON;
@@ -133,12 +158,12 @@ void RotatePlane180(const uint8* src, int src_stride,
     }
   }
 #endif
-// TODO(fbarchard): Mirror on mips handle unaligned memory.
-#if defined(HAS_MIRRORROW_DSPR2)
-  if (TestCpuFlag(kCpuHasDSPR2) &&
-      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
-      IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
-    MirrorRow = MirrorRow_DSPR2;
+#if defined(HAS_MIRRORROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    MirrorRow = MirrorRow_Any_MSA;
+    if (IS_ALIGNED(width, 64)) {
+      MirrorRow = MirrorRow_MSA;
+    }
   }
 #endif
 #if defined(HAS_COPYROW_SSE2)
@@ -161,11 +186,6 @@ void RotatePlane180(const uint8* src, int src_stride,
     CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
   }
 #endif
-#if defined(HAS_COPYROW_MIPS)
-  if (TestCpuFlag(kCpuHasMIPS)) {
-    CopyRow = CopyRow_MIPS;
-  }
-#endif

   // Odd height will harmlessly mirror the middle row twice.
   for (y = 0; y < half_height; ++y) {
@@ -181,15 +201,24 @@ void RotatePlane180(const uint8* src, int src_stride,
 }

 LIBYUV_API
-void TransposeUV(const uint8* src, int src_stride,
-                 uint8* dst_a, int dst_stride_a,
-                 uint8* dst_b, int dst_stride_b,
-                 int width, int height) {
+void TransposeUV(const uint8_t* src,
+                 int src_stride,
+                 uint8_t* dst_a,
+                 int dst_stride_a,
+                 uint8_t* dst_b,
+                 int dst_stride_b,
+                 int width,
+                 int height) {
   int i = height;
-  void (*TransposeUVWx8)(const uint8* src, int src_stride,
-                         uint8* dst_a, int dst_stride_a,
-                         uint8* dst_b, int dst_stride_b,
+#if defined(HAS_TRANSPOSEUVWX16_MSA)
+  void (*TransposeUVWx16)(const uint8_t* src, int src_stride, uint8_t* dst_a,
+                          int dst_stride_a, uint8_t* dst_b, int dst_stride_b,
+                          int width) = TransposeUVWx16_C;
+#else
+  void (*TransposeUVWx8)(const uint8_t* src, int src_stride, uint8_t* dst_a,
+                         int dst_stride_a, uint8_t* dst_b, int dst_stride_b,
                          int width) = TransposeUVWx8_C;
+#endif
 #if defined(HAS_TRANSPOSEUVWX8_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     TransposeUVWx8 = TransposeUVWx8_NEON;
@@ -203,72 +232,90 @@ void TransposeUV(const uint8* src, int src_stride,
     }
   }
 #endif
-#if defined(HAS_TRANSPOSEUVWX8_DSPR2)
-  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) &&
-      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
-    TransposeUVWx8 = TransposeUVWx8_DSPR2;
+#if defined(HAS_TRANSPOSEUVWX16_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    TransposeUVWx16 = TransposeUVWx16_Any_MSA;
+    if (IS_ALIGNED(width, 8)) {
+      TransposeUVWx16 = TransposeUVWx16_MSA;
+    }
   }
 #endif
+#if defined(HAS_TRANSPOSEUVWX16_MSA)
+  // Work through the source in 16x16 tiles.
+  while (i >= 16) {
+    TransposeUVWx16(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
+                    width);
+    src += 16 * src_stride;  // Go down 16 rows.
+    dst_a += 16;             // Move over 16 columns.
+    dst_b += 16;             // Move over 16 columns.
+    i -= 16;
+  }
+#else
   // Work through the source in 8x8 tiles.
while (i >= 8) { - TransposeUVWx8(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, + TransposeUVWx8(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width); - src += 8 * src_stride; // Go down 8 rows. - dst_a += 8; // Move over 8 columns. - dst_b += 8; // Move over 8 columns. + src += 8 * src_stride; // Go down 8 rows. + dst_a += 8; // Move over 8 columns. + dst_b += 8; // Move over 8 columns. i -= 8; } +#endif if (i > 0) { - TransposeUVWxH_C(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, + TransposeUVWxH_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width, i); } } LIBYUV_API -void RotateUV90(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { +void RotateUV90(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height) { src += src_stride * (height - 1); src_stride = -src_stride; - TransposeUV(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, - width, height); + TransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width, + height); } LIBYUV_API -void RotateUV270(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { +void RotateUV270(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height) { dst_a += dst_stride_a * (width - 1); dst_b += dst_stride_b * (width - 1); dst_stride_a = -dst_stride_a; dst_stride_b = -dst_stride_b; - TransposeUV(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, - width, height); + TransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width, + height); } // Rotate 180 is a horizontal and vertical flip. 
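// (Note, inferred from MirrorUVRow's signature.) The UV variant mirrors and
// de-interleaves in one pass: MirrorUVRow reads the interleaved UV source
// right to left and writes separate U and V rows, so a 180-degree rotation
// of NV12-style chroma lands directly in I420-style planes.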
LIBYUV_API -void RotateUV180(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { +void RotateUV180(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height) { int i; - void (*MirrorUVRow)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) = - MirrorUVRow_C; + void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v, + int width) = MirrorUVRow_C; #if defined(HAS_MIRRORUVROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { MirrorUVRow = MirrorUVRow_NEON; @@ -279,10 +326,9 @@ void RotateUV180(const uint8* src, int src_stride, MirrorUVRow = MirrorUVRow_SSSE3; } #endif -#if defined(HAS_MIRRORUVROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && - IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) { - MirrorUVRow = MirrorUVRow_DSPR2; +#if defined(HAS_MIRRORUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 32)) { + MirrorUVRow = MirrorUVRow_MSA; } #endif @@ -298,9 +344,12 @@ void RotateUV180(const uint8* src, int src_stride, } LIBYUV_API -int RotatePlane(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height, +int RotatePlane(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height, enum RotationMode mode) { if (!src || width <= 0 || height == 0 || !dst) { return -1; @@ -316,24 +365,16 @@ int RotatePlane(const uint8* src, int src_stride, switch (mode) { case kRotate0: // copy frame - CopyPlane(src, src_stride, - dst, dst_stride, - width, height); + CopyPlane(src, src_stride, dst, dst_stride, width, height); return 0; case kRotate90: - RotatePlane90(src, src_stride, - dst, dst_stride, - width, height); + RotatePlane90(src, src_stride, dst, dst_stride, width, height); return 0; case kRotate270: - RotatePlane270(src, src_stride, - dst, dst_stride, - width, height); + RotatePlane270(src, src_stride, dst, dst_stride, width, height); return 0; case kRotate180: - RotatePlane180(src, src_stride, - dst, dst_stride, - width, height); + RotatePlane180(src, src_stride, dst, dst_stride, width, height); return 0; default: break; @@ -342,18 +383,25 @@ int RotatePlane(const uint8* src, int src_stride, } LIBYUV_API -int I420Rotate(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height, +int I420Rotate(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, enum RotationMode mode) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || - !dst_y || !dst_u || !dst_v) { + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v) { return -1; } @@ -372,45 +420,29 @@ int I420Rotate(const uint8* src_y, int src_stride_y, switch (mode) { case kRotate0: // copy frame - return I420Copy(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height); + return I420Copy(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, 
dst_stride_u, + dst_v, dst_stride_v, width, height); case kRotate90: - RotatePlane90(src_y, src_stride_y, - dst_y, dst_stride_y, - width, height); - RotatePlane90(src_u, src_stride_u, - dst_u, dst_stride_u, - halfwidth, halfheight); - RotatePlane90(src_v, src_stride_v, - dst_v, dst_stride_v, - halfwidth, halfheight); + RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, + halfheight); + RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, + halfheight); return 0; case kRotate270: - RotatePlane270(src_y, src_stride_y, - dst_y, dst_stride_y, - width, height); - RotatePlane270(src_u, src_stride_u, - dst_u, dst_stride_u, - halfwidth, halfheight); - RotatePlane270(src_v, src_stride_v, - dst_v, dst_stride_v, - halfwidth, halfheight); + RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, + halfheight); + RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, + halfheight); return 0; case kRotate180: - RotatePlane180(src_y, src_stride_y, - dst_y, dst_stride_y, - width, height); - RotatePlane180(src_u, src_stride_u, - dst_u, dst_stride_u, - halfwidth, halfheight); - RotatePlane180(src_v, src_stride_v, - dst_v, dst_stride_v, - halfwidth, halfheight); + RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, + halfheight); + RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, + halfheight); return 0; default: break; @@ -419,17 +451,23 @@ int I420Rotate(const uint8* src_y, int src_stride_y, } LIBYUV_API -int NV12ToI420Rotate(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height, +int NV12ToI420Rotate(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, enum RotationMode mode) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!src_y || !src_uv || width <= 0 || height == 0 || - !dst_y || !dst_u || !dst_v) { + if (!src_y || !src_uv || width <= 0 || height == 0 || !dst_y || !dst_u || + !dst_v) { return -1; } @@ -446,38 +484,23 @@ int NV12ToI420Rotate(const uint8* src_y, int src_stride_y, switch (mode) { case kRotate0: // copy frame - return NV12ToI420(src_y, src_stride_y, - src_uv, src_stride_uv, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, + return NV12ToI420(src_y, src_stride_y, src_uv, src_stride_uv, dst_y, + dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, width, height); case kRotate90: - RotatePlane90(src_y, src_stride_y, - dst_y, dst_stride_y, - width, height); - RotateUV90(src_uv, src_stride_uv, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - halfwidth, halfheight); + RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + RotateUV90(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v, + dst_stride_v, halfwidth, halfheight); return 0; case kRotate270: - RotatePlane270(src_y, src_stride_y, - dst_y, dst_stride_y, - width, height); - RotateUV270(src_uv, src_stride_uv, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - halfwidth, halfheight); + RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, 
width, height); + RotateUV270(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v, + dst_stride_v, halfwidth, halfheight); return 0; case kRotate180: - RotatePlane180(src_y, src_stride_y, - dst_y, dst_stride_y, - width, height); - RotateUV180(src_uv, src_stride_uv, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - halfwidth, halfheight); + RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + RotateUV180(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v, + dst_stride_v, halfwidth, halfheight); return 0; default: break; diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_any.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_any.cc index 31a74c315553..c2752e6222c0 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/rotate_any.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_any.cc @@ -18,16 +18,16 @@ namespace libyuv { extern "C" { #endif -#define TANY(NAMEANY, TPOS_SIMD, MASK) \ - void NAMEANY(const uint8* src, int src_stride, \ - uint8* dst, int dst_stride, int width) { \ - int r = width & MASK; \ - int n = width - r; \ - if (n > 0) { \ - TPOS_SIMD(src, src_stride, dst, dst_stride, n); \ - } \ - TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r);\ - } +#define TANY(NAMEANY, TPOS_SIMD, MASK) \ + void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst, \ + int dst_stride, int width) { \ + int r = width & MASK; \ + int n = width - r; \ + if (n > 0) { \ + TPOS_SIMD(src, src_stride, dst, dst_stride, n); \ + } \ + TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r); \ + } #ifdef HAS_TRANSPOSEWX8_NEON TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7) @@ -38,25 +38,23 @@ TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7) #ifdef HAS_TRANSPOSEWX8_FAST_SSSE3 TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15) #endif -#ifdef HAS_TRANSPOSEWX8_DSPR2 -TANY(TransposeWx8_Any_DSPR2, TransposeWx8_DSPR2, 7) +#ifdef HAS_TRANSPOSEWX16_MSA +TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15) #endif #undef TANY #define TUVANY(NAMEANY, TPOS_SIMD, MASK) \ - void NAMEANY(const uint8* src, int src_stride, \ - uint8* dst_a, int dst_stride_a, \ - uint8* dst_b, int dst_stride_b, int width) { \ - int r = width & MASK; \ - int n = width - r; \ - if (n > 0) { \ - TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, \ - n); \ - } \ - TransposeUVWx8_C(src + n * 2, src_stride, \ - dst_a + n * dst_stride_a, dst_stride_a, \ - dst_b + n * dst_stride_b, dst_stride_b, r); \ - } + void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a, \ + int dst_stride_a, uint8_t* dst_b, int dst_stride_b, \ + int width) { \ + int r = width & MASK; \ + int n = width - r; \ + if (n > 0) { \ + TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, n); \ + } \ + TransposeUVWx8_C(src + n * 2, src_stride, dst_a + n * dst_stride_a, \ + dst_stride_a, dst_b + n * dst_stride_b, dst_stride_b, r); \ + } #ifdef HAS_TRANSPOSEUVWX8_NEON TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7) @@ -64,8 +62,8 @@ TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7) #ifdef HAS_TRANSPOSEUVWX8_SSE2 TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7) #endif -#ifdef HAS_TRANSPOSEUVWX8_DSPR2 -TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7) +#ifdef HAS_TRANSPOSEUVWX16_MSA +TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7) #endif #undef TUVANY @@ -73,8 +71,3 @@ TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7) } // extern "C" } // namespace libyuv #endif - - - - - diff --git 
a/media/libvpx/libvpx/third_party/libyuv/source/rotate_argb.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_argb.cc index 787c0ad1be94..5a6e05376f17 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/rotate_argb.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_argb.cc @@ -10,90 +10,106 @@ #include "libyuv/rotate.h" -#include "libyuv/cpu_id.h" #include "libyuv/convert.h" +#include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" #include "libyuv/row.h" +#include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */ #ifdef __cplusplus namespace libyuv { extern "C" { #endif -// ARGBScale has a function to copy pixels to a row, striding each source -// pixel by a constant. -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || \ - (defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__)) -#define HAS_SCALEARGBROWDOWNEVEN_SSE2 -void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride, - int src_stepx, uint8* dst_ptr, int dst_width); -#endif -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) -#define HAS_SCALEARGBROWDOWNEVEN_NEON -void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride, - int src_stepx, uint8* dst_ptr, int dst_width); -#endif - -void ScaleARGBRowDownEven_C(const uint8* src_ptr, int, - int src_stepx, uint8* dst_ptr, int dst_width); - -static void ARGBTranspose(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width, int height) { +static void ARGBTranspose(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int i; - int src_pixel_step = src_stride >> 2; - void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride, - int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C; + int src_pixel_step = src_stride_argb >> 2; + void (*ScaleARGBRowDownEven)( + const uint8_t* src_argb, ptrdiff_t src_stride_argb, int src_step, + uint8_t* dst_argb, int dst_width) = ScaleARGBRowDownEven_C; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) { // Width of dest. - ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2; + if (TestCpuFlag(kCpuHasSSE2)) { + ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_SSE2; + if (IS_ALIGNED(height, 4)) { // Width of dest. + ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2; + } } #endif #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4)) { // Width of dest. - ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON; + if (TestCpuFlag(kCpuHasNEON)) { + ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_NEON; + if (IS_ALIGNED(height, 4)) { // Width of dest. + ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON; + } + } +#endif +#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MSA; + if (IS_ALIGNED(height, 4)) { // Width of dest. + ScaleARGBRowDownEven = ScaleARGBRowDownEven_MSA; + } } #endif for (i = 0; i < width; ++i) { // column of source to row of dest. 
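    // (How the transpose trick works.) ScaleARGBRowDownEven normally copies
    // every Nth pixel of a row; called with src_stride 0 and src_stepx set
    // to the source's pixel pitch (src_stride_argb / 4, in ARGB pixels), it
    // walks down one column instead, emitting that column as one transposed
    // row of 'height' pixels.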
- ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height); - dst += dst_stride; - src += 4; + ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height); + dst_argb += dst_stride_argb; + src_argb += 4; } } -void ARGBRotate90(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width, int height) { +void ARGBRotate90(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { // Rotate by 90 is a ARGBTranspose with the source read // from bottom to top. So set the source pointer to the end // of the buffer and flip the sign of the source stride. - src += src_stride * (height - 1); - src_stride = -src_stride; - ARGBTranspose(src, src_stride, dst, dst_stride, width, height); + src_argb += src_stride_argb * (height - 1); + src_stride_argb = -src_stride_argb; + ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, + height); } -void ARGBRotate270(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width, int height) { +void ARGBRotate270(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { // Rotate by 270 is a ARGBTranspose with the destination written // from bottom to top. So set the destination pointer to the end // of the buffer and flip the sign of the destination stride. - dst += dst_stride * (width - 1); - dst_stride = -dst_stride; - ARGBTranspose(src, src_stride, dst, dst_stride, width, height); + dst_argb += dst_stride_argb * (width - 1); + dst_stride_argb = -dst_stride_argb; + ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, + height); } -void ARGBRotate180(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width, int height) { +void ARGBRotate180(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { // Swap first and last row and mirror the content. Uses a temporary row. align_buffer_64(row, width * 4); - const uint8* src_bot = src + src_stride * (height - 1); - uint8* dst_bot = dst + dst_stride * (height - 1); + const uint8_t* src_bot = src_argb + src_stride_argb * (height - 1); + uint8_t* dst_bot = dst_argb + dst_stride_argb * (height - 1); int half_height = (height + 1) >> 1; int y; - void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) = + void (*ARGBMirrorRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBMirrorRow_C; - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; + void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = + CopyRow_C; #if defined(HAS_ARGBMIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBMirrorRow = ARGBMirrorRow_Any_NEON; @@ -118,6 +134,14 @@ void ARGBRotate180(const uint8* src, int src_stride, } } #endif +#if defined(HAS_ARGBMIRRORROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBMirrorRow = ARGBMirrorRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBMirrorRow = ARGBMirrorRow_MSA; + } + } +#endif #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; @@ -138,28 +162,27 @@ void ARGBRotate180(const uint8* src, int src_stride, CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif // Odd height will harmlessly mirror the middle row twice. 
for (y = 0; y < half_height; ++y) { - ARGBMirrorRow(src, row, width); // Mirror first row into a buffer - ARGBMirrorRow(src_bot, dst, width); // Mirror last row into first row + ARGBMirrorRow(src_argb, row, width); // Mirror first row into a buffer + ARGBMirrorRow(src_bot, dst_argb, width); // Mirror last row into first row CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last - src += src_stride; - dst += dst_stride; - src_bot -= src_stride; - dst_bot -= dst_stride; + src_argb += src_stride_argb; + dst_argb += dst_stride_argb; + src_bot -= src_stride_argb; + dst_bot -= dst_stride_argb; } free_aligned_buffer_64(row); } LIBYUV_API -int ARGBRotate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, int width, int height, +int ARGBRotate(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, enum RotationMode mode) { if (!src_argb || width <= 0 || height == 0 || !dst_argb) { return -1; @@ -175,23 +198,19 @@ int ARGBRotate(const uint8* src_argb, int src_stride_argb, switch (mode) { case kRotate0: // copy frame - return ARGBCopy(src_argb, src_stride_argb, - dst_argb, dst_stride_argb, + return ARGBCopy(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height); case kRotate90: - ARGBRotate90(src_argb, src_stride_argb, - dst_argb, dst_stride_argb, - width, height); + ARGBRotate90(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, + height); return 0; case kRotate270: - ARGBRotate270(src_argb, src_stride_argb, - dst_argb, dst_stride_argb, - width, height); + ARGBRotate270(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, + height); return 0; case kRotate180: - ARGBRotate180(src_argb, src_stride_argb, - dst_argb, dst_stride_argb, - width, height); + ARGBRotate180(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, + height); return 0; default: break; diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_common.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_common.cc index b33a9a0c6edb..ff212adebc4d 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/rotate_common.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_common.cc @@ -8,16 +8,19 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "libyuv/row.h" #include "libyuv/rotate_row.h" +#include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif -void TransposeWx8_C(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width) { +void TransposeWx8_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width) { int i; for (i = 0; i < width; ++i) { dst[0] = src[0 * src_stride]; @@ -33,9 +36,13 @@ void TransposeWx8_C(const uint8* src, int src_stride, } } -void TransposeUVWx8_C(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width) { +void TransposeUVWx8_C(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width) { int i; for (i = 0; i < width; ++i) { dst_a[0] = src[0 * src_stride + 0]; @@ -60,9 +67,12 @@ void TransposeUVWx8_C(const uint8* src, int src_stride, } } -void TransposeWxH_C(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { +void TransposeWxH_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height) { int i; for (i = 0; i < width; ++i) { int j; @@ -72,10 +82,14 @@ void TransposeWxH_C(const uint8* src, int src_stride, } } -void TransposeUVWxH_C(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { +void TransposeUVWxH_C(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height) { int i; for (i = 0; i < width * 2; i += 2) { int j; diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_gcc.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_gcc.cc index cbe870caa745..04e19e29eefb 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/rotate_gcc.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_gcc.cc @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "libyuv/row.h" #include "libyuv/rotate_row.h" +#include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { @@ -22,342 +22,348 @@ extern "C" { // Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit. #if defined(HAS_TRANSPOSEWX8_SSSE3) -void TransposeWx8_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width) { - asm volatile ( - // Read in the data from the source pointer. - // First round of bit swap. - LABELALIGN - "1: \n" - "movq (%0),%%xmm0 \n" - "movq (%0,%3),%%xmm1 \n" - "lea (%0,%3,2),%0 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "movq (%0),%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "palignr $0x8,%%xmm1,%%xmm1 \n" - "movq (%0,%3),%%xmm3 \n" - "lea (%0,%3,2),%0 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "movdqa %%xmm2,%%xmm3 \n" - "movq (%0),%%xmm4 \n" - "palignr $0x8,%%xmm3,%%xmm3 \n" - "movq (%0,%3),%%xmm5 \n" - "lea (%0,%3,2),%0 \n" - "punpcklbw %%xmm5,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "movq (%0),%%xmm6 \n" - "palignr $0x8,%%xmm5,%%xmm5 \n" - "movq (%0,%3),%%xmm7 \n" - "lea (%0,%3,2),%0 \n" - "punpcklbw %%xmm7,%%xmm6 \n" - "neg %3 \n" - "movdqa %%xmm6,%%xmm7 \n" - "lea 0x8(%0,%3,8),%0 \n" - "palignr $0x8,%%xmm7,%%xmm7 \n" - "neg %3 \n" - // Second round of bit swap. 
- "punpcklwd %%xmm2,%%xmm0 \n" - "punpcklwd %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "palignr $0x8,%%xmm2,%%xmm2 \n" - "palignr $0x8,%%xmm3,%%xmm3 \n" - "punpcklwd %%xmm6,%%xmm4 \n" - "punpcklwd %%xmm7,%%xmm5 \n" - "movdqa %%xmm4,%%xmm6 \n" - "movdqa %%xmm5,%%xmm7 \n" - "palignr $0x8,%%xmm6,%%xmm6 \n" - "palignr $0x8,%%xmm7,%%xmm7 \n" - // Third round of bit swap. - // Write to the destination pointer. - "punpckldq %%xmm4,%%xmm0 \n" - "movq %%xmm0,(%1) \n" - "movdqa %%xmm0,%%xmm4 \n" - "palignr $0x8,%%xmm4,%%xmm4 \n" - "movq %%xmm4,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm6,%%xmm2 \n" - "movdqa %%xmm2,%%xmm6 \n" - "movq %%xmm2,(%1) \n" - "palignr $0x8,%%xmm6,%%xmm6 \n" - "punpckldq %%xmm5,%%xmm1 \n" - "movq %%xmm6,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "movdqa %%xmm1,%%xmm5 \n" - "movq %%xmm1,(%1) \n" - "palignr $0x8,%%xmm5,%%xmm5 \n" - "movq %%xmm5,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm7,%%xmm3 \n" - "movq %%xmm3,(%1) \n" - "movdqa %%xmm3,%%xmm7 \n" - "palignr $0x8,%%xmm7,%%xmm7 \n" - "sub $0x8,%2 \n" - "movq %%xmm7,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "r"((intptr_t)(src_stride)), // %3 - "r"((intptr_t)(dst_stride)) // %4 - : "memory", "cc", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); +void TransposeWx8_SSSE3(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width) { + asm volatile( + // Read in the data from the source pointer. + // First round of bit swap. + LABELALIGN + "1: \n" + "movq (%0),%%xmm0 \n" + "movq (%0,%3),%%xmm1 \n" + "lea (%0,%3,2),%0 \n" + "punpcklbw %%xmm1,%%xmm0 \n" + "movq (%0),%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "palignr $0x8,%%xmm1,%%xmm1 \n" + "movq (%0,%3),%%xmm3 \n" + "lea (%0,%3,2),%0 \n" + "punpcklbw %%xmm3,%%xmm2 \n" + "movdqa %%xmm2,%%xmm3 \n" + "movq (%0),%%xmm4 \n" + "palignr $0x8,%%xmm3,%%xmm3 \n" + "movq (%0,%3),%%xmm5 \n" + "lea (%0,%3,2),%0 \n" + "punpcklbw %%xmm5,%%xmm4 \n" + "movdqa %%xmm4,%%xmm5 \n" + "movq (%0),%%xmm6 \n" + "palignr $0x8,%%xmm5,%%xmm5 \n" + "movq (%0,%3),%%xmm7 \n" + "lea (%0,%3,2),%0 \n" + "punpcklbw %%xmm7,%%xmm6 \n" + "neg %3 \n" + "movdqa %%xmm6,%%xmm7 \n" + "lea 0x8(%0,%3,8),%0 \n" + "palignr $0x8,%%xmm7,%%xmm7 \n" + "neg %3 \n" + // Second round of bit swap. + "punpcklwd %%xmm2,%%xmm0 \n" + "punpcklwd %%xmm3,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "movdqa %%xmm1,%%xmm3 \n" + "palignr $0x8,%%xmm2,%%xmm2 \n" + "palignr $0x8,%%xmm3,%%xmm3 \n" + "punpcklwd %%xmm6,%%xmm4 \n" + "punpcklwd %%xmm7,%%xmm5 \n" + "movdqa %%xmm4,%%xmm6 \n" + "movdqa %%xmm5,%%xmm7 \n" + "palignr $0x8,%%xmm6,%%xmm6 \n" + "palignr $0x8,%%xmm7,%%xmm7 \n" + // Third round of bit swap. + // Write to the destination pointer. 
+ "punpckldq %%xmm4,%%xmm0 \n" + "movq %%xmm0,(%1) \n" + "movdqa %%xmm0,%%xmm4 \n" + "palignr $0x8,%%xmm4,%%xmm4 \n" + "movq %%xmm4,(%1,%4) \n" + "lea (%1,%4,2),%1 \n" + "punpckldq %%xmm6,%%xmm2 \n" + "movdqa %%xmm2,%%xmm6 \n" + "movq %%xmm2,(%1) \n" + "palignr $0x8,%%xmm6,%%xmm6 \n" + "punpckldq %%xmm5,%%xmm1 \n" + "movq %%xmm6,(%1,%4) \n" + "lea (%1,%4,2),%1 \n" + "movdqa %%xmm1,%%xmm5 \n" + "movq %%xmm1,(%1) \n" + "palignr $0x8,%%xmm5,%%xmm5 \n" + "movq %%xmm5,(%1,%4) \n" + "lea (%1,%4,2),%1 \n" + "punpckldq %%xmm7,%%xmm3 \n" + "movq %%xmm3,(%1) \n" + "movdqa %%xmm3,%%xmm7 \n" + "palignr $0x8,%%xmm7,%%xmm7 \n" + "sub $0x8,%2 \n" + "movq %%xmm7,(%1,%4) \n" + "lea (%1,%4,2),%1 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"((intptr_t)(src_stride)), // %3 + "r"((intptr_t)(dst_stride)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // defined(HAS_TRANSPOSEWX8_SSSE3) // Transpose 16x8. 64 bit #if defined(HAS_TRANSPOSEWX8_FAST_SSSE3) -void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width) { - asm volatile ( - // Read in the data from the source pointer. - // First round of bit swap. - LABELALIGN - "1: \n" - "movdqu (%0),%%xmm0 \n" - "movdqu (%0,%3),%%xmm1 \n" - "lea (%0,%3,2),%0 \n" - "movdqa %%xmm0,%%xmm8 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm8 \n" - "movdqu (%0),%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm8,%%xmm9 \n" - "palignr $0x8,%%xmm1,%%xmm1 \n" - "palignr $0x8,%%xmm9,%%xmm9 \n" - "movdqu (%0,%3),%%xmm3 \n" - "lea (%0,%3,2),%0 \n" - "movdqa %%xmm2,%%xmm10 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "punpckhbw %%xmm3,%%xmm10 \n" - "movdqa %%xmm2,%%xmm3 \n" - "movdqa %%xmm10,%%xmm11 \n" - "movdqu (%0),%%xmm4 \n" - "palignr $0x8,%%xmm3,%%xmm3 \n" - "palignr $0x8,%%xmm11,%%xmm11 \n" - "movdqu (%0,%3),%%xmm5 \n" - "lea (%0,%3,2),%0 \n" - "movdqa %%xmm4,%%xmm12 \n" - "punpcklbw %%xmm5,%%xmm4 \n" - "punpckhbw %%xmm5,%%xmm12 \n" - "movdqa %%xmm4,%%xmm5 \n" - "movdqa %%xmm12,%%xmm13 \n" - "movdqu (%0),%%xmm6 \n" - "palignr $0x8,%%xmm5,%%xmm5 \n" - "palignr $0x8,%%xmm13,%%xmm13 \n" - "movdqu (%0,%3),%%xmm7 \n" - "lea (%0,%3,2),%0 \n" - "movdqa %%xmm6,%%xmm14 \n" - "punpcklbw %%xmm7,%%xmm6 \n" - "punpckhbw %%xmm7,%%xmm14 \n" - "neg %3 \n" - "movdqa %%xmm6,%%xmm7 \n" - "movdqa %%xmm14,%%xmm15 \n" - "lea 0x10(%0,%3,8),%0 \n" - "palignr $0x8,%%xmm7,%%xmm7 \n" - "palignr $0x8,%%xmm15,%%xmm15 \n" - "neg %3 \n" - // Second round of bit swap. - "punpcklwd %%xmm2,%%xmm0 \n" - "punpcklwd %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "palignr $0x8,%%xmm2,%%xmm2 \n" - "palignr $0x8,%%xmm3,%%xmm3 \n" - "punpcklwd %%xmm6,%%xmm4 \n" - "punpcklwd %%xmm7,%%xmm5 \n" - "movdqa %%xmm4,%%xmm6 \n" - "movdqa %%xmm5,%%xmm7 \n" - "palignr $0x8,%%xmm6,%%xmm6 \n" - "palignr $0x8,%%xmm7,%%xmm7 \n" - "punpcklwd %%xmm10,%%xmm8 \n" - "punpcklwd %%xmm11,%%xmm9 \n" - "movdqa %%xmm8,%%xmm10 \n" - "movdqa %%xmm9,%%xmm11 \n" - "palignr $0x8,%%xmm10,%%xmm10 \n" - "palignr $0x8,%%xmm11,%%xmm11 \n" - "punpcklwd %%xmm14,%%xmm12 \n" - "punpcklwd %%xmm15,%%xmm13 \n" - "movdqa %%xmm12,%%xmm14 \n" - "movdqa %%xmm13,%%xmm15 \n" - "palignr $0x8,%%xmm14,%%xmm14 \n" - "palignr $0x8,%%xmm15,%%xmm15 \n" - // Third round of bit swap. - // Write to the destination pointer. 
- "punpckldq %%xmm4,%%xmm0 \n" - "movq %%xmm0,(%1) \n" - "movdqa %%xmm0,%%xmm4 \n" - "palignr $0x8,%%xmm4,%%xmm4 \n" - "movq %%xmm4,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm6,%%xmm2 \n" - "movdqa %%xmm2,%%xmm6 \n" - "movq %%xmm2,(%1) \n" - "palignr $0x8,%%xmm6,%%xmm6 \n" - "punpckldq %%xmm5,%%xmm1 \n" - "movq %%xmm6,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "movdqa %%xmm1,%%xmm5 \n" - "movq %%xmm1,(%1) \n" - "palignr $0x8,%%xmm5,%%xmm5 \n" - "movq %%xmm5,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm7,%%xmm3 \n" - "movq %%xmm3,(%1) \n" - "movdqa %%xmm3,%%xmm7 \n" - "palignr $0x8,%%xmm7,%%xmm7 \n" - "movq %%xmm7,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm12,%%xmm8 \n" - "movq %%xmm8,(%1) \n" - "movdqa %%xmm8,%%xmm12 \n" - "palignr $0x8,%%xmm12,%%xmm12 \n" - "movq %%xmm12,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm14,%%xmm10 \n" - "movdqa %%xmm10,%%xmm14 \n" - "movq %%xmm10,(%1) \n" - "palignr $0x8,%%xmm14,%%xmm14 \n" - "punpckldq %%xmm13,%%xmm9 \n" - "movq %%xmm14,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "movdqa %%xmm9,%%xmm13 \n" - "movq %%xmm9,(%1) \n" - "palignr $0x8,%%xmm13,%%xmm13 \n" - "movq %%xmm13,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm15,%%xmm11 \n" - "movq %%xmm11,(%1) \n" - "movdqa %%xmm11,%%xmm15 \n" - "palignr $0x8,%%xmm15,%%xmm15 \n" - "sub $0x10,%2 \n" - "movq %%xmm15,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "r"((intptr_t)(src_stride)), // %3 - "r"((intptr_t)(dst_stride)) // %4 - : "memory", "cc", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", - "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" - ); +void TransposeWx8_Fast_SSSE3(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width) { + asm volatile( + // Read in the data from the source pointer. + // First round of bit swap. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu (%0,%3),%%xmm1 \n" + "lea (%0,%3,2),%0 \n" + "movdqa %%xmm0,%%xmm8 \n" + "punpcklbw %%xmm1,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm8 \n" + "movdqu (%0),%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm8,%%xmm9 \n" + "palignr $0x8,%%xmm1,%%xmm1 \n" + "palignr $0x8,%%xmm9,%%xmm9 \n" + "movdqu (%0,%3),%%xmm3 \n" + "lea (%0,%3,2),%0 \n" + "movdqa %%xmm2,%%xmm10 \n" + "punpcklbw %%xmm3,%%xmm2 \n" + "punpckhbw %%xmm3,%%xmm10 \n" + "movdqa %%xmm2,%%xmm3 \n" + "movdqa %%xmm10,%%xmm11 \n" + "movdqu (%0),%%xmm4 \n" + "palignr $0x8,%%xmm3,%%xmm3 \n" + "palignr $0x8,%%xmm11,%%xmm11 \n" + "movdqu (%0,%3),%%xmm5 \n" + "lea (%0,%3,2),%0 \n" + "movdqa %%xmm4,%%xmm12 \n" + "punpcklbw %%xmm5,%%xmm4 \n" + "punpckhbw %%xmm5,%%xmm12 \n" + "movdqa %%xmm4,%%xmm5 \n" + "movdqa %%xmm12,%%xmm13 \n" + "movdqu (%0),%%xmm6 \n" + "palignr $0x8,%%xmm5,%%xmm5 \n" + "palignr $0x8,%%xmm13,%%xmm13 \n" + "movdqu (%0,%3),%%xmm7 \n" + "lea (%0,%3,2),%0 \n" + "movdqa %%xmm6,%%xmm14 \n" + "punpcklbw %%xmm7,%%xmm6 \n" + "punpckhbw %%xmm7,%%xmm14 \n" + "neg %3 \n" + "movdqa %%xmm6,%%xmm7 \n" + "movdqa %%xmm14,%%xmm15 \n" + "lea 0x10(%0,%3,8),%0 \n" + "palignr $0x8,%%xmm7,%%xmm7 \n" + "palignr $0x8,%%xmm15,%%xmm15 \n" + "neg %3 \n" + // Second round of bit swap. 
+ "punpcklwd %%xmm2,%%xmm0 \n" + "punpcklwd %%xmm3,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "movdqa %%xmm1,%%xmm3 \n" + "palignr $0x8,%%xmm2,%%xmm2 \n" + "palignr $0x8,%%xmm3,%%xmm3 \n" + "punpcklwd %%xmm6,%%xmm4 \n" + "punpcklwd %%xmm7,%%xmm5 \n" + "movdqa %%xmm4,%%xmm6 \n" + "movdqa %%xmm5,%%xmm7 \n" + "palignr $0x8,%%xmm6,%%xmm6 \n" + "palignr $0x8,%%xmm7,%%xmm7 \n" + "punpcklwd %%xmm10,%%xmm8 \n" + "punpcklwd %%xmm11,%%xmm9 \n" + "movdqa %%xmm8,%%xmm10 \n" + "movdqa %%xmm9,%%xmm11 \n" + "palignr $0x8,%%xmm10,%%xmm10 \n" + "palignr $0x8,%%xmm11,%%xmm11 \n" + "punpcklwd %%xmm14,%%xmm12 \n" + "punpcklwd %%xmm15,%%xmm13 \n" + "movdqa %%xmm12,%%xmm14 \n" + "movdqa %%xmm13,%%xmm15 \n" + "palignr $0x8,%%xmm14,%%xmm14 \n" + "palignr $0x8,%%xmm15,%%xmm15 \n" + // Third round of bit swap. + // Write to the destination pointer. + "punpckldq %%xmm4,%%xmm0 \n" + "movq %%xmm0,(%1) \n" + "movdqa %%xmm0,%%xmm4 \n" + "palignr $0x8,%%xmm4,%%xmm4 \n" + "movq %%xmm4,(%1,%4) \n" + "lea (%1,%4,2),%1 \n" + "punpckldq %%xmm6,%%xmm2 \n" + "movdqa %%xmm2,%%xmm6 \n" + "movq %%xmm2,(%1) \n" + "palignr $0x8,%%xmm6,%%xmm6 \n" + "punpckldq %%xmm5,%%xmm1 \n" + "movq %%xmm6,(%1,%4) \n" + "lea (%1,%4,2),%1 \n" + "movdqa %%xmm1,%%xmm5 \n" + "movq %%xmm1,(%1) \n" + "palignr $0x8,%%xmm5,%%xmm5 \n" + "movq %%xmm5,(%1,%4) \n" + "lea (%1,%4,2),%1 \n" + "punpckldq %%xmm7,%%xmm3 \n" + "movq %%xmm3,(%1) \n" + "movdqa %%xmm3,%%xmm7 \n" + "palignr $0x8,%%xmm7,%%xmm7 \n" + "movq %%xmm7,(%1,%4) \n" + "lea (%1,%4,2),%1 \n" + "punpckldq %%xmm12,%%xmm8 \n" + "movq %%xmm8,(%1) \n" + "movdqa %%xmm8,%%xmm12 \n" + "palignr $0x8,%%xmm12,%%xmm12 \n" + "movq %%xmm12,(%1,%4) \n" + "lea (%1,%4,2),%1 \n" + "punpckldq %%xmm14,%%xmm10 \n" + "movdqa %%xmm10,%%xmm14 \n" + "movq %%xmm10,(%1) \n" + "palignr $0x8,%%xmm14,%%xmm14 \n" + "punpckldq %%xmm13,%%xmm9 \n" + "movq %%xmm14,(%1,%4) \n" + "lea (%1,%4,2),%1 \n" + "movdqa %%xmm9,%%xmm13 \n" + "movq %%xmm9,(%1) \n" + "palignr $0x8,%%xmm13,%%xmm13 \n" + "movq %%xmm13,(%1,%4) \n" + "lea (%1,%4,2),%1 \n" + "punpckldq %%xmm15,%%xmm11 \n" + "movq %%xmm11,(%1) \n" + "movdqa %%xmm11,%%xmm15 \n" + "palignr $0x8,%%xmm15,%%xmm15 \n" + "sub $0x10,%2 \n" + "movq %%xmm15,(%1,%4) \n" + "lea (%1,%4,2),%1 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"((intptr_t)(src_stride)), // %3 + "r"((intptr_t)(dst_stride)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", + "xmm15"); } #endif // defined(HAS_TRANSPOSEWX8_FAST_SSSE3) // Transpose UV 8x8. 64 bit. #if defined(HAS_TRANSPOSEUVWX8_SSE2) -void TransposeUVWx8_SSE2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width) { - asm volatile ( - // Read in the data from the source pointer. - // First round of bit swap. 
- LABELALIGN - "1: \n" - "movdqu (%0),%%xmm0 \n" - "movdqu (%0,%4),%%xmm1 \n" - "lea (%0,%4,2),%0 \n" - "movdqa %%xmm0,%%xmm8 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm8 \n" - "movdqa %%xmm8,%%xmm1 \n" - "movdqu (%0),%%xmm2 \n" - "movdqu (%0,%4),%%xmm3 \n" - "lea (%0,%4,2),%0 \n" - "movdqa %%xmm2,%%xmm8 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "punpckhbw %%xmm3,%%xmm8 \n" - "movdqa %%xmm8,%%xmm3 \n" - "movdqu (%0),%%xmm4 \n" - "movdqu (%0,%4),%%xmm5 \n" - "lea (%0,%4,2),%0 \n" - "movdqa %%xmm4,%%xmm8 \n" - "punpcklbw %%xmm5,%%xmm4 \n" - "punpckhbw %%xmm5,%%xmm8 \n" - "movdqa %%xmm8,%%xmm5 \n" - "movdqu (%0),%%xmm6 \n" - "movdqu (%0,%4),%%xmm7 \n" - "lea (%0,%4,2),%0 \n" - "movdqa %%xmm6,%%xmm8 \n" - "punpcklbw %%xmm7,%%xmm6 \n" - "neg %4 \n" - "lea 0x10(%0,%4,8),%0 \n" - "punpckhbw %%xmm7,%%xmm8 \n" - "movdqa %%xmm8,%%xmm7 \n" - "neg %4 \n" - // Second round of bit swap. - "movdqa %%xmm0,%%xmm8 \n" - "movdqa %%xmm1,%%xmm9 \n" - "punpckhwd %%xmm2,%%xmm8 \n" - "punpckhwd %%xmm3,%%xmm9 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpcklwd %%xmm3,%%xmm1 \n" - "movdqa %%xmm8,%%xmm2 \n" - "movdqa %%xmm9,%%xmm3 \n" - "movdqa %%xmm4,%%xmm8 \n" - "movdqa %%xmm5,%%xmm9 \n" - "punpckhwd %%xmm6,%%xmm8 \n" - "punpckhwd %%xmm7,%%xmm9 \n" - "punpcklwd %%xmm6,%%xmm4 \n" - "punpcklwd %%xmm7,%%xmm5 \n" - "movdqa %%xmm8,%%xmm6 \n" - "movdqa %%xmm9,%%xmm7 \n" - // Third round of bit swap. - // Write to the destination pointer. - "movdqa %%xmm0,%%xmm8 \n" - "punpckldq %%xmm4,%%xmm0 \n" - "movlpd %%xmm0,(%1) \n" // Write back U channel - "movhpd %%xmm0,(%2) \n" // Write back V channel - "punpckhdq %%xmm4,%%xmm8 \n" - "movlpd %%xmm8,(%1,%5) \n" - "lea (%1,%5,2),%1 \n" - "movhpd %%xmm8,(%2,%6) \n" - "lea (%2,%6,2),%2 \n" - "movdqa %%xmm2,%%xmm8 \n" - "punpckldq %%xmm6,%%xmm2 \n" - "movlpd %%xmm2,(%1) \n" - "movhpd %%xmm2,(%2) \n" - "punpckhdq %%xmm6,%%xmm8 \n" - "movlpd %%xmm8,(%1,%5) \n" - "lea (%1,%5,2),%1 \n" - "movhpd %%xmm8,(%2,%6) \n" - "lea (%2,%6,2),%2 \n" - "movdqa %%xmm1,%%xmm8 \n" - "punpckldq %%xmm5,%%xmm1 \n" - "movlpd %%xmm1,(%1) \n" - "movhpd %%xmm1,(%2) \n" - "punpckhdq %%xmm5,%%xmm8 \n" - "movlpd %%xmm8,(%1,%5) \n" - "lea (%1,%5,2),%1 \n" - "movhpd %%xmm8,(%2,%6) \n" - "lea (%2,%6,2),%2 \n" - "movdqa %%xmm3,%%xmm8 \n" - "punpckldq %%xmm7,%%xmm3 \n" - "movlpd %%xmm3,(%1) \n" - "movhpd %%xmm3,(%2) \n" - "punpckhdq %%xmm7,%%xmm8 \n" - "sub $0x8,%3 \n" - "movlpd %%xmm8,(%1,%5) \n" - "lea (%1,%5,2),%1 \n" - "movhpd %%xmm8,(%2,%6) \n" - "lea (%2,%6,2),%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst_a), // %1 - "+r"(dst_b), // %2 - "+r"(width) // %3 - : "r"((intptr_t)(src_stride)), // %4 - "r"((intptr_t)(dst_stride_a)), // %5 - "r"((intptr_t)(dst_stride_b)) // %6 - : "memory", "cc", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", - "xmm8", "xmm9" - ); +void TransposeUVWx8_SSE2(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width) { + asm volatile( + // Read in the data from the source pointer. + // First round of bit swap. 
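+      // Each source row holds eight interleaved U/V byte pairs.  The three
+      // interleave rounds transpose U and V together; by the end each
+      // register carries a transposed U row in its low 8 bytes and the
+      // matching V row in its high 8 bytes, which movlpd/movhpd then write
+      // to the separate U and V destination planes.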
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu (%0,%4),%%xmm1 \n" + "lea (%0,%4,2),%0 \n" + "movdqa %%xmm0,%%xmm8 \n" + "punpcklbw %%xmm1,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm8 \n" + "movdqa %%xmm8,%%xmm1 \n" + "movdqu (%0),%%xmm2 \n" + "movdqu (%0,%4),%%xmm3 \n" + "lea (%0,%4,2),%0 \n" + "movdqa %%xmm2,%%xmm8 \n" + "punpcklbw %%xmm3,%%xmm2 \n" + "punpckhbw %%xmm3,%%xmm8 \n" + "movdqa %%xmm8,%%xmm3 \n" + "movdqu (%0),%%xmm4 \n" + "movdqu (%0,%4),%%xmm5 \n" + "lea (%0,%4,2),%0 \n" + "movdqa %%xmm4,%%xmm8 \n" + "punpcklbw %%xmm5,%%xmm4 \n" + "punpckhbw %%xmm5,%%xmm8 \n" + "movdqa %%xmm8,%%xmm5 \n" + "movdqu (%0),%%xmm6 \n" + "movdqu (%0,%4),%%xmm7 \n" + "lea (%0,%4,2),%0 \n" + "movdqa %%xmm6,%%xmm8 \n" + "punpcklbw %%xmm7,%%xmm6 \n" + "neg %4 \n" + "lea 0x10(%0,%4,8),%0 \n" + "punpckhbw %%xmm7,%%xmm8 \n" + "movdqa %%xmm8,%%xmm7 \n" + "neg %4 \n" + // Second round of bit swap. + "movdqa %%xmm0,%%xmm8 \n" + "movdqa %%xmm1,%%xmm9 \n" + "punpckhwd %%xmm2,%%xmm8 \n" + "punpckhwd %%xmm3,%%xmm9 \n" + "punpcklwd %%xmm2,%%xmm0 \n" + "punpcklwd %%xmm3,%%xmm1 \n" + "movdqa %%xmm8,%%xmm2 \n" + "movdqa %%xmm9,%%xmm3 \n" + "movdqa %%xmm4,%%xmm8 \n" + "movdqa %%xmm5,%%xmm9 \n" + "punpckhwd %%xmm6,%%xmm8 \n" + "punpckhwd %%xmm7,%%xmm9 \n" + "punpcklwd %%xmm6,%%xmm4 \n" + "punpcklwd %%xmm7,%%xmm5 \n" + "movdqa %%xmm8,%%xmm6 \n" + "movdqa %%xmm9,%%xmm7 \n" + // Third round of bit swap. + // Write to the destination pointer. + "movdqa %%xmm0,%%xmm8 \n" + "punpckldq %%xmm4,%%xmm0 \n" + "movlpd %%xmm0,(%1) \n" // Write back U channel + "movhpd %%xmm0,(%2) \n" // Write back V channel + "punpckhdq %%xmm4,%%xmm8 \n" + "movlpd %%xmm8,(%1,%5) \n" + "lea (%1,%5,2),%1 \n" + "movhpd %%xmm8,(%2,%6) \n" + "lea (%2,%6,2),%2 \n" + "movdqa %%xmm2,%%xmm8 \n" + "punpckldq %%xmm6,%%xmm2 \n" + "movlpd %%xmm2,(%1) \n" + "movhpd %%xmm2,(%2) \n" + "punpckhdq %%xmm6,%%xmm8 \n" + "movlpd %%xmm8,(%1,%5) \n" + "lea (%1,%5,2),%1 \n" + "movhpd %%xmm8,(%2,%6) \n" + "lea (%2,%6,2),%2 \n" + "movdqa %%xmm1,%%xmm8 \n" + "punpckldq %%xmm5,%%xmm1 \n" + "movlpd %%xmm1,(%1) \n" + "movhpd %%xmm1,(%2) \n" + "punpckhdq %%xmm5,%%xmm8 \n" + "movlpd %%xmm8,(%1,%5) \n" + "lea (%1,%5,2),%1 \n" + "movhpd %%xmm8,(%2,%6) \n" + "lea (%2,%6,2),%2 \n" + "movdqa %%xmm3,%%xmm8 \n" + "punpckldq %%xmm7,%%xmm3 \n" + "movlpd %%xmm3,(%1) \n" + "movhpd %%xmm3,(%2) \n" + "punpckhdq %%xmm7,%%xmm8 \n" + "sub $0x8,%3 \n" + "movlpd %%xmm8,(%1,%5) \n" + "lea (%1,%5,2),%1 \n" + "movhpd %%xmm8,(%2,%6) \n" + "lea (%2,%6,2),%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst_a), // %1 + "+r"(dst_b), // %2 + "+r"(width) // %3 + : "r"((intptr_t)(src_stride)), // %4 + "r"((intptr_t)(dst_stride_a)), // %5 + "r"((intptr_t)(dst_stride_b)) // %6 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7", "xmm8", "xmm9"); } #endif // defined(HAS_TRANSPOSEUVWX8_SSE2) #endif // defined(__x86_64__) || defined(__i386__) diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_mips.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_mips.cc deleted file mode 100644 index 1e8ce25197ab..000000000000 --- a/media/libvpx/libvpx/third_party/libyuv/source/rotate_mips.cc +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/row.h" -#include "libyuv/rotate_row.h" - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_MIPS) && \ - defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \ - (_MIPS_SIM == _MIPS_SIM_ABI32) - -void TransposeWx8_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 - "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 - "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 - "addu $t3, $t2, %[src_stride] \n" - "addu $t5, $t4, %[src_stride] \n" - "addu $t6, $t2, $t4 \n" - "andi $t0, %[dst], 0x3 \n" - "andi $t1, %[dst_stride], 0x3 \n" - "or $t0, $t0, $t1 \n" - "bnez $t0, 11f \n" - " subu $t7, $t9, %[src_stride] \n" -//dst + dst_stride word aligned - "1: \n" - "lbu $t0, 0(%[src]) \n" - "lbux $t1, %[src_stride](%[src]) \n" - "lbux $t8, $t2(%[src]) \n" - "lbux $t9, $t3(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s0, $t8, $t0 \n" - "lbux $t0, $t4(%[src]) \n" - "lbux $t1, $t5(%[src]) \n" - "lbux $t8, $t6(%[src]) \n" - "lbux $t9, $t7(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s1, $t8, $t0 \n" - "sw $s0, 0(%[dst]) \n" - "addiu %[width], -1 \n" - "addiu %[src], 1 \n" - "sw $s1, 4(%[dst]) \n" - "bnez %[width], 1b \n" - " addu %[dst], %[dst], %[dst_stride] \n" - "b 2f \n" -//dst + dst_stride unaligned - "11: \n" - "lbu $t0, 0(%[src]) \n" - "lbux $t1, %[src_stride](%[src]) \n" - "lbux $t8, $t2(%[src]) \n" - "lbux $t9, $t3(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s0, $t8, $t0 \n" - "lbux $t0, $t4(%[src]) \n" - "lbux $t1, $t5(%[src]) \n" - "lbux $t8, $t6(%[src]) \n" - "lbux $t9, $t7(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s1, $t8, $t0 \n" - "swr $s0, 0(%[dst]) \n" - "swl $s0, 3(%[dst]) \n" - "addiu %[width], -1 \n" - "addiu %[src], 1 \n" - "swr $s1, 4(%[dst]) \n" - "swl $s1, 7(%[dst]) \n" - "bnez %[width], 11b \n" - "addu %[dst], %[dst], %[dst_stride] \n" - "2: \n" - ".set pop \n" - :[src] "+r" (src), - [dst] "+r" (dst), - [width] "+r" (width) - :[src_stride] "r" (src_stride), - [dst_stride] "r" (dst_stride) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9", - "s0", "s1" - ); -} - -void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width) { - __asm__ __volatile__ ( - ".set noat \n" - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 - "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 - "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 - "addu $t3, $t2, %[src_stride] \n" - "addu $t5, $t4, %[src_stride] \n" - "addu $t6, $t2, $t4 \n" - - "srl $AT, %[width], 0x2 \n" - "andi $t0, %[dst], 0x3 \n" - "andi $t1, %[dst_stride], 0x3 \n" - "or $t0, $t0, $t1 \n" - "bnez $t0, 11f \n" - " subu $t7, $t9, %[src_stride] \n" -//dst + dst_stride word aligned - "1: \n" - "lw $t0, 0(%[src]) \n" - "lwx $t1, %[src_stride](%[src]) \n" - "lwx $t8, $t2(%[src]) \n" - "lwx $t9, $t3(%[src]) \n" - -// t0 = | 30 | 20 | 10 | 00 | -// t1 = | 31 | 21 | 
11 | 01 | -// t8 = | 32 | 22 | 12 | 02 | -// t9 = | 33 | 23 | 13 | 03 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 21 | 01 | 20 | 00 | - // s1 = | 23 | 03 | 22 | 02 | - // s2 = | 31 | 11 | 30 | 10 | - // s3 = | 33 | 13 | 32 | 12 | - - "precr.qb.ph $s4, $s1, $s0 \n" - "precrq.qb.ph $s5, $s1, $s0 \n" - "precr.qb.ph $s6, $s3, $s2 \n" - "precrq.qb.ph $s7, $s3, $s2 \n" - - // s4 = | 03 | 02 | 01 | 00 | - // s5 = | 23 | 22 | 21 | 20 | - // s6 = | 13 | 12 | 11 | 10 | - // s7 = | 33 | 32 | 31 | 30 | - - "lwx $t0, $t4(%[src]) \n" - "lwx $t1, $t5(%[src]) \n" - "lwx $t8, $t6(%[src]) \n" - "lwx $t9, $t7(%[src]) \n" - -// t0 = | 34 | 24 | 14 | 04 | -// t1 = | 35 | 25 | 15 | 05 | -// t8 = | 36 | 26 | 16 | 06 | -// t9 = | 37 | 27 | 17 | 07 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 25 | 05 | 24 | 04 | - // s1 = | 27 | 07 | 26 | 06 | - // s2 = | 35 | 15 | 34 | 14 | - // s3 = | 37 | 17 | 36 | 16 | - - "precr.qb.ph $t0, $s1, $s0 \n" - "precrq.qb.ph $t1, $s1, $s0 \n" - "precr.qb.ph $t8, $s3, $s2 \n" - "precrq.qb.ph $t9, $s3, $s2 \n" - - // t0 = | 07 | 06 | 05 | 04 | - // t1 = | 27 | 26 | 25 | 24 | - // t8 = | 17 | 16 | 15 | 14 | - // t9 = | 37 | 36 | 35 | 34 | - - "addu $s0, %[dst], %[dst_stride] \n" - "addu $s1, $s0, %[dst_stride] \n" - "addu $s2, $s1, %[dst_stride] \n" - - "sw $s4, 0(%[dst]) \n" - "sw $t0, 4(%[dst]) \n" - "sw $s6, 0($s0) \n" - "sw $t8, 4($s0) \n" - "sw $s5, 0($s1) \n" - "sw $t1, 4($s1) \n" - "sw $s7, 0($s2) \n" - "sw $t9, 4($s2) \n" - - "addiu $AT, -1 \n" - "addiu %[src], 4 \n" - - "bnez $AT, 1b \n" - " addu %[dst], $s2, %[dst_stride] \n" - "b 2f \n" -//dst + dst_stride unaligned - "11: \n" - "lw $t0, 0(%[src]) \n" - "lwx $t1, %[src_stride](%[src]) \n" - "lwx $t8, $t2(%[src]) \n" - "lwx $t9, $t3(%[src]) \n" - -// t0 = | 30 | 20 | 10 | 00 | -// t1 = | 31 | 21 | 11 | 01 | -// t8 = | 32 | 22 | 12 | 02 | -// t9 = | 33 | 23 | 13 | 03 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 21 | 01 | 20 | 00 | - // s1 = | 23 | 03 | 22 | 02 | - // s2 = | 31 | 11 | 30 | 10 | - // s3 = | 33 | 13 | 32 | 12 | - - "precr.qb.ph $s4, $s1, $s0 \n" - "precrq.qb.ph $s5, $s1, $s0 \n" - "precr.qb.ph $s6, $s3, $s2 \n" - "precrq.qb.ph $s7, $s3, $s2 \n" - - // s4 = | 03 | 02 | 01 | 00 | - // s5 = | 23 | 22 | 21 | 20 | - // s6 = | 13 | 12 | 11 | 10 | - // s7 = | 33 | 32 | 31 | 30 | - - "lwx $t0, $t4(%[src]) \n" - "lwx $t1, $t5(%[src]) \n" - "lwx $t8, $t6(%[src]) \n" - "lwx $t9, $t7(%[src]) \n" - -// t0 = | 34 | 24 | 14 | 04 | -// t1 = | 35 | 25 | 15 | 05 | -// t8 = | 36 | 26 | 16 | 06 | -// t9 = | 37 | 27 | 17 | 07 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 25 | 05 | 24 | 04 | - // s1 = | 27 | 07 | 26 | 06 | - // s2 = | 35 | 15 | 34 | 14 | - // s3 = | 37 | 17 | 36 | 16 | - - "precr.qb.ph $t0, $s1, $s0 \n" - "precrq.qb.ph $t1, $s1, $s0 \n" - "precr.qb.ph $t8, $s3, $s2 \n" - "precrq.qb.ph $t9, $s3, $s2 \n" - - // t0 = | 07 | 06 | 05 | 04 | - // t1 = | 27 | 26 | 25 | 24 | - // t8 = | 17 | 16 | 15 | 14 | - // t9 = | 37 | 36 | 35 | 34 | - - "addu $s0, %[dst], %[dst_stride] \n" - "addu $s1, $s0, %[dst_stride] \n" - "addu $s2, $s1, %[dst_stride] \n" - - "swr $s4, 0(%[dst]) \n" - "swl $s4, 3(%[dst]) \n" - 
"swr $t0, 4(%[dst]) \n" - "swl $t0, 7(%[dst]) \n" - "swr $s6, 0($s0) \n" - "swl $s6, 3($s0) \n" - "swr $t8, 4($s0) \n" - "swl $t8, 7($s0) \n" - "swr $s5, 0($s1) \n" - "swl $s5, 3($s1) \n" - "swr $t1, 4($s1) \n" - "swl $t1, 7($s1) \n" - "swr $s7, 0($s2) \n" - "swl $s7, 3($s2) \n" - "swr $t9, 4($s2) \n" - "swl $t9, 7($s2) \n" - - "addiu $AT, -1 \n" - "addiu %[src], 4 \n" - - "bnez $AT, 11b \n" - " addu %[dst], $s2, %[dst_stride] \n" - "2: \n" - ".set pop \n" - ".set at \n" - :[src] "+r" (src), - [dst] "+r" (dst), - [width] "+r" (width) - :[src_stride] "r" (src_stride), - [dst_stride] "r" (dst_stride) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", - "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7" - ); -} - -void TransposeUVWx8_DSPR2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 - "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 - "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 - "addu $t3, $t2, %[src_stride] \n" - "addu $t5, $t4, %[src_stride] \n" - "addu $t6, $t2, $t4 \n" - "subu $t7, $t9, %[src_stride] \n" - "srl $t1, %[width], 1 \n" - -// check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b - "andi $t0, %[dst_a], 0x3 \n" - "andi $t8, %[dst_b], 0x3 \n" - "or $t0, $t0, $t8 \n" - "andi $t8, %[dst_stride_a], 0x3 \n" - "andi $s5, %[dst_stride_b], 0x3 \n" - "or $t8, $t8, $s5 \n" - "or $t0, $t0, $t8 \n" - "bnez $t0, 11f \n" - " nop \n" -// dst + dst_stride word aligned (both, a & b dst addresses) - "1: \n" - "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0| - "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1| - "addu $s5, %[dst_a], %[dst_stride_a] \n" - "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2| - "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3| - "addu $s6, %[dst_b], %[dst_stride_b] \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0| - "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2| - "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2| - - "sw $s3, 0($s5) \n" - "sw $s4, 0($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0| - - "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4| - "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5| - "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6| - "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7| - "sw $s3, 0(%[dst_a]) \n" - "sw $s4, 0(%[dst_b]) \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4| - "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7| - "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6| - "sw $s3, 4($s5) \n" - "sw $s4, 4($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4| - - "addiu %[src], 4 \n" - "addiu $t1, -1 \n" - "sll $t0, %[dst_stride_a], 1 \n" - "sll $t8, %[dst_stride_b], 1 \n" - "sw $s3, 4(%[dst_a]) \n" - "sw $s4, 4(%[dst_b]) \n" - "addu %[dst_a], %[dst_a], $t0 \n" - "bnez $t1, 1b \n" - " addu %[dst_b], %[dst_b], $t8 \n" - "b 2f \n" - " nop \n" - -// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned - "11: \n" - "lw $t0, 
0(%[src]) \n" // |B0|A0|b0|a0| - "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1| - "addu $s5, %[dst_a], %[dst_stride_a] \n" - "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2| - "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3| - "addu $s6, %[dst_b], %[dst_stride_b] \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0| - "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2| - "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2| - - "swr $s3, 0($s5) \n" - "swl $s3, 3($s5) \n" - "swr $s4, 0($s6) \n" - "swl $s4, 3($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0| - - "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4| - "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5| - "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6| - "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7| - "swr $s3, 0(%[dst_a]) \n" - "swl $s3, 3(%[dst_a]) \n" - "swr $s4, 0(%[dst_b]) \n" - "swl $s4, 3(%[dst_b]) \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4| - "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7| - "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6| - - "swr $s3, 4($s5) \n" - "swl $s3, 7($s5) \n" - "swr $s4, 4($s6) \n" - "swl $s4, 7($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4| - - "addiu %[src], 4 \n" - "addiu $t1, -1 \n" - "sll $t0, %[dst_stride_a], 1 \n" - "sll $t8, %[dst_stride_b], 1 \n" - "swr $s3, 4(%[dst_a]) \n" - "swl $s3, 7(%[dst_a]) \n" - "swr $s4, 4(%[dst_b]) \n" - "swl $s4, 7(%[dst_b]) \n" - "addu %[dst_a], %[dst_a], $t0 \n" - "bnez $t1, 11b \n" - " addu %[dst_b], %[dst_b], $t8 \n" - - "2: \n" - ".set pop \n" - : [src] "+r" (src), - [dst_a] "+r" (dst_a), - [dst_b] "+r" (dst_b), - [width] "+r" (width), - [src_stride] "+r" (src_stride) - : [dst_stride_a] "r" (dst_stride_a), - [dst_stride_b] "r" (dst_stride_b) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9", - "s0", "s1", "s2", "s3", - "s4", "s5", "s6" - ); -} - -#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_msa.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_msa.cc new file mode 100644 index 000000000000..99bdca65b32c --- /dev/null +++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_msa.cc @@ -0,0 +1,250 @@ +/* + * Copyright 2016 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "libyuv/rotate_row.h" + +// This module is for GCC MSA +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#include "libyuv/macros_msa.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#define ILVRL_B(in0, in1, in2, in3, out0, out1, out2, out3) \ + { \ + out0 = (v16u8)__msa_ilvr_b((v16i8)in1, (v16i8)in0); \ + out1 = (v16u8)__msa_ilvl_b((v16i8)in1, (v16i8)in0); \ + out2 = (v16u8)__msa_ilvr_b((v16i8)in3, (v16i8)in2); \ + out3 = (v16u8)__msa_ilvl_b((v16i8)in3, (v16i8)in2); \ + } + +#define ILVRL_H(in0, in1, in2, in3, out0, out1, out2, out3) \ + { \ + out0 = (v16u8)__msa_ilvr_h((v8i16)in1, (v8i16)in0); \ + out1 = (v16u8)__msa_ilvl_h((v8i16)in1, (v8i16)in0); \ + out2 = (v16u8)__msa_ilvr_h((v8i16)in3, (v8i16)in2); \ + out3 = (v16u8)__msa_ilvl_h((v8i16)in3, (v8i16)in2); \ + } + +#define ILVRL_W(in0, in1, in2, in3, out0, out1, out2, out3) \ + { \ + out0 = (v16u8)__msa_ilvr_w((v4i32)in1, (v4i32)in0); \ + out1 = (v16u8)__msa_ilvl_w((v4i32)in1, (v4i32)in0); \ + out2 = (v16u8)__msa_ilvr_w((v4i32)in3, (v4i32)in2); \ + out3 = (v16u8)__msa_ilvl_w((v4i32)in3, (v4i32)in2); \ + } + +#define ILVRL_D(in0, in1, in2, in3, out0, out1, out2, out3) \ + { \ + out0 = (v16u8)__msa_ilvr_d((v2i64)in1, (v2i64)in0); \ + out1 = (v16u8)__msa_ilvl_d((v2i64)in1, (v2i64)in0); \ + out2 = (v16u8)__msa_ilvr_d((v2i64)in3, (v2i64)in2); \ + out3 = (v16u8)__msa_ilvl_d((v2i64)in3, (v2i64)in2); \ + } + +void TransposeWx16_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width) { + TransposeWx8_C(src, src_stride, dst, dst_stride, width); + TransposeWx8_C((src + 8 * src_stride), src_stride, (dst + 8), dst_stride, + width); +} + +void TransposeUVWx16_C(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width) { + TransposeUVWx8_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, + width); + TransposeUVWx8_C((src + 8 * src_stride), src_stride, (dst_a + 8), + dst_stride_a, (dst_b + 8), dst_stride_b, width); +} + +void TransposeWx16_MSA(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width) { + int x; + const uint8_t* s; + v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3; + v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; + v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9; + + for (x = 0; x < width; x += 16) { + s = src; + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); + ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3); + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); + ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7); + ILVRL_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3); + ILVRL_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7); + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + ILVRL_B(src0, src1, src2, 
src3, vec0, vec1, vec2, vec3); + ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3); + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); + ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7); + res8 = (v16u8)__msa_ilvr_w((v4i32)reg4, (v4i32)reg0); + res9 = (v16u8)__msa_ilvl_w((v4i32)reg4, (v4i32)reg0); + ILVRL_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3); + ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride); + dst += dst_stride * 4; + res8 = (v16u8)__msa_ilvr_w((v4i32)reg5, (v4i32)reg1); + res9 = (v16u8)__msa_ilvl_w((v4i32)reg5, (v4i32)reg1); + ILVRL_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3); + ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride); + dst += dst_stride * 4; + res8 = (v16u8)__msa_ilvr_w((v4i32)reg6, (v4i32)reg2); + res9 = (v16u8)__msa_ilvl_w((v4i32)reg6, (v4i32)reg2); + ILVRL_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3); + ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride); + dst += dst_stride * 4; + res8 = (v16u8)__msa_ilvr_w((v4i32)reg7, (v4i32)reg3); + res9 = (v16u8)__msa_ilvl_w((v4i32)reg7, (v4i32)reg3); + ILVRL_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3); + ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride); + src += 16; + dst += dst_stride * 4; + } +} + +void TransposeUVWx16_MSA(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width) { + int x; + const uint8_t* s; + v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3; + v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; + v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9; + + for (x = 0; x < width; x += 8) { + s = src; + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); + ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3); + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); + ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7); + ILVRL_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3); + ILVRL_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7); + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); + ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3); + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); + s += src_stride; + ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); + ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7); + res8 = (v16u8)__msa_ilvr_w((v4i32)reg4, (v4i32)reg0); + res9 = 
(v16u8)__msa_ilvl_w((v4i32)reg4, (v4i32)reg0); + ILVRL_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3); + ST_UB2(dst0, dst2, dst_a, dst_stride_a); + ST_UB2(dst1, dst3, dst_b, dst_stride_b); + dst_a += dst_stride_a * 2; + dst_b += dst_stride_b * 2; + res8 = (v16u8)__msa_ilvr_w((v4i32)reg5, (v4i32)reg1); + res9 = (v16u8)__msa_ilvl_w((v4i32)reg5, (v4i32)reg1); + ILVRL_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3); + ST_UB2(dst0, dst2, dst_a, dst_stride_a); + ST_UB2(dst1, dst3, dst_b, dst_stride_b); + dst_a += dst_stride_a * 2; + dst_b += dst_stride_b * 2; + res8 = (v16u8)__msa_ilvr_w((v4i32)reg6, (v4i32)reg2); + res9 = (v16u8)__msa_ilvl_w((v4i32)reg6, (v4i32)reg2); + ILVRL_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3); + ST_UB2(dst0, dst2, dst_a, dst_stride_a); + ST_UB2(dst1, dst3, dst_b, dst_stride_b); + dst_a += dst_stride_a * 2; + dst_b += dst_stride_b * 2; + res8 = (v16u8)__msa_ilvr_w((v4i32)reg7, (v4i32)reg3); + res9 = (v16u8)__msa_ilvl_w((v4i32)reg7, (v4i32)reg3); + ILVRL_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3); + ST_UB2(dst0, dst2, dst_a, dst_stride_a); + ST_UB2(dst1, dst3, dst_b, dst_stride_b); + src += 16; + dst_a += dst_stride_a * 2; + dst_b += dst_stride_b * 2; + } +} + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + +#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon.cc index 1c22b472bc11..fdc0dd476c6d 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon.cc @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "libyuv/row.h" #include "libyuv/rotate_row.h" +#include "libyuv/row.h" #include "libyuv/basic_types.h" @@ -21,38 +21,32 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ !defined(__aarch64__) -static uvec8 kVTbl4x4Transpose = - { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; +static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13, + 2, 6, 10, 14, 3, 7, 11, 15}; -void TransposeWx8_NEON(const uint8* src, int src_stride, - uint8* dst, int dst_stride, +void TransposeWx8_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, int width) { - const uint8* src_temp; - asm volatile ( - // loops are on blocks of 8. loop will stop when - // counter gets to or below 0. starting the counter - // at w-8 allow for this - "sub %5, #8 \n" + const uint8_t* src_temp; + asm volatile( + // loops are on blocks of 8. loop will stop when + // counter gets to or below 0. starting the counter + // at w-8 allow for this + "sub %5, #8 \n" - // handle 8x8 blocks. this should be the majority of the plane - "1: \n" + // handle 8x8 blocks. 
this should be the majority of the plane + "1: \n" "mov %0, %1 \n" - MEMACCESS(0) "vld1.8 {d0}, [%0], %2 \n" - MEMACCESS(0) "vld1.8 {d1}, [%0], %2 \n" - MEMACCESS(0) "vld1.8 {d2}, [%0], %2 \n" - MEMACCESS(0) "vld1.8 {d3}, [%0], %2 \n" - MEMACCESS(0) "vld1.8 {d4}, [%0], %2 \n" - MEMACCESS(0) "vld1.8 {d5}, [%0], %2 \n" - MEMACCESS(0) "vld1.8 {d6}, [%0], %2 \n" - MEMACCESS(0) "vld1.8 {d7}, [%0] \n" "vtrn.8 d1, d0 \n" @@ -77,21 +71,13 @@ void TransposeWx8_NEON(const uint8* src, int src_stride, "mov %0, %3 \n" - MEMACCESS(0) "vst1.8 {d1}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d0}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d3}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d2}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d5}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d4}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d7}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d6}, [%0] \n" "add %1, #8 \n" // src += 8 @@ -99,180 +85,138 @@ void TransposeWx8_NEON(const uint8* src, int src_stride, "subs %5, #8 \n" // w -= 8 "bge 1b \n" - // add 8 back to counter. if the result is 0 there are - // no residuals. - "adds %5, #8 \n" - "beq 4f \n" + // add 8 back to counter. if the result is 0 there are + // no residuals. + "adds %5, #8 \n" + "beq 4f \n" - // some residual, so between 1 and 7 lines left to transpose - "cmp %5, #2 \n" - "blt 3f \n" + // some residual, so between 1 and 7 lines left to transpose + "cmp %5, #2 \n" + "blt 3f \n" - "cmp %5, #4 \n" - "blt 2f \n" + "cmp %5, #4 \n" + "blt 2f \n" - // 4x8 block - "mov %0, %1 \n" - MEMACCESS(0) - "vld1.32 {d0[0]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.32 {d0[1]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.32 {d1[0]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.32 {d1[1]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.32 {d2[0]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.32 {d2[1]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.32 {d3[0]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.32 {d3[1]}, [%0] \n" + // 4x8 block + "mov %0, %1 \n" + "vld1.32 {d0[0]}, [%0], %2 \n" + "vld1.32 {d0[1]}, [%0], %2 \n" + "vld1.32 {d1[0]}, [%0], %2 \n" + "vld1.32 {d1[1]}, [%0], %2 \n" + "vld1.32 {d2[0]}, [%0], %2 \n" + "vld1.32 {d2[1]}, [%0], %2 \n" + "vld1.32 {d3[0]}, [%0], %2 \n" + "vld1.32 {d3[1]}, [%0] \n" - "mov %0, %3 \n" + "mov %0, %3 \n" - MEMACCESS(6) - "vld1.8 {q3}, [%6] \n" + "vld1.8 {q3}, [%6] \n" - "vtbl.8 d4, {d0, d1}, d6 \n" - "vtbl.8 d5, {d0, d1}, d7 \n" - "vtbl.8 d0, {d2, d3}, d6 \n" - "vtbl.8 d1, {d2, d3}, d7 \n" + "vtbl.8 d4, {d0, d1}, d6 \n" + "vtbl.8 d5, {d0, d1}, d7 \n" + "vtbl.8 d0, {d2, d3}, d6 \n" + "vtbl.8 d1, {d2, d3}, d7 \n" - // TODO(frkoenig): Rework shuffle above to - // write out with 4 instead of 8 writes. - MEMACCESS(0) - "vst1.32 {d4[0]}, [%0], %4 \n" - MEMACCESS(0) - "vst1.32 {d4[1]}, [%0], %4 \n" - MEMACCESS(0) - "vst1.32 {d5[0]}, [%0], %4 \n" - MEMACCESS(0) - "vst1.32 {d5[1]}, [%0] \n" + // TODO(frkoenig): Rework shuffle above to + // write out with 4 instead of 8 writes. 
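+      // kVTbl4x4Transpose gathers every fourth byte (0,4,8,12,...), turning
+      // the four-byte rows loaded above into columns, so d4/d5 and d0/d1
+      // now hold the transposed 4x8 block for the lane stores below.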
+ "vst1.32 {d4[0]}, [%0], %4 \n" + "vst1.32 {d4[1]}, [%0], %4 \n" + "vst1.32 {d5[0]}, [%0], %4 \n" + "vst1.32 {d5[1]}, [%0] \n" - "add %0, %3, #4 \n" - MEMACCESS(0) - "vst1.32 {d0[0]}, [%0], %4 \n" - MEMACCESS(0) - "vst1.32 {d0[1]}, [%0], %4 \n" - MEMACCESS(0) - "vst1.32 {d1[0]}, [%0], %4 \n" - MEMACCESS(0) - "vst1.32 {d1[1]}, [%0] \n" + "add %0, %3, #4 \n" + "vst1.32 {d0[0]}, [%0], %4 \n" + "vst1.32 {d0[1]}, [%0], %4 \n" + "vst1.32 {d1[0]}, [%0], %4 \n" + "vst1.32 {d1[1]}, [%0] \n" - "add %1, #4 \n" // src += 4 - "add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride - "subs %5, #4 \n" // w -= 4 - "beq 4f \n" + "add %1, #4 \n" // src += 4 + "add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride + "subs %5, #4 \n" // w -= 4 + "beq 4f \n" - // some residual, check to see if it includes a 2x8 block, - // or less - "cmp %5, #2 \n" - "blt 3f \n" + // some residual, check to see if it includes a 2x8 block, + // or less + "cmp %5, #2 \n" + "blt 3f \n" - // 2x8 block - "2: \n" - "mov %0, %1 \n" - MEMACCESS(0) - "vld1.16 {d0[0]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.16 {d1[0]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.16 {d0[1]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.16 {d1[1]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.16 {d0[2]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.16 {d1[2]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.16 {d0[3]}, [%0], %2 \n" - MEMACCESS(0) - "vld1.16 {d1[3]}, [%0] \n" + // 2x8 block + "2: \n" + "mov %0, %1 \n" + "vld1.16 {d0[0]}, [%0], %2 \n" + "vld1.16 {d1[0]}, [%0], %2 \n" + "vld1.16 {d0[1]}, [%0], %2 \n" + "vld1.16 {d1[1]}, [%0], %2 \n" + "vld1.16 {d0[2]}, [%0], %2 \n" + "vld1.16 {d1[2]}, [%0], %2 \n" + "vld1.16 {d0[3]}, [%0], %2 \n" + "vld1.16 {d1[3]}, [%0] \n" - "vtrn.8 d0, d1 \n" + "vtrn.8 d0, d1 \n" - "mov %0, %3 \n" + "mov %0, %3 \n" - MEMACCESS(0) - "vst1.64 {d0}, [%0], %4 \n" - MEMACCESS(0) - "vst1.64 {d1}, [%0] \n" + "vst1.64 {d0}, [%0], %4 \n" + "vst1.64 {d1}, [%0] \n" - "add %1, #2 \n" // src += 2 - "add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride - "subs %5, #2 \n" // w -= 2 - "beq 4f \n" + "add %1, #2 \n" // src += 2 + "add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride + "subs %5, #2 \n" // w -= 2 + "beq 4f \n" - // 1x8 block - "3: \n" - MEMACCESS(1) - "vld1.8 {d0[0]}, [%1], %2 \n" - MEMACCESS(1) - "vld1.8 {d0[1]}, [%1], %2 \n" - MEMACCESS(1) - "vld1.8 {d0[2]}, [%1], %2 \n" - MEMACCESS(1) - "vld1.8 {d0[3]}, [%1], %2 \n" - MEMACCESS(1) - "vld1.8 {d0[4]}, [%1], %2 \n" - MEMACCESS(1) - "vld1.8 {d0[5]}, [%1], %2 \n" - MEMACCESS(1) - "vld1.8 {d0[6]}, [%1], %2 \n" - MEMACCESS(1) - "vld1.8 {d0[7]}, [%1] \n" + // 1x8 block + "3: \n" + "vld1.8 {d0[0]}, [%1], %2 \n" + "vld1.8 {d0[1]}, [%1], %2 \n" + "vld1.8 {d0[2]}, [%1], %2 \n" + "vld1.8 {d0[3]}, [%1], %2 \n" + "vld1.8 {d0[4]}, [%1], %2 \n" + "vld1.8 {d0[5]}, [%1], %2 \n" + "vld1.8 {d0[6]}, [%1], %2 \n" + "vld1.8 {d0[7]}, [%1] \n" - MEMACCESS(3) - "vst1.64 {d0}, [%3] \n" + "vst1.64 {d0}, [%3] \n" - "4: \n" + "4: \n" - : "=&r"(src_temp), // %0 - "+r"(src), // %1 - "+r"(src_stride), // %2 - "+r"(dst), // %3 - "+r"(dst_stride), // %4 - "+r"(width) // %5 - : "r"(&kVTbl4x4Transpose) // %6 - : "memory", "cc", "q0", "q1", "q2", "q3" - ); + : "=&r"(src_temp), // %0 + "+r"(src), // %1 + "+r"(src_stride), // %2 + "+r"(dst), // %3 + "+r"(dst_stride), // %4 + "+r"(width) // %5 + : "r"(&kVTbl4x4Transpose) // %6 + : "memory", "cc", "q0", "q1", "q2", "q3"); } -static uvec8 kVTbl4x4TransposeDi = - { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 }; +static const uvec8 kVTbl4x4TransposeDi = {0, 8, 1, 9, 2, 10, 3, 11, + 4, 12, 5, 13, 6, 14, 7, 15}; 
-void TransposeUVWx8_NEON(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, +void TransposeUVWx8_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, int width) { - const uint8* src_temp; - asm volatile ( - // loops are on blocks of 8. loop will stop when - // counter gets to or below 0. starting the counter - // at w-8 allow for this - "sub %7, #8 \n" + const uint8_t* src_temp; + asm volatile( + // loops are on blocks of 8. loop will stop when + // counter gets to or below 0. starting the counter + // at w-8 allow for this + "sub %7, #8 \n" - // handle 8x8 blocks. this should be the majority of the plane - "1: \n" + // handle 8x8 blocks. this should be the majority of the plane + "1: \n" "mov %0, %1 \n" - MEMACCESS(0) "vld2.8 {d0, d1}, [%0], %2 \n" - MEMACCESS(0) "vld2.8 {d2, d3}, [%0], %2 \n" - MEMACCESS(0) "vld2.8 {d4, d5}, [%0], %2 \n" - MEMACCESS(0) "vld2.8 {d6, d7}, [%0], %2 \n" - MEMACCESS(0) "vld2.8 {d16, d17}, [%0], %2 \n" - MEMACCESS(0) "vld2.8 {d18, d19}, [%0], %2 \n" - MEMACCESS(0) "vld2.8 {d20, d21}, [%0], %2 \n" - MEMACCESS(0) "vld2.8 {d22, d23}, [%0] \n" "vtrn.8 q1, q0 \n" @@ -301,40 +245,24 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride, "mov %0, %3 \n" - MEMACCESS(0) "vst1.8 {d2}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d0}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d6}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d4}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d18}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d16}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d22}, [%0], %4 \n" - MEMACCESS(0) "vst1.8 {d20}, [%0] \n" "mov %0, %5 \n" - MEMACCESS(0) "vst1.8 {d3}, [%0], %6 \n" - MEMACCESS(0) "vst1.8 {d1}, [%0], %6 \n" - MEMACCESS(0) "vst1.8 {d7}, [%0], %6 \n" - MEMACCESS(0) "vst1.8 {d5}, [%0], %6 \n" - MEMACCESS(0) "vst1.8 {d19}, [%0], %6 \n" - MEMACCESS(0) "vst1.8 {d17}, [%0], %6 \n" - MEMACCESS(0) "vst1.8 {d23}, [%0], %6 \n" - MEMACCESS(0) "vst1.8 {d21}, [%0] \n" "add %1, #8*2 \n" // src += 8*2 @@ -343,187 +271,142 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride, "subs %7, #8 \n" // w -= 8 "bge 1b \n" - // add 8 back to counter. if the result is 0 there are - // no residuals. - "adds %7, #8 \n" - "beq 4f \n" + // add 8 back to counter. if the result is 0 there are + // no residuals. 
+ "adds %7, #8 \n" + "beq 4f \n" - // some residual, so between 1 and 7 lines left to transpose - "cmp %7, #2 \n" - "blt 3f \n" + // some residual, so between 1 and 7 lines left to transpose + "cmp %7, #2 \n" + "blt 3f \n" - "cmp %7, #4 \n" - "blt 2f \n" + "cmp %7, #4 \n" + "blt 2f \n" - // TODO(frkoenig): Clean this up - // 4x8 block - "mov %0, %1 \n" - MEMACCESS(0) - "vld1.64 {d0}, [%0], %2 \n" - MEMACCESS(0) - "vld1.64 {d1}, [%0], %2 \n" - MEMACCESS(0) - "vld1.64 {d2}, [%0], %2 \n" - MEMACCESS(0) - "vld1.64 {d3}, [%0], %2 \n" - MEMACCESS(0) - "vld1.64 {d4}, [%0], %2 \n" - MEMACCESS(0) - "vld1.64 {d5}, [%0], %2 \n" - MEMACCESS(0) - "vld1.64 {d6}, [%0], %2 \n" - MEMACCESS(0) - "vld1.64 {d7}, [%0] \n" + // TODO(frkoenig): Clean this up + // 4x8 block + "mov %0, %1 \n" + "vld1.64 {d0}, [%0], %2 \n" + "vld1.64 {d1}, [%0], %2 \n" + "vld1.64 {d2}, [%0], %2 \n" + "vld1.64 {d3}, [%0], %2 \n" + "vld1.64 {d4}, [%0], %2 \n" + "vld1.64 {d5}, [%0], %2 \n" + "vld1.64 {d6}, [%0], %2 \n" + "vld1.64 {d7}, [%0] \n" - MEMACCESS(8) - "vld1.8 {q15}, [%8] \n" + "vld1.8 {q15}, [%8] \n" - "vtrn.8 q0, q1 \n" - "vtrn.8 q2, q3 \n" + "vtrn.8 q0, q1 \n" + "vtrn.8 q2, q3 \n" - "vtbl.8 d16, {d0, d1}, d30 \n" - "vtbl.8 d17, {d0, d1}, d31 \n" - "vtbl.8 d18, {d2, d3}, d30 \n" - "vtbl.8 d19, {d2, d3}, d31 \n" - "vtbl.8 d20, {d4, d5}, d30 \n" - "vtbl.8 d21, {d4, d5}, d31 \n" - "vtbl.8 d22, {d6, d7}, d30 \n" - "vtbl.8 d23, {d6, d7}, d31 \n" + "vtbl.8 d16, {d0, d1}, d30 \n" + "vtbl.8 d17, {d0, d1}, d31 \n" + "vtbl.8 d18, {d2, d3}, d30 \n" + "vtbl.8 d19, {d2, d3}, d31 \n" + "vtbl.8 d20, {d4, d5}, d30 \n" + "vtbl.8 d21, {d4, d5}, d31 \n" + "vtbl.8 d22, {d6, d7}, d30 \n" + "vtbl.8 d23, {d6, d7}, d31 \n" - "mov %0, %3 \n" + "mov %0, %3 \n" - MEMACCESS(0) - "vst1.32 {d16[0]}, [%0], %4 \n" - MEMACCESS(0) - "vst1.32 {d16[1]}, [%0], %4 \n" - MEMACCESS(0) - "vst1.32 {d17[0]}, [%0], %4 \n" - MEMACCESS(0) - "vst1.32 {d17[1]}, [%0], %4 \n" + "vst1.32 {d16[0]}, [%0], %4 \n" + "vst1.32 {d16[1]}, [%0], %4 \n" + "vst1.32 {d17[0]}, [%0], %4 \n" + "vst1.32 {d17[1]}, [%0], %4 \n" - "add %0, %3, #4 \n" - MEMACCESS(0) - "vst1.32 {d20[0]}, [%0], %4 \n" - MEMACCESS(0) - "vst1.32 {d20[1]}, [%0], %4 \n" - MEMACCESS(0) - "vst1.32 {d21[0]}, [%0], %4 \n" - MEMACCESS(0) - "vst1.32 {d21[1]}, [%0] \n" + "add %0, %3, #4 \n" + "vst1.32 {d20[0]}, [%0], %4 \n" + "vst1.32 {d20[1]}, [%0], %4 \n" + "vst1.32 {d21[0]}, [%0], %4 \n" + "vst1.32 {d21[1]}, [%0] \n" - "mov %0, %5 \n" + "mov %0, %5 \n" - MEMACCESS(0) - "vst1.32 {d18[0]}, [%0], %6 \n" - MEMACCESS(0) - "vst1.32 {d18[1]}, [%0], %6 \n" - MEMACCESS(0) - "vst1.32 {d19[0]}, [%0], %6 \n" - MEMACCESS(0) - "vst1.32 {d19[1]}, [%0], %6 \n" + "vst1.32 {d18[0]}, [%0], %6 \n" + "vst1.32 {d18[1]}, [%0], %6 \n" + "vst1.32 {d19[0]}, [%0], %6 \n" + "vst1.32 {d19[1]}, [%0], %6 \n" - "add %0, %5, #4 \n" - MEMACCESS(0) - "vst1.32 {d22[0]}, [%0], %6 \n" - MEMACCESS(0) - "vst1.32 {d22[1]}, [%0], %6 \n" - MEMACCESS(0) - "vst1.32 {d23[0]}, [%0], %6 \n" - MEMACCESS(0) - "vst1.32 {d23[1]}, [%0] \n" + "add %0, %5, #4 \n" + "vst1.32 {d22[0]}, [%0], %6 \n" + "vst1.32 {d22[1]}, [%0], %6 \n" + "vst1.32 {d23[0]}, [%0], %6 \n" + "vst1.32 {d23[1]}, [%0] \n" - "add %1, #4*2 \n" // src += 4 * 2 - "add %3, %3, %4, lsl #2 \n" // dst_a += 4 * dst_stride_a - "add %5, %5, %6, lsl #2 \n" // dst_b += 4 * dst_stride_b - "subs %7, #4 \n" // w -= 4 - "beq 4f \n" + "add %1, #4*2 \n" // src += 4 * 2 + "add %3, %3, %4, lsl #2 \n" // dst_a += 4 * + // dst_stride_a + "add %5, %5, %6, lsl #2 \n" // dst_b += 4 * + // dst_stride_b + "subs %7, #4 \n" // w -= 
4 + "beq 4f \n" - // some residual, check to see if it includes a 2x8 block, - // or less - "cmp %7, #2 \n" - "blt 3f \n" + // some residual, check to see if it includes a 2x8 block, + // or less + "cmp %7, #2 \n" + "blt 3f \n" - // 2x8 block - "2: \n" - "mov %0, %1 \n" - MEMACCESS(0) - "vld2.16 {d0[0], d2[0]}, [%0], %2 \n" - MEMACCESS(0) - "vld2.16 {d1[0], d3[0]}, [%0], %2 \n" - MEMACCESS(0) - "vld2.16 {d0[1], d2[1]}, [%0], %2 \n" - MEMACCESS(0) - "vld2.16 {d1[1], d3[1]}, [%0], %2 \n" - MEMACCESS(0) - "vld2.16 {d0[2], d2[2]}, [%0], %2 \n" - MEMACCESS(0) - "vld2.16 {d1[2], d3[2]}, [%0], %2 \n" - MEMACCESS(0) - "vld2.16 {d0[3], d2[3]}, [%0], %2 \n" - MEMACCESS(0) - "vld2.16 {d1[3], d3[3]}, [%0] \n" + // 2x8 block + "2: \n" + "mov %0, %1 \n" + "vld2.16 {d0[0], d2[0]}, [%0], %2 \n" + "vld2.16 {d1[0], d3[0]}, [%0], %2 \n" + "vld2.16 {d0[1], d2[1]}, [%0], %2 \n" + "vld2.16 {d1[1], d3[1]}, [%0], %2 \n" + "vld2.16 {d0[2], d2[2]}, [%0], %2 \n" + "vld2.16 {d1[2], d3[2]}, [%0], %2 \n" + "vld2.16 {d0[3], d2[3]}, [%0], %2 \n" + "vld2.16 {d1[3], d3[3]}, [%0] \n" - "vtrn.8 d0, d1 \n" - "vtrn.8 d2, d3 \n" + "vtrn.8 d0, d1 \n" + "vtrn.8 d2, d3 \n" - "mov %0, %3 \n" + "mov %0, %3 \n" - MEMACCESS(0) - "vst1.64 {d0}, [%0], %4 \n" - MEMACCESS(0) - "vst1.64 {d2}, [%0] \n" + "vst1.64 {d0}, [%0], %4 \n" + "vst1.64 {d2}, [%0] \n" - "mov %0, %5 \n" + "mov %0, %5 \n" - MEMACCESS(0) - "vst1.64 {d1}, [%0], %6 \n" - MEMACCESS(0) - "vst1.64 {d3}, [%0] \n" + "vst1.64 {d1}, [%0], %6 \n" + "vst1.64 {d3}, [%0] \n" - "add %1, #2*2 \n" // src += 2 * 2 - "add %3, %3, %4, lsl #1 \n" // dst_a += 2 * dst_stride_a - "add %5, %5, %6, lsl #1 \n" // dst_b += 2 * dst_stride_b - "subs %7, #2 \n" // w -= 2 - "beq 4f \n" + "add %1, #2*2 \n" // src += 2 * 2 + "add %3, %3, %4, lsl #1 \n" // dst_a += 2 * + // dst_stride_a + "add %5, %5, %6, lsl #1 \n" // dst_b += 2 * + // dst_stride_b + "subs %7, #2 \n" // w -= 2 + "beq 4f \n" - // 1x8 block - "3: \n" - MEMACCESS(1) - "vld2.8 {d0[0], d1[0]}, [%1], %2 \n" - MEMACCESS(1) - "vld2.8 {d0[1], d1[1]}, [%1], %2 \n" - MEMACCESS(1) - "vld2.8 {d0[2], d1[2]}, [%1], %2 \n" - MEMACCESS(1) - "vld2.8 {d0[3], d1[3]}, [%1], %2 \n" - MEMACCESS(1) - "vld2.8 {d0[4], d1[4]}, [%1], %2 \n" - MEMACCESS(1) - "vld2.8 {d0[5], d1[5]}, [%1], %2 \n" - MEMACCESS(1) - "vld2.8 {d0[6], d1[6]}, [%1], %2 \n" - MEMACCESS(1) - "vld2.8 {d0[7], d1[7]}, [%1] \n" + // 1x8 block + "3: \n" + "vld2.8 {d0[0], d1[0]}, [%1], %2 \n" + "vld2.8 {d0[1], d1[1]}, [%1], %2 \n" + "vld2.8 {d0[2], d1[2]}, [%1], %2 \n" + "vld2.8 {d0[3], d1[3]}, [%1], %2 \n" + "vld2.8 {d0[4], d1[4]}, [%1], %2 \n" + "vld2.8 {d0[5], d1[5]}, [%1], %2 \n" + "vld2.8 {d0[6], d1[6]}, [%1], %2 \n" + "vld2.8 {d0[7], d1[7]}, [%1] \n" - MEMACCESS(3) - "vst1.64 {d0}, [%3] \n" - MEMACCESS(5) - "vst1.64 {d1}, [%5] \n" + "vst1.64 {d0}, [%3] \n" + "vst1.64 {d1}, [%5] \n" - "4: \n" + "4: \n" - : "=&r"(src_temp), // %0 - "+r"(src), // %1 - "+r"(src_stride), // %2 - "+r"(dst_a), // %3 - "+r"(dst_stride_a), // %4 - "+r"(dst_b), // %5 - "+r"(dst_stride_b), // %6 - "+r"(width) // %7 - : "r"(&kVTbl4x4TransposeDi) // %8 - : "memory", "cc", - "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" - ); + : "=&r"(src_temp), // %0 + "+r"(src), // %1 + "+r"(src_stride), // %2 + "+r"(dst_a), // %3 + "+r"(dst_stride_a), // %4 + "+r"(dst_b), // %5 + "+r"(dst_stride_b), // %6 + "+r"(width) // %7 + : "r"(&kVTbl4x4TransposeDi) // %8 + : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); } #endif // defined(__ARM_NEON__) && !defined(__aarch64__) diff --git 
a/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon64.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon64.cc index 1ab448f3ab4c..f469baacf689 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon64.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_neon64.cc @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "libyuv/row.h" #include "libyuv/rotate_row.h" +#include "libyuv/row.h" #include "libyuv/basic_types.h" @@ -21,38 +21,32 @@ extern "C" { // This module is for GCC Neon armv8 64 bit. #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) -static uvec8 kVTbl4x4Transpose = - { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; +static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13, + 2, 6, 10, 14, 3, 7, 11, 15}; -void TransposeWx8_NEON(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width) { - const uint8* src_temp; - int64 width64 = (int64) width; // Work around clang 3.4 warning. - asm volatile ( - // loops are on blocks of 8. loop will stop when - // counter gets to or below 0. starting the counter - // at w-8 allow for this - "sub %3, %3, #8 \n" +void TransposeWx8_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width) { + const uint8_t* src_temp; + asm volatile( + // loops are on blocks of 8. loop will stop when + // counter gets to or below 0. starting the counter + // at w-8 allow for this + "sub %w3, %w3, #8 \n" - // handle 8x8 blocks. this should be the majority of the plane - "1: \n" + // handle 8x8 blocks. this should be the majority of the plane + "1: \n" "mov %0, %1 \n" - MEMACCESS(0) "ld1 {v0.8b}, [%0], %5 \n" - MEMACCESS(0) "ld1 {v1.8b}, [%0], %5 \n" - MEMACCESS(0) "ld1 {v2.8b}, [%0], %5 \n" - MEMACCESS(0) "ld1 {v3.8b}, [%0], %5 \n" - MEMACCESS(0) "ld1 {v4.8b}, [%0], %5 \n" - MEMACCESS(0) "ld1 {v5.8b}, [%0], %5 \n" - MEMACCESS(0) "ld1 {v6.8b}, [%0], %5 \n" - MEMACCESS(0) "ld1 {v7.8b}, [%0] \n" "trn2 v16.8b, v0.8b, v1.8b \n" @@ -84,456 +78,345 @@ void TransposeWx8_NEON(const uint8* src, int src_stride, "mov %0, %2 \n" - MEMACCESS(0) "st1 {v17.8b}, [%0], %6 \n" - MEMACCESS(0) "st1 {v16.8b}, [%0], %6 \n" - MEMACCESS(0) "st1 {v19.8b}, [%0], %6 \n" - MEMACCESS(0) "st1 {v18.8b}, [%0], %6 \n" - MEMACCESS(0) "st1 {v21.8b}, [%0], %6 \n" - MEMACCESS(0) "st1 {v20.8b}, [%0], %6 \n" - MEMACCESS(0) "st1 {v23.8b}, [%0], %6 \n" - MEMACCESS(0) "st1 {v22.8b}, [%0] \n" "add %1, %1, #8 \n" // src += 8 "add %2, %2, %6, lsl #3 \n" // dst += 8 * dst_stride - "subs %3, %3, #8 \n" // w -= 8 + "subs %w3, %w3, #8 \n" // w -= 8 "b.ge 1b \n" - // add 8 back to counter. if the result is 0 there are - // no residuals. - "adds %3, %3, #8 \n" - "b.eq 4f \n" + // add 8 back to counter. if the result is 0 there are + // no residuals. 
+ "adds %w3, %w3, #8 \n" + "b.eq 4f \n" - // some residual, so between 1 and 7 lines left to transpose - "cmp %3, #2 \n" - "b.lt 3f \n" + // some residual, so between 1 and 7 lines left to transpose + "cmp %w3, #2 \n" + "b.lt 3f \n" - "cmp %3, #4 \n" - "b.lt 2f \n" + "cmp %w3, #4 \n" + "b.lt 2f \n" - // 4x8 block - "mov %0, %1 \n" - MEMACCESS(0) - "ld1 {v0.s}[0], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v0.s}[1], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v0.s}[2], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v0.s}[3], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v1.s}[0], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v1.s}[1], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v1.s}[2], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v1.s}[3], [%0] \n" + // 4x8 block + "mov %0, %1 \n" + "ld1 {v0.s}[0], [%0], %5 \n" + "ld1 {v0.s}[1], [%0], %5 \n" + "ld1 {v0.s}[2], [%0], %5 \n" + "ld1 {v0.s}[3], [%0], %5 \n" + "ld1 {v1.s}[0], [%0], %5 \n" + "ld1 {v1.s}[1], [%0], %5 \n" + "ld1 {v1.s}[2], [%0], %5 \n" + "ld1 {v1.s}[3], [%0] \n" - "mov %0, %2 \n" + "mov %0, %2 \n" - MEMACCESS(4) - "ld1 {v2.16b}, [%4] \n" + "ld1 {v2.16b}, [%4] \n" - "tbl v3.16b, {v0.16b}, v2.16b \n" - "tbl v0.16b, {v1.16b}, v2.16b \n" + "tbl v3.16b, {v0.16b}, v2.16b \n" + "tbl v0.16b, {v1.16b}, v2.16b \n" - // TODO(frkoenig): Rework shuffle above to - // write out with 4 instead of 8 writes. - MEMACCESS(0) - "st1 {v3.s}[0], [%0], %6 \n" - MEMACCESS(0) - "st1 {v3.s}[1], [%0], %6 \n" - MEMACCESS(0) - "st1 {v3.s}[2], [%0], %6 \n" - MEMACCESS(0) - "st1 {v3.s}[3], [%0] \n" + // TODO(frkoenig): Rework shuffle above to + // write out with 4 instead of 8 writes. + "st1 {v3.s}[0], [%0], %6 \n" + "st1 {v3.s}[1], [%0], %6 \n" + "st1 {v3.s}[2], [%0], %6 \n" + "st1 {v3.s}[3], [%0] \n" - "add %0, %2, #4 \n" - MEMACCESS(0) - "st1 {v0.s}[0], [%0], %6 \n" - MEMACCESS(0) - "st1 {v0.s}[1], [%0], %6 \n" - MEMACCESS(0) - "st1 {v0.s}[2], [%0], %6 \n" - MEMACCESS(0) - "st1 {v0.s}[3], [%0] \n" + "add %0, %2, #4 \n" + "st1 {v0.s}[0], [%0], %6 \n" + "st1 {v0.s}[1], [%0], %6 \n" + "st1 {v0.s}[2], [%0], %6 \n" + "st1 {v0.s}[3], [%0] \n" - "add %1, %1, #4 \n" // src += 4 - "add %2, %2, %6, lsl #2 \n" // dst += 4 * dst_stride - "subs %3, %3, #4 \n" // w -= 4 - "b.eq 4f \n" + "add %1, %1, #4 \n" // src += 4 + "add %2, %2, %6, lsl #2 \n" // dst += 4 * dst_stride + "subs %w3, %w3, #4 \n" // w -= 4 + "b.eq 4f \n" - // some residual, check to see if it includes a 2x8 block, - // or less - "cmp %3, #2 \n" - "b.lt 3f \n" + // some residual, check to see if it includes a 2x8 block, + // or less + "cmp %w3, #2 \n" + "b.lt 3f \n" - // 2x8 block - "2: \n" - "mov %0, %1 \n" - MEMACCESS(0) - "ld1 {v0.h}[0], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v1.h}[0], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v0.h}[1], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v1.h}[1], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v0.h}[2], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v1.h}[2], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v0.h}[3], [%0], %5 \n" - MEMACCESS(0) - "ld1 {v1.h}[3], [%0] \n" + // 2x8 block + "2: \n" + "mov %0, %1 \n" + "ld1 {v0.h}[0], [%0], %5 \n" + "ld1 {v1.h}[0], [%0], %5 \n" + "ld1 {v0.h}[1], [%0], %5 \n" + "ld1 {v1.h}[1], [%0], %5 \n" + "ld1 {v0.h}[2], [%0], %5 \n" + "ld1 {v1.h}[2], [%0], %5 \n" + "ld1 {v0.h}[3], [%0], %5 \n" + "ld1 {v1.h}[3], [%0] \n" - "trn2 v2.8b, v0.8b, v1.8b \n" - "trn1 v3.8b, v0.8b, v1.8b \n" + "trn2 v2.8b, v0.8b, v1.8b \n" + "trn1 v3.8b, v0.8b, v1.8b \n" - "mov %0, %2 \n" + "mov %0, %2 \n" - MEMACCESS(0) - "st1 {v3.8b}, [%0], %6 \n" - MEMACCESS(0) - "st1 {v2.8b}, [%0] \n" + "st1 {v3.8b}, [%0], %6 \n" + "st1 {v2.8b}, [%0] \n" - "add %1, %1, #2 \n" // 
src += 2 - "add %2, %2, %6, lsl #1 \n" // dst += 2 * dst_stride - "subs %3, %3, #2 \n" // w -= 2 - "b.eq 4f \n" + "add %1, %1, #2 \n" // src += 2 + "add %2, %2, %6, lsl #1 \n" // dst += 2 * dst_stride + "subs %w3, %w3, #2 \n" // w -= 2 + "b.eq 4f \n" - // 1x8 block - "3: \n" - MEMACCESS(1) - "ld1 {v0.b}[0], [%1], %5 \n" - MEMACCESS(1) - "ld1 {v0.b}[1], [%1], %5 \n" - MEMACCESS(1) - "ld1 {v0.b}[2], [%1], %5 \n" - MEMACCESS(1) - "ld1 {v0.b}[3], [%1], %5 \n" - MEMACCESS(1) - "ld1 {v0.b}[4], [%1], %5 \n" - MEMACCESS(1) - "ld1 {v0.b}[5], [%1], %5 \n" - MEMACCESS(1) - "ld1 {v0.b}[6], [%1], %5 \n" - MEMACCESS(1) - "ld1 {v0.b}[7], [%1] \n" + // 1x8 block + "3: \n" + "ld1 {v0.b}[0], [%1], %5 \n" + "ld1 {v0.b}[1], [%1], %5 \n" + "ld1 {v0.b}[2], [%1], %5 \n" + "ld1 {v0.b}[3], [%1], %5 \n" + "ld1 {v0.b}[4], [%1], %5 \n" + "ld1 {v0.b}[5], [%1], %5 \n" + "ld1 {v0.b}[6], [%1], %5 \n" + "ld1 {v0.b}[7], [%1] \n" - MEMACCESS(2) - "st1 {v0.8b}, [%2] \n" + "st1 {v0.8b}, [%2] \n" - "4: \n" + "4: \n" - : "=&r"(src_temp), // %0 - "+r"(src), // %1 - "+r"(dst), // %2 - "+r"(width64) // %3 - : "r"(&kVTbl4x4Transpose), // %4 - "r"(static_cast(src_stride)), // %5 - "r"(static_cast(dst_stride)) // %6 - : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", - "v17", "v18", "v19", "v20", "v21", "v22", "v23" - ); + : "=&r"(src_temp), // %0 + "+r"(src), // %1 + "+r"(dst), // %2 + "+r"(width) // %3 + : "r"(&kVTbl4x4Transpose), // %4 + "r"(static_cast(src_stride)), // %5 + "r"(static_cast(dst_stride)) // %6 + : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", + "v17", "v18", "v19", "v20", "v21", "v22", "v23"); } -static uint8 kVTbl4x4TransposeDi[32] = - { 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54, - 1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55}; +static const uint8_t kVTbl4x4TransposeDi[32] = { + 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54, + 1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55}; -void TransposeUVWx8_NEON(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, +void TransposeUVWx8_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, int width) { - const uint8* src_temp; - int64 width64 = (int64) width; // Work around clang 3.4 warning. - asm volatile ( - // loops are on blocks of 8. loop will stop when - // counter gets to or below 0. starting the counter - // at w-8 allow for this - "sub %4, %4, #8 \n" + const uint8_t* src_temp; + asm volatile( + // loops are on blocks of 8. loop will stop when + // counter gets to or below 0. starting the counter + // at w-8 allow for this + "sub %w4, %w4, #8 \n" - // handle 8x8 blocks. this should be the majority of the plane - "1: \n" - "mov %0, %1 \n" + // handle 8x8 blocks. 
this should be the majority of the plane + "1: \n" + "mov %0, %1 \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v1.16b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v2.16b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v3.16b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v4.16b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v5.16b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v6.16b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v7.16b}, [%0] \n" + "ld1 {v0.16b}, [%0], %5 \n" + "ld1 {v1.16b}, [%0], %5 \n" + "ld1 {v2.16b}, [%0], %5 \n" + "ld1 {v3.16b}, [%0], %5 \n" + "ld1 {v4.16b}, [%0], %5 \n" + "ld1 {v5.16b}, [%0], %5 \n" + "ld1 {v6.16b}, [%0], %5 \n" + "ld1 {v7.16b}, [%0] \n" - "trn1 v16.16b, v0.16b, v1.16b \n" - "trn2 v17.16b, v0.16b, v1.16b \n" - "trn1 v18.16b, v2.16b, v3.16b \n" - "trn2 v19.16b, v2.16b, v3.16b \n" - "trn1 v20.16b, v4.16b, v5.16b \n" - "trn2 v21.16b, v4.16b, v5.16b \n" - "trn1 v22.16b, v6.16b, v7.16b \n" - "trn2 v23.16b, v6.16b, v7.16b \n" + "trn1 v16.16b, v0.16b, v1.16b \n" + "trn2 v17.16b, v0.16b, v1.16b \n" + "trn1 v18.16b, v2.16b, v3.16b \n" + "trn2 v19.16b, v2.16b, v3.16b \n" + "trn1 v20.16b, v4.16b, v5.16b \n" + "trn2 v21.16b, v4.16b, v5.16b \n" + "trn1 v22.16b, v6.16b, v7.16b \n" + "trn2 v23.16b, v6.16b, v7.16b \n" - "trn1 v0.8h, v16.8h, v18.8h \n" - "trn2 v1.8h, v16.8h, v18.8h \n" - "trn1 v2.8h, v20.8h, v22.8h \n" - "trn2 v3.8h, v20.8h, v22.8h \n" - "trn1 v4.8h, v17.8h, v19.8h \n" - "trn2 v5.8h, v17.8h, v19.8h \n" - "trn1 v6.8h, v21.8h, v23.8h \n" - "trn2 v7.8h, v21.8h, v23.8h \n" + "trn1 v0.8h, v16.8h, v18.8h \n" + "trn2 v1.8h, v16.8h, v18.8h \n" + "trn1 v2.8h, v20.8h, v22.8h \n" + "trn2 v3.8h, v20.8h, v22.8h \n" + "trn1 v4.8h, v17.8h, v19.8h \n" + "trn2 v5.8h, v17.8h, v19.8h \n" + "trn1 v6.8h, v21.8h, v23.8h \n" + "trn2 v7.8h, v21.8h, v23.8h \n" - "trn1 v16.4s, v0.4s, v2.4s \n" - "trn2 v17.4s, v0.4s, v2.4s \n" - "trn1 v18.4s, v1.4s, v3.4s \n" - "trn2 v19.4s, v1.4s, v3.4s \n" - "trn1 v20.4s, v4.4s, v6.4s \n" - "trn2 v21.4s, v4.4s, v6.4s \n" - "trn1 v22.4s, v5.4s, v7.4s \n" - "trn2 v23.4s, v5.4s, v7.4s \n" + "trn1 v16.4s, v0.4s, v2.4s \n" + "trn2 v17.4s, v0.4s, v2.4s \n" + "trn1 v18.4s, v1.4s, v3.4s \n" + "trn2 v19.4s, v1.4s, v3.4s \n" + "trn1 v20.4s, v4.4s, v6.4s \n" + "trn2 v21.4s, v4.4s, v6.4s \n" + "trn1 v22.4s, v5.4s, v7.4s \n" + "trn2 v23.4s, v5.4s, v7.4s \n" - "mov %0, %2 \n" + "mov %0, %2 \n" - MEMACCESS(0) - "st1 {v16.d}[0], [%0], %6 \n" - MEMACCESS(0) - "st1 {v18.d}[0], [%0], %6 \n" - MEMACCESS(0) - "st1 {v17.d}[0], [%0], %6 \n" - MEMACCESS(0) - "st1 {v19.d}[0], [%0], %6 \n" - MEMACCESS(0) - "st1 {v16.d}[1], [%0], %6 \n" - MEMACCESS(0) - "st1 {v18.d}[1], [%0], %6 \n" - MEMACCESS(0) - "st1 {v17.d}[1], [%0], %6 \n" - MEMACCESS(0) - "st1 {v19.d}[1], [%0] \n" + "st1 {v16.d}[0], [%0], %6 \n" + "st1 {v18.d}[0], [%0], %6 \n" + "st1 {v17.d}[0], [%0], %6 \n" + "st1 {v19.d}[0], [%0], %6 \n" + "st1 {v16.d}[1], [%0], %6 \n" + "st1 {v18.d}[1], [%0], %6 \n" + "st1 {v17.d}[1], [%0], %6 \n" + "st1 {v19.d}[1], [%0] \n" - "mov %0, %3 \n" + "mov %0, %3 \n" - MEMACCESS(0) - "st1 {v20.d}[0], [%0], %7 \n" - MEMACCESS(0) - "st1 {v22.d}[0], [%0], %7 \n" - MEMACCESS(0) - "st1 {v21.d}[0], [%0], %7 \n" - MEMACCESS(0) - "st1 {v23.d}[0], [%0], %7 \n" - MEMACCESS(0) - "st1 {v20.d}[1], [%0], %7 \n" - MEMACCESS(0) - "st1 {v22.d}[1], [%0], %7 \n" - MEMACCESS(0) - "st1 {v21.d}[1], [%0], %7 \n" - MEMACCESS(0) - "st1 {v23.d}[1], [%0] \n" + "st1 {v20.d}[0], [%0], %7 \n" + "st1 {v22.d}[0], [%0], %7 \n" + "st1 {v21.d}[0], [%0], %7 \n" + "st1 {v23.d}[0], [%0], %7 \n" + "st1 {v20.d}[1], [%0], %7 \n" + "st1 
{v22.d}[1], [%0], %7 \n" + "st1 {v21.d}[1], [%0], %7 \n" + "st1 {v23.d}[1], [%0] \n" - "add %1, %1, #16 \n" // src += 8*2 - "add %2, %2, %6, lsl #3 \n" // dst_a += 8 * dst_stride_a - "add %3, %3, %7, lsl #3 \n" // dst_b += 8 * dst_stride_b - "subs %4, %4, #8 \n" // w -= 8 - "b.ge 1b \n" + "add %1, %1, #16 \n" // src += 8*2 + "add %2, %2, %6, lsl #3 \n" // dst_a += 8 * + // dst_stride_a + "add %3, %3, %7, lsl #3 \n" // dst_b += 8 * + // dst_stride_b + "subs %w4, %w4, #8 \n" // w -= 8 + "b.ge 1b \n" - // add 8 back to counter. if the result is 0 there are - // no residuals. - "adds %4, %4, #8 \n" - "b.eq 4f \n" + // add 8 back to counter. if the result is 0 there are + // no residuals. + "adds %w4, %w4, #8 \n" + "b.eq 4f \n" - // some residual, so between 1 and 7 lines left to transpose - "cmp %4, #2 \n" - "b.lt 3f \n" + // some residual, so between 1 and 7 lines left to transpose + "cmp %w4, #2 \n" + "b.lt 3f \n" - "cmp %4, #4 \n" - "b.lt 2f \n" + "cmp %w4, #4 \n" + "b.lt 2f \n" - // TODO(frkoenig): Clean this up - // 4x8 block - "mov %0, %1 \n" - MEMACCESS(0) - "ld1 {v0.8b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v1.8b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v2.8b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v3.8b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v4.8b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v5.8b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v6.8b}, [%0], %5 \n" - MEMACCESS(0) - "ld1 {v7.8b}, [%0] \n" + // TODO(frkoenig): Clean this up + // 4x8 block + "mov %0, %1 \n" + "ld1 {v0.8b}, [%0], %5 \n" + "ld1 {v1.8b}, [%0], %5 \n" + "ld1 {v2.8b}, [%0], %5 \n" + "ld1 {v3.8b}, [%0], %5 \n" + "ld1 {v4.8b}, [%0], %5 \n" + "ld1 {v5.8b}, [%0], %5 \n" + "ld1 {v6.8b}, [%0], %5 \n" + "ld1 {v7.8b}, [%0] \n" - MEMACCESS(8) - "ld1 {v30.16b}, [%8], #16 \n" - "ld1 {v31.16b}, [%8] \n" + "ld1 {v30.16b}, [%8], #16 \n" + "ld1 {v31.16b}, [%8] \n" - "tbl v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b \n" - "tbl v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b \n" - "tbl v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b \n" - "tbl v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b \n" + "tbl v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b \n" + "tbl v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b \n" + "tbl v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b \n" + "tbl v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b \n" - "mov %0, %2 \n" + "mov %0, %2 \n" - MEMACCESS(0) - "st1 {v16.s}[0], [%0], %6 \n" - MEMACCESS(0) - "st1 {v16.s}[1], [%0], %6 \n" - MEMACCESS(0) - "st1 {v16.s}[2], [%0], %6 \n" - MEMACCESS(0) - "st1 {v16.s}[3], [%0], %6 \n" + "st1 {v16.s}[0], [%0], %6 \n" + "st1 {v16.s}[1], [%0], %6 \n" + "st1 {v16.s}[2], [%0], %6 \n" + "st1 {v16.s}[3], [%0], %6 \n" - "add %0, %2, #4 \n" - MEMACCESS(0) - "st1 {v18.s}[0], [%0], %6 \n" - MEMACCESS(0) - "st1 {v18.s}[1], [%0], %6 \n" - MEMACCESS(0) - "st1 {v18.s}[2], [%0], %6 \n" - MEMACCESS(0) - "st1 {v18.s}[3], [%0] \n" + "add %0, %2, #4 \n" + "st1 {v18.s}[0], [%0], %6 \n" + "st1 {v18.s}[1], [%0], %6 \n" + "st1 {v18.s}[2], [%0], %6 \n" + "st1 {v18.s}[3], [%0] \n" - "mov %0, %3 \n" + "mov %0, %3 \n" - MEMACCESS(0) - "st1 {v17.s}[0], [%0], %7 \n" - MEMACCESS(0) - "st1 {v17.s}[1], [%0], %7 \n" - MEMACCESS(0) - "st1 {v17.s}[2], [%0], %7 \n" - MEMACCESS(0) - "st1 {v17.s}[3], [%0], %7 \n" + "st1 {v17.s}[0], [%0], %7 \n" + "st1 {v17.s}[1], [%0], %7 \n" + "st1 {v17.s}[2], [%0], %7 \n" + "st1 {v17.s}[3], [%0], %7 \n" - "add %0, %3, #4 \n" - MEMACCESS(0) - "st1 {v19.s}[0], [%0], %7 \n" - MEMACCESS(0) - "st1 {v19.s}[1], [%0], %7 \n" - MEMACCESS(0) - "st1 {v19.s}[2], [%0], %7 \n" - 
MEMACCESS(0) - "st1 {v19.s}[3], [%0] \n" + "add %0, %3, #4 \n" + "st1 {v19.s}[0], [%0], %7 \n" + "st1 {v19.s}[1], [%0], %7 \n" + "st1 {v19.s}[2], [%0], %7 \n" + "st1 {v19.s}[3], [%0] \n" - "add %1, %1, #8 \n" // src += 4 * 2 - "add %2, %2, %6, lsl #2 \n" // dst_a += 4 * dst_stride_a - "add %3, %3, %7, lsl #2 \n" // dst_b += 4 * dst_stride_b - "subs %4, %4, #4 \n" // w -= 4 - "b.eq 4f \n" + "add %1, %1, #8 \n" // src += 4 * 2 + "add %2, %2, %6, lsl #2 \n" // dst_a += 4 * + // dst_stride_a + "add %3, %3, %7, lsl #2 \n" // dst_b += 4 * + // dst_stride_b + "subs %w4, %w4, #4 \n" // w -= 4 + "b.eq 4f \n" - // some residual, check to see if it includes a 2x8 block, - // or less - "cmp %4, #2 \n" - "b.lt 3f \n" + // some residual, check to see if it includes a 2x8 block, + // or less + "cmp %w4, #2 \n" + "b.lt 3f \n" - // 2x8 block - "2: \n" - "mov %0, %1 \n" - MEMACCESS(0) - "ld2 {v0.h, v1.h}[0], [%0], %5 \n" - MEMACCESS(0) - "ld2 {v2.h, v3.h}[0], [%0], %5 \n" - MEMACCESS(0) - "ld2 {v0.h, v1.h}[1], [%0], %5 \n" - MEMACCESS(0) - "ld2 {v2.h, v3.h}[1], [%0], %5 \n" - MEMACCESS(0) - "ld2 {v0.h, v1.h}[2], [%0], %5 \n" - MEMACCESS(0) - "ld2 {v2.h, v3.h}[2], [%0], %5 \n" - MEMACCESS(0) - "ld2 {v0.h, v1.h}[3], [%0], %5 \n" - MEMACCESS(0) - "ld2 {v2.h, v3.h}[3], [%0] \n" + // 2x8 block + "2: \n" + "mov %0, %1 \n" + "ld2 {v0.h, v1.h}[0], [%0], %5 \n" + "ld2 {v2.h, v3.h}[0], [%0], %5 \n" + "ld2 {v0.h, v1.h}[1], [%0], %5 \n" + "ld2 {v2.h, v3.h}[1], [%0], %5 \n" + "ld2 {v0.h, v1.h}[2], [%0], %5 \n" + "ld2 {v2.h, v3.h}[2], [%0], %5 \n" + "ld2 {v0.h, v1.h}[3], [%0], %5 \n" + "ld2 {v2.h, v3.h}[3], [%0] \n" - "trn1 v4.8b, v0.8b, v2.8b \n" - "trn2 v5.8b, v0.8b, v2.8b \n" - "trn1 v6.8b, v1.8b, v3.8b \n" - "trn2 v7.8b, v1.8b, v3.8b \n" + "trn1 v4.8b, v0.8b, v2.8b \n" + "trn2 v5.8b, v0.8b, v2.8b \n" + "trn1 v6.8b, v1.8b, v3.8b \n" + "trn2 v7.8b, v1.8b, v3.8b \n" - "mov %0, %2 \n" + "mov %0, %2 \n" - MEMACCESS(0) - "st1 {v4.d}[0], [%0], %6 \n" - MEMACCESS(0) - "st1 {v6.d}[0], [%0] \n" + "st1 {v4.d}[0], [%0], %6 \n" + "st1 {v6.d}[0], [%0] \n" - "mov %0, %3 \n" + "mov %0, %3 \n" - MEMACCESS(0) - "st1 {v5.d}[0], [%0], %7 \n" - MEMACCESS(0) - "st1 {v7.d}[0], [%0] \n" + "st1 {v5.d}[0], [%0], %7 \n" + "st1 {v7.d}[0], [%0] \n" - "add %1, %1, #4 \n" // src += 2 * 2 - "add %2, %2, %6, lsl #1 \n" // dst_a += 2 * dst_stride_a - "add %3, %3, %7, lsl #1 \n" // dst_b += 2 * dst_stride_b - "subs %4, %4, #2 \n" // w -= 2 - "b.eq 4f \n" + "add %1, %1, #4 \n" // src += 2 * 2 + "add %2, %2, %6, lsl #1 \n" // dst_a += 2 * + // dst_stride_a + "add %3, %3, %7, lsl #1 \n" // dst_b += 2 * + // dst_stride_b + "subs %w4, %w4, #2 \n" // w -= 2 + "b.eq 4f \n" - // 1x8 block - "3: \n" - MEMACCESS(1) - "ld2 {v0.b, v1.b}[0], [%1], %5 \n" - MEMACCESS(1) - "ld2 {v0.b, v1.b}[1], [%1], %5 \n" - MEMACCESS(1) - "ld2 {v0.b, v1.b}[2], [%1], %5 \n" - MEMACCESS(1) - "ld2 {v0.b, v1.b}[3], [%1], %5 \n" - MEMACCESS(1) - "ld2 {v0.b, v1.b}[4], [%1], %5 \n" - MEMACCESS(1) - "ld2 {v0.b, v1.b}[5], [%1], %5 \n" - MEMACCESS(1) - "ld2 {v0.b, v1.b}[6], [%1], %5 \n" - MEMACCESS(1) - "ld2 {v0.b, v1.b}[7], [%1] \n" + // 1x8 block + "3: \n" + "ld2 {v0.b, v1.b}[0], [%1], %5 \n" + "ld2 {v0.b, v1.b}[1], [%1], %5 \n" + "ld2 {v0.b, v1.b}[2], [%1], %5 \n" + "ld2 {v0.b, v1.b}[3], [%1], %5 \n" + "ld2 {v0.b, v1.b}[4], [%1], %5 \n" + "ld2 {v0.b, v1.b}[5], [%1], %5 \n" + "ld2 {v0.b, v1.b}[6], [%1], %5 \n" + "ld2 {v0.b, v1.b}[7], [%1] \n" - MEMACCESS(2) - "st1 {v0.d}[0], [%2] \n" - MEMACCESS(3) - "st1 {v1.d}[0], [%3] \n" + "st1 {v0.d}[0], [%2] \n" + "st1 {v1.d}[0], [%3] \n" - "4: \n" 
+ "4: \n" - : "=&r"(src_temp), // %0 - "+r"(src), // %1 - "+r"(dst_a), // %2 - "+r"(dst_b), // %3 - "+r"(width64) // %4 - : "r"(static_cast(src_stride)), // %5 - "r"(static_cast(dst_stride_a)), // %6 - "r"(static_cast(dst_stride_b)), // %7 - "r"(&kVTbl4x4TransposeDi) // %8 - : "memory", "cc", - "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", - "v30", "v31" - ); + : "=&r"(src_temp), // %0 + "+r"(src), // %1 + "+r"(dst_a), // %2 + "+r"(dst_b), // %3 + "+r"(width) // %4 + : "r"(static_cast(src_stride)), // %5 + "r"(static_cast(dst_stride_a)), // %6 + "r"(static_cast(dst_stride_b)), // %7 + "r"(&kVTbl4x4TransposeDi) // %8 + : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", + "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31"); } #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) diff --git a/media/libvpx/libvpx/third_party/libyuv/source/rotate_win.cc b/media/libvpx/libvpx/third_party/libyuv/source/rotate_win.cc index 1300fc0feb84..e887dd525c75 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/rotate_win.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/rotate_win.cc @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "libyuv/row.h" #include "libyuv/rotate_row.h" +#include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { @@ -17,17 +17,19 @@ extern "C" { #endif // This module is for 32 bit Visual C x86 and clangcl -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) -__declspec(naked) -void TransposeWx8_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width) { +__declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width) { __asm { push edi push esi push ebp - mov eax, [esp + 12 + 4] // src - mov edi, [esp + 12 + 8] // src_stride + mov eax, [esp + 12 + 4] // src + mov edi, [esp + 12 + 8] // src_stride mov edx, [esp + 12 + 12] // dst mov esi, [esp + 12 + 16] // dst_stride mov ecx, [esp + 12 + 20] // width @@ -110,18 +112,20 @@ void TransposeWx8_SSSE3(const uint8* src, int src_stride, } } -__declspec(naked) -void TransposeUVWx8_SSE2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int w) { +__declspec(naked) void TransposeUVWx8_SSE2(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int w) { __asm { push ebx push esi push edi push ebp - mov eax, [esp + 16 + 4] // src - mov edi, [esp + 16 + 8] // src_stride + mov eax, [esp + 16 + 4] // src + mov edi, [esp + 16 + 8] // src_stride mov edx, [esp + 16 + 12] // dst_a mov esi, [esp + 16 + 16] // dst_stride_a mov ebx, [esp + 16 + 20] // dst_b @@ -133,9 +137,9 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride, mov ecx, [ecx + 16 + 28] // w align 4 - convertloop: // Read in the data from the source pointer. // First round of bit swap. + convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + edi] lea eax, [eax + 2 * edi] @@ -162,13 +166,13 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride, lea eax, [eax + 2 * edi] movdqu [esp], xmm5 // backup xmm5 neg edi - movdqa xmm5, xmm6 // use xmm5 as temp register. + movdqa xmm5, xmm6 // use xmm5 as temp register. punpcklbw xmm6, xmm7 punpckhbw xmm5, xmm7 movdqa xmm7, xmm5 lea eax, [eax + 8 * edi + 16] neg edi - // Second round of bit swap. 
+ // Second round of bit swap. movdqa xmm5, xmm0 punpcklwd xmm0, xmm2 punpckhwd xmm5, xmm2 @@ -183,12 +187,13 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride, movdqa xmm6, xmm5 movdqu xmm5, [esp] // restore xmm5 movdqu [esp], xmm6 // backup xmm6 - movdqa xmm6, xmm5 // use xmm6 as temp register. + movdqa xmm6, xmm5 // use xmm6 as temp register. punpcklwd xmm5, xmm7 punpckhwd xmm6, xmm7 movdqa xmm7, xmm6 - // Third round of bit swap. - // Write to the destination pointer. + + // Third round of bit swap. + // Write to the destination pointer. movdqa xmm6, xmm0 punpckldq xmm0, xmm4 punpckhdq xmm6, xmm4 @@ -200,7 +205,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride, lea edx, [edx + 2 * esi] movhpd qword ptr [ebx + ebp], xmm4 lea ebx, [ebx + 2 * ebp] - movdqa xmm0, xmm2 // use xmm0 as the temp register. + movdqa xmm0, xmm2 // use xmm0 as the temp register. punpckldq xmm2, xmm6 movlpd qword ptr [edx], xmm2 movhpd qword ptr [ebx], xmm2 @@ -209,7 +214,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride, lea edx, [edx + 2 * esi] movhpd qword ptr [ebx + ebp], xmm0 lea ebx, [ebx + 2 * ebp] - movdqa xmm0, xmm1 // use xmm0 as the temp register. + movdqa xmm0, xmm1 // use xmm0 as the temp register. punpckldq xmm1, xmm5 movlpd qword ptr [edx], xmm1 movhpd qword ptr [ebx], xmm1 @@ -218,7 +223,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride, lea edx, [edx + 2 * esi] movhpd qword ptr [ebx + ebp], xmm0 lea ebx, [ebx + 2 * ebp] - movdqa xmm0, xmm3 // use xmm0 as the temp register. + movdqa xmm0, xmm3 // use xmm0 as the temp register. punpckldq xmm3, xmm7 movlpd qword ptr [edx], xmm3 movhpd qword ptr [ebx], xmm3 diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_any.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_any.cc index 494164fd0233..e91560c44c69 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/row_any.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/row_any.cc @@ -19,30 +19,38 @@ namespace libyuv { extern "C" { #endif +// memset for temp is meant to clear the source buffer (not dest) so that +// SIMD that reads full multiple of 16 bytes will not trigger msan errors. +// memset is not needed for production, as the garbage values are processed but +// not used, although there may be edge cases for subsampling. +// The size of the buffer is based on the largest read, which can be inferred +// by the source type (e.g. ARGB) and the mask (last parameter), or by examining +// the source code for how much the source pointers are advanced. + // Subsampled source needs to be increase by 1 of not even. 
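[editor's note] The SS() helper defined just below rounds a subsampled width up rather than down, so an odd luma width still yields enough chroma samples to cover the last pixel. A minimal standalone check of that behavior (values are illustrative):

    #include <assert.h>

    #define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))

    int main(void) {
      assert(SS(8, 1) == 4); /* even width: plain halving */
      assert(SS(7, 1) == 4); /* odd width rounds up, covering the last pixel */
      assert(SS(7, 2) == 2); /* shift-by-2 (4:1:1-style) also rounds up */
      return 0;
    }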
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift)) // Any 4 planes to 1 with yuvconstants -#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ - const uint8* a_buf, uint8* dst_ptr, \ - const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 5]); \ - memset(temp, 0, 64 * 4); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, y_buf + n, r); \ - memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 192, a_buf + n, r); \ - ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \ - yuvconstants, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \ - SS(r, DUVSHIFT) * BPP); \ - } +#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ + void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ + const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \ + const struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 5]); \ + memset(temp, 0, 64 * 4); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n, r); \ + memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(temp + 192, a_buf + n, r); \ + ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \ + yuvconstants, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \ + SS(r, DUVSHIFT) * BPP); \ + } #ifdef HAS_I422ALPHATOARGBROW_SSSE3 ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7) @@ -53,36 +61,57 @@ ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15) #ifdef HAS_I422ALPHATOARGBROW_NEON ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7) #endif +#ifdef HAS_I422ALPHATOARGBROW_MSA +ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7) +#endif #undef ANY41C // Any 3 planes to 1. 
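[editor's note] The ANY41C macro above — and every ANY* wrapper in this file, starting with the three-plane ANY31 that follows — uses the same tail-handling scheme: run the SIMD kernel over the largest multiple of MASK + 1 pixels, stage the remaining r pixels in a zeroed, padded temp buffer, run the kernel once more at full width, and copy back only the valid outputs. A minimal single-plane sketch of that scheme, with a hypothetical 16-pixel kernel RowSIMD (not a libyuv function):

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical kernel: handles widths that are multiples of 16. */
    void RowSIMD(const uint8_t* src, uint8_t* dst, int width);

    void Row_Any(const uint8_t* src, uint8_t* dst, int width) {
      uint8_t temp[128 * 2];
      memset(temp, 0, 128);           /* zero the staged source for msan */
      int r = width & 15;             /* tail pixels */
      int n = width & ~15;            /* largest multiple of 16 */
      if (n > 0) {
        RowSIMD(src, dst, n);         /* fast path over the aligned body */
      }
      memcpy(temp, src + n, r);       /* stage the tail; padding stays zero */
      RowSIMD(temp, temp + 128, 16);  /* full-width pass; reads past r hit
                                         zeroed padding, so they are defined */
      memcpy(dst + n, temp + 128, r); /* keep only the r valid results */
    }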
-#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ - uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 4]); \ - memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \ - } \ - memcpy(temp, y_buf + n, r); \ - memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ - SS(r, DUVSHIFT) * BPP); \ - } +#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ + void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ + const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 4]); \ + memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \ + } \ + memcpy(temp, y_buf + n, r); \ + memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ + SS(r, DUVSHIFT) * BPP); \ + } + +// Merge functions. +#ifdef HAS_MERGERGBROW_SSSE3 +ANY31(MergeRGBRow_Any_SSSE3, MergeRGBRow_SSSE3, 0, 0, 3, 15) +#endif +#ifdef HAS_MERGERGBROW_NEON +ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15) +#endif #ifdef HAS_I422TOYUY2ROW_SSE2 ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15) ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15) #endif +#ifdef HAS_I422TOYUY2ROW_AVX2 +ANY31(I422ToYUY2Row_Any_AVX2, I422ToYUY2Row_AVX2, 1, 1, 4, 31) +ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31) +#endif #ifdef HAS_I422TOYUY2ROW_NEON ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15) #endif +#ifdef HAS_I422TOYUY2ROW_MSA +ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31) +#endif #ifdef HAS_I422TOUYVYROW_NEON ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15) #endif +#ifdef HAS_I422TOUYVYROW_MSA +ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31) +#endif #ifdef HAS_BLENDPLANEROW_AVX2 ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31) #endif @@ -94,35 +123,38 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7) // Note that odd width replication includes 444 due to implementation // on arm that subsamples 444 to 422 internally. 
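[editor's note] The replication noted above is what the `if (width & 1)` branch of ANY31C (next) implements: with 4:2:2 input and an odd width, the staged tail ends on a half pair, so the final U and V samples are duplicated before the last kernel pass. Isolated as a sketch, using the updated macro's offsets (Y at temp + 0, U at temp + 128, V at temp + 256):

    #include <stdint.h>

    #define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))

    /* r = width & MASK; after the tail memcpys, duplicate the last chroma
       sample so the kernel's paired reads see initialized data. */
    static void ReplicateOddChroma(uint8_t* temp, int r, int width) {
      if (width & 1) {
        temp[128 + SS(r, 1)] = temp[128 + SS(r, 1) - 1]; /* repeat last U */
        temp[256 + SS(r, 1)] = temp[256 + SS(r, 1) - 1]; /* repeat last V */
      }
    }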
// Any 3 planes to 1 with yuvconstants -#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \ - uint8* dst_ptr, const struct YuvConstants* yuvconstants, \ - int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 4]); \ - memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, y_buf + n, r); \ - memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ - if (width & 1) { \ - temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1]; \ - temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \ - } \ - ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, \ - yuvconstants, MASK + 1); \ - memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ - SS(r, DUVSHIFT) * BPP); \ - } +#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ + void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ + const uint8_t* v_buf, uint8_t* dst_ptr, \ + const struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(uint8_t temp[128 * 4]); \ + memset(temp, 0, 128 * 3); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n, r); \ + memcpy(temp + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + memcpy(temp + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ + if (width & 1) { \ + temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \ + temp[256 + SS(r, UVSHIFT)] = temp[256 + SS(r, UVSHIFT) - 1]; \ + } \ + ANY_SIMD(temp, temp + 128, temp + 256, temp + 384, yuvconstants, \ + MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 384, \ + SS(r, DUVSHIFT) * BPP); \ + } #ifdef HAS_I422TOARGBROW_SSSE3 ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) #endif -#ifdef HAS_I411TOARGBROW_SSSE3 -ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7) +#ifdef HAS_I422TOAR30ROW_SSSE3 +ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7) +#endif +#ifdef HAS_I422TOAR30ROW_AVX2 +ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15) #endif #ifdef HAS_I444TOARGBROW_SSSE3 ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) @@ -130,10 +162,10 @@ ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) -ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7) +ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 15) #endif // HAS_I444TOARGBROW_SSSE3 #ifdef HAS_I422TORGB24ROW_AVX2 -ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15) +ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31) #endif #ifdef HAS_I422TOARGBROW_AVX2 ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) @@ -144,47 +176,87 @@ ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) #ifdef HAS_I444TOARGBROW_AVX2 ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) #endif -#ifdef HAS_I411TOARGBROW_AVX2 -ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15) -#endif #ifdef HAS_I422TOARGB4444ROW_AVX2 -ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 
1, 0, 2, 7) +ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15) #endif #ifdef HAS_I422TOARGB1555ROW_AVX2 -ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7) +ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15) #endif #ifdef HAS_I422TORGB565ROW_AVX2 -ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7) +ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15) #endif #ifdef HAS_I422TOARGBROW_NEON ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) -ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7) ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) #endif +#ifdef HAS_I422TOARGBROW_MSA +ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7) +ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7) +ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7) +ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15) +ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7) +ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7) +ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7) +#endif #undef ANY31C +// Any 3 planes of 16 bit to 1 with yuvconstants +// TODO(fbarchard): consider sharing this code with ANY31C +#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \ + void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \ + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \ + int width) { \ + SIMD_ALIGNED(T temp[16 * 3]); \ + SIMD_ALIGNED(uint8_t out[64]); \ + memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n, r * SBPP); \ + memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ + memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ + ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \ + } + +#ifdef HAS_I210TOAR30ROW_SSSE3 +ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7) +#endif +#ifdef HAS_I210TOARGBROW_SSSE3 +ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7) +#endif +#ifdef HAS_I210TOARGBROW_AVX2 +ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15) +#endif +#ifdef HAS_I210TOAR30ROW_AVX2 +ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15) +#endif +#undef ANY31CT + // Any 2 planes to 1. 
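[editor's note] Before the two-plane wrappers: for context, callers pick between the _Any_ and aligned variants at runtime. A sketch of the usual libyuv selection pattern (TestCpuFlag, kCpuHasAVX2 and IS_ALIGNED are libyuv facilities; this particular call site is illustrative):

    #include "libyuv/cpu_id.h" /* TestCpuFlag, kCpuHasAVX2 */
    #include "libyuv/row.h"    /* HAS_* macros and row prototypes */

    typedef void (*I422ToARGBRowFn)(const uint8_t* y_buf, const uint8_t* u_buf,
                                    const uint8_t* v_buf, uint8_t* dst_argb,
                                    const struct YuvConstants* yuvconstants,
                                    int width);

    static I422ToARGBRowFn PickI422ToARGBRow(int width) {
      I422ToARGBRowFn fn = I422ToARGBRow_C;
    #if defined(HAS_I422TOARGBROW_AVX2)
      if (TestCpuFlag(kCpuHasAVX2)) {
        fn = I422ToARGBRow_Any_AVX2;  /* any width */
        if (IS_ALIGNED(width, 16)) {
          fn = I422ToARGBRow_AVX2;    /* multiple of 16: no wrapper needed */
        }
      }
    #endif
      return fn;
    }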
-#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \ - uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 3]); \ - memset(temp, 0, 64 * 2); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \ - } \ - memcpy(temp, y_buf + n * SBPP, r * SBPP); \ - memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ - SS(r, UVSHIFT) * SBPP2); \ - ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ - } +#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ + void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \ + int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 3]); \ + memset(temp, 0, 64 * 2); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \ + } \ + memcpy(temp, y_buf + n * SBPP, r * SBPP); \ + memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ + SS(r, UVSHIFT) * SBPP2); \ + ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ + } // Merge functions. #ifdef HAS_MERGEUVROW_SSE2 @@ -196,6 +268,9 @@ ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31) #ifdef HAS_MERGEUVROW_NEON ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15) #endif +#ifdef HAS_MERGEUVROW_MSA +ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15) +#endif // Math functions. #ifdef HAS_ARGBMULTIPLYROW_SSE2 @@ -225,44 +300,61 @@ ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7) #ifdef HAS_ARGBSUBTRACTROW_NEON ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7) #endif +#ifdef HAS_ARGBMULTIPLYROW_MSA +ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3) +#endif +#ifdef HAS_ARGBADDROW_MSA +ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7) +#endif +#ifdef HAS_ARGBSUBTRACTROW_MSA +ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7) +#endif #ifdef HAS_SOBELROW_SSE2 ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15) #endif #ifdef HAS_SOBELROW_NEON ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7) #endif +#ifdef HAS_SOBELROW_MSA +ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15) +#endif #ifdef HAS_SOBELTOPLANEROW_SSE2 ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15) #endif #ifdef HAS_SOBELTOPLANEROW_NEON ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15) #endif +#ifdef HAS_SOBELTOPLANEROW_MSA +ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31) +#endif #ifdef HAS_SOBELXYROW_SSE2 ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15) #endif #ifdef HAS_SOBELXYROW_NEON ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7) #endif +#ifdef HAS_SOBELXYROW_MSA +ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15) +#endif #undef ANY21 // Any 2 planes to 1 with yuvconstants -#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \ - uint8* dst_ptr, const struct YuvConstants* yuvconstants, \ - int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 3]); \ - memset(temp, 0, 64 * 2); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, y_buf + n * SBPP, r * SBPP); \ - memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ - SS(r, UVSHIFT) * SBPP2); \ - 
ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ - } +#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ + void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \ + const struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(uint8_t temp[128 * 3]); \ + memset(temp, 0, 128 * 2); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ + } \ + memcpy(temp, y_buf + n * SBPP, r * SBPP); \ + memcpy(temp + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \ + SS(r, UVSHIFT) * SBPP2); \ + ANY_SIMD(temp, temp + 128, temp + 256, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 256, r * BPP); \ + } // Biplanar to RGB. #ifdef HAS_NV12TOARGBROW_SSSE3 @@ -274,6 +366,9 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) #ifdef HAS_NV12TOARGBROW_NEON ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) #endif +#ifdef HAS_NV12TOARGBROW_MSA +ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7) +#endif #ifdef HAS_NV21TOARGBROW_SSSE3 ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7) #endif @@ -283,6 +378,27 @@ ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15) #ifdef HAS_NV21TOARGBROW_NEON ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7) #endif +#ifdef HAS_NV21TOARGBROW_MSA +ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7) +#endif +#ifdef HAS_NV12TORGB24ROW_NEON +ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7) +#endif +#ifdef HAS_NV21TORGB24ROW_NEON +ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7) +#endif +#ifdef HAS_NV12TORGB24ROW_SSSE3 +ANY21C(NV12ToRGB24Row_Any_SSSE3, NV12ToRGB24Row_SSSE3, 1, 1, 2, 3, 15) +#endif +#ifdef HAS_NV21TORGB24ROW_SSSE3 +ANY21C(NV21ToRGB24Row_Any_SSSE3, NV21ToRGB24Row_SSSE3, 1, 1, 2, 3, 15) +#endif +#ifdef HAS_NV12TORGB24ROW_AVX2 +ANY21C(NV12ToRGB24Row_Any_AVX2, NV12ToRGB24Row_AVX2, 1, 1, 2, 3, 31) +#endif +#ifdef HAS_NV21TORGB24ROW_AVX2 +ANY21C(NV21ToRGB24Row_Any_AVX2, NV21ToRGB24Row_AVX2, 1, 1, 2, 3, 31) +#endif #ifdef HAS_NV12TORGB565ROW_SSSE3 ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) #endif @@ -292,22 +408,25 @@ ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) #ifdef HAS_NV12TORGB565ROW_NEON ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) #endif +#ifdef HAS_NV12TORGB565ROW_MSA +ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7) +#endif #undef ANY21C // Any 1 to 1. 
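[editor's note] A worked example of the width split used by ANY21C above and by the single-plane ANY11 defined next, for MASK = 15 (values illustrative):

    #include <assert.h>

    int main(void) {
      const int width = 25;
      const int r = width & 15;  /* tail pixels     */
      const int n = width & ~15; /* fast-path width */
      assert(r == 9 && n == 16);
      /* The kernel runs over n = 16 pixels in place, then over MASK + 1 = 16
         staged pixels; only the first r = 9 staged outputs are copied out. */
      return 0;
    }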
-#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[128 * 2]); \ - memset(temp, 0, 128); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_ptr, n); \ - } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ - ANY_SIMD(temp, temp + 128, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ - } +#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ + SIMD_ALIGNED(uint8_t temp[128 * 2]); \ + memset(temp, 0, 128); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, n); \ + } \ + memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ + ANY_SIMD(temp, temp + 128, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ + } #ifdef HAS_COPYROW_AVX ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63) @@ -325,6 +444,15 @@ ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3) ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3) ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3) #endif +#if defined(HAS_ARGBTORGB24ROW_AVX2) +ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31) +#endif +#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) +ANY11(ARGBToRGB24Row_Any_AVX512VBMI, ARGBToRGB24Row_AVX512VBMI, 0, 4, 3, 31) +#endif +#if defined(HAS_ARGBTORAWROW_AVX2) +ANY11(ARGBToRAWRow_Any_AVX2, ARGBToRAWRow_AVX2, 0, 4, 3, 31) +#endif #if defined(HAS_ARGBTORGB565ROW_AVX2) ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) #endif @@ -332,6 +460,18 @@ ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7) #endif +#if defined(HAS_ABGRTOAR30ROW_SSSE3) +ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3) +#endif +#if defined(HAS_ARGBTOAR30ROW_SSSE3) +ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3) +#endif +#if defined(HAS_ABGRTOAR30ROW_AVX2) +ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7) +#endif +#if defined(HAS_ARGBTOAR30ROW_AVX2) +ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7) +#endif #if defined(HAS_J400TOARGBROW_SSE2) ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7) #endif @@ -372,9 +512,21 @@ ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7) ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7) ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7) #endif +#if defined(HAS_ARGBTORGB24ROW_MSA) +ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15) +ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15) +ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7) +ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7) +ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7) +ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15) +ANY11(I400ToARGBRow_Any_MSA, I400ToARGBRow_MSA, 0, 1, 4, 15) +#endif #if defined(HAS_RAWTORGB24ROW_NEON) ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7) #endif +#if defined(HAS_RAWTORGB24ROW_MSA) +ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15) +#endif #ifdef HAS_ARGBTOYROW_AVX2 ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 
4, 1, 31) #endif @@ -403,30 +555,57 @@ ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15) #ifdef HAS_ARGBTOYROW_NEON ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7) #endif +#ifdef HAS_ARGBTOYROW_MSA +ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15) +#endif #ifdef HAS_ARGBTOYJROW_NEON ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7) #endif +#ifdef HAS_ARGBTOYJROW_MSA +ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15) +#endif #ifdef HAS_BGRATOYROW_NEON ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7) #endif +#ifdef HAS_BGRATOYROW_MSA +ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15) +#endif #ifdef HAS_ABGRTOYROW_NEON ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7) #endif +#ifdef HAS_ABGRTOYROW_MSA +ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7) +#endif #ifdef HAS_RGBATOYROW_NEON ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7) #endif +#ifdef HAS_RGBATOYROW_MSA +ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15) +#endif #ifdef HAS_RGB24TOYROW_NEON ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7) #endif +#ifdef HAS_RGB24TOYROW_MSA +ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15) +#endif #ifdef HAS_RAWTOYROW_NEON ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7) #endif +#ifdef HAS_RAWTOYROW_MSA +ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15) +#endif #ifdef HAS_RGB565TOYROW_NEON ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7) #endif +#ifdef HAS_RGB565TOYROW_MSA +ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15) +#endif #ifdef HAS_ARGB1555TOYROW_NEON ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7) #endif +#ifdef HAS_ARGB1555TOYROW_MSA +ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15) +#endif #ifdef HAS_ARGB4444TOYROW_NEON ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7) #endif @@ -434,23 +613,44 @@ ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7) ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15) #endif #ifdef HAS_UYVYTOYROW_NEON -ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 0, 2, 1, 15) +ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15) +#endif +#ifdef HAS_YUY2TOYROW_MSA +ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31) +#endif +#ifdef HAS_UYVYTOYROW_MSA +ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31) #endif #ifdef HAS_RGB24TOARGBROW_NEON ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7) #endif +#ifdef HAS_RGB24TOARGBROW_MSA +ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15) +#endif #ifdef HAS_RAWTOARGBROW_NEON ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7) #endif +#ifdef HAS_RAWTOARGBROW_MSA +ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15) +#endif #ifdef HAS_RGB565TOARGBROW_NEON ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7) #endif +#ifdef HAS_RGB565TOARGBROW_MSA +ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15) +#endif #ifdef HAS_ARGB1555TOARGBROW_NEON ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7) #endif +#ifdef HAS_ARGB1555TOARGBROW_MSA +ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15) +#endif #ifdef HAS_ARGB4444TOARGBROW_NEON ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7) #endif +#ifdef HAS_ARGB4444TOARGBROW_MSA +ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15) +#endif #ifdef HAS_ARGBATTENUATEROW_SSSE3 ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3) #endif @@ -466,29 +666,38 @@ 
ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7) #ifdef HAS_ARGBATTENUATEROW_NEON ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) #endif +#ifdef HAS_ARGBATTENUATEROW_MSA +ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7) +#endif #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7) #endif +#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 +ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31) +#endif #ifdef HAS_ARGBEXTRACTALPHAROW_NEON ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15) #endif +#ifdef HAS_ARGBEXTRACTALPHAROW_MSA +ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15) +#endif #undef ANY11 // Any 1 to 1 blended. Destination is read, modify, write. -#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[128 * 2]); \ - memset(temp, 0, 128 * 2); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_ptr, n); \ - } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ - memcpy(temp + 128, dst_ptr + n * BPP, r * BPP); \ - ANY_SIMD(temp, temp + 128, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ - } +#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 2]); \ + memset(temp, 0, 64 * 2); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, n); \ + } \ + memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ + memcpy(temp + 64, dst_ptr + n * BPP, r * BPP); \ + ANY_SIMD(temp, temp + 64, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ + } #ifdef HAS_ARGBCOPYALPHAROW_AVX2 ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15) @@ -506,61 +715,184 @@ ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7) // Any 1 to 1 with parameter. 
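[editor's note] The ANY11P wrapper defined next forwards an extra parameter to the kernel unchanged. For the shuffle rows it instantiates, that parameter is a 16-byte lane-control table; a usage sketch (the table mirrors libyuv's ARGB-to-ABGR channel-swap mask, reproduced here for illustration):

    #include <stdint.h>
    #include "libyuv/row.h" /* ARGBShuffleRow_Any_SSSE3 prototype */

    /* Output byte i of each 16-byte group takes input byte table[i],
       exchanging R and B in every 4-byte pixel. */
    static const uint8_t kShuffleARGBToABGR[16] = {2,  1, 0, 3,  6,  5,  4,  7,
                                                   10, 9, 8, 11, 14, 13, 12, 15};

    static void SwapRB(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
      ARGBShuffleRow_Any_SSSE3(src_argb, dst_abgr, kShuffleARGBToABGR, width);
    }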
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \ - T shuffler, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 2]); \ - memset(temp, 0, 64); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_ptr, shuffler, n); \ - } \ - memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ - ANY_SIMD(temp, temp + 64, shuffler, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ - } + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 2]); \ + memset(temp, 0, 64); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, param, n); \ + } \ + memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ + ANY_SIMD(temp, temp + 64, param, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ + } #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) -ANY11P(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2, - const uint32, 4, 2, 3) +ANY11P(ARGBToRGB565DitherRow_Any_SSE2, + ARGBToRGB565DitherRow_SSE2, + const uint32_t, + 4, + 2, + 3) #endif #if defined(HAS_ARGBTORGB565DITHERROW_AVX2) -ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2, - const uint32, 4, 2, 7) +ANY11P(ARGBToRGB565DitherRow_Any_AVX2, + ARGBToRGB565DitherRow_AVX2, + const uint32_t, + 4, + 2, + 7) #endif #if defined(HAS_ARGBTORGB565DITHERROW_NEON) -ANY11P(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON, - const uint32, 4, 2, 7) +ANY11P(ARGBToRGB565DitherRow_Any_NEON, + ARGBToRGB565DitherRow_NEON, + const uint32_t, + 4, + 2, + 7) #endif -#ifdef HAS_ARGBSHUFFLEROW_SSE2 -ANY11P(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, const uint8*, 4, 4, 3) +#if defined(HAS_ARGBTORGB565DITHERROW_MSA) +ANY11P(ARGBToRGB565DitherRow_Any_MSA, + ARGBToRGB565DitherRow_MSA, + const uint32_t, + 4, + 2, + 7) #endif #ifdef HAS_ARGBSHUFFLEROW_SSSE3 -ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8*, 4, 4, 7) +ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7) #endif #ifdef HAS_ARGBSHUFFLEROW_AVX2 -ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8*, 4, 4, 15) +ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15) #endif #ifdef HAS_ARGBSHUFFLEROW_NEON -ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3) +ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3) +#endif +#ifdef HAS_ARGBSHUFFLEROW_MSA +ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7) #endif #undef ANY11P +// Any 1 to 1 with parameter and shorts. BPP measures in shorts. 
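[editor's note] The new shorts wrapper below (ANY11C) carries an integer scale. Assuming the fixed-point semantics of the C reference row, dst = clamp255((src * scale) >> 16), a 10-bit plane converts to 8 bits with scale 16384; a small self-contained check of that assumed arithmetic:

    #include <assert.h>
    #include <stdint.h>

    /* Assumed semantics: dst = clamp255((src * scale) >> 16).
       scale = 16384 makes (v * 16384) >> 16 == v >> 2, i.e. 10-bit -> 8-bit. */
    static uint8_t Scale16To8(uint16_t v, int scale) {
      uint32_t x = ((uint32_t)v * (uint32_t)scale) >> 16;
      return (uint8_t)(x > 255 ? 255 : x);
    }

    int main(void) {
      assert(Scale16To8(1023, 16384) == 255); /* full-scale 10-bit -> 255 */
      assert(Scale16To8(512, 16384) == 128);
      return 0;
    }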
+#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \ + void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \ + SIMD_ALIGNED(STYPE temp[32]); \ + SIMD_ALIGNED(DTYPE out[32]); \ + memset(temp, 0, 32 * SBPP); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, scale, n); \ + } \ + memcpy(temp, src_ptr + n, r * SBPP); \ + ANY_SIMD(temp, out, scale, MASK + 1); \ + memcpy(dst_ptr + n, out, r * BPP); \ + } + +#ifdef HAS_CONVERT16TO8ROW_SSSE3 +ANY11C(Convert16To8Row_Any_SSSE3, + Convert16To8Row_SSSE3, + 2, + 1, + uint16_t, + uint8_t, + 15) +#endif +#ifdef HAS_CONVERT16TO8ROW_AVX2 +ANY11C(Convert16To8Row_Any_AVX2, + Convert16To8Row_AVX2, + 2, + 1, + uint16_t, + uint8_t, + 31) +#endif +#ifdef HAS_CONVERT8TO16ROW_SSE2 +ANY11C(Convert8To16Row_Any_SSE2, + Convert8To16Row_SSE2, + 1, + 2, + uint8_t, + uint16_t, + 15) +#endif +#ifdef HAS_CONVERT8TO16ROW_AVX2 +ANY11C(Convert8To16Row_Any_AVX2, + Convert8To16Row_AVX2, + 1, + 2, + uint8_t, + uint16_t, + 31) +#endif +#undef ANY11C + +// Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts. +#define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK) \ + void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \ + SIMD_ALIGNED(ST temp[32]); \ + SIMD_ALIGNED(T out[32]); \ + memset(temp, 0, SBPP * 32); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, param, n); \ + } \ + memcpy(temp, src_ptr + n, r * SBPP); \ + ANY_SIMD(temp, out, param, MASK + 1); \ + memcpy(dst_ptr + n, out, r * BPP); \ + } + +#ifdef HAS_HALFFLOATROW_SSE2 +ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, uint16_t, uint16_t, 2, 2, 7) +#endif +#ifdef HAS_HALFFLOATROW_AVX2 +ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, uint16_t, uint16_t, 2, 2, 15) +#endif +#ifdef HAS_HALFFLOATROW_F16C +ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, uint16_t, uint16_t, 2, 2, 15) +ANY11P16(HalfFloat1Row_Any_F16C, + HalfFloat1Row_F16C, + uint16_t, + uint16_t, + 2, + 2, + 15) +#endif +#ifdef HAS_HALFFLOATROW_NEON +ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 7) +ANY11P16(HalfFloat1Row_Any_NEON, + HalfFloat1Row_NEON, + uint16_t, + uint16_t, + 2, + 2, + 7) +#endif +#ifdef HAS_HALFFLOATROW_MSA +ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, uint16_t, uint16_t, 2, 2, 31) +#endif +#ifdef HAS_BYTETOFLOATROW_NEON +ANY11P16(ByteToFloatRow_Any_NEON, ByteToFloatRow_NEON, uint8_t, float, 1, 3, 7) +#endif +#undef ANY11P16 + // Any 1 to 1 with yuvconstants -#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \ - const struct YuvConstants* yuvconstants, int width) { \ - SIMD_ALIGNED(uint8 temp[128 * 2]); \ - memset(temp, 0, 128); /* for YUY2 and msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \ - } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ - ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ - } +#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \ + const struct YuvConstants* yuvconstants, int width) { \ + SIMD_ALIGNED(uint8_t temp[128 * 2]); \ + memset(temp, 0, 128); /* for YUY2 and msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, 
yuvconstants, n); \ + } \ + memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ + ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ + } #if defined(HAS_YUY2TOARGBROW_SSSE3) ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) @@ -573,25 +905,28 @@ ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) #endif +#if defined(HAS_YUY2TOARGBROW_MSA) +ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7) +ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7) +#endif #undef ANY11C // Any 1 to 1 interpolate. Takes 2 rows of source via stride. -#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ - void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \ - ptrdiff_t src_stride_ptr, int width, \ - int source_y_fraction) { \ - SIMD_ALIGNED(uint8 temp[64 * 3]); \ - memset(temp, 0, 64 * 2); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \ - } \ - memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ - memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \ - ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ - } +#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ + void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, \ + ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \ + SIMD_ALIGNED(uint8_t temp[64 * 3]); \ + memset(temp, 0, 64 * 2); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \ + } \ + memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ + memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \ + ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ + memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ + } #ifdef HAS_INTERPOLATEROW_AVX2 ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31) @@ -602,25 +937,25 @@ ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15) #ifdef HAS_INTERPOLATEROW_NEON ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15) #endif -#ifdef HAS_INTERPOLATEROW_DSPR2 -ANY11T(InterpolateRow_Any_DSPR2, InterpolateRow_DSPR2, 1, 1, 3) +#ifdef HAS_INTERPOLATEROW_MSA +ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31) #endif #undef ANY11T // Any 1 to 1 mirror. 
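[editor's note] The mirror wrapper defined next inverts the usual tail logic: because output order is reversed, the SIMD pass covers the last n source pixels and the residual is the first r. A self-contained trace for BPP = 1, MASK = 15, width = 20 (so r = 4, n = 16), with a scalar stand-in for the kernel:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Scalar stand-in for a 16-pixel mirror kernel. */
    static void MirrorRow16(const uint8_t* src, uint8_t* dst, int width) {
      for (int x = 0; x < width; ++x) dst[x] = src[width - 1 - x];
    }

    int main(void) {
      uint8_t src[20], dst[20], temp[64 * 2] = {0};
      for (int i = 0; i < 20; ++i) src[i] = (uint8_t)i;
      int width = 20, r = width & 15, n = width & ~15;  /* r = 4, n = 16 */
      MirrorRow16(src + r, dst, n);     /* src[4..19] -> dst[0..15], reversed */
      memcpy(temp, src, r);             /* stage the FIRST r pixels */
      MirrorRow16(temp, temp + 64, 16); /* real pixels land at the tail */
      memcpy(dst + n, temp + 64 + (16 - r), r); /* temp + 64 + (MASK+1-r)*BPP */
      for (int i = 0; i < 20; ++i) assert(dst[i] == src[19 - i]);
      return 0;
    }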
-#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[64 * 2]); \ - memset(temp, 0, 64); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \ - } \ - memcpy(temp, src_ptr, r * BPP); \ - ANY_SIMD(temp, temp + 64, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP); \ - } +#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ + SIMD_ALIGNED(uint8_t temp[64 * 2]); \ + memset(temp, 0, 64); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \ + } \ + memcpy(temp, src_ptr, r* BPP); \ + ANY_SIMD(temp, temp + 64, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP); \ + } #ifdef HAS_MIRRORROW_AVX2 ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31) @@ -631,6 +966,9 @@ ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15) #ifdef HAS_MIRRORROW_NEON ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15) #endif +#ifdef HAS_MIRRORROW_MSA +ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63) +#endif #ifdef HAS_ARGBMIRRORROW_AVX2 ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7) #endif @@ -640,67 +978,54 @@ ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3) #ifdef HAS_ARGBMIRRORROW_NEON ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3) #endif +#ifdef HAS_ARGBMIRRORROW_MSA +ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15) +#endif #undef ANY11M // Any 1 plane. (memset) -#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \ - void NAMEANY(uint8* dst_ptr, T v32, int width) { \ - SIMD_ALIGNED(uint8 temp[64]); \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(dst_ptr, v32, n); \ - } \ - ANY_SIMD(temp, v32, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp, r * BPP); \ - } +#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \ + void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \ + SIMD_ALIGNED(uint8_t temp[64]); \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(dst_ptr, v32, n); \ + } \ + ANY_SIMD(temp, v32, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp, r * BPP); \ + } #ifdef HAS_SETROW_X86 -ANY1(SetRow_Any_X86, SetRow_X86, uint8, 1, 3) +ANY1(SetRow_Any_X86, SetRow_X86, uint8_t, 1, 3) #endif #ifdef HAS_SETROW_NEON -ANY1(SetRow_Any_NEON, SetRow_NEON, uint8, 1, 15) +ANY1(SetRow_Any_NEON, SetRow_NEON, uint8_t, 1, 15) #endif #ifdef HAS_ARGBSETROW_NEON -ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3) +ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3) +#endif +#ifdef HAS_ARGBSETROW_MSA +ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3) #endif #undef ANY1 // Any 1 to 2. Outputs UV planes. 
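[editor's note] For the UV-split wrappers defined next, this is the computation being vectorized: deinterleave one NV12-style chroma row into separate U and V rows. A scalar reference (matching the behavior the _Any_ variants extend to arbitrary widths):

    #include <stdint.h>

    /* src_uv holds width interleaved U/V byte pairs. */
    static void SplitUVRow_Ref(const uint8_t* src_uv, uint8_t* dst_u,
                               uint8_t* dst_v, int width) {
      for (int x = 0; x < width; ++x) {
        dst_u[x] = src_uv[2 * x + 0];
        dst_v[x] = src_uv[2 * x + 1];
      }
    }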
-#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \ - void NAMEANY(const uint8* src_ptr, uint8* dst_u, uint8* dst_v, int width) {\ - SIMD_ALIGNED(uint8 temp[128 * 3]); \ - memset(temp, 0, 128); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_u, dst_v, n); \ - } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ - /* repeat last 4 bytes for 422 subsampler */ \ - if ((width & 1) && BPP == 4 && DUVSHIFT == 1) { \ - memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \ - } \ - /* repeat last 4 - 12 bytes for 411 subsampler */ \ - if (((width & 3) == 1) && BPP == 4 && DUVSHIFT == 2) { \ - memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \ - memcpy(temp + SS(r, UVSHIFT) * BPP + BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP * 2); \ - } \ - if (((width & 3) == 2) && BPP == 4 && DUVSHIFT == 2) { \ - memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP * 2, BPP * 2); \ - } \ - if (((width & 3) == 3) && BPP == 4 && DUVSHIFT == 2) { \ - memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \ - } \ - ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \ - memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \ - memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \ - } +#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \ + int width) { \ + SIMD_ALIGNED(uint8_t temp[128 * 3]); \ + memset(temp, 0, 128); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_u, dst_v, n); \ + } \ + memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ + ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \ + memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \ + memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \ + } #ifdef HAS_SPLITUVROW_SSE2 ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15) @@ -711,8 +1036,8 @@ ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31) #ifdef HAS_SPLITUVROW_NEON ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15) #endif -#ifdef HAS_SPLITUVROW_DSPR2 -ANY12(SplitUVRow_Any_DSPR2, SplitUVRow_DSPR2, 0, 2, 0, 15) +#ifdef HAS_SPLITUVROW_MSA +ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31) #endif #ifdef HAS_ARGBTOUV444ROW_SSSE3 ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15) @@ -727,37 +1052,66 @@ ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15) #endif #ifdef HAS_YUY2TOUV422ROW_NEON ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7) -ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31) ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15) ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) #endif +#ifdef HAS_YUY2TOUV422ROW_MSA +ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15) +ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31) +ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31) +#endif #undef ANY12 +// Any 1 to 3. Outputs RGB planes. 
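// Editor's sketch (not part of this patch): the SplitRGBRow functions
// wrapped by ANY13 below fan one packed RGB row out into three independent
// planes; a scalar equivalent:
#include <stdint.h>
static void SplitRGBRow_Scalar(const uint8_t* src_rgb, uint8_t* dst_r,
                               uint8_t* dst_g, uint8_t* dst_b, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_r[x] = src_rgb[3 * x + 0];
    dst_g[x] = src_rgb[3 * x + 1];
    dst_b[x] = src_rgb[3 * x + 2];
  }
}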
+#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \ + uint8_t* dst_b, int width) { \ + SIMD_ALIGNED(uint8_t temp[16 * 6]); \ + memset(temp, 0, 16 * 3); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \ + } \ + memcpy(temp, src_ptr + n * BPP, r * BPP); \ + ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \ + memcpy(dst_r + n, temp + 16 * 3, r); \ + memcpy(dst_g + n, temp + 16 * 4, r); \ + memcpy(dst_b + n, temp + 16 * 5, r); \ + } + +#ifdef HAS_SPLITRGBROW_SSSE3 +ANY13(SplitRGBRow_Any_SSSE3, SplitRGBRow_SSSE3, 3, 15) +#endif +#ifdef HAS_SPLITRGBROW_NEON +ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15) +#endif + // Any 1 to 2 with source stride (2 rows of source). Outputs UV planes. // 128 byte row allows for 32 avx ARGB pixels. -#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, int src_stride_ptr, \ - uint8* dst_u, uint8* dst_v, int width) { \ - SIMD_ALIGNED(uint8 temp[128 * 4]); \ - memset(temp, 0, 128 * 2); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \ - } \ - memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ - memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \ - SS(r, UVSHIFT) * BPP); \ - if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */\ - memcpy(temp + SS(r, UVSHIFT) * BPP, \ - temp + SS(r, UVSHIFT) * BPP - BPP, BPP); \ - memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \ - temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ - } \ - ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \ - memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \ - memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \ - } +#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, \ + uint8_t* dst_v, int width) { \ + SIMD_ALIGNED(uint8_t temp[128 * 4]); \ + memset(temp, 0, 128 * 2); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \ + } \ + memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ + memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \ + SS(r, UVSHIFT) * BPP); \ + if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \ + memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \ + BPP); \ + memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \ + temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ + } \ + ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \ + memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \ + memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \ + } #ifdef HAS_ARGBTOUVROW_AVX2 ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31) @@ -783,30 +1137,57 @@ ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15) #ifdef HAS_ARGBTOUVROW_NEON ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15) #endif +#ifdef HAS_ARGBTOUVROW_MSA +ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31) +#endif #ifdef HAS_ARGBTOUVJROW_NEON ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15) #endif +#ifdef HAS_ARGBTOUVJROW_MSA +ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31) +#endif #ifdef HAS_BGRATOUVROW_NEON ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15) #endif +#ifdef HAS_BGRATOUVROW_MSA 
+ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 31) +#endif #ifdef HAS_ABGRTOUVROW_NEON ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15) #endif +#ifdef HAS_ABGRTOUVROW_MSA +ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 31) +#endif #ifdef HAS_RGBATOUVROW_NEON ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15) #endif +#ifdef HAS_RGBATOUVROW_MSA +ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 31) +#endif #ifdef HAS_RGB24TOUVROW_NEON ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15) #endif +#ifdef HAS_RGB24TOUVROW_MSA +ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15) +#endif #ifdef HAS_RAWTOUVROW_NEON ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15) #endif +#ifdef HAS_RAWTOUVROW_MSA +ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15) +#endif #ifdef HAS_RGB565TOUVROW_NEON ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15) #endif +#ifdef HAS_RGB565TOUVROW_MSA +ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15) +#endif #ifdef HAS_ARGB1555TOUVROW_NEON ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15) #endif +#ifdef HAS_ARGB1555TOUVROW_MSA +ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15) +#endif #ifdef HAS_ARGB4444TOUVROW_NEON ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15) #endif @@ -816,6 +1197,12 @@ ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15) #ifdef HAS_UYVYTOUVROW_NEON ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15) #endif +#ifdef HAS_YUY2TOUVROW_MSA +ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31) +#endif +#ifdef HAS_UYVYTOUVROW_MSA +ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31) +#endif #undef ANY12S #ifdef __cplusplus diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_common.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_common.cc index aefa38c49541..2bbc5adbf145 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/row_common.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/row_common.cc @@ -10,6 +10,7 @@ #include "libyuv/row.h" +#include <stdio.h> #include <string.h> // For memcpy and memset. #include "libyuv/basic_types.h" @@ -23,59 +24,69 @@ extern "C" { #define USE_BRANCHLESS 1 #if USE_BRANCHLESS -static __inline int32 clamp0(int32 v) { +static __inline int32_t clamp0(int32_t v) { return ((-(v) >> 31) & (v)); } -static __inline int32 clamp255(int32 v) { +static __inline int32_t clamp255(int32_t v) { return (((255 - (v)) >> 31) | (v)) & 255; } -static __inline uint32 Clamp(int32 val) { - int v = clamp0(val); - return (uint32)(clamp255(v)); +static __inline int32_t clamp1023(int32_t v) { + return (((1023 - (v)) >> 31) | (v)) & 1023; } -static __inline uint32 Abs(int32 v) { +static __inline uint32_t Abs(int32_t v) { int m = v >> 31; return (v + m) ^ m; } -#else // USE_BRANCHLESS -static __inline int32 clamp0(int32 v) { +#else // USE_BRANCHLESS +static __inline int32_t clamp0(int32_t v) { return (v < 0) ? 0 : v; } -static __inline int32 clamp255(int32 v) { +static __inline int32_t clamp255(int32_t v) { return (v > 255) ? 255 : v; } -static __inline uint32 Clamp(int32 val) { - int v = clamp0(val); - return (uint32)(clamp255(v)); +static __inline int32_t clamp1023(int32_t v) { + return (v > 1023) ? 1023 : v; } -static __inline uint32 Abs(int32 v) { return (v < 0) ?
-v : v; } #endif // USE_BRANCHLESS +static __inline uint32_t Clamp(int32_t val) { + int v = clamp0(val); + return (uint32_t)(clamp255(v)); +} -#ifdef LIBYUV_LITTLE_ENDIAN -#define WRITEWORD(p, v) *(uint32*)(p) = v +static __inline uint32_t Clamp10(int32_t val) { + int v = clamp0(val); + return (uint32_t)(clamp1023(v)); +} + +// Little Endian +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ + defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#define WRITEWORD(p, v) *(uint32_t*)(p) = v #else -static inline void WRITEWORD(uint8* p, uint32 v) { - p[0] = (uint8)(v & 255); - p[1] = (uint8)((v >> 8) & 255); - p[2] = (uint8)((v >> 16) & 255); - p[3] = (uint8)((v >> 24) & 255); +static inline void WRITEWORD(uint8_t* p, uint32_t v) { + p[0] = (uint8_t)(v & 255); + p[1] = (uint8_t)((v >> 8) & 255); + p[2] = (uint8_t)((v >> 16) & 255); + p[3] = (uint8_t)((v >> 24) & 255); } #endif -void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) { +void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_rgb24[0]; - uint8 g = src_rgb24[1]; - uint8 r = src_rgb24[2]; + uint8_t b = src_rgb24[0]; + uint8_t g = src_rgb24[1]; + uint8_t r = src_rgb24[2]; dst_argb[0] = b; dst_argb[1] = g; dst_argb[2] = r; @@ -85,12 +96,12 @@ void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) { } } -void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) { +void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 r = src_raw[0]; - uint8 g = src_raw[1]; - uint8 b = src_raw[2]; + uint8_t r = src_raw[0]; + uint8_t g = src_raw[1]; + uint8_t b = src_raw[2]; dst_argb[0] = b; dst_argb[1] = g; dst_argb[2] = r; @@ -100,12 +111,12 @@ void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) { } } -void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width) { +void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { int x; for (x = 0; x < width; ++x) { - uint8 r = src_raw[0]; - uint8 g = src_raw[1]; - uint8 b = src_raw[2]; + uint8_t r = src_raw[0]; + uint8_t g = src_raw[1]; + uint8_t b = src_raw[2]; dst_rgb24[0] = b; dst_rgb24[1] = g; dst_rgb24[2] = r; @@ -114,12 +125,14 @@ void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width) { } } -void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) { +void RGB565ToARGBRow_C(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_rgb565[0] & 0x1f; - uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8 r = src_rgb565[1] >> 3; + uint8_t b = src_rgb565[0] & 0x1f; + uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8_t r = src_rgb565[1] >> 3; dst_argb[0] = (b << 3) | (b >> 2); dst_argb[1] = (g << 2) | (g >> 4); dst_argb[2] = (r << 3) | (r >> 2); @@ -129,14 +142,15 @@ void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) { } } -void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb, +void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555, + uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_argb1555[0] & 0x1f; - uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8 r = (src_argb1555[1] & 0x7c) >> 2; - uint8 a = src_argb1555[1] >> 7; + uint8_t b = 
src_argb1555[0] & 0x1f; + uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8_t r = (src_argb1555[1] & 0x7c) >> 2; + uint8_t a = src_argb1555[1] >> 7; dst_argb[0] = (b << 3) | (b >> 2); dst_argb[1] = (g << 3) | (g >> 2); dst_argb[2] = (r << 3) | (r >> 2); @@ -146,14 +160,15 @@ void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb, } } -void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb, +void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444, + uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_argb4444[0] & 0x0f; - uint8 g = src_argb4444[0] >> 4; - uint8 r = src_argb4444[1] & 0x0f; - uint8 a = src_argb4444[1] >> 4; + uint8_t b = src_argb4444[0] & 0x0f; + uint8_t g = src_argb4444[0] >> 4; + uint8_t r = src_argb4444[1] & 0x0f; + uint8_t a = src_argb4444[1] >> 4; dst_argb[0] = (b << 4) | b; dst_argb[1] = (g << 4) | g; dst_argb[2] = (r << 4) | r; @@ -163,12 +178,53 @@ void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb, } } -void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { +void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_argb[0]; - uint8 g = src_argb[1]; - uint8 r = src_argb[2]; + uint32_t ar30 = *(const uint32_t*)src_ar30; + uint32_t b = (ar30 >> 2) & 0xff; + uint32_t g = (ar30 >> 12) & 0xff; + uint32_t r = (ar30 >> 22) & 0xff; + uint32_t a = (ar30 >> 30) * 0x55; // Replicate 2 bits to 8 bits. + *(uint32_t*)(dst_argb) = b | (g << 8) | (r << 16) | (a << 24); + dst_argb += 4; + src_ar30 += 4; + } +} + +void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) { + int x; + for (x = 0; x < width; ++x) { + uint32_t ar30 = *(const uint32_t*)src_ar30; + uint32_t b = (ar30 >> 2) & 0xff; + uint32_t g = (ar30 >> 12) & 0xff; + uint32_t r = (ar30 >> 22) & 0xff; + uint32_t a = (ar30 >> 30) * 0x55; // Replicate 2 bits to 8 bits. 
+ *(uint32_t*)(dst_abgr) = r | (g << 8) | (b << 16) | (a << 24); + dst_abgr += 4; + src_ar30 += 4; + } +} + +void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) { + int x; + for (x = 0; x < width; ++x) { + uint32_t ar30 = *(const uint32_t*)src_ar30; + uint32_t b = ar30 & 0x3ff; + uint32_t ga = ar30 & 0xc00ffc00; + uint32_t r = (ar30 >> 20) & 0x3ff; + *(uint32_t*)(dst_ab30) = r | ga | (b << 20); + dst_ab30 += 4; + src_ar30 += 4; + } +} + +void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { + int x; + for (x = 0; x < width; ++x) { + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; dst_rgb[0] = b; dst_rgb[1] = g; dst_rgb[2] = r; @@ -177,12 +233,12 @@ void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { } } -void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_argb[0]; - uint8 g = src_argb[1]; - uint8 r = src_argb[2]; + uint8_t b = src_argb[0]; + uint8_t g = src_argb[1]; + uint8_t r = src_argb[2]; dst_rgb[0] = r; dst_rgb[1] = g; dst_rgb[2] = b; @@ -191,25 +247,25 @@ void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) { } } -void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb[0] >> 3; - uint8 g0 = src_argb[1] >> 2; - uint8 r0 = src_argb[2] >> 3; - uint8 b1 = src_argb[4] >> 3; - uint8 g1 = src_argb[5] >> 2; - uint8 r1 = src_argb[6] >> 3; - WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | - (b1 << 16) | (g1 << 21) | (r1 << 27)); + uint8_t b0 = src_argb[0] >> 3; + uint8_t g0 = src_argb[1] >> 2; + uint8_t r0 = src_argb[2] >> 3; + uint8_t b1 = src_argb[4] >> 3; + uint8_t g1 = src_argb[5] >> 2; + uint8_t r1 = src_argb[6] >> 3; + WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | + (r1 << 27)); dst_rgb += 4; src_argb += 8; } if (width & 1) { - uint8 b0 = src_argb[0] >> 3; - uint8 g0 = src_argb[1] >> 2; - uint8 r0 = src_argb[2] >> 3; - *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); + uint8_t b0 = src_argb[0] >> 3; + uint8_t g0 = src_argb[1] >> 2; + uint8_t r0 = src_argb[2] >> 3; + *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); } } @@ -221,132 +277,160 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { // endian will not affect order of the original matrix. But the dither4 // will contain the first pixel in the lower byte for little endian // or the upper byte for big endian.
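// Editor's worked example (not part of this patch), using a hypothetical
// dither row dither4 = 0x03020100, so pixel x adds byte (x & 3): a blue
// value of 125 gives clamp255(125 + 0) >> 3 = 15 for one pixel but
// clamp255(125 + 3) >> 3 = 16 for its neighbour, so adjacent pixels round
// to different 5-bit levels and banding is broken up. The per-channel step,
// as a standalone sketch:
#include <stdint.h>
static uint8_t Dither565Channel(uint8_t v, uint32_t dither4, int x, int bits) {
  int d = ((const uint8_t*)&dither4)[x & 3];  // per-pixel dither value
  int sum = v + d;
  if (sum > 255) sum = 255;                   // clamp255 before truncation
  return (uint8_t)(sum >> (8 - bits));        // keep the top 'bits' bits
}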
-void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width) { +void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, + int width) { int x; for (x = 0; x < width - 1; x += 2) { int dither0 = ((const unsigned char*)(&dither4))[x & 3]; int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3]; - uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; - uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; - uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; - uint8 b1 = clamp255(src_argb[4] + dither1) >> 3; - uint8 g1 = clamp255(src_argb[5] + dither1) >> 2; - uint8 r1 = clamp255(src_argb[6] + dither1) >> 3; - WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | - (b1 << 16) | (g1 << 21) | (r1 << 27)); + uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3; + uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2; + uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3; + uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3; + uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2; + uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3; + WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | + (r1 << 27)); dst_rgb += 4; src_argb += 8; } if (width & 1) { int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3]; - uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; - uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; - uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; - *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); + uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3; + uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2; + uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3; + *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); } } -void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb[0] >> 3; - uint8 g0 = src_argb[1] >> 3; - uint8 r0 = src_argb[2] >> 3; - uint8 a0 = src_argb[3] >> 7; - uint8 b1 = src_argb[4] >> 3; - uint8 g1 = src_argb[5] >> 3; - uint8 r1 = src_argb[6] >> 3; - uint8 a1 = src_argb[7] >> 7; - *(uint32*)(dst_rgb) = - b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | - (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); + uint8_t b0 = src_argb[0] >> 3; + uint8_t g0 = src_argb[1] >> 3; + uint8_t r0 = src_argb[2] >> 3; + uint8_t a0 = src_argb[3] >> 7; + uint8_t b1 = src_argb[4] >> 3; + uint8_t g1 = src_argb[5] >> 3; + uint8_t r1 = src_argb[6] >> 3; + uint8_t a1 = src_argb[7] >> 7; + *(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | + (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); dst_rgb += 4; src_argb += 8; } if (width & 1) { - uint8 b0 = src_argb[0] >> 3; - uint8 g0 = src_argb[1] >> 3; - uint8 r0 = src_argb[2] >> 3; - uint8 a0 = src_argb[3] >> 7; - *(uint16*)(dst_rgb) = - b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); + uint8_t b0 = src_argb[0] >> 3; + uint8_t g0 = src_argb[1] >> 3; + uint8_t r0 = src_argb[2] >> 3; + uint8_t a0 = src_argb[3] >> 7; + *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); } } -void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { +void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb[0] >> 4; - uint8 g0 = src_argb[1] >> 4; - uint8 r0 = src_argb[2] >> 4; - uint8 a0 = src_argb[3] >> 4; - uint8 b1 = src_argb[4] >> 4; - uint8 g1 = src_argb[5] >> 4; - uint8 r1 = src_argb[6] >> 
4; - uint8 a1 = src_argb[7] >> 4; - *(uint32*)(dst_rgb) = - b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | - (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28); + uint8_t b0 = src_argb[0] >> 4; + uint8_t g0 = src_argb[1] >> 4; + uint8_t r0 = src_argb[2] >> 4; + uint8_t a0 = src_argb[3] >> 4; + uint8_t b1 = src_argb[4] >> 4; + uint8_t g1 = src_argb[5] >> 4; + uint8_t r1 = src_argb[6] >> 4; + uint8_t a1 = src_argb[7] >> 4; + *(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | + (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28); dst_rgb += 4; src_argb += 8; } if (width & 1) { - uint8 b0 = src_argb[0] >> 4; - uint8 g0 = src_argb[1] >> 4; - uint8 r0 = src_argb[2] >> 4; - uint8 a0 = src_argb[3] >> 4; - *(uint16*)(dst_rgb) = - b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); + uint8_t b0 = src_argb[0] >> 4; + uint8_t g0 = src_argb[1] >> 4; + uint8_t r0 = src_argb[2] >> 4; + uint8_t a0 = src_argb[3] >> 4; + *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); } } -static __inline int RGBToY(uint8 r, uint8 g, uint8 b) { - return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; +void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) { + int x; + for (x = 0; x < width; ++x) { + uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2); + uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2); + uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2); + uint32_t a0 = (src_abgr[3] >> 6); + *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30); + dst_ar30 += 4; + src_abgr += 4; + } } -static __inline int RGBToU(uint8 r, uint8 g, uint8 b) { +void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) { + int x; + for (x = 0; x < width; ++x) { + uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2); + uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2); + uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2); + uint32_t a0 = (src_argb[3] >> 6); + *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30); + dst_ar30 += 4; + src_argb += 4; + } +} + +static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { + return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; +} + +static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) { return (112 * b - 74 * g - 38 * r + 0x8080) >> 8; } -static __inline int RGBToV(uint8 r, uint8 g, uint8 b) { +static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) { return (112 * r - 94 * g - 18 * b + 0x8080) >> 8; } -#define MAKEROWY(NAME, R, G, B, BPP) \ -void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ - int x; \ - for (x = 0; x < width; ++x) { \ - dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ - src_argb0 += BPP; \ - dst_y += 1; \ - } \ -} \ -void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \ - uint8* dst_u, uint8* dst_v, int width) { \ - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ - int x; \ - for (x = 0; x < width - 1; x += 2) { \ - uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \ - src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \ - uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \ - src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \ - uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \ - src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \ - dst_u[0] = RGBToU(ar, ag, ab); \ - dst_v[0] = RGBToV(ar, ag, ab); \ - src_rgb0 += BPP * 2; \ - src_rgb1 += BPP * 2; \ - dst_u += 1; \ - dst_v += 1; \ - } \ - if (width & 1) { \ - uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \ - uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; 
\ - uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \ - dst_u[0] = RGBToU(ar, ag, ab); \ - dst_v[0] = RGBToV(ar, ag, ab); \ - } \ -} +// ARGBToY_C and ARGBToUV_C +#define MAKEROWY(NAME, R, G, B, BPP) \ + void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \ + int x; \ + for (x = 0; x < width; ++x) { \ + dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ + src_argb0 += BPP; \ + dst_y += 1; \ + } \ + } \ + void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \ + uint8_t* dst_u, uint8_t* dst_v, int width) { \ + const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \ + int x; \ + for (x = 0; x < width - 1; x += 2) { \ + uint8_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \ + src_rgb1[B + BPP]) >> \ + 2; \ + uint8_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \ + src_rgb1[G + BPP]) >> \ + 2; \ + uint8_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \ + src_rgb1[R + BPP]) >> \ + 2; \ + dst_u[0] = RGBToU(ar, ag, ab); \ + dst_v[0] = RGBToV(ar, ag, ab); \ + src_rgb0 += BPP * 2; \ + src_rgb1 += BPP * 2; \ + dst_u += 1; \ + dst_v += 1; \ + } \ + if (width & 1) { \ + uint8_t ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \ + uint8_t ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \ + uint8_t ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \ + dst_u[0] = RGBToU(ar, ag, ab); \ + dst_v[0] = RGBToV(ar, ag, ab); \ + } \ + } MAKEROWY(ARGB, 2, 1, 0, 4) MAKEROWY(BGRA, 1, 2, 3, 4) @@ -381,64 +465,65 @@ MAKEROWY(RAW, 0, 1, 2, 3) // g -0.41869 * 255 = -106.76595 = -107 // r 0.50000 * 255 = 127.5 = 127 -static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) { - return (38 * r + 75 * g + 15 * b + 64) >> 7; +static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { + return (38 * r + 75 * g + 15 * b + 64) >> 7; } -static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) { +static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) { return (127 * b - 84 * g - 43 * r + 0x8080) >> 8; } -static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) { +static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) { return (127 * r - 107 * g - 20 * b + 0x8080) >> 8; } #define AVGB(a, b) (((a) + (b) + 1) >> 1) -#define MAKEROWYJ(NAME, R, G, B, BPP) \ -void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ - int x; \ - for (x = 0; x < width; ++x) { \ - dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \ - src_argb0 += BPP; \ - dst_y += 1; \ - } \ -} \ -void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \ - uint8* dst_u, uint8* dst_v, int width) { \ - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ - int x; \ - for (x = 0; x < width - 1; x += 2) { \ - uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \ - AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \ - uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \ - AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \ - uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \ - AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \ - dst_u[0] = RGBToUJ(ar, ag, ab); \ - dst_v[0] = RGBToVJ(ar, ag, ab); \ - src_rgb0 += BPP * 2; \ - src_rgb1 += BPP * 2; \ - dst_u += 1; \ - dst_v += 1; \ - } \ - if (width & 1) { \ - uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \ - uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \ - uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \ - dst_u[0] = RGBToUJ(ar, ag, ab); \ - dst_v[0] = RGBToVJ(ar, ag, ab); \ - } \ -} +// ARGBToYJ_C and ARGBToUVJ_C +#define MAKEROWYJ(NAME, R, G, B, BPP) \ + void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \ + int x; \ + for (x = 0; x < width; 
++x) { \ + dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \ + src_argb0 += BPP; \ + dst_y += 1; \ + } \ + } \ + void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \ + uint8_t* dst_u, uint8_t* dst_v, int width) { \ + const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \ + int x; \ + for (x = 0; x < width - 1; x += 2) { \ + uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \ + AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \ + uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \ + AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \ + uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \ + AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \ + dst_u[0] = RGBToUJ(ar, ag, ab); \ + dst_v[0] = RGBToVJ(ar, ag, ab); \ + src_rgb0 += BPP * 2; \ + src_rgb1 += BPP * 2; \ + dst_u += 1; \ + dst_v += 1; \ + } \ + if (width & 1) { \ + uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \ + uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \ + uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \ + dst_u[0] = RGBToUJ(ar, ag, ab); \ + dst_v[0] = RGBToVJ(ar, ag, ab); \ + } \ + } MAKEROWYJ(ARGB, 2, 1, 0, 4) #undef MAKEROWYJ -void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { +void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_rgb565[0] & 0x1f; - uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8 r = src_rgb565[1] >> 3; + uint8_t b = src_rgb565[0] & 0x1f; + uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8_t r = src_rgb565[1] >> 3; b = (b << 3) | (b >> 2); g = (g << 2) | (g >> 4); r = (r << 3) | (r >> 2); @@ -448,12 +533,12 @@ void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { } } -void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) { +void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_argb1555[0] & 0x1f; - uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8 r = (src_argb1555[1] & 0x7c) >> 2; + uint8_t b = src_argb1555[0] & 0x1f; + uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8_t r = (src_argb1555[1] & 0x7c) >> 2; b = (b << 3) | (b >> 2); g = (g << 3) | (g >> 2); r = (r << 3) | (r >> 2); @@ -463,12 +548,12 @@ void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) { } } -void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) { +void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { - uint8 b = src_argb4444[0] & 0x0f; - uint8 g = src_argb4444[0] >> 4; - uint8 r = src_argb4444[1] & 0x0f; + uint8_t b = src_argb4444[0] & 0x0f; + uint8_t g = src_argb4444[0] >> 4; + uint8_t r = src_argb4444[1] & 0x0f; b = (b << 4) | b; g = (g << 4) | g; r = (r << 4) | r; @@ -478,26 +563,29 @@ void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) { } } -void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565; +void RGB565ToUVRow_C(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565; int x; for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_rgb565[0] & 0x1f; - uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8 r0 = src_rgb565[1] >> 3; - uint8 b1 = 
src_rgb565[2] & 0x1f; - uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); - uint8 r1 = src_rgb565[3] >> 3; - uint8 b2 = next_rgb565[0] & 0x1f; - uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); - uint8 r2 = next_rgb565[1] >> 3; - uint8 b3 = next_rgb565[2] & 0x1f; - uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); - uint8 r3 = next_rgb565[3] >> 3; - uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787. - uint8 g = (g0 + g1 + g2 + g3); - uint8 r = (r0 + r1 + r2 + r3); + uint8_t b0 = src_rgb565[0] & 0x1f; + uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8_t r0 = src_rgb565[1] >> 3; + uint8_t b1 = src_rgb565[2] & 0x1f; + uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); + uint8_t r1 = src_rgb565[3] >> 3; + uint8_t b2 = next_rgb565[0] & 0x1f; + uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); + uint8_t r2 = next_rgb565[1] >> 3; + uint8_t b3 = next_rgb565[2] & 0x1f; + uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); + uint8_t r3 = next_rgb565[3] >> 3; + uint8_t b = (b0 + b1 + b2 + b3); // 565 * 4 = 787. + uint8_t g = (g0 + g1 + g2 + g3); + uint8_t r = (r0 + r1 + r2 + r3); b = (b << 1) | (b >> 6); // 787 -> 888. r = (r << 1) | (r >> 6); dst_u[0] = RGBToU(r, g, b); @@ -508,15 +596,15 @@ void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565, dst_v += 1; } if (width & 1) { - uint8 b0 = src_rgb565[0] & 0x1f; - uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8 r0 = src_rgb565[1] >> 3; - uint8 b2 = next_rgb565[0] & 0x1f; - uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); - uint8 r2 = next_rgb565[1] >> 3; - uint8 b = (b0 + b2); // 565 * 2 = 676. - uint8 g = (g0 + g2); - uint8 r = (r0 + r2); + uint8_t b0 = src_rgb565[0] & 0x1f; + uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); + uint8_t r0 = src_rgb565[1] >> 3; + uint8_t b2 = next_rgb565[0] & 0x1f; + uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); + uint8_t r2 = next_rgb565[1] >> 3; + uint8_t b = (b0 + b2); // 565 * 2 = 676. + uint8_t g = (g0 + g2); + uint8_t r = (r0 + r2); b = (b << 2) | (b >> 4); // 676 -> 888 g = (g << 1) | (g >> 6); r = (r << 2) | (r >> 4); @@ -525,26 +613,29 @@ void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565, } } -void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555; +void ARGB1555ToUVRow_C(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555; int x; for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb1555[0] & 0x1f; - uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; - uint8 b1 = src_argb1555[2] & 0x1f; - uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3); - uint8 r1 = (src_argb1555[3] & 0x7c) >> 2; - uint8 b2 = next_argb1555[0] & 0x1f; - uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); - uint8 r2 = (next_argb1555[1] & 0x7c) >> 2; - uint8 b3 = next_argb1555[2] & 0x1f; - uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3); - uint8 r3 = (next_argb1555[3] & 0x7c) >> 2; - uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777. 
- uint8 g = (g0 + g1 + g2 + g3); - uint8 r = (r0 + r1 + r2 + r3); + uint8_t b0 = src_argb1555[0] & 0x1f; + uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2; + uint8_t b1 = src_argb1555[2] & 0x1f; + uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3); + uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2; + uint8_t b2 = next_argb1555[0] & 0x1f; + uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); + uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2; + uint8_t b3 = next_argb1555[2] & 0x1f; + uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3); + uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2; + uint8_t b = (b0 + b1 + b2 + b3); // 555 * 4 = 777. + uint8_t g = (g0 + g1 + g2 + g3); + uint8_t r = (r0 + r1 + r2 + r3); b = (b << 1) | (b >> 6); // 777 -> 888. g = (g << 1) | (g >> 6); r = (r << 1) | (r >> 6); @@ -556,15 +647,15 @@ void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555, dst_v += 1; } if (width & 1) { - uint8 b0 = src_argb1555[0] & 0x1f; - uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; - uint8 b2 = next_argb1555[0] & 0x1f; - uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); - uint8 r2 = next_argb1555[1] >> 3; - uint8 b = (b0 + b2); // 555 * 2 = 666. - uint8 g = (g0 + g2); - uint8 r = (r0 + r2); + uint8_t b0 = src_argb1555[0] & 0x1f; + uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); + uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2; + uint8_t b2 = next_argb1555[0] & 0x1f; + uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); + uint8_t r2 = next_argb1555[1] >> 3; + uint8_t b = (b0 + b2); // 555 * 2 = 666. + uint8_t g = (g0 + g2); + uint8_t r = (r0 + r2); b = (b << 2) | (b >> 4); // 666 -> 888. g = (g << 2) | (g >> 4); r = (r << 2) | (r >> 4); @@ -573,26 +664,29 @@ void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555, } } -void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444; +void ARGB4444ToUVRow_C(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444; int x; for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb4444[0] & 0x0f; - uint8 g0 = src_argb4444[0] >> 4; - uint8 r0 = src_argb4444[1] & 0x0f; - uint8 b1 = src_argb4444[2] & 0x0f; - uint8 g1 = src_argb4444[2] >> 4; - uint8 r1 = src_argb4444[3] & 0x0f; - uint8 b2 = next_argb4444[0] & 0x0f; - uint8 g2 = next_argb4444[0] >> 4; - uint8 r2 = next_argb4444[1] & 0x0f; - uint8 b3 = next_argb4444[2] & 0x0f; - uint8 g3 = next_argb4444[2] >> 4; - uint8 r3 = next_argb4444[3] & 0x0f; - uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666. - uint8 g = (g0 + g1 + g2 + g3); - uint8 r = (r0 + r1 + r2 + r3); + uint8_t b0 = src_argb4444[0] & 0x0f; + uint8_t g0 = src_argb4444[0] >> 4; + uint8_t r0 = src_argb4444[1] & 0x0f; + uint8_t b1 = src_argb4444[2] & 0x0f; + uint8_t g1 = src_argb4444[2] >> 4; + uint8_t r1 = src_argb4444[3] & 0x0f; + uint8_t b2 = next_argb4444[0] & 0x0f; + uint8_t g2 = next_argb4444[0] >> 4; + uint8_t r2 = next_argb4444[1] & 0x0f; + uint8_t b3 = next_argb4444[2] & 0x0f; + uint8_t g3 = next_argb4444[2] >> 4; + uint8_t r3 = next_argb4444[3] & 0x0f; + uint8_t b = (b0 + b1 + b2 + b3); // 444 * 4 = 666. 
+ uint8_t g = (g0 + g1 + g2 + g3); + uint8_t r = (r0 + r1 + r2 + r3); b = (b << 2) | (b >> 4); // 666 -> 888. g = (g << 2) | (g >> 4); r = (r << 2) | (r >> 4); @@ -604,15 +698,15 @@ void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444, dst_v += 1; } if (width & 1) { - uint8 b0 = src_argb4444[0] & 0x0f; - uint8 g0 = src_argb4444[0] >> 4; - uint8 r0 = src_argb4444[1] & 0x0f; - uint8 b2 = next_argb4444[0] & 0x0f; - uint8 g2 = next_argb4444[0] >> 4; - uint8 r2 = next_argb4444[1] & 0x0f; - uint8 b = (b0 + b2); // 444 * 2 = 555. - uint8 g = (g0 + g2); - uint8 r = (r0 + r2); + uint8_t b0 = src_argb4444[0] & 0x0f; + uint8_t g0 = src_argb4444[0] >> 4; + uint8_t r0 = src_argb4444[1] & 0x0f; + uint8_t b2 = next_argb4444[0] & 0x0f; + uint8_t g2 = next_argb4444[0] >> 4; + uint8_t r2 = next_argb4444[1] & 0x0f; + uint8_t b = (b0 + b2); // 444 * 2 = 555. + uint8_t g = (g0 + g2); + uint8_t r = (r0 + r2); b = (b << 3) | (b >> 2); // 555 -> 888. g = (g << 3) | (g >> 2); r = (r << 3) | (r >> 2); @@ -621,13 +715,15 @@ void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444, } } -void ARGBToUV444Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width) { +void ARGBToUV444Row_C(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { int x; for (x = 0; x < width; ++x) { - uint8 ab = src_argb[0]; - uint8 ag = src_argb[1]; - uint8 ar = src_argb[2]; + uint8_t ab = src_argb[0]; + uint8_t ag = src_argb[1]; + uint8_t ar = src_argb[2]; dst_u[0] = RGBToU(ar, ag, ab); dst_v[0] = RGBToV(ar, ag, ab); src_argb += 4; @@ -636,45 +732,10 @@ void ARGBToUV444Row_C(const uint8* src_argb, } } -void ARGBToUV411Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width) { - int x; - for (x = 0; x < width - 3; x += 4) { - uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2; - uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2; - uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - src_argb += 16; - dst_u += 1; - dst_v += 1; - } - // Odd width handling mimics 'any' function which replicates last pixel. - if ((width & 3) == 3) { - uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[8]) >> 2; - uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[9]) >> 2; - uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[10]) >> 2; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - } else if ((width & 3) == 2) { - uint8 ab = (src_argb[0] + src_argb[4]) >> 1; - uint8 ag = (src_argb[1] + src_argb[5]) >> 1; - uint8 ar = (src_argb[2] + src_argb[6]) >> 1; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - } else if ((width & 3) == 1) { - uint8 ab = src_argb[0]; - uint8 ag = src_argb[1]; - uint8 ar = src_argb[2]; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - } -} - -void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) { +void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { - uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]); + uint8_t y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]); dst_argb[2] = dst_argb[1] = dst_argb[0] = y; dst_argb[3] = src_argb[3]; dst_argb += 4; @@ -683,7 +744,7 @@ void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) { } // Convert a row of image to Sepia tone. 
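// Editor's note (not part of this patch): ARGBGrayRow above reuses the Q7
// fixed-point RGBToYJ, whose weights 38 + 75 + 15 sum to 128, so white maps
// to white: RGBToYJ(255, 255, 255) = (128 * 255 + 64) >> 7 = 32704 >> 7 =
// 255. The sepia filter below (its body is elided by this hunk) applies the
// same shift-by-7 weighted-sum pattern with per-channel sepia weights.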
-void ARGBSepiaRow_C(uint8* dst_argb, int width) { +void ARGBSepiaRow_C(uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { int b = dst_argb[0]; @@ -702,22 +763,28 @@ void ARGBSepiaRow_C(uint8* dst_argb, int width) { // Apply color matrix to a row of image. Matrix is signed. // TODO(fbarchard): Consider adding rounding (+32). -void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width) { +void ARGBColorMatrixRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width) { int x; for (x = 0; x < width; ++x) { int b = src_argb[0]; int g = src_argb[1]; int r = src_argb[2]; int a = src_argb[3]; - int sb = (b * matrix_argb[0] + g * matrix_argb[1] + - r * matrix_argb[2] + a * matrix_argb[3]) >> 6; - int sg = (b * matrix_argb[4] + g * matrix_argb[5] + - r * matrix_argb[6] + a * matrix_argb[7]) >> 6; - int sr = (b * matrix_argb[8] + g * matrix_argb[9] + - r * matrix_argb[10] + a * matrix_argb[11]) >> 6; - int sa = (b * matrix_argb[12] + g * matrix_argb[13] + - r * matrix_argb[14] + a * matrix_argb[15]) >> 6; + int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] + + a * matrix_argb[3]) >> + 6; + int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] + + a * matrix_argb[7]) >> + 6; + int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] + + a * matrix_argb[11]) >> + 6; + int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] + + a * matrix_argb[15]) >> + 6; dst_argb[0] = Clamp(sb); dst_argb[1] = Clamp(sg); dst_argb[2] = Clamp(sr); @@ -728,7 +795,9 @@ void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb, } // Apply color table to a row of image. -void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { +void ARGBColorTableRow_C(uint8_t* dst_argb, + const uint8_t* table_argb, + int width) { int x; for (x = 0; x < width; ++x) { int b = dst_argb[0]; @@ -744,7 +813,9 @@ void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { } // Apply color table to a row of image. 
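// Editor's sketch (not part of this patch): these color-table rows remap
// each channel through a caller-supplied lookup table. A hypothetical
// per-channel gamma table illustrates the intended use:
#include <math.h>
#include <stdint.h>
static void MakeGammaTable(uint8_t table[256], double gamma) {
  int i;
  for (i = 0; i < 256; ++i) {
    table[i] = (uint8_t)(255.0 * pow(i / 255.0, gamma) + 0.5);
  }
}
// Each output byte is then table[input_byte] for its channel; the exact
// table layout expected here is defined by the elided function bodies.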
-void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { +void RGBColorTableRow_C(uint8_t* dst_argb, + const uint8_t* table_argb, + int width) { int x; for (x = 0; x < width; ++x) { int b = dst_argb[0]; @@ -757,8 +828,11 @@ void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { } } -void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width) { +void ARGBQuantizeRow_C(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width) { int x; for (x = 0; x < width; ++x) { int b = dst_argb[0]; @@ -772,21 +846,23 @@ void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size, } #define REPEAT8(v) (v) | ((v) << 8) -#define SHADE(f, v) v * f >> 24 +#define SHADE(f, v) v* f >> 24 -void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value) { - const uint32 b_scale = REPEAT8(value & 0xff); - const uint32 g_scale = REPEAT8((value >> 8) & 0xff); - const uint32 r_scale = REPEAT8((value >> 16) & 0xff); - const uint32 a_scale = REPEAT8(value >> 24); +void ARGBShadeRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value) { + const uint32_t b_scale = REPEAT8(value & 0xff); + const uint32_t g_scale = REPEAT8((value >> 8) & 0xff); + const uint32_t r_scale = REPEAT8((value >> 16) & 0xff); + const uint32_t a_scale = REPEAT8(value >> 24); int i; for (i = 0; i < width; ++i) { - const uint32 b = REPEAT8(src_argb[0]); - const uint32 g = REPEAT8(src_argb[1]); - const uint32 r = REPEAT8(src_argb[2]); - const uint32 a = REPEAT8(src_argb[3]); + const uint32_t b = REPEAT8(src_argb[0]); + const uint32_t g = REPEAT8(src_argb[1]); + const uint32_t r = REPEAT8(src_argb[2]); + const uint32_t a = REPEAT8(src_argb[3]); dst_argb[0] = SHADE(b, b_scale); dst_argb[1] = SHADE(g, g_scale); dst_argb[2] = SHADE(r, r_scale); @@ -799,20 +875,22 @@ void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, #undef SHADE #define REPEAT8(v) (v) | ((v) << 8) -#define SHADE(f, v) v * f >> 16 +#define SHADE(f, v) v* f >> 16 -void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { +void ARGBMultiplyRow_C(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { int i; for (i = 0; i < width; ++i) { - const uint32 b = REPEAT8(src_argb0[0]); - const uint32 g = REPEAT8(src_argb0[1]); - const uint32 r = REPEAT8(src_argb0[2]); - const uint32 a = REPEAT8(src_argb0[3]); - const uint32 b_scale = src_argb1[0]; - const uint32 g_scale = src_argb1[1]; - const uint32 r_scale = src_argb1[2]; - const uint32 a_scale = src_argb1[3]; + const uint32_t b = REPEAT8(src_argb0[0]); + const uint32_t g = REPEAT8(src_argb0[1]); + const uint32_t r = REPEAT8(src_argb0[2]); + const uint32_t a = REPEAT8(src_argb0[3]); + const uint32_t b_scale = src_argb1[0]; + const uint32_t g_scale = src_argb1[1]; + const uint32_t r_scale = src_argb1[2]; + const uint32_t a_scale = src_argb1[3]; dst_argb[0] = SHADE(b, b_scale); dst_argb[1] = SHADE(g, g_scale); dst_argb[2] = SHADE(r, r_scale); @@ -827,8 +905,10 @@ void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1, #define SHADE(f, v) clamp255(v + f) -void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { +void ARGBAddRow_C(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { int i; for (i = 0; i < width; ++i) { const int b = src_argb0[0]; @@ -852,8 +932,10 @@ void 
ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1, #define SHADE(f, v) clamp0(f - v) -void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { +void ARGBSubtractRow_C(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { int i; for (i = 0; i < width; ++i) { const int b = src_argb0[0]; @@ -876,8 +958,11 @@ void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1, #undef SHADE // Sobel functions which mimic SSSE3. -void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, - uint8* dst_sobelx, int width) { +void SobelXRow_C(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int width) { int i; for (i = 0; i < width; ++i) { int a = src_y0[i]; @@ -890,12 +975,14 @@ void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, int b_diff = b - b_sub; int c_diff = c - c_sub; int sobel = Abs(a_diff + b_diff * 2 + c_diff); - dst_sobelx[i] = (uint8)(clamp255(sobel)); + dst_sobelx[i] = (uint8_t)(clamp255(sobel)); } } -void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width) { +void SobelYRow_C(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width) { int i; for (i = 0; i < width; ++i) { int a = src_y0[i + 0]; @@ -908,56 +995,62 @@ void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, int b_diff = b - b_sub; int c_diff = c - c_sub; int sobel = Abs(a_diff + b_diff * 2 + c_diff); - dst_sobely[i] = (uint8)(clamp255(sobel)); + dst_sobely[i] = (uint8_t)(clamp255(sobel)); } } -void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { +void SobelRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width) { int i; for (i = 0; i < width; ++i) { int r = src_sobelx[i]; int b = src_sobely[i]; int s = clamp255(r + b); - dst_argb[0] = (uint8)(s); - dst_argb[1] = (uint8)(s); - dst_argb[2] = (uint8)(s); - dst_argb[3] = (uint8)(255u); + dst_argb[0] = (uint8_t)(s); + dst_argb[1] = (uint8_t)(s); + dst_argb[2] = (uint8_t)(s); + dst_argb[3] = (uint8_t)(255u); dst_argb += 4; } } -void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width) { +void SobelToPlaneRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width) { int i; for (i = 0; i < width; ++i) { int r = src_sobelx[i]; int b = src_sobely[i]; int s = clamp255(r + b); - dst_y[i] = (uint8)(s); + dst_y[i] = (uint8_t)(s); } } -void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { +void SobelXYRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width) { int i; for (i = 0; i < width; ++i) { int r = src_sobelx[i]; int b = src_sobely[i]; int g = clamp255(r + b); - dst_argb[0] = (uint8)(b); - dst_argb[1] = (uint8)(g); - dst_argb[2] = (uint8)(r); - dst_argb[3] = (uint8)(255u); + dst_argb[0] = (uint8_t)(b); + dst_argb[1] = (uint8_t)(g); + dst_argb[2] = (uint8_t)(r); + dst_argb[3] = (uint8_t)(255u); dst_argb += 4; } } -void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { +void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) { // Copy a Y to RGB.
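// (Editor's note, not part of this patch: each gray Y sample is replicated
// into B, G and R below, with alpha forced to 255, so a J400 row becomes an
// opaque gray ARGB row.)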
int x; for (x = 0; x < width; ++x) { - uint8 y = src_y[0]; + uint8_t y = src_y[0]; dst_argb[2] = dst_argb[1] = dst_argb[0] = y; dst_argb[3] = 255u; dst_argb += 4; @@ -974,75 +1067,69 @@ void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { // B = (Y - 16) * 1.164 - U * -2.018 // Y contribution to R,G,B. Scale and bias. -#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ +#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ // U and V contributions to R,G,B. #define UB -128 /* max(-128, round(-2.018 * 64)) */ -#define UG 25 /* round(0.391 * 64) */ -#define VG 52 /* round(0.813 * 64) */ +#define UG 25 /* round(0.391 * 64) */ +#define VG 52 /* round(0.813 * 64) */ #define VR -102 /* round(-1.596 * 64) */ // Bias values to subtract 16 from Y and 128 from U and V. -#define BB (UB * 128 + YGB) +#define BB (UB * 128 + YGB) #define BG (UG * 128 + VG * 128 + YGB) -#define BR (VR * 128 + YGB) +#define BR (VR * 128 + YGB) #if defined(__aarch64__) // 64 bit arm const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = { - { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR }, - { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR }, - { UG, VG, UG, VG, UG, VG, UG, VG }, - { UG, VG, UG, VG, UG, VG, UG, VG }, - { BB, BG, BR, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; + {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, + {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, + {UG, VG, UG, VG, UG, VG, UG, VG}, + {UG, VG, UG, VG, UG, VG, UG, VG}, + {BB, BG, BR, 0, 0, 0, 0, 0}, + {0x0101 * YG, 0, 0, 0}}; const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = { - { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB }, - { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB }, - { VG, UG, VG, UG, VG, UG, VG, UG }, - { VG, UG, VG, UG, VG, UG, VG, UG }, - { BR, BG, BB, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; + {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, + {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, + {VG, UG, VG, UG, VG, UG, VG, UG}, + {VG, UG, VG, UG, VG, UG, VG, UG}, + {BR, BG, BB, 0, 0, 0, 0, 0}, + {0x0101 * YG, 0, 0, 0}}; #elif defined(__arm__) // 32 bit arm const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = { - { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 }, - { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BB, BG, BR, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; + {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0}, + {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, + {BB, BG, BR, 0, 0, 0, 0, 0}, + {0x0101 * YG, 0, 0, 0}}; const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = { - { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 }, - { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BR, BG, BB, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; + {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0}, + {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, + {BR, BG, BB, 0, 0, 0, 0, 0}, + {0x0101 * YG, 0, 0, 0}}; #else const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = { - { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, - UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 }, - { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, - UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG }, - { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, - 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR }, - { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB }, - { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, 
BG, BG, BG, BG, BG, BG }, - { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, - { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } -}; + {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, + UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, + {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, + UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, + {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, + 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, + {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, + {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, + {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, + {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}}; const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = { - { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, - VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 }, - { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, - VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG }, - { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, - 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB }, - { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, - { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG }, - { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB }, - { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } -}; + {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, + VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0}, + {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, + VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, + {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, + 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB}, + {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, + {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, + {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, + {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}}; #endif #undef BB @@ -1062,74 +1149,68 @@ const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = { // Y contribution to R,G,B. Scale and bias. #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */ -#define YGB 32 /* 64 / 2 */ +#define YGB 32 /* 64 / 2 */ // U and V contributions to R,G,B. #define UB -113 /* round(-1.77200 * 64) */ -#define UG 22 /* round(0.34414 * 64) */ -#define VG 46 /* round(0.71414 * 64) */ -#define VR -90 /* round(-1.40200 * 64) */ +#define UG 22 /* round(0.34414 * 64) */ +#define VG 46 /* round(0.71414 * 64) */ +#define VR -90 /* round(-1.40200 * 64) */ // Bias values to round, and subtract 128 from U and V. 
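// Editor's worked values (not part of this patch), substituting the JPEG
// constants above: BB = -113 * 128 + 32 = -14432, BG = 22 * 128 + 46 * 128 +
// 32 = 8736, BR = -90 * 128 + 32 = -11488. For a neutral-chroma pixel
// (U = V = 128) the per-pixel U/V products cancel the UB/UG/VG/VR parts of
// these biases exactly, leaving only the YGB rounding term.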
-#define BB (UB * 128 + YGB) +#define BB (UB * 128 + YGB) #define BG (UG * 128 + VG * 128 + YGB) -#define BR (VR * 128 + YGB) +#define BR (VR * 128 + YGB) #if defined(__aarch64__) const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = { - { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR }, - { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR }, - { UG, VG, UG, VG, UG, VG, UG, VG }, - { UG, VG, UG, VG, UG, VG, UG, VG }, - { BB, BG, BR, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; + {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, + {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, + {UG, VG, UG, VG, UG, VG, UG, VG}, + {UG, VG, UG, VG, UG, VG, UG, VG}, + {BB, BG, BR, 0, 0, 0, 0, 0}, + {0x0101 * YG, 0, 0, 0}}; const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { - { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB }, - { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB }, - { VG, UG, VG, UG, VG, UG, VG, UG }, - { VG, UG, VG, UG, VG, UG, VG, UG }, - { BR, BG, BB, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; + {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, + {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, + {VG, UG, VG, UG, VG, UG, VG, UG}, + {VG, UG, VG, UG, VG, UG, VG, UG}, + {BR, BG, BB, 0, 0, 0, 0, 0}, + {0x0101 * YG, 0, 0, 0}}; #elif defined(__arm__) const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = { - { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 }, - { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BB, BG, BR, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; + {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0}, + {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, + {BB, BG, BR, 0, 0, 0, 0, 0}, + {0x0101 * YG, 0, 0, 0}}; const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { - { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 }, - { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BR, BG, BB, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; + {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0}, + {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, + {BR, BG, BB, 0, 0, 0, 0, 0}, + {0x0101 * YG, 0, 0, 0}}; #else const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = { - { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, - UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 }, - { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, - UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG }, - { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, - 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR }, - { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB }, - { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG }, - { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, - { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } -}; + {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, + UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, + {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, + UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, + {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, + 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, + {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, + {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, + {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, + {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}}; const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { - { VR, 
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, - VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 }, - { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, - VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG }, - { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, - 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB }, - { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, - { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG }, - { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB }, - { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } -}; + {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, + VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0}, + {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, + VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, + {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, + 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB}, + {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, + {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, + {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, + {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}}; #endif #undef BB @@ -1143,81 +1224,76 @@ const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { #undef YG // BT.709 YUV to RGB reference -// * R = Y - V * -1.28033 -// * G = Y - U * 0.21482 - V * 0.38059 -// * B = Y - U * -2.12798 +// R = (Y - 16) * 1.164 - V * -1.793 +// G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533 +// B = (Y - 16) * 1.164 - U * -2.112 +// See also http://www.equasys.de/colorconversion.html // Y contribution to R,G,B. Scale and bias. -#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */ -#define YGB 32 /* 64 / 2 */ +#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ +#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ -// TODO(fbarchard): Find way to express 2.12 instead of 2.0. +// TODO(fbarchard): Find way to express 2.112 instead of 2.0. // U and V contributions to R,G,B. -#define UB -128 /* max(-128, round(-2.12798 * 64)) */ -#define UG 14 /* round(0.21482 * 64) */ -#define VG 24 /* round(0.38059 * 64) */ -#define VR -82 /* round(-1.28033 * 64) */ +#define UB -128 /* max(-128, round(-2.112 * 64)) */ +#define UG 14 /* round(0.213 * 64) */ +#define VG 34 /* round(0.533 * 64) */ +#define VR -115 /* round(-1.793 * 64) */ // Bias values to round, and subtract 128 from U and V. 
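// Note on the UB value above: round(-2.112 * 64) is -135, which does not
// fit the signed 8 bit coefficient lanes the SIMD kernels multiply with,
// so it saturates to -128, effectively using 2.0 instead of 2.112 -- the
// TODO above tracks this. The visible effect is slightly muted blue for
// BT.709 sources; the C reference mirrors the clamp so results match.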
-#define BB (UB * 128 + YGB) +#define BB (UB * 128 + YGB) #define BG (UG * 128 + VG * 128 + YGB) -#define BR (VR * 128 + YGB) +#define BR (VR * 128 + YGB) #if defined(__aarch64__) const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = { - { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR }, - { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR }, - { UG, VG, UG, VG, UG, VG, UG, VG }, - { UG, VG, UG, VG, UG, VG, UG, VG }, - { BB, BG, BR, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; + {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, + {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, + {UG, VG, UG, VG, UG, VG, UG, VG}, + {UG, VG, UG, VG, UG, VG, UG, VG}, + {BB, BG, BR, 0, 0, 0, 0, 0}, + {0x0101 * YG, 0, 0, 0}}; const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { - { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB }, - { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB }, - { VG, UG, VG, UG, VG, UG, VG, UG }, - { VG, UG, VG, UG, VG, UG, VG, UG }, - { BR, BG, BB, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; + {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, + {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, + {VG, UG, VG, UG, VG, UG, VG, UG}, + {VG, UG, VG, UG, VG, UG, VG, UG}, + {BR, BG, BB, 0, 0, 0, 0, 0}, + {0x0101 * YG, 0, 0, 0}}; #elif defined(__arm__) const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = { - { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 }, - { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BB, BG, BR, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; + {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0}, + {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, + {BB, BG, BR, 0, 0, 0, 0, 0}, + {0x0101 * YG, 0, 0, 0}}; const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { - { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 }, - { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BR, BG, BB, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; + {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0}, + {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, + {BR, BG, BB, 0, 0, 0, 0, 0}, + {0x0101 * YG, 0, 0, 0}}; #else const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = { - { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, - UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 }, - { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, - UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG }, - { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, - 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR }, - { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB }, - { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG }, - { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, - { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } -}; + {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, + UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, + {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, + UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, + {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, + 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, + {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, + {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, + {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, + {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}}; const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { - { VR, 
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, - VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 }, - { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, - VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG }, - { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, - 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB }, - { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, - { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG }, - { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB }, - { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } -}; + {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, + VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0}, + {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, + VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, + {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, + 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB}, + {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, + {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, + {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, + {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}}; #endif #undef BB @@ -1231,8 +1307,14 @@ const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { #undef YG // C reference code that mimics the YUV assembly. -static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, - uint8* b, uint8* g, uint8* r, +// Reads 8 bit YUV and writes 8 bit clamped RGB. + +static __inline void YuvPixel(uint8_t y, + uint8_t u, + uint8_t v, + uint8_t* b, + uint8_t* g, + uint8_t* r, const struct YuvConstants* yuvconstants) { #if defined(__aarch64__) int ub = -yuvconstants->kUVToRB[0]; @@ -1263,22 +1345,129 @@ static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, int yg = yuvconstants->kYToRgb[0]; #endif - uint32 y1 = (uint32)(y * 0x0101 * yg) >> 16; - *b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6); - *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6); - *r = Clamp((int32) (-(v * vr) + y1 + br) >> 6); + uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; + *b = Clamp((int32_t)(-(u * ub) + y1 + bb) >> 6); + *g = Clamp((int32_t)(-(u * ug + v * vg) + y1 + bg) >> 6); + *r = Clamp((int32_t)(-(v * vr) + y1 + br) >> 6); +} + +// Reads 8 bit YUV and leaves result as 16 bit.
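// The 16 bit value here is the 10.6 fixed point intermediate (roughly
// the 8 bit result times 64): YuvPixel8_16 below skips the final >> 6
// and Clamp so AR30 packing can round once at store time, while
// YuvPixel16/YuvPixel10 accept 10 bit samples, pre-clamping U and V down
// to 8 bit to fit the 8 bit coefficients.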
+static __inline void YuvPixel8_16(uint8_t y, + uint8_t u, + uint8_t v, + int* b, + int* g, + int* r, + const struct YuvConstants* yuvconstants) { +#if defined(__aarch64__) + int ub = -yuvconstants->kUVToRB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[1]; + int vr = -yuvconstants->kUVToRB[1]; + int bb = yuvconstants->kUVBiasBGR[0]; + int bg = yuvconstants->kUVBiasBGR[1]; + int br = yuvconstants->kUVBiasBGR[2]; + int yg = yuvconstants->kYToRgb[0] / 0x0101; +#elif defined(__arm__) + int ub = -yuvconstants->kUVToRB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[4]; + int vr = -yuvconstants->kUVToRB[4]; + int bb = yuvconstants->kUVBiasBGR[0]; + int bg = yuvconstants->kUVBiasBGR[1]; + int br = yuvconstants->kUVBiasBGR[2]; + int yg = yuvconstants->kYToRgb[0] / 0x0101; +#else + int ub = yuvconstants->kUVToB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[1]; + int vr = yuvconstants->kUVToR[1]; + int bb = yuvconstants->kUVBiasB[0]; + int bg = yuvconstants->kUVBiasG[0]; + int br = yuvconstants->kUVBiasR[0]; + int yg = yuvconstants->kYToRgb[0]; +#endif + + uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; + *b = (int)(-(u * ub) + y1 + bb); + *g = (int)(-(u * ug + v * vg) + y1 + bg); + *r = (int)(-(v * vr) + y1 + br); +} + +// C reference code that mimics the YUV 16 bit assembly. +// Reads 10 bit YUV and leaves result as 16 bit. +static __inline void YuvPixel16(int16_t y, + int16_t u, + int16_t v, + int* b, + int* g, + int* r, + const struct YuvConstants* yuvconstants) { +#if defined(__aarch64__) + int ub = -yuvconstants->kUVToRB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[1]; + int vr = -yuvconstants->kUVToRB[1]; + int bb = yuvconstants->kUVBiasBGR[0]; + int bg = yuvconstants->kUVBiasBGR[1]; + int br = yuvconstants->kUVBiasBGR[2]; + int yg = yuvconstants->kYToRgb[0] / 0x0101; +#elif defined(__arm__) + int ub = -yuvconstants->kUVToRB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[4]; + int vr = -yuvconstants->kUVToRB[4]; + int bb = yuvconstants->kUVBiasBGR[0]; + int bg = yuvconstants->kUVBiasBGR[1]; + int br = yuvconstants->kUVBiasBGR[2]; + int yg = yuvconstants->kYToRgb[0] / 0x0101; +#else + int ub = yuvconstants->kUVToB[0]; + int ug = yuvconstants->kUVToG[0]; + int vg = yuvconstants->kUVToG[1]; + int vr = yuvconstants->kUVToR[1]; + int bb = yuvconstants->kUVBiasB[0]; + int bg = yuvconstants->kUVBiasG[0]; + int br = yuvconstants->kUVBiasR[0]; + int yg = yuvconstants->kYToRgb[0]; +#endif + + uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16; + u = clamp255(u >> 2); + v = clamp255(v >> 2); + *b = (int)(-(u * ub) + y1 + bb); + *g = (int)(-(u * ug + v * vg) + y1 + bg); + *r = (int)(-(v * vr) + y1 + br); +} + +// C reference code that mimics the YUV 10 bit assembly. +// Reads 10 bit YUV and clamps down to 8 bit RGB. +static __inline void YuvPixel10(uint16_t y, + uint16_t u, + uint16_t v, + uint8_t* b, + uint8_t* g, + uint8_t* r, + const struct YuvConstants* yuvconstants) { + int b16; + int g16; + int r16; + YuvPixel16(y, u, v, &b16, &g16, &r16, yuvconstants); + *b = Clamp(b16 >> 6); + *g = Clamp(g16 >> 6); + *r = Clamp(r16 >> 6); } // Y contribution to R,G,B. Scale and bias. -#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ +#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ // C reference code that mimics the YUV assembly. 
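// Worked example for YPixel below, using the YG/YGB constants above
// (BT.601 video range): y = 16 gives y1 = (16 * 0x0101 * 18997) >> 16
// = 1191 and (1191 - 1160) >> 6 = 0; y = 235 gives y1 = 17507 and
// (17507 - 1160) >> 6 = 255. So I400's 16..235 video range expands to
// a full 0..255 gray ramp.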
-static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { - uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16; - *b = Clamp((int32)(y1 + YGB) >> 6); - *g = Clamp((int32)(y1 + YGB) >> 6); - *r = Clamp((int32)(y1 + YGB) >> 6); +static __inline void YPixel(uint8_t y, uint8_t* b, uint8_t* g, uint8_t* r) { + uint32_t y1 = (uint32_t)(y * 0x0101 * YG) >> 16; + *b = Clamp((int32_t)(y1 + YGB) >> 6); + *g = Clamp((int32_t)(y1 + YGB) >> 6); + *r = Clamp((int32_t)(y1 + YGB) >> 6); } #undef YG @@ -1288,16 +1477,16 @@ static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON)) // C mimic assembly. // TODO(fbarchard): Remove subsampling from Neon. -void I444ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I444ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { - uint8 u = (src_u[0] + src_u[1] + 1) >> 1; - uint8 v = (src_v[0] + src_v[1] + 1) >> 1; + uint8_t u = (src_u[0] + src_u[1] + 1) >> 1; + uint8_t v = (src_v[0] + src_v[1] + 1) >> 1; YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; @@ -1310,22 +1499,22 @@ void I444ToARGBRow_C(const uint8* src_y, rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } #else -void I444ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I444ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width; ++x) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; src_y += 1; src_u += 1; @@ -1336,19 +1525,19 @@ void I444ToARGBRow_C(const uint8* src_y, #endif // Also used for 420 -void I422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I422ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, + rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_u += 1; @@ -1356,26 +1545,120 @@ void I422ToARGBRow_C(const uint8* src_y, rgb_buf += 8; // Advance 2 pixels. 
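    // 4:2:2 stepping: the two Y samples above share one U,V pair, so
    // src_y advances by 2 while src_u and src_v advance by 1.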
} if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } -void I422AlphaToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const uint8* src_a, - uint8* rgb_buf, +// 10 bit YUV to ARGB +void I210ToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width - 1; x += 2) { + YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + rgb_buf[3] = 255; + YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, + rgb_buf + 6, yuvconstants); + rgb_buf[7] = 255; + src_y += 2; + src_u += 1; + src_v += 1; + rgb_buf += 8; // Advance 2 pixels. + } + if (width & 1) { + YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + rgb_buf[3] = 255; + } +} + +static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) { + uint32_t ar30; + b = b >> 4; // convert 10.6 to 10 bit. + g = g >> 4; + r = r >> 4; + b = Clamp10(b); + g = Clamp10(g); + r = Clamp10(r); + ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000; + (*(uint32_t*)rgb_buf) = ar30; +} + +// 10 bit YUV to 10 bit AR30 +void I210ToAR30Row_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int b; + int g; + int r; + for (x = 0; x < width - 1; x += 2) { + YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants); + StoreAR30(rgb_buf, b, g, r); + YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants); + StoreAR30(rgb_buf + 4, b, g, r); + src_y += 2; + src_u += 1; + src_v += 1; + rgb_buf += 8; // Advance 2 pixels. + } + if (width & 1) { + YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants); + StoreAR30(rgb_buf, b, g, r); + } +} + +// 8 bit YUV to 10 bit AR30 +// Uses the same code as 10 bit YUV, but shifts the 8 bit values up to 10 bits. +void I422ToAR30Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + int b; + int g; + int r; + for (x = 0; x < width - 1; x += 2) { + YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants); + StoreAR30(rgb_buf, b, g, r); + YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants); + StoreAR30(rgb_buf + 4, b, g, r); + src_y += 2; + src_u += 1; + src_v += 1; + rgb_buf += 8; // Advance 2 pixels.
+ } + if (width & 1) { + YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants); + StoreAR30(rgb_buf, b, g, r); + } +} + +void I422AlphaToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = src_a[0]; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, + rgb_buf + 6, yuvconstants); rgb_buf[7] = src_a[1]; src_y += 2; src_u += 1; @@ -1384,47 +1667,47 @@ void I422AlphaToARGBRow_C(const uint8* src_y, rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = src_a[0]; } } -void I422ToRGB24Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I422ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 3, rgb_buf + 4, rgb_buf + 5, yuvconstants); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4, + rgb_buf + 5, yuvconstants); src_y += 2; src_u += 1; src_v += 1; rgb_buf += 6; // Advance 2 pixels. 
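    // RGB24 packs 3 bytes per pixel with no alpha byte, so two pixels
    // advance the destination by 6 instead of the 8 used for ARGB.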
} if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); } } -void I422ToARGB4444Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; + uint8_t b0; + uint8_t g0; + uint8_t r0; + uint8_t b1; + uint8_t g1; + uint8_t r1; int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); @@ -1435,8 +1718,8 @@ void I422ToARGB4444Row_C(const uint8* src_y, b1 = b1 >> 4; g1 = g1 >> 4; r1 = r1 >> 4; - *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | - (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000; + *(uint32_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) | + (g1 << 20) | (r1 << 24) | 0xf000f000; src_y += 2; src_u += 1; src_v += 1; @@ -1447,23 +1730,22 @@ void I422ToARGB4444Row_C(const uint8* src_y, b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; - *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | - 0xf000; + *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000; } } -void I422ToARGB1555Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; + uint8_t b0; + uint8_t g0; + uint8_t r0; + uint8_t b1; + uint8_t g1; + uint8_t r1; int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); @@ -1474,8 +1756,8 @@ void I422ToARGB1555Row_C(const uint8* src_y, b1 = b1 >> 3; g1 = g1 >> 3; r1 = r1 >> 3; - *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | - (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000; + *(uint32_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) | + (g1 << 21) | (r1 << 26) | 0x80008000; src_y += 2; src_u += 1; src_v += 1; @@ -1486,23 +1768,22 @@ void I422ToARGB1555Row_C(const uint8* src_y, b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; - *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | - 0x8000; + *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000; } } -void I422ToRGB565Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; + uint8_t b0; + uint8_t g0; + uint8_t r0; + uint8_t b1; + uint8_t g1; + uint8_t r1; int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); @@ -1513,8 +1794,8 @@ void I422ToRGB565Row_C(const uint8* src_y, b1 = b1 >> 3; g1 = g1 >> 2; r1 = r1 >> 3; - *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | - (b1 << 16) | (g1 << 21) | (r1 << 27); + *(uint32_t*)(dst_rgb565) = + b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); src_y += 2; src_u += 1; src_v += 1; @@ -1525,111 +1806,111 @@ void I422ToRGB565Row_C(const uint8* src_y, b0 = b0 >> 3; g0 = g0 
>> 2; r0 = r0 >> 3; - *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); + *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); } } -void I411ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - int x; - for (x = 0; x < width - 3; x += 4) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); - rgb_buf[7] = 255; - YuvPixel(src_y[2], src_u[0], src_v[0], - rgb_buf + 8, rgb_buf + 9, rgb_buf + 10, yuvconstants); - rgb_buf[11] = 255; - YuvPixel(src_y[3], src_u[0], src_v[0], - rgb_buf + 12, rgb_buf + 13, rgb_buf + 14, yuvconstants); - rgb_buf[15] = 255; - src_y += 4; - src_u += 1; - src_v += 1; - rgb_buf += 16; // Advance 4 pixels. - } - if (width & 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); - rgb_buf[7] = 255; - src_y += 2; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); - rgb_buf[3] = 255; - } -} - -void NV12ToARGBRow_C(const uint8* src_y, - const uint8* src_uv, - uint8* rgb_buf, +void NV12ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_uv[0], src_uv[1], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; - YuvPixel(src_y[1], src_uv[0], src_uv[1], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); + YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5, + rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_uv += 2; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(src_y[0], src_uv[0], src_uv[1], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } -void NV21ToARGBRow_C(const uint8* src_y, - const uint8* src_vu, - uint8* rgb_buf, +void NV21ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_vu[1], src_vu[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; - YuvPixel(src_y[1], src_vu[1], src_vu[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); + YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5, + rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_vu += 2; rgb_buf += 8; // Advance 2 pixels. 
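    // NV21 interleaves V before U, so src_vu[1] is passed as U and
    // src_vu[0] as V -- the mirror image of the NV12 ordering above.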
} if (width & 1) { - YuvPixel(src_y[0], src_vu[1], src_vu[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } -void NV12ToRGB565Row_C(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void NV12ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width - 1; x += 2) { + YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4, + rgb_buf + 5, yuvconstants); + src_y += 2; + src_uv += 2; + rgb_buf += 6; // Advance 2 pixels. + } + if (width & 1) { + YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + } +} + +void NV21ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width - 1; x += 2) { + YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4, + rgb_buf + 5, yuvconstants); + src_y += 2; + src_vu += 2; + rgb_buf += 6; // Advance 2 pixels. + } + if (width & 1) { + YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + } +} + +void NV12ToRGB565Row_C(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; + uint8_t b0; + uint8_t g0; + uint8_t r0; + uint8_t b1; + uint8_t g1; + uint8_t r1; int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); @@ -1640,8 +1921,8 @@ void NV12ToRGB565Row_C(const uint8* src_y, b1 = b1 >> 3; g1 = g1 >> 2; r1 = r1 >> 3; - *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | - (b1 << 16) | (g1 << 21) | (r1 << 27); + *(uint32_t*)(dst_rgb565) = + b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); src_y += 2; src_uv += 2; dst_rgb565 += 4; // Advance 2 pixels. @@ -1651,67 +1932,67 @@ void NV12ToRGB565Row_C(const uint8* src_y, b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; - *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); + *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); } } -void YUY2ToARGBRow_C(const uint8* src_yuy2, - uint8* rgb_buf, +void YUY2ToARGBRow_C(const uint8_t* src_yuy2, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; - YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); + YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5, + rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_yuy2 += 4; rgb_buf += 8; // Advance 2 pixels. 
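    // YUY2 packs Y0 U Y1 V into each 4 bytes: both output pixels reuse
    // the shared chroma at bytes 1 and 3, with luma from bytes 0 and 2.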
} if (width & 1) { - YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } -void UYVYToARGBRow_C(const uint8* src_uyvy, - uint8* rgb_buf, +void UYVYToARGBRow_C(const uint8_t* src_uyvy, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; - YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); + YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5, + rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_uyvy += 4; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { - YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); + YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } -void I422ToRGBARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, +void I422ToRGBARow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2, + rgb_buf + 3, yuvconstants); rgb_buf[0] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 5, rgb_buf + 6, rgb_buf + 7, yuvconstants); + YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6, + rgb_buf + 7, yuvconstants); rgb_buf[4] = 255; src_y += 2; src_u += 1; @@ -1719,13 +2000,13 @@ void I422ToRGBARow_C(const uint8* src_y, rgb_buf += 8; // Advance 2 pixels. 
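    // For RGBA output the first byte of each pixel holds alpha, so the
    // color channels land at offsets 1..3 and 5..7 rather than 0..2.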
} if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants); + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2, + rgb_buf + 3, yuvconstants); rgb_buf[0] = 255; } } -void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) { +void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width) { int x; for (x = 0; x < width - 1; x += 2) { YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); @@ -1741,7 +2022,7 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) { } } -void MirrorRow_C(const uint8* src, uint8* dst, int width) { +void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) { int x; src += width - 1; for (x = 0; x < width - 1; x += 2) { @@ -1754,7 +2035,10 @@ void MirrorRow_C(const uint8* src, uint8* dst, int width) { } } -void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { +void MirrorUVRow_C(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { int x; src_uv += (width - 1) << 1; for (x = 0; x < width - 1; x += 2) { @@ -1770,10 +2054,10 @@ void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { } } -void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) { +void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) { int x; - const uint32* src32 = (const uint32*)(src); - uint32* dst32 = (uint32*)(dst); + const uint32_t* src32 = (const uint32_t*)(src); + uint32_t* dst32 = (uint32_t*)(dst); src32 += width - 1; for (x = 0; x < width - 1; x += 2) { dst32[x] = src32[0]; @@ -1785,7 +2069,10 @@ void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) { } } -void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { +void SplitUVRow_C(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { int x; for (x = 0; x < width - 1; x += 2) { dst_u[x] = src_uv[0]; @@ -1800,7 +2087,9 @@ void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { } } -void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv, +void MergeUVRow_C(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { int x; for (x = 0; x < width - 1; x += 2) { @@ -1816,20 +2105,110 @@ void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv, } } -void CopyRow_C(const uint8* src, uint8* dst, int count) { +void SplitRGBRow_C(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width) { + int x; + for (x = 0; x < width; ++x) { + dst_r[x] = src_rgb[0]; + dst_g[x] = src_rgb[1]; + dst_b[x] = src_rgb[2]; + src_rgb += 3; + } +} + +void MergeRGBRow_C(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, + int width) { + int x; + for (x = 0; x < width; ++x) { + dst_rgb[0] = src_r[x]; + dst_rgb[1] = src_g[x]; + dst_rgb[2] = src_b[x]; + dst_rgb += 3; + } +} + +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 128 = 9 bits +// 64 = 10 bits +// 16 = 12 bits +// 1 = 16 bits +void MergeUVRow_16_C(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, + int scale, + int width) { + int x; + for (x = 0; x < width - 1; x += 2) { + dst_uv[0] = src_u[x] * scale; + dst_uv[1] = src_v[x] * scale; + dst_uv[2] = src_u[x + 1] * scale; + dst_uv[3] = src_v[x + 1] * scale; + dst_uv += 4; + } + if (width & 1) { + dst_uv[0] = src_u[width - 1] * scale; + dst_uv[1] = src_v[width - 1] * scale; + } +} + +void MultiplyRow_16_C(const uint16_t* src_y, + 
uint16_t* dst_y, + int scale, + int width) { + int x; + for (x = 0; x < width; ++x) { + dst_y[x] = src_y[x] * scale; + } +} + +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 32768 = 9 bits +// 16384 = 10 bits +// 4096 = 12 bits +// 256 = 16 bits +void Convert16To8Row_C(const uint16_t* src_y, + uint8_t* dst_y, + int scale, + int width) { + int x; + for (x = 0; x < width; ++x) { + dst_y[x] = clamp255((src_y[x] * scale) >> 16); + } +} + +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 1024 = 10 bits +void Convert8To16Row_C(const uint8_t* src_y, + uint16_t* dst_y, + int scale, + int width) { + int x; + scale *= 0x0101; // replicates the byte. + for (x = 0; x < width; ++x) { + dst_y[x] = (src_y[x] * scale) >> 16; + } +} + +void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) { memcpy(dst, src, count); } -void CopyRow_16_C(const uint16* src, uint16* dst, int count) { +void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) { memcpy(dst, src, count * 2); } -void SetRow_C(uint8* dst, uint8 v8, int width) { +void SetRow_C(uint8_t* dst, uint8_t v8, int width) { memset(dst, v8, width); } -void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) { - uint32* d = (uint32*)(dst_argb); +void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) { + uint32_t* d = (uint32_t*)(dst_argb); int x; for (x = 0; x < width; ++x) { d[x] = v32; @@ -1837,8 +2216,11 @@ void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) { } // Filter 2 rows of YUY2 UV's (422) into U and V (420). -void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_u, uint8* dst_v, int width) { +void YUY2ToUVRow_C(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { // Output a row of UV values, filtering 2 rows of YUY2. int x; for (x = 0; x < width; x += 2) { @@ -1851,8 +2233,10 @@ void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2, } // Copy row of YUY2 UV's (422) into U and V (422). -void YUY2ToUV422Row_C(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width) { +void YUY2ToUV422Row_C(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { // Output a row of UV values. int x; for (x = 0; x < width; x += 2) { @@ -1865,7 +2249,7 @@ void YUY2ToUV422Row_C(const uint8* src_yuy2, } // Copy row of YUY2 Y's (422) into Y (420/422). -void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) { +void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { // Output a row of Y values. int x; for (x = 0; x < width - 1; x += 2) { @@ -1879,8 +2263,11 @@ void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) { } // Filter 2 rows of UYVY UV's (422) into U and V (420). -void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_u, uint8* dst_v, int width) { +void UYVYToUVRow_C(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { // Output a row of UV values. int x; for (x = 0; x < width; x += 2) { @@ -1893,8 +2280,10 @@ void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy, } // Copy row of UYVY UV's (422) into U and V (422). -void UYVYToUV422Row_C(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width) { +void UYVYToUV422Row_C(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { // Output a row of UV values. 
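  // Unlike UYVYToUVRow_C above, which filters chroma from two rows down
  // to 420, this 422 variant copies the single row's chroma unchanged.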
int x; for (x = 0; x < width; x += 2) { @@ -1907,7 +2296,7 @@ void UYVYToUV422Row_C(const uint8* src_uyvy, } // Copy row of UYVY Y's (422) into Y (420/422). -void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) { +void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { // Output a row of Y values. int x; for (x = 0; x < width - 1; x += 2) { @@ -1925,17 +2314,19 @@ void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) { // Blend src_argb0 over src_argb1 and store to dst_argb. // dst_argb may be src_argb0 or src_argb1. // This code mimics the SSSE3 version for better testability. -void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { +void ARGBBlendRow_C(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { int x; for (x = 0; x < width - 1; x += 2) { - uint32 fb = src_argb0[0]; - uint32 fg = src_argb0[1]; - uint32 fr = src_argb0[2]; - uint32 a = src_argb0[3]; - uint32 bb = src_argb1[0]; - uint32 bg = src_argb1[1]; - uint32 br = src_argb1[2]; + uint32_t fb = src_argb0[0]; + uint32_t fg = src_argb0[1]; + uint32_t fr = src_argb0[2]; + uint32_t a = src_argb0[3]; + uint32_t bb = src_argb1[0]; + uint32_t bg = src_argb1[1]; + uint32_t br = src_argb1[2]; dst_argb[0] = BLEND(fb, bb, a); dst_argb[1] = BLEND(fg, bg, a); dst_argb[2] = BLEND(fr, br, a); @@ -1958,13 +2349,13 @@ void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1, } if (width & 1) { - uint32 fb = src_argb0[0]; - uint32 fg = src_argb0[1]; - uint32 fr = src_argb0[2]; - uint32 a = src_argb0[3]; - uint32 bb = src_argb1[0]; - uint32 bg = src_argb1[1]; - uint32 br = src_argb1[2]; + uint32_t fb = src_argb0[0]; + uint32_t fg = src_argb0[1]; + uint32_t fr = src_argb0[2]; + uint32_t a = src_argb0[3]; + uint32_t bb = src_argb1[0]; + uint32_t bg = src_argb1[1]; + uint32_t br = src_argb1[2]; dst_argb[0] = BLEND(fb, bb, a); dst_argb[1] = BLEND(fg, bg, a); dst_argb[2] = BLEND(fr, br, a); @@ -1973,9 +2364,12 @@ void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1, } #undef BLEND -#define UBLEND(f, b, a) (((a) * f) + ((255 - a) * b) + 255) >> 8 -void BlendPlaneRow_C(const uint8* src0, const uint8* src1, - const uint8* alpha, uint8* dst, int width) { +#define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8 +void BlendPlaneRow_C(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, + int width) { int x; for (x = 0; x < width - 1; x += 2) { dst[0] = UBLEND(src0[0], src1[0], alpha[0]); @@ -1995,13 +2389,13 @@ void BlendPlaneRow_C(const uint8* src0, const uint8* src1, // Multiply source RGB by alpha and store to destination. // This code mimics the SSSE3 version for better testability. 
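// A worked case for this attenuate/unattenuate pair: r = 255 with
// a = 128 stores roughly 128 (255 * 128 / 255). ARGBUnattenuateRow_C
// further down reverses it by multiplying with the 8.8 fixed point
// reciprocal 0x10000 / a from fixed_invtbl8; per the table's own comment
// the reciprocal is off by one for some values, so the round trip is
// close but not always bit exact.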
-void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { +void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { int i; for (i = 0; i < width - 1; i += 2) { - uint32 b = src_argb[0]; - uint32 g = src_argb[1]; - uint32 r = src_argb[2]; - uint32 a = src_argb[3]; + uint32_t b = src_argb[0]; + uint32_t g = src_argb[1]; + uint32_t r = src_argb[2]; + uint32_t a = src_argb[3]; dst_argb[0] = ATTENUATE(b, a); dst_argb[1] = ATTENUATE(g, a); dst_argb[2] = ATTENUATE(r, a); @@ -2019,10 +2413,10 @@ void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { } if (width & 1) { - const uint32 b = src_argb[0]; - const uint32 g = src_argb[1]; - const uint32 r = src_argb[2]; - const uint32 a = src_argb[3]; + const uint32_t b = src_argb[0]; + const uint32_t g = src_argb[1]; + const uint32_t r = src_argb[2]; + const uint32_t a = src_argb[3]; dst_argb[0] = ATTENUATE(b, a); dst_argb[1] = ATTENUATE(g, a); dst_argb[2] = ATTENUATE(r, a); @@ -2038,49 +2432,56 @@ void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { // Reciprocal method is off by 1 on some values. ie 125 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower. #define T(a) 0x01000000 + (0x10000 / a) -const uint32 fixed_invtbl8[256] = { - 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07), - T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f), - T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17), - T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f), - T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), - T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), - T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37), - T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f), - T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47), - T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f), - T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57), - T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), - T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), - T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f), - T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77), - T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f), - T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87), - T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f), - T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), - T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), - T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7), - T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf), - T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7), - T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf), - T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7), - T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), - T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), - T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf), - T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7), - T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef), - T(0xf0), T(0xf1), 
T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7), - T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 }; +const uint32_t fixed_invtbl8[256] = { + 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), + T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), + T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), + T(0x15), T(0x16), T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b), + T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22), + T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), T(0x28), T(0x29), + T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30), + T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37), + T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), + T(0x3f), T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), + T(0x46), T(0x47), T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), + T(0x4d), T(0x4e), T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53), + T(0x54), T(0x55), T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a), + T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61), + T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), T(0x68), + T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f), + T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), + T(0x77), T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), + T(0x7e), T(0x7f), T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), + T(0x85), T(0x86), T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b), + T(0x8c), T(0x8d), T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92), + T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), T(0x98), T(0x99), + T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0), + T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7), + T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), + T(0xaf), T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), + T(0xb6), T(0xb7), T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), + T(0xbd), T(0xbe), T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3), + T(0xc4), T(0xc5), T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca), + T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1), + T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8), + T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf), + T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), + T(0xe7), T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), + T(0xee), T(0xef), T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), + T(0xf5), T(0xf6), T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb), + T(0xfc), T(0xfd), T(0xfe), 0x01000100}; #undef T -void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { +void ARGBUnattenuateRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { int i; for (i = 0; i < width; ++i) { - uint32 b = src_argb[0]; - uint32 g = src_argb[1]; - uint32 r = src_argb[2]; - const uint32 a = src_argb[3]; - const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point + uint32_t b = src_argb[0]; + uint32_t g = src_argb[1]; + uint32_t r = src_argb[2]; + const uint32_t a = src_argb[3]; + const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point b = (b * ia) >> 8; g = (g * ia) >> 8; r = (r * ia) >> 8; @@ -2094,31 +2495,37 @@ void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { } } -void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width) { - int32 row_sum[4] = {0, 0, 0, 0}; +void ComputeCumulativeSumRow_C(const uint8_t* row, + int32_t* cumsum, + const int32_t* previous_cumsum, + 
int width) { + int32_t row_sum[4] = {0, 0, 0, 0}; int x; for (x = 0; x < width; ++x) { row_sum[0] += row[x * 4 + 0]; row_sum[1] += row[x * 4 + 1]; row_sum[2] += row[x * 4 + 2]; row_sum[3] += row[x * 4 + 3]; - cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0]; - cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1]; - cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2]; - cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3]; + cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0]; + cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1]; + cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2]; + cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3]; } } -void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl, - int w, int area, uint8* dst, int count) { +void CumulativeSumToAverageRow_C(const int32_t* tl, + const int32_t* bl, + int w, + int area, + uint8_t* dst, + int count) { float ooa = 1.0f / area; int i; for (i = 0; i < count; ++i) { - dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); - dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); - dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); - dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); + dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); + dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); + dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); + dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); dst += 4; tl += 4; bl += 4; @@ -2127,8 +2534,11 @@ void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl, // Copy pixels from rotated source to destination row with a slope. LIBYUV_API -void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width) { +void ARGBAffineRow_C(const uint8_t* src_argb, + int src_argb_stride, + uint8_t* dst_argb, + const float* uv_dudv, + int width) { int i; // Render a row of pixels from source into a buffer. float uv[2]; @@ -2137,9 +2547,8 @@ void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, for (i = 0; i < width; ++i) { int x = (int)(uv[0]); int y = (int)(uv[1]); - *(uint32*)(dst_argb) = - *(const uint32*)(src_argb + y * src_argb_stride + - x * 4); + *(uint32_t*)(dst_argb) = + *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4); dst_argb += 4; uv[0] += uv_dudv[2]; uv[1] += uv_dudv[3]; @@ -2147,16 +2556,20 @@ void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, } // Blend 2 rows into 1. -static void HalfRow_C(const uint8* src_uv, ptrdiff_t src_uv_stride, - uint8* dst_uv, int width) { +static void HalfRow_C(const uint8_t* src_uv, + ptrdiff_t src_uv_stride, + uint8_t* dst_uv, + int width) { int x; for (x = 0; x < width; ++x) { dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; } } -static void HalfRow_16_C(const uint16* src_uv, ptrdiff_t src_uv_stride, - uint16* dst_uv, int width) { +static void HalfRow_16_C(const uint16_t* src_uv, + ptrdiff_t src_uv_stride, + uint16_t* dst_uv, + int width) { int x; for (x = 0; x < width; ++x) { dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; @@ -2164,12 +2577,14 @@ static void HalfRow_16_C(const uint16* src_uv, ptrdiff_t src_uv_stride, } // C version 2x2 -> 2x1. 
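// source_y_fraction blends the row at src_ptr with the one at
// src_ptr + src_stride in 1/256 steps: 0 copies the top row (the memcpy
// fast path below), 128 averages the two, 256 would be the bottom row.
// E.g. top = 100, bottom = 200 with fraction 64 gives
// (100 * 192 + 200 * 64) / 256 = 125, a quarter of the way down
// (ignoring the rounding term).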
-void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, +void InterpolateRow_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, ptrdiff_t src_stride, - int width, int source_y_fraction) { + int width, + int source_y_fraction) { int y1_fraction = source_y_fraction; int y0_fraction = 256 - y1_fraction; - const uint8* src_ptr1 = src_ptr + src_stride; + const uint8_t* src_ptr1 = src_ptr + src_stride; int x; if (y1_fraction == 0) { memcpy(dst_ptr, src_ptr, width); @@ -2194,12 +2609,14 @@ void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, } } -void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr, +void InterpolateRow_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, ptrdiff_t src_stride, - int width, int source_y_fraction) { + int width, + int source_y_fraction) { int y1_fraction = source_y_fraction; int y0_fraction = 256 - y1_fraction; - const uint16* src_ptr1 = src_ptr + src_stride; + const uint16_t* src_ptr1 = src_ptr + src_stride; int x; if (source_y_fraction == 0) { memcpy(dst_ptr, src_ptr, width * 2); @@ -2222,8 +2639,10 @@ void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr, } // Use first 4 shuffler values to reorder ARGB channels. -void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width) { +void ARGBShuffleRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width) { int index0 = shuffler[0]; int index1 = shuffler[1]; int index2 = shuffler[2]; @@ -2232,10 +2651,10 @@ void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, int x; for (x = 0; x < width; ++x) { // To support in-place conversion. - uint8 b = src_argb[index0]; - uint8 g = src_argb[index1]; - uint8 r = src_argb[index2]; - uint8 a = src_argb[index3]; + uint8_t b = src_argb[index0]; + uint8_t g = src_argb[index1]; + uint8_t r = src_argb[index2]; + uint8_t a = src_argb[index3]; dst_argb[0] = b; dst_argb[1] = g; dst_argb[2] = r; @@ -2245,10 +2664,11 @@ void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, } } -void I422ToYUY2Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { +void I422ToYUY2Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, + int width) { int x; for (x = 0; x < width - 1; x += 2) { dst_frame[0] = src_y[0]; @@ -2268,10 +2688,11 @@ void I422ToYUY2Row_C(const uint8* src_y, } } -void I422ToUYVYRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { +void I422ToUYVYRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, + int width) { int x; for (x = 0; x < width - 1; x += 2) { dst_frame[0] = src_u[0]; @@ -2291,9 +2712,8 @@ void I422ToUYVYRow_C(const uint8* src_y, } } - -void ARGBPolynomialRow_C(const uint8* src_argb, - uint8* dst_argb, +void ARGBPolynomialRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, const float* poly, int width) { int i; @@ -2323,33 +2743,75 @@ void ARGBPolynomialRow_C(const uint8* src_argb, dr += poly[14] * r3; da += poly[15] * a3; - dst_argb[0] = Clamp((int32)(db)); - dst_argb[1] = Clamp((int32)(dg)); - dst_argb[2] = Clamp((int32)(dr)); - dst_argb[3] = Clamp((int32)(da)); + dst_argb[0] = Clamp((int32_t)(db)); + dst_argb[1] = Clamp((int32_t)(dg)); + dst_argb[2] = Clamp((int32_t)(dr)); + dst_argb[3] = Clamp((int32_t)(da)); src_argb += 4; dst_argb += 4; } } -void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, - const uint8* luma, uint32 lumacoeff) { - uint32 bc = 
lumacoeff & 0xff; - uint32 gc = (lumacoeff >> 8) & 0xff; - uint32 rc = (lumacoeff >> 16) & 0xff; +// Samples assumed to be unsigned in low 9, 10 or 12 bits. Scale factor +// adjust the source integer range to the half float range desired. + +// This magic constant is 2^-112. Multiplying by this +// is the same as subtracting 112 from the exponent, which +// is the difference in exponent bias between 32-bit and +// 16-bit floats. Once we've done this subtraction, we can +// simply extract the low bits of the exponent and the high +// bits of the mantissa from our float and we're done. + +// Work around GCC 7 punning warning -Wstrict-aliasing +#if defined(__GNUC__) +typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t; +#else +typedef uint32_t uint32_alias_t; +#endif + +void HalfFloatRow_C(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { + int i; + float mult = 1.9259299444e-34f * scale; + for (i = 0; i < width; ++i) { + float value = src[i] * mult; + dst[i] = (uint16_t)((*(const uint32_alias_t*)&value) >> 13); + } +} + +void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) { + int i; + for (i = 0; i < width; ++i) { + float value = src[i] * scale; + dst[i] = value; + } +} + +void ARGBLumaColorTableRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + const uint8_t* luma, + uint32_t lumacoeff) { + uint32_t bc = lumacoeff & 0xff; + uint32_t gc = (lumacoeff >> 8) & 0xff; + uint32_t rc = (lumacoeff >> 16) & 0xff; int i; for (i = 0; i < width - 1; i += 2) { // Luminance in rows, color values in columns. - const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + - src_argb[2] * rc) & 0x7F00u) + luma; - const uint8* luma1; + const uint8_t* luma0 = + ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) + + luma; + const uint8_t* luma1; dst_argb[0] = luma0[src_argb[0]]; dst_argb[1] = luma0[src_argb[1]]; dst_argb[2] = luma0[src_argb[2]]; dst_argb[3] = src_argb[3]; - luma1 = ((src_argb[4] * bc + src_argb[5] * gc + - src_argb[6] * rc) & 0x7F00u) + luma; + luma1 = + ((src_argb[4] * bc + src_argb[5] * gc + src_argb[6] * rc) & 0x7F00u) + + luma; dst_argb[4] = luma1[src_argb[4]]; dst_argb[5] = luma1[src_argb[5]]; dst_argb[6] = luma1[src_argb[6]]; @@ -2359,8 +2821,9 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, } if (width & 1) { // Luminance in rows, color values in columns. 
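An aside on the half-float conversion above: multiplying a float by 1.9259299444e-34f (2^-112) rebiases the exponent from the 32-bit bias of 127 to the 16-bit bias of 15, so the half-float result is just the float's bit pattern shifted right by 13 (the low five exponent bits plus the top ten mantissa bits; the sign is always 0 for these unsigned samples). A standalone sketch of HalfFloatRow_C's per-sample step, using memcpy instead of the may_alias cast as a portable type pun (the ToHalf name and the 10-bit test value are illustrative only):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* One sample of the HalfFloatRow_C conversion: scale, rebias by 2^-112,
       then take bits >> 13 of the float representation. */
    static uint16_t ToHalf(uint16_t sample, float scale) {
      float value = sample * (1.9259299444e-34f * scale);
      uint32_t bits;
      memcpy(&bits, &value, sizeof(bits)); /* defined-behavior type pun */
      return (uint16_t)(bits >> 13);
    }

    int main(void) {
      /* A full-scale 10-bit sample with scale 1/1023 lands on half 1.0,
         i.e. 0x3c00. */
      printf("0x%04x\n", ToHalf(1023, 1.0f / 1023.0f));
      return 0;
    }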
- const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + - src_argb[2] * rc) & 0x7F00u) + luma; + const uint8_t* luma0 = + ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) + + luma; dst_argb[0] = luma0[src_argb[0]]; dst_argb[1] = luma0[src_argb[1]]; dst_argb[2] = luma0[src_argb[2]]; @@ -2368,7 +2831,7 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, } } -void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { +void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) { int i; for (i = 0; i < width - 1; i += 2) { dst[3] = src[3]; @@ -2381,7 +2844,7 @@ void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { } } -void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) { +void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) { int i; for (i = 0; i < width - 1; i += 2) { dst_a[0] = src_argb[3]; @@ -2394,7 +2857,7 @@ void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) { } } -void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { +void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) { int i; for (i = 0; i < width - 1; i += 2) { dst[3] = src[0]; @@ -2413,13 +2876,13 @@ void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { #if !(defined(_MSC_VER) && defined(_M_IX86)) && \ defined(HAS_I422TORGB565ROW_SSSE3) // row_win.cc has asm version, but GCC uses 2 step wrapper. -void I422ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2434,14 +2897,14 @@ void I422ToRGB565Row_SSSE3(const uint8* src_y, #endif #if defined(HAS_I422TOARGB1555ROW_SSSE3) -void I422ToARGB1555Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2456,14 +2919,14 @@ void I422ToARGB1555Row_SSSE3(const uint8* src_y, #endif #if defined(HAS_I422TOARGB4444ROW_SSSE3) -void I422ToARGB4444Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2478,13 +2941,13 @@ void I422ToARGB4444Row_SSSE3(const uint8* src_y, #endif #if defined(HAS_NV12TORGB565ROW_SSSE3) -void NV12ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void NV12ToRGB565Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth); @@ -2497,14 +2960,102 @@ void NV12ToRGB565Row_SSSE3(const uint8* src_y, } #endif -#if defined(HAS_I422TORGB565ROW_AVX2) -void I422ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +#if defined(HAS_NV12TORGB24ROW_SSSE3) +void NV12ToRGB24Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth); + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); + src_y += twidth; + src_uv += twidth; + dst_rgb24 += twidth * 3; + width -= twidth; + } +} +#endif + +#if defined(HAS_NV21TORGB24ROW_SSSE3) +void NV21ToRGB24Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, twidth); + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); + src_y += twidth; + src_vu += twidth; + dst_rgb24 += twidth * 3; + width -= twidth; + } +} +#endif + +#if defined(HAS_NV12TORGB24ROW_AVX2) +void NV12ToRGB24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth); +#if defined(HAS_ARGBTORGB24ROW_AVX2) + ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); +#else + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); +#endif + src_y += twidth; + src_uv += twidth; + dst_rgb24 += twidth * 3; + width -= twidth; + } +} +#endif + +#if defined(HAS_NV21TORGB24ROW_AVX2) +void NV21ToRGB24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + // Row buffer for intermediate ARGB pixels. + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; + NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth); +#if defined(HAS_ARGBTORGB24ROW_AVX2) + ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); +#else + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); +#endif + src_y += twidth; + src_vu += twidth; + dst_rgb24 += twidth * 3; + width -= twidth; + } +} +#endif + +#if defined(HAS_I422TORGB565ROW_AVX2) +void I422ToRGB565Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width) { + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2523,14 +3074,14 @@ void I422ToRGB565Row_AVX2(const uint8* src_y, #endif #if defined(HAS_I422TOARGB1555ROW_AVX2) -void I422ToARGB1555Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2549,14 +3100,14 @@ void I422ToARGB1555Row_AVX2(const uint8* src_y, #endif #if defined(HAS_I422TOARGB4444ROW_AVX2) -void I422ToARGB4444Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); @@ -2575,19 +3126,22 @@ void I422ToARGB4444Row_AVX2(const uint8* src_y, #endif #if defined(HAS_I422TORGB24ROW_AVX2) -void I422ToRGB24Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width) { +void I422ToRGB24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); - // TODO(fbarchard): ARGBToRGB24Row_AVX2 +#if defined(HAS_ARGBTORGB24ROW_AVX2) + ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); +#else ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); +#endif src_y += twidth; src_u += twidth / 2; src_v += twidth / 2; @@ -2598,13 +3152,13 @@ void I422ToRGB24Row_AVX2(const uint8* src_y, #endif #if defined(HAS_NV12TORGB565ROW_AVX2) -void NV12ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void NV12ToRGB565Row_AVX2(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. 
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]); + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth); @@ -2621,6 +3175,62 @@ void NV12ToRGB565Row_AVX2(const uint8* src_y, } #endif +float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) { + float fsum = 0.f; + int i; +#if defined(__clang__) +#pragma clang loop vectorize_width(4) +#endif + for (i = 0; i < width; ++i) { + float v = *src++; + fsum += v * v; + *dst++ = v * scale; + } + return fsum; +} + +float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) { + float fmax = 0.f; + int i; + for (i = 0; i < width; ++i) { + float v = *src++; + float vs = v * scale; + fmax = (v > fmax) ? v : fmax; + *dst++ = vs; + } + return fmax; +} + +void ScaleSamples_C(const float* src, float* dst, float scale, int width) { + int i; + for (i = 0; i < width; ++i) { + *dst++ = *src++ * scale; + } +} + +void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) { + int i; + for (i = 0; i < width; ++i) { + *dst++ = + (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8; + ++src; + } +} + +// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row. +void GaussCol_C(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, + int width) { + int i; + for (i = 0; i < width; ++i) { + *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++; + } +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_gcc.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_gcc.cc index 1ac7ef1aa398..8d3cb81cec2e 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/row_gcc.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/row_gcc.cc @@ -1,4 +1,3 @@ -// VERSION 2 /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * @@ -23,1663 +22,2001 @@ extern "C" { #if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3) // Constants for ARGB -static vec8 kARGBToY = { - 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0 -}; +static const vec8 kARGBToY = {13, 65, 33, 0, 13, 65, 33, 0, + 13, 65, 33, 0, 13, 65, 33, 0}; // JPeg full range. 
-static vec8 kARGBToYJ = { - 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0 -}; +static const vec8 kARGBToYJ = {15, 75, 38, 0, 15, 75, 38, 0, + 15, 75, 38, 0, 15, 75, 38, 0}; #endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3) -static vec8 kARGBToU = { - 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0 -}; +static const vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0, + 112, -74, -38, 0, 112, -74, -38, 0}; -static vec8 kARGBToUJ = { - 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0 -}; +static const vec8 kARGBToUJ = {127, -84, -43, 0, 127, -84, -43, 0, + 127, -84, -43, 0, 127, -84, -43, 0}; -static vec8 kARGBToV = { - -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -}; +static const vec8 kARGBToV = {-18, -94, 112, 0, -18, -94, 112, 0, + -18, -94, 112, 0, -18, -94, 112, 0}; -static vec8 kARGBToVJ = { - -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0 -}; +static const vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0, + -20, -107, 127, 0, -20, -107, 127, 0}; // Constants for BGRA -static vec8 kBGRAToY = { - 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13 -}; +static const vec8 kBGRAToY = {0, 33, 65, 13, 0, 33, 65, 13, + 0, 33, 65, 13, 0, 33, 65, 13}; -static vec8 kBGRAToU = { - 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112 -}; +static const vec8 kBGRAToU = {0, -38, -74, 112, 0, -38, -74, 112, + 0, -38, -74, 112, 0, -38, -74, 112}; -static vec8 kBGRAToV = { - 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18 -}; +static const vec8 kBGRAToV = {0, 112, -94, -18, 0, 112, -94, -18, + 0, 112, -94, -18, 0, 112, -94, -18}; // Constants for ABGR -static vec8 kABGRToY = { - 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0 -}; +static const vec8 kABGRToY = {33, 65, 13, 0, 33, 65, 13, 0, + 33, 65, 13, 0, 33, 65, 13, 0}; -static vec8 kABGRToU = { - -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0 -}; +static const vec8 kABGRToU = {-38, -74, 112, 0, -38, -74, 112, 0, + -38, -74, 112, 0, -38, -74, 112, 0}; -static vec8 kABGRToV = { - 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0 -}; +static const vec8 kABGRToV = {112, -94, -18, 0, 112, -94, -18, 0, + 112, -94, -18, 0, 112, -94, -18, 0}; // Constants for RGBA. -static vec8 kRGBAToY = { - 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33 -}; +static const vec8 kRGBAToY = {0, 13, 65, 33, 0, 13, 65, 33, + 0, 13, 65, 33, 0, 13, 65, 33}; -static vec8 kRGBAToU = { - 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38 -}; +static const vec8 kRGBAToU = {0, 112, -74, -38, 0, 112, -74, -38, + 0, 112, -74, -38, 0, 112, -74, -38}; -static vec8 kRGBAToV = { - 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112 -}; +static const vec8 kRGBAToV = {0, -18, -94, 112, 0, -18, -94, 112, + 0, -18, -94, 112, 0, -18, -94, 112}; -static uvec8 kAddY16 = { - 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u -}; +static const uvec8 kAddY16 = {16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, + 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u}; // 7 bit fixed point 0.5. 
-static vec16 kAddYJ64 = { - 64, 64, 64, 64, 64, 64, 64, 64 -}; +static const vec16 kAddYJ64 = {64, 64, 64, 64, 64, 64, 64, 64}; -static uvec8 kAddUV128 = { - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u -}; +static const uvec8 kAddUV128 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; -static uvec16 kAddUVJ128 = { - 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u -}; +static const uvec16 kAddUVJ128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u, + 0x8080u, 0x8080u, 0x8080u, 0x8080u}; #endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3) #ifdef HAS_RGB24TOARGBROW_SSSE3 // Shuffle table for converting RGB24 to ARGB. -static uvec8 kShuffleMaskRGB24ToARGB = { - 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u -}; +static const uvec8 kShuffleMaskRGB24ToARGB = { + 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u}; // Shuffle table for converting RAW to ARGB. -static uvec8 kShuffleMaskRAWToARGB = { - 2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u -}; +static const uvec8 kShuffleMaskRAWToARGB = {2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, + 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u}; // Shuffle table for converting RAW to RGB24. First 8. static const uvec8 kShuffleMaskRAWToRGB24_0 = { - 2u, 1u, 0u, 5u, 4u, 3u, 8u, 7u, - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u -}; + 2u, 1u, 0u, 5u, 4u, 3u, 8u, 7u, + 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting RAW to RGB24. Middle 8. static const uvec8 kShuffleMaskRAWToRGB24_1 = { - 2u, 7u, 6u, 5u, 10u, 9u, 8u, 13u, - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u -}; + 2u, 7u, 6u, 5u, 10u, 9u, 8u, 13u, + 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting RAW to RGB24. Last 8. static const uvec8 kShuffleMaskRAWToRGB24_2 = { - 8u, 7u, 12u, 11u, 10u, 15u, 14u, 13u, - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u -}; + 8u, 7u, 12u, 11u, 10u, 15u, 14u, 13u, + 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGB to RGB24. -static uvec8 kShuffleMaskARGBToRGB24 = { - 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u -}; +static const uvec8 kShuffleMaskARGBToRGB24 = { + 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGB to RAW. -static uvec8 kShuffleMaskARGBToRAW = { - 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u -}; +static const uvec8 kShuffleMaskARGBToRAW = { + 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4 -static uvec8 kShuffleMaskARGBToRGB24_0 = { - 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u -}; +static const uvec8 kShuffleMaskARGBToRGB24_0 = { + 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u}; // YUY2 shuf 16 Y to 32 Y. -static const lvec8 kShuffleYUY2Y = { - 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, - 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 -}; +static const lvec8 kShuffleYUY2Y = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, + 10, 12, 12, 14, 14, 0, 0, 2, 2, 4, 4, + 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}; // YUY2 shuf 8 UV to 16 UV. 
-static const lvec8 kShuffleYUY2UV = { - 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15, - 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15 -}; +static const lvec8 kShuffleYUY2UV = {1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, + 11, 13, 15, 13, 15, 1, 3, 1, 3, 5, 7, + 5, 7, 9, 11, 9, 11, 13, 15, 13, 15}; // UYVY shuf 16 Y to 32 Y. -static const lvec8 kShuffleUYVYY = { - 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15, - 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15 -}; +static const lvec8 kShuffleUYVYY = {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, + 11, 13, 13, 15, 15, 1, 1, 3, 3, 5, 5, + 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}; // UYVY shuf 8 UV to 16 UV. -static const lvec8 kShuffleUYVYUV = { - 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14, - 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 -}; +static const lvec8 kShuffleUYVYUV = {0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, + 10, 12, 14, 12, 14, 0, 2, 0, 2, 4, 6, + 4, 6, 8, 10, 8, 10, 12, 14, 12, 14}; // NV21 shuf 8 VU to 16 UV. static const lvec8 kShuffleNV21 = { - 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, - 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, + 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, + 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, }; #endif // HAS_RGB24TOARGBROW_SSSE3 #ifdef HAS_J400TOARGBROW_SSE2 -void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0x18,%%xmm5 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm0,%%xmm0 \n" - "punpckhwd %%xmm1,%%xmm1 \n" - "por %%xmm5,%%xmm0 \n" - "por %%xmm5,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_y), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm5" - ); +void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0x18,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movq (%0),%%xmm0 \n" + "lea 0x8(%0),%0 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklwd %%xmm0,%%xmm0 \n" + "punpckhwd %%xmm1,%%xmm1 \n" + "por %%xmm5,%%xmm0 \n" + "por %%xmm5,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_y), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_J400TOARGBROW_SSE2 #ifdef HAS_RGB24TOARGBROW_SSSE3 -void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 - "pslld $0x18,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm3 \n" - "lea " MEMLEA(0x30,0) ",%0 \n" - "movdqa %%xmm3,%%xmm2 \n" - "palignr $0x8,%%xmm1,%%xmm2 \n" - "pshufb %%xmm4,%%xmm2 \n" - "por %%xmm5,%%xmm2 \n" - "palignr $0xc,%%xmm0,%%xmm1 \n" - "pshufb %%xmm4,%%xmm0 \n" - "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n" - "por %%xmm5,%%xmm0 \n" - "pshufb %%xmm4,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "por %%xmm5,%%xmm1 \n" - "palignr $0x4,%%xmm3,%%xmm3 \n" - "pshufb %%xmm4,%%xmm3 \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "por %%xmm5,%%xmm3 \n" - "movdqu %%xmm3," MEMACCESS2(0x30,1) " \n" 
- "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_rgb24), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskRGB24ToARGB) // %3 - : "memory", "cc" , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" // 0xff000000 + "pslld $0x18,%%xmm5 \n" + "movdqa %3,%%xmm4 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm3 \n" + "lea 0x30(%0),%0 \n" + "movdqa %%xmm3,%%xmm2 \n" + "palignr $0x8,%%xmm1,%%xmm2 \n" + "pshufb %%xmm4,%%xmm2 \n" + "por %%xmm5,%%xmm2 \n" + "palignr $0xc,%%xmm0,%%xmm1 \n" + "pshufb %%xmm4,%%xmm0 \n" + "movdqu %%xmm2,0x20(%1) \n" + "por %%xmm5,%%xmm0 \n" + "pshufb %%xmm4,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "por %%xmm5,%%xmm1 \n" + "palignr $0x4,%%xmm3,%%xmm3 \n" + "pshufb %%xmm4,%%xmm3 \n" + "movdqu %%xmm1,0x10(%1) \n" + "por %%xmm5,%%xmm3 \n" + "movdqu %%xmm3,0x30(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_rgb24), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskRGB24ToARGB) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 - "pslld $0x18,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm3 \n" - "lea " MEMLEA(0x30,0) ",%0 \n" - "movdqa %%xmm3,%%xmm2 \n" - "palignr $0x8,%%xmm1,%%xmm2 \n" - "pshufb %%xmm4,%%xmm2 \n" - "por %%xmm5,%%xmm2 \n" - "palignr $0xc,%%xmm0,%%xmm1 \n" - "pshufb %%xmm4,%%xmm0 \n" - "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n" - "por %%xmm5,%%xmm0 \n" - "pshufb %%xmm4,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "por %%xmm5,%%xmm1 \n" - "palignr $0x4,%%xmm3,%%xmm3 \n" - "pshufb %%xmm4,%%xmm3 \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "por %%xmm5,%%xmm3 \n" - "movdqu %%xmm3," MEMACCESS2(0x30,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskRAWToARGB) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" // 0xff000000 + "pslld $0x18,%%xmm5 \n" + "movdqa %3,%%xmm4 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm3 \n" + "lea 0x30(%0),%0 \n" + "movdqa %%xmm3,%%xmm2 \n" + "palignr $0x8,%%xmm1,%%xmm2 \n" + "pshufb %%xmm4,%%xmm2 \n" + "por %%xmm5,%%xmm2 \n" + "palignr $0xc,%%xmm0,%%xmm1 \n" + "pshufb %%xmm4,%%xmm0 \n" + "movdqu %%xmm2,0x20(%1) \n" + "por %%xmm5,%%xmm0 \n" + "pshufb %%xmm4,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "por %%xmm5,%%xmm1 \n" + "palignr $0x4,%%xmm3,%%xmm3 \n" + "pshufb %%xmm4,%%xmm3 \n" + "movdqu %%xmm1,0x10(%1) \n" + "por %%xmm5,%%xmm3 \n" + "movdqu %%xmm3,0x30(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskRAWToARGB) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width) { - asm volatile ( - "movdqa %3,%%xmm3 \n" - 
"movdqa %4,%%xmm4 \n" - "movdqa %5,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x4,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x8,0) ",%%xmm2 \n" - "lea " MEMLEA(0x18,0) ",%0 \n" - "pshufb %%xmm3,%%xmm0 \n" - "pshufb %%xmm4,%%xmm1 \n" - "pshufb %%xmm5,%%xmm2 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "movq %%xmm1," MEMACCESS2(0x8,1) " \n" - "movq %%xmm2," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x18,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_rgb24), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskRAWToRGB24_0), // %3 - "m"(kShuffleMaskRAWToRGB24_1), // %4 - "m"(kShuffleMaskRAWToRGB24_2) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, + uint8_t* dst_rgb24, + int width) { + asm volatile( + "movdqa %3,%%xmm3 \n" + "movdqa %4,%%xmm4 \n" + "movdqa %5,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x4(%0),%%xmm1 \n" + "movdqu 0x8(%0),%%xmm2 \n" + "lea 0x18(%0),%0 \n" + "pshufb %%xmm3,%%xmm0 \n" + "pshufb %%xmm4,%%xmm1 \n" + "pshufb %%xmm5,%%xmm2 \n" + "movq %%xmm0,(%1) \n" + "movq %%xmm1,0x8(%1) \n" + "movq %%xmm2,0x10(%1) \n" + "lea 0x18(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_rgb24), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskRAWToRGB24_0), // %3 + "m"(kShuffleMaskRAWToRGB24_1), // %4 + "m"(kShuffleMaskRAWToRGB24_2) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "mov $0x1080108,%%eax \n" - "movd %%eax,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "mov $0x20802080,%%eax \n" - "movd %%eax,%%xmm6 \n" - "pshufd $0x0,%%xmm6,%%xmm6 \n" - "pcmpeqb %%xmm3,%%xmm3 \n" - "psllw $0xb,%%xmm3 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "psllw $0xa,%%xmm4 \n" - "psrlw $0x5,%%xmm4 \n" - "pcmpeqb %%xmm7,%%xmm7 \n" - "psllw $0x8,%%xmm7 \n" - "sub %0,%1 \n" - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pand %%xmm3,%%xmm1 \n" - "psllw $0xb,%%xmm2 \n" - "pmulhuw %%xmm5,%%xmm1 \n" - "pmulhuw %%xmm5,%%xmm2 \n" - "psllw $0x8,%%xmm1 \n" - "por %%xmm2,%%xmm1 \n" - "pand %%xmm4,%%xmm0 \n" - "pmulhuw %%xmm6,%%xmm0 \n" - "por %%xmm7,%%xmm0 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" - MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2) - MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2) - "lea " MEMLEA(0x10,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc", "eax", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); +void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "mov $0x1080108,%%eax \n" + "movd %%eax,%%xmm5 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "mov $0x20802080,%%eax \n" + "movd %%eax,%%xmm6 \n" + "pshufd $0x0,%%xmm6,%%xmm6 \n" + "pcmpeqb %%xmm3,%%xmm3 \n" + "psllw $0xb,%%xmm3 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "psllw $0xa,%%xmm4 \n" + "psrlw $0x5,%%xmm4 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" + "psllw $0x8,%%xmm7 \n" + "sub %0,%1 \n" + "sub %0,%1 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pand %%xmm3,%%xmm1 \n" + "psllw $0xb,%%xmm2 \n" + "pmulhuw %%xmm5,%%xmm1 \n" + "pmulhuw %%xmm5,%%xmm2 \n" + "psllw $0x8,%%xmm1 \n" + 
"por %%xmm2,%%xmm1 \n" + "pand %%xmm4,%%xmm0 \n" + "pmulhuw %%xmm6,%%xmm0 \n" + "por %%xmm7,%%xmm0 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm0,%%xmm1 \n" + "punpckhbw %%xmm0,%%xmm2 \n" + "movdqu %%xmm1,0x00(%1,%0,2) \n" + "movdqu %%xmm2,0x10(%1,%0,2) \n" + "lea 0x10(%0),%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", + "xmm6", "xmm7"); } -void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "mov $0x1080108,%%eax \n" - "movd %%eax,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "mov $0x42004200,%%eax \n" - "movd %%eax,%%xmm6 \n" - "pshufd $0x0,%%xmm6,%%xmm6 \n" - "pcmpeqb %%xmm3,%%xmm3 \n" - "psllw $0xb,%%xmm3 \n" - "movdqa %%xmm3,%%xmm4 \n" - "psrlw $0x6,%%xmm4 \n" - "pcmpeqb %%xmm7,%%xmm7 \n" - "psllw $0x8,%%xmm7 \n" - "sub %0,%1 \n" - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "psllw $0x1,%%xmm1 \n" - "psllw $0xb,%%xmm2 \n" - "pand %%xmm3,%%xmm1 \n" - "pmulhuw %%xmm5,%%xmm2 \n" - "pmulhuw %%xmm5,%%xmm1 \n" - "psllw $0x8,%%xmm1 \n" - "por %%xmm2,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pand %%xmm4,%%xmm0 \n" - "psraw $0x8,%%xmm2 \n" - "pmulhuw %%xmm6,%%xmm0 \n" - "pand %%xmm7,%%xmm2 \n" - "por %%xmm2,%%xmm0 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" - MEMOPMEM(movdqu,xmm1,0x00,1,0,2) // movdqu %%xmm1,(%1,%0,2) - MEMOPMEM(movdqu,xmm2,0x10,1,0,2) // movdqu %%xmm2,0x10(%1,%0,2) - "lea " MEMLEA(0x10,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc", "eax", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); +void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "mov $0x1080108,%%eax \n" + "movd %%eax,%%xmm5 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "mov $0x42004200,%%eax \n" + "movd %%eax,%%xmm6 \n" + "pshufd $0x0,%%xmm6,%%xmm6 \n" + "pcmpeqb %%xmm3,%%xmm3 \n" + "psllw $0xb,%%xmm3 \n" + "movdqa %%xmm3,%%xmm4 \n" + "psrlw $0x6,%%xmm4 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" + "psllw $0x8,%%xmm7 \n" + "sub %0,%1 \n" + "sub %0,%1 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "psllw $0x1,%%xmm1 \n" + "psllw $0xb,%%xmm2 \n" + "pand %%xmm3,%%xmm1 \n" + "pmulhuw %%xmm5,%%xmm2 \n" + "pmulhuw %%xmm5,%%xmm1 \n" + "psllw $0x8,%%xmm1 \n" + "por %%xmm2,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pand %%xmm4,%%xmm0 \n" + "psraw $0x8,%%xmm2 \n" + "pmulhuw %%xmm6,%%xmm0 \n" + "pand %%xmm7,%%xmm2 \n" + "por %%xmm2,%%xmm0 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm0,%%xmm1 \n" + "punpckhbw %%xmm0,%%xmm2 \n" + "movdqu %%xmm1,0x00(%1,%0,2) \n" + "movdqu %%xmm2,0x10(%1,%0,2) \n" + "lea 0x10(%0),%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", + "xmm6", "xmm7"); } -void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "mov $0xf0f0f0f,%%eax \n" - "movd %%eax,%%xmm4 \n" - "pshufd $0x0,%%xmm4,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "pslld $0x4,%%xmm5 \n" - "sub %0,%1 \n" - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pand %%xmm4,%%xmm0 \n" - "pand %%xmm5,%%xmm2 \n" - "movdqa 
%%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm3 \n" - "psllw $0x4,%%xmm1 \n" - "psrlw $0x4,%%xmm3 \n" - "por %%xmm1,%%xmm0 \n" - "por %%xmm3,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - MEMOPMEM(movdqu,xmm0,0x00,1,0,2) // movdqu %%xmm0,(%1,%0,2) - MEMOPMEM(movdqu,xmm1,0x10,1,0,2) // movdqu %%xmm1,0x10(%1,%0,2) - "lea " MEMLEA(0x10,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc", "eax", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "mov $0xf0f0f0f,%%eax \n" + "movd %%eax,%%xmm4 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + "movdqa %%xmm4,%%xmm5 \n" + "pslld $0x4,%%xmm5 \n" + "sub %0,%1 \n" + "sub %0,%1 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pand %%xmm4,%%xmm0 \n" + "pand %%xmm5,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm3 \n" + "psllw $0x4,%%xmm1 \n" + "psrlw $0x4,%%xmm3 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm3,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "movdqu %%xmm0,0x00(%1,%0,2) \n" + "movdqu %%xmm1,0x10(%1,%0,2) \n" + "lea 0x10(%0),%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int width) { - asm volatile ( - "movdqa %3,%%xmm6 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "pshufb %%xmm6,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "pshufb %%xmm6,%%xmm2 \n" - "pshufb %%xmm6,%%xmm3 \n" - "movdqa %%xmm1,%%xmm4 \n" - "psrldq $0x4,%%xmm1 \n" - "pslldq $0xc,%%xmm4 \n" - "movdqa %%xmm2,%%xmm5 \n" - "por %%xmm4,%%xmm0 \n" - "pslldq $0x8,%%xmm5 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "por %%xmm5,%%xmm1 \n" - "psrldq $0x8,%%xmm2 \n" - "pslldq $0x4,%%xmm3 \n" - "por %%xmm3,%%xmm2 \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x30,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskARGBToRGB24) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); +void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + + "movdqa %3,%%xmm6 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "pshufb %%xmm6,%%xmm0 \n" + "pshufb %%xmm6,%%xmm1 \n" + "pshufb %%xmm6,%%xmm2 \n" + "pshufb %%xmm6,%%xmm3 \n" + "movdqa %%xmm1,%%xmm4 \n" + "psrldq $0x4,%%xmm1 \n" + "pslldq $0xc,%%xmm4 \n" + "movdqa %%xmm2,%%xmm5 \n" + "por %%xmm4,%%xmm0 \n" + "pslldq $0x8,%%xmm5 \n" + "movdqu %%xmm0,(%1) \n" + "por %%xmm5,%%xmm1 \n" + "psrldq $0x8,%%xmm2 \n" + "pslldq $0x4,%%xmm3 \n" + "por %%xmm3,%%xmm2 \n" + "movdqu %%xmm1,0x10(%1) \n" + "movdqu %%xmm2,0x20(%1) \n" + "lea 0x30(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskARGBToRGB24) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } -void 
ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int width) { - asm volatile ( - "movdqa %3,%%xmm6 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "pshufb %%xmm6,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "pshufb %%xmm6,%%xmm2 \n" - "pshufb %%xmm6,%%xmm3 \n" - "movdqa %%xmm1,%%xmm4 \n" - "psrldq $0x4,%%xmm1 \n" - "pslldq $0xc,%%xmm4 \n" - "movdqa %%xmm2,%%xmm5 \n" - "por %%xmm4,%%xmm0 \n" - "pslldq $0x8,%%xmm5 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "por %%xmm5,%%xmm1 \n" - "psrldq $0x8,%%xmm2 \n" - "pslldq $0x4,%%xmm3 \n" - "por %%xmm3,%%xmm2 \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x30,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskARGBToRAW) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); +void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + + "movdqa %3,%%xmm6 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "pshufb %%xmm6,%%xmm0 \n" + "pshufb %%xmm6,%%xmm1 \n" + "pshufb %%xmm6,%%xmm2 \n" + "pshufb %%xmm6,%%xmm3 \n" + "movdqa %%xmm1,%%xmm4 \n" + "psrldq $0x4,%%xmm1 \n" + "pslldq $0xc,%%xmm4 \n" + "movdqa %%xmm2,%%xmm5 \n" + "por %%xmm4,%%xmm0 \n" + "pslldq $0x8,%%xmm5 \n" + "movdqu %%xmm0,(%1) \n" + "por %%xmm5,%%xmm1 \n" + "psrldq $0x8,%%xmm2 \n" + "pslldq $0x4,%%xmm3 \n" + "por %%xmm3,%%xmm2 \n" + "movdqu %%xmm1,0x10(%1) \n" + "movdqu %%xmm2,0x20(%1) \n" + "lea 0x30(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskARGBToRAW) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } -void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm3,%%xmm3 \n" - "psrld $0x1b,%%xmm3 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrld $0x1a,%%xmm4 \n" - "pslld $0x5,%%xmm4 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0xb,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pslld $0x8,%%xmm0 \n" - "psrld $0x3,%%xmm1 \n" - "psrld $0x5,%%xmm2 \n" - "psrad $0x10,%%xmm0 \n" - "pand %%xmm3,%%xmm1 \n" - "pand %%xmm4,%%xmm2 \n" - "pand %%xmm5,%%xmm0 \n" - "por %%xmm2,%%xmm1 \n" - "por %%xmm1,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +#ifdef HAS_ARGBTORGB24ROW_AVX2 +// vpermd for 12+12 to 24 +static const lvec32 kPermdRGB24_AVX = {0, 1, 2, 4, 5, 6, 3, 7}; + +void ARGBToRGB24Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "vbroadcastf128 %3,%%ymm6 \n" + "vmovdqa %4,%%ymm7 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vpshufb %%ymm6,%%ymm0,%%ymm0 \n" // xxx0yyy0 + "vpshufb %%ymm6,%%ymm1,%%ymm1 \n" + "vpshufb %%ymm6,%%ymm2,%%ymm2 \n" + "vpshufb %%ymm6,%%ymm3,%%ymm3 \n" 
+ "vpermd %%ymm0,%%ymm7,%%ymm0 \n" // pack to 24 bytes + "vpermd %%ymm1,%%ymm7,%%ymm1 \n" + "vpermd %%ymm2,%%ymm7,%%ymm2 \n" + "vpermd %%ymm3,%%ymm7,%%ymm3 \n" + "vpermq $0x3f,%%ymm1,%%ymm4 \n" // combine 24 + 8 + "vpor %%ymm4,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "vpermq $0xf9,%%ymm1,%%ymm1 \n" // combine 16 + 16 + "vpermq $0x4f,%%ymm2,%%ymm4 \n" + "vpor %%ymm4,%%ymm1,%%ymm1 \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "vpermq $0xfe,%%ymm2,%%ymm2 \n" // combine 8 + 24 + "vpermq $0x93,%%ymm3,%%ymm3 \n" + "vpor %%ymm3,%%ymm2,%%ymm2 \n" + "vmovdqu %%ymm2,0x40(%1) \n" + "lea 0x60(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskARGBToRGB24), // %3 + "m"(kPermdRGB24_AVX) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); +} +#endif + +#ifdef HAS_ARGBTORGB24ROW_AVX512VBMI +// Shuffle table for converting ARGBToRGB24 +static const ulvec8 kPermARGBToRGB24_0 = { + 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, + 14u, 16u, 17u, 18u, 20u, 21u, 22u, 24u, 25u, 26u, 28u, + 29u, 30u, 32u, 33u, 34u, 36u, 37u, 38u, 40u, 41u}; +static const ulvec8 kPermARGBToRGB24_1 = { + 10u, 12u, 13u, 14u, 16u, 17u, 18u, 20u, 21u, 22u, 24u, + 25u, 26u, 28u, 29u, 30u, 32u, 33u, 34u, 36u, 37u, 38u, + 40u, 41u, 42u, 44u, 45u, 46u, 48u, 49u, 50u, 52u}; +static const ulvec8 kPermARGBToRGB24_2 = { + 21u, 22u, 24u, 25u, 26u, 28u, 29u, 30u, 32u, 33u, 34u, + 36u, 37u, 38u, 40u, 41u, 42u, 44u, 45u, 46u, 48u, 49u, + 50u, 52u, 53u, 54u, 56u, 57u, 58u, 60u, 61u, 62u}; + +void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "vmovdqa %3,%%ymm5 \n" + "vmovdqa %4,%%ymm6 \n" + "vmovdqa %5,%%ymm7 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vpermt2b %%ymm1,%%ymm5,%%ymm0 \n" + "vpermt2b %%ymm2,%%ymm6,%%ymm1 \n" + "vpermt2b %%ymm3,%%ymm7,%%ymm2 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "vmovdqu %%ymm2,0x40(%1) \n" + "lea 0x60(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kPermARGBToRGB24_0), // %3 + "m"(kPermARGBToRGB24_1), // %4 + "m"(kPermARGBToRGB24_2) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6", "xmm7"); +} +#endif + +#ifdef HAS_ARGBTORAWROW_AVX2 +void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "vbroadcastf128 %3,%%ymm6 \n" + "vmovdqa %4,%%ymm7 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vpshufb %%ymm6,%%ymm0,%%ymm0 \n" // xxx0yyy0 + "vpshufb %%ymm6,%%ymm1,%%ymm1 \n" + "vpshufb %%ymm6,%%ymm2,%%ymm2 \n" + "vpshufb %%ymm6,%%ymm3,%%ymm3 \n" + "vpermd %%ymm0,%%ymm7,%%ymm0 \n" // pack to 24 bytes + "vpermd %%ymm1,%%ymm7,%%ymm1 \n" + "vpermd %%ymm2,%%ymm7,%%ymm2 \n" + "vpermd %%ymm3,%%ymm7,%%ymm3 \n" + "vpermq $0x3f,%%ymm1,%%ymm4 \n" // combine 24 + 8 + "vpor %%ymm4,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "vpermq $0xf9,%%ymm1,%%ymm1 \n" // combine 16 + 16 + "vpermq $0x4f,%%ymm2,%%ymm4 \n" + "vpor %%ymm4,%%ymm1,%%ymm1 \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "vpermq $0xfe,%%ymm2,%%ymm2 \n" // combine 8 + 24 + "vpermq $0x93,%%ymm3,%%ymm3 \n" + "vpor %%ymm3,%%ymm2,%%ymm2 \n" + "vmovdqu %%ymm2,0x40(%1) \n" + "lea 0x60(%1),%1 \n" + "sub 
$0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskARGBToRAW), // %3 + "m"(kPermdRGB24_AVX) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); +} +#endif + +void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm3,%%xmm3 \n" + "psrld $0x1b,%%xmm3 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrld $0x1a,%%xmm4 \n" + "pslld $0x5,%%xmm4 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0xb,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pslld $0x8,%%xmm0 \n" + "psrld $0x3,%%xmm1 \n" + "psrld $0x5,%%xmm2 \n" + "psrad $0x10,%%xmm0 \n" + "pand %%xmm3,%%xmm1 \n" + "pand %%xmm4,%%xmm2 \n" + "pand %%xmm5,%%xmm0 \n" + "por %%xmm2,%%xmm1 \n" + "por %%xmm1,%%xmm0 \n" + "packssdw %%xmm0,%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void ARGBToRGB565DitherRow_SSE2(const uint8* src, uint8* dst, - const uint32 dither4, int width) { - asm volatile ( - "movd %3,%%xmm6 \n" - "punpcklbw %%xmm6,%%xmm6 \n" - "movdqa %%xmm6,%%xmm7 \n" - "punpcklwd %%xmm6,%%xmm6 \n" - "punpckhwd %%xmm7,%%xmm7 \n" - "pcmpeqb %%xmm3,%%xmm3 \n" - "psrld $0x1b,%%xmm3 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrld $0x1a,%%xmm4 \n" - "pslld $0x5,%%xmm4 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0xb,%%xmm5 \n" +void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, + uint8_t* dst, + const uint32_t dither4, + int width) { + asm volatile( + "movd %3,%%xmm6 \n" + "punpcklbw %%xmm6,%%xmm6 \n" + "movdqa %%xmm6,%%xmm7 \n" + "punpcklwd %%xmm6,%%xmm6 \n" + "punpckhwd %%xmm7,%%xmm7 \n" + "pcmpeqb %%xmm3,%%xmm3 \n" + "psrld $0x1b,%%xmm3 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrld $0x1a,%%xmm4 \n" + "pslld $0x5,%%xmm4 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0xb,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu (%0),%%xmm0 \n" - "paddusb %%xmm6,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pslld $0x8,%%xmm0 \n" - "psrld $0x3,%%xmm1 \n" - "psrld $0x5,%%xmm2 \n" - "psrad $0x10,%%xmm0 \n" - "pand %%xmm3,%%xmm1 \n" - "pand %%xmm4,%%xmm2 \n" - "pand %%xmm5,%%xmm0 \n" - "por %%xmm2,%%xmm1 \n" - "por %%xmm1,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "lea 0x10(%0),%0 \n" - "movq %%xmm0,(%1) \n" - "lea 0x8(%1),%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "m"(dither4) // %3 - : "memory", "cc", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "paddusb %%xmm6,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pslld $0x8,%%xmm0 \n" + "psrld $0x3,%%xmm1 \n" + "psrld $0x5,%%xmm2 \n" + "psrad $0x10,%%xmm0 \n" + "pand %%xmm3,%%xmm1 \n" + "pand %%xmm4,%%xmm2 \n" + "pand %%xmm5,%%xmm0 \n" + "por %%xmm2,%%xmm1 \n" + "por %%xmm1,%%xmm0 \n" + "packssdw %%xmm0,%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(dither4) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #ifdef HAS_ARGBTORGB565DITHERROW_AVX2 -void ARGBToRGB565DitherRow_AVX2(const uint8* src, uint8* dst, - const uint32 dither4, int width) { - asm volatile ( - 
"vbroadcastss %3,%%xmm6 \n" - "vpunpcklbw %%xmm6,%%xmm6,%%xmm6 \n" - "vpermq $0xd8,%%ymm6,%%ymm6 \n" - "vpunpcklwd %%ymm6,%%ymm6,%%ymm6 \n" - "vpcmpeqb %%ymm3,%%ymm3,%%ymm3 \n" - "vpsrld $0x1b,%%ymm3,%%ymm3 \n" - "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" - "vpsrld $0x1a,%%ymm4,%%ymm4 \n" - "vpslld $0x5,%%ymm4,%%ymm4 \n" - "vpslld $0xb,%%ymm3,%%ymm5 \n" +void ARGBToRGB565DitherRow_AVX2(const uint8_t* src, + uint8_t* dst, + const uint32_t dither4, + int width) { + asm volatile( + "vbroadcastss %3,%%xmm6 \n" + "vpunpcklbw %%xmm6,%%xmm6,%%xmm6 \n" + "vpermq $0xd8,%%ymm6,%%ymm6 \n" + "vpunpcklwd %%ymm6,%%ymm6,%%ymm6 \n" + "vpcmpeqb %%ymm3,%%ymm3,%%ymm3 \n" + "vpsrld $0x1b,%%ymm3,%%ymm3 \n" + "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" + "vpsrld $0x1a,%%ymm4,%%ymm4 \n" + "vpslld $0x5,%%ymm4,%%ymm4 \n" + "vpslld $0xb,%%ymm3,%%ymm5 \n" - LABELALIGN - "1: \n" - "vmovdqu (%0),%%ymm0 \n" - "vpaddusb %%ymm6,%%ymm0,%%ymm0 \n" - "vpsrld $0x5,%%ymm0,%%ymm2 \n" - "vpsrld $0x3,%%ymm0,%%ymm1 \n" - "vpsrld $0x8,%%ymm0,%%ymm0 \n" - "vpand %%ymm4,%%ymm2,%%ymm2 \n" - "vpand %%ymm3,%%ymm1,%%ymm1 \n" - "vpand %%ymm5,%%ymm0,%%ymm0 \n" - "vpor %%ymm2,%%ymm1,%%ymm1 \n" - "vpor %%ymm1,%%ymm0,%%ymm0 \n" - "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "lea 0x20(%0),%0 \n" - "vmovdqu %%xmm0,(%1) \n" - "lea 0x10(%1),%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "m"(dither4) // %3 - : "memory", "cc", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vpaddusb %%ymm6,%%ymm0,%%ymm0 \n" + "vpsrld $0x5,%%ymm0,%%ymm2 \n" + "vpsrld $0x3,%%ymm0,%%ymm1 \n" + "vpsrld $0x8,%%ymm0,%%ymm0 \n" + "vpand %%ymm4,%%ymm2,%%ymm2 \n" + "vpand %%ymm3,%%ymm1,%%ymm1 \n" + "vpand %%ymm5,%%ymm0,%%ymm0 \n" + "vpor %%ymm2,%%ymm1,%%ymm1 \n" + "vpor %%ymm1,%%ymm0,%%ymm0 \n" + "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "lea 0x20(%0),%0 \n" + "vmovdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(dither4) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBTORGB565DITHERROW_AVX2 +void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrld $0x1b,%%xmm4 \n" + "movdqa %%xmm4,%%xmm5 \n" + "pslld $0x5,%%xmm5 \n" + "movdqa %%xmm4,%%xmm6 \n" + "pslld $0xa,%%xmm6 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" + "pslld $0xf,%%xmm7 \n" -void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrld $0x1b,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "pslld $0x5,%%xmm5 \n" - "movdqa %%xmm4,%%xmm6 \n" - "pslld $0xa,%%xmm6 \n" - "pcmpeqb %%xmm7,%%xmm7 \n" - "pslld $0xf,%%xmm7 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm0,%%xmm3 \n" - "psrad $0x10,%%xmm0 \n" - "psrld $0x3,%%xmm1 \n" - "psrld $0x6,%%xmm2 \n" - "psrld $0x9,%%xmm3 \n" - "pand %%xmm7,%%xmm0 \n" - "pand %%xmm4,%%xmm1 \n" - "pand %%xmm5,%%xmm2 \n" - "pand %%xmm6,%%xmm3 \n" - "por %%xmm1,%%xmm0 \n" - "por %%xmm3,%%xmm2 \n" - "por %%xmm2,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - 
"+r"(width) // %2 - :: "memory", "cc", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "movdqa %%xmm0,%%xmm3 \n" + "psrad $0x10,%%xmm0 \n" + "psrld $0x3,%%xmm1 \n" + "psrld $0x6,%%xmm2 \n" + "psrld $0x9,%%xmm3 \n" + "pand %%xmm7,%%xmm0 \n" + "pand %%xmm4,%%xmm1 \n" + "pand %%xmm5,%%xmm2 \n" + "pand %%xmm6,%%xmm3 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm3,%%xmm2 \n" + "por %%xmm2,%%xmm0 \n" + "packssdw %%xmm0,%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } -void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "psllw $0xc,%%xmm4 \n" - "movdqa %%xmm4,%%xmm3 \n" - "psrlw $0x8,%%xmm3 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm3,%%xmm0 \n" - "pand %%xmm4,%%xmm1 \n" - "psrlq $0x4,%%xmm0 \n" - "psrlq $0x8,%%xmm1 \n" - "por %%xmm1,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" - ); +void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm4,%%xmm4 \n" + "psllw $0xc,%%xmm4 \n" + "movdqa %%xmm4,%%xmm3 \n" + "psrlw $0x8,%%xmm3 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pand %%xmm3,%%xmm0 \n" + "pand %%xmm4,%%xmm1 \n" + "psrlq $0x4,%%xmm0 \n" + "psrlq $0x8,%%xmm1 \n" + "por %%xmm1,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); } #endif // HAS_RGB24TOARGBROW_SSSE3 +/* + +ARGBToAR30Row: + +Red Blue +With the 8 bit value in the upper bits of a short, vpmulhuw by (1024+4) will +produce a 10 bit value in the low 10 bits of each 16 bit value. This is whats +wanted for the blue channel. The red needs to be shifted 4 left, so multiply by +(1024+4)*16 for red. + +Alpha Green +Alpha and Green are already in the high bits so vpand can zero out the other +bits, keeping just 2 upper bits of alpha and 8 bit green. The same multiplier +could be used for Green - (1024+4) putting the 10 bit green in the lsb. Alpha +would be a simple multiplier to shift it into position. It wants a gap of 10 +above the green. Green is 10 bits, so there are 6 bits in the low short. 4 +more are needed, so a multiplier of 4 gets the 2 bits into the upper 16 bits, +and then a shift of 4 is a multiply of 16, so (4*16) = 64. Then shift the +result left 10 to position the A and G channels. +*/ + +// Shuffle table for converting RAW to RGB24. Last 8. 
+static const uvec8 kShuffleRB30 = {128u, 0u, 128u, 2u, 128u, 4u, 128u, 6u, + 128u, 8u, 128u, 10u, 128u, 12u, 128u, 14u}; + +static const uvec8 kShuffleBR30 = {128u, 2u, 128u, 0u, 128u, 6u, 128u, 4u, + 128u, 10u, 128u, 8u, 128u, 14u, 128u, 12u}; + +static const uint32_t kMulRB10 = 1028 * 16 * 65536 + 1028; +static const uint32_t kMaskRB10 = 0x3ff003ff; +static const uint32_t kMaskAG10 = 0xc000ff00; +static const uint32_t kMulAG10 = 64 * 65536 + 1028; + +void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "movdqa %3,%%xmm2 \n" // shuffler for RB + "movd %4,%%xmm3 \n" // multipler for RB + "movd %5,%%xmm4 \n" // mask for R10 B10 + "movd %6,%%xmm5 \n" // mask for AG + "movd %7,%%xmm6 \n" // multipler for AG + "pshufd $0x0,%%xmm3,%%xmm3 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "pshufd $0x0,%%xmm6,%%xmm6 \n" + "sub %0,%1 \n" + + "1: \n" + "movdqu (%0),%%xmm0 \n" // fetch 4 ARGB pixels + "movdqa %%xmm0,%%xmm1 \n" + "pshufb %%xmm2,%%xmm1 \n" // R0B0 + "pand %%xmm5,%%xmm0 \n" // A0G0 + "pmulhuw %%xmm3,%%xmm1 \n" // X2 R16 X4 B10 + "pmulhuw %%xmm6,%%xmm0 \n" // X10 A2 X10 G10 + "pand %%xmm4,%%xmm1 \n" // X2 R10 X10 B10 + "pslld $10,%%xmm0 \n" // A2 x10 G10 x10 + "por %%xmm1,%%xmm0 \n" // A2 R10 G10 B10 + "movdqu %%xmm0,(%1,%0) \n" // store 4 AR30 pixels + "add $0x10,%0 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleRB30), // %3 + "m"(kMulRB10), // %4 + "m"(kMaskRB10), // %5 + "m"(kMaskAG10), // %6 + "m"(kMulAG10) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); +} + +void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "movdqa %3,%%xmm2 \n" // shuffler for RB + "movd %4,%%xmm3 \n" // multipler for RB + "movd %5,%%xmm4 \n" // mask for R10 B10 + "movd %6,%%xmm5 \n" // mask for AG + "movd %7,%%xmm6 \n" // multipler for AG + "pshufd $0x0,%%xmm3,%%xmm3 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "pshufd $0x0,%%xmm6,%%xmm6 \n" + "sub %0,%1 \n" + + "1: \n" + "movdqu (%0),%%xmm0 \n" // fetch 4 ABGR pixels + "movdqa %%xmm0,%%xmm1 \n" + "pshufb %%xmm2,%%xmm1 \n" // R0B0 + "pand %%xmm5,%%xmm0 \n" // A0G0 + "pmulhuw %%xmm3,%%xmm1 \n" // X2 R16 X4 B10 + "pmulhuw %%xmm6,%%xmm0 \n" // X10 A2 X10 G10 + "pand %%xmm4,%%xmm1 \n" // X2 R10 X10 B10 + "pslld $10,%%xmm0 \n" // A2 x10 G10 x10 + "por %%xmm1,%%xmm0 \n" // A2 R10 G10 B10 + "movdqu %%xmm0,(%1,%0) \n" // store 4 AR30 pixels + "add $0x10,%0 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(kShuffleBR30), // %3 reversed shuffler + "m"(kMulRB10), // %4 + "m"(kMaskRB10), // %5 + "m"(kMaskAG10), // %6 + "m"(kMulAG10) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); +} + +#ifdef HAS_ARGBTOAR30ROW_AVX2 +void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB + "vbroadcastss %4,%%ymm3 \n" // multipler for RB + "vbroadcastss %5,%%ymm4 \n" // mask for R10 B10 + "vbroadcastss %6,%%ymm5 \n" // mask for AG + "vbroadcastss %7,%%ymm6 \n" // multipler for AG + "sub %0,%1 \n" + + "1: \n" + "vmovdqu (%0),%%ymm0 \n" // fetch 8 ARGB pixels + "vpshufb %%ymm2,%%ymm0,%%ymm1 \n" // R0B0 + "vpand %%ymm5,%%ymm0,%%ymm0 \n" // A0G0 + "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" // X2 R16 X4 B10 + "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n" // X10 A2 X10 G10 + "vpand %%ymm4,%%ymm1,%%ymm1 \n" // X2 R10 X10 
+#ifdef HAS_ARGBTOAR30ROW_AVX2
+void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB
+ "vbroadcastss %4,%%ymm3 \n" // multiplier for RB
+ "vbroadcastss %5,%%ymm4 \n" // mask for R10 B10
+ "vbroadcastss %6,%%ymm5 \n" // mask for AG
+ "vbroadcastss %7,%%ymm6 \n" // multiplier for AG
+ "sub %0,%1 \n"
+
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n" // fetch 8 ARGB pixels
+ "vpshufb %%ymm2,%%ymm0,%%ymm1 \n" // R0B0
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n" // A0G0
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" // X2 R16 X4 B10
+ "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n" // X10 A2 X10 G10
+ "vpand %%ymm4,%%ymm1,%%ymm1 \n" // X2 R10 X10 B10
+ "vpslld $10,%%ymm0,%%ymm0 \n" // A2 x10 G10 x10
+ "vpor %%ymm1,%%ymm0,%%ymm0 \n" // A2 R10 G10 B10
+ "vmovdqu %%ymm0,(%1,%0) \n" // store 8 AR30 pixels
+ "add $0x20,%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleRB30), // %3
+ "m"(kMulRB10), // %4
+ "m"(kMaskRB10), // %5
+ "m"(kMaskAG10), // %6
+ "m"(kMulAG10) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif
+
+#ifdef HAS_ABGRTOAR30ROW_AVX2
+void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
+ asm volatile(
+ "vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB
+ "vbroadcastss %4,%%ymm3 \n" // multiplier for RB
+ "vbroadcastss %5,%%ymm4 \n" // mask for R10 B10
+ "vbroadcastss %6,%%ymm5 \n" // mask for AG
+ "vbroadcastss %7,%%ymm6 \n" // multiplier for AG
+ "sub %0,%1 \n"
+
+ "1: \n"
+ "vmovdqu (%0),%%ymm0 \n" // fetch 8 ABGR pixels
+ "vpshufb %%ymm2,%%ymm0,%%ymm1 \n" // R0B0
+ "vpand %%ymm5,%%ymm0,%%ymm0 \n" // A0G0
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" // X2 R16 X4 B10
+ "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n" // X10 A2 X10 G10
+ "vpand %%ymm4,%%ymm1,%%ymm1 \n" // X2 R10 X10 B10
+ "vpslld $10,%%ymm0,%%ymm0 \n" // A2 x10 G10 x10
+ "vpor %%ymm1,%%ymm0,%%ymm0 \n" // A2 R10 G10 B10
+ "vmovdqu %%ymm0,(%1,%0) \n" // store 8 AR30 pixels
+ "add $0x20,%0 \n"
+ "sub $0x8,%2 \n"
+ "jg 1b \n"
+ "vzeroupper \n"
+
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleBR30), // %3 reversed shuffler
+ "m"(kMulRB10), // %4
+ "m"(kMaskRB10), // %5
+ "m"(kMaskAG10), // %6
+ "m"(kMulAG10) // %7
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
+}
+#endif
+
 #ifdef HAS_ARGBTOYROW_SSSE3
 // Convert 16 ARGB pixels (64 bytes) to 16 Y values.
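// Editor's note: a rough scalar model, not part of the patch, of the
// fixed-point luma math the SSSE3/AVX2 rows below implement with
// pmaddubsw/phaddw/psrlw/paddb. The weights {13, 65, 33} and the +16 offset
// are an assumption based on the kARGBToY and kAddY16 tables these functions
// load into xmm4/xmm5; those tables are defined elsewhere in libyuv and are
// not shown in this patch.
#include <stdint.h>

static uint8_t ScalarARGBToY(uint8_t b, uint8_t g, uint8_t r) {
  // Q7 fixed-point weighted sum, truncated by the >> 7, then offset by 16.
  return (uint8_t)(((13 * b + 65 * g + 33 * r) >> 7) + 16);
}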
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { - asm volatile ( - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kARGBToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { + asm volatile( + "movdqa %3,%%xmm4 \n" + "movdqa %4,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm3,%%xmm2 \n" + "psrlw $0x7,%%xmm0 \n" + "psrlw $0x7,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kARGBToY), // %3 + "m"(kAddY16) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYJROW_SSSE3 // Convert 16 ARGB pixels (64 bytes) to 16 YJ values. // Same as ARGBToYRow but different coefficients, no add 16, but do rounding. 
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { - asm volatile ( - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "paddw %%xmm5,%%xmm0 \n" - "paddw %%xmm5,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kARGBToYJ), // %3 - "m"(kAddYJ64) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { + asm volatile( + "movdqa %3,%%xmm4 \n" + "movdqa %4,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm3,%%xmm2 \n" + "paddw %%xmm5,%%xmm0 \n" + "paddw %%xmm5,%%xmm2 \n" + "psrlw $0x7,%%xmm0 \n" + "psrlw $0x7,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kARGBToYJ), // %3 + "m"(kAddYJ64) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBTOYJROW_SSSE3 #ifdef HAS_ARGBTOYROW_AVX2 // vpermd for vphaddw + vpackuswb vpermd. -static const lvec32 kPermdARGBToY_AVX = { - 0, 4, 1, 5, 2, 6, 3, 7 -}; +static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7}; // Convert 32 ARGB pixels (128 bytes) to 32 Y values. -void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) { - asm volatile ( - "vbroadcastf128 %3,%%ymm4 \n" - "vbroadcastf128 %4,%%ymm5 \n" - "vmovdqu %5,%%ymm6 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n" - "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n" - "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" - "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "lea " MEMLEA(0x80,0) ",%0 \n" - "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates. - "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" - "vpsrlw $0x7,%%ymm0,%%ymm0 \n" - "vpsrlw $0x7,%%ymm2,%%ymm2 \n" - "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. - "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate. 
- "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kARGBToY), // %3 - "m"(kAddY16), // %4 - "m"(kPermdARGBToY_AVX) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); +void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { + asm volatile( + "vbroadcastf128 %3,%%ymm4 \n" + "vbroadcastf128 %4,%%ymm5 \n" + "vmovdqu %5,%%ymm6 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" + "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates. + "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" + "vpsrlw $0x7,%%ymm0,%%ymm0 \n" + "vpsrlw $0x7,%%ymm2,%%ymm2 \n" + "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. + "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate. + "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kARGBToY), // %3 + "m"(kAddY16), // %4 + "m"(kPermdARGBToY_AVX) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBTOYROW_AVX2 #ifdef HAS_ARGBTOYJROW_AVX2 // Convert 32 ARGB pixels (128 bytes) to 32 Y values. -void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) { - asm volatile ( - "vbroadcastf128 %3,%%ymm4 \n" - "vbroadcastf128 %4,%%ymm5 \n" - "vmovdqu %5,%%ymm6 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n" - "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n" - "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" - "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "lea " MEMLEA(0x80,0) ",%0 \n" - "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates. - "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" - "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" // Add .5 for rounding. - "vpaddw %%ymm5,%%ymm2,%%ymm2 \n" - "vpsrlw $0x7,%%ymm0,%%ymm0 \n" - "vpsrlw $0x7,%%ymm2,%%ymm2 \n" - "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. - "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate. - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kARGBToYJ), // %3 - "m"(kAddYJ64), // %4 - "m"(kPermdARGBToY_AVX) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); +void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { + asm volatile( + "vbroadcastf128 %3,%%ymm4 \n" + "vbroadcastf128 %4,%%ymm5 \n" + "vmovdqu %5,%%ymm6 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" + "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates. + "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" + "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" // Add .5 for rounding. 
+ "vpaddw %%ymm5,%%ymm2,%%ymm2 \n" + "vpsrlw $0x7,%%ymm0,%%ymm0 \n" + "vpsrlw $0x7,%%ymm2,%%ymm2 \n" + "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. + "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate. + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kARGBToYJ), // %3 + "m"(kAddYJ64), // %4 + "m"(kPermdARGBToY_AVX) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBTOYJROW_AVX2 #ifdef HAS_ARGBTOUVROW_SSSE3 -void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %5,%%xmm3 \n" - "movdqa %6,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" +void ARGBToUVRow_SSSE3(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "movdqa %5,%%xmm3 \n" + "movdqa %6,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "sub %1,%2 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_argb)), // %4 - "m"(kARGBToV), // %5 - "m"(kARGBToU), // %6 - "m"(kAddUV128) // %7 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x10(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x20(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "movdqu 0x30(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + 
"psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_argb0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_argb)), // %4 + "m"(kARGBToV), // %5 + "m"(kARGBToU), // %6 + "m"(kAddUV128) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } #endif // HAS_ARGBTOUVROW_SSSE3 #ifdef HAS_ARGBTOUVROW_AVX2 // vpshufb for vphaddw + vpackuswb packed to shorts. static const lvec8 kShufARGBToUV_AVX = { - 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, - 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15 -}; -void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "vbroadcastf128 %5,%%ymm5 \n" - "vbroadcastf128 %6,%%ymm6 \n" - "vbroadcastf128 %7,%%ymm7 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n" - "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n" - VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0 - VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1) - VMEMOPREG(vpavgb,0x40,0,4,1,ymm2,ymm2) - VMEMOPREG(vpavgb,0x60,0,4,1,ymm3,ymm3) - "lea " MEMLEA(0x80,0) ",%0 \n" - "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n" - "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n" - "vpavgb %%ymm4,%%ymm0,%%ymm0 \n" - "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n" - "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n" - "vpavgb %%ymm4,%%ymm2,%%ymm2 \n" + 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, + 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15}; +void ARGBToUVRow_AVX2(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "vbroadcastf128 %5,%%ymm5 \n" + "vbroadcastf128 %6,%%ymm6 \n" + "vbroadcastf128 %7,%%ymm7 \n" + "sub %1,%2 \n" - "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" - "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n" - "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n" - "vphaddw %%ymm3,%%ymm1,%%ymm1 \n" - "vphaddw %%ymm2,%%ymm0,%%ymm0 \n" - "vpsraw $0x8,%%ymm1,%%ymm1 \n" - "vpsraw $0x8,%%ymm0,%%ymm0 \n" - "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpshufb %8,%%ymm0,%%ymm0 \n" - "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" + "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" + "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n" + "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n" + "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n" + "vpavgb %%ymm4,%%ymm0,%%ymm0 \n" + "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n" + "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n" + "vpavgb %%ymm4,%%ymm2,%%ymm2 \n" - "vextractf128 $0x0,%%ymm0," MEMACCESS(1) " \n" - VEXTOPMEM(vextractf128,1,ymm0,0x0,1,2,1) // vextractf128 $1,%%ymm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_argb)), // %4 - "m"(kAddUV128), // %5 - "m"(kARGBToV), // %6 - "m"(kARGBToU), // %7 - "m"(kShufARGBToUV_AVX) // %8 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
"xmm5", "xmm6", "xmm7" - ); + "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" + "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n" + "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n" + "vphaddw %%ymm3,%%ymm1,%%ymm1 \n" + "vphaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpsraw $0x8,%%ymm1,%%ymm1 \n" + "vpsraw $0x8,%%ymm0,%%ymm0 \n" + "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpshufb %8,%%ymm0,%%ymm0 \n" + "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" + + "vextractf128 $0x0,%%ymm0,(%1) \n" + "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_argb)), // %4 + "m"(kAddUV128), // %5 + "m"(kARGBToV), // %6 + "m"(kARGBToU), // %7 + "m"(kShufARGBToUV_AVX) // %8 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBTOUVROW_AVX2 #ifdef HAS_ARGBTOUVJROW_AVX2 -void ARGBToUVJRow_AVX2(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "vbroadcastf128 %5,%%ymm5 \n" - "vbroadcastf128 %6,%%ymm6 \n" - "vbroadcastf128 %7,%%ymm7 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "vmovdqu " MEMACCESS2(0x40,0) ",%%ymm2 \n" - "vmovdqu " MEMACCESS2(0x60,0) ",%%ymm3 \n" - VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0 - VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1) - VMEMOPREG(vpavgb,0x40,0,4,1,ymm2,ymm2) - VMEMOPREG(vpavgb,0x60,0,4,1,ymm3,ymm3) - "lea " MEMLEA(0x80,0) ",%0 \n" - "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n" - "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n" - "vpavgb %%ymm4,%%ymm0,%%ymm0 \n" - "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n" - "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n" - "vpavgb %%ymm4,%%ymm2,%%ymm2 \n" +void ARGBToUVJRow_AVX2(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "vbroadcastf128 %5,%%ymm5 \n" + "vbroadcastf128 %6,%%ymm6 \n" + "vbroadcastf128 %7,%%ymm7 \n" + "sub %1,%2 \n" - "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" - "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n" - "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n" - "vphaddw %%ymm3,%%ymm1,%%ymm1 \n" - "vphaddw %%ymm2,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm5,%%ymm1,%%ymm1 \n" - "vpsraw $0x8,%%ymm1,%%ymm1 \n" - "vpsraw $0x8,%%ymm0,%%ymm0 \n" - "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpshufb %8,%%ymm0,%%ymm0 \n" + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x40(%0),%%ymm2 \n" + "vmovdqu 0x60(%0),%%ymm3 \n" + "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" + "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" + "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n" + "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n" + "lea 0x80(%0),%0 \n" + "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n" + "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n" + "vpavgb %%ymm4,%%ymm0,%%ymm0 \n" + "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n" + "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n" + "vpavgb %%ymm4,%%ymm2,%%ymm2 \n" - "vextractf128 $0x0,%%ymm0," MEMACCESS(1) " \n" - VEXTOPMEM(vextractf128,1,ymm0,0x0,1,2,1) // vextractf128 $1,%%ymm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_argb)), // %4 - 
"m"(kAddUVJ128), // %5 - "m"(kARGBToVJ), // %6 - "m"(kARGBToUJ), // %7 - "m"(kShufARGBToUV_AVX) // %8 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" + "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n" + "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n" + "vphaddw %%ymm3,%%ymm1,%%ymm1 \n" + "vphaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm5,%%ymm1,%%ymm1 \n" + "vpsraw $0x8,%%ymm1,%%ymm1 \n" + "vpsraw $0x8,%%ymm0,%%ymm0 \n" + "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpshufb %8,%%ymm0,%%ymm0 \n" + + "vextractf128 $0x0,%%ymm0,(%1) \n" + "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_argb)), // %4 + "m"(kAddUVJ128), // %5 + "m"(kARGBToVJ), // %6 + "m"(kARGBToUJ), // %7 + "m"(kShufARGBToUV_AVX) // %8 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBTOUVJROW_AVX2 #ifdef HAS_ARGBTOUVJROW_SSSE3 -void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %5,%%xmm3 \n" - "movdqa %6,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" +void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "movdqa %5,%%xmm3 \n" + "movdqa %6,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "sub %1,%2 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "paddw %%xmm5,%%xmm0 \n" - "paddw %%xmm5,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_argb)), // %4 - "m"(kARGBToVJ), // %5 - "m"(kARGBToUJ), // %6 - "m"(kAddUVJ128) // %7 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x10(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" 
+ "movdqu 0x20(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "movdqu 0x30(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "paddw %%xmm5,%%xmm0 \n" + "paddw %%xmm5,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_argb0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_argb)), // %4 + "m"(kARGBToVJ), // %5 + "m"(kARGBToUJ), // %6 + "m"(kAddUVJ128) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } #endif // HAS_ARGBTOUVJROW_SSSE3 #ifdef HAS_ARGBTOUV444ROW_SSSE3 -void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v, +void ARGBToUV444Row_SSSE3(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "movdqa %4,%%xmm3 \n" - "movdqa %5,%%xmm4 \n" - "movdqa %6,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm6 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm6,%%xmm2 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm2 \n" - "packsswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - "pmaddubsw %%xmm3,%%xmm0 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm6,%%xmm2 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm2 \n" - "packsswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - MEMOPMEM(movdqu,xmm0,0x00,1,2,1) // movdqu %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "m"(kARGBToV), // %4 - "m"(kARGBToU), // %5 - "m"(kAddUV128) // %6 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6" - ); + asm volatile( + "movdqa %4,%%xmm3 \n" + "movdqa %5,%%xmm4 \n" + "movdqa %6,%%xmm5 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm6 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm6,%%xmm2 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm2 \n" + "packsswb %%xmm2,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 
0x30(%0),%%xmm6 \n" + "pmaddubsw %%xmm3,%%xmm0 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm6,%%xmm2 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm2 \n" + "packsswb %%xmm2,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "lea 0x40(%0),%0 \n" + "movdqu %%xmm0,0x00(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "m"(kARGBToV), // %4 + "m"(kARGBToU), // %5 + "m"(kAddUV128) // %6 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6"); } #endif // HAS_ARGBTOUV444ROW_SSSE3 -void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_bgra), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kBGRAToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) { + asm volatile( + "movdqa %4,%%xmm5 \n" + "movdqa %3,%%xmm4 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm3,%%xmm2 \n" + "psrlw $0x7,%%xmm0 \n" + "psrlw $0x7,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_bgra), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kBGRAToY), // %3 + "m"(kAddY16) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %5,%%xmm3 \n" - "movdqa %6,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" +void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0, + int src_stride_bgra, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "movdqa %5,%%xmm3 \n" + "movdqa %6,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "sub %1,%2 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps 
$0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_bgra0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_bgra)), // %4 - "m"(kBGRAToV), // %5 - "m"(kBGRAToU), // %6 - "m"(kAddUV128) // %7 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x10(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x20(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "movdqu 0x30(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_bgra0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_bgra)), // %4 + "m"(kBGRAToV), // %5 + "m"(kBGRAToU), // %6 + "m"(kAddUV128) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } -void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_abgr), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kABGRToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) { + asm volatile( + "movdqa %4,%%xmm5 \n" + "movdqa %3,%%xmm4 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 
0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm3,%%xmm2 \n" + "psrlw $0x7,%%xmm0 \n" + "psrlw $0x7,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_abgr), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kABGRToY), // %3 + "m"(kAddY16) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_rgba), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "m"(kRGBAToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) { + asm volatile( + "movdqa %4,%%xmm5 \n" + "movdqa %3,%%xmm4 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "phaddw %%xmm1,%%xmm0 \n" + "phaddw %%xmm3,%%xmm2 \n" + "psrlw $0x7,%%xmm0 \n" + "psrlw $0x7,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_rgba), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "m"(kRGBAToY), // %3 + "m"(kAddY16) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %5,%%xmm3 \n" - "movdqa %6,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" +void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "movdqa %5,%%xmm3 \n" + "movdqa %6,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "sub %1,%2 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps 
$0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_abgr0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_abgr)), // %4 - "m"(kABGRToV), // %5 - "m"(kABGRToU), // %6 - "m"(kAddUV128) // %7 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x10(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x20(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "movdqu 0x30(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_abgr0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_abgr)), // %4 + "m"(kABGRToV), // %5 + "m"(kABGRToU), // %6 + "m"(kAddUV128) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } -void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %5,%%xmm3 \n" - "movdqa %6,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" +void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0, + int src_stride_rgba, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "movdqa %5,%%xmm3 \n" + "movdqa %6,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "sub %1,%2 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps 
$0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_rgba0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_rgba)), // %4 - "m"(kRGBAToV), // %5 - "m"(kRGBAToU), // %6 - "m"(kAddUV128) // %7 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x10(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x20(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqu 0x30(%0),%%xmm6 \n" + "movdqu 0x30(%0,%4,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + + "lea 0x40(%0),%0 \n" + "movdqa %%xmm0,%%xmm7 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm7 \n" + "pavgb %%xmm7,%%xmm0 \n" + "movdqa %%xmm2,%%xmm7 \n" + "shufps $0x88,%%xmm6,%%xmm2 \n" + "shufps $0xdd,%%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "phaddw %%xmm2,%%xmm0 \n" + "phaddw %%xmm6,%%xmm1 \n" + "psraw $0x8,%%xmm0 \n" + "psraw $0x8,%%xmm1 \n" + "packsswb %%xmm1,%%xmm0 \n" + "paddb %%xmm5,%%xmm0 \n" + "movlps %%xmm0,(%1) \n" + "movhps %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_rgba0), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+rm"(width) // %3 + : "r"((intptr_t)(src_stride_rgba)), // %4 + "m"(kRGBAToV), // %5 + "m"(kRGBAToU), // %6 + "m"(kAddUV128) // %7 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) // Read 8 UV from 444 -#define READYUV444 \ - "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ - "punpcklbw %%xmm1,%%xmm0 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "punpcklbw %%xmm4,%%xmm4 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" +#define READYUV444 \ + "movq (%[u_buf]),%%xmm0 \n" \ + "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x8(%[u_buf]),%[u_buf] \n" \ + "punpcklbw %%xmm1,%%xmm0 \n" \ + "movq (%[y_buf]),%%xmm4 \n" \ + "punpcklbw %%xmm4,%%xmm4 \n" \ + "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 UV from 422, upsample to 8 UV -#define READYUV422 \ - "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ - "punpcklbw %%xmm1,%%xmm0 \n" \ - "punpcklwd %%xmm0,%%xmm0 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "punpcklbw %%xmm4,%%xmm4 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" +#define READYUV422 \ + "movd (%[u_buf]),%%xmm0 \n" \ + "movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x4(%[u_buf]),%[u_buf] \n" \ + "punpcklbw %%xmm1,%%xmm0 \n" \ + "punpcklwd %%xmm0,%%xmm0 \n" \ + "movq 
(%[y_buf]),%%xmm4 \n" \ + "punpcklbw %%xmm4,%%xmm4 \n" \ + "lea 0x8(%[y_buf]),%[y_buf] \n" + +// Read 4 UV from 422 10 bit, upsample to 8 UV +// TODO(fbarchard): Consider shufb to replace pack/unpack +// TODO(fbarchard): Consider pmulhuw to replace psraw +// TODO(fbarchard): Consider pmullw to replace psllw and allow different bits. +#define READYUV210 \ + "movq (%[u_buf]),%%xmm0 \n" \ + "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x8(%[u_buf]),%[u_buf] \n" \ + "punpcklwd %%xmm1,%%xmm0 \n" \ + "psraw $0x2,%%xmm0 \n" \ + "packuswb %%xmm0,%%xmm0 \n" \ + "punpcklwd %%xmm0,%%xmm0 \n" \ + "movdqu (%[y_buf]),%%xmm4 \n" \ + "psllw $0x6,%%xmm4 \n" \ + "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. -#define READYUVA422 \ - "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ - "punpcklbw %%xmm1,%%xmm0 \n" \ - "punpcklwd %%xmm0,%%xmm0 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "punpcklbw %%xmm4,%%xmm4 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ - "movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \ - "lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n" - -// Read 2 UV from 411, upsample to 8 UV. -// reading 4 bytes is an msan violation. -// "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" -// MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) -// pinsrw fails with drmemory -// __asm pinsrw xmm0, [esi], 0 /* U */ -// __asm pinsrw xmm1, [esi + edi], 0 /* V */ -#define READYUV411_TEMP \ - "movzwl " MEMACCESS([u_buf]) ",%[temp] \n" \ - "movd %[temp],%%xmm0 \n" \ - MEMOPARG(movzwl, 0x00, [u_buf], [v_buf], 1, [temp]) " \n" \ - "movd %[temp],%%xmm1 \n" \ - "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ - "punpcklbw %%xmm1,%%xmm0 \n" \ - "punpcklwd %%xmm0,%%xmm0 \n" \ - "punpckldq %%xmm0,%%xmm0 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "punpcklbw %%xmm4,%%xmm4 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" +#define READYUVA422 \ + "movd (%[u_buf]),%%xmm0 \n" \ + "movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x4(%[u_buf]),%[u_buf] \n" \ + "punpcklbw %%xmm1,%%xmm0 \n" \ + "punpcklwd %%xmm0,%%xmm0 \n" \ + "movq (%[y_buf]),%%xmm4 \n" \ + "punpcklbw %%xmm4,%%xmm4 \n" \ + "lea 0x8(%[y_buf]),%[y_buf] \n" \ + "movq (%[a_buf]),%%xmm5 \n" \ + "lea 0x8(%[a_buf]),%[a_buf] \n" // Read 4 UV from NV12, upsample to 8 UV -#define READNV12 \ - "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ - "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ - "punpcklwd %%xmm0,%%xmm0 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "punpcklbw %%xmm4,%%xmm4 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" +#define READNV12 \ + "movq (%[uv_buf]),%%xmm0 \n" \ + "lea 0x8(%[uv_buf]),%[uv_buf] \n" \ + "punpcklwd %%xmm0,%%xmm0 \n" \ + "movq (%[y_buf]),%%xmm4 \n" \ + "punpcklbw %%xmm4,%%xmm4 \n" \ + "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 VU from NV21, upsample to 8 UV -#define READNV21 \ - "movq " MEMACCESS([vu_buf]) ",%%xmm0 \n" \ - "lea " MEMLEA(0x8, [vu_buf]) ",%[vu_buf] \n" \ - "pshufb %[kShuffleNV21], %%xmm0 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "punpcklbw %%xmm4,%%xmm4 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" +#define READNV21 \ + "movq (%[vu_buf]),%%xmm0 \n" \ + "lea 0x8(%[vu_buf]),%[vu_buf] \n" \ + "pshufb %[kShuffleNV21], %%xmm0 \n" \ + "movq (%[y_buf]),%%xmm4 \n" \ + "punpcklbw %%xmm4,%%xmm4 \n" \ + "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 YUY2 with 8 Y and update 4 UV to 8 UV. 
-#define READYUY2 \ - "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm4 \n" \ - "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \ - "movdqu " MEMACCESS([yuy2_buf]) ",%%xmm0 \n" \ - "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \ - "lea " MEMLEA(0x10, [yuy2_buf]) ",%[yuy2_buf] \n" +#define READYUY2 \ + "movdqu (%[yuy2_buf]),%%xmm4 \n" \ + "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \ + "movdqu (%[yuy2_buf]),%%xmm0 \n" \ + "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \ + "lea 0x10(%[yuy2_buf]),%[yuy2_buf] \n" // Read 4 UYVY with 8 Y and update 4 UV to 8 UV. -#define READUYVY \ - "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm4 \n" \ - "pshufb %[kShuffleUYVYY], %%xmm4 \n" \ - "movdqu " MEMACCESS([uyvy_buf]) ",%%xmm0 \n" \ - "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ - "lea " MEMLEA(0x10, [uyvy_buf]) ",%[uyvy_buf] \n" +#define READUYVY \ + "movdqu (%[uyvy_buf]),%%xmm4 \n" \ + "pshufb %[kShuffleUYVYY], %%xmm4 \n" \ + "movdqu (%[uyvy_buf]),%%xmm0 \n" \ + "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ + "lea 0x10(%[uyvy_buf]),%[uyvy_buf] \n" #if defined(__x86_64__) -#define YUVTORGB_SETUP(yuvconstants) \ - "movdqa " MEMACCESS([yuvconstants]) ",%%xmm8 \n" \ - "movdqa " MEMACCESS2(32, [yuvconstants]) ",%%xmm9 \n" \ - "movdqa " MEMACCESS2(64, [yuvconstants]) ",%%xmm10 \n" \ - "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm11 \n" \ - "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm12 \n" \ - "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm13 \n" \ - "movdqa " MEMACCESS2(192, [yuvconstants]) ",%%xmm14 \n" +#define YUVTORGB_SETUP(yuvconstants) \ + "movdqa (%[yuvconstants]),%%xmm8 \n" \ + "movdqa 32(%[yuvconstants]),%%xmm9 \n" \ + "movdqa 64(%[yuvconstants]),%%xmm10 \n" \ + "movdqa 96(%[yuvconstants]),%%xmm11 \n" \ + "movdqa 128(%[yuvconstants]),%%xmm12 \n" \ + "movdqa 160(%[yuvconstants]),%%xmm13 \n" \ + "movdqa 192(%[yuvconstants]),%%xmm14 \n" // Convert 8 pixels: 8 UV and 8 Y -#define YUVTORGB(yuvconstants) \ - "movdqa %%xmm0,%%xmm1 \n" \ - "movdqa %%xmm0,%%xmm2 \n" \ - "movdqa %%xmm0,%%xmm3 \n" \ - "movdqa %%xmm11,%%xmm0 \n" \ - "pmaddubsw %%xmm8,%%xmm1 \n" \ - "psubw %%xmm1,%%xmm0 \n" \ - "movdqa %%xmm12,%%xmm1 \n" \ - "pmaddubsw %%xmm9,%%xmm2 \n" \ - "psubw %%xmm2,%%xmm1 \n" \ - "movdqa %%xmm13,%%xmm2 \n" \ - "pmaddubsw %%xmm10,%%xmm3 \n" \ - "psubw %%xmm3,%%xmm2 \n" \ - "pmulhuw %%xmm14,%%xmm4 \n" \ - "paddsw %%xmm4,%%xmm0 \n" \ - "paddsw %%xmm4,%%xmm1 \n" \ - "paddsw %%xmm4,%%xmm2 \n" \ - "psraw $0x6,%%xmm0 \n" \ - "psraw $0x6,%%xmm1 \n" \ - "psraw $0x6,%%xmm2 \n" \ - "packuswb %%xmm0,%%xmm0 \n" \ - "packuswb %%xmm1,%%xmm1 \n" \ - "packuswb %%xmm2,%%xmm2 \n" +#define YUVTORGB16(yuvconstants) \ + "movdqa %%xmm0,%%xmm1 \n" \ + "movdqa %%xmm0,%%xmm2 \n" \ + "movdqa %%xmm0,%%xmm3 \n" \ + "movdqa %%xmm11,%%xmm0 \n" \ + "pmaddubsw %%xmm8,%%xmm1 \n" \ + "psubw %%xmm1,%%xmm0 \n" \ + "movdqa %%xmm12,%%xmm1 \n" \ + "pmaddubsw %%xmm9,%%xmm2 \n" \ + "psubw %%xmm2,%%xmm1 \n" \ + "movdqa %%xmm13,%%xmm2 \n" \ + "pmaddubsw %%xmm10,%%xmm3 \n" \ + "psubw %%xmm3,%%xmm2 \n" \ + "pmulhuw %%xmm14,%%xmm4 \n" \ + "paddsw %%xmm4,%%xmm0 \n" \ + "paddsw %%xmm4,%%xmm1 \n" \ + "paddsw %%xmm4,%%xmm2 \n" #define YUVTORGB_REGS \ - "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", #else #define YUVTORGB_SETUP(yuvconstants) // Convert 8 pixels: 8 UV and 8 Y -#define YUVTORGB(yuvconstants) \ - "movdqa %%xmm0,%%xmm1 \n" \ - "movdqa %%xmm0,%%xmm2 \n" \ - "movdqa %%xmm0,%%xmm3 \n" \ - "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \ - "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \ - "psubw 
%%xmm1,%%xmm0 \n" \ - "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \ - "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \ - "psubw %%xmm2,%%xmm1 \n" \ - "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \ - "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \ - "psubw %%xmm3,%%xmm2 \n" \ - "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \ - "paddsw %%xmm4,%%xmm0 \n" \ - "paddsw %%xmm4,%%xmm1 \n" \ - "paddsw %%xmm4,%%xmm2 \n" \ - "psraw $0x6,%%xmm0 \n" \ - "psraw $0x6,%%xmm1 \n" \ - "psraw $0x6,%%xmm2 \n" \ - "packuswb %%xmm0,%%xmm0 \n" \ - "packuswb %%xmm1,%%xmm1 \n" \ - "packuswb %%xmm2,%%xmm2 \n" +#define YUVTORGB16(yuvconstants) \ + "movdqa %%xmm0,%%xmm1 \n" \ + "movdqa %%xmm0,%%xmm2 \n" \ + "movdqa %%xmm0,%%xmm3 \n" \ + "movdqa 96(%[yuvconstants]),%%xmm0 \n" \ + "pmaddubsw (%[yuvconstants]),%%xmm1 \n" \ + "psubw %%xmm1,%%xmm0 \n" \ + "movdqa 128(%[yuvconstants]),%%xmm1 \n" \ + "pmaddubsw 32(%[yuvconstants]),%%xmm2 \n" \ + "psubw %%xmm2,%%xmm1 \n" \ + "movdqa 160(%[yuvconstants]),%%xmm2 \n" \ + "pmaddubsw 64(%[yuvconstants]),%%xmm3 \n" \ + "psubw %%xmm3,%%xmm2 \n" \ + "pmulhuw 192(%[yuvconstants]),%%xmm4 \n" \ + "paddsw %%xmm4,%%xmm0 \n" \ + "paddsw %%xmm4,%%xmm1 \n" \ + "paddsw %%xmm4,%%xmm2 \n" #define YUVTORGB_REGS #endif +#define YUVTORGB(yuvconstants) \ + YUVTORGB16(yuvconstants) \ + "psraw $0x6,%%xmm0 \n" \ + "psraw $0x6,%%xmm1 \n" \ + "psraw $0x6,%%xmm2 \n" \ + "packuswb %%xmm0,%%xmm0 \n" \ + "packuswb %%xmm1,%%xmm1 \n" \ + "packuswb %%xmm2,%%xmm2 \n" + // Store 8 ARGB values. -#define STOREARGB \ - "punpcklbw %%xmm1,%%xmm0 \n" \ - "punpcklbw %%xmm5,%%xmm2 \n" \ - "movdqa %%xmm0,%%xmm1 \n" \ - "punpcklwd %%xmm2,%%xmm0 \n" \ - "punpckhwd %%xmm2,%%xmm1 \n" \ - "movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \ - "movdqu %%xmm1," MEMACCESS2(0x10, [dst_argb]) " \n" \ - "lea " MEMLEA(0x20, [dst_argb]) ", %[dst_argb] \n" +#define STOREARGB \ + "punpcklbw %%xmm1,%%xmm0 \n" \ + "punpcklbw %%xmm5,%%xmm2 \n" \ + "movdqa %%xmm0,%%xmm1 \n" \ + "punpcklwd %%xmm2,%%xmm0 \n" \ + "punpckhwd %%xmm2,%%xmm1 \n" \ + "movdqu %%xmm0,(%[dst_argb]) \n" \ + "movdqu %%xmm1,0x10(%[dst_argb]) \n" \ + "lea 0x20(%[dst_argb]), %[dst_argb] \n" // Store 8 RGBA values. -#define STORERGBA \ - "pcmpeqb %%xmm5,%%xmm5 \n" \ - "punpcklbw %%xmm2,%%xmm1 \n" \ - "punpcklbw %%xmm0,%%xmm5 \n" \ - "movdqa %%xmm5,%%xmm0 \n" \ - "punpcklwd %%xmm1,%%xmm5 \n" \ - "punpckhwd %%xmm1,%%xmm0 \n" \ - "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ - "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ - "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n" +#define STORERGBA \ + "pcmpeqb %%xmm5,%%xmm5 \n" \ + "punpcklbw %%xmm2,%%xmm1 \n" \ + "punpcklbw %%xmm0,%%xmm5 \n" \ + "movdqa %%xmm5,%%xmm0 \n" \ + "punpcklwd %%xmm1,%%xmm5 \n" \ + "punpckhwd %%xmm1,%%xmm0 \n" \ + "movdqu %%xmm5,(%[dst_rgba]) \n" \ + "movdqu %%xmm0,0x10(%[dst_rgba]) \n" \ + "lea 0x20(%[dst_rgba]),%[dst_rgba] \n" -void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +// Store 8 AR30 values. 
+#define STOREAR30 \ + "psraw $0x4,%%xmm0 \n" \ + "psraw $0x4,%%xmm1 \n" \ + "psraw $0x4,%%xmm2 \n" \ + "pminsw %%xmm7,%%xmm0 \n" \ + "pminsw %%xmm7,%%xmm1 \n" \ + "pminsw %%xmm7,%%xmm2 \n" \ + "pmaxsw %%xmm6,%%xmm0 \n" \ + "pmaxsw %%xmm6,%%xmm1 \n" \ + "pmaxsw %%xmm6,%%xmm2 \n" \ + "psllw $0x4,%%xmm2 \n" \ + "movdqa %%xmm0,%%xmm3 \n" \ + "punpcklwd %%xmm2,%%xmm0 \n" \ + "punpckhwd %%xmm2,%%xmm3 \n" \ + "movdqa %%xmm1,%%xmm2 \n" \ + "punpcklwd %%xmm5,%%xmm1 \n" \ + "punpckhwd %%xmm5,%%xmm2 \n" \ + "pslld $0xa,%%xmm1 \n" \ + "pslld $0xa,%%xmm2 \n" \ + "por %%xmm1,%%xmm0 \n" \ + "por %%xmm2,%%xmm3 \n" \ + "movdqu %%xmm0,(%[dst_ar30]) \n" \ + "movdqu %%xmm3,0x10(%[dst_ar30]) \n" \ + "lea 0x20(%[dst_ar30]), %[dst_ar30] \n" + +void OMITFP I444ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" + LABELALIGN - "1: \n" + "1: \n" READYUV444 YUVTORGB(yuvconstants) STOREARGB @@ -1691,15 +2028,15 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } -void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgb24, +void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -1707,8 +2044,9 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" "sub %[u_buf],%[v_buf] \n" + LABELALIGN - "1: \n" + "1: \n" READYUV422 YUVTORGB(yuvconstants) "punpcklbw %%xmm1,%%xmm0 \n" @@ -1719,16 +2057,16 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, "pshufb %%xmm5,%%xmm0 \n" "pshufb %%xmm6,%%xmm1 \n" "palignr $0xc,%%xmm0,%%xmm1 \n" - "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n" - "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n" - "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n" + "movq %%xmm0,(%[dst_rgb24]) \n" + "movdqu %%xmm1,0x8(%[dst_rgb24]) \n" + "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n" "subl $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24] -#if defined(__i386__) && defined(__pic__) +#if defined(__i386__) [width]"+m"(width) // %[width] #else [width]"+rm"(width) // %[width] @@ -1736,23 +2074,24 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) - : "memory", "cc", NACL_R14 YUVTORGB_REGS + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" ); } -void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +void OMITFP I422ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" + LABELALIGN - "1: \n" 
+ "1: \n" READYUV422 YUVTORGB(yuvconstants) STOREARGB @@ -1764,24 +2103,125 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } -#ifdef HAS_I422ALPHATOARGBROW_SSSE3 -void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +void OMITFP I422ToAR30Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width) { asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" + "pcmpeqb %%xmm5,%%xmm5 \n" // AR30 constants + "psrlw $14,%%xmm5 \n" + "psllw $4,%%xmm5 \n" // 2 alpha bits + "pxor %%xmm6,%%xmm6 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min + "psrlw $6,%%xmm7 \n" // 1023 for max + LABELALIGN - "1: \n" + "1: \n" + READYUV422 + YUVTORGB16(yuvconstants) + STOREAR30 + "sub $0x8,%[width] \n" + "jg 1b \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_ar30]"+r"(dst_ar30), // %[dst_ar30] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", YUVTORGB_REGS + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" + ); +} + +// 10 bit YUV to ARGB +void OMITFP I210ToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + + LABELALIGN + "1: \n" + READYUV210 + YUVTORGB(yuvconstants) + STOREARGB + "sub $0x8,%[width] \n" + "jg 1b \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_argb]"+r"(dst_argb), // %[dst_argb] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", YUVTORGB_REGS + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + ); +} + +// 10 bit YUV to AR30 +void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $14,%%xmm5 \n" + "psllw $4,%%xmm5 \n" // 2 alpha bits + "pxor %%xmm6,%%xmm6 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min + "psrlw $6,%%xmm7 \n" // 1023 for max + + LABELALIGN + "1: \n" + READYUV210 + YUVTORGB16(yuvconstants) + STOREAR30 + "sub $0x8,%[width] \n" + "jg 1b \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_ar30]"+r"(dst_ar30), // %[dst_ar30] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", YUVTORGB_REGS + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" + ); +} + +#ifdef HAS_I422ALPHATOARGBROW_SSSE3 +void OMITFP I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + // clang-format off + asm volatile ( + 
YUVTORGB_SETUP(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + + LABELALIGN + "1: \n" READYUVA422 YUVTORGB(yuvconstants) STOREARGB @@ -1792,64 +2232,31 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [a_buf]"+r"(a_buf), // %[a_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] -#if defined(__i386__) && defined(__pic__) +#if defined(__i386__) [width]"+m"(width) // %[width] #else [width]"+rm"(width) // %[width] #endif : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); + // clang-format on } #endif // HAS_I422ALPHATOARGBROW_SSSE3 -#ifdef HAS_I411TOARGBROW_SSSE3 -void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +void OMITFP NV12ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - int temp; + // clang-format off asm volatile ( YUVTORGB_SETUP(yuvconstants) - "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - READYUV411_TEMP - YUVTORGB(yuvconstants) - STOREARGB - "subl $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [temp]"=&r"(temp), // %[temp] -#if defined(__i386__) && defined(__pic__) - [width]"+m"(width) // %[width] -#else - [width]"+rm"(width) // %[width] -#endif - : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); -} -#endif -void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - YUVTORGB_SETUP(yuvconstants) - "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN - "1: \n" + "1: \n" READNV12 YUVTORGB(yuvconstants) STOREARGB @@ -1860,21 +2267,24 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", YUVTORGB_REGS // Does not use r14. + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); + // clang-format on } -void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* dst_argb, +void OMITFP NV21ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* vu_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { + // clang-format off asm volatile ( YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" + LABELALIGN - "1: \n" + "1: \n" READNV21 YUVTORGB(yuvconstants) STOREARGB @@ -1886,20 +2296,23 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleNV21]"m"(kShuffleNV21) - : "memory", "cc", YUVTORGB_REGS // Does not use r14. 
+ : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); + // clang-format on } -void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, - uint8* dst_argb, +void OMITFP YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { + // clang-format off asm volatile ( YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" + LABELALIGN - "1: \n" + "1: \n" READYUY2 YUVTORGB(yuvconstants) STOREARGB @@ -1911,20 +2324,23 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleYUY2Y]"m"(kShuffleYUY2Y), [kShuffleYUY2UV]"m"(kShuffleYUY2UV) - : "memory", "cc", YUVTORGB_REGS // Does not use r14. + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); + // clang-format on } -void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, - uint8* dst_argb, +void OMITFP UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { + // clang-format off asm volatile ( YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" + LABELALIGN - "1: \n" + "1: \n" READUYVY YUVTORGB(yuvconstants) STOREARGB @@ -1936,23 +2352,25 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleUYVYY]"m"(kShuffleUYVYY), [kShuffleUYVYUV]"m"(kShuffleUYVYUV) - : "memory", "cc", YUVTORGB_REGS // Does not use r14. + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); + // clang-format on } -void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgba, +void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" + LABELALIGN - "1: \n" + "1: \n" READYUV422 YUVTORGB(yuvconstants) STORERGBA @@ -1964,7 +2382,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS + : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -1972,179 +2390,211 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, #endif // HAS_I422TOARGBROW_SSSE3 // Read 16 UV from 444 -#define READYUV444_AVX2 \ - "vmovdqu " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x10, [u_buf]) ",%[u_buf] \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ - "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ - "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ - "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" +#define READYUV444_AVX2 \ + "vmovdqu (%[u_buf]),%%xmm0 \n" \ + "vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x10(%[u_buf]),%[u_buf] \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ + "vmovdqu (%[y_buf]),%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 8 UV from 422, upsample to 16 UV. 
-#define READYUV422_AVX2 \ - "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ - "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ - "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ - "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" +#define READYUV422_AVX2 \ + "vmovq (%[u_buf]),%%xmm0 \n" \ + "vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x8(%[u_buf]),%[u_buf] \n" \ + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ + "vmovdqu (%[y_buf]),%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea 0x10(%[y_buf]),%[y_buf] \n" + +// Read 8 UV from 210 10 bit, upsample to 16 UV +// TODO(fbarchard): Consider vshufb to replace pack/unpack +// TODO(fbarchard): Consider vunpcklpd to combine the 2 registers into 1. +#define READYUV210_AVX2 \ + "vmovdqu (%[u_buf]),%%xmm0 \n" \ + "vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x10(%[u_buf]),%[u_buf] \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ + "vpunpcklwd %%ymm1,%%ymm0,%%ymm0 \n" \ + "vpsraw $0x2,%%ymm0,%%ymm0 \n" \ + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ + "vmovdqu (%[y_buf]),%%ymm4 \n" \ + "vpsllw $0x6,%%ymm4,%%ymm4 \n" \ + "lea 0x20(%[y_buf]),%[y_buf] \n" // Read 8 UV from 422, upsample to 16 UV. With 16 Alpha. -#define READYUVA422_AVX2 \ - "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ - "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ - "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ - "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \ - "vmovdqu " MEMACCESS([a_buf]) ",%%xmm5 \n" \ - "vpermq $0xd8,%%ymm5,%%ymm5 \n" \ - "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n" - -// Read 4 UV from 411, upsample to 16 UV. -#define READYUV411_AVX2 \ - "vmovd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - MEMOPREG(vmovd, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ - "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ - "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" \ - "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ - "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" +#define READYUVA422_AVX2 \ + "vmovq (%[u_buf]),%%xmm0 \n" \ + "vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ + "lea 0x8(%[u_buf]),%[u_buf] \n" \ + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ + "vmovdqu (%[y_buf]),%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea 0x10(%[y_buf]),%[y_buf] \n" \ + "vmovdqu (%[a_buf]),%%xmm5 \n" \ + "vpermq $0xd8,%%ymm5,%%ymm5 \n" \ + "lea 0x10(%[a_buf]),%[a_buf] \n" // Read 8 UV from NV12, upsample to 16 UV. 
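The READNV12_AVX2 macro below consumes the semi-planar layout where U and V already alternate within one half-resolution plane, so only the horizontal duplication step is needed; NV21 is the same layout with V first, which READNV21_AVX2 fixes up with a vpshufb swap. A scalar sketch of the assumed NV12 chroma addressing:

    /* Scalar sketch of NV12 chroma addressing: one Y plane, then an
       interleaved UV plane at half horizontal resolution. */
    #include <stdint.h>

    static void SampleNV12(const uint8_t* uv_plane, int x,
                           uint8_t* u, uint8_t* v) {
      /* Two bytes per chroma pair, shared by pixels x and x+1. */
      *u = uv_plane[(x / 2) * 2 + 0];
      *v = uv_plane[(x / 2) * 2 + 1];
    }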
-#define READNV12_AVX2 \ - "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ - "lea " MEMLEA(0x10, [uv_buf]) ",%[uv_buf] \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ - "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ - "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" +#define READNV12_AVX2 \ + "vmovdqu (%[uv_buf]),%%xmm0 \n" \ + "lea 0x10(%[uv_buf]),%[uv_buf] \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ + "vmovdqu (%[y_buf]),%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 8 VU from NV21, upsample to 16 UV. -#define READNV21_AVX2 \ - "vmovdqu " MEMACCESS([vu_buf]) ",%%xmm0 \n" \ - "lea " MEMLEA(0x10, [vu_buf]) ",%[vu_buf] \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpshufb %[kShuffleNV21], %%ymm0, %%ymm0 \n" \ - "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ - "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ - "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ - "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" +#define READNV21_AVX2 \ + "vmovdqu (%[vu_buf]),%%xmm0 \n" \ + "lea 0x10(%[vu_buf]),%[vu_buf] \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpshufb %[kShuffleNV21], %%ymm0, %%ymm0 \n" \ + "vmovdqu (%[y_buf]),%%xmm4 \n" \ + "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ + "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV. -#define READYUY2_AVX2 \ - "vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm4 \n" \ - "vpshufb %[kShuffleYUY2Y], %%ymm4, %%ymm4 \n" \ - "vmovdqu " MEMACCESS([yuy2_buf]) ",%%ymm0 \n" \ - "vpshufb %[kShuffleYUY2UV], %%ymm0, %%ymm0 \n" \ - "lea " MEMLEA(0x20, [yuy2_buf]) ",%[yuy2_buf] \n" +#define READYUY2_AVX2 \ + "vmovdqu (%[yuy2_buf]),%%ymm4 \n" \ + "vpshufb %[kShuffleYUY2Y], %%ymm4, %%ymm4 \n" \ + "vmovdqu (%[yuy2_buf]),%%ymm0 \n" \ + "vpshufb %[kShuffleYUY2UV], %%ymm0, %%ymm0 \n" \ + "lea 0x20(%[yuy2_buf]),%[yuy2_buf] \n" // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV. 
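READYUY2_AVX2 above and the READUYVY_AVX2 macro that follows use the same trick: load the identical 32 bytes twice and peel luma and chroma out with two different vpshufb masks. The two packed formats differ only in byte order, which a scalar sketch makes plain:

    /* Packed 4:2:2 byte order per pixel pair (illustrative):
         YUY2: Y0 U0 Y1 V0
         UYVY: U0 Y0 V0 Y1 */
    #include <stdint.h>

    static void UnpackYUY2Pair(const uint8_t* p,
                               uint8_t y[2], uint8_t* u, uint8_t* v) {
      y[0] = p[0];
      *u   = p[1];
      y[1] = p[2];
      *v   = p[3];
    }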
-#define READUYVY_AVX2 \ - "vmovdqu " MEMACCESS([uyvy_buf]) ",%%ymm4 \n" \ - "vpshufb %[kShuffleUYVYY], %%ymm4, %%ymm4 \n" \ - "vmovdqu " MEMACCESS([uyvy_buf]) ",%%ymm0 \n" \ - "vpshufb %[kShuffleUYVYUV], %%ymm0, %%ymm0 \n" \ - "lea " MEMLEA(0x20, [uyvy_buf]) ",%[uyvy_buf] \n" +#define READUYVY_AVX2 \ + "vmovdqu (%[uyvy_buf]),%%ymm4 \n" \ + "vpshufb %[kShuffleUYVYY], %%ymm4, %%ymm4 \n" \ + "vmovdqu (%[uyvy_buf]),%%ymm0 \n" \ + "vpshufb %[kShuffleUYVYUV], %%ymm0, %%ymm0 \n" \ + "lea 0x20(%[uyvy_buf]),%[uyvy_buf] \n" #if defined(__x86_64__) -#define YUVTORGB_SETUP_AVX2(yuvconstants) \ - "vmovdqa " MEMACCESS([yuvconstants]) ",%%ymm8 \n" \ - "vmovdqa " MEMACCESS2(32, [yuvconstants]) ",%%ymm9 \n" \ - "vmovdqa " MEMACCESS2(64, [yuvconstants]) ",%%ymm10 \n" \ - "vmovdqa " MEMACCESS2(96, [yuvconstants]) ",%%ymm11 \n" \ - "vmovdqa " MEMACCESS2(128, [yuvconstants]) ",%%ymm12 \n" \ - "vmovdqa " MEMACCESS2(160, [yuvconstants]) ",%%ymm13 \n" \ - "vmovdqa " MEMACCESS2(192, [yuvconstants]) ",%%ymm14 \n" -#define YUVTORGB_AVX2(yuvconstants) \ - "vpmaddubsw %%ymm10,%%ymm0,%%ymm2 \n" \ - "vpmaddubsw %%ymm9,%%ymm0,%%ymm1 \n" \ - "vpmaddubsw %%ymm8,%%ymm0,%%ymm0 \n" \ - "vpsubw %%ymm2,%%ymm13,%%ymm2 \n" \ - "vpsubw %%ymm1,%%ymm12,%%ymm1 \n" \ - "vpsubw %%ymm0,%%ymm11,%%ymm0 \n" \ - "vpmulhuw %%ymm14,%%ymm4,%%ymm4 \n" \ - "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \ - "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \ - "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" \ - "vpsraw $0x6,%%ymm0,%%ymm0 \n" \ - "vpsraw $0x6,%%ymm1,%%ymm1 \n" \ - "vpsraw $0x6,%%ymm2,%%ymm2 \n" \ - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ - "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ - "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" +#define YUVTORGB_SETUP_AVX2(yuvconstants) \ + "vmovdqa (%[yuvconstants]),%%ymm8 \n" \ + "vmovdqa 32(%[yuvconstants]),%%ymm9 \n" \ + "vmovdqa 64(%[yuvconstants]),%%ymm10 \n" \ + "vmovdqa 96(%[yuvconstants]),%%ymm11 \n" \ + "vmovdqa 128(%[yuvconstants]),%%ymm12 \n" \ + "vmovdqa 160(%[yuvconstants]),%%ymm13 \n" \ + "vmovdqa 192(%[yuvconstants]),%%ymm14 \n" + +#define YUVTORGB16_AVX2(yuvconstants) \ + "vpmaddubsw %%ymm10,%%ymm0,%%ymm2 \n" \ + "vpmaddubsw %%ymm9,%%ymm0,%%ymm1 \n" \ + "vpmaddubsw %%ymm8,%%ymm0,%%ymm0 \n" \ + "vpsubw %%ymm2,%%ymm13,%%ymm2 \n" \ + "vpsubw %%ymm1,%%ymm12,%%ymm1 \n" \ + "vpsubw %%ymm0,%%ymm11,%%ymm0 \n" \ + "vpmulhuw %%ymm14,%%ymm4,%%ymm4 \n" \ + "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \ + "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \ + "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" + #define YUVTORGB_REGS_AVX2 \ - "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", + #else // Convert 16 pixels: 16 UV and 16 Y. 
+ #define YUVTORGB_SETUP_AVX2(yuvconstants) -#define YUVTORGB_AVX2(yuvconstants) \ - "vpmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%ymm0,%%ymm2 \n" \ - "vpmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%ymm0,%%ymm1 \n" \ - "vpmaddubsw " MEMACCESS([yuvconstants]) ",%%ymm0,%%ymm0 \n" \ - "vmovdqu " MEMACCESS2(160, [yuvconstants]) ",%%ymm3 \n" \ - "vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \ - "vmovdqu " MEMACCESS2(128, [yuvconstants]) ",%%ymm3 \n" \ - "vpsubw %%ymm1,%%ymm3,%%ymm1 \n" \ - "vmovdqu " MEMACCESS2(96, [yuvconstants]) ",%%ymm3 \n" \ - "vpsubw %%ymm0,%%ymm3,%%ymm0 \n" \ - "vpmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%ymm4,%%ymm4 \n" \ - "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \ - "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \ - "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" \ - "vpsraw $0x6,%%ymm0,%%ymm0 \n" \ - "vpsraw $0x6,%%ymm1,%%ymm1 \n" \ - "vpsraw $0x6,%%ymm2,%%ymm2 \n" \ - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ - "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ - "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" +#define YUVTORGB16_AVX2(yuvconstants) \ + "vpmaddubsw 64(%[yuvconstants]),%%ymm0,%%ymm2 \n" \ + "vpmaddubsw 32(%[yuvconstants]),%%ymm0,%%ymm1 \n" \ + "vpmaddubsw (%[yuvconstants]),%%ymm0,%%ymm0 \n" \ + "vmovdqu 160(%[yuvconstants]),%%ymm3 \n" \ + "vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \ + "vmovdqu 128(%[yuvconstants]),%%ymm3 \n" \ + "vpsubw %%ymm1,%%ymm3,%%ymm1 \n" \ + "vmovdqu 96(%[yuvconstants]),%%ymm3 \n" \ + "vpsubw %%ymm0,%%ymm3,%%ymm0 \n" \ + "vpmulhuw 192(%[yuvconstants]),%%ymm4,%%ymm4 \n" \ + "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \ + "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \ + "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" #define YUVTORGB_REGS_AVX2 #endif +#define YUVTORGB_AVX2(yuvconstants) \ + YUVTORGB16_AVX2(yuvconstants) \ + "vpsraw $0x6,%%ymm0,%%ymm0 \n" \ + "vpsraw $0x6,%%ymm1,%%ymm1 \n" \ + "vpsraw $0x6,%%ymm2,%%ymm2 \n" \ + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ + "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ + "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" + // Store 16 ARGB values. -#define STOREARGB_AVX2 \ - "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ - "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ - "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \ - "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ - "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \ - "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \ - "vmovdqu %%ymm1," MEMACCESS([dst_argb]) " \n" \ - "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \ - "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n" +#define STOREARGB_AVX2 \ + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \ + "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ + "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \ + "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \ + "vmovdqu %%ymm1,(%[dst_argb]) \n" \ + "vmovdqu %%ymm0,0x20(%[dst_argb]) \n" \ + "lea 0x40(%[dst_argb]), %[dst_argb] \n" + +// Store 16 AR30 values. 
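YUVTORGB16_AVX2 above leaves each channel as a signed 16-bit value carrying 6 fractional bits; the 8-bit path (YUVTORGB_AVX2) shifts right by 6 and saturates to bytes, while the STOREAR30_AVX2 macro that follows shifts right by only 4 to keep 10 significant bits, clamps to [0, 1023], and packs 2.10.10.10 words with opaque alpha. A scalar model of that packing, assuming the 6-fractional-bit inputs:

    /* Scalar sketch of the AR30 store: inputs are the 16-bit channel
       values with 6 fractional bits produced by YUVTORGB16. */
    #include <stdint.h>

    static uint32_t PackAR30(int16_t b6, int16_t g6, int16_t r6) {
      int b = b6 >> 4, g = g6 >> 4, r = r6 >> 4;  /* keep 10 bits (psraw $4) */
      if (b < 0) b = 0; if (b > 1023) b = 1023;   /* pmaxsw/pminsw clamp */
      if (g < 0) g = 0; if (g > 1023) g = 1023;
      if (r < 0) r = 0; if (r > 1023) r = 1023;
      /* 2 alpha bits on top, then R, G, B in descending 10-bit fields. */
      return 0xC0000000u | ((uint32_t)r << 20) | ((uint32_t)g << 10) | (uint32_t)b;
    }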
+#define STOREAR30_AVX2 \ + "vpsraw $0x4,%%ymm0,%%ymm0 \n" \ + "vpsraw $0x4,%%ymm1,%%ymm1 \n" \ + "vpsraw $0x4,%%ymm2,%%ymm2 \n" \ + "vpminsw %%ymm7,%%ymm0,%%ymm0 \n" \ + "vpminsw %%ymm7,%%ymm1,%%ymm1 \n" \ + "vpminsw %%ymm7,%%ymm2,%%ymm2 \n" \ + "vpmaxsw %%ymm6,%%ymm0,%%ymm0 \n" \ + "vpmaxsw %%ymm6,%%ymm1,%%ymm1 \n" \ + "vpmaxsw %%ymm6,%%ymm2,%%ymm2 \n" \ + "vpsllw $0x4,%%ymm2,%%ymm2 \n" \ + "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ + "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ + "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ + "vpunpckhwd %%ymm2,%%ymm0,%%ymm3 \n" \ + "vpunpcklwd %%ymm2,%%ymm0,%%ymm0 \n" \ + "vpunpckhwd %%ymm5,%%ymm1,%%ymm2 \n" \ + "vpunpcklwd %%ymm5,%%ymm1,%%ymm1 \n" \ + "vpslld $0xa,%%ymm1,%%ymm1 \n" \ + "vpslld $0xa,%%ymm2,%%ymm2 \n" \ + "vpor %%ymm1,%%ymm0,%%ymm0 \n" \ + "vpor %%ymm2,%%ymm3,%%ymm3 \n" \ + "vmovdqu %%ymm0,(%[dst_ar30]) \n" \ + "vmovdqu %%ymm3,0x20(%[dst_ar30]) \n" \ + "lea 0x40(%[dst_ar30]), %[dst_ar30] \n" #ifdef HAS_I444TOARGBROW_AVX2 // 16 pixels // 16 UV values with 16 Y producing 16 ARGB (64 bytes). -void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +void OMITFP I444ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + LABELALIGN - "1: \n" + "1: \n" READYUV444_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 @@ -2157,65 +2607,34 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_I444TOARGBROW_AVX2 -#ifdef HAS_I411TOARGBROW_AVX2 -// 16 pixels -// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). -void OMITFP I411ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - YUVTORGB_SETUP_AVX2(yuvconstants) - "sub %[u_buf],%[v_buf] \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - LABELALIGN - "1: \n" - READYUV411_AVX2 - YUVTORGB_AVX2(yuvconstants) - STOREARGB_AVX2 - "sub $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); -} -#endif // HAS_I411TOARGBROW_AVX2 - #if defined(HAS_I422TOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 
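Each AVX2 row converter here, including the I422ToARGBRow_AVX2 that follows, has the same shape: per-call constant setup (YUVTORGB_SETUP_AVX2), an aligned loop consuming 16 pixels per iteration, and a trailing vzeroupper. Callers are expected to pick the widest row the CPU supports and fall back to the C row otherwise; a hedged sketch of that convention (not libyuv's actual wrapper, which also routes odd widths through *_Any variants):

    /* Hypothetical dispatch sketch; header paths and the width check
       are assumptions, only the symbol names come from libyuv. */
    #include <stdint.h>
    #include "libyuv/row.h"     /* assumed: row prototypes, kYuvI601Constants */
    #include "libyuv/cpu_id.h"  /* assumed: TestCpuFlag */

    static void ConvertRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
                           uint8_t* dst, int width) {
      void (*row)(const uint8_t*, const uint8_t*, const uint8_t*, uint8_t*,
                  const struct YuvConstants*, int) = I422ToARGBRow_C;
    #if defined(HAS_I422TOARGBROW_AVX2)
      if (TestCpuFlag(kCpuHasAVX2) && !(width & 15)) {
        row = I422ToARGBRow_AVX2;  /* 16 pixels per loop iteration */
      }
    #endif
      row(y, u, v, dst, &kYuvI601Constants, width);
    }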
-void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +void OMITFP I422ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + LABELALIGN - "1: \n" + "1: \n" READYUV422_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 "sub $0x10,%[width] \n" "jg 1b \n" + "vzeroupper \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] @@ -2223,27 +2642,144 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_I422TOARGBROW_AVX2 -#if defined(HAS_I422ALPHATOARGBROW_AVX2) +#if defined(HAS_I422TOAR30ROW_AVX2) // 16 pixels -// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. -void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes). +void OMITFP I422ToAR30Row_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants + "vpsrlw $14,%%ymm5,%%ymm5 \n" + "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits + "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min + "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max + "vpsrlw $6,%%ymm7,%%ymm7 \n" + LABELALIGN - "1: \n" + "1: \n" + READYUV422_AVX2 + YUVTORGB16_AVX2(yuvconstants) + STOREAR30_AVX2 + "sub $0x10,%[width] \n" + "jg 1b \n" + + "vzeroupper \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_ar30]"+r"(dst_ar30), // %[dst_ar30] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", YUVTORGB_REGS_AVX2 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" + ); +} +#endif // HAS_I422TOAR30ROW_AVX2 + +#if defined(HAS_I210TOARGBROW_AVX2) +// 16 pixels +// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). +void OMITFP I210ToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + + LABELALIGN + "1: \n" + READYUV210_AVX2 + YUVTORGB_AVX2(yuvconstants) + STOREARGB_AVX2 + "sub $0x10,%[width] \n" + "jg 1b \n" + + "vzeroupper \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_argb]"+r"(dst_argb), // %[dst_argb] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", YUVTORGB_REGS_AVX2 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + ); +} +#endif // HAS_I210TOARGBROW_AVX2 + +#if defined(HAS_I210TOAR30ROW_AVX2) +// 16 pixels +// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes). 
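The 10-bit rows, including the I210ToAR30Row_AVX2 that follows, reuse the 8-bit arithmetic by adjusting bit depth up front in READYUV210_AVX2: chroma is shifted right by 2 so it fits the unsigned-byte vpmaddubsw path, and luma is shifted left by 6 so the 10-bit value becomes msb-aligned in 16 bits, which is the scaling vpmulhuw expects. A scalar model of that adjustment:

    /* Scalar sketch of READYUV210_AVX2's bit-depth handling
       (10-bit samples stored lsb-aligned in 16-bit words). */
    #include <stdint.h>

    static void Adjust210(uint16_t y10, uint16_t u10, uint16_t v10,
                          uint16_t* y16, uint8_t* u8, uint8_t* v8) {
      *u8  = (uint8_t)(u10 >> 2);   /* 10 -> 8 bits for vpmaddubsw */
      *v8  = (uint8_t)(v10 >> 2);
      *y16 = (uint16_t)(y10 << 6);  /* msb-align, matching the 8-bit Y*0x101 */
    }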
+void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants + "vpsrlw $14,%%ymm5,%%ymm5 \n" + "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits + "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min + "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max + "vpsrlw $6,%%ymm7,%%ymm7 \n" + + LABELALIGN + "1: \n" + READYUV210_AVX2 + YUVTORGB16_AVX2(yuvconstants) + STOREAR30_AVX2 + "sub $0x10,%[width] \n" + "jg 1b \n" + + "vzeroupper \n" + : [y_buf]"+r"(y_buf), // %[y_buf] + [u_buf]"+r"(u_buf), // %[u_buf] + [v_buf]"+r"(v_buf), // %[v_buf] + [dst_ar30]"+r"(dst_ar30), // %[dst_ar30] + [width]"+rm"(width) // %[width] + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] + : "memory", "cc", YUVTORGB_REGS_AVX2 + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" + ); +} +#endif // HAS_I210TOAR30ROW_AVX2 + +#if defined(HAS_I422ALPHATOARGBROW_AVX2) +// 16 pixels +// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. +void OMITFP I422AlphaToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + // clang-format off + asm volatile ( + YUVTORGB_SETUP_AVX2(yuvconstants) + "sub %[u_buf],%[v_buf] \n" + + LABELALIGN + "1: \n" READYUVA422_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 @@ -2255,33 +2791,35 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, [v_buf]"+r"(v_buf), // %[v_buf] [a_buf]"+r"(a_buf), // %[a_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] -#if defined(__i386__) && defined(__pic__) +#if defined(__i386__) [width]"+m"(width) // %[width] #else [width]"+rm"(width) // %[width] #endif : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); + // clang-format on } #endif // HAS_I422ALPHATOARGBROW_AVX2 #if defined(HAS_I422TORGBAROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). 
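I422AlphaToARGBRow_AVX2 above is the one row that carries real alpha: READYUVA422_AVX2 fills ymm5 from a_buf instead of the usual all-ones register, and the unchanged STOREARGB_AVX2 interleave then emits it as the fourth byte. The I422ToRGBARow_AVX2 that follows reuses the same arithmetic and only swaps the unpack operands so alpha lands first in memory. The two byte orders, as a scalar sketch:

    /* Little-endian memory order written by the two stores:
         ARGB store: B, G, R, A
         RGBA store: A, B, G, R */
    #include <stdint.h>

    static void StorePixel(uint8_t* dst, uint8_t b, uint8_t g, uint8_t r,
                           uint8_t a, int rgba_order) {
      if (rgba_order) {
        dst[0] = a; dst[1] = b; dst[2] = g; dst[3] = r;
      } else {
        dst[0] = b; dst[1] = g; dst[2] = r; dst[3] = a;
      }
    }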
-void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +void OMITFP I422ToRGBARow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + LABELALIGN - "1: \n" + "1: \n" READYUV422_AVX2 YUVTORGB_AVX2(yuvconstants) @@ -2292,11 +2830,11 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, "vpermq $0xd8,%%ymm2,%%ymm2 \n" "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n" "vpunpckhwd %%ymm1,%%ymm2,%%ymm1 \n" - "vmovdqu %%ymm0," MEMACCESS([dst_argb]) "\n" - "vmovdqu %%ymm1," MEMACCESS2(0x20,[dst_argb]) "\n" - "lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n" - "sub $0x10,%[width] \n" - "jg 1b \n" + "vmovdqu %%ymm0,(%[dst_argb]) \n" + "vmovdqu %%ymm1,0x20(%[dst_argb]) \n" + "lea 0x40(%[dst_argb]),%[dst_argb] \n" + "sub $0x10,%[width] \n" + "jg 1b \n" "vzeroupper \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] @@ -2304,7 +2842,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } @@ -2313,16 +2851,18 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, #if defined(HAS_NV12TOARGBROW_AVX2) // 16 pixels. // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). -void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, +void OMITFP NV12ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { + // clang-format off asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + LABELALIGN - "1: \n" + "1: \n" READNV12_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 @@ -2334,25 +2874,28 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] - : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); + // clang-format on } #endif // HAS_NV12TOARGBROW_AVX2 #if defined(HAS_NV21TOARGBROW_AVX2) // 16 pixels. // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). -void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, - const uint8* vu_buf, - uint8* dst_argb, +void OMITFP NV21ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* vu_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { + // clang-format off asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + LABELALIGN - "1: \n" + "1: \n" READNV21_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 @@ -2365,24 +2908,27 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleNV21]"m"(kShuffleNV21) - : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); + // clang-format on } #endif // HAS_NV21TOARGBROW_AVX2 #if defined(HAS_YUY2TOARGBROW_AVX2) // 16 pixels. 
// 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). -void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, - uint8* dst_argb, +void OMITFP YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { + // clang-format off asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + LABELALIGN - "1: \n" + "1: \n" READYUY2_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 @@ -2395,24 +2941,27 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleYUY2Y]"m"(kShuffleYUY2Y), [kShuffleYUY2UV]"m"(kShuffleYUY2UV) - : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); + // clang-format on } #endif // HAS_YUY2TOARGBROW_AVX2 #if defined(HAS_UYVYTOARGBROW_AVX2) // 16 pixels. // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). -void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, - uint8* dst_argb, +void OMITFP UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { + // clang-format off asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + LABELALIGN - "1: \n" + "1: \n" READUYVY_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 @@ -2425,1131 +2974,1603 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleUYVYY]"m"(kShuffleUYVYY), [kShuffleUYVYUV]"m"(kShuffleUYVYUV) - : "memory", "cc", YUVTORGB_REGS_AVX2 // Does not use r14. + : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); + // clang-format on } #endif // HAS_UYVYTOARGBROW_AVX2 #ifdef HAS_I400TOARGBROW_SSE2 -void I400ToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) { - asm volatile ( - "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164 - "movd %%eax,%%xmm2 \n" - "pshufd $0x0,%%xmm2,%%xmm2 \n" - "mov $0x04880488,%%eax \n" // 0488 = 1160 = 1.164 * 16 - "movd %%eax,%%xmm3 \n" - "pshufd $0x0,%%xmm3,%%xmm3 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "pslld $0x18,%%xmm4 \n" - LABELALIGN - "1: \n" - // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164 - "movq " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "psubusw %%xmm3,%%xmm0 \n" - "psrlw $6, %%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" +void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width) { + asm volatile( + "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164 + "movd %%eax,%%xmm2 \n" + "pshufd $0x0,%%xmm2,%%xmm2 \n" + "mov $0x04880488,%%eax \n" // 0488 = 1160 = 1.164 * + // 16 + "movd %%eax,%%xmm3 \n" + "pshufd $0x0,%%xmm3,%%xmm3 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "pslld $0x18,%%xmm4 \n" - // Step 2: Weave into ARGB - "punpcklbw %%xmm0,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm0,%%xmm0 \n" - "punpckhwd %%xmm1,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "por %%xmm4,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" + LABELALIGN + "1: \n" + // Step 1: Scale Y contribution to 8 G values. 
G = (y - 16) * 1.164 + "movq (%0),%%xmm0 \n" + "lea 0x8(%0),%0 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "psubusw %%xmm3,%%xmm0 \n" + "psrlw $6, %%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(y_buf), // %0 - "+r"(dst_argb), // %1 - "+rm"(width) // %2 - : - : "memory", "cc", "eax" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" - ); + // Step 2: Weave into ARGB + "punpcklbw %%xmm0,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklwd %%xmm0,%%xmm0 \n" + "punpckhwd %%xmm1,%%xmm1 \n" + "por %%xmm4,%%xmm0 \n" + "por %%xmm4,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(y_buf), // %0 + "+r"(dst_argb), // %1 + "+rm"(width) // %2 + : + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); } #endif // HAS_I400TOARGBROW_SSE2 #ifdef HAS_I400TOARGBROW_AVX2 // 16 pixels of Y converted to 16 pixels of ARGB (64 bytes). // note: vpunpcklbw mutates and vpackuswb unmutates. -void I400ToARGBRow_AVX2(const uint8* y_buf, uint8* dst_argb, int width) { - asm volatile ( - "mov $0x4a354a35,%%eax \n" // 0488 = 1160 = 1.164 * 16 - "vmovd %%eax,%%xmm2 \n" - "vbroadcastss %%xmm2,%%ymm2 \n" - "mov $0x4880488,%%eax \n" // 4a35 = 18997 = 1.164 - "vmovd %%eax,%%xmm3 \n" - "vbroadcastss %%xmm3,%%ymm3 \n" - "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" - "vpslld $0x18,%%ymm4,%%ymm4 \n" +void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width) { + asm volatile( + "mov $0x4a354a35,%%eax \n" // 0488 = 1160 = 1.164 * + // 16 + "vmovd %%eax,%%xmm2 \n" + "vbroadcastss %%xmm2,%%ymm2 \n" + "mov $0x4880488,%%eax \n" // 4a35 = 18997 = 1.164 + "vmovd %%eax,%%xmm3 \n" + "vbroadcastss %%xmm3,%%ymm3 \n" + "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" + "vpslld $0x18,%%ymm4,%%ymm4 \n" - LABELALIGN - "1: \n" - // Step 1: Scale Y contribution to 16 G values. G = (y - 16) * 1.164 - "vmovdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" - "vpsubusw %%ymm3,%%ymm0,%%ymm0 \n" - "vpsrlw $0x6,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n" - "vpermq $0xd8,%%ymm1,%%ymm1 \n" - "vpunpcklwd %%ymm1,%%ymm1,%%ymm0 \n" - "vpunpckhwd %%ymm1,%%ymm1,%%ymm1 \n" - "vpor %%ymm4,%%ymm0,%%ymm0 \n" - "vpor %%ymm4,%%ymm1,%%ymm1 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(y_buf), // %0 - "+r"(dst_argb), // %1 - "+rm"(width) // %2 - : - : "memory", "cc", "eax" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" - ); + LABELALIGN + "1: \n" + // Step 1: Scale Y contribution to 16 G values. 
G = (y - 16) * 1.164 + "vmovdqu (%0),%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpsubusw %%ymm3,%%ymm0,%%ymm0 \n" + "vpsrlw $0x6,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n" + "vpermq $0xd8,%%ymm1,%%ymm1 \n" + "vpunpcklwd %%ymm1,%%ymm1,%%ymm0 \n" + "vpunpckhwd %%ymm1,%%ymm1,%%ymm1 \n" + "vpor %%ymm4,%%ymm0,%%ymm0 \n" + "vpor %%ymm4,%%ymm1,%%ymm1 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(y_buf), // %0 + "+r"(dst_argb), // %1 + "+rm"(width) // %2 + : + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); } #endif // HAS_I400TOARGBROW_AVX2 #ifdef HAS_MIRRORROW_SSSE3 // Shuffle table for reversing the bytes. -static uvec8 kShuffleMirror = { - 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u -}; +static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, + 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; -void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { +void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) { intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "movdqa %3,%%xmm5 \n" - LABELALIGN - "1: \n" - MEMOPREG(movdqu,-0x10,0,2,1,xmm0) // movdqu -0x10(%0,%2),%%xmm0 - "pshufb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(temp_width) // %2 - : "m"(kShuffleMirror) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm5" - ); + asm volatile( + + "movdqa %3,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu -0x10(%0,%2,1),%%xmm0 \n" + "pshufb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(temp_width) // %2 + : "m"(kShuffleMirror) // %3 + : "memory", "cc", "xmm0", "xmm5"); } #endif // HAS_MIRRORROW_SSSE3 #ifdef HAS_MIRRORROW_AVX2 -void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { +void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "vbroadcastf128 %3,%%ymm5 \n" - LABELALIGN - "1: \n" - MEMOPREG(vmovdqu,-0x20,0,2,1,ymm0) // vmovdqu -0x20(%0,%2),%%ymm0 - "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" - "vpermq $0x4e,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(temp_width) // %2 - : "m"(kShuffleMirror) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm5" - ); + asm volatile( + + "vbroadcastf128 %3,%%ymm5 \n" + + LABELALIGN + "1: \n" + "vmovdqu -0x20(%0,%2,1),%%ymm0 \n" + "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" + "vpermq $0x4e,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(temp_width) // %2 + : "m"(kShuffleMirror) // %3 + : "memory", "cc", "xmm0", "xmm5"); } #endif // HAS_MIRRORROW_AVX2 #ifdef HAS_MIRRORUVROW_SSSE3 // Shuffle table for reversing the bytes of UV channels. 
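MirrorRow_SSSE3 and MirrorRow_AVX2 above walk the source backwards 16 or 32 bytes at a time and reverse each block with a single pshufb table; the kShuffleMirrorUV table that follows additionally deinterleaves, steering reversed U bytes into the low half and reversed V bytes into the high half so one shuffle feeds both output planes. The scalar equivalent is just a byte-wise flip:

    /* Scalar sketch of MirrorRow: horizontal byte flip. */
    #include <stdint.h>

    static void MirrorRow_sketch(const uint8_t* src, uint8_t* dst, int width) {
      for (int x = 0; x < width; ++x) {
        dst[x] = src[width - 1 - x];  /* pshufb does 16/32 of these per step */
      }
    }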
-static uvec8 kShuffleMirrorUV = { - 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u -}; -void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, +static const uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, + 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u}; +void MirrorUVRow_SSSE3(const uint8_t* src, + uint8_t* dst_u, + uint8_t* dst_v, int width) { intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "movdqa %4,%%xmm1 \n" - "lea " MEMLEA4(-0x10,0,3,2) ",%0 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(-0x10,0) ",%0 \n" - "pshufb %%xmm1,%%xmm0 \n" - "movlpd %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhpd,xmm0,0x00,1,2,1) // movhpd %%xmm0,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $8,%3 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(temp_width) // %3 - : "m"(kShuffleMirrorUV) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1" - ); + asm volatile( + "movdqa %4,%%xmm1 \n" + "lea -0x10(%0,%3,2),%0 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "lea -0x10(%0),%0 \n" + "pshufb %%xmm1,%%xmm0 \n" + "movlpd %%xmm0,(%1) \n" + "movhpd %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $8,%3 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(temp_width) // %3 + : "m"(kShuffleMirrorUV) // %4 + : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_MIRRORUVROW_SSSE3 #ifdef HAS_ARGBMIRRORROW_SSE2 -void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) { +void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "lea " MEMLEA4(-0x10,0,2,4) ",%0 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "pshufd $0x1b,%%xmm0,%%xmm0 \n" - "lea " MEMLEA(-0x10,0) ",%0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(temp_width) // %2 - : - : "memory", "cc" - , "xmm0" - ); + asm volatile( + + "lea -0x10(%0,%2,4),%0 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "pshufd $0x1b,%%xmm0,%%xmm0 \n" + "lea -0x10(%0),%0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(temp_width) // %2 + : + : "memory", "cc", "xmm0"); } #endif // HAS_ARGBMIRRORROW_SSE2 #ifdef HAS_ARGBMIRRORROW_AVX2 // Shuffle table for reversing the bytes. 
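For ARGB the flip must reverse whole 4-byte pixels rather than bytes: ARGBMirrorRow_SSE2 above reverses four dwords with pshufd $0x1b, and the AVX2 version below reverses eight dwords through the vpermd index table that follows. In scalar terms:

    /* Scalar sketch of ARGBMirrorRow: reverse 32-bit pixels, byte
       order within each pixel preserved. */
    #include <stdint.h>

    static void ARGBMirror_sketch(const uint32_t* src, uint32_t* dst,
                                  int width) {
      for (int x = 0; x < width; ++x) {
        dst[x] = src[width - 1 - x];
      }
    }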
-static const ulvec32 kARGBShuffleMirror_AVX2 = { - 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u -}; -void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { +static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; +void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "vmovdqu %3,%%ymm5 \n" - LABELALIGN - "1: \n" - VMEMOPREG(vpermd,-0x20,0,2,4,ymm5,ymm0) // vpermd -0x20(%0,%2,4),ymm5,ymm0 - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(temp_width) // %2 - : "m"(kARGBShuffleMirror_AVX2) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm5" - ); + asm volatile( + + "vmovdqu %3,%%ymm5 \n" + + LABELALIGN + "1: \n" + "vpermd -0x20(%0,%2,4),%%ymm5,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(temp_width) // %2 + : "m"(kARGBShuffleMirror_AVX2) // %3 + : "memory", "cc", "xmm0", "xmm5"); } #endif // HAS_ARGBMIRRORROW_AVX2 #ifdef HAS_SPLITUVROW_AVX2 -void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, +void SplitUVRow_AVX2(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrlw $0x8,%%ymm5,%%ymm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpsrlw $0x8,%%ymm0,%%ymm2 \n" - "vpsrlw $0x8,%%ymm1,%%ymm3 \n" - "vpand %%ymm5,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm3,%%ymm2,%%ymm2 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm2,%%ymm2 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - MEMOPMEM(vmovdqu,ymm2,0x00,1,2,1) // vmovdqu %%ymm2,(%1,%2) - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrlw $0x8,%%ymm5,%%ymm5 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm2 \n" + "vpsrlw $0x8,%%ymm1,%%ymm3 \n" + "vpand %%ymm5,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm3,%%ymm2,%%ymm2 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm2,%%ymm2 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm2,0x00(%1,%2,1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_uv), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SPLITUVROW_AVX2 #ifdef HAS_SPLITUVROW_SSE2 -void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, +void SplitUVRow_SSE2(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - 
"packuswb %%xmm1,%%xmm0 \n" - "psrlw $0x8,%%xmm2 \n" - "psrlw $0x8,%%xmm3 \n" - "packuswb %%xmm3,%%xmm2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movdqu,xmm2,0x00,1,2,1) // movdqu %%xmm2,(%1,%2) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "movdqa %%xmm1,%%xmm3 \n" + "pand %%xmm5,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "psrlw $0x8,%%xmm2 \n" + "psrlw $0x8,%%xmm3 \n" + "packuswb %%xmm3,%%xmm2 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm2,0x00(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_uv), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SPLITUVROW_SSE2 #ifdef HAS_MERGEUVROW_AVX2 -void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, +void MergeUVRow_AVX2(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { - asm volatile ( - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - MEMOPREG(vmovdqu,0x00,0,1,1,ymm1) // vmovdqu (%0,%1,1),%%ymm1 - "lea " MEMLEA(0x20,0) ",%0 \n" - "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n" - "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n" - "vextractf128 $0x0,%%ymm2," MEMACCESS(2) " \n" - "vextractf128 $0x0,%%ymm0," MEMACCESS2(0x10,2) "\n" - "vextractf128 $0x1,%%ymm2," MEMACCESS2(0x20,2) "\n" - "vextractf128 $0x1,%%ymm0," MEMACCESS2(0x30,2) "\n" - "lea " MEMLEA(0x40,2) ",%2 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_u), // %0 - "+r"(src_v), // %1 - "+r"(dst_uv), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2" - ); + asm volatile( + + "sub %0,%1 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x00(%0,%1,1),%%ymm1 \n" + "lea 0x20(%0),%0 \n" + "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n" + "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n" + "vextractf128 $0x0,%%ymm2,(%2) \n" + "vextractf128 $0x0,%%ymm0,0x10(%2) \n" + "vextractf128 $0x1,%%ymm2,0x20(%2) \n" + "vextractf128 $0x1,%%ymm0,0x30(%2) \n" + "lea 0x40(%2),%2 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_u), // %0 + "+r"(src_v), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_MERGEUVROW_AVX2 #ifdef HAS_MERGEUVROW_SSE2 -void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, +void MergeUVRow_SSE2(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { - asm volatile ( - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm2 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "movdqu %%xmm2," MEMACCESS2(0x10,2) " \n" - "lea " MEMLEA(0x20,2) ",%2 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_u), // %0 - "+r"(src_v), // %1 - "+r"(dst_uv), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2" - ); + asm volatile( + + "sub %0,%1 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 
\n" + "movdqu 0x00(%0,%1,1),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "punpcklbw %%xmm1,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm2 \n" + "movdqu %%xmm0,(%2) \n" + "movdqu %%xmm2,0x10(%2) \n" + "lea 0x20(%2),%2 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_u), // %0 + "+r"(src_v), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_MERGEUVROW_SSE2 -#ifdef HAS_COPYROW_SSE2 -void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 128 = 9 bits +// 64 = 10 bits +// 16 = 12 bits +// 1 = 16 bits +#ifdef HAS_MERGEUVROW_16_AVX2 +void MergeUVRow_16_AVX2(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, + int scale, + int width) { + // clang-format off asm volatile ( - "test $0xf,%0 \n" - "jne 2f \n" - "test $0xf,%1 \n" - "jne 2f \n" + "vmovd %4,%%xmm3 \n" + "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n" + "vbroadcastss %%xmm3,%%ymm3 \n" + "sub %0,%1 \n" + + // 16 pixels per loop. LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu (%0,%1,1),%%ymm1 \n" + "add $0x20,%0 \n" + + "vpmullw %%ymm3,%%ymm0,%%ymm0 \n" + "vpmullw %%ymm3,%%ymm1,%%ymm1 \n" + "vpunpcklwd %%ymm1,%%ymm0,%%ymm2 \n" // mutates + "vpunpckhwd %%ymm1,%%ymm0,%%ymm0 \n" + "vextractf128 $0x0,%%ymm2,(%2) \n" + "vextractf128 $0x0,%%ymm0,0x10(%2) \n" + "vextractf128 $0x1,%%ymm2,0x20(%2) \n" + "vextractf128 $0x1,%%ymm0,0x30(%2) \n" + "add $0x40,%2 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_u), // %0 + "+r"(src_v), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 + : "r"(scale) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3"); + // clang-format on +} +#endif // HAS_MERGEUVROW_AVX2 + +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 128 = 9 bits +// 64 = 10 bits +// 16 = 12 bits +// 1 = 16 bits +#ifdef HAS_MULTIPLYROW_16_AVX2 +void MultiplyRow_16_AVX2(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width) { + // clang-format off + asm volatile ( + "vmovd %3,%%xmm3 \n" + "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n" + "vbroadcastss %%xmm3,%%ymm3 \n" + "sub %0,%1 \n" + + // 16 pixels per loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vpmullw %%ymm3,%%ymm0,%%ymm0 \n" + "vpmullw %%ymm3,%%ymm1,%%ymm1 \n" + "vmovdqu %%ymm0,(%0,%1) \n" + "vmovdqu %%ymm1,0x20(%0,%1) \n" + "add $0x40,%0 \n" "sub $0x20,%2 \n" "jg 1b \n" - "jmp 9f \n" + "vzeroupper \n" + : "+r"(src_y), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(scale) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm3"); + // clang-format on +} +#endif // HAS_MULTIPLYROW_16_AVX2 + +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 32768 = 9 bits +// 16384 = 10 bits +// 4096 = 12 bits +// 256 = 16 bits +void Convert16To8Row_SSSE3(const uint16_t* src_y, + uint8_t* dst_y, + int scale, + int width) { + // clang-format off + asm volatile ( + "movd %3,%%xmm2 \n" + "punpcklwd %%xmm2,%%xmm2 \n" + "pshufd $0x0,%%xmm2,%%xmm2 \n" + + // 32 pixels per loop. 
LABELALIGN - "2: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "add $0x20,%0 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "pmulhuw %%xmm2,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "add $0x10,%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_y), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(scale) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); + // clang-format on +} + +#ifdef HAS_CONVERT16TO8ROW_AVX2 +void Convert16To8Row_AVX2(const uint16_t* src_y, + uint8_t* dst_y, + int scale, + int width) { + // clang-format off + asm volatile ( + "vmovd %3,%%xmm2 \n" + "vpunpcklwd %%xmm2,%%xmm2,%%xmm2 \n" + "vbroadcastss %%xmm2,%%ymm2 \n" + + // 32 pixels per loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "add $0x40,%0 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm2,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" // mutates + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "add $0x20,%1 \n" "sub $0x20,%2 \n" - "jg 2b \n" - "9: \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(count) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1" - ); + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_y), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(scale) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); + // clang-format on +} +#endif // HAS_CONVERT16TO8ROW_AVX2 + +// Use scale to convert to lsb formats depending how many bits there are: +// 512 = 9 bits +// 1024 = 10 bits +// 4096 = 12 bits +// TODO(fbarchard): reduce to SSE2 +void Convert8To16Row_SSE2(const uint8_t* src_y, + uint16_t* dst_y, + int scale, + int width) { + // clang-format off + asm volatile ( + "movd %3,%%xmm2 \n" + "punpcklwd %%xmm2,%%xmm2 \n" + "pshufd $0x0,%%xmm2,%%xmm2 \n" + + // 32 pixels per loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm1 \n" + "add $0x10,%0 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "pmulhuw %%xmm2,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "add $0x20,%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_y), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(scale) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); + // clang-format on +} + +#ifdef HAS_CONVERT8TO16ROW_AVX2 +void Convert8To16Row_AVX2(const uint8_t* src_y, + uint16_t* dst_y, + int scale, + int width) { + // clang-format off + asm volatile ( + "vmovd %3,%%xmm2 \n" + "vpunpcklwd %%xmm2,%%xmm2,%%xmm2 \n" + "vbroadcastss %%xmm2,%%ymm2 \n" + + // 32 pixels per loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "add $0x20,%0 \n" + "vpunpckhbw %%ymm0,%%ymm0,%%ymm1 \n" + "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm2,%%ymm1,%%ymm1 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "add $0x40,%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_y), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(scale) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); + // clang-format on +} +#endif // HAS_CONVERT8TO16ROW_AVX2 + +#ifdef HAS_SPLITRGBROW_SSSE3 + +// Shuffle table for converting RGB to Planar. 
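The shuffle tables that follow drive SplitRGBRow_SSSE3: 16 packed pixels span 48 bytes, so each output plane is assembled from three pshufb passes over the same three 16-byte loads, with index 128 zeroing the lanes a pass does not contribute and por combining the partial results. A scalar model of the de-interleave (whether byte 0 is R or B depends on the source fourcc; the kernel simply splits every third byte):

    /* Scalar sketch of SplitRGBRow: de-interleave packed 24-bit pixels. */
    #include <stdint.h>

    static void SplitRGB_sketch(const uint8_t* rgb, uint8_t* r, uint8_t* g,
                                uint8_t* b, int width) {
      for (int x = 0; x < width; ++x) {
        r[x] = rgb[3 * x + 0];  /* SSSE3 gathers 16 of these per pshufb trio */
        g[x] = rgb[3 * x + 1];
        b[x] = rgb[3 * x + 2];
      }
    }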
+static const uvec8 kShuffleMaskRGBToR0 = {0u, 3u, 6u, 9u, 12u, 15u, + 128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u}; +static const uvec8 kShuffleMaskRGBToR1 = {128u, 128u, 128u, 128u, 128u, 128u, + 2u, 5u, 8u, 11u, 14u, 128u, + 128u, 128u, 128u, 128u}; +static const uvec8 kShuffleMaskRGBToR2 = {128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u, 128u, 1u, + 4u, 7u, 10u, 13u}; + +static const uvec8 kShuffleMaskRGBToG0 = {1u, 4u, 7u, 10u, 13u, 128u, + 128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u}; +static const uvec8 kShuffleMaskRGBToG1 = {128u, 128u, 128u, 128u, 128u, 0u, + 3u, 6u, 9u, 12u, 15u, 128u, + 128u, 128u, 128u, 128u}; +static const uvec8 kShuffleMaskRGBToG2 = {128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u, 128u, 2u, + 5u, 8u, 11u, 14u}; + +static const uvec8 kShuffleMaskRGBToB0 = {2u, 5u, 8u, 11u, 14u, 128u, + 128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u}; +static const uvec8 kShuffleMaskRGBToB1 = {128u, 128u, 128u, 128u, 128u, 1u, + 4u, 7u, 10u, 13u, 128u, 128u, + 128u, 128u, 128u, 128u}; +static const uvec8 kShuffleMaskRGBToB2 = {128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u, 0u, 3u, + 6u, 9u, 12u, 15u}; + +void SplitRGBRow_SSSE3(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width) { + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "pshufb %5, %%xmm0 \n" + "pshufb %6, %%xmm1 \n" + "pshufb %7, %%xmm2 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "pshufb %8, %%xmm0 \n" + "pshufb %9, %%xmm1 \n" + "pshufb %10, %%xmm2 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "pshufb %11, %%xmm0 \n" + "pshufb %12, %%xmm1 \n" + "pshufb %13, %%xmm2 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%3) \n" + "lea 0x10(%3),%3 \n" + "lea 0x30(%0),%0 \n" + "sub $0x10,%4 \n" + "jg 1b \n" + : "+r"(src_rgb), // %0 + "+r"(dst_r), // %1 + "+r"(dst_g), // %2 + "+r"(dst_b), // %3 + "+r"(width) // %4 + : "m"(kShuffleMaskRGBToR0), // %5 + "m"(kShuffleMaskRGBToR1), // %6 + "m"(kShuffleMaskRGBToR2), // %7 + "m"(kShuffleMaskRGBToG0), // %8 + "m"(kShuffleMaskRGBToG1), // %9 + "m"(kShuffleMaskRGBToG2), // %10 + "m"(kShuffleMaskRGBToB0), // %11 + "m"(kShuffleMaskRGBToB1), // %12 + "m"(kShuffleMaskRGBToB2) // %13 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); +} +#endif // HAS_SPLITRGBROW_SSSE3 + +#ifdef HAS_MERGERGBROW_SSSE3 + +// Shuffle table for converting RGB to Planar. 
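  // These masks run the opposite direction: they interleave the three
  // planar rows back into packed RGB. As above, index 128 makes pshufb
  // emit a zero byte, letting por combine the three shuffled registers.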
+static const uvec8 kShuffleMaskRToRGB0 = {0u, 128u, 128u, 1u, 128u, 128u, + 2u, 128u, 128u, 3u, 128u, 128u, + 4u, 128u, 128u, 5u}; +static const uvec8 kShuffleMaskGToRGB0 = {128u, 0u, 128u, 128u, 1u, 128u, + 128u, 2u, 128u, 128u, 3u, 128u, + 128u, 4u, 128u, 128u}; +static const uvec8 kShuffleMaskBToRGB0 = {128u, 128u, 0u, 128u, 128u, 1u, + 128u, 128u, 2u, 128u, 128u, 3u, + 128u, 128u, 4u, 128u}; + +static const uvec8 kShuffleMaskGToRGB1 = {5u, 128u, 128u, 6u, 128u, 128u, + 7u, 128u, 128u, 8u, 128u, 128u, + 9u, 128u, 128u, 10u}; +static const uvec8 kShuffleMaskBToRGB1 = {128u, 5u, 128u, 128u, 6u, 128u, + 128u, 7u, 128u, 128u, 8u, 128u, + 128u, 9u, 128u, 128u}; +static const uvec8 kShuffleMaskRToRGB1 = {128u, 128u, 6u, 128u, 128u, 7u, + 128u, 128u, 8u, 128u, 128u, 9u, + 128u, 128u, 10u, 128u}; + +static const uvec8 kShuffleMaskBToRGB2 = {10u, 128u, 128u, 11u, 128u, 128u, + 12u, 128u, 128u, 13u, 128u, 128u, + 14u, 128u, 128u, 15u}; +static const uvec8 kShuffleMaskRToRGB2 = {128u, 11u, 128u, 128u, 12u, 128u, + 128u, 13u, 128u, 128u, 14u, 128u, + 128u, 15u, 128u, 128u}; +static const uvec8 kShuffleMaskGToRGB2 = {128u, 128u, 11u, 128u, 128u, 12u, + 128u, 128u, 13u, 128u, 128u, 14u, + 128u, 128u, 15u, 128u}; + +void MergeRGBRow_SSSE3(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, + int width) { + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu (%1),%%xmm1 \n" + "movdqu (%2),%%xmm2 \n" + "pshufb %5, %%xmm0 \n" + "pshufb %6, %%xmm1 \n" + "pshufb %7, %%xmm2 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%3) \n" + + "movdqu (%0),%%xmm0 \n" + "movdqu (%1),%%xmm1 \n" + "movdqu (%2),%%xmm2 \n" + "pshufb %8, %%xmm0 \n" + "pshufb %9, %%xmm1 \n" + "pshufb %10, %%xmm2 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,16(%3) \n" + + "movdqu (%0),%%xmm0 \n" + "movdqu (%1),%%xmm1 \n" + "movdqu (%2),%%xmm2 \n" + "pshufb %11, %%xmm0 \n" + "pshufb %12, %%xmm1 \n" + "pshufb %13, %%xmm2 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,32(%3) \n" + + "lea 0x10(%0),%0 \n" + "lea 0x10(%1),%1 \n" + "lea 0x10(%2),%2 \n" + "lea 0x30(%3),%3 \n" + "sub $0x10,%4 \n" + "jg 1b \n" + : "+r"(src_r), // %0 + "+r"(src_g), // %1 + "+r"(src_b), // %2 + "+r"(dst_rgb), // %3 + "+r"(width) // %4 + : "m"(kShuffleMaskRToRGB0), // %5 + "m"(kShuffleMaskGToRGB0), // %6 + "m"(kShuffleMaskBToRGB0), // %7 + "m"(kShuffleMaskRToRGB1), // %8 + "m"(kShuffleMaskGToRGB1), // %9 + "m"(kShuffleMaskBToRGB1), // %10 + "m"(kShuffleMaskRToRGB2), // %11 + "m"(kShuffleMaskGToRGB2), // %12 + "m"(kShuffleMaskBToRGB2) // %13 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); +} +#endif // HAS_MERGERGBROW_SSSE3 + +#ifdef HAS_COPYROW_SSE2 +void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "test $0xf,%0 \n" + "jne 2f \n" + "test $0xf,%1 \n" + "jne 2f \n" + + LABELALIGN + "1: \n" + "movdqa (%0),%%xmm0 \n" + "movdqa 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "movdqa %%xmm0,(%1) \n" + "movdqa %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "jmp 9f \n" + + LABELALIGN + "2: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 2b \n" + + LABELALIGN "9: \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_COPYROW_SSE2 #ifdef HAS_COPYROW_AVX -void CopyRow_AVX(const uint8* src, 
uint8* dst, int count) { - asm volatile ( - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x40,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(count) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1" - ); +void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x40,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_COPYROW_AVX #ifdef HAS_COPYROW_ERMS // Multiple of 1. -void CopyRow_ERMS(const uint8* src, uint8* dst, int width) { +void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width) { size_t width_tmp = (size_t)(width); - asm volatile ( - "rep movsb " MEMMOVESTRING(0,1) " \n" - : "+S"(src), // %0 - "+D"(dst), // %1 - "+c"(width_tmp) // %2 - : - : "memory", "cc" - ); + asm volatile( + + "rep movsb \n" + : "+S"(src), // %0 + "+D"(dst), // %1 + "+c"(width_tmp) // %2 + : + : "memory", "cc"); } #endif // HAS_COPYROW_ERMS #ifdef HAS_ARGBCOPYALPHAROW_SSE2 // width in pixels -void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm0,%%xmm0 \n" - "pslld $0x18,%%xmm0 \n" - "pcmpeqb %%xmm1,%%xmm1 \n" - "psrld $0x8,%%xmm1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm3 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqu " MEMACCESS(1) ",%%xmm4 \n" - "movdqu " MEMACCESS2(0x10,1) ",%%xmm5 \n" - "pand %%xmm0,%%xmm2 \n" - "pand %%xmm0,%%xmm3 \n" - "pand %%xmm1,%%xmm4 \n" - "pand %%xmm1,%%xmm5 \n" - "por %%xmm4,%%xmm2 \n" - "por %%xmm5,%%xmm3 \n" - "movdqu %%xmm2," MEMACCESS(1) " \n" - "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm0,%%xmm0 \n" + "pslld $0x18,%%xmm0 \n" + "pcmpeqb %%xmm1,%%xmm1 \n" + "psrld $0x8,%%xmm1 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm2 \n" + "movdqu 0x10(%0),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "movdqu (%1),%%xmm4 \n" + "movdqu 0x10(%1),%%xmm5 \n" + "pand %%xmm0,%%xmm2 \n" + "pand %%xmm0,%%xmm3 \n" + "pand %%xmm1,%%xmm4 \n" + "pand %%xmm1,%%xmm5 \n" + "por %%xmm4,%%xmm2 \n" + "por %%xmm5,%%xmm3 \n" + "movdqu %%xmm2,(%1) \n" + "movdqu %%xmm3,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBCOPYALPHAROW_SSE2 #ifdef HAS_ARGBCOPYALPHAROW_AVX2 // width in pixels -void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" - "vpsrld $0x8,%%ymm0,%%ymm0 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm1 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm2 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1 \n" - "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2 \n" - 
"vmovdqu %%ymm1," MEMACCESS(1) " \n" - "vmovdqu %%ymm2," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm2" - ); +void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" + "vpsrld $0x8,%%ymm0,%%ymm0 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm1 \n" + "vmovdqu 0x20(%0),%%ymm2 \n" + "lea 0x40(%0),%0 \n" + "vpblendvb %%ymm0,(%1),%%ymm1,%%ymm1 \n" + "vpblendvb %%ymm0,0x20(%1),%%ymm2,%%ymm2 \n" + "vmovdqu %%ymm1,(%1) \n" + "vmovdqu %%ymm2,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_ARGBCOPYALPHAROW_AVX2 #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 // width in pixels -void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ", %%xmm0 \n" - "movdqu " MEMACCESS2(0x10, 0) ", %%xmm1 \n" - "lea " MEMLEA(0x20, 0) ", %0 \n" - "psrld $0x18, %%xmm0 \n" - "psrld $0x18, %%xmm1 \n" - "packssdw %%xmm1, %%xmm0 \n" - "packuswb %%xmm0, %%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8, 1) ", %1 \n" - "sub $0x8, %2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_a), // %1 - "+rm"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1" - ); +void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0), %%xmm0 \n" + "movdqu 0x10(%0), %%xmm1 \n" + "lea 0x20(%0), %0 \n" + "psrld $0x18, %%xmm0 \n" + "psrld $0x18, %%xmm1 \n" + "packssdw %%xmm1, %%xmm0 \n" + "packuswb %%xmm0, %%xmm0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1), %1 \n" + "sub $0x8, %2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_a), // %1 + "+rm"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_ARGBEXTRACTALPHAROW_SSE2 +#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 +static const uvec8 kShuffleAlphaShort_AVX2 = { + 3u, 128u, 128u, 128u, 7u, 128u, 128u, 128u, + 11u, 128u, 128u, 128u, 15u, 128u, 128u, 128u}; + +void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { + asm volatile( + "vmovdqa %3,%%ymm4 \n" + "vbroadcastf128 %4,%%ymm5 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0), %%ymm0 \n" + "vmovdqu 0x20(%0), %%ymm1 \n" + "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" // vpsrld $0x18, %%ymm0 + "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" + "vmovdqu 0x40(%0), %%ymm2 \n" + "vmovdqu 0x60(%0), %%ymm3 \n" + "lea 0x80(%0), %0 \n" + "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates + "vpshufb %%ymm5,%%ymm2,%%ymm2 \n" + "vpshufb %%ymm5,%%ymm3,%%ymm3 \n" + "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates + "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. + "vpermd %%ymm0,%%ymm4,%%ymm0 \n" // unmutate. 
+ "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20, %2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_a), // %1 + "+rm"(width) // %2 + : "m"(kPermdARGBToY_AVX), // %3 + "m"(kShuffleAlphaShort_AVX2) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); +} +#endif // HAS_ARGBEXTRACTALPHAROW_AVX2 + #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 // width in pixels -void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm0,%%xmm0 \n" - "pslld $0x18,%%xmm0 \n" - "pcmpeqb %%xmm1,%%xmm1 \n" - "psrld $0x8,%%xmm1 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm2,%%xmm2 \n" - "punpckhwd %%xmm2,%%xmm3 \n" - "punpcklwd %%xmm2,%%xmm2 \n" - "movdqu " MEMACCESS(1) ",%%xmm4 \n" - "movdqu " MEMACCESS2(0x10,1) ",%%xmm5 \n" - "pand %%xmm0,%%xmm2 \n" - "pand %%xmm0,%%xmm3 \n" - "pand %%xmm1,%%xmm4 \n" - "pand %%xmm1,%%xmm5 \n" - "por %%xmm4,%%xmm2 \n" - "por %%xmm5,%%xmm3 \n" - "movdqu %%xmm2," MEMACCESS(1) " \n" - "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm0,%%xmm0 \n" + "pslld $0x18,%%xmm0 \n" + "pcmpeqb %%xmm1,%%xmm1 \n" + "psrld $0x8,%%xmm1 \n" + + LABELALIGN + "1: \n" + "movq (%0),%%xmm2 \n" + "lea 0x8(%0),%0 \n" + "punpcklbw %%xmm2,%%xmm2 \n" + "punpckhwd %%xmm2,%%xmm3 \n" + "punpcklwd %%xmm2,%%xmm2 \n" + "movdqu (%1),%%xmm4 \n" + "movdqu 0x10(%1),%%xmm5 \n" + "pand %%xmm0,%%xmm2 \n" + "pand %%xmm0,%%xmm3 \n" + "pand %%xmm1,%%xmm4 \n" + "pand %%xmm1,%%xmm5 \n" + "por %%xmm4,%%xmm2 \n" + "por %%xmm5,%%xmm3 \n" + "movdqu %%xmm2,(%1) \n" + "movdqu %%xmm3,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBCOPYYTOALPHAROW_SSE2 #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 // width in pixels -void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" - "vpsrld $0x8,%%ymm0,%%ymm0 \n" - LABELALIGN - "1: \n" - "vpmovzxbd " MEMACCESS(0) ",%%ymm1 \n" - "vpmovzxbd " MEMACCESS2(0x8,0) ",%%ymm2 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "vpslld $0x18,%%ymm1,%%ymm1 \n" - "vpslld $0x18,%%ymm2,%%ymm2 \n" - "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1 \n" - "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2 \n" - "vmovdqu %%ymm1," MEMACCESS(1) " \n" - "vmovdqu %%ymm2," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm2" - ); +void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" + "vpsrld $0x8,%%ymm0,%%ymm0 \n" + + LABELALIGN + "1: \n" + "vpmovzxbd (%0),%%ymm1 \n" + "vpmovzxbd 0x8(%0),%%ymm2 \n" + "lea 0x10(%0),%0 \n" + "vpslld $0x18,%%ymm1,%%ymm1 \n" + "vpslld $0x18,%%ymm2,%%ymm2 \n" + "vpblendvb %%ymm0,(%1),%%ymm1,%%ymm1 \n" + "vpblendvb %%ymm0,0x20(%1),%%ymm2,%%ymm2 \n" + "vmovdqu %%ymm1,(%1) \n" + "vmovdqu %%ymm2,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + 
"vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_ARGBCOPYYTOALPHAROW_AVX2 #ifdef HAS_SETROW_X86 -void SetRow_X86(uint8* dst, uint8 v8, int width) { +void SetRow_X86(uint8_t* dst, uint8_t v8, int width) { size_t width_tmp = (size_t)(width >> 2); - const uint32 v32 = v8 * 0x01010101u; // Duplicate byte to all bytes. - asm volatile ( - "rep stosl " MEMSTORESTRING(eax,0) " \n" - : "+D"(dst), // %0 - "+c"(width_tmp) // %1 - : "a"(v32) // %2 - : "memory", "cc"); + const uint32_t v32 = v8 * 0x01010101u; // Duplicate byte to all bytes. + asm volatile( + + "rep stosl \n" + : "+D"(dst), // %0 + "+c"(width_tmp) // %1 + : "a"(v32) // %2 + : "memory", "cc"); } -void SetRow_ERMS(uint8* dst, uint8 v8, int width) { +void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) { size_t width_tmp = (size_t)(width); - asm volatile ( - "rep stosb " MEMSTORESTRING(al,0) " \n" - : "+D"(dst), // %0 - "+c"(width_tmp) // %1 - : "a"(v8) // %2 - : "memory", "cc"); + asm volatile( + + "rep stosb \n" + : "+D"(dst), // %0 + "+c"(width_tmp) // %1 + : "a"(v8) // %2 + : "memory", "cc"); } -void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int width) { +void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width) { size_t width_tmp = (size_t)(width); - asm volatile ( - "rep stosl " MEMSTORESTRING(eax,0) " \n" - : "+D"(dst_argb), // %0 - "+c"(width_tmp) // %1 - : "a"(v32) // %2 - : "memory", "cc"); + asm volatile( + + "rep stosl \n" + : "+D"(dst_argb), // %0 + "+c"(width_tmp) // %1 + : "a"(v32) // %2 + : "memory", "cc"); } #endif // HAS_SETROW_X86 #ifdef HAS_YUY2TOYROW_SSE2 -void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm5" - ); +void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pand %%xmm5,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - 
MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : "r"((intptr_t)(stride_yuy2)) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); +void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x00(%0,%4,1),%%xmm2 \n" + "movdqu 0x10(%0,%4,1),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "pavgb %%xmm2,%%xmm0 \n" + "pavgb %%xmm3,%%xmm1 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pand %%xmm5,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movq %%xmm0,(%1) \n" + "movq %%xmm1,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"((intptr_t)(stride_yuy2)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } -void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); +void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pand %%xmm5,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movq %%xmm0,(%1) \n" + "movq %%xmm1,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1" - ); +void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + 
"movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1"); } -void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : "r"((intptr_t)(stride_uyvy)) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); +void UYVYToUVRow_SSE2(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x00(%0,%4,1),%%xmm2 \n" + "movdqu 0x10(%0,%4,1),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "pavgb %%xmm2,%%xmm0 \n" + "pavgb %%xmm3,%%xmm1 \n" + "pand %%xmm5,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pand %%xmm5,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movq %%xmm0,(%1) \n" + "movq %%xmm1,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"((intptr_t)(stride_uyvy)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } -void UYVYToUV422Row_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); +void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrlw $0x8,%%xmm5 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pand %%xmm5,%%xmm0 \n" + "pand 
%%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pand %%xmm5,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movq %%xmm0,(%1) \n" + "movq %%xmm1,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_YUY2TOYROW_SSE2 #ifdef HAS_YUY2TOYROW_AVX2 -void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrlw $0x8,%%ymm5,%%ymm5 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpand %%ymm5,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm5" - ); +void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrlw $0x8,%%ymm5,%%ymm5 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpand %%ymm5,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrlw $0x8,%%ymm5,%%ymm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0 - VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1) - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm0,%%ymm1 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm1,%%ymm1 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n" - VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : "r"((intptr_t)(stride_yuy2)) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); +void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrlw $0x8,%%ymm5,%%ymm5 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" + "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq 
$0xd8,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm0,%%ymm1 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm1,%%ymm1 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vextractf128 $0x0,%%ymm1,(%1) \n" + "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"((intptr_t)(stride_yuy2)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrlw $0x8,%%ymm5,%%ymm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm0,%%ymm1 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm1,%%ymm1 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n" - VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); +void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrlw $0x8,%%ymm5,%%ymm5 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm0,%%ymm1 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm1,%%ymm1 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vextractf128 $0x0,%%ymm1,(%1) \n" + "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width) { - asm volatile ( - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm5" - ); -} -void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrlw $0x8,%%ymm5,%%ymm5 \n" - "sub %1,%2 \n" +void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { + asm volatile( - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - 
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - VMEMOPREG(vpavgb,0x00,0,4,1,ymm0,ymm0) // vpavgb (%0,%4,1),%%ymm0,%%ymm0 - VMEMOPREG(vpavgb,0x20,0,4,1,ymm1,ymm1) - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpand %%ymm5,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm0,%%ymm1 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm1,%%ymm1 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n" - VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : "r"((intptr_t)(stride_uyvy)) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); +} +void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrlw $0x8,%%ymm5,%%ymm5 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" + "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpand %%ymm5,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm0,%%ymm1 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm1,%%ymm1 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vextractf128 $0x0,%%ymm1,(%1) \n" + "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"((intptr_t)(stride_uyvy)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrlw $0x8,%%ymm5,%%ymm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpand %%ymm5,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm0,%%ymm1 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm1,%%ymm1 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n" - VEXTOPMEM(vextractf128,0,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); 
+void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrlw $0x8,%%ymm5,%%ymm5 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpand %%ymm5,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm0,%%ymm1 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm1,%%ymm1 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vextractf128 $0x0,%%ymm1,(%1) \n" + "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_YUY2TOYROW_AVX2 #ifdef HAS_ARGBBLENDROW_SSSE3 // Shuffle table for isolating alpha. -static uvec8 kShuffleAlpha = { - 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, - 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80 -}; +static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, + 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80}; // Blend 8 pixels at a time -void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm7,%%xmm7 \n" - "psrlw $0xf,%%xmm7 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrlw $0x8,%%xmm6 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "psllw $0x8,%%xmm5 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "pslld $0x18,%%xmm4 \n" - "sub $0x4,%3 \n" - "jl 49f \n" +void ARGBBlendRow_SSSE3(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + asm volatile( + "pcmpeqb %%xmm7,%%xmm7 \n" + "psrlw $0xf,%%xmm7 \n" + "pcmpeqb %%xmm6,%%xmm6 \n" + "psrlw $0x8,%%xmm6 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "psllw $0x8,%%xmm5 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "pslld $0x18,%%xmm4 \n" + "sub $0x4,%3 \n" + "jl 49f \n" - // 4 pixel loop. - LABELALIGN - "40: \n" - "movdqu " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm3,%%xmm0 \n" - "pxor %%xmm4,%%xmm3 \n" - "movdqu " MEMACCESS(1) ",%%xmm2 \n" - "pshufb %4,%%xmm3 \n" - "pand %%xmm6,%%xmm2 \n" - "paddw %%xmm7,%%xmm3 \n" - "pmullw %%xmm3,%%xmm2 \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "psrlw $0x8,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "pmullw %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm2 \n" - "paddusb %%xmm2,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jge 40b \n" + // 4 pixel loop. + LABELALIGN + "40: \n" + "movdqu (%0),%%xmm3 \n" + "lea 0x10(%0),%0 \n" + "movdqa %%xmm3,%%xmm0 \n" + "pxor %%xmm4,%%xmm3 \n" + "movdqu (%1),%%xmm2 \n" + "pshufb %4,%%xmm3 \n" + "pand %%xmm6,%%xmm2 \n" + "paddw %%xmm7,%%xmm3 \n" + "pmullw %%xmm3,%%xmm2 \n" + "movdqu (%1),%%xmm1 \n" + "lea 0x10(%1),%1 \n" + "psrlw $0x8,%%xmm1 \n" + "por %%xmm4,%%xmm0 \n" + "pmullw %%xmm3,%%xmm1 \n" + "psrlw $0x8,%%xmm2 \n" + "paddusb %%xmm2,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "paddusb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jge 40b \n" - "49: \n" - "add $0x3,%3 \n" - "jl 99f \n" + "49: \n" + "add $0x3,%3 \n" + "jl 99f \n" - // 1 pixel loop. 
- "91: \n" - "movd " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "movdqa %%xmm3,%%xmm0 \n" - "pxor %%xmm4,%%xmm3 \n" - "movd " MEMACCESS(1) ",%%xmm2 \n" - "pshufb %4,%%xmm3 \n" - "pand %%xmm6,%%xmm2 \n" - "paddw %%xmm7,%%xmm3 \n" - "pmullw %%xmm3,%%xmm2 \n" - "movd " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x4,1) ",%1 \n" - "psrlw $0x8,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "pmullw %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm2 \n" - "paddusb %%xmm2,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movd %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x4,2) ",%2 \n" - "sub $0x1,%3 \n" - "jge 91b \n" - "99: \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : "m"(kShuffleAlpha) // %4 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + // 1 pixel loop. + "91: \n" + "movd (%0),%%xmm3 \n" + "lea 0x4(%0),%0 \n" + "movdqa %%xmm3,%%xmm0 \n" + "pxor %%xmm4,%%xmm3 \n" + "movd (%1),%%xmm2 \n" + "pshufb %4,%%xmm3 \n" + "pand %%xmm6,%%xmm2 \n" + "paddw %%xmm7,%%xmm3 \n" + "pmullw %%xmm3,%%xmm2 \n" + "movd (%1),%%xmm1 \n" + "lea 0x4(%1),%1 \n" + "psrlw $0x8,%%xmm1 \n" + "por %%xmm4,%%xmm0 \n" + "pmullw %%xmm3,%%xmm1 \n" + "psrlw $0x8,%%xmm2 \n" + "paddusb %%xmm2,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "paddusb %%xmm1,%%xmm0 \n" + "movd %%xmm0,(%2) \n" + "lea 0x4(%2),%2 \n" + "sub $0x1,%3 \n" + "jge 91b \n" + "99: \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : "m"(kShuffleAlpha) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBBLENDROW_SSSE3 @@ -3559,46 +4580,49 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, // =((A2*C2)+(B2*(255-C2))+255)/256 // signed version of math // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256 -void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1, - const uint8* alpha, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psllw $0x8,%%xmm5 \n" - "mov $0x80808080,%%eax \n" - "movd %%eax,%%xmm6 \n" - "pshufd $0x0,%%xmm6,%%xmm6 \n" - "mov $0x807f807f,%%eax \n" - "movd %%eax,%%xmm7 \n" - "pshufd $0x0,%%xmm7,%%xmm7 \n" - "sub %2,%0 \n" - "sub %2,%1 \n" - "sub %2,%3 \n" +void BlendPlaneRow_SSSE3(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, + int width) { + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psllw $0x8,%%xmm5 \n" + "mov $0x80808080,%%eax \n" + "movd %%eax,%%xmm6 \n" + "pshufd $0x0,%%xmm6,%%xmm6 \n" + "mov $0x807f807f,%%eax \n" + "movd %%eax,%%xmm7 \n" + "pshufd $0x0,%%xmm7,%%xmm7 \n" + "sub %2,%0 \n" + "sub %2,%1 \n" + "sub %2,%3 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movq (%2),%%xmm0 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "pxor %%xmm5,%%xmm0 \n" - "movq (%0,%2,1),%%xmm1 \n" - "movq (%1,%2,1),%%xmm2 \n" - "punpcklbw %%xmm2,%%xmm1 \n" - "psubb %%xmm6,%%xmm1 \n" - "pmaddubsw %%xmm1,%%xmm0 \n" - "paddw %%xmm7,%%xmm0 \n" - "psrlw $0x8,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0,(%3,%2,1) \n" - "lea 0x8(%2),%2 \n" - "sub $0x8,%4 \n" - "jg 1b \n" - : "+r"(src0), // %0 - "+r"(src1), // %1 - "+r"(alpha), // %2 - "+r"(dst), // %3 - "+rm"(width) // %4 - :: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7" - ); + // 8 pixel loop. 
+ LABELALIGN + "1: \n" + "movq (%2),%%xmm0 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "pxor %%xmm5,%%xmm0 \n" + "movq (%0,%2,1),%%xmm1 \n" + "movq (%1,%2,1),%%xmm2 \n" + "punpcklbw %%xmm2,%%xmm1 \n" + "psubb %%xmm6,%%xmm1 \n" + "pmaddubsw %%xmm1,%%xmm0 \n" + "paddw %%xmm7,%%xmm0 \n" + "psrlw $0x8,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,(%3,%2,1) \n" + "lea 0x8(%2),%2 \n" + "sub $0x8,%4 \n" + "jg 1b \n" + : "+r"(src0), // %0 + "+r"(src1), // %1 + "+r"(alpha), // %2 + "+r"(dst), // %3 + "+rm"(width) // %4 + ::"memory", + "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7"); } #endif // HAS_BLENDPLANEROW_SSSE3 @@ -3608,312 +4632,308 @@ void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1, // =((A2*C2)+(B2*(255-C2))+255)/256 // signed version of math // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256 -void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1, - const uint8* alpha, uint8* dst, int width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsllw $0x8,%%ymm5,%%ymm5 \n" - "mov $0x80808080,%%eax \n" - "vmovd %%eax,%%xmm6 \n" - "vbroadcastss %%xmm6,%%ymm6 \n" - "mov $0x807f807f,%%eax \n" - "vmovd %%eax,%%xmm7 \n" - "vbroadcastss %%xmm7,%%ymm7 \n" - "sub %2,%0 \n" - "sub %2,%1 \n" - "sub %2,%3 \n" +void BlendPlaneRow_AVX2(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, + int width) { + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsllw $0x8,%%ymm5,%%ymm5 \n" + "mov $0x80808080,%%eax \n" + "vmovd %%eax,%%xmm6 \n" + "vbroadcastss %%xmm6,%%ymm6 \n" + "mov $0x807f807f,%%eax \n" + "vmovd %%eax,%%xmm7 \n" + "vbroadcastss %%xmm7,%%ymm7 \n" + "sub %2,%0 \n" + "sub %2,%1 \n" + "sub %2,%3 \n" - // 32 pixel loop. - LABELALIGN - "1: \n" - "vmovdqu (%2),%%ymm0 \n" - "vpunpckhbw %%ymm0,%%ymm0,%%ymm3 \n" - "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" - "vpxor %%ymm5,%%ymm3,%%ymm3 \n" - "vpxor %%ymm5,%%ymm0,%%ymm0 \n" - "vmovdqu (%0,%2,1),%%ymm1 \n" - "vmovdqu (%1,%2,1),%%ymm2 \n" - "vpunpckhbw %%ymm2,%%ymm1,%%ymm4 \n" - "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n" - "vpsubb %%ymm6,%%ymm4,%%ymm4 \n" - "vpsubb %%ymm6,%%ymm1,%%ymm1 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "vpmaddubsw %%ymm1,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm7,%%ymm3,%%ymm3 \n" - "vpaddw %%ymm7,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm3,%%ymm3 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm3,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0,(%3,%2,1) \n" - "lea 0x20(%2),%2 \n" - "sub $0x20,%4 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src0), // %0 - "+r"(src1), // %1 - "+r"(alpha), // %2 - "+r"(dst), // %3 - "+rm"(width) // %4 - :: "memory", "cc", "eax", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + // 32 pixel loop. 
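  // Same math as the SSSE3 version, 32 pixels at a time. vpunpck{l,h}bw
  // and the closing vpackuswb all interleave within 128-bit lanes, so the
  // lane scrambling cancels and no vpermq fix-up is needed.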
+ LABELALIGN + "1: \n" + "vmovdqu (%2),%%ymm0 \n" + "vpunpckhbw %%ymm0,%%ymm0,%%ymm3 \n" + "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" + "vpxor %%ymm5,%%ymm3,%%ymm3 \n" + "vpxor %%ymm5,%%ymm0,%%ymm0 \n" + "vmovdqu (%0,%2,1),%%ymm1 \n" + "vmovdqu (%1,%2,1),%%ymm2 \n" + "vpunpckhbw %%ymm2,%%ymm1,%%ymm4 \n" + "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n" + "vpsubb %%ymm6,%%ymm4,%%ymm4 \n" + "vpsubb %%ymm6,%%ymm1,%%ymm1 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "vpmaddubsw %%ymm1,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm7,%%ymm3,%%ymm3 \n" + "vpaddw %%ymm7,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm3,%%ymm3 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm3,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%3,%2,1) \n" + "lea 0x20(%2),%2 \n" + "sub $0x20,%4 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src0), // %0 + "+r"(src1), // %1 + "+r"(alpha), // %2 + "+r"(dst), // %3 + "+rm"(width) // %4 + ::"memory", + "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_BLENDPLANEROW_AVX2 #ifdef HAS_ARGBATTENUATEROW_SSSE3 // Shuffle table duplicating alpha -static uvec8 kShuffleAlpha0 = { - 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u -}; -static uvec8 kShuffleAlpha1 = { - 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, - 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u -}; +static const uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, + 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u}; +static const uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, + 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u}; // Attenuate 4 pixels at a time. -void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm3,%%xmm3 \n" - "pslld $0x18,%%xmm3 \n" - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" +void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { + asm volatile( + "pcmpeqb %%xmm3,%%xmm3 \n" + "pslld $0x18,%%xmm3 \n" + "movdqa %3,%%xmm4 \n" + "movdqa %4,%%xmm5 \n" - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "pshufb %%xmm4,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "punpcklbw %%xmm1,%%xmm1 \n" - "pmulhuw %%xmm1,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "pshufb %%xmm5,%%xmm1 \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "punpckhbw %%xmm2,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "pand %%xmm3,%%xmm2 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "por %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleAlpha0), // %3 - "m"(kShuffleAlpha1) // %4 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + // 4 pixel loop. 
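  // punpcklbw of a register with itself turns each byte c into the word
  // c * 257; the kShuffleAlpha tables build matching a * 257 words (with
  // the alpha lane zeroed), so pmulhuw plus the later psrlw $8 closely
  // approximates (c * a) / 255. pand/por splice the original alpha back.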
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "pshufb %%xmm4,%%xmm0 \n" + "movdqu (%0),%%xmm1 \n" + "punpcklbw %%xmm1,%%xmm1 \n" + "pmulhuw %%xmm1,%%xmm0 \n" + "movdqu (%0),%%xmm1 \n" + "pshufb %%xmm5,%%xmm1 \n" + "movdqu (%0),%%xmm2 \n" + "punpckhbw %%xmm2,%%xmm2 \n" + "pmulhuw %%xmm2,%%xmm1 \n" + "movdqu (%0),%%xmm2 \n" + "lea 0x10(%0),%0 \n" + "pand %%xmm3,%%xmm2 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "por %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kShuffleAlpha0), // %3 + "m"(kShuffleAlpha1) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBATTENUATEROW_SSSE3 #ifdef HAS_ARGBATTENUATEROW_AVX2 // Shuffle table duplicating alpha. -static const uvec8 kShuffleAlpha_AVX2 = { - 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u -}; +static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u, + 128u, 128u, 14u, 15u, 14u, 15u, + 14u, 15u, 128u, 128u}; // Attenuate 8 pixels at a time. -void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - "vbroadcastf128 %3,%%ymm4 \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpslld $0x18,%%ymm5,%%ymm5 \n" - "sub %0,%1 \n" +void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { + asm volatile( + "vbroadcastf128 %3,%%ymm4 \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpslld $0x18,%%ymm5,%%ymm5 \n" + "sub %0,%1 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm6 \n" - "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" - "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n" - "vpshufb %%ymm4,%%ymm0,%%ymm2 \n" - "vpshufb %%ymm4,%%ymm1,%%ymm3 \n" - "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" - "vpand %%ymm5,%%ymm6,%%ymm6 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpor %%ymm6,%%ymm0,%%ymm0 \n" - MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1) - "lea " MEMLEA(0x20,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleAlpha_AVX2) // %3 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); + // 8 pixel loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm6 \n" + "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" + "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n" + "vpshufb %%ymm4,%%ymm0,%%ymm2 \n" + "vpshufb %%ymm4,%%ymm1,%%ymm3 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" + "vpand %%ymm5,%%ymm6,%%ymm6 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpor %%ymm6,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,0x00(%0,%1,1) \n" + "lea 0x20(%0),%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kShuffleAlpha_AVX2) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBATTENUATEROW_AVX2 #ifdef HAS_ARGBUNATTENUATEROW_SSE2 // Unattenuate 4 pixels at a time. -void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, +void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, int width) { uintptr_t alpha; - asm volatile ( - // 4 pixel loop. 
- LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movzb " MEMACCESS2(0x03,0) ",%3 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2 - "movzb " MEMACCESS2(0x07,0) ",%3 \n" - MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3 - "pshuflw $0x40,%%xmm2,%%xmm2 \n" - "pshuflw $0x40,%%xmm3,%%xmm3 \n" - "movlhps %%xmm3,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "movzb " MEMACCESS2(0x0b,0) ",%3 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2 - "movzb " MEMACCESS2(0x0f,0) ",%3 \n" - MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3 - "pshuflw $0x40,%%xmm2,%%xmm2 \n" - "pshuflw $0x40,%%xmm3,%%xmm3 \n" - "movlhps %%xmm3,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width), // %2 - "=&r"(alpha) // %3 - : "r"(fixed_invtbl8) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + asm volatile( + // 4 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movzb 0x03(%0),%3 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "movd 0x00(%4,%3,4),%%xmm2 \n" + "movzb 0x07(%0),%3 \n" + "movd 0x00(%4,%3,4),%%xmm3 \n" + "pshuflw $0x40,%%xmm2,%%xmm2 \n" + "pshuflw $0x40,%%xmm3,%%xmm3 \n" + "movlhps %%xmm3,%%xmm2 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "movdqu (%0),%%xmm1 \n" + "movzb 0x0b(%0),%3 \n" + "punpckhbw %%xmm1,%%xmm1 \n" + "movd 0x00(%4,%3,4),%%xmm2 \n" + "movzb 0x0f(%0),%3 \n" + "movd 0x00(%4,%3,4),%%xmm3 \n" + "pshuflw $0x40,%%xmm2,%%xmm2 \n" + "pshuflw $0x40,%%xmm3,%%xmm3 \n" + "movlhps %%xmm3,%%xmm2 \n" + "pmulhuw %%xmm2,%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width), // %2 + "=&r"(alpha) // %3 + : "r"(fixed_invtbl8) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBUNATTENUATEROW_SSE2 #ifdef HAS_ARGBUNATTENUATEROW_AVX2 // Shuffle table duplicating alpha. static const uvec8 kUnattenShuffleAlpha_AVX2 = { - 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u -}; + 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u}; // Unattenuate 8 pixels at a time. -void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, +void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, int width) { uintptr_t alpha; - asm volatile ( - "sub %0,%1 \n" - "vbroadcastf128 %5,%%ymm5 \n" + asm volatile( + "sub %0,%1 \n" + "vbroadcastf128 %5,%%ymm5 \n" - // 8 pixel loop. 
- LABELALIGN - "1: \n" - // replace VPGATHER - "movzb " MEMACCESS2(0x03,0) ",%3 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0 - "movzb " MEMACCESS2(0x07,0) ",%3 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1 - "movzb " MEMACCESS2(0x0b,0) ",%3 \n" - "vpunpckldq %%xmm1,%%xmm0,%%xmm6 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2 - "movzb " MEMACCESS2(0x0f,0) ",%3 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3 - "movzb " MEMACCESS2(0x13,0) ",%3 \n" - "vpunpckldq %%xmm3,%%xmm2,%%xmm7 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0 - "movzb " MEMACCESS2(0x17,0) ",%3 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm1) // vmovd 0x0(%4,%3,4),%%xmm1 - "movzb " MEMACCESS2(0x1b,0) ",%3 \n" - "vpunpckldq %%xmm1,%%xmm0,%%xmm0 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm2) // vmovd 0x0(%4,%3,4),%%xmm2 - "movzb " MEMACCESS2(0x1f,0) ",%3 \n" - MEMOPREG(vmovd,0x00,4,3,4,xmm3) // vmovd 0x0(%4,%3,4),%%xmm3 - "vpunpckldq %%xmm3,%%xmm2,%%xmm2 \n" - "vpunpcklqdq %%xmm7,%%xmm6,%%xmm3 \n" - "vpunpcklqdq %%xmm2,%%xmm0,%%xmm0 \n" - "vinserti128 $0x1,%%xmm0,%%ymm3,%%ymm3 \n" - // end of VPGATHER + // 8 pixel loop. + LABELALIGN + "1: \n" + // replace VPGATHER + "movzb 0x03(%0),%3 \n" + "vmovd 0x00(%4,%3,4),%%xmm0 \n" + "movzb 0x07(%0),%3 \n" + "vmovd 0x00(%4,%3,4),%%xmm1 \n" + "movzb 0x0b(%0),%3 \n" + "vpunpckldq %%xmm1,%%xmm0,%%xmm6 \n" + "vmovd 0x00(%4,%3,4),%%xmm2 \n" + "movzb 0x0f(%0),%3 \n" + "vmovd 0x00(%4,%3,4),%%xmm3 \n" + "movzb 0x13(%0),%3 \n" + "vpunpckldq %%xmm3,%%xmm2,%%xmm7 \n" + "vmovd 0x00(%4,%3,4),%%xmm0 \n" + "movzb 0x17(%0),%3 \n" + "vmovd 0x00(%4,%3,4),%%xmm1 \n" + "movzb 0x1b(%0),%3 \n" + "vpunpckldq %%xmm1,%%xmm0,%%xmm0 \n" + "vmovd 0x00(%4,%3,4),%%xmm2 \n" + "movzb 0x1f(%0),%3 \n" + "vmovd 0x00(%4,%3,4),%%xmm3 \n" + "vpunpckldq %%xmm3,%%xmm2,%%xmm2 \n" + "vpunpcklqdq %%xmm7,%%xmm6,%%xmm3 \n" + "vpunpcklqdq %%xmm2,%%xmm0,%%xmm0 \n" + "vinserti128 $0x1,%%xmm0,%%ymm3,%%ymm3 \n" + // end of VPGATHER - "vmovdqu " MEMACCESS(0) ",%%ymm6 \n" - "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" - "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n" - "vpunpcklwd %%ymm3,%%ymm3,%%ymm2 \n" - "vpunpckhwd %%ymm3,%%ymm3,%%ymm3 \n" - "vpshufb %%ymm5,%%ymm2,%%ymm2 \n" - "vpshufb %%ymm5,%%ymm3,%%ymm3 \n" - "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1) - "lea " MEMLEA(0x20,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width), // %2 - "=&r"(alpha) // %3 - : "r"(fixed_invtbl8), // %4 - "m"(kUnattenShuffleAlpha_AVX2) // %5 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + "vmovdqu (%0),%%ymm6 \n" + "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" + "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n" + "vpunpcklwd %%ymm3,%%ymm3,%%ymm2 \n" + "vpunpckhwd %%ymm3,%%ymm3,%%ymm3 \n" + "vpshufb %%ymm5,%%ymm2,%%ymm2 \n" + "vpshufb %%ymm5,%%ymm3,%%ymm3 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,0x00(%0,%1,1) \n" + "lea 0x20(%0),%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width), // %2 + "=&r"(alpha) // %3 + : "r"(fixed_invtbl8), // %4 + "m"(kUnattenShuffleAlpha_AVX2) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBUNATTENUATEROW_AVX2 
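For reference, a scalar sketch (not part of the patch; the _Ref names are illustrative) of what the attenuate and unattenuate kernels above compute per ARGB pixel. The SIMD paths approximate the divides with fixed-point multiplies (the unattenuate kernels index the fixed_invtbl8 reciprocal table instead of dividing), so their output can differ from this by small rounding errors:

#include <stdint.h>

// Exact integer reference; illustrative only.
static void ARGBAttenuateRow_Ref(const uint8_t* src, uint8_t* dst, int width) {
  for (int i = 0; i < width; ++i) {
    uint32_t a = src[4 * i + 3];
    dst[4 * i + 0] = (uint8_t)(src[4 * i + 0] * a / 255);  // b = b * a / 255
    dst[4 * i + 1] = (uint8_t)(src[4 * i + 1] * a / 255);  // g
    dst[4 * i + 2] = (uint8_t)(src[4 * i + 2] * a / 255);  // r
    dst[4 * i + 3] = (uint8_t)a;                           // alpha unchanged
  }
}

static void ARGBUnattenuateRow_Ref(const uint8_t* src,
                                   uint8_t* dst,
                                   int width) {
  for (int i = 0; i < width; ++i) {
    uint32_t a = src[4 * i + 3];
    for (int c = 0; c < 3; ++c) {
      // Undo the premultiply; a == 0 is left as-is here, while the SIMD
      // code's behavior for that case depends on the table entry for zero.
      uint32_t v = a ? (src[4 * i + c] * 255u) / a : src[4 * i + c];
      dst[4 * i + c] = (uint8_t)(v > 255 ? 255 : v);  // saturate, like packuswb
    }
    dst[4 * i + 3] = (uint8_t)a;
  }
}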
#ifdef HAS_ARGBGRAYROW_SSSE3 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels -void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" +void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width) { + asm volatile( + "movdqa %3,%%xmm4 \n" + "movdqa %4,%%xmm5 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "phaddw %%xmm1,%%xmm0 \n" - "paddw %%xmm5,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm3 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrld $0x18,%%xmm2 \n" - "psrld $0x18,%%xmm3 \n" - "packuswb %%xmm3,%%xmm2 \n" - "packuswb %%xmm2,%%xmm2 \n" - "movdqa %%xmm0,%%xmm3 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpcklbw %%xmm2,%%xmm3 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm3,%%xmm0 \n" - "punpckhwd %%xmm3,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kARGBToYJ), // %3 - "m"(kAddYJ64) // %4 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + // 8 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "phaddw %%xmm1,%%xmm0 \n" + "paddw %%xmm5,%%xmm0 \n" + "psrlw $0x7,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movdqu (%0),%%xmm2 \n" + "movdqu 0x10(%0),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "psrld $0x18,%%xmm2 \n" + "psrld $0x18,%%xmm3 \n" + "packuswb %%xmm3,%%xmm2 \n" + "packuswb %%xmm2,%%xmm2 \n" + "movdqa %%xmm0,%%xmm3 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "punpcklbw %%xmm2,%%xmm3 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklwd %%xmm3,%%xmm0 \n" + "punpckhwd %%xmm3,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kARGBToYJ), // %3 + "m"(kAddYJ64) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBGRAYROW_SSSE3 @@ -3922,412 +4942,415 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { // g = (r * 45 + g * 88 + b * 22) >> 7 // r = (r * 50 + g * 98 + b * 24) >> 7 // Constant for ARGB color to sepia tone -static vec8 kARGBToSepiaB = { - 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0 -}; +static const vec8 kARGBToSepiaB = {17, 68, 35, 0, 17, 68, 35, 0, + 17, 68, 35, 0, 17, 68, 35, 0}; -static vec8 kARGBToSepiaG = { - 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0 -}; +static const vec8 kARGBToSepiaG = {22, 88, 45, 0, 22, 88, 45, 0, + 22, 88, 45, 0, 22, 88, 45, 0}; -static vec8 kARGBToSepiaR = { - 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0 -}; +static const vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0, + 24, 98, 50, 0, 24, 98, 50, 0}; // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. 
-void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
-  asm volatile (
-    "movdqa    %2,%%xmm2                       \n"
-    "movdqa    %3,%%xmm3                       \n"
-    "movdqa    %4,%%xmm4                       \n"
+void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width) {
+  asm volatile(
+      "movdqa      %2,%%xmm2                     \n"
+      "movdqa      %3,%%xmm3                     \n"
+      "movdqa      %4,%%xmm4                     \n"

-    // 8 pixel loop.
-    LABELALIGN
-    "1:                                        \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm6   \n"
-    "pmaddubsw %%xmm2,%%xmm0                   \n"
-    "pmaddubsw %%xmm2,%%xmm6                   \n"
-    "phaddw    %%xmm6,%%xmm0                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm5         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "pmaddubsw %%xmm3,%%xmm5                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "phaddw    %%xmm1,%%xmm5                   \n"
-    "psrlw     $0x7,%%xmm5                     \n"
-    "packuswb  %%xmm5,%%xmm5                   \n"
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm5         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "pmaddubsw %%xmm4,%%xmm5                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "phaddw    %%xmm1,%%xmm5                   \n"
-    "psrlw     $0x7,%%xmm5                     \n"
-    "packuswb  %%xmm5,%%xmm5                   \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm6         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "psrld     $0x18,%%xmm6                    \n"
-    "psrld     $0x18,%%xmm1                    \n"
-    "packuswb  %%xmm1,%%xmm6                   \n"
-    "packuswb  %%xmm6,%%xmm6                   \n"
-    "punpcklbw %%xmm6,%%xmm5                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm5,%%xmm0                   \n"
-    "punpckhwd %%xmm5,%%xmm1                   \n"
-    "movdqu    %%xmm0," MEMACCESS(0) "         \n"
-    "movdqu    %%xmm1," MEMACCESS2(0x10,0) "   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "sub       $0x8,%1                         \n"
-    "jg        1b                              \n"
-  : "+r"(dst_argb),      // %0
-    "+r"(width)          // %1
-  : "m"(kARGBToSepiaB),  // %2
-    "m"(kARGBToSepiaG),  // %3
-    "m"(kARGBToSepiaR)   // %4
-  : "memory", "cc"
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-  );
+      // 8 pixel loop.
+      LABELALIGN
+      "1:                                        \n"
+      "movdqu      (%0),%%xmm0                   \n"
+      "movdqu      0x10(%0),%%xmm6               \n"
+      "pmaddubsw   %%xmm2,%%xmm0                 \n"
+      "pmaddubsw   %%xmm2,%%xmm6                 \n"
+      "phaddw      %%xmm6,%%xmm0                 \n"
+      "psrlw       $0x7,%%xmm0                   \n"
+      "packuswb    %%xmm0,%%xmm0                 \n"
+      "movdqu      (%0),%%xmm5                   \n"
+      "movdqu      0x10(%0),%%xmm1               \n"
+      "pmaddubsw   %%xmm3,%%xmm5                 \n"
+      "pmaddubsw   %%xmm3,%%xmm1                 \n"
+      "phaddw      %%xmm1,%%xmm5                 \n"
+      "psrlw       $0x7,%%xmm5                   \n"
+      "packuswb    %%xmm5,%%xmm5                 \n"
+      "punpcklbw   %%xmm5,%%xmm0                 \n"
+      "movdqu      (%0),%%xmm5                   \n"
+      "movdqu      0x10(%0),%%xmm1               \n"
+      "pmaddubsw   %%xmm4,%%xmm5                 \n"
+      "pmaddubsw   %%xmm4,%%xmm1                 \n"
+      "phaddw      %%xmm1,%%xmm5                 \n"
+      "psrlw       $0x7,%%xmm5                   \n"
+      "packuswb    %%xmm5,%%xmm5                 \n"
+      "movdqu      (%0),%%xmm6                   \n"
+      "movdqu      0x10(%0),%%xmm1               \n"
+      "psrld       $0x18,%%xmm6                  \n"
+      "psrld       $0x18,%%xmm1                  \n"
+      "packuswb    %%xmm1,%%xmm6                 \n"
+      "packuswb    %%xmm6,%%xmm6                 \n"
+      "punpcklbw   %%xmm6,%%xmm5                 \n"
+      "movdqa      %%xmm0,%%xmm1                 \n"
+      "punpcklwd   %%xmm5,%%xmm0                 \n"
+      "punpckhwd   %%xmm5,%%xmm1                 \n"
+      "movdqu      %%xmm0,(%0)                   \n"
+      "movdqu      %%xmm1,0x10(%0)               \n"
+      "lea         0x20(%0),%0                   \n"
+      "sub         $0x8,%1                       \n"
+      "jg          1b                            \n"
+      : "+r"(dst_argb),      // %0
+        "+r"(width)          // %1
+      : "m"(kARGBToSepiaB),  // %2
+        "m"(kARGBToSepiaG),  // %3
+        "m"(kARGBToSepiaR)   // %4
+      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
 }
 #endif  // HAS_ARGBSEPIAROW_SSSE3

 #ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
 // Transform 8 ARGB pixels (32 bytes) with color matrix.
 // Same as Sepia except matrix is provided.
-void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width) { - asm volatile ( - "movdqu " MEMACCESS(3) ",%%xmm5 \n" - "pshufd $0x00,%%xmm5,%%xmm2 \n" - "pshufd $0x55,%%xmm5,%%xmm3 \n" - "pshufd $0xaa,%%xmm5,%%xmm4 \n" - "pshufd $0xff,%%xmm5,%%xmm5 \n" +void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width) { + asm volatile( + "movdqu (%3),%%xmm5 \n" + "pshufd $0x00,%%xmm5,%%xmm2 \n" + "pshufd $0x55,%%xmm5,%%xmm3 \n" + "pshufd $0xaa,%%xmm5,%%xmm4 \n" + "pshufd $0xff,%%xmm5,%%xmm5 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n" - "pmaddubsw %%xmm2,%%xmm0 \n" - "pmaddubsw %%xmm2,%%xmm7 \n" - "movdqu " MEMACCESS(0) ",%%xmm6 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "phaddsw %%xmm7,%%xmm0 \n" - "phaddsw %%xmm1,%%xmm6 \n" - "psraw $0x6,%%xmm0 \n" - "psraw $0x6,%%xmm6 \n" - "packuswb %%xmm0,%%xmm0 \n" - "packuswb %%xmm6,%%xmm6 \n" - "punpcklbw %%xmm6,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm7 \n" - "phaddsw %%xmm7,%%xmm1 \n" - "movdqu " MEMACCESS(0) ",%%xmm6 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm7 \n" - "pmaddubsw %%xmm5,%%xmm6 \n" - "pmaddubsw %%xmm5,%%xmm7 \n" - "phaddsw %%xmm7,%%xmm6 \n" - "psraw $0x6,%%xmm1 \n" - "psraw $0x6,%%xmm6 \n" - "packuswb %%xmm1,%%xmm1 \n" - "packuswb %%xmm6,%%xmm6 \n" - "punpcklbw %%xmm6,%%xmm1 \n" - "movdqa %%xmm0,%%xmm6 \n" - "punpcklwd %%xmm1,%%xmm0 \n" - "punpckhwd %%xmm1,%%xmm6 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm6," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(matrix_argb) // %3 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + // 8 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm7 \n" + "pmaddubsw %%xmm2,%%xmm0 \n" + "pmaddubsw %%xmm2,%%xmm7 \n" + "movdqu (%0),%%xmm6 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "pmaddubsw %%xmm3,%%xmm6 \n" + "pmaddubsw %%xmm3,%%xmm1 \n" + "phaddsw %%xmm7,%%xmm0 \n" + "phaddsw %%xmm1,%%xmm6 \n" + "psraw $0x6,%%xmm0 \n" + "psraw $0x6,%%xmm6 \n" + "packuswb %%xmm0,%%xmm0 \n" + "packuswb %%xmm6,%%xmm6 \n" + "punpcklbw %%xmm6,%%xmm0 \n" + "movdqu (%0),%%xmm1 \n" + "movdqu 0x10(%0),%%xmm7 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm7 \n" + "phaddsw %%xmm7,%%xmm1 \n" + "movdqu (%0),%%xmm6 \n" + "movdqu 0x10(%0),%%xmm7 \n" + "pmaddubsw %%xmm5,%%xmm6 \n" + "pmaddubsw %%xmm5,%%xmm7 \n" + "phaddsw %%xmm7,%%xmm6 \n" + "psraw $0x6,%%xmm1 \n" + "psraw $0x6,%%xmm6 \n" + "packuswb %%xmm1,%%xmm1 \n" + "packuswb %%xmm6,%%xmm6 \n" + "punpcklbw %%xmm6,%%xmm1 \n" + "movdqa %%xmm0,%%xmm6 \n" + "punpcklwd %%xmm1,%%xmm0 \n" + "punpckhwd %%xmm1,%%xmm6 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm6,0x10(%1) \n" + "lea 0x20(%0),%0 \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(matrix_argb) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBCOLORMATRIXROW_SSSE3 #ifdef HAS_ARGBQUANTIZEROW_SSE2 // Quantize 4 ARGB pixels (16 bytes). 
-void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width) { - asm volatile ( - "movd %2,%%xmm2 \n" - "movd %3,%%xmm3 \n" - "movd %4,%%xmm4 \n" - "pshuflw $0x40,%%xmm2,%%xmm2 \n" - "pshufd $0x44,%%xmm2,%%xmm2 \n" - "pshuflw $0x40,%%xmm3,%%xmm3 \n" - "pshufd $0x44,%%xmm3,%%xmm3 \n" - "pshuflw $0x40,%%xmm4,%%xmm4 \n" - "pshufd $0x44,%%xmm4,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "pslld $0x18,%%xmm6 \n" +void ARGBQuantizeRow_SSE2(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width) { + asm volatile( + "movd %2,%%xmm2 \n" + "movd %3,%%xmm3 \n" + "movd %4,%%xmm4 \n" + "pshuflw $0x40,%%xmm2,%%xmm2 \n" + "pshufd $0x44,%%xmm2,%%xmm2 \n" + "pshuflw $0x40,%%xmm3,%%xmm3 \n" + "pshufd $0x44,%%xmm3,%%xmm3 \n" + "pshuflw $0x40,%%xmm4,%%xmm4 \n" + "pshufd $0x44,%%xmm4,%%xmm4 \n" + "pxor %%xmm5,%%xmm5 \n" + "pcmpeqb %%xmm6,%%xmm6 \n" + "pslld $0x18,%%xmm6 \n" - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "pmullw %%xmm3,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm7 \n" - "pmullw %%xmm3,%%xmm1 \n" - "pand %%xmm6,%%xmm7 \n" - "paddw %%xmm4,%%xmm0 \n" - "paddw %%xmm4,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "por %%xmm7,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(0) " \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "sub $0x4,%1 \n" - "jg 1b \n" - : "+r"(dst_argb), // %0 - "+r"(width) // %1 - : "r"(scale), // %2 - "r"(interval_size), // %3 - "r"(interval_offset) // %4 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + // 4 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "punpcklbw %%xmm5,%%xmm0 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "movdqu (%0),%%xmm1 \n" + "punpckhbw %%xmm5,%%xmm1 \n" + "pmulhuw %%xmm2,%%xmm1 \n" + "pmullw %%xmm3,%%xmm0 \n" + "movdqu (%0),%%xmm7 \n" + "pmullw %%xmm3,%%xmm1 \n" + "pand %%xmm6,%%xmm7 \n" + "paddw %%xmm4,%%xmm0 \n" + "paddw %%xmm4,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "por %%xmm7,%%xmm0 \n" + "movdqu %%xmm0,(%0) \n" + "lea 0x10(%0),%0 \n" + "sub $0x4,%1 \n" + "jg 1b \n" + : "+r"(dst_argb), // %0 + "+r"(width) // %1 + : "r"(scale), // %2 + "r"(interval_size), // %3 + "r"(interval_offset) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBQUANTIZEROW_SSE2 #ifdef HAS_ARGBSHADEROW_SSE2 // Shade 4 pixels at a time by specified value. -void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value) { - asm volatile ( - "movd %3,%%xmm2 \n" - "punpcklbw %%xmm2,%%xmm2 \n" - "punpcklqdq %%xmm2,%%xmm2 \n" +void ARGBShadeRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value) { + asm volatile( + "movd %3,%%xmm2 \n" + "punpcklbw %%xmm2,%%xmm2 \n" + "punpcklqdq %%xmm2,%%xmm2 \n" - // 4 pixel loop. 
- LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(value) // %3 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2" - ); + // 4 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm1 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "pmulhuw %%xmm2,%%xmm1 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(value) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_ARGBSHADEROW_SSE2 #ifdef HAS_ARGBMULTIPLYROW_SSE2 // Multiply 2 rows of ARGB pixels together, 4 pixels at a time. -void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - "pxor %%xmm5,%%xmm5 \n" +void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + asm volatile( - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqu " MEMACCESS(1) ",%%xmm2 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "movdqu %%xmm0,%%xmm1 \n" - "movdqu %%xmm2,%%xmm3 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "punpckhbw %%xmm5,%%xmm3 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "pmulhuw %%xmm3,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + "pxor %%xmm5,%%xmm5 \n" + + // 4 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movdqu (%1),%%xmm2 \n" + "lea 0x10(%1),%1 \n" + "movdqu %%xmm0,%%xmm1 \n" + "movdqu %%xmm2,%%xmm3 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "punpckhbw %%xmm5,%%xmm3 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "pmulhuw %%xmm3,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jg 1b \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_ARGBMULTIPLYROW_SSE2 #ifdef HAS_ARGBMULTIPLYROW_AVX2 // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. -void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - "vpxor %%ymm5,%%ymm5,%%ymm5 \n" +void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + asm volatile( - // 4 pixel loop. 
-    LABELALIGN
-    "1:                                        \n"
-    "vmovdqu   " MEMACCESS(0) ",%%ymm1         \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "vmovdqu   " MEMACCESS(1) ",%%ymm3         \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "vpunpcklbw %%ymm1,%%ymm1,%%ymm0           \n"
-    "vpunpckhbw %%ymm1,%%ymm1,%%ymm1           \n"
-    "vpunpcklbw %%ymm5,%%ymm3,%%ymm2           \n"
-    "vpunpckhbw %%ymm5,%%ymm3,%%ymm3           \n"
-    "vpmulhuw  %%ymm2,%%ymm0,%%ymm0            \n"
-    "vpmulhuw  %%ymm3,%%ymm1,%%ymm1            \n"
-    "vpackuswb %%ymm1,%%ymm0,%%ymm0            \n"
-    "vmovdqu   %%ymm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x20,2) ",%2           \n"
-    "sub       $0x8,%3                         \n"
-    "jg        1b                              \n"
-    "vzeroupper                                \n"
-  : "+r"(src_argb0),  // %0
-    "+r"(src_argb1),  // %1
-    "+r"(dst_argb),   // %2
-    "+r"(width)       // %3
-  :
-  : "memory", "cc"
+      "vpxor      %%ymm5,%%ymm5,%%ymm5          \n"
+
+      // 8 pixel loop.
+      LABELALIGN
+      "1:                                        \n"
+      "vmovdqu    (%0),%%ymm1                    \n"
+      "lea        0x20(%0),%0                    \n"
+      "vmovdqu    (%1),%%ymm3                    \n"
+      "lea        0x20(%1),%1                    \n"
+      "vpunpcklbw %%ymm1,%%ymm1,%%ymm0           \n"
+      "vpunpckhbw %%ymm1,%%ymm1,%%ymm1           \n"
+      "vpunpcklbw %%ymm5,%%ymm3,%%ymm2           \n"
+      "vpunpckhbw %%ymm5,%%ymm3,%%ymm3           \n"
+      "vpmulhuw   %%ymm2,%%ymm0,%%ymm0           \n"
+      "vpmulhuw   %%ymm3,%%ymm1,%%ymm1           \n"
+      "vpackuswb  %%ymm1,%%ymm0,%%ymm0           \n"
+      "vmovdqu    %%ymm0,(%2)                    \n"
+      "lea        0x20(%2),%2                    \n"
+      "sub        $0x8,%3                        \n"
+      "jg         1b                             \n"
+      "vzeroupper                                \n"
+      : "+r"(src_argb0),  // %0
+        "+r"(src_argb1),  // %1
+        "+r"(dst_argb),   // %2
+        "+r"(width)       // %3
+      :
+      : "memory", "cc"
 #if defined(__AVX2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+        ,
+        "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
 #endif
-  );
+  );
 }
 #endif  // HAS_ARGBMULTIPLYROW_AVX2

 #ifdef HAS_ARGBADDROW_SSE2
 // Add 2 rows of ARGB pixels together, 4 pixels at a time.
-void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                     uint8* dst_argb, int width) {
-  asm volatile (
-    // 4 pixel loop.
-    LABELALIGN
-    "1:                                        \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "paddusb   %%xmm1,%%xmm0                   \n"
-    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "sub       $0x4,%3                         \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb0),  // %0
-    "+r"(src_argb1),  // %1
-    "+r"(dst_argb),   // %2
-    "+r"(width)       // %3
-  :
-  : "memory", "cc"
-    , "xmm0", "xmm1"
-  );
+void ARGBAddRow_SSE2(const uint8_t* src_argb0,
+                     const uint8_t* src_argb1,
+                     uint8_t* dst_argb,
+                     int width) {
+  asm volatile(
+      // 4 pixel loop.
+      LABELALIGN
+      "1:                                        \n"
+      "movdqu     (%0),%%xmm0                    \n"
+      "lea        0x10(%0),%0                    \n"
+      "movdqu     (%1),%%xmm1                    \n"
+      "lea        0x10(%1),%1                    \n"
+      "paddusb    %%xmm1,%%xmm0                  \n"
+      "movdqu     %%xmm0,(%2)                    \n"
+      "lea        0x10(%2),%2                    \n"
+      "sub        $0x4,%3                        \n"
+      "jg         1b                             \n"
+      : "+r"(src_argb0),  // %0
+        "+r"(src_argb1),  // %1
+        "+r"(dst_argb),   // %2
+        "+r"(width)       // %3
+      :
+      : "memory", "cc", "xmm0", "xmm1");
 }
 #endif  // HAS_ARGBADDROW_SSE2

 #ifdef HAS_ARGBADDROW_AVX2
 // Add 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
-                     uint8* dst_argb, int width) {
-  asm volatile (
-    // 4 pixel loop.
-    LABELALIGN
-    "1:                                        \n"
-    "vmovdqu   " MEMACCESS(0) ",%%ymm0         \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "vpaddusb  " MEMACCESS(1) ",%%ymm0,%%ymm0  \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "vmovdqu   %%ymm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x20,2) ",%2           \n"
-    "sub       $0x8,%3                         \n"
-    "jg        1b                              \n"
-    "vzeroupper                                \n"
-  : "+r"(src_argb0),  // %0
-    "+r"(src_argb1),  // %1
-    "+r"(dst_argb),   // %2
-    "+r"(width)       // %3
-  :
-  : "memory", "cc"
-    , "xmm0"
-  );
+void ARGBAddRow_AVX2(const uint8_t* src_argb0,
+                     const uint8_t* src_argb1,
+                     uint8_t* dst_argb,
+                     int width) {
+  asm volatile(
+      // 8 pixel loop.
+      LABELALIGN
+      "1:                                        \n"
+      "vmovdqu    (%0),%%ymm0                    \n"
+      "lea        0x20(%0),%0                    \n"
+      "vpaddusb   (%1),%%ymm0,%%ymm0             \n"
+      "lea        0x20(%1),%1                    \n"
+      "vmovdqu    %%ymm0,(%2)                    \n"
+      "lea        0x20(%2),%2                    \n"
+      "sub        $0x8,%3                        \n"
+      "jg         1b                             \n"
+      "vzeroupper                                \n"
+      : "+r"(src_argb0),  // %0
+        "+r"(src_argb1),  // %1
+        "+r"(dst_argb),   // %2
+        "+r"(width)       // %3
+      :
+      : "memory", "cc", "xmm0");
 }
 #endif  // HAS_ARGBADDROW_AVX2

 #ifdef HAS_ARGBSUBTRACTROW_SSE2
 // Subtract 2 rows of ARGB pixels, 4 pixels at a time.
-void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                          uint8* dst_argb, int width) {
-  asm volatile (
-    // 4 pixel loop.
-    LABELALIGN
-    "1:                                        \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "psubusb   %%xmm1,%%xmm0                   \n"
-    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "sub       $0x4,%3                         \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb0),  // %0
-    "+r"(src_argb1),  // %1
-    "+r"(dst_argb),   // %2
-    "+r"(width)       // %3
-  :
-  : "memory", "cc"
-    , "xmm0", "xmm1"
-  );
+void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
+                          const uint8_t* src_argb1,
+                          uint8_t* dst_argb,
+                          int width) {
+  asm volatile(
+      // 4 pixel loop.
+      LABELALIGN
+      "1:                                        \n"
+      "movdqu     (%0),%%xmm0                    \n"
+      "lea        0x10(%0),%0                    \n"
+      "movdqu     (%1),%%xmm1                    \n"
+      "lea        0x10(%1),%1                    \n"
+      "psubusb    %%xmm1,%%xmm0                  \n"
+      "movdqu     %%xmm0,(%2)                    \n"
+      "lea        0x10(%2),%2                    \n"
+      "sub        $0x4,%3                        \n"
+      "jg         1b                             \n"
+      : "+r"(src_argb0),  // %0
+        "+r"(src_argb1),  // %1
+        "+r"(dst_argb),   // %2
+        "+r"(width)       // %3
+      :
+      : "memory", "cc", "xmm0", "xmm1");
 }
 #endif  // HAS_ARGBSUBTRACTROW_SSE2

 #ifdef HAS_ARGBSUBTRACTROW_AVX2
 // Subtract 2 rows of ARGB pixels, 8 pixels at a time.
-void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
-                          uint8* dst_argb, int width) {
-  asm volatile (
-    // 4 pixel loop.
-    LABELALIGN
-    "1:                                        \n"
-    "vmovdqu   " MEMACCESS(0) ",%%ymm0         \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "vpsubusb  " MEMACCESS(1) ",%%ymm0,%%ymm0  \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "vmovdqu   %%ymm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x20,2) ",%2           \n"
-    "sub       $0x8,%3                         \n"
-    "jg        1b                              \n"
-    "vzeroupper                                \n"
-  : "+r"(src_argb0),  // %0
-    "+r"(src_argb1),  // %1
-    "+r"(dst_argb),   // %2
-    "+r"(width)       // %3
-  :
-  : "memory", "cc"
-    , "xmm0"
-  );
+void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
+                          const uint8_t* src_argb1,
+                          uint8_t* dst_argb,
+                          int width) {
+  asm volatile(
+      // 8 pixel loop.
+      LABELALIGN
+      "1:                                        \n"
+      "vmovdqu    (%0),%%ymm0                    \n"
+      "lea        0x20(%0),%0                    \n"
+      "vpsubusb   (%1),%%ymm0,%%ymm0             \n"
+      "lea        0x20(%1),%1                    \n"
+      "vmovdqu    %%ymm0,(%2)                    \n"
+      "lea        0x20(%2),%2                    \n"
+      "sub        $0x8,%3                        \n"
+      "jg         1b                             \n"
+      "vzeroupper                                \n"
+      : "+r"(src_argb0),  // %0
+        "+r"(src_argb1),  // %1
+        "+r"(dst_argb),   // %2
+        "+r"(width)       // %3
+      :
+      : "memory", "cc", "xmm0");
 }
 #endif  // HAS_ARGBSUBTRACTROW_AVX2

@@ -4336,52 +5359,53 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
 // -1  0  1
 // -2  0  2
 // -1  0  1
-void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
-                    const uint8* src_y2, uint8* dst_sobelx, int width) {
-  asm volatile (
-    "sub       %0,%1                           \n"
-    "sub       %0,%2                           \n"
-    "sub       %0,%3                           \n"
-    "pxor      %%xmm5,%%xmm5                   \n"
+void SobelXRow_SSE2(const uint8_t* src_y0,
+                    const uint8_t* src_y1,
+                    const uint8_t* src_y2,
+                    uint8_t* dst_sobelx,
+                    int width) {
+  asm volatile(
+      "sub       %0,%1                           \n"
+      "sub       %0,%2                           \n"
+      "sub       %0,%3                           \n"
+      "pxor      %%xmm5,%%xmm5                   \n"

-    // 8 pixel loop.
- LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - "movq " MEMACCESS2(0x2,0) ",%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "psubw %%xmm1,%%xmm0 \n" - MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1 - MEMOPREG(movq,0x02,0,1,1,xmm2) // movq 0x2(%0,%1,1),%%xmm2 - "punpcklbw %%xmm5,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "psubw %%xmm2,%%xmm1 \n" - MEMOPREG(movq,0x00,0,2,1,xmm2) // movq (%0,%2,1),%%xmm2 - MEMOPREG(movq,0x02,0,2,1,xmm3) // movq 0x2(%0,%2,1),%%xmm3 - "punpcklbw %%xmm5,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm3 \n" - "psubw %%xmm3,%%xmm2 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm1,%%xmm0 \n" - "paddw %%xmm1,%%xmm0 \n" - "pxor %%xmm1,%%xmm1 \n" - "psubw %%xmm0,%%xmm1 \n" - "pmaxsw %%xmm1,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - MEMOPMEM(movq,xmm0,0x00,0,3,1) // movq %%xmm0,(%0,%3,1) - "lea " MEMLEA(0x8,0) ",%0 \n" - "sub $0x8,%4 \n" - "jg 1b \n" - : "+r"(src_y0), // %0 - "+r"(src_y1), // %1 - "+r"(src_y2), // %2 - "+r"(dst_sobelx), // %3 - "+r"(width) // %4 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + // 8 pixel loop. + LABELALIGN + "1: \n" + "movq (%0),%%xmm0 \n" + "movq 0x2(%0),%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm0 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "psubw %%xmm1,%%xmm0 \n" + "movq 0x00(%0,%1,1),%%xmm1 \n" + "movq 0x02(%0,%1,1),%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "psubw %%xmm2,%%xmm1 \n" + "movq 0x00(%0,%2,1),%%xmm2 \n" + "movq 0x02(%0,%2,1),%%xmm3 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm3 \n" + "psubw %%xmm3,%%xmm2 \n" + "paddw %%xmm2,%%xmm0 \n" + "paddw %%xmm1,%%xmm0 \n" + "paddw %%xmm1,%%xmm0 \n" + "pxor %%xmm1,%%xmm1 \n" + "psubw %%xmm0,%%xmm1 \n" + "pmaxsw %%xmm1,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,0x00(%0,%3,1) \n" + "lea 0x8(%0),%0 \n" + "sub $0x8,%4 \n" + "jg 1b \n" + : "+r"(src_y0), // %0 + "+r"(src_y1), // %1 + "+r"(src_y2), // %2 + "+r"(dst_sobelx), // %3 + "+r"(width) // %4 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SOBELXROW_SSE2 @@ -4390,50 +5414,50 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, // -1 -2 -1 // 0 0 0 // 1 2 1 -void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width) { - asm volatile ( - "sub %0,%1 \n" - "sub %0,%2 \n" - "pxor %%xmm5,%%xmm5 \n" +void SobelYRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width) { + asm volatile( + "sub %0,%1 \n" + "sub %0,%2 \n" + "pxor %%xmm5,%%xmm5 \n" - // 8 pixel loop. 
- LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1 - "punpcklbw %%xmm5,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "psubw %%xmm1,%%xmm0 \n" - "movq " MEMACCESS2(0x1,0) ",%%xmm1 \n" - MEMOPREG(movq,0x01,0,1,1,xmm2) // movq 0x1(%0,%1,1),%%xmm2 - "punpcklbw %%xmm5,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "psubw %%xmm2,%%xmm1 \n" - "movq " MEMACCESS2(0x2,0) ",%%xmm2 \n" - MEMOPREG(movq,0x02,0,1,1,xmm3) // movq 0x2(%0,%1,1),%%xmm3 - "punpcklbw %%xmm5,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm3 \n" - "psubw %%xmm3,%%xmm2 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm1,%%xmm0 \n" - "paddw %%xmm1,%%xmm0 \n" - "pxor %%xmm1,%%xmm1 \n" - "psubw %%xmm0,%%xmm1 \n" - "pmaxsw %%xmm1,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - MEMOPMEM(movq,xmm0,0x00,0,2,1) // movq %%xmm0,(%0,%2,1) - "lea " MEMLEA(0x8,0) ",%0 \n" - "sub $0x8,%3 \n" - "jg 1b \n" - : "+r"(src_y0), // %0 - "+r"(src_y1), // %1 - "+r"(dst_sobely), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + // 8 pixel loop. + LABELALIGN + "1: \n" + "movq (%0),%%xmm0 \n" + "movq 0x00(%0,%1,1),%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm0 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "psubw %%xmm1,%%xmm0 \n" + "movq 0x1(%0),%%xmm1 \n" + "movq 0x01(%0,%1,1),%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "psubw %%xmm2,%%xmm1 \n" + "movq 0x2(%0),%%xmm2 \n" + "movq 0x02(%0,%1,1),%%xmm3 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm3 \n" + "psubw %%xmm3,%%xmm2 \n" + "paddw %%xmm2,%%xmm0 \n" + "paddw %%xmm1,%%xmm0 \n" + "paddw %%xmm1,%%xmm0 \n" + "pxor %%xmm1,%%xmm1 \n" + "psubw %%xmm0,%%xmm1 \n" + "pmaxsw %%xmm1,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,0x00(%0,%2,1) \n" + "lea 0x8(%0),%0 \n" + "sub $0x8,%3 \n" + "jg 1b \n" + : "+r"(src_y0), // %0 + "+r"(src_y1), // %1 + "+r"(dst_sobely), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SOBELYROW_SSE2 @@ -4443,79 +5467,79 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, // R = Sobel // G = Sobel // B = Sobel -void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - asm volatile ( - "sub %0,%1 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0x18,%%xmm5 \n" +void SobelRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width) { + asm volatile( + "sub %0,%1 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0x18,%%xmm5 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "punpcklbw %%xmm0,%%xmm2 \n" - "punpckhbw %%xmm0,%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm1 \n" - "punpckhwd %%xmm2,%%xmm2 \n" - "por %%xmm5,%%xmm1 \n" - "por %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm3 \n" - "punpcklwd %%xmm0,%%xmm3 \n" - "punpckhwd %%xmm0,%%xmm0 \n" - "por %%xmm5,%%xmm3 \n" - "por %%xmm5,%%xmm0 \n" - "movdqu %%xmm1," MEMACCESS(2) " \n" - "movdqu %%xmm2," MEMACCESS2(0x10,2) " \n" - "movdqu %%xmm3," MEMACCESS2(0x20,2) " \n" - "movdqu %%xmm0," MEMACCESS2(0x30,2) " \n" - "lea " MEMLEA(0x40,2) ",%2 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + // 8 pixel loop. 
+ LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%1,1),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "paddusb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "punpcklbw %%xmm0,%%xmm2 \n" + "punpckhbw %%xmm0,%%xmm0 \n" + "movdqa %%xmm2,%%xmm1 \n" + "punpcklwd %%xmm2,%%xmm1 \n" + "punpckhwd %%xmm2,%%xmm2 \n" + "por %%xmm5,%%xmm1 \n" + "por %%xmm5,%%xmm2 \n" + "movdqa %%xmm0,%%xmm3 \n" + "punpcklwd %%xmm0,%%xmm3 \n" + "punpckhwd %%xmm0,%%xmm0 \n" + "por %%xmm5,%%xmm3 \n" + "por %%xmm5,%%xmm0 \n" + "movdqu %%xmm1,(%2) \n" + "movdqu %%xmm2,0x10(%2) \n" + "movdqu %%xmm3,0x20(%2) \n" + "movdqu %%xmm0,0x30(%2) \n" + "lea 0x40(%2),%2 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_sobelx), // %0 + "+r"(src_sobely), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SOBELROW_SSE2 #ifdef HAS_SOBELTOPLANEROW_SSE2 // Adds Sobel X and Sobel Y and stores Sobel into a plane. -void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width) { - asm volatile ( - "sub %0,%1 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0x18,%%xmm5 \n" +void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width) { + asm volatile( + "sub %0,%1 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0x18,%%xmm5 \n" - // 8 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_y), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1" - ); + // 8 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%1,1),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "paddusb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_sobelx), // %0 + "+r"(src_sobely), // %1 + "+r"(dst_y), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_SOBELTOPLANEROW_SSE2 @@ -4525,1004 +5549,1123 @@ void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, // R = Sobel X // G = Sobel // B = Sobel Y -void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - asm volatile ( - "sub %0,%1 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" +void SobelXYRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width) { + asm volatile( + "sub %0,%1 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" - // 8 pixel loop. 
- LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "paddusb %%xmm1,%%xmm2 \n" - "movdqa %%xmm0,%%xmm3 \n" - "punpcklbw %%xmm5,%%xmm3 \n" - "punpckhbw %%xmm5,%%xmm0 \n" - "movdqa %%xmm1,%%xmm4 \n" - "punpcklbw %%xmm2,%%xmm4 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "movdqa %%xmm4,%%xmm6 \n" - "punpcklwd %%xmm3,%%xmm6 \n" - "punpckhwd %%xmm3,%%xmm4 \n" - "movdqa %%xmm1,%%xmm7 \n" - "punpcklwd %%xmm0,%%xmm7 \n" - "punpckhwd %%xmm0,%%xmm1 \n" - "movdqu %%xmm6," MEMACCESS(2) " \n" - "movdqu %%xmm4," MEMACCESS2(0x10,2) " \n" - "movdqu %%xmm7," MEMACCESS2(0x20,2) " \n" - "movdqu %%xmm1," MEMACCESS2(0x30,2) " \n" - "lea " MEMLEA(0x40,2) ",%2 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + // 8 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%1,1),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "paddusb %%xmm1,%%xmm2 \n" + "movdqa %%xmm0,%%xmm3 \n" + "punpcklbw %%xmm5,%%xmm3 \n" + "punpckhbw %%xmm5,%%xmm0 \n" + "movdqa %%xmm1,%%xmm4 \n" + "punpcklbw %%xmm2,%%xmm4 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "movdqa %%xmm4,%%xmm6 \n" + "punpcklwd %%xmm3,%%xmm6 \n" + "punpckhwd %%xmm3,%%xmm4 \n" + "movdqa %%xmm1,%%xmm7 \n" + "punpcklwd %%xmm0,%%xmm7 \n" + "punpckhwd %%xmm0,%%xmm1 \n" + "movdqu %%xmm6,(%2) \n" + "movdqu %%xmm4,0x10(%2) \n" + "movdqu %%xmm7,0x20(%2) \n" + "movdqu %%xmm1,0x30(%2) \n" + "lea 0x40(%2),%2 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + : "+r"(src_sobelx), // %0 + "+r"(src_sobely), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_SOBELXYROW_SSE2 #ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2 // Creates a table of cumulative sums where each value is a sum of all values // above and to the left of the value, inclusive of the value. 
-void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width) { - asm volatile ( - "pxor %%xmm0,%%xmm0 \n" - "pxor %%xmm1,%%xmm1 \n" - "sub $0x4,%3 \n" - "jl 49f \n" - "test $0xf,%1 \n" - "jne 49f \n" +void ComputeCumulativeSumRow_SSE2(const uint8_t* row, + int32_t* cumsum, + const int32_t* previous_cumsum, + int width) { + asm volatile( + "pxor %%xmm0,%%xmm0 \n" + "pxor %%xmm1,%%xmm1 \n" + "sub $0x4,%3 \n" + "jl 49f \n" + "test $0xf,%1 \n" + "jne 49f \n" - // 4 pixel loop \n" - LABELALIGN - "40: \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm2,%%xmm4 \n" - "punpcklbw %%xmm1,%%xmm2 \n" - "movdqa %%xmm2,%%xmm3 \n" - "punpcklwd %%xmm1,%%xmm2 \n" - "punpckhwd %%xmm1,%%xmm3 \n" - "punpckhbw %%xmm1,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "punpcklwd %%xmm1,%%xmm4 \n" - "punpckhwd %%xmm1,%%xmm5 \n" - "paddd %%xmm2,%%xmm0 \n" - "movdqu " MEMACCESS(2) ",%%xmm2 \n" - "paddd %%xmm0,%%xmm2 \n" - "paddd %%xmm3,%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,2) ",%%xmm3 \n" - "paddd %%xmm0,%%xmm3 \n" - "paddd %%xmm4,%%xmm0 \n" - "movdqu " MEMACCESS2(0x20,2) ",%%xmm4 \n" - "paddd %%xmm0,%%xmm4 \n" - "paddd %%xmm5,%%xmm0 \n" - "movdqu " MEMACCESS2(0x30,2) ",%%xmm5 \n" - "lea " MEMLEA(0x40,2) ",%2 \n" - "paddd %%xmm0,%%xmm5 \n" - "movdqu %%xmm2," MEMACCESS(1) " \n" - "movdqu %%xmm3," MEMACCESS2(0x10,1) " \n" - "movdqu %%xmm4," MEMACCESS2(0x20,1) " \n" - "movdqu %%xmm5," MEMACCESS2(0x30,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x4,%3 \n" - "jge 40b \n" + // 4 pixel loop. + LABELALIGN + "40: \n" + "movdqu (%0),%%xmm2 \n" + "lea 0x10(%0),%0 \n" + "movdqa %%xmm2,%%xmm4 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "movdqa %%xmm2,%%xmm3 \n" + "punpcklwd %%xmm1,%%xmm2 \n" + "punpckhwd %%xmm1,%%xmm3 \n" + "punpckhbw %%xmm1,%%xmm4 \n" + "movdqa %%xmm4,%%xmm5 \n" + "punpcklwd %%xmm1,%%xmm4 \n" + "punpckhwd %%xmm1,%%xmm5 \n" + "paddd %%xmm2,%%xmm0 \n" + "movdqu (%2),%%xmm2 \n" + "paddd %%xmm0,%%xmm2 \n" + "paddd %%xmm3,%%xmm0 \n" + "movdqu 0x10(%2),%%xmm3 \n" + "paddd %%xmm0,%%xmm3 \n" + "paddd %%xmm4,%%xmm0 \n" + "movdqu 0x20(%2),%%xmm4 \n" + "paddd %%xmm0,%%xmm4 \n" + "paddd %%xmm5,%%xmm0 \n" + "movdqu 0x30(%2),%%xmm5 \n" + "lea 0x40(%2),%2 \n" + "paddd %%xmm0,%%xmm5 \n" + "movdqu %%xmm2,(%1) \n" + "movdqu %%xmm3,0x10(%1) \n" + "movdqu %%xmm4,0x20(%1) \n" + "movdqu %%xmm5,0x30(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x4,%3 \n" + "jge 40b \n" - "49: \n" - "add $0x3,%3 \n" - "jl 19f \n" + "49: \n" + "add $0x3,%3 \n" + "jl 19f \n" - // 1 pixel loop \n" - LABELALIGN - "10: \n" - "movd " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "punpcklbw %%xmm1,%%xmm2 \n" - "punpcklwd %%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm0 \n" - "movdqu " MEMACCESS(2) ",%%xmm2 \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "paddd %%xmm0,%%xmm2 \n" - "movdqu %%xmm2," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x1,%3 \n" - "jge 10b \n" + // 1 pixel loop. 
+ LABELALIGN + "10: \n" + "movd (%0),%%xmm2 \n" + "lea 0x4(%0),%0 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "punpcklwd %%xmm1,%%xmm2 \n" + "paddd %%xmm2,%%xmm0 \n" + "movdqu (%2),%%xmm2 \n" + "lea 0x10(%2),%2 \n" + "paddd %%xmm0,%%xmm2 \n" + "movdqu %%xmm2,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x1,%3 \n" + "jge 10b \n" - "19: \n" - : "+r"(row), // %0 - "+r"(cumsum), // %1 - "+r"(previous_cumsum), // %2 - "+r"(width) // %3 - : - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + "19: \n" + : "+r"(row), // %0 + "+r"(cumsum), // %1 + "+r"(previous_cumsum), // %2 + "+r"(width) // %3 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_COMPUTECUMULATIVESUMROW_SSE2 #ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 -void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, +void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, + const int32_t* botleft, + int width, + int area, + uint8_t* dst, int count) { - asm volatile ( - "movd %5,%%xmm5 \n" - "cvtdq2ps %%xmm5,%%xmm5 \n" - "rcpss %%xmm5,%%xmm4 \n" - "pshufd $0x0,%%xmm4,%%xmm4 \n" - "sub $0x4,%3 \n" - "jl 49f \n" - "cmpl $0x80,%5 \n" - "ja 40f \n" + asm volatile( + "movd %5,%%xmm5 \n" + "cvtdq2ps %%xmm5,%%xmm5 \n" + "rcpss %%xmm5,%%xmm4 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + "sub $0x4,%3 \n" + "jl 49f \n" + "cmpl $0x80,%5 \n" + "ja 40f \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrld $0x10,%%xmm6 \n" - "cvtdq2ps %%xmm6,%%xmm6 \n" - "addps %%xmm6,%%xmm5 \n" - "mulps %%xmm4,%%xmm5 \n" - "cvtps2dq %%xmm5,%%xmm5 \n" - "packssdw %%xmm5,%%xmm5 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "pcmpeqb %%xmm6,%%xmm6 \n" + "psrld $0x10,%%xmm6 \n" + "cvtdq2ps %%xmm6,%%xmm6 \n" + "addps %%xmm6,%%xmm5 \n" + "mulps %%xmm4,%%xmm5 \n" + "cvtps2dq %%xmm5,%%xmm5 \n" + "packssdw %%xmm5,%%xmm5 \n" - // 4 pixel small loop \n" - LABELALIGN - "4: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 - MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1 - MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2 - MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3 - "lea " MEMLEA(0x40,0) ",%0 \n" - "psubd " MEMACCESS(1) ",%%xmm0 \n" - "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n" - "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n" - "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n" - MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 - MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1 - MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2 - MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3 - "lea " MEMLEA(0x40,1) ",%1 \n" - "packssdw %%xmm1,%%xmm0 \n" - "packssdw %%xmm3,%%xmm2 \n" - "pmulhuw %%xmm5,%%xmm0 \n" - "pmulhuw %%xmm5,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jge 4b \n" - "jmp 49f \n" + // 4 pixel small loop. 
+ LABELALIGN + "4: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "psubd 0x00(%0,%4,4),%%xmm0 \n" + "psubd 0x10(%0,%4,4),%%xmm1 \n" + "psubd 0x20(%0,%4,4),%%xmm2 \n" + "psubd 0x30(%0,%4,4),%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "psubd (%1),%%xmm0 \n" + "psubd 0x10(%1),%%xmm1 \n" + "psubd 0x20(%1),%%xmm2 \n" + "psubd 0x30(%1),%%xmm3 \n" + "paddd 0x00(%1,%4,4),%%xmm0 \n" + "paddd 0x10(%1,%4,4),%%xmm1 \n" + "paddd 0x20(%1,%4,4),%%xmm2 \n" + "paddd 0x30(%1,%4,4),%%xmm3 \n" + "lea 0x40(%1),%1 \n" + "packssdw %%xmm1,%%xmm0 \n" + "packssdw %%xmm3,%%xmm2 \n" + "pmulhuw %%xmm5,%%xmm0 \n" + "pmulhuw %%xmm5,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jge 4b \n" + "jmp 49f \n" - // 4 pixel loop \n" - LABELALIGN - "40: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 - MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1 - MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2 - MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3 - "lea " MEMLEA(0x40,0) ",%0 \n" - "psubd " MEMACCESS(1) ",%%xmm0 \n" - "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n" - "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n" - "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n" - MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 - MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1 - MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2 - MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3 - "lea " MEMLEA(0x40,1) ",%1 \n" - "cvtdq2ps %%xmm0,%%xmm0 \n" - "cvtdq2ps %%xmm1,%%xmm1 \n" - "mulps %%xmm4,%%xmm0 \n" - "mulps %%xmm4,%%xmm1 \n" - "cvtdq2ps %%xmm2,%%xmm2 \n" - "cvtdq2ps %%xmm3,%%xmm3 \n" - "mulps %%xmm4,%%xmm2 \n" - "mulps %%xmm4,%%xmm3 \n" - "cvtps2dq %%xmm0,%%xmm0 \n" - "cvtps2dq %%xmm1,%%xmm1 \n" - "cvtps2dq %%xmm2,%%xmm2 \n" - "cvtps2dq %%xmm3,%%xmm3 \n" - "packssdw %%xmm1,%%xmm0 \n" - "packssdw %%xmm3,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jge 40b \n" + // 4 pixel loop + LABELALIGN + "40: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "psubd 0x00(%0,%4,4),%%xmm0 \n" + "psubd 0x10(%0,%4,4),%%xmm1 \n" + "psubd 0x20(%0,%4,4),%%xmm2 \n" + "psubd 0x30(%0,%4,4),%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "psubd (%1),%%xmm0 \n" + "psubd 0x10(%1),%%xmm1 \n" + "psubd 0x20(%1),%%xmm2 \n" + "psubd 0x30(%1),%%xmm3 \n" + "paddd 0x00(%1,%4,4),%%xmm0 \n" + "paddd 0x10(%1,%4,4),%%xmm1 \n" + "paddd 0x20(%1,%4,4),%%xmm2 \n" + "paddd 0x30(%1,%4,4),%%xmm3 \n" + "lea 0x40(%1),%1 \n" + "cvtdq2ps %%xmm0,%%xmm0 \n" + "cvtdq2ps %%xmm1,%%xmm1 \n" + "mulps %%xmm4,%%xmm0 \n" + "mulps %%xmm4,%%xmm1 \n" + "cvtdq2ps %%xmm2,%%xmm2 \n" + "cvtdq2ps %%xmm3,%%xmm3 \n" + "mulps %%xmm4,%%xmm2 \n" + "mulps %%xmm4,%%xmm3 \n" + "cvtps2dq %%xmm0,%%xmm0 \n" + "cvtps2dq %%xmm1,%%xmm1 \n" + "cvtps2dq %%xmm2,%%xmm2 \n" + "cvtps2dq %%xmm3,%%xmm3 \n" + "packssdw %%xmm1,%%xmm0 \n" + "packssdw %%xmm3,%%xmm2 \n" + "packuswb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jge 40b \n" - "49: \n" - "add $0x3,%3 \n" - "jl 19f \n" + "49: \n" + "add $0x3,%3 \n" + "jl 19f \n" - // 1 pixel loop \n" - LABELALIGN - 
"10: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 - "lea " MEMLEA(0x10,0) ",%0 \n" - "psubd " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 - "lea " MEMLEA(0x10,1) ",%1 \n" - "cvtdq2ps %%xmm0,%%xmm0 \n" - "mulps %%xmm4,%%xmm0 \n" - "cvtps2dq %%xmm0,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movd %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x4,2) ",%2 \n" - "sub $0x1,%3 \n" - "jge 10b \n" - "19: \n" - : "+r"(topleft), // %0 - "+r"(botleft), // %1 - "+r"(dst), // %2 - "+rm"(count) // %3 - : "r"((intptr_t)(width)), // %4 - "rm"(area) // %5 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); + // 1 pixel loop + LABELALIGN + "10: \n" + "movdqu (%0),%%xmm0 \n" + "psubd 0x00(%0,%4,4),%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "psubd (%1),%%xmm0 \n" + "paddd 0x00(%1,%4,4),%%xmm0 \n" + "lea 0x10(%1),%1 \n" + "cvtdq2ps %%xmm0,%%xmm0 \n" + "mulps %%xmm4,%%xmm0 \n" + "cvtps2dq %%xmm0,%%xmm0 \n" + "packssdw %%xmm0,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movd %%xmm0,(%2) \n" + "lea 0x4(%2),%2 \n" + "sub $0x1,%3 \n" + "jge 10b \n" + "19: \n" + : "+r"(topleft), // %0 + "+r"(botleft), // %1 + "+r"(dst), // %2 + "+rm"(count) // %3 + : "r"((intptr_t)(width)), // %4 + "rm"(area) // %5 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 #ifdef HAS_ARGBAFFINEROW_SSE2 // Copy ARGB pixels from source image with slope to a row of destination. LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* src_dudv, int width) { +void ARGBAffineRow_SSE2(const uint8_t* src_argb, + int src_argb_stride, + uint8_t* dst_argb, + const float* src_dudv, + int width) { intptr_t src_argb_stride_temp = src_argb_stride; intptr_t temp; - asm volatile ( - "movq " MEMACCESS(3) ",%%xmm2 \n" - "movq " MEMACCESS2(0x08,3) ",%%xmm7 \n" - "shl $0x10,%1 \n" - "add $0x4,%1 \n" - "movd %1,%%xmm5 \n" - "sub $0x4,%4 \n" - "jl 49f \n" + asm volatile( + "movq (%3),%%xmm2 \n" + "movq 0x08(%3),%%xmm7 \n" + "shl $0x10,%1 \n" + "add $0x4,%1 \n" + "movd %1,%%xmm5 \n" + "sub $0x4,%4 \n" + "jl 49f \n" - "pshufd $0x44,%%xmm7,%%xmm7 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "movdqa %%xmm2,%%xmm0 \n" - "addps %%xmm7,%%xmm0 \n" - "movlhps %%xmm0,%%xmm2 \n" - "movdqa %%xmm7,%%xmm4 \n" - "addps %%xmm4,%%xmm4 \n" - "movdqa %%xmm2,%%xmm3 \n" - "addps %%xmm4,%%xmm3 \n" - "addps %%xmm4,%%xmm4 \n" + "pshufd $0x44,%%xmm7,%%xmm7 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "movdqa %%xmm2,%%xmm0 \n" + "addps %%xmm7,%%xmm0 \n" + "movlhps %%xmm0,%%xmm2 \n" + "movdqa %%xmm7,%%xmm4 \n" + "addps %%xmm4,%%xmm4 \n" + "movdqa %%xmm2,%%xmm3 \n" + "addps %%xmm4,%%xmm3 \n" + "addps %%xmm4,%%xmm4 \n" - // 4 pixel loop \n" - LABELALIGN - "40: \n" - "cvttps2dq %%xmm2,%%xmm0 \n" // x, y float to int first 2 - "cvttps2dq %%xmm3,%%xmm1 \n" // x, y float to int next 2 - "packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts - "pmaddwd %%xmm5,%%xmm0 \n" // off = x * 4 + y * stride - "movd %%xmm0,%k1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - "movd %%xmm0,%k5 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 - MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6 - "punpckldq %%xmm6,%%xmm1 \n" - "addps %%xmm4,%%xmm2 \n" - "movq %%xmm1," MEMACCESS(2) " \n" - "movd %%xmm0,%k1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - "movd %%xmm0,%k5 \n" - MEMOPREG(movd,0x00,0,1,1,xmm0) 
// movd (%0,%1,1),%%xmm0 - MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6 - "punpckldq %%xmm6,%%xmm0 \n" - "addps %%xmm4,%%xmm3 \n" - "movq %%xmm0," MEMACCESS2(0x08,2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%4 \n" - "jge 40b \n" + // 4 pixel loop + LABELALIGN + "40: \n" + "cvttps2dq %%xmm2,%%xmm0 \n" // x,y float->int first 2 + "cvttps2dq %%xmm3,%%xmm1 \n" // x,y float->int next 2 + "packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts + "pmaddwd %%xmm5,%%xmm0 \n" // off = x*4 + y*stride + "movd %%xmm0,%k1 \n" + "pshufd $0x39,%%xmm0,%%xmm0 \n" + "movd %%xmm0,%k5 \n" + "pshufd $0x39,%%xmm0,%%xmm0 \n" + "movd 0x00(%0,%1,1),%%xmm1 \n" + "movd 0x00(%0,%5,1),%%xmm6 \n" + "punpckldq %%xmm6,%%xmm1 \n" + "addps %%xmm4,%%xmm2 \n" + "movq %%xmm1,(%2) \n" + "movd %%xmm0,%k1 \n" + "pshufd $0x39,%%xmm0,%%xmm0 \n" + "movd %%xmm0,%k5 \n" + "movd 0x00(%0,%1,1),%%xmm0 \n" + "movd 0x00(%0,%5,1),%%xmm6 \n" + "punpckldq %%xmm6,%%xmm0 \n" + "addps %%xmm4,%%xmm3 \n" + "movq %%xmm0,0x08(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%4 \n" + "jge 40b \n" - "49: \n" - "add $0x3,%4 \n" - "jl 19f \n" + "49: \n" + "add $0x3,%4 \n" + "jl 19f \n" - // 1 pixel loop \n" - LABELALIGN - "10: \n" - "cvttps2dq %%xmm2,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "pmaddwd %%xmm5,%%xmm0 \n" - "addps %%xmm7,%%xmm2 \n" - "movd %%xmm0,%k1 \n" - MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0 - "movd %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x04,2) ",%2 \n" - "sub $0x1,%4 \n" - "jge 10b \n" - "19: \n" - : "+r"(src_argb), // %0 - "+r"(src_argb_stride_temp), // %1 - "+r"(dst_argb), // %2 - "+r"(src_dudv), // %3 - "+rm"(width), // %4 - "=&r"(temp) // %5 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + // 1 pixel loop + LABELALIGN + "10: \n" + "cvttps2dq %%xmm2,%%xmm0 \n" + "packssdw %%xmm0,%%xmm0 \n" + "pmaddwd %%xmm5,%%xmm0 \n" + "addps %%xmm7,%%xmm2 \n" + "movd %%xmm0,%k1 \n" + "movd 0x00(%0,%1,1),%%xmm0 \n" + "movd %%xmm0,(%2) \n" + "lea 0x04(%2),%2 \n" + "sub $0x1,%4 \n" + "jge 10b \n" + "19: \n" + : "+r"(src_argb), // %0 + "+r"(src_argb_stride_temp), // %1 + "+r"(dst_argb), // %2 + "+r"(src_dudv), // %3 + "+rm"(width), // %4 + "=&r"(temp) // %5 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBAFFINEROW_SSE2 #ifdef HAS_INTERPOLATEROW_SSSE3 // Bilinear filter 16x2 -> 16x1 -void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, +void InterpolateRow_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, int source_y_fraction) { - asm volatile ( - "sub %1,%0 \n" - "cmp $0x0,%3 \n" - "je 100f \n" - "cmp $0x80,%3 \n" - "je 50f \n" + asm volatile( + "sub %1,%0 \n" + "cmp $0x0,%3 \n" + "je 100f \n" + "cmp $0x80,%3 \n" + "je 50f \n" - "movd %3,%%xmm0 \n" - "neg %3 \n" - "add $0x100,%3 \n" - "movd %3,%%xmm5 \n" - "punpcklbw %%xmm0,%%xmm5 \n" - "punpcklwd %%xmm5,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "mov $0x80808080,%%eax \n" - "movd %%eax,%%xmm4 \n" - "pshufd $0x0,%%xmm4,%%xmm4 \n" + "movd %3,%%xmm0 \n" + "neg %3 \n" + "add $0x100,%3 \n" + "movd %3,%%xmm5 \n" + "punpcklbw %%xmm0,%%xmm5 \n" + "punpcklwd %%xmm5,%%xmm5 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "mov $0x80808080,%%eax \n" + "movd %%eax,%%xmm4 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" - // General purpose row blend. 
- LABELALIGN - "1: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm2) - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "psubb %%xmm4,%%xmm0 \n" - "psubb %%xmm4,%%xmm1 \n" - "movdqa %%xmm5,%%xmm2 \n" - "movdqa %%xmm5,%%xmm3 \n" - "pmaddubsw %%xmm0,%%xmm2 \n" - "pmaddubsw %%xmm1,%%xmm3 \n" - "paddw %%xmm4,%%xmm2 \n" - "paddw %%xmm4,%%xmm3 \n" - "psrlw $0x8,%%xmm2 \n" - "psrlw $0x8,%%xmm3 \n" - "packuswb %%xmm3,%%xmm2 \n" - MEMOPMEM(movdqu,xmm2,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "jmp 99f \n" + // General purpose row blend. + LABELALIGN + "1: \n" + "movdqu (%1),%%xmm0 \n" + "movdqu 0x00(%1,%4,1),%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "psubb %%xmm4,%%xmm0 \n" + "psubb %%xmm4,%%xmm1 \n" + "movdqa %%xmm5,%%xmm2 \n" + "movdqa %%xmm5,%%xmm3 \n" + "pmaddubsw %%xmm0,%%xmm2 \n" + "pmaddubsw %%xmm1,%%xmm3 \n" + "paddw %%xmm4,%%xmm2 \n" + "paddw %%xmm4,%%xmm3 \n" + "psrlw $0x8,%%xmm2 \n" + "psrlw $0x8,%%xmm3 \n" + "packuswb %%xmm3,%%xmm2 \n" + "movdqu %%xmm2,0x00(%1,%0,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "jmp 99f \n" - // Blend 50 / 50. - LABELALIGN - "50: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm1) - "pavgb %%xmm1,%%xmm0 \n" - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 50b \n" - "jmp 99f \n" + // Blend 50 / 50. + LABELALIGN + "50: \n" + "movdqu (%1),%%xmm0 \n" + "movdqu 0x00(%1,%4,1),%%xmm1 \n" + "pavgb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,0x00(%1,%0,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 50b \n" + "jmp 99f \n" - // Blend 100 / 0 - Copy row unchanged. - LABELALIGN - "100: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 100b \n" + // Blend 100 / 0 - Copy row unchanged. 
+ LABELALIGN + "100: \n" + "movdqu (%1),%%xmm0 \n" + "movdqu %%xmm0,0x00(%1,%0,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 100b \n" - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+rm"(dst_width), // %2 - "+r"(source_y_fraction) // %3 - : "r"((intptr_t)(src_stride)) // %4 - : "memory", "cc", "eax", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + "99: \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+rm"(dst_width), // %2 + "+r"(source_y_fraction) // %3 + : "r"((intptr_t)(src_stride)) // %4 + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_INTERPOLATEROW_SSSE3 #ifdef HAS_INTERPOLATEROW_AVX2 // Bilinear filter 32x2 -> 32x1 -void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, +void InterpolateRow_AVX2(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, int source_y_fraction) { - asm volatile ( - "cmp $0x0,%3 \n" - "je 100f \n" - "sub %1,%0 \n" - "cmp $0x80,%3 \n" - "je 50f \n" + asm volatile( + "cmp $0x0,%3 \n" + "je 100f \n" + "sub %1,%0 \n" + "cmp $0x80,%3 \n" + "je 50f \n" - "vmovd %3,%%xmm0 \n" - "neg %3 \n" - "add $0x100,%3 \n" - "vmovd %3,%%xmm5 \n" - "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" - "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" - "vbroadcastss %%xmm5,%%ymm5 \n" - "mov $0x80808080,%%eax \n" - "vmovd %%eax,%%xmm4 \n" - "vbroadcastss %%xmm4,%%ymm4 \n" + "vmovd %3,%%xmm0 \n" + "neg %3 \n" + "add $0x100,%3 \n" + "vmovd %3,%%xmm5 \n" + "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" + "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" + "vbroadcastss %%xmm5,%%ymm5 \n" + "mov $0x80808080,%%eax \n" + "vmovd %%eax,%%xmm4 \n" + "vbroadcastss %%xmm4,%%ymm4 \n" - // General purpose row blend. - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" - MEMOPREG(vmovdqu,0x00,1,4,1,ymm2) - "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" - "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" - "vpsubb %%ymm4,%%ymm1,%%ymm1 \n" - "vpsubb %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n" - "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n" - "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" - "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm1,%%ymm1 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "jmp 99f \n" + // General purpose row blend. + LABELALIGN + "1: \n" + "vmovdqu (%1),%%ymm0 \n" + "vmovdqu 0x00(%1,%4,1),%%ymm2 \n" + "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" + "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" + "vpsubb %%ymm4,%%ymm1,%%ymm1 \n" + "vpsubb %%ymm4,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n" + "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n" + "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" + "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,0x00(%1,%0,1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "jmp 99f \n" - // Blend 50 / 50. - LABELALIGN - "50: \n" - "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" - VMEMOPREG(vpavgb,0x00,1,4,1,ymm0,ymm0) // vpavgb (%1,%4,1),%%ymm0,%%ymm0 - MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 50b \n" - "jmp 99f \n" + // Blend 50 / 50. + LABELALIGN + "50: \n" + "vmovdqu (%1),%%ymm0 \n" + "vpavgb 0x00(%1,%4,1),%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,0x00(%1,%0,1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 50b \n" + "jmp 99f \n" - // Blend 100 / 0 - Copy row unchanged. 
- LABELALIGN - "100: \n" - "rep movsb " MEMMOVESTRING(1,0) " \n" - "jmp 999f \n" + // Blend 100 / 0 - Copy row unchanged. + LABELALIGN + "100: \n" + "rep movsb \n" + "jmp 999f \n" - "99: \n" - "vzeroupper \n" - "999: \n" - : "+D"(dst_ptr), // %0 - "+S"(src_ptr), // %1 - "+cm"(dst_width), // %2 - "+r"(source_y_fraction) // %3 - : "r"((intptr_t)(src_stride)) // %4 - : "memory", "cc", "eax", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" - ); + "99: \n" + "vzeroupper \n" + "999: \n" + : "+D"(dst_ptr), // %0 + "+S"(src_ptr), // %1 + "+cm"(dst_width), // %2 + "+r"(source_y_fraction) // %3 + : "r"((intptr_t)(src_stride)) // %4 + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm4", "xmm5"); } #endif // HAS_INTERPOLATEROW_AVX2 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width) { - asm volatile ( - "movdqu " MEMACCESS(3) ",%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm5,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(shuffler) // %3 - : "memory", "cc" - , "xmm0", "xmm1", "xmm5" - ); +void ARGBShuffleRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width) { + asm volatile( + + "movdqu (%3),%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pshufb %%xmm5,%%xmm0 \n" + "pshufb %%xmm5,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(shuffler) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_ARGBSHUFFLEROW_SSSE3 #ifdef HAS_ARGBSHUFFLEROW_AVX2 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
-void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width) { - asm volatile ( - "vbroadcastf128 " MEMACCESS(3) ",%%ymm5 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" - "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(shuffler) // %3 - : "memory", "cc" - , "xmm0", "xmm1", "xmm5" - ); +void ARGBShuffleRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width) { + asm volatile( + + "vbroadcastf128 (%3),%%ymm5 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" + "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(shuffler) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_ARGBSHUFFLEROW_AVX2 -#ifdef HAS_ARGBSHUFFLEROW_SSE2 -// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width) { - uintptr_t pixel_temp; - asm volatile ( - "pxor %%xmm5,%%xmm5 \n" - "mov " MEMACCESS(4) ",%k2 \n" - "cmp $0x3000102,%k2 \n" - "je 3012f \n" - "cmp $0x10203,%k2 \n" - "je 123f \n" - "cmp $0x30201,%k2 \n" - "je 321f \n" - "cmp $0x2010003,%k2 \n" - "je 2103f \n" - - LABELALIGN - "1: \n" - "movzb " MEMACCESS(4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS(1) " \n" - "movzb " MEMACCESS2(0x1,4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS2(0x1,1) " \n" - "movzb " MEMACCESS2(0x2,4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS2(0x2,1) " \n" - "movzb " MEMACCESS2(0x3,4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS2(0x3,1) " \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "lea " MEMLEA(0x4,1) ",%1 \n" - "sub $0x1,%3 \n" - "jg 1b \n" - "jmp 99f \n" - - LABELALIGN - "123: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0x1b,%%xmm0,%%xmm0 \n" - "pshuflw $0x1b,%%xmm0,%%xmm0 \n" - "pshufhw $0x1b,%%xmm1,%%xmm1 \n" - "pshuflw $0x1b,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%3 \n" - "jg 123b \n" - "jmp 99f \n" - - LABELALIGN - "321: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0x39,%%xmm0,%%xmm0 \n" - "pshuflw $0x39,%%xmm0,%%xmm0 \n" - "pshufhw $0x39,%%xmm1,%%xmm1 \n" - "pshuflw $0x39,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%3 \n" - "jg 321b \n" - "jmp 99f \n" - - LABELALIGN - "2103: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - 
"punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0x93,%%xmm0,%%xmm0 \n" - "pshuflw $0x93,%%xmm0,%%xmm0 \n" - "pshufhw $0x93,%%xmm1,%%xmm1 \n" - "pshuflw $0x93,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%3 \n" - "jg 2103b \n" - "jmp 99f \n" - - LABELALIGN - "3012: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0xc6,%%xmm0,%%xmm0 \n" - "pshuflw $0xc6,%%xmm0,%%xmm0 \n" - "pshufhw $0xc6,%%xmm1,%%xmm1 \n" - "pshuflw $0xc6,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%3 \n" - "jg 3012b \n" - - "99: \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "=&d"(pixel_temp), // %2 - "+r"(width) // %3 - : "r"(shuffler) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm5" - ); -} -#endif // HAS_ARGBSHUFFLEROW_SSE2 - #ifdef HAS_I422TOYUY2ROW_SSE2 -void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(1) ",%%xmm2 \n" - MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3 - "lea " MEMLEA(0x8,1) ",%1 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(3) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,3) " \n" - "lea " MEMLEA(0x20,3) ",%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_frame), // %3 - "+rm"(width) // %4 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3" - ); +void I422ToYUY2Row_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width) { + asm volatile( + + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movq (%1),%%xmm2 \n" + "movq 0x00(%1,%2,1),%%xmm1 \n" + "add $0x8,%1 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "movdqu (%0),%%xmm0 \n" + "add $0x10,%0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "movdqu %%xmm0,(%3) \n" + "movdqu %%xmm1,0x10(%3) \n" + "lea 0x20(%3),%3 \n" + "sub $0x10,%4 \n" + "jg 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_yuy2), // %3 + "+rm"(width) // %4 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_I422TOYUY2ROW_SSE2 #ifdef HAS_I422TOUYVYROW_SSE2 -void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(1) ",%%xmm2 \n" - MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3 - "lea " MEMLEA(0x8,1) ",%1 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" - "movdqu %%xmm1," MEMACCESS(3) " \n" - "movdqu %%xmm2," MEMACCESS2(0x10,3) " \n" - "lea " MEMLEA(0x20,3) ",%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_frame), // %3 - "+rm"(width) // %4 - : - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3" - ); +void I422ToUYVYRow_SSE2(const uint8_t* src_y, + 
const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width) { + asm volatile( + + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movq (%1),%%xmm2 \n" + "movq 0x00(%1,%2,1),%%xmm1 \n" + "add $0x8,%1 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm2,%%xmm1 \n" + "add $0x10,%0 \n" + "punpcklbw %%xmm0,%%xmm1 \n" + "punpckhbw %%xmm0,%%xmm2 \n" + "movdqu %%xmm1,(%3) \n" + "movdqu %%xmm2,0x10(%3) \n" + "lea 0x20(%3),%3 \n" + "sub $0x10,%4 \n" + "jg 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_uyvy), // %3 + "+rm"(width) // %4 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_I422TOUYVYROW_SSE2 -#ifdef HAS_ARGBPOLYNOMIALROW_SSE2 -void ARGBPolynomialRow_SSE2(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width) { - asm volatile ( - "pxor %%xmm3,%%xmm3 \n" +#ifdef HAS_I422TOYUY2ROW_AVX2 +void I422ToYUY2Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width) { + asm volatile( - // 2 pixel loop. - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm3,%%xmm0 \n" - "movdqa %%xmm0,%%xmm4 \n" - "punpcklwd %%xmm3,%%xmm0 \n" - "punpckhwd %%xmm3,%%xmm4 \n" - "cvtdq2ps %%xmm0,%%xmm0 \n" - "cvtdq2ps %%xmm4,%%xmm4 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm4,%%xmm5 \n" - "mulps " MEMACCESS2(0x10,3) ",%%xmm0 \n" - "mulps " MEMACCESS2(0x10,3) ",%%xmm4 \n" - "addps " MEMACCESS(3) ",%%xmm0 \n" - "addps " MEMACCESS(3) ",%%xmm4 \n" - "movdqa %%xmm1,%%xmm2 \n" - "movdqa %%xmm5,%%xmm6 \n" - "mulps %%xmm1,%%xmm2 \n" - "mulps %%xmm5,%%xmm6 \n" - "mulps %%xmm2,%%xmm1 \n" - "mulps %%xmm6,%%xmm5 \n" - "mulps " MEMACCESS2(0x20,3) ",%%xmm2 \n" - "mulps " MEMACCESS2(0x20,3) ",%%xmm6 \n" - "mulps " MEMACCESS2(0x30,3) ",%%xmm1 \n" - "mulps " MEMACCESS2(0x30,3) ",%%xmm5 \n" - "addps %%xmm2,%%xmm0 \n" - "addps %%xmm6,%%xmm4 \n" - "addps %%xmm1,%%xmm0 \n" - "addps %%xmm5,%%xmm4 \n" - "cvttps2dq %%xmm0,%%xmm0 \n" - "cvttps2dq %%xmm4,%%xmm4 \n" - "packuswb %%xmm4,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x2,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(poly) // %3 - : "memory", "cc" - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "vpmovzxbw (%1),%%ymm1 \n" + "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" + "add $0x10,%1 \n" + "vpsllw $0x8,%%ymm2,%%ymm2 \n" + "vpor %%ymm1,%%ymm2,%%ymm2 \n" + "vmovdqu (%0),%%ymm0 \n" + "add $0x20,%0 \n" + "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n" + "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n" + "vextractf128 $0x0,%%ymm1,(%3) \n" + "vextractf128 $0x0,%%ymm2,0x10(%3) \n" + "vextractf128 $0x1,%%ymm1,0x20(%3) \n" + "vextractf128 $0x1,%%ymm2,0x30(%3) \n" + "lea 0x40(%3),%3 \n" + "sub $0x20,%4 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_yuy2), // %3 + "+rm"(width) // %4 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); +} +#endif // HAS_I422TOYUY2ROW_AVX2 + +#ifdef HAS_I422TOUYVYROW_AVX2 +void I422ToUYVYRow_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width) { + asm volatile( + + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "vpmovzxbw (%1),%%ymm1 \n" + "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" + "add $0x10,%1 \n" + "vpsllw $0x8,%%ymm2,%%ymm2 \n" + "vpor %%ymm1,%%ymm2,%%ymm2 \n" + "vmovdqu (%0),%%ymm0 \n" + 
"add $0x20,%0 \n" + "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n" + "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n" + "vextractf128 $0x0,%%ymm1,(%3) \n" + "vextractf128 $0x0,%%ymm2,0x10(%3) \n" + "vextractf128 $0x1,%%ymm1,0x20(%3) \n" + "vextractf128 $0x1,%%ymm2,0x30(%3) \n" + "lea 0x40(%3),%3 \n" + "sub $0x20,%4 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_uyvy), // %3 + "+rm"(width) // %4 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2"); +} +#endif // HAS_I422TOUYVYROW_AVX2 + +#ifdef HAS_ARGBPOLYNOMIALROW_SSE2 +void ARGBPolynomialRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, + const float* poly, + int width) { + asm volatile( + + "pxor %%xmm3,%%xmm3 \n" + + // 2 pixel loop. + LABELALIGN + "1: \n" + "movq (%0),%%xmm0 \n" + "lea 0x8(%0),%0 \n" + "punpcklbw %%xmm3,%%xmm0 \n" + "movdqa %%xmm0,%%xmm4 \n" + "punpcklwd %%xmm3,%%xmm0 \n" + "punpckhwd %%xmm3,%%xmm4 \n" + "cvtdq2ps %%xmm0,%%xmm0 \n" + "cvtdq2ps %%xmm4,%%xmm4 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm4,%%xmm5 \n" + "mulps 0x10(%3),%%xmm0 \n" + "mulps 0x10(%3),%%xmm4 \n" + "addps (%3),%%xmm0 \n" + "addps (%3),%%xmm4 \n" + "movdqa %%xmm1,%%xmm2 \n" + "movdqa %%xmm5,%%xmm6 \n" + "mulps %%xmm1,%%xmm2 \n" + "mulps %%xmm5,%%xmm6 \n" + "mulps %%xmm2,%%xmm1 \n" + "mulps %%xmm6,%%xmm5 \n" + "mulps 0x20(%3),%%xmm2 \n" + "mulps 0x20(%3),%%xmm6 \n" + "mulps 0x30(%3),%%xmm1 \n" + "mulps 0x30(%3),%%xmm5 \n" + "addps %%xmm2,%%xmm0 \n" + "addps %%xmm6,%%xmm4 \n" + "addps %%xmm1,%%xmm0 \n" + "addps %%xmm5,%%xmm4 \n" + "cvttps2dq %%xmm0,%%xmm0 \n" + "cvttps2dq %%xmm4,%%xmm4 \n" + "packuswb %%xmm4,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x2,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(poly) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBPOLYNOMIALROW_SSE2 #ifdef HAS_ARGBPOLYNOMIALROW_AVX2 -void ARGBPolynomialRow_AVX2(const uint8* src_argb, - uint8* dst_argb, const float* poly, +void ARGBPolynomialRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + const float* poly, int width) { - asm volatile ( - "vbroadcastf128 " MEMACCESS(3) ",%%ymm4 \n" - "vbroadcastf128 " MEMACCESS2(0x10,3) ",%%ymm5 \n" - "vbroadcastf128 " MEMACCESS2(0x20,3) ",%%ymm6 \n" - "vbroadcastf128 " MEMACCESS2(0x30,3) ",%%ymm7 \n" + asm volatile( + "vbroadcastf128 (%3),%%ymm4 \n" + "vbroadcastf128 0x10(%3),%%ymm5 \n" + "vbroadcastf128 0x20(%3),%%ymm6 \n" + "vbroadcastf128 0x30(%3),%%ymm7 \n" - // 2 pixel loop. - LABELALIGN - "1: \n" - "vpmovzxbd " MEMACCESS(0) ",%%ymm0 \n" // 2 ARGB pixels - "lea " MEMLEA(0x8,0) ",%0 \n" - "vcvtdq2ps %%ymm0,%%ymm0 \n" // X 8 floats - "vmulps %%ymm0,%%ymm0,%%ymm2 \n" // X * X - "vmulps %%ymm7,%%ymm0,%%ymm3 \n" // C3 * X - "vfmadd132ps %%ymm5,%%ymm4,%%ymm0 \n" // result = C0 + C1 * X - "vfmadd231ps %%ymm6,%%ymm2,%%ymm0 \n" // result += C2 * X * X - "vfmadd231ps %%ymm3,%%ymm2,%%ymm0 \n" // result += C3 * X * X * X - "vcvttps2dq %%ymm0,%%ymm0 \n" - "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpackuswb %%xmm0,%%xmm0,%%xmm0 \n" - "vmovq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x2,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(poly) // %3 - : "memory", "cc", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + // 2 pixel loop. 
+ LABELALIGN + "1: \n" + "vpmovzxbd (%0),%%ymm0 \n" // 2 ARGB pixels + "lea 0x8(%0),%0 \n" + "vcvtdq2ps %%ymm0,%%ymm0 \n" // X 8 floats + "vmulps %%ymm0,%%ymm0,%%ymm2 \n" // X * X + "vmulps %%ymm7,%%ymm0,%%ymm3 \n" // C3 * X + "vfmadd132ps %%ymm5,%%ymm4,%%ymm0 \n" // result = C0 + C1 * X + "vfmadd231ps %%ymm6,%%ymm2,%%ymm0 \n" // result += C2 * X * X + "vfmadd231ps %%ymm3,%%ymm2,%%ymm0 \n" // result += C3 * X * X * + // X + "vcvttps2dq %%ymm0,%%ymm0 \n" + "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpackuswb %%xmm0,%%xmm0,%%xmm0 \n" + "vmovq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x2,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(poly) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } #endif // HAS_ARGBPOLYNOMIALROW_AVX2 +#ifdef HAS_HALFFLOATROW_SSE2 +static float kScaleBias = 1.9259299444e-34f; +void HalfFloatRow_SSE2(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { + scale *= kScaleBias; + asm volatile( + "movd %3,%%xmm4 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + "pxor %%xmm5,%%xmm5 \n" + "sub %0,%1 \n" + + // 16 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm2 \n" // 8 shorts + "add $0x10,%0 \n" + "movdqa %%xmm2,%%xmm3 \n" + "punpcklwd %%xmm5,%%xmm2 \n" // 8 ints in xmm2/1 + "cvtdq2ps %%xmm2,%%xmm2 \n" // 8 floats + "punpckhwd %%xmm5,%%xmm3 \n" + "cvtdq2ps %%xmm3,%%xmm3 \n" + "mulps %%xmm4,%%xmm2 \n" + "mulps %%xmm4,%%xmm3 \n" + "psrld $0xd,%%xmm2 \n" + "psrld $0xd,%%xmm3 \n" + "packssdw %%xmm3,%%xmm2 \n" + "movdqu %%xmm2,-0x10(%0,%1,1) \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(scale) // %3 + : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5"); +} +#endif // HAS_HALFFLOATROW_SSE2 + +#ifdef HAS_HALFFLOATROW_AVX2 +void HalfFloatRow_AVX2(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { + scale *= kScaleBias; + asm volatile( + "vbroadcastss %3, %%ymm4 \n" + "vpxor %%ymm5,%%ymm5,%%ymm5 \n" + "sub %0,%1 \n" + + // 16 pixel loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm2 \n" // 16 shorts + "add $0x20,%0 \n" + "vpunpckhwd %%ymm5,%%ymm2,%%ymm3 \n" // mutates + "vpunpcklwd %%ymm5,%%ymm2,%%ymm2 \n" + "vcvtdq2ps %%ymm3,%%ymm3 \n" + "vcvtdq2ps %%ymm2,%%ymm2 \n" + "vmulps %%ymm3,%%ymm4,%%ymm3 \n" + "vmulps %%ymm2,%%ymm4,%%ymm2 \n" + "vpsrld $0xd,%%ymm3,%%ymm3 \n" + "vpsrld $0xd,%%ymm2,%%ymm2 \n" + "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // unmutates + "vmovdqu %%ymm2,-0x20(%0,%1,1) \n" + "sub $0x10,%2 \n" + "jg 1b \n" + + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 +#if defined(__x86_64__) + : "x"(scale) // %3 +#else + : "m"(scale) // %3 +#endif + : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5"); +} +#endif // HAS_HALFFLOATROW_AVX2 + +#ifdef HAS_HALFFLOATROW_F16C +void HalfFloatRow_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { + asm volatile( + "vbroadcastss %3, %%ymm4 \n" + "sub %0,%1 \n" + + // 16 pixel loop. 
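// Editorial sketch (not part of the patch): the SSE2/AVX2 paths above
// pre-multiply scale by kScaleBias = 1.9259299444e-34f, i.e. 2^-112, which
// folds the float32 -> float16 exponent re-bias (127 - 15 = 112) into the
// multiply. After cvtdq2ps/mulps, the float32 bit pattern shifted right by 13
// (23 - 10 mantissa bits) already is the half-float pattern, so
// psrld/packssdw finish the conversion; the F16C loop that follows uses
// vcvtps2ph directly instead. Scalar model for one value (assumes v <= 65535
// and scale <= 1.0, so the truncated high exponent bits are zero):
static uint16_t HalfFloat_Scalar(uint16_t v, float scale) {
  float f = (float)v * (scale * 1.9259299444e-34f);  // scale * 2^-112
  uint32_t bits;
  memcpy(&bits, &f, sizeof(bits));  // type-pun; needs <string.h>
  return (uint16_t)(bits >> 13);    // truncating round, same as psrld $0xd
}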
+ LABELALIGN + "1: \n" + "vpmovzxwd (%0),%%ymm2 \n" // 16 shorts -> 16 ints + "vpmovzxwd 0x10(%0),%%ymm3 \n" + "vcvtdq2ps %%ymm2,%%ymm2 \n" + "vcvtdq2ps %%ymm3,%%ymm3 \n" + "vmulps %%ymm2,%%ymm4,%%ymm2 \n" + "vmulps %%ymm3,%%ymm4,%%ymm3 \n" + "vcvtps2ph $3, %%ymm2, %%xmm2 \n" + "vcvtps2ph $3, %%ymm3, %%xmm3 \n" + "vmovdqu %%xmm2,0x00(%0,%1,1) \n" + "vmovdqu %%xmm3,0x10(%0,%1,1) \n" + "add $0x20,%0 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 +#if defined(__x86_64__) + : "x"(scale) // %3 +#else + : "m"(scale) // %3 +#endif + : "memory", "cc", "xmm2", "xmm3", "xmm4"); +} +#endif // HAS_HALFFLOATROW_F16C + +#ifdef HAS_HALFFLOATROW_F16C +void HalfFloat1Row_F16C(const uint16_t* src, uint16_t* dst, float, int width) { + asm volatile( + "sub %0,%1 \n" + // 16 pixel loop. + LABELALIGN + "1: \n" + "vpmovzxwd (%0),%%ymm2 \n" // 16 shorts -> 16 ints + "vpmovzxwd 0x10(%0),%%ymm3 \n" + "vcvtdq2ps %%ymm2,%%ymm2 \n" + "vcvtdq2ps %%ymm3,%%ymm3 \n" + "vcvtps2ph $3, %%ymm2, %%xmm2 \n" + "vcvtps2ph $3, %%ymm3, %%xmm3 \n" + "vmovdqu %%xmm2,0x00(%0,%1,1) \n" + "vmovdqu %%xmm3,0x10(%0,%1,1) \n" + "add $0x20,%0 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "memory", "cc", "xmm2", "xmm3"); +} +#endif // HAS_HALFFLOATROW_F16C + #ifdef HAS_ARGBCOLORTABLEROW_X86 // Tranform ARGB pixels with color table. -void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, +void ARGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, int width) { uintptr_t pixel_temp; - asm volatile ( - // 1 pixel loop. - LABELALIGN - "1: \n" - "movzb " MEMACCESS(0) ",%1 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x4,0) " \n" - "movzb " MEMACCESS2(-0x3,0) ",%1 \n" - MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x3,0) " \n" - "movzb " MEMACCESS2(-0x2,0) ",%1 \n" - MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x2,0) " \n" - "movzb " MEMACCESS2(-0x1,0) ",%1 \n" - MEMOPARG(movzb,0x03,3,1,4,1) " \n" // movzb 0x3(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x1,0) " \n" - "dec %2 \n" - "jg 1b \n" - : "+r"(dst_argb), // %0 - "=&d"(pixel_temp), // %1 - "+r"(width) // %2 - : "r"(table_argb) // %3 - : "memory", "cc"); + asm volatile( + // 1 pixel loop. + LABELALIGN + "1: \n" + "movzb (%0),%1 \n" + "lea 0x4(%0),%0 \n" + "movzb 0x00(%3,%1,4),%1 \n" + "mov %b1,-0x4(%0) \n" + "movzb -0x3(%0),%1 \n" + "movzb 0x01(%3,%1,4),%1 \n" + "mov %b1,-0x3(%0) \n" + "movzb -0x2(%0),%1 \n" + "movzb 0x02(%3,%1,4),%1 \n" + "mov %b1,-0x2(%0) \n" + "movzb -0x1(%0),%1 \n" + "movzb 0x03(%3,%1,4),%1 \n" + "mov %b1,-0x1(%0) \n" + "dec %2 \n" + "jg 1b \n" + : "+r"(dst_argb), // %0 + "=&d"(pixel_temp), // %1 + "+r"(width) // %2 + : "r"(table_argb) // %3 + : "memory", "cc"); } #endif // HAS_ARGBCOLORTABLEROW_X86 #ifdef HAS_RGBCOLORTABLEROW_X86 // Tranform RGB pixels with color table. -void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { +void RGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, + int width) { uintptr_t pixel_temp; - asm volatile ( - // 1 pixel loop. 
- LABELALIGN - "1: \n" - "movzb " MEMACCESS(0) ",%1 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x4,0) " \n" - "movzb " MEMACCESS2(-0x3,0) ",%1 \n" - MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x3,0) " \n" - "movzb " MEMACCESS2(-0x2,0) ",%1 \n" - MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x2,0) " \n" - "dec %2 \n" - "jg 1b \n" - : "+r"(dst_argb), // %0 - "=&d"(pixel_temp), // %1 - "+r"(width) // %2 - : "r"(table_argb) // %3 - : "memory", "cc"); + asm volatile( + // 1 pixel loop. + LABELALIGN + "1: \n" + "movzb (%0),%1 \n" + "lea 0x4(%0),%0 \n" + "movzb 0x00(%3,%1,4),%1 \n" + "mov %b1,-0x4(%0) \n" + "movzb -0x3(%0),%1 \n" + "movzb 0x01(%3,%1,4),%1 \n" + "mov %b1,-0x3(%0) \n" + "movzb -0x2(%0),%1 \n" + "movzb 0x02(%3,%1,4),%1 \n" + "mov %b1,-0x2(%0) \n" + "dec %2 \n" + "jg 1b \n" + : "+r"(dst_argb), // %0 + "=&d"(pixel_temp), // %1 + "+r"(width) // %2 + : "r"(table_argb) // %3 + : "memory", "cc"); } #endif // HAS_RGBCOLORTABLEROW_X86 #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 // Tranform RGB pixels with luma table. -void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, +void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - const uint8* luma, uint32 lumacoeff) { + const uint8_t* luma, + uint32_t lumacoeff) { uintptr_t pixel_temp; uintptr_t table_temp; - asm volatile ( - "movd %6,%%xmm3 \n" - "pshufd $0x0,%%xmm3,%%xmm3 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "psllw $0x8,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" + asm volatile( + "movd %6,%%xmm3 \n" + "pshufd $0x0,%%xmm3,%%xmm3 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "psllw $0x8,%%xmm4 \n" + "pxor %%xmm5,%%xmm5 \n" - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(2) ",%%xmm0 \n" - "pmaddubsw %%xmm3,%%xmm0 \n" - "phaddw %%xmm0,%%xmm0 \n" - "pand %%xmm4,%%xmm0 \n" - "punpcklwd %%xmm5,%%xmm0 \n" - "movd %%xmm0,%k1 \n" // 32 bit offset - "add %5,%1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" + // 4 pixel loop. 
+ LABELALIGN + "1: \n" + "movdqu (%2),%%xmm0 \n" + "pmaddubsw %%xmm3,%%xmm0 \n" + "phaddw %%xmm0,%%xmm0 \n" + "pand %%xmm4,%%xmm0 \n" + "punpcklwd %%xmm5,%%xmm0 \n" + "movd %%xmm0,%k1 \n" // 32 bit offset + "add %5,%1 \n" + "pshufd $0x39,%%xmm0,%%xmm0 \n" - "movzb " MEMACCESS(2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS(3) " \n" - "movzb " MEMACCESS2(0x1,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x1,3) " \n" - "movzb " MEMACCESS2(0x2,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x2,3) " \n" - "movzb " MEMACCESS2(0x3,2) ",%0 \n" - "mov %b0," MEMACCESS2(0x3,3) " \n" + "movzb (%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,(%3) \n" + "movzb 0x1(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x1(%3) \n" + "movzb 0x2(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x2(%3) \n" + "movzb 0x3(%2),%0 \n" + "mov %b0,0x3(%3) \n" - "movd %%xmm0,%k1 \n" // 32 bit offset - "add %5,%1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" + "movd %%xmm0,%k1 \n" // 32 bit offset + "add %5,%1 \n" + "pshufd $0x39,%%xmm0,%%xmm0 \n" - "movzb " MEMACCESS2(0x4,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x4,3) " \n" - "movzb " MEMACCESS2(0x5,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x5,3) " \n" - "movzb " MEMACCESS2(0x6,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x6,3) " \n" - "movzb " MEMACCESS2(0x7,2) ",%0 \n" - "mov %b0," MEMACCESS2(0x7,3) " \n" + "movzb 0x4(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x4(%3) \n" + "movzb 0x5(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x5(%3) \n" + "movzb 0x6(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x6(%3) \n" + "movzb 0x7(%2),%0 \n" + "mov %b0,0x7(%3) \n" - "movd %%xmm0,%k1 \n" // 32 bit offset - "add %5,%1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" + "movd %%xmm0,%k1 \n" // 32 bit offset + "add %5,%1 \n" + "pshufd $0x39,%%xmm0,%%xmm0 \n" - "movzb " MEMACCESS2(0x8,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x8,3) " \n" - "movzb " MEMACCESS2(0x9,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x9,3) " \n" - "movzb " MEMACCESS2(0xa,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0xa,3) " \n" - "movzb " MEMACCESS2(0xb,2) ",%0 \n" - "mov %b0," MEMACCESS2(0xb,3) " \n" + "movzb 0x8(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x8(%3) \n" + "movzb 0x9(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0x9(%3) \n" + "movzb 0xa(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0xa(%3) \n" + "movzb 0xb(%2),%0 \n" + "mov %b0,0xb(%3) \n" - "movd %%xmm0,%k1 \n" // 32 bit offset - "add %5,%1 \n" + "movd %%xmm0,%k1 \n" // 32 bit offset + "add %5,%1 \n" - "movzb " MEMACCESS2(0xc,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0xc,3) " \n" - "movzb " MEMACCESS2(0xd,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0xd,3) " \n" - "movzb " MEMACCESS2(0xe,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0xe,3) " \n" - "movzb " MEMACCESS2(0xf,2) ",%0 \n" - "mov %b0," MEMACCESS2(0xf,3) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "lea " MEMLEA(0x10,3) ",%3 \n" - "sub $0x4,%4 \n" - "jg 1b 
\n" - : "=&d"(pixel_temp), // %0 - "=&a"(table_temp), // %1 - "+r"(src_argb), // %2 - "+r"(dst_argb), // %3 - "+rm"(width) // %4 - : "r"(luma), // %5 - "rm"(lumacoeff) // %6 - : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5" - ); + "movzb 0xc(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0xc(%3) \n" + "movzb 0xd(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0xd(%3) \n" + "movzb 0xe(%2),%0 \n" + "movzb 0x00(%1,%0,1),%0 \n" + "mov %b0,0xe(%3) \n" + "movzb 0xf(%2),%0 \n" + "mov %b0,0xf(%3) \n" + "lea 0x10(%2),%2 \n" + "lea 0x10(%3),%3 \n" + "sub $0x4,%4 \n" + "jg 1b \n" + : "=&d"(pixel_temp), // %0 + "=&a"(table_temp), // %1 + "+r"(src_argb), // %2 + "+r"(dst_argb), // %3 + "+rm"(width) // %4 + : "r"(luma), // %5 + "rm"(lumacoeff) // %6 + : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_mips.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_mips.cc deleted file mode 100644 index 285f0b5adc2a..000000000000 --- a/media/libvpx/libvpx/third_party/libyuv/source/row_mips.cc +++ /dev/null @@ -1,782 +0,0 @@ -/* - * Copyright (c) 2012 The LibYuv project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \ - (_MIPS_SIM == _MIPS_SIM_ABI32) - -#ifdef HAS_COPYROW_MIPS -void CopyRow_MIPS(const uint8* src, uint8* dst, int count) { - __asm__ __volatile__ ( - ".set noreorder \n" - ".set noat \n" - "slti $at, %[count], 8 \n" - "bne $at ,$zero, $last8 \n" - "xor $t8, %[src], %[dst] \n" - "andi $t8, $t8, 0x3 \n" - - "bne $t8, $zero, unaligned \n" - "negu $a3, %[dst] \n" - // make dst/src aligned - "andi $a3, $a3, 0x3 \n" - "beq $a3, $zero, $chk16w \n" - // word-aligned now count is the remining bytes count - "subu %[count], %[count], $a3 \n" - - "lwr $t8, 0(%[src]) \n" - "addu %[src], %[src], $a3 \n" - "swr $t8, 0(%[dst]) \n" - "addu %[dst], %[dst], $a3 \n" - - // Now the dst/src are mutually word-aligned with word-aligned addresses - "$chk16w: \n" - "andi $t8, %[count], 0x3f \n" // whole 64-B chunks? 
- // t8 is the byte count after 64-byte chunks - "beq %[count], $t8, chk8w \n" - // There will be at most 1 32-byte chunk after it - "subu $a3, %[count], $t8 \n" // the reminder - // Here a3 counts bytes in 16w chunks - "addu $a3, %[dst], $a3 \n" - // Now a3 is the final dst after 64-byte chunks - "addu $t0, %[dst], %[count] \n" - // t0 is the "past the end" address - - // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be past - // the "t0-32" address - // This means: for x=128 the last "safe" a1 address is "t0-160" - // Alternatively, for x=64 the last "safe" a1 address is "t0-96" - // we will use "pref 30,128(a1)", so "t0-160" is the limit - "subu $t9, $t0, 160 \n" - // t9 is the "last safe pref 30,128(a1)" address - "pref 0, 0(%[src]) \n" // first line of src - "pref 0, 32(%[src]) \n" // second line of src - "pref 0, 64(%[src]) \n" - "pref 30, 32(%[dst]) \n" - // In case the a1 > t9 don't use "pref 30" at all - "sgtu $v1, %[dst], $t9 \n" - "bgtz $v1, $loop16w \n" - "nop \n" - // otherwise, start with using pref30 - "pref 30, 64(%[dst]) \n" - "$loop16w: \n" - "pref 0, 96(%[src]) \n" - "lw $t0, 0(%[src]) \n" - "bgtz $v1, $skip_pref30_96 \n" // skip - "lw $t1, 4(%[src]) \n" - "pref 30, 96(%[dst]) \n" // continue - "$skip_pref30_96: \n" - "lw $t2, 8(%[src]) \n" - "lw $t3, 12(%[src]) \n" - "lw $t4, 16(%[src]) \n" - "lw $t5, 20(%[src]) \n" - "lw $t6, 24(%[src]) \n" - "lw $t7, 28(%[src]) \n" - "pref 0, 128(%[src]) \n" - // bring the next lines of src, addr 128 - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "lw $t0, 32(%[src]) \n" - "bgtz $v1, $skip_pref30_128 \n" // skip pref 30,128(a1) - "lw $t1, 36(%[src]) \n" - "pref 30, 128(%[dst]) \n" // set dest, addr 128 - "$skip_pref30_128: \n" - "lw $t2, 40(%[src]) \n" - "lw $t3, 44(%[src]) \n" - "lw $t4, 48(%[src]) \n" - "lw $t5, 52(%[src]) \n" - "lw $t6, 56(%[src]) \n" - "lw $t7, 60(%[src]) \n" - "pref 0, 160(%[src]) \n" - // bring the next lines of src, addr 160 - "sw $t0, 32(%[dst]) \n" - "sw $t1, 36(%[dst]) \n" - "sw $t2, 40(%[dst]) \n" - "sw $t3, 44(%[dst]) \n" - "sw $t4, 48(%[dst]) \n" - "sw $t5, 52(%[dst]) \n" - "sw $t6, 56(%[dst]) \n" - "sw $t7, 60(%[dst]) \n" - - "addiu %[dst], %[dst], 64 \n" // adding 64 to dest - "sgtu $v1, %[dst], $t9 \n" - "bne %[dst], $a3, $loop16w \n" - " addiu %[src], %[src], 64 \n" // adding 64 to src - "move %[count], $t8 \n" - - // Here we have src and dest word-aligned but less than 64-bytes to go - - "chk8w: \n" - "pref 0, 0x0(%[src]) \n" - "andi $t8, %[count], 0x1f \n" // 32-byte chunk? 
- // the t8 is the reminder count past 32-bytes - "beq %[count], $t8, chk1w \n" - // count=t8,no 32-byte chunk - " nop \n" - - "lw $t0, 0(%[src]) \n" - "lw $t1, 4(%[src]) \n" - "lw $t2, 8(%[src]) \n" - "lw $t3, 12(%[src]) \n" - "lw $t4, 16(%[src]) \n" - "lw $t5, 20(%[src]) \n" - "lw $t6, 24(%[src]) \n" - "lw $t7, 28(%[src]) \n" - "addiu %[src], %[src], 32 \n" - - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "addiu %[dst], %[dst], 32 \n" - - "chk1w: \n" - "andi %[count], $t8, 0x3 \n" - // now count is the reminder past 1w chunks - "beq %[count], $t8, $last8 \n" - " subu $a3, $t8, %[count] \n" - // a3 is count of bytes in 1w chunks - "addu $a3, %[dst], $a3 \n" - // now a3 is the dst address past the 1w chunks - // copying in words (4-byte chunks) - "$wordCopy_loop: \n" - "lw $t3, 0(%[src]) \n" - // the first t3 may be equal t0 ... optimize? - "addiu %[src], %[src],4 \n" - "addiu %[dst], %[dst],4 \n" - "bne %[dst], $a3,$wordCopy_loop \n" - " sw $t3, -4(%[dst]) \n" - - // For the last (<8) bytes - "$last8: \n" - "blez %[count], leave \n" - " addu $a3, %[dst], %[count] \n" // a3 -last dst address - "$last8loop: \n" - "lb $v1, 0(%[src]) \n" - "addiu %[src], %[src], 1 \n" - "addiu %[dst], %[dst], 1 \n" - "bne %[dst], $a3, $last8loop \n" - " sb $v1, -1(%[dst]) \n" - - "leave: \n" - " j $ra \n" - " nop \n" - - // - // UNALIGNED case - // - - "unaligned: \n" - // got here with a3="negu a1" - "andi $a3, $a3, 0x3 \n" // a1 is word aligned? - "beqz $a3, $ua_chk16w \n" - " subu %[count], %[count], $a3 \n" - // bytes left after initial a3 bytes - "lwr $v1, 0(%[src]) \n" - "lwl $v1, 3(%[src]) \n" - "addu %[src], %[src], $a3 \n" // a3 may be 1, 2 or 3 - "swr $v1, 0(%[dst]) \n" - "addu %[dst], %[dst], $a3 \n" - // below the dst will be word aligned (NOTE1) - "$ua_chk16w: \n" - "andi $t8, %[count], 0x3f \n" // whole 64-B chunks? 
- // t8 is the byte count after 64-byte chunks - "beq %[count], $t8, ua_chk8w \n" - // if a2==t8, no 64-byte chunks - // There will be at most 1 32-byte chunk after it - "subu $a3, %[count], $t8 \n" // the reminder - // Here a3 counts bytes in 16w chunks - "addu $a3, %[dst], $a3 \n" - // Now a3 is the final dst after 64-byte chunks - "addu $t0, %[dst], %[count] \n" // t0 "past the end" - "subu $t9, $t0, 160 \n" - // t9 is the "last safe pref 30,128(a1)" address - "pref 0, 0(%[src]) \n" // first line of src - "pref 0, 32(%[src]) \n" // second line addr 32 - "pref 0, 64(%[src]) \n" - "pref 30, 32(%[dst]) \n" - // safe, as we have at least 64 bytes ahead - // In case the a1 > t9 don't use "pref 30" at all - "sgtu $v1, %[dst], $t9 \n" - "bgtz $v1, $ua_loop16w \n" - // skip "pref 30,64(a1)" for too short arrays - " nop \n" - // otherwise, start with using pref30 - "pref 30, 64(%[dst]) \n" - "$ua_loop16w: \n" - "pref 0, 96(%[src]) \n" - "lwr $t0, 0(%[src]) \n" - "lwl $t0, 3(%[src]) \n" - "lwr $t1, 4(%[src]) \n" - "bgtz $v1, $ua_skip_pref30_96 \n" - " lwl $t1, 7(%[src]) \n" - "pref 30, 96(%[dst]) \n" - // continue setting up the dest, addr 96 - "$ua_skip_pref30_96: \n" - "lwr $t2, 8(%[src]) \n" - "lwl $t2, 11(%[src]) \n" - "lwr $t3, 12(%[src]) \n" - "lwl $t3, 15(%[src]) \n" - "lwr $t4, 16(%[src]) \n" - "lwl $t4, 19(%[src]) \n" - "lwr $t5, 20(%[src]) \n" - "lwl $t5, 23(%[src]) \n" - "lwr $t6, 24(%[src]) \n" - "lwl $t6, 27(%[src]) \n" - "lwr $t7, 28(%[src]) \n" - "lwl $t7, 31(%[src]) \n" - "pref 0, 128(%[src]) \n" - // bring the next lines of src, addr 128 - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "lwr $t0, 32(%[src]) \n" - "lwl $t0, 35(%[src]) \n" - "lwr $t1, 36(%[src]) \n" - "bgtz $v1, ua_skip_pref30_128 \n" - " lwl $t1, 39(%[src]) \n" - "pref 30, 128(%[dst]) \n" - // continue setting up the dest, addr 128 - "ua_skip_pref30_128: \n" - - "lwr $t2, 40(%[src]) \n" - "lwl $t2, 43(%[src]) \n" - "lwr $t3, 44(%[src]) \n" - "lwl $t3, 47(%[src]) \n" - "lwr $t4, 48(%[src]) \n" - "lwl $t4, 51(%[src]) \n" - "lwr $t5, 52(%[src]) \n" - "lwl $t5, 55(%[src]) \n" - "lwr $t6, 56(%[src]) \n" - "lwl $t6, 59(%[src]) \n" - "lwr $t7, 60(%[src]) \n" - "lwl $t7, 63(%[src]) \n" - "pref 0, 160(%[src]) \n" - // bring the next lines of src, addr 160 - "sw $t0, 32(%[dst]) \n" - "sw $t1, 36(%[dst]) \n" - "sw $t2, 40(%[dst]) \n" - "sw $t3, 44(%[dst]) \n" - "sw $t4, 48(%[dst]) \n" - "sw $t5, 52(%[dst]) \n" - "sw $t6, 56(%[dst]) \n" - "sw $t7, 60(%[dst]) \n" - - "addiu %[dst],%[dst],64 \n" // adding 64 to dest - "sgtu $v1,%[dst],$t9 \n" - "bne %[dst],$a3,$ua_loop16w \n" - " addiu %[src],%[src],64 \n" // adding 64 to src - "move %[count],$t8 \n" - - // Here we have src and dest word-aligned but less than 64-bytes to go - - "ua_chk8w: \n" - "pref 0, 0x0(%[src]) \n" - "andi $t8, %[count], 0x1f \n" // 32-byte chunk? 
- // the t8 is the reminder count - "beq %[count], $t8, $ua_chk1w \n" - // when count==t8, no 32-byte chunk - - "lwr $t0, 0(%[src]) \n" - "lwl $t0, 3(%[src]) \n" - "lwr $t1, 4(%[src]) \n" - "lwl $t1, 7(%[src]) \n" - "lwr $t2, 8(%[src]) \n" - "lwl $t2, 11(%[src]) \n" - "lwr $t3, 12(%[src]) \n" - "lwl $t3, 15(%[src]) \n" - "lwr $t4, 16(%[src]) \n" - "lwl $t4, 19(%[src]) \n" - "lwr $t5, 20(%[src]) \n" - "lwl $t5, 23(%[src]) \n" - "lwr $t6, 24(%[src]) \n" - "lwl $t6, 27(%[src]) \n" - "lwr $t7, 28(%[src]) \n" - "lwl $t7, 31(%[src]) \n" - "addiu %[src], %[src], 32 \n" - - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "addiu %[dst], %[dst], 32 \n" - - "$ua_chk1w: \n" - "andi %[count], $t8, 0x3 \n" - // now count is the reminder past 1w chunks - "beq %[count], $t8, ua_smallCopy \n" - "subu $a3, $t8, %[count] \n" - // a3 is count of bytes in 1w chunks - "addu $a3, %[dst], $a3 \n" - // now a3 is the dst address past the 1w chunks - - // copying in words (4-byte chunks) - "$ua_wordCopy_loop: \n" - "lwr $v1, 0(%[src]) \n" - "lwl $v1, 3(%[src]) \n" - "addiu %[src], %[src], 4 \n" - "addiu %[dst], %[dst], 4 \n" - // note: dst=a1 is word aligned here, see NOTE1 - "bne %[dst], $a3, $ua_wordCopy_loop \n" - " sw $v1,-4(%[dst]) \n" - - // Now less than 4 bytes (value in count) left to copy - "ua_smallCopy: \n" - "beqz %[count], leave \n" - " addu $a3, %[dst], %[count] \n" // a3 = last dst address - "$ua_smallCopy_loop: \n" - "lb $v1, 0(%[src]) \n" - "addiu %[src], %[src], 1 \n" - "addiu %[dst], %[dst], 1 \n" - "bne %[dst],$a3,$ua_smallCopy_loop \n" - " sb $v1, -1(%[dst]) \n" - - "j $ra \n" - " nop \n" - ".set at \n" - ".set reorder \n" - : [dst] "+r" (dst), [src] "+r" (src) - : [count] "r" (count) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", - "t8", "t9", "a3", "v1", "at" - ); -} -#endif // HAS_COPYROW_MIPS - -// DSPR2 functions -#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \ - (__mips_dsp_rev >= 2) && \ - (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6) - -void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "srl $t4, %[width], 4 \n" // multiplies of 16 - "blez $t4, 2f \n" - " andi %[width], %[width], 0xf \n" // residual - - "1: \n" - "addiu $t4, $t4, -1 \n" - "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0 - "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2 - "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4 - "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6 - "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8 - "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | U10 - "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | U12 - "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | U14 - "addiu %[src_uv], %[src_uv], 32 \n" - "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0 - "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0 - "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4 - "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4 - "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8 - "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8 - "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12 - "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12 - "sw $t9, 0(%[dst_v]) \n" - "sw $t0, 0(%[dst_u]) \n" - "sw $t1, 4(%[dst_v]) \n" - "sw $t2, 4(%[dst_u]) \n" - "sw $t3, 8(%[dst_v]) \n" - "sw $t5, 8(%[dst_u]) \n" - "sw $t6, 12(%[dst_v]) \n" - "sw 
$t7, 12(%[dst_u]) \n" - "addiu %[dst_v], %[dst_v], 16 \n" - "bgtz $t4, 1b \n" - " addiu %[dst_u], %[dst_u], 16 \n" - - "beqz %[width], 3f \n" - " nop \n" - - "2: \n" - "lbu $t0, 0(%[src_uv]) \n" - "lbu $t1, 1(%[src_uv]) \n" - "addiu %[src_uv], %[src_uv], 2 \n" - "addiu %[width], %[width], -1 \n" - "sb $t0, 0(%[dst_u]) \n" - "sb $t1, 0(%[dst_v]) \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "bgtz %[width], 2b \n" - " addiu %[dst_v], %[dst_v], 1 \n" - - "3: \n" - ".set pop \n" - : [src_uv] "+r" (src_uv), - [width] "+r" (width), - [dst_u] "+r" (dst_u), - [dst_v] "+r" (dst_v) - : - : "t0", "t1", "t2", "t3", - "t4", "t5", "t6", "t7", "t8", "t9" - ); -} - -void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "srl $t4, %[width], 4 \n" // multiplies of 16 - "andi $t5, %[width], 0xf \n" - "blez $t4, 2f \n" - " addu %[src], %[src], %[width] \n" // src += width - - "1: \n" - "lw $t0, -16(%[src]) \n" // |3|2|1|0| - "lw $t1, -12(%[src]) \n" // |7|6|5|4| - "lw $t2, -8(%[src]) \n" // |11|10|9|8| - "lw $t3, -4(%[src]) \n" // |15|14|13|12| - "wsbh $t0, $t0 \n" // |2|3|0|1| - "wsbh $t1, $t1 \n" // |6|7|4|5| - "wsbh $t2, $t2 \n" // |10|11|8|9| - "wsbh $t3, $t3 \n" // |14|15|12|13| - "rotr $t0, $t0, 16 \n" // |0|1|2|3| - "rotr $t1, $t1, 16 \n" // |4|5|6|7| - "rotr $t2, $t2, 16 \n" // |8|9|10|11| - "rotr $t3, $t3, 16 \n" // |12|13|14|15| - "addiu %[src], %[src], -16 \n" - "addiu $t4, $t4, -1 \n" - "sw $t3, 0(%[dst]) \n" // |15|14|13|12| - "sw $t2, 4(%[dst]) \n" // |11|10|9|8| - "sw $t1, 8(%[dst]) \n" // |7|6|5|4| - "sw $t0, 12(%[dst]) \n" // |3|2|1|0| - "bgtz $t4, 1b \n" - " addiu %[dst], %[dst], 16 \n" - "beqz $t5, 3f \n" - " nop \n" - - "2: \n" - "lbu $t0, -1(%[src]) \n" - "addiu $t5, $t5, -1 \n" - "addiu %[src], %[src], -1 \n" - "sb $t0, 0(%[dst]) \n" - "bgez $t5, 2b \n" - " addiu %[dst], %[dst], 1 \n" - - "3: \n" - ".set pop \n" - : [src] "+r" (src), [dst] "+r" (dst) - : [width] "r" (width) - : "t0", "t1", "t2", "t3", "t4", "t5" - ); -} - -void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width) { - int x; - int y; - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "addu $t4, %[width], %[width] \n" - "srl %[x], %[width], 4 \n" - "andi %[y], %[width], 0xf \n" - "blez %[x], 2f \n" - " addu %[src_uv], %[src_uv], $t4 \n" - - "1: \n" - "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0| - "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4| - "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8| - "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12| - "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16| - "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20| - "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24| - "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28| - - "rotr $t0, $t0, 16 \n" // |1|0|3|2| - "rotr $t1, $t1, 16 \n" // |5|4|7|6| - "rotr $t2, $t2, 16 \n" // |9|8|11|10| - "rotr $t3, $t3, 16 \n" // |13|12|15|14| - "rotr $t4, $t4, 16 \n" // |17|16|19|18| - "rotr $t6, $t6, 16 \n" // |21|20|23|22| - "rotr $t7, $t7, 16 \n" // |25|24|27|26| - "rotr $t8, $t8, 16 \n" // |29|28|31|30| - "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6| - "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7| - "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14| - "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15| - "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22| - "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23| - "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30| - "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31| - "addiu %[src_uv], %[src_uv], -32 \n" - "addiu %[x], %[x], -1 \n" - "swr $t4, 0(%[dst_u]) \n" - 
"swl $t4, 3(%[dst_u]) \n" // |30|28|26|24| - "swr $t6, 0(%[dst_v]) \n" - "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25| - "swr $t2, 4(%[dst_u]) \n" - "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16| - "swr $t3, 4(%[dst_v]) \n" - "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17| - "swr $t0, 8(%[dst_u]) \n" - "swl $t0, 11(%[dst_u]) \n" // |14|12|10|8| - "swr $t1, 8(%[dst_v]) \n" - "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9| - "swr $t9, 12(%[dst_u]) \n" - "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0| - "swr $t5, 12(%[dst_v]) \n" - "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1| - "addiu %[dst_v], %[dst_v], 16 \n" - "bgtz %[x], 1b \n" - " addiu %[dst_u], %[dst_u], 16 \n" - "beqz %[y], 3f \n" - " nop \n" - "b 2f \n" - " nop \n" - - "2: \n" - "lbu $t0, -2(%[src_uv]) \n" - "lbu $t1, -1(%[src_uv]) \n" - "addiu %[src_uv], %[src_uv], -2 \n" - "addiu %[y], %[y], -1 \n" - "sb $t0, 0(%[dst_u]) \n" - "sb $t1, 0(%[dst_v]) \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "bgtz %[y], 2b \n" - " addiu %[dst_v], %[dst_v], 1 \n" - - "3: \n" - ".set pop \n" - : [src_uv] "+r" (src_uv), - [dst_u] "+r" (dst_u), - [dst_v] "+r" (dst_v), - [x] "=&r" (x), - [y] "=&r" (y) - : [width] "r" (width) - : "t0", "t1", "t2", "t3", "t4", - "t5", "t7", "t8", "t9" - ); -} - -// Convert (4 Y and 2 VU) I422 and arrange RGB values into -// t5 = | 0 | B0 | 0 | b0 | -// t4 = | 0 | B1 | 0 | b1 | -// t9 = | 0 | G0 | 0 | g0 | -// t8 = | 0 | G1 | 0 | g1 | -// t2 = | 0 | R0 | 0 | r0 | -// t1 = | 0 | R1 | 0 | r1 | -#define YUVTORGB \ - "lw $t0, 0(%[y_buf]) \n" \ - "lhu $t1, 0(%[u_buf]) \n" \ - "lhu $t2, 0(%[v_buf]) \n" \ - "preceu.ph.qbr $t1, $t1 \n" \ - "preceu.ph.qbr $t2, $t2 \n" \ - "preceu.ph.qbra $t3, $t0 \n" \ - "preceu.ph.qbla $t0, $t0 \n" \ - "subu.ph $t1, $t1, $s5 \n" \ - "subu.ph $t2, $t2, $s5 \n" \ - "subu.ph $t3, $t3, $s4 \n" \ - "subu.ph $t0, $t0, $s4 \n" \ - "mul.ph $t3, $t3, $s0 \n" \ - "mul.ph $t0, $t0, $s0 \n" \ - "shll.ph $t4, $t1, 0x7 \n" \ - "subu.ph $t4, $t4, $t1 \n" \ - "mul.ph $t6, $t1, $s1 \n" \ - "mul.ph $t1, $t2, $s2 \n" \ - "addq_s.ph $t5, $t4, $t3 \n" \ - "addq_s.ph $t4, $t4, $t0 \n" \ - "shra.ph $t5, $t5, 6 \n" \ - "shra.ph $t4, $t4, 6 \n" \ - "addiu %[u_buf], 2 \n" \ - "addiu %[v_buf], 2 \n" \ - "addu.ph $t6, $t6, $t1 \n" \ - "mul.ph $t1, $t2, $s3 \n" \ - "addu.ph $t9, $t6, $t3 \n" \ - "addu.ph $t8, $t6, $t0 \n" \ - "shra.ph $t9, $t9, 6 \n" \ - "shra.ph $t8, $t8, 6 \n" \ - "addu.ph $t2, $t1, $t3 \n" \ - "addu.ph $t1, $t1, $t0 \n" \ - "shra.ph $t2, $t2, 6 \n" \ - "shra.ph $t1, $t1, 6 \n" \ - "subu.ph $t5, $t5, $s5 \n" \ - "subu.ph $t4, $t4, $s5 \n" \ - "subu.ph $t9, $t9, $s5 \n" \ - "subu.ph $t8, $t8, $s5 \n" \ - "subu.ph $t2, $t2, $s5 \n" \ - "subu.ph $t1, $t1, $s5 \n" \ - "shll_s.ph $t5, $t5, 8 \n" \ - "shll_s.ph $t4, $t4, 8 \n" \ - "shll_s.ph $t9, $t9, 8 \n" \ - "shll_s.ph $t8, $t8, 8 \n" \ - "shll_s.ph $t2, $t2, 8 \n" \ - "shll_s.ph $t1, $t1, 8 \n" \ - "shra.ph $t5, $t5, 8 \n" \ - "shra.ph $t4, $t4, 8 \n" \ - "shra.ph $t9, $t9, 8 \n" \ - "shra.ph $t8, $t8, 8 \n" \ - "shra.ph $t2, $t2, 8 \n" \ - "shra.ph $t1, $t1, 8 \n" \ - "addu.ph $t5, $t5, $s5 \n" \ - "addu.ph $t4, $t4, $s5 \n" \ - "addu.ph $t9, $t9, $s5 \n" \ - "addu.ph $t8, $t8, $s5 \n" \ - "addu.ph $t2, $t2, $s5 \n" \ - "addu.ph $t1, $t1, $s5 \n" - -// TODO(fbarchard): accept yuv conversion constants. 
-void I422ToARGBRow_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " repl.ph $s0, 74 \n" // |YG|YG| = |74|74| - "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25| - "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52| - "repl.ph $s3, 102 \n" // |VR|VR| = |102|102| - "repl.ph $s4, 16 \n" // |0|16|0|16| - "repl.ph $s5, 128 \n" // |128|128| // clipping - "lui $s6, 0xff00 \n" - "ori $s6, 0xff00 \n" // |ff|00|ff|00|ff| - - "1: \n" - YUVTORGB -// Arranging into argb format - "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1| - "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0| - "addiu %[width], -4 \n" - "precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0| - "precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0| - "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0| - - "addiu %[y_buf], 4 \n" - "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0| - "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0| - "or $t1, $t1, $s6 \n" // |ff|R1|ff|R0| - "or $t2, $t2, $s6 \n" // |ff|r1|ff|r0| - "precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1| - "precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1| - "sll $t9, $t9, 16 \n" - "sll $t8, $t8, 16 \n" - "packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0| - "packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0| -// Store results. - "sw $t2, 0(%[rgb_buf]) \n" - "sw $t0, 4(%[rgb_buf]) \n" - "sw $t1, 8(%[rgb_buf]) \n" - "sw $t3, 12(%[rgb_buf]) \n" - "bnez %[width], 1b \n" - " addiu %[rgb_buf], 16 \n" - "2: \n" - ".set pop \n" - :[y_buf] "+r" (y_buf), - [u_buf] "+r" (u_buf), - [v_buf] "+r" (v_buf), - [width] "+r" (width), - [rgb_buf] "+r" (rgb_buf) - : - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9", - "s0", "s1", "s2", "s3", - "s4", "s5", "s6" - ); -} - -// Bilinear filter 8x2 -> 8x1 -void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { - int y0_fraction = 256 - source_y_fraction; - const uint8* src_ptr1 = src_ptr + src_stride; - - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "replv.ph $t0, %[y0_fraction] \n" - "replv.ph $t1, %[source_y_fraction] \n" - - "1: \n" - "lw $t2, 0(%[src_ptr]) \n" - "lw $t3, 0(%[src_ptr1]) \n" - "lw $t4, 4(%[src_ptr]) \n" - "lw $t5, 4(%[src_ptr1]) \n" - "muleu_s.ph.qbl $t6, $t2, $t0 \n" - "muleu_s.ph.qbr $t7, $t2, $t0 \n" - "muleu_s.ph.qbl $t8, $t3, $t1 \n" - "muleu_s.ph.qbr $t9, $t3, $t1 \n" - "muleu_s.ph.qbl $t2, $t4, $t0 \n" - "muleu_s.ph.qbr $t3, $t4, $t0 \n" - "muleu_s.ph.qbl $t4, $t5, $t1 \n" - "muleu_s.ph.qbr $t5, $t5, $t1 \n" - "addq.ph $t6, $t6, $t8 \n" - "addq.ph $t7, $t7, $t9 \n" - "addq.ph $t2, $t2, $t4 \n" - "addq.ph $t3, $t3, $t5 \n" - "shra.ph $t6, $t6, 8 \n" - "shra.ph $t7, $t7, 8 \n" - "shra.ph $t2, $t2, 8 \n" - "shra.ph $t3, $t3, 8 \n" - "precr.qb.ph $t6, $t6, $t7 \n" - "precr.qb.ph $t2, $t2, $t3 \n" - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[src_ptr1], %[src_ptr1], 8 \n" - "addiu %[dst_width], %[dst_width], -8 \n" - "sw $t6, 0(%[dst_ptr]) \n" - "sw $t2, 4(%[dst_ptr]) \n" - "bgtz %[dst_width], 1b \n" - " addiu %[dst_ptr], %[dst_ptr], 8 \n" - - ".set pop \n" - : [dst_ptr] "+r" (dst_ptr), - [src_ptr1] "+r" (src_ptr1), - [src_ptr] "+r" (src_ptr), - [dst_width] "+r" (dst_width) - : [source_y_fraction] "r" (source_y_fraction), - [y0_fraction] "r" (y0_fraction), - [src_stride] "r" (src_stride) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9" - ); -} -#endif // __mips_dsp_rev >= 
2 - -#endif // defined(__mips__) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_msa.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_msa.cc new file mode 100644 index 000000000000..4fb2631f0b3b --- /dev/null +++ b/media/libvpx/libvpx/third_party/libyuv/source/row_msa.cc @@ -0,0 +1,3512 @@ +/* + * Copyright 2016 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "libyuv/row.h" + +// This module is for GCC MSA +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#include "libyuv/macros_msa.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#define ALPHA_VAL (-1) + +// Fill YUV -> RGB conversion constants into vectors +#define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, bb, bg, br, yg) \ + { \ + ub = __msa_fill_w(yuvconst->kUVToB[0]); \ + vr = __msa_fill_w(yuvconst->kUVToR[1]); \ + ug = __msa_fill_w(yuvconst->kUVToG[0]); \ + vg = __msa_fill_w(yuvconst->kUVToG[1]); \ + bb = __msa_fill_w(yuvconst->kUVBiasB[0]); \ + bg = __msa_fill_w(yuvconst->kUVBiasG[0]); \ + br = __msa_fill_w(yuvconst->kUVBiasR[0]); \ + yg = __msa_fill_w(yuvconst->kYToRgb[0]); \ + } + +// Load YUV 422 pixel data +#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \ + { \ + uint64_t y_m; \ + uint32_t u_m, v_m; \ + v4i32 zero_m = {0}; \ + y_m = LD(psrc_y); \ + u_m = LW(psrc_u); \ + v_m = LW(psrc_v); \ + out_y = (v16u8)__msa_insert_d((v2i64)zero_m, 0, (int64_t)y_m); \ + out_u = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)u_m); \ + out_v = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)v_m); \ + } + +// Clip input vector elements between 0 to 255 +#define CLIP_0TO255(in0, in1, in2, in3, in4, in5) \ + { \ + v4i32 max_m = __msa_ldi_w(0xFF); \ + \ + in0 = __msa_maxi_s_w(in0, 0); \ + in1 = __msa_maxi_s_w(in1, 0); \ + in2 = __msa_maxi_s_w(in2, 0); \ + in3 = __msa_maxi_s_w(in3, 0); \ + in4 = __msa_maxi_s_w(in4, 0); \ + in5 = __msa_maxi_s_w(in5, 0); \ + in0 = __msa_min_s_w(max_m, in0); \ + in1 = __msa_min_s_w(max_m, in1); \ + in2 = __msa_min_s_w(max_m, in2); \ + in3 = __msa_min_s_w(max_m, in3); \ + in4 = __msa_min_s_w(max_m, in4); \ + in5 = __msa_min_s_w(max_m, in5); \ + } + +// Convert 8 pixels of YUV 420 to RGB. 
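// Editorial sketch (not part of the patch): YUVTORGB below is the MSA form of
// libyuv's usual fixed-point YUV -> RGB mapping, using the per-standard
// constants filled in by YUVTORGB_SETUP. Scalar model of one pixel:
static uint8_t Clamp255(int32_t v) {  // what CLIP_0TO255 does per lane
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

static void YuvPixel_Scalar(uint8_t y, uint8_t u, uint8_t v,
                            uint8_t* b, uint8_t* g, uint8_t* r,
                            int32_t ub, int32_t ug, int32_t vg, int32_t vr,
                            int32_t bb, int32_t bg, int32_t br, int32_t yg) {
  // __msa_ilvr_b(y, y) widens luma to y * 0x0101 before the yg multiply.
  int32_t y1 = ((int32_t)y * 0x0101 * yg) >> 16;
  *b = Clamp255((y1 - u * ub + bb) >> 6);
  *g = Clamp255((y1 - (u * ug + v * vg) + bg) >> 6);
  *r = Clamp255((y1 - v * vr + br) >> 6);
}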
+#define YUVTORGB(in_y, in_uv, ubvr, ugvg, bb, bg, br, yg, out_b, out_g, out_r) \ + { \ + v8i16 vec0_m, vec1_m; \ + v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m; \ + v4i32 reg5_m, reg6_m, reg7_m; \ + v16i8 zero_m = {0}; \ + \ + vec0_m = (v8i16)__msa_ilvr_b((v16i8)in_y, (v16i8)in_y); \ + vec1_m = (v8i16)__msa_ilvr_b((v16i8)zero_m, (v16i8)in_uv); \ + reg0_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec0_m); \ + reg1_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec0_m); \ + reg2_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec1_m); \ + reg3_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec1_m); \ + reg0_m *= yg; \ + reg1_m *= yg; \ + reg2_m *= ubvr; \ + reg3_m *= ubvr; \ + reg0_m = __msa_srai_w(reg0_m, 16); \ + reg1_m = __msa_srai_w(reg1_m, 16); \ + reg4_m = __msa_dotp_s_w((v8i16)vec1_m, (v8i16)ugvg); \ + reg5_m = __msa_ilvev_w(reg2_m, reg2_m); \ + reg6_m = __msa_ilvev_w(reg3_m, reg3_m); \ + reg7_m = __msa_ilvr_w(reg4_m, reg4_m); \ + reg2_m = __msa_ilvod_w(reg2_m, reg2_m); \ + reg3_m = __msa_ilvod_w(reg3_m, reg3_m); \ + reg4_m = __msa_ilvl_w(reg4_m, reg4_m); \ + reg5_m = reg0_m - reg5_m; \ + reg6_m = reg1_m - reg6_m; \ + reg2_m = reg0_m - reg2_m; \ + reg3_m = reg1_m - reg3_m; \ + reg7_m = reg0_m - reg7_m; \ + reg4_m = reg1_m - reg4_m; \ + reg5_m += bb; \ + reg6_m += bb; \ + reg7_m += bg; \ + reg4_m += bg; \ + reg2_m += br; \ + reg3_m += br; \ + reg5_m = __msa_srai_w(reg5_m, 6); \ + reg6_m = __msa_srai_w(reg6_m, 6); \ + reg7_m = __msa_srai_w(reg7_m, 6); \ + reg4_m = __msa_srai_w(reg4_m, 6); \ + reg2_m = __msa_srai_w(reg2_m, 6); \ + reg3_m = __msa_srai_w(reg3_m, 6); \ + CLIP_0TO255(reg5_m, reg6_m, reg7_m, reg4_m, reg2_m, reg3_m); \ + out_b = __msa_pckev_h((v8i16)reg6_m, (v8i16)reg5_m); \ + out_g = __msa_pckev_h((v8i16)reg4_m, (v8i16)reg7_m); \ + out_r = __msa_pckev_h((v8i16)reg3_m, (v8i16)reg2_m); \ + } + +// Pack and Store 8 ARGB values. +#define STOREARGB(in0, in1, in2, in3, pdst_argb) \ + { \ + v8i16 vec0_m, vec1_m; \ + v16u8 dst0_m, dst1_m; \ + vec0_m = (v8i16)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \ + vec1_m = (v8i16)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \ + dst0_m = (v16u8)__msa_ilvr_h(vec1_m, vec0_m); \ + dst1_m = (v16u8)__msa_ilvl_h(vec1_m, vec0_m); \ + ST_UB2(dst0_m, dst1_m, pdst_argb, 16); \ + } + +// Takes ARGB input and calculates Y. 
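// Editorial sketch (not part of the patch): ARGBTOY below is a rounded,
// weighted sum per pixel; const0/const1 carry the packed channel weights and
// const2 the rounding bias. With libyuv's BT.601 weights this is
// Y = (25 * B + 129 * G + 66 * R + 0x1080) >> 8. Generic scalar form:
static uint8_t ArgbToY_Scalar(uint8_t b, uint8_t g, uint8_t r,
                              uint16_t wb, uint16_t wg, uint16_t wr,
                              uint16_t bias, int shift) {
  return (uint8_t)((b * wb + g * wg + r * wr + bias) >> shift);
}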
+#define ARGBTOY(argb0, argb1, argb2, argb3, const0, const1, const2, shift, \ + y_out) \ + { \ + v16u8 vec0_m, vec1_m, vec2_m, vec3_m; \ + v8u16 reg0_m, reg1_m; \ + \ + vec0_m = (v16u8)__msa_pckev_h((v8i16)argb1, (v8i16)argb0); \ + vec1_m = (v16u8)__msa_pckev_h((v8i16)argb3, (v8i16)argb2); \ + vec2_m = (v16u8)__msa_pckod_h((v8i16)argb1, (v8i16)argb0); \ + vec3_m = (v16u8)__msa_pckod_h((v8i16)argb3, (v8i16)argb2); \ + reg0_m = __msa_dotp_u_h(vec0_m, const0); \ + reg1_m = __msa_dotp_u_h(vec1_m, const0); \ + reg0_m = __msa_dpadd_u_h(reg0_m, vec2_m, const1); \ + reg1_m = __msa_dpadd_u_h(reg1_m, vec3_m, const1); \ + reg0_m += const2; \ + reg1_m += const2; \ + reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, shift); \ + reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, shift); \ + y_out = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \ + } + +// Loads current and next row of ARGB input and averages it to calculate U and V +#define READ_ARGB(s_ptr, t_ptr, argb0, argb1, argb2, argb3) \ + { \ + v16u8 src0_m, src1_m, src2_m, src3_m, src4_m, src5_m, src6_m, src7_m; \ + v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ + v16u8 vec8_m, vec9_m; \ + v8u16 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m, reg5_m, reg6_m, reg7_m; \ + v8u16 reg8_m, reg9_m; \ + \ + src0_m = (v16u8)__msa_ld_b((v16i8*)s, 0); \ + src1_m = (v16u8)__msa_ld_b((v16i8*)s, 16); \ + src2_m = (v16u8)__msa_ld_b((v16i8*)s, 32); \ + src3_m = (v16u8)__msa_ld_b((v16i8*)s, 48); \ + src4_m = (v16u8)__msa_ld_b((v16i8*)t, 0); \ + src5_m = (v16u8)__msa_ld_b((v16i8*)t, 16); \ + src6_m = (v16u8)__msa_ld_b((v16i8*)t, 32); \ + src7_m = (v16u8)__msa_ld_b((v16i8*)t, 48); \ + vec0_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \ + vec1_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \ + vec2_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \ + vec3_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \ + vec4_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \ + vec5_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \ + vec6_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \ + vec7_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \ + reg0_m = __msa_hadd_u_h(vec0_m, vec0_m); \ + reg1_m = __msa_hadd_u_h(vec1_m, vec1_m); \ + reg2_m = __msa_hadd_u_h(vec2_m, vec2_m); \ + reg3_m = __msa_hadd_u_h(vec3_m, vec3_m); \ + reg4_m = __msa_hadd_u_h(vec4_m, vec4_m); \ + reg5_m = __msa_hadd_u_h(vec5_m, vec5_m); \ + reg6_m = __msa_hadd_u_h(vec6_m, vec6_m); \ + reg7_m = __msa_hadd_u_h(vec7_m, vec7_m); \ + reg8_m = (v8u16)__msa_pckev_d((v2i64)reg4_m, (v2i64)reg0_m); \ + reg9_m = (v8u16)__msa_pckev_d((v2i64)reg5_m, (v2i64)reg1_m); \ + reg8_m += (v8u16)__msa_pckod_d((v2i64)reg4_m, (v2i64)reg0_m); \ + reg9_m += (v8u16)__msa_pckod_d((v2i64)reg5_m, (v2i64)reg1_m); \ + reg0_m = (v8u16)__msa_pckev_d((v2i64)reg6_m, (v2i64)reg2_m); \ + reg1_m = (v8u16)__msa_pckev_d((v2i64)reg7_m, (v2i64)reg3_m); \ + reg0_m += (v8u16)__msa_pckod_d((v2i64)reg6_m, (v2i64)reg2_m); \ + reg1_m += (v8u16)__msa_pckod_d((v2i64)reg7_m, (v2i64)reg3_m); \ + reg8_m = (v8u16)__msa_srai_h((v8i16)reg8_m, 2); \ + reg9_m = (v8u16)__msa_srai_h((v8i16)reg9_m, 2); \ + reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, 2); \ + reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, 2); \ + argb0 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \ + argb1 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \ + src0_m = (v16u8)__msa_ld_b((v16i8*)s, 64); \ + src1_m = (v16u8)__msa_ld_b((v16i8*)s, 80); \ + src2_m = (v16u8)__msa_ld_b((v16i8*)s, 96); \ + src3_m = 
+
+// Loads current and next row of ARGB input and averages them to calculate U
+// and V.
+#define READ_ARGB(s_ptr, t_ptr, argb0, argb1, argb2, argb3) \
+  { \
+    v16u8 src0_m, src1_m, src2_m, src3_m, src4_m, src5_m, src6_m, src7_m; \
+    v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
+    v16u8 vec8_m, vec9_m; \
+    v8u16 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m, reg5_m, reg6_m, reg7_m; \
+    v8u16 reg8_m, reg9_m; \
+ \
+    src0_m = (v16u8)__msa_ld_b((v16i8*)s_ptr, 0); \
+    src1_m = (v16u8)__msa_ld_b((v16i8*)s_ptr, 16); \
+    src2_m = (v16u8)__msa_ld_b((v16i8*)s_ptr, 32); \
+    src3_m = (v16u8)__msa_ld_b((v16i8*)s_ptr, 48); \
+    src4_m = (v16u8)__msa_ld_b((v16i8*)t_ptr, 0); \
+    src5_m = (v16u8)__msa_ld_b((v16i8*)t_ptr, 16); \
+    src6_m = (v16u8)__msa_ld_b((v16i8*)t_ptr, 32); \
+    src7_m = (v16u8)__msa_ld_b((v16i8*)t_ptr, 48); \
+    vec0_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \
+    vec1_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \
+    vec2_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \
+    vec3_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \
+    vec4_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \
+    vec5_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \
+    vec6_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \
+    vec7_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \
+    reg0_m = __msa_hadd_u_h(vec0_m, vec0_m); \
+    reg1_m = __msa_hadd_u_h(vec1_m, vec1_m); \
+    reg2_m = __msa_hadd_u_h(vec2_m, vec2_m); \
+    reg3_m = __msa_hadd_u_h(vec3_m, vec3_m); \
+    reg4_m = __msa_hadd_u_h(vec4_m, vec4_m); \
+    reg5_m = __msa_hadd_u_h(vec5_m, vec5_m); \
+    reg6_m = __msa_hadd_u_h(vec6_m, vec6_m); \
+    reg7_m = __msa_hadd_u_h(vec7_m, vec7_m); \
+    reg8_m = (v8u16)__msa_pckev_d((v2i64)reg4_m, (v2i64)reg0_m); \
+    reg9_m = (v8u16)__msa_pckev_d((v2i64)reg5_m, (v2i64)reg1_m); \
+    reg8_m += (v8u16)__msa_pckod_d((v2i64)reg4_m, (v2i64)reg0_m); \
+    reg9_m += (v8u16)__msa_pckod_d((v2i64)reg5_m, (v2i64)reg1_m); \
+    reg0_m = (v8u16)__msa_pckev_d((v2i64)reg6_m, (v2i64)reg2_m); \
+    reg1_m = (v8u16)__msa_pckev_d((v2i64)reg7_m, (v2i64)reg3_m); \
+    reg0_m += (v8u16)__msa_pckod_d((v2i64)reg6_m, (v2i64)reg2_m); \
+    reg1_m += (v8u16)__msa_pckod_d((v2i64)reg7_m, (v2i64)reg3_m); \
+    reg8_m = (v8u16)__msa_srai_h((v8i16)reg8_m, 2); \
+    reg9_m = (v8u16)__msa_srai_h((v8i16)reg9_m, 2); \
+    reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, 2); \
+    reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, 2); \
+    argb0 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \
+    argb1 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
+    src0_m = (v16u8)__msa_ld_b((v16i8*)s_ptr, 64); \
+    src1_m = (v16u8)__msa_ld_b((v16i8*)s_ptr, 80); \
+    src2_m = (v16u8)__msa_ld_b((v16i8*)s_ptr, 96); \
+    src3_m = (v16u8)__msa_ld_b((v16i8*)s_ptr, 112); \
+    src4_m = (v16u8)__msa_ld_b((v16i8*)t_ptr, 64); \
+    src5_m = (v16u8)__msa_ld_b((v16i8*)t_ptr, 80); \
+    src6_m = (v16u8)__msa_ld_b((v16i8*)t_ptr, 96); \
+    src7_m = (v16u8)__msa_ld_b((v16i8*)t_ptr, 112); \
+    vec2_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \
+    vec3_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \
+    vec4_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \
+    vec5_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \
+    vec6_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \
+    vec7_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \
+    vec8_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \
+    vec9_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \
+    reg0_m = __msa_hadd_u_h(vec2_m, vec2_m); \
+    reg1_m = __msa_hadd_u_h(vec3_m, vec3_m); \
+    reg2_m = __msa_hadd_u_h(vec4_m, vec4_m); \
+    reg3_m = __msa_hadd_u_h(vec5_m, vec5_m); \
+    reg4_m = __msa_hadd_u_h(vec6_m, vec6_m); \
+    reg5_m = __msa_hadd_u_h(vec7_m, vec7_m); \
+    reg6_m = __msa_hadd_u_h(vec8_m, vec8_m); \
+    reg7_m = __msa_hadd_u_h(vec9_m, vec9_m); \
+    reg8_m = (v8u16)__msa_pckev_d((v2i64)reg4_m, (v2i64)reg0_m); \
+    reg9_m = (v8u16)__msa_pckev_d((v2i64)reg5_m, (v2i64)reg1_m); \
+    reg8_m += (v8u16)__msa_pckod_d((v2i64)reg4_m, (v2i64)reg0_m); \
+    reg9_m += (v8u16)__msa_pckod_d((v2i64)reg5_m, (v2i64)reg1_m); \
+    reg0_m = (v8u16)__msa_pckev_d((v2i64)reg6_m, (v2i64)reg2_m); \
+    reg1_m = (v8u16)__msa_pckev_d((v2i64)reg7_m, (v2i64)reg3_m); \
+    reg0_m += (v8u16)__msa_pckod_d((v2i64)reg6_m, (v2i64)reg2_m); \
+    reg1_m += (v8u16)__msa_pckod_d((v2i64)reg7_m, (v2i64)reg3_m); \
+    reg8_m = (v8u16)__msa_srai_h((v8i16)reg8_m, 2); \
+    reg9_m = (v8u16)__msa_srai_h((v8i16)reg9_m, 2); \
+    reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, 2); \
+    reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, 2); \
+    argb2 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \
+    argb3 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
+  }
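+
+// For reference only (not upstream code): the interleave/hadd/pckev_d
+// sequence above is a truncating 2x2 box filter; per channel it computes
+//   avg = (cur_row[i] + cur_row[i + 4] + next_row[i] + next_row[i + 4]) >> 2;
+// i.e. two horizontally adjacent ARGB pixels averaged with the row below.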
+
+// Takes ARGB input and calculates U and V.
+#define ARGBTOUV(argb0, argb1, argb2, argb3, const0, const1, const2, const3, \
+                 shf0, shf1, shf2, shf3, v_out, u_out) \
+  { \
+    v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
+    v8u16 reg0_m, reg1_m, reg2_m, reg3_m; \
+ \
+    vec0_m = (v16u8)__msa_vshf_b(shf0, (v16i8)argb1, (v16i8)argb0); \
+    vec1_m = (v16u8)__msa_vshf_b(shf0, (v16i8)argb3, (v16i8)argb2); \
+    vec2_m = (v16u8)__msa_vshf_b(shf1, (v16i8)argb1, (v16i8)argb0); \
+    vec3_m = (v16u8)__msa_vshf_b(shf1, (v16i8)argb3, (v16i8)argb2); \
+    vec4_m = (v16u8)__msa_vshf_b(shf2, (v16i8)argb1, (v16i8)argb0); \
+    vec5_m = (v16u8)__msa_vshf_b(shf2, (v16i8)argb3, (v16i8)argb2); \
+    vec6_m = (v16u8)__msa_vshf_b(shf3, (v16i8)argb1, (v16i8)argb0); \
+    vec7_m = (v16u8)__msa_vshf_b(shf3, (v16i8)argb3, (v16i8)argb2); \
+    reg0_m = __msa_dotp_u_h(vec0_m, const1); \
+    reg1_m = __msa_dotp_u_h(vec1_m, const1); \
+    reg2_m = __msa_dotp_u_h(vec4_m, const1); \
+    reg3_m = __msa_dotp_u_h(vec5_m, const1); \
+    reg0_m += const3; \
+    reg1_m += const3; \
+    reg2_m += const3; \
+    reg3_m += const3; \
+    reg0_m -= __msa_dotp_u_h(vec2_m, const0); \
+    reg1_m -= __msa_dotp_u_h(vec3_m, const0); \
+    reg2_m -= __msa_dotp_u_h(vec6_m, const2); \
+    reg3_m -= __msa_dotp_u_h(vec7_m, const2); \
+    v_out = (v16u8)__msa_pckod_b((v16i8)reg1_m, (v16i8)reg0_m); \
+    u_out = (v16u8)__msa_pckod_b((v16i8)reg3_m, (v16i8)reg2_m); \
+  }
+
+// Load I444 pixel data
+#define READI444(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \
+  { \
+    uint64_t y_m, u_m, v_m; \
+    v2i64 zero_m = {0}; \
+    y_m = LD(psrc_y); \
+    u_m = LD(psrc_u); \
+    v_m = LD(psrc_v); \
+    out_y = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)y_m); \
+    out_u = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)u_m); \
+    out_v = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)v_m); \
+  }
+
+void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) {
+  int x;
+  v16u8 src0, src1, src2, src3;
+  v16u8 dst0, dst1, dst2, dst3;
+  v16i8 shuffler = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
+  src += width - 64;
+
+  for (x = 0; x < width; x += 64) {
+    LD_UB4(src, 16, src3, src2, src1, src0);
+    VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2);
+    VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0);
+    ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
+    dst += 64;
+    src -= 64;
+  }
+}
+
+void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) {
+  int x;
+  v16u8 src0, src1, src2, src3;
+  v16u8 dst0, dst1, dst2, dst3;
+  v16i8 shuffler = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3};
+  src += width * 4 - 64;
+
+  for (x = 0; x < width; x += 16) {
+    LD_UB4(src, 16, src3, src2, src1, src0);
+    VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2);
+    VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0);
+    ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
+    dst += 64;
+    src -= 64;
+  }
+}
+
+void I422ToYUY2Row_MSA(const uint8_t* src_y,
+                       const uint8_t* src_u,
+                       const uint8_t* src_v,
+                       uint8_t* dst_yuy2,
+                       int width) {
+  int x;
+  v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1;
+  v16u8 dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3;
+
+  for (x = 0; x < width; x += 32) {
+    src_u0 = LD_UB(src_u);
+    src_v0 = LD_UB(src_v);
+    LD_UB2(src_y, 16, src_y0, src_y1);
+    ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1);
+    ILVRL_B2_UB(vec_uv0, src_y0, dst_yuy2_0, dst_yuy2_1);
+    ILVRL_B2_UB(vec_uv1, src_y1, dst_yuy2_2, dst_yuy2_3);
+    ST_UB4(dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3, dst_yuy2, 16);
+    src_u += 16;
+    src_v += 16;
+    src_y += 32;
+    dst_yuy2 += 64;
+  }
+}
+
+void I422ToUYVYRow_MSA(const uint8_t* src_y,
+                       const uint8_t* src_u,
+                       const uint8_t* src_v,
+                       uint8_t* dst_uyvy,
+                       int width) {
+  int x;
+  v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1;
+  v16u8 dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3;
+
+  for (x = 0; x < width; x += 32) {
+    src_u0 = LD_UB(src_u);
+    src_v0 = LD_UB(src_v);
+    LD_UB2(src_y, 16, src_y0, src_y1);
+    ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1);
+    ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1);
+    ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3);
+    ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16);
+    src_u += 16;
+    src_v += 16;
+    src_y += 32;
+    dst_uyvy += 64;
+  }
+}
+
+void I422ToARGBRow_MSA(const uint8_t* src_y,
+                       const uint8_t* src_u,
+                       const uint8_t* src_v,
+                       uint8_t* dst_argb,
+                       const struct YuvConstants* yuvconstants,
+                       int width) {
+  int x;
+  v16u8 src0, src1, src2;
+  v8i16 vec0, vec1, vec2;
+  v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+  v4i32 vec_ubvr, vec_ugvg;
+  v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+
+  YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+                 vec_br, vec_yg);
+  vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+  vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+  for (x = 0; x < width; x += 8) {
+    READYUV422(src_y, src_u, src_v, src0, src1, src2);
+    src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+    YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+             vec0, vec1, vec2);
+    STOREARGB(vec0, vec1, vec2, alpha, dst_argb);
+    src_y += 8;
+    src_u += 4;
+    src_v += 4;
+    dst_argb += 32;
+  }
+}
+
+void I422ToRGBARow_MSA(const uint8_t* src_y,
+                       const uint8_t* src_u,
+                       const uint8_t* src_v,
+                       uint8_t* dst_argb,
+                       const struct YuvConstants* yuvconstants,
+                       int width) {
+  int x;
+  v16u8 src0, src1, src2;
+  v8i16 vec0, vec1, vec2;
+  v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+  v4i32 vec_ubvr, vec_ugvg;
+  v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+
+  YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+                 vec_br, vec_yg);
+  vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+  vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+  for (x = 0; x < width; x += 8) {
+    READYUV422(src_y, src_u, src_v, src0, src1, src2);
+    src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+    YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+             vec0, vec1, vec2);
+    STOREARGB(alpha, vec0, vec1, vec2, dst_argb);
+    src_y += 8;
+    src_u += 4;
+    src_v += 4;
+    dst_argb += 32;
+  }
+}
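+
+// For reference only (not upstream code): I422ToARGBRow and I422ToRGBARow
+// above differ only in the STOREARGB operand order - (b, g, r, alpha) packs
+// B,G,R,A byte order in memory, while (alpha, b, g, r) packs A,B,G,R.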
+
+void I422AlphaToARGBRow_MSA(const uint8_t* src_y,
+                            const uint8_t* src_u,
+                            const uint8_t* src_v,
+                            const uint8_t* src_a,
+                            uint8_t* dst_argb,
+                            const struct YuvConstants* yuvconstants,
+                            int width) {
+  int x;
+  int64_t data_a;
+  v16u8 src0, src1, src2, src3;
+  v8i16 vec0, vec1, vec2;
+  v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+  v4i32 vec_ubvr, vec_ugvg;
+  v4i32 zero = {0};
+
+  YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+                 vec_br, vec_yg);
+  vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+  vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+  for (x = 0; x < width; x += 8) {
+    data_a = LD(src_a);
+    READYUV422(src_y, src_u, src_v, src0, src1, src2);
+    src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+    src3 = (v16u8)__msa_insert_d((v2i64)zero, 0, data_a);
+    YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+             vec0, vec1, vec2);
+    src3 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src3);
+    STOREARGB(vec0, vec1, vec2, src3, dst_argb);
+    src_y += 8;
+    src_u += 4;
+    src_v += 4;
+    src_a += 8;
+    dst_argb += 32;
+  }
+}
+
+void I422ToRGB24Row_MSA(const uint8_t* src_y,
+                        const uint8_t* src_u,
+                        const uint8_t* src_v,
+                        uint8_t* dst_argb,
+                        const struct YuvConstants* yuvconstants,
+                        int32_t width) {
+  int x;
+  int64_t data_u, data_v;
+  v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2;
+  v8i16 vec0, vec1, vec2, vec3, vec4, vec5;
+  v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+  v4i32 vec_ubvr, vec_ugvg;
+  v16u8 reg0, reg1, reg2, reg3;
+  v2i64 zero = {0};
+  v16i8 shuffler0 = {0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10};
+  v16i8 shuffler1 = {0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10};
+  v16i8 shuffler2 = {26, 6,  7, 27, 8, 9, 28, 10,
+                     11, 29, 12, 13, 30, 14, 15, 31};
+
+  YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+                 vec_br, vec_yg);
+  vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+  vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+  for (x = 0; x < width; x += 16) {
+    src0 = (v16u8)__msa_ld_b((v16u8*)src_y, 0);
+    data_u = LD(src_u);
+    data_v = LD(src_v);
+    src1 = (v16u8)__msa_insert_d(zero, 0, data_u);
+    src2 = (v16u8)__msa_insert_d(zero, 0, data_v);
+    src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+    src3 = (v16u8)__msa_sldi_b((v16i8)src0, (v16i8)src0, 8);
+    src4 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src1, 8);
+    YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+             vec0, vec1, vec2);
+    YUVTORGB(src3, src4, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+             vec3, vec4, vec5);
+    reg0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0);
+    reg2 = (v16u8)__msa_ilvev_b((v16i8)vec4, (v16i8)vec3);
+    reg3 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec2);
+    reg1 = (v16u8)__msa_sldi_b((v16i8)reg2, (v16i8)reg0, 11);
+    dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)reg3, (v16i8)reg0);
+    dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)reg3, (v16i8)reg1);
+    dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)reg3, (v16i8)reg2);
+    ST_UB2(dst0, dst1, dst_argb, 16);
+    ST_UB(dst2, (dst_argb + 32));
+    src_y += 16;
+    src_u += 8;
+    src_v += 8;
+    dst_argb += 48;
+  }
+}
+
+// TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R.
+void I422ToRGB565Row_MSA(const uint8_t* src_y,
+                         const uint8_t* src_u,
+                         const uint8_t* src_v,
+                         uint8_t* dst_rgb565,
+                         const struct YuvConstants* yuvconstants,
+                         int width) {
+  int x;
+  v16u8 src0, src1, src2, dst0;
+  v8i16 vec0, vec1, vec2;
+  v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+  v4i32 vec_ubvr, vec_ugvg;
+
+  YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+                 vec_br, vec_yg);
+  vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+  vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+  for (x = 0; x < width; x += 8) {
+    READYUV422(src_y, src_u, src_v, src0, src1, src2);
+    src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+    YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+             vec0, vec2, vec1);
+    vec0 = __msa_srai_h(vec0, 3);
+    vec1 = __msa_srai_h(vec1, 3);
+    vec2 = __msa_srai_h(vec2, 2);
+    vec1 = __msa_slli_h(vec1, 11);
+    vec2 = __msa_slli_h(vec2, 5);
+    vec0 |= vec1;
+    dst0 = (v16u8)(vec2 | vec0);
+    ST_UB(dst0, dst_rgb565);
+    src_y += 8;
+    src_u += 4;
+    src_v += 4;
+    dst_rgb565 += 16;
+  }
+}
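+
+// For reference only (not upstream code): the RGB565 pack above is, per pixel,
+//   dst = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
+// done with halfword shifts and ORs across 8 pixels at a time.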
+
+// TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G.
+void I422ToARGB4444Row_MSA(const uint8_t* src_y,
+                           const uint8_t* src_u,
+                           const uint8_t* src_v,
+                           uint8_t* dst_argb4444,
+                           const struct YuvConstants* yuvconstants,
+                           int width) {
+  int x;
+  v16u8 src0, src1, src2, dst0;
+  v8i16 vec0, vec1, vec2;
+  v8u16 reg0, reg1, reg2;
+  v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+  v4i32 vec_ubvr, vec_ugvg;
+  v8u16 const_0xF000 = (v8u16)__msa_fill_h(0xF000);
+
+  YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+                 vec_br, vec_yg);
+  vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+  vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+  for (x = 0; x < width; x += 8) {
+    READYUV422(src_y, src_u, src_v, src0, src1, src2);
+    src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+    YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+             vec0, vec1, vec2);
+    reg0 = (v8u16)__msa_srai_h(vec0, 4);
+    reg1 = (v8u16)__msa_srai_h(vec1, 4);
+    reg2 = (v8u16)__msa_srai_h(vec2, 4);
+    reg1 = (v8u16)__msa_slli_h((v8i16)reg1, 4);
+    reg2 = (v8u16)__msa_slli_h((v8i16)reg2, 8);
+    reg1 |= const_0xF000;
+    reg0 |= reg2;
+    dst0 = (v16u8)(reg1 | reg0);
+    ST_UB(dst0, dst_argb4444);
+    src_y += 8;
+    src_u += 4;
+    src_v += 4;
+    dst_argb4444 += 16;
+  }
+}
+
+void I422ToARGB1555Row_MSA(const uint8_t* src_y,
+                           const uint8_t* src_u,
+                           const uint8_t* src_v,
+                           uint8_t* dst_argb1555,
+                           const struct YuvConstants* yuvconstants,
+                           int width) {
+  int x;
+  v16u8 src0, src1, src2, dst0;
+  v8i16 vec0, vec1, vec2;
+  v8u16 reg0, reg1, reg2;
+  v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+  v4i32 vec_ubvr, vec_ugvg;
+  v8u16 const_0x8000 = (v8u16)__msa_fill_h(0x8000);
+
+  YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
+                 vec_br, vec_yg);
+  vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub);
+  vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
+
+  for (x = 0; x < width; x += 8) {
+    READYUV422(src_y, src_u, src_v, src0, src1, src2);
+    src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1);
+    YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
+             vec0, vec1, vec2);
+    reg0 = (v8u16)__msa_srai_h(vec0, 3);
+    reg1 = (v8u16)__msa_srai_h(vec1, 3);
+    reg2 = (v8u16)__msa_srai_h(vec2, 3);
+    reg1 = (v8u16)__msa_slli_h((v8i16)reg1, 5);
+    reg2 = (v8u16)__msa_slli_h((v8i16)reg2, 10);
+    reg1 |= const_0x8000;
+    reg0 |= reg2;
+    dst0 = (v16u8)(reg1 | reg0);
+    ST_UB(dst0, dst_argb1555);
+    src_y += 8;
+    src_u += 4;
+    src_v += 4;
+    dst_argb1555 += 16;
+  }
+}
+
+void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
+  int x;
+  v16u8 src0, src1, src2, src3, dst0, dst1;
+
+  for (x = 0; x < width; x += 32) {
+    LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+    dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+    ST_UB2(dst0, dst1, dst_y, 16);
+    src_yuy2 += 64;
+    dst_y += 32;
+  }
+}
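+
+// For reference only (not upstream code): YUY2 stores pixel pairs as
+// [Y0 U Y1 V], so pckev_b above extracts the luma bytes and pckod_b the
+// interleaved chroma; the UYVY variants below do the reverse.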
+
+void YUY2ToUVRow_MSA(const uint8_t* src_yuy2,
+                     int src_stride_yuy2,
+                     uint8_t* dst_u,
+                     uint8_t* dst_v,
+                     int width) {
+  const uint8_t* src_yuy2_next = src_yuy2 + src_stride_yuy2;
+  int x;
+  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+  v16u8 vec0, vec1, dst0, dst1;
+
+  for (x = 0; x < width; x += 32) {
+    LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
+    LD_UB4(src_yuy2_next, 16, src4, src5, src6, src7);
+    src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+    src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+    src2 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4);
+    src3 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6);
+    vec0 = __msa_aver_u_b(src0, src2);
+    vec1 = __msa_aver_u_b(src1, src3);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+    dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
+    ST_UB(dst0, dst_u);
+    ST_UB(dst1, dst_v);
+    src_yuy2 += 64;
+    src_yuy2_next += 64;
+    dst_u += 16;
+    dst_v += 16;
+  }
+}
+
+void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2,
+                        uint8_t* dst_u,
+                        uint8_t* dst_v,
+                        int width) {
+  int x;
+  v16u8 src0, src1, src2, src3, dst0, dst1;
+
+  for (x = 0; x < width; x += 32) {
+    LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
+    src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+    src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+    dst1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+    ST_UB(dst0, dst_u);
+    ST_UB(dst1, dst_v);
+    src_yuy2 += 64;
+    dst_u += 16;
+    dst_v += 16;
+  }
+}
+
+void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
+  int x;
+  v16u8 src0, src1, src2, src3, dst0, dst1;
+
+  for (x = 0; x < width; x += 32) {
+    LD_UB4(src_uyvy, 16, src0, src1, src2, src3);
+    dst0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+    dst1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+    ST_UB2(dst0, dst1, dst_y, 16);
+    src_uyvy += 64;
+    dst_y += 32;
+  }
+}
+
+void UYVYToUVRow_MSA(const uint8_t* src_uyvy,
+                     int src_stride_uyvy,
+                     uint8_t* dst_u,
+                     uint8_t* dst_v,
+                     int width) {
+  const uint8_t* src_uyvy_next = src_uyvy + src_stride_uyvy;
+  int x;
+  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+  v16u8 vec0, vec1, dst0, dst1;
+
+  for (x = 0; x < width; x += 32) {
+    LD_UB4(src_uyvy, 16, src0, src1, src2, src3);
+    LD_UB4(src_uyvy_next, 16, src4, src5, src6, src7);
+    src0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+    src1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+    src2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
+    src3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6);
+    vec0 = __msa_aver_u_b(src0, src2);
+    vec1 = __msa_aver_u_b(src1, src3);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+    dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
+    ST_UB(dst0, dst_u);
+    ST_UB(dst1, dst_v);
+    src_uyvy += 64;
+    src_uyvy_next += 64;
+    dst_u += 16;
+    dst_v += 16;
+  }
+}
+
+void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
+                        uint8_t* dst_u,
+                        uint8_t* dst_v,
+                        int width) {
+  int x;
+  v16u8 src0, src1, src2, src3, dst0, dst1;
+
+  for (x = 0; x < width; x += 32) {
+    LD_UB4(src_uyvy, 16, src0, src1, src2, src3);
+    src0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+    src1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+    dst1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+    ST_UB(dst0, dst_u);
+    ST_UB(dst1, dst_v);
+    src_uyvy += 64;
+    dst_u += 16;
+    dst_v += 16;
+  }
+}
+
+void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
+  int x;
+  v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
+  v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
+  v16i8 zero = {0};
+  v8u16 const_0x19 = (v8u16)__msa_ldi_h(0x19);
+  v8u16 const_0x81 = (v8u16)__msa_ldi_h(0x81);
+  v8u16 const_0x42 = (v8u16)__msa_ldi_h(0x42);
+  v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
+
+  for (x = 0; x < width; x += 16) {
+    src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
+    src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
+    src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
+    src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
+    vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+    vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+    vec2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+    vec3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+    reg0 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec0);
+    reg1 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec1);
+    reg2 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec2);
+    reg3 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec3);
+    reg4 = (v8u16)__msa_ilvod_b(zero, (v16i8)vec0);
+    reg5 = (v8u16)__msa_ilvod_b(zero, (v16i8)vec1);
+    reg0 *= const_0x19;
+    reg1 *= const_0x19;
+    reg2 *= const_0x81;
+    reg3 *= const_0x81;
+    reg4 *= const_0x42;
+    reg5 *= const_0x42;
+    reg0 += reg2;
+    reg1 += reg3;
+    reg0 += reg4;
+    reg1 += reg5;
+    reg0 += const_0x1080;
+    reg1 += const_0x1080;
+    reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8);
+    reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
+    ST_UB(dst0, dst_y);
+    src_argb0 += 64;
+    dst_y += 16;
+  }
+}
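+
+// For reference only (not upstream code): ARGBToUVRow below averages each
+// 2x2 block of pixels and then applies the BT.601 chroma transform
+//   u = (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
+//   v = (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
+// where 0x8080 is the 128.5 chroma bias scaled by 256.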
+
+void ARGBToUVRow_MSA(const uint8_t* src_argb0,
+                     int src_stride_argb,
+                     uint8_t* dst_u,
+                     uint8_t* dst_v,
+                     int width) {
+  int x;
+  const uint8_t* src_argb0_next = src_argb0 + src_stride_argb;
+  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+  v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
+  v16u8 dst0, dst1;
+  v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x70);
+  v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A);
+  v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26);
+  v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E);
+  v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12);
+  v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
+
+  for (x = 0; x < width; x += 32) {
+    src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
+    src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
+    src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
+    src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
+    src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 64);
+    src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 80);
+    src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 96);
+    src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 112);
+    vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+    vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+    vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
+    vec3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6);
+    vec4 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+    vec5 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+    vec6 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4);
+    vec7 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6);
+    vec8 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+    vec9 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+    vec4 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4);
+    vec5 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6);
+    vec0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
+    vec1 = (v16u8)__msa_pckod_b((v16i8)vec3, (v16i8)vec2);
+    reg0 = __msa_hadd_u_h(vec8, vec8);
+    reg1 = __msa_hadd_u_h(vec9, vec9);
+    reg2 = __msa_hadd_u_h(vec4, vec4);
+    reg3 = __msa_hadd_u_h(vec5, vec5);
+    reg4 = __msa_hadd_u_h(vec0, vec0);
+    reg5 = __msa_hadd_u_h(vec1, vec1);
+    src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 0);
+    src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 16);
+    src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 32);
+    src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 48);
+    src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 64);
+    src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 80);
+    src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 96);
+    src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 112);
+    vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+    vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+    vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
+    vec3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6);
+    vec4 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+    vec5 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+    vec6 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4);
+    vec7 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6);
+    vec8 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+    vec9 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+    vec4 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4);
+    vec5 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6);
+    vec0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
+    vec1 = (v16u8)__msa_pckod_b((v16i8)vec3, (v16i8)vec2);
+    reg0 += __msa_hadd_u_h(vec8, vec8);
+    reg1 += __msa_hadd_u_h(vec9, vec9);
+    reg2 += __msa_hadd_u_h(vec4, vec4);
+    reg3 += __msa_hadd_u_h(vec5, vec5);
+    reg4 += __msa_hadd_u_h(vec0, vec0);
+    reg5 += __msa_hadd_u_h(vec1, vec1);
+    reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 2);
+    reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 2);
+    reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 2);
+    reg3 = (v8u16)__msa_srai_h((v8i16)reg3, 2);
+    reg4 = (v8u16)__msa_srai_h((v8i16)reg4, 2);
+    reg5 = (v8u16)__msa_srai_h((v8i16)reg5, 2);
+    reg6 = reg0 * const_0x70;
+    reg7 = reg1 * const_0x70;
+    reg8 = reg2 * const_0x4A;
+    reg9 = reg3 * const_0x4A;
+    reg6 += const_0x8080;
+    reg7 += const_0x8080;
+    reg8 += reg4 * const_0x26;
+    reg9 += reg5 * const_0x26;
+    reg0 *= const_0x12;
+    reg1 *= const_0x12;
+    reg2 *= const_0x5E;
+    reg3 *= const_0x5E;
+    reg4 *= const_0x70;
+    reg5 *= const_0x70;
+    reg2 += reg0;
+    reg3 += reg1;
+    reg4 += const_0x8080;
+    reg5 += const_0x8080;
+    reg6 -= reg8;
+    reg7 -= reg9;
+    reg4 -= reg2;
+    reg5 -= reg3;
+    reg6 = (v8u16)__msa_srai_h((v8i16)reg6, 8);
+    reg7 = (v8u16)__msa_srai_h((v8i16)reg7, 8);
+    reg4 = (v8u16)__msa_srai_h((v8i16)reg4, 8);
+    reg5 = (v8u16)__msa_srai_h((v8i16)reg5, 8);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)reg7, (v16i8)reg6);
+    dst1 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
+    ST_UB(dst0, dst_u);
+    ST_UB(dst1, dst_v);
+    src_argb0 += 128;
+    src_argb0_next += 128;
+    dst_u += 16;
+    dst_v += 16;
+  }
+}
+
+void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+  int x;
+  v16u8 src0, src1, src2, src3, dst0, dst1, dst2;
+  v16i8 shuffler0 = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, 20};
+  v16i8 shuffler1 = {5,  6,  8,  9,  10, 12, 13, 14,
+                     16, 17, 18, 20, 21, 22, 24, 25};
+  v16i8 shuffler2 = {10, 12, 13, 14, 16, 17, 18, 20,
+                     21, 22, 24, 25, 26, 28, 29, 30};
+
+  for (x = 0; x < width; x += 16) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+    src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32);
+    src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48);
+    dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0);
+    dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1);
+    dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2);
+    ST_UB2(dst0, dst1, dst_rgb, 16);
+    ST_UB(dst2, (dst_rgb + 32));
+    src_argb += 64;
+    dst_rgb += 48;
+  }
+}
+
+void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+  int x;
+  v16u8 src0, src1, src2, src3, dst0, dst1, dst2;
+  v16i8 shuffler0 = {2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 18, 17, 16, 22};
+  v16i8 shuffler1 = {5,  4,  10, 9,  8,  14, 13, 12,
+                     18, 17, 16, 22, 21, 20, 26, 25};
+  v16i8 shuffler2 = {8,  14, 13, 12, 18, 17, 16, 22,
+                     21, 20, 26, 25, 24, 30, 29, 28};
+
+  for (x = 0; x < width; x += 16) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+    src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32);
+    src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48);
+    dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0);
+    dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1);
+    dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2);
+    ST_UB2(dst0, dst1, dst_rgb, 16);
+    ST_UB(dst2, (dst_rgb + 32));
+    src_argb += 64;
+    dst_rgb += 48;
+  }
+}
+
+void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
+  int x;
+  v16u8 src0, src1, dst0;
+  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+  v16i8 zero = {0};
+
+  for (x = 0; x < width; x += 8) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+    vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3);
+    vec1 = (v16u8)__msa_slli_b((v16i8)src0, 3);
+    vec2 = (v16u8)__msa_srai_b((v16i8)src0, 5);
+    vec4 = (v16u8)__msa_srai_b((v16i8)src1, 3);
+    vec5 = (v16u8)__msa_slli_b((v16i8)src1, 3);
+    vec6 = (v16u8)__msa_srai_b((v16i8)src1, 5);
+    vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1);
+    vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1);
+    vec5 = (v16u8)__msa_sldi_b(zero, (v16i8)vec5, 1);
+    vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1);
+    vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 2);
+    vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 2);
+    vec0 = __msa_binsli_b(vec0, vec1, 2);
+    vec1 = __msa_binsli_b(vec2, vec3, 4);
+    vec4 = __msa_binsli_b(vec4, vec5, 2);
+    vec5 = __msa_binsli_b(vec6, vec7, 4);
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0);
+    vec4 = (v16u8)__msa_ilvev_b((v16i8)vec5, (v16i8)vec4);
+    dst0 = (v16u8)__msa_pckev_h((v8i16)vec4, (v8i16)vec0);
+    ST_UB(dst0, dst_rgb);
+    src_argb += 32;
+    dst_rgb += 16;
+  }
+}
+
+void ARGBToARGB1555Row_MSA(const uint8_t* src_argb,
+                           uint8_t* dst_rgb,
+                           int width) {
+  int x;
+  v16u8 src0, src1, dst0;
+  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+  v16i8 zero = {0};
+
+  for (x = 0; x < width; x += 8) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+    vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3);
+    vec1 = (v16u8)__msa_slli_b((v16i8)src0, 2);
+    vec2 = (v16u8)__msa_srai_b((v16i8)vec0, 3);
+    vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1);
+    vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1);
+    vec3 = (v16u8)__msa_srai_b((v16i8)src0, 1);
+    vec5 = (v16u8)__msa_srai_b((v16i8)src1, 3);
+    vec6 = (v16u8)__msa_slli_b((v16i8)src1, 2);
+    vec7 = (v16u8)__msa_srai_b((v16i8)vec5, 3);
+    vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1);
+    vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)vec7, 1);
+    vec8 = (v16u8)__msa_srai_b((v16i8)src1, 1);
+    vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)vec3, 2);
+    vec8 = (v16u8)__msa_sldi_b(zero, (v16i8)vec8, 2);
+    vec4 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 3);
+    vec9 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 3);
+    vec0 = __msa_binsli_b(vec0, vec1, 2);
+    vec5 = __msa_binsli_b(vec5, vec6, 2);
+    vec1 = __msa_binsli_b(vec2, vec3, 5);
+    vec6 = __msa_binsli_b(vec7, vec8, 5);
+    vec1 = __msa_binsli_b(vec1, vec4, 0);
+    vec6 = __msa_binsli_b(vec6, vec9, 0);
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0);
+    vec1 = (v16u8)__msa_ilvev_b((v16i8)vec6, (v16i8)vec5);
+    dst0 = (v16u8)__msa_pckev_h((v8i16)vec1, (v8i16)vec0);
+    ST_UB(dst0, dst_rgb);
+    src_argb += 32;
+    dst_rgb += 16;
+  }
+}
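+
+// For reference only (not upstream code): __msa_binsli_b(a, b, n) copies the
+// n + 1 most significant bits of each byte of b into a, which is how the
+// RGB565/ARGB1555 packers above splice adjacent channel fields together
+// without separate mask-and-OR steps.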
+
+void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
+                           uint8_t* dst_rgb,
+                           int width) {
+  int x;
+  v16u8 src0, src1;
+  v16u8 vec0, vec1;
+  v16u8 dst0;
+  v16i8 zero = {0};
+
+  for (x = 0; x < width; x += 8) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+    vec0 = (v16u8)__msa_srai_b((v16i8)src0, 4);
+    vec1 = (v16u8)__msa_srai_b((v16i8)src1, 4);
+    src0 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 1);
+    src1 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 1);
+    vec0 = __msa_binsli_b(vec0, src0, 3);
+    vec1 = __msa_binsli_b(vec1, src1, 3);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+    ST_UB(dst0, dst_rgb);
+    src_argb += 32;
+    dst_rgb += 16;
+  }
+}
+
+void ARGBToUV444Row_MSA(const uint8_t* src_argb,
+                        uint8_t* dst_u,
+                        uint8_t* dst_v,
+                        int32_t width) {
+  int32_t x;
+  v16u8 src0, src1, src2, src3, reg0, reg1, reg2, reg3, dst0, dst1;
+  v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+  v8u16 vec8, vec9, vec10, vec11;
+  v8u16 const_112 = (v8u16)__msa_ldi_h(112);
+  v8u16 const_74 = (v8u16)__msa_ldi_h(74);
+  v8u16 const_38 = (v8u16)__msa_ldi_h(38);
+  v8u16 const_94 = (v8u16)__msa_ldi_h(94);
+  v8u16 const_18 = (v8u16)__msa_ldi_h(18);
+  v8u16 const_32896 = (v8u16)__msa_fill_h(32896);
+  v16i8 zero = {0};
+
+  for (x = width; x > 0; x -= 16) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+    src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32);
+    src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48);
+    reg0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+    reg1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
+    reg2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+    reg3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
+    src0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
+    src1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2);
+    src2 = (v16u8)__msa_pckod_b((v16i8)reg1, (v16i8)reg0);
+    vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0);
+    vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0);
+    vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1);
+    vec3 = (v8u16)__msa_ilvl_b(zero, (v16i8)src1);
+    vec4 = (v8u16)__msa_ilvr_b(zero, (v16i8)src2);
+    vec5 = (v8u16)__msa_ilvl_b(zero, (v16i8)src2);
+    vec10 = vec0 * const_18;
+    vec11 = vec1 * const_18;
+    vec8 = vec2 * const_94;
+    vec9 = vec3 * const_94;
+    vec6 = vec4 * const_112;
+    vec7 = vec5 * const_112;
+    vec0 *= const_112;
+    vec1 *= const_112;
+    vec2 *= const_74;
+    vec3 *= const_74;
+    vec4 *= const_38;
+    vec5 *= const_38;
+    vec8 += vec10;
+    vec9 += vec11;
+    vec6 += const_32896;
+    vec7 += const_32896;
+    vec0 += const_32896;
+    vec1 += const_32896;
+    vec2 += vec4;
+    vec3 += vec5;
+    vec0 -= vec2;
+    vec1 -= vec3;
+    vec6 -= vec8;
+    vec7 -= vec9;
+    vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8);
+    vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
+    vec6 = (v8u16)__msa_srai_h((v8i16)vec6, 8);
+    vec7 = (v8u16)__msa_srai_h((v8i16)vec7, 8);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+    dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6);
+    ST_UB(dst0, dst_u);
+    ST_UB(dst1, dst_v);
+    src_argb += 64;
+    dst_u += 16;
+    dst_v += 16;
+  }
+}
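+
+// For reference only (not upstream code): ARGBMultiplyRow below uses the same
+// REPEAT8 trick as the C reference - widening a byte to v | (v << 8), i.e.
+// v * 0x0101 - so that
+//   dst = ((a * 0x0101) * b) >> 16;
+// approximates a * b / 255; ARGBAttenuateRow further down applies the same
+// idea with both operands widened and a >> 24.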
+
+void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
+                         const uint8_t* src_argb1,
+                         uint8_t* dst_argb,
+                         int width) {
+  int x;
+  v16u8 src0, src1, dst0;
+  v8u16 vec0, vec1, vec2, vec3;
+  v4u32 reg0, reg1, reg2, reg3;
+  v8i16 zero = {0};
+
+  for (x = 0; x < width; x += 4) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0);
+    vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
+    vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
+    vec2 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1);
+    vec3 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src1);
+    reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0);
+    reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0);
+    reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1);
+    reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1);
+    reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2);
+    reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2);
+    reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3);
+    reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3);
+    reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 16);
+    reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 16);
+    reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 16);
+    reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 16);
+    vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
+    vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+    ST_UB(dst0, dst_argb);
+    src_argb0 += 16;
+    src_argb1 += 16;
+    dst_argb += 16;
+  }
+}
+
+void ARGBAddRow_MSA(const uint8_t* src_argb0,
+                    const uint8_t* src_argb1,
+                    uint8_t* dst_argb,
+                    int width) {
+  int x;
+  v16u8 src0, src1, src2, src3, dst0, dst1;
+
+  for (x = 0; x < width; x += 8) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+    src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0);
+    src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 16);
+    dst0 = __msa_adds_u_b(src0, src2);
+    dst1 = __msa_adds_u_b(src1, src3);
+    ST_UB2(dst0, dst1, dst_argb, 16);
+    src_argb0 += 32;
+    src_argb1 += 32;
+    dst_argb += 32;
+  }
+}
+
+void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
+                         const uint8_t* src_argb1,
+                         uint8_t* dst_argb,
+                         int width) {
+  int x;
+  v16u8 src0, src1, src2, src3, dst0, dst1;
+
+  for (x = 0; x < width; x += 8) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+    src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0);
+    src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 16);
+    dst0 = __msa_subs_u_b(src0, src2);
+    dst1 = __msa_subs_u_b(src1, src3);
+    ST_UB2(dst0, dst1, dst_argb, 16);
+    src_argb0 += 32;
+    src_argb1 += 32;
+    dst_argb += 32;
+  }
+}
+
+void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
+                          uint8_t* dst_argb,
+                          int width) {
+  int x;
+  v16u8 src0, src1, dst0, dst1;
+  v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+  v4u32 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
+  v8i16 zero = {0};
+  v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255};
+
+  for (x = 0; x < width; x += 8) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+    vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
+    vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
+    vec2 = (v8u16)__msa_ilvr_b((v16i8)src1, (v16i8)src1);
+    vec3 = (v8u16)__msa_ilvl_b((v16i8)src1, (v16i8)src1);
+    vec4 = (v8u16)__msa_fill_h(vec0[3]);
+    vec5 = (v8u16)__msa_fill_h(vec0[7]);
+    vec6 = (v8u16)__msa_fill_h(vec1[3]);
+    vec7 = (v8u16)__msa_fill_h(vec1[7]);
+    vec4 = (v8u16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4);
+    vec5 = (v8u16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6);
+    vec6 = (v8u16)__msa_fill_h(vec2[3]);
+    vec7 = (v8u16)__msa_fill_h(vec2[7]);
+    vec8 = (v8u16)__msa_fill_h(vec3[3]);
+    vec9 = (v8u16)__msa_fill_h(vec3[7]);
+    vec6 = (v8u16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6);
+    vec7 = (v8u16)__msa_pckev_d((v2i64)vec9, (v2i64)vec8);
+    reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec4);
+    reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec4);
+    reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec5);
+    reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec5);
+    reg4 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec6);
+    reg5 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec6);
+    reg6 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec7);
+    reg7 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec7);
+    reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec0);
+    reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec0);
+    reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec1);
+    reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec1);
+    reg4 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2);
+    reg5 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2);
+    reg6 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3);
+    reg7 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3);
+    reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 24);
+    reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 24);
+    reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 24);
+    reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 24);
+    reg4 = (v4u32)__msa_srai_w((v4i32)reg4, 24);
+    reg5 = (v4u32)__msa_srai_w((v4i32)reg5, 24);
+    reg6 = (v4u32)__msa_srai_w((v4i32)reg6, 24);
+    reg7 = (v4u32)__msa_srai_w((v4i32)reg7, 24);
+    vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
+    vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
+    vec2 = (v8u16)__msa_pckev_h((v8i16)reg5, (v8i16)reg4);
+    vec3 = (v8u16)__msa_pckev_h((v8i16)reg7, (v8i16)reg6);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+    dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+    dst0 = __msa_bmnz_v(dst0, src0, mask);
+    dst1 = __msa_bmnz_v(dst1, src1, mask);
+    ST_UB2(dst0, dst1, dst_argb, 16);
+    src_argb += 32;
+    dst_argb += 32;
+  }
+}
+
+void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
+                               uint8_t* dst_rgb,
+                               uint32_t dither4,
+                               int width) {
+  int x;
+  v16u8 src0, src1, dst0, vec0, vec1;
+  v8i16 vec_d0;
+  v8i16 reg0, reg1, reg2;
+  v16i8 zero = {0};
+  v8i16 max = __msa_ldi_h(0xFF);
+
+  vec_d0 = (v8i16)__msa_fill_w(dither4);
+  vec_d0 = (v8i16)__msa_ilvr_b(zero, (v16i8)vec_d0);
+
+  for (x = 0; x < width; x += 8) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+    vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+    vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
+    reg0 = (v8i16)__msa_ilvev_b(zero, (v16i8)vec0);
+    reg1 = (v8i16)__msa_ilvev_b(zero, (v16i8)vec1);
+    reg2 = (v8i16)__msa_ilvod_b(zero, (v16i8)vec0);
+    reg0 += vec_d0;
+    reg1 += vec_d0;
+    reg2 += vec_d0;
+    reg0 = __msa_maxi_s_h((v8i16)reg0, 0);
+    reg1 = __msa_maxi_s_h((v8i16)reg1, 0);
+    reg2 = __msa_maxi_s_h((v8i16)reg2, 0);
+    reg0 = __msa_min_s_h((v8i16)max, (v8i16)reg0);
+    reg1 = __msa_min_s_h((v8i16)max, (v8i16)reg1);
+    reg2 = __msa_min_s_h((v8i16)max, (v8i16)reg2);
+    reg0 = __msa_srai_h(reg0, 3);
+    reg2 = __msa_srai_h(reg2, 3);
+    reg1 = __msa_srai_h(reg1, 2);
+    reg2 = __msa_slli_h(reg2, 11);
+    reg1 = __msa_slli_h(reg1, 5);
+    reg0 |= reg1;
+    dst0 = (v16u8)(reg0 | reg2);
+    ST_UB(dst0, dst_rgb);
+    src_argb += 32;
+    dst_rgb += 16;
+  }
+}
+
+void ARGBShuffleRow_MSA(const uint8_t* src_argb,
+                        uint8_t* dst_argb,
+                        const uint8_t* shuffler,
+                        int width) {
+  int x;
+  v16u8 src0, src1, dst0, dst1;
+  v16i8 vec0;
+  v16i8 shuffler_vec = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12};
+  int32_t val = LW((int32_t*)shuffler);
+
+  vec0 = (v16i8)__msa_fill_w(val);
+  shuffler_vec += vec0;
+
+  for (x = 0; x < width; x += 8) {
+    src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 0);
+    src1 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 16);
+    dst0 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src0, (v16i8)src0);
+    dst1 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src1, (v16i8)src1);
+    ST_UB2(dst0, dst1, dst_argb, 16);
+    src_argb += 32;
+    dst_argb += 32;
+  }
+}
+
+void ARGBShadeRow_MSA(const uint8_t* src_argb,
+                      uint8_t* dst_argb,
+                      int width,
+                      uint32_t value) {
+  int x;
+  v16u8 src0, dst0;
+  v8u16 vec0, vec1;
+  v4u32 reg0, reg1, reg2, reg3, rgba_scale;
+  v8i16 zero = {0};
+
+  rgba_scale[0] = value;
+  rgba_scale = (v4u32)__msa_ilvr_b((v16i8)rgba_scale, (v16i8)rgba_scale);
+  rgba_scale = (v4u32)__msa_ilvr_h(zero, (v8i16)rgba_scale);
+
+  for (x = 0; x < width; x += 4) {
+    src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 0);
+    vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
+    vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
+    reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0);
+    reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0);
+    reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1);
+    reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1);
+    reg0 *= rgba_scale;
+    reg1 *= rgba_scale;
+    reg2 *= rgba_scale;
+    reg3 *= rgba_scale;
+    reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 24);
+    reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 24);
+    reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 24);
+    reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 24);
+    vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
+    vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+    ST_UB(dst0, dst_argb);
+    src_argb += 16;
+    dst_argb += 16;
+  }
+}
+
+void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
+  int x;
+  v16u8 src0, src1, vec0, vec1, dst0, dst1;
+  v8u16 reg0;
+  v16u8 const_0x26 = (v16u8)__msa_ldi_h(0x26);
+  v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F);
+
+  for (x = 0; x < width; x += 8) {
+    src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 0);
+    src1 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 16);
+    vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0);
+    vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0);
+    reg0 = __msa_dotp_u_h(vec0, const_0x4B0F);
+    reg0 = __msa_dpadd_u_h(reg0, vec1, const_0x26);
+    reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 7);
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)reg0, (v16i8)reg0);
+    vec1 = (v16u8)__msa_ilvod_b((v16i8)vec1, (v16i8)vec0);
+    dst0 = (v16u8)__msa_ilvr_b((v16i8)vec1, (v16i8)vec0);
+    dst1 = (v16u8)__msa_ilvl_b((v16i8)vec1, (v16i8)vec0);
+    ST_UB2(dst0, dst1, dst_argb, 16);
+    src_argb += 32;
+    dst_argb += 32;
+  }
+}
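+
+// For reference only (not upstream code): ARGBGrayRow above computes
+//   y = (15 * b + 75 * g + 38 * r + 64) >> 7;   (srari_h rounds)
+// and ARGBSepiaRow below uses the matching fixed-point rows
+//   sb = (17 * b + 68 * g + 35 * r) >> 7;
+//   sg = (22 * b + 88 * g + 45 * r) >> 7;
+//   sr = (24 * b + 98 * g + 50 * r) >> 7;
+// with sg and sr clamped to 255 (sb cannot overflow).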
+
+void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width) {
+  int x;
+  v16u8 src0, src1, dst0, dst1, vec0, vec1, vec2, vec3, vec4, vec5;
+  v8u16 reg0, reg1, reg2;
+  v16u8 const_0x4411 = (v16u8)__msa_fill_h(0x4411);
+  v16u8 const_0x23 = (v16u8)__msa_ldi_h(0x23);
+  v16u8 const_0x5816 = (v16u8)__msa_fill_h(0x5816);
+  v16u8 const_0x2D = (v16u8)__msa_ldi_h(0x2D);
+  v16u8 const_0x6218 = (v16u8)__msa_fill_h(0x6218);
+  v16u8 const_0x32 = (v16u8)__msa_ldi_h(0x32);
+  v8u16 const_0xFF = (v8u16)__msa_ldi_h(0xFF);
+
+  for (x = 0; x < width; x += 8) {
+    src0 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 0);
+    src1 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 16);
+    vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0);
+    vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0);
+    vec3 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec1);
+    reg0 = (v8u16)__msa_dotp_u_h(vec0, const_0x4411);
+    reg1 = (v8u16)__msa_dotp_u_h(vec0, const_0x5816);
+    reg2 = (v8u16)__msa_dotp_u_h(vec0, const_0x6218);
+    reg0 = (v8u16)__msa_dpadd_u_h(reg0, vec1, const_0x23);
+    reg1 = (v8u16)__msa_dpadd_u_h(reg1, vec1, const_0x2D);
+    reg2 = (v8u16)__msa_dpadd_u_h(reg2, vec1, const_0x32);
+    reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 7);
+    reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 7);
+    reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 7);
+    reg1 = (v8u16)__msa_min_u_h((v8u16)reg1, const_0xFF);
+    reg2 = (v8u16)__msa_min_u_h((v8u16)reg2, const_0xFF);
+    vec0 = (v16u8)__msa_pckev_b((v16i8)reg0, (v16i8)reg0);
+    vec1 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg1);
+    vec2 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg2);
+    vec4 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0);
+    vec5 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1);
+    dst0 = (v16u8)__msa_ilvr_b((v16i8)vec5, (v16i8)vec4);
+    dst1 = (v16u8)__msa_ilvl_b((v16i8)vec5, (v16i8)vec4);
+    ST_UB2(dst0, dst1, dst_argb, 16);
+    dst_argb += 32;
+  }
+}
+
+void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444,
+                           uint8_t* dst_argb,
+                           int width) {
+  int x;
+  v16u8 src0, src1;
+  v8u16 vec0, vec1, vec2, vec3;
+  v16u8 dst0, dst1, dst2, dst3;
+
+  for (x = 0; x < width; x += 16) {
+    src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb4444, 0);
+    src1 = (v16u8)__msa_ld_b((const v16u8*)src_argb4444, 16);
+    vec0 = (v8u16)__msa_andi_b(src0, 0x0F);
+    vec1 = (v8u16)__msa_andi_b(src1, 0x0F);
+    vec2 = (v8u16)__msa_andi_b(src0, 0xF0);
+    vec3 = (v8u16)__msa_andi_b(src1, 0xF0);
+    vec0 |= (v8u16)__msa_slli_b((v16i8)vec0, 4);
+    vec1 |= (v8u16)__msa_slli_b((v16i8)vec1, 4);
+    vec2 |= (v8u16)__msa_srli_b((v16i8)vec2, 4);
+    vec3 |= (v8u16)__msa_srli_b((v16i8)vec3, 4);
+    dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0);
+    dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0);
+    dst2 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1);
+    dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1);
+    ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+    src_argb4444 += 32;
+    dst_argb += 64;
+  }
+}
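+
+// For reference only (not upstream code): the low-depth-to-8-bit expansions
+// in ARGB4444ToARGBRow above and the ARGB1555/RGB565 converters below all
+// replicate the top bits into the bottom:
+//   4-bit: (x << 4) | x;  5-bit: (x << 3) | (x >> 2);  6-bit: (x << 2) | (x >> 4);
+// so 0 maps to 0 and each field's maximum maps to 255.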
+
+void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555,
+                           uint8_t* dst_argb,
+                           int width) {
+  int x;
+  v8u16 src0, src1;
+  v8u16 vec0, vec1, vec2, vec3, vec4, vec5;
+  v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6;
+  v16u8 dst0, dst1, dst2, dst3;
+  v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F);
+
+  for (x = 0; x < width; x += 16) {
+    src0 = (v8u16)__msa_ld_h((const v8u16*)src_argb1555, 0);
+    src1 = (v8u16)__msa_ld_h((const v8u16*)src_argb1555, 16);
+    vec0 = src0 & const_0x1F;
+    vec1 = src1 & const_0x1F;
+    src0 = (v8u16)__msa_srli_h((v8i16)src0, 5);
+    src1 = (v8u16)__msa_srli_h((v8i16)src1, 5);
+    vec2 = src0 & const_0x1F;
+    vec3 = src1 & const_0x1F;
+    src0 = (v8u16)__msa_srli_h((v8i16)src0, 5);
+    src1 = (v8u16)__msa_srli_h((v8i16)src1, 5);
+    vec4 = src0 & const_0x1F;
+    vec5 = src1 & const_0x1F;
+    src0 = (v8u16)__msa_srli_h((v8i16)src0, 5);
+    src1 = (v8u16)__msa_srli_h((v8i16)src1, 5);
+    reg0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+    reg1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
+    reg2 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4);
+    reg3 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
+    reg4 = (v16u8)__msa_slli_b((v16i8)reg0, 3);
+    reg5 = (v16u8)__msa_slli_b((v16i8)reg1, 3);
+    reg6 = (v16u8)__msa_slli_b((v16i8)reg2, 3);
+    reg4 |= (v16u8)__msa_srai_b((v16i8)reg0, 2);
+    reg5 |= (v16u8)__msa_srai_b((v16i8)reg1, 2);
+    reg6 |= (v16u8)__msa_srai_b((v16i8)reg2, 2);
+    reg3 = -reg3;
+    reg0 = (v16u8)__msa_ilvr_b((v16i8)reg6, (v16i8)reg4);
+    reg1 = (v16u8)__msa_ilvl_b((v16i8)reg6, (v16i8)reg4);
+    reg2 = (v16u8)__msa_ilvr_b((v16i8)reg3, (v16i8)reg5);
+    reg3 = (v16u8)__msa_ilvl_b((v16i8)reg3, (v16i8)reg5);
+    dst0 = (v16u8)__msa_ilvr_b((v16i8)reg2, (v16i8)reg0);
+    dst1 = (v16u8)__msa_ilvl_b((v16i8)reg2, (v16i8)reg0);
+    dst2 = (v16u8)__msa_ilvr_b((v16i8)reg3, (v16i8)reg1);
+    dst3 = (v16u8)__msa_ilvl_b((v16i8)reg3, (v16i8)reg1);
+    ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+    src_argb1555 += 32;
+    dst_argb += 64;
+  }
+}
+
+void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
+                         uint8_t* dst_argb,
+                         int width) {
+  int x;
+  v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5;
+  v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
+  v16u8 res0, res1, res2, res3, dst0, dst1, dst2, dst3;
+  v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+  v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F);
+  v8u16 const_0x7E0 = (v8u16)__msa_fill_h(0x7E0);
+  v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800);
+
+  for (x = 0; x < width; x += 16) {
+    src0 = (v8u16)__msa_ld_h((const v8u16*)src_rgb565, 0);
+    src1 = (v8u16)__msa_ld_h((const v8u16*)src_rgb565, 16);
+    vec0 = src0 & const_0x1F;
+    vec1 = src0 & const_0x7E0;
+    vec2 = src0 & const_0xF800;
+    vec3 = src1 & const_0x1F;
+    vec4 = src1 & const_0x7E0;
+    vec5 = src1 & const_0xF800;
+    reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3);
+    reg1 = (v8u16)__msa_srli_h((v8i16)vec1, 3);
+    reg2 = (v8u16)__msa_srli_h((v8i16)vec2, 8);
+    reg3 = (v8u16)__msa_slli_h((v8i16)vec3, 3);
+    reg4 = (v8u16)__msa_srli_h((v8i16)vec4, 3);
+    reg5 = (v8u16)__msa_srli_h((v8i16)vec5, 8);
+    reg0 |= (v8u16)__msa_srli_h((v8i16)vec0, 2);
+    reg1 |= (v8u16)__msa_srli_h((v8i16)vec1, 9);
+    reg2 |= (v8u16)__msa_srli_h((v8i16)vec2, 13);
+    reg3 |= (v8u16)__msa_srli_h((v8i16)vec3, 2);
+    reg4 |= (v8u16)__msa_srli_h((v8i16)vec4, 9);
+    reg5 |= (v8u16)__msa_srli_h((v8i16)vec5, 13);
+    res0 = (v16u8)__msa_ilvev_b((v16i8)reg2, (v16i8)reg0);
+    res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)reg1);
+    res2 = (v16u8)__msa_ilvev_b((v16i8)reg5, (v16i8)reg3);
+    res3 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)reg4);
+    dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0);
+    dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0);
+    dst2 = (v16u8)__msa_ilvr_b((v16i8)res3, (v16i8)res2);
+    dst3 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res2);
+    ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+    src_rgb565 += 32;
+    dst_argb += 64;
+  }
+}
+
+void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24,
+                        uint8_t* dst_argb,
+                        int width) {
+  int x;
+  v16u8 src0, src1, src2;
+  v16u8 vec0, vec1, vec2;
+  v16u8 dst0, dst1, dst2, dst3;
+  v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+  v16i8 shuffler = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19};
+
+  for (x = 0; x < width; x += 16) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_rgb24, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_rgb24, 16);
+    src2 = (v16u8)__msa_ld_b((const v16i8*)src_rgb24, 32);
+    vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12);
+    vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8);
+    vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4);
+    dst0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)src0);
+    dst1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec0);
+    dst2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec1);
+    dst3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec2);
+    ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+    src_rgb24 += 48;
+    dst_argb += 64;
+  }
+}
+
+void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
+  int x;
+  v16u8 src0, src1, src2;
+  v16u8 vec0, vec1, vec2;
+  v16u8 dst0, dst1, dst2, dst3;
+  v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
+  v16i8 mask = {2, 1, 0, 16, 5, 4, 3, 17, 8, 7, 6, 18, 11, 10, 9, 19};
+
+  for (x = 0; x < width; x += 16) {
+    src0 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 0);
+    src1 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 16);
+    src2 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 32);
+    vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12);
+    vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8);
+    vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4);
+    dst0 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)src0);
+    dst1 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec0);
+    dst2 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec1);
+    dst3 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec2);
+    ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
+    src_raw += 48;
+    dst_argb += 64;
+  }
+}
+
+void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555,
+                        uint8_t* dst_y,
+                        int width) {
+  int x;
+  v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5;
+  v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
+  v16u8 dst0;
+  v8u16 const_0x19 = (v8u16)__msa_ldi_h(0x19);
+  v8u16 const_0x81 = (v8u16)__msa_ldi_h(0x81);
+  v8u16 const_0x42 = (v8u16)__msa_ldi_h(0x42);
+  v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F);
+  v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
+
+  for (x = 0; x < width; x += 16) {
+    src0 = (v8u16)__msa_ld_b((const v8i16*)src_argb1555, 0);
+    src1 = (v8u16)__msa_ld_b((const v8i16*)src_argb1555, 16);
+    vec0 = src0 & const_0x1F;
+    vec1 = src1 & const_0x1F;
+    src0 = (v8u16)__msa_srai_h((v8i16)src0, 5);
+    src1 = (v8u16)__msa_srai_h((v8i16)src1, 5);
+    vec2 = src0 & const_0x1F;
+    vec3 = src1 & const_0x1F;
+    src0 = (v8u16)__msa_srai_h((v8i16)src0, 5);
+    src1 = (v8u16)__msa_srai_h((v8i16)src1, 5);
+    vec4 = src0 & const_0x1F;
+    vec5 = src1 & const_0x1F;
+    reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3);
+    reg1 = (v8u16)__msa_slli_h((v8i16)vec1, 3);
+    reg0 |= (v8u16)__msa_srai_h((v8i16)vec0, 2);
+    reg1 |= (v8u16)__msa_srai_h((v8i16)vec1, 2);
+    reg2 = (v8u16)__msa_slli_h((v8i16)vec2, 3);
+    reg3 = (v8u16)__msa_slli_h((v8i16)vec3, 3);
+    reg2 |= (v8u16)__msa_srai_h((v8i16)vec2, 2);
+    reg3 |= (v8u16)__msa_srai_h((v8i16)vec3, 2);
+    reg4 = (v8u16)__msa_slli_h((v8i16)vec4, 3);
+    reg5 = (v8u16)__msa_slli_h((v8i16)vec5, 3);
+    reg4 |= (v8u16)__msa_srai_h((v8i16)vec4, 2);
+    reg5 |= (v8u16)__msa_srai_h((v8i16)vec5, 2);
+    reg0 *= const_0x19;
+    reg1 *= const_0x19;
+    reg2 *= const_0x81;
+    reg3 *= const_0x81;
+    reg4 *= const_0x42;
+    reg5 *= const_0x42;
+    reg0 += reg2;
+    reg1 += reg3;
+    reg0 += reg4;
+    reg1 += reg5;
+    reg0 += const_0x1080;
+    reg1 += const_0x1080;
+    reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8);
+    reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
+    ST_UB(dst0, dst_y);
+    src_argb1555 += 32;
+    dst_y += 16;
+  }
+}
+
+void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
+  int x;
+  v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+  v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
+  v4u32 res0, res1, res2, res3;
+  v16u8 dst0;
+  v4u32 const_0x810019 = (v4u32)__msa_fill_w(0x810019);
+  v4u32 const_0x010042 = (v4u32)__msa_fill_w(0x010042);
+  v8i16 const_0x1080 = __msa_fill_h(0x1080);
+  v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F);
+  v8u16 const_0x7E0 = (v8u16)__msa_fill_h(0x7E0);
+  v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800);
+
+  for (x = 0; x < width; x += 16) {
+    src0 = (v8u16)__msa_ld_b((const v8i16*)src_rgb565, 0);
+    src1 = (v8u16)__msa_ld_b((const v8i16*)src_rgb565, 16);
+    vec0 = src0 & const_0x1F;
+    vec1 = src0 & const_0x7E0;
+    vec2 = src0 & const_0xF800;
+    vec3 = src1 & const_0x1F;
+    vec4 = src1 & const_0x7E0;
+    vec5 = src1 & const_0xF800;
+    reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3);
+    reg1 = (v8u16)__msa_srli_h((v8i16)vec1, 3);
+    reg2 = (v8u16)__msa_srli_h((v8i16)vec2, 8);
+    reg3 = (v8u16)__msa_slli_h((v8i16)vec3, 3);
+    reg4 = (v8u16)__msa_srli_h((v8i16)vec4, 3);
+    reg5 = (v8u16)__msa_srli_h((v8i16)vec5, 8);
+    reg0 |= (v8u16)__msa_srli_h((v8i16)vec0, 2);
+    reg1 |= (v8u16)__msa_srli_h((v8i16)vec1, 9);
+    reg2 |= (v8u16)__msa_srli_h((v8i16)vec2, 13);
+    reg3 |= (v8u16)__msa_srli_h((v8i16)vec3, 2);
+    reg4 |= (v8u16)__msa_srli_h((v8i16)vec4, 9);
+    reg5 |= (v8u16)__msa_srli_h((v8i16)vec5, 13);
+    vec0 = (v8u16)__msa_ilvr_h((v8i16)reg1, (v8i16)reg0);
+    vec1 = (v8u16)__msa_ilvl_h((v8i16)reg1, (v8i16)reg0);
+    vec2 = (v8u16)__msa_ilvr_h((v8i16)reg4, (v8i16)reg3);
+    vec3 = (v8u16)__msa_ilvl_h((v8i16)reg4, (v8i16)reg3);
+    vec4 = (v8u16)__msa_ilvr_h(const_0x1080, (v8i16)reg2);
+    vec5 = (v8u16)__msa_ilvl_h(const_0x1080, (v8i16)reg2);
+    vec6 = (v8u16)__msa_ilvr_h(const_0x1080, (v8i16)reg5);
+    vec7 = (v8u16)__msa_ilvl_h(const_0x1080, (v8i16)reg5);
+    res0 = __msa_dotp_u_w(vec0, (v8u16)const_0x810019);
+    res1 = __msa_dotp_u_w(vec1, (v8u16)const_0x810019);
+    res2 = __msa_dotp_u_w(vec2, (v8u16)const_0x810019);
+    res3 = __msa_dotp_u_w(vec3, (v8u16)const_0x810019);
+    res0 = __msa_dpadd_u_w(res0, vec4, (v8u16)const_0x010042);
+    res1 = __msa_dpadd_u_w(res1, vec5, (v8u16)const_0x010042);
+    res2 = __msa_dpadd_u_w(res2, vec6, (v8u16)const_0x010042);
+    res3 = __msa_dpadd_u_w(res3, vec7, (v8u16)const_0x010042);
+    res0 = (v4u32)__msa_srai_w((v4i32)res0, 8);
+    res1 = (v4u32)__msa_srai_w((v4i32)res1, 8);
+    res2 = (v4u32)__msa_srai_w((v4i32)res2, 8);
+    res3 = (v4u32)__msa_srai_w((v4i32)res3, 8);
+    vec0 = (v8u16)__msa_pckev_h((v8i16)res1, (v8i16)res0);
+    vec1 = (v8u16)__msa_pckev_h((v8i16)res3, (v8i16)res2);
+    dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
+    ST_UB(dst0, dst_y);
+    src_rgb565 += 32;
+    dst_y += 16;
+  }
+}
(v16i8)vec0); + ST_UB(dst0, dst_y); + src_argb0 += 48; + dst_y += 16; + } +} + +void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { + int x; + v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0; + v8u16 vec0, vec1, vec2, vec3; + v8u16 const_0x8142 = (v8u16)__msa_fill_h(0x8142); + v8u16 const_0x19 = (v8u16)__msa_fill_h(0x19); + v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); + v16i8 mask0 = {0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 11, 12}; + v16i8 mask1 = {12, 13, 14, 15, 15, 16, 17, 18, + 18, 19, 20, 21, 21, 22, 23, 24}; + v16i8 mask2 = {8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 20}; + v16i8 mask3 = {4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15, 16}; + v16i8 zero = {0}; + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32); + reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0); + reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); + reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1); + reg3 = (v16u8)__msa_vshf_b(mask3, zero, (v16i8)src2); + vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); + vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); + vec2 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0); + vec3 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2); + vec0 = __msa_dotp_u_h((v16u8)vec0, (v16u8)const_0x8142); + vec1 = __msa_dotp_u_h((v16u8)vec1, (v16u8)const_0x8142); + vec0 = __msa_dpadd_u_h(vec0, (v16u8)vec2, (v16u8)const_0x19); + vec1 = __msa_dpadd_u_h(vec1, (v16u8)vec3, (v16u8)const_0x19); + vec0 += const_0x1080; + vec1 += const_0x1080; + vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8); + vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8); + dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); + ST_UB(dst0, dst_y); + src_argb0 += 48; + dst_y += 16; + } +} + +void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + const uint16_t* s = (const uint16_t*)src_argb1555; + const uint16_t* t = (const uint16_t*)(src_argb1555 + src_stride_argb1555); + int64_t res0, res1; + v8u16 src0, src1, src2, src3, reg0, reg1, reg2, reg3; + v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6; + v16u8 dst0; + v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x70); + v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A); + v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26); + v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E); + v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12); + v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); + v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); + + for (x = 0; x < width; x += 16) { + src0 = (v8u16)__msa_ld_b((v8i16*)s, 0); + src1 = (v8u16)__msa_ld_b((v8i16*)s, 16); + src2 = (v8u16)__msa_ld_b((v8i16*)t, 0); + src3 = (v8u16)__msa_ld_b((v8i16*)t, 16); + vec0 = src0 & const_0x1F; + vec1 = src1 & const_0x1F; + vec0 += src2 & const_0x1F; + vec1 += src3 & const_0x1F; + vec0 = (v8u16)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); + src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); + src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); + src2 = (v8u16)__msa_srai_h((v8i16)src2, 5); + src3 = (v8u16)__msa_srai_h((v8i16)src3, 5); + vec2 = src0 & const_0x1F; + vec3 = src1 & const_0x1F; + vec2 += src2 & const_0x1F; + vec3 += src3 & const_0x1F; + vec2 = (v8u16)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); + src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); + src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); + src2 = (v8u16)__msa_srai_h((v8i16)src2, 5); + src3 = 
(v8u16)__msa_srai_h((v8i16)src3, 5); + vec4 = src0 & const_0x1F; + vec5 = src1 & const_0x1F; + vec4 += src2 & const_0x1F; + vec5 += src3 & const_0x1F; + vec4 = (v8u16)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); + vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); + vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); + vec4 = __msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); + vec6 = (v8u16)__msa_slli_h((v8i16)vec0, 1); + vec6 |= (v8u16)__msa_srai_h((v8i16)vec0, 6); + vec0 = (v8u16)__msa_slli_h((v8i16)vec2, 1); + vec0 |= (v8u16)__msa_srai_h((v8i16)vec2, 6); + vec2 = (v8u16)__msa_slli_h((v8i16)vec4, 1); + vec2 |= (v8u16)__msa_srai_h((v8i16)vec4, 6); + reg0 = vec6 * const_0x70; + reg1 = vec0 * const_0x4A; + reg2 = vec2 * const_0x70; + reg3 = vec0 * const_0x5E; + reg0 += const_0x8080; + reg1 += vec2 * const_0x26; + reg2 += const_0x8080; + reg3 += vec6 * const_0x12; + reg0 -= reg1; + reg2 -= reg3; + reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8); + reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 8); + dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); + res0 = __msa_copy_u_d((v2i64)dst0, 0); + res1 = __msa_copy_u_d((v2i64)dst0, 1); + SD(res0, dst_u); + SD(res1, dst_v); + s += 16; + t += 16; + dst_u += 8; + dst_v += 8; + } +} + +void RGB565ToUVRow_MSA(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + const uint16_t* s = (const uint16_t*)src_rgb565; + const uint16_t* t = (const uint16_t*)(src_rgb565 + src_stride_rgb565); + int64_t res0, res1; + v8u16 src0, src1, src2, src3, reg0, reg1, reg2, reg3; + v8u16 vec0, vec1, vec2, vec3, vec4, vec5; + v16u8 dst0; + v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x70); + v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A); + v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26); + v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E); + v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12); + v8u16 const_32896 = (v8u16)__msa_fill_h(0x8080); + v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); + v8u16 const_0x3F = (v8u16)__msa_fill_h(0x3F); + + for (x = 0; x < width; x += 16) { + src0 = (v8u16)__msa_ld_b((v8i16*)s, 0); + src1 = (v8u16)__msa_ld_b((v8i16*)s, 16); + src2 = (v8u16)__msa_ld_b((v8i16*)t, 0); + src3 = (v8u16)__msa_ld_b((v8i16*)t, 16); + vec0 = src0 & const_0x1F; + vec1 = src1 & const_0x1F; + vec0 += src2 & const_0x1F; + vec1 += src3 & const_0x1F; + vec0 = (v8u16)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); + src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); + src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); + src2 = (v8u16)__msa_srai_h((v8i16)src2, 5); + src3 = (v8u16)__msa_srai_h((v8i16)src3, 5); + vec2 = src0 & const_0x3F; + vec3 = src1 & const_0x3F; + vec2 += src2 & const_0x3F; + vec3 += src3 & const_0x3F; + vec1 = (v8u16)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); + src0 = (v8u16)__msa_srai_h((v8i16)src0, 6); + src1 = (v8u16)__msa_srai_h((v8i16)src1, 6); + src2 = (v8u16)__msa_srai_h((v8i16)src2, 6); + src3 = (v8u16)__msa_srai_h((v8i16)src3, 6); + vec4 = src0 & const_0x1F; + vec5 = src1 & const_0x1F; + vec4 += src2 & const_0x1F; + vec5 += src3 & const_0x1F; + vec2 = (v8u16)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); + vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); + vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); + vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); + vec3 = (v8u16)__msa_slli_h((v8i16)vec0, 1); + vec3 |= (v8u16)__msa_srai_h((v8i16)vec0, 6); + vec4 = (v8u16)__msa_slli_h((v8i16)vec2, 1); + vec4 |= (v8u16)__msa_srai_h((v8i16)vec2, 6); + reg0 = vec3 * const_0x70; + reg1 = vec1 * const_0x4A; + reg2 = vec4 * const_0x70; + reg3 = vec1 * const_0x5E; + reg0 += 
const_32896; + reg1 += vec4 * const_0x26; + reg2 += const_32896; + reg3 += vec3 * const_0x12; + reg0 -= reg1; + reg2 -= reg3; + reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8); + reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 8); + dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); + res0 = __msa_copy_u_d((v2i64)dst0, 0); + res1 = __msa_copy_u_d((v2i64)dst0, 1); + SD(res0, dst_u); + SD(res1, dst_v); + s += 16; + t += 16; + dst_u += 8; + dst_v += 8; + } +} + +void RGB24ToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + const uint8_t* s = src_rgb0; + const uint8_t* t = src_rgb0 + src_stride_rgb; + int64_t res0, res1; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7; + v16u8 inp0, inp1, inp2, inp3, inp4, inp5; + v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8i16 reg0, reg1, reg2, reg3; + v16u8 dst0; + v8u16 const_0x70 = (v8u16)__msa_fill_h(0x70); + v8u16 const_0x4A = (v8u16)__msa_fill_h(0x4A); + v8u16 const_0x26 = (v8u16)__msa_fill_h(0x26); + v8u16 const_0x5E = (v8u16)__msa_fill_h(0x5E); + v8u16 const_0x12 = (v8u16)__msa_fill_h(0x12); + v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); + v16i8 mask = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19}; + v16i8 zero = {0}; + + for (x = 0; x < width; x += 16) { + inp0 = (v16u8)__msa_ld_b((const v16i8*)s, 0); + inp1 = (v16u8)__msa_ld_b((const v16i8*)s, 16); + inp2 = (v16u8)__msa_ld_b((const v16i8*)s, 32); + inp3 = (v16u8)__msa_ld_b((const v16i8*)t, 0); + inp4 = (v16u8)__msa_ld_b((const v16i8*)t, 16); + inp5 = (v16u8)__msa_ld_b((const v16i8*)t, 32); + src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12); + src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12); + src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8); + src6 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp4, 8); + src3 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp2, 4); + src7 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp5, 4); + src0 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp0); + src1 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src1); + src2 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src2); + src3 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src3); + src4 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp3); + src5 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src5); + src6 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src6); + src7 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src7); + vec0 = (v8u16)__msa_ilvr_b((v16i8)src4, (v16i8)src0); + vec1 = (v8u16)__msa_ilvl_b((v16i8)src4, (v16i8)src0); + vec2 = (v8u16)__msa_ilvr_b((v16i8)src5, (v16i8)src1); + vec3 = (v8u16)__msa_ilvl_b((v16i8)src5, (v16i8)src1); + vec4 = (v8u16)__msa_ilvr_b((v16i8)src6, (v16i8)src2); + vec5 = (v8u16)__msa_ilvl_b((v16i8)src6, (v16i8)src2); + vec6 = (v8u16)__msa_ilvr_b((v16i8)src7, (v16i8)src3); + vec7 = (v8u16)__msa_ilvl_b((v16i8)src7, (v16i8)src3); + vec0 = (v8u16)__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); + vec1 = (v8u16)__msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); + vec2 = (v8u16)__msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); + vec3 = (v8u16)__msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); + vec4 = (v8u16)__msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); + vec5 = (v8u16)__msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); + vec6 = (v8u16)__msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); + vec7 = (v8u16)__msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); + reg0 = (v8i16)__msa_pckev_d((v2i64)vec1, (v2i64)vec0); + reg1 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec2); + reg2 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4); + reg3 = 
(v8i16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); + reg0 += (v8i16)__msa_pckod_d((v2i64)vec1, (v2i64)vec0); + reg1 += (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec2); + reg2 += (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec4); + reg3 += (v8i16)__msa_pckod_d((v2i64)vec7, (v2i64)vec6); + reg0 = __msa_srai_h((v8i16)reg0, 2); + reg1 = __msa_srai_h((v8i16)reg1, 2); + reg2 = __msa_srai_h((v8i16)reg2, 2); + reg3 = __msa_srai_h((v8i16)reg3, 2); + vec4 = (v8u16)__msa_pckev_h(reg1, reg0); + vec5 = (v8u16)__msa_pckev_h(reg3, reg2); + vec6 = (v8u16)__msa_pckod_h(reg1, reg0); + vec7 = (v8u16)__msa_pckod_h(reg3, reg2); + vec0 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4); + vec1 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6); + vec2 = (v8u16)__msa_pckod_h((v8i16)vec5, (v8i16)vec4); + vec3 = vec0 * const_0x70; + vec4 = vec1 * const_0x4A; + vec5 = vec2 * const_0x26; + vec2 *= const_0x70; + vec1 *= const_0x5E; + vec0 *= const_0x12; + reg0 = __msa_subv_h((v8i16)vec3, (v8i16)vec4); + reg1 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec5); + reg2 = __msa_subv_h((v8i16)vec2, (v8i16)vec1); + reg3 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec0); + reg0 += reg1; + reg2 += reg3; + reg0 = __msa_srai_h(reg0, 8); + reg2 = __msa_srai_h(reg2, 8); + dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); + res0 = __msa_copy_u_d((v2i64)dst0, 0); + res1 = __msa_copy_u_d((v2i64)dst0, 1); + SD(res0, dst_u); + SD(res1, dst_v); + t += 48; + s += 48; + dst_u += 8; + dst_v += 8; + } +} + +void RAWToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + const uint8_t* s = src_rgb0; + const uint8_t* t = src_rgb0 + src_stride_rgb; + int64_t res0, res1; + v16u8 inp0, inp1, inp2, inp3, inp4, inp5; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7; + v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8i16 reg0, reg1, reg2, reg3; + v16u8 dst0; + v8u16 const_0x70 = (v8u16)__msa_fill_h(0x70); + v8u16 const_0x4A = (v8u16)__msa_fill_h(0x4A); + v8u16 const_0x26 = (v8u16)__msa_fill_h(0x26); + v8u16 const_0x5E = (v8u16)__msa_fill_h(0x5E); + v8u16 const_0x12 = (v8u16)__msa_fill_h(0x12); + v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); + v16i8 mask = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19}; + v16i8 zero = {0}; + + for (x = 0; x < width; x += 16) { + inp0 = (v16u8)__msa_ld_b((const v16i8*)s, 0); + inp1 = (v16u8)__msa_ld_b((const v16i8*)s, 16); + inp2 = (v16u8)__msa_ld_b((const v16i8*)s, 32); + inp3 = (v16u8)__msa_ld_b((const v16i8*)t, 0); + inp4 = (v16u8)__msa_ld_b((const v16i8*)t, 16); + inp5 = (v16u8)__msa_ld_b((const v16i8*)t, 32); + src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12); + src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12); + src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8); + src6 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp4, 8); + src3 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp2, 4); + src7 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp5, 4); + src0 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp0); + src1 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src1); + src2 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src2); + src3 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src3); + src4 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp3); + src5 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src5); + src6 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src6); + src7 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src7); + vec0 = (v8u16)__msa_ilvr_b((v16i8)src4, (v16i8)src0); + vec1 = 
(v8u16)__msa_ilvl_b((v16i8)src4, (v16i8)src0); + vec2 = (v8u16)__msa_ilvr_b((v16i8)src5, (v16i8)src1); + vec3 = (v8u16)__msa_ilvl_b((v16i8)src5, (v16i8)src1); + vec4 = (v8u16)__msa_ilvr_b((v16i8)src6, (v16i8)src2); + vec5 = (v8u16)__msa_ilvl_b((v16i8)src6, (v16i8)src2); + vec6 = (v8u16)__msa_ilvr_b((v16i8)src7, (v16i8)src3); + vec7 = (v8u16)__msa_ilvl_b((v16i8)src7, (v16i8)src3); + vec0 = (v8u16)__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); + vec1 = (v8u16)__msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); + vec2 = (v8u16)__msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); + vec3 = (v8u16)__msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); + vec4 = (v8u16)__msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); + vec5 = (v8u16)__msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); + vec6 = (v8u16)__msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); + vec7 = (v8u16)__msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); + reg0 = (v8i16)__msa_pckev_d((v2i64)vec1, (v2i64)vec0); + reg1 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec2); + reg2 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4); + reg3 = (v8i16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); + reg0 += (v8i16)__msa_pckod_d((v2i64)vec1, (v2i64)vec0); + reg1 += (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec2); + reg2 += (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec4); + reg3 += (v8i16)__msa_pckod_d((v2i64)vec7, (v2i64)vec6); + reg0 = __msa_srai_h(reg0, 2); + reg1 = __msa_srai_h(reg1, 2); + reg2 = __msa_srai_h(reg2, 2); + reg3 = __msa_srai_h(reg3, 2); + vec4 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); + vec5 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); + vec6 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0); + vec7 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2); + vec0 = (v8u16)__msa_pckod_h((v8i16)vec5, (v8i16)vec4); + vec1 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6); + vec2 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4); + vec3 = vec0 * const_0x70; + vec4 = vec1 * const_0x4A; + vec5 = vec2 * const_0x26; + vec2 *= const_0x70; + vec1 *= const_0x5E; + vec0 *= const_0x12; + reg0 = __msa_subv_h((v8i16)vec3, (v8i16)vec4); + reg1 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec5); + reg2 = __msa_subv_h((v8i16)vec2, (v8i16)vec1); + reg3 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec0); + reg0 += reg1; + reg2 += reg3; + reg0 = __msa_srai_h(reg0, 8); + reg2 = __msa_srai_h(reg2, 8); + dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); + res0 = __msa_copy_u_d((v2i64)dst0, 0); + res1 = __msa_copy_u_d((v2i64)dst0, 1); + SD(res0, dst_u); + SD(res1, dst_v); + t += 48; + s += 48; + dst_u += 8; + dst_v += 8; + } +} + +void NV12ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + int x; + uint64_t val0, val1; + v16u8 src0, src1, res0, res1, dst0, dst1; + v8i16 vec0, vec1, vec2; + v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; + v4i32 vec_ubvr, vec_ugvg; + v16u8 zero = {0}; + v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); + + YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, + vec_br, vec_yg); + vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); + vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); + + for (x = 0; x < width; x += 8) { + val0 = LD(src_y); + val1 = LD(src_uv); + src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); + src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); + YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, + vec0, vec1, vec2); + res0 = (v16u8)__msa_ilvev_b((v16i8)vec2, (v16i8)vec0); + res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1); + 
dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); + dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); + ST_UB2(dst0, dst1, dst_argb, 16); + src_y += 8; + src_uv += 8; + dst_argb += 32; + } +} + +void NV12ToRGB565Row_MSA(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width) { + int x; + uint64_t val0, val1; + v16u8 src0, src1, dst0; + v8i16 vec0, vec1, vec2; + v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; + v4i32 vec_ubvr, vec_ugvg; + v16u8 zero = {0}; + + YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, + vec_br, vec_yg); + vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); + vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); + + for (x = 0; x < width; x += 8) { + val0 = LD(src_y); + val1 = LD(src_uv); + src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); + src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); + YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, + vec0, vec1, vec2); + vec0 = vec0 >> 3; + vec1 = (vec1 >> 2) << 5; + vec2 = (vec2 >> 3) << 11; + dst0 = (v16u8)(vec0 | vec1 | vec2); + ST_UB(dst0, dst_rgb565); + src_y += 8; + src_uv += 8; + dst_rgb565 += 16; + } +} + +void NV21ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + int x; + uint64_t val0, val1; + v16u8 src0, src1, res0, res1, dst0, dst1; + v8i16 vec0, vec1, vec2; + v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; + v4i32 vec_ubvr, vec_ugvg; + v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); + v16u8 zero = {0}; + v16i8 shuffler = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; + + YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, + vec_br, vec_yg); + vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); + vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); + + for (x = 0; x < width; x += 8) { + val0 = LD(src_y); + val1 = LD(src_vu); + src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); + src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); + src1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1); + YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, + vec0, vec1, vec2); + res0 = (v16u8)__msa_ilvev_b((v16i8)vec2, (v16i8)vec0); + res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1); + dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); + dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); + ST_UB2(dst0, dst1, dst_argb, 16); + src_y += 8; + src_vu += 8; + dst_argb += 32; + } +} + +void SobelRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width) { + int x; + v16u8 src0, src1, vec0, dst0, dst1, dst2, dst3; + v16i8 mask0 = {0, 0, 0, 16, 1, 1, 1, 16, 2, 2, 2, 16, 3, 3, 3, 16}; + v16i8 const_0x4 = __msa_ldi_b(0x4); + v16i8 mask1 = mask0 + const_0x4; + v16i8 mask2 = mask1 + const_0x4; + v16i8 mask3 = mask2 + const_0x4; + v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 0); + vec0 = __msa_adds_u_b(src0, src1); + dst0 = (v16u8)__msa_vshf_b(mask0, (v16i8)alpha, (v16i8)vec0); + dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)alpha, (v16i8)vec0); + dst2 = (v16u8)__msa_vshf_b(mask2, (v16i8)alpha, (v16i8)vec0); + dst3 = (v16u8)__msa_vshf_b(mask3, (v16i8)alpha, (v16i8)vec0); + ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); + src_sobelx 
+= 16; + src_sobely += 16; + dst_argb += 64; + } +} + +void SobelToPlaneRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width) { + int x; + v16u8 src0, src1, src2, src3, dst0, dst1; + + for (x = 0; x < width; x += 32) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 0); + src3 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 16); + dst0 = __msa_adds_u_b(src0, src2); + dst1 = __msa_adds_u_b(src1, src3); + ST_UB2(dst0, dst1, dst_y, 16); + src_sobelx += 32; + src_sobely += 32; + dst_y += 32; + } +} + +void SobelXYRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width) { + int x; + v16u8 src0, src1, vec0, vec1, vec2; + v16u8 reg0, reg1, dst0, dst1, dst2, dst3; + v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 0); + vec0 = __msa_adds_u_b(src0, src1); + vec1 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src1); + vec2 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src1); + reg0 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)vec0); + reg1 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)vec0); + dst0 = (v16u8)__msa_ilvr_b((v16i8)reg0, (v16i8)vec1); + dst1 = (v16u8)__msa_ilvl_b((v16i8)reg0, (v16i8)vec1); + dst2 = (v16u8)__msa_ilvr_b((v16i8)reg1, (v16i8)vec2); + dst3 = (v16u8)__msa_ilvl_b((v16i8)reg1, (v16i8)vec2); + ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); + src_sobelx += 16; + src_sobely += 16; + dst_argb += 64; + } +} + +void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { + int x; + v16u8 src0, src1, src2, src3, dst0; + v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F); + v16u8 const_0x26 = (v16u8)__msa_fill_h(0x26); + v8u16 const_0x40 = (v8u16)__msa_fill_h(0x40); + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32); + src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48); + ARGBTOY(src0, src1, src2, src3, const_0x4B0F, const_0x26, const_0x40, 7, + dst0); + ST_UB(dst0, dst_y); + src_argb0 += 64; + dst_y += 16; + } +} + +void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { + int x; + v16u8 src0, src1, src2, src3, dst0; + v16u8 const_0x4200 = (v16u8)__msa_fill_h(0x4200); + v16u8 const_0x1981 = (v16u8)__msa_fill_h(0x1981); + v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32); + src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48); + ARGBTOY(src0, src1, src2, src3, const_0x4200, const_0x1981, const_0x1080, 8, + dst0); + ST_UB(dst0, dst_y); + src_argb0 += 64; + dst_y += 16; + } +} + +void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { + int x; + v16u8 src0, src1, src2, src3, dst0; + v16u8 const_0x8142 = (v16u8)__msa_fill_h(0x8142); + v16u8 const_0x19 = (v16u8)__msa_fill_h(0x19); + v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32); + src3 = 
(v16u8)__msa_ld_b((const v16i8*)src_argb0, 48); + ARGBTOY(src0, src1, src2, src3, const_0x8142, const_0x19, const_0x1080, 8, + dst0); + ST_UB(dst0, dst_y); + src_argb0 += 64; + dst_y += 16; + } +} + +void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { + int x; + v16u8 src0, src1, src2, src3, dst0; + v16u8 const_0x1900 = (v16u8)__msa_fill_h(0x1900); + v16u8 const_0x4281 = (v16u8)__msa_fill_h(0x4281); + v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32); + src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48); + ARGBTOY(src0, src1, src2, src3, const_0x1900, const_0x4281, const_0x1080, 8, + dst0); + ST_UB(dst0, dst_y); + src_argb0 += 64; + dst_y += 16; + } +} + +void ARGBToUVJRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + const uint8_t* s = src_rgb0; + const uint8_t* t = src_rgb0 + src_stride_rgb; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7; + v16u8 vec0, vec1, vec2, vec3; + v16u8 dst0, dst1; + v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; + v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, + 18, 19, 22, 23, 26, 27, 30, 31}; + v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31}; + v16i8 shuffler3 = {1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 30}; + v16u8 const_0x7F = (v16u8)__msa_fill_h(0x7F); + v16u8 const_0x6B14 = (v16u8)__msa_fill_h(0x6B14); + v16u8 const_0x2B54 = (v16u8)__msa_fill_h(0x2B54); + v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); + + for (x = 0; x < width; x += 32) { + src0 = (v16u8)__msa_ld_b((const v16i8*)s, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)s, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)s, 32); + src3 = (v16u8)__msa_ld_b((const v16i8*)s, 48); + src4 = (v16u8)__msa_ld_b((const v16i8*)t, 0); + src5 = (v16u8)__msa_ld_b((const v16i8*)t, 16); + src6 = (v16u8)__msa_ld_b((const v16i8*)t, 32); + src7 = (v16u8)__msa_ld_b((const v16i8*)t, 48); + src0 = __msa_aver_u_b(src0, src4); + src1 = __msa_aver_u_b(src1, src5); + src2 = __msa_aver_u_b(src2, src6); + src3 = __msa_aver_u_b(src3, src7); + src4 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0); + src5 = (v16u8)__msa_pckev_w((v4i32)src3, (v4i32)src2); + src6 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); + src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2); + vec0 = __msa_aver_u_b(src4, src6); + vec1 = __msa_aver_u_b(src5, src7); + src0 = (v16u8)__msa_ld_b((v16i8*)s, 64); + src1 = (v16u8)__msa_ld_b((v16i8*)s, 80); + src2 = (v16u8)__msa_ld_b((v16i8*)s, 96); + src3 = (v16u8)__msa_ld_b((v16i8*)s, 112); + src4 = (v16u8)__msa_ld_b((v16i8*)t, 64); + src5 = (v16u8)__msa_ld_b((v16i8*)t, 80); + src6 = (v16u8)__msa_ld_b((v16i8*)t, 96); + src7 = (v16u8)__msa_ld_b((v16i8*)t, 112); + src0 = __msa_aver_u_b(src0, src4); + src1 = __msa_aver_u_b(src1, src5); + src2 = __msa_aver_u_b(src2, src6); + src3 = __msa_aver_u_b(src3, src7); + src4 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0); + src5 = (v16u8)__msa_pckev_w((v4i32)src3, (v4i32)src2); + src6 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); + src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2); + vec2 = __msa_aver_u_b(src4, src6); + vec3 = __msa_aver_u_b(src5, src7); + ARGBTOUV(vec0, vec1, vec2, vec3, const_0x6B14, const_0x7F, const_0x2B54, + const_0x8080, shuffler1, shuffler0, shuffler2, shuffler3, 
dst0, + dst1); + ST_UB(dst0, dst_v); + ST_UB(dst1, dst_u); + s += 128; + t += 128; + dst_v += 16; + dst_u += 16; + } +} + +void BGRAToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + const uint8_t* s = src_rgb0; + const uint8_t* t = src_rgb0 + src_stride_rgb; + v16u8 dst0, dst1, vec0, vec1, vec2, vec3; + v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; + v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, + 18, 19, 22, 23, 26, 27, 30, 31}; + v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31}; + v16i8 shuffler3 = {2, 1, 6, 5, 10, 9, 14, 13, 18, 17, 22, 21, 26, 25, 30, 29}; + v16u8 const_0x125E = (v16u8)__msa_fill_h(0x125E); + v16u8 const_0x7000 = (v16u8)__msa_fill_h(0x7000); + v16u8 const_0x264A = (v16u8)__msa_fill_h(0x264A); + v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); + + for (x = 0; x < width; x += 32) { + READ_ARGB(s, t, vec0, vec1, vec2, vec3); + ARGBTOUV(vec0, vec1, vec2, vec3, const_0x125E, const_0x7000, const_0x264A, + const_0x8080, shuffler0, shuffler1, shuffler2, shuffler3, dst0, + dst1); + ST_UB(dst0, dst_v); + ST_UB(dst1, dst_u); + s += 128; + t += 128; + dst_v += 16; + dst_u += 16; + } +} + +void ABGRToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + const uint8_t* s = src_rgb0; + const uint8_t* t = src_rgb0 + src_stride_rgb; + v16u8 src0, src1, src2, src3; + v16u8 dst0, dst1; + v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; + v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, + 18, 19, 22, 23, 26, 27, 30, 31}; + v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31}; + v16i8 shuffler3 = {1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 30}; + v16u8 const_0x4A26 = (v16u8)__msa_fill_h(0x4A26); + v16u8 const_0x0070 = (v16u8)__msa_fill_h(0x0070); + v16u8 const_0x125E = (v16u8)__msa_fill_h(0x125E); + v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); + + for (x = 0; x < width; x += 32) { + READ_ARGB(s, t, src0, src1, src2, src3); + ARGBTOUV(src0, src1, src2, src3, const_0x4A26, const_0x0070, const_0x125E, + const_0x8080, shuffler1, shuffler0, shuffler2, shuffler3, dst0, + dst1); + ST_UB(dst0, dst_u); + ST_UB(dst1, dst_v); + s += 128; + t += 128; + dst_u += 16; + dst_v += 16; + } +} + +void RGBAToUVRow_MSA(const uint8_t* src_rgb0, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + const uint8_t* s = src_rgb0; + const uint8_t* t = src_rgb0 + src_stride_rgb; + v16u8 dst0, dst1, vec0, vec1, vec2, vec3; + v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; + v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, + 18, 19, 22, 23, 26, 27, 30, 31}; + v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31}; + v16i8 shuffler3 = {2, 1, 6, 5, 10, 9, 14, 13, 18, 17, 22, 21, 26, 25, 30, 29}; + v16u8 const_0x125E = (v16u8)__msa_fill_h(0x264A); + v16u8 const_0x7000 = (v16u8)__msa_fill_h(0x7000); + v16u8 const_0x264A = (v16u8)__msa_fill_h(0x125E); + v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); + + for (x = 0; x < width; x += 32) { + READ_ARGB(s, t, vec0, vec1, vec2, vec3); + ARGBTOUV(vec0, vec1, vec2, vec3, const_0x125E, const_0x7000, const_0x264A, + const_0x8080, shuffler0, shuffler1, shuffler2, shuffler3, dst0, + dst1); + ST_UB(dst0, dst_u); + ST_UB(dst1, dst_v); + s += 128; + t += 128; + dst_u += 16; + dst_v += 16; + } +} + +void I444ToARGBRow_MSA(const uint8_t* src_y, 
+ const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + int x; + v16u8 src0, src1, src2, dst0, dst1; + v8u16 vec0, vec1, vec2; + v4i32 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9; + v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; + v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); + v8i16 zero = {0}; + + YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, + vec_br, vec_yg); + + for (x = 0; x < width; x += 8) { + READI444(src_y, src_u, src_v, src0, src1, src2); + vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); + reg0 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0); + reg1 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0); + reg0 *= vec_yg; + reg1 *= vec_yg; + reg0 = __msa_srai_w(reg0, 16); + reg1 = __msa_srai_w(reg1, 16); + reg4 = reg0 + vec_br; + reg5 = reg1 + vec_br; + reg2 = reg0 + vec_bg; + reg3 = reg1 + vec_bg; + reg0 += vec_bb; + reg1 += vec_bb; + vec0 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1); + vec1 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src2); + reg6 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0); + reg7 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0); + reg8 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec1); + reg9 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec1); + reg0 -= reg6 * vec_ub; + reg1 -= reg7 * vec_ub; + reg2 -= reg6 * vec_ug; + reg3 -= reg7 * vec_ug; + reg4 -= reg8 * vec_vr; + reg5 -= reg9 * vec_vr; + reg2 -= reg8 * vec_vg; + reg3 -= reg9 * vec_vg; + reg0 = __msa_srai_w(reg0, 6); + reg1 = __msa_srai_w(reg1, 6); + reg2 = __msa_srai_w(reg2, 6); + reg3 = __msa_srai_w(reg3, 6); + reg4 = __msa_srai_w(reg4, 6); + reg5 = __msa_srai_w(reg5, 6); + CLIP_0TO255(reg0, reg1, reg2, reg3, reg4, reg5); + vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); + vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); + vec2 = (v8u16)__msa_pckev_h((v8i16)reg5, (v8i16)reg4); + vec0 = (v8u16)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); + vec1 = (v8u16)__msa_ilvev_b((v16i8)alpha, (v16i8)vec2); + dst0 = (v16u8)__msa_ilvr_h((v8i16)vec1, (v8i16)vec0); + dst1 = (v16u8)__msa_ilvl_h((v8i16)vec1, (v8i16)vec0); + ST_UB2(dst0, dst1, dst_argb, 16); + src_y += 8; + src_u += 8; + src_v += 8; + dst_argb += 32; + } +} + +void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) { + int x; + v16u8 src0, res0, res1, res2, res3, res4, dst0, dst1, dst2, dst3; + v8i16 vec0, vec1; + v4i32 reg0, reg1, reg2, reg3; + v4i32 vec_yg = __msa_fill_w(0x4A35); + v8i16 vec_ygb = __msa_fill_h(0xFB78); + v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); + v8i16 max = __msa_ldi_h(0xFF); + v8i16 zero = {0}; + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_y, 0); + vec0 = (v8i16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); + vec1 = (v8i16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); + reg0 = (v4i32)__msa_ilvr_h(zero, vec0); + reg1 = (v4i32)__msa_ilvl_h(zero, vec0); + reg2 = (v4i32)__msa_ilvr_h(zero, vec1); + reg3 = (v4i32)__msa_ilvl_h(zero, vec1); + reg0 *= vec_yg; + reg1 *= vec_yg; + reg2 *= vec_yg; + reg3 *= vec_yg; + reg0 = __msa_srai_w(reg0, 16); + reg1 = __msa_srai_w(reg1, 16); + reg2 = __msa_srai_w(reg2, 16); + reg3 = __msa_srai_w(reg3, 16); + vec0 = (v8i16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); + vec1 = (v8i16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); + vec0 += vec_ygb; + vec1 += vec_ygb; + vec0 = __msa_srai_h(vec0, 6); + vec1 = __msa_srai_h(vec1, 6); + vec0 = __msa_maxi_s_h(vec0, 0); + vec1 = __msa_maxi_s_h(vec1, 0); + vec0 = __msa_min_s_h(max, 
vec0); + vec1 = __msa_min_s_h(max, vec1); + res0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); + res1 = (v16u8)__msa_ilvr_b((v16i8)res0, (v16i8)res0); + res2 = (v16u8)__msa_ilvl_b((v16i8)res0, (v16i8)res0); + res3 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)res0); + res4 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)res0); + dst0 = (v16u8)__msa_ilvr_b((v16i8)res3, (v16i8)res1); + dst1 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res1); + dst2 = (v16u8)__msa_ilvr_b((v16i8)res4, (v16i8)res2); + dst3 = (v16u8)__msa_ilvl_b((v16i8)res4, (v16i8)res2); + ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); + src_y += 16; + dst_argb += 64; + } +} + +void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) { + int x; + v16u8 src0, vec0, vec1, vec2, vec3, dst0, dst1, dst2, dst3; + v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_y, 0); + vec0 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src0); + vec1 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src0); + vec2 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)src0); + vec3 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)src0); + dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); + dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0); + dst2 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); + dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1); + ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); + src_y += 16; + dst_argb += 64; + } +} + +void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + int x; + v16u8 src0, src1, src2; + v8i16 vec0, vec1, vec2; + v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; + v4i32 vec_ubvr, vec_ugvg; + v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); + + YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, + vec_br, vec_yg); + vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); + vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); + + for (x = 0; x < width; x += 8) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_yuy2, 0); + src1 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); + src2 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); + YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, + vec0, vec1, vec2); + STOREARGB(vec0, vec1, vec2, alpha, dst_argb); + src_yuy2 += 16; + dst_argb += 32; + } +} + +void UYVYToARGBRow_MSA(const uint8_t* src_uyvy, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { + int x; + v16u8 src0, src1, src2; + v8i16 vec0, vec1, vec2; + v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; + v4i32 vec_ubvr, vec_ugvg; + v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); + + YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, + vec_br, vec_yg); + vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); + vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); + + for (x = 0; x < width; x += 8) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_uyvy, 0); + src1 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); + src2 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); + YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, + vec0, vec1, vec2); + STOREARGB(vec0, vec1, vec2, alpha, dst_argb); + src_uyvy += 16; + dst_argb += 32; + } +} + +void InterpolateRow_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int width, + int32_t source_y_fraction) { + int32_t y1_fraction = source_y_fraction; + int32_t y0_fraction = 256 - 
y1_fraction; + uint16_t y_fractions; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; + int x; + v16u8 src0, src1, src2, src3, dst0, dst1; + v8u16 vec0, vec1, vec2, vec3, y_frac; + + if (0 == y1_fraction) { + memcpy(dst_ptr, src_ptr, width); + return; + } + + if (128 == y1_fraction) { + for (x = 0; x < width; x += 32) { + src0 = (v16u8)__msa_ld_b((const v16i8*)s, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)s, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)t, 0); + src3 = (v16u8)__msa_ld_b((const v16i8*)t, 16); + dst0 = __msa_aver_u_b(src0, src2); + dst1 = __msa_aver_u_b(src1, src3); + ST_UB2(dst0, dst1, dst_ptr, 16); + s += 32; + t += 32; + dst_ptr += 32; + } + return; + } + + y_fractions = (uint16_t)(y0_fraction + (y1_fraction << 8)); + y_frac = (v8u16)__msa_fill_h(y_fractions); + + for (x = 0; x < width; x += 32) { + src0 = (v16u8)__msa_ld_b((const v16i8*)s, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)s, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)t, 0); + src3 = (v16u8)__msa_ld_b((const v16i8*)t, 16); + vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); + vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); + vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); + vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); + vec0 = (v8u16)__msa_dotp_u_h((v16u8)vec0, (v16u8)y_frac); + vec1 = (v8u16)__msa_dotp_u_h((v16u8)vec1, (v16u8)y_frac); + vec2 = (v8u16)__msa_dotp_u_h((v16u8)vec2, (v16u8)y_frac); + vec3 = (v8u16)__msa_dotp_u_h((v16u8)vec3, (v16u8)y_frac); + vec0 = (v8u16)__msa_srari_h((v8i16)vec0, 8); + vec1 = (v8u16)__msa_srari_h((v8i16)vec1, 8); + vec2 = (v8u16)__msa_srari_h((v8i16)vec2, 8); + vec3 = (v8u16)__msa_srari_h((v8i16)vec3, 8); + dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); + dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); + ST_UB2(dst0, dst1, dst_ptr, 16); + s += 32; + t += 32; + dst_ptr += 32; + } +} + +void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width) { + int x; + v4i32 dst0 = __builtin_msa_fill_w(v32); + + for (x = 0; x < width; x += 4) { + ST_UB(dst0, dst_argb); + dst_argb += 16; + } +} + +void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { + int x; + v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2; + v16i8 shuffler0 = {2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17}; + v16i8 shuffler1 = {8, 7, 12, 11, 10, 15, 14, 13, + 18, 17, 16, 21, 20, 19, 24, 23}; + v16i8 shuffler2 = {14, 19, 18, 17, 22, 21, 20, 25, + 24, 23, 28, 27, 26, 31, 30, 29}; + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 32); + src3 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 8); + src4 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); + dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); + dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src4, (v16i8)src3); + dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src2, (v16i8)src1); + ST_UB2(dst0, dst1, dst_rgb24, 16); + ST_UB(dst2, (dst_rgb24 + 32)); + src_raw += 48; + dst_rgb24 += 48; + } +} + +void MergeUVRow_MSA(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width) { + int x; + v16u8 src0, src1, dst0, dst1; + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_u, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_v, 0); + dst0 = (v16u8)__msa_ilvr_b((v16i8)src1, (v16i8)src0); + dst1 = (v16u8)__msa_ilvl_b((v16i8)src1, (v16i8)src0); + ST_UB2(dst0, dst1, 
dst_uv, 16); + src_u += 16; + src_v += 16; + dst_uv += 32; + } +} + +void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { + int i; + v16u8 src0, src1, src2, src3, vec0, vec1, dst0; + + for (i = 0; i < width; i += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32); + src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48); + vec0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); + vec1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); + dst0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); + ST_UB(dst0, dst_a); + src_argb += 64; + dst_a += 16; + } +} + +void ARGBBlendRow_MSA(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + int x; + v16u8 src0, src1, src2, src3, dst0, dst1; + v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v8u16 vec8, vec9, vec10, vec11, vec12, vec13; + v8u16 const_256 = (v8u16)__msa_ldi_h(256); + v16u8 const_255 = (v16u8)__msa_ldi_b(255); + v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255}; + v16i8 zero = {0}; + + for (x = 0; x < width; x += 8) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0); + src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 16); + vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0); + vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0); + vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1); + vec3 = (v8u16)__msa_ilvl_b(zero, (v16i8)src1); + vec4 = (v8u16)__msa_ilvr_b(zero, (v16i8)src2); + vec5 = (v8u16)__msa_ilvl_b(zero, (v16i8)src2); + vec6 = (v8u16)__msa_ilvr_b(zero, (v16i8)src3); + vec7 = (v8u16)__msa_ilvl_b(zero, (v16i8)src3); + vec8 = (v8u16)__msa_fill_h(vec0[3]); + vec9 = (v8u16)__msa_fill_h(vec0[7]); + vec10 = (v8u16)__msa_fill_h(vec1[3]); + vec11 = (v8u16)__msa_fill_h(vec1[7]); + vec8 = (v8u16)__msa_pckev_d((v2i64)vec9, (v2i64)vec8); + vec9 = (v8u16)__msa_pckev_d((v2i64)vec11, (v2i64)vec10); + vec10 = (v8u16)__msa_fill_h(vec2[3]); + vec11 = (v8u16)__msa_fill_h(vec2[7]); + vec12 = (v8u16)__msa_fill_h(vec3[3]); + vec13 = (v8u16)__msa_fill_h(vec3[7]); + vec10 = (v8u16)__msa_pckev_d((v2i64)vec11, (v2i64)vec10); + vec11 = (v8u16)__msa_pckev_d((v2i64)vec13, (v2i64)vec12); + vec8 = const_256 - vec8; + vec9 = const_256 - vec9; + vec10 = const_256 - vec10; + vec11 = const_256 - vec11; + vec8 *= vec4; + vec9 *= vec5; + vec10 *= vec6; + vec11 *= vec7; + vec8 = (v8u16)__msa_srai_h((v8i16)vec8, 8); + vec9 = (v8u16)__msa_srai_h((v8i16)vec9, 8); + vec10 = (v8u16)__msa_srai_h((v8i16)vec10, 8); + vec11 = (v8u16)__msa_srai_h((v8i16)vec11, 8); + vec0 += vec8; + vec1 += vec9; + vec2 += vec10; + vec3 += vec11; + dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); + dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); + dst0 = __msa_bmnz_v(dst0, const_255, mask); + dst1 = __msa_bmnz_v(dst1, const_255, mask); + ST_UB2(dst0, dst1, dst_argb, 16); + src_argb0 += 32; + src_argb1 += 32; + dst_argb += 32; + } +} + +void ARGBQuantizeRow_MSA(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width) { + int x; + v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + v4i32 tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + v4i32 vec_scale = __msa_fill_w(scale); + v16u8 vec_int_sz = 
(v16u8)__msa_fill_b(interval_size); + v16u8 vec_int_ofst = (v16u8)__msa_fill_b(interval_offset); + v16i8 mask = {0, 1, 2, 19, 4, 5, 6, 23, 8, 9, 10, 27, 12, 13, 14, 31}; + v16i8 zero = {0}; + + for (x = 0; x < width; x += 8) { + src0 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 32); + src3 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 48); + vec0 = (v8i16)__msa_ilvr_b(zero, (v16i8)src0); + vec1 = (v8i16)__msa_ilvl_b(zero, (v16i8)src0); + vec2 = (v8i16)__msa_ilvr_b(zero, (v16i8)src1); + vec3 = (v8i16)__msa_ilvl_b(zero, (v16i8)src1); + vec4 = (v8i16)__msa_ilvr_b(zero, (v16i8)src2); + vec5 = (v8i16)__msa_ilvl_b(zero, (v16i8)src2); + vec6 = (v8i16)__msa_ilvr_b(zero, (v16i8)src3); + vec7 = (v8i16)__msa_ilvl_b(zero, (v16i8)src3); + tmp0 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0); + tmp1 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0); + tmp2 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec1); + tmp3 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec1); + tmp4 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec2); + tmp5 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec2); + tmp6 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec3); + tmp7 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec3); + tmp8 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec4); + tmp9 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec4); + tmp10 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec5); + tmp11 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec5); + tmp12 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec6); + tmp13 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec6); + tmp14 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec7); + tmp15 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec7); + tmp0 *= vec_scale; + tmp1 *= vec_scale; + tmp2 *= vec_scale; + tmp3 *= vec_scale; + tmp4 *= vec_scale; + tmp5 *= vec_scale; + tmp6 *= vec_scale; + tmp7 *= vec_scale; + tmp8 *= vec_scale; + tmp9 *= vec_scale; + tmp10 *= vec_scale; + tmp11 *= vec_scale; + tmp12 *= vec_scale; + tmp13 *= vec_scale; + tmp14 *= vec_scale; + tmp15 *= vec_scale; + tmp0 >>= 16; + tmp1 >>= 16; + tmp2 >>= 16; + tmp3 >>= 16; + tmp4 >>= 16; + tmp5 >>= 16; + tmp6 >>= 16; + tmp7 >>= 16; + tmp8 >>= 16; + tmp9 >>= 16; + tmp10 >>= 16; + tmp11 >>= 16; + tmp12 >>= 16; + tmp13 >>= 16; + tmp14 >>= 16; + tmp15 >>= 16; + vec0 = (v8i16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); + vec1 = (v8i16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); + vec2 = (v8i16)__msa_pckev_h((v8i16)tmp5, (v8i16)tmp4); + vec3 = (v8i16)__msa_pckev_h((v8i16)tmp7, (v8i16)tmp6); + vec4 = (v8i16)__msa_pckev_h((v8i16)tmp9, (v8i16)tmp8); + vec5 = (v8i16)__msa_pckev_h((v8i16)tmp11, (v8i16)tmp10); + vec6 = (v8i16)__msa_pckev_h((v8i16)tmp13, (v8i16)tmp12); + vec7 = (v8i16)__msa_pckev_h((v8i16)tmp15, (v8i16)tmp14); + dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); + dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); + dst2 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); + dst3 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); + dst0 *= vec_int_sz; + dst1 *= vec_int_sz; + dst2 *= vec_int_sz; + dst3 *= vec_int_sz; + dst0 += vec_int_ofst; + dst1 += vec_int_ofst; + dst2 += vec_int_ofst; + dst3 += vec_int_ofst; + dst0 = (v16u8)__msa_vshf_b(mask, (v16i8)src0, (v16i8)dst0); + dst1 = (v16u8)__msa_vshf_b(mask, (v16i8)src1, (v16i8)dst1); + dst2 = (v16u8)__msa_vshf_b(mask, (v16i8)src2, (v16i8)dst2); + dst3 = (v16u8)__msa_vshf_b(mask, (v16i8)src3, (v16i8)dst3); + ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); + dst_argb += 64; + } +} + +void ARGBColorMatrixRow_MSA(const uint8_t* 
src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width) { + int32_t x; + v16i8 src0; + v16u8 src1, src2, dst0, dst1; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; + v8i16 vec10, vec11, vec12, vec13, vec14, vec15, vec16, vec17; + v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + v4i32 tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + v16i8 zero = {0}; + v8i16 max = __msa_ldi_h(255); + + src0 = __msa_ld_b((v16i8*)matrix_argb, 0); + vec0 = (v8i16)__msa_ilvr_b(zero, src0); + vec1 = (v8i16)__msa_ilvl_b(zero, src0); + + for (x = 0; x < width; x += 8) { + src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0); + src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16); + vec2 = (v8i16)__msa_ilvr_b(zero, (v16i8)src1); + vec3 = (v8i16)__msa_ilvl_b(zero, (v16i8)src1); + vec4 = (v8i16)__msa_ilvr_b(zero, (v16i8)src2); + vec5 = (v8i16)__msa_ilvl_b(zero, (v16i8)src2); + vec6 = (v8i16)__msa_pckod_d((v2i64)vec2, (v2i64)vec2); + vec7 = (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec3); + vec8 = (v8i16)__msa_pckod_d((v2i64)vec4, (v2i64)vec4); + vec9 = (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec5); + vec2 = (v8i16)__msa_pckev_d((v2i64)vec2, (v2i64)vec2); + vec3 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec3); + vec4 = (v8i16)__msa_pckev_d((v2i64)vec4, (v2i64)vec4); + vec5 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec5); + vec10 = vec2 * vec0; + vec11 = vec2 * vec1; + vec12 = vec6 * vec0; + vec13 = vec6 * vec1; + tmp0 = __msa_hadd_s_w(vec10, vec10); + tmp1 = __msa_hadd_s_w(vec11, vec11); + tmp2 = __msa_hadd_s_w(vec12, vec12); + tmp3 = __msa_hadd_s_w(vec13, vec13); + vec14 = vec3 * vec0; + vec15 = vec3 * vec1; + vec16 = vec7 * vec0; + vec17 = vec7 * vec1; + tmp4 = __msa_hadd_s_w(vec14, vec14); + tmp5 = __msa_hadd_s_w(vec15, vec15); + tmp6 = __msa_hadd_s_w(vec16, vec16); + tmp7 = __msa_hadd_s_w(vec17, vec17); + vec10 = __msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); + vec11 = __msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); + vec12 = __msa_pckev_h((v8i16)tmp5, (v8i16)tmp4); + vec13 = __msa_pckev_h((v8i16)tmp7, (v8i16)tmp6); + tmp0 = __msa_hadd_s_w(vec10, vec10); + tmp1 = __msa_hadd_s_w(vec11, vec11); + tmp2 = __msa_hadd_s_w(vec12, vec12); + tmp3 = __msa_hadd_s_w(vec13, vec13); + tmp0 = __msa_srai_w(tmp0, 6); + tmp1 = __msa_srai_w(tmp1, 6); + tmp2 = __msa_srai_w(tmp2, 6); + tmp3 = __msa_srai_w(tmp3, 6); + vec2 = vec4 * vec0; + vec6 = vec4 * vec1; + vec3 = vec8 * vec0; + vec7 = vec8 * vec1; + tmp8 = __msa_hadd_s_w(vec2, vec2); + tmp9 = __msa_hadd_s_w(vec6, vec6); + tmp10 = __msa_hadd_s_w(vec3, vec3); + tmp11 = __msa_hadd_s_w(vec7, vec7); + vec4 = vec5 * vec0; + vec8 = vec5 * vec1; + vec5 = vec9 * vec0; + vec9 = vec9 * vec1; + tmp12 = __msa_hadd_s_w(vec4, vec4); + tmp13 = __msa_hadd_s_w(vec8, vec8); + tmp14 = __msa_hadd_s_w(vec5, vec5); + tmp15 = __msa_hadd_s_w(vec9, vec9); + vec14 = __msa_pckev_h((v8i16)tmp9, (v8i16)tmp8); + vec15 = __msa_pckev_h((v8i16)tmp11, (v8i16)tmp10); + vec16 = __msa_pckev_h((v8i16)tmp13, (v8i16)tmp12); + vec17 = __msa_pckev_h((v8i16)tmp15, (v8i16)tmp14); + tmp4 = __msa_hadd_s_w(vec14, vec14); + tmp5 = __msa_hadd_s_w(vec15, vec15); + tmp6 = __msa_hadd_s_w(vec16, vec16); + tmp7 = __msa_hadd_s_w(vec17, vec17); + tmp4 = __msa_srai_w(tmp4, 6); + tmp5 = __msa_srai_w(tmp5, 6); + tmp6 = __msa_srai_w(tmp6, 6); + tmp7 = __msa_srai_w(tmp7, 6); + vec10 = __msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); + vec11 = __msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); + vec12 = __msa_pckev_h((v8i16)tmp5, (v8i16)tmp4); + vec13 = __msa_pckev_h((v8i16)tmp7, (v8i16)tmp6); + vec10 = 
__msa_maxi_s_h(vec10, 0); + vec11 = __msa_maxi_s_h(vec11, 0); + vec12 = __msa_maxi_s_h(vec12, 0); + vec13 = __msa_maxi_s_h(vec13, 0); + vec10 = __msa_min_s_h(vec10, max); + vec11 = __msa_min_s_h(vec11, max); + vec12 = __msa_min_s_h(vec12, max); + vec13 = __msa_min_s_h(vec13, max); + dst0 = (v16u8)__msa_pckev_b((v16i8)vec11, (v16i8)vec10); + dst1 = (v16u8)__msa_pckev_b((v16i8)vec13, (v16i8)vec12); + ST_UB2(dst0, dst1, dst_argb, 16); + src_argb += 32; + dst_argb += 32; + } +} + +void SplitUVRow_MSA(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; + + for (x = 0; x < width; x += 32) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 32); + src3 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 48); + dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); + dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); + dst2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); + dst3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); + ST_UB2(dst0, dst1, dst_u, 16); + ST_UB2(dst2, dst3, dst_v, 16); + src_uv += 64; + dst_u += 32; + dst_v += 32; + } +} + +void SetRow_MSA(uint8_t* dst, uint8_t v8, int width) { + int x; + v16u8 dst0 = (v16u8)__msa_fill_b(v8); + + for (x = 0; x < width; x += 16) { + ST_UB(dst0, dst); + dst += 16; + } +} + +void MirrorUVRow_MSA(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + int x; + v16u8 src0, src1, src2, src3; + v16u8 dst0, dst1, dst2, dst3; + v16i8 mask0 = {30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0}; + v16i8 mask1 = {31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1}; + + src_uv += (2 * width); + + for (x = 0; x < width; x += 32) { + src_uv -= 64; + src2 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 0); + src3 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 16); + src0 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 32); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 48); + dst0 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); + dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); + dst2 = (v16u8)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0); + dst3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src3, (v16i8)src2); + ST_UB2(dst0, dst1, dst_v, 16); + ST_UB2(dst2, dst3, dst_u, 16); + dst_u += 32; + dst_v += 32; + } +} + +void SobelXRow_MSA(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int32_t width) { + int x; + v16u8 src0, src1, src2, src3, src4, src5, dst0; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5; + v16i8 mask0 = {0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9}; + v16i8 tmp = __msa_ldi_b(8); + v16i8 mask1 = mask0 + tmp; + v8i16 zero = {0}; + v8i16 max = __msa_ldi_h(255); + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_y0, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_y0, 16); + src2 = (v16u8)__msa_ld_b((const v16i8*)src_y1, 0); + src3 = (v16u8)__msa_ld_b((const v16i8*)src_y1, 16); + src4 = (v16u8)__msa_ld_b((const v16i8*)src_y2, 0); + src5 = (v16u8)__msa_ld_b((const v16i8*)src_y2, 16); + vec0 = (v8i16)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0); + vec1 = (v8i16)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); + vec2 = (v8i16)__msa_vshf_b(mask0, (v16i8)src3, (v16i8)src2); + vec3 = (v8i16)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); + vec4 = (v8i16)__msa_vshf_b(mask0, (v16i8)src5, (v16i8)src4); + vec5 = (v8i16)__msa_vshf_b(mask1, (v16i8)src5, 
(v16i8)src4); + vec0 = (v8i16)__msa_hsub_u_h((v16u8)vec0, (v16u8)vec0); + vec1 = (v8i16)__msa_hsub_u_h((v16u8)vec1, (v16u8)vec1); + vec2 = (v8i16)__msa_hsub_u_h((v16u8)vec2, (v16u8)vec2); + vec3 = (v8i16)__msa_hsub_u_h((v16u8)vec3, (v16u8)vec3); + vec4 = (v8i16)__msa_hsub_u_h((v16u8)vec4, (v16u8)vec4); + vec5 = (v8i16)__msa_hsub_u_h((v16u8)vec5, (v16u8)vec5); + vec0 += vec2; + vec1 += vec3; + vec4 += vec2; + vec5 += vec3; + vec0 += vec4; + vec1 += vec5; + vec0 = __msa_add_a_h(zero, vec0); + vec1 = __msa_add_a_h(zero, vec1); + vec0 = __msa_maxi_s_h(vec0, 0); + vec1 = __msa_maxi_s_h(vec1, 0); + vec0 = __msa_min_s_h(max, vec0); + vec1 = __msa_min_s_h(max, vec1); + dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); + ST_UB(dst0, dst_sobelx); + src_y0 += 16; + src_y1 += 16; + src_y2 += 16; + dst_sobelx += 16; + } +} + +void SobelYRow_MSA(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int32_t width) { + int x; + v16u8 src0, src1, dst0; + v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6; + v8i16 zero = {0}; + v8i16 max = __msa_ldi_h(255); + + for (x = 0; x < width; x += 16) { + src0 = (v16u8)__msa_ld_b((const v16i8*)src_y0, 0); + src1 = (v16u8)__msa_ld_b((const v16i8*)src_y1, 0); + vec0 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)src0); + vec1 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)src0); + vec2 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)src1); + vec3 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)src1); + vec0 -= vec2; + vec1 -= vec3; + vec6[0] = src_y0[16] - src_y1[16]; + vec6[1] = src_y0[17] - src_y1[17]; + vec2 = (v8i16)__msa_sldi_b((v16i8)vec1, (v16i8)vec0, 2); + vec3 = (v8i16)__msa_sldi_b((v16i8)vec6, (v16i8)vec1, 2); + vec4 = (v8i16)__msa_sldi_b((v16i8)vec1, (v16i8)vec0, 4); + vec5 = (v8i16)__msa_sldi_b((v16i8)vec6, (v16i8)vec1, 4); + vec0 += vec2; + vec1 += vec3; + vec4 += vec2; + vec5 += vec3; + vec0 += vec4; + vec1 += vec5; + vec0 = __msa_add_a_h(zero, vec0); + vec1 = __msa_add_a_h(zero, vec1); + vec0 = __msa_maxi_s_h(vec0, 0); + vec1 = __msa_maxi_s_h(vec1, 0); + vec0 = __msa_min_s_h(max, vec0); + vec1 = __msa_min_s_h(max, vec1); + dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); + ST_UB(dst0, dst_sobely); + src_y0 += 16; + src_y1 += 16; + dst_sobely += 16; + } +} + +void HalfFloatRow_MSA(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { + int i; + v8u16 src0, src1, src2, src3, dst0, dst1, dst2, dst3; + v4u32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v4f32 fvec0, fvec1, fvec2, fvec3, fvec4, fvec5, fvec6, fvec7; + v4f32 mult_vec; + v8i16 zero = {0}; + mult_vec[0] = 1.9259299444e-34f * scale; + mult_vec = (v4f32)__msa_splati_w((v4i32)mult_vec, 0); + + for (i = 0; i < width; i += 32) { + src0 = (v8u16)__msa_ld_h((v8i16*)src, 0); + src1 = (v8u16)__msa_ld_h((v8i16*)src, 16); + src2 = (v8u16)__msa_ld_h((v8i16*)src, 32); + src3 = (v8u16)__msa_ld_h((v8i16*)src, 48); + vec0 = (v4u32)__msa_ilvr_h(zero, (v8i16)src0); + vec1 = (v4u32)__msa_ilvl_h(zero, (v8i16)src0); + vec2 = (v4u32)__msa_ilvr_h(zero, (v8i16)src1); + vec3 = (v4u32)__msa_ilvl_h(zero, (v8i16)src1); + vec4 = (v4u32)__msa_ilvr_h(zero, (v8i16)src2); + vec5 = (v4u32)__msa_ilvl_h(zero, (v8i16)src2); + vec6 = (v4u32)__msa_ilvr_h(zero, (v8i16)src3); + vec7 = (v4u32)__msa_ilvl_h(zero, (v8i16)src3); + fvec0 = __msa_ffint_u_w(vec0); + fvec1 = __msa_ffint_u_w(vec1); + fvec2 = __msa_ffint_u_w(vec2); + fvec3 = __msa_ffint_u_w(vec3); + fvec4 = __msa_ffint_u_w(vec4); + fvec5 = __msa_ffint_u_w(vec5); + fvec6 = __msa_ffint_u_w(vec6); + fvec7 = __msa_ffint_u_w(vec7); + fvec0 *= mult_vec; + fvec1 *= 
mult_vec; + fvec2 *= mult_vec; + fvec3 *= mult_vec; + fvec4 *= mult_vec; + fvec5 *= mult_vec; + fvec6 *= mult_vec; + fvec7 *= mult_vec; + vec0 = ((v4u32)fvec0) >> 13; + vec1 = ((v4u32)fvec1) >> 13; + vec2 = ((v4u32)fvec2) >> 13; + vec3 = ((v4u32)fvec3) >> 13; + vec4 = ((v4u32)fvec4) >> 13; + vec5 = ((v4u32)fvec5) >> 13; + vec6 = ((v4u32)fvec6) >> 13; + vec7 = ((v4u32)fvec7) >> 13; + dst0 = (v8u16)__msa_pckev_h((v8i16)vec1, (v8i16)vec0); + dst1 = (v8u16)__msa_pckev_h((v8i16)vec3, (v8i16)vec2); + dst2 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4); + dst3 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6); + ST_UH2(dst0, dst1, dst, 8); + ST_UH2(dst2, dst3, dst + 16, 8); + src += 32; + dst += 32; + } +} + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + +#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_neon.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_neon.cc index 909df060c691..ff87e74c62c9 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/row_neon.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/row_neon.cc @@ -10,6 +10,8 @@ #include "libyuv/row.h" +#include + #ifdef __cplusplus namespace libyuv { extern "C" { @@ -20,1446 +22,1311 @@ extern "C" { !defined(__aarch64__) // Read 8 Y, 4 U and 4 V from 422 -#define READYUV422 \ - MEMACCESS(0) \ - "vld1.8 {d0}, [%0]! \n" \ - MEMACCESS(1) \ - "vld1.32 {d2[0]}, [%1]! \n" \ - MEMACCESS(2) \ - "vld1.32 {d2[1]}, [%2]! \n" - -// Read 8 Y, 2 U and 2 V from 422 -#define READYUV411 \ - MEMACCESS(0) \ - "vld1.8 {d0}, [%0]! \n" \ - MEMACCESS(1) \ - "vld1.16 {d2[0]}, [%1]! \n" \ - MEMACCESS(2) \ - "vld1.16 {d2[1]}, [%2]! \n" \ - "vmov.u8 d3, d2 \n" \ - "vzip.u8 d2, d3 \n" +#define READYUV422 \ + "vld1.8 {d0}, [%0]! \n" \ + "vld1.32 {d2[0]}, [%1]! \n" \ + "vld1.32 {d2[1]}, [%2]! \n" // Read 8 Y, 8 U and 8 V from 444 -#define READYUV444 \ - MEMACCESS(0) \ - "vld1.8 {d0}, [%0]! \n" \ - MEMACCESS(1) \ - "vld1.8 {d2}, [%1]! \n" \ - MEMACCESS(2) \ - "vld1.8 {d3}, [%2]! \n" \ - "vpaddl.u8 q1, q1 \n" \ - "vrshrn.u16 d2, q1, #1 \n" +#define READYUV444 \ + "vld1.8 {d0}, [%0]! \n" \ + "vld1.8 {d2}, [%1]! \n" \ + "vld1.8 {d3}, [%2]! \n" \ + "vpaddl.u8 q1, q1 \n" \ + "vrshrn.u16 d2, q1, #1 \n" // Read 8 Y, and set 4 U and 4 V to 128 -#define READYUV400 \ - MEMACCESS(0) \ - "vld1.8 {d0}, [%0]! \n" \ - "vmov.u8 d2, #128 \n" +#define READYUV400 \ + "vld1.8 {d0}, [%0]! \n" \ + "vmov.u8 d2, #128 \n" // Read 8 Y and 4 UV from NV12 #define READNV12 \ - MEMACCESS(0) \ - "vld1.8 {d0}, [%0]! \n" \ - MEMACCESS(1) \ - "vld1.8 {d2}, [%1]! \n" \ - "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ - "vuzp.u8 d2, d3 \n" \ - "vtrn.u32 d2, d3 \n" + "vld1.8 {d0}, [%0]! \n" \ + "vld1.8 {d2}, [%1]! \n" \ + "vmov.u8 d3, d2 \n" /* split odd/even uv apart */ \ + "vuzp.u8 d2, d3 \n" \ + "vtrn.u32 d2, d3 \n" // Read 8 Y and 4 VU from NV21 #define READNV21 \ - MEMACCESS(0) \ - "vld1.8 {d0}, [%0]! \n" \ - MEMACCESS(1) \ - "vld1.8 {d2}, [%1]! \n" \ - "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ - "vuzp.u8 d3, d2 \n" \ - "vtrn.u32 d2, d3 \n" + "vld1.8 {d0}, [%0]! \n" \ + "vld1.8 {d2}, [%1]! \n" \ + "vmov.u8 d3, d2 \n" /* split odd/even uv apart */ \ + "vuzp.u8 d3, d2 \n" \ + "vtrn.u32 d2, d3 \n" // Read 8 YUY2 -#define READYUY2 \ - MEMACCESS(0) \ - "vld2.8 {d0, d2}, [%0]! \n" \ - "vmov.u8 d3, d2 \n" \ - "vuzp.u8 d2, d3 \n" \ - "vtrn.u32 d2, d3 \n" +#define READYUY2 \ + "vld2.8 {d0, d2}, [%0]! 
\n" \ + "vmov.u8 d3, d2 \n" \ + "vuzp.u8 d2, d3 \n" \ + "vtrn.u32 d2, d3 \n" // Read 8 UYVY -#define READUYVY \ - MEMACCESS(0) \ - "vld2.8 {d2, d3}, [%0]! \n" \ - "vmov.u8 d0, d3 \n" \ - "vmov.u8 d3, d2 \n" \ - "vuzp.u8 d2, d3 \n" \ - "vtrn.u32 d2, d3 \n" +#define READUYVY \ + "vld2.8 {d2, d3}, [%0]! \n" \ + "vmov.u8 d0, d3 \n" \ + "vmov.u8 d3, d2 \n" \ + "vuzp.u8 d2, d3 \n" \ + "vtrn.u32 d2, d3 \n" -#define YUVTORGB_SETUP \ - MEMACCESS([kUVToRB]) \ - "vld1.8 {d24}, [%[kUVToRB]] \n" \ - MEMACCESS([kUVToG]) \ - "vld1.8 {d25}, [%[kUVToG]] \n" \ - MEMACCESS([kUVBiasBGR]) \ - "vld1.16 {d26[], d27[]}, [%[kUVBiasBGR]]! \n" \ - MEMACCESS([kUVBiasBGR]) \ - "vld1.16 {d8[], d9[]}, [%[kUVBiasBGR]]! \n" \ - MEMACCESS([kUVBiasBGR]) \ - "vld1.16 {d28[], d29[]}, [%[kUVBiasBGR]] \n" \ - MEMACCESS([kYToRgb]) \ - "vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n" +#define YUVTORGB_SETUP \ + "vld1.8 {d24}, [%[kUVToRB]] \n" \ + "vld1.8 {d25}, [%[kUVToG]] \n" \ + "vld1.16 {d26[], d27[]}, [%[kUVBiasBGR]]! \n" \ + "vld1.16 {d8[], d9[]}, [%[kUVBiasBGR]]! \n" \ + "vld1.16 {d28[], d29[]}, [%[kUVBiasBGR]] \n" \ + "vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n" -#define YUVTORGB \ - "vmull.u8 q8, d2, d24 \n" /* u/v B/R component */\ - "vmull.u8 q9, d2, d25 \n" /* u/v G component */\ - "vmovl.u8 q0, d0 \n" /* Y */\ - "vmovl.s16 q10, d1 \n" \ - "vmovl.s16 q0, d0 \n" \ - "vmul.s32 q10, q10, q15 \n" \ - "vmul.s32 q0, q0, q15 \n" \ - "vqshrun.s32 d0, q0, #16 \n" \ - "vqshrun.s32 d1, q10, #16 \n" /* Y */\ - "vadd.s16 d18, d19 \n" \ - "vshll.u16 q1, d16, #16 \n" /* Replicate u * UB */\ - "vshll.u16 q10, d17, #16 \n" /* Replicate v * VR */\ - "vshll.u16 q3, d18, #16 \n" /* Replicate (v*VG + u*UG)*/\ - "vaddw.u16 q1, q1, d16 \n" \ - "vaddw.u16 q10, q10, d17 \n" \ - "vaddw.u16 q3, q3, d18 \n" \ - "vqadd.s16 q8, q0, q13 \n" /* B */ \ - "vqadd.s16 q9, q0, q14 \n" /* R */ \ - "vqadd.s16 q0, q0, q4 \n" /* G */ \ - "vqadd.s16 q8, q8, q1 \n" /* B */ \ - "vqadd.s16 q9, q9, q10 \n" /* R */ \ - "vqsub.s16 q0, q0, q3 \n" /* G */ \ - "vqshrun.s16 d20, q8, #6 \n" /* B */ \ - "vqshrun.s16 d22, q9, #6 \n" /* R */ \ - "vqshrun.s16 d21, q0, #6 \n" /* G */ +#define YUVTORGB \ + "vmull.u8 q8, d2, d24 \n" /* u/v B/R component */ \ + "vmull.u8 q9, d2, d25 \n" /* u/v G component */ \ + "vmovl.u8 q0, d0 \n" /* Y */ \ + "vmovl.s16 q10, d1 \n" \ + "vmovl.s16 q0, d0 \n" \ + "vmul.s32 q10, q10, q15 \n" \ + "vmul.s32 q0, q0, q15 \n" \ + "vqshrun.s32 d0, q0, #16 \n" \ + "vqshrun.s32 d1, q10, #16 \n" /* Y */ \ + "vadd.s16 d18, d19 \n" \ + "vshll.u16 q1, d16, #16 \n" /* Replicate u * UB */ \ + "vshll.u16 q10, d17, #16 \n" /* Replicate v * VR */ \ + "vshll.u16 q3, d18, #16 \n" /* Replicate (v*VG + u*UG)*/ \ + "vaddw.u16 q1, q1, d16 \n" \ + "vaddw.u16 q10, q10, d17 \n" \ + "vaddw.u16 q3, q3, d18 \n" \ + "vqadd.s16 q8, q0, q13 \n" /* B */ \ + "vqadd.s16 q9, q0, q14 \n" /* R */ \ + "vqadd.s16 q0, q0, q4 \n" /* G */ \ + "vqadd.s16 q8, q8, q1 \n" /* B */ \ + "vqadd.s16 q9, q9, q10 \n" /* R */ \ + "vqsub.s16 q0, q0, q3 \n" /* G */ \ + "vqshrun.s16 d20, q8, #6 \n" /* B */ \ + "vqshrun.s16 d22, q9, #6 \n" /* R */ \ + "vqshrun.s16 d21, q0, #6 \n" /* G */ -void I444ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" - READYUV444 - YUVTORGB - "subs %4, %4, #8 \n" - MEMACCESS(3) - "vst4.8 {d20, d21, d22, 
d23}, [%3]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile( + YUVTORGB_SETUP + "vmov.u8 d23, #255 \n" + "1: \n" READYUV444 YUVTORGB + "subs %4, %4, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%3]! \n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_argb), // %3 + "+r"(width) // %4 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" - READYUV422 - YUVTORGB - "subs %4, %4, #8 \n" - MEMACCESS(3) - "vst4.8 {d20, d21, d22, d23}, [%3]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile( + YUVTORGB_SETUP + "vmov.u8 d23, #255 \n" + "1: \n" READYUV422 YUVTORGB + "subs %4, %4, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%3]! \n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_argb), // %3 + "+r"(width) // %4 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } -void I422AlphaToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const uint8* src_a, - uint8* dst_argb, +void I422AlphaToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "1: \n" - READYUV422 - YUVTORGB - "subs %5, %5, #8 \n" - MEMACCESS(3) - "vld1.8 {d23}, [%3]! \n" - MEMACCESS(4) - "vst4.8 {d20, d21, d22, d23}, [%4]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(src_a), // %3 - "+r"(dst_argb), // %4 - "+r"(width) // %5 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile( + YUVTORGB_SETUP + "1: \n" READYUV422 YUVTORGB + "subs %5, %5, #8 \n" + "vld1.8 {d23}, [%3]! \n" + "vst4.8 {d20, d21, d22, d23}, [%4]! 
\n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(src_a), // %3 + "+r"(dst_argb), // %4 + "+r"(width) // %5 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } -void I411ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGBARow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" - READYUV411 - YUVTORGB - "subs %4, %4, #8 \n" - MEMACCESS(3) - "vst4.8 {d20, d21, d22, d23}, [%3]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile( + YUVTORGB_SETUP + "1: \n" READYUV422 YUVTORGB + "subs %4, %4, #8 \n" + "vmov.u8 d19, #255 \n" // YUVTORGB modified d19 + "vst4.8 {d19, d20, d21, d22}, [%3]! \n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_rgba), // %3 + "+r"(width) // %4 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } -void I422ToRGBARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - YUVTORGB_SETUP - "1: \n" - READYUV422 - YUVTORGB - "subs %4, %4, #8 \n" - "vmov.u8 d19, #255 \n" // d19 modified by YUVTORGB - MEMACCESS(3) - "vst4.8 {d19, d20, d21, d22}, [%3]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_rgba), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void I422ToRGB24Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "1: \n" - READYUV422 - YUVTORGB - "subs %4, %4, #8 \n" - MEMACCESS(3) - "vst3.8 {d20, d21, d22}, [%3]! 
\n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_rgb24), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile( + YUVTORGB_SETUP + "1: \n" READYUV422 YUVTORGB + "subs %4, %4, #8 \n" + "vst3.8 {d20, d21, d22}, [%3]! \n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_rgb24), // %3 + "+r"(width) // %4 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } -#define ARGBTORGB565 \ - "vshll.u8 q0, d22, #8 \n" /* R */ \ - "vshll.u8 q8, d21, #8 \n" /* G */ \ - "vshll.u8 q9, d20, #8 \n" /* B */ \ - "vsri.16 q0, q8, #5 \n" /* RG */ \ - "vsri.16 q0, q9, #11 \n" /* RGB */ +#define ARGBTORGB565 \ + "vshll.u8 q0, d22, #8 \n" /* R */ \ + "vshll.u8 q8, d21, #8 \n" /* G */ \ + "vshll.u8 q9, d20, #8 \n" /* B */ \ + "vsri.16 q0, q8, #5 \n" /* RG */ \ + "vsri.16 q0, q9, #11 \n" /* RGB */ -void I422ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "1: \n" - READYUV422 - YUVTORGB - "subs %4, %4, #8 \n" - ARGBTORGB565 - MEMACCESS(3) - "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565. - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_rgb565), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile( + YUVTORGB_SETUP + "1: \n" READYUV422 YUVTORGB + "subs %4, %4, #8 \n" ARGBTORGB565 + "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565. 
+ "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_rgb565), // %3 + "+r"(width) // %4 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } -#define ARGBTOARGB1555 \ - "vshll.u8 q0, d23, #8 \n" /* A */ \ - "vshll.u8 q8, d22, #8 \n" /* R */ \ - "vshll.u8 q9, d21, #8 \n" /* G */ \ - "vshll.u8 q10, d20, #8 \n" /* B */ \ - "vsri.16 q0, q8, #1 \n" /* AR */ \ - "vsri.16 q0, q9, #6 \n" /* ARG */ \ - "vsri.16 q0, q10, #11 \n" /* ARGB */ +#define ARGBTOARGB1555 \ + "vshll.u8 q0, d23, #8 \n" /* A */ \ + "vshll.u8 q8, d22, #8 \n" /* R */ \ + "vshll.u8 q9, d21, #8 \n" /* G */ \ + "vshll.u8 q10, d20, #8 \n" /* B */ \ + "vsri.16 q0, q8, #1 \n" /* AR */ \ + "vsri.16 q0, q9, #6 \n" /* ARG */ \ + "vsri.16 q0, q10, #11 \n" /* ARGB */ -void I422ToARGB1555Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "1: \n" - READYUV422 - YUVTORGB - "subs %4, %4, #8 \n" - "vmov.u8 d23, #255 \n" - ARGBTOARGB1555 - MEMACCESS(3) - "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB1555. - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb1555), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile( + YUVTORGB_SETUP + "1: \n" READYUV422 YUVTORGB + "subs %4, %4, #8 \n" + "vmov.u8 d23, #255 \n" ARGBTOARGB1555 + "vst1.8 {q0}, [%3]! \n" // store 8 pixels + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_argb1555), // %3 + "+r"(width) // %4 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } -#define ARGBTOARGB4444 \ - "vshr.u8 d20, d20, #4 \n" /* B */ \ - "vbic.32 d21, d21, d4 \n" /* G */ \ - "vshr.u8 d22, d22, #4 \n" /* R */ \ - "vbic.32 d23, d23, d4 \n" /* A */ \ - "vorr d0, d20, d21 \n" /* BG */ \ - "vorr d1, d22, d23 \n" /* RA */ \ - "vzip.u8 d0, d1 \n" /* BGRA */ +#define ARGBTOARGB4444 \ + "vshr.u8 d20, d20, #4 \n" /* B */ \ + "vbic.32 d21, d21, d4 \n" /* G */ \ + "vshr.u8 d22, d22, #4 \n" /* R */ \ + "vbic.32 d23, d23, d4 \n" /* A */ \ + "vorr d0, d20, d21 \n" /* BG */ \ + "vorr d1, d22, d23 \n" /* RA */ \ + "vzip.u8 d0, d1 \n" /* BGRA */ -void I422ToARGB4444Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. - "1: \n" - READYUV422 - YUVTORGB - "subs %4, %4, #8 \n" - "vmov.u8 d23, #255 \n" - ARGBTOARGB4444 - MEMACCESS(3) - "vst1.8 {q0}, [%3]! 
\n" // store 8 pixels ARGB4444. - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb4444), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile( + YUVTORGB_SETUP + "vmov.u8 d4, #0x0f \n" // vbic bits to clear + "1: \n" + + READYUV422 YUVTORGB + "subs %4, %4, #8 \n" + "vmov.u8 d23, #255 \n" ARGBTOARGB4444 + "vst1.8 {q0}, [%3]! \n" // store 8 pixels + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_argb4444), // %3 + "+r"(width) // %4 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } -void I400ToARGBRow_NEON(const uint8* src_y, - uint8* dst_argb, - int width) { - asm volatile ( - YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" - READYUV400 - YUVTORGB - "subs %2, %2, #8 \n" - MEMACCESS(1) - "vst4.8 {d20, d21, d22, d23}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB), - [kUVToG]"r"(&kYuvI601Constants.kUVToG), - [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvI601Constants.kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); +void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { + asm volatile( + YUVTORGB_SETUP + "vmov.u8 d23, #255 \n" + "1: \n" READYUV400 YUVTORGB + "subs %2, %2, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : [kUVToRB] "r"(&kYuvI601Constants.kUVToRB), + [kUVToG] "r"(&kYuvI601Constants.kUVToG), + [kUVBiasBGR] "r"(&kYuvI601Constants.kUVBiasBGR), + [kYToRgb] "r"(&kYuvI601Constants.kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } -void J400ToARGBRow_NEON(const uint8* src_y, - uint8* dst_argb, - int width) { - asm volatile ( - "vmov.u8 d23, #255 \n" - "1: \n" - MEMACCESS(0) - "vld1.8 {d20}, [%0]! \n" - "vmov d21, d20 \n" - "vmov d22, d20 \n" - "subs %2, %2, #8 \n" - MEMACCESS(1) - "vst4.8 {d20, d21, d22, d23}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "d20", "d21", "d22", "d23" - ); +void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { + asm volatile( + "vmov.u8 d23, #255 \n" + "1: \n" + "vld1.8 {d20}, [%0]! \n" + "vmov d21, d20 \n" + "vmov d22, d20 \n" + "subs %2, %2, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d20", "d21", "d22", "d23"); } -void NV12ToARGBRow_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" - READNV12 - YUVTORGB - "subs %3, %3, #8 \n" - MEMACCESS(2) - "vst4.8 {d20, d21, d22, d23}, [%2]! 
\n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile(YUVTORGB_SETUP + "vmov.u8 d23, #255 \n" + "1: \n" READNV12 YUVTORGB + "subs %3, %3, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%2]! \n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", + "q10", "q11", "q12", "q13", "q14", "q15"); } -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" - READNV21 - YUVTORGB - "subs %3, %3, #8 \n" - MEMACCESS(2) - "vst4.8 {d20, d21, d22, d23}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_vu), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile(YUVTORGB_SETUP + "vmov.u8 d23, #255 \n" + "1: \n" READNV21 YUVTORGB + "subs %3, %3, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%2]! \n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_vu), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", + "q10", "q11", "q12", "q13", "q14", "q15"); } -void NV12ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void NV12ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile( + + YUVTORGB_SETUP + + "1: \n" + + READNV12 YUVTORGB + "subs %3, %3, #8 \n" + "vst3.8 {d20, d21, d22}, [%2]! \n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_rgb24), // %2 + "+r"(width) // %3 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); +} + +void NV21ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile( + + YUVTORGB_SETUP + + "1: \n" + + READNV21 YUVTORGB + "subs %3, %3, #8 \n" + "vst3.8 {d20, d21, d22}, [%2]! 
\n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_vu), // %1 + "+r"(dst_rgb24), // %2 + "+r"(width) // %3 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); +} + +void NV12ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "1: \n" - READNV12 - YUVTORGB - "subs %3, %3, #8 \n" - ARGBTORGB565 - MEMACCESS(2) - "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_rgb565), // %2 - "+r"(width) // %3 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile( + YUVTORGB_SETUP + "1: \n" READNV12 YUVTORGB + "subs %3, %3, #8 \n" ARGBTORGB565 + "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_rgb565), // %2 + "+r"(width) // %3 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } -void YUY2ToARGBRow_NEON(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" - READYUY2 - YUVTORGB - "subs %2, %2, #8 \n" - MEMACCESS(1) - "vst4.8 {d20, d21, d22, d23}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile(YUVTORGB_SETUP + "vmov.u8 d23, #255 \n" + "1: \n" READYUY2 YUVTORGB + "subs %2, %2, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", + "q10", "q11", "q12", "q13", "q14", "q15"); } -void UYVYToARGBRow_NEON(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" - READUYVY - YUVTORGB - "subs %2, %2, #8 \n" - MEMACCESS(1) - "vst4.8 {d20, d21, d22, d23}, [%1]! 
\n" - "bgt 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + asm volatile(YUVTORGB_SETUP + "vmov.u8 d23, #255 \n" + "1: \n" READUYVY YUVTORGB + "subs %2, %2, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", + "q10", "q11", "q12", "q13", "q14", "q15"); } // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. -void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, +void SplitUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV - "subs %3, %3, #16 \n" // 16 processed per loop - MEMACCESS(1) - "vst1.8 {q0}, [%1]! \n" // store U - MEMACCESS(2) - "vst1.8 {q1}, [%2]! \n" // store V - "bgt 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 // Output registers - : // Input registers - : "cc", "memory", "q0", "q1" // Clobber List - ); + asm volatile( + "1: \n" + "vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV + "subs %3, %3, #16 \n" // 16 processed per loop + "vst1.8 {q0}, [%1]! \n" // store U + "vst1.8 {q1}, [%2]! \n" // store V + "bgt 1b \n" + : "+r"(src_uv), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 // Output registers + : // Input registers + : "cc", "memory", "q0", "q1" // Clobber List + ); } // Reads 16 U's and V's and writes out 16 pairs of UV. -void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, +void MergeUVRow_NEON(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load U - MEMACCESS(1) - "vld1.8 {q1}, [%1]! \n" // load V - "subs %3, %3, #16 \n" // 16 processed per loop - MEMACCESS(2) - "vst2.u8 {q0, q1}, [%2]! \n" // store 16 pairs of UV - "bgt 1b \n" - : - "+r"(src_u), // %0 - "+r"(src_v), // %1 - "+r"(dst_uv), // %2 - "+r"(width) // %3 // Output registers - : // Input registers - : "cc", "memory", "q0", "q1" // Clobber List - ); + asm volatile( + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load U + "vld1.8 {q1}, [%1]! \n" // load V + "subs %3, %3, #16 \n" // 16 processed per loop + "vst2.8 {q0, q1}, [%2]! \n" // store 16 pairs of UV + "bgt 1b \n" + : "+r"(src_u), // %0 + "+r"(src_v), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 // Output registers + : // Input registers + : "cc", "memory", "q0", "q1" // Clobber List + ); +} + +// Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b. +void SplitRGBRow_NEON(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width) { + asm volatile( + "1: \n" + "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB + "vld3.8 {d1, d3, d5}, [%0]! \n" // next 8 RGB + "subs %4, %4, #16 \n" // 16 processed per loop + "vst1.8 {q0}, [%1]! \n" // store R + "vst1.8 {q1}, [%2]! \n" // store G + "vst1.8 {q2}, [%3]! 
\n" // store B + "bgt 1b \n" + : "+r"(src_rgb), // %0 + "+r"(dst_r), // %1 + "+r"(dst_g), // %2 + "+r"(dst_b), // %3 + "+r"(width) // %4 + : // Input registers + : "cc", "memory", "d0", "d1", "d2" // Clobber List + ); +} + +// Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time +void MergeRGBRow_NEON(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, + int width) { + asm volatile( + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load R + "vld1.8 {q1}, [%1]! \n" // load G + "vld1.8 {q2}, [%2]! \n" // load B + "subs %4, %4, #16 \n" // 16 processed per loop + "vst3.8 {d0, d2, d4}, [%3]! \n" // store 8 RGB + "vst3.8 {d1, d3, d5}, [%3]! \n" // next 8 RGB + "bgt 1b \n" + : "+r"(src_r), // %0 + "+r"(src_g), // %1 + "+r"(src_b), // %2 + "+r"(dst_rgb), // %3 + "+r"(width) // %4 + : // Input registers + : "cc", "memory", "q0", "q1", "q2" // Clobber List + ); } // Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15. -void CopyRow_NEON(const uint8* src, uint8* dst, int count) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32 - "subs %2, %2, #32 \n" // 32 processed per loop - MEMACCESS(1) - "vst1.8 {d0, d1, d2, d3}, [%1]! \n" // store 32 - "bgt 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(count) // %2 // Output registers - : // Input registers - : "cc", "memory", "q0", "q1" // Clobber List - ); +void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "1: \n" + "vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32 + "subs %2, %2, #32 \n" // 32 processed per loop + "vst1.8 {d0, d1, d2, d3}, [%1]! \n" // store 32 + "bgt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 // Output registers + : // Input registers + : "cc", "memory", "q0", "q1" // Clobber List + ); } -// SetRow writes 'count' bytes using an 8 bit value repeated. -void SetRow_NEON(uint8* dst, uint8 v8, int count) { - asm volatile ( - "vdup.8 q0, %2 \n" // duplicate 16 bytes - "1: \n" - "subs %1, %1, #16 \n" // 16 bytes per loop - MEMACCESS(0) - "vst1.8 {q0}, [%0]! \n" // store - "bgt 1b \n" - : "+r"(dst), // %0 - "+r"(count) // %1 - : "r"(v8) // %2 - : "cc", "memory", "q0" - ); +// SetRow writes 'width' bytes using an 8 bit value repeated. +void SetRow_NEON(uint8_t* dst, uint8_t v8, int width) { + asm volatile( + "vdup.8 q0, %2 \n" // duplicate 16 bytes + "1: \n" + "subs %1, %1, #16 \n" // 16 bytes per loop + "vst1.8 {q0}, [%0]! \n" // store + "bgt 1b \n" + : "+r"(dst), // %0 + "+r"(width) // %1 + : "r"(v8) // %2 + : "cc", "memory", "q0"); } -// ARGBSetRow writes 'count' pixels using an 32 bit value repeated. -void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { - asm volatile ( - "vdup.u32 q0, %2 \n" // duplicate 4 ints - "1: \n" - "subs %1, %1, #4 \n" // 4 pixels per loop - MEMACCESS(0) - "vst1.8 {q0}, [%0]! \n" // store - "bgt 1b \n" - : "+r"(dst), // %0 - "+r"(count) // %1 - : "r"(v32) // %2 - : "cc", "memory", "q0" - ); +// ARGBSetRow writes 'width' pixels using an 32 bit value repeated. +void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) { + asm volatile( + "vdup.u32 q0, %2 \n" // duplicate 4 ints + "1: \n" + "subs %1, %1, #4 \n" // 4 pixels per loop + "vst1.8 {q0}, [%0]! \n" // store + "bgt 1b \n" + : "+r"(dst), // %0 + "+r"(width) // %1 + : "r"(v32) // %2 + : "cc", "memory", "q0"); } -void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { - asm volatile ( - // Start at end of source row. 
- "mov r3, #-16 \n" - "add %0, %0, %2 \n" - "sub %0, #16 \n" +void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + // Start at end of source row. + "mov r3, #-16 \n" + "add %0, %0, %2 \n" + "sub %0, #16 \n" - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0], r3 \n" // src -= 16 - "subs %2, #16 \n" // 16 pixels per loop. - "vrev64.8 q0, q0 \n" - MEMACCESS(1) - "vst1.8 {d1}, [%1]! \n" // dst += 16 - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" - "bgt 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "r3", "q0" - ); + "1: \n" + "vld1.8 {q0}, [%0], r3 \n" // src -= 16 + "subs %2, #16 \n" // 16 pixels per loop. + "vrev64.8 q0, q0 \n" + "vst1.8 {d1}, [%1]! \n" // dst += 16 + "vst1.8 {d0}, [%1]! \n" + "bgt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "r3", "q0"); } -void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, +void MirrorUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - // Start at end of source row. - "mov r12, #-16 \n" - "add %0, %0, %3, lsl #1 \n" - "sub %0, #16 \n" + asm volatile( + // Start at end of source row. + "mov r12, #-16 \n" + "add %0, %0, %3, lsl #1 \n" + "sub %0, #16 \n" - "1: \n" - MEMACCESS(0) - "vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16 - "subs %3, #8 \n" // 8 pixels per loop. - "vrev64.8 q0, q0 \n" - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // dst += 8 - MEMACCESS(2) - "vst1.8 {d1}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "r12", "q0" - ); + "1: \n" + "vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16 + "subs %3, #8 \n" // 8 pixels per loop. + "vrev64.8 q0, q0 \n" + "vst1.8 {d0}, [%1]! \n" // dst += 8 + "vst1.8 {d1}, [%2]! \n" + "bgt 1b \n" + : "+r"(src_uv), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "r12", "q0"); } -void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { - asm volatile ( - // Start at end of source row. - "mov r3, #-16 \n" - "add %0, %0, %2, lsl #2 \n" - "sub %0, #16 \n" +void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + // Start at end of source row. + "mov r3, #-16 \n" + "add %0, %0, %2, lsl #2 \n" + "sub %0, #16 \n" - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0], r3 \n" // src -= 16 - "subs %2, #4 \n" // 4 pixels per loop. - "vrev64.32 q0, q0 \n" - MEMACCESS(1) - "vst1.8 {d1}, [%1]! \n" // dst += 16 - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" - "bgt 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "r3", "q0" - ); + "1: \n" + "vld1.8 {q0}, [%0], r3 \n" // src -= 16 + "subs %2, #4 \n" // 4 pixels per loop. + "vrev64.32 q0, q0 \n" + "vst1.8 {d1}, [%1]! \n" // dst += 16 + "vst1.8 {d0}, [%1]! \n" + "bgt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "r3", "q0"); } -void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) { - asm volatile ( - "vmov.u8 d4, #255 \n" // Alpha - "1: \n" - MEMACCESS(0) - "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24. - "subs %2, %2, #8 \n" // 8 processed per loop. - MEMACCESS(1) - "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. 
- "bgt 1b \n" - : "+r"(src_rgb24), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List - ); +void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width) { + asm volatile( + "vmov.u8 d4, #255 \n" // Alpha + "1: \n" + "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. + "bgt 1b \n" + : "+r"(src_rgb24), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List + ); } -void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) { - asm volatile ( - "vmov.u8 d4, #255 \n" // Alpha - "1: \n" - MEMACCESS(0) - "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vswp.u8 d1, d3 \n" // swap R, B - MEMACCESS(1) - "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. - "bgt 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List - ); +void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) { + asm volatile( + "vmov.u8 d4, #255 \n" // Alpha + "1: \n" + "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vswp.u8 d1, d3 \n" // swap R, B + "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. + "bgt 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List + ); } -void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vswp.u8 d1, d3 \n" // swap R, B - MEMACCESS(1) - "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RGB24. - "bgt 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_rgb24), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "d1", "d2", "d3" // Clobber List - ); +void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { + asm volatile( + "1: \n" + "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vswp.u8 d1, d3 \n" // swap R, B + "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of + // RGB24. 
+ "bgt 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_rgb24), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d1", "d2", "d3" // Clobber List + ); } -#define RGB565TOARGB \ - "vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \ - "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \ - "vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \ - "vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \ - "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ - "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ - "vorr.u8 d0, d0, d4 \n" /* B */ \ - "vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \ - "vorr.u8 d2, d1, d5 \n" /* R */ \ - "vorr.u8 d1, d4, d6 \n" /* G */ +#define RGB565TOARGB \ + "vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \ + "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \ + "vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \ + "vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \ + "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ + "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ + "vorr.u8 d0, d0, d4 \n" /* B */ \ + "vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \ + "vorr.u8 d2, d1, d5 \n" /* R */ \ + "vorr.u8 d1, d4, d6 \n" /* G */ -void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) { - asm volatile ( - "vmov.u8 d3, #255 \n" // Alpha - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - RGB565TOARGB - MEMACCESS(1) - "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. - "bgt 1b \n" - : "+r"(src_rgb565), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List - ); +void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width) { + asm volatile( + "vmov.u8 d3, #255 \n" // Alpha + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. + "subs %2, %2, #8 \n" // 8 processed per loop. + RGB565TOARGB + "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. + "bgt 1b \n" + : "+r"(src_rgb565), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List + ); } -#define ARGB1555TOARGB \ - "vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \ - "vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \ - "vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \ - "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \ - "vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \ - "vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \ - "vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \ - "vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \ - "vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \ - "vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \ - "vorr.u8 q1, q1, q3 \n" /* R,A */ \ - "vorr.u8 q0, q0, q2 \n" /* B,G */ \ +#define ARGB1555TOARGB \ + "vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \ + "vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \ + "vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \ + "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \ + "vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \ + "vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \ + "vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \ + "vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \ + "vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \ + "vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \ + "vorr.u8 q1, q1, q3 \n" /* R,A */ \ + "vorr.u8 q0, q0, q2 \n" /* B,G */ // RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha. 
-#define RGB555TOARGB \ - "vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \ - "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \ - "vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \ - "vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \ - "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ - "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ - "vorr.u8 d0, d0, d4 \n" /* B */ \ - "vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \ - "vorr.u8 d2, d1, d5 \n" /* R */ \ - "vorr.u8 d1, d4, d6 \n" /* G */ +#define RGB555TOARGB \ + "vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \ + "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \ + "vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \ + "vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \ + "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ + "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ + "vorr.u8 d0, d0, d4 \n" /* B */ \ + "vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \ + "vorr.u8 d2, d1, d5 \n" /* R */ \ + "vorr.u8 d1, d4, d6 \n" /* G */ -void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, +void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_argb, int width) { - asm volatile ( - "vmov.u8 d3, #255 \n" // Alpha - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGB1555TOARGB - MEMACCESS(1) - "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. - "bgt 1b \n" - : "+r"(src_argb1555), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List - ); + asm volatile( + "vmov.u8 d3, #255 \n" // Alpha + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. + "subs %2, %2, #8 \n" // 8 processed per loop. + ARGB1555TOARGB + "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. + "bgt 1b \n" + : "+r"(src_argb1555), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List + ); } -#define ARGB4444TOARGB \ - "vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \ - "vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \ - "vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \ - "vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \ - "vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \ - "vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \ - "vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \ - "vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */ +#define ARGB4444TOARGB \ + "vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \ + "vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \ + "vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \ + "vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \ + "vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \ + "vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \ + "vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \ + "vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */ -void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, +void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_argb, int width) { - asm volatile ( - "vmov.u8 d3, #255 \n" // Alpha - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGB4444TOARGB - MEMACCESS(1) - "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. - "bgt 1b \n" - : "+r"(src_argb4444), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q2" // Clobber List - ); + asm volatile( + "vmov.u8 d3, #255 \n" // Alpha + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. + "subs %2, %2, #8 \n" // 8 processed per loop. 
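ARGB4444TOARGB uses the same idea with nibbles: each 4-bit field is replicated into both halves of the output byte, so 0xF becomes 0xFF and 0x0 stays 0x00. A scalar sketch, assuming libyuv's little-endian layout (byte 0 = G:B, byte 1 = A:R):

#include <stdint.h>

/* Scalar reference: expand one ARGB4444 pixel by nibble replication. */
static void argb4444_to_argb_ref(uint16_t p, uint8_t argb[4]) {
  uint8_t b = p & 0x0f;
  uint8_t g = (p >> 4) & 0x0f;
  uint8_t r = (p >> 8) & 0x0f;
  uint8_t a = (p >> 12) & 0x0f;
  argb[0] = (uint8_t)(b * 0x11); /* 0xN -> 0xNN */
  argb[1] = (uint8_t)(g * 0x11);
  argb[2] = (uint8_t)(r * 0x11);
  argb[3] = (uint8_t)(a * 0x11);
}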
+ ARGB4444TOARGB + "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. + "bgt 1b \n" + : "+r"(src_argb4444), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2" // Clobber List + ); } -void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - MEMACCESS(1) - "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RGB24. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_rgb24), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List - ); -} - -void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vswp.u8 d1, d3 \n" // swap R, B - MEMACCESS(1) - "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_raw), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List - ); -} - -void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2. - "subs %2, %2, #16 \n" // 16 processed per loop. - MEMACCESS(1) - "vst1.8 {q0}, [%1]! \n" // store 16 pixels of Y. - "bgt 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1" // Clobber List - ); -} - -void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY. - "subs %2, %2, #16 \n" // 16 processed per loop. - MEMACCESS(1) - "vst1.8 {q1}, [%1]! \n" // store 16 pixels of Y. - "bgt 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1" // Clobber List - ); -} - -void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, +void ARGBToRGB24Row_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb24, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. - "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. - MEMACCESS(1) - "vst1.8 {d1}, [%1]! \n" // store 8 U. - MEMACCESS(2) - "vst1.8 {d3}, [%2]! \n" // store 8 V. - "bgt 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List - ); + asm volatile( + "1: \n" + "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of + // RGB24. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_rgb24), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List + ); } -void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, +void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) { + asm volatile( + "1: \n" + "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vswp.u8 d1, d3 \n" // swap R, B + "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW. 
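YUY2 packs 4:2:2 as Y0,U,Y1,V per pair of pixels, and UYVY is the same with the chroma bytes first, which is why YUY2ToYRow_NEON keeps q0 after the vld2 deinterleave while UYVYToYRow_NEON keeps q1. A scalar sketch of the Y extraction:

#include <stdint.h>

/* Scalar reference for YUY2ToY: Y0,U,Y1,V,... -> Y plane. */
static void yuy2_to_y_row_ref(const uint8_t* src_yuy2, uint8_t* dst_y,
                              int width) {
  for (int x = 0; x < width; ++x) {
    dst_y[x] = src_yuy2[2 * x]; /* for UYVY, use src[2 * x + 1] instead */
  }
}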
+ "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_raw), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List + ); +} + +void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { + asm volatile( + "1: \n" + "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2. + "subs %2, %2, #16 \n" // 16 processed per loop. + "vst1.8 {q0}, [%1]! \n" // store 16 pixels of Y. + "bgt 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1" // Clobber List + ); +} + +void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { + asm volatile( + "1: \n" + "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY. + "subs %2, %2, #16 \n" // 16 processed per loop. + "vst1.8 {q1}, [%1]! \n" // store 16 pixels of Y. + "bgt 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1" // Clobber List + ); +} + +void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. - "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 U. - MEMACCESS(2) - "vst1.8 {d2}, [%2]! \n" // store 8 V. - "bgt 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List - ); + asm volatile( + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. + "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. + "vst1.8 {d1}, [%1]! \n" // store 8 U. + "vst1.8 {d3}, [%2]! \n" // store 8 V. + "bgt 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List + ); } -void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "add %1, %0, %1 \n" // stride + src_yuy2 - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. - "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. - MEMACCESS(1) - "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2. - "vrhadd.u8 d1, d1, d5 \n" // average rows of U - "vrhadd.u8 d3, d3, d7 \n" // average rows of V - MEMACCESS(2) - "vst1.8 {d1}, [%2]! \n" // store 8 U. - MEMACCESS(3) - "vst1.8 {d3}, [%3]! \n" // store 8 V. - "bgt 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(stride_yuy2), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List - ); +void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. + "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. + "vst1.8 {d0}, [%1]! \n" // store 8 U. + "vst1.8 {d2}, [%2]! \n" // store 8 V. + "bgt 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List + ); } -void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "add %1, %0, %1 \n" // stride + src_uyvy - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. - "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. - MEMACCESS(1) - "vld4.8 {d4, d5, d6, d7}, [%1]! 
\n" // load next row UYVY. - "vrhadd.u8 d0, d0, d4 \n" // average rows of U - "vrhadd.u8 d2, d2, d6 \n" // average rows of V - MEMACCESS(2) - "vst1.8 {d0}, [%2]! \n" // store 8 U. - MEMACCESS(3) - "vst1.8 {d2}, [%3]! \n" // store 8 V. - "bgt 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(stride_uyvy), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List - ); +void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "add %1, %0, %1 \n" // stride + src_yuy2 + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. + "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. + "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2. + "vrhadd.u8 d1, d1, d5 \n" // average rows of U + "vrhadd.u8 d3, d3, d7 \n" // average rows of V + "vst1.8 {d1}, [%2]! \n" // store 8 U. + "vst1.8 {d3}, [%3]! \n" // store 8 V. + "bgt 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(stride_yuy2), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", + "d7" // Clobber List + ); +} + +void UYVYToUVRow_NEON(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "add %1, %0, %1 \n" // stride + src_uyvy + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. + "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. + "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row UYVY. + "vrhadd.u8 d0, d0, d4 \n" // average rows of U + "vrhadd.u8 d2, d2, d6 \n" // average rows of V + "vst1.8 {d0}, [%2]! \n" // store 8 U. + "vst1.8 {d2}, [%3]! \n" // store 8 V. + "bgt 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(stride_uyvy), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", + "d7" // Clobber List + ); } // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width) { - asm volatile ( - MEMACCESS(3) - "vld1.8 {q2}, [%3] \n" // shuffler - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load 4 pixels. - "subs %2, %2, #4 \n" // 4 processed per loop - "vtbl.8 d2, {d0, d1}, d4 \n" // look up 2 first pixels - "vtbl.8 d3, {d0, d1}, d5 \n" // look up 2 next pixels - MEMACCESS(1) - "vst1.8 {q1}, [%1]! \n" // store 4. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(shuffler) // %3 - : "cc", "memory", "q0", "q1", "q2" // Clobber List - ); +void ARGBShuffleRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width) { + asm volatile( + "vld1.8 {q2}, [%3] \n" // shuffler + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 4 pixels. + "subs %2, %2, #4 \n" // 4 processed per loop + "vtbl.8 d2, {d0, d1}, d4 \n" // look up 2 first pixels + "vtbl.8 d3, {d0, d1}, d5 \n" // look up 2 next pixels + "vst1.8 {q1}, [%1]! \n" // store 4. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(shuffler) // %3 + : "cc", "memory", "q0", "q1", "q2" // Clobber List + ); } -void I422ToYUY2Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys - MEMACCESS(1) - "vld1.8 {d1}, [%1]! \n" // load 8 Us - MEMACCESS(2) - "vld1.8 {d3}, [%2]! 
\n" // load 8 Vs - "subs %4, %4, #16 \n" // 16 pixels - MEMACCESS(3) - "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 YUY2/16 pixels. - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_yuy2), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "d0", "d1", "d2", "d3" - ); +void I422ToYUY2Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width) { + asm volatile( + "1: \n" + "vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys + "vld1.8 {d1}, [%1]! \n" // load 8 Us + "vld1.8 {d3}, [%2]! \n" // load 8 Vs + "subs %4, %4, #16 \n" // 16 pixels + "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 YUY2/16 pixels. + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_yuy2), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "d0", "d1", "d2", "d3"); } -void I422ToUYVYRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld2.8 {d1, d3}, [%0]! \n" // load 16 Ys - MEMACCESS(1) - "vld1.8 {d0}, [%1]! \n" // load 8 Us - MEMACCESS(2) - "vld1.8 {d2}, [%2]! \n" // load 8 Vs - "subs %4, %4, #16 \n" // 16 pixels - MEMACCESS(3) - "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 UYVY/16 pixels. - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_uyvy), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "d0", "d1", "d2", "d3" - ); +void I422ToUYVYRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width) { + asm volatile( + "1: \n" + "vld2.8 {d1, d3}, [%0]! \n" // load 16 Ys + "vld1.8 {d0}, [%1]! \n" // load 8 Us + "vld1.8 {d2}, [%2]! \n" // load 8 Vs + "subs %4, %4, #16 \n" // 16 pixels + "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 UYVY/16 pixels. + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_uyvy), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "d0", "d1", "d2", "d3"); } -void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGBTORGB565 - MEMACCESS(1) - "vst1.8 {q0}, [%1]! \n" // store 8 pixels RGB565. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_rgb565), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q8", "q9", "q10", "q11" - ); +void ARGBToRGB565Row_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb565, + int width) { + asm volatile( + "1: \n" + "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. + "subs %2, %2, #8 \n" // 8 processed per loop. + ARGBTORGB565 + "vst1.8 {q0}, [%1]! \n" // store 8 pixels RGB565. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_rgb565), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q8", "q9", "q10", "q11"); } -void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width) { - asm volatile ( - "vdup.32 d2, %2 \n" // dither4 - "1: \n" - MEMACCESS(1) - "vld4.8 {d20, d21, d22, d23}, [%1]! \n" // load 8 pixels of ARGB. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vqadd.u8 d20, d20, d2 \n" - "vqadd.u8 d21, d21, d2 \n" - "vqadd.u8 d22, d22, d2 \n" - ARGBTORGB565 - MEMACCESS(0) - "vst1.8 {q0}, [%0]! \n" // store 8 pixels RGB565. 
- "bgt 1b \n" - : "+r"(dst_rgb) // %0 - : "r"(src_argb), // %1 - "r"(dither4), // %2 - "r"(width) // %3 - : "cc", "memory", "q0", "q1", "q8", "q9", "q10", "q11" - ); +void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, + int width) { + asm volatile( + "vdup.32 d2, %2 \n" // dither4 + "1: \n" + "vld4.8 {d20, d21, d22, d23}, [%1]! \n" // load 8 pixels of ARGB. + "subs %3, %3, #8 \n" // 8 processed per loop. + "vqadd.u8 d20, d20, d2 \n" + "vqadd.u8 d21, d21, d2 \n" + "vqadd.u8 d22, d22, d2 \n" // add for dither + ARGBTORGB565 + "vst1.8 {q0}, [%0]! \n" // store 8 RGB565. + "bgt 1b \n" + : "+r"(dst_rgb) // %0 + : "r"(src_argb), // %1 + "r"(dither4), // %2 + "r"(width) // %3 + : "cc", "memory", "q0", "q1", "q8", "q9", "q10", "q11"); } -void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555, +void ARGBToARGB1555Row_NEON(const uint8_t* src_argb, + uint8_t* dst_argb1555, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGBTOARGB1555 - MEMACCESS(1) - "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB1555. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb1555), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q8", "q9", "q10", "q11" - ); + asm volatile( + "1: \n" + "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. + "subs %2, %2, #8 \n" // 8 processed per loop. + ARGBTOARGB1555 + "vst1.8 {q0}, [%1]! \n" // store 8 ARGB1555. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb1555), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q8", "q9", "q10", "q11"); } -void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, +void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_argb4444, int width) { - asm volatile ( - "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. - "1: \n" - MEMACCESS(0) - "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGBTOARGB4444 - MEMACCESS(1) - "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB4444. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb4444), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q8", "q9", "q10", "q11" - ); + asm volatile( + "vmov.u8 d4, #0x0f \n" // bits to clear with + // vbic. + "1: \n" + "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. + "subs %2, %2, #8 \n" // 8 processed per loop. + ARGBTOARGB4444 + "vst1.8 {q0}, [%1]! \n" // store 8 ARGB4444. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb4444), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q8", "q9", "q10", "q11"); } -void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { - asm volatile ( - "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d27, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d27 \n" - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
- "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q12", "q13" - ); +void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { + asm volatile( + "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d27, #16 \n" // Add 16 constant + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q2, d0, d24 \n" // B + "vmlal.u8 q2, d1, d25 \n" // G + "vmlal.u8 q2, d2, d26 \n" // R + "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d27 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2", "q12", "q13"); } -void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels - "subs %2, %2, #16 \n" // 16 processed per loop - MEMACCESS(1) - "vst1.8 {q3}, [%1]! \n" // store 16 A's. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_a), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List - ); +void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { + asm volatile( + "1: \n" + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels + "subs %2, %2, #16 \n" // 16 processed per loop + "vst1.8 {q3}, [%1]! \n" // store 16 A's. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_a), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List + ); } -void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { - asm volatile ( - "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient - "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient - "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q12", "q13" - ); +void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { + asm volatile( + "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient + "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient + "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q2, d0, d24 \n" // B + "vmlal.u8 q2, d1, d25 \n" // G + "vmlal.u8 q2, d2, d26 \n" // R + "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2", "q12", "q13"); } // 8x1 pixels. 
-void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, +void ARGBToUV444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "vmov.u8 d24, #112 \n" // UB / VR 0.875 coefficient - "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient - "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient - "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient - "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vmull.u8 q2, d0, d24 \n" // B - "vmlsl.u8 q2, d1, d25 \n" // G - "vmlsl.u8 q2, d2, d26 \n" // R - "vadd.u16 q2, q2, q15 \n" // +128 -> unsigned + asm volatile( + "vmov.u8 d24, #112 \n" // UB / VR 0.875 + // coefficient + "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient + "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient + "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient + "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient + "vmov.u16 q15, #0x8080 \n" // 128.5 + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. + "subs %3, %3, #8 \n" // 8 processed per loop. + "vmull.u8 q2, d0, d24 \n" // B + "vmlsl.u8 q2, d1, d25 \n" // G + "vmlsl.u8 q2, d2, d26 \n" // R + "vadd.u16 q2, q2, q15 \n" // +128 -> unsigned - "vmull.u8 q3, d2, d24 \n" // R - "vmlsl.u8 q3, d1, d28 \n" // G - "vmlsl.u8 q3, d0, d27 \n" // B - "vadd.u16 q3, q3, q15 \n" // +128 -> unsigned + "vmull.u8 q3, d2, d24 \n" // R + "vmlsl.u8 q3, d1, d28 \n" // G + "vmlsl.u8 q3, d0, d27 \n" // B + "vadd.u16 q3, q3, q15 \n" // +128 -> unsigned - "vqshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q3, #8 \n" // 16 bit to 8 bit V + "vqshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit U + "vqshrn.u16 d1, q3, #8 \n" // 16 bit to 8 bit V - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. - MEMACCESS(2) - "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15" - ); -} - -// 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. -void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) { - asm volatile ( - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - MEMACCESS(0) - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. - "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - MEMACCESS(0) - "vld4.8 {d8, d10, d12, d14}, [%0]! \n" // load 8 more ARGB pixels. - MEMACCESS(0) - "vld4.8 {d9, d11, d13, d15}, [%0]! \n" // load last 8 ARGB pixels. - "vpaddl.u8 q4, q4 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q5, q5 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q6, q6 \n" // R 16 bytes -> 8 shorts. - - "vpadd.u16 d0, d0, d1 \n" // B 16 shorts -> 8 shorts. - "vpadd.u16 d1, d8, d9 \n" // B - "vpadd.u16 d2, d2, d3 \n" // G 16 shorts -> 8 shorts. - "vpadd.u16 d3, d10, d11 \n" // G - "vpadd.u16 d4, d4, d5 \n" // R 16 shorts -> 8 shorts. 
- "vpadd.u16 d5, d12, d13 \n" // R - - "vrshr.u16 q0, q0, #1 \n" // 2x average - "vrshr.u16 q1, q1, #1 \n" - "vrshr.u16 q2, q2, #1 \n" - - "subs %3, %3, #32 \n" // 32 processed per loop. - "vmul.s16 q8, q0, q10 \n" // B - "vmls.s16 q8, q1, q11 \n" // G - "vmls.s16 q8, q2, q12 \n" // R - "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned - "vmul.s16 q9, q2, q10 \n" // R - "vmls.s16 q9, q1, q14 \n" // G - "vmls.s16 q9, q0, q13 \n" // B - "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned - "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. - MEMACCESS(2) - "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. + "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", + "q15"); } +// clang-format off // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -#define RGBTOUV(QB, QG, QR) \ - "vmul.s16 q8, " #QB ", q10 \n" /* B */ \ - "vmls.s16 q8, " #QG ", q11 \n" /* G */ \ - "vmls.s16 q8, " #QR ", q12 \n" /* R */ \ - "vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \ - "vmul.s16 q9, " #QR ", q10 \n" /* R */ \ - "vmls.s16 q9, " #QG ", q14 \n" /* G */ \ - "vmls.s16 q9, " #QB ", q13 \n" /* B */ \ - "vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \ - "vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \ - "vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */ +#define RGBTOUV(QB, QG, QR) \ + "vmul.s16 q8, " #QB ", q10 \n" /* B */ \ + "vmls.s16 q8, " #QG ", q11 \n" /* G */ \ + "vmls.s16 q8, " #QR ", q12 \n" /* R */ \ + "vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \ + "vmul.s16 q9, " #QR ", q10 \n" /* R */ \ + "vmls.s16 q9, " #QG ", q14 \n" /* G */ \ + "vmls.s16 q9, " #QB ", q13 \n" /* B */ \ + "vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \ + "vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \ + "vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */ +// clang-format on // TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr. -void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { +void ARGBToUVRow_NEON(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_argb "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient @@ -1468,17 +1335,13 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) + "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - MEMACCESS(0) "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels. - MEMACCESS(1) "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. 
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. @@ -1490,9 +1353,7 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q0, q1, q2) - MEMACCESS(2) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - MEMACCESS(3) "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_argb), // %0 @@ -1507,8 +1368,11 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, } // TODO(fbarchard): Subsample match C code. -void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { +void ARGBToUVJRow_NEON(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_argb "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient @@ -1517,17 +1381,13 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) + "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - MEMACCESS(0) "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels. - MEMACCESS(1) "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. @@ -1539,9 +1399,7 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q0, q1, q2) - MEMACCESS(2) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - MEMACCESS(3) "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_argb), // %0 @@ -1555,8 +1413,11 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, ); } -void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width) { +void BGRAToUVRow_NEON(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_bgra "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient @@ -1565,17 +1426,13 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) + "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels. - MEMACCESS(0) "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels. "vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels. - MEMACCESS(1) "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels. "vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts. @@ -1587,9 +1444,7 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q3, q2, q1) - MEMACCESS(2) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - MEMACCESS(3) "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
"bgt 1b \n" : "+r"(src_bgra), // %0 @@ -1603,8 +1458,11 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, ); } -void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width) { +void ABGRToUVRow_NEON(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_abgr "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient @@ -1613,17 +1471,13 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) + "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels. - MEMACCESS(0) "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels. "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels. - MEMACCESS(1) "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels. "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. @@ -1635,9 +1489,7 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q2, q1, q0) - MEMACCESS(2) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - MEMACCESS(3) "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_abgr), // %0 @@ -1651,8 +1503,11 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, ); } -void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width) { +void RGBAToUVRow_NEON(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_rgba "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient @@ -1661,17 +1516,13 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) + "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels. - MEMACCESS(0) "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels. "vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels. - MEMACCESS(1) "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels. "vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts. @@ -1683,9 +1534,7 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q0, q1, q2) - MEMACCESS(2) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - MEMACCESS(3) "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
"bgt 1b \n" : "+r"(src_rgba), // %0 @@ -1699,8 +1548,11 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, ); } -void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width) { +void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_rgb24 "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient @@ -1709,17 +1561,13 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) + "1: \n" "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels. - MEMACCESS(0) "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels. "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RGB24 pixels. - MEMACCESS(1) "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RGB24 pixels. "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. @@ -1731,9 +1579,7 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q0, q1, q2) - MEMACCESS(2) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - MEMACCESS(3) "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_rgb24), // %0 @@ -1747,8 +1593,11 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, ); } -void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width) { +void RAWToUVRow_NEON(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_raw "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient @@ -1757,17 +1606,13 @@ void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) + "1: \n" "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels. - MEMACCESS(0) "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels. "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RAW pixels. - MEMACCESS(1) "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RAW pixels. "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. @@ -1779,9 +1624,7 @@ void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q2, q1, q0) - MEMACCESS(2) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - MEMACCESS(3) "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_raw), // %0 @@ -1796,875 +1639,815 @@ void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. 
-void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_argb - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. - RGB565TOARGB - "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. - "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. - "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // next 8 RGB565 pixels. - RGB565TOARGB - "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. - "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. - "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. +void RGB565ToUVRow_NEON(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "add %1, %0, %1 \n" // src_stride + src_argb + "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 + // coefficient + "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient + "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient + "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient + "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient + "vmov.u16 q15, #0x8080 \n" // 128.5 + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. + RGB565TOARGB + "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. + "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. + "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. + "vld1.8 {q0}, [%0]! \n" // next 8 RGB565 pixels. + RGB565TOARGB + "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. + "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. + "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" // load 8 RGB565 pixels. - RGB565TOARGB - "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. - "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. - "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" // next 8 RGB565 pixels. - RGB565TOARGB - "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. - "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. - "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. + "vld1.8 {q0}, [%1]! \n" // load 8 RGB565 pixels. + RGB565TOARGB + "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. + "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. + "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. + "vld1.8 {q0}, [%1]! \n" // next 8 RGB565 pixels. + RGB565TOARGB + "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. + "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. + "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. - "vrshr.u16 q4, q4, #1 \n" // 2x average - "vrshr.u16 q5, q5, #1 \n" - "vrshr.u16 q6, q6, #1 \n" + "vrshr.u16 q4, q4, #1 \n" // 2x average + "vrshr.u16 q5, q5, #1 \n" + "vrshr.u16 q6, q6, #1 \n" - "subs %4, %4, #16 \n" // 16 processed per loop. - "vmul.s16 q8, q4, q10 \n" // B - "vmls.s16 q8, q5, q11 \n" // G - "vmls.s16 q8, q6, q12 \n" // R - "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned - "vmul.s16 q9, q6, q10 \n" // R - "vmls.s16 q9, q5, q14 \n" // G - "vmls.s16 q9, q4, q13 \n" // B - "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned - "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V - MEMACCESS(2) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 
- MEMACCESS(3) - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_rgb565), // %0 - "+r"(src_stride_rgb565), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + "subs %4, %4, #16 \n" // 16 processed per loop. + "vmul.s16 q8, q4, q10 \n" // B + "vmls.s16 q8, q5, q11 \n" // G + "vmls.s16 q8, q6, q12 \n" // R + "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned + "vmul.s16 q9, q6, q10 \n" // R + "vmls.s16 q9, q5, q14 \n" // G + "vmls.s16 q9, q4, q13 \n" // B + "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned + "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U + "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V + "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. + "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. + "bgt 1b \n" + : "+r"(src_rgb565), // %0 + "+r"(src_stride_rgb565), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", + "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_argb - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. - RGB555TOARGB - "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. - "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. - "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // next 8 ARGB1555 pixels. - RGB555TOARGB - "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. - "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. - "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. +void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "add %1, %0, %1 \n" // src_stride + src_argb + "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 + // coefficient + "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient + "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient + "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient + "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient + "vmov.u16 q15, #0x8080 \n" // 128.5 + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. + RGB555TOARGB + "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. + "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. + "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. + "vld1.8 {q0}, [%0]! \n" // next 8 ARGB1555 pixels. + RGB555TOARGB + "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. + "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. + "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" // load 8 ARGB1555 pixels. - RGB555TOARGB - "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. - "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. - "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" // next 8 ARGB1555 pixels. - RGB555TOARGB - "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. 
- "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. - "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. + "vld1.8 {q0}, [%1]! \n" // load 8 ARGB1555 pixels. + RGB555TOARGB + "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. + "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. + "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. + "vld1.8 {q0}, [%1]! \n" // next 8 ARGB1555 pixels. + RGB555TOARGB + "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. + "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. + "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. - "vrshr.u16 q4, q4, #1 \n" // 2x average - "vrshr.u16 q5, q5, #1 \n" - "vrshr.u16 q6, q6, #1 \n" + "vrshr.u16 q4, q4, #1 \n" // 2x average + "vrshr.u16 q5, q5, #1 \n" + "vrshr.u16 q6, q6, #1 \n" - "subs %4, %4, #16 \n" // 16 processed per loop. - "vmul.s16 q8, q4, q10 \n" // B - "vmls.s16 q8, q5, q11 \n" // G - "vmls.s16 q8, q6, q12 \n" // R - "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned - "vmul.s16 q9, q6, q10 \n" // R - "vmls.s16 q9, q5, q14 \n" // G - "vmls.s16 q9, q4, q13 \n" // B - "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned - "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V - MEMACCESS(2) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - MEMACCESS(3) - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_argb1555), // %0 - "+r"(src_stride_argb1555), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + "subs %4, %4, #16 \n" // 16 processed per loop. + "vmul.s16 q8, q4, q10 \n" // B + "vmls.s16 q8, q5, q11 \n" // G + "vmls.s16 q8, q6, q12 \n" // R + "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned + "vmul.s16 q9, q6, q10 \n" // R + "vmls.s16 q9, q5, q14 \n" // G + "vmls.s16 q9, q4, q13 \n" // B + "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned + "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U + "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V + "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. + "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. + "bgt 1b \n" + : "+r"(src_argb1555), // %0 + "+r"(src_stride_argb1555), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", + "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_argb - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. - ARGB4444TOARGB - "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. - "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. - "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // next 8 ARGB4444 pixels. - ARGB4444TOARGB - "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. - "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. - "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. 
+void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "add %1, %0, %1 \n" // src_stride + src_argb + "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 + // coefficient + "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient + "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient + "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient + "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient + "vmov.u16 q15, #0x8080 \n" // 128.5 + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. + ARGB4444TOARGB + "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. + "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. + "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. + "vld1.8 {q0}, [%0]! \n" // next 8 ARGB4444 pixels. + ARGB4444TOARGB + "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. + "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. + "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" // load 8 ARGB4444 pixels. - ARGB4444TOARGB - "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. - "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. - "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" // next 8 ARGB4444 pixels. - ARGB4444TOARGB - "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. - "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. - "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. + "vld1.8 {q0}, [%1]! \n" // load 8 ARGB4444 pixels. + ARGB4444TOARGB + "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. + "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. + "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. + "vld1.8 {q0}, [%1]! \n" // next 8 ARGB4444 pixels. + ARGB4444TOARGB + "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. + "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. + "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. - "vrshr.u16 q4, q4, #1 \n" // 2x average - "vrshr.u16 q5, q5, #1 \n" - "vrshr.u16 q6, q6, #1 \n" + "vrshr.u16 q4, q4, #1 \n" // 2x average + "vrshr.u16 q5, q5, #1 \n" + "vrshr.u16 q6, q6, #1 \n" - "subs %4, %4, #16 \n" // 16 processed per loop. - "vmul.s16 q8, q4, q10 \n" // B - "vmls.s16 q8, q5, q11 \n" // G - "vmls.s16 q8, q6, q12 \n" // R - "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned - "vmul.s16 q9, q6, q10 \n" // R - "vmls.s16 q9, q5, q14 \n" // G - "vmls.s16 q9, q4, q13 \n" // B - "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned - "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V - MEMACCESS(2) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - MEMACCESS(3) - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_argb4444), // %0 - "+r"(src_stride_argb4444), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); + "subs %4, %4, #16 \n" // 16 processed per loop. + "vmul.s16 q8, q4, q10 \n" // B + "vmls.s16 q8, q5, q11 \n" // G + "vmls.s16 q8, q6, q12 \n" // R + "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned + "vmul.s16 q9, q6, q10 \n" // R + "vmls.s16 q9, q5, q14 \n" // G + "vmls.s16 q9, q4, q13 \n" // B + "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned + "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U + "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V + "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. + "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
+ "bgt 1b \n" + : "+r"(src_argb4444), // %0 + "+r"(src_stride_argb4444), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", + "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } -void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) { - asm volatile ( - "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d27, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - RGB565TOARGB - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d27 \n" - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_rgb565), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" - ); +void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { + asm volatile( + "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d27, #16 \n" // Add 16 constant + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. + "subs %2, %2, #8 \n" // 8 processed per loop. + RGB565TOARGB + "vmull.u8 q2, d0, d24 \n" // B + "vmlal.u8 q2, d1, d25 \n" // G + "vmlal.u8 q2, d2, d26 \n" // R + "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d27 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_rgb565), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"); } -void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) { - asm volatile ( - "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d27, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGB1555TOARGB - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d27 \n" - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_argb1555), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" - ); +void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_y, + int width) { + asm volatile( + "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d27, #16 \n" // Add 16 constant + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. + "subs %2, %2, #8 \n" // 8 processed per loop. + ARGB1555TOARGB + "vmull.u8 q2, d0, d24 \n" // B + "vmlal.u8 q2, d1, d25 \n" // G + "vmlal.u8 q2, d2, d26 \n" // R + "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d27 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
+ "bgt 1b \n" + : "+r"(src_argb1555), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"); } -void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) { - asm volatile ( - "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d27, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGB4444TOARGB - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d27 \n" - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_argb4444), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" - ); +void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_y, + int width) { + asm volatile( + "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d27, #16 \n" // Add 16 constant + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. + "subs %2, %2, #8 \n" // 8 processed per loop. + ARGB4444TOARGB + "vmull.u8 q2, d0, d24 \n" // B + "vmlal.u8 q2, d1, d25 \n" // G + "vmlal.u8 q2, d2, d26 \n" // R + "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d27 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_argb4444), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"); } -void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) { - asm volatile ( - "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d7, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q8, d1, d4 \n" // R - "vmlal.u8 q8, d2, d5 \n" // G - "vmlal.u8 q8, d3, d6 \n" // B - "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d7 \n" - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_bgra), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" - ); +void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) { + asm volatile( + "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q8, d1, d4 \n" // R + "vmlal.u8 q8, d2, d5 \n" // G + "vmlal.u8 q8, d3, d6 \n" // B + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
+ "bgt 1b \n" + : "+r"(src_bgra), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } -void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) { - asm volatile ( - "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d7, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q8, d0, d4 \n" // R - "vmlal.u8 q8, d1, d5 \n" // G - "vmlal.u8 q8, d2, d6 \n" // B - "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d7 \n" - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_abgr), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" - ); +void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) { + asm volatile( + "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q8, d0, d4 \n" // R + "vmlal.u8 q8, d1, d5 \n" // G + "vmlal.u8 q8, d2, d6 \n" // B + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_abgr), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } -void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) { - asm volatile ( - "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d7, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q8, d1, d4 \n" // B - "vmlal.u8 q8, d2, d5 \n" // G - "vmlal.u8 q8, d3, d6 \n" // R - "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d7 \n" - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_rgba), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" - ); +void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) { + asm volatile( + "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q8, d1, d4 \n" // B + "vmlal.u8 q8, d2, d5 \n" // G + "vmlal.u8 q8, d3, d6 \n" // R + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
+ "bgt 1b \n" + : "+r"(src_rgba), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } -void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) { - asm volatile ( - "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d7, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q8, d0, d4 \n" // B - "vmlal.u8 q8, d1, d5 \n" // G - "vmlal.u8 q8, d2, d6 \n" // R - "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d7 \n" - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_rgb24), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" - ); +void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { + asm volatile( + "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + "1: \n" + "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q8, d0, d4 \n" // B + "vmlal.u8 q8, d1, d5 \n" // G + "vmlal.u8 q8, d2, d6 \n" // R + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "bgt 1b \n" + : "+r"(src_rgb24), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } -void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) { - asm volatile ( - "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d7, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q8, d0, d4 \n" // B - "vmlal.u8 q8, d1, d5 \n" // G - "vmlal.u8 q8, d2, d6 \n" // R - "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d7 \n" - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" - ); +void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) { + asm volatile( + "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient + "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient + "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient + "vmov.u8 d7, #16 \n" // Add 16 constant + "1: \n" + "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q8, d0, d4 \n" // B + "vmlal.u8 q8, d1, d5 \n" // G + "vmlal.u8 q8, d2, d6 \n" // R + "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y + "vqadd.u8 d0, d7 \n" + "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
+ "bgt 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } // Bilinear filter 16x2 -> 16x1 -void InterpolateRow_NEON(uint8* dst_ptr, - const uint8* src_ptr, ptrdiff_t src_stride, - int dst_width, int source_y_fraction) { +void InterpolateRow_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, + int source_y_fraction) { int y1_fraction = source_y_fraction; - asm volatile ( - "cmp %4, #0 \n" - "beq 100f \n" - "add %2, %1 \n" - "cmp %4, #128 \n" - "beq 50f \n" + asm volatile( + "cmp %4, #0 \n" + "beq 100f \n" + "add %2, %1 \n" + "cmp %4, #128 \n" + "beq 50f \n" - "vdup.8 d5, %4 \n" - "rsb %4, #256 \n" - "vdup.8 d4, %4 \n" - // General purpose row blend. - "1: \n" - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" - MEMACCESS(2) - "vld1.8 {q1}, [%2]! \n" - "subs %3, %3, #16 \n" - "vmull.u8 q13, d0, d4 \n" - "vmull.u8 q14, d1, d4 \n" - "vmlal.u8 q13, d2, d5 \n" - "vmlal.u8 q14, d3, d5 \n" - "vrshrn.u16 d0, q13, #8 \n" - "vrshrn.u16 d1, q14, #8 \n" - MEMACCESS(0) - "vst1.8 {q0}, [%0]! \n" - "bgt 1b \n" - "b 99f \n" + "vdup.8 d5, %4 \n" + "rsb %4, #256 \n" + "vdup.8 d4, %4 \n" + // General purpose row blend. + "1: \n" + "vld1.8 {q0}, [%1]! \n" + "vld1.8 {q1}, [%2]! \n" + "subs %3, %3, #16 \n" + "vmull.u8 q13, d0, d4 \n" + "vmull.u8 q14, d1, d4 \n" + "vmlal.u8 q13, d2, d5 \n" + "vmlal.u8 q14, d3, d5 \n" + "vrshrn.u16 d0, q13, #8 \n" + "vrshrn.u16 d1, q14, #8 \n" + "vst1.8 {q0}, [%0]! \n" + "bgt 1b \n" + "b 99f \n" - // Blend 50 / 50. - "50: \n" - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" - MEMACCESS(2) - "vld1.8 {q1}, [%2]! \n" - "subs %3, %3, #16 \n" - "vrhadd.u8 q0, q1 \n" - MEMACCESS(0) - "vst1.8 {q0}, [%0]! \n" - "bgt 50b \n" - "b 99f \n" + // Blend 50 / 50. + "50: \n" + "vld1.8 {q0}, [%1]! \n" + "vld1.8 {q1}, [%2]! \n" + "subs %3, %3, #16 \n" + "vrhadd.u8 q0, q1 \n" + "vst1.8 {q0}, [%0]! \n" + "bgt 50b \n" + "b 99f \n" - // Blend 100 / 0 - Copy row unchanged. - "100: \n" - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" - "subs %3, %3, #16 \n" - MEMACCESS(0) - "vst1.8 {q0}, [%0]! \n" - "bgt 100b \n" + // Blend 100 / 0 - Copy row unchanged. + "100: \n" + "vld1.8 {q0}, [%1]! \n" + "subs %3, %3, #16 \n" + "vst1.8 {q0}, [%0]! \n" + "bgt 100b \n" - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(src_stride), // %2 - "+r"(dst_width), // %3 - "+r"(y1_fraction) // %4 - : - : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14" - ); + "99: \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(src_stride), // %2 + "+r"(dst_width), // %3 + "+r"(y1_fraction) // %4 + : + : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"); } // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr -void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - "subs %3, #8 \n" - "blt 89f \n" - // Blend 8 pixels. - "8: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB0. - MEMACCESS(1) - "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 pixels of ARGB1. - "subs %3, %3, #8 \n" // 8 processed per loop. 
- "vmull.u8 q10, d4, d3 \n" // db * a - "vmull.u8 q11, d5, d3 \n" // dg * a - "vmull.u8 q12, d6, d3 \n" // dr * a - "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 - "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 - "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 - "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 - "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 - "vqadd.u8 q0, q0, q2 \n" // + sbg - "vqadd.u8 d2, d2, d6 \n" // + sr - "vmov.u8 d3, #255 \n" // a = 255 - MEMACCESS(2) - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 pixels of ARGB. - "bge 8b \n" +void ARGBBlendRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + asm volatile( + "subs %3, #8 \n" + "blt 89f \n" + // Blend 8 pixels. + "8: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB0. + "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 pixels of ARGB1. + "subs %3, %3, #8 \n" // 8 processed per loop. + "vmull.u8 q10, d4, d3 \n" // db * a + "vmull.u8 q11, d5, d3 \n" // dg * a + "vmull.u8 q12, d6, d3 \n" // dr * a + "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 + "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 + "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 + "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 + "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 + "vqadd.u8 q0, q0, q2 \n" // + sbg + "vqadd.u8 d2, d2, d6 \n" // + sr + "vmov.u8 d3, #255 \n" // a = 255 + "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 pixels of ARGB. + "bge 8b \n" - "89: \n" - "adds %3, #8-1 \n" - "blt 99f \n" + "89: \n" + "adds %3, #8-1 \n" + "blt 99f \n" - // Blend 1 pixels. - "1: \n" - MEMACCESS(0) - "vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n" // load 1 pixel ARGB0. - MEMACCESS(1) - "vld4.8 {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n" // load 1 pixel ARGB1. - "subs %3, %3, #1 \n" // 1 processed per loop. - "vmull.u8 q10, d4, d3 \n" // db * a - "vmull.u8 q11, d5, d3 \n" // dg * a - "vmull.u8 q12, d6, d3 \n" // dr * a - "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 - "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 - "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 - "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 - "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 - "vqadd.u8 q0, q0, q2 \n" // + sbg - "vqadd.u8 d2, d2, d6 \n" // + sr - "vmov.u8 d3, #255 \n" // a = 255 - MEMACCESS(2) - "vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n" // store 1 pixel. - "bge 1b \n" + // Blend 1 pixels. + "1: \n" + "vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n" // load 1 pixel ARGB0. + "vld4.8 {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n" // load 1 pixel ARGB1. + "subs %3, %3, #1 \n" // 1 processed per loop. + "vmull.u8 q10, d4, d3 \n" // db * a + "vmull.u8 q11, d5, d3 \n" // dg * a + "vmull.u8 q12, d6, d3 \n" // dr * a + "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 + "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 + "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 + "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 + "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 + "vqadd.u8 q0, q0, q2 \n" // + sbg + "vqadd.u8 d2, d2, d6 \n" // + sr + "vmov.u8 d3, #255 \n" // a = 255 + "vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n" // store 1 pixel. + "bge 1b \n" - "99: \n" + "99: \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12" - ); + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12"); } // Attenuate 8 pixels at a time. 
-void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - // Attenuate 8 pixels. - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q10, d0, d3 \n" // b * a - "vmull.u8 q11, d1, d3 \n" // g * a - "vmull.u8 q12, d2, d3 \n" // r * a - "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8 - "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8 - "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8 - MEMACCESS(1) - "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q10", "q11", "q12" - ); +void ARGBAttenuateRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { + asm volatile( + // Attenuate 8 pixels. + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q10, d0, d3 \n" // b * a + "vmull.u8 q11, d1, d3 \n" // g * a + "vmull.u8 q12, d2, d3 \n" // r * a + "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8 + "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8 + "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8 + "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q10", "q11", "q12"); } // Quantize 8 ARGB pixels (32 bytes). // dst = (dst * scale >> 16) * interval_size + interval_offset; -void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width) { - asm volatile ( - "vdup.u16 q8, %2 \n" - "vshr.u16 q8, q8, #1 \n" // scale >>= 1 - "vdup.u16 q9, %3 \n" // interval multiply. - "vdup.u16 q10, %4 \n" // interval add +void ARGBQuantizeRow_NEON(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width) { + asm volatile( + "vdup.u16 q8, %2 \n" + "vshr.u16 q8, q8, #1 \n" // scale >>= 1 + "vdup.u16 q9, %3 \n" // interval multiply. + "vdup.u16 q10, %4 \n" // interval add - // 8 pixel loop. - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d2, d4, d6}, [%0] \n" // load 8 pixels of ARGB. - "subs %1, %1, #8 \n" // 8 processed per loop. - "vmovl.u8 q0, d0 \n" // b (0 .. 255) - "vmovl.u8 q1, d2 \n" - "vmovl.u8 q2, d4 \n" - "vqdmulh.s16 q0, q0, q8 \n" // b * scale - "vqdmulh.s16 q1, q1, q8 \n" // g - "vqdmulh.s16 q2, q2, q8 \n" // r - "vmul.u16 q0, q0, q9 \n" // b * interval_size - "vmul.u16 q1, q1, q9 \n" // g - "vmul.u16 q2, q2, q9 \n" // r - "vadd.u16 q0, q0, q10 \n" // b + interval_offset - "vadd.u16 q1, q1, q10 \n" // g - "vadd.u16 q2, q2, q10 \n" // r - "vqmovn.u16 d0, q0 \n" - "vqmovn.u16 d2, q1 \n" - "vqmovn.u16 d4, q2 \n" - MEMACCESS(0) - "vst4.8 {d0, d2, d4, d6}, [%0]! \n" // store 8 pixels of ARGB. - "bgt 1b \n" - : "+r"(dst_argb), // %0 - "+r"(width) // %1 - : "r"(scale), // %2 - "r"(interval_size), // %3 - "r"(interval_offset) // %4 - : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10" - ); + // 8 pixel loop. + "1: \n" + "vld4.8 {d0, d2, d4, d6}, [%0] \n" // load 8 pixels of ARGB. + "subs %1, %1, #8 \n" // 8 processed per loop. + "vmovl.u8 q0, d0 \n" // b (0 .. 
255)
+ "vmovl.u8 q1, d2 \n"
+ "vmovl.u8 q2, d4 \n"
+ "vqdmulh.s16 q0, q0, q8 \n" // b * scale
+ "vqdmulh.s16 q1, q1, q8 \n" // g
+ "vqdmulh.s16 q2, q2, q8 \n" // r
+ "vmul.u16 q0, q0, q9 \n" // b * interval_size
+ "vmul.u16 q1, q1, q9 \n" // g
+ "vmul.u16 q2, q2, q9 \n" // r
+ "vadd.u16 q0, q0, q10 \n" // b + interval_offset
+ "vadd.u16 q1, q1, q10 \n" // g
+ "vadd.u16 q2, q2, q10 \n" // r
+ "vqmovn.u16 d0, q0 \n"
+ "vqmovn.u16 d2, q1 \n"
+ "vqmovn.u16 d4, q2 \n"
+ "vst4.8 {d0, d2, d4, d6}, [%0]! \n" // store 8 pixels of ARGB.
+ "bgt 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(width) // %1
+ : "r"(scale), // %2
+ "r"(interval_size), // %3
+ "r"(interval_offset) // %4
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10");
 }

 // Shade 8 pixels at a time by specified value.
 // NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scalar register from 0 to 8.
 // Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
-void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
- uint32 value) {
- asm volatile (
- "vdup.u32 q0, %3 \n" // duplicate scale value.
- "vzip.u8 d0, d1 \n" // d0 aarrggbb.
- "vshr.u16 q0, q0, #1 \n" // scale / 2.
+void ARGBShadeRow_NEON(const uint8_t* src_argb,
+ uint8_t* dst_argb,
+ int width,
+ uint32_t value) {
+ asm volatile(
+ "vdup.u32 q0, %3 \n" // duplicate scale value.
+ "vzip.u8 d0, d1 \n" // d0 aarrggbb.
+ "vshr.u16 q0, q0, #1 \n" // scale / 2.
- // 8 pixel loop.
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d20, d22, d24, d26}, [%0]! \n" // load 8 pixels of ARGB.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmovl.u8 q10, d20 \n" // b (0 .. 255)
- "vmovl.u8 q11, d22 \n"
- "vmovl.u8 q12, d24 \n"
- "vmovl.u8 q13, d26 \n"
- "vqrdmulh.s16 q10, q10, d0[0] \n" // b * scale * 2
- "vqrdmulh.s16 q11, q11, d0[1] \n" // g
- "vqrdmulh.s16 q12, q12, d0[2] \n" // r
- "vqrdmulh.s16 q13, q13, d0[3] \n" // a
- "vqmovn.u16 d20, q10 \n"
- "vqmovn.u16 d22, q11 \n"
- "vqmovn.u16 d24, q12 \n"
- "vqmovn.u16 d26, q13 \n"
- MEMACCESS(1)
- "vst4.8 {d20, d22, d24, d26}, [%1]! \n" // store 8 pixels of ARGB.
- "bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(value) // %3
- : "cc", "memory", "q0", "q10", "q11", "q12", "q13"
- );
+ // 8 pixel loop.
+ "1: \n"
+ "vld4.8 {d20, d22, d24, d26}, [%0]! \n" // load 8 pixels of ARGB.
+ "subs %2, %2, #8 \n" // 8 processed per loop.
+ "vmovl.u8 q10, d20 \n" // b (0 .. 255)
+ "vmovl.u8 q11, d22 \n"
+ "vmovl.u8 q12, d24 \n"
+ "vmovl.u8 q13, d26 \n"
+ "vqrdmulh.s16 q10, q10, d0[0] \n" // b * scale * 2
+ "vqrdmulh.s16 q11, q11, d0[1] \n" // g
+ "vqrdmulh.s16 q12, q12, d0[2] \n" // r
+ "vqrdmulh.s16 q13, q13, d0[3] \n" // a
+ "vqmovn.u16 d20, q10 \n"
+ "vqmovn.u16 d22, q11 \n"
+ "vqmovn.u16 d24, q12 \n"
+ "vqmovn.u16 d26, q13 \n"
+ "vst4.8 {d20, d22, d24, d26}, [%1]! \n" // store 8 pixels of ARGB.
+ "bgt 1b \n"
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(value) // %3
+ : "cc", "memory", "q0", "q10", "q11", "q12", "q13");
 }

 // Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
 // Similar to ARGBToYJ but stores ARGB.
 // C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
-void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
- asm volatile (
- "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
- "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
- "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
- "1: \n"
- MEMACCESS(0)
- "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
- "subs %2, %2, #8 \n" // 8 processed per loop.
- "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit B - "vmov d1, d0 \n" // G - "vmov d2, d0 \n" // R - MEMACCESS(1) - "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 ARGB pixels. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q12", "q13" - ); +void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) { + asm volatile( + "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient + "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient + "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmull.u8 q2, d0, d24 \n" // B + "vmlal.u8 q2, d1, d25 \n" // G + "vmlal.u8 q2, d2, d26 \n" // R + "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit B + "vmov d1, d0 \n" // G + "vmov d2, d0 \n" // R + "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 ARGB pixels. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "q0", "q1", "q2", "q12", "q13"); } // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. // b = (r * 35 + g * 68 + b * 17) >> 7 // g = (r * 45 + g * 88 + b * 22) >> 7 // r = (r * 50 + g * 98 + b * 24) >> 7 -void ARGBSepiaRow_NEON(uint8* dst_argb, int width) { - asm volatile ( - "vmov.u8 d20, #17 \n" // BB coefficient - "vmov.u8 d21, #68 \n" // BG coefficient - "vmov.u8 d22, #35 \n" // BR coefficient - "vmov.u8 d24, #22 \n" // GB coefficient - "vmov.u8 d25, #88 \n" // GG coefficient - "vmov.u8 d26, #45 \n" // GR coefficient - "vmov.u8 d28, #24 \n" // BB coefficient - "vmov.u8 d29, #98 \n" // BG coefficient - "vmov.u8 d30, #50 \n" // BR coefficient - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0] \n" // load 8 ARGB pixels. - "subs %1, %1, #8 \n" // 8 processed per loop. - "vmull.u8 q2, d0, d20 \n" // B to Sepia B - "vmlal.u8 q2, d1, d21 \n" // G - "vmlal.u8 q2, d2, d22 \n" // R - "vmull.u8 q3, d0, d24 \n" // B to Sepia G - "vmlal.u8 q3, d1, d25 \n" // G - "vmlal.u8 q3, d2, d26 \n" // R - "vmull.u8 q8, d0, d28 \n" // B to Sepia R - "vmlal.u8 q8, d1, d29 \n" // G - "vmlal.u8 q8, d2, d30 \n" // R - "vqshrn.u16 d0, q2, #7 \n" // 16 bit to 8 bit B - "vqshrn.u16 d1, q3, #7 \n" // 16 bit to 8 bit G - "vqshrn.u16 d2, q8, #7 \n" // 16 bit to 8 bit R - MEMACCESS(0) - "vst4.8 {d0, d1, d2, d3}, [%0]! \n" // store 8 ARGB pixels. - "bgt 1b \n" - : "+r"(dst_argb), // %0 - "+r"(width) // %1 - : - : "cc", "memory", "q0", "q1", "q2", "q3", - "q10", "q11", "q12", "q13", "q14", "q15" - ); +void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) { + asm volatile( + "vmov.u8 d20, #17 \n" // BB coefficient + "vmov.u8 d21, #68 \n" // BG coefficient + "vmov.u8 d22, #35 \n" // BR coefficient + "vmov.u8 d24, #22 \n" // GB coefficient + "vmov.u8 d25, #88 \n" // GG coefficient + "vmov.u8 d26, #45 \n" // GR coefficient + "vmov.u8 d28, #24 \n" // BB coefficient + "vmov.u8 d29, #98 \n" // BG coefficient + "vmov.u8 d30, #50 \n" // BR coefficient + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0] \n" // load 8 ARGB pixels. + "subs %1, %1, #8 \n" // 8 processed per loop. 
+ "vmull.u8 q2, d0, d20 \n" // B to Sepia B + "vmlal.u8 q2, d1, d21 \n" // G + "vmlal.u8 q2, d2, d22 \n" // R + "vmull.u8 q3, d0, d24 \n" // B to Sepia G + "vmlal.u8 q3, d1, d25 \n" // G + "vmlal.u8 q3, d2, d26 \n" // R + "vmull.u8 q8, d0, d28 \n" // B to Sepia R + "vmlal.u8 q8, d1, d29 \n" // G + "vmlal.u8 q8, d2, d30 \n" // R + "vqshrn.u16 d0, q2, #7 \n" // 16 bit to 8 bit B + "vqshrn.u16 d1, q3, #7 \n" // 16 bit to 8 bit G + "vqshrn.u16 d2, q8, #7 \n" // 16 bit to 8 bit R + "vst4.8 {d0, d1, d2, d3}, [%0]! \n" // store 8 ARGB pixels. + "bgt 1b \n" + : "+r"(dst_argb), // %0 + "+r"(width) // %1 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12", "q13", + "q14", "q15"); } // Tranform 8 ARGB pixels (32 bytes) with color matrix. // TODO(fbarchard): Was same as Sepia except matrix is provided. This function // needs to saturate. Consider doing a non-saturating version. -void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width) { - asm volatile ( - MEMACCESS(3) - "vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors. - "vmovl.s8 q0, d4 \n" // B,G coefficients s16. - "vmovl.s8 q1, d5 \n" // R,A coefficients s16. +void ARGBColorMatrixRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width) { + asm volatile( + "vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors. + "vmovl.s8 q0, d4 \n" // B,G coefficients s16. + "vmovl.s8 q1, d5 \n" // R,A coefficients s16. - "1: \n" - MEMACCESS(0) - "vld4.8 {d16, d18, d20, d22}, [%0]! \n" // load 8 ARGB pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmovl.u8 q8, d16 \n" // b (0 .. 255) 16 bit - "vmovl.u8 q9, d18 \n" // g - "vmovl.u8 q10, d20 \n" // r - "vmovl.u8 q11, d22 \n" // a - "vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B - "vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G - "vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R - "vmul.s16 q15, q8, d3[0] \n" // A = B * Matrix A - "vmul.s16 q4, q9, d0[1] \n" // B += G * Matrix B - "vmul.s16 q5, q9, d1[1] \n" // G += G * Matrix G - "vmul.s16 q6, q9, d2[1] \n" // R += G * Matrix R - "vmul.s16 q7, q9, d3[1] \n" // A += G * Matrix A - "vqadd.s16 q12, q12, q4 \n" // Accumulate B - "vqadd.s16 q13, q13, q5 \n" // Accumulate G - "vqadd.s16 q14, q14, q6 \n" // Accumulate R - "vqadd.s16 q15, q15, q7 \n" // Accumulate A - "vmul.s16 q4, q10, d0[2] \n" // B += R * Matrix B - "vmul.s16 q5, q10, d1[2] \n" // G += R * Matrix G - "vmul.s16 q6, q10, d2[2] \n" // R += R * Matrix R - "vmul.s16 q7, q10, d3[2] \n" // A += R * Matrix A - "vqadd.s16 q12, q12, q4 \n" // Accumulate B - "vqadd.s16 q13, q13, q5 \n" // Accumulate G - "vqadd.s16 q14, q14, q6 \n" // Accumulate R - "vqadd.s16 q15, q15, q7 \n" // Accumulate A - "vmul.s16 q4, q11, d0[3] \n" // B += A * Matrix B - "vmul.s16 q5, q11, d1[3] \n" // G += A * Matrix G - "vmul.s16 q6, q11, d2[3] \n" // R += A * Matrix R - "vmul.s16 q7, q11, d3[3] \n" // A += A * Matrix A - "vqadd.s16 q12, q12, q4 \n" // Accumulate B - "vqadd.s16 q13, q13, q5 \n" // Accumulate G - "vqadd.s16 q14, q14, q6 \n" // Accumulate R - "vqadd.s16 q15, q15, q7 \n" // Accumulate A - "vqshrun.s16 d16, q12, #6 \n" // 16 bit to 8 bit B - "vqshrun.s16 d18, q13, #6 \n" // 16 bit to 8 bit G - "vqshrun.s16 d20, q14, #6 \n" // 16 bit to 8 bit R - "vqshrun.s16 d22, q15, #6 \n" // 16 bit to 8 bit A - MEMACCESS(1) - "vst4.8 {d16, d18, d20, d22}, [%1]! \n" // store 8 ARGB pixels. 
- "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(matrix_argb) // %3 - : "cc", "memory", "q0", "q1", "q2", "q4", "q5", "q6", "q7", "q8", "q9", - "q10", "q11", "q12", "q13", "q14", "q15" - ); + "1: \n" + "vld4.8 {d16, d18, d20, d22}, [%0]! \n" // load 8 ARGB pixels. + "subs %2, %2, #8 \n" // 8 processed per loop. + "vmovl.u8 q8, d16 \n" // b (0 .. 255) 16 bit + "vmovl.u8 q9, d18 \n" // g + "vmovl.u8 q10, d20 \n" // r + "vmovl.u8 q11, d22 \n" // a + "vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B + "vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G + "vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R + "vmul.s16 q15, q8, d3[0] \n" // A = B * Matrix A + "vmul.s16 q4, q9, d0[1] \n" // B += G * Matrix B + "vmul.s16 q5, q9, d1[1] \n" // G += G * Matrix G + "vmul.s16 q6, q9, d2[1] \n" // R += G * Matrix R + "vmul.s16 q7, q9, d3[1] \n" // A += G * Matrix A + "vqadd.s16 q12, q12, q4 \n" // Accumulate B + "vqadd.s16 q13, q13, q5 \n" // Accumulate G + "vqadd.s16 q14, q14, q6 \n" // Accumulate R + "vqadd.s16 q15, q15, q7 \n" // Accumulate A + "vmul.s16 q4, q10, d0[2] \n" // B += R * Matrix B + "vmul.s16 q5, q10, d1[2] \n" // G += R * Matrix G + "vmul.s16 q6, q10, d2[2] \n" // R += R * Matrix R + "vmul.s16 q7, q10, d3[2] \n" // A += R * Matrix A + "vqadd.s16 q12, q12, q4 \n" // Accumulate B + "vqadd.s16 q13, q13, q5 \n" // Accumulate G + "vqadd.s16 q14, q14, q6 \n" // Accumulate R + "vqadd.s16 q15, q15, q7 \n" // Accumulate A + "vmul.s16 q4, q11, d0[3] \n" // B += A * Matrix B + "vmul.s16 q5, q11, d1[3] \n" // G += A * Matrix G + "vmul.s16 q6, q11, d2[3] \n" // R += A * Matrix R + "vmul.s16 q7, q11, d3[3] \n" // A += A * Matrix A + "vqadd.s16 q12, q12, q4 \n" // Accumulate B + "vqadd.s16 q13, q13, q5 \n" // Accumulate G + "vqadd.s16 q14, q14, q6 \n" // Accumulate R + "vqadd.s16 q15, q15, q7 \n" // Accumulate A + "vqshrun.s16 d16, q12, #6 \n" // 16 bit to 8 bit B + "vqshrun.s16 d18, q13, #6 \n" // 16 bit to 8 bit G + "vqshrun.s16 d20, q14, #6 \n" // 16 bit to 8 bit R + "vqshrun.s16 d22, q15, #6 \n" // 16 bit to 8 bit A + "vst4.8 {d16, d18, d20, d22}, [%1]! \n" // store 8 ARGB pixels. + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(matrix_argb) // %3 + : "cc", "memory", "q0", "q1", "q2", "q4", "q5", "q6", "q7", "q8", "q9", + "q10", "q11", "q12", "q13", "q14", "q15"); } // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. -void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - // 8 pixel loop. - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - MEMACCESS(1) - "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // load 8 more ARGB pixels. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vmull.u8 q0, d0, d1 \n" // multiply B - "vmull.u8 q1, d2, d3 \n" // multiply G - "vmull.u8 q2, d4, d5 \n" // multiply R - "vmull.u8 q3, d6, d7 \n" // multiply A - "vrshrn.u16 d0, q0, #8 \n" // 16 bit to 8 bit B - "vrshrn.u16 d1, q1, #8 \n" // 16 bit to 8 bit G - "vrshrn.u16 d2, q2, #8 \n" // 16 bit to 8 bit R - "vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A - MEMACCESS(2) - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. 
- "bgt 1b \n" - - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3" - ); +void ARGBMultiplyRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + asm volatile( + // 8 pixel loop. + "1: \n" + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. + "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // load 8 more ARGB + "subs %3, %3, #8 \n" // 8 processed per loop. + "vmull.u8 q0, d0, d1 \n" // multiply B + "vmull.u8 q1, d2, d3 \n" // multiply G + "vmull.u8 q2, d4, d5 \n" // multiply R + "vmull.u8 q3, d6, d7 \n" // multiply A + "vrshrn.u16 d0, q0, #8 \n" // 16 bit to 8 bit B + "vrshrn.u16 d1, q1, #8 \n" // 16 bit to 8 bit G + "vrshrn.u16 d2, q2, #8 \n" // 16 bit to 8 bit R + "vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A + "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. + "bgt 1b \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "q0", "q1", "q2", "q3"); } // Add 2 rows of ARGB pixels together, 8 pixels at a time. -void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - // 8 pixel loop. - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. - MEMACCESS(1) - "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vqadd.u8 q0, q0, q2 \n" // add B, G - "vqadd.u8 q1, q1, q3 \n" // add R, A - MEMACCESS(2) - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. - "bgt 1b \n" - - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3" - ); +void ARGBAddRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + asm volatile( + // 8 pixel loop. + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. + "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB + "subs %3, %3, #8 \n" // 8 processed per loop. + "vqadd.u8 q0, q0, q2 \n" // add B, G + "vqadd.u8 q1, q1, q3 \n" // add R, A + "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. + "bgt 1b \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "q0", "q1", "q2", "q3"); } // Subtract 2 rows of ARGB pixels, 8 pixels at a time. -void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - // 8 pixel loop. - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. - MEMACCESS(1) - "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vqsub.u8 q0, q0, q2 \n" // subtract B, G - "vqsub.u8 q1, q1, q3 \n" // subtract R, A - MEMACCESS(2) - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. - "bgt 1b \n" - - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3" - ); +void ARGBSubtractRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + asm volatile( + // 8 pixel loop. + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. + "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB + "subs %3, %3, #8 \n" // 8 processed per loop. 
+ "vqsub.u8 q0, q0, q2 \n" // subtract B, G + "vqsub.u8 q1, q1, q3 \n" // subtract R, A + "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. + "bgt 1b \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "q0", "q1", "q2", "q3"); } // Adds Sobel X and Sobel Y and stores Sobel into ARGB. @@ -2672,54 +2455,50 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1, // R = Sobel // G = Sobel // B = Sobel -void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - asm volatile ( - "vmov.u8 d3, #255 \n" // alpha - // 8 pixel loop. - "1: \n" - MEMACCESS(0) - "vld1.8 {d0}, [%0]! \n" // load 8 sobelx. - MEMACCESS(1) - "vld1.8 {d1}, [%1]! \n" // load 8 sobely. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vqadd.u8 d0, d0, d1 \n" // add - "vmov.u8 d1, d0 \n" - "vmov.u8 d2, d0 \n" - MEMACCESS(2) - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. - "bgt 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1" - ); +void SobelRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width) { + asm volatile( + "vmov.u8 d3, #255 \n" // alpha + // 8 pixel loop. + "1: \n" + "vld1.8 {d0}, [%0]! \n" // load 8 sobelx. + "vld1.8 {d1}, [%1]! \n" // load 8 sobely. + "subs %3, %3, #8 \n" // 8 processed per loop. + "vqadd.u8 d0, d0, d1 \n" // add + "vmov.u8 d1, d0 \n" + "vmov.u8 d2, d0 \n" + "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. + "bgt 1b \n" + : "+r"(src_sobelx), // %0 + "+r"(src_sobely), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "q0", "q1"); } // Adds Sobel X and Sobel Y and stores Sobel into plane. -void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width) { - asm volatile ( - // 16 pixel loop. - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load 16 sobelx. - MEMACCESS(1) - "vld1.8 {q1}, [%1]! \n" // load 16 sobely. - "subs %3, %3, #16 \n" // 16 processed per loop. - "vqadd.u8 q0, q0, q1 \n" // add - MEMACCESS(2) - "vst1.8 {q0}, [%2]! \n" // store 16 pixels. - "bgt 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_y), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1" - ); +void SobelToPlaneRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width) { + asm volatile( + // 16 pixel loop. + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load 16 sobelx. + "vld1.8 {q1}, [%1]! \n" // load 16 sobely. + "subs %3, %3, #16 \n" // 16 processed per loop. + "vqadd.u8 q0, q0, q1 \n" // add + "vst1.8 {q0}, [%2]! \n" // store 16 pixels. + "bgt 1b \n" + : "+r"(src_sobelx), // %0 + "+r"(src_sobely), // %1 + "+r"(dst_y), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "q0", "q1"); } // Mixes Sobel X, Sobel Y and Sobel into ARGB. @@ -2727,115 +2506,186 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, // R = Sobel X // G = Sobel // B = Sobel Y -void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - asm volatile ( - "vmov.u8 d3, #255 \n" // alpha - // 8 pixel loop. - "1: \n" - MEMACCESS(0) - "vld1.8 {d2}, [%0]! \n" // load 8 sobelx. - MEMACCESS(1) - "vld1.8 {d0}, [%1]! \n" // load 8 sobely. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vqadd.u8 d1, d0, d2 \n" // add - MEMACCESS(2) - "vst4.8 {d0, d1, d2, d3}, [%2]! 
\n" // store 8 ARGB pixels. - "bgt 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1" - ); +void SobelXYRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width) { + asm volatile( + "vmov.u8 d3, #255 \n" // alpha + // 8 pixel loop. + "1: \n" + "vld1.8 {d2}, [%0]! \n" // load 8 sobelx. + "vld1.8 {d0}, [%1]! \n" // load 8 sobely. + "subs %3, %3, #8 \n" // 8 processed per loop. + "vqadd.u8 d1, d0, d2 \n" // add + "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. + "bgt 1b \n" + : "+r"(src_sobelx), // %0 + "+r"(src_sobely), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "q0", "q1"); } // SobelX as a matrix is // -1 0 1 // -2 0 2 // -1 0 1 -void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld1.8 {d0}, [%0],%5 \n" // top - MEMACCESS(0) - "vld1.8 {d1}, [%0],%6 \n" - "vsubl.u8 q0, d0, d1 \n" - MEMACCESS(1) - "vld1.8 {d2}, [%1],%5 \n" // center * 2 - MEMACCESS(1) - "vld1.8 {d3}, [%1],%6 \n" - "vsubl.u8 q1, d2, d3 \n" - "vadd.s16 q0, q0, q1 \n" - "vadd.s16 q0, q0, q1 \n" - MEMACCESS(2) - "vld1.8 {d2}, [%2],%5 \n" // bottom - MEMACCESS(2) - "vld1.8 {d3}, [%2],%6 \n" - "subs %4, %4, #8 \n" // 8 pixels - "vsubl.u8 q1, d2, d3 \n" - "vadd.s16 q0, q0, q1 \n" - "vabs.s16 q0, q0 \n" - "vqmovn.u16 d0, q0 \n" - MEMACCESS(3) - "vst1.8 {d0}, [%3]! \n" // store 8 sobelx - "bgt 1b \n" - : "+r"(src_y0), // %0 - "+r"(src_y1), // %1 - "+r"(src_y2), // %2 - "+r"(dst_sobelx), // %3 - "+r"(width) // %4 - : "r"(2), // %5 - "r"(6) // %6 - : "cc", "memory", "q0", "q1" // Clobber List - ); +void SobelXRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int width) { + asm volatile( + "1: \n" + "vld1.8 {d0}, [%0],%5 \n" // top + "vld1.8 {d1}, [%0],%6 \n" + "vsubl.u8 q0, d0, d1 \n" + "vld1.8 {d2}, [%1],%5 \n" // center * 2 + "vld1.8 {d3}, [%1],%6 \n" + "vsubl.u8 q1, d2, d3 \n" + "vadd.s16 q0, q0, q1 \n" + "vadd.s16 q0, q0, q1 \n" + "vld1.8 {d2}, [%2],%5 \n" // bottom + "vld1.8 {d3}, [%2],%6 \n" + "subs %4, %4, #8 \n" // 8 pixels + "vsubl.u8 q1, d2, d3 \n" + "vadd.s16 q0, q0, q1 \n" + "vabs.s16 q0, q0 \n" + "vqmovn.u16 d0, q0 \n" + "vst1.8 {d0}, [%3]! \n" // store 8 sobelx + "bgt 1b \n" + : "+r"(src_y0), // %0 + "+r"(src_y1), // %1 + "+r"(src_y2), // %2 + "+r"(dst_sobelx), // %3 + "+r"(width) // %4 + : "r"(2), // %5 + "r"(6) // %6 + : "cc", "memory", "q0", "q1" // Clobber List + ); } // SobelY as a matrix is // -1 -2 -1 // 0 0 0 // 1 2 1 -void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld1.8 {d0}, [%0],%4 \n" // left - MEMACCESS(1) - "vld1.8 {d1}, [%1],%4 \n" - "vsubl.u8 q0, d0, d1 \n" - MEMACCESS(0) - "vld1.8 {d2}, [%0],%4 \n" // center * 2 - MEMACCESS(1) - "vld1.8 {d3}, [%1],%4 \n" - "vsubl.u8 q1, d2, d3 \n" - "vadd.s16 q0, q0, q1 \n" - "vadd.s16 q0, q0, q1 \n" - MEMACCESS(0) - "vld1.8 {d2}, [%0],%5 \n" // right - MEMACCESS(1) - "vld1.8 {d3}, [%1],%5 \n" - "subs %3, %3, #8 \n" // 8 pixels - "vsubl.u8 q1, d2, d3 \n" - "vadd.s16 q0, q0, q1 \n" - "vabs.s16 q0, q0 \n" - "vqmovn.u16 d0, q0 \n" - MEMACCESS(2) - "vst1.8 {d0}, [%2]! 
\n" // store 8 sobely - "bgt 1b \n" - : "+r"(src_y0), // %0 - "+r"(src_y1), // %1 - "+r"(dst_sobely), // %2 - "+r"(width) // %3 - : "r"(1), // %4 - "r"(6) // %5 - : "cc", "memory", "q0", "q1" // Clobber List - ); +void SobelYRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width) { + asm volatile( + "1: \n" + "vld1.8 {d0}, [%0],%4 \n" // left + "vld1.8 {d1}, [%1],%4 \n" + "vsubl.u8 q0, d0, d1 \n" + "vld1.8 {d2}, [%0],%4 \n" // center * 2 + "vld1.8 {d3}, [%1],%4 \n" + "vsubl.u8 q1, d2, d3 \n" + "vadd.s16 q0, q0, q1 \n" + "vadd.s16 q0, q0, q1 \n" + "vld1.8 {d2}, [%0],%5 \n" // right + "vld1.8 {d3}, [%1],%5 \n" + "subs %3, %3, #8 \n" // 8 pixels + "vsubl.u8 q1, d2, d3 \n" + "vadd.s16 q0, q0, q1 \n" + "vabs.s16 q0, q0 \n" + "vqmovn.u16 d0, q0 \n" + "vst1.8 {d0}, [%2]! \n" // store 8 sobely + "bgt 1b \n" + : "+r"(src_y0), // %0 + "+r"(src_y1), // %1 + "+r"(dst_sobely), // %2 + "+r"(width) // %3 + : "r"(1), // %4 + "r"(6) // %5 + : "cc", "memory", "q0", "q1" // Clobber List + ); } -#endif // defined(__ARM_NEON__) && !defined(__aarch64__) + +// %y passes a float as a scalar vector for vector * scalar multiply. +// the regoster must be d0 to d15 and indexed with [0] or [1] to access +// the float in the first or second float of the d-reg + +void HalfFloat1Row_NEON(const uint16_t* src, + uint16_t* dst, + float /*unused*/, + int width) { + asm volatile( + + "1: \n" + "vld1.8 {q1}, [%0]! \n" // load 8 shorts + "subs %2, %2, #8 \n" // 8 pixels per loop + "vmovl.u16 q2, d2 \n" // 8 int's + "vmovl.u16 q3, d3 \n" + "vcvt.f32.u32 q2, q2 \n" // 8 floats + "vcvt.f32.u32 q3, q3 \n" + "vmul.f32 q2, q2, %y3 \n" // adjust exponent + "vmul.f32 q3, q3, %y3 \n" + "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat + "vqshrn.u32 d3, q3, #13 \n" + "vst1.8 {q1}, [%1]! \n" + "bgt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "w"(1.9259299444e-34f) // %3 + : "cc", "memory", "q1", "q2", "q3"); +} + +void HalfFloatRow_NEON(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { + asm volatile( + + "1: \n" + "vld1.8 {q1}, [%0]! \n" // load 8 shorts + "subs %2, %2, #8 \n" // 8 pixels per loop + "vmovl.u16 q2, d2 \n" // 8 int's + "vmovl.u16 q3, d3 \n" + "vcvt.f32.u32 q2, q2 \n" // 8 floats + "vcvt.f32.u32 q3, q3 \n" + "vmul.f32 q2, q2, %y3 \n" // adjust exponent + "vmul.f32 q3, q3, %y3 \n" + "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat + "vqshrn.u32 d3, q3, #13 \n" + "vst1.8 {q1}, [%1]! \n" + "bgt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "w"(scale * 1.9259299444e-34f) // %3 + : "cc", "memory", "q1", "q2", "q3"); +} + +void ByteToFloatRow_NEON(const uint8_t* src, + float* dst, + float scale, + int width) { + asm volatile( + + "1: \n" + "vld1.8 {d2}, [%0]! \n" // load 8 bytes + "subs %2, %2, #8 \n" // 8 pixels per loop + "vmovl.u8 q1, d2 \n" // 8 shorts + "vmovl.u16 q2, d2 \n" // 8 ints + "vmovl.u16 q3, d3 \n" + "vcvt.f32.u32 q2, q2 \n" // 8 floats + "vcvt.f32.u32 q3, q3 \n" + "vmul.f32 q2, q2, %y3 \n" // scale + "vmul.f32 q3, q3, %y3 \n" + "vst1.8 {q2, q3}, [%1]! \n" // store 8 floats + "bgt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "w"(scale) // %3 + : "cc", "memory", "q1", "q2", "q3"); +} + +#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__).. 
#ifdef __cplusplus } // extern "C" diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_neon64.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_neon64.cc index 6375d4f55f62..24b4520babce 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/row_neon64.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/row_neon64.cc @@ -19,118 +19,103 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) // Read 8 Y, 4 U and 4 V from 422 -#define READYUV422 \ - MEMACCESS(0) \ - "ld1 {v0.8b}, [%0], #8 \n" \ - MEMACCESS(1) \ - "ld1 {v1.s}[0], [%1], #4 \n" \ - MEMACCESS(2) \ - "ld1 {v1.s}[1], [%2], #4 \n" - -// Read 8 Y, 2 U and 2 V from 422 -#define READYUV411 \ - MEMACCESS(0) \ - "ld1 {v0.8b}, [%0], #8 \n" \ - MEMACCESS(1) \ - "ld1 {v2.h}[0], [%1], #2 \n" \ - MEMACCESS(2) \ - "ld1 {v2.h}[1], [%2], #2 \n" \ - "zip1 v1.8b, v2.8b, v2.8b \n" +#define READYUV422 \ + "ld1 {v0.8b}, [%0], #8 \n" \ + "ld1 {v1.s}[0], [%1], #4 \n" \ + "ld1 {v1.s}[1], [%2], #4 \n" // Read 8 Y, 8 U and 8 V from 444 -#define READYUV444 \ - MEMACCESS(0) \ - "ld1 {v0.8b}, [%0], #8 \n" \ - MEMACCESS(1) \ - "ld1 {v1.d}[0], [%1], #8 \n" \ - MEMACCESS(2) \ - "ld1 {v1.d}[1], [%2], #8 \n" \ - "uaddlp v1.8h, v1.16b \n" \ - "rshrn v1.8b, v1.8h, #1 \n" +#define READYUV444 \ + "ld1 {v0.8b}, [%0], #8 \n" \ + "ld1 {v1.d}[0], [%1], #8 \n" \ + "ld1 {v1.d}[1], [%2], #8 \n" \ + "uaddlp v1.8h, v1.16b \n" \ + "rshrn v1.8b, v1.8h, #1 \n" // Read 8 Y, and set 4 U and 4 V to 128 -#define READYUV400 \ - MEMACCESS(0) \ - "ld1 {v0.8b}, [%0], #8 \n" \ - "movi v1.8b , #128 \n" +#define READYUV400 \ + "ld1 {v0.8b}, [%0], #8 \n" \ + "movi v1.8b , #128 \n" // Read 8 Y and 4 UV from NV12 -#define READNV12 \ - MEMACCESS(0) \ - "ld1 {v0.8b}, [%0], #8 \n" \ - MEMACCESS(1) \ - "ld1 {v2.8b}, [%1], #8 \n" \ - "uzp1 v1.8b, v2.8b, v2.8b \n" \ - "uzp2 v3.8b, v2.8b, v2.8b \n" \ - "ins v1.s[1], v3.s[0] \n" +#define READNV12 \ + "ld1 {v0.8b}, [%0], #8 \n" \ + "ld1 {v2.8b}, [%1], #8 \n" \ + "uzp1 v1.8b, v2.8b, v2.8b \n" \ + "uzp2 v3.8b, v2.8b, v2.8b \n" \ + "ins v1.s[1], v3.s[0] \n" // Read 8 Y and 4 VU from NV21 -#define READNV21 \ - MEMACCESS(0) \ - "ld1 {v0.8b}, [%0], #8 \n" \ - MEMACCESS(1) \ - "ld1 {v2.8b}, [%1], #8 \n" \ - "uzp1 v3.8b, v2.8b, v2.8b \n" \ - "uzp2 v1.8b, v2.8b, v2.8b \n" \ - "ins v1.s[1], v3.s[0] \n" +#define READNV21 \ + "ld1 {v0.8b}, [%0], #8 \n" \ + "ld1 {v2.8b}, [%1], #8 \n" \ + "uzp1 v3.8b, v2.8b, v2.8b \n" \ + "uzp2 v1.8b, v2.8b, v2.8b \n" \ + "ins v1.s[1], v3.s[0] \n" // Read 8 YUY2 -#define READYUY2 \ - MEMACCESS(0) \ - "ld2 {v0.8b, v1.8b}, [%0], #16 \n" \ - "uzp2 v3.8b, v1.8b, v1.8b \n" \ - "uzp1 v1.8b, v1.8b, v1.8b \n" \ - "ins v1.s[1], v3.s[0] \n" +#define READYUY2 \ + "ld2 {v0.8b, v1.8b}, [%0], #16 \n" \ + "uzp2 v3.8b, v1.8b, v1.8b \n" \ + "uzp1 v1.8b, v1.8b, v1.8b \n" \ + "ins v1.s[1], v3.s[0] \n" // Read 8 UYVY -#define READUYVY \ - MEMACCESS(0) \ - "ld2 {v2.8b, v3.8b}, [%0], #16 \n" \ - "orr v0.8b, v3.8b, v3.8b \n" \ - "uzp1 v1.8b, v2.8b, v2.8b \n" \ - "uzp2 v3.8b, v2.8b, v2.8b \n" \ - "ins v1.s[1], v3.s[0] \n" +#define READUYVY \ + "ld2 {v2.8b, v3.8b}, [%0], #16 \n" \ + "orr v0.8b, v3.8b, v3.8b \n" \ + "uzp1 v1.8b, v2.8b, v2.8b \n" \ + "uzp2 v3.8b, v2.8b, v2.8b \n" \ + "ins v1.s[1], v3.s[0] \n" -#define YUVTORGB_SETUP \ - "ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \ - "ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \ - "ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \ - "ld1r {v31.4s}, [%[kYToRgb]] \n" \ - "ld2 {v27.8h, v28.8h}, [%[kUVToRB]] \n" \ - "ld2 {v29.8h, v30.8h}, [%[kUVToG]] \n" +#define YUVTORGB_SETUP \ + "ld1r 
{v24.8h}, [%[kUVBiasBGR]], #2 \n" \ + "ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \ + "ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \ + "ld1r {v31.4s}, [%[kYToRgb]] \n" \ + "ld2 {v27.8h, v28.8h}, [%[kUVToRB]] \n" \ + "ld2 {v29.8h, v30.8h}, [%[kUVToG]] \n" -#define YUVTORGB(vR, vG, vB) \ - "uxtl v0.8h, v0.8b \n" /* Extract Y */ \ - "shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \ - "ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \ - "ushll v0.4s, v0.4h, #0 \n" \ - "mul v3.4s, v3.4s, v31.4s \n" \ - "mul v0.4s, v0.4s, v31.4s \n" \ - "sqshrun v0.4h, v0.4s, #16 \n" \ - "sqshrun2 v0.8h, v3.4s, #16 \n" /* Y */ \ - "uaddw v1.8h, v2.8h, v1.8b \n" /* Replicate UV */ \ - "mov v2.d[0], v1.d[1] \n" /* Extract V */ \ - "uxtl v2.8h, v2.8b \n" \ - "uxtl v1.8h, v1.8b \n" /* Extract U */ \ - "mul v3.8h, v1.8h, v27.8h \n" \ - "mul v5.8h, v1.8h, v29.8h \n" \ - "mul v6.8h, v2.8h, v30.8h \n" \ - "mul v7.8h, v2.8h, v28.8h \n" \ - "sqadd v6.8h, v6.8h, v5.8h \n" \ - "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \ - "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \ - "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \ - "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \ - "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \ - "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \ - "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \ - "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ - "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ +#define YUVTORGB(vR, vG, vB) \ + "uxtl v0.8h, v0.8b \n" /* Extract Y */ \ + "shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \ + "ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \ + "ushll v0.4s, v0.4h, #0 \n" \ + "mul v3.4s, v3.4s, v31.4s \n" \ + "mul v0.4s, v0.4s, v31.4s \n" \ + "sqshrun v0.4h, v0.4s, #16 \n" \ + "sqshrun2 v0.8h, v3.4s, #16 \n" /* Y */ \ + "uaddw v1.8h, v2.8h, v1.8b \n" /* Replicate UV */ \ + "mov v2.d[0], v1.d[1] \n" /* Extract V */ \ + "uxtl v2.8h, v2.8b \n" \ + "uxtl v1.8h, v1.8b \n" /* Extract U */ \ + "mul v3.8h, v1.8h, v27.8h \n" \ + "mul v5.8h, v1.8h, v29.8h \n" \ + "mul v6.8h, v2.8h, v30.8h \n" \ + "mul v7.8h, v2.8h, v28.8h \n" \ + "sqadd v6.8h, v6.8h, v5.8h \n" \ + "sqadd " #vB \ + ".8h, v24.8h, v0.8h \n" /* B */ \ + "sqadd " #vG \ + ".8h, v25.8h, v0.8h \n" /* G */ \ + "sqadd " #vR \ + ".8h, v26.8h, v0.8h \n" /* R */ \ + "sqadd " #vB ".8h, " #vB \ + ".8h, v3.8h \n" /* B */ \ + "sqsub " #vG ".8h, " #vG \ + ".8h, v6.8h \n" /* G */ \ + "sqadd " #vR ".8h, " #vR \ + ".8h, v7.8h \n" /* R */ \ + "sqshrun " #vB ".8b, " #vB \ + ".8h, #6 \n" /* B */ \ + "sqshrun " #vG ".8b, " #vG \ + ".8h, #6 \n" /* G */ \ + "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ -void I444ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -140,7 +125,6 @@ void I444ToARGBRow_NEON(const uint8* src_y, READYUV444 YUVTORGB(v22, v21, v20) "subs %w4, %w4, #8 \n" - MEMACCESS(3) "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 @@ -157,10 +141,10 @@ void I444ToARGBRow_NEON(const uint8* src_y, ); } -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -170,7 +154,6 @@ void I422ToARGBRow_NEON(const uint8* src_y, READYUV422 YUVTORGB(v22, v21, v20) "subs 
%w4, %w4, #8 \n" - MEMACCESS(3) "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 @@ -187,11 +170,11 @@ void I422ToARGBRow_NEON(const uint8* src_y, ); } -void I422AlphaToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const uint8* src_a, - uint8* dst_argb, +void I422AlphaToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -199,10 +182,8 @@ void I422AlphaToARGBRow_NEON(const uint8* src_y, "1: \n" READYUV422 YUVTORGB(v22, v21, v20) - MEMACCESS(3) "ld1 {v23.8b}, [%3], #8 \n" "subs %w5, %w5, #8 \n" - MEMACCESS(4) "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%4], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 @@ -220,40 +201,10 @@ void I422AlphaToARGBRow_NEON(const uint8* src_y, ); } -void I411ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - asm volatile ( - YUVTORGB_SETUP - "movi v23.8b, #255 \n" /* A */ - "1: \n" - READYUV411 - YUVTORGB(v22, v21, v20) - "subs %w4, %w4, #8 \n" - MEMACCESS(3) - "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" - "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", - "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" - ); -} - -void I422ToRGBARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGBARow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -263,7 +214,6 @@ void I422ToRGBARow_NEON(const uint8* src_y, READYUV422 YUVTORGB(v23, v22, v21) "subs %w4, %w4, #8 \n" - MEMACCESS(3) "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 @@ -280,10 +230,10 @@ void I422ToRGBARow_NEON(const uint8* src_y, ); } -void I422ToRGB24Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -292,7 +242,6 @@ void I422ToRGB24Row_NEON(const uint8* src_y, READYUV422 YUVTORGB(v22, v21, v20) "subs %w4, %w4, #8 \n" - MEMACCESS(3) "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" "b.gt 1b \n" : "+r"(src_y), // %0 @@ -309,97 +258,91 @@ void I422ToRGB24Row_NEON(const uint8* src_y, ); } -#define ARGBTORGB565 \ - "shll v0.8h, v22.8b, #8 \n" /* R */ \ - "shll v21.8h, v21.8b, #8 \n" /* G */ \ - "shll v20.8h, v20.8b, #8 \n" /* B */ \ - "sri v0.8h, v21.8h, #5 \n" /* RG */ \ - "sri v0.8h, v20.8h, #11 \n" /* RGB */ +#define ARGBTORGB565 \ + "shll v0.8h, v22.8b, #8 \n" /* R */ \ + "shll v21.8h, v21.8b, #8 \n" /* G */ \ + "shll v20.8h, v20.8b, #8 \n" /* B */ \ + "sri v0.8h, v21.8h, #5 \n" /* RG */ \ + "sri v0.8h, v20.8h, #11 \n" /* RGB */ -void I422ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* 
dst_rgb565, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "1: \n" - READYUV422 - YUVTORGB(v22, v21, v20) - "subs %w4, %w4, #8 \n" - ARGBTORGB565 - MEMACCESS(3) - "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. - "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_rgb565), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", - "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" - ); + asm volatile( + YUVTORGB_SETUP + "1: \n" READYUV422 YUVTORGB( + v22, v21, + v20) "subs %w4, %w4, #8 \n" ARGBTORGB565 + "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels + // RGB565. + "b.gt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_rgb565), // %3 + "+r"(width) // %4 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", + "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"); } -#define ARGBTOARGB1555 \ - "shll v0.8h, v23.8b, #8 \n" /* A */ \ - "shll v22.8h, v22.8b, #8 \n" /* R */ \ - "shll v21.8h, v21.8b, #8 \n" /* G */ \ - "shll v20.8h, v20.8b, #8 \n" /* B */ \ - "sri v0.8h, v22.8h, #1 \n" /* AR */ \ - "sri v0.8h, v21.8h, #6 \n" /* ARG */ \ - "sri v0.8h, v20.8h, #11 \n" /* ARGB */ +#define ARGBTOARGB1555 \ + "shll v0.8h, v23.8b, #8 \n" /* A */ \ + "shll v22.8h, v22.8b, #8 \n" /* R */ \ + "shll v21.8h, v21.8b, #8 \n" /* G */ \ + "shll v20.8h, v20.8b, #8 \n" /* B */ \ + "sri v0.8h, v22.8h, #1 \n" /* AR */ \ + "sri v0.8h, v21.8h, #6 \n" /* ARG */ \ + "sri v0.8h, v20.8h, #11 \n" /* ARGB */ -void I422ToARGB1555Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { - asm volatile ( - YUVTORGB_SETUP - "movi v23.8b, #255 \n" - "1: \n" - READYUV422 - YUVTORGB(v22, v21, v20) - "subs %w4, %w4, #8 \n" - ARGBTOARGB1555 - MEMACCESS(3) - "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. - "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb1555), // %3 - "+r"(width) // %4 - : [kUVToRB]"r"(&yuvconstants->kUVToRB), - [kUVToG]"r"(&yuvconstants->kUVToG), - [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), - [kYToRgb]"r"(&yuvconstants->kYToRgb) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", - "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" - ); + asm volatile( + YUVTORGB_SETUP + "movi v23.8b, #255 \n" + "1: \n" READYUV422 YUVTORGB( + v22, v21, + v20) "subs %w4, %w4, #8 \n" ARGBTOARGB1555 + "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels + // RGB565. 
+ "b.gt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_argb1555), // %3 + "+r"(width) // %4 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", + "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"); } -#define ARGBTOARGB4444 \ - /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \ - "ushr v20.8b, v20.8b, #4 \n" /* B */ \ - "bic v21.8b, v21.8b, v4.8b \n" /* G */ \ - "ushr v22.8b, v22.8b, #4 \n" /* R */ \ - "bic v23.8b, v23.8b, v4.8b \n" /* A */ \ - "orr v0.8b, v20.8b, v21.8b \n" /* BG */ \ - "orr v1.8b, v22.8b, v23.8b \n" /* RA */ \ - "zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */ +#define ARGBTOARGB4444 \ + /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \ + "ushr v20.8b, v20.8b, #4 \n" /* B */ \ + "bic v21.8b, v21.8b, v4.8b \n" /* G */ \ + "ushr v22.8b, v22.8b, #4 \n" /* R */ \ + "bic v23.8b, v23.8b, v4.8b \n" /* A */ \ + "orr v0.8b, v20.8b, v21.8b \n" /* BG */ \ + "orr v1.8b, v22.8b, v23.8b \n" /* RA */ \ + "zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */ -void I422ToARGB4444Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -411,7 +354,6 @@ void I422ToARGB4444Row_NEON(const uint8* src_y, "subs %w4, %w4, #8 \n" "movi v23.8b, #255 \n" ARGBTOARGB4444 - MEMACCESS(3) "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444. "b.gt 1b \n" : "+r"(src_y), // %0 @@ -428,9 +370,7 @@ void I422ToARGB4444Row_NEON(const uint8* src_y, ); } -void I400ToARGBRow_NEON(const uint8* src_y, - uint8* dst_argb, - int width) { +void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { asm volatile ( YUVTORGB_SETUP "movi v23.8b, #255 \n" @@ -438,7 +378,6 @@ void I400ToARGBRow_NEON(const uint8* src_y, READYUV400 YUVTORGB(v22, v21, v20) "subs %w2, %w2, #8 \n" - MEMACCESS(1) "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 @@ -453,31 +392,26 @@ void I400ToARGBRow_NEON(const uint8* src_y, ); } -void J400ToARGBRow_NEON(const uint8* src_y, - uint8* dst_argb, - int width) { - asm volatile ( - "movi v23.8b, #255 \n" - "1: \n" - MEMACCESS(0) - "ld1 {v20.8b}, [%0], #8 \n" - "orr v21.8b, v20.8b, v20.8b \n" - "orr v22.8b, v20.8b, v20.8b \n" - "subs %w2, %w2, #8 \n" - MEMACCESS(1) - "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" - "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v20", "v21", "v22", "v23" - ); +void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { + asm volatile( + "movi v23.8b, #255 \n" + "1: \n" + "ld1 {v20.8b}, [%0], #8 \n" + "orr v21.8b, v20.8b, v20.8b \n" + "orr v22.8b, v20.8b, v20.8b \n" + "subs %w2, %w2, #8 \n" + "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" + "b.gt 1b \n" + : "+r"(src_y), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v20", "v21", "v22", "v23"); } -void NV12ToARGBRow_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -487,7 +421,6 @@ 
void NV12ToARGBRow_NEON(const uint8* src_y, READNV12 YUVTORGB(v22, v21, v20) "subs %w3, %w3, #8 \n" - MEMACCESS(2) "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 @@ -503,9 +436,9 @@ void NV12ToARGBRow_NEON(const uint8* src_y, ); } -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -515,7 +448,6 @@ void NV21ToARGBRow_NEON(const uint8* src_y, READNV21 YUVTORGB(v22, v21, v20) "subs %w3, %w3, #8 \n" - MEMACCESS(2) "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 @@ -531,24 +463,22 @@ void NV21ToARGBRow_NEON(const uint8* src_y, ); } -void NV12ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, - const struct YuvConstants* yuvconstants, - int width) { +void NV12ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { asm volatile ( YUVTORGB_SETUP "1: \n" READNV12 YUVTORGB(v22, v21, v20) "subs %w3, %w3, #8 \n" - ARGBTORGB565 - MEMACCESS(2) - "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. + "st3 {v20.8b,v21.8b,v22.8b}, [%2], #24 \n" "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 - "+r"(dst_rgb565), // %2 + "+r"(dst_rgb24), // %2 "+r"(width) // %3 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), @@ -559,8 +489,59 @@ void NV12ToRGB565Row_NEON(const uint8* src_y, ); } -void YUY2ToARGBRow_NEON(const uint8* src_yuy2, - uint8* dst_argb, +void NV21ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile ( + YUVTORGB_SETUP + "1: \n" + READNV21 + YUVTORGB(v22, v21, v20) + "subs %w3, %w3, #8 \n" + "st3 {v20.8b,v21.8b,v22.8b}, [%2], #24 \n" + "b.gt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_vu), // %1 + "+r"(dst_rgb24), // %2 + "+r"(width) // %3 + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", + "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" + ); +} + +void NV12ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width) { + asm volatile( + YUVTORGB_SETUP + "1: \n" READNV12 YUVTORGB( + v22, v21, + v20) "subs %w3, %w3, #8 \n" ARGBTORGB565 + "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels + // RGB565. 
+ "b.gt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_rgb565), // %2 + "+r"(width) // %3 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", + "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"); +} + +void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -570,7 +551,6 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, READYUY2 YUVTORGB(v22, v21, v20) "subs %w2, %w2, #8 \n" - MEMACCESS(1) "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" "b.gt 1b \n" : "+r"(src_yuy2), // %0 @@ -585,8 +565,8 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, ); } -void UYVYToARGBRow_NEON(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( @@ -596,7 +576,6 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, READUYVY YUVTORGB(v22, v21, v20) "subs %w2, %w2, #8 \n" - MEMACCESS(1) "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" "b.gt 1b \n" : "+r"(src_uyvy), // %0 @@ -612,869 +591,819 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, } // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. -void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, +void SplitUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pairs of UV - "subs %w3, %w3, #16 \n" // 16 processed per loop - MEMACCESS(1) - "st1 {v0.16b}, [%1], #16 \n" // store U - MEMACCESS(2) - "st1 {v1.16b}, [%2], #16 \n" // store V - "b.gt 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 // Output registers - : // Input registers - : "cc", "memory", "v0", "v1" // Clobber List - ); + asm volatile( + "1: \n" + "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pairs of UV + "subs %w3, %w3, #16 \n" // 16 processed per loop + "st1 {v0.16b}, [%1], #16 \n" // store U + "st1 {v1.16b}, [%2], #16 \n" // store V + "b.gt 1b \n" + : "+r"(src_uv), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 // Output registers + : // Input registers + : "cc", "memory", "v0", "v1" // Clobber List + ); } // Reads 16 U's and V's and writes out 16 pairs of UV. 
-void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, +void MergeUVRow_NEON(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load U - MEMACCESS(1) - "ld1 {v1.16b}, [%1], #16 \n" // load V - "subs %w3, %w3, #16 \n" // 16 processed per loop - MEMACCESS(2) - "st2 {v0.16b,v1.16b}, [%2], #32 \n" // store 16 pairs of UV - "b.gt 1b \n" - : - "+r"(src_u), // %0 - "+r"(src_v), // %1 - "+r"(dst_uv), // %2 - "+r"(width) // %3 // Output registers - : // Input registers - : "cc", "memory", "v0", "v1" // Clobber List - ); + asm volatile( + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load U + "ld1 {v1.16b}, [%1], #16 \n" // load V + "subs %w3, %w3, #16 \n" // 16 processed per loop + "st2 {v0.16b,v1.16b}, [%2], #32 \n" // store 16 pairs of UV + "b.gt 1b \n" + : "+r"(src_u), // %0 + "+r"(src_v), // %1 + "+r"(dst_uv), // %2 + "+r"(width) // %3 // Output registers + : // Input registers + : "cc", "memory", "v0", "v1" // Clobber List + ); } -// Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15. -void CopyRow_NEON(const uint8* src, uint8* dst, int count) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld1 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 32 - "subs %w2, %w2, #32 \n" // 32 processed per loop - MEMACCESS(1) - "st1 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 32 - "b.gt 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(count) // %2 // Output registers - : // Input registers - : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); -} - -// SetRow writes 'count' bytes using an 8 bit value repeated. -void SetRow_NEON(uint8* dst, uint8 v8, int count) { - asm volatile ( - "dup v0.16b, %w2 \n" // duplicate 16 bytes - "1: \n" - "subs %w1, %w1, #16 \n" // 16 bytes per loop - MEMACCESS(0) - "st1 {v0.16b}, [%0], #16 \n" // store - "b.gt 1b \n" - : "+r"(dst), // %0 - "+r"(count) // %1 - : "r"(v8) // %2 - : "cc", "memory", "v0" - ); -} - -void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { - asm volatile ( - "dup v0.4s, %w2 \n" // duplicate 4 ints - "1: \n" - "subs %w1, %w1, #4 \n" // 4 ints per loop - MEMACCESS(0) - "st1 {v0.16b}, [%0], #16 \n" // store - "b.gt 1b \n" - : "+r"(dst), // %0 - "+r"(count) // %1 - : "r"(v32) // %2 - : "cc", "memory", "v0" - ); -} - -void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { - asm volatile ( - // Start at end of source row. - "add %0, %0, %w2, sxtw \n" - "sub %0, %0, #16 \n" - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], %3 \n" // src -= 16 - "subs %w2, %w2, #16 \n" // 16 pixels per loop. - "rev64 v0.16b, v0.16b \n" - MEMACCESS(1) - "st1 {v0.D}[1], [%1], #8 \n" // dst += 16 - MEMACCESS(1) - "st1 {v0.D}[0], [%1], #8 \n" - "b.gt 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "r"((ptrdiff_t)-16) // %3 - : "cc", "memory", "v0" - ); -} - -void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, +// Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b. +void SplitRGBRow_NEON(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, int width) { - asm volatile ( - // Start at end of source row. - "add %0, %0, %w3, sxtw #1 \n" - "sub %0, %0, #16 \n" - "1: \n" - MEMACCESS(0) - "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16 - "subs %w3, %w3, #8 \n" // 8 pixels per loop. 
- "rev64 v0.8b, v0.8b \n" - "rev64 v1.8b, v1.8b \n" - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // dst += 8 - MEMACCESS(2) - "st1 {v1.8b}, [%2], #8 \n" - "b.gt 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : "r"((ptrdiff_t)-16) // %4 - : "cc", "memory", "v0", "v1" - ); + asm volatile( + "1: \n" + "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 RGB + "subs %w4, %w4, #16 \n" // 16 processed per loop + "st1 {v0.16b}, [%1], #16 \n" // store R + "st1 {v1.16b}, [%2], #16 \n" // store G + "st1 {v2.16b}, [%3], #16 \n" // store B + "b.gt 1b \n" + : "+r"(src_rgb), // %0 + "+r"(dst_r), // %1 + "+r"(dst_g), // %2 + "+r"(dst_b), // %3 + "+r"(width) // %4 + : // Input registers + : "cc", "memory", "v0", "v1", "v2" // Clobber List + ); } -void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { - asm volatile ( - // Start at end of source row. - "add %0, %0, %w2, sxtw #2 \n" - "sub %0, %0, #16 \n" - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], %3 \n" // src -= 16 - "subs %w2, %w2, #4 \n" // 4 pixels per loop. - "rev64 v0.4s, v0.4s \n" - MEMACCESS(1) - "st1 {v0.D}[1], [%1], #8 \n" // dst += 16 - MEMACCESS(1) - "st1 {v0.D}[0], [%1], #8 \n" - "b.gt 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "r"((ptrdiff_t)-16) // %3 - : "cc", "memory", "v0" - ); +// Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time +void MergeRGBRow_NEON(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, + int width) { + asm volatile( + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load R + "ld1 {v1.16b}, [%1], #16 \n" // load G + "ld1 {v2.16b}, [%2], #16 \n" // load B + "subs %w4, %w4, #16 \n" // 16 processed per loop + "st3 {v0.16b,v1.16b,v2.16b}, [%3], #48 \n" // store 16 RGB + "b.gt 1b \n" + : "+r"(src_r), // %0 + "+r"(src_g), // %1 + "+r"(src_b), // %2 + "+r"(dst_rgb), // %3 + "+r"(width) // %4 + : // Input registers + : "cc", "memory", "v0", "v1", "v2" // Clobber List + ); } -void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) { - asm volatile ( - "movi v4.8b, #255 \n" // Alpha - "1: \n" - MEMACCESS(0) - "ld3 {v1.8b,v2.8b,v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - MEMACCESS(1) - "st4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%1], #32 \n" // store 8 ARGB pixels - "b.gt 1b \n" - : "+r"(src_rgb24), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List - ); +// Copy multiple of 32. +void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "1: \n" + "ldp q0, q1, [%0], #32 \n" + "subs %w2, %w2, #32 \n" // 32 processed per loop + "stp q0, q1, [%1], #32 \n" + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 // Output registers + : // Input registers + : "cc", "memory", "v0", "v1" // Clobber List + ); } -void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) { - asm volatile ( - "movi v5.8b, #255 \n" // Alpha - "1: \n" - MEMACCESS(0) - "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b - "subs %w2, %w2, #8 \n" // 8 processed per loop. 
- "orr v3.8b, v1.8b, v1.8b \n" // move g - "orr v4.8b, v0.8b, v0.8b \n" // move r - MEMACCESS(1) - "st4 {v2.8b,v3.8b,v4.8b,v5.8b}, [%1], #32 \n" // store b g r a - "b.gt 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List - ); +// SetRow writes 'width' bytes using an 8 bit value repeated. +void SetRow_NEON(uint8_t* dst, uint8_t v8, int width) { + asm volatile( + "dup v0.16b, %w2 \n" // duplicate 16 bytes + "1: \n" + "subs %w1, %w1, #16 \n" // 16 bytes per loop + "st1 {v0.16b}, [%0], #16 \n" // store + "b.gt 1b \n" + : "+r"(dst), // %0 + "+r"(width) // %1 + : "r"(v8) // %2 + : "cc", "memory", "v0"); } -void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "orr v3.8b, v1.8b, v1.8b \n" // move g - "orr v4.8b, v0.8b, v0.8b \n" // move r - MEMACCESS(1) - "st3 {v2.8b,v3.8b,v4.8b}, [%1], #24 \n" // store b g r - "b.gt 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_rgb24), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List - ); +void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) { + asm volatile( + "dup v0.4s, %w2 \n" // duplicate 4 ints + "1: \n" + "subs %w1, %w1, #4 \n" // 4 ints per loop + "st1 {v0.16b}, [%0], #16 \n" // store + "b.gt 1b \n" + : "+r"(dst), // %0 + "+r"(width) // %1 + : "r"(v32) // %2 + : "cc", "memory", "v0"); } -#define RGB565TOARGB \ - "shrn v6.8b, v0.8h, #5 \n" /* G xxGGGGGG */ \ - "shl v6.8b, v6.8b, #2 \n" /* G GGGGGG00 upper 6 */ \ - "ushr v4.8b, v6.8b, #6 \n" /* G 000000GG lower 2 */ \ - "orr v1.8b, v4.8b, v6.8b \n" /* G */ \ - "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \ - "ushr v0.8h, v0.8h, #11 \n" /* R 000RRRRR */ \ - "xtn2 v2.16b,v0.8h \n" /* R in upper part */ \ - "shl v2.16b, v2.16b, #3 \n" /* R,B BBBBB000 upper 5 */ \ - "ushr v0.16b, v2.16b, #5 \n" /* R,B 00000BBB lower 3 */ \ - "orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \ - "dup v2.2D, v0.D[1] \n" /* R */ - -void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) { - asm volatile ( - "movi v3.8b, #255 \n" // Alpha - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - RGB565TOARGB - MEMACCESS(1) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels - "b.gt 1b \n" - : "+r"(src_rgb565), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6" // Clobber List - ); +void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + // Start at end of source row. + "add %0, %0, %w2, sxtw \n" + "sub %0, %0, #16 \n" + "1: \n" + "ld1 {v0.16b}, [%0], %3 \n" // src -= 16 + "subs %w2, %w2, #16 \n" // 16 pixels per loop. 
+ "rev64 v0.16b, v0.16b \n" + "st1 {v0.D}[1], [%1], #8 \n" // dst += 16 + "st1 {v0.D}[0], [%1], #8 \n" + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"((ptrdiff_t)-16) // %3 + : "cc", "memory", "v0"); } -#define ARGB1555TOARGB \ - "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \ - "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \ - "xtn v3.8b, v2.8h \n" /* RRRRR000 AAAAAAAA */ \ - \ - "sshr v2.8h, v0.8h, #15 \n" /* A AAAAAAAA */ \ - "xtn2 v3.16b, v2.8h \n" \ - \ - "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \ - "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \ - \ - "ushr v1.16b, v3.16b, #5 \n" /* R,A 00000RRR lower 3 */ \ - "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \ - "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \ - \ - "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \ - "orr v2.16b, v1.16b, v3.16b \n" /* R,A */ \ - "dup v1.2D, v0.D[1] \n" \ - "dup v3.2D, v2.D[1] \n" +void MirrorUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + // Start at end of source row. + "add %0, %0, %w3, sxtw #1 \n" + "sub %0, %0, #16 \n" + "1: \n" + "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16 + "subs %w3, %w3, #8 \n" // 8 pixels per loop. + "rev64 v0.8b, v0.8b \n" + "rev64 v1.8b, v1.8b \n" + "st1 {v0.8b}, [%1], #8 \n" // dst += 8 + "st1 {v1.8b}, [%2], #8 \n" + "b.gt 1b \n" + : "+r"(src_uv), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"((ptrdiff_t)-16) // %4 + : "cc", "memory", "v0", "v1"); +} + +void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + // Start at end of source row. + "add %0, %0, %w2, sxtw #2 \n" + "sub %0, %0, #16 \n" + "1: \n" + "ld1 {v0.16b}, [%0], %3 \n" // src -= 16 + "subs %w2, %w2, #4 \n" // 4 pixels per loop. + "rev64 v0.4s, v0.4s \n" + "st1 {v0.D}[1], [%1], #8 \n" // dst += 16 + "st1 {v0.D}[0], [%1], #8 \n" + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"((ptrdiff_t)-16) // %3 + : "cc", "memory", "v0"); +} + +void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width) { + asm volatile( + "movi v4.8b, #255 \n" // Alpha + "1: \n" + "ld3 {v1.8b,v2.8b,v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24. + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "st4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%1], #32 \n" // store 8 ARGB + "b.gt 1b \n" + : "+r"(src_rgb24), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List + ); +} + +void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) { + asm volatile( + "movi v5.8b, #255 \n" // Alpha + "1: \n" + "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "orr v3.8b, v1.8b, v1.8b \n" // move g + "orr v4.8b, v0.8b, v0.8b \n" // move r + "st4 {v2.8b,v3.8b,v4.8b,v5.8b}, [%1], #32 \n" // store b g r a + "b.gt 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List + ); +} + +void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { + asm volatile( + "1: \n" + "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b + "subs %w2, %w2, #8 \n" // 8 processed per loop. 
+ "orr v3.8b, v1.8b, v1.8b \n" // move g + "orr v4.8b, v0.8b, v0.8b \n" // move r + "st3 {v2.8b,v3.8b,v4.8b}, [%1], #24 \n" // store b g r + "b.gt 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_rgb24), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List + ); +} + +#define RGB565TOARGB \ + "shrn v6.8b, v0.8h, #5 \n" /* G xxGGGGGG */ \ + "shl v6.8b, v6.8b, #2 \n" /* G GGGGGG00 upper 6 */ \ + "ushr v4.8b, v6.8b, #6 \n" /* G 000000GG lower 2 */ \ + "orr v1.8b, v4.8b, v6.8b \n" /* G */ \ + "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \ + "ushr v0.8h, v0.8h, #11 \n" /* R 000RRRRR */ \ + "xtn2 v2.16b,v0.8h \n" /* R in upper part */ \ + "shl v2.16b, v2.16b, #3 \n" /* R,B BBBBB000 upper 5 */ \ + "ushr v0.16b, v2.16b, #5 \n" /* R,B 00000BBB lower 3 */ \ + "orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \ + "dup v2.2D, v0.D[1] \n" /* R */ + +void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width) { + asm volatile( + "movi v3.8b, #255 \n" // Alpha + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels. + "subs %w2, %w2, #8 \n" // 8 processed per loop. + RGB565TOARGB + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB + "b.gt 1b \n" + : "+r"(src_rgb565), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6" // Clobber List + ); +} + +#define ARGB1555TOARGB \ + "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \ + "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \ + "xtn v3.8b, v2.8h \n" /* RRRRR000 AAAAAAAA */ \ + \ + "sshr v2.8h, v0.8h, #15 \n" /* A AAAAAAAA */ \ + "xtn2 v3.16b, v2.8h \n" \ + \ + "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \ + "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \ + \ + "ushr v1.16b, v3.16b, #5 \n" /* R,A 00000RRR lower 3 */ \ + "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \ + "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \ + \ + "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \ + "orr v2.16b, v1.16b, v3.16b \n" /* R,A */ \ + "dup v1.2D, v0.D[1] \n" \ + "dup v3.2D, v2.D[1] \n" // RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha. 
-#define RGB555TOARGB \ - "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \ - "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \ - "xtn v3.8b, v2.8h \n" /* RRRRR000 */ \ - \ - "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \ - "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \ - \ - "ushr v1.16b, v3.16b, #5 \n" /* R 00000RRR lower 3 */ \ - "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \ - "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \ - \ - "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \ - "orr v2.16b, v1.16b, v3.16b \n" /* R */ \ - "dup v1.2D, v0.D[1] \n" /* G */ \ +#define RGB555TOARGB \ + "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \ + "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \ + "xtn v3.8b, v2.8h \n" /* RRRRR000 */ \ + \ + "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \ + "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \ + \ + "ushr v1.16b, v3.16b, #5 \n" /* R 00000RRR lower 3 */ \ + "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \ + "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \ + \ + "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \ + "orr v2.16b, v1.16b, v3.16b \n" /* R */ \ + "dup v1.2D, v0.D[1] \n" /* G */ -void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, +void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_argb, int width) { - asm volatile ( - "movi v3.8b, #255 \n" // Alpha - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - ARGB1555TOARGB - MEMACCESS(1) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels - "b.gt 1b \n" - : "+r"(src_argb1555), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); + asm volatile( + "movi v3.8b, #255 \n" // Alpha + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels. + "subs %w2, %w2, #8 \n" // 8 processed per loop. + ARGB1555TOARGB + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB + // pixels + "b.gt 1b \n" + : "+r"(src_argb1555), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List + ); } -#define ARGB4444TOARGB \ - "shrn v1.8b, v0.8h, #8 \n" /* v1(l) AR */ \ - "xtn2 v1.16b, v0.8h \n" /* v1(h) GB */ \ - "shl v2.16b, v1.16b, #4 \n" /* B,R BBBB0000 */ \ - "ushr v3.16b, v1.16b, #4 \n" /* G,A 0000GGGG */ \ - "ushr v0.16b, v2.16b, #4 \n" /* B,R 0000BBBB */ \ - "shl v1.16b, v3.16b, #4 \n" /* G,A GGGG0000 */ \ - "orr v2.16b, v0.16b, v2.16b \n" /* B,R BBBBBBBB */ \ - "orr v3.16b, v1.16b, v3.16b \n" /* G,A GGGGGGGG */ \ - "dup v0.2D, v2.D[1] \n" \ - "dup v1.2D, v3.D[1] \n" +#define ARGB4444TOARGB \ + "shrn v1.8b, v0.8h, #8 \n" /* v1(l) AR */ \ + "xtn2 v1.16b, v0.8h \n" /* v1(h) GB */ \ + "shl v2.16b, v1.16b, #4 \n" /* B,R BBBB0000 */ \ + "ushr v3.16b, v1.16b, #4 \n" /* G,A 0000GGGG */ \ + "ushr v0.16b, v2.16b, #4 \n" /* B,R 0000BBBB */ \ + "shl v1.16b, v3.16b, #4 \n" /* G,A GGGG0000 */ \ + "orr v2.16b, v0.16b, v2.16b \n" /* B,R BBBBBBBB */ \ + "orr v3.16b, v1.16b, v3.16b \n" /* G,A GGGGGGGG */ \ + "dup v0.2D, v2.D[1] \n" \ + "dup v1.2D, v3.D[1] \n" -void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, +void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_argb, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. 
- ARGB4444TOARGB - MEMACCESS(1) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels - "b.gt 1b \n" - : "+r"(src_argb4444), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List - ); + asm volatile( + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels. + "subs %w2, %w2, #8 \n" // 8 processed per loop. + ARGB4444TOARGB + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB + // pixels + "b.gt 1b \n" + : "+r"(src_argb4444), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List + ); } -void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB pixels - "subs %w2, %w2, #8 \n" // 8 processed per loop. - MEMACCESS(1) - "st3 {v1.8b,v2.8b,v3.8b}, [%1], #24 \n" // store 8 pixels of RGB24. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_rgb24), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List - ); -} - -void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load b g r a - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "orr v4.8b, v2.8b, v2.8b \n" // mov g - "orr v5.8b, v1.8b, v1.8b \n" // mov b - MEMACCESS(1) - "st3 {v3.8b,v4.8b,v5.8b}, [%1], #24 \n" // store r g b - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_raw), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List - ); -} - -void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2. - "subs %w2, %w2, #16 \n" // 16 processed per loop. - MEMACCESS(1) - "st1 {v0.16b}, [%1], #16 \n" // store 16 pixels of Y. - "b.gt 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1" // Clobber List - ); -} - -void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY. - "subs %w2, %w2, #16 \n" // 16 processed per loop. - MEMACCESS(1) - "st1 {v1.16b}, [%1], #16 \n" // store 16 pixels of Y. - "b.gt 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1" // Clobber List - ); -} - -void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, +void ARGBToRGB24Row_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb24, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 YUY2 pixels - "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs. - MEMACCESS(1) - "st1 {v1.8b}, [%1], #8 \n" // store 8 U. - MEMACCESS(2) - "st1 {v3.8b}, [%2], #8 \n" // store 8 V. - "b.gt 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); + asm volatile( + "1: \n" + "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "st3 {v1.8b,v2.8b,v3.8b}, [%1], #24 \n" // store 8 pixels of + // RGB24. 
+ "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_rgb24), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List + ); } -void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, +void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) { + asm volatile( + "1: \n" + "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load b g r a + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "orr v4.8b, v2.8b, v2.8b \n" // mov g + "orr v5.8b, v1.8b, v1.8b \n" // mov b + "st3 {v3.8b,v4.8b,v5.8b}, [%1], #24 \n" // store r g b + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_raw), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List + ); +} + +void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { + asm volatile( + "1: \n" + "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2. + "subs %w2, %w2, #16 \n" // 16 processed per loop. + "st1 {v0.16b}, [%1], #16 \n" // store 16 pixels of Y. + "b.gt 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1" // Clobber List + ); +} + +void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { + asm volatile( + "1: \n" + "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY. + "subs %w2, %w2, #16 \n" // 16 processed per loop. + "st1 {v1.16b}, [%1], #16 \n" // store 16 pixels of Y. + "b.gt 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1" // Clobber List + ); +} + +void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 UYVY pixels - "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs. - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 U. - MEMACCESS(2) - "st1 {v2.8b}, [%2], #8 \n" // store 8 V. - "b.gt 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); + asm volatile( + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 YUY2 + "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs. + "st1 {v1.8b}, [%1], #8 \n" // store 8 U. + "st1 {v3.8b}, [%2], #8 \n" // store 8 V. + "b.gt 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List + ); } -void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* src_yuy2b = src_yuy2 + stride_yuy2; - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels - "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs. - MEMACCESS(1) - "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row - "urhadd v1.8b, v1.8b, v5.8b \n" // average rows of U - "urhadd v3.8b, v3.8b, v7.8b \n" // average rows of V - MEMACCESS(2) - "st1 {v1.8b}, [%2], #8 \n" // store 8 U. - MEMACCESS(3) - "st1 {v3.8b}, [%3], #8 \n" // store 8 V. 
- "b.gt 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(src_yuy2b), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", - "v5", "v6", "v7" // Clobber List - ); +void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + asm volatile( + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 UYVY + "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs. + "st1 {v0.8b}, [%1], #8 \n" // store 8 U. + "st1 {v2.8b}, [%2], #8 \n" // store 8 V. + "b.gt 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List + ); } -void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* src_uyvyb = src_uyvy + stride_uyvy; - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels - "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs. - MEMACCESS(1) - "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row - "urhadd v0.8b, v0.8b, v4.8b \n" // average rows of U - "urhadd v2.8b, v2.8b, v6.8b \n" // average rows of V - MEMACCESS(2) - "st1 {v0.8b}, [%2], #8 \n" // store 8 U. - MEMACCESS(3) - "st1 {v2.8b}, [%3], #8 \n" // store 8 V. - "b.gt 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(src_uyvyb), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", - "v5", "v6", "v7" // Clobber List - ); +void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_yuy2b = src_yuy2 + stride_yuy2; + asm volatile( + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels + "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs. + "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row + "urhadd v1.8b, v1.8b, v5.8b \n" // average rows of U + "urhadd v3.8b, v3.8b, v7.8b \n" // average rows of V + "st1 {v1.8b}, [%2], #8 \n" // store 8 U. + "st1 {v3.8b}, [%3], #8 \n" // store 8 V. + "b.gt 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(src_yuy2b), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", + "v7" // Clobber List + ); +} + +void UYVYToUVRow_NEON(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_uyvyb = src_uyvy + stride_uyvy; + asm volatile( + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels + "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs. + "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row + "urhadd v0.8b, v0.8b, v4.8b \n" // average rows of U + "urhadd v2.8b, v2.8b, v6.8b \n" // average rows of V + "st1 {v0.8b}, [%2], #8 \n" // store 8 U. + "st1 {v2.8b}, [%3], #8 \n" // store 8 V. + "b.gt 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(src_uyvyb), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", + "v7" // Clobber List + ); } // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width) { - asm volatile ( - MEMACCESS(3) - "ld1 {v2.16b}, [%3] \n" // shuffler - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load 4 pixels. 
- "subs %w2, %w2, #4 \n" // 4 processed per loop - "tbl v1.16b, {v0.16b}, v2.16b \n" // look up 4 pixels - MEMACCESS(1) - "st1 {v1.16b}, [%1], #16 \n" // store 4. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(shuffler) // %3 - : "cc", "memory", "v0", "v1", "v2" // Clobber List - ); +void ARGBShuffleRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width) { + asm volatile( + "ld1 {v2.16b}, [%3] \n" // shuffler + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load 4 pixels. + "subs %w2, %w2, #4 \n" // 4 processed per loop + "tbl v1.16b, {v0.16b}, v2.16b \n" // look up 4 pixels + "st1 {v1.16b}, [%1], #16 \n" // store 4. + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(shuffler) // %3 + : "cc", "memory", "v0", "v1", "v2" // Clobber List + ); } -void I422ToYUY2Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys - "orr v2.8b, v1.8b, v1.8b \n" - MEMACCESS(1) - "ld1 {v1.8b}, [%1], #8 \n" // load 8 Us - MEMACCESS(2) - "ld1 {v3.8b}, [%2], #8 \n" // load 8 Vs - "subs %w4, %w4, #16 \n" // 16 pixels - MEMACCESS(3) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels. - "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_yuy2), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v0", "v1", "v2", "v3" - ); +void I422ToYUY2Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width) { + asm volatile( + "1: \n" + "ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys + "orr v2.8b, v1.8b, v1.8b \n" + "ld1 {v1.8b}, [%1], #8 \n" // load 8 Us + "ld1 {v3.8b}, [%2], #8 \n" // load 8 Vs + "subs %w4, %w4, #16 \n" // 16 pixels + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels. + "b.gt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_yuy2), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3"); } -void I422ToUYVYRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld2 {v1.8b,v2.8b}, [%0], #16 \n" // load 16 Ys - "orr v3.8b, v2.8b, v2.8b \n" - MEMACCESS(1) - "ld1 {v0.8b}, [%1], #8 \n" // load 8 Us - MEMACCESS(2) - "ld1 {v2.8b}, [%2], #8 \n" // load 8 Vs - "subs %w4, %w4, #16 \n" // 16 pixels - MEMACCESS(3) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels. - "b.gt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_uyvy), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v0", "v1", "v2", "v3" - ); +void I422ToUYVYRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width) { + asm volatile( + "1: \n" + "ld2 {v1.8b,v2.8b}, [%0], #16 \n" // load 16 Ys + "orr v3.8b, v2.8b, v2.8b \n" + "ld1 {v0.8b}, [%1], #8 \n" // load 8 Us + "ld1 {v2.8b}, [%2], #8 \n" // load 8 Vs + "subs %w4, %w4, #16 \n" // 16 pixels + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels. 
+ "b.gt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_u), // %1 + "+r"(src_v), // %2 + "+r"(dst_uyvy), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3"); } -void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels - "subs %w2, %w2, #8 \n" // 8 processed per loop. - ARGBTORGB565 - MEMACCESS(1) - "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels RGB565. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_rgb565), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v20", "v21", "v22", "v23" - ); +void ARGBToRGB565Row_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb565, + int width) { + asm volatile( + "1: \n" + "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels + "subs %w2, %w2, #8 \n" // 8 processed per loop. + ARGBTORGB565 + "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels RGB565. + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_rgb565), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v20", "v21", "v22", "v23"); } -void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width) { - asm volatile ( - "dup v1.4s, %w2 \n" // dither4 - "1: \n" - MEMACCESS(1) - "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" // load 8 pixels - "subs %w3, %w3, #8 \n" // 8 processed per loop. - "uqadd v20.8b, v20.8b, v1.8b \n" - "uqadd v21.8b, v21.8b, v1.8b \n" - "uqadd v22.8b, v22.8b, v1.8b \n" - ARGBTORGB565 - MEMACCESS(0) - "st1 {v0.16b}, [%0], #16 \n" // store 8 pixels RGB565. - "b.gt 1b \n" - : "+r"(dst_rgb) // %0 - : "r"(src_argb), // %1 - "r"(dither4), // %2 - "r"(width) // %3 - : "cc", "memory", "v0", "v1", "v20", "v21", "v22", "v23" - ); +void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, + int width) { + asm volatile( + "dup v1.4s, %w2 \n" // dither4 + "1: \n" + "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" // load 8 pixels + "subs %w3, %w3, #8 \n" // 8 processed per loop. + "uqadd v20.8b, v20.8b, v1.8b \n" + "uqadd v21.8b, v21.8b, v1.8b \n" + "uqadd v22.8b, v22.8b, v1.8b \n" ARGBTORGB565 + "st1 {v0.16b}, [%0], #16 \n" // store 8 pixels RGB565. + "b.gt 1b \n" + : "+r"(dst_rgb) // %0 + : "r"(src_argb), // %1 + "r"(dither4), // %2 + "r"(width) // %3 + : "cc", "memory", "v0", "v1", "v20", "v21", "v22", "v23"); } -void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555, +void ARGBToARGB1555Row_NEON(const uint8_t* src_argb, + uint8_t* dst_argb1555, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels - "subs %w2, %w2, #8 \n" // 8 processed per loop. - ARGBTOARGB1555 - MEMACCESS(1) - "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels ARGB1555. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb1555), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v20", "v21", "v22", "v23" - ); + asm volatile( + "1: \n" + "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels + "subs %w2, %w2, #8 \n" // 8 processed per loop. + ARGBTOARGB1555 + "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels + // ARGB1555. 
+ "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb1555), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v20", "v21", "v22", "v23"); } -void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, +void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_argb4444, int width) { - asm volatile ( - "movi v4.16b, #0x0f \n" // bits to clear with vbic. - "1: \n" - MEMACCESS(0) - "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels - "subs %w2, %w2, #8 \n" // 8 processed per loop. - ARGBTOARGB4444 - MEMACCESS(1) - "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels ARGB4444. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb4444), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v4", "v20", "v21", "v22", "v23" - ); + asm volatile( + "movi v4.16b, #0x0f \n" // bits to clear with + // vbic. + "1: \n" + "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels + "subs %w2, %w2, #8 \n" // 8 processed per loop. + ARGBTOARGB4444 + "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels + // ARGB4444. + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb4444), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v4", "v20", "v21", "v22", "v23"); } -void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { - asm volatile ( - "movi v4.8b, #13 \n" // B * 0.1016 coefficient - "movi v5.8b, #65 \n" // G * 0.5078 coefficient - "movi v6.8b, #33 \n" // R * 0.2578 coefficient - "movi v7.8b, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "umull v3.8h, v0.8b, v4.8b \n" // B - "umlal v3.8h, v1.8b, v5.8b \n" // G - "umlal v3.8h, v2.8b, v6.8b \n" // R - "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y - "uqadd v0.8b, v0.8b, v7.8b \n" - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" - ); +void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { + asm volatile( + "movi v4.8b, #13 \n" // B * 0.1016 coefficient + "movi v5.8b, #65 \n" // G * 0.5078 coefficient + "movi v6.8b, #33 \n" // R * 0.2578 coefficient + "movi v7.8b, #16 \n" // Add 16 constant + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "umull v3.8h, v0.8b, v4.8b \n" // B + "umlal v3.8h, v1.8b, v5.8b \n" // G + "umlal v3.8h, v2.8b, v6.8b \n" // R + "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y + "uqadd v0.8b, v0.8b, v7.8b \n" + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } -void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16 pixels - "subs %w2, %w2, #16 \n" // 16 processed per loop - MEMACCESS(1) - "st1 {v3.16b}, [%1], #16 \n" // store 16 A's. 
- "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_a), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); +void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { + asm volatile( + "1: \n" + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16 + // pixels + "subs %w2, %w2, #16 \n" // 16 processed per loop + "st1 {v3.16b}, [%1], #16 \n" // store 16 A's. + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_a), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List + ); } -void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { - asm volatile ( - "movi v4.8b, #15 \n" // B * 0.11400 coefficient - "movi v5.8b, #75 \n" // G * 0.58700 coefficient - "movi v6.8b, #38 \n" // R * 0.29900 coefficient - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "umull v3.8h, v0.8b, v4.8b \n" // B - "umlal v3.8h, v1.8b, v5.8b \n" // G - "umlal v3.8h, v2.8b, v6.8b \n" // R - "sqrshrun v0.8b, v3.8h, #7 \n" // 15 bit to 8 bit Y - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6" - ); +void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { + asm volatile( + "movi v4.8b, #15 \n" // B * 0.11400 coefficient + "movi v5.8b, #75 \n" // G * 0.58700 coefficient + "movi v6.8b, #38 \n" // R * 0.29900 coefficient + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "umull v3.8h, v0.8b, v4.8b \n" // B + "umlal v3.8h, v1.8b, v5.8b \n" // G + "umlal v3.8h, v2.8b, v6.8b \n" // R + "sqrshrun v0.8b, v3.8h, #7 \n" // 15 bit to 8 bit Y + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"); } // 8x1 pixels. -void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, +void ARGBToUV444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width) { - asm volatile ( - "movi v24.8b, #112 \n" // UB / VR 0.875 coefficient - "movi v25.8b, #74 \n" // UG -0.5781 coefficient - "movi v26.8b, #38 \n" // UR -0.2969 coefficient - "movi v27.8b, #18 \n" // VB -0.1406 coefficient - "movi v28.8b, #94 \n" // VG -0.7344 coefficient - "movi v29.16b,#0x80 \n" // 128.5 - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. - "subs %w3, %w3, #8 \n" // 8 processed per loop. - "umull v4.8h, v0.8b, v24.8b \n" // B - "umlsl v4.8h, v1.8b, v25.8b \n" // G - "umlsl v4.8h, v2.8b, v26.8b \n" // R - "add v4.8h, v4.8h, v29.8h \n" // +128 -> unsigned + asm volatile( + "movi v24.8b, #112 \n" // UB / VR 0.875 + // coefficient + "movi v25.8b, #74 \n" // UG -0.5781 coefficient + "movi v26.8b, #38 \n" // UR -0.2969 coefficient + "movi v27.8b, #18 \n" // VB -0.1406 coefficient + "movi v28.8b, #94 \n" // VG -0.7344 coefficient + "movi v29.16b,#0x80 \n" // 128.5 + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB + // pixels. + "subs %w3, %w3, #8 \n" // 8 processed per loop. 
+ "umull v4.8h, v0.8b, v24.8b \n" // B + "umlsl v4.8h, v1.8b, v25.8b \n" // G + "umlsl v4.8h, v2.8b, v26.8b \n" // R + "add v4.8h, v4.8h, v29.8h \n" // +128 -> unsigned - "umull v3.8h, v2.8b, v24.8b \n" // R - "umlsl v3.8h, v1.8b, v28.8b \n" // G - "umlsl v3.8h, v0.8b, v27.8b \n" // B - "add v3.8h, v3.8h, v29.8h \n" // +128 -> unsigned + "umull v3.8h, v2.8b, v24.8b \n" // R + "umlsl v3.8h, v1.8b, v28.8b \n" // G + "umlsl v3.8h, v0.8b, v27.8b \n" // B + "add v3.8h, v3.8h, v29.8h \n" // +128 -> unsigned - "uqshrn v0.8b, v4.8h, #8 \n" // 16 bit to 8 bit U - "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V + "uqshrn v0.8b, v4.8h, #8 \n" // 16 bit to 8 bit U + "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U. - MEMACCESS(2) - "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", - "v24", "v25", "v26", "v27", "v28", "v29" - ); + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U. + "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26", + "v27", "v28", "v29"); } -#define RGBTOUV_SETUP_REG \ - "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ - "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ - "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ - "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ - "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ - "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ - -// 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. -void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) { - asm volatile ( - RGBTOUV_SETUP_REG - "1: \n" - MEMACCESS(0) - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. - "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. - "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. - "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. - MEMACCESS(0) - "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%0], #64 \n" // load next 16. - "uaddlp v4.8h, v4.16b \n" // B 16 bytes -> 8 shorts. - "uaddlp v5.8h, v5.16b \n" // G 16 bytes -> 8 shorts. - "uaddlp v6.8h, v6.16b \n" // R 16 bytes -> 8 shorts. - - "addp v0.8h, v0.8h, v4.8h \n" // B 16 shorts -> 8 shorts. - "addp v1.8h, v1.8h, v5.8h \n" // G 16 shorts -> 8 shorts. - "addp v2.8h, v2.8h, v6.8h \n" // R 16 shorts -> 8 shorts. - - "urshr v0.8h, v0.8h, #1 \n" // 2x average - "urshr v1.8h, v1.8h, #1 \n" - "urshr v2.8h, v2.8h, #1 \n" - - "subs %w3, %w3, #32 \n" // 32 processed per loop. - "mul v3.8h, v0.8h, v20.8h \n" // B - "mls v3.8h, v1.8h, v21.8h \n" // G - "mls v3.8h, v2.8h, v22.8h \n" // R - "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned - "mul v4.8h, v2.8h, v20.8h \n" // R - "mls v4.8h, v1.8h, v24.8h \n" // G - "mls v4.8h, v0.8h, v23.8h \n" // B - "add v4.8h, v4.8h, v25.8h \n" // +128 -> unsigned - "uqshrn v0.8b, v3.8h, #8 \n" // 16 bit to 8 bit U - "uqshrn v1.8b, v4.8h, #8 \n" // 16 bit to 8 bit V - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U. - MEMACCESS(2) - "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. 
- "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v20", "v21", "v22", "v23", "v24", "v25" - ); -} +#define RGBTOUV_SETUP_REG \ + "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ + "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ + "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ + "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ + "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ + "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -#define RGBTOUV(QB, QG, QR) \ - "mul v3.8h, " #QB ",v20.8h \n" /* B */ \ - "mul v4.8h, " #QR ",v20.8h \n" /* R */ \ - "mls v3.8h, " #QG ",v21.8h \n" /* G */ \ - "mls v4.8h, " #QG ",v24.8h \n" /* G */ \ - "mls v3.8h, " #QR ",v22.8h \n" /* R */ \ - "mls v4.8h, " #QB ",v23.8h \n" /* B */ \ - "add v3.8h, v3.8h, v25.8h \n" /* +128 -> unsigned */ \ - "add v4.8h, v4.8h, v25.8h \n" /* +128 -> unsigned */ \ - "uqshrn v0.8b, v3.8h, #8 \n" /* 16 bit to 8 bit U */ \ - "uqshrn v1.8b, v4.8h, #8 \n" /* 16 bit to 8 bit V */ +// clang-format off +#define RGBTOUV(QB, QG, QR) \ + "mul v3.8h, " #QB ",v20.8h \n" /* B */ \ + "mul v4.8h, " #QR ",v20.8h \n" /* R */ \ + "mls v3.8h, " #QG ",v21.8h \n" /* G */ \ + "mls v4.8h, " #QG ",v24.8h \n" /* G */ \ + "mls v3.8h, " #QR ",v22.8h \n" /* R */ \ + "mls v4.8h, " #QB ",v23.8h \n" /* B */ \ + "add v3.8h, v3.8h, v25.8h \n" /* +128 -> unsigned */ \ + "add v4.8h, v4.8h, v25.8h \n" /* +128 -> unsigned */ \ + "uqshrn v0.8b, v3.8h, #8 \n" /* 16 bit to 8 bit U */ \ + "uqshrn v1.8b, v4.8h, #8 \n" /* 16 bit to 8 bit V */ +// clang-format on // TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr. // TODO(fbarchard): consider ptrdiff_t for all strides. -void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* src_argb_1 = src_argb + src_stride_argb; +void ARGBToUVRow_NEON(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_argb_1 = src_argb + src_stride_argb; asm volatile ( RGBTOUV_SETUP_REG "1: \n" - MEMACCESS(0) "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. @@ -1486,9 +1415,7 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v0.8h, v1.8h, v2.8h) - MEMACCESS(2) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - MEMACCESS(3) "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_argb), // %0 @@ -1503,9 +1430,12 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, } // TODO(fbarchard): Subsample match C code. 
-void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* src_argb_1 = src_argb + src_stride_argb; +void ARGBToUVJRow_NEON(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_argb_1 = src_argb + src_stride_argb; asm volatile ( "movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2 "movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2 @@ -1514,12 +1444,10 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, "movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2 "movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit) "1: \n" - MEMACCESS(0) "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. @@ -1531,9 +1459,7 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v0.8h, v1.8h, v2.8h) - MEMACCESS(2) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - MEMACCESS(3) "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_argb), // %0 @@ -1547,18 +1473,19 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, ); } -void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* src_bgra_1 = src_bgra + src_stride_bgra; +void BGRAToUVRow_NEON(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_bgra_1 = src_bgra + src_stride_bgra; asm volatile ( RGBTOUV_SETUP_REG "1: \n" - MEMACCESS(0) "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. "uaddlp v0.8h, v3.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v3.8h, v2.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v1.16b \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more "uadalp v0.8h, v7.16b \n" // B 16 bytes -> 8 shorts. "uadalp v3.8h, v6.16b \n" // G 16 bytes -> 8 shorts. @@ -1570,9 +1497,7 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v0.8h, v1.8h, v2.8h) - MEMACCESS(2) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - MEMACCESS(3) "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_bgra), // %0 @@ -1586,18 +1511,19 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, ); } -void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* src_abgr_1 = src_abgr + src_stride_abgr; +void ABGRToUVRow_NEON(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr; asm volatile ( RGBTOUV_SETUP_REG "1: \n" - MEMACCESS(0) "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. "uaddlp v3.8h, v2.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v2.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v1.8h, v0.16b \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more. "uadalp v3.8h, v6.16b \n" // B 16 bytes -> 8 shorts. 
"uadalp v2.8h, v5.16b \n" // G 16 bytes -> 8 shorts. @@ -1609,9 +1535,7 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v0.8h, v2.8h, v1.8h) - MEMACCESS(2) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - MEMACCESS(3) "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_abgr), // %0 @@ -1625,18 +1549,19 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, ); } -void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* src_rgba_1 = src_rgba + src_stride_rgba; +void RGBAToUVRow_NEON(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_rgba_1 = src_rgba + src_stride_rgba; asm volatile ( RGBTOUV_SETUP_REG "1: \n" - MEMACCESS(0) "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. "uaddlp v0.8h, v1.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v1.8h, v2.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v3.16b \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more. "uadalp v0.8h, v5.16b \n" // B 16 bytes -> 8 shorts. "uadalp v1.8h, v6.16b \n" // G 16 bytes -> 8 shorts. @@ -1648,9 +1573,7 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v0.8h, v1.8h, v2.8h) - MEMACCESS(2) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - MEMACCESS(3) "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_rgba), // %0 @@ -1664,18 +1587,19 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, ); } -void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* src_rgb24_1 = src_rgb24 + src_stride_rgb24; +void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_rgb24_1 = src_rgb24 + src_stride_rgb24; asm volatile ( RGBTOUV_SETUP_REG "1: \n" - MEMACCESS(0) "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 pixels. "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "ld3 {v4.16b,v5.16b,v6.16b}, [%1], #48 \n" // load 16 more. "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. @@ -1687,9 +1611,7 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v0.8h, v1.8h, v2.8h) - MEMACCESS(2) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - MEMACCESS(3) "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_rgb24), // %0 @@ -1703,18 +1625,19 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, ); } -void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* src_raw_1 = src_raw + src_stride_raw; +void RAWToUVRow_NEON(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_raw_1 = src_raw + src_stride_raw; asm volatile ( RGBTOUV_SETUP_REG "1: \n" - MEMACCESS(0) "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 8 RAW pixels. "uaddlp v2.8h, v2.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. 
"uaddlp v0.8h, v0.16b \n" // R 16 bytes -> 8 shorts. - MEMACCESS(1) "ld3 {v4.16b,v5.16b,v6.16b}, [%1], #48 \n" // load 8 more RAW pixels "uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts. "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. @@ -1726,9 +1649,7 @@ void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v2.8h, v1.8h, v0.8h) - MEMACCESS(2) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - MEMACCESS(3) "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_raw), // %0 @@ -1743,699 +1664,656 @@ void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* src_rgb565_1 = src_rgb565 + src_stride_rgb565; - asm volatile ( - "movi v22.8h, #56, lsl #0 \n" // UB / VR coeff (0.875) / 2 - "movi v23.8h, #37, lsl #0 \n" // UG coeff (-0.5781) / 2 - "movi v24.8h, #19, lsl #0 \n" // UR coeff (-0.2969) / 2 - "movi v25.8h, #9 , lsl #0 \n" // VB coeff (-0.1406) / 2 - "movi v26.8h, #47, lsl #0 \n" // VG coeff (-0.7344) / 2 - "movi v27.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit) - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels. - RGB565TOARGB - "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. - "uaddlp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts. - "uaddlp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts. - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // next 8 RGB565 pixels. - RGB565TOARGB - "uaddlp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts. - "uaddlp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts. - "uaddlp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts. +void RGB565ToUVRow_NEON(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_rgb565_1 = src_rgb565 + src_stride_rgb565; + asm volatile( + "movi v22.8h, #56, lsl #0 \n" // UB / VR coeff (0.875) / + // 2 + "movi v23.8h, #37, lsl #0 \n" // UG coeff (-0.5781) / 2 + "movi v24.8h, #19, lsl #0 \n" // UR coeff (-0.2969) / 2 + "movi v25.8h, #9 , lsl #0 \n" // VB coeff (-0.1406) / 2 + "movi v26.8h, #47, lsl #0 \n" // VG coeff (-0.7344) / 2 + "movi v27.16b, #0x80 \n" // 128.5 0x8080 in 16bit + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels. + RGB565TOARGB + "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. + "uaddlp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts. + "uaddlp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts. + "ld1 {v0.16b}, [%0], #16 \n" // next 8 RGB565 pixels. + RGB565TOARGB + "uaddlp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts. + "uaddlp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts. + "uaddlp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts. - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" // load 8 RGB565 pixels. - RGB565TOARGB - "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. - "uadalp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts. - "uadalp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts. - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" // next 8 RGB565 pixels. - RGB565TOARGB - "uadalp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts. - "uadalp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts. - "uadalp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts. + "ld1 {v0.16b}, [%1], #16 \n" // load 8 RGB565 pixels. + RGB565TOARGB + "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. + "uadalp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts. + "uadalp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 
+ "ld1 {v0.16b}, [%1], #16 \n" // next 8 RGB565 pixels. + RGB565TOARGB + "uadalp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts. + "uadalp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts. + "uadalp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts. - "ins v16.D[1], v17.D[0] \n" - "ins v18.D[1], v19.D[0] \n" - "ins v20.D[1], v21.D[0] \n" + "ins v16.D[1], v17.D[0] \n" + "ins v18.D[1], v19.D[0] \n" + "ins v20.D[1], v21.D[0] \n" - "urshr v4.8h, v16.8h, #1 \n" // 2x average - "urshr v5.8h, v18.8h, #1 \n" - "urshr v6.8h, v20.8h, #1 \n" + "urshr v4.8h, v16.8h, #1 \n" // 2x average + "urshr v5.8h, v18.8h, #1 \n" + "urshr v6.8h, v20.8h, #1 \n" - "subs %w4, %w4, #16 \n" // 16 processed per loop. - "mul v16.8h, v4.8h, v22.8h \n" // B - "mls v16.8h, v5.8h, v23.8h \n" // G - "mls v16.8h, v6.8h, v24.8h \n" // R - "add v16.8h, v16.8h, v27.8h \n" // +128 -> unsigned - "mul v17.8h, v6.8h, v22.8h \n" // R - "mls v17.8h, v5.8h, v26.8h \n" // G - "mls v17.8h, v4.8h, v25.8h \n" // B - "add v17.8h, v17.8h, v27.8h \n" // +128 -> unsigned - "uqshrn v0.8b, v16.8h, #8 \n" // 16 bit to 8 bit U - "uqshrn v1.8b, v17.8h, #8 \n" // 16 bit to 8 bit V - MEMACCESS(2) - "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - MEMACCESS(3) - "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. - "b.gt 1b \n" - : "+r"(src_rgb565), // %0 - "+r"(src_rgb565_1), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", - "v25", "v26", "v27" - ); + "subs %w4, %w4, #16 \n" // 16 processed per loop. + "mul v16.8h, v4.8h, v22.8h \n" // B + "mls v16.8h, v5.8h, v23.8h \n" // G + "mls v16.8h, v6.8h, v24.8h \n" // R + "add v16.8h, v16.8h, v27.8h \n" // +128 -> unsigned + "mul v17.8h, v6.8h, v22.8h \n" // R + "mls v17.8h, v5.8h, v26.8h \n" // G + "mls v17.8h, v4.8h, v25.8h \n" // B + "add v17.8h, v17.8h, v27.8h \n" // +128 -> unsigned + "uqshrn v0.8b, v16.8h, #8 \n" // 16 bit to 8 bit U + "uqshrn v1.8b, v17.8h, #8 \n" // 16 bit to 8 bit V + "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. + "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. + "b.gt 1b \n" + : "+r"(src_rgb565), // %0 + "+r"(src_rgb565_1), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", + "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", + "v27"); } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* src_argb1555_1 = src_argb1555 + src_stride_argb1555; - asm volatile ( - RGBTOUV_SETUP_REG - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels. - RGB555TOARGB - "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. - "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. - "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB1555 pixels. - RGB555TOARGB - "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. - "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. - "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. +void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_argb1555_1 = src_argb1555 + src_stride_argb1555; + asm volatile( + RGBTOUV_SETUP_REG + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels. 
+ RGB555TOARGB + "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. + "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. + "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. + "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB1555 pixels. + RGB555TOARGB + "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. + "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. + "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB1555 pixels. - RGB555TOARGB - "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. - "uadalp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. - "uadalp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB1555 pixels. - RGB555TOARGB - "uadalp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. - "uadalp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. - "uadalp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. + "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB1555 pixels. + RGB555TOARGB + "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. + "uadalp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. + "uadalp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. + "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB1555 pixels. + RGB555TOARGB + "uadalp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. + "uadalp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. + "uadalp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. - "ins v16.D[1], v26.D[0] \n" - "ins v17.D[1], v27.D[0] \n" - "ins v18.D[1], v28.D[0] \n" + "ins v16.D[1], v26.D[0] \n" + "ins v17.D[1], v27.D[0] \n" + "ins v18.D[1], v28.D[0] \n" - "urshr v4.8h, v16.8h, #1 \n" // 2x average - "urshr v5.8h, v17.8h, #1 \n" - "urshr v6.8h, v18.8h, #1 \n" + "urshr v4.8h, v16.8h, #1 \n" // 2x average + "urshr v5.8h, v17.8h, #1 \n" + "urshr v6.8h, v18.8h, #1 \n" - "subs %w4, %w4, #16 \n" // 16 processed per loop. - "mul v2.8h, v4.8h, v20.8h \n" // B - "mls v2.8h, v5.8h, v21.8h \n" // G - "mls v2.8h, v6.8h, v22.8h \n" // R - "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned - "mul v3.8h, v6.8h, v20.8h \n" // R - "mls v3.8h, v5.8h, v24.8h \n" // G - "mls v3.8h, v4.8h, v23.8h \n" // B - "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned - "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U - "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V - MEMACCESS(2) - "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - MEMACCESS(3) - "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. - "b.gt 1b \n" - : "+r"(src_argb1555), // %0 - "+r"(src_argb1555_1), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", - "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", - "v26", "v27", "v28" - ); + "subs %w4, %w4, #16 \n" // 16 processed per loop. + "mul v2.8h, v4.8h, v20.8h \n" // B + "mls v2.8h, v5.8h, v21.8h \n" // G + "mls v2.8h, v6.8h, v22.8h \n" // R + "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned + "mul v3.8h, v6.8h, v20.8h \n" // R + "mls v3.8h, v5.8h, v24.8h \n" // G + "mls v3.8h, v4.8h, v23.8h \n" // B + "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned + "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U + "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V + "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. + "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 
+ "b.gt 1b \n" + : "+r"(src_argb1555), // %0 + "+r"(src_argb1555_1), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", + "v28"); } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* src_argb4444_1 = src_argb4444 + src_stride_argb4444; - asm volatile ( - RGBTOUV_SETUP_REG - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels. - ARGB4444TOARGB - "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. - "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. - "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB4444 pixels. - ARGB4444TOARGB - "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. - "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. - "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. +void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_argb4444_1 = src_argb4444 + src_stride_argb4444; + asm volatile( + RGBTOUV_SETUP_REG + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels. + ARGB4444TOARGB + "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. + "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. + "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. + "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB4444 pixels. + ARGB4444TOARGB + "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. + "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. + "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB4444 pixels. - ARGB4444TOARGB - "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. - "uadalp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. - "uadalp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB4444 pixels. - ARGB4444TOARGB - "uadalp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. - "uadalp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. - "uadalp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. + "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB4444 pixels. + ARGB4444TOARGB + "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. + "uadalp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. + "uadalp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. + "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB4444 pixels. + ARGB4444TOARGB + "uadalp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. + "uadalp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. + "uadalp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. - "ins v16.D[1], v26.D[0] \n" - "ins v17.D[1], v27.D[0] \n" - "ins v18.D[1], v28.D[0] \n" + "ins v16.D[1], v26.D[0] \n" + "ins v17.D[1], v27.D[0] \n" + "ins v18.D[1], v28.D[0] \n" - "urshr v4.8h, v16.8h, #1 \n" // 2x average - "urshr v5.8h, v17.8h, #1 \n" - "urshr v6.8h, v18.8h, #1 \n" + "urshr v4.8h, v16.8h, #1 \n" // 2x average + "urshr v5.8h, v17.8h, #1 \n" + "urshr v6.8h, v18.8h, #1 \n" - "subs %w4, %w4, #16 \n" // 16 processed per loop. 
- "mul v2.8h, v4.8h, v20.8h \n" // B - "mls v2.8h, v5.8h, v21.8h \n" // G - "mls v2.8h, v6.8h, v22.8h \n" // R - "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned - "mul v3.8h, v6.8h, v20.8h \n" // R - "mls v3.8h, v5.8h, v24.8h \n" // G - "mls v3.8h, v4.8h, v23.8h \n" // B - "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned - "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U - "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V - MEMACCESS(2) - "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. - MEMACCESS(3) - "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. - "b.gt 1b \n" - : "+r"(src_argb4444), // %0 - "+r"(src_argb4444_1), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", - "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", - "v26", "v27", "v28" + "subs %w4, %w4, #16 \n" // 16 processed per loop. + "mul v2.8h, v4.8h, v20.8h \n" // B + "mls v2.8h, v5.8h, v21.8h \n" // G + "mls v2.8h, v6.8h, v22.8h \n" // R + "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned + "mul v3.8h, v6.8h, v20.8h \n" // R + "mls v3.8h, v5.8h, v24.8h \n" // G + "mls v3.8h, v4.8h, v23.8h \n" // B + "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned + "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U + "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V + "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. + "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. + "b.gt 1b \n" + : "+r"(src_argb4444), // %0 + "+r"(src_argb4444_1), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", + "v28" - ); + ); } -void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) { - asm volatile ( - "movi v24.8b, #13 \n" // B * 0.1016 coefficient - "movi v25.8b, #65 \n" // G * 0.5078 coefficient - "movi v26.8b, #33 \n" // R * 0.2578 coefficient - "movi v27.8b, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - RGB565TOARGB - "umull v3.8h, v0.8b, v24.8b \n" // B - "umlal v3.8h, v1.8b, v25.8b \n" // G - "umlal v3.8h, v2.8b, v26.8b \n" // R - "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y - "uqadd v0.8b, v0.8b, v27.8b \n" - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. - "b.gt 1b \n" - : "+r"(src_rgb565), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6", - "v24", "v25", "v26", "v27" - ); +void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { + asm volatile( + "movi v24.8b, #13 \n" // B * 0.1016 coefficient + "movi v25.8b, #65 \n" // G * 0.5078 coefficient + "movi v26.8b, #33 \n" // R * 0.2578 coefficient + "movi v27.8b, #16 \n" // Add 16 constant + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels. + "subs %w2, %w2, #8 \n" // 8 processed per loop. + RGB565TOARGB + "umull v3.8h, v0.8b, v24.8b \n" // B + "umlal v3.8h, v1.8b, v25.8b \n" // G + "umlal v3.8h, v2.8b, v26.8b \n" // R + "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y + "uqadd v0.8b, v0.8b, v27.8b \n" + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 
+ "b.gt 1b \n" + : "+r"(src_rgb565), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6", "v24", "v25", "v26", + "v27"); } -void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) { - asm volatile ( - "movi v4.8b, #13 \n" // B * 0.1016 coefficient - "movi v5.8b, #65 \n" // G * 0.5078 coefficient - "movi v6.8b, #33 \n" // R * 0.2578 coefficient - "movi v7.8b, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - ARGB1555TOARGB - "umull v3.8h, v0.8b, v4.8b \n" // B - "umlal v3.8h, v1.8b, v5.8b \n" // G - "umlal v3.8h, v2.8b, v6.8b \n" // R - "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y - "uqadd v0.8b, v0.8b, v7.8b \n" - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. - "b.gt 1b \n" - : "+r"(src_argb1555), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" - ); +void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_y, + int width) { + asm volatile( + "movi v4.8b, #13 \n" // B * 0.1016 coefficient + "movi v5.8b, #65 \n" // G * 0.5078 coefficient + "movi v6.8b, #33 \n" // R * 0.2578 coefficient + "movi v7.8b, #16 \n" // Add 16 constant + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels. + "subs %w2, %w2, #8 \n" // 8 processed per loop. + ARGB1555TOARGB + "umull v3.8h, v0.8b, v4.8b \n" // B + "umlal v3.8h, v1.8b, v5.8b \n" // G + "umlal v3.8h, v2.8b, v6.8b \n" // R + "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y + "uqadd v0.8b, v0.8b, v7.8b \n" + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. + "b.gt 1b \n" + : "+r"(src_argb1555), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } -void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) { - asm volatile ( - "movi v24.8b, #13 \n" // B * 0.1016 coefficient - "movi v25.8b, #65 \n" // G * 0.5078 coefficient - "movi v26.8b, #33 \n" // R * 0.2578 coefficient - "movi v27.8b, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - ARGB4444TOARGB - "umull v3.8h, v0.8b, v24.8b \n" // B - "umlal v3.8h, v1.8b, v25.8b \n" // G - "umlal v3.8h, v2.8b, v26.8b \n" // R - "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y - "uqadd v0.8b, v0.8b, v27.8b \n" - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. - "b.gt 1b \n" - : "+r"(src_argb4444), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v24", "v25", "v26", "v27" - ); +void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_y, + int width) { + asm volatile( + "movi v24.8b, #13 \n" // B * 0.1016 coefficient + "movi v25.8b, #65 \n" // G * 0.5078 coefficient + "movi v26.8b, #33 \n" // R * 0.2578 coefficient + "movi v27.8b, #16 \n" // Add 16 constant + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels. + "subs %w2, %w2, #8 \n" // 8 processed per loop. + ARGB4444TOARGB + "umull v3.8h, v0.8b, v24.8b \n" // B + "umlal v3.8h, v1.8b, v25.8b \n" // G + "umlal v3.8h, v2.8b, v26.8b \n" // R + "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y + "uqadd v0.8b, v0.8b, v27.8b \n" + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 
+ "b.gt 1b \n" + : "+r"(src_argb4444), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v24", "v25", "v26", "v27"); } -void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) { - asm volatile ( - "movi v4.8b, #33 \n" // R * 0.2578 coefficient - "movi v5.8b, #65 \n" // G * 0.5078 coefficient - "movi v6.8b, #13 \n" // B * 0.1016 coefficient - "movi v7.8b, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "umull v16.8h, v1.8b, v4.8b \n" // R - "umlal v16.8h, v2.8b, v5.8b \n" // G - "umlal v16.8h, v3.8b, v6.8b \n" // B - "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y - "uqadd v0.8b, v0.8b, v7.8b \n" - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. - "b.gt 1b \n" - : "+r"(src_bgra), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" - ); +void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) { + asm volatile( + "movi v4.8b, #33 \n" // R * 0.2578 coefficient + "movi v5.8b, #65 \n" // G * 0.5078 coefficient + "movi v6.8b, #13 \n" // B * 0.1016 coefficient + "movi v7.8b, #16 \n" // Add 16 constant + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels. + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "umull v16.8h, v1.8b, v4.8b \n" // R + "umlal v16.8h, v2.8b, v5.8b \n" // G + "umlal v16.8h, v3.8b, v6.8b \n" // B + "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y + "uqadd v0.8b, v0.8b, v7.8b \n" + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. + "b.gt 1b \n" + : "+r"(src_bgra), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } -void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) { - asm volatile ( - "movi v4.8b, #33 \n" // R * 0.2578 coefficient - "movi v5.8b, #65 \n" // G * 0.5078 coefficient - "movi v6.8b, #13 \n" // B * 0.1016 coefficient - "movi v7.8b, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "umull v16.8h, v0.8b, v4.8b \n" // R - "umlal v16.8h, v1.8b, v5.8b \n" // G - "umlal v16.8h, v2.8b, v6.8b \n" // B - "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y - "uqadd v0.8b, v0.8b, v7.8b \n" - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. - "b.gt 1b \n" - : "+r"(src_abgr), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" - ); +void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) { + asm volatile( + "movi v4.8b, #33 \n" // R * 0.2578 coefficient + "movi v5.8b, #65 \n" // G * 0.5078 coefficient + "movi v6.8b, #13 \n" // B * 0.1016 coefficient + "movi v7.8b, #16 \n" // Add 16 constant + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels. + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "umull v16.8h, v0.8b, v4.8b \n" // R + "umlal v16.8h, v1.8b, v5.8b \n" // G + "umlal v16.8h, v2.8b, v6.8b \n" // B + "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y + "uqadd v0.8b, v0.8b, v7.8b \n" + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 
+ "b.gt 1b \n" + : "+r"(src_abgr), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } -void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) { - asm volatile ( - "movi v4.8b, #13 \n" // B * 0.1016 coefficient - "movi v5.8b, #65 \n" // G * 0.5078 coefficient - "movi v6.8b, #33 \n" // R * 0.2578 coefficient - "movi v7.8b, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "umull v16.8h, v1.8b, v4.8b \n" // B - "umlal v16.8h, v2.8b, v5.8b \n" // G - "umlal v16.8h, v3.8b, v6.8b \n" // R - "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y - "uqadd v0.8b, v0.8b, v7.8b \n" - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. - "b.gt 1b \n" - : "+r"(src_rgba), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" - ); +void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) { + asm volatile( + "movi v4.8b, #13 \n" // B * 0.1016 coefficient + "movi v5.8b, #65 \n" // G * 0.5078 coefficient + "movi v6.8b, #33 \n" // R * 0.2578 coefficient + "movi v7.8b, #16 \n" // Add 16 constant + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels. + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "umull v16.8h, v1.8b, v4.8b \n" // B + "umlal v16.8h, v2.8b, v5.8b \n" // G + "umlal v16.8h, v3.8b, v6.8b \n" // R + "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y + "uqadd v0.8b, v0.8b, v7.8b \n" + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. + "b.gt 1b \n" + : "+r"(src_rgba), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } -void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) { - asm volatile ( - "movi v4.8b, #13 \n" // B * 0.1016 coefficient - "movi v5.8b, #65 \n" // G * 0.5078 coefficient - "movi v6.8b, #33 \n" // R * 0.2578 coefficient - "movi v7.8b, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "umull v16.8h, v0.8b, v4.8b \n" // B - "umlal v16.8h, v1.8b, v5.8b \n" // G - "umlal v16.8h, v2.8b, v6.8b \n" // R - "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y - "uqadd v0.8b, v0.8b, v7.8b \n" - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. - "b.gt 1b \n" - : "+r"(src_rgb24), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" - ); +void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { + asm volatile( + "movi v4.8b, #13 \n" // B * 0.1016 coefficient + "movi v5.8b, #65 \n" // G * 0.5078 coefficient + "movi v6.8b, #33 \n" // R * 0.2578 coefficient + "movi v7.8b, #16 \n" // Add 16 constant + "1: \n" + "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels. + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "umull v16.8h, v0.8b, v4.8b \n" // B + "umlal v16.8h, v1.8b, v5.8b \n" // G + "umlal v16.8h, v2.8b, v6.8b \n" // R + "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y + "uqadd v0.8b, v0.8b, v7.8b \n" + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 
+ "b.gt 1b \n" + : "+r"(src_rgb24), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } -void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) { - asm volatile ( - "movi v4.8b, #33 \n" // R * 0.2578 coefficient - "movi v5.8b, #65 \n" // G * 0.5078 coefficient - "movi v6.8b, #13 \n" // B * 0.1016 coefficient - "movi v7.8b, #16 \n" // Add 16 constant - "1: \n" - MEMACCESS(0) - "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "umull v16.8h, v0.8b, v4.8b \n" // B - "umlal v16.8h, v1.8b, v5.8b \n" // G - "umlal v16.8h, v2.8b, v6.8b \n" // R - "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y - "uqadd v0.8b, v0.8b, v7.8b \n" - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. - "b.gt 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" - ); +void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) { + asm volatile( + "movi v4.8b, #33 \n" // R * 0.2578 coefficient + "movi v5.8b, #65 \n" // G * 0.5078 coefficient + "movi v6.8b, #13 \n" // B * 0.1016 coefficient + "movi v7.8b, #16 \n" // Add 16 constant + "1: \n" + "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels. + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "umull v16.8h, v0.8b, v4.8b \n" // B + "umlal v16.8h, v1.8b, v5.8b \n" // G + "umlal v16.8h, v2.8b, v6.8b \n" // R + "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y + "uqadd v0.8b, v0.8b, v7.8b \n" + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. + "b.gt 1b \n" + : "+r"(src_raw), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } // Bilinear filter 16x2 -> 16x1 -void InterpolateRow_NEON(uint8* dst_ptr, - const uint8* src_ptr, ptrdiff_t src_stride, - int dst_width, int source_y_fraction) { +void InterpolateRow_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, + int source_y_fraction) { int y1_fraction = source_y_fraction; int y0_fraction = 256 - y1_fraction; - const uint8* src_ptr1 = src_ptr + src_stride; - asm volatile ( - "cmp %w4, #0 \n" - "b.eq 100f \n" - "cmp %w4, #128 \n" - "b.eq 50f \n" + const uint8_t* src_ptr1 = src_ptr + src_stride; + asm volatile( + "cmp %w4, #0 \n" + "b.eq 100f \n" + "cmp %w4, #128 \n" + "b.eq 50f \n" - "dup v5.16b, %w4 \n" - "dup v4.16b, %w5 \n" - // General purpose row blend. - "1: \n" - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" - MEMACCESS(2) - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "umull v2.8h, v0.8b, v4.8b \n" - "umull2 v3.8h, v0.16b, v4.16b \n" - "umlal v2.8h, v1.8b, v5.8b \n" - "umlal2 v3.8h, v1.16b, v5.16b \n" - "rshrn v0.8b, v2.8h, #8 \n" - "rshrn2 v0.16b, v3.8h, #8 \n" - MEMACCESS(0) - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 1b \n" - "b 99f \n" + "dup v5.16b, %w4 \n" + "dup v4.16b, %w5 \n" + // General purpose row blend. + "1: \n" + "ld1 {v0.16b}, [%1], #16 \n" + "ld1 {v1.16b}, [%2], #16 \n" + "subs %w3, %w3, #16 \n" + "umull v2.8h, v0.8b, v4.8b \n" + "umull2 v3.8h, v0.16b, v4.16b \n" + "umlal v2.8h, v1.8b, v5.8b \n" + "umlal2 v3.8h, v1.16b, v5.16b \n" + "rshrn v0.8b, v2.8h, #8 \n" + "rshrn2 v0.16b, v3.8h, #8 \n" + "st1 {v0.16b}, [%0], #16 \n" + "b.gt 1b \n" + "b 99f \n" - // Blend 50 / 50. 
- "50: \n" - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" - MEMACCESS(2) - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "urhadd v0.16b, v0.16b, v1.16b \n" - MEMACCESS(0) - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 50b \n" - "b 99f \n" + // Blend 50 / 50. + "50: \n" + "ld1 {v0.16b}, [%1], #16 \n" + "ld1 {v1.16b}, [%2], #16 \n" + "subs %w3, %w3, #16 \n" + "urhadd v0.16b, v0.16b, v1.16b \n" + "st1 {v0.16b}, [%0], #16 \n" + "b.gt 50b \n" + "b 99f \n" - // Blend 100 / 0 - Copy row unchanged. - "100: \n" - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" - "subs %w3, %w3, #16 \n" - MEMACCESS(0) - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 100b \n" + // Blend 100 / 0 - Copy row unchanged. + "100: \n" + "ld1 {v0.16b}, [%1], #16 \n" + "subs %w3, %w3, #16 \n" + "st1 {v0.16b}, [%0], #16 \n" + "b.gt 100b \n" - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(src_ptr1), // %2 - "+r"(dst_width), // %3 - "+r"(y1_fraction), // %4 - "+r"(y0_fraction) // %5 - : - : "cc", "memory", "v0", "v1", "v3", "v4", "v5" - ); + "99: \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(src_ptr1), // %2 + "+r"(dst_width), // %3 + "+r"(y1_fraction), // %4 + "+r"(y0_fraction) // %5 + : + : "cc", "memory", "v0", "v1", "v3", "v4", "v5"); } // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr -void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - "subs %w3, %w3, #8 \n" - "b.lt 89f \n" - // Blend 8 pixels. - "8: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB0 pixels - MEMACCESS(1) - "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 ARGB1 pixels - "subs %w3, %w3, #8 \n" // 8 processed per loop. - "umull v16.8h, v4.8b, v3.8b \n" // db * a - "umull v17.8h, v5.8b, v3.8b \n" // dg * a - "umull v18.8h, v6.8b, v3.8b \n" // dr * a - "uqrshrn v16.8b, v16.8h, #8 \n" // db >>= 8 - "uqrshrn v17.8b, v17.8h, #8 \n" // dg >>= 8 - "uqrshrn v18.8b, v18.8h, #8 \n" // dr >>= 8 - "uqsub v4.8b, v4.8b, v16.8b \n" // db - (db * a / 256) - "uqsub v5.8b, v5.8b, v17.8b \n" // dg - (dg * a / 256) - "uqsub v6.8b, v6.8b, v18.8b \n" // dr - (dr * a / 256) - "uqadd v0.8b, v0.8b, v4.8b \n" // + sb - "uqadd v1.8b, v1.8b, v5.8b \n" // + sg - "uqadd v2.8b, v2.8b, v6.8b \n" // + sr - "movi v3.8b, #255 \n" // a = 255 - MEMACCESS(2) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels - "b.ge 8b \n" +void ARGBBlendRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + asm volatile( + "subs %w3, %w3, #8 \n" + "b.lt 89f \n" + // Blend 8 pixels. + "8: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB0 + // pixels + "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 ARGB1 + // pixels + "subs %w3, %w3, #8 \n" // 8 processed per loop. 
+ "umull v16.8h, v4.8b, v3.8b \n" // db * a + "umull v17.8h, v5.8b, v3.8b \n" // dg * a + "umull v18.8h, v6.8b, v3.8b \n" // dr * a + "uqrshrn v16.8b, v16.8h, #8 \n" // db >>= 8 + "uqrshrn v17.8b, v17.8h, #8 \n" // dg >>= 8 + "uqrshrn v18.8b, v18.8h, #8 \n" // dr >>= 8 + "uqsub v4.8b, v4.8b, v16.8b \n" // db - (db * a / 256) + "uqsub v5.8b, v5.8b, v17.8b \n" // dg - (dg * a / 256) + "uqsub v6.8b, v6.8b, v18.8b \n" // dr - (dr * a / 256) + "uqadd v0.8b, v0.8b, v4.8b \n" // + sb + "uqadd v1.8b, v1.8b, v5.8b \n" // + sg + "uqadd v2.8b, v2.8b, v6.8b \n" // + sr + "movi v3.8b, #255 \n" // a = 255 + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB + // pixels + "b.ge 8b \n" - "89: \n" - "adds %w3, %w3, #8-1 \n" - "b.lt 99f \n" + "89: \n" + "adds %w3, %w3, #8-1 \n" + "b.lt 99f \n" - // Blend 1 pixels. - "1: \n" - MEMACCESS(0) - "ld4 {v0.b,v1.b,v2.b,v3.b}[0], [%0], #4 \n" // load 1 pixel ARGB0. - MEMACCESS(1) - "ld4 {v4.b,v5.b,v6.b,v7.b}[0], [%1], #4 \n" // load 1 pixel ARGB1. - "subs %w3, %w3, #1 \n" // 1 processed per loop. - "umull v16.8h, v4.8b, v3.8b \n" // db * a - "umull v17.8h, v5.8b, v3.8b \n" // dg * a - "umull v18.8h, v6.8b, v3.8b \n" // dr * a - "uqrshrn v16.8b, v16.8h, #8 \n" // db >>= 8 - "uqrshrn v17.8b, v17.8h, #8 \n" // dg >>= 8 - "uqrshrn v18.8b, v18.8h, #8 \n" // dr >>= 8 - "uqsub v4.8b, v4.8b, v16.8b \n" // db - (db * a / 256) - "uqsub v5.8b, v5.8b, v17.8b \n" // dg - (dg * a / 256) - "uqsub v6.8b, v6.8b, v18.8b \n" // dr - (dr * a / 256) - "uqadd v0.8b, v0.8b, v4.8b \n" // + sb - "uqadd v1.8b, v1.8b, v5.8b \n" // + sg - "uqadd v2.8b, v2.8b, v6.8b \n" // + sr - "movi v3.8b, #255 \n" // a = 255 - MEMACCESS(2) - "st4 {v0.b,v1.b,v2.b,v3.b}[0], [%2], #4 \n" // store 1 pixel. - "b.ge 1b \n" + // Blend 1 pixels. + "1: \n" + "ld4 {v0.b,v1.b,v2.b,v3.b}[0], [%0], #4 \n" // load 1 pixel ARGB0. + "ld4 {v4.b,v5.b,v6.b,v7.b}[0], [%1], #4 \n" // load 1 pixel ARGB1. + "subs %w3, %w3, #1 \n" // 1 processed per loop. + "umull v16.8h, v4.8b, v3.8b \n" // db * a + "umull v17.8h, v5.8b, v3.8b \n" // dg * a + "umull v18.8h, v6.8b, v3.8b \n" // dr * a + "uqrshrn v16.8b, v16.8h, #8 \n" // db >>= 8 + "uqrshrn v17.8b, v17.8h, #8 \n" // dg >>= 8 + "uqrshrn v18.8b, v18.8h, #8 \n" // dr >>= 8 + "uqsub v4.8b, v4.8b, v16.8b \n" // db - (db * a / 256) + "uqsub v5.8b, v5.8b, v17.8b \n" // dg - (dg * a / 256) + "uqsub v6.8b, v6.8b, v18.8b \n" // dr - (dr * a / 256) + "uqadd v0.8b, v0.8b, v4.8b \n" // + sb + "uqadd v1.8b, v1.8b, v5.8b \n" // + sg + "uqadd v2.8b, v2.8b, v6.8b \n" // + sr + "movi v3.8b, #255 \n" // a = 255 + "st4 {v0.b,v1.b,v2.b,v3.b}[0], [%2], #4 \n" // store 1 pixel. + "b.ge 1b \n" - "99: \n" + "99: \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v16", "v17", "v18" - ); + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", + "v17", "v18"); } // Attenuate 8 pixels at a time. -void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - // Attenuate 8 pixels. - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels - "subs %w2, %w2, #8 \n" // 8 processed per loop. 
- "umull v4.8h, v0.8b, v3.8b \n" // b * a - "umull v5.8h, v1.8b, v3.8b \n" // g * a - "umull v6.8h, v2.8b, v3.8b \n" // r * a - "uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8 - "uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8 - "uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8 - MEMACCESS(1) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6" - ); +void ARGBAttenuateRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { + asm volatile( + // Attenuate 8 pixels. + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "umull v4.8h, v0.8b, v3.8b \n" // b * a + "umull v5.8h, v1.8b, v3.8b \n" // g * a + "umull v6.8h, v2.8b, v3.8b \n" // r * a + "uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8 + "uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8 + "uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8 + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB + // pixels + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"); } // Quantize 8 ARGB pixels (32 bytes). // dst = (dst * scale >> 16) * interval_size + interval_offset; -void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width) { - asm volatile ( - "dup v4.8h, %w2 \n" - "ushr v4.8h, v4.8h, #1 \n" // scale >>= 1 - "dup v5.8h, %w3 \n" // interval multiply. - "dup v6.8h, %w4 \n" // interval add +void ARGBQuantizeRow_NEON(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width) { + asm volatile( + "dup v4.8h, %w2 \n" + "ushr v4.8h, v4.8h, #1 \n" // scale >>= 1 + "dup v5.8h, %w3 \n" // interval multiply. + "dup v6.8h, %w4 \n" // interval add - // 8 pixel loop. - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 pixels of ARGB. - "subs %w1, %w1, #8 \n" // 8 processed per loop. - "uxtl v0.8h, v0.8b \n" // b (0 .. 255) - "uxtl v1.8h, v1.8b \n" - "uxtl v2.8h, v2.8b \n" - "sqdmulh v0.8h, v0.8h, v4.8h \n" // b * scale - "sqdmulh v1.8h, v1.8h, v4.8h \n" // g - "sqdmulh v2.8h, v2.8h, v4.8h \n" // r - "mul v0.8h, v0.8h, v5.8h \n" // b * interval_size - "mul v1.8h, v1.8h, v5.8h \n" // g - "mul v2.8h, v2.8h, v5.8h \n" // r - "add v0.8h, v0.8h, v6.8h \n" // b + interval_offset - "add v1.8h, v1.8h, v6.8h \n" // g - "add v2.8h, v2.8h, v6.8h \n" // r - "uqxtn v0.8b, v0.8h \n" - "uqxtn v1.8b, v1.8h \n" - "uqxtn v2.8b, v2.8h \n" - MEMACCESS(0) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 ARGB pixels - "b.gt 1b \n" - : "+r"(dst_argb), // %0 - "+r"(width) // %1 - : "r"(scale), // %2 - "r"(interval_size), // %3 - "r"(interval_offset) // %4 - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6" - ); + // 8 pixel loop. + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB. + "subs %w1, %w1, #8 \n" // 8 processed per loop. + "uxtl v0.8h, v0.8b \n" // b (0 .. 
255) + "uxtl v1.8h, v1.8b \n" + "uxtl v2.8h, v2.8b \n" + "sqdmulh v0.8h, v0.8h, v4.8h \n" // b * scale + "sqdmulh v1.8h, v1.8h, v4.8h \n" // g + "sqdmulh v2.8h, v2.8h, v4.8h \n" // r + "mul v0.8h, v0.8h, v5.8h \n" // b * interval_size + "mul v1.8h, v1.8h, v5.8h \n" // g + "mul v2.8h, v2.8h, v5.8h \n" // r + "add v0.8h, v0.8h, v6.8h \n" // b + interval_offset + "add v1.8h, v1.8h, v6.8h \n" // g + "add v2.8h, v2.8h, v6.8h \n" // r + "uqxtn v0.8b, v0.8h \n" + "uqxtn v1.8b, v1.8h \n" + "uqxtn v2.8b, v2.8h \n" + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 ARGB + "b.gt 1b \n" + : "+r"(dst_argb), // %0 + "+r"(width) // %1 + : "r"(scale), // %2 + "r"(interval_size), // %3 + "r"(interval_offset) // %4 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"); } // Shade 8 pixels at a time by specified value. // NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8. // Rounding in vqrdmulh does +1 to high if high bit of low s16 is set. -void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value) { - asm volatile ( - "dup v0.4s, %w3 \n" // duplicate scale value. - "zip1 v0.8b, v0.8b, v0.8b \n" // v0.8b aarrggbb. - "ushr v0.8h, v0.8h, #1 \n" // scale / 2. +void ARGBShadeRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value) { + asm volatile( + "dup v0.4s, %w3 \n" // duplicate scale value. + "zip1 v0.8b, v0.8b, v0.8b \n" // v0.8b aarrggbb. + "ushr v0.8h, v0.8h, #1 \n" // scale / 2. - // 8 pixel loop. - "1: \n" - MEMACCESS(0) - "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%0], #32 \n" // load 8 ARGB pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "uxtl v4.8h, v4.8b \n" // b (0 .. 255) - "uxtl v5.8h, v5.8b \n" - "uxtl v6.8h, v6.8b \n" - "uxtl v7.8h, v7.8b \n" - "sqrdmulh v4.8h, v4.8h, v0.h[0] \n" // b * scale * 2 - "sqrdmulh v5.8h, v5.8h, v0.h[1] \n" // g - "sqrdmulh v6.8h, v6.8h, v0.h[2] \n" // r - "sqrdmulh v7.8h, v7.8h, v0.h[3] \n" // a - "uqxtn v4.8b, v4.8h \n" - "uqxtn v5.8b, v5.8h \n" - "uqxtn v6.8b, v6.8h \n" - "uqxtn v7.8b, v7.8h \n" - MEMACCESS(1) - "st4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // store 8 ARGB pixels - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(value) // %3 - : "cc", "memory", "v0", "v4", "v5", "v6", "v7" - ); + // 8 pixel loop. + "1: \n" + "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%0], #32 \n" // load 8 ARGB + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "uxtl v4.8h, v4.8b \n" // b (0 .. 255) + "uxtl v5.8h, v5.8b \n" + "uxtl v6.8h, v6.8b \n" + "uxtl v7.8h, v7.8b \n" + "sqrdmulh v4.8h, v4.8h, v0.h[0] \n" // b * scale * 2 + "sqrdmulh v5.8h, v5.8h, v0.h[1] \n" // g + "sqrdmulh v6.8h, v6.8h, v0.h[2] \n" // r + "sqrdmulh v7.8h, v7.8h, v0.h[3] \n" // a + "uqxtn v4.8b, v4.8h \n" + "uqxtn v5.8b, v5.8h \n" + "uqxtn v6.8b, v6.8h \n" + "uqxtn v7.8b, v7.8h \n" + "st4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // store 8 ARGB + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(value) // %3 + : "cc", "memory", "v0", "v4", "v5", "v6", "v7"); } // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels // Similar to ARGBToYJ but stores ARGB. 
// C code is (15 * b + 75 * g + 38 * r + 64) >> 7; -void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - "movi v24.8b, #15 \n" // B * 0.11400 coefficient - "movi v25.8b, #75 \n" // G * 0.58700 coefficient - "movi v26.8b, #38 \n" // R * 0.29900 coefficient - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "umull v4.8h, v0.8b, v24.8b \n" // B - "umlal v4.8h, v1.8b, v25.8b \n" // G - "umlal v4.8h, v2.8b, v26.8b \n" // R - "sqrshrun v0.8b, v4.8h, #7 \n" // 15 bit to 8 bit B - "orr v1.8b, v0.8b, v0.8b \n" // G - "orr v2.8b, v0.8b, v0.8b \n" // R - MEMACCESS(1) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 pixels. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26" - ); +void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) { + asm volatile( + "movi v24.8b, #15 \n" // B * 0.11400 coefficient + "movi v25.8b, #75 \n" // G * 0.58700 coefficient + "movi v26.8b, #38 \n" // R * 0.29900 coefficient + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "umull v4.8h, v0.8b, v24.8b \n" // B + "umlal v4.8h, v1.8b, v25.8b \n" // G + "umlal v4.8h, v2.8b, v26.8b \n" // R + "sqrshrun v0.8b, v4.8h, #7 \n" // 15 bit to 8 bit B + "orr v1.8b, v0.8b, v0.8b \n" // G + "orr v2.8b, v0.8b, v0.8b \n" // R + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 pixels. + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26"); } // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. @@ -2443,194 +2321,180 @@ void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { // g = (r * 45 + g * 88 + b * 22) >> 7 // r = (r * 50 + g * 98 + b * 24) >> 7 -void ARGBSepiaRow_NEON(uint8* dst_argb, int width) { - asm volatile ( - "movi v20.8b, #17 \n" // BB coefficient - "movi v21.8b, #68 \n" // BG coefficient - "movi v22.8b, #35 \n" // BR coefficient - "movi v24.8b, #22 \n" // GB coefficient - "movi v25.8b, #88 \n" // GG coefficient - "movi v26.8b, #45 \n" // GR coefficient - "movi v28.8b, #24 \n" // BB coefficient - "movi v29.8b, #98 \n" // BG coefficient - "movi v30.8b, #50 \n" // BR coefficient - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB pixels. - "subs %w1, %w1, #8 \n" // 8 processed per loop. - "umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B - "umlal v4.8h, v1.8b, v21.8b \n" // G - "umlal v4.8h, v2.8b, v22.8b \n" // R - "umull v5.8h, v0.8b, v24.8b \n" // B to Sepia G - "umlal v5.8h, v1.8b, v25.8b \n" // G - "umlal v5.8h, v2.8b, v26.8b \n" // R - "umull v6.8h, v0.8b, v28.8b \n" // B to Sepia R - "umlal v6.8h, v1.8b, v29.8b \n" // G - "umlal v6.8h, v2.8b, v30.8b \n" // R - "uqshrn v0.8b, v4.8h, #7 \n" // 16 bit to 8 bit B - "uqshrn v1.8b, v5.8h, #7 \n" // 16 bit to 8 bit G - "uqshrn v2.8b, v6.8h, #7 \n" // 16 bit to 8 bit R - MEMACCESS(0) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 pixels. 
- "b.gt 1b \n" - : "+r"(dst_argb), // %0 - "+r"(width) // %1 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v20", "v21", "v22", "v24", "v25", "v26", "v28", "v29", "v30" - ); +void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) { + asm volatile( + "movi v20.8b, #17 \n" // BB coefficient + "movi v21.8b, #68 \n" // BG coefficient + "movi v22.8b, #35 \n" // BR coefficient + "movi v24.8b, #22 \n" // GB coefficient + "movi v25.8b, #88 \n" // GG coefficient + "movi v26.8b, #45 \n" // GR coefficient + "movi v28.8b, #24 \n" // BB coefficient + "movi v29.8b, #98 \n" // BG coefficient + "movi v30.8b, #50 \n" // BR coefficient + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB pixels. + "subs %w1, %w1, #8 \n" // 8 processed per loop. + "umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B + "umlal v4.8h, v1.8b, v21.8b \n" // G + "umlal v4.8h, v2.8b, v22.8b \n" // R + "umull v5.8h, v0.8b, v24.8b \n" // B to Sepia G + "umlal v5.8h, v1.8b, v25.8b \n" // G + "umlal v5.8h, v2.8b, v26.8b \n" // R + "umull v6.8h, v0.8b, v28.8b \n" // B to Sepia R + "umlal v6.8h, v1.8b, v29.8b \n" // G + "umlal v6.8h, v2.8b, v30.8b \n" // R + "uqshrn v0.8b, v4.8h, #7 \n" // 16 bit to 8 bit B + "uqshrn v1.8b, v5.8h, #7 \n" // 16 bit to 8 bit G + "uqshrn v2.8b, v6.8h, #7 \n" // 16 bit to 8 bit R + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 pixels. + "b.gt 1b \n" + : "+r"(dst_argb), // %0 + "+r"(width) // %1 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", + "v21", "v22", "v24", "v25", "v26", "v28", "v29", "v30"); } // Tranform 8 ARGB pixels (32 bytes) with color matrix. // TODO(fbarchard): Was same as Sepia except matrix is provided. This function // needs to saturate. Consider doing a non-saturating version. -void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width) { - asm volatile ( - MEMACCESS(3) - "ld1 {v2.16b}, [%3] \n" // load 3 ARGB vectors. - "sxtl v0.8h, v2.8b \n" // B,G coefficients s16. - "sxtl2 v1.8h, v2.16b \n" // R,A coefficients s16. +void ARGBColorMatrixRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width) { + asm volatile( + "ld1 {v2.16b}, [%3] \n" // load 3 ARGB vectors. + "sxtl v0.8h, v2.8b \n" // B,G coefficients s16. + "sxtl2 v1.8h, v2.16b \n" // R,A coefficients s16. - "1: \n" - MEMACCESS(0) - "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%0], #32 \n" // load 8 pixels. - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "uxtl v16.8h, v16.8b \n" // b (0 .. 
255) 16 bit - "uxtl v17.8h, v17.8b \n" // g - "uxtl v18.8h, v18.8b \n" // r - "uxtl v19.8h, v19.8b \n" // a - "mul v22.8h, v16.8h, v0.h[0] \n" // B = B * Matrix B - "mul v23.8h, v16.8h, v0.h[4] \n" // G = B * Matrix G - "mul v24.8h, v16.8h, v1.h[0] \n" // R = B * Matrix R - "mul v25.8h, v16.8h, v1.h[4] \n" // A = B * Matrix A - "mul v4.8h, v17.8h, v0.h[1] \n" // B += G * Matrix B - "mul v5.8h, v17.8h, v0.h[5] \n" // G += G * Matrix G - "mul v6.8h, v17.8h, v1.h[1] \n" // R += G * Matrix R - "mul v7.8h, v17.8h, v1.h[5] \n" // A += G * Matrix A - "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B - "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G - "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R - "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A - "mul v4.8h, v18.8h, v0.h[2] \n" // B += R * Matrix B - "mul v5.8h, v18.8h, v0.h[6] \n" // G += R * Matrix G - "mul v6.8h, v18.8h, v1.h[2] \n" // R += R * Matrix R - "mul v7.8h, v18.8h, v1.h[6] \n" // A += R * Matrix A - "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B - "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G - "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R - "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A - "mul v4.8h, v19.8h, v0.h[3] \n" // B += A * Matrix B - "mul v5.8h, v19.8h, v0.h[7] \n" // G += A * Matrix G - "mul v6.8h, v19.8h, v1.h[3] \n" // R += A * Matrix R - "mul v7.8h, v19.8h, v1.h[7] \n" // A += A * Matrix A - "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B - "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G - "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R - "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A - "sqshrun v16.8b, v22.8h, #6 \n" // 16 bit to 8 bit B - "sqshrun v17.8b, v23.8h, #6 \n" // 16 bit to 8 bit G - "sqshrun v18.8b, v24.8h, #6 \n" // 16 bit to 8 bit R - "sqshrun v19.8b, v25.8h, #6 \n" // 16 bit to 8 bit A - MEMACCESS(1) - "st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%1], #32 \n" // store 8 pixels. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(matrix_argb) // %3 - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", - "v18", "v19", "v22", "v23", "v24", "v25" - ); + "1: \n" + "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%0], #32 \n" // load 8 ARGB + "subs %w2, %w2, #8 \n" // 8 processed per loop. + "uxtl v16.8h, v16.8b \n" // b (0 .. 
255) 16 bit + "uxtl v17.8h, v17.8b \n" // g + "uxtl v18.8h, v18.8b \n" // r + "uxtl v19.8h, v19.8b \n" // a + "mul v22.8h, v16.8h, v0.h[0] \n" // B = B * Matrix B + "mul v23.8h, v16.8h, v0.h[4] \n" // G = B * Matrix G + "mul v24.8h, v16.8h, v1.h[0] \n" // R = B * Matrix R + "mul v25.8h, v16.8h, v1.h[4] \n" // A = B * Matrix A + "mul v4.8h, v17.8h, v0.h[1] \n" // B += G * Matrix B + "mul v5.8h, v17.8h, v0.h[5] \n" // G += G * Matrix G + "mul v6.8h, v17.8h, v1.h[1] \n" // R += G * Matrix R + "mul v7.8h, v17.8h, v1.h[5] \n" // A += G * Matrix A + "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B + "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G + "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R + "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A + "mul v4.8h, v18.8h, v0.h[2] \n" // B += R * Matrix B + "mul v5.8h, v18.8h, v0.h[6] \n" // G += R * Matrix G + "mul v6.8h, v18.8h, v1.h[2] \n" // R += R * Matrix R + "mul v7.8h, v18.8h, v1.h[6] \n" // A += R * Matrix A + "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B + "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G + "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R + "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A + "mul v4.8h, v19.8h, v0.h[3] \n" // B += A * Matrix B + "mul v5.8h, v19.8h, v0.h[7] \n" // G += A * Matrix G + "mul v6.8h, v19.8h, v1.h[3] \n" // R += A * Matrix R + "mul v7.8h, v19.8h, v1.h[7] \n" // A += A * Matrix A + "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B + "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G + "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R + "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A + "sqshrun v16.8b, v22.8h, #6 \n" // 16 bit to 8 bit B + "sqshrun v17.8b, v23.8h, #6 \n" // 16 bit to 8 bit G + "sqshrun v18.8b, v24.8h, #6 \n" // 16 bit to 8 bit R + "sqshrun v19.8b, v25.8h, #6 \n" // 16 bit to 8 bit A + "st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%1], #32 \n" // store 8 ARGB + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "r"(matrix_argb) // %3 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", + "v17", "v18", "v19", "v22", "v23", "v24", "v25"); } // TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable. // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. -void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - // 8 pixel loop. - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. - MEMACCESS(1) - "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more pixels. - "subs %w3, %w3, #8 \n" // 8 processed per loop. - "umull v0.8h, v0.8b, v4.8b \n" // multiply B - "umull v1.8h, v1.8b, v5.8b \n" // multiply G - "umull v2.8h, v2.8b, v6.8b \n" // multiply R - "umull v3.8h, v3.8b, v7.8b \n" // multiply A - "rshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit B - "rshrn v1.8b, v1.8h, #8 \n" // 16 bit to 8 bit G - "rshrn v2.8b, v2.8h, #8 \n" // 16 bit to 8 bit R - "rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A - MEMACCESS(2) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels - "b.gt 1b \n" - - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" - ); +void ARGBMultiplyRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + asm volatile( + // 8 pixel loop. 
+ "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB + "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more + "subs %w3, %w3, #8 \n" // 8 processed per loop. + "umull v0.8h, v0.8b, v4.8b \n" // multiply B + "umull v1.8h, v1.8b, v5.8b \n" // multiply G + "umull v2.8h, v2.8b, v6.8b \n" // multiply R + "umull v3.8h, v3.8b, v7.8b \n" // multiply A + "rshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit B + "rshrn v1.8b, v1.8h, #8 \n" // 16 bit to 8 bit G + "rshrn v2.8b, v2.8h, #8 \n" // 16 bit to 8 bit R + "rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB + "b.gt 1b \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } // Add 2 rows of ARGB pixels together, 8 pixels at a time. -void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - // 8 pixel loop. - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. - MEMACCESS(1) - "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more pixels. - "subs %w3, %w3, #8 \n" // 8 processed per loop. - "uqadd v0.8b, v0.8b, v4.8b \n" - "uqadd v1.8b, v1.8b, v5.8b \n" - "uqadd v2.8b, v2.8b, v6.8b \n" - "uqadd v3.8b, v3.8b, v7.8b \n" - MEMACCESS(2) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels - "b.gt 1b \n" - - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" - ); +void ARGBAddRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + asm volatile( + // 8 pixel loop. + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB + "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more + "subs %w3, %w3, #8 \n" // 8 processed per loop. + "uqadd v0.8b, v0.8b, v4.8b \n" + "uqadd v1.8b, v1.8b, v5.8b \n" + "uqadd v2.8b, v2.8b, v6.8b \n" + "uqadd v3.8b, v3.8b, v7.8b \n" + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB + "b.gt 1b \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } // Subtract 2 rows of ARGB pixels, 8 pixels at a time. -void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - // 8 pixel loop. - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. - MEMACCESS(1) - "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more pixels. - "subs %w3, %w3, #8 \n" // 8 processed per loop. - "uqsub v0.8b, v0.8b, v4.8b \n" - "uqsub v1.8b, v1.8b, v5.8b \n" - "uqsub v2.8b, v2.8b, v6.8b \n" - "uqsub v3.8b, v3.8b, v7.8b \n" - MEMACCESS(2) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels - "b.gt 1b \n" - - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" - ); +void ARGBSubtractRow_NEON(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { + asm volatile( + // 8 pixel loop. + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB + "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more + "subs %w3, %w3, #8 \n" // 8 processed per loop. 
+ "uqsub v0.8b, v0.8b, v4.8b \n" + "uqsub v1.8b, v1.8b, v5.8b \n" + "uqsub v2.8b, v2.8b, v6.8b \n" + "uqsub v3.8b, v3.8b, v7.8b \n" + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB + "b.gt 1b \n" + : "+r"(src_argb0), // %0 + "+r"(src_argb1), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } // Adds Sobel X and Sobel Y and stores Sobel into ARGB. @@ -2638,54 +2502,50 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1, // R = Sobel // G = Sobel // B = Sobel -void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - asm volatile ( - "movi v3.8b, #255 \n" // alpha - // 8 pixel loop. - "1: \n" - MEMACCESS(0) - "ld1 {v0.8b}, [%0], #8 \n" // load 8 sobelx. - MEMACCESS(1) - "ld1 {v1.8b}, [%1], #8 \n" // load 8 sobely. - "subs %w3, %w3, #8 \n" // 8 processed per loop. - "uqadd v0.8b, v0.8b, v1.8b \n" // add - "orr v1.8b, v0.8b, v0.8b \n" - "orr v2.8b, v0.8b, v0.8b \n" - MEMACCESS(2) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels - "b.gt 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1", "v2", "v3" - ); +void SobelRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width) { + asm volatile( + "movi v3.8b, #255 \n" // alpha + // 8 pixel loop. + "1: \n" + "ld1 {v0.8b}, [%0], #8 \n" // load 8 sobelx. + "ld1 {v1.8b}, [%1], #8 \n" // load 8 sobely. + "subs %w3, %w3, #8 \n" // 8 processed per loop. + "uqadd v0.8b, v0.8b, v1.8b \n" // add + "orr v1.8b, v0.8b, v0.8b \n" + "orr v2.8b, v0.8b, v0.8b \n" + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB + "b.gt 1b \n" + : "+r"(src_sobelx), // %0 + "+r"(src_sobely), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2", "v3"); } // Adds Sobel X and Sobel Y and stores Sobel into plane. -void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width) { - asm volatile ( - // 16 pixel loop. - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load 16 sobelx. - MEMACCESS(1) - "ld1 {v1.16b}, [%1], #16 \n" // load 16 sobely. - "subs %w3, %w3, #16 \n" // 16 processed per loop. - "uqadd v0.16b, v0.16b, v1.16b \n" // add - MEMACCESS(2) - "st1 {v0.16b}, [%2], #16 \n" // store 16 pixels. - "b.gt 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_y), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1" - ); +void SobelToPlaneRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width) { + asm volatile( + // 16 pixel loop. + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load 16 sobelx. + "ld1 {v1.16b}, [%1], #16 \n" // load 16 sobely. + "subs %w3, %w3, #16 \n" // 16 processed per loop. + "uqadd v0.16b, v0.16b, v1.16b \n" // add + "st1 {v0.16b}, [%2], #16 \n" // store 16 pixels. + "b.gt 1b \n" + : "+r"(src_sobelx), // %0 + "+r"(src_sobely), // %1 + "+r"(dst_y), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1"); } // Mixes Sobel X, Sobel Y and Sobel into ARGB. @@ -2693,114 +2553,329 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, // R = Sobel X // G = Sobel // B = Sobel Y -void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - asm volatile ( - "movi v3.8b, #255 \n" // alpha - // 8 pixel loop. 
- "1: \n" - MEMACCESS(0) - "ld1 {v2.8b}, [%0], #8 \n" // load 8 sobelx. - MEMACCESS(1) - "ld1 {v0.8b}, [%1], #8 \n" // load 8 sobely. - "subs %w3, %w3, #8 \n" // 8 processed per loop. - "uqadd v1.8b, v0.8b, v2.8b \n" // add - MEMACCESS(2) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels - "b.gt 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1", "v2", "v3" - ); +void SobelXYRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width) { + asm volatile( + "movi v3.8b, #255 \n" // alpha + // 8 pixel loop. + "1: \n" + "ld1 {v2.8b}, [%0], #8 \n" // load 8 sobelx. + "ld1 {v0.8b}, [%1], #8 \n" // load 8 sobely. + "subs %w3, %w3, #8 \n" // 8 processed per loop. + "uqadd v1.8b, v0.8b, v2.8b \n" // add + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB + "b.gt 1b \n" + : "+r"(src_sobelx), // %0 + "+r"(src_sobely), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : + : "cc", "memory", "v0", "v1", "v2", "v3"); } // SobelX as a matrix is // -1 0 1 // -2 0 2 // -1 0 1 -void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld1 {v0.8b}, [%0],%5 \n" // top - MEMACCESS(0) - "ld1 {v1.8b}, [%0],%6 \n" - "usubl v0.8h, v0.8b, v1.8b \n" - MEMACCESS(1) - "ld1 {v2.8b}, [%1],%5 \n" // center * 2 - MEMACCESS(1) - "ld1 {v3.8b}, [%1],%6 \n" - "usubl v1.8h, v2.8b, v3.8b \n" - "add v0.8h, v0.8h, v1.8h \n" - "add v0.8h, v0.8h, v1.8h \n" - MEMACCESS(2) - "ld1 {v2.8b}, [%2],%5 \n" // bottom - MEMACCESS(2) - "ld1 {v3.8b}, [%2],%6 \n" - "subs %w4, %w4, #8 \n" // 8 pixels - "usubl v1.8h, v2.8b, v3.8b \n" - "add v0.8h, v0.8h, v1.8h \n" - "abs v0.8h, v0.8h \n" - "uqxtn v0.8b, v0.8h \n" - MEMACCESS(3) - "st1 {v0.8b}, [%3], #8 \n" // store 8 sobelx - "b.gt 1b \n" - : "+r"(src_y0), // %0 - "+r"(src_y1), // %1 - "+r"(src_y2), // %2 - "+r"(dst_sobelx), // %3 - "+r"(width) // %4 - : "r"(2LL), // %5 - "r"(6LL) // %6 - : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); +void SobelXRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int width) { + asm volatile( + "1: \n" + "ld1 {v0.8b}, [%0],%5 \n" // top + "ld1 {v1.8b}, [%0],%6 \n" + "usubl v0.8h, v0.8b, v1.8b \n" + "ld1 {v2.8b}, [%1],%5 \n" // center * 2 + "ld1 {v3.8b}, [%1],%6 \n" + "usubl v1.8h, v2.8b, v3.8b \n" + "add v0.8h, v0.8h, v1.8h \n" + "add v0.8h, v0.8h, v1.8h \n" + "ld1 {v2.8b}, [%2],%5 \n" // bottom + "ld1 {v3.8b}, [%2],%6 \n" + "subs %w4, %w4, #8 \n" // 8 pixels + "usubl v1.8h, v2.8b, v3.8b \n" + "add v0.8h, v0.8h, v1.8h \n" + "abs v0.8h, v0.8h \n" + "uqxtn v0.8b, v0.8h \n" + "st1 {v0.8b}, [%3], #8 \n" // store 8 sobelx + "b.gt 1b \n" + : "+r"(src_y0), // %0 + "+r"(src_y1), // %1 + "+r"(src_y2), // %2 + "+r"(dst_sobelx), // %3 + "+r"(width) // %4 + : "r"(2LL), // %5 + "r"(6LL) // %6 + : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List + ); } // SobelY as a matrix is // -1 -2 -1 // 0 0 0 // 1 2 1 -void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld1 {v0.8b}, [%0],%4 \n" // left - MEMACCESS(1) - "ld1 {v1.8b}, [%1],%4 \n" - "usubl v0.8h, v0.8b, v1.8b \n" - MEMACCESS(0) - "ld1 {v2.8b}, [%0],%4 \n" // center * 2 - MEMACCESS(1) - "ld1 {v3.8b}, [%1],%4 \n" - "usubl v1.8h, v2.8b, v3.8b \n" - "add v0.8h, v0.8h, v1.8h \n" - "add v0.8h, v0.8h, v1.8h \n" 
- MEMACCESS(0) - "ld1 {v2.8b}, [%0],%5 \n" // right - MEMACCESS(1) - "ld1 {v3.8b}, [%1],%5 \n" - "subs %w3, %w3, #8 \n" // 8 pixels - "usubl v1.8h, v2.8b, v3.8b \n" - "add v0.8h, v0.8h, v1.8h \n" - "abs v0.8h, v0.8h \n" - "uqxtn v0.8b, v0.8h \n" - MEMACCESS(2) - "st1 {v0.8b}, [%2], #8 \n" // store 8 sobely - "b.gt 1b \n" - : "+r"(src_y0), // %0 - "+r"(src_y1), // %1 - "+r"(dst_sobely), // %2 - "+r"(width) // %3 - : "r"(1LL), // %4 - "r"(6LL) // %5 - : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List - ); +void SobelYRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width) { + asm volatile( + "1: \n" + "ld1 {v0.8b}, [%0],%4 \n" // left + "ld1 {v1.8b}, [%1],%4 \n" + "usubl v0.8h, v0.8b, v1.8b \n" + "ld1 {v2.8b}, [%0],%4 \n" // center * 2 + "ld1 {v3.8b}, [%1],%4 \n" + "usubl v1.8h, v2.8b, v3.8b \n" + "add v0.8h, v0.8h, v1.8h \n" + "add v0.8h, v0.8h, v1.8h \n" + "ld1 {v2.8b}, [%0],%5 \n" // right + "ld1 {v3.8b}, [%1],%5 \n" + "subs %w3, %w3, #8 \n" // 8 pixels + "usubl v1.8h, v2.8b, v3.8b \n" + "add v0.8h, v0.8h, v1.8h \n" + "abs v0.8h, v0.8h \n" + "uqxtn v0.8b, v0.8h \n" + "st1 {v0.8b}, [%2], #8 \n" // store 8 sobely + "b.gt 1b \n" + : "+r"(src_y0), // %0 + "+r"(src_y1), // %1 + "+r"(dst_sobely), // %2 + "+r"(width) // %3 + : "r"(1LL), // %4 + "r"(6LL) // %5 + : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List + ); } + +// Caveat - rounds float to half float whereas scaling version truncates. +void HalfFloat1Row_NEON(const uint16_t* src, + uint16_t* dst, + float /*unused*/, + int width) { + asm volatile( + "1: \n" + "ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts + "subs %w2, %w2, #8 \n" // 8 pixels per loop + "uxtl v2.4s, v1.4h \n" // 8 int's + "uxtl2 v3.4s, v1.8h \n" + "scvtf v2.4s, v2.4s \n" // 8 floats + "scvtf v3.4s, v3.4s \n" + "fcvtn v1.4h, v2.4s \n" // 8 half floats + "fcvtn2 v1.8h, v3.4s \n" + "st1 {v1.16b}, [%1], #16 \n" // store 8 shorts + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : + : "cc", "memory", "v1", "v2", "v3"); +} + +void HalfFloatRow_NEON(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { + asm volatile( + "1: \n" + "ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts + "subs %w2, %w2, #8 \n" // 8 pixels per loop + "uxtl v2.4s, v1.4h \n" // 8 int's + "uxtl2 v3.4s, v1.8h \n" + "scvtf v2.4s, v2.4s \n" // 8 floats + "scvtf v3.4s, v3.4s \n" + "fmul v2.4s, v2.4s, %3.s[0] \n" // adjust exponent + "fmul v3.4s, v3.4s, %3.s[0] \n" + "uqshrn v1.4h, v2.4s, #13 \n" // isolate halffloat + "uqshrn2 v1.8h, v3.4s, #13 \n" + "st1 {v1.16b}, [%1], #16 \n" // store 8 shorts + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "w"(scale * 1.9259299444e-34f) // %3 + : "cc", "memory", "v1", "v2", "v3"); +} + +void ByteToFloatRow_NEON(const uint8_t* src, + float* dst, + float scale, + int width) { + asm volatile( + "1: \n" + "ld1 {v1.8b}, [%0], #8 \n" // load 8 bytes + "subs %w2, %w2, #8 \n" // 8 pixels per loop + "uxtl v1.8h, v1.8b \n" // 8 shorts + "uxtl v2.4s, v1.4h \n" // 8 ints + "uxtl2 v3.4s, v1.8h \n" + "scvtf v2.4s, v2.4s \n" // 8 floats + "scvtf v3.4s, v3.4s \n" + "fmul v2.4s, v2.4s, %3.s[0] \n" // scale + "fmul v3.4s, v3.4s, %3.s[0] \n" + "st1 {v2.16b, v3.16b}, [%1], #32 \n" // store 8 floats + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "w"(scale) // %3 + : "cc", "memory", "v1", "v2", "v3"); +} + +float ScaleMaxSamples_NEON(const float* src, + float* dst, + float scale, + int width) { + float fmax; + asm volatile( + "movi v5.4s, #0 \n" // 
max + "movi v6.4s, #0 \n" + + "1: \n" + "ld1 {v1.4s, v2.4s}, [%0], #32 \n" // load 8 samples + "subs %w2, %w2, #8 \n" // 8 processed per loop + "fmul v3.4s, v1.4s, %4.s[0] \n" // scale + "fmul v4.4s, v2.4s, %4.s[0] \n" // scale + "fmax v5.4s, v5.4s, v1.4s \n" // max + "fmax v6.4s, v6.4s, v2.4s \n" + "st1 {v3.4s, v4.4s}, [%1], #32 \n" // store 8 samples + "b.gt 1b \n" + "fmax v5.4s, v5.4s, v6.4s \n" // max + "fmaxv %s3, v5.4s \n" // signed max acculator + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width), // %2 + "=w"(fmax) // %3 + : "w"(scale) // %4 + : "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6"); + return fmax; +} + +float ScaleSumSamples_NEON(const float* src, + float* dst, + float scale, + int width) { + float fsum; + asm volatile( + "movi v5.4s, #0 \n" // max + "movi v6.4s, #0 \n" // max + + "1: \n" + "ld1 {v1.4s, v2.4s}, [%0], #32 \n" // load 8 samples + "subs %w2, %w2, #8 \n" // 8 processed per loop + "fmul v3.4s, v1.4s, %4.s[0] \n" // scale + "fmul v4.4s, v2.4s, %4.s[0] \n" + "fmla v5.4s, v1.4s, v1.4s \n" // sum of squares + "fmla v6.4s, v2.4s, v2.4s \n" + "st1 {v3.4s, v4.4s}, [%1], #32 \n" // store 8 samples + "b.gt 1b \n" + "faddp v5.4s, v5.4s, v6.4s \n" + "faddp v5.4s, v5.4s, v5.4s \n" + "faddp %3.4s, v5.4s, v5.4s \n" // sum + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width), // %2 + "=w"(fsum) // %3 + : "w"(scale) // %4 + : "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6"); + return fsum; +} + +void ScaleSamples_NEON(const float* src, float* dst, float scale, int width) { + asm volatile( + "1: \n" + "ld1 {v1.4s, v2.4s}, [%0], #32 \n" // load 8 samples + "subs %w2, %w2, #8 \n" // 8 processed per loop + "fmul v1.4s, v1.4s, %3.s[0] \n" // scale + "fmul v2.4s, v2.4s, %3.s[0] \n" // scale + "st1 {v1.4s, v2.4s}, [%1], #32 \n" // store 8 samples + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "w"(scale) // %3 + : "cc", "memory", "v1", "v2"); +} + +// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row. +void GaussCol_NEON(const uint16_t* src0, + const uint16_t* src1, + const uint16_t* src2, + const uint16_t* src3, + const uint16_t* src4, + uint32_t* dst, + int width) { + asm volatile( + "movi v6.8h, #4 \n" // constant 4 + "movi v7.8h, #6 \n" // constant 6 + + "1: \n" + "ld1 {v1.8h}, [%0], #16 \n" // load 8 samples, 5 rows + "ld1 {v2.8h}, [%4], #16 \n" + "uaddl v0.4s, v1.4h, v2.4h \n" // * 1 + "uaddl2 v1.4s, v1.8h, v2.8h \n" // * 1 + "ld1 {v2.8h}, [%1], #16 \n" + "umlal v0.4s, v2.4h, v6.4h \n" // * 4 + "umlal2 v1.4s, v2.8h, v6.8h \n" // * 4 + "ld1 {v2.8h}, [%2], #16 \n" + "umlal v0.4s, v2.4h, v7.4h \n" // * 6 + "umlal2 v1.4s, v2.8h, v7.8h \n" // * 6 + "ld1 {v2.8h}, [%3], #16 \n" + "umlal v0.4s, v2.4h, v6.4h \n" // * 4 + "umlal2 v1.4s, v2.8h, v6.8h \n" // * 4 + "subs %w6, %w6, #8 \n" // 8 processed per loop + "st1 {v0.4s,v1.4s}, [%5], #32 \n" // store 8 samples + "b.gt 1b \n" + : "+r"(src0), // %0 + "+r"(src1), // %1 + "+r"(src2), // %2 + "+r"(src3), // %3 + "+r"(src4), // %4 + "+r"(dst), // %5 + "+r"(width) // %6 + : + : "cc", "memory", "v0", "v1", "v2", "v6", "v7"); +} + +// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row. 
+void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) { + const uint32_t* src1 = src + 1; + const uint32_t* src2 = src + 2; + const uint32_t* src3 = src + 3; + asm volatile( + "movi v6.4s, #4 \n" // constant 4 + "movi v7.4s, #6 \n" // constant 6 + + "1: \n" + "ld1 {v0.4s,v1.4s,v2.4s}, [%0], %6 \n" // load 12 source samples + "add v0.4s, v0.4s, v1.4s \n" // * 1 + "add v1.4s, v1.4s, v2.4s \n" // * 1 + "ld1 {v2.4s,v3.4s}, [%2], #32 \n" + "mla v0.4s, v2.4s, v7.4s \n" // * 6 + "mla v1.4s, v3.4s, v7.4s \n" // * 6 + "ld1 {v2.4s,v3.4s}, [%1], #32 \n" + "ld1 {v4.4s,v5.4s}, [%3], #32 \n" + "add v2.4s, v2.4s, v4.4s \n" // add rows for * 4 + "add v3.4s, v3.4s, v5.4s \n" + "mla v0.4s, v2.4s, v6.4s \n" // * 4 + "mla v1.4s, v3.4s, v6.4s \n" // * 4 + "subs %w5, %w5, #8 \n" // 8 processed per loop + "uqrshrn v0.4h, v0.4s, #8 \n" // round and pack + "uqrshrn2 v0.8h, v1.4s, #8 \n" + "st1 {v0.8h}, [%4], #16 \n" // store 8 samples + "b.gt 1b \n" + : "+r"(src), // %0 + "+r"(src1), // %1 + "+r"(src2), // %2 + "+r"(src3), // %3 + "+r"(dst), // %4 + "+r"(width) // %5 + : "r"(32LL) // %6 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); +} + #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #ifdef __cplusplus diff --git a/media/libvpx/libvpx/third_party/libyuv/source/row_win.cc b/media/libvpx/libvpx/third_party/libyuv/source/row_win.cc index 2a3da8969f1f..5500d7f5a64e 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/row_win.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/row_win.cc @@ -28,72 +28,71 @@ extern "C" { #if defined(_M_X64) // Read 4 UV from 422, upsample to 8 UV. -#define READYUV422 \ - xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ - xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ - xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ - xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ - u_buf += 4; \ - xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ - xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ - y_buf += 8; +#define READYUV422 \ + xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf); \ + xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \ + xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ + xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ + u_buf += 4; \ + xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ + xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ + y_buf += 8; // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. -#define READYUVA422 \ - xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ - xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ - xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ - xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ - u_buf += 4; \ - xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ - xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ - y_buf += 8; \ - xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \ - a_buf += 8; +#define READYUVA422 \ + xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf); \ + xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \ + xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ + xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ + u_buf += 4; \ + xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ + xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ + y_buf += 8; \ + xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \ + a_buf += 8; // Convert 8 pixels: 8 UV and 8 Y. 
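The YUVTORGB macro below performs that conversion in 6-bit fixed point, with the coefficients and biases packed in yuvconstants. As a float reference for what it approximates when the default BT.601 studio-range constants are used (a sketch only; ClampByte and YuvToRgbRef are invented names):

#include <stdint.h>

static uint8_t ClampByte(float v) {
  return (uint8_t)(v < 0.f ? 0.f : (v > 255.f ? 255.f : v));
}

// Classic BT.601 studio-range YUV to RGB; the SIMD path folds these
// multiplies and offsets into _mm_maddubs_epi16/_mm_mulhi_epu16 and a >> 6.
static void YuvToRgbRef(uint8_t y, uint8_t u, uint8_t v,
                        uint8_t* r, uint8_t* g, uint8_t* b) {
  const float yf = 1.164f * (float)(y - 16);  // expand Y from [16, 235]
  *r = ClampByte(yf + 1.596f * (float)(v - 128));
  *g = ClampByte(yf - 0.813f * (float)(v - 128) - 0.391f * (float)(u - 128));
  *b = ClampByte(yf + 2.018f * (float)(u - 128));
}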
-#define YUVTORGB(yuvconstants) \ - xmm1 = _mm_loadu_si128(&xmm0); \ - xmm2 = _mm_loadu_si128(&xmm0); \ - xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)yuvconstants->kUVToB); \ - xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \ - xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)yuvconstants->kUVToR); \ - xmm0 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasB, xmm0); \ - xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \ - xmm2 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasR, xmm2); \ - xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)yuvconstants->kYToRgb); \ - xmm0 = _mm_adds_epi16(xmm0, xmm4); \ - xmm1 = _mm_adds_epi16(xmm1, xmm4); \ - xmm2 = _mm_adds_epi16(xmm2, xmm4); \ - xmm0 = _mm_srai_epi16(xmm0, 6); \ - xmm1 = _mm_srai_epi16(xmm1, 6); \ - xmm2 = _mm_srai_epi16(xmm2, 6); \ - xmm0 = _mm_packus_epi16(xmm0, xmm0); \ - xmm1 = _mm_packus_epi16(xmm1, xmm1); \ - xmm2 = _mm_packus_epi16(xmm2, xmm2); +#define YUVTORGB(yuvconstants) \ + xmm1 = _mm_loadu_si128(&xmm0); \ + xmm2 = _mm_loadu_si128(&xmm0); \ + xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)yuvconstants->kUVToB); \ + xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \ + xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)yuvconstants->kUVToR); \ + xmm0 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasB, xmm0); \ + xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \ + xmm2 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasR, xmm2); \ + xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)yuvconstants->kYToRgb); \ + xmm0 = _mm_adds_epi16(xmm0, xmm4); \ + xmm1 = _mm_adds_epi16(xmm1, xmm4); \ + xmm2 = _mm_adds_epi16(xmm2, xmm4); \ + xmm0 = _mm_srai_epi16(xmm0, 6); \ + xmm1 = _mm_srai_epi16(xmm1, 6); \ + xmm2 = _mm_srai_epi16(xmm2, 6); \ + xmm0 = _mm_packus_epi16(xmm0, xmm0); \ + xmm1 = _mm_packus_epi16(xmm1, xmm1); \ + xmm2 = _mm_packus_epi16(xmm2, xmm2); // Store 8 ARGB values. 
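STOREARGB, next, interleaves the packed B, G, R and alpha registers so each pixel lands in memory as B, G, R, A, which reads back as a little-endian 0xAARRGGBB word. One pixel in scalar form, as a sketch (the helper name is invented):

#include <stdint.h>

// Byte order produced by STOREARGB: B,G,R,A per pixel in memory.
static void StoreArgbPixel(uint8_t b, uint8_t g, uint8_t r, uint8_t a,
                           uint8_t* dst_argb) {
  dst_argb[0] = b;
  dst_argb[1] = g;
  dst_argb[2] = r;
  dst_argb[3] = a;
}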
-#define STOREARGB \ - xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ - xmm2 = _mm_unpacklo_epi8(xmm2, xmm5); \ - xmm1 = _mm_loadu_si128(&xmm0); \ - xmm0 = _mm_unpacklo_epi16(xmm0, xmm2); \ - xmm1 = _mm_unpackhi_epi16(xmm1, xmm2); \ - _mm_storeu_si128((__m128i *)dst_argb, xmm0); \ - _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \ - dst_argb += 32; - +#define STOREARGB \ + xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ + xmm2 = _mm_unpacklo_epi8(xmm2, xmm5); \ + xmm1 = _mm_loadu_si128(&xmm0); \ + xmm0 = _mm_unpacklo_epi16(xmm0, xmm2); \ + xmm1 = _mm_unpackhi_epi16(xmm1, xmm2); \ + _mm_storeu_si128((__m128i*)dst_argb, xmm0); \ + _mm_storeu_si128((__m128i*)(dst_argb + 16), xmm1); \ + dst_argb += 32; #if defined(HAS_I422TOARGBROW_SSSE3) -void I422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, +void I422ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __m128i xmm0, xmm1, xmm2, xmm4; const __m128i xmm5 = _mm_set1_epi8(-1); - const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; + const ptrdiff_t offset = (uint8_t*)v_buf - (uint8_t*)u_buf; while (width > 0) { READYUV422 YUVTORGB(yuvconstants) @@ -104,15 +103,15 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, #endif #if defined(HAS_I422ALPHATOARGBROW_SSSE3) -void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __m128i xmm0, xmm1, xmm2, xmm4, xmm5; - const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; + const ptrdiff_t offset = (uint8_t*)v_buf - (uint8_t*)u_buf; while (width > 0) { READYUVA422 YUVTORGB(yuvconstants) @@ -127,175 +126,143 @@ void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, #ifdef HAS_ARGBTOYROW_SSSE3 // Constants for ARGB. -static const vec8 kARGBToY = { - 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0 -}; +static const vec8 kARGBToY = {13, 65, 33, 0, 13, 65, 33, 0, + 13, 65, 33, 0, 13, 65, 33, 0}; // JPeg full range. -static const vec8 kARGBToYJ = { - 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0 -}; +static const vec8 kARGBToYJ = {15, 75, 38, 0, 15, 75, 38, 0, + 15, 75, 38, 0, 15, 75, 38, 0}; -static const vec8 kARGBToU = { - 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0 -}; +static const vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0, + 112, -74, -38, 0, 112, -74, -38, 0}; -static const vec8 kARGBToUJ = { - 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0 -}; +static const vec8 kARGBToUJ = {127, -84, -43, 0, 127, -84, -43, 0, + 127, -84, -43, 0, 127, -84, -43, 0}; static const vec8 kARGBToV = { - -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, + -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, }; -static const vec8 kARGBToVJ = { - -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0 -}; +static const vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0, + -20, -107, 127, 0, -20, -107, 127, 0}; // vpshufb for vphaddw + vpackuswb packed to shorts. 
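Before the AVX shuffle table, it may help to spell out what the coefficient vectors above encode: 7-bit (luma) and 8-bit (chroma) fixed-point dot products over the B, G, R bytes of each pixel. The studio-range luma adds 16 after the shift, while the JPEG variant is full range with explicit rounding via kAddYJ64. A scalar sketch, with invented helper names:

#include <stdint.h>

static uint8_t ArgbToY(uint8_t b, uint8_t g, uint8_t r) {
  return (uint8_t)(((13 * b + 65 * g + 33 * r) >> 7) + 16);  // kARGBToY, kAddY16
}

static uint8_t ArgbToYJ(uint8_t b, uint8_t g, uint8_t r) {
  return (uint8_t)((15 * b + 75 * g + 38 * r + 64) >> 7);  // kARGBToYJ, kAddYJ64
}

// Chroma; an arithmetic right shift is assumed, matching psraw.
static uint8_t ArgbToU(uint8_t b, uint8_t g, uint8_t r) {
  return (uint8_t)(((112 * b - 74 * g - 38 * r) >> 8) + 128);  // kARGBToU, kAddUV128
}

static uint8_t ArgbToV(uint8_t b, uint8_t g, uint8_t r) {
  return (uint8_t)(((-18 * b - 94 * g + 112 * r) >> 8) + 128);  // kARGBToV
}

In the ARGBToUVRow family further below, the chroma sums are applied after a 2x2 subsample/average of the source, and the BGRA/ABGR/RGBA tables that follow are the same dot products with the coefficients permuted to match each format's byte order.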
static const lvec8 kShufARGBToUV_AVX = { - 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, - 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15 -}; + 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, + 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15}; // Constants for BGRA. -static const vec8 kBGRAToY = { - 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13 -}; +static const vec8 kBGRAToY = {0, 33, 65, 13, 0, 33, 65, 13, + 0, 33, 65, 13, 0, 33, 65, 13}; -static const vec8 kBGRAToU = { - 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112 -}; +static const vec8 kBGRAToU = {0, -38, -74, 112, 0, -38, -74, 112, + 0, -38, -74, 112, 0, -38, -74, 112}; -static const vec8 kBGRAToV = { - 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18 -}; +static const vec8 kBGRAToV = {0, 112, -94, -18, 0, 112, -94, -18, + 0, 112, -94, -18, 0, 112, -94, -18}; // Constants for ABGR. -static const vec8 kABGRToY = { - 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0 -}; +static const vec8 kABGRToY = {33, 65, 13, 0, 33, 65, 13, 0, + 33, 65, 13, 0, 33, 65, 13, 0}; -static const vec8 kABGRToU = { - -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0 -}; +static const vec8 kABGRToU = {-38, -74, 112, 0, -38, -74, 112, 0, + -38, -74, 112, 0, -38, -74, 112, 0}; -static const vec8 kABGRToV = { - 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0 -}; +static const vec8 kABGRToV = {112, -94, -18, 0, 112, -94, -18, 0, + 112, -94, -18, 0, 112, -94, -18, 0}; // Constants for RGBA. -static const vec8 kRGBAToY = { - 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33 -}; +static const vec8 kRGBAToY = {0, 13, 65, 33, 0, 13, 65, 33, + 0, 13, 65, 33, 0, 13, 65, 33}; -static const vec8 kRGBAToU = { - 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38 -}; +static const vec8 kRGBAToU = {0, 112, -74, -38, 0, 112, -74, -38, + 0, 112, -74, -38, 0, 112, -74, -38}; -static const vec8 kRGBAToV = { - 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112 -}; +static const vec8 kRGBAToV = {0, -18, -94, 112, 0, -18, -94, 112, + 0, -18, -94, 112, 0, -18, -94, 112}; -static const uvec8 kAddY16 = { - 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u -}; +static const uvec8 kAddY16 = {16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, + 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u}; // 7 bit fixed point 0.5. -static const vec16 kAddYJ64 = { - 64, 64, 64, 64, 64, 64, 64, 64 -}; +static const vec16 kAddYJ64 = {64, 64, 64, 64, 64, 64, 64, 64}; -static const uvec8 kAddUV128 = { - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u -}; +static const uvec8 kAddUV128 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, + 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; -static const uvec16 kAddUVJ128 = { - 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u -}; +static const uvec16 kAddUVJ128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u, + 0x8080u, 0x8080u, 0x8080u, 0x8080u}; // Shuffle table for converting RGB24 to ARGB. static const uvec8 kShuffleMaskRGB24ToARGB = { - 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u -}; + 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u}; // Shuffle table for converting RAW to ARGB. 
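All of the mask tables in this block feed pshufb/vpshufb, whose per-byte behavior is easy to model in scalar code; an index with the high bit set produces zero, which is how the RAW-to-RGB24 masks blank their upper bytes. A sketch (the helper name is invented):

#include <stdint.h>

// Scalar model of SSE pshufb over one 16-byte lane: dst[i] selects
// src[mask[i] & 15], or 0 when the mask byte has its high bit set (128u).
static void Pshufb16Ref(const uint8_t* src, const uint8_t* mask, uint8_t* dst) {
  for (int i = 0; i < 16; ++i) {
    dst[i] = (uint8_t)((mask[i] & 0x80) ? 0 : src[mask[i] & 15]);
  }
}

The RGB24/RAW-to-ARGB rows then OR in 0xff000000, so whatever byte was routed into the alpha slot is replaced with opaque alpha.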
-static const uvec8 kShuffleMaskRAWToARGB = { - 2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u -}; +static const uvec8 kShuffleMaskRAWToARGB = {2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, + 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u}; // Shuffle table for converting RAW to RGB24. First 8. static const uvec8 kShuffleMaskRAWToRGB24_0 = { - 2u, 1u, 0u, 5u, 4u, 3u, 8u, 7u, - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u -}; + 2u, 1u, 0u, 5u, 4u, 3u, 8u, 7u, + 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting RAW to RGB24. Middle 8. static const uvec8 kShuffleMaskRAWToRGB24_1 = { - 2u, 7u, 6u, 5u, 10u, 9u, 8u, 13u, - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u -}; + 2u, 7u, 6u, 5u, 10u, 9u, 8u, 13u, + 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting RAW to RGB24. Last 8. static const uvec8 kShuffleMaskRAWToRGB24_2 = { - 8u, 7u, 12u, 11u, 10u, 15u, 14u, 13u, - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u -}; + 8u, 7u, 12u, 11u, 10u, 15u, 14u, 13u, + 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGB to RGB24. static const uvec8 kShuffleMaskARGBToRGB24 = { - 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u -}; + 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGB to RAW. static const uvec8 kShuffleMaskARGBToRAW = { - 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u -}; + 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4 static const uvec8 kShuffleMaskARGBToRGB24_0 = { - 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u -}; + 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u}; // YUY2 shuf 16 Y to 32 Y. -static const lvec8 kShuffleYUY2Y = { - 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, - 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 -}; +static const lvec8 kShuffleYUY2Y = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, + 10, 12, 12, 14, 14, 0, 0, 2, 2, 4, 4, + 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}; // YUY2 shuf 8 UV to 16 UV. -static const lvec8 kShuffleYUY2UV = { - 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15, - 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15 -}; +static const lvec8 kShuffleYUY2UV = {1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, + 11, 13, 15, 13, 15, 1, 3, 1, 3, 5, 7, + 5, 7, 9, 11, 9, 11, 13, 15, 13, 15}; // UYVY shuf 16 Y to 32 Y. -static const lvec8 kShuffleUYVYY = { - 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15, - 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15 -}; +static const lvec8 kShuffleUYVYY = {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, + 11, 13, 13, 15, 15, 1, 1, 3, 3, 5, 5, + 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}; // UYVY shuf 8 UV to 16 UV. -static const lvec8 kShuffleUYVYUV = { - 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14, - 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 -}; +static const lvec8 kShuffleUYVYUV = {0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, + 10, 12, 14, 12, 14, 0, 2, 0, 2, 4, 6, + 4, 6, 8, 10, 8, 10, 12, 14, 12, 14}; // NV21 shuf 8 VU to 16 UV. static const lvec8 kShuffleNV21 = { - 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, - 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, + 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, + 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, }; // Duplicates gray value 3 times and fills in alpha opaque. 
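A scalar model of the J400ToARGBRow functions that follow (J400ToArgbRef is an invented name): the gray value goes to B, G and R with alpha forced opaque, which is what the punpck plus por-0xff000000 sequence achieves 16 pixels at a time.

#include <stdint.h>

static void J400ToArgbRef(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  for (int i = 0; i < width; ++i) {
    const uint8_t y = src_y[i];
    dst_argb[4 * i + 0] = y;    // B
    dst_argb[4 * i + 1] = y;    // G
    dst_argb[4 * i + 2] = y;    // R
    dst_argb[4 * i + 3] = 255;  // A, opaque
  }
}

Similarly, the RGB565 rows further below expand 5- and 6-bit channels to 8 bits by bit replication; the multiply-by-(256 + 8) trick described in their comments reduces to this sketch:

// Bit-replicating channel expansion: pmulhuw by (256 + 8) on a left-aligned
// 5-bit field is the classic shift-and-or replicate, and likewise for 6 bits.
static uint8_t Expand5(uint8_t v) { return (uint8_t)((v << 3) | (v >> 2)); }
static uint8_t Expand6(uint8_t v) { return (uint8_t)((v << 2) | (v >> 4)); }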
-__declspec(naked) -void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width) { +__declspec(naked) void J400ToARGBRow_SSE2(const uint8_t* src_y, + uint8_t* dst_argb, + int width) { __asm { - mov eax, [esp + 4] // src_y - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // width - pcmpeqb xmm5, xmm5 // generate mask 0xff000000 + mov eax, [esp + 4] // src_y + mov edx, [esp + 8] // dst_argb + mov ecx, [esp + 12] // width + pcmpeqb xmm5, xmm5 // generate mask 0xff000000 pslld xmm5, 24 convertloop: @@ -318,13 +285,14 @@ void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width) { #ifdef HAS_J400TOARGBROW_AVX2 // Duplicates gray value 3 times and fills in alpha opaque. -__declspec(naked) -void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width) { +__declspec(naked) void J400ToARGBRow_AVX2(const uint8_t* src_y, + uint8_t* dst_argb, + int width) { __asm { - mov eax, [esp + 4] // src_y - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // width - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000 + mov eax, [esp + 4] // src_y + mov edx, [esp + 8] // dst_argb + mov ecx, [esp + 12] // width + vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000 vpslld ymm5, ymm5, 24 convertloop: @@ -348,13 +316,14 @@ void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width) { } #endif // HAS_J400TOARGBROW_AVX2 -__declspec(naked) -void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width) { +__declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width) { __asm { - mov eax, [esp + 4] // src_rgb24 - mov edx, [esp + 8] // dst_argb + mov eax, [esp + 4] // src_rgb24 + mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width - pcmpeqb xmm5, xmm5 // generate mask 0xff000000 + pcmpeqb xmm5, xmm5 // generate mask 0xff000000 pslld xmm5, 24 movdqa xmm4, xmmword ptr kShuffleMaskRGB24ToARGB @@ -364,17 +333,17 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width) { movdqu xmm3, [eax + 32] lea eax, [eax + 48] movdqa xmm2, xmm3 - palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]} + palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]} pshufb xmm2, xmm4 por xmm2, xmm5 - palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]} + palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]} pshufb xmm0, xmm4 movdqu [edx + 32], xmm2 por xmm0, xmm5 pshufb xmm1, xmm4 movdqu [edx], xmm0 por xmm1, xmm5 - palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]} + palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]} pshufb xmm3, xmm4 movdqu [edx + 16], xmm1 por xmm3, xmm5 @@ -386,14 +355,14 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width) { } } -__declspec(naked) -void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, - int width) { +__declspec(naked) void RAWToARGBRow_SSSE3(const uint8_t* src_raw, + uint8_t* dst_argb, + int width) { __asm { - mov eax, [esp + 4] // src_raw - mov edx, [esp + 8] // dst_argb + mov eax, [esp + 4] // src_raw + mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width - pcmpeqb xmm5, xmm5 // generate mask 0xff000000 + pcmpeqb xmm5, xmm5 // generate mask 0xff000000 pslld xmm5, 24 movdqa xmm4, xmmword ptr kShuffleMaskRAWToARGB @@ -403,17 +372,17 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, movdqu xmm3, [eax + 32] lea eax, [eax + 48] movdqa xmm2, xmm3 - palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]} + palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]} pshufb xmm2, xmm4 por xmm2, xmm5 - palignr xmm1, 
xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]} + palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]} pshufb xmm0, xmm4 movdqu [edx + 32], xmm2 por xmm0, xmm5 pshufb xmm1, xmm4 movdqu [edx], xmm0 por xmm1, xmm5 - palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]} + palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]} pshufb xmm3, xmm4 movdqu [edx + 16], xmm1 por xmm3, xmm5 @@ -425,11 +394,12 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, } } -__declspec(naked) -void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width) { +__declspec(naked) void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, + uint8_t* dst_rgb24, + int width) { __asm { - mov eax, [esp + 4] // src_raw - mov edx, [esp + 8] // dst_rgb24 + mov eax, [esp + 4] // src_raw + mov edx, [esp + 8] // dst_rgb24 mov ecx, [esp + 12] // width movdqa xmm3, xmmword ptr kShuffleMaskRAWToRGB24_0 movdqa xmm4, xmmword ptr kShuffleMaskRAWToRGB24_1 @@ -460,9 +430,9 @@ void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width) { // v * (256 + 8) // G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3 // 20 instructions. -__declspec(naked) -void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, - int width) { +__declspec(naked) void RGB565ToARGBRow_SSE2(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width) { __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits movd xmm5, eax @@ -470,33 +440,33 @@ void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits movd xmm6, eax pshufd xmm6, xmm6, 0 - pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red + pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red psllw xmm3, 11 - pcmpeqb xmm4, xmm4 // generate mask 0x07e007e0 for Green + pcmpeqb xmm4, xmm4 // generate mask 0x07e007e0 for Green psllw xmm4, 10 psrlw xmm4, 5 - pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha + pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha psllw xmm7, 8 - mov eax, [esp + 4] // src_rgb565 - mov edx, [esp + 8] // dst_argb + mov eax, [esp + 4] // src_rgb565 + mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax sub edx, eax convertloop: - movdqu xmm0, [eax] // fetch 8 pixels of bgr565 + movdqu xmm0, [eax] // fetch 8 pixels of bgr565 movdqa xmm1, xmm0 movdqa xmm2, xmm0 - pand xmm1, xmm3 // R in upper 5 bits - psllw xmm2, 11 // B in upper 5 bits - pmulhuw xmm1, xmm5 // * (256 + 8) - pmulhuw xmm2, xmm5 // * (256 + 8) + pand xmm1, xmm3 // R in upper 5 bits + psllw xmm2, 11 // B in upper 5 bits + pmulhuw xmm1, xmm5 // * (256 + 8) + pmulhuw xmm2, xmm5 // * (256 + 8) psllw xmm1, 8 - por xmm1, xmm2 // RB - pand xmm0, xmm4 // G in middle 6 bits - pmulhuw xmm0, xmm6 // << 5 * (256 + 4) - por xmm0, xmm7 // AG + por xmm1, xmm2 // RB + pand xmm0, xmm4 // G in middle 6 bits + pmulhuw xmm0, xmm6 // << 5 * (256 + 4) + por xmm0, xmm7 // AG movdqa xmm2, xmm1 punpcklbw xmm1, xmm0 punpckhbw xmm2, xmm0 @@ -516,9 +486,9 @@ void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, // v * 256 + v * 8 // v * (256 + 8) // G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3 -__declspec(naked) -void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, - int width) { +__declspec(naked) void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width) { __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits vmovd xmm5, eax @@ -526,32 +496,32 @@ void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, 
mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits vmovd xmm6, eax vbroadcastss ymm6, xmm6 - vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red + vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red vpsllw ymm3, ymm3, 11 - vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x07e007e0 for Green + vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x07e007e0 for Green vpsllw ymm4, ymm4, 10 vpsrlw ymm4, ymm4, 5 - vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xff00ff00 for Alpha + vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xff00ff00 for Alpha vpsllw ymm7, ymm7, 8 - mov eax, [esp + 4] // src_rgb565 - mov edx, [esp + 8] // dst_argb + mov eax, [esp + 4] // src_rgb565 + mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax sub edx, eax convertloop: - vmovdqu ymm0, [eax] // fetch 16 pixels of bgr565 - vpand ymm1, ymm0, ymm3 // R in upper 5 bits - vpsllw ymm2, ymm0, 11 // B in upper 5 bits - vpmulhuw ymm1, ymm1, ymm5 // * (256 + 8) - vpmulhuw ymm2, ymm2, ymm5 // * (256 + 8) + vmovdqu ymm0, [eax] // fetch 16 pixels of bgr565 + vpand ymm1, ymm0, ymm3 // R in upper 5 bits + vpsllw ymm2, ymm0, 11 // B in upper 5 bits + vpmulhuw ymm1, ymm1, ymm5 // * (256 + 8) + vpmulhuw ymm2, ymm2, ymm5 // * (256 + 8) vpsllw ymm1, ymm1, 8 - vpor ymm1, ymm1, ymm2 // RB - vpand ymm0, ymm0, ymm4 // G in middle 6 bits - vpmulhuw ymm0, ymm0, ymm6 // << 5 * (256 + 4) - vpor ymm0, ymm0, ymm7 // AG - vpermq ymm0, ymm0, 0xd8 // mutate for unpack + vpor ymm1, ymm1, ymm2 // RB + vpand ymm0, ymm0, ymm4 // G in middle 6 bits + vpmulhuw ymm0, ymm0, ymm6 // << 5 * (256 + 4) + vpor ymm0, ymm0, ymm7 // AG + vpermq ymm0, ymm0, 0xd8 // mutate for unpack vpermq ymm1, ymm1, 0xd8 vpunpckhbw ymm2, ymm1, ymm0 vpunpcklbw ymm1, ymm1, ymm0 @@ -567,9 +537,9 @@ void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, #endif // HAS_RGB565TOARGBROW_AVX2 #ifdef HAS_ARGB1555TOARGBROW_AVX2 -__declspec(naked) -void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb, - int width) { +__declspec(naked) void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555, + uint8_t* dst_argb, + int width) { __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits vmovd xmm5, eax @@ -577,33 +547,33 @@ void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb, mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits vmovd xmm6, eax vbroadcastss ymm6, xmm6 - vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red + vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red vpsllw ymm3, ymm3, 11 - vpsrlw ymm4, ymm3, 6 // generate mask 0x03e003e0 for Green - vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xff00ff00 for Alpha + vpsrlw ymm4, ymm3, 6 // generate mask 0x03e003e0 for Green + vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xff00ff00 for Alpha vpsllw ymm7, ymm7, 8 - mov eax, [esp + 4] // src_argb1555 - mov edx, [esp + 8] // dst_argb + mov eax, [esp + 4] // src_argb1555 + mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax sub edx, eax convertloop: - vmovdqu ymm0, [eax] // fetch 16 pixels of 1555 - vpsllw ymm1, ymm0, 1 // R in upper 5 bits - vpsllw ymm2, ymm0, 11 // B in upper 5 bits + vmovdqu ymm0, [eax] // fetch 16 pixels of 1555 + vpsllw ymm1, ymm0, 1 // R in upper 5 bits + vpsllw ymm2, ymm0, 11 // B in upper 5 bits vpand ymm1, ymm1, ymm3 - vpmulhuw ymm2, ymm2, ymm5 // * (256 + 8) - vpmulhuw ymm1, ymm1, ymm5 // * (256 + 8) + vpmulhuw ymm2, ymm2, ymm5 // * (256 + 8) + vpmulhuw ymm1, ymm1, ymm5 // * (256 + 8) vpsllw ymm1, ymm1, 8 - vpor ymm1, 
ymm1, ymm2 // RB - vpsraw ymm2, ymm0, 8 // A - vpand ymm0, ymm0, ymm4 // G in middle 5 bits - vpmulhuw ymm0, ymm0, ymm6 // << 6 * (256 + 8) + vpor ymm1, ymm1, ymm2 // RB + vpsraw ymm2, ymm0, 8 // A + vpand ymm0, ymm0, ymm4 // G in middle 5 bits + vpmulhuw ymm0, ymm0, ymm6 // << 6 * (256 + 8) vpand ymm2, ymm2, ymm7 - vpor ymm0, ymm0, ymm2 // AG - vpermq ymm0, ymm0, 0xd8 // mutate for unpack + vpor ymm0, ymm0, ymm2 // AG + vpermq ymm0, ymm0, 0xd8 // mutate for unpack vpermq ymm1, ymm1, 0xd8 vpunpckhbw ymm2, ymm1, ymm0 vpunpcklbw ymm1, ymm1, ymm0 @@ -619,29 +589,29 @@ void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb, #endif // HAS_ARGB1555TOARGBROW_AVX2 #ifdef HAS_ARGB4444TOARGBROW_AVX2 -__declspec(naked) -void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb, - int width) { +__declspec(naked) void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444, + uint8_t* dst_argb, + int width) { __asm { mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f vmovd xmm4, eax vbroadcastss ymm4, xmm4 - vpslld ymm5, ymm4, 4 // 0xf0f0f0f0 for high nibbles - mov eax, [esp + 4] // src_argb4444 - mov edx, [esp + 8] // dst_argb + vpslld ymm5, ymm4, 4 // 0xf0f0f0f0 for high nibbles + mov eax, [esp + 4] // src_argb4444 + mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax sub edx, eax convertloop: - vmovdqu ymm0, [eax] // fetch 16 pixels of bgra4444 - vpand ymm2, ymm0, ymm5 // mask high nibbles - vpand ymm0, ymm0, ymm4 // mask low nibbles + vmovdqu ymm0, [eax] // fetch 16 pixels of bgra4444 + vpand ymm2, ymm0, ymm5 // mask high nibbles + vpand ymm0, ymm0, ymm4 // mask low nibbles vpsrlw ymm3, ymm2, 4 vpsllw ymm1, ymm0, 4 vpor ymm2, ymm2, ymm3 vpor ymm0, ymm0, ymm1 - vpermq ymm0, ymm0, 0xd8 // mutate for unpack + vpermq ymm0, ymm0, 0xd8 // mutate for unpack vpermq ymm2, ymm2, 0xd8 vpunpckhbw ymm1, ymm0, ymm2 vpunpcklbw ymm0, ymm0, ymm2 @@ -657,9 +627,9 @@ void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb, #endif // HAS_ARGB4444TOARGBROW_AVX2 // 24 instructions -__declspec(naked) -void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, - int width) { +__declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8_t* src_argb1555, + uint8_t* dst_argb, + int width) { __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits movd xmm5, eax @@ -667,36 +637,36 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits movd xmm6, eax pshufd xmm6, xmm6, 0 - pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red + pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red psllw xmm3, 11 - movdqa xmm4, xmm3 // generate mask 0x03e003e0 for Green + movdqa xmm4, xmm3 // generate mask 0x03e003e0 for Green psrlw xmm4, 6 - pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha + pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha psllw xmm7, 8 - mov eax, [esp + 4] // src_argb1555 - mov edx, [esp + 8] // dst_argb + mov eax, [esp + 4] // src_argb1555 + mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax sub edx, eax convertloop: - movdqu xmm0, [eax] // fetch 8 pixels of 1555 + movdqu xmm0, [eax] // fetch 8 pixels of 1555 movdqa xmm1, xmm0 movdqa xmm2, xmm0 - psllw xmm1, 1 // R in upper 5 bits - psllw xmm2, 11 // B in upper 5 bits + psllw xmm1, 1 // R in upper 5 bits + psllw xmm2, 11 // B in upper 5 bits pand xmm1, xmm3 - pmulhuw xmm2, xmm5 // * (256 + 8) - pmulhuw xmm1, xmm5 // * (256 + 8) + pmulhuw xmm2, xmm5 // * (256 + 8) + pmulhuw 
xmm1, xmm5 // * (256 + 8) psllw xmm1, 8 - por xmm1, xmm2 // RB + por xmm1, xmm2 // RB movdqa xmm2, xmm0 - pand xmm0, xmm4 // G in middle 5 bits - psraw xmm2, 8 // A - pmulhuw xmm0, xmm6 // << 6 * (256 + 8) + pand xmm0, xmm4 // G in middle 5 bits + psraw xmm2, 8 // A + pmulhuw xmm0, xmm6 // << 6 * (256 + 8) pand xmm2, xmm7 - por xmm0, xmm2 // AG + por xmm0, xmm2 // AG movdqa xmm2, xmm1 punpcklbw xmm1, xmm0 punpckhbw xmm2, xmm0 @@ -710,26 +680,26 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, } // 18 instructions. -__declspec(naked) -void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, - int width) { +__declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8_t* src_argb4444, + uint8_t* dst_argb, + int width) { __asm { mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f movd xmm4, eax pshufd xmm4, xmm4, 0 - movdqa xmm5, xmm4 // 0xf0f0f0f0 for high nibbles + movdqa xmm5, xmm4 // 0xf0f0f0f0 for high nibbles pslld xmm5, 4 - mov eax, [esp + 4] // src_argb4444 - mov edx, [esp + 8] // dst_argb + mov eax, [esp + 4] // src_argb4444 + mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax sub edx, eax convertloop: - movdqu xmm0, [eax] // fetch 8 pixels of bgra4444 + movdqu xmm0, [eax] // fetch 8 pixels of bgra4444 movdqa xmm2, xmm0 - pand xmm0, xmm4 // mask low nibbles - pand xmm2, xmm5 // mask high nibbles + pand xmm0, xmm4 // mask low nibbles + pand xmm2, xmm5 // mask high nibbles movdqa xmm1, xmm0 movdqa xmm3, xmm2 psllw xmm1, 4 @@ -748,37 +718,38 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, } } -__declspec(naked) -void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width) { +__declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24 convertloop: - movdqu xmm0, [eax] // fetch 16 pixels of argb + movdqu xmm0, [eax] // fetch 16 pixels of argb movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] lea eax, [eax + 64] - pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB + pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB pshufb xmm1, xmm6 pshufb xmm2, xmm6 pshufb xmm3, xmm6 - movdqa xmm4, xmm1 // 4 bytes from 1 for 0 - psrldq xmm1, 4 // 8 bytes from 1 - pslldq xmm4, 12 // 4 bytes from 1 for 0 - movdqa xmm5, xmm2 // 8 bytes from 2 for 1 - por xmm0, xmm4 // 4 bytes from 1 for 0 - pslldq xmm5, 8 // 8 bytes from 2 for 1 + movdqa xmm4, xmm1 // 4 bytes from 1 for 0 + psrldq xmm1, 4 // 8 bytes from 1 + pslldq xmm4, 12 // 4 bytes from 1 for 0 + movdqa xmm5, xmm2 // 8 bytes from 2 for 1 + por xmm0, xmm4 // 4 bytes from 1 for 0 + pslldq xmm5, 8 // 8 bytes from 2 for 1 movdqu [edx], xmm0 // store 0 - por xmm1, xmm5 // 8 bytes from 2 for 1 - psrldq xmm2, 8 // 4 bytes from 2 - pslldq xmm3, 4 // 12 bytes from 3 for 2 - por xmm2, xmm3 // 12 bytes from 3 for 2 - movdqu [edx + 16], xmm1 // store 1 - movdqu [edx + 32], xmm2 // store 2 + por xmm1, xmm5 // 8 bytes from 2 for 1 + psrldq xmm2, 8 // 4 bytes from 2 + pslldq xmm3, 4 // 12 bytes from 3 for 2 + por xmm2, xmm3 // 12 bytes from 3 for 2 + movdqu [edx + 16], xmm1 // store 1 + movdqu [edx + 32], xmm2 // store 2 lea edx, [edx + 48] sub ecx, 16 jg convertloop @@ -786,37 +757,38 @@ void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width) { } } 
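ARGBToRGB24Row_SSSE3 above and ARGBToRAWRow_SSSE3 below are the same 4-to-3 byte repack, differing only in whether R and B are swapped; the shuffle masks squeeze 16 ARGB pixels (64 bytes) into 48 bytes. A scalar sketch with an invented helper (swap_rb selects the RAW byte order):

#include <stdint.h>

static void ArgbToRgb24Ref(const uint8_t* src_argb, uint8_t* dst_rgb,
                           int width, int swap_rb) {
  for (int i = 0; i < width; ++i) {
    const uint8_t b = src_argb[4 * i + 0];
    const uint8_t g = src_argb[4 * i + 1];
    const uint8_t r = src_argb[4 * i + 2];  // alpha at 4 * i + 3 is dropped
    dst_rgb[3 * i + 0] = swap_rb ? r : b;   // RGB24 stores B,G,R; RAW stores R,G,B
    dst_rgb[3 * i + 1] = g;
    dst_rgb[3 * i + 2] = swap_rb ? b : r;
  }
}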
-__declspec(naked) -void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width) { +__declspec(naked) void ARGBToRAWRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW convertloop: - movdqu xmm0, [eax] // fetch 16 pixels of argb + movdqu xmm0, [eax] // fetch 16 pixels of argb movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] lea eax, [eax + 64] - pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB + pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB pshufb xmm1, xmm6 pshufb xmm2, xmm6 pshufb xmm3, xmm6 - movdqa xmm4, xmm1 // 4 bytes from 1 for 0 - psrldq xmm1, 4 // 8 bytes from 1 - pslldq xmm4, 12 // 4 bytes from 1 for 0 - movdqa xmm5, xmm2 // 8 bytes from 2 for 1 - por xmm0, xmm4 // 4 bytes from 1 for 0 - pslldq xmm5, 8 // 8 bytes from 2 for 1 + movdqa xmm4, xmm1 // 4 bytes from 1 for 0 + psrldq xmm1, 4 // 8 bytes from 1 + pslldq xmm4, 12 // 4 bytes from 1 for 0 + movdqa xmm5, xmm2 // 8 bytes from 2 for 1 + por xmm0, xmm4 // 4 bytes from 1 for 0 + pslldq xmm5, 8 // 8 bytes from 2 for 1 movdqu [edx], xmm0 // store 0 - por xmm1, xmm5 // 8 bytes from 2 for 1 - psrldq xmm2, 8 // 4 bytes from 2 - pslldq xmm3, 4 // 12 bytes from 3 for 2 - por xmm2, xmm3 // 12 bytes from 3 for 2 - movdqu [edx + 16], xmm1 // store 1 - movdqu [edx + 32], xmm2 // store 2 + por xmm1, xmm5 // 8 bytes from 2 for 1 + psrldq xmm2, 8 // 4 bytes from 2 + pslldq xmm3, 4 // 12 bytes from 3 for 2 + por xmm2, xmm3 // 12 bytes from 3 for 2 + movdqu [edx + 16], xmm1 // store 1 + movdqu [edx + 32], xmm2 // store 2 lea edx, [edx + 48] sub ecx, 16 jg convertloop @@ -824,33 +796,34 @@ void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width) { } } -__declspec(naked) -void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width) { +__declspec(naked) void ARGBToRGB565Row_SSE2(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width - pcmpeqb xmm3, xmm3 // generate mask 0x0000001f + pcmpeqb xmm3, xmm3 // generate mask 0x0000001f psrld xmm3, 27 - pcmpeqb xmm4, xmm4 // generate mask 0x000007e0 + pcmpeqb xmm4, xmm4 // generate mask 0x000007e0 psrld xmm4, 26 pslld xmm4, 5 - pcmpeqb xmm5, xmm5 // generate mask 0xfffff800 + pcmpeqb xmm5, xmm5 // generate mask 0xfffff800 pslld xmm5, 11 convertloop: - movdqu xmm0, [eax] // fetch 4 pixels of argb - movdqa xmm1, xmm0 // B - movdqa xmm2, xmm0 // G - pslld xmm0, 8 // R - psrld xmm1, 3 // B - psrld xmm2, 5 // G - psrad xmm0, 16 // R - pand xmm1, xmm3 // B - pand xmm2, xmm4 // G - pand xmm0, xmm5 // R - por xmm1, xmm2 // BG - por xmm0, xmm1 // BGR + movdqu xmm0, [eax] // fetch 4 pixels of argb + movdqa xmm1, xmm0 // B + movdqa xmm2, xmm0 // G + pslld xmm0, 8 // R + psrld xmm1, 3 // B + psrld xmm2, 5 // G + psrad xmm0, 16 // R + pand xmm1, xmm3 // B + pand xmm2, xmm4 // G + pand xmm0, xmm5 // R + por xmm1, xmm2 // BG + por xmm0, xmm1 // BGR packssdw xmm0, xmm0 lea eax, [eax + 16] movq qword ptr [edx], xmm0 // store 4 pixels of RGB565 @@ -861,41 +834,42 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width) { } } -__declspec(naked) -void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* 
dst_rgb, - const uint32 dither4, int width) { +__declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb - movd xmm6, [esp + 12] // dither4 + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_rgb + movd xmm6, [esp + 12] // dither4 mov ecx, [esp + 16] // width - punpcklbw xmm6, xmm6 // make dither 16 bytes + punpcklbw xmm6, xmm6 // make dither 16 bytes movdqa xmm7, xmm6 punpcklwd xmm6, xmm6 punpckhwd xmm7, xmm7 - pcmpeqb xmm3, xmm3 // generate mask 0x0000001f + pcmpeqb xmm3, xmm3 // generate mask 0x0000001f psrld xmm3, 27 - pcmpeqb xmm4, xmm4 // generate mask 0x000007e0 + pcmpeqb xmm4, xmm4 // generate mask 0x000007e0 psrld xmm4, 26 pslld xmm4, 5 - pcmpeqb xmm5, xmm5 // generate mask 0xfffff800 + pcmpeqb xmm5, xmm5 // generate mask 0xfffff800 pslld xmm5, 11 convertloop: - movdqu xmm0, [eax] // fetch 4 pixels of argb - paddusb xmm0, xmm6 // add dither - movdqa xmm1, xmm0 // B - movdqa xmm2, xmm0 // G - pslld xmm0, 8 // R - psrld xmm1, 3 // B - psrld xmm2, 5 // G - psrad xmm0, 16 // R - pand xmm1, xmm3 // B - pand xmm2, xmm4 // G - pand xmm0, xmm5 // R - por xmm1, xmm2 // BG - por xmm0, xmm1 // BGR + movdqu xmm0, [eax] // fetch 4 pixels of argb + paddusb xmm0, xmm6 // add dither + movdqa xmm1, xmm0 // B + movdqa xmm2, xmm0 // G + pslld xmm0, 8 // R + psrld xmm1, 3 // B + psrld xmm2, 5 // G + psrad xmm0, 16 // R + pand xmm1, xmm3 // B + pand xmm2, xmm4 // G + pand xmm0, xmm5 // R + por xmm1, xmm2 // BG + por xmm0, xmm1 // BGR packssdw xmm0, xmm0 lea eax, [eax + 16] movq qword ptr [edx], xmm0 // store 4 pixels of RGB565 @@ -907,39 +881,40 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, } #ifdef HAS_ARGBTORGB565DITHERROW_AVX2 -__declspec(naked) -void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width) { +__declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_rgb vbroadcastss xmm6, [esp + 12] // dither4 - mov ecx, [esp + 16] // width - vpunpcklbw xmm6, xmm6, xmm6 // make dither 32 bytes + mov ecx, [esp + 16] // width + vpunpcklbw xmm6, xmm6, xmm6 // make dither 32 bytes vpermq ymm6, ymm6, 0xd8 vpunpcklwd ymm6, ymm6, ymm6 - vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f + vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f vpsrld ymm3, ymm3, 27 - vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0 + vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0 vpsrld ymm4, ymm4, 26 vpslld ymm4, ymm4, 5 - vpslld ymm5, ymm3, 11 // generate mask 0x0000f800 + vpslld ymm5, ymm3, 11 // generate mask 0x0000f800 convertloop: - vmovdqu ymm0, [eax] // fetch 8 pixels of argb - vpaddusb ymm0, ymm0, ymm6 // add dither - vpsrld ymm2, ymm0, 5 // G - vpsrld ymm1, ymm0, 3 // B - vpsrld ymm0, ymm0, 8 // R - vpand ymm2, ymm2, ymm4 // G - vpand ymm1, ymm1, ymm3 // B - vpand ymm0, ymm0, ymm5 // R - vpor ymm1, ymm1, ymm2 // BG - vpor ymm0, ymm0, ymm1 // BGR + vmovdqu ymm0, [eax] // fetch 8 pixels of argb + vpaddusb ymm0, ymm0, ymm6 // add dither + vpsrld ymm2, ymm0, 5 // G + vpsrld ymm1, ymm0, 3 // B + vpsrld ymm0, ymm0, 8 // R + vpand ymm2, ymm2, ymm4 // G + vpand ymm1, ymm1, ymm3 // B + vpand ymm0, ymm0, ymm5 // R + vpor ymm1, ymm1, ymm2 // BG + vpor ymm0, ymm0, ymm1 // 
BGR vpackusdw ymm0, ymm0, ymm0 vpermq ymm0, ymm0, 0xd8 lea eax, [eax + 32] - vmovdqu [edx], xmm0 // store 8 pixels of RGB565 + vmovdqu [edx], xmm0 // store 8 pixels of RGB565 lea edx, [edx + 16] sub ecx, 8 jg convertloop @@ -950,37 +925,38 @@ void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb, #endif // HAS_ARGBTORGB565DITHERROW_AVX2 // TODO(fbarchard): Improve sign extension/packing. -__declspec(naked) -void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width) { +__declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width - pcmpeqb xmm4, xmm4 // generate mask 0x0000001f + pcmpeqb xmm4, xmm4 // generate mask 0x0000001f psrld xmm4, 27 - movdqa xmm5, xmm4 // generate mask 0x000003e0 + movdqa xmm5, xmm4 // generate mask 0x000003e0 pslld xmm5, 5 - movdqa xmm6, xmm4 // generate mask 0x00007c00 + movdqa xmm6, xmm4 // generate mask 0x00007c00 pslld xmm6, 10 - pcmpeqb xmm7, xmm7 // generate mask 0xffff8000 + pcmpeqb xmm7, xmm7 // generate mask 0xffff8000 pslld xmm7, 15 convertloop: - movdqu xmm0, [eax] // fetch 4 pixels of argb - movdqa xmm1, xmm0 // B - movdqa xmm2, xmm0 // G - movdqa xmm3, xmm0 // R - psrad xmm0, 16 // A - psrld xmm1, 3 // B - psrld xmm2, 6 // G - psrld xmm3, 9 // R - pand xmm0, xmm7 // A - pand xmm1, xmm4 // B - pand xmm2, xmm5 // G - pand xmm3, xmm6 // R - por xmm0, xmm1 // BA - por xmm2, xmm3 // GR - por xmm0, xmm2 // BGRA + movdqu xmm0, [eax] // fetch 4 pixels of argb + movdqa xmm1, xmm0 // B + movdqa xmm2, xmm0 // G + movdqa xmm3, xmm0 // R + psrad xmm0, 16 // A + psrld xmm1, 3 // B + psrld xmm2, 6 // G + psrld xmm3, 9 // R + pand xmm0, xmm7 // A + pand xmm1, xmm4 // B + pand xmm2, xmm5 // G + pand xmm3, xmm6 // R + por xmm0, xmm1 // BA + por xmm2, xmm3 // GR + por xmm0, xmm2 // BGRA packssdw xmm0, xmm0 lea eax, [eax + 16] movq qword ptr [edx], xmm0 // store 4 pixels of ARGB1555 @@ -991,22 +967,23 @@ void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width) { } } -__declspec(naked) -void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width) { +__declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width - pcmpeqb xmm4, xmm4 // generate mask 0xf000f000 + pcmpeqb xmm4, xmm4 // generate mask 0xf000f000 psllw xmm4, 12 - movdqa xmm3, xmm4 // generate mask 0x00f000f0 + movdqa xmm3, xmm4 // generate mask 0x00f000f0 psrlw xmm3, 8 convertloop: - movdqu xmm0, [eax] // fetch 4 pixels of argb + movdqu xmm0, [eax] // fetch 4 pixels of argb movdqa xmm1, xmm0 - pand xmm0, xmm3 // low nibble - pand xmm1, xmm4 // high nibble + pand xmm0, xmm3 // low nibble + pand xmm1, xmm4 // high nibble psrld xmm0, 4 psrld xmm1, 8 por xmm0, xmm1 @@ -1021,33 +998,34 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width) { } #ifdef HAS_ARGBTORGB565ROW_AVX2 -__declspec(naked) -void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width) { +__declspec(naked) void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb - mov ecx, [esp + 12] // width - vpcmpeqb ymm3, ymm3, ymm3 // 
generate mask 0x0000001f + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_rgb + mov ecx, [esp + 12] // width + vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f vpsrld ymm3, ymm3, 27 - vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0 + vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0 vpsrld ymm4, ymm4, 26 vpslld ymm4, ymm4, 5 - vpslld ymm5, ymm3, 11 // generate mask 0x0000f800 + vpslld ymm5, ymm3, 11 // generate mask 0x0000f800 convertloop: - vmovdqu ymm0, [eax] // fetch 8 pixels of argb - vpsrld ymm2, ymm0, 5 // G - vpsrld ymm1, ymm0, 3 // B - vpsrld ymm0, ymm0, 8 // R - vpand ymm2, ymm2, ymm4 // G - vpand ymm1, ymm1, ymm3 // B - vpand ymm0, ymm0, ymm5 // R - vpor ymm1, ymm1, ymm2 // BG - vpor ymm0, ymm0, ymm1 // BGR + vmovdqu ymm0, [eax] // fetch 8 pixels of argb + vpsrld ymm2, ymm0, 5 // G + vpsrld ymm1, ymm0, 3 // B + vpsrld ymm0, ymm0, 8 // R + vpand ymm2, ymm2, ymm4 // G + vpand ymm1, ymm1, ymm3 // B + vpand ymm0, ymm0, ymm5 // R + vpor ymm1, ymm1, ymm2 // BG + vpor ymm0, ymm0, ymm1 // BGR vpackusdw ymm0, ymm0, ymm0 vpermq ymm0, ymm0, 0xd8 lea eax, [eax + 32] - vmovdqu [edx], xmm0 // store 8 pixels of RGB565 + vmovdqu [edx], xmm0 // store 8 pixels of RGB565 lea edx, [edx + 16] sub ecx, 8 jg convertloop @@ -1058,36 +1036,37 @@ void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width) { #endif // HAS_ARGBTORGB565ROW_AVX2 #ifdef HAS_ARGBTOARGB1555ROW_AVX2 -__declspec(naked) -void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width) { +__declspec(naked) void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb - mov ecx, [esp + 12] // width + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_rgb + mov ecx, [esp + 12] // width vpcmpeqb ymm4, ymm4, ymm4 - vpsrld ymm4, ymm4, 27 // generate mask 0x0000001f - vpslld ymm5, ymm4, 5 // generate mask 0x000003e0 - vpslld ymm6, ymm4, 10 // generate mask 0x00007c00 - vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xffff8000 + vpsrld ymm4, ymm4, 27 // generate mask 0x0000001f + vpslld ymm5, ymm4, 5 // generate mask 0x000003e0 + vpslld ymm6, ymm4, 10 // generate mask 0x00007c00 + vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xffff8000 vpslld ymm7, ymm7, 15 convertloop: - vmovdqu ymm0, [eax] // fetch 8 pixels of argb - vpsrld ymm3, ymm0, 9 // R - vpsrld ymm2, ymm0, 6 // G - vpsrld ymm1, ymm0, 3 // B - vpsrad ymm0, ymm0, 16 // A - vpand ymm3, ymm3, ymm6 // R - vpand ymm2, ymm2, ymm5 // G - vpand ymm1, ymm1, ymm4 // B - vpand ymm0, ymm0, ymm7 // A - vpor ymm0, ymm0, ymm1 // BA - vpor ymm2, ymm2, ymm3 // GR - vpor ymm0, ymm0, ymm2 // BGRA + vmovdqu ymm0, [eax] // fetch 8 pixels of argb + vpsrld ymm3, ymm0, 9 // R + vpsrld ymm2, ymm0, 6 // G + vpsrld ymm1, ymm0, 3 // B + vpsrad ymm0, ymm0, 16 // A + vpand ymm3, ymm3, ymm6 // R + vpand ymm2, ymm2, ymm5 // G + vpand ymm1, ymm1, ymm4 // B + vpand ymm0, ymm0, ymm7 // A + vpor ymm0, ymm0, ymm1 // BA + vpor ymm2, ymm2, ymm3 // GR + vpor ymm0, ymm0, ymm2 // BGRA vpackssdw ymm0, ymm0, ymm0 vpermq ymm0, ymm0, 0xd8 lea eax, [eax + 32] - vmovdqu [edx], xmm0 // store 8 pixels of ARGB1555 + vmovdqu [edx], xmm0 // store 8 pixels of ARGB1555 lea edx, [edx + 16] sub ecx, 8 jg convertloop @@ -1098,27 +1077,28 @@ void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width) { #endif // HAS_ARGBTOARGB1555ROW_AVX2 #ifdef HAS_ARGBTOARGB4444ROW_AVX2 -__declspec(naked) -void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width) 
{ +__declspec(naked) void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width - vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xf000f000 + vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xf000f000 vpsllw ymm4, ymm4, 12 - vpsrlw ymm3, ymm4, 8 // generate mask 0x00f000f0 + vpsrlw ymm3, ymm4, 8 // generate mask 0x00f000f0 convertloop: - vmovdqu ymm0, [eax] // fetch 8 pixels of argb - vpand ymm1, ymm0, ymm4 // high nibble - vpand ymm0, ymm0, ymm3 // low nibble + vmovdqu ymm0, [eax] // fetch 8 pixels of argb + vpand ymm1, ymm0, ymm4 // high nibble + vpand ymm0, ymm0, ymm3 // low nibble vpsrld ymm1, ymm1, 8 vpsrld ymm0, ymm0, 4 vpor ymm0, ymm0, ymm1 vpackuswb ymm0, ymm0, ymm0 vpermq ymm0, ymm0, 0xd8 lea eax, [eax + 32] - vmovdqu [edx], xmm0 // store 8 pixels of ARGB4444 + vmovdqu [edx], xmm0 // store 8 pixels of ARGB4444 lea edx, [edx + 16] sub ecx, 8 jg convertloop @@ -1129,12 +1109,13 @@ void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width) { #endif // HAS_ARGBTOARGB4444ROW_AVX2 // Convert 16 ARGB pixels (64 bytes) to 16 Y values. -__declspec(naked) -void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { +__declspec(naked) void ARGBToYRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_y, + int width) { __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* width */ + mov eax, [esp + 4] /* src_argb */ + mov edx, [esp + 8] /* dst_y */ + mov ecx, [esp + 12] /* width */ movdqa xmm4, xmmword ptr kARGBToY movdqa xmm5, xmmword ptr kAddY16 @@ -1164,12 +1145,13 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { // Convert 16 ARGB pixels (64 bytes) to 16 YJ values. // Same as ARGBToYRow but different coefficients, no add 16, but do rounding. -__declspec(naked) -void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { +__declspec(naked) void ARGBToYJRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_y, + int width) { __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* width */ + mov eax, [esp + 4] /* src_argb */ + mov edx, [esp + 8] /* dst_y */ + mov ecx, [esp + 12] /* width */ movdqa xmm4, xmmword ptr kARGBToYJ movdqa xmm5, xmmword ptr kAddYJ64 @@ -1200,17 +1182,16 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { #ifdef HAS_ARGBTOYROW_AVX2 // vpermd for vphaddw + vpackuswb vpermd. -static const lvec32 kPermdARGBToY_AVX = { - 0, 4, 1, 5, 2, 6, 3, 7 -}; +static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7}; // Convert 32 ARGB pixels (128 bytes) to 32 Y values. -__declspec(naked) -void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) { +__declspec(naked) void ARGBToYRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_y, + int width) { __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* width */ + mov eax, [esp + 4] /* src_argb */ + mov edx, [esp + 8] /* dst_y */ + mov ecx, [esp + 12] /* width */ vbroadcastf128 ymm4, xmmword ptr kARGBToY vbroadcastf128 ymm5, xmmword ptr kAddY16 vmovdqu ymm6, ymmword ptr kPermdARGBToY_AVX @@ -1244,12 +1225,13 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) { #ifdef HAS_ARGBTOYJROW_AVX2 // Convert 32 ARGB pixels (128 bytes) to 32 Y values. 
-__declspec(naked) -void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) { +__declspec(naked) void ARGBToYJRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_y, + int width) { __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* width */ + mov eax, [esp + 4] /* src_argb */ + mov edx, [esp + 8] /* dst_y */ + mov ecx, [esp + 12] /* width */ vbroadcastf128 ymm4, xmmword ptr kARGBToYJ vbroadcastf128 ymm5, xmmword ptr kAddYJ64 vmovdqu ymm6, ymmword ptr kPermdARGBToY_AVX @@ -1283,12 +1265,13 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) { } #endif // HAS_ARGBTOYJROW_AVX2 -__declspec(naked) -void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { +__declspec(naked) void BGRAToYRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_y, + int width) { __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* width */ + mov eax, [esp + 4] /* src_argb */ + mov edx, [esp + 8] /* dst_y */ + mov ecx, [esp + 12] /* width */ movdqa xmm4, xmmword ptr kBGRAToY movdqa xmm5, xmmword ptr kAddY16 @@ -1316,12 +1299,13 @@ void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { } } -__declspec(naked) -void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { +__declspec(naked) void ABGRToYRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_y, + int width) { __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* width */ + mov eax, [esp + 4] /* src_argb */ + mov edx, [esp + 8] /* dst_y */ + mov ecx, [esp + 12] /* width */ movdqa xmm4, xmmword ptr kABGRToY movdqa xmm5, xmmword ptr kAddY16 @@ -1349,12 +1333,13 @@ void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { } } -__declspec(naked) -void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { +__declspec(naked) void RGBAToYRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_y, + int width) { __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* width */ + mov eax, [esp + 4] /* src_argb */ + mov edx, [esp + 8] /* dst_y */ + mov ecx, [esp + 12] /* width */ movdqa xmm4, xmmword ptr kRGBAToY movdqa xmm5, xmmword ptr kAddY16 @@ -1382,24 +1367,26 @@ void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { } } -__declspec(naked) -void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb + mov eax, [esp + 8 + 4] // src_argb + mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width movdqa xmm5, xmmword ptr kAddUV128 movdqa xmm6, xmmword ptr kARGBToV movdqa xmm7, xmmword ptr kARGBToU - sub edi, edx // stride from u to v + sub edi, edx // stride from u to v convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ + /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] movdqu xmm4, [eax + esi] pavgb xmm0, xmm4 @@ -1423,9 +1410,9 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, shufps xmm4, xmm3, 0xdd pavgb xmm2, xmm4 - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 
pixels of U and 8 of V + // step 2 - convert to U and V + // from here down is very similar to Y code except + // instead of 16 different pixels, its 8 pixels of U and 8 of V movdqa xmm1, xmm0 movdqa xmm3, xmm2 pmaddubsw xmm0, xmm7 // U @@ -1437,11 +1424,11 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, psraw xmm0, 8 psraw xmm1, 8 packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned + paddb xmm0, xmm5 // -> unsigned - // step 3 - store 8 U and 8 V values - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V + // step 3 - store 8 U and 8 V values + movlps qword ptr [edx], xmm0 // U + movhps qword ptr [edx + edi], xmm0 // V lea edx, [edx + 8] sub ecx, 16 jg convertloop @@ -1452,24 +1439,26 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, } } -__declspec(naked) -void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb + mov eax, [esp + 8 + 4] // src_argb + mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width movdqa xmm5, xmmword ptr kAddUVJ128 movdqa xmm6, xmmword ptr kARGBToVJ movdqa xmm7, xmmword ptr kARGBToUJ - sub edi, edx // stride from u to v + sub edi, edx // stride from u to v convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ + /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] movdqu xmm4, [eax + esi] pavgb xmm0, xmm4 @@ -1493,9 +1482,9 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, shufps xmm4, xmm3, 0xdd pavgb xmm2, xmm4 - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V + // step 2 - convert to U and V + // from here down is very similar to Y code except + // instead of 16 different pixels, its 8 pixels of U and 8 of V movdqa xmm1, xmm0 movdqa xmm3, xmm2 pmaddubsw xmm0, xmm7 // U @@ -1510,9 +1499,9 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, psraw xmm1, 8 packsswb xmm0, xmm1 - // step 3 - store 8 U and 8 V values - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V + // step 3 - store 8 U and 8 V values + movlps qword ptr [edx], xmm0 // U + movhps qword ptr [edx + edi], xmm0 // V lea edx, [edx + 8] sub ecx, 16 jg convertloop @@ -1524,24 +1513,26 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, } #ifdef HAS_ARGBTOUVROW_AVX2 -__declspec(naked) -void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void ARGBToUVRow_AVX2(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb + mov eax, [esp + 8 + 4] // src_argb + mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width vbroadcastf128 ymm5, xmmword ptr kAddUV128 vbroadcastf128 ymm6, xmmword ptr kARGBToV vbroadcastf128 ymm7, xmmword ptr kARGBToU - sub edi, edx // stride from u to v + sub edi, edx // stride from u to v convertloop: - /* step 1 - 
subsample 32x2 argb pixels to 16x1 */ + /* step 1 - subsample 32x2 argb pixels to 16x1 */ vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] vmovdqu ymm2, [eax + 64] @@ -1558,9 +1549,9 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, vshufps ymm2, ymm2, ymm3, 0xdd vpavgb ymm2, ymm2, ymm4 // mutated by vshufps - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 32 different pixels, its 16 pixels of U and 16 of V + // step 2 - convert to U and V + // from here down is very similar to Y code except + // instead of 32 different pixels, its 16 pixels of U and 16 of V vpmaddubsw ymm1, ymm0, ymm7 // U vpmaddubsw ymm3, ymm2, ymm7 vpmaddubsw ymm0, ymm0, ymm6 // V @@ -1574,9 +1565,9 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, vpshufb ymm0, ymm0, ymmword ptr kShufARGBToUV_AVX // for vshufps/vphaddw vpaddb ymm0, ymm0, ymm5 // -> unsigned - // step 3 - store 16 U and 16 V values - vextractf128 [edx], ymm0, 0 // U - vextractf128 [edx + edi], ymm0, 1 // V + // step 3 - store 16 U and 16 V values + vextractf128 [edx], ymm0, 0 // U + vextractf128 [edx + edi], ymm0, 1 // V lea edx, [edx + 16] sub ecx, 32 jg convertloop @@ -1590,24 +1581,26 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, #endif // HAS_ARGBTOUVROW_AVX2 #ifdef HAS_ARGBTOUVJROW_AVX2 -__declspec(naked) -void ARGBToUVJRow_AVX2(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb + mov eax, [esp + 8 + 4] // src_argb + mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width vbroadcastf128 ymm5, xmmword ptr kAddUV128 vbroadcastf128 ymm6, xmmword ptr kARGBToV vbroadcastf128 ymm7, xmmword ptr kARGBToU - sub edi, edx // stride from u to v + sub edi, edx // stride from u to v convertloop: - /* step 1 - subsample 32x2 argb pixels to 16x1 */ + /* step 1 - subsample 32x2 argb pixels to 16x1 */ vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] vmovdqu ymm2, [eax + 64] @@ -1624,9 +1617,9 @@ void ARGBToUVJRow_AVX2(const uint8* src_argb0, int src_stride_argb, vshufps ymm2, ymm2, ymm3, 0xdd vpavgb ymm2, ymm2, ymm4 // mutated by vshufps - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 32 different pixels, its 16 pixels of U and 16 of V + // step 2 - convert to U and V + // from here down is very similar to Y code except + // instead of 32 different pixels, its 16 pixels of U and 16 of V vpmaddubsw ymm1, ymm0, ymm7 // U vpmaddubsw ymm3, ymm2, ymm7 vpmaddubsw ymm0, ymm0, ymm6 // V @@ -1641,9 +1634,9 @@ void ARGBToUVJRow_AVX2(const uint8* src_argb0, int src_stride_argb, vpermq ymm0, ymm0, 0xd8 // For vpacksswb vpshufb ymm0, ymm0, ymmword ptr kShufARGBToUV_AVX // for vshufps/vphaddw - // step 3 - store 16 U and 16 V values - vextractf128 [edx], ymm0, 0 // U - vextractf128 [edx + edi], ymm0, 1 // V + // step 3 - store 16 U and 16 V values + vextractf128 [edx], ymm0, 0 // U + vextractf128 [edx + edi], ymm0, 1 // V lea edx, [edx + 16] sub ecx, 32 jg convertloop @@ -1656,23 +1649,24 @@ void ARGBToUVJRow_AVX2(const uint8* src_argb0, int src_stride_argb, } #endif // HAS_ARGBTOUVJROW_AVX2 -__declspec(naked) -void ARGBToUV444Row_SSSE3(const uint8* 
src_argb0, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void ARGBToUV444Row_SSSE3(const uint8_t* src_argb0, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push edi - mov eax, [esp + 4 + 4] // src_argb - mov edx, [esp + 4 + 8] // dst_u + mov eax, [esp + 4 + 4] // src_argb + mov edx, [esp + 4 + 8] // dst_u mov edi, [esp + 4 + 12] // dst_v mov ecx, [esp + 4 + 16] // width movdqa xmm5, xmmword ptr kAddUV128 movdqa xmm6, xmmword ptr kARGBToV movdqa xmm7, xmmword ptr kARGBToU - sub edi, edx // stride from u to v + sub edi, edx // stride from u to v convertloop: - /* convert to U and V */ - movdqu xmm0, [eax] // U + /* convert to U and V */ + movdqu xmm0, [eax] // U movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] @@ -1688,7 +1682,7 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0, paddb xmm0, xmm5 movdqu [edx], xmm0 - movdqu xmm0, [eax] // V + movdqu xmm0, [eax] // V movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] @@ -1713,24 +1707,26 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0, } } -__declspec(naked) -void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void BGRAToUVRow_SSSE3(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb + mov eax, [esp + 8 + 4] // src_argb + mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width movdqa xmm5, xmmword ptr kAddUV128 movdqa xmm6, xmmword ptr kBGRAToV movdqa xmm7, xmmword ptr kBGRAToU - sub edi, edx // stride from u to v + sub edi, edx // stride from u to v convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ + /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] movdqu xmm4, [eax + esi] pavgb xmm0, xmm4 @@ -1754,9 +1750,9 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, shufps xmm4, xmm3, 0xdd pavgb xmm2, xmm4 - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V + // step 2 - convert to U and V + // from here down is very similar to Y code except + // instead of 16 different pixels, its 8 pixels of U and 8 of V movdqa xmm1, xmm0 movdqa xmm3, xmm2 pmaddubsw xmm0, xmm7 // U @@ -1768,11 +1764,11 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, psraw xmm0, 8 psraw xmm1, 8 packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned + paddb xmm0, xmm5 // -> unsigned - // step 3 - store 8 U and 8 V values - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V + // step 3 - store 8 U and 8 V values + movlps qword ptr [edx], xmm0 // U + movhps qword ptr [edx + edi], xmm0 // V lea edx, [edx + 8] sub ecx, 16 jg convertloop @@ -1783,24 +1779,26 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, } } -__declspec(naked) -void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void ABGRToUVRow_SSSE3(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb + mov eax, [esp + 8 + 4] // src_argb + mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 
12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width movdqa xmm5, xmmword ptr kAddUV128 movdqa xmm6, xmmword ptr kABGRToV movdqa xmm7, xmmword ptr kABGRToU - sub edi, edx // stride from u to v + sub edi, edx // stride from u to v convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ + /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] movdqu xmm4, [eax + esi] pavgb xmm0, xmm4 @@ -1824,9 +1822,9 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, shufps xmm4, xmm3, 0xdd pavgb xmm2, xmm4 - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V + // step 2 - convert to U and V + // from here down is very similar to Y code except + // instead of 16 different pixels, its 8 pixels of U and 8 of V movdqa xmm1, xmm0 movdqa xmm3, xmm2 pmaddubsw xmm0, xmm7 // U @@ -1838,11 +1836,11 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, psraw xmm0, 8 psraw xmm1, 8 packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned + paddb xmm0, xmm5 // -> unsigned - // step 3 - store 8 U and 8 V values - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V + // step 3 - store 8 U and 8 V values + movlps qword ptr [edx], xmm0 // U + movhps qword ptr [edx + edi], xmm0 // V lea edx, [edx + 8] sub ecx, 16 jg convertloop @@ -1853,24 +1851,26 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, } } -__declspec(naked) -void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void RGBAToUVRow_SSSE3(const uint8_t* src_argb0, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb + mov eax, [esp + 8 + 4] // src_argb + mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width movdqa xmm5, xmmword ptr kAddUV128 movdqa xmm6, xmmword ptr kRGBAToV movdqa xmm7, xmmword ptr kRGBAToU - sub edi, edx // stride from u to v + sub edi, edx // stride from u to v convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ + /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] movdqu xmm4, [eax + esi] pavgb xmm0, xmm4 @@ -1894,9 +1894,9 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, shufps xmm4, xmm3, 0xdd pavgb xmm2, xmm4 - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V + // step 2 - convert to U and V + // from here down is very similar to Y code except + // instead of 16 different pixels, its 8 pixels of U and 8 of V movdqa xmm1, xmm0 movdqa xmm3, xmm2 pmaddubsw xmm0, xmm7 // U @@ -1908,11 +1908,11 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, psraw xmm0, 8 psraw xmm1, 8 packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned + paddb xmm0, xmm5 // -> unsigned - // step 3 - store 8 U and 8 V values - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V + // step 3 - store 8 U and 8 V values + movlps qword ptr [edx], xmm0 // U + movhps qword ptr [edx + edi], xmm0 // V lea edx, [edx + 8] sub ecx, 16 jg convertloop @@ -1925,109 +1925,95 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, #endif // HAS_ARGBTOYROW_SSSE3 // Read 16 UV from 
444 -#define READYUV444_AVX2 __asm { \ - __asm vmovdqu xmm0, [esi] /* U */ \ - __asm vmovdqu xmm1, [esi + edi] /* V */ \ +#define READYUV444_AVX2 \ + __asm { \ + __asm vmovdqu xmm0, [esi] /* U */ \ + __asm vmovdqu xmm1, [esi + edi] /* V */ \ __asm lea esi, [esi + 16] \ __asm vpermq ymm0, ymm0, 0xd8 \ __asm vpermq ymm1, ymm1, 0xd8 \ - __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ - __asm vmovdqu xmm4, [eax] /* Y */ \ + __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ + __asm vmovdqu xmm4, [eax] /* Y */ \ __asm vpermq ymm4, ymm4, 0xd8 \ __asm vpunpcklbw ymm4, ymm4, ymm4 \ - __asm lea eax, [eax + 16] \ - } + __asm lea eax, [eax + 16]} // Read 8 UV from 422, upsample to 16 UV. -#define READYUV422_AVX2 __asm { \ - __asm vmovq xmm0, qword ptr [esi] /* U */ \ - __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \ +#define READYUV422_AVX2 \ + __asm { \ + __asm vmovq xmm0, qword ptr [esi] /* U */ \ + __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \ __asm lea esi, [esi + 8] \ - __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ + __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ __asm vpermq ymm0, ymm0, 0xd8 \ - __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ - __asm vmovdqu xmm4, [eax] /* Y */ \ + __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ + __asm vmovdqu xmm4, [eax] /* Y */ \ __asm vpermq ymm4, ymm4, 0xd8 \ __asm vpunpcklbw ymm4, ymm4, ymm4 \ - __asm lea eax, [eax + 16] \ - } + __asm lea eax, [eax + 16]} // Read 8 UV from 422, upsample to 16 UV. With 16 Alpha. -#define READYUVA422_AVX2 __asm { \ - __asm vmovq xmm0, qword ptr [esi] /* U */ \ - __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \ +#define READYUVA422_AVX2 \ + __asm { \ + __asm vmovq xmm0, qword ptr [esi] /* U */ \ + __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \ __asm lea esi, [esi + 8] \ - __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ + __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ __asm vpermq ymm0, ymm0, 0xd8 \ - __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ - __asm vmovdqu xmm4, [eax] /* Y */ \ + __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ + __asm vmovdqu xmm4, [eax] /* Y */ \ __asm vpermq ymm4, ymm4, 0xd8 \ __asm vpunpcklbw ymm4, ymm4, ymm4 \ __asm lea eax, [eax + 16] \ - __asm vmovdqu xmm5, [ebp] /* A */ \ + __asm vmovdqu xmm5, [ebp] /* A */ \ __asm vpermq ymm5, ymm5, 0xd8 \ - __asm lea ebp, [ebp + 16] \ - } - -// Read 4 UV from 411, upsample to 16 UV. -#define READYUV411_AVX2 __asm { \ - __asm vmovd xmm0, dword ptr [esi] /* U */ \ - __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \ - __asm lea esi, [esi + 4] \ - __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ - __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ - __asm vpermq ymm0, ymm0, 0xd8 \ - __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \ - __asm vmovdqu xmm4, [eax] /* Y */ \ - __asm vpermq ymm4, ymm4, 0xd8 \ - __asm vpunpcklbw ymm4, ymm4, ymm4 \ - __asm lea eax, [eax + 16] \ - } + __asm lea ebp, [ebp + 16]} // Read 8 UV from NV12, upsample to 16 UV. -#define READNV12_AVX2 __asm { \ - __asm vmovdqu xmm0, [esi] /* UV */ \ +#define READNV12_AVX2 \ + __asm { \ + __asm vmovdqu xmm0, [esi] /* UV */ \ __asm lea esi, [esi + 16] \ __asm vpermq ymm0, ymm0, 0xd8 \ - __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ - __asm vmovdqu xmm4, [eax] /* Y */ \ + __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ + __asm vmovdqu xmm4, [eax] /* Y */ \ __asm vpermq ymm4, ymm4, 0xd8 \ __asm vpunpcklbw ymm4, ymm4, ymm4 \ - __asm lea eax, [eax + 16] \ - } + __asm lea eax, [eax + 16]} // Read 8 UV from NV21, upsample to 16 UV. 
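NV12 and NV21 differ only in the order of the interleaved chroma bytes, which is why READNV21 needs a pshufb against kShuffleNV21 where READNV12 gets away with a plain unpack. A scalar sketch of the NV21 read, assuming the shuffle both swaps V,U into U,V order and duplicates each pair for the 2x horizontal upsample:

#include <stdint.h>

/* Scalar model of READNV21: the chroma plane holds V,U pairs; emit them in
   U,V order, duplicated so each luma sample gets a chroma sample. */
static void ReadNV21_C_sketch(const uint8_t* vu, uint8_t* uv_out, int pairs) {
  int i;
  for (i = 0; i < pairs; ++i) {
    uint8_t v = vu[2 * i + 0];
    uint8_t u = vu[2 * i + 1];
    uv_out[4 * i + 0] = u; /* first pixel of the pair */
    uv_out[4 * i + 1] = v;
    uv_out[4 * i + 2] = u; /* duplicated for the second pixel */
    uv_out[4 * i + 3] = v;
  }
}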
-#define READNV21_AVX2 __asm { \ - __asm vmovdqu xmm0, [esi] /* UV */ \ +#define READNV21_AVX2 \ + __asm { \ + __asm vmovdqu xmm0, [esi] /* UV */ \ __asm lea esi, [esi + 16] \ __asm vpermq ymm0, ymm0, 0xd8 \ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleNV21 \ - __asm vmovdqu xmm4, [eax] /* Y */ \ + __asm vmovdqu xmm4, [eax] /* Y */ \ __asm vpermq ymm4, ymm4, 0xd8 \ __asm vpunpcklbw ymm4, ymm4, ymm4 \ - __asm lea eax, [eax + 16] \ - } + __asm lea eax, [eax + 16]} // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV. -#define READYUY2_AVX2 __asm { \ - __asm vmovdqu ymm4, [eax] /* YUY2 */ \ +#define READYUY2_AVX2 \ + __asm { \ + __asm vmovdqu ymm4, [eax] /* YUY2 */ \ __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleYUY2Y \ - __asm vmovdqu ymm0, [eax] /* UV */ \ + __asm vmovdqu ymm0, [eax] /* UV */ \ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleYUY2UV \ - __asm lea eax, [eax + 32] \ - } + __asm lea eax, [eax + 32]} // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV. -#define READUYVY_AVX2 __asm { \ - __asm vmovdqu ymm4, [eax] /* UYVY */ \ +#define READUYVY_AVX2 \ + __asm { \ + __asm vmovdqu ymm4, [eax] /* UYVY */ \ __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleUYVYY \ - __asm vmovdqu ymm0, [eax] /* UV */ \ + __asm vmovdqu ymm0, [eax] /* UV */ \ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleUYVYUV \ - __asm lea eax, [eax + 32] \ - } + __asm lea eax, [eax + 32]} // Convert 16 pixels: 16 UV and 16 Y. -#define YUVTORGB_AVX2(YuvConstants) __asm { \ +#define YUVTORGB_AVX2(YuvConstants) \ + __asm { \ __asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\ __asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\ __asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\ @@ -2036,68 +2022,67 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \ __asm vpsubw ymm1, ymm3, ymm1 \ __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \ - __asm vpsubw ymm0, ymm3, ymm0 \ - /* Step 2: Find Y contribution to 16 R,G,B values */ \ + __asm vpsubw ymm0, ymm3, ymm0 /* Step 2: Find Y contribution to 16 R,G,B values */ \ __asm vpmulhuw ymm4, ymm4, ymmword ptr [YuvConstants + KYTORGB] \ - __asm vpaddsw ymm0, ymm0, ymm4 /* B += Y */ \ - __asm vpaddsw ymm1, ymm1, ymm4 /* G += Y */ \ - __asm vpaddsw ymm2, ymm2, ymm4 /* R += Y */ \ + __asm vpaddsw ymm0, ymm0, ymm4 /* B += Y */ \ + __asm vpaddsw ymm1, ymm1, ymm4 /* G += Y */ \ + __asm vpaddsw ymm2, ymm2, ymm4 /* R += Y */ \ __asm vpsraw ymm0, ymm0, 6 \ __asm vpsraw ymm1, ymm1, 6 \ __asm vpsraw ymm2, ymm2, 6 \ - __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \ - __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \ - __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \ + __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \ + __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \ + __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \ } // Store 16 ARGB values. 
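After YUVTORGB the B, G and R results sit as packed bytes in separate registers, with the constant 0xff alpha in ymm5; the store stage interleaves them into libyuv ARGB, which is byte order B,G,R,A in memory. A scalar sketch of what the two unpack levels in STOREARGB_AVX2 below produce:

#include <stdint.h>

/* Scalar model of STOREARGB: weave planar B,G,R plus alpha into packed
   little-endian ARGB (memory order B,G,R,A). */
static void StoreARGB_C_sketch(const uint8_t* b, const uint8_t* g,
                               const uint8_t* r, uint8_t a,
                               uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[4 * i + 0] = b[i];
    dst_argb[4 * i + 1] = g[i];
    dst_argb[4 * i + 2] = r[i];
    dst_argb[4 * i + 3] = a; /* 0xff produced by vpcmpeqb ymm5, ymm5, ymm5 */
  }
}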
-#define STOREARGB_AVX2 __asm { \ - __asm vpunpcklbw ymm0, ymm0, ymm1 /* BG */ \ +#define STOREARGB_AVX2 \ + __asm { \ + __asm vpunpcklbw ymm0, ymm0, ymm1 /* BG */ \ __asm vpermq ymm0, ymm0, 0xd8 \ - __asm vpunpcklbw ymm2, ymm2, ymm5 /* RA */ \ + __asm vpunpcklbw ymm2, ymm2, ymm5 /* RA */ \ __asm vpermq ymm2, ymm2, 0xd8 \ - __asm vpunpcklwd ymm1, ymm0, ymm2 /* BGRA first 8 pixels */ \ - __asm vpunpckhwd ymm0, ymm0, ymm2 /* BGRA next 8 pixels */ \ + __asm vpunpcklwd ymm1, ymm0, ymm2 /* BGRA first 8 pixels */ \ + __asm vpunpckhwd ymm0, ymm0, ymm2 /* BGRA next 8 pixels */ \ __asm vmovdqu 0[edx], ymm1 \ __asm vmovdqu 32[edx], ymm0 \ - __asm lea edx, [edx + 64] \ - } + __asm lea edx, [edx + 64]} // Store 16 RGBA values. -#define STORERGBA_AVX2 __asm { \ - __asm vpunpcklbw ymm1, ymm1, ymm2 /* GR */ \ +#define STORERGBA_AVX2 \ + __asm { \ + __asm vpunpcklbw ymm1, ymm1, ymm2 /* GR */ \ __asm vpermq ymm1, ymm1, 0xd8 \ - __asm vpunpcklbw ymm2, ymm5, ymm0 /* AB */ \ + __asm vpunpcklbw ymm2, ymm5, ymm0 /* AB */ \ __asm vpermq ymm2, ymm2, 0xd8 \ - __asm vpunpcklwd ymm0, ymm2, ymm1 /* ABGR first 8 pixels */ \ - __asm vpunpckhwd ymm1, ymm2, ymm1 /* ABGR next 8 pixels */ \ + __asm vpunpcklwd ymm0, ymm2, ymm1 /* ABGR first 8 pixels */ \ + __asm vpunpckhwd ymm1, ymm2, ymm1 /* ABGR next 8 pixels */ \ __asm vmovdqu [edx], ymm0 \ __asm vmovdqu [edx + 32], ymm1 \ - __asm lea edx, [edx + 64] \ - } + __asm lea edx, [edx + 64]} #ifdef HAS_I422TOARGBROW_AVX2 // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). -__declspec(naked) -void I422ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void I422ToARGBRow_AVX2( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push edi push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U + mov eax, [esp + 12 + 4] // Y + mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi - vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha + vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READYUV422_AVX2 @@ -2119,21 +2104,21 @@ void I422ToARGBRow_AVX2(const uint8* y_buf, #ifdef HAS_I422ALPHATOARGBROW_AVX2 // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. -__declspec(naked) -void I422AlphaToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void I422AlphaToARGBRow_AVX2( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push edi push ebx push ebp - mov eax, [esp + 16 + 4] // Y - mov esi, [esp + 16 + 8] // U + mov eax, [esp + 16 + 4] // Y + mov esi, [esp + 16 + 8] // U mov edi, [esp + 16 + 12] // V mov ebp, [esp + 16 + 16] // A mov edx, [esp + 16 + 20] // argb @@ -2162,25 +2147,25 @@ void I422AlphaToARGBRow_AVX2(const uint8* y_buf, #ifdef HAS_I444TOARGBROW_AVX2 // 16 pixels // 16 UV values with 16 Y producing 16 ARGB (64 bytes). 
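The conversion core shared by all of these rows (the YUVTORGB stage) is a Q6 fixed-point affine transform. The real coefficients and biases come from the yuvconstants tables passed in through ebx, so the numbers in this scalar sketch are only the textbook BT.601 limited-range values scaled by 64, not copied from libyuv's tables:

#include <stdint.h>

static uint8_t Clamp255(int v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* Scalar model of YUVTORGB with illustrative BT.601 coefficients:
   R = 1.164(Y-16) + 1.596(V-128)
   G = 1.164(Y-16) - 0.813(V-128) - 0.391(U-128)
   B = 1.164(Y-16) + 2.018(U-128), all in Q6 (>> 6, like psraw/vpsraw). */
static void YuvToRgb_C_sketch(uint8_t y, uint8_t u, uint8_t v,
                              uint8_t* r, uint8_t* g, uint8_t* b) {
  int y1 = 75 * (y - 16); /* 1.164 * 64, rounded */
  *r = Clamp255((y1 + 102 * (v - 128)) >> 6);
  *g = Clamp255((y1 - 52 * (v - 128) - 25 * (u - 128)) >> 6);
  *b = Clamp255((y1 + 129 * (u - 128)) >> 6);
}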
-__declspec(naked) -void I444ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void I444ToARGBRow_AVX2( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push edi push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U + mov eax, [esp + 12 + 4] // Y + mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi - vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha + vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READYUV444_AVX2 YUVTORGB_AVX2(ebx) @@ -2198,64 +2183,24 @@ void I444ToARGBRow_AVX2(const uint8* y_buf, } #endif // HAS_I444TOARGBROW_AVX2 -#ifdef HAS_I411TOARGBROW_AVX2 -// 16 pixels -// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). -__declspec(naked) -void I411ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - __asm { - push esi - push edi - push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U - mov edi, [esp + 12 + 12] // V - mov edx, [esp + 12 + 16] // abgr - mov ebx, [esp + 12 + 20] // yuvconstants - mov ecx, [esp + 12 + 24] // width - sub edi, esi - vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha - - convertloop: - READYUV411_AVX2 - YUVTORGB_AVX2(ebx) - STOREARGB_AVX2 - - sub ecx, 16 - jg convertloop - - pop ebx - pop edi - pop esi - vzeroupper - ret - } -} -#endif // HAS_I411TOARGBROW_AVX2 - #ifdef HAS_NV12TOARGBROW_AVX2 // 16 pixels. // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). -__declspec(naked) -void NV12ToARGBRow_AVX2(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void NV12ToARGBRow_AVX2( + const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push ebx - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // UV + mov eax, [esp + 8 + 4] // Y + mov esi, [esp + 8 + 8] // UV mov edx, [esp + 8 + 12] // argb mov ebx, [esp + 8 + 16] // yuvconstants mov ecx, [esp + 8 + 20] // width - vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha + vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READNV12_AVX2 @@ -2276,21 +2221,21 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf, #ifdef HAS_NV21TOARGBROW_AVX2 // 16 pixels. // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 
-__declspec(naked) -void NV21ToARGBRow_AVX2(const uint8* y_buf, - const uint8* vu_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void NV21ToARGBRow_AVX2( + const uint8_t* y_buf, + const uint8_t* vu_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push ebx - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // VU + mov eax, [esp + 8 + 4] // Y + mov esi, [esp + 8 + 8] // VU mov edx, [esp + 8 + 12] // argb mov ebx, [esp + 8 + 16] // yuvconstants mov ecx, [esp + 8 + 20] // width - vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha + vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READNV21_AVX2 @@ -2311,18 +2256,18 @@ void NV21ToARGBRow_AVX2(const uint8* y_buf, #ifdef HAS_YUY2TOARGBROW_AVX2 // 16 pixels. // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). -__declspec(naked) -void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void YUY2ToARGBRow_AVX2( + const uint8_t* src_yuy2, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push ebx - mov eax, [esp + 4 + 4] // yuy2 - mov edx, [esp + 4 + 8] // argb + mov eax, [esp + 4 + 4] // yuy2 + mov edx, [esp + 4 + 8] // argb mov ebx, [esp + 4 + 12] // yuvconstants mov ecx, [esp + 4 + 16] // width - vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha + vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READYUY2_AVX2 @@ -2342,18 +2287,18 @@ void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, #ifdef HAS_UYVYTOARGBROW_AVX2 // 16 pixels. // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). -__declspec(naked) -void UYVYToARGBRow_AVX2(const uint8* src_uyvy, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void UYVYToARGBRow_AVX2( + const uint8_t* src_uyvy, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push ebx - mov eax, [esp + 4 + 4] // uyvy - mov edx, [esp + 4 + 8] // argb + mov eax, [esp + 4 + 4] // uyvy + mov edx, [esp + 4 + 8] // argb mov ebx, [esp + 4 + 12] // yuvconstants mov ecx, [esp + 4 + 16] // width - vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha + vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READUYVY_AVX2 @@ -2373,25 +2318,25 @@ void UYVYToARGBRow_AVX2(const uint8* src_uyvy, #ifdef HAS_I422TORGBAROW_AVX2 // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). 
-__declspec(naked) -void I422ToRGBARow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void I422ToRGBARow_AVX2( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push edi push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U + mov eax, [esp + 12 + 4] // Y + mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // abgr mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi - vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha + vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READYUV422_AVX2 @@ -2415,100 +2360,83 @@ void I422ToRGBARow_AVX2(const uint8* y_buf, // Allows a conversion with half size scaling. // Read 8 UV from 444. -#define READYUV444 __asm { \ +#define READYUV444 \ + __asm { \ __asm movq xmm0, qword ptr [esi] /* U */ \ __asm movq xmm1, qword ptr [esi + edi] /* V */ \ __asm lea esi, [esi + 8] \ - __asm punpcklbw xmm0, xmm1 /* UV */ \ + __asm punpcklbw xmm0, xmm1 /* UV */ \ __asm movq xmm4, qword ptr [eax] \ __asm punpcklbw xmm4, xmm4 \ - __asm lea eax, [eax + 8] \ - } + __asm lea eax, [eax + 8]} // Read 4 UV from 422, upsample to 8 UV. -#define READYUV422 __asm { \ - __asm movd xmm0, [esi] /* U */ \ - __asm movd xmm1, [esi + edi] /* V */ \ +#define READYUV422 \ + __asm { \ + __asm movd xmm0, [esi] /* U */ \ + __asm movd xmm1, [esi + edi] /* V */ \ __asm lea esi, [esi + 4] \ - __asm punpcklbw xmm0, xmm1 /* UV */ \ - __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ + __asm punpcklbw xmm0, xmm1 /* UV */ \ + __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ __asm movq xmm4, qword ptr [eax] \ __asm punpcklbw xmm4, xmm4 \ - __asm lea eax, [eax + 8] \ - } + __asm lea eax, [eax + 8]} // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. -#define READYUVA422 __asm { \ - __asm movd xmm0, [esi] /* U */ \ - __asm movd xmm1, [esi + edi] /* V */ \ +#define READYUVA422 \ + __asm { \ + __asm movd xmm0, [esi] /* U */ \ + __asm movd xmm1, [esi + edi] /* V */ \ __asm lea esi, [esi + 4] \ - __asm punpcklbw xmm0, xmm1 /* UV */ \ - __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ - __asm movq xmm4, qword ptr [eax] /* Y */ \ + __asm punpcklbw xmm0, xmm1 /* UV */ \ + __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ + __asm movq xmm4, qword ptr [eax] /* Y */ \ __asm punpcklbw xmm4, xmm4 \ __asm lea eax, [eax + 8] \ - __asm movq xmm5, qword ptr [ebp] /* A */ \ - __asm lea ebp, [ebp + 8] \ - } - -// Read 2 UV from 411, upsample to 8 UV. -// drmemory fails with memory fault if pinsrw used. libyuv bug: 525 -// __asm pinsrw xmm0, [esi], 0 /* U */ -// __asm pinsrw xmm1, [esi + edi], 0 /* V */ -#define READYUV411_EBX __asm { \ - __asm movzx ebx, word ptr [esi] /* U */ \ - __asm movd xmm0, ebx \ - __asm movzx ebx, word ptr [esi + edi] /* V */ \ - __asm movd xmm1, ebx \ - __asm lea esi, [esi + 2] \ - __asm punpcklbw xmm0, xmm1 /* UV */ \ - __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ - __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \ - __asm movq xmm4, qword ptr [eax] \ - __asm punpcklbw xmm4, xmm4 \ - __asm lea eax, [eax + 8] \ - } + __asm movq xmm5, qword ptr [ebp] /* A */ \ + __asm lea ebp, [ebp + 8]} // Read 4 UV from NV12, upsample to 8 UV. 
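All of the 4:2:0/4:2:2 readers here upsample chroma the same way: nearest-neighbor duplication, done in one instruction by unpacking the UV words with themselves (punpcklwd xmm0, xmm0). A scalar equivalent, for reference:

#include <stdint.h>

/* Scalar model of the 2x horizontal chroma upsample: repeat each
   interleaved U,V pair so every luma sample has a chroma sample. */
static void UpsampleUV2x_C_sketch(const uint8_t* uv, uint8_t* uv_out,
                                  int pairs) {
  int i;
  for (i = 0; i < pairs; ++i) {
    uv_out[4 * i + 0] = uv[2 * i + 0]; /* U */
    uv_out[4 * i + 1] = uv[2 * i + 1]; /* V */
    uv_out[4 * i + 2] = uv[2 * i + 0]; /* U repeated */
    uv_out[4 * i + 3] = uv[2 * i + 1]; /* V repeated */
  }
}

No filtering is involved; this is plain pixel replication.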
-#define READNV12 __asm { \ +#define READNV12 \ + __asm { \ __asm movq xmm0, qword ptr [esi] /* UV */ \ __asm lea esi, [esi + 8] \ - __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ + __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ __asm movq xmm4, qword ptr [eax] \ __asm punpcklbw xmm4, xmm4 \ - __asm lea eax, [eax + 8] \ - } + __asm lea eax, [eax + 8]} // Read 4 VU from NV21, upsample to 8 UV. -#define READNV21 __asm { \ +#define READNV21 \ + __asm { \ __asm movq xmm0, qword ptr [esi] /* UV */ \ __asm lea esi, [esi + 8] \ __asm pshufb xmm0, xmmword ptr kShuffleNV21 \ __asm movq xmm4, qword ptr [eax] \ __asm punpcklbw xmm4, xmm4 \ - __asm lea eax, [eax + 8] \ - } + __asm lea eax, [eax + 8]} // Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV. -#define READYUY2 __asm { \ - __asm movdqu xmm4, [eax] /* YUY2 */ \ +#define READYUY2 \ + __asm { \ + __asm movdqu xmm4, [eax] /* YUY2 */ \ __asm pshufb xmm4, xmmword ptr kShuffleYUY2Y \ - __asm movdqu xmm0, [eax] /* UV */ \ + __asm movdqu xmm0, [eax] /* UV */ \ __asm pshufb xmm0, xmmword ptr kShuffleYUY2UV \ - __asm lea eax, [eax + 16] \ - } + __asm lea eax, [eax + 16]} // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV. -#define READUYVY __asm { \ - __asm movdqu xmm4, [eax] /* UYVY */ \ +#define READUYVY \ + __asm { \ + __asm movdqu xmm4, [eax] /* UYVY */ \ __asm pshufb xmm4, xmmword ptr kShuffleUYVYY \ - __asm movdqu xmm0, [eax] /* UV */ \ + __asm movdqu xmm0, [eax] /* UV */ \ __asm pshufb xmm0, xmmword ptr kShuffleUYVYUV \ - __asm lea eax, [eax + 16] \ - } + __asm lea eax, [eax + 16]} // Convert 8 pixels: 8 UV and 8 Y. -#define YUVTORGB(YuvConstants) __asm { \ +#define YUVTORGB(YuvConstants) \ + __asm { \ __asm movdqa xmm1, xmm0 \ __asm movdqa xmm2, xmm0 \ __asm movdqa xmm3, xmm0 \ @@ -2522,129 +2450,125 @@ void I422ToRGBARow_AVX2(const uint8* y_buf, __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \ __asm psubw xmm2, xmm3 \ __asm pmulhuw xmm4, xmmword ptr [YuvConstants + KYTORGB] \ - __asm paddsw xmm0, xmm4 /* B += Y */ \ - __asm paddsw xmm1, xmm4 /* G += Y */ \ - __asm paddsw xmm2, xmm4 /* R += Y */ \ + __asm paddsw xmm0, xmm4 /* B += Y */ \ + __asm paddsw xmm1, xmm4 /* G += Y */ \ + __asm paddsw xmm2, xmm4 /* R += Y */ \ __asm psraw xmm0, 6 \ __asm psraw xmm1, 6 \ __asm psraw xmm2, 6 \ - __asm packuswb xmm0, xmm0 /* B */ \ - __asm packuswb xmm1, xmm1 /* G */ \ - __asm packuswb xmm2, xmm2 /* R */ \ + __asm packuswb xmm0, xmm0 /* B */ \ + __asm packuswb xmm1, xmm1 /* G */ \ + __asm packuswb xmm2, xmm2 /* R */ \ } // Store 8 ARGB values. -#define STOREARGB __asm { \ - __asm punpcklbw xmm0, xmm1 /* BG */ \ - __asm punpcklbw xmm2, xmm5 /* RA */ \ +#define STOREARGB \ + __asm { \ + __asm punpcklbw xmm0, xmm1 /* BG */ \ + __asm punpcklbw xmm2, xmm5 /* RA */ \ __asm movdqa xmm1, xmm0 \ - __asm punpcklwd xmm0, xmm2 /* BGRA first 4 pixels */ \ - __asm punpckhwd xmm1, xmm2 /* BGRA next 4 pixels */ \ + __asm punpcklwd xmm0, xmm2 /* BGRA first 4 pixels */ \ + __asm punpckhwd xmm1, xmm2 /* BGRA next 4 pixels */ \ __asm movdqu 0[edx], xmm0 \ __asm movdqu 16[edx], xmm1 \ - __asm lea edx, [edx + 32] \ - } + __asm lea edx, [edx + 32]} // Store 8 BGRA values. 
-#define STOREBGRA __asm { \ - __asm pcmpeqb xmm5, xmm5 /* generate 0xffffffff for alpha */ \ - __asm punpcklbw xmm1, xmm0 /* GB */ \ - __asm punpcklbw xmm5, xmm2 /* AR */ \ +#define STOREBGRA \ + __asm { \ + __asm pcmpeqb xmm5, xmm5 /* generate 0xffffffff for alpha */ \ + __asm punpcklbw xmm1, xmm0 /* GB */ \ + __asm punpcklbw xmm5, xmm2 /* AR */ \ __asm movdqa xmm0, xmm5 \ - __asm punpcklwd xmm5, xmm1 /* BGRA first 4 pixels */ \ - __asm punpckhwd xmm0, xmm1 /* BGRA next 4 pixels */ \ + __asm punpcklwd xmm5, xmm1 /* BGRA first 4 pixels */ \ + __asm punpckhwd xmm0, xmm1 /* BGRA next 4 pixels */ \ __asm movdqu 0[edx], xmm5 \ __asm movdqu 16[edx], xmm0 \ - __asm lea edx, [edx + 32] \ - } + __asm lea edx, [edx + 32]} // Store 8 RGBA values. -#define STORERGBA __asm { \ - __asm pcmpeqb xmm5, xmm5 /* generate 0xffffffff for alpha */ \ - __asm punpcklbw xmm1, xmm2 /* GR */ \ - __asm punpcklbw xmm5, xmm0 /* AB */ \ +#define STORERGBA \ + __asm { \ + __asm pcmpeqb xmm5, xmm5 /* generate 0xffffffff for alpha */ \ + __asm punpcklbw xmm1, xmm2 /* GR */ \ + __asm punpcklbw xmm5, xmm0 /* AB */ \ __asm movdqa xmm0, xmm5 \ - __asm punpcklwd xmm5, xmm1 /* RGBA first 4 pixels */ \ - __asm punpckhwd xmm0, xmm1 /* RGBA next 4 pixels */ \ + __asm punpcklwd xmm5, xmm1 /* RGBA first 4 pixels */ \ + __asm punpckhwd xmm0, xmm1 /* RGBA next 4 pixels */ \ __asm movdqu 0[edx], xmm5 \ __asm movdqu 16[edx], xmm0 \ - __asm lea edx, [edx + 32] \ - } + __asm lea edx, [edx + 32]} // Store 8 RGB24 values. -#define STORERGB24 __asm { \ - /* Weave into RRGB */ \ - __asm punpcklbw xmm0, xmm1 /* BG */ \ - __asm punpcklbw xmm2, xmm2 /* RR */ \ +#define STORERGB24 \ + __asm {/* Weave into RRGB */ \ + __asm punpcklbw xmm0, xmm1 /* BG */ \ + __asm punpcklbw xmm2, xmm2 /* RR */ \ __asm movdqa xmm1, xmm0 \ - __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \ - __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ \ - /* RRGB -> RGB24 */ \ - __asm pshufb xmm0, xmm5 /* Pack first 8 and last 4 bytes. */ \ - __asm pshufb xmm1, xmm6 /* Pack first 12 bytes. */ \ - __asm palignr xmm1, xmm0, 12 /* last 4 bytes of xmm0 + 12 xmm1 */ \ - __asm movq qword ptr 0[edx], xmm0 /* First 8 bytes */ \ - __asm movdqu 8[edx], xmm1 /* Last 16 bytes */ \ - __asm lea edx, [edx + 24] \ - } + __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \ + __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ /* RRGB -> RGB24 */ \ + __asm pshufb xmm0, xmm5 /* Pack first 8 and last 4 bytes. */ \ + __asm pshufb xmm1, xmm6 /* Pack first 12 bytes. */ \ + __asm palignr xmm1, xmm0, 12 /* last 4 bytes of xmm0 + 12 xmm1 */ \ + __asm movq qword ptr 0[edx], xmm0 /* First 8 bytes */ \ + __asm movdqu 8[edx], xmm1 /* Last 16 bytes */ \ + __asm lea edx, [edx + 24]} // Store 8 RGB565 values. 
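STORERGB565 below packs 8-8-8 BGR down to the 5-6-5 layout with a run of shift/mask/or operations on dwords; per pixel it amounts to this bit packing:

#include <stdint.h>

/* Scalar model of the RGB565 pack: 5 bits B, 6 bits G, 5 bits R,
   with R in the top bits. */
static uint16_t PackRGB565_C_sketch(uint8_t r, uint8_t g, uint8_t b) {
  return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
}

The masks generated at the top of the RGB565 rows (0x0000001f, 0x000007e0, 0x0000f800) are exactly these three fields.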
-#define STORERGB565 __asm { \ - /* Weave into RRGB */ \ - __asm punpcklbw xmm0, xmm1 /* BG */ \ - __asm punpcklbw xmm2, xmm2 /* RR */ \ +#define STORERGB565 \ + __asm {/* Weave into RRGB */ \ + __asm punpcklbw xmm0, xmm1 /* BG */ \ + __asm punpcklbw xmm2, xmm2 /* RR */ \ __asm movdqa xmm1, xmm0 \ - __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \ - __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ \ - /* RRGB -> RGB565 */ \ - __asm movdqa xmm3, xmm0 /* B first 4 pixels of argb */ \ - __asm movdqa xmm2, xmm0 /* G */ \ - __asm pslld xmm0, 8 /* R */ \ - __asm psrld xmm3, 3 /* B */ \ - __asm psrld xmm2, 5 /* G */ \ - __asm psrad xmm0, 16 /* R */ \ - __asm pand xmm3, xmm5 /* B */ \ - __asm pand xmm2, xmm6 /* G */ \ - __asm pand xmm0, xmm7 /* R */ \ - __asm por xmm3, xmm2 /* BG */ \ - __asm por xmm0, xmm3 /* BGR */ \ - __asm movdqa xmm3, xmm1 /* B next 4 pixels of argb */ \ - __asm movdqa xmm2, xmm1 /* G */ \ - __asm pslld xmm1, 8 /* R */ \ - __asm psrld xmm3, 3 /* B */ \ - __asm psrld xmm2, 5 /* G */ \ - __asm psrad xmm1, 16 /* R */ \ - __asm pand xmm3, xmm5 /* B */ \ - __asm pand xmm2, xmm6 /* G */ \ - __asm pand xmm1, xmm7 /* R */ \ - __asm por xmm3, xmm2 /* BG */ \ - __asm por xmm1, xmm3 /* BGR */ \ + __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \ + __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ /* RRGB -> RGB565 */ \ + __asm movdqa xmm3, xmm0 /* B first 4 pixels of argb */ \ + __asm movdqa xmm2, xmm0 /* G */ \ + __asm pslld xmm0, 8 /* R */ \ + __asm psrld xmm3, 3 /* B */ \ + __asm psrld xmm2, 5 /* G */ \ + __asm psrad xmm0, 16 /* R */ \ + __asm pand xmm3, xmm5 /* B */ \ + __asm pand xmm2, xmm6 /* G */ \ + __asm pand xmm0, xmm7 /* R */ \ + __asm por xmm3, xmm2 /* BG */ \ + __asm por xmm0, xmm3 /* BGR */ \ + __asm movdqa xmm3, xmm1 /* B next 4 pixels of argb */ \ + __asm movdqa xmm2, xmm1 /* G */ \ + __asm pslld xmm1, 8 /* R */ \ + __asm psrld xmm3, 3 /* B */ \ + __asm psrld xmm2, 5 /* G */ \ + __asm psrad xmm1, 16 /* R */ \ + __asm pand xmm3, xmm5 /* B */ \ + __asm pand xmm2, xmm6 /* G */ \ + __asm pand xmm1, xmm7 /* R */ \ + __asm por xmm3, xmm2 /* BG */ \ + __asm por xmm1, xmm3 /* BGR */ \ __asm packssdw xmm0, xmm1 \ - __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \ - __asm lea edx, [edx + 16] \ - } + __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \ + __asm lea edx, [edx + 16]} // 8 pixels. // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) -void I444ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void I444ToARGBRow_SSSE3( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push edi push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U + mov eax, [esp + 12 + 4] // Y + mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha + pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha convertloop: READYUV444 @@ -2663,19 +2587,19 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf, // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). 
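RGB24 is the same B,G,R byte order with the alpha byte dropped, which is what the STORERGB24 shuffles above accomplish; per pixel the store amounts to:

#include <stdint.h>

/* Scalar model of STORERGB24: planar B,G,R packed at 3 bytes per pixel. */
static void StoreRGB24_C_sketch(const uint8_t* b, const uint8_t* g,
                                const uint8_t* r, uint8_t* dst_rgb24,
                                int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb24[3 * i + 0] = b[i];
    dst_rgb24[3 * i + 1] = g[i];
    dst_rgb24[3 * i + 2] = r[i];
  }
}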
-__declspec(naked) -void I422ToRGB24Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void I422ToRGB24Row_SSSE3( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push edi push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U + mov eax, [esp + 12 + 4] // Y + mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants @@ -2701,30 +2625,30 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf, // 8 pixels // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). -__declspec(naked) -void I422ToRGB565Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb565_buf, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void I422ToRGB565Row_SSSE3( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* rgb565_buf, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push edi push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U + mov eax, [esp + 12 + 4] // Y + mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi - pcmpeqb xmm5, xmm5 // generate mask 0x0000001f + pcmpeqb xmm5, xmm5 // generate mask 0x0000001f psrld xmm5, 27 - pcmpeqb xmm6, xmm6 // generate mask 0x000007e0 + pcmpeqb xmm6, xmm6 // generate mask 0x000007e0 psrld xmm6, 26 pslld xmm6, 5 - pcmpeqb xmm7, xmm7 // generate mask 0xfffff800 + pcmpeqb xmm7, xmm7 // generate mask 0xfffff800 pslld xmm7, 11 convertloop: @@ -2744,25 +2668,25 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf, // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) -void I422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void I422ToARGBRow_SSSE3( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push edi push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U + mov eax, [esp + 12 + 4] // Y + mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha + pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha convertloop: READYUV422 @@ -2781,21 +2705,21 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB. 
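The alpha variant below is the only row in this group that consumes a fourth plane; it substitutes a per-pixel alpha byte for the constant 0xff, which is why it needs ebp as an extra pointer register. A scalar sketch, reusing YuvToRgb_C_sketch from the earlier sketch and assuming I422's 2x horizontal chroma subsampling:

#include <stdint.h>

/* Scalar model of I422AlphaToARGB: same as I422ToARGB except alpha comes
   from a_buf instead of being hardwired to 0xff. */
static void I422AlphaToARGB_C_sketch(const uint8_t* y_buf,
                                     const uint8_t* u_buf,
                                     const uint8_t* v_buf,
                                     const uint8_t* a_buf,
                                     uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t r, g, b;
    YuvToRgb_C_sketch(y_buf[i], u_buf[i / 2], v_buf[i / 2], &r, &g, &b);
    dst_argb[4 * i + 0] = b;
    dst_argb[4 * i + 1] = g;
    dst_argb[4 * i + 2] = r;
    dst_argb[4 * i + 3] = a_buf[i]; /* per-pixel alpha */
  }
}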
-__declspec(naked) -void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void I422AlphaToARGBRow_SSSE3( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push edi push ebx push ebp - mov eax, [esp + 16 + 4] // Y - mov esi, [esp + 16 + 8] // U + mov eax, [esp + 16 + 4] // Y + mov esi, [esp + 16 + 8] // U mov edi, [esp + 16 + 12] // V mov ebp, [esp + 16 + 16] // A mov edx, [esp + 16 + 20] // argb @@ -2819,63 +2743,23 @@ void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, } } -// 8 pixels. -// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -// Similar to I420 but duplicate UV once more. -__declspec(naked) -void I411ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { - __asm { - push esi - push edi - push ebx - push ebp - mov eax, [esp + 16 + 4] // Y - mov esi, [esp + 16 + 8] // U - mov edi, [esp + 16 + 12] // V - mov edx, [esp + 16 + 16] // abgr - mov ebp, [esp + 16 + 20] // yuvconstants - mov ecx, [esp + 16 + 24] // width - sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - - convertloop: - READYUV411_EBX - YUVTORGB(ebp) - STOREARGB - - sub ecx, 8 - jg convertloop - - pop ebp - pop ebx - pop edi - pop esi - ret - } -} - // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) -void NV12ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void NV12ToARGBRow_SSSE3( + const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push ebx - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // UV + mov eax, [esp + 8 + 4] // Y + mov esi, [esp + 8 + 8] // UV mov edx, [esp + 8 + 12] // argb mov ebx, [esp + 8 + 16] // yuvconstants mov ecx, [esp + 8 + 20] // width - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha + pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha convertloop: READNV12 @@ -2893,21 +2777,21 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf, // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) -void NV21ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* vu_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void NV21ToARGBRow_SSSE3( + const uint8_t* y_buf, + const uint8_t* vu_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push ebx - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // VU + mov eax, [esp + 8 + 4] // Y + mov esi, [esp + 8 + 8] // VU mov edx, [esp + 8 + 12] // argb mov ebx, [esp + 8 + 16] // yuvconstants mov ecx, [esp + 8 + 20] // width - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha + pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha convertloop: READNV21 @@ -2925,18 +2809,18 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf, // 8 pixels. // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes). 
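YUY2 is a packed 4:2:2 format: each 4-byte group is Y0,U,Y1,V and covers two pixels. The row kernel splits it into a Y run and an upsampled UV run with two pshufb shuffles (kShuffleYUY2Y / kShuffleYUY2UV); a scalar sketch of the same split:

#include <stdint.h>

/* Scalar model of READYUY2: deinterleave Y0,U,Y1,V into Y bytes and
   duplicated U,V pairs (each chroma sample is shared by two pixels). */
static void ReadYUY2_C_sketch(const uint8_t* yuy2, uint8_t* y_out,
                              uint8_t* uv_out, int pixel_pairs) {
  int i;
  for (i = 0; i < pixel_pairs; ++i) {
    y_out[2 * i + 0] = yuy2[4 * i + 0];  /* Y0 */
    y_out[2 * i + 1] = yuy2[4 * i + 2];  /* Y1 */
    uv_out[4 * i + 0] = yuy2[4 * i + 1]; /* U */
    uv_out[4 * i + 1] = yuy2[4 * i + 3]; /* V */
    uv_out[4 * i + 2] = yuy2[4 * i + 1]; /* U repeated */
    uv_out[4 * i + 3] = yuy2[4 * i + 3]; /* V repeated */
  }
}

UYVY is the same idea with the bytes rotated (U,Y0,V,Y1), hence the separate kShuffleUYVY tables.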
-__declspec(naked) -void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void YUY2ToARGBRow_SSSE3( + const uint8_t* src_yuy2, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push ebx - mov eax, [esp + 4 + 4] // yuy2 - mov edx, [esp + 4 + 8] // argb + mov eax, [esp + 4 + 4] // yuy2 + mov edx, [esp + 4 + 8] // argb mov ebx, [esp + 4 + 12] // yuvconstants mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha + pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha convertloop: READYUY2 @@ -2953,18 +2837,18 @@ void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, // 8 pixels. // 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes). -__declspec(naked) -void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void UYVYToARGBRow_SSSE3( + const uint8_t* src_uyvy, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width) { __asm { push ebx - mov eax, [esp + 4 + 4] // uyvy - mov edx, [esp + 4 + 8] // argb + mov eax, [esp + 4 + 4] // uyvy + mov edx, [esp + 4 + 8] // argb mov ebx, [esp + 4 + 12] // yuvconstants mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha + pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha convertloop: READUYVY @@ -2979,19 +2863,19 @@ void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, } } -__declspec(naked) -void I422ToRGBARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgba, - const struct YuvConstants* yuvconstants, - int width) { +__declspec(naked) void I422ToRGBARow_SSSE3( + const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgba, + const struct YuvConstants* yuvconstants, + int width) { __asm { push esi push edi push ebx - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U + mov eax, [esp + 12 + 4] // Y + mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants @@ -3016,39 +2900,38 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf, #ifdef HAS_I400TOARGBROW_SSE2 // 8 pixels of Y converted to 8 pixels of ARGB (32 bytes). -__declspec(naked) -void I400ToARGBRow_SSE2(const uint8* y_buf, - uint8* rgb_buf, - int width) { +__declspec(naked) void I400ToARGBRow_SSE2(const uint8_t* y_buf, + uint8_t* rgb_buf, + int width) { __asm { - mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256) + mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256) movd xmm2, eax pshufd xmm2, xmm2,0 - mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16) + mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16) movd xmm3, eax pshufd xmm3, xmm3, 0 - pcmpeqb xmm4, xmm4 // generate mask 0xff000000 + pcmpeqb xmm4, xmm4 // generate mask 0xff000000 pslld xmm4, 24 - mov eax, [esp + 4] // Y - mov edx, [esp + 8] // rgb - mov ecx, [esp + 12] // width + mov eax, [esp + 4] // Y + mov edx, [esp + 8] // rgb + mov ecx, [esp + 12] // width convertloop: - // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164 + // Step 1: Scale Y contribution to 8 G values. 
G = (y - 16) * 1.164 movq xmm0, qword ptr [eax] lea eax, [eax + 8] - punpcklbw xmm0, xmm0 // Y.Y + punpcklbw xmm0, xmm0 // Y.Y pmulhuw xmm0, xmm2 psubusw xmm0, xmm3 psrlw xmm0, 6 - packuswb xmm0, xmm0 // G + packuswb xmm0, xmm0 // G - // Step 2: Weave into ARGB - punpcklbw xmm0, xmm0 // GG + // Step 2: Weave into ARGB + punpcklbw xmm0, xmm0 // GG movdqa xmm1, xmm0 - punpcklwd xmm0, xmm0 // BGRA first 4 pixels - punpckhwd xmm1, xmm1 // BGRA next 4 pixels + punpcklwd xmm0, xmm0 // BGRA first 4 pixels + punpckhwd xmm1, xmm1 // BGRA next 4 pixels por xmm0, xmm4 por xmm1, xmm4 movdqu [edx], xmm0 @@ -3064,41 +2947,40 @@ void I400ToARGBRow_SSE2(const uint8* y_buf, #ifdef HAS_I400TOARGBROW_AVX2 // 16 pixels of Y converted to 16 pixels of ARGB (64 bytes). // note: vpunpcklbw mutates and vpackuswb unmutates. -__declspec(naked) -void I400ToARGBRow_AVX2(const uint8* y_buf, - uint8* rgb_buf, - int width) { +__declspec(naked) void I400ToARGBRow_AVX2(const uint8_t* y_buf, + uint8_t* rgb_buf, + int width) { __asm { - mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256) + mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256) vmovd xmm2, eax vbroadcastss ymm2, xmm2 - mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16) + mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16) vmovd xmm3, eax vbroadcastss ymm3, xmm3 - vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xff000000 + vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xff000000 vpslld ymm4, ymm4, 24 - mov eax, [esp + 4] // Y - mov edx, [esp + 8] // rgb - mov ecx, [esp + 12] // width + mov eax, [esp + 4] // Y + mov edx, [esp + 8] // rgb + mov ecx, [esp + 12] // width convertloop: - // Step 1: Scale Y contriportbution to 16 G values. G = (y - 16) * 1.164 + // Step 1: Scale Y contriportbution to 16 G values. G = (y - 16) * 1.164 vmovdqu xmm0, [eax] lea eax, [eax + 16] - vpermq ymm0, ymm0, 0xd8 // vpunpcklbw mutates - vpunpcklbw ymm0, ymm0, ymm0 // Y.Y + vpermq ymm0, ymm0, 0xd8 // vpunpcklbw mutates + vpunpcklbw ymm0, ymm0, ymm0 // Y.Y vpmulhuw ymm0, ymm0, ymm2 vpsubusw ymm0, ymm0, ymm3 vpsrlw ymm0, ymm0, 6 - vpackuswb ymm0, ymm0, ymm0 // G. still mutated: 3120 + vpackuswb ymm0, ymm0, ymm0 // G. still mutated: 3120 - // TODO(fbarchard): Weave alpha with unpack. - // Step 2: Weave into ARGB - vpunpcklbw ymm1, ymm0, ymm0 // GG - mutates + // TODO(fbarchard): Weave alpha with unpack. + // Step 2: Weave into ARGB + vpunpcklbw ymm1, ymm0, ymm0 // GG - mutates vpermq ymm1, ymm1, 0xd8 - vpunpcklwd ymm0, ymm1, ymm1 // GGGG first 8 pixels - vpunpckhwd ymm1, ymm1, ymm1 // GGGG next 8 pixels + vpunpcklwd ymm0, ymm1, ymm1 // GGGG first 8 pixels + vpunpckhwd ymm1, ymm1, ymm1 // GGGG next 8 pixels vpor ymm0, ymm0, ymm4 vpor ymm1, ymm1, ymm4 vmovdqu [edx], ymm0 @@ -3114,16 +2996,16 @@ void I400ToARGBRow_AVX2(const uint8* y_buf, #ifdef HAS_MIRRORROW_SSSE3 // Shuffle table for reversing the bytes. -static const uvec8 kShuffleMirror = { - 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u -}; +static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, + 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; // TODO(fbarchard): Replace lea with -16 offset. 
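MirrorRow is a plain byte reversal; the SSSE3 version handles 16 bytes per pshufb using the kShuffleMirror table above. The scalar equivalent, for reference:

#include <stdint.h>

/* Scalar model of MirrorRow: reverse a row of bytes. */
static void MirrorRow_C_sketch(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src[width - 1 - i];
  }
}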
-__declspec(naked) -void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { +__declspec(naked) void MirrorRow_SSSE3(const uint8_t* src, + uint8_t* dst, + int width) { __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst mov ecx, [esp + 12] // width movdqa xmm5, xmmword ptr kShuffleMirror @@ -3140,11 +3022,12 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { #endif // HAS_MIRRORROW_SSSE3 #ifdef HAS_MIRRORROW_AVX2 -__declspec(naked) -void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { +__declspec(naked) void MirrorRow_AVX2(const uint8_t* src, + uint8_t* dst, + int width) { __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst mov ecx, [esp + 12] // width vbroadcastf128 ymm5, xmmword ptr kShuffleMirror @@ -3164,17 +3047,17 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { #ifdef HAS_MIRRORUVROW_SSSE3 // Shuffle table for reversing the bytes of UV channels. -static const uvec8 kShuffleMirrorUV = { - 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u -}; +static const uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, + 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u}; -__declspec(naked) -void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, - int width) { +__declspec(naked) void MirrorUVRow_SSSE3(const uint8_t* src, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push edi - mov eax, [esp + 4 + 4] // src - mov edx, [esp + 4 + 8] // dst_u + mov eax, [esp + 4 + 4] // src + mov edx, [esp + 4 + 8] // dst_u mov edi, [esp + 4 + 12] // dst_v mov ecx, [esp + 4 + 16] // width movdqa xmm1, xmmword ptr kShuffleMirrorUV @@ -3198,11 +3081,12 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, #endif // HAS_MIRRORUVROW_SSSE3 #ifdef HAS_ARGBMIRRORROW_SSE2 -__declspec(naked) -void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) { +__declspec(naked) void ARGBMirrorRow_SSE2(const uint8_t* src, + uint8_t* dst, + int width) { __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst mov ecx, [esp + 12] // width lea eax, [eax - 16 + ecx * 4] // last 4 pixels. @@ -3221,15 +3105,14 @@ void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width) { #ifdef HAS_ARGBMIRRORROW_AVX2 // Shuffle table for reversing the bytes. 
-static const ulvec32 kARGBShuffleMirror_AVX2 = { - 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u -}; +static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; -__declspec(naked) -void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { +__declspec(naked) void ARGBMirrorRow_AVX2(const uint8_t* src, + uint8_t* dst, + int width) { __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst mov ecx, [esp + 12] // width vmovdqu ymm5, ymmword ptr kARGBShuffleMirror_AVX2 @@ -3246,16 +3129,17 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { #endif // HAS_ARGBMIRRORROW_AVX2 #ifdef HAS_SPLITUVROW_SSE2 -__declspec(naked) -void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width) { +__declspec(naked) void SplitUVRow_SSE2(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push edi - mov eax, [esp + 4 + 4] // src_uv - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff + mov eax, [esp + 4 + 4] // src_uv + mov edx, [esp + 4 + 8] // dst_u + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // width + pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff psrlw xmm5, 8 sub edi, edx @@ -3265,10 +3149,10 @@ void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, lea eax, [eax + 32] movdqa xmm2, xmm0 movdqa xmm3, xmm1 - pand xmm0, xmm5 // even bytes + pand xmm0, xmm5 // even bytes pand xmm1, xmm5 packuswb xmm0, xmm1 - psrlw xmm2, 8 // odd bytes + psrlw xmm2, 8 // odd bytes psrlw xmm3, 8 packuswb xmm2, xmm3 movdqu [edx], xmm0 @@ -3285,16 +3169,17 @@ void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, #endif // HAS_SPLITUVROW_SSE2 #ifdef HAS_SPLITUVROW_AVX2 -__declspec(naked) -void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width) { +__declspec(naked) void SplitUVRow_AVX2(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push edi - mov eax, [esp + 4 + 4] // src_uv - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // width - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff + mov eax, [esp + 4 + 4] // src_uv + mov edx, [esp + 4 + 8] // dst_u + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // width + vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff vpsrlw ymm5, ymm5, 8 sub edi, edx @@ -3302,9 +3187,9 @@ void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] - vpsrlw ymm2, ymm0, 8 // odd bytes + vpsrlw ymm2, ymm0, 8 // odd bytes vpsrlw ymm3, ymm1, 8 - vpand ymm0, ymm0, ymm5 // even bytes + vpand ymm0, ymm0, ymm5 // even bytes vpand ymm1, ymm1, ymm5 vpackuswb ymm0, ymm0, ymm1 vpackuswb ymm2, ymm2, ymm3 @@ -3324,24 +3209,25 @@ void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, #endif // HAS_SPLITUVROW_AVX2 #ifdef HAS_MERGEUVROW_SSE2 -__declspec(naked) -void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) { +__declspec(naked) void MergeUVRow_SSE2(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width) { __asm { push edi - mov eax, [esp + 4 + 4] // src_u - mov edx, [esp + 4 + 8] // src_v - mov edi, [esp + 4 + 12] // dst_uv - mov ecx, [esp + 4 + 16] // width + mov eax, [esp + 4 + 4] // src_u + mov edx, [esp + 4 + 8] // src_v + mov edi, [esp + 4 + 12] // 
dst_uv + mov ecx, [esp + 4 + 16] // width sub edx, eax convertloop: - movdqu xmm0, [eax] // read 16 U's + movdqu xmm0, [eax] // read 16 U's movdqu xmm1, [eax + edx] // and 16 V's lea eax, [eax + 16] movdqa xmm2, xmm0 - punpcklbw xmm0, xmm1 // first 8 UV pairs - punpckhbw xmm2, xmm1 // next 8 UV pairs + punpcklbw xmm0, xmm1 // first 8 UV pairs + punpckhbw xmm2, xmm1 // next 8 UV pairs movdqu [edi], xmm0 movdqu [edi + 16], xmm2 lea edi, [edi + 32] @@ -3355,24 +3241,25 @@ void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, #endif // HAS_MERGEUVROW_SSE2 #ifdef HAS_MERGEUVROW_AVX2 -__declspec(naked) -void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) { +__declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width) { __asm { push edi - mov eax, [esp + 4 + 4] // src_u - mov edx, [esp + 4 + 8] // src_v - mov edi, [esp + 4 + 12] // dst_uv - mov ecx, [esp + 4 + 16] // width + mov eax, [esp + 4 + 4] // src_u + mov edx, [esp + 4 + 8] // src_v + mov edi, [esp + 4 + 12] // dst_uv + mov ecx, [esp + 4 + 16] // width sub edx, eax convertloop: - vmovdqu ymm0, [eax] // read 32 U's - vmovdqu ymm1, [eax + edx] // and 32 V's + vmovdqu ymm0, [eax] // read 32 U's + vmovdqu ymm1, [eax + edx] // and 32 V's lea eax, [eax + 32] - vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2 - vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3 - vextractf128 [edi], ymm2, 0 // bytes 0..15 + vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2 + vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3 + vextractf128 [edi], ymm2, 0 // bytes 0..15 vextractf128 [edi + 16], ymm0, 0 // bytes 16..31 vextractf128 [edi + 32], ymm2, 1 // bytes 32..47 vextractf128 [edi + 48], ymm0, 1 // bytes 47..63 @@ -3388,13 +3275,14 @@ void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, #endif // HAS_MERGEUVROW_AVX2 #ifdef HAS_COPYROW_SSE2 -// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time. -__declspec(naked) -void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { +// CopyRow copys 'width' bytes using a 16 byte load/store, 32 bytes at time. +__declspec(naked) void CopyRow_SSE2(const uint8_t* src, + uint8_t* dst, + int width) { __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst + mov ecx, [esp + 12] // width test eax, 15 jne convertloopu test edx, 15 @@ -3426,13 +3314,14 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { #endif // HAS_COPYROW_SSE2 #ifdef HAS_COPYROW_AVX -// CopyRow copys 'count' bytes using a 32 byte load/store, 64 bytes at time. -__declspec(naked) -void CopyRow_AVX(const uint8* src, uint8* dst, int count) { +// CopyRow copys 'width' bytes using a 32 byte load/store, 64 bytes at time. +__declspec(naked) void CopyRow_AVX(const uint8_t* src, + uint8_t* dst, + int width) { __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst + mov ecx, [esp + 12] // width convertloop: vmovdqu ymm0, [eax] @@ -3451,14 +3340,15 @@ void CopyRow_AVX(const uint8* src, uint8* dst, int count) { #endif // HAS_COPYROW_AVX // Multiple of 1. 
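// A rough scalar model (an illustrative sketch, not part of the upstream
// patch) of the SplitUVRow_* and MergeUVRow_* kernels above: SplitUV
// deinterleaves packed UVUV... into planar U and V (pand with 0x00ff00ff
// keeps the even bytes, psrlw 8 exposes the odd bytes), and MergeUV inverts
// it with punpcklbw/punpckhbw.
#include <stdint.h>

static void SplitUVRow_Model(const uint8_t* src_uv, uint8_t* dst_u,
                             uint8_t* dst_v, int width) {
  for (int i = 0; i < width; ++i) {
    dst_u[i] = src_uv[2 * i + 0];  // even bytes: pand with the mask
    dst_v[i] = src_uv[2 * i + 1];  // odd bytes: psrlw 8 + packuswb
  }
}

static void MergeUVRow_Model(const uint8_t* src_u, const uint8_t* src_v,
                             uint8_t* dst_uv, int width) {
  for (int i = 0; i < width; ++i) {
    dst_uv[2 * i + 0] = src_u[i];  // punpcklbw/punpckhbw interleave
    dst_uv[2 * i + 1] = src_v[i];
  }
}
// The "Multiple of 1." note above refers to CopyRow_ERMS below: rep movsb has
// byte granularity, so the width needs no particular alignment.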
-__declspec(naked) -void CopyRow_ERMS(const uint8* src, uint8* dst, int count) { +__declspec(naked) void CopyRow_ERMS(const uint8_t* src, + uint8_t* dst, + int width) { __asm { mov eax, esi mov edx, edi - mov esi, [esp + 4] // src - mov edi, [esp + 8] // dst - mov ecx, [esp + 12] // count + mov esi, [esp + 4] // src + mov edi, [esp + 8] // dst + mov ecx, [esp + 12] // width rep movsb mov edi, edx mov esi, eax @@ -3468,15 +3358,16 @@ void CopyRow_ERMS(const uint8* src, uint8* dst, int count) { #ifdef HAS_ARGBCOPYALPHAROW_SSE2 // width in pixels -__declspec(naked) -void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { +__declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8_t* src, + uint8_t* dst, + int width) { __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count - pcmpeqb xmm0, xmm0 // generate mask 0xff000000 + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst + mov ecx, [esp + 12] // width + pcmpeqb xmm0, xmm0 // generate mask 0xff000000 pslld xmm0, 24 - pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff + pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff psrld xmm1, 8 convertloop: @@ -3504,14 +3395,15 @@ void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { #ifdef HAS_ARGBCOPYALPHAROW_AVX2 // width in pixels -__declspec(naked) -void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { +__declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8_t* src, + uint8_t* dst, + int width) { __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst + mov ecx, [esp + 12] // width vpcmpeqb ymm0, ymm0, ymm0 - vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff + vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff convertloop: vmovdqu ymm1, [eax] @@ -3533,11 +3425,12 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 // width in pixels -__declspec(naked) -void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) { +__declspec(naked) void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_a + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_a mov ecx, [esp + 12] // width extractloop: @@ -3558,17 +3451,54 @@ void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) { } #endif // HAS_ARGBEXTRACTALPHAROW_SSE2 +#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 +// width in pixels +__declspec(naked) void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_a, + int width) { + __asm { + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_a + mov ecx, [esp + 12] // width + vmovdqa ymm4, ymmword ptr kPermdARGBToY_AVX + + extractloop: + vmovdqu ymm0, [eax] + vmovdqu ymm1, [eax + 32] + vpsrld ymm0, ymm0, 24 + vpsrld ymm1, ymm1, 24 + vmovdqu ymm2, [eax + 64] + vmovdqu ymm3, [eax + 96] + lea eax, [eax + 128] + vpackssdw ymm0, ymm0, ymm1 // mutates + vpsrld ymm2, ymm2, 24 + vpsrld ymm3, ymm3, 24 + vpackssdw ymm2, ymm2, ymm3 // mutates + vpackuswb ymm0, ymm0, ymm2 // mutates + vpermd ymm0, ymm4, ymm0 // unmutate + vmovdqu [edx], ymm0 + lea edx, [edx + 32] + sub ecx, 32 + jg extractloop + + vzeroupper + ret + } +} +#endif // HAS_ARGBEXTRACTALPHAROW_AVX2 + #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 // width in pixels -__declspec(naked) -void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { +__declspec(naked) void 
ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, + uint8_t* dst, + int width) { __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count - pcmpeqb xmm0, xmm0 // generate mask 0xff000000 + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst + mov ecx, [esp + 12] // width + pcmpeqb xmm0, xmm0 // generate mask 0xff000000 pslld xmm0, 24 - pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff + pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff psrld xmm1, 8 convertloop: @@ -3598,14 +3528,15 @@ void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 // width in pixels -__declspec(naked) -void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { +__declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, + uint8_t* dst, + int width) { __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count + mov eax, [esp + 4] // src + mov edx, [esp + 8] // dst + mov ecx, [esp + 12] // width vpcmpeqb ymm0, ymm0, ymm0 - vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff + vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff convertloop: vpmovzxbd ymm1, qword ptr [eax] @@ -3628,17 +3559,16 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { #endif // HAS_ARGBCOPYYTOALPHAROW_AVX2 #ifdef HAS_SETROW_X86 -// Write 'count' bytes using an 8 bit value repeated. -// Count should be multiple of 4. -__declspec(naked) -void SetRow_X86(uint8* dst, uint8 v8, int count) { +// Write 'width' bytes using an 8 bit value repeated. +// width should be multiple of 4. +__declspec(naked) void SetRow_X86(uint8_t* dst, uint8_t v8, int width) { __asm { - movzx eax, byte ptr [esp + 8] // v8 + movzx eax, byte ptr [esp + 8] // v8 mov edx, 0x01010101 // Duplicate byte to all bytes. - mul edx // overwrites edx with upper part of result. + mul edx // overwrites edx with upper part of result. mov edx, edi - mov edi, [esp + 4] // dst - mov ecx, [esp + 12] // count + mov edi, [esp + 4] // dst + mov ecx, [esp + 12] // width shr ecx, 2 rep stosd mov edi, edx @@ -3646,28 +3576,28 @@ void SetRow_X86(uint8* dst, uint8 v8, int count) { } } -// Write 'count' bytes using an 8 bit value repeated. -__declspec(naked) -void SetRow_ERMS(uint8* dst, uint8 v8, int count) { +// Write 'width' bytes using an 8 bit value repeated. +__declspec(naked) void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) { __asm { mov edx, edi - mov edi, [esp + 4] // dst - mov eax, [esp + 8] // v8 - mov ecx, [esp + 12] // count + mov edi, [esp + 4] // dst + mov eax, [esp + 8] // v8 + mov ecx, [esp + 12] // width rep stosb mov edi, edx ret } } -// Write 'count' 32 bit values. -__declspec(naked) -void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) { +// Write 'width' 32 bit values. 
+__declspec(naked) void ARGBSetRow_X86(uint8_t* dst_argb, + uint32_t v32, + int width) { __asm { mov edx, edi - mov edi, [esp + 4] // dst - mov eax, [esp + 8] // v32 - mov ecx, [esp + 12] // count + mov edi, [esp + 4] // dst + mov eax, [esp + 8] // v32 + mov ecx, [esp + 12] // width rep stosd mov edi, edx ret @@ -3676,12 +3606,13 @@ void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) { #endif // HAS_SETROW_X86 #ifdef HAS_YUY2TOYROW_AVX2 -__declspec(naked) -void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width) { +__declspec(naked) void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, + uint8_t* dst_y, + int width) { __asm { - mov eax, [esp + 4] // src_yuy2 - mov edx, [esp + 8] // dst_y - mov ecx, [esp + 12] // width + mov eax, [esp + 4] // src_yuy2 + mov edx, [esp + 8] // dst_y + mov ecx, [esp + 12] // width vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff vpsrlw ymm5, ymm5, 8 @@ -3689,9 +3620,9 @@ void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width) { vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] - vpand ymm0, ymm0, ymm5 // even bytes are Y + vpand ymm0, ymm0, ymm5 // even bytes are Y vpand ymm1, ymm1, ymm5 - vpackuswb ymm0, ymm0, ymm1 // mutates. + vpackuswb ymm0, ymm0, ymm1 // mutates. vpermq ymm0, ymm0, 0xd8 vmovdqu [edx], ymm0 lea edx, [edx + 32] @@ -3702,18 +3633,20 @@ void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width) { } } -__declspec(naked) -void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_yuy2 - mov esi, [esp + 8 + 8] // stride_yuy2 - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // width - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff + mov eax, [esp + 8 + 4] // src_yuy2 + mov esi, [esp + 8 + 8] // stride_yuy2 + mov edx, [esp + 8 + 12] // dst_u + mov edi, [esp + 8 + 16] // dst_v + mov ecx, [esp + 8 + 20] // width + vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff vpsrlw ymm5, ymm5, 8 sub edi, edx @@ -3723,18 +3656,18 @@ void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, vpavgb ymm0, ymm0, [eax + esi] vpavgb ymm1, ymm1, [eax + esi + 32] lea eax, [eax + 64] - vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV + vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV vpsrlw ymm1, ymm1, 8 - vpackuswb ymm0, ymm0, ymm1 // mutates. + vpackuswb ymm0, ymm0, ymm1 // mutates. vpermq ymm0, ymm0, 0xd8 vpand ymm1, ymm0, ymm5 // U - vpsrlw ymm0, ymm0, 8 // V + vpsrlw ymm0, ymm0, 8 // V vpackuswb ymm1, ymm1, ymm1 // mutates. vpackuswb ymm0, ymm0, ymm0 // mutates. 
vpermq ymm1, ymm1, 0xd8 vpermq ymm0, ymm0, 0xd8 vextractf128 [edx], ymm1, 0 // U - vextractf128 [edx + edi], ymm0, 0 // V + vextractf128 [edx + edi], ymm0, 0 // V lea edx, [edx + 16] sub ecx, 32 jg convertloop @@ -3746,16 +3679,17 @@ void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, } } -__declspec(naked) -void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push edi - mov eax, [esp + 4 + 4] // src_yuy2 - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // width - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff + mov eax, [esp + 4 + 4] // src_yuy2 + mov edx, [esp + 4 + 8] // dst_u + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // width + vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff vpsrlw ymm5, ymm5, 8 sub edi, edx @@ -3763,18 +3697,18 @@ void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] - vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV + vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV vpsrlw ymm1, ymm1, 8 - vpackuswb ymm0, ymm0, ymm1 // mutates. + vpackuswb ymm0, ymm0, ymm1 // mutates. vpermq ymm0, ymm0, 0xd8 vpand ymm1, ymm0, ymm5 // U - vpsrlw ymm0, ymm0, 8 // V + vpsrlw ymm0, ymm0, 8 // V vpackuswb ymm1, ymm1, ymm1 // mutates. vpackuswb ymm0, ymm0, ymm0 // mutates. vpermq ymm1, ymm1, 0xd8 vpermq ymm0, ymm0, 0xd8 vextractf128 [edx], ymm1, 0 // U - vextractf128 [edx + edi], ymm0, 0 // V + vextractf128 [edx + edi], ymm0, 0 // V lea edx, [edx + 16] sub ecx, 32 jg convertloop @@ -3785,21 +3719,21 @@ void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, } } -__declspec(naked) -void UYVYToYRow_AVX2(const uint8* src_uyvy, - uint8* dst_y, int width) { +__declspec(naked) void UYVYToYRow_AVX2(const uint8_t* src_uyvy, + uint8_t* dst_y, + int width) { __asm { - mov eax, [esp + 4] // src_uyvy - mov edx, [esp + 8] // dst_y - mov ecx, [esp + 12] // width + mov eax, [esp + 4] // src_uyvy + mov edx, [esp + 8] // dst_y + mov ecx, [esp + 12] // width convertloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] - vpsrlw ymm0, ymm0, 8 // odd bytes are Y + vpsrlw ymm0, ymm0, 8 // odd bytes are Y vpsrlw ymm1, ymm1, 8 - vpackuswb ymm0, ymm0, ymm1 // mutates. + vpackuswb ymm0, ymm0, ymm1 // mutates. 
vpermq ymm0, ymm0, 0xd8 vmovdqu [edx], ymm0 lea edx, [edx + 32] @@ -3810,18 +3744,20 @@ void UYVYToYRow_AVX2(const uint8* src_uyvy, } } -__declspec(naked) -void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_yuy2 - mov esi, [esp + 8 + 8] // stride_yuy2 - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // width - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff + mov eax, [esp + 8 + 4] // src_yuy2 + mov esi, [esp + 8 + 8] // stride_yuy2 + mov edx, [esp + 8 + 12] // dst_u + mov edi, [esp + 8 + 16] // dst_v + mov ecx, [esp + 8 + 20] // width + vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff vpsrlw ymm5, ymm5, 8 sub edi, edx @@ -3831,18 +3767,18 @@ void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, vpavgb ymm0, ymm0, [eax + esi] vpavgb ymm1, ymm1, [eax + esi + 32] lea eax, [eax + 64] - vpand ymm0, ymm0, ymm5 // UYVY -> UVUV + vpand ymm0, ymm0, ymm5 // UYVY -> UVUV vpand ymm1, ymm1, ymm5 - vpackuswb ymm0, ymm0, ymm1 // mutates. + vpackuswb ymm0, ymm0, ymm1 // mutates. vpermq ymm0, ymm0, 0xd8 vpand ymm1, ymm0, ymm5 // U - vpsrlw ymm0, ymm0, 8 // V + vpsrlw ymm0, ymm0, 8 // V vpackuswb ymm1, ymm1, ymm1 // mutates. vpackuswb ymm0, ymm0, ymm0 // mutates. vpermq ymm1, ymm1, 0xd8 vpermq ymm0, ymm0, 0xd8 vextractf128 [edx], ymm1, 0 // U - vextractf128 [edx + edi], ymm0, 0 // V + vextractf128 [edx + edi], ymm0, 0 // V lea edx, [edx + 16] sub ecx, 32 jg convertloop @@ -3854,16 +3790,17 @@ void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, } } -__declspec(naked) -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push edi - mov eax, [esp + 4 + 4] // src_yuy2 - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // width - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff + mov eax, [esp + 4 + 4] // src_yuy2 + mov edx, [esp + 4 + 8] // dst_u + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // width + vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff vpsrlw ymm5, ymm5, 8 sub edi, edx @@ -3871,18 +3808,18 @@ void UYVYToUV422Row_AVX2(const uint8* src_uyvy, vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] - vpand ymm0, ymm0, ymm5 // UYVY -> UVUV + vpand ymm0, ymm0, ymm5 // UYVY -> UVUV vpand ymm1, ymm1, ymm5 - vpackuswb ymm0, ymm0, ymm1 // mutates. + vpackuswb ymm0, ymm0, ymm1 // mutates. vpermq ymm0, ymm0, 0xd8 vpand ymm1, ymm0, ymm5 // U - vpsrlw ymm0, ymm0, 8 // V + vpsrlw ymm0, ymm0, 8 // V vpackuswb ymm1, ymm1, ymm1 // mutates. vpackuswb ymm0, ymm0, ymm0 // mutates. 
vpermq ymm1, ymm1, 0xd8 vpermq ymm0, ymm0, 0xd8 vextractf128 [edx], ymm1, 0 // U - vextractf128 [edx + edi], ymm0, 0 // V + vextractf128 [edx + edi], ymm0, 0 // V lea edx, [edx + 16] sub ecx, 32 jg convertloop @@ -3895,21 +3832,21 @@ void UYVYToUV422Row_AVX2(const uint8* src_uyvy, #endif // HAS_YUY2TOYROW_AVX2 #ifdef HAS_YUY2TOYROW_SSE2 -__declspec(naked) -void YUY2ToYRow_SSE2(const uint8* src_yuy2, - uint8* dst_y, int width) { +__declspec(naked) void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, + uint8_t* dst_y, + int width) { __asm { - mov eax, [esp + 4] // src_yuy2 - mov edx, [esp + 8] // dst_y - mov ecx, [esp + 12] // width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff + mov eax, [esp + 4] // src_yuy2 + mov edx, [esp + 8] // dst_y + mov ecx, [esp + 12] // width + pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff psrlw xmm5, 8 convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] - pand xmm0, xmm5 // even bytes are Y + pand xmm0, xmm5 // even bytes are Y pand xmm1, xmm5 packuswb xmm0, xmm1 movdqu [edx], xmm0 @@ -3920,18 +3857,20 @@ void YUY2ToYRow_SSE2(const uint8* src_yuy2, } } -__declspec(naked) -void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_yuy2 - mov esi, [esp + 8 + 8] // stride_yuy2 - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff + mov eax, [esp + 8 + 4] // src_yuy2 + mov esi, [esp + 8 + 8] // stride_yuy2 + mov edx, [esp + 8 + 12] // dst_u + mov edi, [esp + 8 + 16] // dst_v + mov ecx, [esp + 8 + 20] // width + pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff psrlw xmm5, 8 sub edi, edx @@ -3943,13 +3882,13 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, lea eax, [eax + 32] pavgb xmm0, xmm2 pavgb xmm1, xmm3 - psrlw xmm0, 8 // YUYV -> UVUV + psrlw xmm0, 8 // YUYV -> UVUV psrlw xmm1, 8 packuswb xmm0, xmm1 movdqa xmm1, xmm0 pand xmm0, xmm5 // U packuswb xmm0, xmm0 - psrlw xmm1, 8 // V + psrlw xmm1, 8 // V packuswb xmm1, xmm1 movq qword ptr [edx], xmm0 movq qword ptr [edx + edi], xmm1 @@ -3963,16 +3902,17 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, } } -__declspec(naked) -void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push edi - mov eax, [esp + 4 + 4] // src_yuy2 - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff + mov eax, [esp + 4 + 4] // src_yuy2 + mov edx, [esp + 4 + 8] // dst_u + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // width + pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff psrlw xmm5, 8 sub edi, edx @@ -3980,13 +3920,13 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] - psrlw xmm0, 8 // YUYV -> UVUV + psrlw xmm0, 8 // YUYV -> UVUV psrlw xmm1, 8 packuswb xmm0, xmm1 movdqa xmm1, xmm0 pand xmm0, xmm5 // U packuswb xmm0, xmm0 - psrlw xmm1, 8 // V + psrlw xmm1, 8 // V packuswb xmm1, xmm1 movq qword ptr [edx], xmm0 movq qword ptr [edx + edi], xmm1 @@ -3999,19 +3939,19 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, } } 
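// A rough scalar model (an illustrative sketch, not part of the upstream
// patch) of YUY2ToUVRow_SSE2 above. YUY2 packs two pixels as Y0 U0 Y1 V0;
// the kernel pavgb-averages chroma across two source rows, so one UV row
// serves two luma rows (packed 4:2:2 in, planar 4:2:0 out).
#include <stdint.h>

static void YUY2ToUVRow_Model(const uint8_t* src_yuy2, int stride_yuy2,
                              uint8_t* dst_u, uint8_t* dst_v, int width) {
  const uint8_t* next = src_yuy2 + stride_yuy2;  // row below
  for (int x = 0; x < width; x += 2) {  // two pixels = four YUY2 bytes
    *dst_u++ = (uint8_t)((src_yuy2[1] + next[1] + 1) >> 1);  // pavgb rounds up
    *dst_v++ = (uint8_t)((src_yuy2[3] + next[3] + 1) >> 1);
    src_yuy2 += 4;
    next += 4;
  }
}
// The UYVY kernels that follow are the same idea with byte order U0 Y0 V0 Y1,
// which is why they take Y from the odd bytes and UV from the even ones.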
-__declspec(naked) -void UYVYToYRow_SSE2(const uint8* src_uyvy, - uint8* dst_y, int width) { +__declspec(naked) void UYVYToYRow_SSE2(const uint8_t* src_uyvy, + uint8_t* dst_y, + int width) { __asm { - mov eax, [esp + 4] // src_uyvy - mov edx, [esp + 8] // dst_y - mov ecx, [esp + 12] // width + mov eax, [esp + 4] // src_uyvy + mov edx, [esp + 8] // dst_y + mov ecx, [esp + 12] // width convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] - psrlw xmm0, 8 // odd bytes are Y + psrlw xmm0, 8 // odd bytes are Y psrlw xmm1, 8 packuswb xmm0, xmm1 movdqu [edx], xmm0 @@ -4022,18 +3962,20 @@ void UYVYToYRow_SSE2(const uint8* src_uyvy, } } -__declspec(naked) -void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void UYVYToUVRow_SSE2(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_yuy2 - mov esi, [esp + 8 + 8] // stride_yuy2 - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff + mov eax, [esp + 8 + 4] // src_yuy2 + mov esi, [esp + 8 + 8] // stride_yuy2 + mov edx, [esp + 8 + 12] // dst_u + mov edi, [esp + 8 + 16] // dst_v + mov ecx, [esp + 8 + 20] // width + pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff psrlw xmm5, 8 sub edi, edx @@ -4045,13 +3987,13 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, lea eax, [eax + 32] pavgb xmm0, xmm2 pavgb xmm1, xmm3 - pand xmm0, xmm5 // UYVY -> UVUV + pand xmm0, xmm5 // UYVY -> UVUV pand xmm1, xmm5 packuswb xmm0, xmm1 movdqa xmm1, xmm0 pand xmm0, xmm5 // U packuswb xmm0, xmm0 - psrlw xmm1, 8 // V + psrlw xmm1, 8 // V packuswb xmm1, xmm1 movq qword ptr [edx], xmm0 movq qword ptr [edx + edi], xmm1 @@ -4065,16 +4007,17 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, } } -__declspec(naked) -void UYVYToUV422Row_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width) { +__declspec(naked) void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { __asm { push edi - mov eax, [esp + 4 + 4] // src_yuy2 - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff + mov eax, [esp + 4 + 4] // src_yuy2 + mov edx, [esp + 4 + 8] // dst_u + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // width + pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff psrlw xmm5, 8 sub edi, edx @@ -4082,13 +4025,13 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy, movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] - pand xmm0, xmm5 // UYVY -> UVUV + pand xmm0, xmm5 // UYVY -> UVUV pand xmm1, xmm5 packuswb xmm0, xmm1 movdqa xmm1, xmm0 pand xmm0, xmm5 // U packuswb xmm0, xmm0 - psrlw xmm1, 8 // V + psrlw xmm1, 8 // V packuswb xmm1, xmm1 movq qword ptr [edx], xmm0 movq qword ptr [edx + edi], xmm1 @@ -4108,13 +4051,15 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy, // =((A2*C2)+(B2*(255-C2))+255)/256 // signed version of math // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256 -__declspec(naked) -void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1, - const uint8* alpha, uint8* dst, int width) { +__declspec(naked) void BlendPlaneRow_SSSE3(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, + int width) { __asm { push esi push edi - pcmpeqb xmm5, xmm5 // generate mask 
0xff00ff00 + pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00 psllw xmm5, 8 mov eax, 0x80808080 // 128 for biasing image to signed. movd xmm6, eax @@ -4123,8 +4068,8 @@ void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1, mov eax, 0x807f807f // 32768 + 127 for unbias and round. movd xmm7, eax pshufd xmm7, xmm7, 0x00 - mov eax, [esp + 8 + 4] // src0 - mov edx, [esp + 8 + 8] // src1 + mov eax, [esp + 8 + 4] // src0 + mov edx, [esp + 8 + 8] // src1 mov esi, [esp + 8 + 12] // alpha mov edi, [esp + 8 + 16] // dst mov ecx, [esp + 8 + 20] // width @@ -4132,17 +4077,17 @@ void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1, sub edx, esi sub edi, esi - // 8 pixel loop. + // 8 pixel loop. convertloop8: - movq xmm0, qword ptr [esi] // alpha + movq xmm0, qword ptr [esi] // alpha punpcklbw xmm0, xmm0 - pxor xmm0, xmm5 // a, 255-a + pxor xmm0, xmm5 // a, 255-a movq xmm1, qword ptr [eax + esi] // src0 movq xmm2, qword ptr [edx + esi] // src1 punpcklbw xmm1, xmm2 - psubb xmm1, xmm6 // bias src0/1 - 128 + psubb xmm1, xmm6 // bias src0/1 - 128 pmaddubsw xmm0, xmm1 - paddw xmm0, xmm7 // unbias result - 32768 and round. + paddw xmm0, xmm7 // unbias result - 32768 and round. psrlw xmm0, 8 packuswb xmm0, xmm0 movq qword ptr [edi + esi], xmm0 @@ -4163,13 +4108,15 @@ void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1, // =((A2*C2)+(B2*(255-C2))+255)/256 // signed version of math // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256 -__declspec(naked) -void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1, - const uint8* alpha, uint8* dst, int width) { +__declspec(naked) void BlendPlaneRow_AVX2(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, + int width) { __asm { push esi push edi - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff00ff00 + vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff00ff00 vpsllw ymm5, ymm5, 8 mov eax, 0x80808080 // 128 for biasing image to signed. vmovd xmm6, eax @@ -4177,8 +4124,8 @@ void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1, mov eax, 0x807f807f // 32768 + 127 for unbias and round. vmovd xmm7, eax vbroadcastss ymm7, xmm7 - mov eax, [esp + 8 + 4] // src0 - mov edx, [esp + 8 + 8] // src1 + mov eax, [esp + 8 + 4] // src0 + mov edx, [esp + 8 + 8] // src1 mov esi, [esp + 8 + 12] // alpha mov edi, [esp + 8 + 16] // dst mov ecx, [esp + 8 + 20] // width @@ -4186,23 +4133,23 @@ void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1, sub edx, esi sub edi, esi - // 32 pixel loop. + // 32 pixel loop. convertloop32: - vmovdqu ymm0, [esi] // alpha - vpunpckhbw ymm3, ymm0, ymm0 // 8..15, 24..31 - vpunpcklbw ymm0, ymm0, ymm0 // 0..7, 16..23 - vpxor ymm3, ymm3, ymm5 // a, 255-a - vpxor ymm0, ymm0, ymm5 // a, 255-a + vmovdqu ymm0, [esi] // alpha + vpunpckhbw ymm3, ymm0, ymm0 // 8..15, 24..31 + vpunpcklbw ymm0, ymm0, ymm0 // 0..7, 16..23 + vpxor ymm3, ymm3, ymm5 // a, 255-a + vpxor ymm0, ymm0, ymm5 // a, 255-a vmovdqu ymm1, [eax + esi] // src0 vmovdqu ymm2, [edx + esi] // src1 vpunpckhbw ymm4, ymm1, ymm2 vpunpcklbw ymm1, ymm1, ymm2 - vpsubb ymm4, ymm4, ymm6 // bias src0/1 - 128 - vpsubb ymm1, ymm1, ymm6 // bias src0/1 - 128 + vpsubb ymm4, ymm4, ymm6 // bias src0/1 - 128 + vpsubb ymm1, ymm1, ymm6 // bias src0/1 - 128 vpmaddubsw ymm3, ymm3, ymm4 vpmaddubsw ymm0, ymm0, ymm1 - vpaddw ymm3, ymm3, ymm7 // unbias result - 32768 and round. - vpaddw ymm0, ymm0, ymm7 // unbias result - 32768 and round. + vpaddw ymm3, ymm3, ymm7 // unbias result - 32768 and round. + vpaddw ymm0, ymm0, ymm7 // unbias result - 32768 and round. 
vpsrlw ymm3, ymm3, 8 vpsrlw ymm0, ymm0, 8 vpackuswb ymm0, ymm0, ymm3 @@ -4221,52 +4168,51 @@ void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1, #ifdef HAS_ARGBBLENDROW_SSSE3 // Shuffle table for isolating alpha. -static const uvec8 kShuffleAlpha = { - 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, - 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80 -}; +static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, + 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80}; // Blend 8 pixels at a time. -__declspec(naked) -void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { +__declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 + mov eax, [esp + 4 + 4] // src_argb0 + mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm7, xmm7 // generate constant 0x0001 + pcmpeqb xmm7, xmm7 // generate constant 0x0001 psrlw xmm7, 15 - pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff + pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff psrlw xmm6, 8 - pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00 + pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00 psllw xmm5, 8 - pcmpeqb xmm4, xmm4 // generate mask 0xff000000 + pcmpeqb xmm4, xmm4 // generate mask 0xff000000 pslld xmm4, 24 sub ecx, 4 - jl convertloop4b // less than 4 pixels? + jl convertloop4b // less than 4 pixels? - // 4 pixel loop. + // 4 pixel loop. convertloop4: - movdqu xmm3, [eax] // src argb + movdqu xmm3, [eax] // src argb lea eax, [eax + 16] - movdqa xmm0, xmm3 // src argb - pxor xmm3, xmm4 // ~alpha - movdqu xmm2, [esi] // _r_b - pshufb xmm3, xmmword ptr kShuffleAlpha // alpha - pand xmm2, xmm6 // _r_b - paddw xmm3, xmm7 // 256 - alpha - pmullw xmm2, xmm3 // _r_b * alpha - movdqu xmm1, [esi] // _a_g + movdqa xmm0, xmm3 // src argb + pxor xmm3, xmm4 // ~alpha + movdqu xmm2, [esi] // _r_b + pshufb xmm3, xmmword ptr kShuffleAlpha // alpha + pand xmm2, xmm6 // _r_b + paddw xmm3, xmm7 // 256 - alpha + pmullw xmm2, xmm3 // _r_b * alpha + movdqu xmm1, [esi] // _a_g lea esi, [esi + 16] - psrlw xmm1, 8 // _a_g - por xmm0, xmm4 // set alpha to 255 - pmullw xmm1, xmm3 // _a_g * alpha - psrlw xmm2, 8 // _r_b convert to 8 bits again - paddusb xmm0, xmm2 // + src argb - pand xmm1, xmm5 // a_g_ convert to 8 bits again - paddusb xmm0, xmm1 // + src argb + psrlw xmm1, 8 // _a_g + por xmm0, xmm4 // set alpha to 255 + pmullw xmm1, xmm3 // _a_g * alpha + psrlw xmm2, 8 // _r_b convert to 8 bits again + paddusb xmm0, xmm2 // + src argb + pand xmm1, xmm5 // a_g_ convert to 8 bits again + paddusb xmm0, xmm1 // + src argb movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 @@ -4276,26 +4222,26 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, add ecx, 4 - 1 jl convertloop1b - // 1 pixel loop. + // 1 pixel loop. 
convertloop1: - movd xmm3, [eax] // src argb + movd xmm3, [eax] // src argb lea eax, [eax + 4] - movdqa xmm0, xmm3 // src argb - pxor xmm3, xmm4 // ~alpha - movd xmm2, [esi] // _r_b - pshufb xmm3, xmmword ptr kShuffleAlpha // alpha - pand xmm2, xmm6 // _r_b - paddw xmm3, xmm7 // 256 - alpha - pmullw xmm2, xmm3 // _r_b * alpha - movd xmm1, [esi] // _a_g + movdqa xmm0, xmm3 // src argb + pxor xmm3, xmm4 // ~alpha + movd xmm2, [esi] // _r_b + pshufb xmm3, xmmword ptr kShuffleAlpha // alpha + pand xmm2, xmm6 // _r_b + paddw xmm3, xmm7 // 256 - alpha + pmullw xmm2, xmm3 // _r_b * alpha + movd xmm1, [esi] // _a_g lea esi, [esi + 4] - psrlw xmm1, 8 // _a_g - por xmm0, xmm4 // set alpha to 255 - pmullw xmm1, xmm3 // _a_g * alpha - psrlw xmm2, 8 // _r_b convert to 8 bits again - paddusb xmm0, xmm2 // + src argb - pand xmm1, xmm5 // a_g_ convert to 8 bits again - paddusb xmm0, xmm1 // + src argb + psrlw xmm1, 8 // _a_g + por xmm0, xmm4 // set alpha to 255 + pmullw xmm1, xmm3 // _a_g * alpha + psrlw xmm2, 8 // _r_b convert to 8 bits again + paddusb xmm0, xmm2 // + src argb + pand xmm1, xmm5 // a_g_ convert to 8 bits again + paddusb xmm0, xmm1 // + src argb movd [edx], xmm0 lea edx, [edx + 4] sub ecx, 1 @@ -4311,41 +4257,42 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBATTENUATEROW_SSSE3 // Shuffle table duplicating alpha. static const uvec8 kShuffleAlpha0 = { - 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u, + 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u, }; static const uvec8 kShuffleAlpha1 = { - 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, - 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u, + 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, + 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u, }; -__declspec(naked) -void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { +__declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { __asm { - mov eax, [esp + 4] // src_argb0 - mov edx, [esp + 8] // dst_argb + mov eax, [esp + 4] // src_argb0 + mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width - pcmpeqb xmm3, xmm3 // generate mask 0xff000000 + pcmpeqb xmm3, xmm3 // generate mask 0xff000000 pslld xmm3, 24 movdqa xmm4, xmmword ptr kShuffleAlpha0 movdqa xmm5, xmmword ptr kShuffleAlpha1 convertloop: - movdqu xmm0, [eax] // read 4 pixels - pshufb xmm0, xmm4 // isolate first 2 alphas - movdqu xmm1, [eax] // read 4 pixels - punpcklbw xmm1, xmm1 // first 2 pixel rgbs - pmulhuw xmm0, xmm1 // rgb * a - movdqu xmm1, [eax] // read 4 pixels - pshufb xmm1, xmm5 // isolate next 2 alphas - movdqu xmm2, [eax] // read 4 pixels - punpckhbw xmm2, xmm2 // next 2 pixel rgbs - pmulhuw xmm1, xmm2 // rgb * a - movdqu xmm2, [eax] // mask original alpha + movdqu xmm0, [eax] // read 4 pixels + pshufb xmm0, xmm4 // isolate first 2 alphas + movdqu xmm1, [eax] // read 4 pixels + punpcklbw xmm1, xmm1 // first 2 pixel rgbs + pmulhuw xmm0, xmm1 // rgb * a + movdqu xmm1, [eax] // read 4 pixels + pshufb xmm1, xmm5 // isolate next 2 alphas + movdqu xmm2, [eax] // read 4 pixels + punpckhbw xmm2, xmm2 // next 2 pixel rgbs + pmulhuw xmm1, xmm2 // rgb * a + movdqu xmm2, [eax] // mask original alpha lea eax, [eax + 16] pand xmm2, xmm3 psrlw xmm0, 8 psrlw xmm1, 8 packuswb xmm0, xmm1 - por xmm0, xmm2 // copy original alpha + por xmm0, xmm2 // copy original alpha movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 @@ -4358,22 +4305,23 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, 
int width) { #ifdef HAS_ARGBATTENUATEROW_AVX2 // Shuffle table duplicating alpha. -static const uvec8 kShuffleAlpha_AVX2 = { - 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u -}; -__declspec(naked) -void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { +static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u, + 128u, 128u, 14u, 15u, 14u, 15u, + 14u, 15u, 128u, 128u}; +__declspec(naked) void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { __asm { - mov eax, [esp + 4] // src_argb0 - mov edx, [esp + 8] // dst_argb + mov eax, [esp + 4] // src_argb0 + mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax vbroadcastf128 ymm4, xmmword ptr kShuffleAlpha_AVX2 - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000 + vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000 vpslld ymm5, ymm5, 24 convertloop: - vmovdqu ymm6, [eax] // read 8 pixels. + vmovdqu ymm6, [eax] // read 8 pixels. vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated. vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated. vpshufb ymm2, ymm0, ymm4 // low 4 alphas @@ -4398,40 +4346,40 @@ void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { #ifdef HAS_ARGBUNATTENUATEROW_SSE2 // Unattenuate 4 pixels at a time. -__declspec(naked) -void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, - int width) { +__declspec(naked) void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { __asm { push ebx push esi push edi - mov eax, [esp + 12 + 4] // src_argb - mov edx, [esp + 12 + 8] // dst_argb + mov eax, [esp + 12 + 4] // src_argb + mov edx, [esp + 12 + 8] // dst_argb mov ecx, [esp + 12 + 12] // width lea ebx, fixed_invtbl8 convertloop: - movdqu xmm0, [eax] // read 4 pixels + movdqu xmm0, [eax] // read 4 pixels movzx esi, byte ptr [eax + 3] // first alpha movzx edi, byte ptr [eax + 7] // second alpha - punpcklbw xmm0, xmm0 // first 2 + punpcklbw xmm0, xmm0 // first 2 movd xmm2, dword ptr [ebx + esi * 4] movd xmm3, dword ptr [ebx + edi * 4] - pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a - pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words + pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a + pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words movlhps xmm2, xmm3 - pmulhuw xmm0, xmm2 // rgb * a + pmulhuw xmm0, xmm2 // rgb * a - movdqu xmm1, [eax] // read 4 pixels + movdqu xmm1, [eax] // read 4 pixels movzx esi, byte ptr [eax + 11] // third alpha movzx edi, byte ptr [eax + 15] // forth alpha - punpckhbw xmm1, xmm1 // next 2 + punpckhbw xmm1, xmm1 // next 2 movd xmm2, dword ptr [ebx + esi * 4] movd xmm3, dword ptr [ebx + edi * 4] - pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words - pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words + pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words + pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words movlhps xmm2, xmm3 - pmulhuw xmm1, xmm2 // rgb * a + pmulhuw xmm1, xmm2 // rgb * a lea eax, [eax + 16] packuswb xmm0, xmm1 movdqu [edx], xmm0 @@ -4450,25 +4398,24 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, #ifdef HAS_ARGBUNATTENUATEROW_AVX2 // Shuffle table duplicating alpha. static const uvec8 kUnattenShuffleAlpha_AVX2 = { - 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u -}; + 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u}; // TODO(fbarchard): Enable USE_GATHER for future hardware if faster. 
// USE_GATHER is not on by default, due to being a slow instruction. #ifdef USE_GATHER -__declspec(naked) -void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, - int width) { +__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { __asm { - mov eax, [esp + 4] // src_argb0 - mov edx, [esp + 8] // dst_argb + mov eax, [esp + 4] // src_argb0 + mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax vbroadcastf128 ymm4, xmmword ptr kUnattenShuffleAlpha_AVX2 convertloop: - vmovdqu ymm6, [eax] // read 8 pixels. + vmovdqu ymm6, [eax] // read 8 pixels. vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xffffffff for gather. - vpsrld ymm2, ymm6, 24 // alpha in low 8 bits. + vpsrld ymm2, ymm6, 24 // alpha in low 8 bits. vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated. vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated. vpgatherdd ymm3, [ymm2 * 4 + fixed_invtbl8], ymm5 // ymm5 cleared. 1, a @@ -4488,50 +4435,50 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, ret } } -#else // USE_GATHER -__declspec(naked) -void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, - int width) { +#else // USE_GATHER +__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { __asm { push ebx push esi push edi - mov eax, [esp + 12 + 4] // src_argb - mov edx, [esp + 12 + 8] // dst_argb + mov eax, [esp + 12 + 4] // src_argb + mov edx, [esp + 12 + 8] // dst_argb mov ecx, [esp + 12 + 12] // width sub edx, eax lea ebx, fixed_invtbl8 vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2 convertloop: - // replace VPGATHER - movzx esi, byte ptr [eax + 3] // alpha0 - movzx edi, byte ptr [eax + 7] // alpha1 + // replace VPGATHER + movzx esi, byte ptr [eax + 3] // alpha0 + movzx edi, byte ptr [eax + 7] // alpha1 vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a0] vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a1] - movzx esi, byte ptr [eax + 11] // alpha2 - movzx edi, byte ptr [eax + 15] // alpha3 - vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0] + movzx esi, byte ptr [eax + 11] // alpha2 + movzx edi, byte ptr [eax + 15] // alpha3 + vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0] vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a2] vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a3] - movzx esi, byte ptr [eax + 19] // alpha4 - movzx edi, byte ptr [eax + 23] // alpha5 - vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2] + movzx esi, byte ptr [eax + 19] // alpha4 + movzx edi, byte ptr [eax + 23] // alpha5 + vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2] vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a4] vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a5] - movzx esi, byte ptr [eax + 27] // alpha6 - movzx edi, byte ptr [eax + 31] // alpha7 - vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4] + movzx esi, byte ptr [eax + 27] // alpha6 + movzx edi, byte ptr [eax + 31] // alpha7 + vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4] vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a6] vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a7] - vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6] - vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0] - vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4] - vinserti128 ymm3, ymm3, xmm0, 1 // [1,a7,1,a6,1,a5,1,a4,1,a3,1,a2,1,a1,1,a0] + vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6] + vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0] + vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4] + vinserti128 ymm3, ymm3, xmm0, 1 // [1,a7,1,a6,1,a5,1,a4,1,a3,1,a2,1,a1,1,a0] // end of VPGATHER - vmovdqu ymm6, 
[eax] // read 8 pixels. + vmovdqu ymm6, [eax] // read 8 pixels. vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated. vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated. vpunpcklwd ymm2, ymm3, ymm3 // low 4 inverted alphas. mutated. 1, 1, a, a @@ -4540,7 +4487,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, vpshufb ymm3, ymm3, ymm5 // replicate high 4 alphas vpmulhuw ymm0, ymm0, ymm2 // rgb * ia vpmulhuw ymm1, ymm1, ymm3 // rgb * ia - vpackuswb ymm0, ymm0, ymm1 // unmutated. + vpackuswb ymm0, ymm0, ymm1 // unmutated. vmovdqu [eax + edx], ymm0 lea eax, [eax + 32] sub ecx, 8 @@ -4558,12 +4505,13 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, #ifdef HAS_ARGBGRAYROW_SSSE3 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels. -__declspec(naked) -void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { +__declspec(naked) void ARGBGrayRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + int width) { __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_argb */ - mov ecx, [esp + 12] /* width */ + mov eax, [esp + 4] /* src_argb */ + mov edx, [esp + 8] /* dst_argb */ + mov ecx, [esp + 12] /* width */ movdqa xmm4, xmmword ptr kARGBToYJ movdqa xmm5, xmmword ptr kAddYJ64 @@ -4575,20 +4523,20 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { phaddw xmm0, xmm1 paddw xmm0, xmm5 // Add .5 for rounding. psrlw xmm0, 7 - packuswb xmm0, xmm0 // 8 G bytes + packuswb xmm0, xmm0 // 8 G bytes movdqu xmm2, [eax] // A movdqu xmm3, [eax + 16] lea eax, [eax + 32] psrld xmm2, 24 psrld xmm3, 24 packuswb xmm2, xmm3 - packuswb xmm2, xmm2 // 8 A bytes - movdqa xmm3, xmm0 // Weave into GG, GA, then GGGA - punpcklbw xmm0, xmm0 // 8 GG words - punpcklbw xmm3, xmm2 // 8 GA words + packuswb xmm2, xmm2 // 8 A bytes + movdqa xmm3, xmm0 // Weave into GG, GA, then GGGA + punpcklbw xmm0, xmm0 // 8 GG words + punpcklbw xmm3, xmm2 // 8 GA words movdqa xmm1, xmm0 - punpcklwd xmm0, xmm3 // GGGA first 4 - punpckhwd xmm1, xmm3 // GGGA next 4 + punpcklwd xmm0, xmm3 // GGGA first 4 + punpckhwd xmm1, xmm3 // GGGA next 4 movdqu [edx], xmm0 movdqu [edx + 16], xmm1 lea edx, [edx + 32] @@ -4604,24 +4552,20 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { // g = (r * 45 + g * 88 + b * 22) >> 7 // r = (r * 50 + g * 98 + b * 24) >> 7 // Constant for ARGB color to sepia tone. -static const vec8 kARGBToSepiaB = { - 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0 -}; +static const vec8 kARGBToSepiaB = {17, 68, 35, 0, 17, 68, 35, 0, + 17, 68, 35, 0, 17, 68, 35, 0}; -static const vec8 kARGBToSepiaG = { - 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0 -}; +static const vec8 kARGBToSepiaG = {22, 88, 45, 0, 22, 88, 45, 0, + 22, 88, 45, 0, 22, 88, 45, 0}; -static const vec8 kARGBToSepiaR = { - 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0 -}; +static const vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0, + 24, 98, 50, 0, 24, 98, 50, 0}; // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. 
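// A rough scalar model (an illustrative sketch, not part of the upstream
// patch) of the sepia kernel that follows, spelled out from the kARGBToSepia
// constants above. ARGB is B,G,R,A in memory; pmaddubsw forms B*c0 + G*c1,
// phaddw folds in R*c2 (the alpha coefficient is 0), and psrlw 7 plus
// packuswb (which clamps to 255) finish each channel.
#include <stdint.h>

static void ARGBSepiaRow_Model(uint8_t* dst_argb, int width) {
  for (int i = 0; i < width; ++i) {
    uint8_t* p = dst_argb + i * 4;
    int b = p[0], g = p[1], r = p[2];  // alpha in p[3] is left untouched
    int nb = (b * 17 + g * 68 + r * 35) >> 7;  // kARGBToSepiaB
    int ng = (b * 22 + g * 88 + r * 45) >> 7;  // kARGBToSepiaG
    int nr = (b * 24 + g * 98 + r * 50) >> 7;  // kARGBToSepiaR
    p[0] = (uint8_t)(nb > 255 ? 255 : nb);
    p[1] = (uint8_t)(ng > 255 ? 255 : ng);
    p[2] = (uint8_t)(nr > 255 ? 255 : nr);
  }
}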
-__declspec(naked) -void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { +__declspec(naked) void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width) { __asm { - mov eax, [esp + 4] /* dst_argb */ - mov ecx, [esp + 8] /* width */ + mov eax, [esp + 4] /* dst_argb */ + mov ecx, [esp + 8] /* width */ movdqa xmm2, xmmword ptr kARGBToSepiaB movdqa xmm3, xmmword ptr kARGBToSepiaG movdqa xmm4, xmmword ptr kARGBToSepiaR @@ -4633,32 +4577,32 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { pmaddubsw xmm6, xmm2 phaddw xmm0, xmm6 psrlw xmm0, 7 - packuswb xmm0, xmm0 // 8 B values + packuswb xmm0, xmm0 // 8 B values movdqu xmm5, [eax] // G movdqu xmm1, [eax + 16] pmaddubsw xmm5, xmm3 pmaddubsw xmm1, xmm3 phaddw xmm5, xmm1 psrlw xmm5, 7 - packuswb xmm5, xmm5 // 8 G values - punpcklbw xmm0, xmm5 // 8 BG values + packuswb xmm5, xmm5 // 8 G values + punpcklbw xmm0, xmm5 // 8 BG values movdqu xmm5, [eax] // R movdqu xmm1, [eax + 16] pmaddubsw xmm5, xmm4 pmaddubsw xmm1, xmm4 phaddw xmm5, xmm1 psrlw xmm5, 7 - packuswb xmm5, xmm5 // 8 R values + packuswb xmm5, xmm5 // 8 R values movdqu xmm6, [eax] // A movdqu xmm1, [eax + 16] psrld xmm6, 24 psrld xmm1, 24 packuswb xmm6, xmm1 - packuswb xmm6, xmm6 // 8 A values - punpcklbw xmm5, xmm6 // 8 RA values - movdqa xmm1, xmm0 // Weave BG, RA together - punpcklwd xmm0, xmm5 // BGRA first 4 - punpckhwd xmm1, xmm5 // BGRA next 4 + packuswb xmm6, xmm6 // 8 A values + punpcklbw xmm5, xmm6 // 8 RA values + movdqa xmm1, xmm0 // Weave BG, RA together + punpcklwd xmm0, xmm5 // BGRA first 4 + punpckhwd xmm1, xmm5 // BGRA next 4 movdqu [eax], xmm0 movdqu [eax + 16], xmm1 lea eax, [eax + 32] @@ -4674,19 +4618,20 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { // Same as Sepia except matrix is provided. // TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R // and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd. 
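// A rough scalar model (an illustrative sketch, not part of the upstream
// patch) of ARGBColorMatrixRow_SSSE3 below: each output channel is a dot
// product of the input pixel (B,G,R,A byte order) with four signed int8
// coefficients, arithmetic-shifted right by 6 and clamped to [0, 255].
// Saturation of the intermediate phaddsw sums at +/-32767 is ignored here.
#include <stdint.h>

static void ARGBColorMatrixRow_Model(const uint8_t* src_argb,
                                     uint8_t* dst_argb,
                                     const int8_t* matrix_argb, int width) {
  for (int i = 0; i < width; ++i) {
    const uint8_t* s = src_argb + i * 4;
    for (int ch = 0; ch < 4; ++ch) {  // B, G, R, A outputs in turn
      const int8_t* m = matrix_argb + ch * 4;
      int v = (s[0] * m[0] + s[1] * m[1] + s[2] * m[2] + s[3] * m[3]) >> 6;
      dst_argb[i * 4 + ch] = (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
    }
  }
}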
-__declspec(naked) -void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width) { +__declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width) { __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_argb */ - mov ecx, [esp + 12] /* matrix_argb */ + mov eax, [esp + 4] /* src_argb */ + mov edx, [esp + 8] /* dst_argb */ + mov ecx, [esp + 12] /* matrix_argb */ movdqu xmm5, [ecx] pshufd xmm2, xmm5, 0x00 pshufd xmm3, xmm5, 0x55 pshufd xmm4, xmm5, 0xaa pshufd xmm5, xmm5, 0xff - mov ecx, [esp + 16] /* width */ + mov ecx, [esp + 16] /* width */ convertloop: movdqu xmm0, [eax] // B @@ -4697,31 +4642,31 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, movdqu xmm1, [eax + 16] pmaddubsw xmm6, xmm3 pmaddubsw xmm1, xmm3 - phaddsw xmm0, xmm7 // B - phaddsw xmm6, xmm1 // G - psraw xmm0, 6 // B - psraw xmm6, 6 // G - packuswb xmm0, xmm0 // 8 B values - packuswb xmm6, xmm6 // 8 G values - punpcklbw xmm0, xmm6 // 8 BG values + phaddsw xmm0, xmm7 // B + phaddsw xmm6, xmm1 // G + psraw xmm0, 6 // B + psraw xmm6, 6 // G + packuswb xmm0, xmm0 // 8 B values + packuswb xmm6, xmm6 // 8 G values + punpcklbw xmm0, xmm6 // 8 BG values movdqu xmm1, [eax] // R movdqu xmm7, [eax + 16] pmaddubsw xmm1, xmm4 pmaddubsw xmm7, xmm4 - phaddsw xmm1, xmm7 // R + phaddsw xmm1, xmm7 // R movdqu xmm6, [eax] // A movdqu xmm7, [eax + 16] pmaddubsw xmm6, xmm5 pmaddubsw xmm7, xmm5 - phaddsw xmm6, xmm7 // A - psraw xmm1, 6 // R - psraw xmm6, 6 // A - packuswb xmm1, xmm1 // 8 R values - packuswb xmm6, xmm6 // 8 A values - punpcklbw xmm1, xmm6 // 8 RA values - movdqa xmm6, xmm0 // Weave BG, RA together - punpcklwd xmm0, xmm1 // BGRA first 4 - punpckhwd xmm6, xmm1 // BGRA next 4 + phaddsw xmm6, xmm7 // A + psraw xmm1, 6 // R + psraw xmm6, 6 // A + packuswb xmm1, xmm1 // 8 R values + packuswb xmm6, xmm6 // 8 A values + punpcklbw xmm1, xmm6 // 8 RA values + movdqa xmm6, xmm0 // Weave BG, RA together + punpcklwd xmm0, xmm1 // BGRA first 4 + punpckhwd xmm6, xmm1 // BGRA next 4 movdqu [edx], xmm0 movdqu [edx + 16], xmm6 lea eax, [eax + 32] @@ -4735,15 +4680,17 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, #ifdef HAS_ARGBQUANTIZEROW_SSE2 // Quantize 4 ARGB pixels (16 bytes). 
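// A rough scalar model (an illustrative sketch, not part of the upstream
// patch) of ARGBQuantizeRow_SSE2 below: posterize each B/G/R value in place
// as v = (v * scale >> 16) * interval_size + interval_offset, with the alpha
// byte restored from the original pixel via the 0xff000000 mask. A caller
// would typically pick scale near 65536 / interval_size, though the exact
// choice is up to the caller (an assumption here, not taken from the patch).
#include <stdint.h>

static void ARGBQuantizeRow_Model(uint8_t* dst_argb, int scale,
                                  int interval_size, int interval_offset,
                                  int width) {
  for (int i = 0; i < width; ++i) {
    for (int ch = 0; ch < 3; ++ch) {  // B, G, R; the alpha byte is preserved
      int v = dst_argb[i * 4 + ch];
      dst_argb[i * 4 + ch] =
          (uint8_t)((v * scale >> 16) * interval_size + interval_offset);
    }
  }
}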
-__declspec(naked) -void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width) { +__declspec(naked) void ARGBQuantizeRow_SSE2(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width) { __asm { - mov eax, [esp + 4] /* dst_argb */ - movd xmm2, [esp + 8] /* scale */ - movd xmm3, [esp + 12] /* interval_size */ - movd xmm4, [esp + 16] /* interval_offset */ - mov ecx, [esp + 20] /* width */ + mov eax, [esp + 4] /* dst_argb */ + movd xmm2, [esp + 8] /* scale */ + movd xmm3, [esp + 12] /* interval_size */ + movd xmm4, [esp + 16] /* interval_offset */ + mov ecx, [esp + 20] /* width */ pshuflw xmm2, xmm2, 040h pshufd xmm2, xmm2, 044h pshuflw xmm3, xmm3, 040h @@ -4756,16 +4703,16 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, convertloop: movdqu xmm0, [eax] // read 4 pixels - punpcklbw xmm0, xmm5 // first 2 pixels - pmulhuw xmm0, xmm2 // pixel * scale >> 16 + punpcklbw xmm0, xmm5 // first 2 pixels + pmulhuw xmm0, xmm2 // pixel * scale >> 16 movdqu xmm1, [eax] // read 4 pixels - punpckhbw xmm1, xmm5 // next 2 pixels + punpckhbw xmm1, xmm5 // next 2 pixels pmulhuw xmm1, xmm2 - pmullw xmm0, xmm3 // * interval_size + pmullw xmm0, xmm3 // * interval_size movdqu xmm7, [eax] // read 4 pixels pmullw xmm1, xmm3 - pand xmm7, xmm6 // mask alpha - paddw xmm0, xmm4 // + interval_size / 2 + pand xmm7, xmm6 // mask alpha + paddw xmm0, xmm4 // + interval_size / 2 paddw xmm1, xmm4 packuswb xmm0, xmm1 por xmm0, xmm7 @@ -4780,25 +4727,26 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, #ifdef HAS_ARGBSHADEROW_SSE2 // Shade 4 pixels at a time by specified value. -__declspec(naked) -void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value) { +__declspec(naked) void ARGBShadeRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_argb + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width movd xmm2, [esp + 16] // value punpcklbw xmm2, xmm2 punpcklqdq xmm2, xmm2 convertloop: - movdqu xmm0, [eax] // read 4 pixels + movdqu xmm0, [eax] // read 4 pixels lea eax, [eax + 16] movdqa xmm1, xmm0 - punpcklbw xmm0, xmm0 // first 2 - punpckhbw xmm1, xmm1 // next 2 - pmulhuw xmm0, xmm2 // argb * value - pmulhuw xmm1, xmm2 // argb * value + punpcklbw xmm0, xmm0 // first 2 + punpckhbw xmm1, xmm1 // next 2 + pmulhuw xmm0, xmm2 // argb * value + pmulhuw xmm1, xmm2 // argb * value psrlw xmm0, 8 psrlw xmm1, 8 packuswb xmm0, xmm1 @@ -4814,28 +4762,29 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, #ifdef HAS_ARGBMULTIPLYROW_SSE2 // Multiply 2 rows of ARGB pixels together, 4 pixels at a time. 
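// ARGBShadeRow_SSE2 above and the multiply kernels below share one widening
// trick, shown here as a rough scalar model (an illustrative sketch, not part
// of the upstream patch): unpacking a byte v against itself yields the 16-bit
// value v * 257, and pmulhuw keeps the high 16 bits of a product, so
// (v * 257 * w) >> 16 approximates v * w / 255 to within one count
// (255 * 255 comes out as 254, for example).
#include <stdint.h>

static uint8_t MulDiv255_Model(uint8_t v, uint8_t w) {
  return (uint8_t)(((uint32_t)v * 257u * w) >> 16);
}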
-__declspec(naked) -void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { +__declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 + mov eax, [esp + 4 + 4] // src_argb0 + mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width pxor xmm5, xmm5 // constant 0 convertloop: - movdqu xmm0, [eax] // read 4 pixels from src_argb0 - movdqu xmm2, [esi] // read 4 pixels from src_argb1 + movdqu xmm0, [eax] // read 4 pixels from src_argb0 + movdqu xmm2, [esi] // read 4 pixels from src_argb1 movdqu xmm1, xmm0 movdqu xmm3, xmm2 - punpcklbw xmm0, xmm0 // first 2 - punpckhbw xmm1, xmm1 // next 2 - punpcklbw xmm2, xmm5 // first 2 - punpckhbw xmm3, xmm5 // next 2 - pmulhuw xmm0, xmm2 // src_argb0 * src_argb1 first 2 - pmulhuw xmm1, xmm3 // src_argb0 * src_argb1 next 2 + punpcklbw xmm0, xmm0 // first 2 + punpckhbw xmm1, xmm1 // next 2 + punpcklbw xmm2, xmm5 // first 2 + punpckhbw xmm3, xmm5 // next 2 + pmulhuw xmm0, xmm2 // src_argb0 * src_argb1 first 2 + pmulhuw xmm1, xmm3 // src_argb0 * src_argb1 next 2 lea eax, [eax + 16] lea esi, [esi + 16] packuswb xmm0, xmm1 @@ -4853,13 +4802,14 @@ void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBADDROW_SSE2 // Add 2 rows of ARGB pixels together, 4 pixels at a time. // TODO(fbarchard): Port this to posix, neon and other math functions. -__declspec(naked) -void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { +__declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 + mov eax, [esp + 4 + 4] // src_argb0 + mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width @@ -4867,11 +4817,11 @@ void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, jl convertloop49 convertloop4: - movdqu xmm0, [eax] // read 4 pixels from src_argb0 + movdqu xmm0, [eax] // read 4 pixels from src_argb0 lea eax, [eax + 16] - movdqu xmm1, [esi] // read 4 pixels from src_argb1 + movdqu xmm1, [esi] // read 4 pixels from src_argb1 lea esi, [esi + 16] - paddusb xmm0, xmm1 // src_argb0 + src_argb1 + paddusb xmm0, xmm1 // src_argb0 + src_argb1 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 @@ -4882,11 +4832,11 @@ void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, jl convertloop19 convertloop1: - movd xmm0, [eax] // read 1 pixels from src_argb0 + movd xmm0, [eax] // read 1 pixels from src_argb0 lea eax, [eax + 4] - movd xmm1, [esi] // read 1 pixels from src_argb1 + movd xmm1, [esi] // read 1 pixels from src_argb1 lea esi, [esi + 4] - paddusb xmm0, xmm1 // src_argb0 + src_argb1 + paddusb xmm0, xmm1 // src_argb0 + src_argb1 movd [edx], xmm0 lea edx, [edx + 4] sub ecx, 1 @@ -4901,22 +4851,23 @@ void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBSUBTRACTROW_SSE2 // Subtract 2 rows of ARGB pixels together, 4 pixels at a time. 
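Both the add kernel above and the subtract kernel below lean on the unsigned-saturating byte ops paddusb/psubusb, which clamp to [0, 255] instead of wrapping. Scalar equivalents (annotation only, not part of the patch; helper names are hypothetical):

    static uint8_t AddSat_sketch(uint8_t a, uint8_t b) {   /* paddusb */
      int s = a + b;
      return (uint8_t)(s > 255 ? 255 : s);
    }
    static uint8_t SubSat_sketch(uint8_t a, uint8_t b) {   /* psubusb */
      int d = a - b;
      return (uint8_t)(d < 0 ? 0 : d);
    }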
-__declspec(naked) -void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { +__declspec(naked) void ARGBSubtractRow_SSE2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 + mov eax, [esp + 4 + 4] // src_argb0 + mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width convertloop: - movdqu xmm0, [eax] // read 4 pixels from src_argb0 + movdqu xmm0, [eax] // read 4 pixels from src_argb0 lea eax, [eax + 16] - movdqu xmm1, [esi] // read 4 pixels from src_argb1 + movdqu xmm1, [esi] // read 4 pixels from src_argb1 lea esi, [esi + 16] - psubusb xmm0, xmm1 // src_argb0 - src_argb1 + psubusb xmm0, xmm1 // src_argb0 - src_argb1 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 @@ -4930,28 +4881,29 @@ void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBMULTIPLYROW_AVX2 // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. -__declspec(naked) -void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { +__declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 + mov eax, [esp + 4 + 4] // src_argb0 + mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width - vpxor ymm5, ymm5, ymm5 // constant 0 + vpxor ymm5, ymm5, ymm5 // constant 0 convertloop: - vmovdqu ymm1, [eax] // read 8 pixels from src_argb0 + vmovdqu ymm1, [eax] // read 8 pixels from src_argb0 lea eax, [eax + 32] - vmovdqu ymm3, [esi] // read 8 pixels from src_argb1 + vmovdqu ymm3, [esi] // read 8 pixels from src_argb1 lea esi, [esi + 32] - vpunpcklbw ymm0, ymm1, ymm1 // low 4 - vpunpckhbw ymm1, ymm1, ymm1 // high 4 - vpunpcklbw ymm2, ymm3, ymm5 // low 4 - vpunpckhbw ymm3, ymm3, ymm5 // high 4 - vpmulhuw ymm0, ymm0, ymm2 // src_argb0 * src_argb1 low 4 - vpmulhuw ymm1, ymm1, ymm3 // src_argb0 * src_argb1 high 4 + vpunpcklbw ymm0, ymm1, ymm1 // low 4 + vpunpckhbw ymm1, ymm1, ymm1 // high 4 + vpunpcklbw ymm2, ymm3, ymm5 // low 4 + vpunpckhbw ymm3, ymm3, ymm5 // high 4 + vpmulhuw ymm0, ymm0, ymm2 // src_argb0 * src_argb1 low 4 + vpmulhuw ymm1, ymm1, ymm3 // src_argb0 * src_argb1 high 4 vpackuswb ymm0, ymm0, ymm1 vmovdqu [edx], ymm0 lea edx, [edx + 32] @@ -4967,20 +4919,21 @@ void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBADDROW_AVX2 // Add 2 rows of ARGB pixels together, 8 pixels at a time. 
-__declspec(naked) -void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { +__declspec(naked) void ARGBAddRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 + mov eax, [esp + 4 + 4] // src_argb0 + mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width convertloop: - vmovdqu ymm0, [eax] // read 8 pixels from src_argb0 + vmovdqu ymm0, [eax] // read 8 pixels from src_argb0 lea eax, [eax + 32] - vpaddusb ymm0, ymm0, [esi] // add 8 pixels from src_argb1 + vpaddusb ymm0, ymm0, [esi] // add 8 pixels from src_argb1 lea esi, [esi + 32] vmovdqu [edx], ymm0 lea edx, [edx + 32] @@ -4996,20 +4949,21 @@ void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBSUBTRACTROW_AVX2 // Subtract 2 rows of ARGB pixels together, 8 pixels at a time. -__declspec(naked) -void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { +__declspec(naked) void ARGBSubtractRow_AVX2(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 + mov eax, [esp + 4 + 4] // src_argb0 + mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width convertloop: - vmovdqu ymm0, [eax] // read 8 pixels from src_argb0 + vmovdqu ymm0, [eax] // read 8 pixels from src_argb0 lea eax, [eax + 32] - vpsubusb ymm0, ymm0, [esi] // src_argb0 - src_argb1 + vpsubusb ymm0, ymm0, [esi] // src_argb0 - src_argb1 lea esi, [esi + 32] vmovdqu [edx], ymm0 lea edx, [edx + 32] @@ -5028,14 +4982,16 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, // -1 0 1 // -2 0 2 // -1 0 1 -__declspec(naked) -void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width) { +__declspec(naked) void SobelXRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_y0 - mov esi, [esp + 8 + 8] // src_y1 + mov eax, [esp + 8 + 4] // src_y0 + mov esi, [esp + 8 + 8] // src_y1 mov edi, [esp + 8 + 12] // src_y2 mov edx, [esp + 8 + 16] // dst_sobelx mov ecx, [esp + 8 + 20] // width @@ -5045,17 +5001,17 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, pxor xmm5, xmm5 // constant 0 convertloop: - movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0] - movq xmm1, qword ptr [eax + 2] // read 8 pixels from src_y0[2] + movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0] + movq xmm1, qword ptr [eax + 2] // read 8 pixels from src_y0[2] punpcklbw xmm0, xmm5 punpcklbw xmm1, xmm5 psubw xmm0, xmm1 - movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0] + movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0] movq xmm2, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2] punpcklbw xmm1, xmm5 punpcklbw xmm2, xmm5 psubw xmm1, xmm2 - movq xmm2, qword ptr [eax + edi] // read 8 pixels from src_y2[0] + movq xmm2, qword ptr [eax + edi] // read 8 pixels from src_y2[0] movq xmm3, qword ptr [eax + edi + 2] // read 8 pixels from src_y2[2] punpcklbw xmm2, xmm5 punpcklbw xmm3, xmm5 @@ -5063,7 +5019,7 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, paddw xmm0, xmm2 paddw xmm0, 
xmm1 paddw xmm0, xmm1 - pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw + pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw psubw xmm1, xmm0 pmaxsw xmm0, xmm1 packuswb xmm0, xmm0 @@ -5084,13 +5040,14 @@ void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, // -1 -2 -1 // 0 0 0 // 1 2 1 -__declspec(naked) -void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width) { +__declspec(naked) void SobelYRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_y0 - mov esi, [esp + 4 + 8] // src_y1 + mov eax, [esp + 4 + 4] // src_y0 + mov esi, [esp + 4 + 8] // src_y1 mov edx, [esp + 4 + 12] // dst_sobely mov ecx, [esp + 4 + 16] // width sub esi, eax @@ -5098,17 +5055,17 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, pxor xmm5, xmm5 // constant 0 convertloop: - movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0] - movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0] + movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0] + movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0] punpcklbw xmm0, xmm5 punpcklbw xmm1, xmm5 psubw xmm0, xmm1 - movq xmm1, qword ptr [eax + 1] // read 8 pixels from src_y0[1] + movq xmm1, qword ptr [eax + 1] // read 8 pixels from src_y0[1] movq xmm2, qword ptr [eax + esi + 1] // read 8 pixels from src_y1[1] punpcklbw xmm1, xmm5 punpcklbw xmm2, xmm5 psubw xmm1, xmm2 - movq xmm2, qword ptr [eax + 2] // read 8 pixels from src_y0[2] + movq xmm2, qword ptr [eax + 2] // read 8 pixels from src_y0[2] movq xmm3, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2] punpcklbw xmm2, xmm5 punpcklbw xmm3, xmm5 @@ -5116,7 +5073,7 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, paddw xmm0, xmm2 paddw xmm0, xmm1 paddw xmm0, xmm1 - pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw + pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). 
SSSE3 could use pabsw psubw xmm1, xmm0 pmaxsw xmm0, xmm1 packuswb xmm0, xmm0 @@ -5137,36 +5094,37 @@ void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, // R = Sobel // G = Sobel // B = Sobel -__declspec(naked) -void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { +__declspec(naked) void SobelRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_sobelx - mov esi, [esp + 4 + 8] // src_sobely + mov eax, [esp + 4 + 4] // src_sobelx + mov esi, [esp + 4 + 8] // src_sobely mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width sub esi, eax - pcmpeqb xmm5, xmm5 // alpha 255 - pslld xmm5, 24 // 0xff000000 + pcmpeqb xmm5, xmm5 // alpha 255 + pslld xmm5, 24 // 0xff000000 convertloop: - movdqu xmm0, [eax] // read 16 pixels src_sobelx - movdqu xmm1, [eax + esi] // read 16 pixels src_sobely + movdqu xmm0, [eax] // read 16 pixels src_sobelx + movdqu xmm1, [eax + esi] // read 16 pixels src_sobely lea eax, [eax + 16] - paddusb xmm0, xmm1 // sobel = sobelx + sobely - movdqa xmm2, xmm0 // GG - punpcklbw xmm2, xmm0 // First 8 - punpckhbw xmm0, xmm0 // Next 8 - movdqa xmm1, xmm2 // GGGG - punpcklwd xmm1, xmm2 // First 4 - punpckhwd xmm2, xmm2 // Next 4 - por xmm1, xmm5 // GGGA + paddusb xmm0, xmm1 // sobel = sobelx + sobely + movdqa xmm2, xmm0 // GG + punpcklbw xmm2, xmm0 // First 8 + punpckhbw xmm0, xmm0 // Next 8 + movdqa xmm1, xmm2 // GGGG + punpcklwd xmm1, xmm2 // First 4 + punpckhwd xmm2, xmm2 // Next 4 + por xmm1, xmm5 // GGGA por xmm2, xmm5 - movdqa xmm3, xmm0 // GGGG - punpcklwd xmm3, xmm0 // Next 4 - punpckhwd xmm0, xmm0 // Last 4 - por xmm3, xmm5 // GGGA + movdqa xmm3, xmm0 // GGGG + punpcklwd xmm3, xmm0 // Next 4 + punpckhwd xmm0, xmm0 // Last 4 + por xmm3, xmm5 // GGGA por xmm0, xmm5 movdqu [edx], xmm1 movdqu [edx + 16], xmm2 @@ -5184,22 +5142,23 @@ void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, #ifdef HAS_SOBELTOPLANEROW_SSE2 // Adds Sobel X and Sobel Y and stores Sobel into a plane. 
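Taken together, the row kernels form sobel = min(255, |gx| + |gy|). A scalar model of the gradients exactly as the assembly builds them (annotation only, not part of the patch; helper names are hypothetical; note that SobelYRow above differences just two source rows):

    static int Abs_sketch(int v) { return v < 0 ? -v : v; }  /* pmaxsw(v, -v) */
    static uint8_t Clamp255_sketch(int v) {                  /* packuswb */
      return (uint8_t)(v > 255 ? 255 : v);
    }
    /* Horizontal gradient over rows y0/y1/y2: kernel -1 0 1 / -2 0 2 / -1 0 1 */
    static uint8_t SobelX_sketch(const uint8_t* y0, const uint8_t* y1,
                                 const uint8_t* y2) {
      return Clamp255_sketch(Abs_sketch(
          (y0[0] - y0[2]) + 2 * (y1[0] - y1[2]) + (y2[0] - y2[2])));
    }
    /* Vertical gradient from two rows, as in SobelYRow_SSE2 above */
    static uint8_t SobelY_sketch(const uint8_t* y0, const uint8_t* y1) {
      return Clamp255_sketch(Abs_sketch(
          (y0[0] - y1[0]) + 2 * (y0[1] - y1[1]) + (y0[2] - y1[2])));
    }

SobelToPlaneRow below then stores Clamp255(sobelx + sobely) for 16 pixels at a time via paddusb.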
-__declspec(naked) -void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width) { +__declspec(naked) void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_sobelx - mov esi, [esp + 4 + 8] // src_sobely + mov eax, [esp + 4 + 4] // src_sobelx + mov esi, [esp + 4 + 8] // src_sobely mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width sub esi, eax convertloop: - movdqu xmm0, [eax] // read 16 pixels src_sobelx - movdqu xmm1, [eax + esi] // read 16 pixels src_sobely + movdqu xmm0, [eax] // read 16 pixels src_sobelx + movdqu xmm1, [eax + esi] // read 16 pixels src_sobely lea eax, [eax + 16] - paddusb xmm0, xmm1 // sobel = sobelx + sobely + paddusb xmm0, xmm1 // sobel = sobelx + sobely movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 @@ -5217,36 +5176,37 @@ void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, // R = Sobel X // G = Sobel // B = Sobel Y -__declspec(naked) -void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { +__declspec(naked) void SobelXYRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_sobelx - mov esi, [esp + 4 + 8] // src_sobely + mov eax, [esp + 4 + 4] // src_sobelx + mov esi, [esp + 4 + 8] // src_sobely mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width sub esi, eax - pcmpeqb xmm5, xmm5 // alpha 255 + pcmpeqb xmm5, xmm5 // alpha 255 convertloop: - movdqu xmm0, [eax] // read 16 pixels src_sobelx - movdqu xmm1, [eax + esi] // read 16 pixels src_sobely + movdqu xmm0, [eax] // read 16 pixels src_sobelx + movdqu xmm1, [eax + esi] // read 16 pixels src_sobely lea eax, [eax + 16] movdqa xmm2, xmm0 - paddusb xmm2, xmm1 // sobel = sobelx + sobely - movdqa xmm3, xmm0 // XA + paddusb xmm2, xmm1 // sobel = sobelx + sobely + movdqa xmm3, xmm0 // XA punpcklbw xmm3, xmm5 punpckhbw xmm0, xmm5 - movdqa xmm4, xmm1 // YS + movdqa xmm4, xmm1 // YS punpcklbw xmm4, xmm2 punpckhbw xmm1, xmm2 - movdqa xmm6, xmm4 // YSXA - punpcklwd xmm6, xmm3 // First 4 - punpckhwd xmm4, xmm3 // Next 4 - movdqa xmm7, xmm1 // YSXA - punpcklwd xmm7, xmm0 // Next 4 - punpckhwd xmm1, xmm0 // Last 4 + movdqa xmm6, xmm4 // YSXA + punpcklwd xmm6, xmm3 // First 4 + punpckhwd xmm4, xmm3 // Next 4 + movdqa xmm7, xmm1 // YSXA + punpcklwd xmm7, xmm0 // Next 4 + punpckhwd xmm1, xmm0 // Last 4 movdqu [edx], xmm6 movdqu [edx + 16], xmm4 movdqu [edx + 32], xmm7 @@ -5275,8 +5235,11 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, // count is number of averaged pixels to produce. // Does 4 pixels at a time. // This function requires alignment on accumulation buffer pointers. -void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, +void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, + const int32_t* botleft, + int width, + int area, + uint8_t* dst, int count) { __asm { mov eax, topleft // eax topleft @@ -5294,18 +5257,18 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, cmp area, 128 // 128 pixels will not overflow 15 bits. 
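    // Annotation (not part of the patch): each box sum is assembled from the
    // two cumulative-sum rows as TL - TR - BL + BR. When area <= 128 that sum
    // is at most 128 * 255 = 32640, which fits in 15 bits, so the s4 path can
    // average with a 16-bit multiply by the 0.16 fixed-point reciprocal
    // (65536 + area - 1) / area computed just below, instead of the float
    // multiplies on the l4 path.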
ja l4 - pshufd xmm5, xmm5, 0 // area - pcmpeqb xmm6, xmm6 // constant of 65536.0 - 1 = 65535.0 + pshufd xmm5, xmm5, 0 // area + pcmpeqb xmm6, xmm6 // constant of 65536.0 - 1 = 65535.0 psrld xmm6, 16 cvtdq2ps xmm6, xmm6 - addps xmm5, xmm6 // (65536.0 + area - 1) - mulps xmm5, xmm4 // (65536.0 + area - 1) * 1 / area - cvtps2dq xmm5, xmm5 // 0.16 fixed point - packssdw xmm5, xmm5 // 16 bit shorts + addps xmm5, xmm6 // (65536.0 + area - 1) + mulps xmm5, xmm4 // (65536.0 + area - 1) * 1 / area + cvtps2dq xmm5, xmm5 // 0.16 fixed point + packssdw xmm5, xmm5 // 16 bit shorts - // 4 pixel loop small blocks. + // 4 pixel loop small blocks. s4: - // top left + // top left movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] @@ -5345,9 +5308,9 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, jmp l4b - // 4 pixel loop + // 4 pixel loop l4: - // top left + // top left movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] @@ -5373,7 +5336,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, paddd xmm3, [esi + edx * 4 + 48] lea esi, [esi + 64] - cvtdq2ps xmm0, xmm0 // Average = Sum * 1 / Area + cvtdq2ps xmm0, xmm0 // Average = Sum * 1 / Area cvtdq2ps xmm1, xmm1 mulps xmm0, xmm4 mulps xmm1, xmm4 @@ -5397,7 +5360,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, add ecx, 4 - 1 jl l1b - // 1 pixel loop + // 1 pixel loop l1: movdqu xmm0, [eax] psubd xmm0, [eax + edx * 4] @@ -5422,8 +5385,10 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, #ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2 // Creates a table of cumulative sums where each value is a sum of all values // above and to the left of the value. -void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width) { +void ComputeCumulativeSumRow_SSE2(const uint8_t* row, + int32_t* cumsum, + const int32_t* previous_cumsum, + int width) { __asm { mov eax, row mov edx, cumsum @@ -5437,7 +5402,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, test edx, 15 jne l4b - // 4 pixel loop + // 4 pixel loop l4: movdqu xmm2, [eax] // 4 argb pixels 16 bytes. lea eax, [eax + 16] @@ -5483,7 +5448,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, add ecx, 4 - 1 jl l1b - // 1 pixel loop + // 1 pixel loop l1: movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes. lea eax, [eax + 4] @@ -5505,10 +5470,11 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, #ifdef HAS_ARGBAFFINEROW_SSE2 // Copy ARGB pixels from source image with slope to a row of destination. 
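A scalar model of the affine copy below: nearest-neighbour sampling along a sloped line through the source, with the sample point advanced by (du, dv) per output pixel (annotation only, not part of the patch; helper name is hypothetical):

    #include <stdint.h>
    #include <string.h>
    static void ARGBAffineRow_sketch(const uint8_t* src_argb, int src_stride,
                                     uint8_t* dst_argb, const float* uv_dudv,
                                     int width) {
      float u = uv_dudv[0], v = uv_dudv[1];          /* start point */
      const float du = uv_dudv[2], dv = uv_dudv[3];  /* step per pixel */
      int i;
      for (i = 0; i < width; ++i) {
        /* truncate like cvttps2dq; offset = x * 4 + y * stride */
        int off = (int)u * 4 + (int)v * src_stride;
        memcpy(dst_argb + i * 4, src_argb + off, 4); /* copy one ARGB pixel */
        u += du;
        v += dv;
      }
    }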
-__declspec(naked) -LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width) { +__declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb, + int src_argb_stride, + uint8_t* dst_argb, + const float* uv_dudv, + int width) { __asm { push esi push edi @@ -5519,46 +5485,46 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, movq xmm2, qword ptr [ecx] // uv movq xmm7, qword ptr [ecx + 8] // dudv mov ecx, [esp + 28] // width - shl esi, 16 // 4, stride + shl esi, 16 // 4, stride add esi, 4 movd xmm5, esi sub ecx, 4 jl l4b - // setup for 4 pixel loop + // setup for 4 pixel loop pshufd xmm7, xmm7, 0x44 // dup dudv pshufd xmm5, xmm5, 0 // dup 4, stride - movdqa xmm0, xmm2 // x0, y0, x1, y1 + movdqa xmm0, xmm2 // x0, y0, x1, y1 addps xmm0, xmm7 movlhps xmm2, xmm0 movdqa xmm4, xmm7 - addps xmm4, xmm4 // dudv *= 2 - movdqa xmm3, xmm2 // x2, y2, x3, y3 + addps xmm4, xmm4 // dudv *= 2 + movdqa xmm3, xmm2 // x2, y2, x3, y3 addps xmm3, xmm4 - addps xmm4, xmm4 // dudv *= 4 + addps xmm4, xmm4 // dudv *= 4 - // 4 pixel loop + // 4 pixel loop l4: - cvttps2dq xmm0, xmm2 // x, y float to int first 2 - cvttps2dq xmm1, xmm3 // x, y float to int next 2 - packssdw xmm0, xmm1 // x, y as 8 shorts - pmaddwd xmm0, xmm5 // offsets = x * 4 + y * stride. + cvttps2dq xmm0, xmm2 // x, y float to int first 2 + cvttps2dq xmm1, xmm3 // x, y float to int next 2 + packssdw xmm0, xmm1 // x, y as 8 shorts + pmaddwd xmm0, xmm5 // offsets = x * 4 + y * stride. movd esi, xmm0 pshufd xmm0, xmm0, 0x39 // shift right movd edi, xmm0 pshufd xmm0, xmm0, 0x39 // shift right movd xmm1, [eax + esi] // read pixel 0 movd xmm6, [eax + edi] // read pixel 1 - punpckldq xmm1, xmm6 // combine pixel 0 and 1 - addps xmm2, xmm4 // x, y += dx, dy first 2 + punpckldq xmm1, xmm6 // combine pixel 0 and 1 + addps xmm2, xmm4 // x, y += dx, dy first 2 movq qword ptr [edx], xmm1 movd esi, xmm0 pshufd xmm0, xmm0, 0x39 // shift right movd edi, xmm0 movd xmm6, [eax + esi] // read pixel 2 movd xmm0, [eax + edi] // read pixel 3 - punpckldq xmm6, xmm0 // combine pixel 2 and 3 - addps xmm3, xmm4 // x, y += dx, dy next 2 + punpckldq xmm6, xmm0 // combine pixel 2 and 3 + addps xmm3, xmm4 // x, y += dx, dy next 2 movq qword ptr 8[edx], xmm6 lea edx, [edx + 16] sub ecx, 4 @@ -5568,12 +5534,12 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, add ecx, 4 - 1 jl l1b - // 1 pixel loop + // 1 pixel loop l1: - cvttps2dq xmm0, xmm2 // x, y float to int - packssdw xmm0, xmm0 // x, y as shorts - pmaddwd xmm0, xmm5 // offset = x * 4 + y * stride - addps xmm2, xmm7 // x, y += dx, dy + cvttps2dq xmm0, xmm2 // x, y float to int + packssdw xmm0, xmm0 // x, y as shorts + pmaddwd xmm0, xmm5 // offset = x * 4 + y * stride + addps xmm2, xmm7 // x, y += dx, dy movd esi, xmm0 movd xmm0, [eax + esi] // copy a pixel movd [edx], xmm0 @@ -5590,15 +5556,16 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, #ifdef HAS_INTERPOLATEROW_AVX2 // Bilinear filter 32x2 -> 32x1 -__declspec(naked) -void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { +__declspec(naked) void InterpolateRow_AVX2(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, + int source_y_fraction) { __asm { push esi push edi - mov edi, [esp + 8 + 4] // dst_ptr - mov esi, [esp + 8 + 8] // src_ptr + mov edi, [esp + 8 + 4] // dst_ptr + mov esi, [esp + 8 + 8] // src_ptr mov edx, 
[esp + 8 + 12] // src_stride mov ecx, [esp + 8 + 16] // dst_width mov eax, [esp + 8 + 20] // source_y_fraction (0..255) @@ -5607,7 +5574,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, je xloop100 // 0 / 256. Blend 100 / 0. sub edi, esi cmp eax, 128 - je xloop50 // 128 /256 is 0.50. Blend 50 / 50. + je xloop50 // 128 /256 is 0.50. Blend 50 / 50. vmovd xmm0, eax // high fraction 0..255 neg eax @@ -5634,14 +5601,14 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, vpaddw ymm0, ymm0, ymm4 vpsrlw ymm1, ymm1, 8 vpsrlw ymm0, ymm0, 8 - vpackuswb ymm0, ymm0, ymm1 // unmutates + vpackuswb ymm0, ymm0, ymm1 // unmutates vmovdqu [esi + edi], ymm0 lea esi, [esi + 32] sub ecx, 32 jg xloop jmp xloop99 - // Blend 50 / 50. + // Blend 50 / 50. xloop50: vmovdqu ymm0, [esi] vpavgb ymm0, ymm0, [esi + edx] @@ -5651,7 +5618,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, jg xloop50 jmp xloop99 - // Blend 100 / 0 - Copy row unchanged. + // Blend 100 / 0 - Copy row unchanged. xloop100: rep movsb @@ -5666,25 +5633,26 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, // Bilinear filter 16x2 -> 16x1 // TODO(fbarchard): Consider allowing 256 using memcpy. -__declspec(naked) -void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { +__declspec(naked) void InterpolateRow_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, + int source_y_fraction) { __asm { push esi push edi - mov edi, [esp + 8 + 4] // dst_ptr - mov esi, [esp + 8 + 8] // src_ptr + mov edi, [esp + 8 + 4] // dst_ptr + mov esi, [esp + 8 + 8] // src_ptr mov edx, [esp + 8 + 12] // src_stride mov ecx, [esp + 8 + 16] // dst_width mov eax, [esp + 8 + 20] // source_y_fraction (0..255) sub edi, esi - // Dispatch to specialized filters if applicable. + // Dispatch to specialized filters if applicable. cmp eax, 0 je xloop100 // 0 /256. Blend 100 / 0. cmp eax, 128 - je xloop50 // 128 / 256 is 0.50. Blend 50 / 50. + je xloop50 // 128 / 256 is 0.50. Blend 50 / 50. movd xmm0, eax // high fraction 0..255 neg eax @@ -5703,7 +5671,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, movdqu xmm1, xmm0 punpcklbw xmm0, xmm2 punpckhbw xmm1, xmm2 - psubb xmm0, xmm4 // bias image by -128 + psubb xmm0, xmm4 // bias image by -128 psubb xmm1, xmm4 movdqa xmm2, xmm5 movdqa xmm3, xmm5 @@ -5720,7 +5688,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, jg xloop jmp xloop99 - // Blend 50 / 50. + // Blend 50 / 50. xloop50: movdqu xmm0, [esi] movdqu xmm1, [esi + edx] @@ -5731,7 +5699,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, jg xloop50 jmp xloop99 - // Blend 100 / 0 - Copy row unchanged. + // Blend 100 / 0 - Copy row unchanged. xloop100: movdqu xmm0, [esi] movdqu [esi + edi], xmm0 @@ -5747,15 +5715,16 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, } // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
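The shuffle rows below drive pshufb with a 16-byte table that repeats one 4-byte per-pixel pattern, so each output byte simply selects an input byte within the same pixel. Scalar model (annotation only, not part of the patch; helper name is hypothetical):

    /* e.g. shuffler {3,2,1,0} reverses the byte order of each pixel */
    static void ShufflePixel_sketch(const uint8_t* src, uint8_t* dst,
                                    const uint8_t* shuffler) {
      int i;
      for (i = 0; i < 4; ++i) {
        dst[i] = src[shuffler[i]];
      }
    }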
-__declspec(naked) -void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width) { +__declspec(naked) void ARGBShuffleRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // shuffler + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_argb + mov ecx, [esp + 12] // shuffler movdqu xmm5, [ecx] - mov ecx, [esp + 16] // width + mov ecx, [esp + 16] // width wloop: movdqu xmm0, [eax] @@ -5773,15 +5742,16 @@ void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, } #ifdef HAS_ARGBSHUFFLEROW_AVX2 -__declspec(naked) -void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width) { +__declspec(naked) void ARGBShuffleRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width) { __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // shuffler - vbroadcastf128 ymm5, [ecx] // same shuffle in high as low. - mov ecx, [esp + 16] // width + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_argb + mov ecx, [esp + 12] // shuffler + vbroadcastf128 ymm5, [ecx] // same shuffle in high as low. + mov ecx, [esp + 16] // width wloop: vmovdqu ymm0, [eax] @@ -5801,152 +5771,36 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, } #endif // HAS_ARGBSHUFFLEROW_AVX2 -__declspec(naked) -void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width) { - __asm { - push ebx - push esi - mov eax, [esp + 8 + 4] // src_argb - mov edx, [esp + 8 + 8] // dst_argb - mov esi, [esp + 8 + 12] // shuffler - mov ecx, [esp + 8 + 16] // width - pxor xmm5, xmm5 - - mov ebx, [esi] // shuffler - cmp ebx, 0x03000102 - je shuf_3012 - cmp ebx, 0x00010203 - je shuf_0123 - cmp ebx, 0x00030201 - je shuf_0321 - cmp ebx, 0x02010003 - je shuf_2103 - - // TODO(fbarchard): Use one source pointer and 3 offsets. 
- shuf_any1: - movzx ebx, byte ptr [esi] - movzx ebx, byte ptr [eax + ebx] - mov [edx], bl - movzx ebx, byte ptr [esi + 1] - movzx ebx, byte ptr [eax + ebx] - mov [edx + 1], bl - movzx ebx, byte ptr [esi + 2] - movzx ebx, byte ptr [eax + ebx] - mov [edx + 2], bl - movzx ebx, byte ptr [esi + 3] - movzx ebx, byte ptr [eax + ebx] - mov [edx + 3], bl - lea eax, [eax + 4] - lea edx, [edx + 4] - sub ecx, 1 - jg shuf_any1 - jmp shuf99 - - shuf_0123: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 01Bh // 1B = 00011011 = 0x0123 = BGRAToARGB - pshuflw xmm0, xmm0, 01Bh - pshufhw xmm1, xmm1, 01Bh - pshuflw xmm1, xmm1, 01Bh - packuswb xmm0, xmm1 - movdqu [edx], xmm0 - lea edx, [edx + 16] - sub ecx, 4 - jg shuf_0123 - jmp shuf99 - - shuf_0321: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 039h // 39 = 00111001 = 0x0321 = RGBAToARGB - pshuflw xmm0, xmm0, 039h - pshufhw xmm1, xmm1, 039h - pshuflw xmm1, xmm1, 039h - packuswb xmm0, xmm1 - movdqu [edx], xmm0 - lea edx, [edx + 16] - sub ecx, 4 - jg shuf_0321 - jmp shuf99 - - shuf_2103: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 093h // 93 = 10010011 = 0x2103 = ARGBToRGBA - pshuflw xmm0, xmm0, 093h - pshufhw xmm1, xmm1, 093h - pshuflw xmm1, xmm1, 093h - packuswb xmm0, xmm1 - movdqu [edx], xmm0 - lea edx, [edx + 16] - sub ecx, 4 - jg shuf_2103 - jmp shuf99 - - shuf_3012: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 0C6h // C6 = 11000110 = 0x3012 = ABGRToARGB - pshuflw xmm0, xmm0, 0C6h - pshufhw xmm1, xmm1, 0C6h - pshuflw xmm1, xmm1, 0C6h - packuswb xmm0, xmm1 - movdqu [edx], xmm0 - lea edx, [edx + 16] - sub ecx, 4 - jg shuf_3012 - - shuf99: - pop esi - pop ebx - ret - } -} - // YUY2 - Macro-pixel = 2 image pixels // Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4.... 
// UYVY - Macro-pixel = 2 image pixels // U0Y0V0Y1 -__declspec(naked) -void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { +__declspec(naked) void I422ToYUY2Row_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_y - mov esi, [esp + 8 + 8] // src_u - mov edx, [esp + 8 + 12] // src_v - mov edi, [esp + 8 + 16] // dst_frame - mov ecx, [esp + 8 + 20] // width + mov eax, [esp + 8 + 4] // src_y + mov esi, [esp + 8 + 8] // src_u + mov edx, [esp + 8 + 12] // src_v + mov edi, [esp + 8 + 16] // dst_frame + mov ecx, [esp + 8 + 20] // width sub edx, esi convertloop: - movq xmm2, qword ptr [esi] // U - movq xmm3, qword ptr [esi + edx] // V + movq xmm2, qword ptr [esi] // U + movq xmm3, qword ptr [esi + edx] // V lea esi, [esi + 8] - punpcklbw xmm2, xmm3 // UV - movdqu xmm0, [eax] // Y + punpcklbw xmm2, xmm3 // UV + movdqu xmm0, [eax] // Y lea eax, [eax + 16] movdqa xmm1, xmm0 - punpcklbw xmm0, xmm2 // YUYV + punpcklbw xmm0, xmm2 // YUYV punpckhbw xmm1, xmm2 movdqu [edi], xmm0 movdqu [edi + 16], xmm1 @@ -5960,30 +5814,30 @@ void I422ToYUY2Row_SSE2(const uint8* src_y, } } -__declspec(naked) -void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { +__declspec(naked) void I422ToUYVYRow_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, + int width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_y - mov esi, [esp + 8 + 8] // src_u - mov edx, [esp + 8 + 12] // src_v - mov edi, [esp + 8 + 16] // dst_frame - mov ecx, [esp + 8 + 20] // width + mov eax, [esp + 8 + 4] // src_y + mov esi, [esp + 8 + 8] // src_u + mov edx, [esp + 8 + 12] // src_v + mov edi, [esp + 8 + 16] // dst_frame + mov ecx, [esp + 8 + 20] // width sub edx, esi convertloop: - movq xmm2, qword ptr [esi] // U - movq xmm3, qword ptr [esi + edx] // V + movq xmm2, qword ptr [esi] // U + movq xmm3, qword ptr [esi + edx] // V lea esi, [esi + 8] - punpcklbw xmm2, xmm3 // UV - movdqu xmm0, [eax] // Y + punpcklbw xmm2, xmm3 // UV + movdqu xmm0, [eax] // Y movdqa xmm1, xmm2 lea eax, [eax + 16] - punpcklbw xmm1, xmm0 // UYVY + punpcklbw xmm1, xmm0 // UYVY punpckhbw xmm2, xmm0 movdqu [edi], xmm1 movdqu [edi + 16], xmm2 @@ -5998,22 +5852,22 @@ void I422ToUYVYRow_SSE2(const uint8* src_y, } #ifdef HAS_ARGBPOLYNOMIALROW_SSE2 -__declspec(naked) -void ARGBPolynomialRow_SSE2(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width) { +__declspec(naked) void ARGBPolynomialRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, + const float* poly, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] /* src_argb */ - mov edx, [esp + 4 + 8] /* dst_argb */ - mov esi, [esp + 4 + 12] /* poly */ - mov ecx, [esp + 4 + 16] /* width */ + mov eax, [esp + 4 + 4] /* src_argb */ + mov edx, [esp + 4 + 8] /* dst_argb */ + mov esi, [esp + 4 + 12] /* poly */ + mov ecx, [esp + 4 + 16] /* width */ pxor xmm3, xmm3 // 0 constant for zero extending bytes to ints. - // 2 pixel loop. + // 2 pixel loop. 
convertloop: -// pmovzxbd xmm0, dword ptr [eax] // BGRA pixel -// pmovzxbd xmm4, dword ptr [eax + 4] // BGRA pixel + // pmovzxbd xmm0, dword ptr [eax] // BGRA pixel + // pmovzxbd xmm4, dword ptr [eax + 4] // BGRA pixel movq xmm0, qword ptr [eax] // BGRABGRA lea eax, [eax + 8] punpcklbw xmm0, xmm3 @@ -6057,25 +5911,25 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb, #endif // HAS_ARGBPOLYNOMIALROW_SSE2 #ifdef HAS_ARGBPOLYNOMIALROW_AVX2 -__declspec(naked) -void ARGBPolynomialRow_AVX2(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width) { +__declspec(naked) void ARGBPolynomialRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + const float* poly, + int width) { __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_argb */ - mov ecx, [esp + 12] /* poly */ - vbroadcastf128 ymm4, [ecx] // C0 + mov eax, [esp + 4] /* src_argb */ + mov edx, [esp + 8] /* dst_argb */ + mov ecx, [esp + 12] /* poly */ + vbroadcastf128 ymm4, [ecx] // C0 vbroadcastf128 ymm5, [ecx + 16] // C1 vbroadcastf128 ymm6, [ecx + 32] // C2 vbroadcastf128 ymm7, [ecx + 48] // C3 - mov ecx, [esp + 16] /* width */ + mov ecx, [esp + 16] /* width */ // 2 pixel loop. convertloop: vpmovzxbd ymm0, qword ptr [eax] // 2 BGRA pixels lea eax, [eax + 8] - vcvtdq2ps ymm0, ymm0 // X 8 floats + vcvtdq2ps ymm0, ymm0 // X 8 floats vmulps ymm2, ymm0, ymm0 // X * X vmulps ymm3, ymm0, ymm7 // C3 * X vfmadd132ps ymm0, ymm4, ymm5 // result = C0 + C1 * X @@ -6095,16 +5949,125 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb, } #endif // HAS_ARGBPOLYNOMIALROW_AVX2 +#ifdef HAS_HALFFLOATROW_SSE2 +static float kExpBias = 1.9259299444e-34f; +__declspec(naked) void HalfFloatRow_SSE2(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { + __asm { + mov eax, [esp + 4] /* src */ + mov edx, [esp + 8] /* dst */ + movd xmm4, dword ptr [esp + 12] /* scale */ + mov ecx, [esp + 16] /* width */ + mulss xmm4, kExpBias + pshufd xmm4, xmm4, 0 + pxor xmm5, xmm5 + sub edx, eax + + // 8 pixel loop. + convertloop: + movdqu xmm2, xmmword ptr [eax] // 8 shorts + add eax, 16 + movdqa xmm3, xmm2 + punpcklwd xmm2, xmm5 + cvtdq2ps xmm2, xmm2 // convert 8 ints to floats + punpckhwd xmm3, xmm5 + cvtdq2ps xmm3, xmm3 + mulps xmm2, xmm4 + mulps xmm3, xmm4 + psrld xmm2, 13 + psrld xmm3, 13 + packssdw xmm2, xmm3 + movdqu [eax + edx - 16], xmm2 + sub ecx, 8 + jg convertloop + ret + } +} +#endif // HAS_HALFFLOATROW_SSE2 + +#ifdef HAS_HALFFLOATROW_AVX2 +__declspec(naked) void HalfFloatRow_AVX2(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { + __asm { + mov eax, [esp + 4] /* src */ + mov edx, [esp + 8] /* dst */ + movd xmm4, dword ptr [esp + 12] /* scale */ + mov ecx, [esp + 16] /* width */ + + vmulss xmm4, xmm4, kExpBias + vbroadcastss ymm4, xmm4 + vpxor ymm5, ymm5, ymm5 + sub edx, eax + + // 16 pixel loop. + convertloop: + vmovdqu ymm2, [eax] // 16 shorts + add eax, 32 + vpunpckhwd ymm3, ymm2, ymm5 // convert 16 shorts to 16 ints + vpunpcklwd ymm2, ymm2, ymm5 + vcvtdq2ps ymm3, ymm3 // convert 16 ints to floats + vcvtdq2ps ymm2, ymm2 + vmulps ymm3, ymm3, ymm4 // scale to adjust exponent for 5 bit range. 
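    // Annotation (not part of the patch): kExpBias above is 2^-112
    // (~1.9259299e-34). Multiplying by it rebiases the exponent from float's
    // 127 to half-float's 15 (127 - 112), so the 5-bit exponent and top 10
    // mantissa bits of the result line up at bit 13, and the vpsrld ..., 13
    // below extracts a truncated half float without the F16C vcvtps2ph used
    // by HalfFloatRow_F16C further down.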
+ vmulps ymm2, ymm2, ymm4 + vpsrld ymm3, ymm3, 13 // float convert to 8 half floats truncate + vpsrld ymm2, ymm2, 13 + vpackssdw ymm2, ymm2, ymm3 + vmovdqu [eax + edx - 32], ymm2 + sub ecx, 16 + jg convertloop + vzeroupper + ret + } +} +#endif // HAS_HALFFLOATROW_AVX2 + +#ifdef HAS_HALFFLOATROW_F16C +__declspec(naked) void HalfFloatRow_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width) { + __asm { + mov eax, [esp + 4] /* src */ + mov edx, [esp + 8] /* dst */ + vbroadcastss ymm4, [esp + 12] /* scale */ + mov ecx, [esp + 16] /* width */ + sub edx, eax + + // 16 pixel loop. + convertloop: + vpmovzxwd ymm2, xmmword ptr [eax] // 8 shorts -> 8 ints + vpmovzxwd ymm3, xmmword ptr [eax + 16] // 8 more shorts + add eax, 32 + vcvtdq2ps ymm2, ymm2 // convert 8 ints to floats + vcvtdq2ps ymm3, ymm3 + vmulps ymm2, ymm2, ymm4 // scale to normalized range 0 to 1 + vmulps ymm3, ymm3, ymm4 + vcvtps2ph xmm2, ymm2, 3 // float convert to 8 half floats truncate + vcvtps2ph xmm3, ymm3, 3 + vmovdqu [eax + edx + 32], xmm2 + vmovdqu [eax + edx + 32 + 16], xmm3 + sub ecx, 16 + jg convertloop + vzeroupper + ret + } +} +#endif // HAS_HALFFLOATROW_F16C + #ifdef HAS_ARGBCOLORTABLEROW_X86 // Tranform ARGB pixels with color table. -__declspec(naked) -void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, - int width) { +__declspec(naked) void ARGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] /* dst_argb */ - mov esi, [esp + 4 + 8] /* table_argb */ - mov ecx, [esp + 4 + 12] /* width */ + mov eax, [esp + 4 + 4] /* dst_argb */ + mov esi, [esp + 4 + 8] /* table_argb */ + mov ecx, [esp + 4 + 12] /* width */ // 1 pixel loop. convertloop: @@ -6131,13 +6094,14 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, #ifdef HAS_RGBCOLORTABLEROW_X86 // Tranform RGB pixels with color table. -__declspec(naked) -void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { +__declspec(naked) void RGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, + int width) { __asm { push esi - mov eax, [esp + 4 + 4] /* dst_argb */ - mov esi, [esp + 4 + 8] /* table_argb */ - mov ecx, [esp + 4 + 12] /* width */ + mov eax, [esp + 4 + 4] /* dst_argb */ + mov esi, [esp + 4 + 8] /* table_argb */ + mov ecx, [esp + 4 + 12] /* width */ // 1 pixel loop. convertloop: @@ -6162,27 +6126,28 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 // Tranform RGB pixels with luma table. -__declspec(naked) -void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - int width, - const uint8* luma, uint32 lumacoeff) { +__declspec(naked) void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + const uint8_t* luma, + uint32_t lumacoeff) { __asm { push esi push edi - mov eax, [esp + 8 + 4] /* src_argb */ - mov edi, [esp + 8 + 8] /* dst_argb */ - mov ecx, [esp + 8 + 12] /* width */ + mov eax, [esp + 8 + 4] /* src_argb */ + mov edi, [esp + 8 + 8] /* dst_argb */ + mov ecx, [esp + 8 + 12] /* width */ movd xmm2, dword ptr [esp + 8 + 16] // luma table movd xmm3, dword ptr [esp + 8 + 20] // lumacoeff pshufd xmm2, xmm2, 0 pshufd xmm3, xmm3, 0 - pcmpeqb xmm4, xmm4 // generate mask 0xff00ff00 + pcmpeqb xmm4, xmm4 // generate mask 0xff00ff00 psllw xmm4, 8 pxor xmm5, xmm5 - // 4 pixel loop. + // 4 pixel loop. 
convertloop: - movdqu xmm0, xmmword ptr [eax] // generate luma ptr + movdqu xmm0, xmmword ptr [eax] // generate luma ptr pmaddubsw xmm0, xmm3 phaddw xmm0, xmm0 pand xmm0, xmm4 // mask out low bits diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale.cc index 36e3fe52813a..2cfa1c6cb1c4 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/scale.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/scale.cc @@ -33,17 +33,25 @@ static __inline int Abs(int v) { // This is an optimized version for scaling down a plane to 1/2 of // its original size. -static void ScalePlaneDown2(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr, +static void ScalePlaneDown2(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) = - filtering == kFilterNone ? ScaleRowDown2_C : - (filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C); + void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width) = + filtering == kFilterNone + ? ScaleRowDown2_C + : (filtering == kFilterLinear ? ScaleRowDown2Linear_C + : ScaleRowDown2Box_C); int row_stride = src_stride << 1; + (void)src_width; + (void)src_height; if (!filtering) { src_ptr += src_stride; // Point to odd rows. src_stride = 0; @@ -51,46 +59,63 @@ static void ScalePlaneDown2(int src_width, int src_height, #if defined(HAS_SCALEROWDOWN2_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON : - (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON : - ScaleRowDown2Box_Any_NEON); + ScaleRowDown2 = + filtering == kFilterNone + ? ScaleRowDown2_Any_NEON + : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON + : ScaleRowDown2Box_Any_NEON); if (IS_ALIGNED(dst_width, 16)) { - ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON : - (filtering == kFilterLinear ? ScaleRowDown2Linear_NEON : - ScaleRowDown2Box_NEON); + ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON + : (filtering == kFilterLinear + ? ScaleRowDown2Linear_NEON + : ScaleRowDown2Box_NEON); } } #endif #if defined(HAS_SCALEROWDOWN2_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSSE3 : - (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3 : - ScaleRowDown2Box_Any_SSSE3); + ScaleRowDown2 = + filtering == kFilterNone + ? ScaleRowDown2_Any_SSSE3 + : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3 + : ScaleRowDown2Box_Any_SSSE3); if (IS_ALIGNED(dst_width, 16)) { - ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSSE3 : - (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3 : - ScaleRowDown2Box_SSSE3); + ScaleRowDown2 = + filtering == kFilterNone + ? ScaleRowDown2_SSSE3 + : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3 + : ScaleRowDown2Box_SSSE3); } } #endif #if defined(HAS_SCALEROWDOWN2_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 : - (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 : - ScaleRowDown2Box_Any_AVX2); + ScaleRowDown2 = + filtering == kFilterNone + ? ScaleRowDown2_Any_AVX2 + : (filtering == kFilterLinear ? 
ScaleRowDown2Linear_Any_AVX2 + : ScaleRowDown2Box_Any_AVX2); if (IS_ALIGNED(dst_width, 32)) { - ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 : - (filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 : - ScaleRowDown2Box_AVX2); + ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 + : (filtering == kFilterLinear + ? ScaleRowDown2Linear_AVX2 + : ScaleRowDown2Box_AVX2); } } #endif -#if defined(HAS_SCALEROWDOWN2_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown2 = filtering ? - ScaleRowDown2Box_DSPR2 : ScaleRowDown2_DSPR2; +#if defined(HAS_SCALEROWDOWN2_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ScaleRowDown2 = + filtering == kFilterNone + ? ScaleRowDown2_Any_MSA + : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA + : ScaleRowDown2Box_Any_MSA); + if (IS_ALIGNED(dst_width, 32)) { + ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA + : (filtering == kFilterLinear + ? ScaleRowDown2Linear_MSA + : ScaleRowDown2Box_MSA); + } } #endif @@ -105,18 +130,25 @@ static void ScalePlaneDown2(int src_width, int src_height, } } -static void ScalePlaneDown2_16(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint16* src_ptr, uint16* dst_ptr, +static void ScalePlaneDown2_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width) = - filtering == kFilterNone ? ScaleRowDown2_16_C : - (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C : - ScaleRowDown2Box_16_C); + void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, int dst_width) = + filtering == kFilterNone + ? ScaleRowDown2_16_C + : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C + : ScaleRowDown2Box_16_C); int row_stride = src_stride << 1; + (void)src_width; + (void)src_height; if (!filtering) { src_ptr += src_stride; // Point to odd rows. src_stride = 0; @@ -124,23 +156,17 @@ static void ScalePlaneDown2_16(int src_width, int src_height, #if defined(HAS_SCALEROWDOWN2_16_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) { - ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON : - ScaleRowDown2_16_NEON; + ScaleRowDown2 = + filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2_16_NEON; } #endif #if defined(HAS_SCALEROWDOWN2_16_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { - ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 : - (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 : - ScaleRowDown2Box_16_SSE2); - } -#endif -#if defined(HAS_SCALEROWDOWN2_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown2 = filtering ? - ScaleRowDown2Box_16_DSPR2 : ScaleRowDown2_16_DSPR2; + ScaleRowDown2 = + filtering == kFilterNone + ? ScaleRowDown2_16_SSE2 + : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 + : ScaleRowDown2Box_16_SSE2); } #endif @@ -159,24 +185,30 @@ static void ScalePlaneDown2_16(int src_width, int src_height, // This is an optimized version for scaling down a plane to 1/4 of // its original size. 
-static void ScalePlaneDown4(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr, +static void ScalePlaneDown4(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) = + void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width) = filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C; int row_stride = src_stride << 2; + (void)src_width; + (void)src_height; if (!filtering) { src_ptr += src_stride * 2; // Point to row 2. src_stride = 0; } #if defined(HAS_SCALEROWDOWN4_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleRowDown4 = filtering ? - ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON; + ScaleRowDown4 = + filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON; if (IS_ALIGNED(dst_width, 8)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; } @@ -184,8 +216,8 @@ static void ScalePlaneDown4(int src_width, int src_height, #endif #if defined(HAS_SCALEROWDOWN4_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ScaleRowDown4 = filtering ? - ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3; + ScaleRowDown4 = + filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3; if (IS_ALIGNED(dst_width, 8)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3; } @@ -193,19 +225,20 @@ static void ScalePlaneDown4(int src_width, int src_height, #endif #if defined(HAS_SCALEROWDOWN4_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ScaleRowDown4 = filtering ? - ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2; + ScaleRowDown4 = + filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2; if (IS_ALIGNED(dst_width, 16)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2; } } #endif -#if defined(HAS_SCALEROWDOWN4_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown4 = filtering ? - ScaleRowDown4Box_DSPR2 : ScaleRowDown4_DSPR2; +#if defined(HAS_SCALEROWDOWN4_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ScaleRowDown4 = + filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA; + if (IS_ALIGNED(dst_width, 16)) { + ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA; + } } #endif @@ -219,38 +252,36 @@ static void ScalePlaneDown4(int src_width, int src_height, } } -static void ScalePlaneDown4_16(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint16* src_ptr, uint16* dst_ptr, +static void ScalePlaneDown4_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width) = + void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, int dst_width) = filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C; int row_stride = src_stride << 2; + (void)src_width; + (void)src_height; if (!filtering) { src_ptr += src_stride * 2; // Point to row 2. 
src_stride = 0; } #if defined(HAS_SCALEROWDOWN4_16_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { - ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON : - ScaleRowDown4_16_NEON; + ScaleRowDown4 = + filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON; } #endif #if defined(HAS_SCALEROWDOWN4_16_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { - ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 : - ScaleRowDown4_16_SSE2; - } -#endif -#if defined(HAS_SCALEROWDOWN4_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown4 = filtering ? - ScaleRowDown4Box_16_DSPR2 : ScaleRowDown4_16_DSPR2; + ScaleRowDown4 = + filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2; } #endif @@ -265,18 +296,23 @@ static void ScalePlaneDown4_16(int src_width, int src_height, } // Scale plane down, 3/4 - -static void ScalePlaneDown34(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr, +static void ScalePlaneDown34(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width); + void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width); const int filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; + (void)src_width; + (void)src_height; assert(dst_width % 3 == 0); if (!filtering) { ScaleRowDown34_0 = ScaleRowDown34_C; @@ -305,6 +341,26 @@ static void ScalePlaneDown34(int src_width, int src_height, } } #endif +#if defined(HAS_SCALEROWDOWN34_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + if (!filtering) { + ScaleRowDown34_0 = ScaleRowDown34_Any_MSA; + ScaleRowDown34_1 = ScaleRowDown34_Any_MSA; + } else { + ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA; + ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA; + } + if (dst_width % 48 == 0) { + if (!filtering) { + ScaleRowDown34_0 = ScaleRowDown34_MSA; + ScaleRowDown34_1 = ScaleRowDown34_MSA; + } else { + ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA; + ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA; + } + } + } +#endif #if defined(HAS_SCALEROWDOWN34_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { if (!filtering) { @@ -325,19 +381,6 @@ static void ScalePlaneDown34(int src_width, int src_height, } } #endif -#if defined(HAS_SCALEROWDOWN34_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_DSPR2; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Box_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_1_Box_DSPR2; - } - } -#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); @@ -346,8 +389,7 @@ static void ScalePlaneDown34(int src_width, int src_height, ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride; dst_ptr += dst_stride; - ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, - dst_ptr, dst_width); + ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 2; dst_ptr += dst_stride; } @@ -363,17 +405,23 @@ static void ScalePlaneDown34(int src_width, int src_height, } } -static void ScalePlaneDown34_16(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint16* src_ptr, uint16* dst_ptr, +static void ScalePlaneDown34_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width); - void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width); + void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, int dst_width); + void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, int dst_width); const int filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; + (void)src_width; + (void)src_height; assert(dst_width % 3 == 0); if (!filtering) { ScaleRowDown34_0 = ScaleRowDown34_16_C; @@ -404,19 +452,6 @@ static void ScalePlaneDown34_16(int src_width, int src_height, } } #endif -#if defined(HAS_SCALEROWDOWN34_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_16_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_16_DSPR2; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_DSPR2; - } - } -#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); @@ -425,8 +460,7 @@ static void ScalePlaneDown34_16(int src_width, int src_height, ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride; dst_ptr += dst_stride; - ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, - dst_ptr, dst_width); + ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 2; dst_ptr += dst_stride; } @@ -442,7 +476,6 @@ static void ScalePlaneDown34_16(int src_width, int src_height, } } - // Scale plane, 3/8 // This is an optimized version for scaling down a plane to 3/8 // of its original size. @@ -458,18 +491,24 @@ static void ScalePlaneDown34_16(int src_width, int src_height, // ggghhhii // Boxes are 3x3, 2x3, 3x2 and 2x2 -static void ScalePlaneDown38(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr, +static void ScalePlaneDown38(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width); + void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride, + uint8_t* dst_ptr, int dst_width); const int filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; assert(dst_width % 3 == 0); + (void)src_width; + (void)src_height; if (!filtering) { ScaleRowDown38_3 = ScaleRowDown38_C; ScaleRowDown38_2 = ScaleRowDown38_C; @@ -517,16 +556,23 @@ static void ScalePlaneDown38(int src_width, int src_height, } } #endif -#if defined(HAS_SCALEROWDOWN38_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { +#if defined(HAS_SCALEROWDOWN38_MSA) + if (TestCpuFlag(kCpuHasMSA)) { if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_DSPR2; + ScaleRowDown38_3 = ScaleRowDown38_Any_MSA; + ScaleRowDown38_2 = ScaleRowDown38_Any_MSA; } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Box_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_2_Box_DSPR2; + ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA; + ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA; + } + if (dst_width % 12 == 0) { + if (!filtering) { + ScaleRowDown38_3 = ScaleRowDown38_MSA; + ScaleRowDown38_2 = ScaleRowDown38_MSA; + } else { + ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA; + ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA; + } } } #endif @@ -554,17 +600,23 @@ static void ScalePlaneDown38(int src_width, int src_height, } } -static void ScalePlaneDown38_16(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint16* src_ptr, uint16* dst_ptr, +static void ScalePlaneDown38_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr, enum FilterMode filtering) { int y; - void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width); - void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width); + void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, int dst_width); + void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride, + uint16_t* dst_ptr, int dst_width); const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; + (void)src_width; + (void)src_height; assert(dst_width % 3 == 0); if (!filtering) { ScaleRowDown38_3 = ScaleRowDown38_16_C; @@ -595,19 +647,6 @@ static void ScalePlaneDown38_16(int src_width, int src_height, } } #endif -#if defined(HAS_SCALEROWDOWN38_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_16_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_16_DSPR2; - } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_DSPR2; - } - } -#endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); @@ -634,8 +673,8 @@ static void ScalePlaneDown38_16(int src_width, int src_height, #define MIN1(x) ((x) < 1 ? 
1 : (x)) -static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { - uint32 sum = 0u; +static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) { + uint32_t sum = 0u; int x; assert(iboxwidth > 0); for (x = 0; x < iboxwidth; ++x) { @@ -644,8 +683,8 @@ static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { return sum; } -static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) { - uint32 sum = 0u; +static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) { + uint32_t sum = 0u; int x; assert(iboxwidth > 0); for (x = 0; x < iboxwidth; ++x) { @@ -654,8 +693,12 @@ static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) { return sum; } -static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, - const uint16* src_ptr, uint8* dst_ptr) { +static void ScaleAddCols2_C(int dst_width, + int boxheight, + int x, + int dx, + const uint16_t* src_ptr, + uint8_t* dst_ptr) { int i; int scaletbl[2]; int minboxwidth = dx >> 16; @@ -666,13 +709,18 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, int ix = x >> 16; x += dx; boxwidth = MIN1((x >> 16) - ix); - *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * - scaletbl[boxwidth - minboxwidth] >> 16; + *dst_ptr++ = + SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >> + 16; } } -static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx, - const uint32* src_ptr, uint16* dst_ptr) { +static void ScaleAddCols2_16_C(int dst_width, + int boxheight, + int x, + int dx, + const uint32_t* src_ptr, + uint16_t* dst_ptr) { int i; int scaletbl[2]; int minboxwidth = dx >> 16; @@ -684,22 +732,32 @@ static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx, x += dx; boxwidth = MIN1((x >> 16) - ix); *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) * - scaletbl[boxwidth - minboxwidth] >> 16; + scaletbl[boxwidth - minboxwidth] >> + 16; } } -static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int, - const uint16* src_ptr, uint8* dst_ptr) { +static void ScaleAddCols0_C(int dst_width, + int boxheight, + int x, + int dx, + const uint16_t* src_ptr, + uint8_t* dst_ptr) { int scaleval = 65536 / boxheight; int i; + (void)dx; src_ptr += (x >> 16); for (i = 0; i < dst_width; ++i) { *dst_ptr++ = src_ptr[i] * scaleval >> 16; } } -static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, - const uint16* src_ptr, uint8* dst_ptr) { +static void ScaleAddCols1_C(int dst_width, + int boxheight, + int x, + int dx, + const uint16_t* src_ptr, + uint8_t* dst_ptr) { int boxwidth = MIN1(dx >> 16); int scaleval = 65536 / (boxwidth * boxheight); int i; @@ -710,8 +768,12 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, } } -static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx, - const uint32* src_ptr, uint16* dst_ptr) { +static void ScaleAddCols1_16_C(int dst_width, + int boxheight, + int x, + int dx, + const uint32_t* src_ptr, + uint16_t* dst_ptr) { int boxwidth = MIN1(dx >> 16); int scaleval = 65536 / (boxwidth * boxheight); int i; @@ -728,10 +790,14 @@ static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx, // one pixel of destination using fixed point (16.16) to step // through source, sampling a box of pixel with simple // averaging. 
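Concretely, the column pass just shown (ScaleAddCols2_C) avoids a divide per pixel: scaletbl caches the two possible reciprocals 65536 / boxwidth, and each output multiplies its box sum by the matching entry. A standalone walk-through of the 16.16 stepping for a 10 -> 4 shrink with boxheight fixed at 1 (illustrative code, not libyuv's; the MIN1 clamp is dropped because every box here is at least 2 pixels wide):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const uint16_t src[10] = {10, 10, 10, 40, 40, 40, 40, 70, 70, 70};
  const int src_width = 10, dst_width = 4;
  const int dx = (src_width << 16) / dst_width;  /* 163840 = 2.5 in 16.16 */
  const int minboxwidth = dx >> 16;              /* 2 */
  const int scaletbl[2] = {65536 / minboxwidth, 65536 / (minboxwidth + 1)};
  int x = 0;
  int i;
  for (i = 0; i < dst_width; ++i) {
    const int ix = x >> 16;
    uint32_t sum = 0;
    int boxwidth;
    int k;
    x += dx;
    boxwidth = (x >> 16) - ix;                   /* alternates 2 and 3 */
    for (k = 0; k < boxwidth; ++k) {
      sum += src[ix + k];
    }
    printf("%u ", sum * scaletbl[boxwidth - minboxwidth] >> 16);
  }
  printf("\n");  /* prints: 10 29 40 69 */
  return 0;
}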
-static void ScalePlaneBox(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr) { +static void ScalePlaneBox(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { int j, k; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -739,18 +805,18 @@ static void ScalePlaneBox(int src_width, int src_height, int dx = 0; int dy = 0; const int max_y = (src_height << 16); - ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, - &x, &y, &dx, &dy); + ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y, + &dx, &dy); src_width = Abs(src_width); { - // Allocate a row buffer of uint16. + // Allocate a row buffer of uint16_t. align_buffer_64(row16, src_width * 2); void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, - const uint16* src_ptr, uint8* dst_ptr) = - (dx & 0xffff) ? ScaleAddCols2_C: - ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C); - void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) = - ScaleAddRow_C; + const uint16_t* src_ptr, uint8_t* dst_ptr) = + (dx & 0xffff) ? ScaleAddCols2_C + : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C); + void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr, + int src_width) = ScaleAddRow_C; #if defined(HAS_SCALEADDROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleAddRow = ScaleAddRow_Any_SSE2; @@ -775,11 +841,19 @@ static void ScalePlaneBox(int src_width, int src_height, } } #endif +#if defined(HAS_SCALEADDROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ScaleAddRow = ScaleAddRow_Any_MSA; + if (IS_ALIGNED(src_width, 16)) { + ScaleAddRow = ScaleAddRow_MSA; + } + } +#endif for (j = 0; j < dst_height; ++j) { int boxheight; int iy = y >> 16; - const uint8* src = src_ptr + iy * src_stride; + const uint8_t* src = src_ptr + iy * src_stride; y += dy; if (y > max_y) { y = max_y; @@ -787,20 +861,24 @@ static void ScalePlaneBox(int src_width, int src_height, boxheight = MIN1((y >> 16) - iy); memset(row16, 0, src_width * 2); for (k = 0; k < boxheight; ++k) { - ScaleAddRow(src, (uint16 *)(row16), src_width); + ScaleAddRow(src, (uint16_t*)(row16), src_width); src += src_stride; } - ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr); + ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr); dst_ptr += dst_stride; } free_aligned_buffer_64(row16); } } -static void ScalePlaneBox_16(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint16* src_ptr, uint16* dst_ptr) { +static void ScalePlaneBox_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { int j, k; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -808,17 +886,17 @@ static void ScalePlaneBox_16(int src_width, int src_height, int dx = 0; int dy = 0; const int max_y = (src_height << 16); - ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, - &x, &y, &dx, &dy); + ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y, + &dx, &dy); src_width = Abs(src_width); { - // Allocate a row buffer of uint32. + // Allocate a row buffer of uint32_t. 
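Both box scalers accumulate whole rows before averaging: the 8-bit path above sums into a uint16_t row (headroom for up to 256 source rows), and this 16-bit path widens the accumulator to uint32_t. Per destination row the buffer is zeroed, ScaleAddRow adds boxheight source rows into it, and ScaleAddCols performs the horizontal averaging. The accumulation step, in the spirit of ScaleAddRow_C:

#include <stdint.h>

/* Add one 8-bit source row into the 16-bit row accumulator. */
static void AddRow(const uint8_t* src, uint16_t* acc, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    acc[x] += src[x];
  }
}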
align_buffer_64(row32, src_width * 4); void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, - const uint32* src_ptr, uint16* dst_ptr) = - (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C; - void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) = - ScaleAddRow_16_C; + const uint32_t* src_ptr, uint16_t* dst_ptr) = + (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C; + void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr, + int src_width) = ScaleAddRow_16_C; #if defined(HAS_SCALEADDROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) { @@ -829,7 +907,7 @@ static void ScalePlaneBox_16(int src_width, int src_height, for (j = 0; j < dst_height; ++j) { int boxheight; int iy = y >> 16; - const uint16* src = src_ptr + iy * src_stride; + const uint16_t* src = src_ptr + iy * src_stride; y += dy; if (y > max_y) { y = max_y; @@ -837,10 +915,10 @@ static void ScalePlaneBox_16(int src_width, int src_height, boxheight = MIN1((y >> 16) - iy); memset(row32, 0, src_width * 4); for (k = 0; k < boxheight; ++k) { - ScaleAddRow(src, (uint32 *)(row32), src_width); + ScaleAddRow(src, (uint32_t*)(row32), src_width); src += src_stride; } - ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr); + ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr); dst_ptr += dst_stride; } free_aligned_buffer_64(row32); @@ -848,10 +926,14 @@ static void ScalePlaneBox_16(int src_width, int src_height, } // Scale plane down with bilinear interpolation. -void ScalePlaneBilinearDown(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr, +void ScalePlaneBilinearDown(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr, enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -864,14 +946,14 @@ void ScalePlaneBilinearDown(int src_width, int src_height, const int max_y = (src_height - 1) << 16; int j; - void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) = + void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, + int dst_width, int x, int dx) = (src_width >= 32768) ? 
ScaleFilterCols64_C : ScaleFilterCols_C; - void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; - ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, - &x, &y, &dx, &dy); + void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_C; + ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, + &dx, &dy); src_width = Abs(src_width); #if defined(HAS_INTERPOLATEROW_SSSE3) @@ -898,16 +980,15 @@ void ScalePlaneBilinearDown(int src_width, int src_height, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(src_width, 4)) { - InterpolateRow = InterpolateRow_DSPR2; +#if defined(HAS_INTERPOLATEROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + InterpolateRow = InterpolateRow_Any_MSA; + if (IS_ALIGNED(src_width, 32)) { + InterpolateRow = InterpolateRow_MSA; } } #endif - #if defined(HAS_SCALEFILTERCOLS_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleFilterCols = ScaleFilterCols_SSSE3; @@ -920,6 +1001,14 @@ void ScalePlaneBilinearDown(int src_width, int src_height, ScaleFilterCols = ScaleFilterCols_NEON; } } +#endif +#if defined(HAS_SCALEFILTERCOLS_MSA) + if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) { + ScaleFilterCols = ScaleFilterCols_Any_MSA; + if (IS_ALIGNED(dst_width, 16)) { + ScaleFilterCols = ScaleFilterCols_MSA; + } + } #endif if (y > max_y) { y = max_y; @@ -927,7 +1016,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height, for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8* src = src_ptr + yi * src_stride; + const uint8_t* src = src_ptr + yi * src_stride; if (filtering == kFilterLinear) { ScaleFilterCols(dst_ptr, src, dst_width, x, dx); } else { @@ -944,10 +1033,14 @@ void ScalePlaneBilinearDown(int src_width, int src_height, free_aligned_buffer_64(row); } -void ScalePlaneBilinearDown_16(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint16* src_ptr, uint16* dst_ptr, +void ScalePlaneBilinearDown_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr, enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -960,14 +1053,14 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height, const int max_y = (src_height - 1) << 16; int j; - void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx) = + void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, + int dst_width, int x, int dx) = (src_width >= 32768) ? 
ScaleFilterCols64_16_C : ScaleFilterCols_16_C; - void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_16_C; - ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, - &x, &y, &dx, &dy); + void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_16_C; + ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, + &dx, &dy); src_width = Abs(src_width); #if defined(HAS_INTERPOLATEROW_16_SSE2) @@ -1002,15 +1095,6 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height, } } #endif -#if defined(HAS_INTERPOLATEROW_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - InterpolateRow = InterpolateRow_Any_16_DSPR2; - if (IS_ALIGNED(src_width, 4)) { - InterpolateRow = InterpolateRow_16_DSPR2; - } - } -#endif - #if defined(HAS_SCALEFILTERCOLS_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { @@ -1023,13 +1107,13 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height, for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint16* src = src_ptr + yi * src_stride; + const uint16_t* src = src_ptr + yi * src_stride; if (filtering == kFilterLinear) { ScaleFilterCols(dst_ptr, src, dst_width, x, dx); } else { int yf = (y >> 8) & 255; - InterpolateRow((uint16*)row, src, src_stride, src_width, yf); - ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx); + InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf); + ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx); } dst_ptr += dst_stride; y += dy; @@ -1041,10 +1125,14 @@ void ScalePlaneBilinearDown_16(int src_width, int src_height, } // Scale up down with bilinear interpolation. -void ScalePlaneBilinearUp(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr, +void ScalePlaneBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr, enum FilterMode filtering) { int j; // Initial source x/y coordinate and step values as 16.16 fixed point. @@ -1053,14 +1141,14 @@ void ScalePlaneBilinearUp(int src_width, int src_height, int dx = 0; int dy = 0; const int max_y = (src_height - 1) << 16; - void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; - void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) = + void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_C; + void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, + int dst_width, int x, int dx) = filtering ? 
ScaleFilterCols_C : ScaleCols_C; - ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, - &x, &y, &dx, &dy); + ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, + &dx, &dy); src_width = Abs(src_width); #if defined(HAS_INTERPOLATEROW_SSSE3) @@ -1087,14 +1175,6 @@ void ScalePlaneBilinearUp(int src_width, int src_height, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } - } -#endif if (filtering && src_width >= 32768) { ScaleFilterCols = ScaleFilterCols64_C; @@ -1111,6 +1191,14 @@ void ScalePlaneBilinearUp(int src_width, int src_height, ScaleFilterCols = ScaleFilterCols_NEON; } } +#endif +#if defined(HAS_SCALEFILTERCOLS_MSA) + if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) { + ScaleFilterCols = ScaleFilterCols_Any_MSA; + if (IS_ALIGNED(dst_width, 16)) { + ScaleFilterCols = ScaleFilterCols_MSA; + } + } #endif if (!filtering && src_width * 2 == dst_width && x < 0x8000) { ScaleFilterCols = ScaleColsUp2_C; @@ -1126,13 +1214,13 @@ void ScalePlaneBilinearUp(int src_width, int src_height, } { int yi = y >> 16; - const uint8* src = src_ptr + yi * src_stride; + const uint8_t* src = src_ptr + yi * src_stride; // Allocate 2 row buffers. const int kRowSize = (dst_width + 31) & ~31; align_buffer_64(row, kRowSize * 2); - uint8* rowptr = row; + uint8_t* rowptr = row; int rowstride = kRowSize; int lasty = yi; @@ -1172,10 +1260,14 @@ void ScalePlaneBilinearUp(int src_width, int src_height, } } -void ScalePlaneBilinearUp_16(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint16* src_ptr, uint16* dst_ptr, +void ScalePlaneBilinearUp_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr, enum FilterMode filtering) { int j; // Initial source x/y coordinate and step values as 16.16 fixed point. @@ -1184,14 +1276,14 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height, int dx = 0; int dy = 0; const int max_y = (src_height - 1) << 16; - void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_16_C; - void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx) = + void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_16_C; + void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, + int dst_width, int x, int dx) = filtering ? 
ScaleFilterCols_16_C : ScaleCols_16_C; - ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, - &x, &y, &dx, &dy); + ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, + &dx, &dy); src_width = Abs(src_width); #if defined(HAS_INTERPOLATEROW_16_SSE2) @@ -1226,14 +1318,6 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height, } } #endif -#if defined(HAS_INTERPOLATEROW_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2)) { - InterpolateRow = InterpolateRow_Any_16_DSPR2; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_16_DSPR2; - } - } -#endif if (filtering && src_width >= 32768) { ScaleFilterCols = ScaleFilterCols64_16_C; @@ -1257,13 +1341,13 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height, } { int yi = y >> 16; - const uint16* src = src_ptr + yi * src_stride; + const uint16_t* src = src_ptr + yi * src_stride; // Allocate 2 row buffers. const int kRowSize = (dst_width + 31) & ~31; align_buffer_64(row, kRowSize * 4); - uint16* rowptr = (uint16*)row; + uint16_t* rowptr = (uint16_t*)row; int rowstride = kRowSize; int lasty = yi; @@ -1308,20 +1392,24 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height, // of x and dx is the integer part of the source position and // the lower 16 bits are the fixed decimal part. -static void ScalePlaneSimple(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr) { +static void ScalePlaneSimple(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_ptr, + uint8_t* dst_ptr) { int i; - void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) = ScaleCols_C; + void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width, + int x, int dx) = ScaleCols_C; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; int dx = 0; int dy = 0; - ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, - &x, &y, &dx, &dy); + ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y, + &dx, &dy); src_width = Abs(src_width); if (src_width * 2 == dst_width && x < 0x8000) { @@ -1340,20 +1428,24 @@ static void ScalePlaneSimple(int src_width, int src_height, } } -static void ScalePlaneSimple_16(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint16* src_ptr, uint16* dst_ptr) { +static void ScalePlaneSimple_16(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_ptr, + uint16_t* dst_ptr) { int i; - void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx) = ScaleCols_16_C; + void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width, + int x, int dx) = ScaleCols_16_C; // Initial source x/y coordinate and step values as 16.16 fixed point. 
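ScalePlaneSimple and ScalePlaneSimple_16 do no filtering at all: ScaleCols_C reads the source pixel sitting under the 16.16 position and steps by dx, and the x < 0x8000 check swaps in a cheaper pixel-doubling kernel for exact 2x upscales. The sampling loop reduced to its essentials (the real ScaleCols_C unrolls two pixels per iteration):

#include <stdint.h>

static void PointSampleCols(uint8_t* dst, const uint8_t* src,
                            int dst_width, int x, int dx) {
  int i;
  for (i = 0; i < dst_width; ++i) {
    dst[i] = src[x >> 16];  /* nearest source pixel, no blending */
    x += dx;
  }
}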
int x = 0; int y = 0; int dx = 0; int dy = 0; - ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, - &x, &y, &dx, &dy); + ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y, + &dx, &dy); src_width = Abs(src_width); if (src_width * 2 == dst_width && x < 0x8000) { @@ -1366,8 +1458,7 @@ static void ScalePlaneSimple_16(int src_width, int src_height, } for (i = 0; i < dst_height; ++i) { - ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, - dst_width, x, dx); + ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); dst_ptr += dst_stride; y += dy; } @@ -1377,14 +1468,18 @@ static void ScalePlaneSimple_16(int src_width, int src_height, // This function dispatches to a specialized scaler based on scale factor. LIBYUV_API -void ScalePlane(const uint8* src, int src_stride, - int src_width, int src_height, - uint8* dst, int dst_stride, - int dst_width, int dst_height, +void ScalePlane(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, enum FilterMode filtering) { // Simplify filtering when possible. - filtering = ScaleFilterReduce(src_width, src_height, - dst_width, dst_height, filtering); + filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, + filtering); // Negative height means invert the image. if (src_height < 0) { @@ -1403,46 +1498,42 @@ void ScalePlane(const uint8* src, int src_stride, if (dst_width == src_width && filtering != kFilterBox) { int dy = FixedDiv(src_height, dst_height); // Arbitrary scale vertically, but unscaled horizontally. - ScalePlaneVertical(src_height, - dst_width, dst_height, - src_stride, dst_stride, src, dst, - 0, 0, dy, 1, filtering); + ScalePlaneVertical(src_height, dst_width, dst_height, src_stride, + dst_stride, src, dst, 0, 0, dy, 1, filtering); return; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { // Scale down. - if (4 * dst_width == 3 * src_width && - 4 * dst_height == 3 * src_height) { + if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) { // optimized, 3/4 - ScalePlaneDown34(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); + ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride, + dst_stride, src, dst, filtering); return; } if (2 * dst_width == src_width && 2 * dst_height == src_height) { // optimized, 1/2 - ScalePlaneDown2(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); + ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride, + dst_stride, src, dst, filtering); return; } // 3/8 rounded up for odd sized chroma height. 
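The 3/8 branch guarded by this comment maps 8 source rows onto 3 destination rows, which is why ScalePlaneDown38 installs both a 3-row and a 2-row kernel: per output triple it runs the 3-row box twice and the 2-row box once. A sketch of that loop shape (hypothetical kernel names; kFilterLinear's zero filter stride and the trailing partial rows are elided):

#include <stddef.h>
#include <stdint.h>

void RowDown38_3(const uint8_t* src, ptrdiff_t stride, uint8_t* dst, int w);
void RowDown38_2(const uint8_t* src, ptrdiff_t stride, uint8_t* dst, int w);

/* 8 source rows -> 3 destination rows per iteration. */
void Down38Loop(const uint8_t* src, ptrdiff_t src_stride,
                uint8_t* dst, ptrdiff_t dst_stride,
                int dst_width, int dst_height) {
  int y;
  for (y = 0; y < dst_height - 2; y += 3) {
    RowDown38_3(src, src_stride, dst, dst_width);  /* source rows 0-2 */
    src += src_stride * 3;
    dst += dst_stride;
    RowDown38_3(src, src_stride, dst, dst_width);  /* source rows 3-5 */
    src += src_stride * 3;
    dst += dst_stride;
    RowDown38_2(src, src_stride, dst, dst_width);  /* source rows 6-7 */
    src += src_stride * 2;
    dst += dst_stride;
  }
}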
- if (8 * dst_width == 3 * src_width && - dst_height == ((src_height * 3 + 7) / 8)) { + if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { // optimized, 3/8 - ScalePlaneDown38(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); + ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride, + dst_stride, src, dst, filtering); return; } if (4 * dst_width == src_width && 4 * dst_height == src_height && (filtering == kFilterBox || filtering == kFilterNone)) { // optimized, 1/4 - ScalePlaneDown4(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); + ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride, + dst_stride, src, dst, filtering); return; } } if (filtering == kFilterBox && dst_height * 2 < src_height) { - ScalePlaneBox(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst); + ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride, + dst_stride, src, dst); return; } if (filtering && dst_height > src_height) { @@ -1455,19 +1546,23 @@ void ScalePlane(const uint8* src, int src_stride, src_stride, dst_stride, src, dst, filtering); return; } - ScalePlaneSimple(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst); + ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride, + dst_stride, src, dst); } LIBYUV_API -void ScalePlane_16(const uint16* src, int src_stride, - int src_width, int src_height, - uint16* dst, int dst_stride, - int dst_width, int dst_height, - enum FilterMode filtering) { +void ScalePlane_16(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering) { // Simplify filtering when possible. - filtering = ScaleFilterReduce(src_width, src_height, - dst_width, dst_height, filtering); + filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, + filtering); // Negative height means invert the image. if (src_height < 0) { @@ -1483,19 +1578,16 @@ void ScalePlane_16(const uint16* src, int src_stride, CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height); return; } - if (dst_width == src_width) { + if (dst_width == src_width && filtering != kFilterBox) { int dy = FixedDiv(src_height, dst_height); // Arbitrary scale vertically, but unscaled vertically. - ScalePlaneVertical_16(src_height, - dst_width, dst_height, - src_stride, dst_stride, src, dst, - 0, 0, dy, 1, filtering); + ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride, + dst_stride, src, dst, 0, 0, dy, 1, filtering); return; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { // Scale down. - if (4 * dst_width == 3 * src_width && - 4 * dst_height == 3 * src_height) { + if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) { // optimized, 3/4 ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); @@ -1508,15 +1600,14 @@ void ScalePlane_16(const uint16* src, int src_stride, return; } // 3/8 rounded up for odd sized chroma height. 
- if (8 * dst_width == 3 * src_width && - dst_height == ((src_height * 3 + 7) / 8)) { + if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { // optimized, 3/8 ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } if (4 * dst_width == src_width && 4 * dst_height == src_height && - filtering != kFilterBilinear) { + (filtering == kFilterBox || filtering == kFilterNone)) { // optimized, 1/4 ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); @@ -1524,8 +1615,8 @@ void ScalePlane_16(const uint16* src, int src_stride, } } if (filtering == kFilterBox && dst_height * 2 < src_height) { - ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst); + ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride, + dst_stride, src, dst); return; } if (filtering && dst_height > src_height) { @@ -1538,132 +1629,110 @@ void ScalePlane_16(const uint16* src, int src_stride, src_stride, dst_stride, src, dst, filtering); return; } - ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst); + ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride, + dst_stride, src, dst); } // Scale an I420 image. // This function in turn calls a scaling function for each plane. LIBYUV_API -int I420Scale(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - int src_width, int src_height, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int dst_width, int dst_height, +int I420Scale(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || - src_width > 32768 || src_height > 32768 || - !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || dst_height <= 0) { return -1; } - ScalePlane(src_y, src_stride_y, src_width, src_height, - dst_y, dst_stride_y, dst_width, dst_height, - filtering); - ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, - dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, - filtering); - ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, - dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, - filtering); + ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, + dst_width, dst_height, filtering); + ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, + dst_stride_u, dst_halfwidth, dst_halfheight, filtering); + ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, + dst_stride_v, dst_halfwidth, dst_halfheight, filtering); return 0; } LIBYUV_API -int I420Scale_16(const uint16* src_y, int src_stride_y, - const uint16* src_u, int src_stride_u, - const uint16* src_v, int src_stride_v, - int src_width, int 
src_height, - uint16* dst_y, int dst_stride_y, - uint16* dst_u, int dst_stride_u, - uint16* dst_v, int dst_stride_v, - int dst_width, int dst_height, +int I420Scale_16(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || - src_width > 32768 || src_height > 32768 || - !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || dst_height <= 0) { return -1; } - ScalePlane_16(src_y, src_stride_y, src_width, src_height, - dst_y, dst_stride_y, dst_width, dst_height, - filtering); - ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, - dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, - filtering); - ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, - dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, - filtering); + ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, + dst_width, dst_height, filtering); + ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, + dst_stride_u, dst_halfwidth, dst_halfheight, filtering); + ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, + dst_stride_v, dst_halfwidth, dst_halfheight, filtering); return 0; } // Deprecated api LIBYUV_API -int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, - int src_stride_y, int src_stride_u, int src_stride_v, - int src_width, int src_height, - uint8* dst_y, uint8* dst_u, uint8* dst_v, - int dst_stride_y, int dst_stride_u, int dst_stride_v, - int dst_width, int dst_height, +int Scale(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + int src_stride_y, + int src_stride_u, + int src_stride_v, + int src_width, + int src_height, + uint8_t* dst_y, + uint8_t* dst_u, + uint8_t* dst_v, + int dst_stride_y, + int dst_stride_u, + int dst_stride_v, + int dst_width, + int dst_height, LIBYUV_BOOL interpolate) { - return I420Scale(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - src_width, src_height, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - dst_width, dst_height, - interpolate ? kFilterBox : kFilterNone); -} - -// Deprecated api -LIBYUV_API -int ScaleOffset(const uint8* src, int src_width, int src_height, - uint8* dst, int dst_width, int dst_height, int dst_yoffset, - LIBYUV_BOOL interpolate) { - // Chroma requires offset to multiple of 2. 
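The half-size chroma dimensions used by I420Scale and I420Scale_16 come from SUBSAMPLE(v, 1, 1), which rounds up so odd luma sizes still get full chroma coverage; the removed ScaleOffset forces its y offset even for the same 4:2:0 reason. The rounding, ignoring the negative-value case the real macro also handles:

/* 4:2:0 chroma plane size: half of luma, rounded up. */
#define SUBSAMPLE(v, a, s) (((v) + (a)) >> (s))
/* SUBSAMPLE(1080, 1, 1) == 540, SUBSAMPLE(1081, 1, 1) == 541. */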
- int dst_yoffset_even = dst_yoffset & ~1; - int src_halfwidth = SUBSAMPLE(src_width, 1, 1); - int src_halfheight = SUBSAMPLE(src_height, 1, 1); - int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); - int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); - int aheight = dst_height - dst_yoffset_even * 2; // actual output height - const uint8* src_y = src; - const uint8* src_u = src + src_width * src_height; - const uint8* src_v = src + src_width * src_height + - src_halfwidth * src_halfheight; - uint8* dst_y = dst + dst_yoffset_even * dst_width; - uint8* dst_u = dst + dst_width * dst_height + - (dst_yoffset_even >> 1) * dst_halfwidth; - uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight + - (dst_yoffset_even >> 1) * dst_halfwidth; - if (!src || src_width <= 0 || src_height <= 0 || - !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 || - dst_yoffset_even >= dst_height) { - return -1; - } - return I420Scale(src_y, src_width, - src_u, src_halfwidth, - src_v, src_halfwidth, - src_width, src_height, - dst_y, dst_width, - dst_u, dst_halfwidth, - dst_v, dst_halfwidth, - dst_width, aheight, - interpolate ? kFilterBox : kFilterNone); + return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, src_width, src_height, dst_y, dst_stride_y, + dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width, + dst_height, interpolate ? kFilterBox : kFilterNone); } #ifdef __cplusplus diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_any.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_any.cc index ed76a9e4c041..53ad13640499 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/scale_any.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_any.cc @@ -20,184 +20,429 @@ extern "C" { // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \ - void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \ - int dst_width, int x, int dx) { \ - int n = dst_width & ~MASK; \ - if (n > 0) { \ - TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \ - } \ - TERP_C(dst_ptr + n * BPP, src_ptr, \ - dst_width & MASK, x + n * dx, dx); \ - } + void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \ + int dx) { \ + int r = dst_width & MASK; \ + int n = dst_width & ~MASK; \ + if (n > 0) { \ + TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \ + } \ + TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \ + } #ifdef HAS_SCALEFILTERCOLS_NEON CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7) #endif +#ifdef HAS_SCALEFILTERCOLS_MSA +CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15) +#endif #ifdef HAS_SCALEARGBCOLS_NEON CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7) #endif +#ifdef HAS_SCALEARGBCOLS_MSA +CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3) +#endif #ifdef HAS_SCALEARGBFILTERCOLS_NEON -CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON, - ScaleARGBFilterCols_C, 4, 3) +CANY(ScaleARGBFilterCols_Any_NEON, + ScaleARGBFilterCols_NEON, + ScaleARGBFilterCols_C, + 4, + 3) +#endif +#ifdef HAS_SCALEARGBFILTERCOLS_MSA +CANY(ScaleARGBFilterCols_Any_MSA, + ScaleARGBFilterCols_MSA, + ScaleARGBFilterCols_C, + 4, + 7) #endif #undef CANY // Fixed scale down. 
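CANY above is the template behind every _Any_ column kernel referenced in this patch: the SIMD body covers the largest multiple of its step, and the C kernel finishes the tail with the fixed-point x advanced past the SIMD-covered span. Expanded by hand for a step-16, 1-byte-per-pixel filter (kernel names hypothetical):

#include <stdint.h>

void FilterColsSimd(uint8_t* dst, const uint8_t* src, int w, int x, int dx);
void FilterColsC(uint8_t* dst, const uint8_t* src, int w, int x, int dx);

/* What CANY(..., BPP = 1, MASK = 15) expands to, reformatted. */
void FilterColsAny(uint8_t* dst, const uint8_t* src,
                   int dst_width, int x, int dx) {
  int r = dst_width & 15;   /* tail pixels left for the C kernel */
  int n = dst_width & ~15;  /* multiple-of-16 span for SIMD */
  if (n > 0) {
    FilterColsSimd(dst, src, n, x, dx);
  }
  FilterColsC(dst + n, src, r, x + n * dx, dx);  /* BPP == 1 */
}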
+// Mask may be non-power of 2, so use MOD #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \ - uint8* dst_ptr, int dst_width) { \ - int r = (int)((unsigned int)dst_width % (MASK + 1)); \ - int n = dst_width - r; \ - if (n > 0) { \ - SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ - } \ - SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ - dst_ptr + n * BPP, r); \ - } + void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ + int dst_width) { \ + int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ + int n = dst_width - r; \ + if (n > 0) { \ + SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ + } \ + SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ + dst_ptr + n * BPP, r); \ + } // Fixed scale down for odd source width. Used by I420Blend subsampling. // Since dst_width is (width + 1) / 2, this function scales one less pixel // and copies the last pixel. #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \ - uint8* dst_ptr, int dst_width) { \ - int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); \ - int n = dst_width - r; \ - if (n > 0) { \ - SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ - } \ - SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ - dst_ptr + n * BPP, r); \ - } + void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ + int dst_width) { \ + int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \ + int n = (dst_width - 1) - r; \ + if (n > 0) { \ + SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ + } \ + SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ + dst_ptr + n * BPP, r + 1); \ + } #ifdef HAS_SCALEROWDOWN2_SSSE3 SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15) -SDANY(ScaleRowDown2Linear_Any_SSSE3, ScaleRowDown2Linear_SSSE3, - ScaleRowDown2Linear_C, 2, 1, 15) -SDANY(ScaleRowDown2Box_Any_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C, - 2, 1, 15) -SDODD(ScaleRowDown2Box_Odd_SSSE3, ScaleRowDown2Box_SSSE3, - ScaleRowDown2Box_Odd_C, 2, 1, 15) +SDANY(ScaleRowDown2Linear_Any_SSSE3, + ScaleRowDown2Linear_SSSE3, + ScaleRowDown2Linear_C, + 2, + 1, + 15) +SDANY(ScaleRowDown2Box_Any_SSSE3, + ScaleRowDown2Box_SSSE3, + ScaleRowDown2Box_C, + 2, + 1, + 15) +SDODD(ScaleRowDown2Box_Odd_SSSE3, + ScaleRowDown2Box_SSSE3, + ScaleRowDown2Box_Odd_C, + 2, + 1, + 15) #endif #ifdef HAS_SCALEROWDOWN2_AVX2 SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31) -SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2, - ScaleRowDown2Linear_C, 2, 1, 31) -SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C, - 2, 1, 31) -SDODD(ScaleRowDown2Box_Odd_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_Odd_C, - 2, 1, 31) +SDANY(ScaleRowDown2Linear_Any_AVX2, + ScaleRowDown2Linear_AVX2, + ScaleRowDown2Linear_C, + 2, + 1, + 31) +SDANY(ScaleRowDown2Box_Any_AVX2, + ScaleRowDown2Box_AVX2, + ScaleRowDown2Box_C, + 2, + 1, + 31) +SDODD(ScaleRowDown2Box_Odd_AVX2, + ScaleRowDown2Box_AVX2, + ScaleRowDown2Box_Odd_C, + 2, + 1, + 31) #endif #ifdef HAS_SCALEROWDOWN2_NEON SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15) -SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON, - ScaleRowDown2Linear_C, 2, 1, 15) -SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON, - ScaleRowDown2Box_C, 2, 1, 15) 
-SDODD(ScaleRowDown2Box_Odd_NEON, ScaleRowDown2Box_NEON, - ScaleRowDown2Box_Odd_C, 2, 1, 15) +SDANY(ScaleRowDown2Linear_Any_NEON, + ScaleRowDown2Linear_NEON, + ScaleRowDown2Linear_C, + 2, + 1, + 15) +SDANY(ScaleRowDown2Box_Any_NEON, + ScaleRowDown2Box_NEON, + ScaleRowDown2Box_C, + 2, + 1, + 15) +SDODD(ScaleRowDown2Box_Odd_NEON, + ScaleRowDown2Box_NEON, + ScaleRowDown2Box_Odd_C, + 2, + 1, + 15) +#endif +#ifdef HAS_SCALEROWDOWN2_MSA +SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31) +SDANY(ScaleRowDown2Linear_Any_MSA, + ScaleRowDown2Linear_MSA, + ScaleRowDown2Linear_C, + 2, + 1, + 31) +SDANY(ScaleRowDown2Box_Any_MSA, + ScaleRowDown2Box_MSA, + ScaleRowDown2Box_C, + 2, + 1, + 31) #endif #ifdef HAS_SCALEROWDOWN4_SSSE3 SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7) -SDANY(ScaleRowDown4Box_Any_SSSE3, ScaleRowDown4Box_SSSE3, ScaleRowDown4Box_C, - 4, 1, 7) +SDANY(ScaleRowDown4Box_Any_SSSE3, + ScaleRowDown4Box_SSSE3, + ScaleRowDown4Box_C, + 4, + 1, + 7) #endif #ifdef HAS_SCALEROWDOWN4_AVX2 SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15) -SDANY(ScaleRowDown4Box_Any_AVX2, ScaleRowDown4Box_AVX2, ScaleRowDown4Box_C, - 4, 1, 15) +SDANY(ScaleRowDown4Box_Any_AVX2, + ScaleRowDown4Box_AVX2, + ScaleRowDown4Box_C, + 4, + 1, + 15) #endif #ifdef HAS_SCALEROWDOWN4_NEON SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7) -SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C, - 4, 1, 7) +SDANY(ScaleRowDown4Box_Any_NEON, + ScaleRowDown4Box_NEON, + ScaleRowDown4Box_C, + 4, + 1, + 7) +#endif +#ifdef HAS_SCALEROWDOWN4_MSA +SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15) +SDANY(ScaleRowDown4Box_Any_MSA, + ScaleRowDown4Box_MSA, + ScaleRowDown4Box_C, + 4, + 1, + 15) #endif #ifdef HAS_SCALEROWDOWN34_SSSE3 -SDANY(ScaleRowDown34_Any_SSSE3, ScaleRowDown34_SSSE3, - ScaleRowDown34_C, 4 / 3, 1, 23) -SDANY(ScaleRowDown34_0_Box_Any_SSSE3, ScaleRowDown34_0_Box_SSSE3, - ScaleRowDown34_0_Box_C, 4 / 3, 1, 23) -SDANY(ScaleRowDown34_1_Box_Any_SSSE3, ScaleRowDown34_1_Box_SSSE3, - ScaleRowDown34_1_Box_C, 4 / 3, 1, 23) +SDANY(ScaleRowDown34_Any_SSSE3, + ScaleRowDown34_SSSE3, + ScaleRowDown34_C, + 4 / 3, + 1, + 23) +SDANY(ScaleRowDown34_0_Box_Any_SSSE3, + ScaleRowDown34_0_Box_SSSE3, + ScaleRowDown34_0_Box_C, + 4 / 3, + 1, + 23) +SDANY(ScaleRowDown34_1_Box_Any_SSSE3, + ScaleRowDown34_1_Box_SSSE3, + ScaleRowDown34_1_Box_C, + 4 / 3, + 1, + 23) #endif #ifdef HAS_SCALEROWDOWN34_NEON -SDANY(ScaleRowDown34_Any_NEON, ScaleRowDown34_NEON, - ScaleRowDown34_C, 4 / 3, 1, 23) -SDANY(ScaleRowDown34_0_Box_Any_NEON, ScaleRowDown34_0_Box_NEON, - ScaleRowDown34_0_Box_C, 4 / 3, 1, 23) -SDANY(ScaleRowDown34_1_Box_Any_NEON, ScaleRowDown34_1_Box_NEON, - ScaleRowDown34_1_Box_C, 4 / 3, 1, 23) +SDANY(ScaleRowDown34_Any_NEON, + ScaleRowDown34_NEON, + ScaleRowDown34_C, + 4 / 3, + 1, + 23) +SDANY(ScaleRowDown34_0_Box_Any_NEON, + ScaleRowDown34_0_Box_NEON, + ScaleRowDown34_0_Box_C, + 4 / 3, + 1, + 23) +SDANY(ScaleRowDown34_1_Box_Any_NEON, + ScaleRowDown34_1_Box_NEON, + ScaleRowDown34_1_Box_C, + 4 / 3, + 1, + 23) +#endif +#ifdef HAS_SCALEROWDOWN34_MSA +SDANY(ScaleRowDown34_Any_MSA, + ScaleRowDown34_MSA, + ScaleRowDown34_C, + 4 / 3, + 1, + 47) +SDANY(ScaleRowDown34_0_Box_Any_MSA, + ScaleRowDown34_0_Box_MSA, + ScaleRowDown34_0_Box_C, + 4 / 3, + 1, + 47) +SDANY(ScaleRowDown34_1_Box_Any_MSA, + ScaleRowDown34_1_Box_MSA, + ScaleRowDown34_1_Box_C, + 4 / 3, + 1, + 47) #endif #ifdef HAS_SCALEROWDOWN38_SSSE3 
-SDANY(ScaleRowDown38_Any_SSSE3, ScaleRowDown38_SSSE3, - ScaleRowDown38_C, 8 / 3, 1, 11) -SDANY(ScaleRowDown38_3_Box_Any_SSSE3, ScaleRowDown38_3_Box_SSSE3, - ScaleRowDown38_3_Box_C, 8 / 3, 1, 5) -SDANY(ScaleRowDown38_2_Box_Any_SSSE3, ScaleRowDown38_2_Box_SSSE3, - ScaleRowDown38_2_Box_C, 8 / 3, 1, 5) +SDANY(ScaleRowDown38_Any_SSSE3, + ScaleRowDown38_SSSE3, + ScaleRowDown38_C, + 8 / 3, + 1, + 11) +SDANY(ScaleRowDown38_3_Box_Any_SSSE3, + ScaleRowDown38_3_Box_SSSE3, + ScaleRowDown38_3_Box_C, + 8 / 3, + 1, + 5) +SDANY(ScaleRowDown38_2_Box_Any_SSSE3, + ScaleRowDown38_2_Box_SSSE3, + ScaleRowDown38_2_Box_C, + 8 / 3, + 1, + 5) #endif #ifdef HAS_SCALEROWDOWN38_NEON -SDANY(ScaleRowDown38_Any_NEON, ScaleRowDown38_NEON, - ScaleRowDown38_C, 8 / 3, 1, 11) -SDANY(ScaleRowDown38_3_Box_Any_NEON, ScaleRowDown38_3_Box_NEON, - ScaleRowDown38_3_Box_C, 8 / 3, 1, 11) -SDANY(ScaleRowDown38_2_Box_Any_NEON, ScaleRowDown38_2_Box_NEON, - ScaleRowDown38_2_Box_C, 8 / 3, 1, 11) +SDANY(ScaleRowDown38_Any_NEON, + ScaleRowDown38_NEON, + ScaleRowDown38_C, + 8 / 3, + 1, + 11) +SDANY(ScaleRowDown38_3_Box_Any_NEON, + ScaleRowDown38_3_Box_NEON, + ScaleRowDown38_3_Box_C, + 8 / 3, + 1, + 11) +SDANY(ScaleRowDown38_2_Box_Any_NEON, + ScaleRowDown38_2_Box_NEON, + ScaleRowDown38_2_Box_C, + 8 / 3, + 1, + 11) +#endif +#ifdef HAS_SCALEROWDOWN38_MSA +SDANY(ScaleRowDown38_Any_MSA, + ScaleRowDown38_MSA, + ScaleRowDown38_C, + 8 / 3, + 1, + 11) +SDANY(ScaleRowDown38_3_Box_Any_MSA, + ScaleRowDown38_3_Box_MSA, + ScaleRowDown38_3_Box_C, + 8 / 3, + 1, + 11) +SDANY(ScaleRowDown38_2_Box_Any_MSA, + ScaleRowDown38_2_Box_MSA, + ScaleRowDown38_2_Box_C, + 8 / 3, + 1, + 11) #endif #ifdef HAS_SCALEARGBROWDOWN2_SSE2 -SDANY(ScaleARGBRowDown2_Any_SSE2, ScaleARGBRowDown2_SSE2, - ScaleARGBRowDown2_C, 2, 4, 3) -SDANY(ScaleARGBRowDown2Linear_Any_SSE2, ScaleARGBRowDown2Linear_SSE2, - ScaleARGBRowDown2Linear_C, 2, 4, 3) -SDANY(ScaleARGBRowDown2Box_Any_SSE2, ScaleARGBRowDown2Box_SSE2, - ScaleARGBRowDown2Box_C, 2, 4, 3) +SDANY(ScaleARGBRowDown2_Any_SSE2, + ScaleARGBRowDown2_SSE2, + ScaleARGBRowDown2_C, + 2, + 4, + 3) +SDANY(ScaleARGBRowDown2Linear_Any_SSE2, + ScaleARGBRowDown2Linear_SSE2, + ScaleARGBRowDown2Linear_C, + 2, + 4, + 3) +SDANY(ScaleARGBRowDown2Box_Any_SSE2, + ScaleARGBRowDown2Box_SSE2, + ScaleARGBRowDown2Box_C, + 2, + 4, + 3) #endif #ifdef HAS_SCALEARGBROWDOWN2_NEON -SDANY(ScaleARGBRowDown2_Any_NEON, ScaleARGBRowDown2_NEON, - ScaleARGBRowDown2_C, 2, 4, 7) -SDANY(ScaleARGBRowDown2Linear_Any_NEON, ScaleARGBRowDown2Linear_NEON, - ScaleARGBRowDown2Linear_C, 2, 4, 7) -SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON, - ScaleARGBRowDown2Box_C, 2, 4, 7) +SDANY(ScaleARGBRowDown2_Any_NEON, + ScaleARGBRowDown2_NEON, + ScaleARGBRowDown2_C, + 2, + 4, + 7) +SDANY(ScaleARGBRowDown2Linear_Any_NEON, + ScaleARGBRowDown2Linear_NEON, + ScaleARGBRowDown2Linear_C, + 2, + 4, + 7) +SDANY(ScaleARGBRowDown2Box_Any_NEON, + ScaleARGBRowDown2Box_NEON, + ScaleARGBRowDown2Box_C, + 2, + 4, + 7) +#endif +#ifdef HAS_SCALEARGBROWDOWN2_MSA +SDANY(ScaleARGBRowDown2_Any_MSA, + ScaleARGBRowDown2_MSA, + ScaleARGBRowDown2_C, + 2, + 4, + 3) +SDANY(ScaleARGBRowDown2Linear_Any_MSA, + ScaleARGBRowDown2Linear_MSA, + ScaleARGBRowDown2Linear_C, + 2, + 4, + 3) +SDANY(ScaleARGBRowDown2Box_Any_MSA, + ScaleARGBRowDown2Box_MSA, + ScaleARGBRowDown2Box_C, + 2, + 4, + 3) #endif #undef SDANY // Scale down by even scale factor. 
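Two remainder details in the scale-down wrappers above are easy to miss. Masks such as 23, 47 and 11 are not of the form 2^k - 1, so SDANY takes the tail with a true modulo rather than a bitmask (the new MOD comment), and SDODD peels one extra pixel off the SIMD span so the C kernel, which clamps at the last source pixel, always produces the final output. In numbers (widths chosen purely for illustration):

#include <stdio.h>

int main(void) {
  /* SDANY split for a 3/4 kernel with a 48-pixel step (MASK 47): */
  int dst_width = 100;
  int r = (int)((unsigned int)dst_width % 48);        /* 4 -> C kernel  */
  int n = dst_width - r;                              /* 96 -> SIMD     */
  /* SDODD split for an odd width with a 16-pixel step (MASK 15): */
  int odd_width = 25;                                 /* (49 + 1) / 2   */
  int r2 = (int)((unsigned int)(odd_width - 1) % 16); /* 8              */
  int n2 = (odd_width - 1) - r2;                      /* 16 -> SIMD     */
  printf("%d %d %d %d\n", n, r, n2, r2 + 1);          /* 96 4 16 9      */
  return 0;
}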
-#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \ - void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx, \ - uint8* dst_ptr, int dst_width) { \ - int r = (int)((unsigned int)dst_width % (MASK + 1)); \ - int n = dst_width - r; \ - if (n > 0) { \ - SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \ - } \ - SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, \ - src_stepx, dst_ptr + n * BPP, r); \ - } +#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \ + uint8_t* dst_ptr, int dst_width) { \ + int r = dst_width & MASK; \ + int n = dst_width & ~MASK; \ + if (n > 0) { \ + SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \ + } \ + SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \ + dst_ptr + n * BPP, r); \ + } #ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2 -SDAANY(ScaleARGBRowDownEven_Any_SSE2, ScaleARGBRowDownEven_SSE2, - ScaleARGBRowDownEven_C, 4, 3) -SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, ScaleARGBRowDownEvenBox_SSE2, - ScaleARGBRowDownEvenBox_C, 4, 3) +SDAANY(ScaleARGBRowDownEven_Any_SSE2, + ScaleARGBRowDownEven_SSE2, + ScaleARGBRowDownEven_C, + 4, + 3) +SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, + ScaleARGBRowDownEvenBox_SSE2, + ScaleARGBRowDownEvenBox_C, + 4, + 3) #endif #ifdef HAS_SCALEARGBROWDOWNEVEN_NEON -SDAANY(ScaleARGBRowDownEven_Any_NEON, ScaleARGBRowDownEven_NEON, - ScaleARGBRowDownEven_C, 4, 3) -SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON, - ScaleARGBRowDownEvenBox_C, 4, 3) +SDAANY(ScaleARGBRowDownEven_Any_NEON, + ScaleARGBRowDownEven_NEON, + ScaleARGBRowDownEven_C, + 4, + 3) +SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, + ScaleARGBRowDownEvenBox_NEON, + ScaleARGBRowDownEvenBox_C, + 4, + 3) +#endif +#ifdef HAS_SCALEARGBROWDOWNEVEN_MSA +SDAANY(ScaleARGBRowDownEven_Any_MSA, + ScaleARGBRowDownEven_MSA, + ScaleARGBRowDownEven_C, + 4, + 3) +SDAANY(ScaleARGBRowDownEvenBox_Any_MSA, + ScaleARGBRowDownEvenBox_MSA, + ScaleARGBRowDownEvenBox_C, + 4, + 3) #endif // Add rows box filter scale down. 
-#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \ - void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) { \ - int n = src_width & ~MASK; \ - if (n > 0) { \ - SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \ - } \ - SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \ - } +#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \ + int n = src_width & ~MASK; \ + if (n > 0) { \ + SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \ + } \ + SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \ + } #ifdef HAS_SCALEADDROW_SSE2 SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15) @@ -208,14 +453,12 @@ SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31) #ifdef HAS_SCALEADDROW_NEON SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15) #endif +#ifdef HAS_SCALEADDROW_MSA +SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15) +#endif #undef SAANY #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif - - - - - diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_argb.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_argb.cc index 17f51ae9bf81..53a22e8b41ea 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/scale_argb.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_argb.cc @@ -30,20 +30,31 @@ static __inline int Abs(int v) { // ScaleARGB ARGB, 1/2 // This is an optimized version for scaling down a ARGB to 1/2 of // its original size. -static void ScaleARGBDown2(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy, +static void ScaleARGBDown2(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, enum FilterMode filtering) { int j; int row_stride = src_stride * (dy >> 16); - void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) = - filtering == kFilterNone ? ScaleARGBRowDown2_C : - (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C : - ScaleARGBRowDown2Box_C); - assert(dx == 65536 * 2); // Test scale factor of 2. + void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, + uint8_t* dst_argb, int dst_width) = + filtering == kFilterNone + ? ScaleARGBRowDown2_C + : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C + : ScaleARGBRowDown2Box_C); + (void)src_width; + (void)src_height; + (void)dx; + assert(dx == 65536 * 2); // Test scale factor of 2. assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. // Advance to odd row, even column. if (filtering == kFilterBilinear) { @@ -54,25 +65,49 @@ static void ScaleARGBDown2(int src_width, int src_height, #if defined(HAS_SCALEARGBROWDOWN2_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 : - (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 : - ScaleARGBRowDown2Box_Any_SSE2); + ScaleARGBRowDown2 = + filtering == kFilterNone + ? ScaleARGBRowDown2_Any_SSE2 + : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 + : ScaleARGBRowDown2Box_Any_SSE2); if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 : - (filtering == kFilterLinear ? 
ScaleARGBRowDown2Linear_SSE2 : - ScaleARGBRowDown2Box_SSE2); + ScaleARGBRowDown2 = + filtering == kFilterNone + ? ScaleARGBRowDown2_SSE2 + : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 + : ScaleARGBRowDown2Box_SSE2); } } #endif #if defined(HAS_SCALEARGBROWDOWN2_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON : - (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON : - ScaleARGBRowDown2Box_Any_NEON); + ScaleARGBRowDown2 = + filtering == kFilterNone + ? ScaleARGBRowDown2_Any_NEON + : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON + : ScaleARGBRowDown2Box_Any_NEON); if (IS_ALIGNED(dst_width, 8)) { - ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON : - (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON : - ScaleARGBRowDown2Box_NEON); + ScaleARGBRowDown2 = + filtering == kFilterNone + ? ScaleARGBRowDown2_NEON + : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON + : ScaleARGBRowDown2Box_NEON); + } + } +#endif +#if defined(HAS_SCALEARGBROWDOWN2_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ScaleARGBRowDown2 = + filtering == kFilterNone + ? ScaleARGBRowDown2_Any_MSA + : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA + : ScaleARGBRowDown2Box_Any_MSA); + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBRowDown2 = + filtering == kFilterNone + ? ScaleARGBRowDown2_MSA + : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA + : ScaleARGBRowDown2Box_MSA); } } #endif @@ -90,21 +125,32 @@ static void ScaleARGBDown2(int src_width, int src_height, // ScaleARGB ARGB, 1/4 // This is an optimized version for scaling down a ARGB to 1/4 of // its original size. -static void ScaleARGBDown4Box(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy) { +static void ScaleARGBDown4Box(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy) { int j; // Allocate 2 rows of ARGB. const int kRowSize = (dst_width * 2 * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); int row_stride = src_stride * (dy >> 16); - void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C; + void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, + uint8_t* dst_argb, int dst_width) = + ScaleARGBRowDown2Box_C; // Advance to odd row, even column. src_argb += (y >> 16) * src_stride + (x >> 16) * 4; - assert(dx == 65536 * 4); // Test scale factor of 4. + (void)src_width; + (void)src_height; + (void)dx; + assert(dx == 65536 * 4); // Test scale factor of 4. assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. 
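ScaleARGBDown4Box composes the 1/4 box from three 1/2 box passes: two vertical-pair passes into a double-width row buffer, then one pass across that buffer, so only two temporary rows stay live. Up to rounding, the cascade matches a direct 4x4 mean; a single-block, single-channel demonstration using the same +2 rounding as ScaleARGBRowDown2Box_C:

#include <stdint.h>
#include <stdio.h>

static uint8_t Avg4(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
  return (uint8_t)((a + b + c + d + 2) >> 2);  /* rounded 2x2 box */
}

int main(void) {
  uint8_t p[4][4] = {{10, 20, 30, 40},
                     {50, 60, 70, 80},
                     {90, 100, 110, 120},
                     {130, 140, 150, 160}};
  /* Pass 1/2: four 2x2 boxes. */
  uint8_t a = Avg4(p[0][0], p[0][1], p[1][0], p[1][1]);
  uint8_t b = Avg4(p[0][2], p[0][3], p[1][2], p[1][3]);
  uint8_t c = Avg4(p[2][0], p[2][1], p[3][0], p[3][1]);
  uint8_t d = Avg4(p[2][2], p[2][3], p[3][2], p[3][3]);
  /* Pass 3: one more 2x2 box over the intermediates. */
  printf("two-pass: %d\n", Avg4(a, b, c, d));  /* 85, the mean of all 16 */
  return 0;
}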
#if defined(HAS_SCALEARGBROWDOWN2_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { @@ -125,8 +171,8 @@ static void ScaleARGBDown4Box(int src_width, int src_height, for (j = 0; j < dst_height; ++j) { ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); - ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, - row + kRowSize, dst_width * 2); + ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize, + dst_width * 2); ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width); src_argb += row_stride; dst_argb += dst_stride; @@ -137,38 +183,57 @@ static void ScaleARGBDown4Box(int src_width, int src_height, // ScaleARGB ARGB Even // This is an optimized version for scaling down a ARGB to even // multiple of its original size. -static void ScaleARGBDownEven(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy, +static void ScaleARGBDownEven(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, enum FilterMode filtering) { int j; int col_step = dx >> 16; int row_stride = (dy >> 16) * src_stride; - void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride, - int src_step, uint8* dst_argb, int dst_width) = + void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride, + int src_step, uint8_t* dst_argb, int dst_width) = filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; + (void)src_width; + (void)src_height; assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); src_argb += (y >> 16) * src_stride + (x >> 16) * 4; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 : - ScaleARGBRowDownEven_Any_SSE2; + ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 + : ScaleARGBRowDownEven_Any_SSE2; if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 : - ScaleARGBRowDownEven_SSE2; + ScaleARGBRowDownEven = + filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2; } } #endif #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON : - ScaleARGBRowDownEven_Any_NEON; + ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON + : ScaleARGBRowDownEven_Any_NEON; if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON : - ScaleARGBRowDownEven_NEON; + ScaleARGBRowDownEven = + filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON; + } + } +#endif +#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA + : ScaleARGBRowDownEven_Any_MSA; + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBRowDownEven = + filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA; } } #endif @@ -184,25 +249,32 @@ static void ScaleARGBDownEven(int src_width, int src_height, } // Scale ARGB down with bilinear interpolation. 
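// Illustrative note (not from the upstream patch): the CPU-feature blocks in
// the functions above follow libyuv's runtime-dispatch idiom, which this
// patch extends from SSE2/NEON to MIPS MSA while dropping the DSPR2 paths.
// A condensed sketch of the pattern, with hypothetical names:
//
//   RowFunc row = RowFunc_C;            /* portable fallback */
//   #if defined(HAS_ROWFUNC_SSE2)
//   if (TestCpuFlag(kCpuHasSSE2)) {
//     row = RowFunc_Any_SSE2;           /* any width: SIMD body + C tail */
//     if (IS_ALIGNED(dst_width, 4)) {
//       row = RowFunc_SSE2;             /* width is a SIMD multiple */
//     }
//   }
//   #endif
//
// The _Any_ wrappers (see the SAANY macro earlier in this patch) run the
// SIMD kernel on the aligned prefix and the C kernel on the remainder. The
// bilinear down-scaler below selects its row functions the same way.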
-static void ScaleARGBBilinearDown(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy, +static void ScaleARGBBilinearDown(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, enum FilterMode filtering) { int j; - void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; - void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = + void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_C; + void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb, + int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C; - int64 xlast = x + (int64)(dst_width - 1) * dx; - int64 xl = (dx >= 0) ? x : xlast; - int64 xr = (dx >= 0) ? xlast : x; + int64_t xlast = x + (int64_t)(dst_width - 1) * dx; + int64_t xl = (dx >= 0) ? x : xlast; + int64_t xr = (dx >= 0) ? xlast : x; int clip_src_width; - xl = (xl >> 16) & ~3; // Left edge aligned. - xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels. + xl = (xl >> 16) & ~3; // Left edge aligned. + xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels. xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel. if (xr > src_width) { xr = src_width; @@ -234,12 +306,11 @@ static void ScaleARGBBilinearDown(int src_width, int src_height, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && - IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(clip_src_width, 4)) { - InterpolateRow = InterpolateRow_DSPR2; +#if defined(HAS_INTERPOLATEROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + InterpolateRow = InterpolateRow_Any_MSA; + if (IS_ALIGNED(clip_src_width, 32)) { + InterpolateRow = InterpolateRow_MSA; } } #endif @@ -255,6 +326,14 @@ static void ScaleARGBBilinearDown(int src_width, int src_height, ScaleARGBFilterCols = ScaleARGBFilterCols_NEON; } } +#endif +#if defined(HAS_SCALEARGBFILTERCOLS_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA; + if (IS_ALIGNED(dst_width, 8)) { + ScaleARGBFilterCols = ScaleARGBFilterCols_MSA; + } + } #endif // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // Allocate a row of ARGB. @@ -267,7 +346,7 @@ static void ScaleARGBBilinearDown(int src_width, int src_height, } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8* src = src_argb + yi * src_stride; + const uint8_t* src = src_argb + yi * src_stride; if (filtering == kFilterLinear) { ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); } else { @@ -286,18 +365,25 @@ static void ScaleARGBBilinearDown(int src_width, int src_height, } // Scale ARGB up with bilinear interpolation. 
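// Illustrative note (not from the upstream patch): a worked example of the
// source-window clipping in ScaleARGBBilinearDown above, assuming x = 0,
// dx = 3 << 16 (a 3x horizontal downsample) and dst_width = 10:
//
//   int64_t xlast = 0 + (int64_t)(10 - 1) * (3 << 16);  /* 27 << 16 */
//   int64_t xl = ((int64_t)0 >> 16) & ~3;    /* 0: left edge, 4-aligned */
//   int64_t xr = ((27 << 16) >> 16) + 1;     /* 28: +1, bilinear reads 2 px */
//   xr = (28 + 1 + 3) & ~3;                  /* 32: 4-aligned right bound */
//
// Only source pixels [xl, xr) can be read, so row interpolation is limited
// to the clip_src_width seen in the alignment checks above. The up-scaler
// that follows reads whole source rows and needs no such clipping.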
-static void ScaleARGBBilinearUp(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy, +static void ScaleARGBBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, enum FilterMode filtering) { int j; - void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; - void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = + void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_C; + void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb, + int dst_width, int x, int dx) = filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; const int max_y = (src_height - 1) << 16; #if defined(HAS_INTERPOLATEROW_SSSE3) @@ -324,15 +410,17 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { - InterpolateRow = InterpolateRow_DSPR2; +#if defined(HAS_INTERPOLATEROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + InterpolateRow = InterpolateRow_Any_MSA; + if (IS_ALIGNED(dst_width, 8)) { + InterpolateRow = InterpolateRow_MSA; + } } #endif if (src_width >= 32768) { - ScaleARGBFilterCols = filtering ? - ScaleARGBFilterCols64_C : ScaleARGBCols64_C; + ScaleARGBFilterCols = + filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C; } #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { @@ -347,6 +435,14 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, } } #endif +#if defined(HAS_SCALEARGBFILTERCOLS_MSA) + if (filtering && TestCpuFlag(kCpuHasMSA)) { + ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA; + if (IS_ALIGNED(dst_width, 8)) { + ScaleARGBFilterCols = ScaleARGBFilterCols_MSA; + } + } +#endif #if defined(HAS_SCALEARGBCOLS_SSE2) if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { ScaleARGBFilterCols = ScaleARGBCols_SSE2; @@ -359,6 +455,14 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, ScaleARGBFilterCols = ScaleARGBCols_NEON; } } +#endif +#if defined(HAS_SCALEARGBCOLS_MSA) + if (!filtering && TestCpuFlag(kCpuHasMSA)) { + ScaleARGBFilterCols = ScaleARGBCols_Any_MSA; + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBFilterCols = ScaleARGBCols_MSA; + } + } #endif if (!filtering && src_width * 2 == dst_width && x < 0x8000) { ScaleARGBFilterCols = ScaleARGBColsUp2_C; @@ -375,13 +479,13 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, { int yi = y >> 16; - const uint8* src = src_argb + yi * src_stride; + const uint8_t* src = src_argb + yi * src_stride; // Allocate 2 rows of ARGB. const int kRowSize = (dst_width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); - uint8* rowptr = row; + uint8_t* rowptr = row; int rowstride = kRowSize; int lasty = yi; @@ -423,24 +527,27 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, #ifdef YUVSCALEUP // Scale YUV to ARGB up with bilinear interpolation. 
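// Illustrative note (not from the upstream patch): ScaleARGBBilinearUp above
// scales columns into a two-row scratch buffer and ping-pongs between the
// halves so each source row is column-scaled at most once:
//
//   uint8_t* rowptr = row;     /* currently the older of the two rows */
//   int rowstride = kRowSize;  /* offset to the other row; negated on each
//                                 advance so the roles swap */
//
// Each destination row is then produced by InterpolateRow blending rowptr
// and rowptr + rowstride with an 8-bit vertical fraction derived from y
// (cf. yf = (y >> 8) & 255 in ScalePlaneVertical later in this patch). The
// YUV-to-ARGB variant below adds a per-row I422ToARGBRow conversion in front
// of the same machinery.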
-static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, - int dst_width, int dst_height, +static void ScaleYUVToARGBBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, int src_stride_y, int src_stride_u, int src_stride_v, int dst_stride_argb, - const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int x, int dx, int y, int dy, + const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, enum FilterMode filtering) { int j; - void (*I422ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToARGBRow_C; + void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, int width) = + I422ToARGBRow_C; #if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToARGBRow = I422ToARGBRow_Any_SSSE3; @@ -465,19 +572,18 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, } } #endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_DSPR2; +#if defined(HAS_I422TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToARGBRow = I422ToARGBRow_Any_MSA; + if (IS_ALIGNED(src_width, 8)) { + I422ToARGBRow = I422ToARGBRow_MSA; + } } #endif - void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; + void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_C; #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; @@ -502,19 +608,21 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - InterpolateRow = InterpolateRow_DSPR2; +#if defined(HAS_INTERPOLATEROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + InterpolateRow = InterpolateRow_Any_MSA; + if (IS_ALIGNED(dst_width, 8)) { + InterpolateRow = InterpolateRow_MSA; + } } #endif - void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = + void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb, + int dst_width, int x, int dx) = filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; if (src_width >= 32768) { - ScaleARGBFilterCols = filtering ? - ScaleARGBFilterCols64_C : ScaleARGBCols64_C; + ScaleARGBFilterCols = + filtering ? 
ScaleARGBFilterCols64_C : ScaleARGBCols64_C; } #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { @@ -529,6 +637,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, } } #endif +#if defined(HAS_SCALEARGBFILTERCOLS_MSA) + if (filtering && TestCpuFlag(kCpuHasMSA)) { + ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA; + if (IS_ALIGNED(dst_width, 8)) { + ScaleARGBFilterCols = ScaleARGBFilterCols_MSA; + } + } +#endif #if defined(HAS_SCALEARGBCOLS_SSE2) if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { ScaleARGBFilterCols = ScaleARGBCols_SSE2; @@ -541,6 +657,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, ScaleARGBFilterCols = ScaleARGBCols_NEON; } } +#endif +#if defined(HAS_SCALEARGBCOLS_MSA) + if (!filtering && TestCpuFlag(kCpuHasMSA)) { + ScaleARGBFilterCols = ScaleARGBCols_Any_MSA; + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBFilterCols = ScaleARGBCols_MSA; + } + } #endif if (!filtering && src_width * 2 == dst_width && x < 0x8000) { ScaleARGBFilterCols = ScaleARGBColsUp2_C; @@ -558,9 +682,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate. int yi = y >> 16; int uv_yi = yi >> kYShift; - const uint8* src_row_y = src_y + yi * src_stride_y; - const uint8* src_row_u = src_u + uv_yi * src_stride_u; - const uint8* src_row_v = src_v + uv_yi * src_stride_v; + const uint8_t* src_row_y = src_y + yi * src_stride_y; + const uint8_t* src_row_u = src_u + uv_yi * src_stride_u; + const uint8_t* src_row_v = src_v + uv_yi * src_stride_v; // Allocate 2 rows of ARGB. const int kRowSize = (dst_width * 4 + 31) & ~31; @@ -569,7 +693,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, // Allocate 1 row of ARGB for source conversion. align_buffer_64(argb_row, src_width * 4); - uint8* rowptr = row; + uint8_t* rowptr = row; int rowstride = kRowSize; int lasty = yi; @@ -635,15 +759,23 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, // of x and dx is the integer part of the source position and // the lower 16 bits are the fixed decimal part. -static void ScaleARGBSimple(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy) { +static void ScaleARGBSimple(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy) { int j; - void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = + void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb, + int dst_width, int x, int dx) = (src_width >= 32768) ? 
ScaleARGBCols64_C : ScaleARGBCols_C; + (void)src_height; #if defined(HAS_SCALEARGBCOLS_SSE2) if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { ScaleARGBCols = ScaleARGBCols_SSE2; @@ -656,6 +788,14 @@ static void ScaleARGBSimple(int src_width, int src_height, ScaleARGBCols = ScaleARGBCols_NEON; } } +#endif +#if defined(HAS_SCALEARGBCOLS_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ScaleARGBCols = ScaleARGBCols_Any_MSA; + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBCols = ScaleARGBCols_MSA; + } + } #endif if (src_width * 2 == dst_width && x < 0x8000) { ScaleARGBCols = ScaleARGBColsUp2_C; @@ -667,8 +807,8 @@ static void ScaleARGBSimple(int src_width, int src_height, } for (j = 0; j < dst_height; ++j) { - ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, - dst_width, x, dx); + ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x, + dx); dst_argb += dst_stride; y += dy; } @@ -677,11 +817,18 @@ static void ScaleARGBSimple(int src_width, int src_height, // ScaleARGB a ARGB. // This function in turn calls a scaling function // suitable for handling the desired resolutions. -static void ScaleARGB(const uint8* src, int src_stride, - int src_width, int src_height, - uint8* dst, int dst_stride, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, +static void ScaleARGB(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -690,8 +837,7 @@ static void ScaleARGB(const uint8* src, int src_stride, int dy = 0; // ARGB does not support box filter yet, but allow the user to pass it. // Simplify filtering when possible. - filtering = ScaleFilterReduce(src_width, src_height, - dst_width, dst_height, + filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); // Negative src_height means invert the image. @@ -700,17 +846,17 @@ static void ScaleARGB(const uint8* src, int src_stride, src = src + (src_height - 1) * src_stride; src_stride = -src_stride; } - ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, - &x, &y, &dx, &dy); + ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, + &dx, &dy); src_width = Abs(src_width); if (clip_x) { - int64 clipf = (int64)(clip_x) * dx; + int64_t clipf = (int64_t)(clip_x)*dx; x += (clipf & 0xffff); src += (clipf >> 16) * 4; dst += clip_x * 4; } if (clip_y) { - int64 clipf = (int64)(clip_y) * dy; + int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); src += (clipf >> 16) * src_stride; dst += clip_y * dst_stride; @@ -725,24 +871,20 @@ static void ScaleARGB(const uint8* src, int src_stride, if (!(dx & 0x10000) && !(dy & 0x10000)) { if (dx == 0x20000) { // Optimized 1/2 downsample. - ScaleARGBDown2(src_width, src_height, - clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, dx, y, dy, filtering); + ScaleARGBDown2(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); return; } if (dx == 0x40000 && filtering == kFilterBox) { // Optimized 1/4 box downsample. 
- ScaleARGBDown4Box(src_width, src_height, - clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, dx, y, dy); + ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy); return; } - ScaleARGBDownEven(src_width, src_height, - clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, dx, y, dy, filtering); + ScaleARGBDownEven(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); return; } // Optimized odd scale down. ie 3, 5, 7, 9x. @@ -759,96 +901,105 @@ static void ScaleARGB(const uint8* src, int src_stride, } if (dx == 0x10000 && (x & 0xffff) == 0) { // Arbitrary scale vertically, but unscaled vertically. - ScalePlaneVertical(src_height, - clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, y, dy, 4, filtering); + ScalePlaneVertical(src_height, clip_width, clip_height, src_stride, + dst_stride, src, dst, x, y, dy, 4, filtering); return; } if (filtering && dy < 65536) { - ScaleARGBBilinearUp(src_width, src_height, - clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, dx, y, dy, filtering); + ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); return; } if (filtering) { - ScaleARGBBilinearDown(src_width, src_height, - clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, dx, y, dy, filtering); + ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height, + src_stride, dst_stride, src, dst, x, dx, y, dy, + filtering); return; } - ScaleARGBSimple(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, dx, y, dy); + ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride, + dst_stride, src, dst, x, dx, y, dy); } LIBYUV_API -int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, +int ARGBScaleClip(const uint8_t* src_argb, + int src_stride_argb, + int src_width, + int src_height, + uint8_t* dst_argb, + int dst_stride_argb, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, enum FilterMode filtering) { - if (!src_argb || src_width == 0 || src_height == 0 || - !dst_argb || dst_width <= 0 || dst_height <= 0 || - clip_x < 0 || clip_y < 0 || + if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb || + dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 || clip_width > 32768 || clip_height > 32768 || (clip_x + clip_width) > dst_width || (clip_y + clip_height) > dst_height) { return -1; } - ScaleARGB(src_argb, src_stride_argb, src_width, src_height, - dst_argb, dst_stride_argb, dst_width, dst_height, - clip_x, clip_y, clip_width, clip_height, filtering); + ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, + dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width, + clip_height, filtering); return 0; } // Scale an ARGB image. 
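// Illustrative note (not from the upstream patch): a minimal usage sketch of
// the public entry point defined below; the dimensions and buffers are
// invented for the example, and error handling is omitted:
//
//   #include "libyuv/scale_argb.h"
//   /* Halve a 64x64 ARGB image into a 32x32 one. */
//   uint8_t src[64 * 64 * 4];  /* filled elsewhere */
//   uint8_t dst[32 * 32 * 4];
//   int r = ARGBScale(src, 64 * 4, 64, 64,
//                     dst, 32 * 4, 32, 32, kFilterBilinear);
//   /* r == 0 on success, -1 if the argument checks reject the call. */
//
// With dx == 0x20000 this takes the optimized ScaleARGBDown2 path above.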
LIBYUV_API -int ARGBScale(const uint8* src_argb, int src_stride_argb, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, +int ARGBScale(const uint8_t* src_argb, + int src_stride_argb, + int src_width, + int src_height, + uint8_t* dst_argb, + int dst_stride_argb, + int dst_width, + int dst_height, enum FilterMode filtering) { - if (!src_argb || src_width == 0 || src_height == 0 || - src_width > 32768 || src_height > 32768 || - !dst_argb || dst_width <= 0 || dst_height <= 0) { + if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 || + src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) { return -1; } - ScaleARGB(src_argb, src_stride_argb, src_width, src_height, - dst_argb, dst_stride_argb, dst_width, dst_height, - 0, 0, dst_width, dst_height, filtering); + ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, + dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height, + filtering); return 0; } // Scale with YUV conversion to ARGB and clipping. LIBYUV_API -int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint32 src_fourcc, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - uint32 dst_fourcc, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, +int YUVToARGBScaleClip(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint32_t src_fourcc, + int src_width, + int src_height, + uint8_t* dst_argb, + int dst_stride_argb, + uint32_t dst_fourcc, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, enum FilterMode filtering) { - uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4); + uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4); int r; - I420ToARGB(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - argb_buffer, src_width * 4, - src_width, src_height); + (void)src_fourcc; // TODO(fbarchard): implement and/or assert. 
+ (void)dst_fourcc; + I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + argb_buffer, src_width * 4, src_width, src_height); - r = ARGBScaleClip(argb_buffer, src_width * 4, - src_width, src_height, - dst_argb, dst_stride_argb, - dst_width, dst_height, - clip_x, clip_y, clip_width, clip_height, - filtering); + r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb, + dst_stride_argb, dst_width, dst_height, clip_x, clip_y, + clip_width, clip_height, filtering); free(argb_buffer); return r; } diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_common.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_common.cc index 3507aa4d9ff0..b28d7da41fce 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/scale_common.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_common.cc @@ -28,9 +28,12 @@ static __inline int Abs(int v) { } // CPU agnostic row functions -void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { +void ScaleRowDown2_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { int x; + (void)src_stride; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = src_ptr[1]; dst[1] = src_ptr[3]; @@ -42,9 +45,12 @@ void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, } } -void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width) { +void ScaleRowDown2_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width) { int x; + (void)src_stride; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = src_ptr[1]; dst[1] = src_ptr[3]; @@ -56,10 +62,13 @@ void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride, } } -void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - const uint8* s = src_ptr; +void ScaleRowDown2Linear_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + const uint8_t* s = src_ptr; int x; + (void)src_stride; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = (s[0] + s[1] + 1) >> 1; dst[1] = (s[2] + s[3] + 1) >> 1; @@ -71,10 +80,13 @@ void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, } } -void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width) { - const uint16* s = src_ptr; +void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width) { + const uint16_t* s = src_ptr; int x; + (void)src_stride; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = (s[0] + s[1] + 1) >> 1; dst[1] = (s[2] + s[3] + 1) >> 1; @@ -86,10 +98,12 @@ void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride, } } -void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; +void ScaleRowDown2Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; int x; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; @@ -103,10 +117,12 @@ void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, } } -void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; +void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr, + ptrdiff_t 
src_stride, + uint8_t* dst, + int dst_width) { + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; int x; dst_width -= 1; for (x = 0; x < dst_width - 1; x += 2) { @@ -125,10 +141,12 @@ void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride, dst[0] = (s[0] + t[0] + 1) >> 1; } -void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width) { - const uint16* s = src_ptr; - const uint16* t = src_ptr + src_stride; +void ScaleRowDown2Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width) { + const uint16_t* s = src_ptr; + const uint16_t* t = src_ptr + src_stride; int x; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; @@ -142,9 +160,12 @@ void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, } } -void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { +void ScaleRowDown4_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { int x; + (void)src_stride; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = src_ptr[2]; dst[1] = src_ptr[6]; @@ -156,9 +177,12 @@ void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, } } -void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width) { +void ScaleRowDown4_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width) { int x; + (void)src_stride; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = src_ptr[2]; dst[1] = src_ptr[6]; @@ -170,81 +194,88 @@ void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride, } } -void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { +void ScaleRowDown4Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { intptr_t stride = src_stride; int x; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + - src_ptr[stride + 0] + src_ptr[stride + 1] + - src_ptr[stride + 2] + src_ptr[stride + 3] + - src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + - src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + - src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + - src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + - 8) >> 4; + src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + + src_ptr[stride + 3] + src_ptr[stride * 2 + 0] + + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] + + src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] + + src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] + + src_ptr[stride * 3 + 3] + 8) >> + 4; dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] + - src_ptr[stride + 4] + src_ptr[stride + 5] + - src_ptr[stride + 6] + src_ptr[stride + 7] + - src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] + - src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] + - src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] + - src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] + - 8) >> 4; + src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] + + src_ptr[stride + 7] + src_ptr[stride * 2 + 4] + + src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] + + src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] + + src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] + + src_ptr[stride * 3 + 7] + 8) >> + 4; dst += 2; src_ptr += 8; } if (dst_width & 1) { dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + - src_ptr[stride + 0] + src_ptr[stride + 1] + - src_ptr[stride + 2] + 
src_ptr[stride + 3] + - src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + - src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + - src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + - src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + - 8) >> 4; + src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + + src_ptr[stride + 3] + src_ptr[stride * 2 + 0] + + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] + + src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] + + src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] + + src_ptr[stride * 3 + 3] + 8) >> + 4; } } -void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width) { +void ScaleRowDown4Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width) { intptr_t stride = src_stride; int x; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + - src_ptr[stride + 0] + src_ptr[stride + 1] + - src_ptr[stride + 2] + src_ptr[stride + 3] + - src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + - src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + - src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + - src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + - 8) >> 4; + src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + + src_ptr[stride + 3] + src_ptr[stride * 2 + 0] + + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] + + src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] + + src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] + + src_ptr[stride * 3 + 3] + 8) >> + 4; dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] + - src_ptr[stride + 4] + src_ptr[stride + 5] + - src_ptr[stride + 6] + src_ptr[stride + 7] + - src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] + - src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] + - src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] + - src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] + - 8) >> 4; + src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] + + src_ptr[stride + 7] + src_ptr[stride * 2 + 4] + + src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] + + src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] + + src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] + + src_ptr[stride * 3 + 7] + 8) >> + 4; dst += 2; src_ptr += 8; } if (dst_width & 1) { dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + - src_ptr[stride + 0] + src_ptr[stride + 1] + - src_ptr[stride + 2] + src_ptr[stride + 3] + - src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + - src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + - src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + - src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + - 8) >> 4; + src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + + src_ptr[stride + 3] + src_ptr[stride * 2 + 0] + + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] + + src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] + + src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] + + src_ptr[stride * 3 + 3] + 8) >> + 4; } } -void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { +void ScaleRowDown34_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { int x; + (void)src_stride; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { dst[0] = src_ptr[0]; @@ -255,9 +286,12 @@ void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, } } -void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width) { 
+void ScaleRowDown34_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width) { int x; + (void)src_stride; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { dst[0] = src_ptr[0]; @@ -269,19 +303,21 @@ void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride, } // Filter rows 0 and 1 together, 3 : 1 -void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width) { - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; +void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width) { + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; int x; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { - uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; - uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; - uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; - uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; - uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; - uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; + uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; + uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; + uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; + uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; + uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; + uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; d[0] = (a0 * 3 + b0 + 2) >> 2; d[1] = (a1 * 3 + b1 + 2) >> 2; d[2] = (a2 * 3 + b2 + 2) >> 2; @@ -291,19 +327,21 @@ void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, } } -void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* d, int dst_width) { - const uint16* s = src_ptr; - const uint16* t = src_ptr + src_stride; +void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* d, + int dst_width) { + const uint16_t* s = src_ptr; + const uint16_t* t = src_ptr + src_stride; int x; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { - uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; - uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; - uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; - uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; - uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; - uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; + uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; + uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; + uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; + uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; + uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; + uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; d[0] = (a0 * 3 + b0 + 2) >> 2; d[1] = (a1 * 3 + b1 + 2) >> 2; d[2] = (a2 * 3 + b2 + 2) >> 2; @@ -314,19 +352,21 @@ void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, } // Filter rows 1 and 2 together, 1 : 1 -void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width) { - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; +void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width) { + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; int x; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { - uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; - uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; - uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; - uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; - uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; - uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; + uint8_t a0 
= (s[0] * 3 + s[1] * 1 + 2) >> 2; + uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; + uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; + uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; + uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; + uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; d[0] = (a0 + b0 + 1) >> 1; d[1] = (a1 + b1 + 1) >> 1; d[2] = (a2 + b2 + 1) >> 1; @@ -336,19 +376,21 @@ void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, } } -void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* d, int dst_width) { - const uint16* s = src_ptr; - const uint16* t = src_ptr + src_stride; +void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* d, + int dst_width) { + const uint16_t* s = src_ptr; + const uint16_t* t = src_ptr + src_stride; int x; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { - uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; - uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; - uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; - uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; - uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; - uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; + uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; + uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; + uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; + uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; + uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; + uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; d[0] = (a0 + b0 + 1) >> 1; d[1] = (a1 + b1 + 1) >> 1; d[2] = (a2 + b2 + 1) >> 1; @@ -359,8 +401,11 @@ void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, } // Scales a single row of pixels using point sampling. -void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { +void ScaleCols_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx) { int j; for (j = 0; j < dst_width - 1; j += 2) { dst_ptr[0] = src_ptr[x >> 16]; @@ -374,8 +419,11 @@ void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, } } -void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx) { +void ScaleCols_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + int dst_width, + int x, + int dx) { int j; for (j = 0; j < dst_width - 1; j += 2) { dst_ptr[0] = src_ptr[x >> 16]; @@ -390,9 +438,14 @@ void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr, } // Scales a single row of pixels up by 2x using point sampling. 
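// Illustrative note (not from the upstream patch): a worked pass through the
// point-sampling loop of ScaleCols_C above, for a 4 -> 2 downsample. The
// real starting x comes from ScaleSlope; x = 1 << 16 is chosen here only to
// center the samples:
//
//   /* dx = (4 << 16) / 2 = 2 << 16; x starts at 1 << 16 */
//   /* output 0: src_ptr[x >> 16] = src_ptr[1]; x += dx  ->  x = 3 << 16 */
//   /* output 1: src_ptr[x >> 16] = src_ptr[3] */
//
// Only the integer part of the 16.16 coordinate picks the source pixel; the
// fraction is discarded, which is what distinguishes these from the
// BLENDER-based filtering columns further down.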
-void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { +void ScaleColsUp2_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx) { int j; + (void)x; + (void)dx; for (j = 0; j < dst_width - 1; j += 2) { dst_ptr[1] = dst_ptr[0] = src_ptr[0]; src_ptr += 1; @@ -403,9 +456,14 @@ void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, } } -void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx) { +void ScaleColsUp2_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + int dst_width, + int x, + int dx) { int j; + (void)x; + (void)dx; for (j = 0; j < dst_width - 1; j += 2) { dst_ptr[1] = dst_ptr[0] = src_ptr[0]; src_ptr += 1; @@ -418,16 +476,19 @@ void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr, // (1-f)a + fb can be replaced with a + f(b-a) #if defined(__arm__) || defined(__aarch64__) -#define BLENDER(a, b, f) (uint8)((int)(a) + \ - ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) +#define BLENDER(a, b, f) \ + (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) #else -// inteluses 7 bit math with rounding. -#define BLENDER(a, b, f) (uint8)((int)(a) + \ - (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7)) +// Intel uses 7 bit math with rounding. +#define BLENDER(a, b, f) \ + (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7)) #endif -void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { +void ScaleFilterCols_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx) { int j; for (j = 0; j < dst_width - 1; j += 2) { int xi = x >> 16; @@ -450,12 +511,15 @@ void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, } } -void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x32, int dx) { - int64 x = (int64)(x32); +void ScaleFilterCols64_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x32, + int dx) { + int64_t x = (int64_t)(x32); int j; for (j = 0; j < dst_width - 1; j += 2) { - int64 xi = x >> 16; + int64_t xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); @@ -468,7 +532,7 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, dst_ptr += 2; } if (dst_width & 1) { - int64 xi = x >> 16; + int64_t xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); @@ -476,12 +540,15 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, } #undef BLENDER -// Same as 8 bit arm blender but return is cast to uint16 -#define BLENDER(a, b, f) (uint16)((int)(a) + \ - ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) +// Same as 8 bit arm blender but return is cast to uint16_t +#define BLENDER(a, b, f) \ + (uint16_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) -void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx) { +void ScaleFilterCols_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + int dst_width, + int x, + int dx) { int j; for (j = 0; j < dst_width - 1; j += 2) { int xi = x >> 16; @@ -504,12 +571,15 @@ void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr, } } -void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x32, int dx) { - int64 x = (int64)(x32); +void ScaleFilterCols64_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + int dst_width, + int x32, + int dx) { + int64_t x = 
(int64_t)(x32); int j; for (j = 0; j < dst_width - 1; j += 2) { - int64 xi = x >> 16; + int64_t xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); @@ -522,7 +592,7 @@ void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr, dst_ptr += 2; } if (dst_width & 1) { - int64 xi = x >> 16; + int64_t xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); @@ -530,9 +600,12 @@ void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr, } #undef BLENDER -void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { +void ScaleRowDown38_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { int x; + (void)src_stride; assert(dst_width % 3 == 0); for (x = 0; x < dst_width; x += 3) { dst[0] = src_ptr[0]; @@ -543,9 +616,12 @@ void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, } } -void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width) { +void ScaleRowDown38_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width) { int x; + (void)src_stride; assert(dst_width % 3 == 0); for (x = 0; x < dst_width; x += 3) { dst[0] = src_ptr[0]; @@ -557,100 +633,118 @@ void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride, } // 8x3 -> 3x1 -void ScaleRowDown38_3_Box_C(const uint8* src_ptr, +void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { + uint8_t* dst_ptr, + int dst_width) { intptr_t stride = src_stride; int i; assert((dst_width % 3 == 0) && (dst_width > 0)); for (i = 0; i < dst_width; i += 3) { - dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + - src_ptr[stride + 0] + src_ptr[stride + 1] + - src_ptr[stride + 2] + src_ptr[stride * 2 + 0] + - src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * - (65536 / 9) >> 16; - dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + - src_ptr[stride + 3] + src_ptr[stride + 4] + - src_ptr[stride + 5] + src_ptr[stride * 2 + 3] + - src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * - (65536 / 9) >> 16; - dst_ptr[2] = (src_ptr[6] + src_ptr[7] + - src_ptr[stride + 6] + src_ptr[stride + 7] + - src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * - (65536 / 6) >> 16; + dst_ptr[0] = + (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] + + src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] + + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * + (65536 / 9) >> + 16; + dst_ptr[1] = + (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] + + src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] + + src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * + (65536 / 9) >> + 16; + dst_ptr[2] = + (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] + + src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * + (65536 / 6) >> + 16; src_ptr += 8; dst_ptr += 3; } } -void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr, +void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width) { + uint16_t* dst_ptr, + int dst_width) { intptr_t stride = src_stride; int i; assert((dst_width % 3 == 0) && (dst_width > 0)); for (i = 0; i < dst_width; i += 3) { - dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + - src_ptr[stride + 0] + src_ptr[stride + 1] + - src_ptr[stride + 2] + src_ptr[stride * 2 + 0] + - src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * - (65536 / 9) 
>> 16; - dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + - src_ptr[stride + 3] + src_ptr[stride + 4] + - src_ptr[stride + 5] + src_ptr[stride * 2 + 3] + - src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * - (65536 / 9) >> 16; - dst_ptr[2] = (src_ptr[6] + src_ptr[7] + - src_ptr[stride + 6] + src_ptr[stride + 7] + - src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * - (65536 / 6) >> 16; + dst_ptr[0] = + (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] + + src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] + + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * + (65536 / 9) >> + 16; + dst_ptr[1] = + (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] + + src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] + + src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * + (65536 / 9) >> + 16; + dst_ptr[2] = + (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] + + src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * + (65536 / 6) >> + 16; src_ptr += 8; dst_ptr += 3; } } // 8x2 -> 3x1 -void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { intptr_t stride = src_stride; int i; assert((dst_width % 3 == 0) && (dst_width > 0)); for (i = 0; i < dst_width; i += 3) { - dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + - src_ptr[stride + 0] + src_ptr[stride + 1] + - src_ptr[stride + 2]) * (65536 / 6) >> 16; - dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + - src_ptr[stride + 3] + src_ptr[stride + 4] + - src_ptr[stride + 5]) * (65536 / 6) >> 16; - dst_ptr[2] = (src_ptr[6] + src_ptr[7] + - src_ptr[stride + 6] + src_ptr[stride + 7]) * - (65536 / 4) >> 16; + dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] + + src_ptr[stride + 1] + src_ptr[stride + 2]) * + (65536 / 6) >> + 16; + dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] + + src_ptr[stride + 4] + src_ptr[stride + 5]) * + (65536 / 6) >> + 16; + dst_ptr[2] = + (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) * + (65536 / 4) >> + 16; src_ptr += 8; dst_ptr += 3; } } -void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width) { +void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + int dst_width) { intptr_t stride = src_stride; int i; assert((dst_width % 3 == 0) && (dst_width > 0)); for (i = 0; i < dst_width; i += 3) { - dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + - src_ptr[stride + 0] + src_ptr[stride + 1] + - src_ptr[stride + 2]) * (65536 / 6) >> 16; - dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + - src_ptr[stride + 3] + src_ptr[stride + 4] + - src_ptr[stride + 5]) * (65536 / 6) >> 16; - dst_ptr[2] = (src_ptr[6] + src_ptr[7] + - src_ptr[stride + 6] + src_ptr[stride + 7]) * - (65536 / 4) >> 16; + dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] + + src_ptr[stride + 1] + src_ptr[stride + 2]) * + (65536 / 6) >> + 16; + dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] + + src_ptr[stride + 4] + src_ptr[stride + 5]) * + (65536 / 6) >> + 16; + dst_ptr[2] = + (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) * + (65536 / 4) >> + 16; src_ptr += 8; dst_ptr += 3; } } -void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) { +void ScaleAddRow_C(const 
uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { int x; assert(src_width > 0); for (x = 0; x < src_width - 1; x += 2) { @@ -664,7 +758,9 @@ void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) { } } -void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) { +void ScaleAddRow_16_C(const uint16_t* src_ptr, + uint32_t* dst_ptr, + int src_width) { int x; assert(src_width > 0); for (x = 0; x < src_width - 1; x += 2) { @@ -678,13 +774,14 @@ void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) { } } -void ScaleARGBRowDown2_C(const uint8* src_argb, +void ScaleARGBRowDown2_C(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); - + uint8_t* dst_argb, + int dst_width) { + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int x; + (void)src_stride; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = src[1]; dst[1] = src[3]; @@ -696,10 +793,12 @@ void ScaleARGBRowDown2_C(const uint8* src_argb, } } -void ScaleARGBRowDown2Linear_C(const uint8* src_argb, +void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { + uint8_t* dst_argb, + int dst_width) { int x; + (void)src_stride; for (x = 0; x < dst_width; ++x) { dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1; dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1; @@ -710,29 +809,37 @@ void ScaleARGBRowDown2Linear_C(const uint8* src_argb, } } -void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { +void ScaleARGBRowDown2Box_C(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { int x; for (x = 0; x < dst_width; ++x) { - dst_argb[0] = (src_argb[0] + src_argb[4] + - src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2; - dst_argb[1] = (src_argb[1] + src_argb[5] + - src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2; - dst_argb[2] = (src_argb[2] + src_argb[6] + - src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2; - dst_argb[3] = (src_argb[3] + src_argb[7] + - src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2; + dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] + + src_argb[src_stride + 4] + 2) >> + 2; + dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] + + src_argb[src_stride + 5] + 2) >> + 2; + dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] + + src_argb[src_stride + 6] + 2) >> + 2; + dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] + + src_argb[src_stride + 7] + 2) >> + 2; src_argb += 8; dst_argb += 4; } } -void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride, +void ScaleARGBRowDownEven_C(const uint8_t* src_argb, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); - + uint8_t* dst_argb, + int dst_width) { + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); + (void)src_stride; int x; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = src[0]; @@ -745,30 +852,38 @@ void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride, } } -void ScaleARGBRowDownEvenBox_C(const uint8* src_argb, +void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width) 
{ + uint8_t* dst_argb, + int dst_width) { int x; for (x = 0; x < dst_width; ++x) { - dst_argb[0] = (src_argb[0] + src_argb[4] + - src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2; - dst_argb[1] = (src_argb[1] + src_argb[5] + - src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2; - dst_argb[2] = (src_argb[2] + src_argb[6] + - src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2; - dst_argb[3] = (src_argb[3] + src_argb[7] + - src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2; + dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] + + src_argb[src_stride + 4] + 2) >> + 2; + dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] + + src_argb[src_stride + 5] + 2) >> + 2; + dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] + + src_argb[src_stride + 6] + 2) >> + 2; + dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] + + src_argb[src_stride + 7] + 2) >> + 2; src_argb += src_stepx * 4; dst_argb += 4; } } // Scales a single row of pixels using point sampling. -void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); +void ScaleARGBCols_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int j; for (j = 0; j < dst_width - 1; j += 2) { dst[0] = src[x >> 16]; @@ -782,11 +897,14 @@ void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, } } -void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x32, int dx) { - int64 x = (int64)(x32); - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); +void ScaleARGBCols64_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x32, + int dx) { + int64_t x = (int64_t)(x32); + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int j; for (j = 0; j < dst_width - 1; j += 2) { dst[0] = src[x >> 16]; @@ -801,11 +919,16 @@ void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb, } // Scales a single row of pixels up by 2x using point sampling. -void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); +void ScaleARGBColsUp2_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int j; + (void)x; + (void)dx; for (j = 0; j < dst_width - 1; j += 2) { dst[1] = dst[0] = src[0]; src += 1; @@ -818,23 +941,26 @@ void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, // TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607. 
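// Illustrative note (not from the upstream patch): in the ARGB BLENDER
// macros that follow, the fraction f is the top 7 bits of the 16.16
// remainder, f = (x >> 9) & 0x7f, so f lies in [0, 127] and 0x7f ^ f is
// just 127 - f. Each byte lane blends as (a * (127 - f) + b * f) >> 7;
// e.g. for a = 0, b = 255, f = 64:
//
//   (0 * 63 + 255 * 64) >> 7 == 127   /* truncated, not rounded */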
// Mimics SSSE3 blender -#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7 -#define BLENDERC(a, b, f, s) (uint32)( \ - BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) -#define BLENDER(a, b, f) \ - BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \ - BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0) +#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7 +#define BLENDERC(a, b, f, s) \ + (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) +#define BLENDER(a, b, f) \ + BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \ + BLENDERC(a, b, f, 0) -void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); +void ScaleARGBFilterCols_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int j; for (j = 0; j < dst_width - 1; j += 2) { int xi = x >> 16; int xf = (x >> 9) & 0x7f; - uint32 a = src[xi]; - uint32 b = src[xi + 1]; + uint32_t a = src[xi]; + uint32_t b = src[xi + 1]; dst[0] = BLENDER(a, b, xf); x += dx; xi = x >> 16; @@ -848,23 +974,26 @@ void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb, if (dst_width & 1) { int xi = x >> 16; int xf = (x >> 9) & 0x7f; - uint32 a = src[xi]; - uint32 b = src[xi + 1]; + uint32_t a = src[xi]; + uint32_t b = src[xi + 1]; dst[0] = BLENDER(a, b, xf); } } -void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x32, int dx) { - int64 x = (int64)(x32); - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); +void ScaleARGBFilterCols64_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x32, + int dx) { + int64_t x = (int64_t)(x32); + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); int j; for (j = 0; j < dst_width - 1; j += 2) { - int64 xi = x >> 16; + int64_t xi = x >> 16; int xf = (x >> 9) & 0x7f; - uint32 a = src[xi]; - uint32 b = src[xi + 1]; + uint32_t a = src[xi]; + uint32_t b = src[xi + 1]; dst[0] = BLENDER(a, b, xf); x += dx; xi = x >> 16; @@ -876,10 +1005,10 @@ void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, dst += 2; } if (dst_width & 1) { - int64 xi = x >> 16; + int64_t xi = x >> 16; int xf = (x >> 9) & 0x7f; - uint32 a = src[xi]; - uint32 b = src[xi + 1]; + uint32_t a = src[xi]; + uint32_t b = src[xi + 1]; dst[0] = BLENDER(a, b, xf); } } @@ -889,16 +1018,22 @@ void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, // Scale plane vertically with bilinear interpolation. void ScalePlaneVertical(int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int y, int dy, - int bpp, enum FilterMode filtering) { + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int y, + int dy, + int bpp, + enum FilterMode filtering) { // TODO(fbarchard): Allow higher bpp. int dst_width_bytes = dst_width * bpp; - void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; + void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_C; const int max_y = (src_height > 1) ? 
((src_height - 1) << 16) - 1 : 0; int j; assert(bpp >= 1 && bpp <= 4); @@ -930,13 +1065,11 @@ void ScalePlaneVertical(int src_height, } } #endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && - IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(dst_width_bytes, 4)) { - InterpolateRow = InterpolateRow_DSPR2; +#if defined(HAS_INTERPOLATEROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + InterpolateRow = InterpolateRow_Any_MSA; + if (IS_ALIGNED(dst_width_bytes, 32)) { + InterpolateRow = InterpolateRow_MSA; } } #endif @@ -948,23 +1081,29 @@ void ScalePlaneVertical(int src_height, } yi = y >> 16; yf = filtering ? ((y >> 8) & 255) : 0; - InterpolateRow(dst_argb, src_argb + yi * src_stride, - src_stride, dst_width_bytes, yf); + InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride, + dst_width_bytes, yf); dst_argb += dst_stride; y += dy; } } void ScalePlaneVertical_16(int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint16* src_argb, uint16* dst_argb, - int x, int y, int dy, - int wpp, enum FilterMode filtering) { + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_argb, + uint16_t* dst_argb, + int x, + int y, + int dy, + int wpp, + enum FilterMode filtering) { // TODO(fbarchard): Allow higher wpp. int dst_width_words = dst_width * wpp; - void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_16_C; + void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_16_C; const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; int j; assert(wpp >= 1 && wpp <= 2); @@ -1003,16 +1142,6 @@ void ScalePlaneVertical_16(int src_height, InterpolateRow = InterpolateRow_16_NEON; } } -#endif -#if defined(HAS_INTERPOLATEROW_16_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && - IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { - InterpolateRow = InterpolateRow_Any_16_DSPR2; - if (IS_ALIGNED(dst_width_bytes, 4)) { - InterpolateRow = InterpolateRow_16_DSPR2; - } - } #endif for (j = 0; j < dst_height; ++j) { int yi; @@ -1022,16 +1151,18 @@ void ScalePlaneVertical_16(int src_height, } yi = y >> 16; yf = filtering ? ((y >> 8) & 255) : 0; - InterpolateRow(dst_argb, src_argb + yi * src_stride, - src_stride, dst_width_words, yf); + InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride, + dst_width_words, yf); dst_argb += dst_stride; y += dy; } } // Simplify the filtering based on scale factors. -enum FilterMode ScaleFilterReduce(int src_width, int src_height, - int dst_width, int dst_height, +enum FilterMode ScaleFilterReduce(int src_width, + int src_height, + int dst_width, + int dst_height, enum FilterMode filtering) { if (src_width < 0) { src_width = -src_width; @@ -1073,22 +1204,26 @@ enum FilterMode ScaleFilterReduce(int src_width, int src_height, // Divide num by div and return as 16.16 fixed point result. int FixedDiv_C(int num, int div) { - return (int)(((int64)(num) << 16) / div); + return (int)(((int64_t)(num) << 16) / div); } // Divide num by div and return as 16.16 fixed point result. 
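Note: both FixedDiv_C above and the FixedDiv1_C variant below return 16.16 fixed-point quotients; the numerator is shifted left by 16 before the integer divide, so the low 16 bits of the result carry the fraction. A minimal standalone sketch of the representation:

/* Sketch of the 16.16 quotient computed by FixedDiv_C. */
#include <stdint.h>
#include <stdio.h>

static int FixedDiv(int num, int div) {
  return (int)(((int64_t)(num) << 16) / div);
}

int main(void) {
  int dx = FixedDiv(3, 4); /* 0.75 -> 0x0000c000 */
  printf("dx = 0x%08x = %f\n", dx, dx / 65536.0);
  return 0;
}

The FixedDiv1 form subtracts 0x00010001 and divides by div - 1, so that when upscaling the last output sample maps onto the last source pixel.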
int FixedDiv1_C(int num, int div) { - return (int)((((int64)(num) << 16) - 0x00010001) / - (div - 1)); + return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1)); } #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s) // Compute slope values for stepping. -void ScaleSlope(int src_width, int src_height, - int dst_width, int dst_height, +void ScaleSlope(int src_width, + int src_height, + int dst_width, + int dst_height, enum FilterMode filtering, - int* x, int* y, int* dx, int* dy) { + int* x, + int* y, + int* dx, + int* dy) { assert(x != NULL); assert(y != NULL); assert(dx != NULL); @@ -1120,7 +1255,7 @@ void ScaleSlope(int src_width, int src_height, *x = 0; } if (dst_height <= src_height) { - *dy = FixedDiv(src_height, dst_height); + *dy = FixedDiv(src_height, dst_height); *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter. } else if (dst_height > 1) { *dy = FixedDiv1(src_height, dst_height); @@ -1153,6 +1288,35 @@ void ScaleSlope(int src_width, int src_height, } #undef CENTERSTART +// Read 8x2 upsample with filtering and write 16x1. +// actually reads an extra pixel, so 9x2. +void ScaleRowUp2_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width) { + const uint16_t* src2 = src_ptr + src_stride; + + int x; + for (x = 0; x < dst_width - 1; x += 2) { + uint16_t p0 = src_ptr[0]; + uint16_t p1 = src_ptr[1]; + uint16_t p2 = src2[0]; + uint16_t p3 = src2[1]; + dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4; + dst[1] = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4; + ++src_ptr; + ++src2; + dst += 2; + } + if (dst_width & 1) { + uint16_t p0 = src_ptr[0]; + uint16_t p1 = src_ptr[1]; + uint16_t p2 = src2[0]; + uint16_t p3 = src2[1]; + dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4; + } +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_gcc.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_gcc.cc index e2f88544b7f5..312236d2df8e 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/scale_gcc.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_gcc.cc @@ -21,1296 +21,1348 @@ extern "C" { (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) // Offsets for source bytes 0 to 9 -static uvec8 kShuf0 = - { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 }; +static const uvec8 kShuf0 = {0, 1, 3, 4, 5, 7, 8, 9, + 128, 128, 128, 128, 128, 128, 128, 128}; // Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12. -static uvec8 kShuf1 = - { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 }; +static const uvec8 kShuf1 = {3, 4, 5, 7, 8, 9, 11, 12, + 128, 128, 128, 128, 128, 128, 128, 128}; // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. -static uvec8 kShuf2 = - { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 }; +static const uvec8 kShuf2 = {5, 7, 8, 9, 11, 12, 13, 15, + 128, 128, 128, 128, 128, 128, 128, 128}; // Offsets for source bytes 0 to 10 -static uvec8 kShuf01 = - { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 }; +static const uvec8 kShuf01 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; // Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13. -static uvec8 kShuf11 = - { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 }; +static const uvec8 kShuf11 = {2, 3, 4, 5, 5, 6, 6, 7, + 8, 9, 9, 10, 10, 11, 12, 13}; // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. 
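Note: ScaleRowUp2_16_C, added above, is the quarter-pixel bilinear 2x upsample: each output sits a quarter of a source pixel from one input and three quarters from the next in each axis, giving the 9:3:3:1 weights (summing to 16), with +8 as the rounding term before the shift by 4. A standalone sketch of one output pair (pixel values chosen arbitrarily):

/* Sketch of one output pair of the 9:3:3:1 kernel. */
#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint16_t p0 = 100, p1 = 200; /* adjacent pixels, top row */
  uint16_t p2 = 120, p3 = 220; /* same columns, next row */
  int left = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;  /* 130 */
  int right = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4; /* 180 */
  printf("%d %d\n", left, right);
  return 0;
}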
-static uvec8 kShuf21 = - { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 }; +static const uvec8 kShuf21 = {5, 6, 6, 7, 8, 9, 9, 10, + 10, 11, 12, 13, 13, 14, 14, 15}; // Coefficients for source bytes 0 to 10 -static uvec8 kMadd01 = - { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 }; +static const uvec8 kMadd01 = {3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2}; // Coefficients for source bytes 10 to 21 -static uvec8 kMadd11 = - { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 }; +static const uvec8 kMadd11 = {1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1}; // Coefficients for source bytes 21 to 31 -static uvec8 kMadd21 = - { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 }; +static const uvec8 kMadd21 = {2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3}; // Coefficients for source bytes 21 to 31 -static vec16 kRound34 = - { 2, 2, 2, 2, 2, 2, 2, 2 }; +static const vec16 kRound34 = {2, 2, 2, 2, 2, 2, 2, 2}; -static uvec8 kShuf38a = - { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; +static const uvec8 kShuf38a = {0, 3, 6, 8, 11, 14, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128}; -static uvec8 kShuf38b = - { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 }; +static const uvec8 kShuf38b = {128, 128, 128, 128, 128, 128, 0, 3, + 6, 8, 11, 14, 128, 128, 128, 128}; // Arrange words 0,3,6 into 0,1,2 -static uvec8 kShufAc = - { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; +static const uvec8 kShufAc = {0, 1, 6, 7, 12, 13, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128}; // Arrange words 0,3,6 into 3,4,5 -static uvec8 kShufAc3 = - { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 }; +static const uvec8 kShufAc3 = {128, 128, 128, 128, 128, 128, 0, 1, + 6, 7, 12, 13, 128, 128, 128, 128}; // Scaling values for boxes of 3x3 and 2x3 -static uvec16 kScaleAc33 = - { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 }; +static const uvec16 kScaleAc33 = {65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, + 65536 / 9, 65536 / 6, 0, 0}; // Arrange first value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb0 = - { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 }; +static const uvec8 kShufAb0 = {0, 128, 3, 128, 6, 128, 8, 128, + 11, 128, 14, 128, 128, 128, 128, 128}; // Arrange second value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb1 = - { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 }; +static const uvec8 kShufAb1 = {1, 128, 4, 128, 7, 128, 9, 128, + 12, 128, 15, 128, 128, 128, 128, 128}; // Arrange third value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb2 = - { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 }; +static const uvec8 kShufAb2 = {2, 128, 5, 128, 128, 128, 10, 128, + 13, 128, 128, 128, 128, 128, 128, 128}; // Scaling values for boxes of 3x2 and 2x2 -static uvec16 kScaleAb2 = - { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 }; +static const uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, + 65536 / 3, 65536 / 2, 0, 0}; // GCC versions of row functions are verbatim conversions from Visual C. 
// Generated using gcc disassembly on Visual C object file: // objdump -D yuvscaler.obj >yuvscaler.txt -void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1" - ); +void ScaleRowDown2_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + // 16 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "psrlw $0x8,%%xmm0 \n" + "psrlw $0x8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1"); } -void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrlw $0xf,%%xmm4 \n" - "packuswb %%xmm4,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" +void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrlw $0xf,%%xmm4 \n" + "packuswb %%xmm4,%%xmm4 \n" + "pxor %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10, 0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pavgw %%xmm5,%%xmm0 \n" - "pavgw %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pavgw %%xmm5,%%xmm0 \n" + "pavgw %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm4", "xmm5"); } -void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrlw $0xf,%%xmm4 \n" - "packuswb %%xmm4,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" +void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + asm volatile( + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrlw $0xf,%%xmm4 \n" + "packuswb %%xmm4,%%xmm4 \n" + "pxor %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - 
"paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm3,%%xmm1 \n" - "psrlw $0x1,%%xmm0 \n" - "psrlw $0x1,%%xmm1 \n" - "pavgw %%xmm5,%%xmm0 \n" - "pavgw %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x00(%0,%3,1),%%xmm2 \n" + "movdqu 0x10(%0,%3,1),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "paddw %%xmm2,%%xmm0 \n" + "paddw %%xmm3,%%xmm1 \n" + "psrlw $0x1,%%xmm0 \n" + "psrlw $0x1,%%xmm1 \n" + "pavgw %%xmm5,%%xmm0 \n" + "pavgw %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #ifdef HAS_SCALEROWDOWN2_AVX2 -void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1" - ); +void ScaleRowDown2_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1"); } -void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" - "vpsrlw $0xf,%%ymm4,%%ymm4 \n" - "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" - "vpxor %%ymm5,%%ymm5,%%ymm5 \n" +void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" + "vpsrlw $0xf,%%ymm4,%%ymm4 \n" + "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" + "vpxor %%ymm5,%%ymm5,%%ymm5 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20, 0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" - "vpavgw %%ymm5,%%ymm0,%%ymm0 \n" - "vpavgw %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_ptr), 
// %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" + "vpavgw %%ymm5,%%ymm0,%%ymm0 \n" + "vpavgw %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm4", "xmm5"); } -void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" - "vpsrlw $0xf,%%ymm4,%%ymm4 \n" - "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" - "vpxor %%ymm5,%%ymm5,%%ymm5 \n" +void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + asm volatile( + "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" + "vpsrlw $0xf,%%ymm4,%%ymm4 \n" + "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" + "vpxor %%ymm5,%%ymm5,%%ymm5 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2 - MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3 - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" - "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" - "vpsrlw $0x1,%%ymm0,%%ymm0 \n" - "vpsrlw $0x1,%%ymm1,%%ymm1 \n" - "vpavgw %%ymm5,%%ymm0,%%ymm0 \n" - "vpavgw %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x00(%0,%3,1),%%ymm2 \n" + "vmovdqu 0x20(%0,%3,1),%%ymm3 \n" + "lea 0x40(%0),%0 \n" + "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" + "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" + "vpsrlw $0x1,%%ymm0,%%ymm0 \n" + "vpsrlw $0x1,%%ymm1,%%ymm1 \n" + "vpavgw %%ymm5,%%ymm0,%%ymm0 \n" + "vpavgw %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SCALEROWDOWN2_AVX2 -void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrld $0x18,%%xmm5 \n" - "pslld $0x10,%%xmm5 \n" +void ScaleRowDown4_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + "pcmpeqb %%xmm5,%%xmm5 \n" + "psrld $0x18,%%xmm5 \n" + "pslld 
$0x10,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "psrlw $0x8,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pand %%xmm5,%%xmm0 \n" + "pand %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "psrlw $0x8,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm5"); } -void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { intptr_t stridex3; - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrlw $0xf,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "packuswb %%xmm4,%%xmm4 \n" - "psllw $0x3,%%xmm5 \n" - "lea " MEMLEA4(0x00,4,4,2) ",%3 \n" + asm volatile( + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrlw $0xf,%%xmm4 \n" + "movdqa %%xmm4,%%xmm5 \n" + "packuswb %%xmm4,%%xmm4 \n" + "psllw $0x3,%%xmm5 \n" + "lea 0x00(%4,%4,2),%3 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm3,%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,4,2,xmm2) // movdqu (%0,%4,2),%%xmm2 - MEMOPREG(movdqu,0x10,0,4,2,xmm3) // movdqu 0x10(%0,%4,2),%%xmm3 - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm3,%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm3,%%xmm1 \n" - "phaddw %%xmm1,%%xmm0 \n" - "paddw %%xmm5,%%xmm0 \n" - "psrlw $0x4,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "=&r"(stridex3) // %3 - : "r"((intptr_t)(src_stride)) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x00(%0,%4,1),%%xmm2 \n" + "movdqu 0x10(%0,%4,1),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm0 \n" + "pmaddubsw %%xmm4,%%xmm1 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "paddw %%xmm2,%%xmm0 \n" + "paddw %%xmm3,%%xmm1 \n" + "movdqu 0x00(%0,%4,2),%%xmm2 \n" + "movdqu 0x10(%0,%4,2),%%xmm3 \n" + "pmaddubsw %%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "paddw %%xmm2,%%xmm0 \n" + "paddw %%xmm3,%%xmm1 \n" + "movdqu 0x00(%0,%3,1),%%xmm2 \n" + "movdqu 0x10(%0,%3,1),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "pmaddubsw 
%%xmm4,%%xmm2 \n" + "pmaddubsw %%xmm4,%%xmm3 \n" + "paddw %%xmm2,%%xmm0 \n" + "paddw %%xmm3,%%xmm1 \n" + "phaddw %%xmm1,%%xmm0 \n" + "paddw %%xmm5,%%xmm0 \n" + "psrlw $0x4,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width), // %2 + "=&r"(stridex3) // %3 + : "r"((intptr_t)(src_stride)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } - #ifdef HAS_SCALEROWDOWN4_AVX2 -void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - "vpsrld $0x18,%%ymm5,%%ymm5 \n" - "vpslld $0x10,%%ymm5,%%ymm5 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpand %%ymm5,%%ymm0,%%ymm0 \n" - "vpand %%ymm5,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpsrlw $0x8,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm5" - ); +void ScaleRowDown4_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + "vpsrld $0x18,%%ymm5,%%ymm5 \n" + "vpslld $0x10,%%ymm5,%%ymm5 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpand %%ymm5,%%ymm0,%%ymm0 \n" + "vpand %%ymm5,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpsrlw $0x8,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm5"); } -void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" - "vpsrlw $0xf,%%ymm4,%%ymm4 \n" - "vpsllw $0x3,%%ymm4,%%ymm5 \n" - "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" +void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + asm volatile( + "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" + "vpsrlw $0xf,%%ymm4,%%ymm4 \n" + "vpsllw $0x3,%%ymm4,%%ymm5 \n" + "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2 - MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3 - "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" - "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" - MEMOPREG(vmovdqu,0x00,0,3,2,ymm2) // vmovdqu (%0,%3,2),%%ymm2 - MEMOPREG(vmovdqu,0x20,0,3,2,ymm3) // vmovdqu 0x20(%0,%3,2),%%ymm3 - "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" - MEMOPREG(vmovdqu,0x00,0,4,1,ymm2) // vmovdqu 
(%0,%4,1),%%ymm2 - MEMOPREG(vmovdqu,0x20,0,4,1,ymm3) // vmovdqu 0x20(%0,%4,1),%%ymm3 - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" - "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" - "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" - "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" - "vpsrlw $0x4,%%ymm0,%%ymm0 \n" - "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vmovdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)), // %3 - "r"((intptr_t)(src_stride * 3)) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vmovdqu 0x00(%0,%3,1),%%ymm2 \n" + "vmovdqu 0x20(%0,%3,1),%%ymm3 \n" + "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" + "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" + "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" + "vmovdqu 0x00(%0,%3,2),%%ymm2 \n" + "vmovdqu 0x20(%0,%3,2),%%ymm3 \n" + "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" + "vmovdqu 0x00(%0,%4,1),%%ymm2 \n" + "vmovdqu 0x20(%0,%4,1),%%ymm3 \n" + "lea 0x40(%0),%0 \n" + "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" + "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" + "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" + "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" + "vpsrlw $0x4,%%ymm0,%%ymm0 \n" + "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)), // %3 + "r"((intptr_t)(src_stride * 3)) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_SCALEROWDOWN4_AVX2 -void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movdqa %0,%%xmm3 \n" - "movdqa %1,%%xmm4 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kShuf0), // %0 - "m"(kShuf1), // %1 - "m"(kShuf2) // %2 - ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm2 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "palignr $0x8,%%xmm0,%%xmm1 \n" - "pshufb %%xmm3,%%xmm0 \n" - "pshufb %%xmm4,%%xmm1 \n" - "pshufb %%xmm5,%%xmm2 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "movq %%xmm1," MEMACCESS2(0x8,1) " \n" - "movq %%xmm2," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x18,1) ",%1 \n" - "sub $0x18,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); +void ScaleRowDown34_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + "movdqa %0,%%xmm3 \n" + "movdqa %1,%%xmm4 \n" + "movdqa %2,%%xmm5 \n" + : + : "m"(kShuf0), // %0 + "m"(kShuf1), // %1 + "m"(kShuf2) // %2 + ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm2 \n" + "lea 0x20(%0),%0 \n" 
+ "movdqa %%xmm2,%%xmm1 \n" + "palignr $0x8,%%xmm0,%%xmm1 \n" + "pshufb %%xmm3,%%xmm0 \n" + "pshufb %%xmm4,%%xmm1 \n" + "pshufb %%xmm5,%%xmm2 \n" + "movq %%xmm0,(%1) \n" + "movq %%xmm1,0x8(%1) \n" + "movq %%xmm2,0x10(%1) \n" + "lea 0x18(%1),%1 \n" + "sub $0x18,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movdqa %0,%%xmm2 \n" // kShuf01 - "movdqa %1,%%xmm3 \n" // kShuf11 - "movdqa %2,%%xmm4 \n" // kShuf21 - : - : "m"(kShuf01), // %0 - "m"(kShuf11), // %1 - "m"(kShuf21) // %2 - ); - asm volatile ( - "movdqa %0,%%xmm5 \n" // kMadd01 - "movdqa %1,%%xmm0 \n" // kMadd11 - "movdqa %2,%%xmm1 \n" // kRound34 - : - : "m"(kMadd01), // %0 - "m"(kMadd11), // %1 - "m"(kRound34) // %2 - ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm5,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS(1) " \n" - "movdqu " MEMACCESS2(0x8,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm3,%%xmm6 \n" - "pmaddubsw %%xmm0,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS2(0x8,1) " \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3),%%xmm7 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm4,%%xmm6 \n" - "pmaddubsw %4,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x18,1) ",%1 \n" - "sub $0x18,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)), // %3 - "m"(kMadd21) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + uint8_t* dst_ptr, + int dst_width) { + asm volatile( + "movdqa %0,%%xmm2 \n" // kShuf01 + "movdqa %1,%%xmm3 \n" // kShuf11 + "movdqa %2,%%xmm4 \n" // kShuf21 + : + : "m"(kShuf01), // %0 + "m"(kShuf11), // %1 + "m"(kShuf21) // %2 + ); + asm volatile( + "movdqa %0,%%xmm5 \n" // kMadd01 + "movdqa %1,%%xmm0 \n" // kMadd11 + "movdqa %2,%%xmm1 \n" // kRound34 + : + : "m"(kMadd01), // %0 + "m"(kMadd11), // %1 + "m"(kRound34) // %2 + ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm6 \n" + "movdqu 0x00(%0,%3,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + "pshufb %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm5,%%xmm6 \n" + "paddsw %%xmm1,%%xmm6 \n" + "psrlw $0x2,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movq %%xmm6,(%1) \n" + "movdqu 0x8(%0),%%xmm6 \n" + "movdqu 0x8(%0,%3,1),%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + "pshufb %%xmm3,%%xmm6 \n" + "pmaddubsw %%xmm0,%%xmm6 \n" + "paddsw %%xmm1,%%xmm6 \n" + "psrlw $0x2,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movq %%xmm6,0x8(%1) \n" + "movdqu 0x10(%0),%%xmm6 \n" + "movdqu 0x10(%0,%3,1),%%xmm7 \n" + "lea 0x20(%0),%0 \n" + "pavgb %%xmm7,%%xmm6 \n" + "pshufb %%xmm4,%%xmm6 \n" + "pmaddubsw %4,%%xmm6 \n" + "paddsw %%xmm1,%%xmm6 \n" + "psrlw $0x2,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movq 
%%xmm6,0x10(%1) \n" + "lea 0x18(%1),%1 \n" + "sub $0x18,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)), // %3 + "m"(kMadd21) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } -void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movdqa %0,%%xmm2 \n" // kShuf01 - "movdqa %1,%%xmm3 \n" // kShuf11 - "movdqa %2,%%xmm4 \n" // kShuf21 - : - : "m"(kShuf01), // %0 - "m"(kShuf11), // %1 - "m"(kShuf21) // %2 - ); - asm volatile ( - "movdqa %0,%%xmm5 \n" // kMadd01 - "movdqa %1,%%xmm0 \n" // kMadd11 - "movdqa %2,%%xmm1 \n" // kRound34 - : - : "m"(kMadd01), // %0 - "m"(kMadd11), // %1 - "m"(kRound34) // %2 - ); + uint8_t* dst_ptr, + int dst_width) { + asm volatile( + "movdqa %0,%%xmm2 \n" // kShuf01 + "movdqa %1,%%xmm3 \n" // kShuf11 + "movdqa %2,%%xmm4 \n" // kShuf21 + : + : "m"(kShuf01), // %0 + "m"(kShuf11), // %1 + "m"(kShuf21) // %2 + ); + asm volatile( + "movdqa %0,%%xmm5 \n" // kMadd01 + "movdqa %1,%%xmm0 \n" // kMadd11 + "movdqa %2,%%xmm1 \n" // kRound34 + : + : "m"(kMadd01), // %0 + "m"(kMadd11), // %1 + "m"(kRound34) // %2 + ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm7) // movdqu (%0,%3,1),%%xmm7 - "pavgb %%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm5,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS(1) " \n" - "movdqu " MEMACCESS2(0x8,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3,1),%%xmm7 - "pavgb %%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm3,%%xmm6 \n" - "pmaddubsw %%xmm0,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS2(0x8,1) " \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x10,0,3,1,xmm7) // movdqu 0x10(%0,%3,1),%%xmm7 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm4,%%xmm6 \n" - "pmaddubsw %4,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x18,1) ",%1 \n" - "sub $0x18,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)), // %3 - "m"(kMadd21) // %4 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm6 \n" + "movdqu 0x00(%0,%3,1),%%xmm7 \n" + "pavgb %%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + "pshufb %%xmm2,%%xmm6 \n" + "pmaddubsw %%xmm5,%%xmm6 \n" + "paddsw %%xmm1,%%xmm6 \n" + "psrlw $0x2,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movq %%xmm6,(%1) \n" + "movdqu 0x8(%0),%%xmm6 \n" + "movdqu 0x8(%0,%3,1),%%xmm7 \n" + "pavgb %%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + "pshufb %%xmm3,%%xmm6 \n" + "pmaddubsw %%xmm0,%%xmm6 \n" + "paddsw %%xmm1,%%xmm6 \n" + "psrlw $0x2,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movq %%xmm6,0x8(%1) \n" + "movdqu 0x10(%0),%%xmm6 \n" + "movdqu 0x10(%0,%3,1),%%xmm7 \n" + "lea 0x20(%0),%0 \n" + "pavgb %%xmm6,%%xmm7 \n" + "pavgb %%xmm7,%%xmm6 \n" + "pshufb %%xmm4,%%xmm6 \n" + "pmaddubsw %4,%%xmm6 \n" + "paddsw %%xmm1,%%xmm6 \n" + "psrlw $0x2,%%xmm6 \n" + 
"packuswb %%xmm6,%%xmm6 \n" + "movq %%xmm6,0x10(%1) \n" + "lea 0x18(%1),%1 \n" + "sub $0x18,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)), // %3 + "m"(kMadd21) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } -void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" +void ScaleRowDown38_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + "movdqa %3,%%xmm4 \n" + "movdqa %4,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pshufb %%xmm4,%%xmm0 \n" - "pshufb %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "movhlps %%xmm0,%%xmm1 \n" - "movd %%xmm1," MEMACCESS2(0x8,1) " \n" - "lea " MEMLEA(0xc,1) ",%1 \n" - "sub $0xc,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "m"(kShuf38a), // %3 - "m"(kShuf38b) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5" - ); + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pshufb %%xmm4,%%xmm0 \n" + "pshufb %%xmm5,%%xmm1 \n" + "paddusb %%xmm1,%%xmm0 \n" + "movq %%xmm0,(%1) \n" + "movhlps %%xmm0,%%xmm1 \n" + "movd %%xmm1,0x8(%1) \n" + "lea 0xc(%1),%1 \n" + "sub $0xc,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "m"(kShuf38a), // %3 + "m"(kShuf38b) // %4 + : "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"); } -void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movdqa %0,%%xmm2 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm4 \n" - "movdqa %3,%%xmm5 \n" - : - : "m"(kShufAb0), // %0 - "m"(kShufAb1), // %1 - "m"(kShufAb2), // %2 - "m"(kScaleAb2) // %3 - ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm1) // movdqu (%0,%3,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "pavgb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pshufb %%xmm2,%%xmm1 \n" - "movdqa %%xmm0,%%xmm6 \n" - "pshufb %%xmm3,%%xmm6 \n" - "paddusw %%xmm6,%%xmm1 \n" - "pshufb %%xmm4,%%xmm0 \n" - "paddusw %%xmm0,%%xmm1 \n" - "pmulhuw %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movd %%xmm1," MEMACCESS(1) " \n" - "psrlq $0x10,%%xmm1 \n" - "movd %%xmm1," MEMACCESS2(0x2,1) " \n" - "lea " MEMLEA(0x6,1) ",%1 \n" - "sub $0x6,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); + uint8_t* dst_ptr, + int dst_width) { + asm volatile( + "movdqa %0,%%xmm2 \n" + "movdqa %1,%%xmm3 \n" + "movdqa %2,%%xmm4 \n" + "movdqa %3,%%xmm5 \n" + : + : "m"(kShufAb0), // %0 + "m"(kShufAb1), // %1 + "m"(kShufAb2), // %2 + "m"(kScaleAb2) // %3 + ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%3,1),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "pavgb %%xmm1,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pshufb %%xmm2,%%xmm1 \n" + "movdqa %%xmm0,%%xmm6 \n" + "pshufb %%xmm3,%%xmm6 \n" + "paddusw %%xmm6,%%xmm1 \n" + "pshufb %%xmm4,%%xmm0 \n" + 
"paddusw %%xmm0,%%xmm1 \n" + "pmulhuw %%xmm5,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movd %%xmm1,(%1) \n" + "psrlq $0x10,%%xmm1 \n" + "movd %%xmm1,0x2(%1) \n" + "lea 0x6(%1),%1 \n" + "sub $0x6,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } -void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movdqa %0,%%xmm2 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" - : - : "m"(kShufAc), // %0 - "m"(kShufAc3), // %1 - "m"(kScaleAc33) // %2 - ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm6) // movdqu (%0,%3,1),%%xmm6 - "movhlps %%xmm0,%%xmm1 \n" - "movhlps %%xmm6,%%xmm7 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm6 \n" - "punpcklbw %%xmm5,%%xmm7 \n" - "paddusw %%xmm6,%%xmm0 \n" - "paddusw %%xmm7,%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,3,2,xmm6) // movdqu (%0,%3,2),%%xmm6 - "lea " MEMLEA(0x10,0) ",%0 \n" - "movhlps %%xmm6,%%xmm7 \n" - "punpcklbw %%xmm5,%%xmm6 \n" - "punpcklbw %%xmm5,%%xmm7 \n" - "paddusw %%xmm6,%%xmm0 \n" - "paddusw %%xmm7,%%xmm1 \n" - "movdqa %%xmm0,%%xmm6 \n" - "psrldq $0x2,%%xmm0 \n" - "paddusw %%xmm0,%%xmm6 \n" - "psrldq $0x2,%%xmm0 \n" - "paddusw %%xmm0,%%xmm6 \n" - "pshufb %%xmm2,%%xmm6 \n" - "movdqa %%xmm1,%%xmm7 \n" - "psrldq $0x2,%%xmm1 \n" - "paddusw %%xmm1,%%xmm7 \n" - "psrldq $0x2,%%xmm1 \n" - "paddusw %%xmm1,%%xmm7 \n" - "pshufb %%xmm3,%%xmm7 \n" - "paddusw %%xmm7,%%xmm6 \n" - "pmulhuw %%xmm4,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movd %%xmm6," MEMACCESS(1) " \n" - "psrlq $0x10,%%xmm6 \n" - "movd %%xmm6," MEMACCESS2(0x2,1) " \n" - "lea " MEMLEA(0x6,1) ",%1 \n" - "sub $0x6,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + uint8_t* dst_ptr, + int dst_width) { + asm volatile( + "movdqa %0,%%xmm2 \n" + "movdqa %1,%%xmm3 \n" + "movdqa %2,%%xmm4 \n" + "pxor %%xmm5,%%xmm5 \n" + : + : "m"(kShufAc), // %0 + "m"(kShufAc3), // %1 + "m"(kScaleAc33) // %2 + ); + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%3,1),%%xmm6 \n" + "movhlps %%xmm0,%%xmm1 \n" + "movhlps %%xmm6,%%xmm7 \n" + "punpcklbw %%xmm5,%%xmm0 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm6 \n" + "punpcklbw %%xmm5,%%xmm7 \n" + "paddusw %%xmm6,%%xmm0 \n" + "paddusw %%xmm7,%%xmm1 \n" + "movdqu 0x00(%0,%3,2),%%xmm6 \n" + "lea 0x10(%0),%0 \n" + "movhlps %%xmm6,%%xmm7 \n" + "punpcklbw %%xmm5,%%xmm6 \n" + "punpcklbw %%xmm5,%%xmm7 \n" + "paddusw %%xmm6,%%xmm0 \n" + "paddusw %%xmm7,%%xmm1 \n" + "movdqa %%xmm0,%%xmm6 \n" + "psrldq $0x2,%%xmm0 \n" + "paddusw %%xmm0,%%xmm6 \n" + "psrldq $0x2,%%xmm0 \n" + "paddusw %%xmm0,%%xmm6 \n" + "pshufb %%xmm2,%%xmm6 \n" + "movdqa %%xmm1,%%xmm7 \n" + "psrldq $0x2,%%xmm1 \n" + "paddusw %%xmm1,%%xmm7 \n" + "psrldq $0x2,%%xmm1 \n" + "paddusw %%xmm1,%%xmm7 \n" + "pshufb %%xmm3,%%xmm7 \n" + "paddusw %%xmm7,%%xmm6 \n" + "pmulhuw %%xmm4,%%xmm6 \n" + "packuswb %%xmm6,%%xmm6 \n" + "movd %%xmm6,(%1) \n" + "psrlq $0x10,%%xmm6 \n" + "movd %%xmm6,0x2(%1) \n" + "lea 0x6(%1),%1 \n" + "sub $0x6,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + 
"+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } // Reads 16xN bytes and produces 16 shorts at a time. -void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { - asm volatile ( - "pxor %%xmm5,%%xmm5 \n" +void ScaleAddRow_SSE2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width) { + asm volatile( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16 - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,1) ",%%xmm1 \n" - "movdqa %%xmm3,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "punpckhbw %%xmm5,%%xmm3 \n" - "paddusw %%xmm2,%%xmm0 \n" - "paddusw %%xmm3,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(src_width) // %2 - : - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + "pxor %%xmm5,%%xmm5 \n" + + // 16 pixel loop. + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm3 \n" + "lea 0x10(%0),%0 \n" // src_ptr += 16 + "movdqu (%1),%%xmm0 \n" + "movdqu 0x10(%1),%%xmm1 \n" + "movdqa %%xmm3,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "punpckhbw %%xmm5,%%xmm3 \n" + "paddusw %%xmm2,%%xmm0 \n" + "paddusw %%xmm3,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(src_width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } - #ifdef HAS_SCALEADDROW_AVX2 // Reads 32 bytes and accumulates to 32 shorts at a time. -void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { - asm volatile ( - "vpxor %%ymm5,%%ymm5,%%ymm5 \n" +void ScaleAddRow_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width) { + asm volatile( - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm3 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 32 - "vpermq $0xd8,%%ymm3,%%ymm3 \n" - "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" - "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" - "vpaddusw " MEMACCESS(1) ",%%ymm2,%%ymm0 \n" - "vpaddusw " MEMACCESS2(0x20,1) ",%%ymm3,%%ymm1 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(src_width) // %2 - : - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" - ); + "vpxor %%ymm5,%%ymm5,%%ymm5 \n" + + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm3 \n" + "lea 0x20(%0),%0 \n" // src_ptr += 32 + "vpermq $0xd8,%%ymm3,%%ymm3 \n" + "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" + "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" + "vpaddusw (%1),%%ymm2,%%ymm0 \n" + "vpaddusw 0x20(%1),%%ymm3,%%ymm1 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(src_width) // %2 + : + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SCALEADDROW_AVX2 // Constant for making pixels signed to avoid pmaddubsw // saturation. 
-static uvec8 kFsub80 = - { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +static const uvec8 kFsub80 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; // Constant for making pixels unsigned and adding .5 for rounding. -static uvec16 kFadd40 = - { 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040 }; +static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040, + 0x4040, 0x4040, 0x4040, 0x4040}; // Bilinear column filtering. SSSE3 version. -void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { +void ScaleFilterCols_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx) { intptr_t x0, x1, temp_pixel; - asm volatile ( - "movd %6,%%xmm2 \n" - "movd %7,%%xmm3 \n" - "movl $0x04040000,%k2 \n" - "movd %k2,%%xmm5 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrlw $0x9,%%xmm6 \n" // 0x007f007f - "pcmpeqb %%xmm7,%%xmm7 \n" - "psrlw $15,%%xmm7 \n" // 0x00010001 + asm volatile( + "movd %6,%%xmm2 \n" + "movd %7,%%xmm3 \n" + "movl $0x04040000,%k2 \n" + "movd %k2,%%xmm5 \n" + "pcmpeqb %%xmm6,%%xmm6 \n" + "psrlw $0x9,%%xmm6 \n" // 0x007f007f + "pcmpeqb %%xmm7,%%xmm7 \n" + "psrlw $15,%%xmm7 \n" // 0x00010001 - "pextrw $0x1,%%xmm2,%k3 \n" - "subl $0x2,%5 \n" - "jl 29f \n" - "movdqa %%xmm2,%%xmm0 \n" - "paddd %%xmm3,%%xmm0 \n" - "punpckldq %%xmm0,%%xmm2 \n" - "punpckldq %%xmm3,%%xmm3 \n" - "paddd %%xmm3,%%xmm3 \n" - "pextrw $0x3,%%xmm2,%k4 \n" + "pextrw $0x1,%%xmm2,%k3 \n" + "subl $0x2,%5 \n" + "jl 29f \n" + "movdqa %%xmm2,%%xmm0 \n" + "paddd %%xmm3,%%xmm0 \n" + "punpckldq %%xmm0,%%xmm2 \n" + "punpckldq %%xmm3,%%xmm3 \n" + "paddd %%xmm3,%%xmm3 \n" + "pextrw $0x3,%%xmm2,%k4 \n" - LABELALIGN - "2: \n" - "movdqa %%xmm2,%%xmm1 \n" - "paddd %%xmm3,%%xmm2 \n" - MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2 - "movd %k2,%%xmm0 \n" - "psrlw $0x9,%%xmm1 \n" - MEMOPARG(movzwl,0x00,1,4,1,k2) // movzwl (%1,%4,1),%k2 - "movd %k2,%%xmm4 \n" - "pshufb %%xmm5,%%xmm1 \n" - "punpcklwd %%xmm4,%%xmm0 \n" - "psubb %8,%%xmm0 \n" // make pixels signed. - "pxor %%xmm6,%%xmm1 \n" // 128 -f = (f ^ 127 ) + 1 - "paddusb %%xmm7,%%xmm1 \n" - "pmaddubsw %%xmm0,%%xmm1 \n" - "pextrw $0x1,%%xmm2,%k3 \n" - "pextrw $0x3,%%xmm2,%k4 \n" - "paddw %9,%%xmm1 \n" // make pixels unsigned. - "psrlw $0x7,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movd %%xmm1,%k2 \n" - "mov %w2," MEMACCESS(0) " \n" - "lea " MEMLEA(0x2,0) ",%0 \n" - "subl $0x2,%5 \n" - "jge 2b \n" + LABELALIGN + "2: \n" + "movdqa %%xmm2,%%xmm1 \n" + "paddd %%xmm3,%%xmm2 \n" + "movzwl 0x00(%1,%3,1),%k2 \n" + "movd %k2,%%xmm0 \n" + "psrlw $0x9,%%xmm1 \n" + "movzwl 0x00(%1,%4,1),%k2 \n" + "movd %k2,%%xmm4 \n" + "pshufb %%xmm5,%%xmm1 \n" + "punpcklwd %%xmm4,%%xmm0 \n" + "psubb %8,%%xmm0 \n" // make pixels signed. + "pxor %%xmm6,%%xmm1 \n" // 128 - f = (f ^ 127 ) + + // 1 + "paddusb %%xmm7,%%xmm1 \n" + "pmaddubsw %%xmm0,%%xmm1 \n" + "pextrw $0x1,%%xmm2,%k3 \n" + "pextrw $0x3,%%xmm2,%k4 \n" + "paddw %9,%%xmm1 \n" // make pixels unsigned. + "psrlw $0x7,%%xmm1 \n" + "packuswb %%xmm1,%%xmm1 \n" + "movd %%xmm1,%k2 \n" + "mov %w2,(%0) \n" + "lea 0x2(%0),%0 \n" + "subl $0x2,%5 \n" + "jge 2b \n" - LABELALIGN - "29: \n" - "addl $0x1,%5 \n" - "jl 99f \n" - MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2 - "movd %k2,%%xmm0 \n" - "psrlw $0x9,%%xmm2 \n" - "pshufb %%xmm5,%%xmm2 \n" - "psubb %8,%%xmm0 \n" // make pixels signed. 
- "pxor %%xmm6,%%xmm2 \n" - "paddusb %%xmm7,%%xmm2 \n" - "pmaddubsw %%xmm0,%%xmm2 \n" - "paddw %9,%%xmm2 \n" // make pixels unsigned. - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm2 \n" - "movd %%xmm2,%k2 \n" - "mov %b2," MEMACCESS(0) " \n" - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "=&a"(temp_pixel), // %2 - "=&r"(x0), // %3 - "=&r"(x1), // %4 + LABELALIGN + "29: \n" + "addl $0x1,%5 \n" + "jl 99f \n" + "movzwl 0x00(%1,%3,1),%k2 \n" + "movd %k2,%%xmm0 \n" + "psrlw $0x9,%%xmm2 \n" + "pshufb %%xmm5,%%xmm2 \n" + "psubb %8,%%xmm0 \n" // make pixels signed. + "pxor %%xmm6,%%xmm2 \n" + "paddusb %%xmm7,%%xmm2 \n" + "pmaddubsw %%xmm0,%%xmm2 \n" + "paddw %9,%%xmm2 \n" // make pixels unsigned. + "psrlw $0x7,%%xmm2 \n" + "packuswb %%xmm2,%%xmm2 \n" + "movd %%xmm2,%k2 \n" + "mov %b2,(%0) \n" + "99: \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "=&a"(temp_pixel), // %2 + "=&r"(x0), // %3 + "=&r"(x1), // %4 #if defined(__x86_64__) - "+rm"(dst_width) // %5 + "+rm"(dst_width) // %5 #else - "+m"(dst_width) // %5 + "+m"(dst_width) // %5 #endif - : "rm"(x), // %6 - "rm"(dx), // %7 + : "rm"(x), // %6 + "rm"(dx), // %7 #if defined(__x86_64__) - "x"(kFsub80), // %8 - "x"(kFadd40) // %9 + "x"(kFsub80), // %8 + "x"(kFadd40) // %9 #else - "m"(kFsub80), // %8 - "m"(kFadd40) // %9 + "m"(kFsub80), // %8 + "m"(kFadd40) // %9 #endif - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - ); + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); } // Reads 4 pixels, duplicates them and writes 8 pixels. // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. -void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(0) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "sub $0x20,%2 \n" - "jg 1b \n" +void ScaleColsUp2_SSE2(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx) { + (void)x; + (void)dx; + asm volatile( - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1" - ); + LABELALIGN + "1: \n" + "movdqu (%1),%%xmm0 \n" + "lea 0x10(%1),%1 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm1 \n" + "movdqu %%xmm0,(%0) \n" + "movdqu %%xmm1,0x10(%0) \n" + "lea 0x20(%0),%0 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1"); } -void ScaleARGBRowDown2_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "shufps $0xdd,%%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1" - ); + uint8_t* dst_argb, + int dst_width) { + (void)src_stride; + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "shufps $0xdd,%%xmm1,%%xmm0 \n" + 
"movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1"); } -void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm2 \n" - "pavgb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", "xmm0", "xmm1" - ); + uint8_t* dst_argb, + int dst_width) { + (void)src_stride; + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm2 \n" + "pavgb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1"); } -void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm2 \n" - "pavgb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3" - ); + uint8_t* dst_argb, + int dst_width) { + asm volatile( + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x00(%0,%3,1),%%xmm2 \n" + "movdqu 0x10(%0,%3,1),%%xmm3 \n" + "lea 0x20(%0),%0 \n" + "pavgb %%xmm2,%%xmm0 \n" + "pavgb %%xmm3,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm2 \n" + "pavgb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(dst_width) // %2 + : "r"((intptr_t)(src_stride)) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3"); } // Reads 4 pixels at a time. // Alignment requirement: dst_argb 16 byte aligned. 
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, uint8* dst_argb, int dst_width) { +void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width) { intptr_t src_stepx_x4 = (intptr_t)(src_stepx); intptr_t src_stepx_x12; - asm volatile ( - "lea " MEMLEA3(0x00,1,4) ",%1 \n" - "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" - LABELALIGN - "1: \n" - "movd " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 - "punpckldq %%xmm1,%%xmm0 \n" - MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2 - MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3 - "lea " MEMLEA4(0x00,0,1,4) ",%0 \n" - "punpckldq %%xmm3,%%xmm2 \n" - "punpcklqdq %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stepx_x4), // %1 - "+r"(dst_argb), // %2 - "+r"(dst_width), // %3 - "=&r"(src_stepx_x12) // %4 - :: "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3" - ); + (void)src_stride; + asm volatile( + "lea 0x00(,%1,4),%1 \n" + "lea 0x00(%1,%1,2),%4 \n" + + LABELALIGN + "1: \n" + "movd (%0),%%xmm0 \n" + "movd 0x00(%0,%1,1),%%xmm1 \n" + "punpckldq %%xmm1,%%xmm0 \n" + "movd 0x00(%0,%1,2),%%xmm2 \n" + "movd 0x00(%0,%4,1),%%xmm3 \n" + "lea 0x00(%0,%1,4),%0 \n" + "punpckldq %%xmm3,%%xmm2 \n" + "punpcklqdq %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(src_stepx_x4), // %1 + "+r"(dst_argb), // %2 + "+r"(dst_width), // %3 + "=&r"(src_stepx_x12) // %4 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3"); } // Blends four 2x2 to 4x1. // Alignment requirement: dst_argb 16 byte aligned. 
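ScaleARGBRowDownEvenBox_SSE2 below averages each selected pixel with its right neighbor and the pixel pair one row down, i.e. one 2x2 box per output pixel. A scalar sketch of the intent (hypothetical helper, not part of the patch; note the SIMD code realizes the average as two rounds of pavgb, whose per-step rounding can come out one higher than the exact (a+b+c+d+2)>>2 shown here):

#include <stddef.h>
#include <stdint.h>

// Hypothetical scalar reference: 2x2 box average of every src_stepx-th pixel.
static void ScaleARGBRowDownEvenBox_Sketch(const uint8_t* src_argb,
                                           ptrdiff_t src_stride, int src_stepx,
                                           uint8_t* dst_argb, int dst_width) {
  const uint8_t* row1 = src_argb + src_stride;  // second source row
  int i, c;
  for (i = 0; i < dst_width; ++i) {
    for (c = 0; c < 4; ++c) {  // B, G, R, A channels
      dst_argb[4 * i + c] = (uint8_t)(
          (src_argb[c] + src_argb[4 + c] + row1[c] + row1[4 + c] + 2) >> 2);
    }
    src_argb += 4 * src_stepx;
    row1 += 4 * src_stepx;
  }
}
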
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width) { +void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width) { intptr_t src_stepx_x4 = (intptr_t)(src_stepx); intptr_t src_stepx_x12; intptr_t row1 = (intptr_t)(src_stride); - asm volatile ( - "lea " MEMLEA3(0x00,1,4) ",%1 \n" - "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" - "lea " MEMLEA4(0x00,0,5,1) ",%5 \n" + asm volatile( + "lea 0x00(,%1,4),%1 \n" + "lea 0x00(%1,%1,2),%4 \n" + "lea 0x00(%0,%5,1),%5 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0 - MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1 - MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1 - "lea " MEMLEA4(0x00,0,1,4) ",%0 \n" - "movq " MEMACCESS(5) ",%%xmm2 \n" - MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2 - MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3 - MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3 - "lea " MEMLEA4(0x00,5,1,4) ",%5 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm2 \n" - "pavgb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stepx_x4), // %1 - "+r"(dst_argb), // %2 - "+rm"(dst_width), // %3 - "=&r"(src_stepx_x12), // %4 - "+r"(row1) // %5 - :: "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3" - ); + LABELALIGN + "1: \n" + "movq (%0),%%xmm0 \n" + "movhps 0x00(%0,%1,1),%%xmm0 \n" + "movq 0x00(%0,%1,2),%%xmm1 \n" + "movhps 0x00(%0,%4,1),%%xmm1 \n" + "lea 0x00(%0,%1,4),%0 \n" + "movq (%5),%%xmm2 \n" + "movhps 0x00(%5,%1,1),%%xmm2 \n" + "movq 0x00(%5,%1,2),%%xmm3 \n" + "movhps 0x00(%5,%4,1),%%xmm3 \n" + "lea 0x00(%5,%1,4),%5 \n" + "pavgb %%xmm2,%%xmm0 \n" + "pavgb %%xmm3,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "shufps $0x88,%%xmm1,%%xmm0 \n" + "shufps $0xdd,%%xmm1,%%xmm2 \n" + "pavgb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jg 1b \n" + : "+r"(src_argb), // %0 + "+r"(src_stepx_x4), // %1 + "+r"(dst_argb), // %2 + "+rm"(dst_width), // %3 + "=&r"(src_stepx_x12), // %4 + "+r"(row1) // %5 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3"); } -void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { +void ScaleARGBCols_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { intptr_t x0, x1; - asm volatile ( - "movd %5,%%xmm2 \n" - "movd %6,%%xmm3 \n" - "pshufd $0x0,%%xmm2,%%xmm2 \n" - "pshufd $0x11,%%xmm3,%%xmm0 \n" - "paddd %%xmm0,%%xmm2 \n" - "paddd %%xmm3,%%xmm3 \n" - "pshufd $0x5,%%xmm3,%%xmm0 \n" - "paddd %%xmm0,%%xmm2 \n" - "paddd %%xmm3,%%xmm3 \n" - "pshufd $0x0,%%xmm3,%%xmm3 \n" - "pextrw $0x1,%%xmm2,%k0 \n" - "pextrw $0x3,%%xmm2,%k1 \n" - "cmp $0x0,%4 \n" - "jl 99f \n" - "sub $0x4,%4 \n" - "jl 49f \n" + asm volatile( + "movd %5,%%xmm2 \n" + "movd %6,%%xmm3 \n" + "pshufd $0x0,%%xmm2,%%xmm2 \n" + "pshufd $0x11,%%xmm3,%%xmm0 \n" + "paddd %%xmm0,%%xmm2 \n" + "paddd %%xmm3,%%xmm3 \n" + "pshufd $0x5,%%xmm3,%%xmm0 \n" + "paddd %%xmm0,%%xmm2 \n" + "paddd %%xmm3,%%xmm3 \n" + "pshufd $0x0,%%xmm3,%%xmm3 \n" + "pextrw $0x1,%%xmm2,%k0 \n" + "pextrw $0x3,%%xmm2,%k1 \n" + "cmp $0x0,%4 \n" + "jl 99f \n" + "sub $0x4,%4 \n" + "jl 49f \n" - LABELALIGN - "40: \n" - 
MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 - MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1 - "pextrw $0x5,%%xmm2,%k0 \n" - "pextrw $0x7,%%xmm2,%k1 \n" - "paddd %%xmm3,%%xmm2 \n" - "punpckldq %%xmm1,%%xmm0 \n" - MEMOPREG(movd,0x00,3,0,4,xmm1) // movd (%3,%0,4),%%xmm1 - MEMOPREG(movd,0x00,3,1,4,xmm4) // movd (%3,%1,4),%%xmm4 - "pextrw $0x1,%%xmm2,%k0 \n" - "pextrw $0x3,%%xmm2,%k1 \n" - "punpckldq %%xmm4,%%xmm1 \n" - "punpcklqdq %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%4 \n" - "jge 40b \n" + LABELALIGN + "40: \n" + "movd 0x00(%3,%0,4),%%xmm0 \n" + "movd 0x00(%3,%1,4),%%xmm1 \n" + "pextrw $0x5,%%xmm2,%k0 \n" + "pextrw $0x7,%%xmm2,%k1 \n" + "paddd %%xmm3,%%xmm2 \n" + "punpckldq %%xmm1,%%xmm0 \n" + "movd 0x00(%3,%0,4),%%xmm1 \n" + "movd 0x00(%3,%1,4),%%xmm4 \n" + "pextrw $0x1,%%xmm2,%k0 \n" + "pextrw $0x3,%%xmm2,%k1 \n" + "punpckldq %%xmm4,%%xmm1 \n" + "punpcklqdq %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%4 \n" + "jge 40b \n" - "49: \n" - "test $0x2,%4 \n" - "je 29f \n" - MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 - MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1 - "pextrw $0x5,%%xmm2,%k0 \n" - "punpckldq %%xmm1,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x8,2) ",%2 \n" - "29: \n" - "test $0x1,%4 \n" - "je 99f \n" - MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 - "movd %%xmm0," MEMACCESS(2) " \n" - "99: \n" - : "=&a"(x0), // %0 - "=&d"(x1), // %1 - "+r"(dst_argb), // %2 - "+r"(src_argb), // %3 - "+r"(dst_width) // %4 - : "rm"(x), // %5 - "rm"(dx) // %6 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" - ); + "49: \n" + "test $0x2,%4 \n" + "je 29f \n" + "movd 0x00(%3,%0,4),%%xmm0 \n" + "movd 0x00(%3,%1,4),%%xmm1 \n" + "pextrw $0x5,%%xmm2,%k0 \n" + "punpckldq %%xmm1,%%xmm0 \n" + "movq %%xmm0,(%2) \n" + "lea 0x8(%2),%2 \n" + "29: \n" + "test $0x1,%4 \n" + "je 99f \n" + "movd 0x00(%3,%0,4),%%xmm0 \n" + "movd %%xmm0,(%2) \n" + "99: \n" + : "=&a"(x0), // %0 + "=&d"(x1), // %1 + "+r"(dst_argb), // %2 + "+r"(src_argb), // %3 + "+r"(dst_width) // %4 + : "rm"(x), // %5 + "rm"(dx) // %6 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); } // Reads 4 pixels, duplicates them and writes 8 pixels. // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. 
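ScaleARGBColsUp2_SSE2 below is the trivial 2x horizontal upscale: every source pixel is written twice, which is why the x and dx stepping parameters are ignored. A scalar sketch (hypothetical helper, not part of the patch):

#include <stdint.h>
#include <string.h>

// Hypothetical scalar reference: duplicate each ARGB pixel horizontally.
static void ScaleARGBColsUp2_Sketch(uint8_t* dst_argb, const uint8_t* src_argb,
                                    int dst_width, int x, int dx) {
  int i;
  (void)x;   // unused, as in the SSE2 version
  (void)dx;  // unused, as in the SSE2 version
  for (i = 0; i < dst_width; i += 2) {
    memcpy(dst_argb + 4 * i, src_argb, 4);        // pixel i
    memcpy(dst_argb + 4 * (i + 1), src_argb, 4);  // duplicate into pixel i+1
    src_argb += 4;
  }
}
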
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpckldq %%xmm0,%%xmm0 \n" - "punpckhdq %%xmm1,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(0) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,0) " \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" +void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { + (void)x; + (void)dx; + asm volatile( - : "+r"(dst_argb), // %0 - "+r"(src_argb), // %1 - "+r"(dst_width) // %2 - :: "memory", "cc", NACL_R14 - "xmm0", "xmm1" - ); + LABELALIGN + "1: \n" + "movdqu (%1),%%xmm0 \n" + "lea 0x10(%1),%1 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpckldq %%xmm0,%%xmm0 \n" + "punpckhdq %%xmm1,%%xmm1 \n" + "movdqu %%xmm0,(%0) \n" + "movdqu %%xmm1,0x10(%0) \n" + "lea 0x20(%0),%0 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + + : "+r"(dst_argb), // %0 + "+r"(src_argb), // %1 + "+r"(dst_width) // %2 + ::"memory", + "cc", "xmm0", "xmm1"); } // Shuffle table for arranging 2 pixels into pairs for pmaddubsw -static uvec8 kShuffleColARGB = { - 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel - 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel +static const uvec8 kShuffleColARGB = { + 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel + 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel }; // Shuffle table for duplicating 2 fractions into 8 bytes each -static uvec8 kShuffleFractions = { - 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, +static const uvec8 kShuffleFractions = { + 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, }; // Bilinear row filtering combines 4x2 -> 4x1. 
SSSE3 version -void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { +void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { intptr_t x0, x1; - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm5 \n" - : - : "m"(kShuffleColARGB), // %0 - "m"(kShuffleFractions) // %1 - ); + asm volatile( + "movdqa %0,%%xmm4 \n" + "movdqa %1,%%xmm5 \n" + : + : "m"(kShuffleColARGB), // %0 + "m"(kShuffleFractions) // %1 + ); - asm volatile ( - "movd %5,%%xmm2 \n" - "movd %6,%%xmm3 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrlw $0x9,%%xmm6 \n" - "pextrw $0x1,%%xmm2,%k3 \n" - "sub $0x2,%2 \n" - "jl 29f \n" - "movdqa %%xmm2,%%xmm0 \n" - "paddd %%xmm3,%%xmm0 \n" - "punpckldq %%xmm0,%%xmm2 \n" - "punpckldq %%xmm3,%%xmm3 \n" - "paddd %%xmm3,%%xmm3 \n" - "pextrw $0x3,%%xmm2,%k4 \n" + asm volatile( + "movd %5,%%xmm2 \n" + "movd %6,%%xmm3 \n" + "pcmpeqb %%xmm6,%%xmm6 \n" + "psrlw $0x9,%%xmm6 \n" + "pextrw $0x1,%%xmm2,%k3 \n" + "sub $0x2,%2 \n" + "jl 29f \n" + "movdqa %%xmm2,%%xmm0 \n" + "paddd %%xmm3,%%xmm0 \n" + "punpckldq %%xmm0,%%xmm2 \n" + "punpckldq %%xmm3,%%xmm3 \n" + "paddd %%xmm3,%%xmm3 \n" + "pextrw $0x3,%%xmm2,%k4 \n" - LABELALIGN - "2: \n" - "movdqa %%xmm2,%%xmm1 \n" - "paddd %%xmm3,%%xmm2 \n" - MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0 - "psrlw $0x9,%%xmm1 \n" - MEMOPREG(movhps,0x00,1,4,4,xmm0) // movhps (%1,%4,4),%%xmm0 - "pshufb %%xmm5,%%xmm1 \n" - "pshufb %%xmm4,%%xmm0 \n" - "pxor %%xmm6,%%xmm1 \n" - "pmaddubsw %%xmm1,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "pextrw $0x1,%%xmm2,%k3 \n" - "pextrw $0x3,%%xmm2,%k4 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(0) " \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "sub $0x2,%2 \n" - "jge 2b \n" + LABELALIGN + "2: \n" + "movdqa %%xmm2,%%xmm1 \n" + "paddd %%xmm3,%%xmm2 \n" + "movq 0x00(%1,%3,4),%%xmm0 \n" + "psrlw $0x9,%%xmm1 \n" + "movhps 0x00(%1,%4,4),%%xmm0 \n" + "pshufb %%xmm5,%%xmm1 \n" + "pshufb %%xmm4,%%xmm0 \n" + "pxor %%xmm6,%%xmm1 \n" + "pmaddubsw %%xmm1,%%xmm0 \n" + "psrlw $0x7,%%xmm0 \n" + "pextrw $0x1,%%xmm2,%k3 \n" + "pextrw $0x3,%%xmm2,%k4 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,(%0) \n" + "lea 0x8(%0),%0 \n" + "sub $0x2,%2 \n" + "jge 2b \n" - LABELALIGN - "29: \n" - "add $0x1,%2 \n" - "jl 99f \n" - "psrlw $0x9,%%xmm2 \n" - MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0 - "pshufb %%xmm5,%%xmm2 \n" - "pshufb %%xmm4,%%xmm0 \n" - "pxor %%xmm6,%%xmm2 \n" - "pmaddubsw %%xmm2,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movd %%xmm0," MEMACCESS(0) " \n" + LABELALIGN + "29: \n" + "add $0x1,%2 \n" + "jl 99f \n" + "psrlw $0x9,%%xmm2 \n" + "movq 0x00(%1,%3,4),%%xmm0 \n" + "pshufb %%xmm5,%%xmm2 \n" + "pshufb %%xmm4,%%xmm0 \n" + "pxor %%xmm6,%%xmm2 \n" + "pmaddubsw %%xmm2,%%xmm0 \n" + "psrlw $0x7,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movd %%xmm0,(%0) \n" - LABELALIGN - "99: \n" - : "+r"(dst_argb), // %0 - "+r"(src_argb), // %1 - "+rm"(dst_width), // %2 - "=&r"(x0), // %3 - "=&r"(x1) // %4 - : "rm"(x), // %5 - "rm"(dx) // %6 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" - ); + LABELALIGN "99: \n" // clang-format error. + + : "+r"(dst_argb), // %0 + "+r"(src_argb), // %1 + "+rm"(dst_width), // %2 + "=&r"(x0), // %3 + "=&r"(x1) // %4 + : "rm"(x), // %5 + "rm"(dx) // %6 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } // Divide num by div and return as 16.16 fixed point result. 
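FixedDiv_X86 below widens num into edx:eax (cdq, then shld/shl by 16) so the 16-bit upshift cannot overflow before the 64-by-32-bit idiv. The equivalent portable computation, as a sketch using 64-bit arithmetic (the _Sketch names are illustrative, not from the patch):

#include <stdint.h>

// 16.16 fixed-point num / div, computed in 64 bits to avoid overflow.
static int FixedDiv_Sketch(int num, int div) {
  return (int)(((int64_t)num << 16) / div);
}

// 16.16 fixed-point "num - 1 over div - 1", mirroring the sub/sbb sequence
// in FixedDiv1_X86: ((num << 16) - 0x00010001) / (div - 1).
static int FixedDiv1_Sketch(int num, int div) {
  return (int)((((int64_t)num << 16) - 0x00010001) / (div - 1));
}
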
int FixedDiv_X86(int num, int div) { - asm volatile ( - "cdq \n" - "shld $0x10,%%eax,%%edx \n" - "shl $0x10,%%eax \n" - "idiv %1 \n" - "mov %0, %%eax \n" - : "+a"(num) // %0 - : "c"(div) // %1 - : "memory", "cc", "edx" - ); + asm volatile( + "cdq \n" + "shld $0x10,%%eax,%%edx \n" + "shl $0x10,%%eax \n" + "idiv %1 \n" + "mov %0, %%eax \n" + : "+a"(num) // %0 + : "c"(div) // %1 + : "memory", "cc", "edx"); return num; } // Divide num - 1 by div - 1 and return as 16.16 fixed point result. int FixedDiv1_X86(int num, int div) { - asm volatile ( - "cdq \n" - "shld $0x10,%%eax,%%edx \n" - "shl $0x10,%%eax \n" - "sub $0x10001,%%eax \n" - "sbb $0x0,%%edx \n" - "sub $0x1,%1 \n" - "idiv %1 \n" - "mov %0, %%eax \n" - : "+a"(num) // %0 - : "c"(div) // %1 - : "memory", "cc", "edx" - ); + asm volatile( + "cdq \n" + "shld $0x10,%%eax,%%edx \n" + "shl $0x10,%%eax \n" + "sub $0x10001,%%eax \n" + "sbb $0x0,%%edx \n" + "sub $0x1,%1 \n" + "idiv %1 \n" + "mov %0, %%eax \n" + : "+a"(num) // %0 + : "c"(div) // %1 + : "memory", "cc", "edx"); return num; } diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_mips.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_mips.cc deleted file mode 100644 index ae953073fa8d..000000000000 --- a/media/libvpx/libvpx/third_party/libyuv/source/scale_mips.cc +++ /dev/null @@ -1,644 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for GCC MIPS DSPR2 -#if !defined(LIBYUV_DISABLE_MIPS) && \ - defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \ - (_MIPS_SIM == _MIPS_SIM_ABI32) - -void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 4 \n" // iterations -> by 16 - "beqz $t9, 2f \n" - " nop \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28| - // TODO(fbarchard): Use odd pixels instead of even. 
- "precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0| - "precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8| - "precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16| - "precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "addiu $t9, $t9, -1 \n" - "sw $t8, 0(%[dst]) \n" - "sw $t0, 4(%[dst]) \n" - "sw $t1, 8(%[dst]) \n" - "sw $t2, 12(%[dst]) \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 16 \n" - - "2: \n" - "andi $t9, %[dst_width], 0xf \n" // residue - "beqz $t9, 3f \n" - " nop \n" - - "21: \n" - "lbu $t0, 0(%[src_ptr]) \n" - "addiu %[src_ptr], %[src_ptr], 2 \n" - "addiu $t9, $t9, -1 \n" - "sb $t0, 0(%[dst]) \n" - "bgtz $t9, 21b \n" - " addiu %[dst], %[dst], 1 \n" - - "3: \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [dst] "+r" (dst) - : [dst_width] "r" (dst_width) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9" - ); -} - -void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - const uint8* t = src_ptr + src_stride; - - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 3 \n" // iterations -> step 8 - "bltz $t9, 2f \n" - " nop \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t4, 0(%[t]) \n" // |19|18|17|16| - "lw $t5, 4(%[t]) \n" // |23|22|21|20| - "lw $t6, 8(%[t]) \n" // |27|26|25|24| - "lw $t7, 12(%[t]) \n" // |31|30|29|28| - "addiu $t9, $t9, -1 \n" - "srl $t8, $t0, 16 \n" // |X|X|3|2| - "ins $t0, $t4, 16, 16 \n" // |17|16|1|0| - "ins $t4, $t8, 0, 16 \n" // |19|18|3|2| - "raddu.w.qb $t0, $t0 \n" // |17+16+1+0| - "raddu.w.qb $t4, $t4 \n" // |19+18+3+2| - "shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2 - "shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2 - "srl $t8, $t1, 16 \n" // |X|X|7|6| - "ins $t1, $t5, 16, 16 \n" // |21|20|5|4| - "ins $t5, $t8, 0, 16 \n" // |22|23|7|6| - "raddu.w.qb $t1, $t1 \n" // |21+20+5+4| - "raddu.w.qb $t5, $t5 \n" // |23+22+7+6| - "shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2 - "shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2 - "srl $t8, $t2, 16 \n" // |X|X|11|10| - "ins $t2, $t6, 16, 16 \n" // |25|24|9|8| - "ins $t6, $t8, 0, 16 \n" // |27|26|11|10| - "raddu.w.qb $t2, $t2 \n" // |25+24+9+8| - "raddu.w.qb $t6, $t6 \n" // |27+26+11+10| - "shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2 - "shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2 - "srl $t8, $t3, 16 \n" // |X|X|15|14| - "ins $t3, $t7, 16, 16 \n" // |29|28|13|12| - "ins $t7, $t8, 0, 16 \n" // |31|30|15|14| - "raddu.w.qb $t3, $t3 \n" // |29+28+13+12| - "raddu.w.qb $t7, $t7 \n" // |31+30+15+14| - "shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2 - "shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2 - "addiu %[src_ptr], %[src_ptr], 16 \n" - "addiu %[t], %[t], 16 \n" - "sb $t0, 0(%[dst]) \n" - "sb $t4, 1(%[dst]) \n" - "sb $t1, 2(%[dst]) \n" - "sb $t5, 3(%[dst]) \n" - "sb $t2, 4(%[dst]) \n" - "sb $t6, 5(%[dst]) \n" - "sb $t3, 6(%[dst]) \n" - "sb $t7, 7(%[dst]) \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 8 \n" - - "2: \n" - "andi $t9, %[dst_width], 0x7 \n" // x = residue - "beqz $t9, 3f \n" - " nop \n" - - "21: \n" - "lwr $t1, 0(%[src_ptr]) \n" - "lwl $t1, 3(%[src_ptr]) \n" - "lwr $t2, 0(%[t]) \n" - "lwl $t2, 3(%[t]) \n" - "srl $t8, $t1, 16 \n" - "ins $t1, $t2, 16, 16 \n" - "ins $t2, $t8, 0, 16 \n" - "raddu.w.qb $t1, $t1 \n" - "raddu.w.qb $t2, $t2 \n" - "shra_r.w $t1, $t1, 2 \n" - "shra_r.w $t2, $t2, 2 \n" - "sb $t1, 0(%[dst]) \n" - "sb $t2, 1(%[dst]) \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "addiu $t9, $t9, -2 
\n" - "addiu %[t], %[t], 4 \n" - "bgtz $t9, 21b \n" - " addiu %[dst], %[dst], 2 \n" - - "3: \n" - ".set pop \n" - - : [src_ptr] "+r" (src_ptr), - [dst] "+r" (dst), [t] "+r" (t) - : [dst_width] "r" (dst_width) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9" - ); -} - -void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 3 \n" - "beqz $t9, 2f \n" - " nop \n" - - "1: \n" - "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28| - "precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0| - "precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8| - "precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16| - "precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24| - "precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0| - "precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "addiu $t9, $t9, -1 \n" - "sw $t1, 0(%[dst]) \n" - "sw $t5, 4(%[dst]) \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 8 \n" - - "2: \n" - "andi $t9, %[dst_width], 7 \n" // residue - "beqz $t9, 3f \n" - " nop \n" - - "21: \n" - "lbu $t1, 0(%[src_ptr]) \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "addiu $t9, $t9, -1 \n" - "sb $t1, 0(%[dst]) \n" - "bgtz $t9, 21b \n" - " addiu %[dst], %[dst], 1 \n" - - "3: \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [dst] "+r" (dst) - : [dst_width] "r" (dst_width) - : "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9" - ); -} - -void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - intptr_t stride = src_stride; - const uint8* s1 = src_ptr + stride; - const uint8* s2 = s1 + stride; - const uint8* s3 = s2 + stride; - - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 1 \n" - "andi $t8, %[dst_width], 1 \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 0(%[s1]) \n" // |7|6|5|4| - "lw $t2, 0(%[s2]) \n" // |11|10|9|8| - "lw $t3, 0(%[s3]) \n" // |15|14|13|12| - "lw $t4, 4(%[src_ptr]) \n" // |19|18|17|16| - "lw $t5, 4(%[s1]) \n" // |23|22|21|20| - "lw $t6, 4(%[s2]) \n" // |27|26|25|24| - "lw $t7, 4(%[s3]) \n" // |31|30|29|28| - "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0| - "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4| - "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8| - "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12| - "raddu.w.qb $t4, $t4 \n" // |19 + 18 + 17 + 16| - "raddu.w.qb $t5, $t5 \n" // |23 + 22 + 21 + 20| - "raddu.w.qb $t6, $t6 \n" // |27 + 26 + 25 + 24| - "raddu.w.qb $t7, $t7 \n" // |31 + 30 + 29 + 28| - "add $t0, $t0, $t1 \n" - "add $t1, $t2, $t3 \n" - "add $t0, $t0, $t1 \n" - "add $t4, $t4, $t5 \n" - "add $t6, $t6, $t7 \n" - "add $t4, $t4, $t6 \n" - "shra_r.w $t0, $t0, 4 \n" - "shra_r.w $t4, $t4, 4 \n" - "sb $t0, 0(%[dst]) \n" - "sb $t4, 1(%[dst]) \n" - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[s1], %[s1], 8 \n" - "addiu %[s2], %[s2], 8 \n" - "addiu %[s3], %[s3], 8 \n" - "addiu $t9, $t9, -1 \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 2 \n" - "beqz $t8, 2f \n" - " nop \n" - - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 0(%[s1]) \n" // |7|6|5|4| - "lw $t2, 0(%[s2]) \n" // |11|10|9|8| - "lw $t3, 0(%[s3]) \n" // 
|15|14|13|12| - "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0| - "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4| - "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8| - "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12| - "add $t0, $t0, $t1 \n" - "add $t1, $t2, $t3 \n" - "add $t0, $t0, $t1 \n" - "shra_r.w $t0, $t0, 4 \n" - "sb $t0, 0(%[dst]) \n" - - "2: \n" - ".set pop \n" - - : [src_ptr] "+r" (src_ptr), - [dst] "+r" (dst), - [s1] "+r" (s1), - [s2] "+r" (s2), - [s3] "+r" (s3) - : [dst_width] "r" (dst_width) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6","t7", "t8", "t9" - ); -} - -void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "1: \n" - "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28| - "precrq.qb.ph $t0, $t2, $t4 \n" // |7|5|15|13| - "precrq.qb.ph $t9, $t6, $t8 \n" // |23|21|31|30| - "addiu %[dst_width], %[dst_width], -24 \n" - "ins $t1, $t1, 8, 16 \n" // |3|1|0|X| - "ins $t4, $t0, 8, 16 \n" // |X|15|13|12| - "ins $t5, $t5, 8, 16 \n" // |19|17|16|X| - "ins $t8, $t9, 8, 16 \n" // |X|31|29|28| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "packrl.ph $t0, $t3, $t0 \n" // |9|8|7|5| - "packrl.ph $t9, $t7, $t9 \n" // |25|24|23|21| - "prepend $t1, $t2, 8 \n" // |4|3|1|0| - "prepend $t3, $t4, 24 \n" // |15|13|12|11| - "prepend $t5, $t6, 8 \n" // |20|19|17|16| - "prepend $t7, $t8, 24 \n" // |31|29|28|27| - "sw $t1, 0(%[dst]) \n" - "sw $t0, 4(%[dst]) \n" - "sw $t3, 8(%[dst]) \n" - "sw $t5, 12(%[dst]) \n" - "sw $t9, 16(%[dst]) \n" - "sw $t7, 20(%[dst]) \n" - "bnez %[dst_width], 1b \n" - " addiu %[dst], %[dst], 24 \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [dst] "+r" (dst), - [dst_width] "+r" (dst_width) - : - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6","t7", "t8", "t9" - ); -} - -void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "repl.ph $t3, 3 \n" // 0x00030003 - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| - "rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1| - "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| - "muleu_s.ph.qbl $t4, $t2, $t3 \n" // |S0*3|S3*3| - "muleu_s.ph.qbl $t5, $t6, $t3 \n" // |T0*3|T3*3| - "andi $t0, $t2, 0xFFFF \n" // |0|0|S2|S1| - "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1| - "raddu.w.qb $t0, $t0 \n" - "raddu.w.qb $t1, $t1 \n" - "shra_r.w $t0, $t0, 1 \n" - "shra_r.w $t1, $t1, 1 \n" - "preceu.ph.qbr $t2, $t2 \n" // |0|S2|0|S1| - "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1| - "rotr $t2, $t2, 16 \n" // |0|S1|0|S2| - "rotr $t6, $t6, 16 \n" // |0|T1|0|T2| - "addu.ph $t2, $t2, $t4 \n" - "addu.ph $t6, $t6, $t5 \n" - "sll $t5, $t0, 1 \n" - "add $t0, $t5, $t0 \n" - "shra_r.ph $t2, $t2, 2 \n" - "shra_r.ph $t6, $t6, 2 \n" - "shll.ph $t4, $t2, 1 \n" - "addq.ph $t4, $t4, $t2 \n" - "addu $t0, $t0, $t1 \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "shra_r.w $t0, $t0, 2 \n" - "addu.ph $t6, $t6, $t4 \n" - "shra_r.ph $t6, $t6, 2 \n" - "srl $t1, $t6, 16 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "sb $t1, 0(%[d]) \n" - "sb $t0, 1(%[d]) \n" - "sb $t6, 2(%[d]) \n" - "bgtz %[dst_width], 1b \n" - " addiu %[d], %[d], 
3 \n" - "3: \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [src_stride] "+r" (src_stride), - [d] "+r" (d), - [dst_width] "+r" (dst_width) - : - : "t0", "t1", "t2", "t3", - "t4", "t5", "t6" - ); -} - -void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "repl.ph $t2, 3 \n" // 0x00030003 - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| - "rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1| - "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| - "muleu_s.ph.qbl $t3, $t4, $t2 \n" // |S0*3|S3*3| - "muleu_s.ph.qbl $t5, $t6, $t2 \n" // |T0*3|T3*3| - "andi $t0, $t4, 0xFFFF \n" // |0|0|S2|S1| - "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1| - "raddu.w.qb $t0, $t0 \n" - "raddu.w.qb $t1, $t1 \n" - "shra_r.w $t0, $t0, 1 \n" - "shra_r.w $t1, $t1, 1 \n" - "preceu.ph.qbr $t4, $t4 \n" // |0|S2|0|S1| - "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1| - "rotr $t4, $t4, 16 \n" // |0|S1|0|S2| - "rotr $t6, $t6, 16 \n" // |0|T1|0|T2| - "addu.ph $t4, $t4, $t3 \n" - "addu.ph $t6, $t6, $t5 \n" - "shra_r.ph $t6, $t6, 2 \n" - "shra_r.ph $t4, $t4, 2 \n" - "addu.ph $t6, $t6, $t4 \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "shra_r.ph $t6, $t6, 1 \n" - "addu $t0, $t0, $t1 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "shra_r.w $t0, $t0, 1 \n" - "srl $t1, $t6, 16 \n" - "sb $t1, 0(%[d]) \n" - "sb $t0, 1(%[d]) \n" - "sb $t6, 2(%[d]) \n" - "bgtz %[dst_width], 1b \n" - " addiu %[d], %[d], 3 \n" - "3: \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [src_stride] "+r" (src_stride), - [d] "+r" (d), - [dst_width] "+r" (dst_width) - : - : "t0", "t1", "t2", "t3", - "t4", "t5", "t6" - ); -} - -void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28| - "wsbh $t0, $t0 \n" // |2|3|0|1| - "wsbh $t6, $t6 \n" // |26|27|24|25| - "srl $t0, $t0, 8 \n" // |X|2|3|0| - "srl $t3, $t3, 16 \n" // |X|X|15|14| - "srl $t5, $t5, 16 \n" // |X|X|23|22| - "srl $t7, $t7, 16 \n" // |X|X|31|30| - "ins $t1, $t2, 24, 8 \n" // |8|6|5|4| - "ins $t6, $t5, 0, 8 \n" // |26|27|24|22| - "ins $t1, $t0, 0, 16 \n" // |8|6|3|0| - "ins $t6, $t7, 24, 8 \n" // |30|27|24|22| - "prepend $t2, $t3, 24 \n" // |X|15|14|11| - "ins $t4, $t4, 16, 8 \n" // |19|16|17|X| - "ins $t4, $t2, 0, 16 \n" // |19|16|14|11| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "addiu %[dst_width], %[dst_width], -12 \n" - "addiu $t8,%[dst_width], -12 \n" - "sw $t1, 0(%[dst]) \n" - "sw $t4, 4(%[dst]) \n" - "sw $t6, 8(%[dst]) \n" - "bgez $t8, 1b \n" - " addiu %[dst], %[dst], 12 \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [dst] "+r" (dst), - [dst_width] "+r" (dst_width) - : - : "t0", "t1", "t2", "t3", "t4", - "t5", "t6", "t7", "t8" - ); -} - -void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - intptr_t stride = src_stride; - const uint8* t = src_ptr + stride; - const int c = 0x2AAA; - - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lw 
$t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4| - "lw $t2, 0(%[t]) \n" // |T3|T2|T1|T0| - "lw $t3, 4(%[t]) \n" // |T7|T6|T5|T4| - "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6| - "packrl.ph $t4, $t1, $t3 \n" // |S7|S6|T7|T6| - "packrl.ph $t5, $t3, $t1 \n" // |T5|T4|S5|S4| - "raddu.w.qb $t4, $t4 \n" // S7+S6+T7+T6 - "raddu.w.qb $t5, $t5 \n" // T5+T4+S5+S4 - "precrq.qb.ph $t6, $t0, $t2 \n" // |S3|S1|T3|T1| - "precrq.qb.ph $t6, $t6, $t6 \n" // |S3|T3|S3|T3| - "srl $t4, $t4, 2 \n" // t4 / 4 - "srl $t6, $t6, 16 \n" // |0|0|S3|T3| - "raddu.w.qb $t6, $t6 \n" // 0+0+S3+T3 - "addu $t6, $t5, $t6 \n" - "mul $t6, $t6, %[c] \n" // t6 * 0x2AAA - "sll $t0, $t0, 8 \n" // |S2|S1|S0|0| - "sll $t2, $t2, 8 \n" // |T2|T1|T0|0| - "raddu.w.qb $t0, $t0 \n" // S2+S1+S0+0 - "raddu.w.qb $t2, $t2 \n" // T2+T1+T0+0 - "addu $t0, $t0, $t2 \n" - "mul $t0, $t0, %[c] \n" // t0 * 0x2AAA - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[t], %[t], 8 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "addiu %[dst_ptr], %[dst_ptr], 3 \n" - "srl $t6, $t6, 16 \n" - "srl $t0, $t0, 16 \n" - "sb $t4, -1(%[dst_ptr]) \n" - "sb $t6, -2(%[dst_ptr]) \n" - "bgtz %[dst_width], 1b \n" - " sb $t0, -3(%[dst_ptr]) \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [dst_ptr] "+r" (dst_ptr), - [t] "+r" (t), - [dst_width] "+r" (dst_width) - : [c] "r" (c) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6" - ); -} - -void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - intptr_t stride = src_stride; - const uint8* s1 = src_ptr + stride; - stride += stride; - const uint8* s2 = src_ptr + stride; - const int c1 = 0x1C71; - const int c2 = 0x2AAA; - - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4| - "lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0| - "lw $t3, 4(%[s1]) \n" // |T7|T6|T5|T4| - "lw $t4, 0(%[s2]) \n" // |R3|R2|R1|R0| - "lw $t5, 4(%[s2]) \n" // |R7|R6|R5|R4| - "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6| - "packrl.ph $t6, $t1, $t3 \n" // |S7|S6|T7|T6| - "raddu.w.qb $t6, $t6 \n" // S7+S6+T7+T6 - "packrl.ph $t7, $t3, $t1 \n" // |T5|T4|S5|S4| - "raddu.w.qb $t7, $t7 \n" // T5+T4+S5+S4 - "sll $t8, $t5, 16 \n" // |R5|R4|0|0| - "raddu.w.qb $t8, $t8 \n" // R5+R4 - "addu $t7, $t7, $t8 \n" - "srl $t8, $t5, 16 \n" // |0|0|R7|R6| - "raddu.w.qb $t8, $t8 \n" // R7 + R6 - "addu $t6, $t6, $t8 \n" - "mul $t6, $t6, %[c2] \n" // t6 * 0x2AAA - "precrq.qb.ph $t8, $t0, $t2 \n" // |S3|S1|T3|T1| - "precrq.qb.ph $t8, $t8, $t4 \n" // |S3|T3|R3|R1| - "srl $t8, $t8, 8 \n" // |0|S3|T3|R3| - "raddu.w.qb $t8, $t8 \n" // S3 + T3 + R3 - "addu $t7, $t7, $t8 \n" - "mul $t7, $t7, %[c1] \n" // t7 * 0x1C71 - "sll $t0, $t0, 8 \n" // |S2|S1|S0|0| - "sll $t2, $t2, 8 \n" // |T2|T1|T0|0| - "sll $t4, $t4, 8 \n" // |R2|R1|R0|0| - "raddu.w.qb $t0, $t0 \n" - "raddu.w.qb $t2, $t2 \n" - "raddu.w.qb $t4, $t4 \n" - "addu $t0, $t0, $t2 \n" - "addu $t0, $t0, $t4 \n" - "mul $t0, $t0, %[c1] \n" // t0 * 0x1C71 - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[s1], %[s1], 8 \n" - "addiu %[s2], %[s2], 8 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "addiu %[dst_ptr], %[dst_ptr], 3 \n" - "srl $t6, $t6, 16 \n" - "srl $t7, $t7, 16 \n" - "srl $t0, $t0, 16 \n" - "sb $t6, -1(%[dst_ptr]) \n" - "sb $t7, -2(%[dst_ptr]) \n" - "bgtz %[dst_width], 1b \n" - " sb $t0, -3(%[dst_ptr]) \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [dst_ptr] "+r" (dst_ptr), - [s1] "+r" (s1), - [s2] "+r" (s2), - [dst_width] "+r" (dst_width) - : [c1] "r" (c1), [c2] "r" (c2) - : "t0", 
"t1", "t2", "t3", "t4", - "t5", "t6", "t7", "t8" - ); -} - -#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_msa.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_msa.cc new file mode 100644 index 000000000000..482a521f0d2f --- /dev/null +++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_msa.cc @@ -0,0 +1,949 @@ +/* + * Copyright 2016 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "libyuv/scale_row.h" + +// This module is for GCC MSA +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#include "libyuv/macros_msa.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#define LOAD_INDEXED_DATA(srcp, indx0, out0) \ + { \ + out0[0] = srcp[indx0[0]]; \ + out0[1] = srcp[indx0[1]]; \ + out0[2] = srcp[indx0[2]]; \ + out0[3] = srcp[indx0[3]]; \ + } + +void ScaleARGBRowDown2_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { + int x; + v16u8 src0, src1, dst0; + (void)src_stride; + + for (x = 0; x < dst_width; x += 4) { + src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); + dst0 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); + ST_UB(dst0, dst_argb); + src_argb += 32; + dst_argb += 16; + } +} + +void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { + int x; + v16u8 src0, src1, vec0, vec1, dst0; + (void)src_stride; + + for (x = 0; x < dst_width; x += 4) { + src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); + vec0 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0); + vec1 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); + dst0 = (v16u8)__msa_aver_u_b((v16u8)vec0, (v16u8)vec1); + ST_UB(dst0, dst_argb); + src_argb += 32; + dst_argb += 16; + } +} + +void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { + int x; + const uint8_t* s = src_argb; + const uint8_t* t = src_argb + src_stride; + v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0; + v8u16 reg0, reg1, reg2, reg3; + v16i8 shuffler = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15}; + + for (x = 0; x < dst_width; x += 4) { + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)t, 0); + src3 = (v16u8)__msa_ld_b((v16i8*)t, 16); + vec0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src0, (v16i8)src0); + vec1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1); + vec2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src2, (v16i8)src2); + vec3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src3, (v16i8)src3); + reg0 = __msa_hadd_u_h(vec0, vec0); + reg1 = __msa_hadd_u_h(vec1, vec1); + reg2 = __msa_hadd_u_h(vec2, vec2); + reg3 = __msa_hadd_u_h(vec3, vec3); + reg0 += reg2; + reg1 += reg3; + reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 2); + reg1 = (v8u16)__msa_srari_h((v8i16)reg1, 2); + dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); + ST_UB(dst0, dst_argb); + s += 32; + t += 32; + dst_argb += 16; + } 
+} + +void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_argb, + int dst_width) { + int x; + int32_t stepx = src_stepx * 4; + int32_t data0, data1, data2, data3; + (void)src_stride; + + for (x = 0; x < dst_width; x += 4) { + data0 = LW(src_argb); + data1 = LW(src_argb + stepx); + data2 = LW(src_argb + stepx * 2); + data3 = LW(src_argb + stepx * 3); + SW(data0, dst_argb); + SW(data1, dst_argb + 4); + SW(data2, dst_argb + 8); + SW(data3, dst_argb + 12); + src_argb += stepx * 4; + dst_argb += 16; + } +} + +void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width) { + int x; + const uint8_t* nxt_argb = src_argb + src_stride; + int32_t stepx = src_stepx * 4; + int64_t data0, data1, data2, data3; + v16u8 src0 = {0}, src1 = {0}, src2 = {0}, src3 = {0}; + v16u8 vec0, vec1, vec2, vec3; + v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; + v16u8 dst0; + + for (x = 0; x < dst_width; x += 4) { + data0 = LD(src_argb); + data1 = LD(src_argb + stepx); + data2 = LD(src_argb + stepx * 2); + data3 = LD(src_argb + stepx * 3); + src0 = (v16u8)__msa_insert_d((v2i64)src0, 0, data0); + src0 = (v16u8)__msa_insert_d((v2i64)src0, 1, data1); + src1 = (v16u8)__msa_insert_d((v2i64)src1, 0, data2); + src1 = (v16u8)__msa_insert_d((v2i64)src1, 1, data3); + data0 = LD(nxt_argb); + data1 = LD(nxt_argb + stepx); + data2 = LD(nxt_argb + stepx * 2); + data3 = LD(nxt_argb + stepx * 3); + src2 = (v16u8)__msa_insert_d((v2i64)src2, 0, data0); + src2 = (v16u8)__msa_insert_d((v2i64)src2, 1, data1); + src3 = (v16u8)__msa_insert_d((v2i64)src3, 0, data2); + src3 = (v16u8)__msa_insert_d((v2i64)src3, 1, data3); + vec0 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src0); + vec1 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src1); + vec2 = (v16u8)__msa_ilvl_b((v16i8)src2, (v16i8)src0); + vec3 = (v16u8)__msa_ilvl_b((v16i8)src3, (v16i8)src1); + reg0 = __msa_hadd_u_h(vec0, vec0); + reg1 = __msa_hadd_u_h(vec1, vec1); + reg2 = __msa_hadd_u_h(vec2, vec2); + reg3 = __msa_hadd_u_h(vec3, vec3); + reg4 = (v8u16)__msa_pckev_d((v2i64)reg2, (v2i64)reg0); + reg5 = (v8u16)__msa_pckev_d((v2i64)reg3, (v2i64)reg1); + reg6 = (v8u16)__msa_pckod_d((v2i64)reg2, (v2i64)reg0); + reg7 = (v8u16)__msa_pckod_d((v2i64)reg3, (v2i64)reg1); + reg4 += reg6; + reg5 += reg7; + reg4 = (v8u16)__msa_srari_h((v8i16)reg4, 2); + reg5 = (v8u16)__msa_srari_h((v8i16)reg5, 2); + dst0 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); + ST_UB(dst0, dst_argb); + src_argb += stepx * 4; + nxt_argb += stepx * 4; + dst_argb += 16; + } +} + +void ScaleRowDown2_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + int x; + v16u8 src0, src1, src2, src3, dst0, dst1; + (void)src_stride; + + for (x = 0; x < dst_width; x += 32) { + src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32); + src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48); + dst0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); + dst1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); + ST_UB2(dst0, dst1, dst, 16); + src_ptr += 64; + dst += 32; + } +} + +void ScaleRowDown2Linear_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + int x; + v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0, dst1; + (void)src_stride; + + for (x = 0; x < dst_width; x += 32) { + src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); + src1 = 
(v16u8)__msa_ld_b((v16i8*)src_ptr, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32); + src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48); + vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); + vec2 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); + vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); + vec3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); + dst0 = __msa_aver_u_b(vec1, vec0); + dst1 = __msa_aver_u_b(vec3, vec2); + ST_UB2(dst0, dst1, dst, 16); + src_ptr += 64; + dst += 32; + } +} + +void ScaleRowDown2Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + int x; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1; + v8u16 vec0, vec1, vec2, vec3; + + for (x = 0; x < dst_width; x += 32) { + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); + src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); + src4 = (v16u8)__msa_ld_b((v16i8*)t, 0); + src5 = (v16u8)__msa_ld_b((v16i8*)t, 16); + src6 = (v16u8)__msa_ld_b((v16i8*)t, 32); + src7 = (v16u8)__msa_ld_b((v16i8*)t, 48); + vec0 = __msa_hadd_u_h(src0, src0); + vec1 = __msa_hadd_u_h(src1, src1); + vec2 = __msa_hadd_u_h(src2, src2); + vec3 = __msa_hadd_u_h(src3, src3); + vec0 += __msa_hadd_u_h(src4, src4); + vec1 += __msa_hadd_u_h(src5, src5); + vec2 += __msa_hadd_u_h(src6, src6); + vec3 += __msa_hadd_u_h(src7, src7); + vec0 = (v8u16)__msa_srari_h((v8i16)vec0, 2); + vec1 = (v8u16)__msa_srari_h((v8i16)vec1, 2); + vec2 = (v8u16)__msa_srari_h((v8i16)vec2, 2); + vec3 = (v8u16)__msa_srari_h((v8i16)vec3, 2); + dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); + dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); + ST_UB2(dst0, dst1, dst, 16); + s += 64; + t += 64; + dst += 32; + } +} + +void ScaleRowDown4_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + int x; + v16u8 src0, src1, src2, src3, vec0, vec1, dst0; + (void)src_stride; + + for (x = 0; x < dst_width; x += 16) { + src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32); + src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48); + vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); + vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); + dst0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); + ST_UB(dst0, dst); + src_ptr += 64; + dst += 16; + } +} + +void ScaleRowDown4Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + int x; + const uint8_t* s = src_ptr; + const uint8_t* t0 = s + src_stride; + const uint8_t* t1 = s + src_stride * 2; + const uint8_t* t2 = s + src_stride * 3; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0; + v8u16 vec0, vec1, vec2, vec3; + v4u32 reg0, reg1, reg2, reg3; + + for (x = 0; x < dst_width; x += 16) { + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); + src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); + src4 = (v16u8)__msa_ld_b((v16i8*)t0, 0); + src5 = (v16u8)__msa_ld_b((v16i8*)t0, 16); + src6 = (v16u8)__msa_ld_b((v16i8*)t0, 32); + src7 = (v16u8)__msa_ld_b((v16i8*)t0, 48); + vec0 = __msa_hadd_u_h(src0, src0); + vec1 = __msa_hadd_u_h(src1, src1); + vec2 = __msa_hadd_u_h(src2, src2); + vec3 = __msa_hadd_u_h(src3, src3); + vec0 += __msa_hadd_u_h(src4, src4); + vec1 += __msa_hadd_u_h(src5, src5); + vec2 += 
__msa_hadd_u_h(src6, src6); + vec3 += __msa_hadd_u_h(src7, src7); + src0 = (v16u8)__msa_ld_b((v16i8*)t1, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)t1, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)t1, 32); + src3 = (v16u8)__msa_ld_b((v16i8*)t1, 48); + src4 = (v16u8)__msa_ld_b((v16i8*)t2, 0); + src5 = (v16u8)__msa_ld_b((v16i8*)t2, 16); + src6 = (v16u8)__msa_ld_b((v16i8*)t2, 32); + src7 = (v16u8)__msa_ld_b((v16i8*)t2, 48); + vec0 += __msa_hadd_u_h(src0, src0); + vec1 += __msa_hadd_u_h(src1, src1); + vec2 += __msa_hadd_u_h(src2, src2); + vec3 += __msa_hadd_u_h(src3, src3); + vec0 += __msa_hadd_u_h(src4, src4); + vec1 += __msa_hadd_u_h(src5, src5); + vec2 += __msa_hadd_u_h(src6, src6); + vec3 += __msa_hadd_u_h(src7, src7); + reg0 = __msa_hadd_u_w(vec0, vec0); + reg1 = __msa_hadd_u_w(vec1, vec1); + reg2 = __msa_hadd_u_w(vec2, vec2); + reg3 = __msa_hadd_u_w(vec3, vec3); + reg0 = (v4u32)__msa_srari_w((v4i32)reg0, 4); + reg1 = (v4u32)__msa_srari_w((v4i32)reg1, 4); + reg2 = (v4u32)__msa_srari_w((v4i32)reg2, 4); + reg3 = (v4u32)__msa_srari_w((v4i32)reg3, 4); + vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); + vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); + dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); + ST_UB(dst0, dst); + s += 64; + t0 += 64; + t1 += 64; + t2 += 64; + dst += 16; + } +} + +void ScaleRowDown38_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + int x, width; + uint64_t dst0; + uint32_t dst1; + v16u8 src0, src1, vec0; + v16i8 mask = {0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0}; + (void)src_stride; + + assert(dst_width % 3 == 0); + width = dst_width / 3; + + for (x = 0; x < width; x += 4) { + src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); + vec0 = (v16u8)__msa_vshf_b(mask, (v16i8)src1, (v16i8)src0); + dst0 = __msa_copy_u_d((v2i64)vec0, 0); + dst1 = __msa_copy_u_w((v4i32)vec0, 2); + SD(dst0, dst); + SW(dst1, dst + 8); + src_ptr += 32; + dst += 12; + } +} + +void ScaleRowDown38_2_Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + int x, width; + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; + uint64_t dst0; + uint32_t dst1; + v16u8 src0, src1, src2, src3, out; + v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v4u32 tmp0, tmp1, tmp2, tmp3, tmp4; + v8i16 zero = {0}; + v8i16 mask = {0, 1, 2, 8, 3, 4, 5, 9}; + v16i8 dst_mask = {0, 2, 16, 4, 6, 18, 8, 10, 20, 12, 14, 22, 0, 0, 0, 0}; + v4u32 const_0x2AAA = (v4u32)__msa_fill_w(0x2AAA); + v4u32 const_0x4000 = (v4u32)__msa_fill_w(0x4000); + + assert((dst_width % 3 == 0) && (dst_width > 0)); + width = dst_width / 3; + + for (x = 0; x < width; x += 4) { + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)t, 0); + src3 = (v16u8)__msa_ld_b((v16i8*)t, 16); + vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); + vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); + vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); + vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); + vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); + vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); + vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); + vec3 = __msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); + vec4 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec0); + vec5 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec1); + vec6 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec2); + vec7 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec3); + 
vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0); + vec1 = (v8u16)__msa_pckod_w((v4i32)vec3, (v4i32)vec2); + vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0); + tmp0 = __msa_hadd_u_w(vec4, vec4); + tmp1 = __msa_hadd_u_w(vec5, vec5); + tmp2 = __msa_hadd_u_w(vec6, vec6); + tmp3 = __msa_hadd_u_w(vec7, vec7); + tmp4 = __msa_hadd_u_w(vec0, vec0); + vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); + vec1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); + tmp0 = __msa_hadd_u_w(vec0, vec0); + tmp1 = __msa_hadd_u_w(vec1, vec1); + tmp0 *= const_0x2AAA; + tmp1 *= const_0x2AAA; + tmp4 *= const_0x4000; + tmp0 = (v4u32)__msa_srai_w((v4i32)tmp0, 16); + tmp1 = (v4u32)__msa_srai_w((v4i32)tmp1, 16); + tmp4 = (v4u32)__msa_srai_w((v4i32)tmp4, 16); + vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); + vec1 = (v8u16)__msa_pckev_h((v8i16)tmp4, (v8i16)tmp4); + out = (v16u8)__msa_vshf_b(dst_mask, (v16i8)vec1, (v16i8)vec0); + dst0 = __msa_copy_u_d((v2i64)out, 0); + dst1 = __msa_copy_u_w((v4i32)out, 2); + SD(dst0, dst_ptr); + SW(dst1, dst_ptr + 8); + s += 32; + t += 32; + dst_ptr += 12; + } +} + +void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + int x, width; + const uint8_t* s = src_ptr; + const uint8_t* t0 = s + src_stride; + const uint8_t* t1 = s + src_stride * 2; + uint64_t dst0; + uint32_t dst1; + v16u8 src0, src1, src2, src3, src4, src5, out; + v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; + v4u32 tmp0, tmp1, tmp2, tmp3, tmp4; + v8u16 zero = {0}; + v8i16 mask = {0, 1, 2, 8, 3, 4, 5, 9}; + v16i8 dst_mask = {0, 2, 16, 4, 6, 18, 8, 10, 20, 12, 14, 22, 0, 0, 0, 0}; + v4u32 const_0x1C71 = (v4u32)__msa_fill_w(0x1C71); + v4u32 const_0x2AAA = (v4u32)__msa_fill_w(0x2AAA); + + assert((dst_width % 3 == 0) && (dst_width > 0)); + width = dst_width / 3; + + for (x = 0; x < width; x += 4) { + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)t0, 0); + src3 = (v16u8)__msa_ld_b((v16i8*)t0, 16); + src4 = (v16u8)__msa_ld_b((v16i8*)t1, 0); + src5 = (v16u8)__msa_ld_b((v16i8*)t1, 16); + vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); + vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); + vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); + vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); + vec4 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src4); + vec5 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src4); + vec6 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src5); + vec7 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src5); + vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); + vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); + vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); + vec3 = __msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); + vec0 += __msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); + vec1 += __msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); + vec2 += __msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); + vec3 += __msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); + vec4 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec0); + vec5 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec1); + vec6 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec2); + vec7 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec3); + vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0); + vec1 = (v8u16)__msa_pckod_w((v4i32)vec3, (v4i32)vec2); + vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0); + tmp0 = __msa_hadd_u_w(vec4, vec4); + tmp1 = __msa_hadd_u_w(vec5, vec5); + tmp2 = __msa_hadd_u_w(vec6, vec6); + tmp3 = 
__msa_hadd_u_w(vec7, vec7); + tmp4 = __msa_hadd_u_w(vec0, vec0); + vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); + vec1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); + tmp0 = __msa_hadd_u_w(vec0, vec0); + tmp1 = __msa_hadd_u_w(vec1, vec1); + tmp0 *= const_0x1C71; + tmp1 *= const_0x1C71; + tmp4 *= const_0x2AAA; + tmp0 = (v4u32)__msa_srai_w((v4i32)tmp0, 16); + tmp1 = (v4u32)__msa_srai_w((v4i32)tmp1, 16); + tmp4 = (v4u32)__msa_srai_w((v4i32)tmp4, 16); + vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); + vec1 = (v8u16)__msa_pckev_h((v8i16)tmp4, (v8i16)tmp4); + out = (v16u8)__msa_vshf_b(dst_mask, (v16i8)vec1, (v16i8)vec0); + dst0 = __msa_copy_u_d((v2i64)out, 0); + dst1 = __msa_copy_u_w((v4i32)out, 2); + SD(dst0, dst_ptr); + SW(dst1, dst_ptr + 8); + s += 32; + t0 += 32; + t1 += 32; + dst_ptr += 12; + } +} + +void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { + int x; + v16u8 src0; + v8u16 dst0, dst1; + v16i8 zero = {0}; + + assert(src_width > 0); + + for (x = 0; x < src_width; x += 16) { + src0 = LD_UB(src_ptr); + dst0 = (v8u16)__msa_ld_h((v8i16*)dst_ptr, 0); + dst1 = (v8u16)__msa_ld_h((v8i16*)dst_ptr, 16); + dst0 += (v8u16)__msa_ilvr_b(zero, (v16i8)src0); + dst1 += (v8u16)__msa_ilvl_b(zero, (v16i8)src0); + ST_UH2(dst0, dst1, dst_ptr, 8); + src_ptr += 16; + dst_ptr += 16; + } +} + +void ScaleFilterCols_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx) { + int j; + v4i32 vec_x = __msa_fill_w(x); + v4i32 vec_dx = __msa_fill_w(dx); + v4i32 vec_const = {0, 1, 2, 3}; + v4i32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; + v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + v8u16 reg0, reg1; + v16u8 dst0; + v4i32 const_0xFFFF = __msa_fill_w(0xFFFF); + v4i32 const_0x40 = __msa_fill_w(0x40); + + vec0 = vec_dx * vec_const; + vec1 = vec_dx * 4; + vec_x += vec0; + + for (j = 0; j < dst_width - 1; j += 16) { + vec2 = vec_x >> 16; + vec6 = vec_x & const_0xFFFF; + vec_x += vec1; + vec3 = vec_x >> 16; + vec7 = vec_x & const_0xFFFF; + vec_x += vec1; + vec4 = vec_x >> 16; + vec8 = vec_x & const_0xFFFF; + vec_x += vec1; + vec5 = vec_x >> 16; + vec9 = vec_x & const_0xFFFF; + vec_x += vec1; + vec6 >>= 9; + vec7 >>= 9; + vec8 >>= 9; + vec9 >>= 9; + LOAD_INDEXED_DATA(src_ptr, vec2, tmp0); + LOAD_INDEXED_DATA(src_ptr, vec3, tmp1); + LOAD_INDEXED_DATA(src_ptr, vec4, tmp2); + LOAD_INDEXED_DATA(src_ptr, vec5, tmp3); + vec2 += 1; + vec3 += 1; + vec4 += 1; + vec5 += 1; + LOAD_INDEXED_DATA(src_ptr, vec2, tmp4); + LOAD_INDEXED_DATA(src_ptr, vec3, tmp5); + LOAD_INDEXED_DATA(src_ptr, vec4, tmp6); + LOAD_INDEXED_DATA(src_ptr, vec5, tmp7); + tmp4 -= tmp0; + tmp5 -= tmp1; + tmp6 -= tmp2; + tmp7 -= tmp3; + tmp4 *= vec6; + tmp5 *= vec7; + tmp6 *= vec8; + tmp7 *= vec9; + tmp4 += const_0x40; + tmp5 += const_0x40; + tmp6 += const_0x40; + tmp7 += const_0x40; + tmp4 >>= 7; + tmp5 >>= 7; + tmp6 >>= 7; + tmp7 >>= 7; + tmp0 += tmp4; + tmp1 += tmp5; + tmp2 += tmp6; + tmp3 += tmp7; + reg0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); + reg1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); + dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); + __msa_st_b(dst0, dst_ptr, 0); + dst_ptr += 16; + } +} + +void ScaleARGBCols_MSA(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { + const uint32_t* src = (const uint32_t*)(src_argb); + uint32_t* dst = (uint32_t*)(dst_argb); + int j; + v4i32 x_vec = __msa_fill_w(x); + v4i32 dx_vec = __msa_fill_w(dx); + v4i32 const_vec = {0, 1, 2, 3}; + v4i32 vec0, vec1, vec2; + 
v4i32 dst0; + + vec0 = dx_vec * const_vec; + vec1 = dx_vec * 4; + x_vec += vec0; + + for (j = 0; j < dst_width; j += 4) { + vec2 = x_vec >> 16; + x_vec += vec1; + LOAD_INDEXED_DATA(src, vec2, dst0); + __msa_st_w(dst0, dst, 0); + dst += 4; + } +} + +void ScaleARGBFilterCols_MSA(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { + const uint32_t* src = (const uint32_t*)(src_argb); + int j; + v4u32 src0, src1, src2, src3; + v4u32 vec0, vec1, vec2, vec3; + v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; + v16u8 mult0, mult1, mult2, mult3; + v8u16 tmp0, tmp1, tmp2, tmp3; + v16u8 dst0, dst1; + v4u32 vec_x = (v4u32)__msa_fill_w(x); + v4u32 vec_dx = (v4u32)__msa_fill_w(dx); + v4u32 vec_const = {0, 1, 2, 3}; + v16u8 const_0x7f = (v16u8)__msa_fill_b(0x7f); + + vec0 = vec_dx * vec_const; + vec1 = vec_dx * 4; + vec_x += vec0; + + for (j = 0; j < dst_width - 1; j += 8) { + vec2 = vec_x >> 16; + reg0 = (v16u8)(vec_x >> 9); + vec_x += vec1; + vec3 = vec_x >> 16; + reg1 = (v16u8)(vec_x >> 9); + vec_x += vec1; + reg0 = reg0 & const_0x7f; + reg1 = reg1 & const_0x7f; + reg0 = (v16u8)__msa_shf_b((v16i8)reg0, 0); + reg1 = (v16u8)__msa_shf_b((v16i8)reg1, 0); + reg2 = reg0 ^ const_0x7f; + reg3 = reg1 ^ const_0x7f; + mult0 = (v16u8)__msa_ilvr_b((v16i8)reg0, (v16i8)reg2); + mult1 = (v16u8)__msa_ilvl_b((v16i8)reg0, (v16i8)reg2); + mult2 = (v16u8)__msa_ilvr_b((v16i8)reg1, (v16i8)reg3); + mult3 = (v16u8)__msa_ilvl_b((v16i8)reg1, (v16i8)reg3); + LOAD_INDEXED_DATA(src, vec2, src0); + LOAD_INDEXED_DATA(src, vec3, src1); + vec2 += 1; + vec3 += 1; + LOAD_INDEXED_DATA(src, vec2, src2); + LOAD_INDEXED_DATA(src, vec3, src3); + reg4 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src0); + reg5 = (v16u8)__msa_ilvl_b((v16i8)src2, (v16i8)src0); + reg6 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src1); + reg7 = (v16u8)__msa_ilvl_b((v16i8)src3, (v16i8)src1); + tmp0 = __msa_dotp_u_h(reg4, mult0); + tmp1 = __msa_dotp_u_h(reg5, mult1); + tmp2 = __msa_dotp_u_h(reg6, mult2); + tmp3 = __msa_dotp_u_h(reg7, mult3); + tmp0 >>= 7; + tmp1 >>= 7; + tmp2 >>= 7; + tmp3 >>= 7; + dst0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); + dst1 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2); + __msa_st_b(dst0, dst_argb, 0); + __msa_st_b(dst1, dst_argb, 16); + dst_argb += 32; + } +} + +void ScaleRowDown34_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + int x; + (void)src_stride; + v16u8 src0, src1, src2, src3; + v16u8 vec0, vec1, vec2; + v16i8 mask0 = {0, 1, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20}; + v16i8 mask1 = {5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25}; + v16i8 mask2 = {11, 12, 13, 15, 16, 17, 19, 20, + 21, 23, 24, 25, 27, 28, 29, 31}; + + assert((dst_width % 3 == 0) && (dst_width > 0)); + + for (x = 0; x < dst_width; x += 48) { + src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32); + src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48); + vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0); + vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src2, (v16i8)src1); + vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src2); + __msa_st_b((v16i8)vec0, dst, 0); + __msa_st_b((v16i8)vec1, dst, 16); + __msa_st_b((v16i8)vec2, dst, 32); + src_ptr += 64; + dst += 48; + } +} + +void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width) { + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; + int x; + v16u8 
src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2; + v16u8 vec0, vec1, vec2, vec3, vec4, vec5; + v16u8 vec6, vec7, vec8, vec9, vec10, vec11; + v8i16 reg0, reg1, reg2, reg3, reg4, reg5; + v8i16 reg6, reg7, reg8, reg9, reg10, reg11; + v16u8 const0 = {3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1}; + v16u8 const1 = {1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1}; + v16u8 const2 = {1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3}; + v16i8 mask0 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; + v16i8 mask1 = {10, 11, 12, 13, 13, 14, 14, 15, + 16, 17, 17, 18, 18, 19, 20, 21}; + v16i8 mask2 = {5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15}; + v8i16 shft0 = {2, 1, 2, 2, 1, 2, 2, 1}; + v8i16 shft1 = {2, 2, 1, 2, 2, 1, 2, 2}; + v8i16 shft2 = {1, 2, 2, 1, 2, 2, 1, 2}; + + assert((dst_width % 3 == 0) && (dst_width > 0)); + + for (x = 0; x < dst_width; x += 48) { + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); + src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); + src4 = (v16u8)__msa_ld_b((v16i8*)t, 0); + src5 = (v16u8)__msa_ld_b((v16i8*)t, 16); + src6 = (v16u8)__msa_ld_b((v16i8*)t, 32); + src7 = (v16u8)__msa_ld_b((v16i8*)t, 48); + vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src0, (v16i8)src0); + vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); + vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src1, (v16i8)src1); + vec3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src2, (v16i8)src2); + vec4 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); + vec5 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src3); + vec6 = (v16u8)__msa_vshf_b(mask0, (v16i8)src4, (v16i8)src4); + vec7 = (v16u8)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4); + vec8 = (v16u8)__msa_vshf_b(mask2, (v16i8)src5, (v16i8)src5); + vec9 = (v16u8)__msa_vshf_b(mask0, (v16i8)src6, (v16i8)src6); + vec10 = (v16u8)__msa_vshf_b(mask1, (v16i8)src7, (v16i8)src6); + vec11 = (v16u8)__msa_vshf_b(mask2, (v16i8)src7, (v16i8)src7); + reg0 = (v8i16)__msa_dotp_u_h(vec0, const0); + reg1 = (v8i16)__msa_dotp_u_h(vec1, const1); + reg2 = (v8i16)__msa_dotp_u_h(vec2, const2); + reg3 = (v8i16)__msa_dotp_u_h(vec3, const0); + reg4 = (v8i16)__msa_dotp_u_h(vec4, const1); + reg5 = (v8i16)__msa_dotp_u_h(vec5, const2); + reg6 = (v8i16)__msa_dotp_u_h(vec6, const0); + reg7 = (v8i16)__msa_dotp_u_h(vec7, const1); + reg8 = (v8i16)__msa_dotp_u_h(vec8, const2); + reg9 = (v8i16)__msa_dotp_u_h(vec9, const0); + reg10 = (v8i16)__msa_dotp_u_h(vec10, const1); + reg11 = (v8i16)__msa_dotp_u_h(vec11, const2); + reg0 = __msa_srar_h(reg0, shft0); + reg1 = __msa_srar_h(reg1, shft1); + reg2 = __msa_srar_h(reg2, shft2); + reg3 = __msa_srar_h(reg3, shft0); + reg4 = __msa_srar_h(reg4, shft1); + reg5 = __msa_srar_h(reg5, shft2); + reg6 = __msa_srar_h(reg6, shft0); + reg7 = __msa_srar_h(reg7, shft1); + reg8 = __msa_srar_h(reg8, shft2); + reg9 = __msa_srar_h(reg9, shft0); + reg10 = __msa_srar_h(reg10, shft1); + reg11 = __msa_srar_h(reg11, shft2); + reg0 = reg0 * 3 + reg6; + reg1 = reg1 * 3 + reg7; + reg2 = reg2 * 3 + reg8; + reg3 = reg3 * 3 + reg9; + reg4 = reg4 * 3 + reg10; + reg5 = reg5 * 3 + reg11; + reg0 = __msa_srari_h(reg0, 2); + reg1 = __msa_srari_h(reg1, 2); + reg2 = __msa_srari_h(reg2, 2); + reg3 = __msa_srari_h(reg3, 2); + reg4 = __msa_srari_h(reg4, 2); + reg5 = __msa_srari_h(reg5, 2); + dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); + dst1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2); + dst2 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); + __msa_st_b((v16i8)dst0, d, 0); + 
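// The two 3/4 box kernels differ only in how the horizontally filtered rows
// are merged: this _0 variant weights the nearer row 3:1, while the _1
// variant that follows averages the rows 1:1, both with round-to-nearest.
// Scalar equivalents of the reg0..reg5 merges above; the helper names are
// illustrative, not part of this patch.
#include <stdint.h>
static uint8_t MergeRows31(uint16_t r0, uint16_t r1) {
  return (uint8_t)((3 * r0 + r1 + 2) >> 2);  // matches __msa_srari_h(.., 2)
}
static uint8_t MergeRows11(uint16_t r0, uint16_t r1) {
  return (uint8_t)((r0 + r1 + 1) >> 1);  // matches __msa_srari_h(.., 1)
}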
__msa_st_b((v16i8)dst1, d, 16); + __msa_st_b((v16i8)dst2, d, 32); + s += 64; + t += 64; + d += 48; + } +} + +void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width) { + const uint8_t* s = src_ptr; + const uint8_t* t = src_ptr + src_stride; + int x; + v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2; + v16u8 vec0, vec1, vec2, vec3, vec4, vec5; + v16u8 vec6, vec7, vec8, vec9, vec10, vec11; + v8i16 reg0, reg1, reg2, reg3, reg4, reg5; + v8i16 reg6, reg7, reg8, reg9, reg10, reg11; + v16u8 const0 = {3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1}; + v16u8 const1 = {1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1}; + v16u8 const2 = {1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3}; + v16i8 mask0 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; + v16i8 mask1 = {10, 11, 12, 13, 13, 14, 14, 15, + 16, 17, 17, 18, 18, 19, 20, 21}; + v16i8 mask2 = {5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15}; + v8i16 shft0 = {2, 1, 2, 2, 1, 2, 2, 1}; + v8i16 shft1 = {2, 2, 1, 2, 2, 1, 2, 2}; + v8i16 shft2 = {1, 2, 2, 1, 2, 2, 1, 2}; + + assert((dst_width % 3 == 0) && (dst_width > 0)); + + for (x = 0; x < dst_width; x += 48) { + src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); + src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); + src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); + src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); + src4 = (v16u8)__msa_ld_b((v16i8*)t, 0); + src5 = (v16u8)__msa_ld_b((v16i8*)t, 16); + src6 = (v16u8)__msa_ld_b((v16i8*)t, 32); + src7 = (v16u8)__msa_ld_b((v16i8*)t, 48); + vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src0, (v16i8)src0); + vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); + vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src1, (v16i8)src1); + vec3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src2, (v16i8)src2); + vec4 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); + vec5 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src3); + vec6 = (v16u8)__msa_vshf_b(mask0, (v16i8)src4, (v16i8)src4); + vec7 = (v16u8)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4); + vec8 = (v16u8)__msa_vshf_b(mask2, (v16i8)src5, (v16i8)src5); + vec9 = (v16u8)__msa_vshf_b(mask0, (v16i8)src6, (v16i8)src6); + vec10 = (v16u8)__msa_vshf_b(mask1, (v16i8)src7, (v16i8)src6); + vec11 = (v16u8)__msa_vshf_b(mask2, (v16i8)src7, (v16i8)src7); + reg0 = (v8i16)__msa_dotp_u_h(vec0, const0); + reg1 = (v8i16)__msa_dotp_u_h(vec1, const1); + reg2 = (v8i16)__msa_dotp_u_h(vec2, const2); + reg3 = (v8i16)__msa_dotp_u_h(vec3, const0); + reg4 = (v8i16)__msa_dotp_u_h(vec4, const1); + reg5 = (v8i16)__msa_dotp_u_h(vec5, const2); + reg6 = (v8i16)__msa_dotp_u_h(vec6, const0); + reg7 = (v8i16)__msa_dotp_u_h(vec7, const1); + reg8 = (v8i16)__msa_dotp_u_h(vec8, const2); + reg9 = (v8i16)__msa_dotp_u_h(vec9, const0); + reg10 = (v8i16)__msa_dotp_u_h(vec10, const1); + reg11 = (v8i16)__msa_dotp_u_h(vec11, const2); + reg0 = __msa_srar_h(reg0, shft0); + reg1 = __msa_srar_h(reg1, shft1); + reg2 = __msa_srar_h(reg2, shft2); + reg3 = __msa_srar_h(reg3, shft0); + reg4 = __msa_srar_h(reg4, shft1); + reg5 = __msa_srar_h(reg5, shft2); + reg6 = __msa_srar_h(reg6, shft0); + reg7 = __msa_srar_h(reg7, shft1); + reg8 = __msa_srar_h(reg8, shft2); + reg9 = __msa_srar_h(reg9, shft0); + reg10 = __msa_srar_h(reg10, shft1); + reg11 = __msa_srar_h(reg11, shft2); + reg0 += reg6; + reg1 += reg7; + reg2 += reg8; + reg3 += reg9; + reg4 += reg10; + reg5 += reg11; + reg0 = __msa_srari_h(reg0, 1); + reg1 = __msa_srari_h(reg1, 1); + reg2 = __msa_srari_h(reg2, 1); + reg3 = __msa_srari_h(reg3, 1); + reg4 = __msa_srari_h(reg4, 
1); + reg5 = __msa_srari_h(reg5, 1); + dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); + dst1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2); + dst2 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); + __msa_st_b((v16i8)dst0, d, 0); + __msa_st_b((v16i8)dst1, d, 16); + __msa_st_b((v16i8)dst2, d, 32); + s += 64; + t += 64; + d += 48; + } +} + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + +#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_neon.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_neon.cc index 44b0c8080d1e..459a2995dfe7 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/scale_neon.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_neon.cc @@ -23,564 +23,541 @@ extern "C" { // Provided by Fritz Koenig // Read 32x1 throw away even pixels, and write 16x1. -void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - "1: \n" - // load even pixels into q0, odd into q1 - MEMACCESS(0) - "vld2.8 {q0, q1}, [%0]! \n" - "subs %2, %2, #16 \n" // 16 processed per loop - MEMACCESS(1) - "vst1.8 {q1}, [%1]! \n" // store odd pixels - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst), // %1 - "+r"(dst_width) // %2 - : - : "q0", "q1" // Clobber List - ); +void ScaleRowDown2_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + // load even pixels into q0, odd into q1 + "vld2.8 {q0, q1}, [%0]! \n" + "subs %2, %2, #16 \n" // 16 processed per loop + "vst1.8 {q1}, [%1]! \n" // store odd pixels + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst), // %1 + "+r"(dst_width) // %2 + : + : "q0", "q1" // Clobber List + ); } // Read 32x1 average down and write 16x1. -void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld1.8 {q0, q1}, [%0]! \n" // load pixels and post inc - "subs %2, %2, #16 \n" // 16 processed per loop - "vpaddl.u8 q0, q0 \n" // add adjacent - "vpaddl.u8 q1, q1 \n" - "vrshrn.u16 d0, q0, #1 \n" // downshift, round and pack - "vrshrn.u16 d1, q1, #1 \n" - MEMACCESS(1) - "vst1.8 {q0}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst), // %1 - "+r"(dst_width) // %2 - : - : "q0", "q1" // Clobber List - ); +void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + "vld2.8 {q0, q1}, [%0]! \n" // load 32 pixels + "subs %2, %2, #16 \n" // 16 processed per loop + "vrhadd.u8 q0, q0, q1 \n" // rounding half add + "vst1.8 {q0}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst), // %1 + "+r"(dst_width) // %2 + : + : "q0", "q1" // Clobber List + ); } // Read 32x2 average down and write 16x1. -void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - // change the stride to row 2 pointer - "add %1, %0 \n" - "1: \n" - MEMACCESS(0) - "vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc - MEMACCESS(1) - "vld1.8 {q2, q3}, [%1]! 
\n" // load row 2 and post inc - "subs %3, %3, #16 \n" // 16 processed per loop - "vpaddl.u8 q0, q0 \n" // row 1 add adjacent - "vpaddl.u8 q1, q1 \n" - "vpadal.u8 q0, q2 \n" // row 2 add adjacent + row1 - "vpadal.u8 q1, q3 \n" - "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack - "vrshrn.u16 d1, q1, #2 \n" - MEMACCESS(2) - "vst1.8 {q0}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(src_stride), // %1 - "+r"(dst), // %2 - "+r"(dst_width) // %3 - : - : "q0", "q1", "q2", "q3" // Clobber List - ); +void ScaleRowDown2Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + asm volatile( + // change the stride to row 2 pointer + "add %1, %0 \n" + "1: \n" + "vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc + "vld1.8 {q2, q3}, [%1]! \n" // load row 2 and post inc + "subs %3, %3, #16 \n" // 16 processed per loop + "vpaddl.u8 q0, q0 \n" // row 1 add adjacent + "vpaddl.u8 q1, q1 \n" + "vpadal.u8 q0, q2 \n" // row 2 add adjacent + + // row1 + "vpadal.u8 q1, q3 \n" + "vrshrn.u16 d0, q0, #2 \n" // downshift, round and + // pack + "vrshrn.u16 d1, q1, #2 \n" + "vst1.8 {q0}, [%2]! \n" + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(src_stride), // %1 + "+r"(dst), // %2 + "+r"(dst_width) // %3 + : + : "q0", "q1", "q2", "q3" // Clobber List + ); } -void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 - "subs %2, %2, #8 \n" // 8 processed per loop - MEMACCESS(1) - "vst1.8 {d2}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : - : "q0", "q1", "memory", "cc" - ); +void ScaleRowDown4_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 + "subs %2, %2, #8 \n" // 8 processed per loop + "vst1.8 {d2}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : + : "q0", "q1", "memory", "cc"); } -void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - const uint8* src_ptr1 = src_ptr + src_stride; - const uint8* src_ptr2 = src_ptr + src_stride * 2; - const uint8* src_ptr3 = src_ptr + src_stride * 3; -asm volatile ( - "1: \n" - MEMACCESS(0) - "vld1.8 {q0}, [%0]! \n" // load up 16x4 - MEMACCESS(3) - "vld1.8 {q1}, [%3]! \n" - MEMACCESS(4) - "vld1.8 {q2}, [%4]! \n" - MEMACCESS(5) - "vld1.8 {q3}, [%5]! \n" - "subs %2, %2, #4 \n" - "vpaddl.u8 q0, q0 \n" - "vpadal.u8 q0, q1 \n" - "vpadal.u8 q0, q2 \n" - "vpadal.u8 q0, q3 \n" - "vpaddl.u16 q0, q0 \n" - "vrshrn.u32 d0, q0, #4 \n" // divide by 16 w/rounding - "vmovn.u16 d0, q0 \n" - MEMACCESS(1) - "vst1.32 {d0[0]}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(src_ptr1), // %3 - "+r"(src_ptr2), // %4 - "+r"(src_ptr3) // %5 - : - : "q0", "q1", "q2", "q3", "memory", "cc" - ); +void ScaleRowDown4Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + const uint8_t* src_ptr1 = src_ptr + src_stride; + const uint8_t* src_ptr2 = src_ptr + src_stride * 2; + const uint8_t* src_ptr3 = src_ptr + src_stride * 3; + asm volatile( + "1: \n" + "vld1.8 {q0}, [%0]! \n" // load up 16x4 + "vld1.8 {q1}, [%3]! \n" + "vld1.8 {q2}, [%4]! \n" + "vld1.8 {q3}, [%5]! 
\n" + "subs %2, %2, #4 \n" + "vpaddl.u8 q0, q0 \n" + "vpadal.u8 q0, q1 \n" + "vpadal.u8 q0, q2 \n" + "vpadal.u8 q0, q3 \n" + "vpaddl.u16 q0, q0 \n" + "vrshrn.u32 d0, q0, #4 \n" // divide by 16 w/rounding + "vmovn.u16 d0, q0 \n" + "vst1.32 {d0[0]}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width), // %2 + "+r"(src_ptr1), // %3 + "+r"(src_ptr2), // %4 + "+r"(src_ptr3) // %5 + : + : "q0", "q1", "q2", "q3", "memory", "cc"); } // Down scale from 4 to 3 pixels. Use the neon multilane read/write // to load up the every 4th pixel into a 4 different registers. // Point samples 32 pixels to 24 pixels. -void ScaleRowDown34_NEON(const uint8* src_ptr, +void ScaleRowDown34_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 - "subs %2, %2, #24 \n" - "vmov d2, d3 \n" // order d0, d1, d2 - MEMACCESS(1) - "vst3.8 {d0, d1, d2}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : - : "d0", "d1", "d2", "d3", "memory", "cc" - ); + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 + "subs %2, %2, #24 \n" + "vmov d2, d3 \n" // order d0, d1, d2 + "vst3.8 {d0, d1, d2}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : + : "d0", "d1", "d2", "d3", "memory", "cc"); } -void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, +void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vmov.u8 d24, #3 \n" - "add %3, %0 \n" - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 - MEMACCESS(3) - "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1 - "subs %2, %2, #24 \n" + uint8_t* dst_ptr, + int dst_width) { + asm volatile( + "vmov.u8 d24, #3 \n" + "add %3, %0 \n" + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 + "vld4.8 {d4, d5, d6, d7}, [%3]! 
\n" // src line 1 + "subs %2, %2, #24 \n" - // filter src line 0 with src line 1 - // expand chars to shorts to allow for room - // when adding lines together - "vmovl.u8 q8, d4 \n" - "vmovl.u8 q9, d5 \n" - "vmovl.u8 q10, d6 \n" - "vmovl.u8 q11, d7 \n" + // filter src line 0 with src line 1 + // expand chars to shorts to allow for room + // when adding lines together + "vmovl.u8 q8, d4 \n" + "vmovl.u8 q9, d5 \n" + "vmovl.u8 q10, d6 \n" + "vmovl.u8 q11, d7 \n" - // 3 * line_0 + line_1 - "vmlal.u8 q8, d0, d24 \n" - "vmlal.u8 q9, d1, d24 \n" - "vmlal.u8 q10, d2, d24 \n" - "vmlal.u8 q11, d3, d24 \n" + // 3 * line_0 + line_1 + "vmlal.u8 q8, d0, d24 \n" + "vmlal.u8 q9, d1, d24 \n" + "vmlal.u8 q10, d2, d24 \n" + "vmlal.u8 q11, d3, d24 \n" - // (3 * line_0 + line_1) >> 2 - "vqrshrn.u16 d0, q8, #2 \n" - "vqrshrn.u16 d1, q9, #2 \n" - "vqrshrn.u16 d2, q10, #2 \n" - "vqrshrn.u16 d3, q11, #2 \n" + // (3 * line_0 + line_1) >> 2 + "vqrshrn.u16 d0, q8, #2 \n" + "vqrshrn.u16 d1, q9, #2 \n" + "vqrshrn.u16 d2, q10, #2 \n" + "vqrshrn.u16 d3, q11, #2 \n" - // a0 = (src[0] * 3 + s[1] * 1) >> 2 - "vmovl.u8 q8, d1 \n" - "vmlal.u8 q8, d0, d24 \n" - "vqrshrn.u16 d0, q8, #2 \n" + // a0 = (src[0] * 3 + s[1] * 1) >> 2 + "vmovl.u8 q8, d1 \n" + "vmlal.u8 q8, d0, d24 \n" + "vqrshrn.u16 d0, q8, #2 \n" - // a1 = (src[1] * 1 + s[2] * 1) >> 1 - "vrhadd.u8 d1, d1, d2 \n" + // a1 = (src[1] * 1 + s[2] * 1) >> 1 + "vrhadd.u8 d1, d1, d2 \n" - // a2 = (src[2] * 1 + s[3] * 3) >> 2 - "vmovl.u8 q8, d2 \n" - "vmlal.u8 q8, d3, d24 \n" - "vqrshrn.u16 d2, q8, #2 \n" + // a2 = (src[2] * 1 + s[3] * 3) >> 2 + "vmovl.u8 q8, d2 \n" + "vmlal.u8 q8, d3, d24 \n" + "vqrshrn.u16 d2, q8, #2 \n" - MEMACCESS(1) - "vst3.8 {d0, d1, d2}, [%1]! \n" + "vst3.8 {d0, d1, d2}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(src_stride) // %3 - : - : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory", "cc" - ); + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width), // %2 + "+r"(src_stride) // %3 + : + : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory", + "cc"); } -void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, +void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vmov.u8 d24, #3 \n" - "add %3, %0 \n" - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 - MEMACCESS(3) - "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1 - "subs %2, %2, #24 \n" - // average src line 0 with src line 1 - "vrhadd.u8 q0, q0, q2 \n" - "vrhadd.u8 q1, q1, q3 \n" + uint8_t* dst_ptr, + int dst_width) { + asm volatile( + "vmov.u8 d24, #3 \n" + "add %3, %0 \n" + "1: \n" + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 + "vld4.8 {d4, d5, d6, d7}, [%3]! 
\n" // src line 1 + "subs %2, %2, #24 \n" + // average src line 0 with src line 1 + "vrhadd.u8 q0, q0, q2 \n" + "vrhadd.u8 q1, q1, q3 \n" - // a0 = (src[0] * 3 + s[1] * 1) >> 2 - "vmovl.u8 q3, d1 \n" - "vmlal.u8 q3, d0, d24 \n" - "vqrshrn.u16 d0, q3, #2 \n" + // a0 = (src[0] * 3 + s[1] * 1) >> 2 + "vmovl.u8 q3, d1 \n" + "vmlal.u8 q3, d0, d24 \n" + "vqrshrn.u16 d0, q3, #2 \n" - // a1 = (src[1] * 1 + s[2] * 1) >> 1 - "vrhadd.u8 d1, d1, d2 \n" + // a1 = (src[1] * 1 + s[2] * 1) >> 1 + "vrhadd.u8 d1, d1, d2 \n" - // a2 = (src[2] * 1 + s[3] * 3) >> 2 - "vmovl.u8 q3, d2 \n" - "vmlal.u8 q3, d3, d24 \n" - "vqrshrn.u16 d2, q3, #2 \n" + // a2 = (src[2] * 1 + s[3] * 3) >> 2 + "vmovl.u8 q3, d2 \n" + "vmlal.u8 q3, d3, d24 \n" + "vqrshrn.u16 d2, q3, #2 \n" - MEMACCESS(1) - "vst3.8 {d0, d1, d2}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(src_stride) // %3 - : - : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc" - ); + "vst3.8 {d0, d1, d2}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width), // %2 + "+r"(src_stride) // %3 + : + : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc"); } #define HAS_SCALEROWDOWN38_NEON -static uvec8 kShuf38 = - { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 }; -static uvec8 kShuf38_2 = - { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 }; -static vec16 kMult38_Div6 = - { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, - 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 }; -static vec16 kMult38_Div9 = - { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, - 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 }; +static const uvec8 kShuf38 = {0, 3, 6, 8, 11, 14, 16, 19, + 22, 24, 27, 30, 0, 0, 0, 0}; +static const uvec8 kShuf38_2 = {0, 8, 16, 2, 10, 17, 4, 12, + 18, 6, 14, 19, 0, 0, 0, 0}; +static const vec16 kMult38_Div6 = {65536 / 12, 65536 / 12, 65536 / 12, + 65536 / 12, 65536 / 12, 65536 / 12, + 65536 / 12, 65536 / 12}; +static const vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18, + 65536 / 18, 65536 / 18, 65536 / 18, + 65536 / 18, 65536 / 18}; // 32 -> 12 -void ScaleRowDown38_NEON(const uint8* src_ptr, +void ScaleRowDown38_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - MEMACCESS(3) - "vld1.8 {q3}, [%3] \n" - "1: \n" - MEMACCESS(0) - "vld1.8 {d0, d1, d2, d3}, [%0]! \n" - "subs %2, %2, #12 \n" - "vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n" - "vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n" - MEMACCESS(1) - "vst1.8 {d4}, [%1]! \n" - MEMACCESS(1) - "vst1.32 {d5[0]}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"(&kShuf38) // %3 - : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc" - ); + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + "vld1.8 {q3}, [%3] \n" + "1: \n" + "vld1.8 {d0, d1, d2, d3}, [%0]! \n" + "subs %2, %2, #12 \n" + "vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n" + "vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n" + "vst1.8 {d4}, [%1]! \n" + "vst1.32 {d5[0]}, [%1]! 
\n" + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"(&kShuf38) // %3 + : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc"); } // 32x3 -> 12x1 -void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, +void OMITFP ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - const uint8* src_ptr1 = src_ptr + src_stride * 2; + uint8_t* dst_ptr, + int dst_width) { + const uint8_t* src_ptr1 = src_ptr + src_stride * 2; - asm volatile ( - MEMACCESS(5) - "vld1.16 {q13}, [%5] \n" - MEMACCESS(6) - "vld1.8 {q14}, [%6] \n" - MEMACCESS(7) - "vld1.8 {q15}, [%7] \n" - "add %3, %0 \n" - "1: \n" + asm volatile( + "vld1.16 {q13}, [%5] \n" + "vld1.8 {q14}, [%6] \n" + "vld1.8 {q15}, [%7] \n" + "add %3, %0 \n" + "1: \n" - // d0 = 00 40 01 41 02 42 03 43 - // d1 = 10 50 11 51 12 52 13 53 - // d2 = 20 60 21 61 22 62 23 63 - // d3 = 30 70 31 71 32 72 33 73 - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" - MEMACCESS(3) - "vld4.8 {d4, d5, d6, d7}, [%3]! \n" - MEMACCESS(4) - "vld4.8 {d16, d17, d18, d19}, [%4]! \n" - "subs %2, %2, #12 \n" + // d0 = 00 40 01 41 02 42 03 43 + // d1 = 10 50 11 51 12 52 13 53 + // d2 = 20 60 21 61 22 62 23 63 + // d3 = 30 70 31 71 32 72 33 73 + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" + "vld4.8 {d4, d5, d6, d7}, [%3]! \n" + "vld4.8 {d16, d17, d18, d19}, [%4]! \n" + "subs %2, %2, #12 \n" - // Shuffle the input data around to get align the data - // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 - // d0 = 00 10 01 11 02 12 03 13 - // d1 = 40 50 41 51 42 52 43 53 - "vtrn.u8 d0, d1 \n" - "vtrn.u8 d4, d5 \n" - "vtrn.u8 d16, d17 \n" + // Shuffle the input data around to get align the data + // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 + // d0 = 00 10 01 11 02 12 03 13 + // d1 = 40 50 41 51 42 52 43 53 + "vtrn.u8 d0, d1 \n" + "vtrn.u8 d4, d5 \n" + "vtrn.u8 d16, d17 \n" - // d2 = 20 30 21 31 22 32 23 33 - // d3 = 60 70 61 71 62 72 63 73 - "vtrn.u8 d2, d3 \n" - "vtrn.u8 d6, d7 \n" - "vtrn.u8 d18, d19 \n" + // d2 = 20 30 21 31 22 32 23 33 + // d3 = 60 70 61 71 62 72 63 73 + "vtrn.u8 d2, d3 \n" + "vtrn.u8 d6, d7 \n" + "vtrn.u8 d18, d19 \n" - // d0 = 00+10 01+11 02+12 03+13 - // d2 = 40+50 41+51 42+52 43+53 - "vpaddl.u8 q0, q0 \n" - "vpaddl.u8 q2, q2 \n" - "vpaddl.u8 q8, q8 \n" + // d0 = 00+10 01+11 02+12 03+13 + // d2 = 40+50 41+51 42+52 43+53 + "vpaddl.u8 q0, q0 \n" + "vpaddl.u8 q2, q2 \n" + "vpaddl.u8 q8, q8 \n" - // d3 = 60+70 61+71 62+72 63+73 - "vpaddl.u8 d3, d3 \n" - "vpaddl.u8 d7, d7 \n" - "vpaddl.u8 d19, d19 \n" + // d3 = 60+70 61+71 62+72 63+73 + "vpaddl.u8 d3, d3 \n" + "vpaddl.u8 d7, d7 \n" + "vpaddl.u8 d19, d19 \n" - // combine source lines - "vadd.u16 q0, q2 \n" - "vadd.u16 q0, q8 \n" - "vadd.u16 d4, d3, d7 \n" - "vadd.u16 d4, d19 \n" + // combine source lines + "vadd.u16 q0, q2 \n" + "vadd.u16 q0, q8 \n" + "vadd.u16 d4, d3, d7 \n" + "vadd.u16 d4, d19 \n" - // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0] - // + s[6 + st * 1] + s[7 + st * 1] - // + s[6 + st * 2] + s[7 + st * 2]) / 6 - "vqrdmulh.s16 q2, q2, q13 \n" - "vmovn.u16 d4, q2 \n" + // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0] + // + s[6 + st * 1] + s[7 + st * 1] + // + s[6 + st * 2] + s[7 + st * 2]) / 6 + "vqrdmulh.s16 q2, q2, q13 \n" + "vmovn.u16 d4, q2 \n" - // Shuffle 2,3 reg around so that 2 can be added to the - // 0,1 reg and 3 can be added to the 4,5 reg. This - // requires expanding from u8 to u16 as the 0,1 and 4,5 - // registers are already expanded. Then do transposes - // to get aligned. 
- // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 - "vmovl.u8 q1, d2 \n" - "vmovl.u8 q3, d6 \n" - "vmovl.u8 q9, d18 \n" + // Shuffle 2,3 reg around so that 2 can be added to the + // 0,1 reg and 3 can be added to the 4,5 reg. This + // requires expanding from u8 to u16 as the 0,1 and 4,5 + // registers are already expanded. Then do transposes + // to get aligned. + // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 + "vmovl.u8 q1, d2 \n" + "vmovl.u8 q3, d6 \n" + "vmovl.u8 q9, d18 \n" - // combine source lines - "vadd.u16 q1, q3 \n" - "vadd.u16 q1, q9 \n" + // combine source lines + "vadd.u16 q1, q3 \n" + "vadd.u16 q1, q9 \n" - // d4 = xx 20 xx 30 xx 22 xx 32 - // d5 = xx 21 xx 31 xx 23 xx 33 - "vtrn.u32 d2, d3 \n" + // d4 = xx 20 xx 30 xx 22 xx 32 + // d5 = xx 21 xx 31 xx 23 xx 33 + "vtrn.u32 d2, d3 \n" - // d4 = xx 20 xx 21 xx 22 xx 23 - // d5 = xx 30 xx 31 xx 32 xx 33 - "vtrn.u16 d2, d3 \n" + // d4 = xx 20 xx 21 xx 22 xx 23 + // d5 = xx 30 xx 31 xx 32 xx 33 + "vtrn.u16 d2, d3 \n" - // 0+1+2, 3+4+5 - "vadd.u16 q0, q1 \n" + // 0+1+2, 3+4+5 + "vadd.u16 q0, q1 \n" - // Need to divide, but can't downshift as the the value - // isn't a power of 2. So multiply by 65536 / n - // and take the upper 16 bits. - "vqrdmulh.s16 q0, q0, q15 \n" + // Need to divide, but can't downshift as the the value + // isn't a power of 2. So multiply by 65536 / n + // and take the upper 16 bits. + "vqrdmulh.s16 q0, q0, q15 \n" - // Align for table lookup, vtbl requires registers to - // be adjacent - "vmov.u8 d2, d4 \n" + // Align for table lookup, vtbl requires registers to + // be adjacent + "vmov.u8 d2, d4 \n" - "vtbl.u8 d3, {d0, d1, d2}, d28 \n" - "vtbl.u8 d4, {d0, d1, d2}, d29 \n" + "vtbl.u8 d3, {d0, d1, d2}, d28 \n" + "vtbl.u8 d4, {d0, d1, d2}, d29 \n" - MEMACCESS(1) - "vst1.8 {d3}, [%1]! \n" - MEMACCESS(1) - "vst1.32 {d4[0]}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(src_stride), // %3 - "+r"(src_ptr1) // %4 - : "r"(&kMult38_Div6), // %5 - "r"(&kShuf38_2), // %6 - "r"(&kMult38_Div9) // %7 - : "q0", "q1", "q2", "q3", "q8", "q9", "q13", "q14", "q15", "memory", "cc" - ); + "vst1.8 {d3}, [%1]! \n" + "vst1.32 {d4[0]}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width), // %2 + "+r"(src_stride), // %3 + "+r"(src_ptr1) // %4 + : "r"(&kMult38_Div6), // %5 + "r"(&kShuf38_2), // %6 + "r"(&kMult38_Div9) // %7 + : "q0", "q1", "q2", "q3", "q8", "q9", "q13", "q14", "q15", "memory", + "cc"); } // 32x2 -> 12x1 -void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, +void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - MEMACCESS(4) - "vld1.16 {q13}, [%4] \n" - MEMACCESS(5) - "vld1.8 {q14}, [%5] \n" - "add %3, %0 \n" - "1: \n" + uint8_t* dst_ptr, + int dst_width) { + asm volatile( + "vld1.16 {q13}, [%4] \n" + "vld1.8 {q14}, [%5] \n" + "add %3, %0 \n" + "1: \n" - // d0 = 00 40 01 41 02 42 03 43 - // d1 = 10 50 11 51 12 52 13 53 - // d2 = 20 60 21 61 22 62 23 63 - // d3 = 30 70 31 71 32 72 33 73 - MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" - MEMACCESS(3) - "vld4.8 {d4, d5, d6, d7}, [%3]! \n" - "subs %2, %2, #12 \n" + // d0 = 00 40 01 41 02 42 03 43 + // d1 = 10 50 11 51 12 52 13 53 + // d2 = 20 60 21 61 22 62 23 63 + // d3 = 30 70 31 71 32 72 33 73 + "vld4.8 {d0, d1, d2, d3}, [%0]! \n" + "vld4.8 {d4, d5, d6, d7}, [%3]! 
\n" + "subs %2, %2, #12 \n" - // Shuffle the input data around to get align the data - // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 - // d0 = 00 10 01 11 02 12 03 13 - // d1 = 40 50 41 51 42 52 43 53 - "vtrn.u8 d0, d1 \n" - "vtrn.u8 d4, d5 \n" + // Shuffle the input data around to get align the data + // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 + // d0 = 00 10 01 11 02 12 03 13 + // d1 = 40 50 41 51 42 52 43 53 + "vtrn.u8 d0, d1 \n" + "vtrn.u8 d4, d5 \n" - // d2 = 20 30 21 31 22 32 23 33 - // d3 = 60 70 61 71 62 72 63 73 - "vtrn.u8 d2, d3 \n" - "vtrn.u8 d6, d7 \n" + // d2 = 20 30 21 31 22 32 23 33 + // d3 = 60 70 61 71 62 72 63 73 + "vtrn.u8 d2, d3 \n" + "vtrn.u8 d6, d7 \n" - // d0 = 00+10 01+11 02+12 03+13 - // d2 = 40+50 41+51 42+52 43+53 - "vpaddl.u8 q0, q0 \n" - "vpaddl.u8 q2, q2 \n" + // d0 = 00+10 01+11 02+12 03+13 + // d2 = 40+50 41+51 42+52 43+53 + "vpaddl.u8 q0, q0 \n" + "vpaddl.u8 q2, q2 \n" - // d3 = 60+70 61+71 62+72 63+73 - "vpaddl.u8 d3, d3 \n" - "vpaddl.u8 d7, d7 \n" + // d3 = 60+70 61+71 62+72 63+73 + "vpaddl.u8 d3, d3 \n" + "vpaddl.u8 d7, d7 \n" - // combine source lines - "vadd.u16 q0, q2 \n" - "vadd.u16 d4, d3, d7 \n" + // combine source lines + "vadd.u16 q0, q2 \n" + "vadd.u16 d4, d3, d7 \n" - // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4 - "vqrshrn.u16 d4, q2, #2 \n" + // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4 + "vqrshrn.u16 d4, q2, #2 \n" - // Shuffle 2,3 reg around so that 2 can be added to the - // 0,1 reg and 3 can be added to the 4,5 reg. This - // requires expanding from u8 to u16 as the 0,1 and 4,5 - // registers are already expanded. Then do transposes - // to get aligned. - // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 - "vmovl.u8 q1, d2 \n" - "vmovl.u8 q3, d6 \n" + // Shuffle 2,3 reg around so that 2 can be added to the + // 0,1 reg and 3 can be added to the 4,5 reg. This + // requires expanding from u8 to u16 as the 0,1 and 4,5 + // registers are already expanded. Then do transposes + // to get aligned. + // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 + "vmovl.u8 q1, d2 \n" + "vmovl.u8 q3, d6 \n" - // combine source lines - "vadd.u16 q1, q3 \n" + // combine source lines + "vadd.u16 q1, q3 \n" - // d4 = xx 20 xx 30 xx 22 xx 32 - // d5 = xx 21 xx 31 xx 23 xx 33 - "vtrn.u32 d2, d3 \n" + // d4 = xx 20 xx 30 xx 22 xx 32 + // d5 = xx 21 xx 31 xx 23 xx 33 + "vtrn.u32 d2, d3 \n" - // d4 = xx 20 xx 21 xx 22 xx 23 - // d5 = xx 30 xx 31 xx 32 xx 33 - "vtrn.u16 d2, d3 \n" + // d4 = xx 20 xx 21 xx 22 xx 23 + // d5 = xx 30 xx 31 xx 32 xx 33 + "vtrn.u16 d2, d3 \n" - // 0+1+2, 3+4+5 - "vadd.u16 q0, q1 \n" + // 0+1+2, 3+4+5 + "vadd.u16 q0, q1 \n" - // Need to divide, but can't downshift as the the value - // isn't a power of 2. So multiply by 65536 / n - // and take the upper 16 bits. - "vqrdmulh.s16 q0, q0, q13 \n" + // Need to divide, but can't downshift as the the value + // isn't a power of 2. So multiply by 65536 / n + // and take the upper 16 bits. + "vqrdmulh.s16 q0, q0, q13 \n" - // Align for table lookup, vtbl requires registers to - // be adjacent - "vmov.u8 d2, d4 \n" + // Align for table lookup, vtbl requires registers to + // be adjacent + "vmov.u8 d2, d4 \n" - "vtbl.u8 d3, {d0, d1, d2}, d28 \n" - "vtbl.u8 d4, {d0, d1, d2}, d29 \n" + "vtbl.u8 d3, {d0, d1, d2}, d28 \n" + "vtbl.u8 d4, {d0, d1, d2}, d29 \n" - MEMACCESS(1) - "vst1.8 {d3}, [%1]! \n" - MEMACCESS(1) - "vst1.32 {d4[0]}, [%1]! 
\n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(src_stride) // %3 - : "r"(&kMult38_Div6), // %4 - "r"(&kShuf38_2) // %5 - : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc" - ); + "vst1.8 {d3}, [%1]! \n" + "vst1.32 {d4[0]}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width), // %2 + "+r"(src_stride) // %3 + : "r"(&kMult38_Div6), // %4 + "r"(&kShuf38_2) // %5 + : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc"); } -void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int src_width, int src_height) { - const uint8* src_tmp; - asm volatile ( - "1: \n" - "mov %0, %1 \n" - "mov r12, %5 \n" - "veor q2, q2, q2 \n" - "veor q3, q3, q3 \n" - "2: \n" - // load 16 pixels into q0 - MEMACCESS(0) - "vld1.8 {q0}, [%0], %3 \n" - "vaddw.u8 q3, q3, d1 \n" - "vaddw.u8 q2, q2, d0 \n" - "subs r12, r12, #1 \n" - "bgt 2b \n" - MEMACCESS(2) - "vst1.16 {q2, q3}, [%2]! \n" // store pixels - "add %1, %1, #16 \n" - "subs %4, %4, #16 \n" // 16 processed per loop - "bgt 1b \n" - : "=&r"(src_tmp), // %0 - "+r"(src_ptr), // %1 - "+r"(dst_ptr), // %2 - "+r"(src_stride), // %3 - "+r"(src_width), // %4 - "+r"(src_height) // %5 - : - : "memory", "cc", "r12", "q0", "q1", "q2", "q3" // Clobber List - ); +void ScaleAddRows_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + int src_width, + int src_height) { + const uint8_t* src_tmp; + asm volatile( + "1: \n" + "mov %0, %1 \n" + "mov r12, %5 \n" + "veor q2, q2, q2 \n" + "veor q3, q3, q3 \n" + "2: \n" + // load 16 pixels into q0 + "vld1.8 {q0}, [%0], %3 \n" + "vaddw.u8 q3, q3, d1 \n" + "vaddw.u8 q2, q2, d0 \n" + "subs r12, r12, #1 \n" + "bgt 2b \n" + "vst1.16 {q2, q3}, [%2]! \n" // store pixels + "add %1, %1, #16 \n" + "subs %4, %4, #16 \n" // 16 processed per loop + "bgt 1b \n" + : "=&r"(src_tmp), // %0 + "+r"(src_ptr), // %1 + "+r"(dst_ptr), // %2 + "+r"(src_stride), // %3 + "+r"(src_width), // %4 + "+r"(src_height) // %5 + : + : "memory", "cc", "r12", "q0", "q1", "q2", "q3" // Clobber List + ); } // TODO(Yang Zhang): Investigate less load instructions for // the x/dx stepping -#define LOAD2_DATA8_LANE(n) \ - "lsr %5, %3, #16 \n" \ - "add %6, %1, %5 \n" \ - "add %3, %3, %4 \n" \ - MEMACCESS(6) \ - "vld2.8 {d6["#n"], d7["#n"]}, [%6] \n" +#define LOAD2_DATA8_LANE(n) \ + "lsr %5, %3, #16 \n" \ + "add %6, %1, %5 \n" \ + "add %3, %3, %4 \n" \ + "vld2.8 {d6[" #n "], d7[" #n "]}, [%6] \n" -// The NEON version mimics this formula: -// #define BLENDER(a, b, f) (uint8)((int)(a) + -// ((int)(f) * ((int)(b) - (int)(a)) >> 16)) +// The NEON version mimics this formula (from row_common.cc): +// #define BLENDER(a, b, f) (uint8_t)((int)(a) + +// ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) -void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { +void ScaleFilterCols_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx) { int dx_offset[4] = {0, 1, 2, 3}; int* tmp = dx_offset; - const uint8* src_tmp = src_ptr; + const uint8_t* src_tmp = src_ptr; asm volatile ( "vdup.32 q0, %3 \n" // x "vdup.32 q1, %4 \n" // dx @@ -617,7 +594,6 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, "vadd.s16 q8, q8, q9 \n" "vmovn.s16 d6, q8 \n" - MEMACCESS(0) "vst1.8 {d6}, [%0]! 
\n" // store pixels "vadd.s32 q1, q1, q0 \n" "vadd.s32 q2, q2, q0 \n" @@ -639,325 +615,299 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, #undef LOAD2_DATA8_LANE // 16x2 -> 16x1 -void ScaleFilterRows_NEON(uint8* dst_ptr, - const uint8* src_ptr, ptrdiff_t src_stride, - int dst_width, int source_y_fraction) { - asm volatile ( - "cmp %4, #0 \n" - "beq 100f \n" - "add %2, %1 \n" - "cmp %4, #64 \n" - "beq 75f \n" - "cmp %4, #128 \n" - "beq 50f \n" - "cmp %4, #192 \n" - "beq 25f \n" +void ScaleFilterRows_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, + int source_y_fraction) { + asm volatile( + "cmp %4, #0 \n" + "beq 100f \n" + "add %2, %1 \n" + "cmp %4, #64 \n" + "beq 75f \n" + "cmp %4, #128 \n" + "beq 50f \n" + "cmp %4, #192 \n" + "beq 25f \n" - "vdup.8 d5, %4 \n" - "rsb %4, #256 \n" - "vdup.8 d4, %4 \n" - // General purpose row blend. - "1: \n" - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" - MEMACCESS(2) - "vld1.8 {q1}, [%2]! \n" - "subs %3, %3, #16 \n" - "vmull.u8 q13, d0, d4 \n" - "vmull.u8 q14, d1, d4 \n" - "vmlal.u8 q13, d2, d5 \n" - "vmlal.u8 q14, d3, d5 \n" - "vrshrn.u16 d0, q13, #8 \n" - "vrshrn.u16 d1, q14, #8 \n" - MEMACCESS(0) - "vst1.8 {q0}, [%0]! \n" - "bgt 1b \n" - "b 99f \n" + "vdup.8 d5, %4 \n" + "rsb %4, #256 \n" + "vdup.8 d4, %4 \n" + // General purpose row blend. + "1: \n" + "vld1.8 {q0}, [%1]! \n" + "vld1.8 {q1}, [%2]! \n" + "subs %3, %3, #16 \n" + "vmull.u8 q13, d0, d4 \n" + "vmull.u8 q14, d1, d4 \n" + "vmlal.u8 q13, d2, d5 \n" + "vmlal.u8 q14, d3, d5 \n" + "vrshrn.u16 d0, q13, #8 \n" + "vrshrn.u16 d1, q14, #8 \n" + "vst1.8 {q0}, [%0]! \n" + "bgt 1b \n" + "b 99f \n" - // Blend 25 / 75. - "25: \n" - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" - MEMACCESS(2) - "vld1.8 {q1}, [%2]! \n" - "subs %3, %3, #16 \n" - "vrhadd.u8 q0, q1 \n" - "vrhadd.u8 q0, q1 \n" - MEMACCESS(0) - "vst1.8 {q0}, [%0]! \n" - "bgt 25b \n" - "b 99f \n" + // Blend 25 / 75. + "25: \n" + "vld1.8 {q0}, [%1]! \n" + "vld1.8 {q1}, [%2]! \n" + "subs %3, %3, #16 \n" + "vrhadd.u8 q0, q1 \n" + "vrhadd.u8 q0, q1 \n" + "vst1.8 {q0}, [%0]! \n" + "bgt 25b \n" + "b 99f \n" - // Blend 50 / 50. - "50: \n" - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" - MEMACCESS(2) - "vld1.8 {q1}, [%2]! \n" - "subs %3, %3, #16 \n" - "vrhadd.u8 q0, q1 \n" - MEMACCESS(0) - "vst1.8 {q0}, [%0]! \n" - "bgt 50b \n" - "b 99f \n" + // Blend 50 / 50. + "50: \n" + "vld1.8 {q0}, [%1]! \n" + "vld1.8 {q1}, [%2]! \n" + "subs %3, %3, #16 \n" + "vrhadd.u8 q0, q1 \n" + "vst1.8 {q0}, [%0]! \n" + "bgt 50b \n" + "b 99f \n" - // Blend 75 / 25. - "75: \n" - MEMACCESS(1) - "vld1.8 {q1}, [%1]! \n" - MEMACCESS(2) - "vld1.8 {q0}, [%2]! \n" - "subs %3, %3, #16 \n" - "vrhadd.u8 q0, q1 \n" - "vrhadd.u8 q0, q1 \n" - MEMACCESS(0) - "vst1.8 {q0}, [%0]! \n" - "bgt 75b \n" - "b 99f \n" + // Blend 75 / 25. + "75: \n" + "vld1.8 {q1}, [%1]! \n" + "vld1.8 {q0}, [%2]! \n" + "subs %3, %3, #16 \n" + "vrhadd.u8 q0, q1 \n" + "vrhadd.u8 q0, q1 \n" + "vst1.8 {q0}, [%0]! \n" + "bgt 75b \n" + "b 99f \n" - // Blend 100 / 0 - Copy row unchanged. - "100: \n" - MEMACCESS(1) - "vld1.8 {q0}, [%1]! \n" - "subs %3, %3, #16 \n" - MEMACCESS(0) - "vst1.8 {q0}, [%0]! \n" - "bgt 100b \n" + // Blend 100 / 0 - Copy row unchanged. + "100: \n" + "vld1.8 {q0}, [%1]! \n" + "subs %3, %3, #16 \n" + "vst1.8 {q0}, [%0]! 
\n" + "bgt 100b \n" - "99: \n" - MEMACCESS(0) - "vst1.8 {d1[7]}, [%0] \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(src_stride), // %2 - "+r"(dst_width), // %3 - "+r"(source_y_fraction) // %4 - : - : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc" - ); + "99: \n" + "vst1.8 {d1[7]}, [%0] \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(src_stride), // %2 + "+r"(dst_width), // %3 + "+r"(source_y_fraction) // %4 + : + : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc"); } -void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - "1: \n" - // load even pixels into q0, odd into q1 - MEMACCESS(0) - "vld2.32 {q0, q1}, [%0]! \n" - MEMACCESS(0) - "vld2.32 {q2, q3}, [%0]! \n" - "subs %2, %2, #8 \n" // 8 processed per loop - MEMACCESS(1) - "vst1.8 {q1}, [%1]! \n" // store odd pixels - MEMACCESS(1) - "vst1.8 {q3}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List - ); +void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + "vld4.32 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. + "vld4.32 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB + "subs %2, %2, #8 \n" // 8 processed per loop + "vmov q2, q1 \n" // load next 8 ARGB + "vst2.32 {q2, q3}, [%1]! \n" // store odd pixels + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List + ); } -void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - MEMACCESS(0) - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. - "subs %2, %2, #8 \n" // 8 processed per loop - "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts. - "vrshrn.u16 d0, q0, #1 \n" // downshift, round and pack - "vrshrn.u16 d1, q1, #1 \n" - "vrshrn.u16 d2, q2, #1 \n" - "vrshrn.u16 d3, q3, #1 \n" - MEMACCESS(1) - "vst4.8 {d0, d1, d2, d3}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List - ); +// 46: f964 018d vld4.32 {d16,d18,d20,d22}, [r4]! +// 4a: 3e04 subs r6, #4 +// 4c: f964 118d vld4.32 {d17,d19,d21,d23}, [r4]! +// 50: ef64 21f4 vorr q9, q10, q10 +// 54: f942 038d vst2.32 {d16-d19}, [r2]! +// 58: d1f5 bne.n 46 + +void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + "vld4.32 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. + "vld4.32 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB + "subs %2, %2, #8 \n" // 8 processed per loop + "vrhadd.u8 q0, q0, q1 \n" // rounding half add + "vrhadd.u8 q1, q2, q3 \n" // rounding half add + "vst2.32 {q0, q1}, [%1]! 
\n" + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List + ); } -void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - // change the stride to row 2 pointer - "add %1, %1, %0 \n" - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - MEMACCESS(0) - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts. - MEMACCESS(1) - "vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels. - MEMACCESS(1) - "vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB pixels. - "vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts. - "vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts. - "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack - "vrshrn.u16 d1, q1, #2 \n" - "vrshrn.u16 d2, q2, #2 \n" - "vrshrn.u16 d3, q3, #2 \n" - MEMACCESS(2) - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(src_stride), // %1 - "+r"(dst), // %2 - "+r"(dst_width) // %3 - : - : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" - ); +void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + asm volatile( + // change the stride to row 2 pointer + "add %1, %1, %0 \n" + "1: \n" + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB + "subs %3, %3, #8 \n" // 8 processed per loop. + "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. + "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. + "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. + "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts. + "vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB + "vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB + "vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts. + "vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts. + "vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts. + "vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts. + "vrshrn.u16 d0, q0, #2 \n" // round and pack to bytes + "vrshrn.u16 d1, q1, #2 \n" + "vrshrn.u16 d2, q2, #2 \n" + "vrshrn.u16 d3, q3, #2 \n" + "vst4.8 {d0, d1, d2, d3}, [%2]! \n" + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(src_stride), // %1 + "+r"(dst), // %2 + "+r"(dst_width) // %3 + : + : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); } // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. -void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, uint8* dst_argb, int dst_width) { - asm volatile ( - "mov r12, %3, lsl #2 \n" - "1: \n" - MEMACCESS(0) - "vld1.32 {d0[0]}, [%0], r12 \n" - MEMACCESS(0) - "vld1.32 {d0[1]}, [%0], r12 \n" - MEMACCESS(0) - "vld1.32 {d1[0]}, [%0], r12 \n" - MEMACCESS(0) - "vld1.32 {d1[1]}, [%0], r12 \n" - "subs %2, %2, #4 \n" // 4 pixels per loop. - MEMACCESS(1) - "vst1.8 {q0}, [%1]! 
\n" - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - : "r"(src_stepx) // %3 - : "memory", "cc", "r12", "q0" - ); +void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width) { + (void)src_stride; + asm volatile( + "mov r12, %3, lsl #2 \n" + "1: \n" + "vld1.32 {d0[0]}, [%0], r12 \n" + "vld1.32 {d0[1]}, [%0], r12 \n" + "vld1.32 {d1[0]}, [%0], r12 \n" + "vld1.32 {d1[1]}, [%0], r12 \n" + "subs %2, %2, #4 \n" // 4 pixels per loop. + "vst1.8 {q0}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(dst_width) // %2 + : "r"(src_stepx) // %3 + : "memory", "cc", "r12", "q0"); } // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. -void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, +void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width) { - asm volatile ( - "mov r12, %4, lsl #2 \n" - "add %1, %1, %0 \n" - "1: \n" - MEMACCESS(0) - "vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1 - MEMACCESS(1) - "vld1.8 {d1}, [%1], r12 \n" - MEMACCESS(0) - "vld1.8 {d2}, [%0], r12 \n" - MEMACCESS(1) - "vld1.8 {d3}, [%1], r12 \n" - MEMACCESS(0) - "vld1.8 {d4}, [%0], r12 \n" - MEMACCESS(1) - "vld1.8 {d5}, [%1], r12 \n" - MEMACCESS(0) - "vld1.8 {d6}, [%0], r12 \n" - MEMACCESS(1) - "vld1.8 {d7}, [%1], r12 \n" - "vaddl.u8 q0, d0, d1 \n" - "vaddl.u8 q1, d2, d3 \n" - "vaddl.u8 q2, d4, d5 \n" - "vaddl.u8 q3, d6, d7 \n" - "vswp.8 d1, d2 \n" // ab_cd -> ac_bd - "vswp.8 d5, d6 \n" // ef_gh -> eg_fh - "vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d) - "vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h) - "vrshrn.u16 d0, q0, #2 \n" // first 2 pixels. - "vrshrn.u16 d1, q2, #2 \n" // next 2 pixels. - "subs %3, %3, #4 \n" // 4 pixels per loop. - MEMACCESS(2) - "vst1.8 {q0}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stride), // %1 - "+r"(dst_argb), // %2 - "+r"(dst_width) // %3 - : "r"(src_stepx) // %4 - : "memory", "cc", "r12", "q0", "q1", "q2", "q3" - ); + uint8_t* dst_argb, + int dst_width) { + asm volatile( + "mov r12, %4, lsl #2 \n" + "add %1, %1, %0 \n" + "1: \n" + "vld1.8 {d0}, [%0], r12 \n" // 4 2x2 blocks -> 2x1 + "vld1.8 {d1}, [%1], r12 \n" + "vld1.8 {d2}, [%0], r12 \n" + "vld1.8 {d3}, [%1], r12 \n" + "vld1.8 {d4}, [%0], r12 \n" + "vld1.8 {d5}, [%1], r12 \n" + "vld1.8 {d6}, [%0], r12 \n" + "vld1.8 {d7}, [%1], r12 \n" + "vaddl.u8 q0, d0, d1 \n" + "vaddl.u8 q1, d2, d3 \n" + "vaddl.u8 q2, d4, d5 \n" + "vaddl.u8 q3, d6, d7 \n" + "vswp.8 d1, d2 \n" // ab_cd -> ac_bd + "vswp.8 d5, d6 \n" // ef_gh -> eg_fh + "vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d) + "vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h) + "vrshrn.u16 d0, q0, #2 \n" // first 2 pixels. + "vrshrn.u16 d1, q2, #2 \n" // next 2 pixels. + "subs %3, %3, #4 \n" // 4 pixels per loop. + "vst1.8 {q0}, [%2]! 
\n" + "bgt 1b \n" + : "+r"(src_argb), // %0 + "+r"(src_stride), // %1 + "+r"(dst_argb), // %2 + "+r"(dst_width) // %3 + : "r"(src_stepx) // %4 + : "memory", "cc", "r12", "q0", "q1", "q2", "q3"); } // TODO(Yang Zhang): Investigate less load instructions for // the x/dx stepping -#define LOAD1_DATA32_LANE(dn, n) \ - "lsr %5, %3, #16 \n" \ - "add %6, %1, %5, lsl #2 \n" \ - "add %3, %3, %4 \n" \ - MEMACCESS(6) \ - "vld1.32 {"#dn"["#n"]}, [%6] \n" +#define LOAD1_DATA32_LANE(dn, n) \ + "lsr %5, %3, #16 \n" \ + "add %6, %1, %5, lsl #2 \n" \ + "add %3, %3, %4 \n" \ + "vld1.32 {" #dn "[" #n "]}, [%6] \n" -void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { +void ScaleARGBCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { int tmp; - const uint8* src_tmp = src_argb; - asm volatile ( - "1: \n" - LOAD1_DATA32_LANE(d0, 0) - LOAD1_DATA32_LANE(d0, 1) - LOAD1_DATA32_LANE(d1, 0) - LOAD1_DATA32_LANE(d1, 1) - LOAD1_DATA32_LANE(d2, 0) - LOAD1_DATA32_LANE(d2, 1) - LOAD1_DATA32_LANE(d3, 0) - LOAD1_DATA32_LANE(d3, 1) - - MEMACCESS(0) - "vst1.32 {q0, q1}, [%0]! \n" // store pixels - "subs %2, %2, #8 \n" // 8 processed per loop - "bgt 1b \n" - : "+r"(dst_argb), // %0 - "+r"(src_argb), // %1 - "+r"(dst_width), // %2 - "+r"(x), // %3 - "+r"(dx), // %4 - "=&r"(tmp), // %5 - "+r"(src_tmp) // %6 - : - : "memory", "cc", "q0", "q1" - ); + const uint8_t* src_tmp = src_argb; + asm volatile( + "1: \n" + // clang-format off + LOAD1_DATA32_LANE(d0, 0) + LOAD1_DATA32_LANE(d0, 1) + LOAD1_DATA32_LANE(d1, 0) + LOAD1_DATA32_LANE(d1, 1) + LOAD1_DATA32_LANE(d2, 0) + LOAD1_DATA32_LANE(d2, 1) + LOAD1_DATA32_LANE(d3, 0) + LOAD1_DATA32_LANE(d3, 1) + // clang-format on + "vst1.32 {q0, q1}, [%0]! \n" // store pixels + "subs %2, %2, #8 \n" // 8 processed per loop + "bgt 1b \n" + : "+r"(dst_argb), // %0 + "+r"(src_argb), // %1 + "+r"(dst_width), // %2 + "+r"(x), // %3 + "+r"(dx), // %4 + "=&r"(tmp), // %5 + "+r"(src_tmp) // %6 + : + : "memory", "cc", "q0", "q1"); } #undef LOAD1_DATA32_LANE // TODO(Yang Zhang): Investigate less load instructions for // the x/dx stepping -#define LOAD2_DATA32_LANE(dn1, dn2, n) \ - "lsr %5, %3, #16 \n" \ - "add %6, %1, %5, lsl #2 \n" \ - "add %3, %3, %4 \n" \ - MEMACCESS(6) \ - "vld2.32 {"#dn1"["#n"], "#dn2"["#n"]}, [%6] \n" +#define LOAD2_DATA32_LANE(dn1, dn2, n) \ + "lsr %5, %3, #16 \n" \ + "add %6, %1, %5, lsl #2 \n" \ + "add %3, %3, %4 \n" \ + "vld2.32 {" #dn1 "[" #n "], " #dn2 "[" #n "]}, [%6] \n" -void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { +void ScaleARGBFilterCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { int dx_offset[4] = {0, 1, 2, 3}; int* tmp = dx_offset; - const uint8* src_tmp = src_argb; + const uint8_t* src_tmp = src_argb; asm volatile ( "vdup.32 q0, %3 \n" // x "vdup.32 q1, %4 \n" // dx @@ -993,7 +943,6 @@ void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb, "vshrn.i16 d0, q11, #7 \n" "vshrn.i16 d1, q12, #7 \n" - MEMACCESS(0) "vst1.32 {d0, d1}, [%0]! 
\n" // store pixels "vadd.s32 q8, q8, q9 \n" "subs %2, %2, #4 \n" // 4 processed per loop diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_neon64.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_neon64.cc index ff277f26ff6e..494a9cfbfbe3 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/scale_neon64.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_neon64.cc @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "libyuv/scale.h" #include "libyuv/row.h" +#include "libyuv/scale.h" #include "libyuv/scale_row.h" #ifdef __cplusplus @@ -21,580 +21,556 @@ extern "C" { #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) // Read 32x1 throw away even pixels, and write 16x1. -void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - "1: \n" - // load even pixels into v0, odd into v1 - MEMACCESS(0) - "ld2 {v0.16b,v1.16b}, [%0], #32 \n" - "subs %w2, %w2, #16 \n" // 16 processed per loop - MEMACCESS(1) - "st1 {v1.16b}, [%1], #16 \n" // store odd pixels - "b.gt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst), // %1 - "+r"(dst_width) // %2 - : - : "v0", "v1" // Clobber List - ); +void ScaleRowDown2_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + // load even pixels into v0, odd into v1 + "ld2 {v0.16b,v1.16b}, [%0], #32 \n" + "subs %w2, %w2, #16 \n" // 16 processed per loop + "st1 {v1.16b}, [%1], #16 \n" // store odd pixels + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst), // %1 + "+r"(dst_width) // %2 + : + : "v0", "v1" // Clobber List + ); } // Read 32x1 average down and write 16x1. -void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load pixels and post inc - "subs %w2, %w2, #16 \n" // 16 processed per loop - "uaddlp v0.8h, v0.16b \n" // add adjacent - "uaddlp v1.8h, v1.16b \n" - "rshrn v0.8b, v0.8h, #1 \n" // downshift, round and pack - "rshrn2 v0.16b, v1.8h, #1 \n" - MEMACCESS(1) - "st1 {v0.16b}, [%1], #16 \n" - "b.gt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst), // %1 - "+r"(dst_width) // %2 - : - : "v0", "v1" // Clobber List - ); +void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + // load even pixels into v0, odd into v1 + "ld2 {v0.16b,v1.16b}, [%0], #32 \n" + "subs %w2, %w2, #16 \n" // 16 processed per loop + "urhadd v0.16b, v0.16b, v1.16b \n" // rounding half add + "st1 {v0.16b}, [%1], #16 \n" + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst), // %1 + "+r"(dst_width) // %2 + : + : "v0", "v1" // Clobber List + ); } // Read 32x2 average down and write 16x1. 
-void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - // change the stride to row 2 pointer - "add %1, %1, %0 \n" - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b,v1.16b}, [%0], #32 \n" // load row 1 and post inc - MEMACCESS(1) - "ld1 {v2.16b, v3.16b}, [%1], #32 \n" // load row 2 and post inc - "subs %w3, %w3, #16 \n" // 16 processed per loop - "uaddlp v0.8h, v0.16b \n" // row 1 add adjacent - "uaddlp v1.8h, v1.16b \n" - "uadalp v0.8h, v2.16b \n" // row 2 add adjacent + row1 - "uadalp v1.8h, v3.16b \n" - "rshrn v0.8b, v0.8h, #2 \n" // downshift, round and pack - "rshrn2 v0.16b, v1.8h, #2 \n" - MEMACCESS(2) - "st1 {v0.16b}, [%2], #16 \n" - "b.gt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(src_stride), // %1 - "+r"(dst), // %2 - "+r"(dst_width) // %3 - : - : "v0", "v1", "v2", "v3" // Clobber List - ); +void ScaleRowDown2Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + asm volatile( + // change the stride to row 2 pointer + "add %1, %1, %0 \n" + "1: \n" + "ld1 {v0.16b, v1.16b}, [%0], #32 \n" // load row 1 and post inc + "ld1 {v2.16b, v3.16b}, [%1], #32 \n" // load row 2 and post inc + "subs %w3, %w3, #16 \n" // 16 processed per loop + "uaddlp v0.8h, v0.16b \n" // row 1 add adjacent + "uaddlp v1.8h, v1.16b \n" + "uadalp v0.8h, v2.16b \n" // += row 2 add adjacent + "uadalp v1.8h, v3.16b \n" + "rshrn v0.8b, v0.8h, #2 \n" // round and pack + "rshrn2 v0.16b, v1.8h, #2 \n" + "st1 {v0.16b}, [%2], #16 \n" + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(src_stride), // %1 + "+r"(dst), // %2 + "+r"(dst_width) // %3 + : + : "v0", "v1", "v2", "v3" // Clobber List + ); } -void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0 - "subs %w2, %w2, #8 \n" // 8 processed per loop - MEMACCESS(1) - "st1 {v2.8b}, [%1], #8 \n" - "b.gt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : - : "v0", "v1", "v2", "v3", "memory", "cc" - ); +void ScaleRowDown4_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0 + "subs %w2, %w2, #8 \n" // 8 processed per loop + "st1 {v2.8b}, [%1], #8 \n" + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : + : "v0", "v1", "v2", "v3", "memory", "cc"); } -void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - const uint8* src_ptr1 = src_ptr + src_stride; - const uint8* src_ptr2 = src_ptr + src_stride * 2; - const uint8* src_ptr3 = src_ptr + src_stride * 3; -asm volatile ( - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b}, [%0], #16 \n" // load up 16x4 - MEMACCESS(3) - "ld1 {v1.16b}, [%2], #16 \n" - MEMACCESS(4) - "ld1 {v2.16b}, [%3], #16 \n" - MEMACCESS(5) - "ld1 {v3.16b}, [%4], #16 \n" - "subs %w5, %w5, #4 \n" - "uaddlp v0.8h, v0.16b \n" - "uadalp v0.8h, v1.16b \n" - "uadalp v0.8h, v2.16b \n" - "uadalp v0.8h, v3.16b \n" - "addp v0.8h, v0.8h, v0.8h \n" - "rshrn v0.8b, v0.8h, #4 \n" // divide by 16 w/rounding - MEMACCESS(1) - "st1 {v0.s}[0], [%1], #4 \n" - "b.gt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(src_ptr1), // %2 - "+r"(src_ptr2), // %3 - "+r"(src_ptr3), // %4 - "+r"(dst_width) // %5 - : - : "v0", "v1", "v2", "v3", "memory", "cc" - ); +void 
ScaleRowDown4Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { + const uint8_t* src_ptr1 = src_ptr + src_stride; + const uint8_t* src_ptr2 = src_ptr + src_stride * 2; + const uint8_t* src_ptr3 = src_ptr + src_stride * 3; + asm volatile( + "1: \n" + "ld1 {v0.16b}, [%0], #16 \n" // load up 16x4 + "ld1 {v1.16b}, [%2], #16 \n" + "ld1 {v2.16b}, [%3], #16 \n" + "ld1 {v3.16b}, [%4], #16 \n" + "subs %w5, %w5, #4 \n" + "uaddlp v0.8h, v0.16b \n" + "uadalp v0.8h, v1.16b \n" + "uadalp v0.8h, v2.16b \n" + "uadalp v0.8h, v3.16b \n" + "addp v0.8h, v0.8h, v0.8h \n" + "rshrn v0.8b, v0.8h, #4 \n" // divide by 16 w/rounding + "st1 {v0.s}[0], [%1], #4 \n" + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(src_ptr1), // %2 + "+r"(src_ptr2), // %3 + "+r"(src_ptr3), // %4 + "+r"(dst_width) // %5 + : + : "v0", "v1", "v2", "v3", "memory", "cc"); } // Down scale from 4 to 3 pixels. Use the neon multilane read/write // to load up the every 4th pixel into a 4 different registers. // Point samples 32 pixels to 24 pixels. -void ScaleRowDown34_NEON(const uint8* src_ptr, +void ScaleRowDown34_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0 - "subs %w2, %w2, #24 \n" - "orr v2.16b, v3.16b, v3.16b \n" // order v0, v1, v2 - MEMACCESS(1) - "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n" - "b.gt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : - : "v0", "v1", "v2", "v3", "memory", "cc" - ); + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0 + "subs %w2, %w2, #24 \n" + "orr v2.16b, v3.16b, v3.16b \n" // order v0,v1,v2 + "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n" + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : + : "v0", "v1", "v2", "v3", "memory", "cc"); } -void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, +void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movi v20.8b, #3 \n" - "add %3, %3, %0 \n" - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0 - MEMACCESS(3) - "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1 - "subs %w2, %w2, #24 \n" + uint8_t* dst_ptr, + int dst_width) { + asm volatile( + "movi v20.8b, #3 \n" + "add %3, %3, %0 \n" + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0 + "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1 + "subs %w2, %w2, #24 \n" - // filter src line 0 with src line 1 - // expand chars to shorts to allow for room - // when adding lines together - "ushll v16.8h, v4.8b, #0 \n" - "ushll v17.8h, v5.8b, #0 \n" - "ushll v18.8h, v6.8b, #0 \n" - "ushll v19.8h, v7.8b, #0 \n" + // filter src line 0 with src line 1 + // expand chars to shorts to allow for room + // when adding lines together + "ushll v16.8h, v4.8b, #0 \n" + "ushll v17.8h, v5.8b, #0 \n" + "ushll v18.8h, v6.8b, #0 \n" + "ushll v19.8h, v7.8b, #0 \n" - // 3 * line_0 + line_1 - "umlal v16.8h, v0.8b, v20.8b \n" - "umlal v17.8h, v1.8b, v20.8b \n" - "umlal v18.8h, v2.8b, v20.8b \n" - "umlal v19.8h, v3.8b, v20.8b \n" + // 3 * line_0 + line_1 + "umlal v16.8h, v0.8b, v20.8b \n" + "umlal v17.8h, v1.8b, v20.8b \n" + "umlal v18.8h, v2.8b, v20.8b \n" + "umlal v19.8h, v3.8b, v20.8b \n" - // (3 * line_0 + line_1) >> 2 - "uqrshrn v0.8b, 
v16.8h, #2 \n" - "uqrshrn v1.8b, v17.8h, #2 \n" - "uqrshrn v2.8b, v18.8h, #2 \n" - "uqrshrn v3.8b, v19.8h, #2 \n" + // (3 * line_0 + line_1) >> 2 + "uqrshrn v0.8b, v16.8h, #2 \n" + "uqrshrn v1.8b, v17.8h, #2 \n" + "uqrshrn v2.8b, v18.8h, #2 \n" + "uqrshrn v3.8b, v19.8h, #2 \n" - // a0 = (src[0] * 3 + s[1] * 1) >> 2 - "ushll v16.8h, v1.8b, #0 \n" - "umlal v16.8h, v0.8b, v20.8b \n" - "uqrshrn v0.8b, v16.8h, #2 \n" + // a0 = (src[0] * 3 + s[1] * 1) >> 2 + "ushll v16.8h, v1.8b, #0 \n" + "umlal v16.8h, v0.8b, v20.8b \n" + "uqrshrn v0.8b, v16.8h, #2 \n" - // a1 = (src[1] * 1 + s[2] * 1) >> 1 - "urhadd v1.8b, v1.8b, v2.8b \n" + // a1 = (src[1] * 1 + s[2] * 1) >> 1 + "urhadd v1.8b, v1.8b, v2.8b \n" - // a2 = (src[2] * 1 + s[3] * 3) >> 2 - "ushll v16.8h, v2.8b, #0 \n" - "umlal v16.8h, v3.8b, v20.8b \n" - "uqrshrn v2.8b, v16.8h, #2 \n" + // a2 = (src[2] * 1 + s[3] * 3) >> 2 + "ushll v16.8h, v2.8b, #0 \n" + "umlal v16.8h, v3.8b, v20.8b \n" + "uqrshrn v2.8b, v16.8h, #2 \n" - MEMACCESS(1) - "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n" + "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n" - "b.gt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(src_stride) // %3 - : - : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", - "v20", "memory", "cc" - ); + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width), // %2 + "+r"(src_stride) // %3 + : + : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", + "v19", "v20", "memory", "cc"); } -void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, +void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movi v20.8b, #3 \n" - "add %3, %3, %0 \n" - "1: \n" - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0 - MEMACCESS(3) - "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1 - "subs %w2, %w2, #24 \n" - // average src line 0 with src line 1 - "urhadd v0.8b, v0.8b, v4.8b \n" - "urhadd v1.8b, v1.8b, v5.8b \n" - "urhadd v2.8b, v2.8b, v6.8b \n" - "urhadd v3.8b, v3.8b, v7.8b \n" + uint8_t* dst_ptr, + int dst_width) { + asm volatile( + "movi v20.8b, #3 \n" + "add %3, %3, %0 \n" + "1: \n" + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0 + "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1 + "subs %w2, %w2, #24 \n" + // average src line 0 with src line 1 + "urhadd v0.8b, v0.8b, v4.8b \n" + "urhadd v1.8b, v1.8b, v5.8b \n" + "urhadd v2.8b, v2.8b, v6.8b \n" + "urhadd v3.8b, v3.8b, v7.8b \n" - // a0 = (src[0] * 3 + s[1] * 1) >> 2 - "ushll v4.8h, v1.8b, #0 \n" - "umlal v4.8h, v0.8b, v20.8b \n" - "uqrshrn v0.8b, v4.8h, #2 \n" + // a0 = (src[0] * 3 + s[1] * 1) >> 2 + "ushll v4.8h, v1.8b, #0 \n" + "umlal v4.8h, v0.8b, v20.8b \n" + "uqrshrn v0.8b, v4.8h, #2 \n" - // a1 = (src[1] * 1 + s[2] * 1) >> 1 - "urhadd v1.8b, v1.8b, v2.8b \n" + // a1 = (src[1] * 1 + s[2] * 1) >> 1 + "urhadd v1.8b, v1.8b, v2.8b \n" - // a2 = (src[2] * 1 + s[3] * 3) >> 2 - "ushll v4.8h, v2.8b, #0 \n" - "umlal v4.8h, v3.8b, v20.8b \n" - "uqrshrn v2.8b, v4.8h, #2 \n" + // a2 = (src[2] * 1 + s[3] * 3) >> 2 + "ushll v4.8h, v2.8b, #0 \n" + "umlal v4.8h, v3.8b, v20.8b \n" + "uqrshrn v2.8b, v4.8h, #2 \n" - MEMACCESS(1) - "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n" - "b.gt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(src_stride) // %3 - : - : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc" - ); + "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n" + "b.gt 1b \n" + 
: "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width), // %2 + "+r"(src_stride) // %3 + : + : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"); } -static uvec8 kShuf38 = - { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 }; -static uvec8 kShuf38_2 = - { 0, 16, 32, 2, 18, 33, 4, 20, 34, 6, 22, 35, 0, 0, 0, 0 }; -static vec16 kMult38_Div6 = - { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, - 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 }; -static vec16 kMult38_Div9 = - { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, - 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 }; +static const uvec8 kShuf38 = {0, 3, 6, 8, 11, 14, 16, 19, + 22, 24, 27, 30, 0, 0, 0, 0}; +static const uvec8 kShuf38_2 = {0, 16, 32, 2, 18, 33, 4, 20, + 34, 6, 22, 35, 0, 0, 0, 0}; +static const vec16 kMult38_Div6 = {65536 / 12, 65536 / 12, 65536 / 12, + 65536 / 12, 65536 / 12, 65536 / 12, + 65536 / 12, 65536 / 12}; +static const vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18, + 65536 / 18, 65536 / 18, 65536 / 18, + 65536 / 18, 65536 / 18}; // 32 -> 12 -void ScaleRowDown38_NEON(const uint8* src_ptr, +void ScaleRowDown38_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - MEMACCESS(3) - "ld1 {v3.16b}, [%3] \n" - "1: \n" - MEMACCESS(0) - "ld1 {v0.16b,v1.16b}, [%0], #32 \n" - "subs %w2, %w2, #12 \n" - "tbl v2.16b, {v0.16b,v1.16b}, v3.16b \n" - MEMACCESS(1) - "st1 {v2.8b}, [%1], #8 \n" - MEMACCESS(1) - "st1 {v2.s}[2], [%1], #4 \n" - "b.gt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"(&kShuf38) // %3 - : "v0", "v1", "v2", "v3", "memory", "cc" - ); + uint8_t* dst_ptr, + int dst_width) { + (void)src_stride; + asm volatile( + "ld1 {v3.16b}, [%3] \n" + "1: \n" + "ld1 {v0.16b,v1.16b}, [%0], #32 \n" + "subs %w2, %w2, #12 \n" + "tbl v2.16b, {v0.16b,v1.16b}, v3.16b \n" + "st1 {v2.8b}, [%1], #8 \n" + "st1 {v2.s}[2], [%1], #4 \n" + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(dst_width) // %2 + : "r"(&kShuf38) // %3 + : "v0", "v1", "v2", "v3", "memory", "cc"); } // 32x3 -> 12x1 -void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, +void OMITFP ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - const uint8* src_ptr1 = src_ptr + src_stride * 2; + uint8_t* dst_ptr, + int dst_width) { + const uint8_t* src_ptr1 = src_ptr + src_stride * 2; ptrdiff_t tmp_src_stride = src_stride; - asm volatile ( - MEMACCESS(5) - "ld1 {v29.8h}, [%5] \n" - MEMACCESS(6) - "ld1 {v30.16b}, [%6] \n" - MEMACCESS(7) - "ld1 {v31.8h}, [%7] \n" - "add %2, %2, %0 \n" - "1: \n" + asm volatile( + "ld1 {v29.8h}, [%5] \n" + "ld1 {v30.16b}, [%6] \n" + "ld1 {v31.8h}, [%7] \n" + "add %2, %2, %0 \n" + "1: \n" - // 00 40 01 41 02 42 03 43 - // 10 50 11 51 12 52 13 53 - // 20 60 21 61 22 62 23 63 - // 30 70 31 71 32 72 33 73 - MEMACCESS(0) - "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" - MEMACCESS(3) - "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n" - MEMACCESS(4) - "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%3], #32 \n" - "subs %w4, %w4, #12 \n" + // 00 40 01 41 02 42 03 43 + // 10 50 11 51 12 52 13 53 + // 20 60 21 61 22 62 23 63 + // 30 70 31 71 32 72 33 73 + "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" + "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n" + "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%3], #32 \n" + "subs %w4, %w4, #12 \n" - // Shuffle the input data around to get align the data - // so adjacent data can be added. 
0,1 - 2,3 - 4,5 - 6,7
-    // 00 10 01 11 02 12 03 13
-    // 40 50 41 51 42 52 43 53
-    "trn1       v20.8b, v0.8b, v1.8b    \n"
-    "trn2       v21.8b, v0.8b, v1.8b    \n"
-    "trn1       v22.8b, v4.8b, v5.8b    \n"
-    "trn2       v23.8b, v4.8b, v5.8b    \n"
-    "trn1       v24.8b, v16.8b, v17.8b  \n"
-    "trn2       v25.8b, v16.8b, v17.8b  \n"
+      // Shuffle the input data around to align the data
+      // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
+      // 00 10 01 11 02 12 03 13
+      // 40 50 41 51 42 52 43 53
+      "trn1       v20.8b, v0.8b, v1.8b    \n"
+      "trn2       v21.8b, v0.8b, v1.8b    \n"
+      "trn1       v22.8b, v4.8b, v5.8b    \n"
+      "trn2       v23.8b, v4.8b, v5.8b    \n"
+      "trn1       v24.8b, v16.8b, v17.8b  \n"
+      "trn2       v25.8b, v16.8b, v17.8b  \n"
-    // 20 30 21 31 22 32 23 33
-    // 60 70 61 71 62 72 63 73
-    "trn1       v0.8b, v2.8b, v3.8b     \n"
-    "trn2       v1.8b, v2.8b, v3.8b     \n"
-    "trn1       v4.8b, v6.8b, v7.8b     \n"
-    "trn2       v5.8b, v6.8b, v7.8b     \n"
-    "trn1       v16.8b, v18.8b, v19.8b  \n"
-    "trn2       v17.8b, v18.8b, v19.8b  \n"
+      // 20 30 21 31 22 32 23 33
+      // 60 70 61 71 62 72 63 73
+      "trn1       v0.8b, v2.8b, v3.8b     \n"
+      "trn2       v1.8b, v2.8b, v3.8b     \n"
+      "trn1       v4.8b, v6.8b, v7.8b     \n"
+      "trn2       v5.8b, v6.8b, v7.8b     \n"
+      "trn1       v16.8b, v18.8b, v19.8b  \n"
+      "trn2       v17.8b, v18.8b, v19.8b  \n"
-    // 00+10 01+11 02+12 03+13
-    // 40+50 41+51 42+52 43+53
-    "uaddlp     v20.4h, v20.8b          \n"
-    "uaddlp     v21.4h, v21.8b          \n"
-    "uaddlp     v22.4h, v22.8b          \n"
-    "uaddlp     v23.4h, v23.8b          \n"
-    "uaddlp     v24.4h, v24.8b          \n"
-    "uaddlp     v25.4h, v25.8b          \n"
+      // 00+10 01+11 02+12 03+13
+      // 40+50 41+51 42+52 43+53
+      "uaddlp     v20.4h, v20.8b          \n"
+      "uaddlp     v21.4h, v21.8b          \n"
+      "uaddlp     v22.4h, v22.8b          \n"
+      "uaddlp     v23.4h, v23.8b          \n"
+      "uaddlp     v24.4h, v24.8b          \n"
+      "uaddlp     v25.4h, v25.8b          \n"
-    // 60+70 61+71 62+72 63+73
-    "uaddlp     v1.4h, v1.8b            \n"
-    "uaddlp     v5.4h, v5.8b            \n"
-    "uaddlp     v17.4h, v17.8b          \n"
+      // 60+70 61+71 62+72 63+73
+      "uaddlp     v1.4h, v1.8b            \n"
+      "uaddlp     v5.4h, v5.8b            \n"
+      "uaddlp     v17.4h, v17.8b          \n"
-    // combine source lines
-    "add        v20.4h, v20.4h, v22.4h  \n"
-    "add        v21.4h, v21.4h, v23.4h  \n"
-    "add        v20.4h, v20.4h, v24.4h  \n"
-    "add        v21.4h, v21.4h, v25.4h  \n"
-    "add        v2.4h, v1.4h, v5.4h     \n"
-    "add        v2.4h, v2.4h, v17.4h    \n"
+      // combine source lines
+      "add        v20.4h, v20.4h, v22.4h  \n"
+      "add        v21.4h, v21.4h, v23.4h  \n"
+      "add        v20.4h, v20.4h, v24.4h  \n"
+      "add        v21.4h, v21.4h, v25.4h  \n"
+      "add        v2.4h, v1.4h, v5.4h     \n"
+      "add        v2.4h, v2.4h, v17.4h    \n"
-    // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
-    //             + s[6 + st * 1] + s[7 + st * 1]
-    //             + s[6 + st * 2] + s[7 + st * 2]) / 6
-    "sqrdmulh   v2.8h, v2.8h, v29.8h    \n"
-    "xtn        v2.8b, v2.8h            \n"
+      // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
+      //             + s[6 + st * 1] + s[7 + st * 1]
+      //             + s[6 + st * 2] + s[7 + st * 2]) / 6
+      "sqrdmulh   v2.8h, v2.8h, v29.8h    \n"
+      "xtn        v2.8b, v2.8h            \n"
-    // Shuffle 2,3 reg around so that 2 can be added to the
-    // 0,1 reg and 3 can be added to the 4,5 reg. This
-    // requires expanding from u8 to u16 as the 0,1 and 4,5
-    // registers are already expanded. Then do transposes
-    // to get aligned.
-    // xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
-    "ushll      v16.8h, v16.8b, #0      \n"
-    "uaddl      v0.8h, v0.8b, v4.8b     \n"
+      // Shuffle 2,3 reg around so that 2 can be added to the
+      // 0,1 reg and 3 can be added to the 4,5 reg. This
+      // requires expanding from u8 to u16 as the 0,1 and 4,5
+      // registers are already expanded. Then do transposes
+      // to get aligned.
+      // xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+      "ushll      v16.8h, v16.8b, #0      \n"
+      "uaddl      v0.8h, v0.8b, v4.8b     \n"
-    // combine source lines
-    "add        v0.8h, v0.8h, v16.8h    \n"
+      // combine source lines
+      "add        v0.8h, v0.8h, v16.8h    \n"
-    // xx 20 xx 21 xx 22 xx 23
-    // xx 30 xx 31 xx 32 xx 33
-    "trn1       v1.8h, v0.8h, v0.8h     \n"
-    "trn2       v4.8h, v0.8h, v0.8h     \n"
-    "xtn        v0.4h, v1.4s            \n"
-    "xtn        v4.4h, v4.4s            \n"
+      // xx 20 xx 21 xx 22 xx 23
+      // xx 30 xx 31 xx 32 xx 33
+      "trn1       v1.8h, v0.8h, v0.8h     \n"
+      "trn2       v4.8h, v0.8h, v0.8h     \n"
+      "xtn        v0.4h, v1.4s            \n"
+      "xtn        v4.4h, v4.4s            \n"
-    // 0+1+2, 3+4+5
-    "add        v20.8h, v20.8h, v0.8h   \n"
-    "add        v21.8h, v21.8h, v4.8h   \n"
+      // 0+1+2, 3+4+5
+      "add        v20.8h, v20.8h, v0.8h   \n"
+      "add        v21.8h, v21.8h, v4.8h   \n"
-    // Need to divide, but can't downshift as the the value
-    // isn't a power of 2. So multiply by 65536 / n
-    // and take the upper 16 bits.
-    "sqrdmulh   v0.8h, v20.8h, v31.8h   \n"
-    "sqrdmulh   v1.8h, v21.8h, v31.8h   \n"
+      // Need to divide, but can't downshift as the value
+      // isn't a power of 2. So multiply by 65536 / n
+      // and take the upper 16 bits.
+      "sqrdmulh   v0.8h, v20.8h, v31.8h   \n"
+      "sqrdmulh   v1.8h, v21.8h, v31.8h   \n"
-    // Align for table lookup, vtbl requires registers to
-    // be adjacent
-    "tbl        v3.16b, {v0.16b, v1.16b, v2.16b}, v30.16b \n"
+      // Align for table lookup, vtbl requires registers to be adjacent
+      "tbl        v3.16b, {v0.16b, v1.16b, v2.16b}, v30.16b \n"
-    MEMACCESS(1)
-    "st1        {v3.8b}, [%1], #8       \n"
-    MEMACCESS(1)
-    "st1        {v3.s}[2], [%1], #4     \n"
-    "b.gt       1b                      \n"
-  : "+r"(src_ptr),         // %0
-    "+r"(dst_ptr),         // %1
-    "+r"(tmp_src_stride),  // %2
-    "+r"(src_ptr1),        // %3
-    "+r"(dst_width)        // %4
-  : "r"(&kMult38_Div6),    // %5
-    "r"(&kShuf38_2),       // %6
-    "r"(&kMult38_Div9)     // %7
-  : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17",
-    "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v29",
-    "v30", "v31", "memory", "cc"
-  );
+      "st1        {v3.8b}, [%1], #8       \n"
+      "st1        {v3.s}[2], [%1], #4     \n"
+      "b.gt       1b                      \n"
+      : "+r"(src_ptr),         // %0
+        "+r"(dst_ptr),         // %1
+        "+r"(tmp_src_stride),  // %2
+        "+r"(src_ptr1),        // %3
+        "+r"(dst_width)        // %4
+      : "r"(&kMult38_Div6),    // %5
+        "r"(&kShuf38_2),       // %6
+        "r"(&kMult38_Div9)     // %7
+      : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18",
+        "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v29", "v30", "v31",
+        "memory", "cc");
 }

 // 32x2 -> 12x1
-void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
+void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr,
                                ptrdiff_t src_stride,
-                               uint8* dst_ptr, int dst_width) {
+                               uint8_t* dst_ptr,
+                               int dst_width) {
   // TODO(fbarchard): use src_stride directly for clang 3.5+.
   ptrdiff_t tmp_src_stride = src_stride;
-  asm volatile (
-    MEMACCESS(4)
-    "ld1        {v30.8h}, [%4]          \n"
-    MEMACCESS(5)
-    "ld1        {v31.16b}, [%5]         \n"
-    "add        %2, %2, %0              \n"
-  "1:                                   \n"
+  asm volatile(
+      "ld1        {v30.8h}, [%4]          \n"
+      "ld1        {v31.16b}, [%5]         \n"
+      "add        %2, %2, %0              \n"
+      "1:                                 \n"
-    // 00 40 01 41 02 42 03 43
-    // 10 50 11 51 12 52 13 53
-    // 20 60 21 61 22 62 23 63
-    // 30 70 31 71 32 72 33 73
-    MEMACCESS(0)
-    "ld4        {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
-    MEMACCESS(3)
-    "ld4        {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
-    "subs       %w3, %w3, #12           \n"
+      // 00 40 01 41 02 42 03 43
+      // 10 50 11 51 12 52 13 53
+      // 20 60 21 61 22 62 23 63
+      // 30 70 31 71 32 72 33 73
+      "ld4        {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
+      "ld4        {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
+      "subs       %w3, %w3, #12           \n"
-    // Shuffle the input data around to get align the data
-    // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
-    // 00 10 01 11 02 12 03 13
-    // 40 50 41 51 42 52 43 53
-    "trn1       v16.8b, v0.8b, v1.8b    \n"
-    "trn2       v17.8b, v0.8b, v1.8b    \n"
-    "trn1       v18.8b, v4.8b, v5.8b    \n"
-    "trn2       v19.8b, v4.8b, v5.8b    \n"
+      // Shuffle the input data around to align the data
+      // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
+      // 00 10 01 11 02 12 03 13
+      // 40 50 41 51 42 52 43 53
+      "trn1       v16.8b, v0.8b, v1.8b    \n"
+      "trn2       v17.8b, v0.8b, v1.8b    \n"
+      "trn1       v18.8b, v4.8b, v5.8b    \n"
+      "trn2       v19.8b, v4.8b, v5.8b    \n"
-    // 20 30 21 31 22 32 23 33
-    // 60 70 61 71 62 72 63 73
-    "trn1       v0.8b, v2.8b, v3.8b     \n"
-    "trn2       v1.8b, v2.8b, v3.8b     \n"
-    "trn1       v4.8b, v6.8b, v7.8b     \n"
-    "trn2       v5.8b, v6.8b, v7.8b     \n"
+      // 20 30 21 31 22 32 23 33
+      // 60 70 61 71 62 72 63 73
+      "trn1       v0.8b, v2.8b, v3.8b     \n"
+      "trn2       v1.8b, v2.8b, v3.8b     \n"
+      "trn1       v4.8b, v6.8b, v7.8b     \n"
+      "trn2       v5.8b, v6.8b, v7.8b     \n"
-    // 00+10 01+11 02+12 03+13
-    // 40+50 41+51 42+52 43+53
-    "uaddlp     v16.4h, v16.8b          \n"
-    "uaddlp     v17.4h, v17.8b          \n"
-    "uaddlp     v18.4h, v18.8b          \n"
-    "uaddlp     v19.4h, v19.8b          \n"
+      // 00+10 01+11 02+12 03+13
+      // 40+50 41+51 42+52 43+53
+      "uaddlp     v16.4h, v16.8b          \n"
+      "uaddlp     v17.4h, v17.8b          \n"
+      "uaddlp     v18.4h, v18.8b          \n"
+      "uaddlp     v19.4h, v19.8b          \n"
-    // 60+70 61+71 62+72 63+73
-    "uaddlp     v1.4h, v1.8b            \n"
-    "uaddlp     v5.4h, v5.8b            \n"
+      // 60+70 61+71 62+72 63+73
+      "uaddlp     v1.4h, v1.8b            \n"
+      "uaddlp     v5.4h, v5.8b            \n"
-    // combine source lines
-    "add        v16.4h, v16.4h, v18.4h  \n"
-    "add        v17.4h, v17.4h, v19.4h  \n"
-    "add        v2.4h, v1.4h, v5.4h     \n"
+      // combine source lines
+      "add        v16.4h, v16.4h, v18.4h  \n"
+      "add        v17.4h, v17.4h, v19.4h  \n"
+      "add        v2.4h, v1.4h, v5.4h     \n"
-    // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
-    "uqrshrn    v2.8b, v2.8h, #2        \n"
+      // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
+      "uqrshrn    v2.8b, v2.8h, #2        \n"
-    // Shuffle 2,3 reg around so that 2 can be added to the
-    // 0,1 reg and 3 can be added to the 4,5 reg. This
-    // requires expanding from u8 to u16 as the 0,1 and 4,5
-    // registers are already expanded. Then do transposes
-    // to get aligned.
-    // xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
+      // Shuffle 2,3 reg around so that 2 can be added to the
+      // 0,1 reg and 3 can be added to the 4,5 reg. This
+      // requires expanding from u8 to u16 as the 0,1 and 4,5
+      // registers are already expanded. Then do transposes
+      // to get aligned.
+      // xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
-    // combine source lines
-    "uaddl      v0.8h, v0.8b, v4.8b     \n"
+      // combine source lines
+      "uaddl      v0.8h, v0.8b, v4.8b     \n"
-    // xx 20 xx 21 xx 22 xx 23
-    // xx 30 xx 31 xx 32 xx 33
-    "trn1       v1.8h, v0.8h, v0.8h     \n"
-    "trn2       v4.8h, v0.8h, v0.8h     \n"
-    "xtn        v0.4h, v1.4s            \n"
-    "xtn        v4.4h, v4.4s            \n"
+      // xx 20 xx 21 xx 22 xx 23
+      // xx 30 xx 31 xx 32 xx 33
+      "trn1       v1.8h, v0.8h, v0.8h     \n"
+      "trn2       v4.8h, v0.8h, v0.8h     \n"
+      "xtn        v0.4h, v1.4s            \n"
+      "xtn        v4.4h, v4.4s            \n"
-    // 0+1+2, 3+4+5
-    "add        v16.8h, v16.8h, v0.8h   \n"
-    "add        v17.8h, v17.8h, v4.8h   \n"
+      // 0+1+2, 3+4+5
+      "add        v16.8h, v16.8h, v0.8h   \n"
+      "add        v17.8h, v17.8h, v4.8h   \n"
-    // Need to divide, but can't downshift as the the value
-    // isn't a power of 2. So multiply by 65536 / n
-    // and take the upper 16 bits.
-    "sqrdmulh   v0.8h, v16.8h, v30.8h   \n"
-    "sqrdmulh   v1.8h, v17.8h, v30.8h   \n"
+      // Need to divide, but can't downshift as the value
+      // isn't a power of 2. So multiply by 65536 / n
+      // and take the upper 16 bits.
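+      // A scalar sketch of this reciprocal trick (illustrative, not part of
+      // libyuv): dividing a sum by a non-power-of-two n becomes a multiply
+      // by the 16.16 fixed-point reciprocal followed by keeping the high
+      // 16 bits, e.g. for n == 6:
+      //   uint16_t div6(uint16_t sum) {
+      //     return (uint16_t)(((uint32_t)sum * (65536 / 6)) >> 16);
+      //   }
+      // sqrdmulh doubles the product before taking the high half, which is
+      // why kMult38_Div6 and kMult38_Div9 hold 65536/12 and 65536/18.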
+ "sqrdmulh v0.8h, v16.8h, v30.8h \n" + "sqrdmulh v1.8h, v17.8h, v30.8h \n" - // Align for table lookup, vtbl requires registers to - // be adjacent + // Align for table lookup, vtbl requires registers to + // be adjacent - "tbl v3.16b, {v0.16b, v1.16b, v2.16b}, v31.16b \n" + "tbl v3.16b, {v0.16b, v1.16b, v2.16b}, v31.16b \n" - MEMACCESS(1) - "st1 {v3.8b}, [%1], #8 \n" - MEMACCESS(1) - "st1 {v3.s}[2], [%1], #4 \n" - "b.gt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(tmp_src_stride), // %2 - "+r"(dst_width) // %3 - : "r"(&kMult38_Div6), // %4 - "r"(&kShuf38_2) // %5 - : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", - "v18", "v19", "v30", "v31", "memory", "cc" - ); + "st1 {v3.8b}, [%1], #8 \n" + "st1 {v3.s}[2], [%1], #4 \n" + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst_ptr), // %1 + "+r"(tmp_src_stride), // %2 + "+r"(dst_width) // %3 + : "r"(&kMult38_Div6), // %4 + "r"(&kShuf38_2) // %5 + : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", + "v19", "v30", "v31", "memory", "cc"); } -void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int src_width, int src_height) { - const uint8* src_tmp; - asm volatile ( - "1: \n" - "mov %0, %1 \n" - "mov w12, %w5 \n" - "eor v2.16b, v2.16b, v2.16b \n" - "eor v3.16b, v3.16b, v3.16b \n" - "2: \n" - // load 16 pixels into q0 - MEMACCESS(0) - "ld1 {v0.16b}, [%0], %3 \n" - "uaddw2 v3.8h, v3.8h, v0.16b \n" - "uaddw v2.8h, v2.8h, v0.8b \n" - "subs w12, w12, #1 \n" - "b.gt 2b \n" - MEMACCESS(2) - "st1 {v2.8h, v3.8h}, [%2], #32 \n" // store pixels - "add %1, %1, #16 \n" - "subs %w4, %w4, #16 \n" // 16 processed per loop - "b.gt 1b \n" - : "=&r"(src_tmp), // %0 - "+r"(src_ptr), // %1 - "+r"(dst_ptr), // %2 - "+r"(src_stride), // %3 - "+r"(src_width), // %4 - "+r"(src_height) // %5 - : - : "memory", "cc", "w12", "v0", "v1", "v2", "v3" // Clobber List - ); +void ScaleAddRows_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + int src_width, + int src_height) { + const uint8_t* src_tmp; + asm volatile( + "1: \n" + "mov %0, %1 \n" + "mov w12, %w5 \n" + "eor v2.16b, v2.16b, v2.16b \n" + "eor v3.16b, v3.16b, v3.16b \n" + "2: \n" + // load 16 pixels into q0 + "ld1 {v0.16b}, [%0], %3 \n" + "uaddw2 v3.8h, v3.8h, v0.16b \n" + "uaddw v2.8h, v2.8h, v0.8b \n" + "subs w12, w12, #1 \n" + "b.gt 2b \n" + "st1 {v2.8h, v3.8h}, [%2], #32 \n" // store pixels + "add %1, %1, #16 \n" + "subs %w4, %w4, #16 \n" // 16 processed per loop + "b.gt 1b \n" + : "=&r"(src_tmp), // %0 + "+r"(src_ptr), // %1 + "+r"(dst_ptr), // %2 + "+r"(src_stride), // %3 + "+r"(src_width), // %4 + "+r"(src_height) // %5 + : + : "memory", "cc", "w12", "v0", "v1", "v2", "v3" // Clobber List + ); } // TODO(Yang Zhang): Investigate less load instructions for // the x/dx stepping -#define LOAD2_DATA8_LANE(n) \ - "lsr %5, %3, #16 \n" \ - "add %6, %1, %5 \n" \ - "add %3, %3, %4 \n" \ - MEMACCESS(6) \ - "ld2 {v4.b, v5.b}["#n"], [%6] \n" +#define LOAD2_DATA8_LANE(n) \ + "lsr %5, %3, #16 \n" \ + "add %6, %1, %5 \n" \ + "add %3, %3, %4 \n" \ + "ld2 {v4.b, v5.b}[" #n "], [%6] \n" -void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { +// The NEON version mimics this formula (from row_common.cc): +// #define BLENDER(a, b, f) (uint8_t)((int)(a) + +// ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) + +void ScaleFilterCols_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx) { int dx_offset[4] = {0, 1, 2, 3}; int* tmp = dx_offset; - 
const uint8* src_tmp = src_ptr; - int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning. - int64 x64 = (int64) x; - int64 dx64 = (int64) dx; + const uint8_t* src_tmp = src_ptr; + int64_t x64 = (int64_t)x; // NOLINT + int64_t dx64 = (int64_t)dx; // NOLINT asm volatile ( "dup v0.4s, %w3 \n" // x "dup v1.4s, %w4 \n" // dx @@ -626,12 +602,11 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, "ushll2 v6.4s, v6.8h, #0 \n" "mul v16.4s, v16.4s, v7.4s \n" "mul v17.4s, v17.4s, v6.4s \n" - "rshrn v6.4h, v16.4s, #16 \n" - "rshrn2 v6.8h, v17.4s, #16 \n" + "rshrn v6.4h, v16.4s, #16 \n" + "rshrn2 v6.8h, v17.4s, #16 \n" "add v4.8h, v4.8h, v6.8h \n" "xtn v4.8b, v4.8h \n" - MEMACCESS(0) "st1 {v4.8b}, [%0], #8 \n" // store pixels "add v1.4s, v1.4s, v0.4s \n" "add v2.4s, v2.4s, v0.4s \n" @@ -639,7 +614,7 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, "b.gt 1b \n" : "+r"(dst_ptr), // %0 "+r"(src_ptr), // %1 - "+r"(dst_width64), // %2 + "+r"(dst_width), // %2 "+r"(x64), // %3 "+r"(dx64), // %4 "+r"(tmp), // %5 @@ -653,331 +628,300 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, #undef LOAD2_DATA8_LANE // 16x2 -> 16x1 -void ScaleFilterRows_NEON(uint8* dst_ptr, - const uint8* src_ptr, ptrdiff_t src_stride, - int dst_width, int source_y_fraction) { - int y_fraction = 256 - source_y_fraction; - asm volatile ( - "cmp %w4, #0 \n" - "b.eq 100f \n" - "add %2, %2, %1 \n" - "cmp %w4, #64 \n" - "b.eq 75f \n" - "cmp %w4, #128 \n" - "b.eq 50f \n" - "cmp %w4, #192 \n" - "b.eq 25f \n" +void ScaleFilterRows_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, + int source_y_fraction) { + int y_fraction = 256 - source_y_fraction; + asm volatile( + "cmp %w4, #0 \n" + "b.eq 100f \n" + "add %2, %2, %1 \n" + "cmp %w4, #64 \n" + "b.eq 75f \n" + "cmp %w4, #128 \n" + "b.eq 50f \n" + "cmp %w4, #192 \n" + "b.eq 25f \n" - "dup v5.8b, %w4 \n" - "dup v4.8b, %w5 \n" - // General purpose row blend. - "1: \n" - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" - MEMACCESS(2) - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "umull v6.8h, v0.8b, v4.8b \n" - "umull2 v7.8h, v0.16b, v4.16b \n" - "umlal v6.8h, v1.8b, v5.8b \n" - "umlal2 v7.8h, v1.16b, v5.16b \n" - "rshrn v0.8b, v6.8h, #8 \n" - "rshrn2 v0.16b, v7.8h, #8 \n" - MEMACCESS(0) - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 1b \n" - "b 99f \n" + "dup v5.8b, %w4 \n" + "dup v4.8b, %w5 \n" + // General purpose row blend. + "1: \n" + "ld1 {v0.16b}, [%1], #16 \n" + "ld1 {v1.16b}, [%2], #16 \n" + "subs %w3, %w3, #16 \n" + "umull v6.8h, v0.8b, v4.8b \n" + "umull2 v7.8h, v0.16b, v4.16b \n" + "umlal v6.8h, v1.8b, v5.8b \n" + "umlal2 v7.8h, v1.16b, v5.16b \n" + "rshrn v0.8b, v6.8h, #8 \n" + "rshrn2 v0.16b, v7.8h, #8 \n" + "st1 {v0.16b}, [%0], #16 \n" + "b.gt 1b \n" + "b 99f \n" - // Blend 25 / 75. - "25: \n" - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" - MEMACCESS(2) - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "urhadd v0.16b, v0.16b, v1.16b \n" - "urhadd v0.16b, v0.16b, v1.16b \n" - MEMACCESS(0) - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 25b \n" - "b 99f \n" + // Blend 25 / 75. + "25: \n" + "ld1 {v0.16b}, [%1], #16 \n" + "ld1 {v1.16b}, [%2], #16 \n" + "subs %w3, %w3, #16 \n" + "urhadd v0.16b, v0.16b, v1.16b \n" + "urhadd v0.16b, v0.16b, v1.16b \n" + "st1 {v0.16b}, [%0], #16 \n" + "b.gt 25b \n" + "b 99f \n" - // Blend 50 / 50. 
- "50: \n" - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" - MEMACCESS(2) - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "urhadd v0.16b, v0.16b, v1.16b \n" - MEMACCESS(0) - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 50b \n" - "b 99f \n" + // Blend 50 / 50. + "50: \n" + "ld1 {v0.16b}, [%1], #16 \n" + "ld1 {v1.16b}, [%2], #16 \n" + "subs %w3, %w3, #16 \n" + "urhadd v0.16b, v0.16b, v1.16b \n" + "st1 {v0.16b}, [%0], #16 \n" + "b.gt 50b \n" + "b 99f \n" - // Blend 75 / 25. - "75: \n" - MEMACCESS(1) - "ld1 {v1.16b}, [%1], #16 \n" - MEMACCESS(2) - "ld1 {v0.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "urhadd v0.16b, v0.16b, v1.16b \n" - "urhadd v0.16b, v0.16b, v1.16b \n" - MEMACCESS(0) - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 75b \n" - "b 99f \n" + // Blend 75 / 25. + "75: \n" + "ld1 {v1.16b}, [%1], #16 \n" + "ld1 {v0.16b}, [%2], #16 \n" + "subs %w3, %w3, #16 \n" + "urhadd v0.16b, v0.16b, v1.16b \n" + "urhadd v0.16b, v0.16b, v1.16b \n" + "st1 {v0.16b}, [%0], #16 \n" + "b.gt 75b \n" + "b 99f \n" - // Blend 100 / 0 - Copy row unchanged. - "100: \n" - MEMACCESS(1) - "ld1 {v0.16b}, [%1], #16 \n" - "subs %w3, %w3, #16 \n" - MEMACCESS(0) - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 100b \n" + // Blend 100 / 0 - Copy row unchanged. + "100: \n" + "ld1 {v0.16b}, [%1], #16 \n" + "subs %w3, %w3, #16 \n" + "st1 {v0.16b}, [%0], #16 \n" + "b.gt 100b \n" - "99: \n" - MEMACCESS(0) - "st1 {v0.b}[15], [%0] \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(src_stride), // %2 - "+r"(dst_width), // %3 - "+r"(source_y_fraction),// %4 - "+r"(y_fraction) // %5 - : - : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc" - ); + "99: \n" + "st1 {v0.b}[15], [%0] \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+r"(src_stride), // %2 + "+r"(dst_width), // %3 + "+r"(source_y_fraction), // %4 + "+r"(y_fraction) // %5 + : + : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc"); } -void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - "1: \n" - // load even pixels into q0, odd into q1 - MEMACCESS (0) - "ld2 {v0.4s, v1.4s}, [%0], #32 \n" - MEMACCESS (0) - "ld2 {v2.4s, v3.4s}, [%0], #32 \n" - "subs %w2, %w2, #8 \n" // 8 processed per loop - MEMACCESS (1) - "st1 {v1.16b}, [%1], #16 \n" // store odd pixels - MEMACCESS (1) - "st1 {v3.16b}, [%1], #16 \n" - "b.gt 1b \n" - : "+r" (src_ptr), // %0 - "+r" (dst), // %1 - "+r" (dst_width) // %2 - : - : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List - ); +void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + // load 16 ARGB pixels with even pixels into q0/q2, odd into q1/q3 + "ld4 {v0.4s,v1.4s,v2.4s,v3.4s}, [%0], #64 \n" + "subs %w2, %w2, #8 \n" // 8 processed per loop + "mov v2.16b, v3.16b \n" + "st2 {v1.4s,v2.4s}, [%1], #32 \n" // store 8 odd pixels + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(dst), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List + ); } -void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - asm volatile ( - "1: \n" - MEMACCESS (0) - // load 8 ARGB pixels. - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" - "subs %w2, %w2, #8 \n" // 8 processed per loop. - "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. - "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. - "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. 
- "uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts. - "rshrn v0.8b, v0.8h, #1 \n" // downshift, round and pack - "rshrn v1.8b, v1.8h, #1 \n" - "rshrn v2.8b, v2.8h, #1 \n" - "rshrn v3.8b, v3.8h, #1 \n" - MEMACCESS (1) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List - ); +void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + // load 16 ARGB pixels with even pixels into q0/q2, odd into q1/q3 + "ld4 {v0.4s,v1.4s,v2.4s,v3.4s}, [%0], #64 \n" + "subs %w2, %w2, #8 \n" // 8 processed per loop + + "urhadd v0.16b, v0.16b, v1.16b \n" // rounding half add + "urhadd v1.16b, v2.16b, v3.16b \n" + "st2 {v0.4s,v1.4s}, [%1], #32 \n" // store 8 pixels + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(dst_width) // %2 + : + : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List + ); } -void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - // change the stride to row 2 pointer - "add %1, %1, %0 \n" - "1: \n" - MEMACCESS (0) - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 8 ARGB pixels. - "subs %w3, %w3, #8 \n" // 8 processed per loop. - "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. - "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. - "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. - "uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts. - MEMACCESS (1) - "ld4 {v16.16b,v17.16b,v18.16b,v19.16b}, [%1], #64 \n" // load 8 more ARGB pixels. - "uadalp v0.8h, v16.16b \n" // B 16 bytes -> 8 shorts. - "uadalp v1.8h, v17.16b \n" // G 16 bytes -> 8 shorts. - "uadalp v2.8h, v18.16b \n" // R 16 bytes -> 8 shorts. - "uadalp v3.8h, v19.16b \n" // A 16 bytes -> 8 shorts. - "rshrn v0.8b, v0.8h, #2 \n" // downshift, round and pack - "rshrn v1.8b, v1.8h, #2 \n" - "rshrn v2.8b, v2.8h, #2 \n" - "rshrn v3.8b, v3.8h, #2 \n" - MEMACCESS (2) - "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" - "b.gt 1b \n" - : "+r" (src_ptr), // %0 - "+r" (src_stride), // %1 - "+r" (dst), // %2 - "+r" (dst_width) // %3 - : - : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19" - ); +void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width) { + asm volatile( + // change the stride to row 2 pointer + "add %1, %1, %0 \n" + "1: \n" + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 8 ARGB + "subs %w3, %w3, #8 \n" // 8 processed per loop. + "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. + "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. + "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. + "uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts. + "ld4 {v16.16b,v17.16b,v18.16b,v19.16b}, [%1], #64 \n" // load 8 + "uadalp v0.8h, v16.16b \n" // B 16 bytes -> 8 shorts. + "uadalp v1.8h, v17.16b \n" // G 16 bytes -> 8 shorts. + "uadalp v2.8h, v18.16b \n" // R 16 bytes -> 8 shorts. + "uadalp v3.8h, v19.16b \n" // A 16 bytes -> 8 shorts. 
+ "rshrn v0.8b, v0.8h, #2 \n" // round and pack + "rshrn v1.8b, v1.8h, #2 \n" + "rshrn v2.8b, v2.8h, #2 \n" + "rshrn v3.8b, v3.8h, #2 \n" + "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" + "b.gt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(src_stride), // %1 + "+r"(dst), // %2 + "+r"(dst_width) // %3 + : + : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); } // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. -void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, uint8* dst_argb, int dst_width) { - asm volatile ( - "1: \n" - MEMACCESS(0) - "ld1 {v0.s}[0], [%0], %3 \n" - MEMACCESS(0) - "ld1 {v0.s}[1], [%0], %3 \n" - MEMACCESS(0) - "ld1 {v0.s}[2], [%0], %3 \n" - MEMACCESS(0) - "ld1 {v0.s}[3], [%0], %3 \n" - "subs %w2, %w2, #4 \n" // 4 pixels per loop. - MEMACCESS(1) - "st1 {v0.16b}, [%1], #16 \n" - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - : "r"((int64)(src_stepx * 4)) // %3 - : "memory", "cc", "v0" - ); +void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width) { + (void)src_stride; + asm volatile( + "1: \n" + "ld1 {v0.s}[0], [%0], %3 \n" + "ld1 {v0.s}[1], [%0], %3 \n" + "ld1 {v0.s}[2], [%0], %3 \n" + "ld1 {v0.s}[3], [%0], %3 \n" + "subs %w2, %w2, #4 \n" // 4 pixels per loop. + "st1 {v0.16b}, [%1], #16 \n" + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(dst_argb), // %1 + "+r"(dst_width) // %2 + : "r"((int64_t)(src_stepx * 4)) // %3 + : "memory", "cc", "v0"); } // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. // TODO(Yang Zhang): Might be worth another optimization pass in future. // It could be upgraded to 8 pixels at a time to start with. -void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, +void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width) { - asm volatile ( - "add %1, %1, %0 \n" - "1: \n" - MEMACCESS(0) - "ld1 {v0.8b}, [%0], %4 \n" // Read 4 2x2 blocks -> 2x1 - MEMACCESS(1) - "ld1 {v1.8b}, [%1], %4 \n" - MEMACCESS(0) - "ld1 {v2.8b}, [%0], %4 \n" - MEMACCESS(1) - "ld1 {v3.8b}, [%1], %4 \n" - MEMACCESS(0) - "ld1 {v4.8b}, [%0], %4 \n" - MEMACCESS(1) - "ld1 {v5.8b}, [%1], %4 \n" - MEMACCESS(0) - "ld1 {v6.8b}, [%0], %4 \n" - MEMACCESS(1) - "ld1 {v7.8b}, [%1], %4 \n" - "uaddl v0.8h, v0.8b, v1.8b \n" - "uaddl v2.8h, v2.8b, v3.8b \n" - "uaddl v4.8h, v4.8b, v5.8b \n" - "uaddl v6.8h, v6.8b, v7.8b \n" - "mov v16.d[1], v0.d[1] \n" // ab_cd -> ac_bd - "mov v0.d[1], v2.d[0] \n" - "mov v2.d[0], v16.d[1] \n" - "mov v16.d[1], v4.d[1] \n" // ef_gh -> eg_fh - "mov v4.d[1], v6.d[0] \n" - "mov v6.d[0], v16.d[1] \n" - "add v0.8h, v0.8h, v2.8h \n" // (a+b)_(c+d) - "add v4.8h, v4.8h, v6.8h \n" // (e+f)_(g+h) - "rshrn v0.8b, v0.8h, #2 \n" // first 2 pixels. - "rshrn2 v0.16b, v4.8h, #2 \n" // next 2 pixels. - "subs %w3, %w3, #4 \n" // 4 pixels per loop. 
- MEMACCESS(2) - "st1 {v0.16b}, [%2], #16 \n" - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stride), // %1 - "+r"(dst_argb), // %2 - "+r"(dst_width) // %3 - : "r"((int64)(src_stepx * 4)) // %4 - : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" - ); + uint8_t* dst_argb, + int dst_width) { + asm volatile( + "add %1, %1, %0 \n" + "1: \n" + "ld1 {v0.8b}, [%0], %4 \n" // Read 4 2x2 -> 2x1 + "ld1 {v1.8b}, [%1], %4 \n" + "ld1 {v2.8b}, [%0], %4 \n" + "ld1 {v3.8b}, [%1], %4 \n" + "ld1 {v4.8b}, [%0], %4 \n" + "ld1 {v5.8b}, [%1], %4 \n" + "ld1 {v6.8b}, [%0], %4 \n" + "ld1 {v7.8b}, [%1], %4 \n" + "uaddl v0.8h, v0.8b, v1.8b \n" + "uaddl v2.8h, v2.8b, v3.8b \n" + "uaddl v4.8h, v4.8b, v5.8b \n" + "uaddl v6.8h, v6.8b, v7.8b \n" + "mov v16.d[1], v0.d[1] \n" // ab_cd -> ac_bd + "mov v0.d[1], v2.d[0] \n" + "mov v2.d[0], v16.d[1] \n" + "mov v16.d[1], v4.d[1] \n" // ef_gh -> eg_fh + "mov v4.d[1], v6.d[0] \n" + "mov v6.d[0], v16.d[1] \n" + "add v0.8h, v0.8h, v2.8h \n" // (a+b)_(c+d) + "add v4.8h, v4.8h, v6.8h \n" // (e+f)_(g+h) + "rshrn v0.8b, v0.8h, #2 \n" // first 2 pixels. + "rshrn2 v0.16b, v4.8h, #2 \n" // next 2 pixels. + "subs %w3, %w3, #4 \n" // 4 pixels per loop. + "st1 {v0.16b}, [%2], #16 \n" + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(src_stride), // %1 + "+r"(dst_argb), // %2 + "+r"(dst_width) // %3 + : "r"((int64_t)(src_stepx * 4)) // %4 + : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } // TODO(Yang Zhang): Investigate less load instructions for // the x/dx stepping -#define LOAD1_DATA32_LANE(vn, n) \ - "lsr %5, %3, #16 \n" \ - "add %6, %1, %5, lsl #2 \n" \ - "add %3, %3, %4 \n" \ - MEMACCESS(6) \ - "ld1 {"#vn".s}["#n"], [%6] \n" +#define LOAD1_DATA32_LANE(vn, n) \ + "lsr %5, %3, #16 \n" \ + "add %6, %1, %5, lsl #2 \n" \ + "add %3, %3, %4 \n" \ + "ld1 {" #vn ".s}[" #n "], [%6] \n" -void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - const uint8* src_tmp = src_argb; - int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning. 
- int64 x64 = (int64) x; - int64 dx64 = (int64) dx; - int64 tmp64; - asm volatile ( - "1: \n" - LOAD1_DATA32_LANE(v0, 0) - LOAD1_DATA32_LANE(v0, 1) - LOAD1_DATA32_LANE(v0, 2) - LOAD1_DATA32_LANE(v0, 3) - LOAD1_DATA32_LANE(v1, 0) - LOAD1_DATA32_LANE(v1, 1) - LOAD1_DATA32_LANE(v1, 2) - LOAD1_DATA32_LANE(v1, 3) - - MEMACCESS(0) - "st1 {v0.4s, v1.4s}, [%0], #32 \n" // store pixels - "subs %w2, %w2, #8 \n" // 8 processed per loop - "b.gt 1b \n" - : "+r"(dst_argb), // %0 - "+r"(src_argb), // %1 - "+r"(dst_width64), // %2 - "+r"(x64), // %3 - "+r"(dx64), // %4 - "=&r"(tmp64), // %5 - "+r"(src_tmp) // %6 - : - : "memory", "cc", "v0", "v1" - ); +void ScaleARGBCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { + const uint8_t* src_tmp = src_argb; + int64_t x64 = (int64_t)x; // NOLINT + int64_t dx64 = (int64_t)dx; // NOLINT + int64_t tmp64; + asm volatile( + "1: \n" + // clang-format off + LOAD1_DATA32_LANE(v0, 0) + LOAD1_DATA32_LANE(v0, 1) + LOAD1_DATA32_LANE(v0, 2) + LOAD1_DATA32_LANE(v0, 3) + LOAD1_DATA32_LANE(v1, 0) + LOAD1_DATA32_LANE(v1, 1) + LOAD1_DATA32_LANE(v1, 2) + LOAD1_DATA32_LANE(v1, 3) + // clang-format on + "st1 {v0.4s, v1.4s}, [%0], #32 \n" // store pixels + "subs %w2, %w2, #8 \n" // 8 processed per loop + "b.gt 1b \n" + : "+r"(dst_argb), // %0 + "+r"(src_argb), // %1 + "+r"(dst_width), // %2 + "+r"(x64), // %3 + "+r"(dx64), // %4 + "=&r"(tmp64), // %5 + "+r"(src_tmp) // %6 + : + : "memory", "cc", "v0", "v1"); } #undef LOAD1_DATA32_LANE // TODO(Yang Zhang): Investigate less load instructions for // the x/dx stepping -#define LOAD2_DATA32_LANE(vn1, vn2, n) \ - "lsr %5, %3, #16 \n" \ - "add %6, %1, %5, lsl #2 \n" \ - "add %3, %3, %4 \n" \ - MEMACCESS(6) \ - "ld2 {"#vn1".s, "#vn2".s}["#n"], [%6] \n" +#define LOAD2_DATA32_LANE(vn1, vn2, n) \ + "lsr %5, %3, #16 \n" \ + "add %6, %1, %5, lsl #2 \n" \ + "add %3, %3, %4 \n" \ + "ld2 {" #vn1 ".s, " #vn2 ".s}[" #n "], [%6] \n" -void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { +void ScaleARGBFilterCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { int dx_offset[4] = {0, 1, 2, 3}; int* tmp = dx_offset; - const uint8* src_tmp = src_argb; - int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning. - int64 x64 = (int64) x; - int64 dx64 = (int64) dx; + const uint8_t* src_tmp = src_argb; + int64_t x64 = (int64_t)x; // NOLINT + int64_t dx64 = (int64_t)dx; // NOLINT asm volatile ( "dup v0.4s, %w3 \n" // x "dup v1.4s, %w4 \n" // dx @@ -1014,14 +958,13 @@ void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb, "shrn v0.8b, v16.8h, #7 \n" "shrn2 v0.16b, v17.8h, #7 \n" - MEMACCESS(0) "st1 {v0.4s}, [%0], #16 \n" // store pixels "add v5.4s, v5.4s, v6.4s \n" "subs %w2, %w2, #4 \n" // 4 processed per loop "b.gt 1b \n" : "+r"(dst_argb), // %0 "+r"(src_argb), // %1 - "+r"(dst_width64), // %2 + "+r"(dst_width), // %2 "+r"(x64), // %3 "+r"(dx64), // %4 "+r"(tmp), // %5 @@ -1034,6 +977,85 @@ void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb, #undef LOAD2_DATA32_LANE +// Read 16x2 average down and write 8x1. 
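The LOAD1_DATA32_LANE/LOAD2_DATA32_LANE stepping above is the usual 16.16 fixed-point source walk. A minimal scalar sketch of what ScaleARGBCols_NEON computes, assuming each ARGB pixel is handled as one uint32_t (names illustrative, not libyuv API):

#include <stdint.h>

static void ScaleARGBCols_sketch(uint32_t* dst, const uint32_t* src,
                                 int dst_width, int x, int dx) {
  for (int i = 0; i < dst_width; ++i) {
    dst[i] = src[x >> 16];  // integer part of x: "lsr %5, %3, #16" + "lsl #2"
    x += dx;                // per-pixel step: "add %3, %3, %4"
  }
}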
+void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
+                              ptrdiff_t src_stride,
+                              uint16_t* dst,
+                              int dst_width) {
+  asm volatile(
+      // change the stride to row 2 pointer
+      "add        %1, %0, %1, lsl #1         \n"  // ptr + stride * 2
+      "1:                                    \n"
+      "ld1        {v0.8h, v1.8h}, [%0], #32  \n"  // load row 1 and post inc
+      "ld1        {v2.8h, v3.8h}, [%1], #32  \n"  // load row 2 and post inc
+      "subs       %w3, %w3, #8               \n"  // 8 processed per loop
+      "uaddlp     v0.4s, v0.8h               \n"  // row 1 add adjacent
+      "uaddlp     v1.4s, v1.8h               \n"
+      "uadalp     v0.4s, v2.8h               \n"  // +row 2 add adjacent
+      "uadalp     v1.4s, v3.8h               \n"
+      "rshrn      v0.4h, v0.4s, #2           \n"  // round and pack
+      "rshrn2     v0.8h, v1.4s, #2           \n"
+      "st1        {v0.8h}, [%2], #16         \n"
+      "b.gt       1b                         \n"
+      : "+r"(src_ptr),     // %0
+        "+r"(src_stride),  // %1
+        "+r"(dst),         // %2
+        "+r"(dst_width)    // %3
+      :
+      : "v0", "v1", "v2", "v3"  // Clobber List
+  );
+}
+
+// Read 8x2 upsample with filtering and write 16x1.
+// Actually reads an extra pixel, so 9x2.
+void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
+                         ptrdiff_t src_stride,
+                         uint16_t* dst,
+                         int dst_width) {
+  asm volatile(
+      "add        %1, %0, %1, lsl #1         \n"  // ptr + stride * 2
+      "movi       v0.8h, #9                  \n"  // constants
+      "movi       v1.4s, #3                  \n"
+
+      "1:                                    \n"
+      "ld1        {v3.8h}, [%0], %4          \n"  // TL read first 8
+      "ld1        {v4.8h}, [%0], %5          \n"  // TR read 8 offset by 1
+      "ld1        {v5.8h}, [%1], %4          \n"  // BL read 8 from next row
+      "ld1        {v6.8h}, [%1], %5          \n"  // BR offset by 1
+      "subs       %w3, %w3, #16              \n"  // 16 dst pixels per loop
+      "umull      v16.4s, v3.4h, v0.4h       \n"
+      "umull2     v7.4s, v3.8h, v0.8h        \n"
+      "umull      v18.4s, v4.4h, v0.4h       \n"
+      "umull2     v17.4s, v4.8h, v0.8h       \n"
+      "uaddw      v16.4s, v16.4s, v6.4h      \n"
+      "uaddl2     v19.4s, v6.8h, v3.8h       \n"
+      "uaddl      v3.4s, v6.4h, v3.4h        \n"
+      "uaddw2     v6.4s, v7.4s, v6.8h        \n"
+      "uaddl2     v7.4s, v5.8h, v4.8h        \n"
+      "uaddl      v4.4s, v5.4h, v4.4h        \n"
+      "uaddw      v18.4s, v18.4s, v5.4h      \n"
+      "mla        v16.4s, v4.4s, v1.4s       \n"
+      "mla        v18.4s, v3.4s, v1.4s       \n"
+      "mla        v6.4s, v7.4s, v1.4s        \n"
+      "uaddw2     v4.4s, v17.4s, v5.8h       \n"
+      "uqrshrn    v16.4h, v16.4s, #4         \n"
+      "mla        v4.4s, v19.4s, v1.4s       \n"
+      "uqrshrn2   v16.8h, v6.4s, #4          \n"
+      "uqrshrn    v17.4h, v18.4s, #4         \n"
+      "uqrshrn2   v17.8h, v4.4s, #4          \n"
+      "st2        {v16.8h-v17.8h}, [%2], #32 \n"
+      "b.gt       1b                         \n"
+      : "+r"(src_ptr),     // %0
+        "+r"(src_stride),  // %1
+        "+r"(dst),         // %2
+        "+r"(dst_width)    // %3
+      : "r"(2LL),          // %4
+        "r"(14LL)          // %5
+      : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18",
+        "v19"  // Clobber List
+  );
+}
+
 #endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)

 #ifdef __cplusplus
diff --git a/media/libvpx/libvpx/third_party/libyuv/source/scale_win.cc b/media/libvpx/libvpx/third_party/libyuv/source/scale_win.cc
index f17097365cc0..c5fc86f3e96c 100644
--- a/media/libvpx/libvpx/third_party/libyuv/source/scale_win.cc
+++ b/media/libvpx/libvpx/third_party/libyuv/source/scale_win.cc
@@ -17,97 +17,93 @@ extern "C" {
 #endif

 // This module is for 32 bit Visual C x86 and clangcl
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)

 // Offsets for source bytes 0 to 9
-static uvec8 kShuf0 =
-  { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
+static const uvec8 kShuf0 = {0,   1,   3,   4,   5,   7,   8,   9,
+                             128, 128, 128, 128, 128, 128, 128, 128};

 // Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
-static uvec8 kShuf1 = - { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 }; +static const uvec8 kShuf1 = {3, 4, 5, 7, 8, 9, 11, 12, + 128, 128, 128, 128, 128, 128, 128, 128}; // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. -static uvec8 kShuf2 = - { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 }; +static const uvec8 kShuf2 = {5, 7, 8, 9, 11, 12, 13, 15, + 128, 128, 128, 128, 128, 128, 128, 128}; // Offsets for source bytes 0 to 10 -static uvec8 kShuf01 = - { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 }; +static const uvec8 kShuf01 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; // Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13. -static uvec8 kShuf11 = - { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 }; +static const uvec8 kShuf11 = {2, 3, 4, 5, 5, 6, 6, 7, + 8, 9, 9, 10, 10, 11, 12, 13}; // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. -static uvec8 kShuf21 = - { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 }; +static const uvec8 kShuf21 = {5, 6, 6, 7, 8, 9, 9, 10, + 10, 11, 12, 13, 13, 14, 14, 15}; // Coefficients for source bytes 0 to 10 -static uvec8 kMadd01 = - { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 }; +static const uvec8 kMadd01 = {3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2}; // Coefficients for source bytes 10 to 21 -static uvec8 kMadd11 = - { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 }; +static const uvec8 kMadd11 = {1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1}; // Coefficients for source bytes 21 to 31 -static uvec8 kMadd21 = - { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 }; +static const uvec8 kMadd21 = {2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3}; // Coefficients for source bytes 21 to 31 -static vec16 kRound34 = - { 2, 2, 2, 2, 2, 2, 2, 2 }; +static const vec16 kRound34 = {2, 2, 2, 2, 2, 2, 2, 2}; -static uvec8 kShuf38a = - { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; +static const uvec8 kShuf38a = {0, 3, 6, 8, 11, 14, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128}; -static uvec8 kShuf38b = - { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 }; +static const uvec8 kShuf38b = {128, 128, 128, 128, 128, 128, 0, 3, + 6, 8, 11, 14, 128, 128, 128, 128}; // Arrange words 0,3,6 into 0,1,2 -static uvec8 kShufAc = - { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; +static const uvec8 kShufAc = {0, 1, 6, 7, 12, 13, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128}; // Arrange words 0,3,6 into 3,4,5 -static uvec8 kShufAc3 = - { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 }; +static const uvec8 kShufAc3 = {128, 128, 128, 128, 128, 128, 0, 1, + 6, 7, 12, 13, 128, 128, 128, 128}; // Scaling values for boxes of 3x3 and 2x3 -static uvec16 kScaleAc33 = - { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 }; +static const uvec16 kScaleAc33 = {65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, + 65536 / 9, 65536 / 6, 0, 0}; // Arrange first value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb0 = - { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 }; +static const uvec8 kShufAb0 = {0, 128, 3, 128, 6, 128, 8, 128, + 11, 128, 14, 128, 128, 128, 128, 128}; // Arrange second value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb1 = - { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 }; +static const uvec8 kShufAb1 = {1, 128, 4, 128, 7, 128, 9, 128, + 12, 128, 15, 128, 128, 128, 128, 128}; // Arrange 
third value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb2 = - { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 }; +static const uvec8 kShufAb2 = {2, 128, 5, 128, 128, 128, 10, 128, + 13, 128, 128, 128, 128, 128, 128, 128}; // Scaling values for boxes of 3x2 and 2x2 -static uvec16 kScaleAb2 = - { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 }; +static const uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, + 65536 / 3, 65536 / 2, 0, 0}; // Reads 32 pixels, throws half away and writes 16 pixels. -__declspec(naked) -void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown2_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { - mov eax, [esp + 4] // src_ptr - // src_stride ignored - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width + mov eax, [esp + 4] // src_ptr + // src_stride ignored + mov edx, [esp + 12] // dst_ptr + mov ecx, [esp + 16] // dst_width wloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] - psrlw xmm0, 8 // isolate odd pixels. + psrlw xmm0, 8 // isolate odd pixels. psrlw xmm1, 8 packuswb xmm0, xmm1 movdqu [edx], xmm0 @@ -120,27 +116,28 @@ void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, } // Blends 32x1 rectangle to 16x1. -__declspec(naked) -void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { - mov eax, [esp + 4] // src_ptr - // src_stride - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width + mov eax, [esp + 4] // src_ptr + // src_stride + mov edx, [esp + 12] // dst_ptr + mov ecx, [esp + 16] // dst_width - pcmpeqb xmm4, xmm4 // constant 0x0101 + pcmpeqb xmm4, xmm4 // constant 0x0101 psrlw xmm4, 15 packuswb xmm4, xmm4 - pxor xmm5, xmm5 // constant 0 + pxor xmm5, xmm5 // constant 0 wloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] - pmaddubsw xmm0, xmm4 // horizontal add + pmaddubsw xmm0, xmm4 // horizontal add pmaddubsw xmm1, xmm4 - pavgw xmm0, xmm5 // (x + 1) / 2 + pavgw xmm0, xmm5 // (x + 1) / 2 pavgw xmm1, xmm5 packuswb xmm0, xmm1 movdqu [edx], xmm0 @@ -153,20 +150,21 @@ void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, } // Blends 32x2 rectangle to 16x1. 
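The SSSE3 linear kernel above leans on two instruction-level identities: pmaddubsw against the 0x0101 constant sums each adjacent byte pair into a word, and pavgw against zero is a rounding halve. A scalar sketch of the per-output-byte math (illustrative, assuming <stdint.h>); the same pair reappears in the 32x2 box kernel that follows:

static uint8_t AveragePair_sketch(uint8_t a, uint8_t b) {
  uint16_t sum = (uint16_t)(a + b);  // pmaddubsw with weights 1,1
  return (uint8_t)((sum + 1) >> 1);  // pavgw(sum, 0) == (sum + 1) / 2
}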
-__declspec(naked) -void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_ptr - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_ptr - mov ecx, [esp + 4 + 16] // dst_width + mov eax, [esp + 4 + 4] // src_ptr + mov esi, [esp + 4 + 8] // src_stride + mov edx, [esp + 4 + 12] // dst_ptr + mov ecx, [esp + 4 + 16] // dst_width - pcmpeqb xmm4, xmm4 // constant 0x0101 + pcmpeqb xmm4, xmm4 // constant 0x0101 psrlw xmm4, 15 packuswb xmm4, xmm4 - pxor xmm5, xmm5 // constant 0 + pxor xmm5, xmm5 // constant 0 wloop: movdqu xmm0, [eax] @@ -174,15 +172,15 @@ void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, movdqu xmm2, [eax + esi] movdqu xmm3, [eax + esi + 16] lea eax, [eax + 32] - pmaddubsw xmm0, xmm4 // horizontal add + pmaddubsw xmm0, xmm4 // horizontal add pmaddubsw xmm1, xmm4 pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 - paddw xmm0, xmm2 // vertical add + paddw xmm0, xmm2 // vertical add paddw xmm1, xmm3 psrlw xmm0, 1 psrlw xmm1, 1 - pavgw xmm0, xmm5 // (x + 1) / 2 + pavgw xmm0, xmm5 // (x + 1) / 2 pavgw xmm1, xmm5 packuswb xmm0, xmm1 movdqu [edx], xmm0 @@ -197,23 +195,24 @@ void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, #ifdef HAS_SCALEROWDOWN2_AVX2 // Reads 64 pixels, throws half away and writes 32 pixels. -__declspec(naked) -void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown2_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { - mov eax, [esp + 4] // src_ptr - // src_stride ignored - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width + mov eax, [esp + 4] // src_ptr + // src_stride ignored + mov edx, [esp + 12] // dst_ptr + mov ecx, [esp + 16] // dst_width wloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] - vpsrlw ymm0, ymm0, 8 // isolate odd pixels. + vpsrlw ymm0, ymm0, 8 // isolate odd pixels. vpsrlw ymm1, ymm1, 8 vpackuswb ymm0, ymm0, ymm1 - vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb + vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 @@ -225,30 +224,31 @@ void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, } // Blends 64x1 rectangle to 32x1. 
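A note on the "unmutate vpackuswb" step in the AVX2 kernels above and below (an illustrative model, not libyuv code): AVX2 vpackuswb packs within each 128-bit lane, so with sequential input the four output qwords come out in the order A0,B0,A1,B1; vpermq with immediate 0xd8 (qword selectors 0,2,1,3) restores A0,A1,B0,B1:

#include <stdint.h>

static void UnmutatePackOrder_sketch(uint64_t q[4]) {
  uint64_t t = q[1];  // vpermq imm 0xd8 picks qwords 0, 2, 1, 3
  q[1] = q[2];
  q[2] = t;
}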
-__declspec(naked) -void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { - mov eax, [esp + 4] // src_ptr - // src_stride - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width + mov eax, [esp + 4] // src_ptr + // src_stride + mov edx, [esp + 12] // dst_ptr + mov ecx, [esp + 16] // dst_width - vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b + vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b vpsrlw ymm4, ymm4, 15 vpackuswb ymm4, ymm4, ymm4 - vpxor ymm5, ymm5, ymm5 // constant 0 + vpxor ymm5, ymm5, ymm5 // constant 0 wloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] - vpmaddubsw ymm0, ymm0, ymm4 // horizontal add + vpmaddubsw ymm0, ymm0, ymm4 // horizontal add vpmaddubsw ymm1, ymm1, ymm4 - vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2 + vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2 vpavgw ymm1, ymm1, ymm5 vpackuswb ymm0, ymm0, ymm1 - vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb + vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 @@ -262,20 +262,21 @@ void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, // For rounding, average = (sum + 2) / 4 // becomes average((sum >> 1), 0) // Blends 64x2 rectangle to 32x1. -__declspec(naked) -void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_ptr - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_ptr - mov ecx, [esp + 4 + 16] // dst_width + mov eax, [esp + 4 + 4] // src_ptr + mov esi, [esp + 4 + 8] // src_stride + mov edx, [esp + 4 + 12] // dst_ptr + mov ecx, [esp + 4 + 16] // dst_width - vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b + vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b vpsrlw ymm4, ymm4, 15 vpackuswb ymm4, ymm4, ymm4 - vpxor ymm5, ymm5, ymm5 // constant 0 + vpxor ymm5, ymm5, ymm5 // constant 0 wloop: vmovdqu ymm0, [eax] @@ -283,18 +284,18 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, vmovdqu ymm2, [eax + esi] vmovdqu ymm3, [eax + esi + 32] lea eax, [eax + 64] - vpmaddubsw ymm0, ymm0, ymm4 // horizontal add + vpmaddubsw ymm0, ymm0, ymm4 // horizontal add vpmaddubsw ymm1, ymm1, ymm4 vpmaddubsw ymm2, ymm2, ymm4 vpmaddubsw ymm3, ymm3, ymm4 - vpaddw ymm0, ymm0, ymm2 // vertical add + vpaddw ymm0, ymm0, ymm2 // vertical add vpaddw ymm1, ymm1, ymm3 - vpsrlw ymm0, ymm0, 1 // (x + 2) / 4 = (x / 2 + 1) / 2 + vpsrlw ymm0, ymm0, 1 // (x + 2) / 4 = (x / 2 + 1) / 2 vpsrlw ymm1, ymm1, 1 - vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2 + vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2 vpavgw ymm1, ymm1, ymm5 vpackuswb ymm0, ymm0, ymm1 - vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb + vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 @@ -308,15 +309,16 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, #endif // HAS_SCALEROWDOWN2_AVX2 // Point samples 32 pixels to 8 pixels. 
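In scalar terms, the 4:1 point sampler that follows keeps one source pixel out of every four; which pixel survives is dictated by the 0x00ff0000 mask it builds (a hypothetical sketch, not part of the patch):

#include <stdint.h>

/* Hypothetical scalar model of 4:1 point sampling: the 0x00ff0000 mask
 * generated below keeps byte 2 of each 4-byte group, so that is the
 * pixel modeled here. */
static void ScaleRowDown4_Sketch(const uint8_t* src_ptr, uint8_t* dst_ptr,
                                 int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst_ptr[x] = src_ptr[4 * x + 2];
  }
}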
-__declspec(naked) -void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown4_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { - mov eax, [esp + 4] // src_ptr - // src_stride ignored - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff0000 + mov eax, [esp + 4] // src_ptr + // src_stride ignored + mov edx, [esp + 12] // dst_ptr + mov ecx, [esp + 16] // dst_width + pcmpeqb xmm5, xmm5 // generate mask 0x00ff0000 psrld xmm5, 24 pslld xmm5, 16 @@ -339,50 +341,51 @@ void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, } // Blends 32x4 rectangle to 8x1. -__declspec(naked) -void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_ptr - mov esi, [esp + 8 + 8] // src_stride - mov edx, [esp + 8 + 12] // dst_ptr - mov ecx, [esp + 8 + 16] // dst_width + mov eax, [esp + 8 + 4] // src_ptr + mov esi, [esp + 8 + 8] // src_stride + mov edx, [esp + 8 + 12] // dst_ptr + mov ecx, [esp + 8 + 16] // dst_width lea edi, [esi + esi * 2] // src_stride * 3 - pcmpeqb xmm4, xmm4 // constant 0x0101 + pcmpeqb xmm4, xmm4 // constant 0x0101 psrlw xmm4, 15 movdqa xmm5, xmm4 packuswb xmm4, xmm4 - psllw xmm5, 3 // constant 0x0008 + psllw xmm5, 3 // constant 0x0008 wloop: - movdqu xmm0, [eax] // average rows + movdqu xmm0, [eax] // average rows movdqu xmm1, [eax + 16] movdqu xmm2, [eax + esi] movdqu xmm3, [eax + esi + 16] - pmaddubsw xmm0, xmm4 // horizontal add + pmaddubsw xmm0, xmm4 // horizontal add pmaddubsw xmm1, xmm4 pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 - paddw xmm0, xmm2 // vertical add rows 0, 1 + paddw xmm0, xmm2 // vertical add rows 0, 1 paddw xmm1, xmm3 movdqu xmm2, [eax + esi * 2] movdqu xmm3, [eax + esi * 2 + 16] pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 - paddw xmm0, xmm2 // add row 2 + paddw xmm0, xmm2 // add row 2 paddw xmm1, xmm3 movdqu xmm2, [eax + edi] movdqu xmm3, [eax + edi + 16] lea eax, [eax + 32] pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 - paddw xmm0, xmm2 // add row 3 + paddw xmm0, xmm2 // add row 3 paddw xmm1, xmm3 phaddw xmm0, xmm1 - paddw xmm0, xmm5 // + 8 for round - psrlw xmm0, 4 // /16 for average of 4 * 4 + paddw xmm0, xmm5 // + 8 for round + psrlw xmm0, 4 // /16 for average of 4 * 4 packuswb xmm0, xmm0 movq qword ptr [edx], xmm0 lea edx, [edx + 8] @@ -397,15 +400,16 @@ void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, #ifdef HAS_SCALEROWDOWN4_AVX2 // Point samples 64 pixels to 16 pixels. 
-__declspec(naked) -void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown4_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { - mov eax, [esp + 4] // src_ptr - // src_stride ignored - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff0000 + mov eax, [esp + 4] // src_ptr + // src_stride ignored + mov edx, [esp + 12] // dst_ptr + mov ecx, [esp + 16] // dst_width + vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff0000 vpsrld ymm5, ymm5, 24 vpslld ymm5, ymm5, 16 @@ -416,10 +420,10 @@ void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, vpand ymm0, ymm0, ymm5 vpand ymm1, ymm1, ymm5 vpackuswb ymm0, ymm0, ymm1 - vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb + vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vpsrlw ymm0, ymm0, 8 vpackuswb ymm0, ymm0, ymm0 - vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb + vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vmovdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 @@ -431,52 +435,53 @@ void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, } // Blends 64x4 rectangle to 16x1. -__declspec(naked) -void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { push esi push edi - mov eax, [esp + 8 + 4] // src_ptr - mov esi, [esp + 8 + 8] // src_stride - mov edx, [esp + 8 + 12] // dst_ptr - mov ecx, [esp + 8 + 16] // dst_width + mov eax, [esp + 8 + 4] // src_ptr + mov esi, [esp + 8 + 8] // src_stride + mov edx, [esp + 8 + 12] // dst_ptr + mov ecx, [esp + 8 + 16] // dst_width lea edi, [esi + esi * 2] // src_stride * 3 - vpcmpeqb ymm4, ymm4, ymm4 // constant 0x0101 + vpcmpeqb ymm4, ymm4, ymm4 // constant 0x0101 vpsrlw ymm4, ymm4, 15 - vpsllw ymm5, ymm4, 3 // constant 0x0008 + vpsllw ymm5, ymm4, 3 // constant 0x0008 vpackuswb ymm4, ymm4, ymm4 wloop: - vmovdqu ymm0, [eax] // average rows + vmovdqu ymm0, [eax] // average rows vmovdqu ymm1, [eax + 32] vmovdqu ymm2, [eax + esi] vmovdqu ymm3, [eax + esi + 32] - vpmaddubsw ymm0, ymm0, ymm4 // horizontal add + vpmaddubsw ymm0, ymm0, ymm4 // horizontal add vpmaddubsw ymm1, ymm1, ymm4 vpmaddubsw ymm2, ymm2, ymm4 vpmaddubsw ymm3, ymm3, ymm4 - vpaddw ymm0, ymm0, ymm2 // vertical add rows 0, 1 + vpaddw ymm0, ymm0, ymm2 // vertical add rows 0, 1 vpaddw ymm1, ymm1, ymm3 vmovdqu ymm2, [eax + esi * 2] vmovdqu ymm3, [eax + esi * 2 + 32] vpmaddubsw ymm2, ymm2, ymm4 vpmaddubsw ymm3, ymm3, ymm4 - vpaddw ymm0, ymm0, ymm2 // add row 2 + vpaddw ymm0, ymm0, ymm2 // add row 2 vpaddw ymm1, ymm1, ymm3 vmovdqu ymm2, [eax + edi] vmovdqu ymm3, [eax + edi + 32] lea eax, [eax + 64] vpmaddubsw ymm2, ymm2, ymm4 vpmaddubsw ymm3, ymm3, ymm4 - vpaddw ymm0, ymm0, ymm2 // add row 3 + vpaddw ymm0, ymm0, ymm2 // add row 3 vpaddw ymm1, ymm1, ymm3 - vphaddw ymm0, ymm0, ymm1 // mutates - vpermq ymm0, ymm0, 0xd8 // unmutate vphaddw - vpaddw ymm0, ymm0, ymm5 // + 8 for round - vpsrlw ymm0, ymm0, 4 // /32 for average of 4 * 4 + vphaddw ymm0, ymm0, ymm1 // mutates + vpermq ymm0, ymm0, 0xd8 // unmutate vphaddw + vpaddw ymm0, ymm0, ymm5 // + 8 for round + vpsrlw ymm0, ymm0, 4 // /32 for average of 4 * 4 vpackuswb ymm0, ymm0, ymm0 - vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb + vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vmovdqu [edx], xmm0 lea edx, [edx + 16] 
sub ecx, 16 @@ -494,14 +499,15 @@ void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, // Produces three 8 byte values. For each 8 bytes, 16 bytes are read. // Then shuffled to do the scaling. -__declspec(naked) -void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown34_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { - mov eax, [esp + 4] // src_ptr - // src_stride ignored - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width + mov eax, [esp + 4] // src_ptr + // src_stride ignored + mov edx, [esp + 12] // dst_ptr + mov ecx, [esp + 16] // dst_width movdqa xmm3, xmmword ptr kShuf0 movdqa xmm4, xmmword ptr kShuf1 movdqa xmm5, xmmword ptr kShuf2 @@ -541,16 +547,16 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, // xmm7 kRound34 // Note that movdqa+palign may be better than movdqu. -__declspec(naked) -void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_ptr - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_ptr - mov ecx, [esp + 4 + 16] // dst_width + mov eax, [esp + 4 + 4] // src_ptr + mov esi, [esp + 4 + 8] // src_stride + mov edx, [esp + 4 + 12] // dst_ptr + mov ecx, [esp + 4 + 16] // dst_width movdqa xmm2, xmmword ptr kShuf01 movdqa xmm3, xmmword ptr kShuf11 movdqa xmm4, xmmword ptr kShuf21 @@ -559,7 +565,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, movdqa xmm7, xmmword ptr kRound34 wloop: - movdqu xmm0, [eax] // pixels 0..7 + movdqu xmm0, [eax] // pixels 0..7 movdqu xmm1, [eax + esi] pavgb xmm0, xmm1 pshufb xmm0, xmm2 @@ -568,7 +574,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, psrlw xmm0, 2 packuswb xmm0, xmm0 movq qword ptr [edx], xmm0 - movdqu xmm0, [eax + 8] // pixels 8..15 + movdqu xmm0, [eax + 8] // pixels 8..15 movdqu xmm1, [eax + esi + 8] pavgb xmm0, xmm1 pshufb xmm0, xmm3 @@ -577,7 +583,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, psrlw xmm0, 2 packuswb xmm0, xmm0 movq qword ptr [edx + 8], xmm0 - movdqu xmm0, [eax + 16] // pixels 16..23 + movdqu xmm0, [eax + 16] // pixels 16..23 movdqu xmm1, [eax + esi + 16] lea eax, [eax + 32] pavgb xmm0, xmm1 @@ -598,16 +604,16 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, } // Note that movdqa+palign may be better than movdqu. 
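For contrast with the two-row box variants above and below, the unfiltered ScaleRowDown34 path above is pure byte selection; a hypothetical scalar sketch, with the surviving pixels assumed from the shuffle tables:

#include <stdint.h>

/* Hypothetical scalar model of the unfiltered 3/4 path: of each group
 * of 4 source pixels, 3 survive. Which three (assumed 0, 1 and 3 here)
 * is encoded in the kShuf0/kShuf1/kShuf2 tables, not spelled out in
 * this hunk. dst_width is a multiple of 3 for these row functions. */
static void ScaleRowDown34_Sketch(const uint8_t* src_ptr, uint8_t* dst_ptr,
                                  int dst_width) {
  int x;
  for (x = 0; x < dst_width; x += 3) {
    dst_ptr[x + 0] = src_ptr[0];
    dst_ptr[x + 1] = src_ptr[1];
    dst_ptr[x + 2] = src_ptr[3];
    src_ptr += 4;
  }
}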
-__declspec(naked) -void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_ptr - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_ptr - mov ecx, [esp + 4 + 16] // dst_width + mov eax, [esp + 4 + 4] // src_ptr + mov esi, [esp + 4 + 8] // src_stride + mov edx, [esp + 4 + 12] // dst_ptr + mov ecx, [esp + 4 + 16] // dst_width movdqa xmm2, xmmword ptr kShuf01 movdqa xmm3, xmmword ptr kShuf11 movdqa xmm4, xmmword ptr kShuf21 @@ -616,7 +622,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, movdqa xmm7, xmmword ptr kRound34 wloop: - movdqu xmm0, [eax] // pixels 0..7 + movdqu xmm0, [eax] // pixels 0..7 movdqu xmm1, [eax + esi] pavgb xmm1, xmm0 pavgb xmm0, xmm1 @@ -626,7 +632,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, psrlw xmm0, 2 packuswb xmm0, xmm0 movq qword ptr [edx], xmm0 - movdqu xmm0, [eax + 8] // pixels 8..15 + movdqu xmm0, [eax + 8] // pixels 8..15 movdqu xmm1, [eax + esi + 8] pavgb xmm1, xmm0 pavgb xmm0, xmm1 @@ -636,7 +642,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, psrlw xmm0, 2 packuswb xmm0, xmm0 movq qword ptr [edx + 8], xmm0 - movdqu xmm0, [eax + 16] // pixels 16..23 + movdqu xmm0, [eax + 16] // pixels 16..23 movdqu xmm1, [eax + esi + 16] lea eax, [eax + 32] pavgb xmm1, xmm0 @@ -660,26 +666,27 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, // 3/8 point sampler // Scale 32 pixels to 12 -__declspec(naked) -void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown38_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { - mov eax, [esp + 4] // src_ptr - // src_stride ignored - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width + mov eax, [esp + 4] // src_ptr + // src_stride ignored + mov edx, [esp + 12] // dst_ptr + mov ecx, [esp + 16] // dst_width movdqa xmm4, xmmword ptr kShuf38a movdqa xmm5, xmmword ptr kShuf38b xloop: - movdqu xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5 - movdqu xmm1, [eax + 16] // 16 pixels -> 6,7,8,9,10,11 + movdqu xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5 + movdqu xmm1, [eax + 16] // 16 pixels -> 6,7,8,9,10,11 lea eax, [eax + 32] pshufb xmm0, xmm4 pshufb xmm1, xmm5 paddusb xmm0, xmm1 - movq qword ptr [edx], xmm0 // write 12 pixels + movq qword ptr [edx], xmm0 // write 12 pixels movhlps xmm1, xmm0 movd [edx + 8], xmm1 lea edx, [edx + 12] @@ -691,23 +698,23 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, } // Scale 16x3 pixels to 6x1 with interpolation -__declspec(naked) -void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_ptr - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_ptr - mov ecx, [esp + 4 + 16] // dst_width + mov eax, [esp + 4 + 4] // src_ptr + mov esi, [esp + 4 + 8] // src_stride + mov edx, [esp + 4 + 12] // dst_ptr + mov ecx, [esp + 4 + 16] // dst_width movdqa xmm2, xmmword ptr kShufAc movdqa xmm3, xmmword ptr kShufAc3 movdqa xmm4, xmmword ptr kScaleAc33 pxor xmm5, xmm5 xloop: - movdqu xmm0, [eax] // sum up 3 rows 
into xmm0/1 + movdqu xmm0, [eax] // sum up 3 rows into xmm0/1 movdqu xmm6, [eax + esi] movhlps xmm1, xmm0 movhlps xmm7, xmm6 @@ -725,14 +732,14 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, paddusw xmm0, xmm6 paddusw xmm1, xmm7 - movdqa xmm6, xmm0 // 8 pixels -> 0,1,2 of xmm6 + movdqa xmm6, xmm0 // 8 pixels -> 0,1,2 of xmm6 psrldq xmm0, 2 paddusw xmm6, xmm0 psrldq xmm0, 2 paddusw xmm6, xmm0 pshufb xmm6, xmm2 - movdqa xmm7, xmm1 // 8 pixels -> 3,4,5 of xmm6 + movdqa xmm7, xmm1 // 8 pixels -> 3,4,5 of xmm6 psrldq xmm1, 2 paddusw xmm7, xmm1 psrldq xmm1, 2 @@ -740,10 +747,10 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, pshufb xmm7, xmm3 paddusw xmm6, xmm7 - pmulhuw xmm6, xmm4 // divide by 9,9,6, 9,9,6 + pmulhuw xmm6, xmm4 // divide by 9,9,6, 9,9,6 packuswb xmm6, xmm6 - movd [edx], xmm6 // write 6 pixels + movd [edx], xmm6 // write 6 pixels psrlq xmm6, 16 movd [edx + 2], xmm6 lea edx, [edx + 6] @@ -756,28 +763,28 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, } // Scale 16x2 pixels to 6x1 with interpolation -__declspec(naked) -void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { +__declspec(naked) void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_ptr - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_ptr - mov ecx, [esp + 4 + 16] // dst_width + mov eax, [esp + 4 + 4] // src_ptr + mov esi, [esp + 4 + 8] // src_stride + mov edx, [esp + 4 + 12] // dst_ptr + mov ecx, [esp + 4 + 16] // dst_width movdqa xmm2, xmmword ptr kShufAb0 movdqa xmm3, xmmword ptr kShufAb1 movdqa xmm4, xmmword ptr kShufAb2 movdqa xmm5, xmmword ptr kScaleAb2 xloop: - movdqu xmm0, [eax] // average 2 rows into xmm0 + movdqu xmm0, [eax] // average 2 rows into xmm0 movdqu xmm1, [eax + esi] lea eax, [eax + 16] pavgb xmm0, xmm1 - movdqa xmm1, xmm0 // 16 pixels -> 0,1,2,3,4,5 of xmm1 + movdqa xmm1, xmm0 // 16 pixels -> 0,1,2,3,4,5 of xmm1 pshufb xmm1, xmm2 movdqa xmm6, xmm0 pshufb xmm6, xmm3 @@ -785,10 +792,10 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, pshufb xmm0, xmm4 paddusw xmm1, xmm0 - pmulhuw xmm1, xmm5 // divide by 3,3,2, 3,3,2 + pmulhuw xmm1, xmm5 // divide by 3,3,2, 3,3,2 packuswb xmm1, xmm1 - movd [edx], xmm1 // write 6 pixels + movd [edx], xmm1 // write 6 pixels psrlq xmm1, 16 movd [edx + 2], xmm1 lea edx, [edx + 6] @@ -801,26 +808,27 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, } // Reads 16 bytes and accumulates to 16 shorts at a time. 
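A hypothetical scalar model of the accumulator-row update that ScaleAddRow performs (used when several rows are summed before a final divide):

#include <stdint.h>

/* Hypothetical scalar model of ScaleAddRow: widen one 8-bit source row
 * and accumulate it into a 16-bit sum row (the asm uses saturating
 * paddusw/vpaddusw; plain addition is shown for clarity). */
static void ScaleAddRow_Sketch(const uint8_t* src_ptr, uint16_t* dst_ptr,
                               int src_width) {
  int x;
  for (x = 0; x < src_width; ++x) {
    dst_ptr[x] = (uint16_t)(dst_ptr[x] + src_ptr[x]);
  }
}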
-__declspec(naked) -void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { +__declspec(naked) void ScaleAddRow_SSE2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width) { __asm { - mov eax, [esp + 4] // src_ptr - mov edx, [esp + 8] // dst_ptr + mov eax, [esp + 4] // src_ptr + mov edx, [esp + 8] // dst_ptr mov ecx, [esp + 12] // src_width pxor xmm5, xmm5 - // sum rows + // sum rows xloop: - movdqu xmm3, [eax] // read 16 bytes + movdqu xmm3, [eax] // read 16 bytes lea eax, [eax + 16] - movdqu xmm0, [edx] // read 16 words from destination + movdqu xmm0, [edx] // read 16 words from destination movdqu xmm1, [edx + 16] movdqa xmm2, xmm3 punpcklbw xmm2, xmm5 punpckhbw xmm3, xmm5 - paddusw xmm0, xmm2 // sum 16 words + paddusw xmm0, xmm2 // sum 16 words paddusw xmm1, xmm3 - movdqu [edx], xmm0 // write 16 words to destination + movdqu [edx], xmm0 // write 16 words to destination movdqu [edx + 16], xmm1 lea edx, [edx + 32] sub ecx, 16 @@ -831,24 +839,25 @@ void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { #ifdef HAS_SCALEADDROW_AVX2 // Reads 32 bytes and accumulates to 32 shorts at a time. -__declspec(naked) -void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { +__declspec(naked) void ScaleAddRow_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width) { __asm { - mov eax, [esp + 4] // src_ptr - mov edx, [esp + 8] // dst_ptr + mov eax, [esp + 4] // src_ptr + mov edx, [esp + 8] // dst_ptr mov ecx, [esp + 12] // src_width vpxor ymm5, ymm5, ymm5 - // sum rows + // sum rows xloop: - vmovdqu ymm3, [eax] // read 32 bytes + vmovdqu ymm3, [eax] // read 32 bytes lea eax, [eax + 32] vpermq ymm3, ymm3, 0xd8 // unmutate for vpunpck vpunpcklbw ymm2, ymm3, ymm5 vpunpckhbw ymm3, ymm3, ymm5 - vpaddusw ymm0, ymm2, [edx] // sum 16 words + vpaddusw ymm0, ymm2, [edx] // sum 16 words vpaddusw ymm1, ymm3, [edx + 32] - vmovdqu [edx], ymm0 // write 32 words to destination + vmovdqu [edx], ymm0 // write 32 words to destination vmovdqu [edx + 32], ymm1 lea edx, [edx + 64] sub ecx, 32 @@ -862,86 +871,87 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { // Constant for making pixels signed to avoid pmaddubsw // saturation. -static uvec8 kFsub80 = - { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +static const uvec8 kFsub80 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; // Constant for making pixels unsigned and adding .5 for rounding. -static uvec16 kFadd40 = - { 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040 }; +static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040, + 0x4040, 0x4040, 0x4040, 0x4040}; // Bilinear column filtering. SSSE3 version. -__declspec(naked) -void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { +__declspec(naked) void ScaleFilterCols_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx) { __asm { push ebx push esi push edi - mov edi, [esp + 12 + 4] // dst_ptr - mov esi, [esp + 12 + 8] // src_ptr - mov ecx, [esp + 12 + 12] // dst_width + mov edi, [esp + 12 + 4] // dst_ptr + mov esi, [esp + 12 + 8] // src_ptr + mov ecx, [esp + 12 + 12] // dst_width movd xmm2, [esp + 12 + 16] // x movd xmm3, [esp + 12 + 20] // dx - mov eax, 0x04040000 // shuffle to line up fractions with pixel. + mov eax, 0x04040000 // shuffle to line up fractions with pixel. 
movd xmm5, eax - pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction. + pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction. psrlw xmm6, 9 - pcmpeqb xmm7, xmm7 // generate 0x0001 + pcmpeqb xmm7, xmm7 // generate 0x0001 psrlw xmm7, 15 - pextrw eax, xmm2, 1 // get x0 integer. preroll + pextrw eax, xmm2, 1 // get x0 integer. preroll sub ecx, 2 jl xloop29 - movdqa xmm0, xmm2 // x1 = x0 + dx + movdqa xmm0, xmm2 // x1 = x0 + dx paddd xmm0, xmm3 - punpckldq xmm2, xmm0 // x0 x1 - punpckldq xmm3, xmm3 // dx dx - paddd xmm3, xmm3 // dx * 2, dx * 2 - pextrw edx, xmm2, 3 // get x1 integer. preroll + punpckldq xmm2, xmm0 // x0 x1 + punpckldq xmm3, xmm3 // dx dx + paddd xmm3, xmm3 // dx * 2, dx * 2 + pextrw edx, xmm2, 3 // get x1 integer. preroll // 2 Pixel loop. xloop2: - movdqa xmm1, xmm2 // x0, x1 fractions. - paddd xmm2, xmm3 // x += dx + movdqa xmm1, xmm2 // x0, x1 fractions. + paddd xmm2, xmm3 // x += dx movzx ebx, word ptr [esi + eax] // 2 source x0 pixels movd xmm0, ebx - psrlw xmm1, 9 // 7 bit fractions. + psrlw xmm1, 9 // 7 bit fractions. movzx ebx, word ptr [esi + edx] // 2 source x1 pixels movd xmm4, ebx - pshufb xmm1, xmm5 // 0011 + pshufb xmm1, xmm5 // 0011 punpcklwd xmm0, xmm4 psubb xmm0, xmmword ptr kFsub80 // make pixels signed. - pxor xmm1, xmm6 // 0..7f and 7f..0 - paddusb xmm1, xmm7 // +1 so 0..7f and 80..1 - pmaddubsw xmm1, xmm0 // 16 bit, 2 pixels. - pextrw eax, xmm2, 1 // get x0 integer. next iteration. - pextrw edx, xmm2, 3 // get x1 integer. next iteration. + pxor xmm1, xmm6 // 0..7f and 7f..0 + paddusb xmm1, xmm7 // +1 so 0..7f and 80..1 + pmaddubsw xmm1, xmm0 // 16 bit, 2 pixels. + pextrw eax, xmm2, 1 // get x0 integer. next iteration. + pextrw edx, xmm2, 3 // get x1 integer. next iteration. paddw xmm1, xmmword ptr kFadd40 // make pixels unsigned and round. - psrlw xmm1, 7 // 8.7 fixed point to low 8 bits. - packuswb xmm1, xmm1 // 8 bits, 2 pixels. + psrlw xmm1, 7 // 8.7 fixed point to low 8 bits. + packuswb xmm1, xmm1 // 8 bits, 2 pixels. movd ebx, xmm1 mov [edi], bx lea edi, [edi + 2] - sub ecx, 2 // 2 pixels + sub ecx, 2 // 2 pixels jge xloop2 xloop29: add ecx, 2 - 1 jl xloop99 - // 1 pixel remainder + // 1 pixel remainder movzx ebx, word ptr [esi + eax] // 2 source x0 pixels movd xmm0, ebx - psrlw xmm2, 9 // 7 bit fractions. - pshufb xmm2, xmm5 // 0011 + psrlw xmm2, 9 // 7 bit fractions. + pshufb xmm2, xmm5 // 0011 psubb xmm0, xmmword ptr kFsub80 // make pixels signed. - pxor xmm2, xmm6 // 0..7f and 7f..0 - paddusb xmm2, xmm7 // +1 so 0..7f and 80..1 - pmaddubsw xmm2, xmm0 // 16 bit + pxor xmm2, xmm6 // 0..7f and 7f..0 + paddusb xmm2, xmm7 // +1 so 0..7f and 80..1 + pmaddubsw xmm2, xmm0 // 16 bit paddw xmm2, xmmword ptr kFadd40 // make pixels unsigned and round. - psrlw xmm2, 7 // 8.7 fixed point to low 8 bits. - packuswb xmm2, xmm2 // 8 bits + psrlw xmm2, 7 // 8.7 fixed point to low 8 bits. + packuswb xmm2, xmm2 // 8 bits movd ebx, xmm2 mov [edi], bl @@ -955,13 +965,15 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, } // Reads 16 pixels, duplicates them and writes 32 pixels. 
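A rough scalar equivalent of the bilinear column filter above (hypothetical; the kFsub80/kFadd40 signed-offset trick that keeps pmaddubsw out of saturation is folded away):

#include <stdint.h>

/* Hypothetical scalar model of the bilinear column filter. x is a
 * 16.16 fixed-point source position advancing by dx per output; the
 * top 7 fraction bits weight the two neighboring pixels, with +64 as
 * rounding before the final >> 7. */
static void ScaleFilterCols_Sketch(uint8_t* dst_ptr, const uint8_t* src_ptr,
                                   int dst_width, int x, int dx) {
  int j;
  for (j = 0; j < dst_width; ++j) {
    int xi = x >> 16;          /* integer pixel index */
    int f = (x >> 9) & 0x7f;   /* 7-bit fraction, cf. psrlw xmm1, 9 */
    dst_ptr[j] = (uint8_t)((src_ptr[xi] * (128 - f) +
                            src_ptr[xi + 1] * f + 64) >> 7);
    x += dx;
  }
}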
-__declspec(naked) -void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { +__declspec(naked) void ScaleColsUp2_SSE2(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx) { __asm { - mov edx, [esp + 4] // dst_ptr - mov eax, [esp + 8] // src_ptr - mov ecx, [esp + 12] // dst_width + mov edx, [esp + 4] // dst_ptr + mov eax, [esp + 8] // src_ptr + mov ecx, [esp + 12] // dst_width wloop: movdqu xmm0, [eax] @@ -980,15 +992,15 @@ void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, } // Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6) -__declspec(naked) -void ScaleARGBRowDown2_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { +__declspec(naked) void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { __asm { - mov eax, [esp + 4] // src_argb - // src_stride ignored - mov edx, [esp + 12] // dst_argb - mov ecx, [esp + 16] // dst_width + mov eax, [esp + 4] // src_argb + // src_stride ignored + mov edx, [esp + 12] // dst_argb + mov ecx, [esp + 16] // dst_width wloop: movdqu xmm0, [eax] @@ -1005,23 +1017,23 @@ void ScaleARGBRowDown2_SSE2(const uint8* src_argb, } // Blends 8x1 rectangle to 4x1. -__declspec(naked) -void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { +__declspec(naked) void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { __asm { - mov eax, [esp + 4] // src_argb - // src_stride ignored - mov edx, [esp + 12] // dst_argb - mov ecx, [esp + 16] // dst_width + mov eax, [esp + 4] // src_argb + // src_stride ignored + mov edx, [esp + 12] // dst_argb + mov ecx, [esp + 16] // dst_width wloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] movdqa xmm2, xmm0 - shufps xmm0, xmm1, 0x88 // even pixels - shufps xmm2, xmm1, 0xdd // odd pixels + shufps xmm0, xmm1, 0x88 // even pixels + shufps xmm2, xmm1, 0xdd // odd pixels pavgb xmm0, xmm2 movdqu [edx], xmm0 lea edx, [edx + 16] @@ -1033,16 +1045,16 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, } // Blends 8x2 rectangle to 4x1. -__declspec(naked) -void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { +__declspec(naked) void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width) { __asm { push esi - mov eax, [esp + 4 + 4] // src_argb - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // dst_width + mov eax, [esp + 4 + 4] // src_argb + mov esi, [esp + 4 + 8] // src_stride + mov edx, [esp + 4 + 12] // dst_argb + mov ecx, [esp + 4 + 16] // dst_width wloop: movdqu xmm0, [eax] @@ -1050,11 +1062,11 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, movdqu xmm2, [eax + esi] movdqu xmm3, [eax + esi + 16] lea eax, [eax + 32] - pavgb xmm0, xmm2 // average rows + pavgb xmm0, xmm2 // average rows pavgb xmm1, xmm3 - movdqa xmm2, xmm0 // average columns (8 to 4 pixels) - shufps xmm0, xmm1, 0x88 // even pixels - shufps xmm2, xmm1, 0xdd // odd pixels + movdqa xmm2, xmm0 // average columns (8 to 4 pixels) + shufps xmm0, xmm1, 0x88 // even pixels + shufps xmm2, xmm1, 0xdd // odd pixels pavgb xmm0, xmm2 movdqu [edx], xmm0 lea edx, [edx + 16] @@ -1067,18 +1079,19 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, } // Reads 4 pixels at a time. 
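A hypothetical scalar model of the even-pixel ARGB sampler that follows (the lea ebx, [ebx * 4] in the asm turns the pixel step into a byte step):

#include <stdint.h>

/* Hypothetical scalar model of ScaleARGBRowDownEven: keep every
 * src_stepx-th ARGB pixel, treating each pixel as one 32-bit unit
 * (assumes 4-byte-aligned buffers for this sketch). */
static void ScaleARGBRowDownEven_Sketch(const uint8_t* src_argb,
                                        int src_stepx, uint8_t* dst_argb,
                                        int dst_width) {
  const uint32_t* s = (const uint32_t*)src_argb;
  uint32_t* d = (uint32_t*)dst_argb;
  int x;
  for (x = 0; x < dst_width; ++x) {
    d[x] = s[x * src_stepx];
  }
}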
-__declspec(naked) -void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width) { +__declspec(naked) void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width) { __asm { push ebx push edi - mov eax, [esp + 8 + 4] // src_argb - // src_stride ignored - mov ebx, [esp + 8 + 12] // src_stepx - mov edx, [esp + 8 + 16] // dst_argb - mov ecx, [esp + 8 + 20] // dst_width + mov eax, [esp + 8 + 4] // src_argb + // src_stride ignored + mov ebx, [esp + 8 + 12] // src_stepx + mov edx, [esp + 8 + 16] // dst_argb + mov ecx, [esp + 8 + 20] // dst_width lea ebx, [ebx * 4] lea edi, [ebx + ebx * 2] @@ -1103,21 +1116,21 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, } // Blends four 2x2 to 4x1. -__declspec(naked) -void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width) { +__declspec(naked) void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width) { __asm { push ebx push esi push edi - mov eax, [esp + 12 + 4] // src_argb - mov esi, [esp + 12 + 8] // src_stride - mov ebx, [esp + 12 + 12] // src_stepx - mov edx, [esp + 12 + 16] // dst_argb - mov ecx, [esp + 12 + 20] // dst_width - lea esi, [eax + esi] // row1 pointer + mov eax, [esp + 12 + 4] // src_argb + mov esi, [esp + 12 + 8] // src_stride + mov ebx, [esp + 12 + 12] // src_stepx + mov edx, [esp + 12 + 16] // dst_argb + mov ecx, [esp + 12 + 20] // dst_width + lea esi, [eax + esi] // row1 pointer lea ebx, [ebx * 4] lea edi, [ebx + ebx * 2] @@ -1132,11 +1145,11 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, movq xmm3, qword ptr [esi + ebx * 2] movhps xmm3, qword ptr [esi + edi] lea esi, [esi + ebx * 4] - pavgb xmm0, xmm2 // average rows + pavgb xmm0, xmm2 // average rows pavgb xmm1, xmm3 - movdqa xmm2, xmm0 // average columns (8 to 4 pixels) - shufps xmm0, xmm1, 0x88 // even pixels - shufps xmm2, xmm1, 0xdd // odd pixels + movdqa xmm2, xmm0 // average columns (8 to 4 pixels) + shufps xmm0, xmm1, 0x88 // even pixels + shufps xmm2, xmm1, 0xdd // odd pixels pavgb xmm0, xmm2 movdqu [edx], xmm0 lea edx, [edx + 16] @@ -1151,64 +1164,66 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, } // Column scaling unfiltered. SSE2 version. 
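A hypothetical scalar model of the nearest-pixel column scaler that follows, together with the 16.16 division that FixedDiv_X86 performs near the end of this file's hunks:

#include <stdint.h>

/* Hypothetical scalar model of unfiltered (nearest) column scaling:
 * fetch the source pixel at 16.16 position x, then step by dx. */
static void ScaleARGBCols_Sketch(uint32_t* dst_argb, const uint32_t* src_argb,
                                 int dst_width, int x, int dx) {
  int j;
  for (j = 0; j < dst_width; ++j) {
    dst_argb[j] = src_argb[x >> 16];
    x += dx;
  }
}

/* Portable model of FixedDiv_X86: the cdq/shld/shl/idiv sequence
 * computes (num << 16) / div in 64-bit arithmetic. */
static int FixedDiv_Sketch(int num, int div) {
  return (int)(((int64_t)num << 16) / div);
}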
-__declspec(naked) -void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { +__declspec(naked) void ScaleARGBCols_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { __asm { push edi push esi - mov edi, [esp + 8 + 4] // dst_argb - mov esi, [esp + 8 + 8] // src_argb - mov ecx, [esp + 8 + 12] // dst_width + mov edi, [esp + 8 + 4] // dst_argb + mov esi, [esp + 8 + 8] // src_argb + mov ecx, [esp + 8 + 12] // dst_width movd xmm2, [esp + 8 + 16] // x movd xmm3, [esp + 8 + 20] // dx - pshufd xmm2, xmm2, 0 // x0 x0 x0 x0 - pshufd xmm0, xmm3, 0x11 // dx 0 dx 0 + pshufd xmm2, xmm2, 0 // x0 x0 x0 x0 + pshufd xmm0, xmm3, 0x11 // dx 0 dx 0 paddd xmm2, xmm0 - paddd xmm3, xmm3 // 0, 0, 0, dx * 2 - pshufd xmm0, xmm3, 0x05 // dx * 2, dx * 2, 0, 0 - paddd xmm2, xmm0 // x3 x2 x1 x0 - paddd xmm3, xmm3 // 0, 0, 0, dx * 4 - pshufd xmm3, xmm3, 0 // dx * 4, dx * 4, dx * 4, dx * 4 + paddd xmm3, xmm3 // 0, 0, 0, dx * 2 + pshufd xmm0, xmm3, 0x05 // dx * 2, dx * 2, 0, 0 + paddd xmm2, xmm0 // x3 x2 x1 x0 + paddd xmm3, xmm3 // 0, 0, 0, dx * 4 + pshufd xmm3, xmm3, 0 // dx * 4, dx * 4, dx * 4, dx * 4 - pextrw eax, xmm2, 1 // get x0 integer. - pextrw edx, xmm2, 3 // get x1 integer. + pextrw eax, xmm2, 1 // get x0 integer. + pextrw edx, xmm2, 3 // get x1 integer. cmp ecx, 0 jle xloop99 sub ecx, 4 jl xloop49 - // 4 Pixel loop. + // 4 Pixel loop. xloop4: movd xmm0, [esi + eax * 4] // 1 source x0 pixels movd xmm1, [esi + edx * 4] // 1 source x1 pixels - pextrw eax, xmm2, 5 // get x2 integer. - pextrw edx, xmm2, 7 // get x3 integer. - paddd xmm2, xmm3 // x += dx - punpckldq xmm0, xmm1 // x0 x1 + pextrw eax, xmm2, 5 // get x2 integer. + pextrw edx, xmm2, 7 // get x3 integer. + paddd xmm2, xmm3 // x += dx + punpckldq xmm0, xmm1 // x0 x1 movd xmm1, [esi + eax * 4] // 1 source x2 pixels movd xmm4, [esi + edx * 4] // 1 source x3 pixels - pextrw eax, xmm2, 1 // get x0 integer. next iteration. - pextrw edx, xmm2, 3 // get x1 integer. next iteration. - punpckldq xmm1, xmm4 // x2 x3 - punpcklqdq xmm0, xmm1 // x0 x1 x2 x3 + pextrw eax, xmm2, 1 // get x0 integer. next iteration. + pextrw edx, xmm2, 3 // get x1 integer. next iteration. + punpckldq xmm1, xmm4 // x2 x3 + punpcklqdq xmm0, xmm1 // x0 x1 x2 x3 movdqu [edi], xmm0 lea edi, [edi + 16] - sub ecx, 4 // 4 pixels + sub ecx, 4 // 4 pixels jge xloop4 xloop49: test ecx, 2 je xloop29 - // 2 Pixels. + // 2 Pixels. movd xmm0, [esi + eax * 4] // 1 source x0 pixels movd xmm1, [esi + edx * 4] // 1 source x1 pixels - pextrw eax, xmm2, 5 // get x2 integer. - punpckldq xmm0, xmm1 // x0 x1 + pextrw eax, xmm2, 5 // get x2 integer. + punpckldq xmm0, xmm1 // x0 x1 movq qword ptr [edi], xmm0 lea edi, [edi + 8] @@ -1217,7 +1232,7 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, test ecx, 1 je xloop99 - // 1 Pixels. + // 1 Pixels. 
movd xmm0, [esi + eax * 4] // 1 source x2 pixels movd dword ptr [edi], xmm0 xloop99: @@ -1232,60 +1247,62 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, // TODO(fbarchard): Port to Neon // Shuffle table for arranging 2 pixels into pairs for pmaddubsw -static uvec8 kShuffleColARGB = { - 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel - 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel +static const uvec8 kShuffleColARGB = { + 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel + 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel }; // Shuffle table for duplicating 2 fractions into 8 bytes each -static uvec8 kShuffleFractions = { - 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, +static const uvec8 kShuffleFractions = { + 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, }; -__declspec(naked) -void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { +__declspec(naked) void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { __asm { push esi push edi - mov edi, [esp + 8 + 4] // dst_argb - mov esi, [esp + 8 + 8] // src_argb - mov ecx, [esp + 8 + 12] // dst_width + mov edi, [esp + 8 + 4] // dst_argb + mov esi, [esp + 8 + 8] // src_argb + mov ecx, [esp + 8 + 12] // dst_width movd xmm2, [esp + 8 + 16] // x movd xmm3, [esp + 8 + 20] // dx movdqa xmm4, xmmword ptr kShuffleColARGB movdqa xmm5, xmmword ptr kShuffleFractions - pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction. + pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction. psrlw xmm6, 9 - pextrw eax, xmm2, 1 // get x0 integer. preroll + pextrw eax, xmm2, 1 // get x0 integer. preroll sub ecx, 2 jl xloop29 - movdqa xmm0, xmm2 // x1 = x0 + dx + movdqa xmm0, xmm2 // x1 = x0 + dx paddd xmm0, xmm3 - punpckldq xmm2, xmm0 // x0 x1 - punpckldq xmm3, xmm3 // dx dx - paddd xmm3, xmm3 // dx * 2, dx * 2 - pextrw edx, xmm2, 3 // get x1 integer. preroll + punpckldq xmm2, xmm0 // x0 x1 + punpckldq xmm3, xmm3 // dx dx + paddd xmm3, xmm3 // dx * 2, dx * 2 + pextrw edx, xmm2, 3 // get x1 integer. preroll // 2 Pixel loop. xloop2: - movdqa xmm1, xmm2 // x0, x1 fractions. - paddd xmm2, xmm3 // x += dx + movdqa xmm1, xmm2 // x0, x1 fractions. + paddd xmm2, xmm3 // x += dx movq xmm0, qword ptr [esi + eax * 4] // 2 source x0 pixels - psrlw xmm1, 9 // 7 bit fractions. + psrlw xmm1, 9 // 7 bit fractions. movhps xmm0, qword ptr [esi + edx * 4] // 2 source x1 pixels - pshufb xmm1, xmm5 // 0000000011111111 - pshufb xmm0, xmm4 // arrange pixels into pairs - pxor xmm1, xmm6 // 0..7f and 7f..0 - pmaddubsw xmm0, xmm1 // argb_argb 16 bit, 2 pixels. - pextrw eax, xmm2, 1 // get x0 integer. next iteration. - pextrw edx, xmm2, 3 // get x1 integer. next iteration. - psrlw xmm0, 7 // argb 8.7 fixed point to low 8 bits. - packuswb xmm0, xmm0 // argb_argb 8 bits, 2 pixels. + pshufb xmm1, xmm5 // 0000000011111111 + pshufb xmm0, xmm4 // arrange pixels into pairs + pxor xmm1, xmm6 // 0..7f and 7f..0 + pmaddubsw xmm0, xmm1 // argb_argb 16 bit, 2 pixels. + pextrw eax, xmm2, 1 // get x0 integer. next iteration. + pextrw edx, xmm2, 3 // get x1 integer. next iteration. + psrlw xmm0, 7 // argb 8.7 fixed point to low 8 bits. + packuswb xmm0, xmm0 // argb_argb 8 bits, 2 pixels. 
movq qword ptr [edi], xmm0 lea edi, [edi + 8] - sub ecx, 2 // 2 pixels + sub ecx, 2 // 2 pixels jge xloop2 xloop29: @@ -1293,15 +1310,15 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, add ecx, 2 - 1 jl xloop99 - // 1 pixel remainder - psrlw xmm2, 9 // 7 bit fractions. + // 1 pixel remainder + psrlw xmm2, 9 // 7 bit fractions. movq xmm0, qword ptr [esi + eax * 4] // 2 source x0 pixels - pshufb xmm2, xmm5 // 00000000 - pshufb xmm0, xmm4 // arrange pixels into pairs - pxor xmm2, xmm6 // 0..7f and 7f..0 - pmaddubsw xmm0, xmm2 // argb 16 bit, 1 pixel. + pshufb xmm2, xmm5 // 00000000 + pshufb xmm0, xmm4 // arrange pixels into pairs + pxor xmm2, xmm6 // 0..7f and 7f..0 + pmaddubsw xmm0, xmm2 // argb 16 bit, 1 pixel. psrlw xmm0, 7 - packuswb xmm0, xmm0 // argb 8 bits, 1 pixel. + packuswb xmm0, xmm0 // argb 8 bits, 1 pixel. movd [edi], xmm0 xloop99: @@ -1313,13 +1330,15 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, } // Reads 4 pixels, duplicates them and writes 8 pixels. -__declspec(naked) -void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { +__declspec(naked) void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx) { __asm { - mov edx, [esp + 4] // dst_argb - mov eax, [esp + 8] // src_argb - mov ecx, [esp + 12] // dst_width + mov edx, [esp + 4] // dst_argb + mov eax, [esp + 8] // src_argb + mov ecx, [esp + 12] // dst_width wloop: movdqu xmm0, [eax] @@ -1338,12 +1357,11 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, } // Divide num by div and return as 16.16 fixed point result. -__declspec(naked) -int FixedDiv_X86(int num, int div) { +__declspec(naked) int FixedDiv_X86(int num, int div) { __asm { - mov eax, [esp + 4] // num - cdq // extend num to 64 bits - shld edx, eax, 16 // 32.16 + mov eax, [esp + 4] // num + cdq // extend num to 64 bits + shld edx, eax, 16 // 32.16 shl eax, 16 idiv dword ptr [esp + 8] ret @@ -1351,13 +1369,12 @@ int FixedDiv_X86(int num, int div) { } // Divide num by div and return as 16.16 fixed point result. -__declspec(naked) -int FixedDiv1_X86(int num, int div) { +__declspec(naked) int FixedDiv1_X86(int num, int div) { __asm { - mov eax, [esp + 4] // num - mov ecx, [esp + 8] // denom - cdq // extend num to 64 bits - shld edx, eax, 16 // 32.16 + mov eax, [esp + 4] // num + mov ecx, [esp + 8] // denom + cdq // extend num to 64 bits + shld edx, eax, 16 // 32.16 shl eax, 16 sub eax, 0x00010001 sbb edx, 0 diff --git a/media/libvpx/libvpx/third_party/libyuv/source/video_common.cc b/media/libvpx/libvpx/third_party/libyuv/source/video_common.cc index 00fb71e18bfb..92384c050cda 100644 --- a/media/libvpx/libvpx/third_party/libyuv/source/video_common.cc +++ b/media/libvpx/libvpx/third_party/libyuv/source/video_common.cc @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #include "libyuv/video_common.h" #ifdef __cplusplus @@ -16,40 +15,39 @@ namespace libyuv { extern "C" { #endif -#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0])) - struct FourCCAliasEntry { - uint32 alias; - uint32 canonical; + uint32_t alias; + uint32_t canonical; }; -static const struct FourCCAliasEntry kFourCCAliases[] = { - {FOURCC_IYUV, FOURCC_I420}, - {FOURCC_YU12, FOURCC_I420}, - {FOURCC_YU16, FOURCC_I422}, - {FOURCC_YU24, FOURCC_I444}, - {FOURCC_YUYV, FOURCC_YUY2}, - {FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs - {FOURCC_HDYC, FOURCC_UYVY}, - {FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8 - {FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not. - {FOURCC_DMB1, FOURCC_MJPG}, - {FOURCC_BA81, FOURCC_BGGR}, // deprecated. - {FOURCC_RGB3, FOURCC_RAW }, - {FOURCC_BGR3, FOURCC_24BG}, - {FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB - {FOURCC_CM24, FOURCC_RAW }, // kCMPixelFormat_24RGB - {FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555 - {FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565 - {FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551 +#define NUM_ALIASES 18 +static const struct FourCCAliasEntry kFourCCAliases[NUM_ALIASES] = { + {FOURCC_IYUV, FOURCC_I420}, + {FOURCC_YU12, FOURCC_I420}, + {FOURCC_YU16, FOURCC_I422}, + {FOURCC_YU24, FOURCC_I444}, + {FOURCC_YUYV, FOURCC_YUY2}, + {FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs + {FOURCC_HDYC, FOURCC_UYVY}, + {FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8 + {FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not. + {FOURCC_DMB1, FOURCC_MJPG}, + {FOURCC_BA81, FOURCC_BGGR}, // deprecated. + {FOURCC_RGB3, FOURCC_RAW}, + {FOURCC_BGR3, FOURCC_24BG}, + {FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB + {FOURCC_CM24, FOURCC_RAW}, // kCMPixelFormat_24RGB + {FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555 + {FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565 + {FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551 }; // TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB. // {FOURCC_BGRA, FOURCC_ARGB}, // kCMPixelFormat_32BGRA LIBYUV_API -uint32 CanonicalFourCC(uint32 fourcc) { +uint32_t CanonicalFourCC(uint32_t fourcc) { int i; - for (i = 0; i < ARRAY_SIZE(kFourCCAliases); ++i) { + for (i = 0; i < NUM_ALIASES; ++i) { if (kFourCCAliases[i].alias == fourcc) { return kFourCCAliases[i].canonical; } @@ -62,4 +60,3 @@ uint32 CanonicalFourCC(uint32 fourcc) { } // extern "C" } // namespace libyuv #endif - diff --git a/media/libvpx/libvpx/third_party/x86inc/README.libvpx b/media/libvpx/libvpx/third_party/x86inc/README.libvpx index 8d3cd966da13..36735ffbb1a4 100644 --- a/media/libvpx/libvpx/third_party/x86inc/README.libvpx +++ b/media/libvpx/libvpx/third_party/x86inc/README.libvpx @@ -18,3 +18,4 @@ Copy PIC 'GLOBAL' macros from x86_abi_support.asm Use .text instead of .rodata on macho to avoid broken tables in PIC mode. Use .text with no alignment for aout Only use 'hidden' visibility with Chromium +Prefix ARCH_* with VPX_. 
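Stepping back to the video_common.cc hunk above: the lookup is a plain linear scan over the alias table, and a few of its mappings can be checked directly (hypothetical helper, assuming the FOURCC_* constants from libyuv/video_common.h):

#include <assert.h>
#include "libyuv/video_common.h"

/* Hypothetical sanity check of CanonicalFourCC against mappings that
 * are visible in the kFourCCAliases table in the hunk above. */
static void CheckFourCCAliases(void) {
  assert(CanonicalFourCC(FOURCC_IYUV) == FOURCC_I420);
  assert(CanonicalFourCC(FOURCC_YUYV) == FOURCC_YUY2);
  assert(CanonicalFourCC(FOURCC_JPEG) == FOURCC_MJPG);
  assert(CanonicalFourCC(FOURCC_I420) == FOURCC_I420); /* pass-through */
}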
diff --git a/media/libvpx/libvpx/third_party/x86inc/x86inc.asm b/media/libvpx/libvpx/third_party/x86inc/x86inc.asm index b647dff2f88b..3d722fec08f2 100644 --- a/media/libvpx/libvpx/third_party/x86inc/x86inc.asm +++ b/media/libvpx/libvpx/third_party/x86inc/x86inc.asm @@ -45,7 +45,7 @@ %endif %ifndef STACK_ALIGNMENT - %if ARCH_X86_64 + %if VPX_ARCH_X86_64 %define STACK_ALIGNMENT 16 %else %define STACK_ALIGNMENT 4 @@ -54,7 +54,7 @@ %define WIN64 0 %define UNIX64 0 -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 %ifidn __OUTPUT_FORMAT__,win32 %define WIN64 1 %elifidn __OUTPUT_FORMAT__,win64 @@ -165,7 +165,7 @@ %endif %endif - %if ARCH_X86_64 == 0 + %if VPX_ARCH_X86_64 == 0 %undef PIC %endif @@ -260,7 +260,7 @@ %if %0 == 2 %define r%1m %2d %define r%1mp %2 - %elif ARCH_X86_64 ; memory + %elif VPX_ARCH_X86_64 ; memory %define r%1m [rstk + stack_offset + %3] %define r%1mp qword r %+ %1 %+ m %else @@ -281,7 +281,7 @@ %define e%1h %3 %define r%1b %2 %define e%1b %2 - %if ARCH_X86_64 == 0 + %if VPX_ARCH_X86_64 == 0 %define r%1 e%1 %endif %endmacro @@ -318,7 +318,7 @@ DECLARE_REG_SIZE bp, bpl, null DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 %define gprsize 8 %else %define gprsize 4 @@ -485,7 +485,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %if %1 > 0 %assign regs_used (regs_used + 1) %endif - %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3 + %if VPX_ARCH_X86_64 && regs_used < 5 + UNIX64 * 3 ; Ensure that we don't clobber any registers containing arguments %assign regs_used 5 + UNIX64 * 3 %endif @@ -607,7 +607,7 @@ DECLARE_REG 14, R15, 120 AUTO_REP_RET %endmacro -%elif ARCH_X86_64 ; *nix x64 ;============================================= +%elif VPX_ARCH_X86_64 ; *nix x64 ;============================================= DECLARE_REG 0, rdi DECLARE_REG 1, rsi @@ -948,7 +948,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, %endif %endif - %if ARCH_X86_64 || cpuflag(sse2) + %if VPX_ARCH_X86_64 || cpuflag(sse2) %ifdef __NASM_VER__ ALIGNMODE k8 %else @@ -1005,7 +1005,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, %define RESET_MM_PERMUTATION INIT_XMM %1 %define mmsize 16 %define num_mmregs 8 - %if ARCH_X86_64 + %if VPX_ARCH_X86_64 %define num_mmregs 16 %endif %define mova movdqa @@ -1026,7 +1026,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, %define RESET_MM_PERMUTATION INIT_YMM %1 %define mmsize 32 %define num_mmregs 8 - %if ARCH_X86_64 + %if VPX_ARCH_X86_64 %define num_mmregs 16 %endif %define mova movdqa @@ -1637,7 +1637,7 @@ FMA4_INSTR fnmsub, pd, ps, sd, ss ; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0) %ifdef __YASM_VER__ - %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0 + %if __YASM_VERSION_ID__ < 0x01030000 && VPX_ARCH_X86_64 == 0 %macro vpbroadcastq 2 %if sizeof%1 == 16 movddup %1, %2 diff --git a/media/libvpx/libvpx/tools/cpplint.py b/media/libvpx/libvpx/tools/cpplint.py deleted file mode 100755 index 25fbef73d8ed..000000000000 --- a/media/libvpx/libvpx/tools/cpplint.py +++ /dev/null @@ -1,4756 +0,0 @@ -#!/usr/bin/python -# -# Copyright (c) 2009 Google Inc. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -"""Does google-lint on c++ files. - -The goal of this script is to identify places in the code that *may* -be in non-compliance with google style. It does not attempt to fix -up these problems -- the point is to educate. It does also not -attempt to find all problems, or to ensure that everything it does -find is legitimately a problem. - -In particular, we can get very confused by /* and // inside strings! -We do a small hack, which is to ignore //'s with "'s after them on the -same line, but it is far from perfect (in either direction). -""" - -import codecs -import copy -import getopt -import math # for log -import os -import re -import sre_compile -import string -import sys -import unicodedata - - -_USAGE = """ -Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...] - [--counting=total|toplevel|detailed] [--root=subdir] - [--linelength=digits] - [file] ... - - The style guidelines this tries to follow are those in - http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml - - Every problem is given a confidence score from 1-5, with 5 meaning we are - certain of the problem, and 1 meaning it could be a legitimate construct. - This will miss some errors, and is not a substitute for a code review. - - To suppress false-positive errors of a certain category, add a - 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*) - suppresses errors of all categories on that line. - - The files passed in will be linted; at least one file must be provided. - Default linted extensions are .cc, .cpp, .cu, .cuh and .h. Change the - extensions with the --extensions flag. - - Flags: - - output=vs7 - By default, the output is formatted to ease emacs parsing. Visual Studio - compatible output (vs7) may also be used. Other formats are unsupported. - - verbose=# - Specify a number 0-5 to restrict errors to certain verbosity levels. - - filter=-x,+y,... - Specify a comma-separated list of category-filters to apply: only - error messages whose category names pass the filters will be printed. - (Category names are printed with the message and look like - "[whitespace/indent]".) Filters are evaluated left to right. - "-FOO" and "FOO" means "do not print categories that start with FOO". - "+FOO" means "do print categories that start with FOO". 
- - Examples: --filter=-whitespace,+whitespace/braces - --filter=whitespace,runtime/printf,+runtime/printf_format - --filter=-,+build/include_what_you_use - - To see a list of all the categories used in cpplint, pass no arg: - --filter= - - counting=total|toplevel|detailed - The total number of errors found is always printed. If - 'toplevel' is provided, then the count of errors in each of - the top-level categories like 'build' and 'whitespace' will - also be printed. If 'detailed' is provided, then a count - is provided for each category like 'build/class'. - - root=subdir - The root directory used for deriving header guard CPP variable. - By default, the header guard CPP variable is calculated as the relative - path to the directory that contains .git, .hg, or .svn. When this flag - is specified, the relative path is calculated from the specified - directory. If the specified directory does not exist, this flag is - ignored. - - Examples: - Assuing that src/.git exists, the header guard CPP variables for - src/chrome/browser/ui/browser.h are: - - No flag => CHROME_BROWSER_UI_BROWSER_H_ - --root=chrome => BROWSER_UI_BROWSER_H_ - --root=chrome/browser => UI_BROWSER_H_ - - linelength=digits - This is the allowed line length for the project. The default value is - 80 characters. - - Examples: - --linelength=120 - - extensions=extension,extension,... - The allowed file extensions that cpplint will check - - Examples: - --extensions=hpp,cpp -""" - -# We categorize each error message we print. Here are the categories. -# We want an explicit list so we can list them all in cpplint --filter=. -# If you add a new error message with a new category, add it to the list -# here! cpplint_unittest.py should tell you if you forget to do this. -_ERROR_CATEGORIES = [ - 'build/class', - 'build/deprecated', - 'build/endif_comment', - 'build/explicit_make_pair', - 'build/forward_decl', - 'build/header_guard', - 'build/include', - 'build/include_alpha', - 'build/include_order', - 'build/include_what_you_use', - 'build/namespaces', - 'build/printf_format', - 'build/storage_class', - 'legal/copyright', - 'readability/alt_tokens', - 'readability/braces', - 'readability/casting', - 'readability/check', - 'readability/constructors', - 'readability/fn_size', - 'readability/function', - 'readability/multiline_comment', - 'readability/multiline_string', - 'readability/namespace', - 'readability/nolint', - 'readability/nul', - 'readability/streams', - 'readability/todo', - 'readability/utf8', - 'runtime/arrays', - 'runtime/casting', - 'runtime/explicit', - 'runtime/int', - 'runtime/init', - 'runtime/invalid_increment', - 'runtime/member_string_references', - 'runtime/memset', - 'runtime/operator', - 'runtime/printf', - 'runtime/printf_format', - 'runtime/references', - 'runtime/sizeof', - 'runtime/string', - 'runtime/threadsafe_fn', - 'runtime/vlog', - 'whitespace/blank_line', - 'whitespace/braces', - 'whitespace/comma', - 'whitespace/comments', - 'whitespace/empty_conditional_body', - 'whitespace/empty_loop_body', - 'whitespace/end_of_line', - 'whitespace/ending_newline', - 'whitespace/forcolon', - 'whitespace/indent', - 'whitespace/line_length', - 'whitespace/newline', - 'whitespace/operators', - 'whitespace/parens', - 'whitespace/semicolon', - 'whitespace/tab', - 'whitespace/todo' - ] - -# The default state of the category filter. This is overrided by the --filter= -# flag. 
By default all errors are on, so only add here categories that should be -# off by default (i.e., categories that must be enabled by the --filter= flags). -# All entries here should start with a '-' or '+', as in the --filter= flag. -_DEFAULT_FILTERS = ['-build/include_alpha'] - -# We used to check for high-bit characters, but after much discussion we -# decided those were OK, as long as they were in UTF-8 and didn't represent -# hard-coded international strings, which belong in a separate i18n file. - - -# C++ headers -_CPP_HEADERS = frozenset([ - # Legacy - 'algobase.h', - 'algo.h', - 'alloc.h', - 'builtinbuf.h', - 'bvector.h', - 'complex.h', - 'defalloc.h', - 'deque.h', - 'editbuf.h', - 'fstream.h', - 'function.h', - 'hash_map', - 'hash_map.h', - 'hash_set', - 'hash_set.h', - 'hashtable.h', - 'heap.h', - 'indstream.h', - 'iomanip.h', - 'iostream.h', - 'istream.h', - 'iterator.h', - 'list.h', - 'map.h', - 'multimap.h', - 'multiset.h', - 'ostream.h', - 'pair.h', - 'parsestream.h', - 'pfstream.h', - 'procbuf.h', - 'pthread_alloc', - 'pthread_alloc.h', - 'rope', - 'rope.h', - 'ropeimpl.h', - 'set.h', - 'slist', - 'slist.h', - 'stack.h', - 'stdiostream.h', - 'stl_alloc.h', - 'stl_relops.h', - 'streambuf.h', - 'stream.h', - 'strfile.h', - 'strstream.h', - 'tempbuf.h', - 'tree.h', - 'type_traits.h', - 'vector.h', - # 17.6.1.2 C++ library headers - 'algorithm', - 'array', - 'atomic', - 'bitset', - 'chrono', - 'codecvt', - 'complex', - 'condition_variable', - 'deque', - 'exception', - 'forward_list', - 'fstream', - 'functional', - 'future', - 'initializer_list', - 'iomanip', - 'ios', - 'iosfwd', - 'iostream', - 'istream', - 'iterator', - 'limits', - 'list', - 'locale', - 'map', - 'memory', - 'mutex', - 'new', - 'numeric', - 'ostream', - 'queue', - 'random', - 'ratio', - 'regex', - 'set', - 'sstream', - 'stack', - 'stdexcept', - 'streambuf', - 'string', - 'strstream', - 'system_error', - 'thread', - 'tuple', - 'typeindex', - 'typeinfo', - 'type_traits', - 'unordered_map', - 'unordered_set', - 'utility', - 'valarray', - 'vector', - # 17.6.1.2 C++ headers for C library facilities - 'cassert', - 'ccomplex', - 'cctype', - 'cerrno', - 'cfenv', - 'cfloat', - 'cinttypes', - 'ciso646', - 'climits', - 'clocale', - 'cmath', - 'csetjmp', - 'csignal', - 'cstdalign', - 'cstdarg', - 'cstdbool', - 'cstddef', - 'cstdint', - 'cstdio', - 'cstdlib', - 'cstring', - 'ctgmath', - 'ctime', - 'cuchar', - 'cwchar', - 'cwctype', - ]) - -# Assertion macros. These are defined in base/logging.h and -# testing/base/gunit.h. Note that the _M versions need to come first -# for substring matching to work. 
-_CHECK_MACROS = [ - 'DCHECK', 'CHECK', - 'EXPECT_TRUE_M', 'EXPECT_TRUE', - 'ASSERT_TRUE_M', 'ASSERT_TRUE', - 'EXPECT_FALSE_M', 'EXPECT_FALSE', - 'ASSERT_FALSE_M', 'ASSERT_FALSE', - ] - -# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE -_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS]) - -for op, replacement in [('==', 'EQ'), ('!=', 'NE'), - ('>=', 'GE'), ('>', 'GT'), - ('<=', 'LE'), ('<', 'LT')]: - _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement - _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement - _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement - _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement - _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement - _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement - -for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), - ('>=', 'LT'), ('>', 'LE'), - ('<=', 'GT'), ('<', 'GE')]: - _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement - _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement - _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement - _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement - -# Alternative tokens and their replacements. For full list, see section 2.5 -# Alternative tokens [lex.digraph] in the C++ standard. -# -# Digraphs (such as '%:') are not included here since it's a mess to -# match those on a word boundary. -_ALT_TOKEN_REPLACEMENT = { - 'and': '&&', - 'bitor': '|', - 'or': '||', - 'xor': '^', - 'compl': '~', - 'bitand': '&', - 'and_eq': '&=', - 'or_eq': '|=', - 'xor_eq': '^=', - 'not': '!', - 'not_eq': '!=' - } - -# Compile regular expression that matches all the above keywords. The "[ =()]" -# bit is meant to avoid matching these keywords outside of boolean expressions. -# -# False positives include C-style multi-line comments and multi-line strings -# but those have always been troublesome for cpplint. -_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile( - r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)') - - -# These constants define types of headers for use with -# _IncludeState.CheckNextIncludeOrder(). -_C_SYS_HEADER = 1 -_CPP_SYS_HEADER = 2 -_LIKELY_MY_HEADER = 3 -_POSSIBLE_MY_HEADER = 4 -_OTHER_HEADER = 5 - -# These constants define the current inline assembly state -_NO_ASM = 0 # Outside of inline assembly block -_INSIDE_ASM = 1 # Inside inline assembly block -_END_ASM = 2 # Last line of inline assembly block -_BLOCK_ASM = 3 # The whole block is an inline assembly block - -# Match start of assembly blocks -_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' - r'(?:\s+(volatile|__volatile__))?' - r'\s*[{(]') - - -_regexp_compile_cache = {} - -# Finds occurrences of NOLINT or NOLINT(...). -_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?') - -# {str, set(int)}: a map from error categories to sets of linenumbers -# on which those errors are expected and should be suppressed. -_error_suppressions = {} - -# The root directory used for deriving header guard CPP variable. -# This is set by --root flag. -_root = None - -# The allowed line length of files. -# This is set by --linelength flag. -_line_length = 80 - -# The allowed extensions for file names -# This is set by --extensions flag. -_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh']) - -def ParseNolintSuppressions(filename, raw_line, linenum, error): - """Updates the global list of error-suppressions. 
- - Parses any NOLINT comments on the current line, updating the global - error_suppressions store. Reports an error if the NOLINT comment - was malformed. - - Args: - filename: str, the name of the input file. - raw_line: str, the line of input text, with comments. - linenum: int, the number of the current line. - error: function, an error handler. - """ - # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*). - matched = _RE_SUPPRESSION.search(raw_line) - if matched: - category = matched.group(1) - if category in (None, '(*)'): # => "suppress all" - _error_suppressions.setdefault(None, set()).add(linenum) - else: - if category.startswith('(') and category.endswith(')'): - category = category[1:-1] - if category in _ERROR_CATEGORIES: - _error_suppressions.setdefault(category, set()).add(linenum) - else: - error(filename, linenum, 'readability/nolint', 5, - 'Unknown NOLINT error category: %s' % category) - - -def ResetNolintSuppressions(): - "Resets the set of NOLINT suppressions to empty." - _error_suppressions.clear() - - -def IsErrorSuppressedByNolint(category, linenum): - """Returns true if the specified error category is suppressed on this line. - - Consults the global error_suppressions map populated by - ParseNolintSuppressions/ResetNolintSuppressions. - - Args: - category: str, the category of the error. - linenum: int, the current line number. - Returns: - bool, True iff the error should be suppressed due to a NOLINT comment. - """ - return (linenum in _error_suppressions.get(category, set()) or - linenum in _error_suppressions.get(None, set())) - -def Match(pattern, s): - """Matches the string with the pattern, caching the compiled regexp.""" - # The regexp compilation caching is inlined in both Match and Search for - # performance reasons; factoring it out into a separate function turns out - # to be noticeably expensive. - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) - return _regexp_compile_cache[pattern].match(s) - - -def ReplaceAll(pattern, rep, s): - """Replaces instances of pattern in a string with a replacement. - - The compiled regex is kept in a cache shared by Match and Search. - - Args: - pattern: regex pattern - rep: replacement text - s: search string - - Returns: - string with replacements made (or original string if no replacements) - """ - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) - return _regexp_compile_cache[pattern].sub(rep, s) - - -def Search(pattern, s): - """Searches the string for the pattern, caching the compiled regexp.""" - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) - return _regexp_compile_cache[pattern].search(s) - - -class _IncludeState(dict): - """Tracks line numbers for includes, and the order in which includes appear. - - As a dict, an _IncludeState object serves as a mapping between include - filename and line number on which that file was included. - - Call CheckNextIncludeOrder() once for each header in the file, passing - in the type constants defined above. Calls in an illegal order will - raise an _IncludeError with an appropriate error message. - - """ - # self._section will move monotonically through this set. If it ever - # needs to move backwards, CheckNextIncludeOrder will raise an error. 
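The Match/ReplaceAll/Search helpers above all inline the same memoized-compile idiom. A minimal self-contained sketch of that pattern (using re.compile where cpplint uses sre_compile):

    import re

    _regexp_cache = {}

    def cached_search(pattern, s):
        # Compile once per distinct pattern string; reuse thereafter.
        if pattern not in _regexp_cache:
            _regexp_cache[pattern] = re.compile(pattern)
        return _regexp_cache[pattern].search(s)

    assert cached_search(r'\bNOLINT\b', 'int x;  // NOLINT') is not None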
- _INITIAL_SECTION = 0 - _MY_H_SECTION = 1 - _C_SECTION = 2 - _CPP_SECTION = 3 - _OTHER_H_SECTION = 4 - - _TYPE_NAMES = { - _C_SYS_HEADER: 'C system header', - _CPP_SYS_HEADER: 'C++ system header', - _LIKELY_MY_HEADER: 'header this file implements', - _POSSIBLE_MY_HEADER: 'header this file may implement', - _OTHER_HEADER: 'other header', - } - _SECTION_NAMES = { - _INITIAL_SECTION: "... nothing. (This can't be an error.)", - _MY_H_SECTION: 'a header this file implements', - _C_SECTION: 'C system header', - _CPP_SECTION: 'C++ system header', - _OTHER_H_SECTION: 'other header', - } - - def __init__(self): - dict.__init__(self) - self.ResetSection() - - def ResetSection(self): - # The name of the current section. - self._section = self._INITIAL_SECTION - # The path of last found header. - self._last_header = '' - - def SetLastHeader(self, header_path): - self._last_header = header_path - - def CanonicalizeAlphabeticalOrder(self, header_path): - """Returns a path canonicalized for alphabetical comparison. - - - replaces "-" with "_" so they both cmp the same. - - removes '-inl' since we don't require them to be after the main header. - - lowercase everything, just in case. - - Args: - header_path: Path to be canonicalized. - - Returns: - Canonicalized path. - """ - return header_path.replace('-inl.h', '.h').replace('-', '_').lower() - - def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): - """Check if a header is in alphabetical order with the previous header. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - header_path: Canonicalized header to be checked. - - Returns: - Returns true if the header is in alphabetical order. - """ - # If previous section is different from current section, _last_header will - # be reset to empty string, so it's always less than current header. - # - # If previous line was a blank line, assume that the headers are - # intentionally sorted the way they are. - if (self._last_header > header_path and - not Match(r'^\s*$', clean_lines.elided[linenum - 1])): - return False - return True - - def CheckNextIncludeOrder(self, header_type): - """Returns a non-empty error message if the next header is out of order. - - This function also updates the internal state to be ready to check - the next include. - - Args: - header_type: One of the _XXX_HEADER constants defined above. - - Returns: - The empty string if the header is in the right order, or an - error message describing what's wrong. - - """ - error_message = ('Found %s after %s' % - (self._TYPE_NAMES[header_type], - self._SECTION_NAMES[self._section])) - - last_section = self._section - - if header_type == _C_SYS_HEADER: - if self._section <= self._C_SECTION: - self._section = self._C_SECTION - else: - self._last_header = '' - return error_message - elif header_type == _CPP_SYS_HEADER: - if self._section <= self._CPP_SECTION: - self._section = self._CPP_SECTION - else: - self._last_header = '' - return error_message - elif header_type == _LIKELY_MY_HEADER: - if self._section <= self._MY_H_SECTION: - self._section = self._MY_H_SECTION - else: - self._section = self._OTHER_H_SECTION - elif header_type == _POSSIBLE_MY_HEADER: - if self._section <= self._MY_H_SECTION: - self._section = self._MY_H_SECTION - else: - # This will always be the fallback because we're not sure - # enough that the header is associated with this file. 
- self._section = self._OTHER_H_SECTION - else: - assert header_type == _OTHER_HEADER - self._section = self._OTHER_H_SECTION - - if last_section != self._section: - self._last_header = '' - - return '' - - -class _CppLintState(object): - """Maintains module-wide state..""" - - def __init__(self): - self.verbose_level = 1 # global setting. - self.error_count = 0 # global count of reported errors - # filters to apply when emitting error messages - self.filters = _DEFAULT_FILTERS[:] - self.counting = 'total' # In what way are we counting errors? - self.errors_by_category = {} # string to int dict storing error counts - - # output format: - # "emacs" - format that emacs can parse (default) - # "vs7" - format that Microsoft Visual Studio 7 can parse - self.output_format = 'emacs' - - def SetOutputFormat(self, output_format): - """Sets the output format for errors.""" - self.output_format = output_format - - def SetVerboseLevel(self, level): - """Sets the module's verbosity, and returns the previous setting.""" - last_verbose_level = self.verbose_level - self.verbose_level = level - return last_verbose_level - - def SetCountingStyle(self, counting_style): - """Sets the module's counting options.""" - self.counting = counting_style - - def SetFilters(self, filters): - """Sets the error-message filters. - - These filters are applied when deciding whether to emit a given - error message. - - Args: - filters: A string of comma-separated filters (eg "+whitespace/indent"). - Each filter should start with + or -; else we die. - - Raises: - ValueError: The comma-separated filters did not all start with '+' or '-'. - E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" - """ - # Default filters always have less priority than the flag ones. - self.filters = _DEFAULT_FILTERS[:] - for filt in filters.split(','): - clean_filt = filt.strip() - if clean_filt: - self.filters.append(clean_filt) - for filt in self.filters: - if not (filt.startswith('+') or filt.startswith('-')): - raise ValueError('Every filter in --filters must start with + or -' - ' (%s does not)' % filt) - - def ResetErrorCounts(self): - """Sets the module's error statistic back to zero.""" - self.error_count = 0 - self.errors_by_category = {} - - def IncrementErrorCount(self, category): - """Bumps the module's error statistic.""" - self.error_count += 1 - if self.counting in ('toplevel', 'detailed'): - if self.counting != 'detailed': - category = category.split('/')[0] - if category not in self.errors_by_category: - self.errors_by_category[category] = 0 - self.errors_by_category[category] += 1 - - def PrintErrorCounts(self): - """Print a summary of errors by category, and the total.""" - for category, count in self.errors_by_category.iteritems(): - sys.stderr.write('Category \'%s\' errors found: %d\n' % - (category, count)) - sys.stderr.write('Total errors found: %d\n' % self.error_count) - -_cpplint_state = _CppLintState() - - -def _OutputFormat(): - """Gets the module's output format.""" - return _cpplint_state.output_format - - -def _SetOutputFormat(output_format): - """Sets the module's output format.""" - _cpplint_state.SetOutputFormat(output_format) - - -def _VerboseLevel(): - """Returns the module's verbosity setting.""" - return _cpplint_state.verbose_level - - -def _SetVerboseLevel(level): - """Sets the module's verbosity, and returns the previous setting.""" - return _cpplint_state.SetVerboseLevel(level) - - -def _SetCountingStyle(level): - """Sets the module's counting options.""" - _cpplint_state.SetCountingStyle(level) - 
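For reference, the filter list assembled by SetFilters is consumed by _ShouldPrintError further down: entries are applied in order, and each '+' or '-' entry toggles every category it prefixes, so the last matching entry wins. A standalone sketch of that rule:

    def is_filtered(category, filters):
        # filters like ['-whitespace', '+whitespace/indent']; last match wins.
        filtered = False
        for f in filters:
            if category.startswith(f[1:]):
                filtered = (f[0] == '-')
        return filtered

    flags = ['-whitespace', '+whitespace/indent']
    assert is_filtered('whitespace/todo', flags)
    assert not is_filtered('whitespace/indent', flags)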
- -def _Filters(): - """Returns the module's list of output filters, as a list.""" - return _cpplint_state.filters - - -def _SetFilters(filters): - """Sets the module's error-message filters. - - These filters are applied when deciding whether to emit a given - error message. - - Args: - filters: A string of comma-separated filters (eg "whitespace/indent"). - Each filter should start with + or -; else we die. - """ - _cpplint_state.SetFilters(filters) - - -class _FunctionState(object): - """Tracks current function name and the number of lines in its body.""" - - _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. - _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. - - def __init__(self): - self.in_a_function = False - self.lines_in_function = 0 - self.current_function = '' - - def Begin(self, function_name): - """Start analyzing function body. - - Args: - function_name: The name of the function being tracked. - """ - self.in_a_function = True - self.lines_in_function = 0 - self.current_function = function_name - - def Count(self): - """Count line in current function body.""" - if self.in_a_function: - self.lines_in_function += 1 - - def Check(self, error, filename, linenum): - """Report if too many lines in function body. - - Args: - error: The function to call with any errors found. - filename: The name of the current file. - linenum: The number of the line to check. - """ - if Match(r'T(EST|est)', self.current_function): - base_trigger = self._TEST_TRIGGER - else: - base_trigger = self._NORMAL_TRIGGER - trigger = base_trigger * 2**_VerboseLevel() - - if self.lines_in_function > trigger: - error_level = int(math.log(self.lines_in_function / base_trigger, 2)) - # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... - if error_level > 5: - error_level = 5 - error(filename, linenum, 'readability/fn_size', error_level, - 'Small and focused functions are preferred:' - ' %s has %d non-comment lines' - ' (error triggered by exceeding %d lines).' % ( - self.current_function, self.lines_in_function, trigger)) - - def End(self): - """Stop analyzing function body.""" - self.in_a_function = False - - -class _IncludeError(Exception): - """Indicates a problem with the include order in a file.""" - pass - - -class FileInfo: - """Provides utility functions for filenames. - - FileInfo provides easy access to the components of a file's path - relative to the project root. - """ - - def __init__(self, filename): - self._filename = filename - - def FullName(self): - """Make Windows paths like Unix.""" - return os.path.abspath(self._filename).replace('\\', '/') - - def RepositoryName(self): - """FullName after removing the local path to the repository. - - If we have a real absolute path name here we can try to do something smart: - detecting the root of the checkout and truncating /path/to/checkout from - the name so that we get header guards that don't include things like - "C:\Documents and Settings\..." or "/home/username/..." in them and thus - people on different computers who have checked the source out to different - locations won't see bogus errors. 
- """ - fullname = self.FullName() - - if os.path.exists(fullname): - project_dir = os.path.dirname(fullname) - - if os.path.exists(os.path.join(project_dir, ".svn")): - # If there's a .svn file in the current directory, we recursively look - # up the directory tree for the top of the SVN checkout - root_dir = project_dir - one_up_dir = os.path.dirname(root_dir) - while os.path.exists(os.path.join(one_up_dir, ".svn")): - root_dir = os.path.dirname(root_dir) - one_up_dir = os.path.dirname(one_up_dir) - - prefix = os.path.commonprefix([root_dir, project_dir]) - return fullname[len(prefix) + 1:] - - # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by - # searching up from the current path. - root_dir = os.path.dirname(fullname) - while (root_dir != os.path.dirname(root_dir) and - not os.path.exists(os.path.join(root_dir, ".git")) and - not os.path.exists(os.path.join(root_dir, ".hg")) and - not os.path.exists(os.path.join(root_dir, ".svn"))): - root_dir = os.path.dirname(root_dir) - - if (os.path.exists(os.path.join(root_dir, ".git")) or - os.path.exists(os.path.join(root_dir, ".hg")) or - os.path.exists(os.path.join(root_dir, ".svn"))): - prefix = os.path.commonprefix([root_dir, project_dir]) - return fullname[len(prefix) + 1:] - - # Don't know what to do; header guard warnings may be wrong... - return fullname - - def Split(self): - """Splits the file into the directory, basename, and extension. - - For 'chrome/browser/browser.cc', Split() would - return ('chrome/browser', 'browser', '.cc') - - Returns: - A tuple of (directory, basename, extension). - """ - - googlename = self.RepositoryName() - project, rest = os.path.split(googlename) - return (project,) + os.path.splitext(rest) - - def BaseName(self): - """File base name - text after the final slash, before the final period.""" - return self.Split()[1] - - def Extension(self): - """File extension - text following the final period.""" - return self.Split()[2] - - def NoExtension(self): - """File has no source file extension.""" - return '/'.join(self.Split()[0:2]) - - def IsSource(self): - """File has a source file extension.""" - return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx') - - -def _ShouldPrintError(category, confidence, linenum): - """If confidence >= verbose, category passes filter and is not suppressed.""" - - # There are three ways we might decide not to print an error message: - # a "NOLINT(category)" comment appears in the source, - # the verbosity level isn't high enough, or the filters filter it out. - if IsErrorSuppressedByNolint(category, linenum): - return False - if confidence < _cpplint_state.verbose_level: - return False - - is_filtered = False - for one_filter in _Filters(): - if one_filter.startswith('-'): - if category.startswith(one_filter[1:]): - is_filtered = True - elif one_filter.startswith('+'): - if category.startswith(one_filter[1:]): - is_filtered = False - else: - assert False # should have been checked for in SetFilter. - if is_filtered: - return False - - return True - - -def Error(filename, linenum, category, confidence, message): - """Logs the fact we've found a lint error. - - We log where the error was found, and also our confidence in the error, - that is, how certain we are this is a legitimate style regression, and - not a misidentification or a use that's sometimes justified. - - False positives can be suppressed by the use of - "cpplint(category)" comments on the offending line. These are - parsed into _error_suppressions. 
- - Args: - filename: The name of the file containing the error. - linenum: The number of the line containing the error. - category: A string used to describe the "category" this bug - falls under: "whitespace", say, or "runtime". Categories - may have a hierarchy separated by slashes: "whitespace/indent". - confidence: A number from 1-5 representing a confidence score for - the error, with 5 meaning that we are certain of the problem, - and 1 meaning that it could be a legitimate construct. - message: The error message. - """ - if _ShouldPrintError(category, confidence, linenum): - _cpplint_state.IncrementErrorCount(category) - if _cpplint_state.output_format == 'vs7': - sys.stderr.write('%s(%s): %s [%s] [%d]\n' % ( - filename, linenum, message, category, confidence)) - elif _cpplint_state.output_format == 'eclipse': - sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % ( - filename, linenum, message, category, confidence)) - else: - sys.stderr.write('%s:%s: %s [%s] [%d]\n' % ( - filename, linenum, message, category, confidence)) - - -# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard. -_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( - r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') -# Matches strings. Escape codes should already be removed by ESCAPES. -_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"') -# Matches characters. Escape codes should already be removed by ESCAPES. -_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'") -# Matches multi-line C++ comments. -# This RE is a little bit more complicated than one might expect, because we -# have to take care of space removals tools so we can handle comments inside -# statements better. -# The current rule is: We only clear spaces from both sides when we're at the -# end of the line. Otherwise, we try to remove spaces from the right side, -# if this doesn't work we try on left side but only if there's a non-character -# on the right. -_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( - r"""(\s*/\*.*\*/\s*$| - /\*.*\*/\s+| - \s+/\*.*\*/(?=\W)| - /\*.*\*/)""", re.VERBOSE) - - -def IsCppString(line): - """Does line terminate so, that the next symbol is in string constant. - - This function does not consider single-line nor multi-line comments. - - Args: - line: is a partial line of code starting from the 0..n. - - Returns: - True, if next character appended to 'line' is inside a - string constant. - """ - - line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" - return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 - - -def CleanseRawStrings(raw_lines): - """Removes C++11 raw strings from lines. - - Before: - static const char kData[] = R"( - multi-line string - )"; - - After: - static const char kData[] = "" - (replaced by blank line) - ""; - - Args: - raw_lines: list of raw lines. - - Returns: - list of lines with C++11 raw strings replaced by empty strings. - """ - - delimiter = None - lines_without_raw_strings = [] - for line in raw_lines: - if delimiter: - # Inside a raw string, look for the end - end = line.find(delimiter) - if end >= 0: - # Found the end of the string, match leading space for this - # line and resume copying the original lines, and also insert - # a "" on the last line. - leading_space = Match(r'^(\s*)\S', line) - line = leading_space.group(1) + '""' + line[end + len(delimiter):] - delimiter = None - else: - # Haven't found the end yet, append a blank line. - line = '' - - else: - # Look for beginning of a raw string. 
-      # See 2.14.15 [lex.string] for syntax.
-      matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
-      if matched:
-        delimiter = ')' + matched.group(2) + '"'
-
-        end = matched.group(3).find(delimiter)
-        if end >= 0:
-          # Raw string ended on same line
-          line = (matched.group(1) + '""' +
-                  matched.group(3)[end + len(delimiter):])
-          delimiter = None
-        else:
-          # Start of a multi-line raw string
-          line = matched.group(1) + '""'
-
-    lines_without_raw_strings.append(line)
-
-  # TODO(unknown): if delimiter is not None here, we might want to
-  # emit a warning for unterminated string.
-  return lines_without_raw_strings
-
-
-def FindNextMultiLineCommentStart(lines, lineix):
-  """Find the beginning marker for a multiline comment."""
-  while lineix < len(lines):
-    if lines[lineix].strip().startswith('/*'):
-      # Only return this marker if the comment goes beyond this line
-      if lines[lineix].strip().find('*/', 2) < 0:
-        return lineix
-    lineix += 1
-  return len(lines)
-
-
-def FindNextMultiLineCommentEnd(lines, lineix):
-  """We are inside a comment, find the end marker."""
-  while lineix < len(lines):
-    if lines[lineix].strip().endswith('*/'):
-      return lineix
-    lineix += 1
-  return len(lines)
-
-
-def RemoveMultiLineCommentsFromRange(lines, begin, end):
-  """Clears a range of lines for multi-line comments."""
-  # Having // dummy comments makes the lines non-empty, so we will not get
-  # unnecessary blank line warnings later in the code.
-  for i in range(begin, end):
-    lines[i] = '// dummy'
-
-
-def RemoveMultiLineComments(filename, lines, error):
-  """Removes multiline (c-style) comments from lines."""
-  lineix = 0
-  while lineix < len(lines):
-    lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
-    if lineix_begin >= len(lines):
-      return
-    lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
-    if lineix_end >= len(lines):
-      error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
-            'Could not find end of multi-line comment')
-      return
-    RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
-    lineix = lineix_end + 1
-
-
-def CleanseComments(line):
-  """Removes //-comments and single-line C-style /* */ comments.
-
-  Args:
-    line: A line of C++ source.
-
-  Returns:
-    The line with single-line comments removed.
-  """
-  commentpos = line.find('//')
-  if commentpos != -1 and not IsCppString(line[:commentpos]):
-    line = line[:commentpos].rstrip()
-  # get rid of /* ... */
-  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
-
-
-class CleansedLines(object):
-  """Holds 3 copies of all lines with different preprocessing applied to them.
-
-  1) elided member contains lines without strings and comments,
-  2) lines member contains lines without comments, and
-  3) raw_lines member contains all the lines without processing.
-  All these three members are of <type string>, and of the same length.
-  """
-
-  def __init__(self, lines):
-    self.elided = []
-    self.lines = []
-    self.raw_lines = lines
-    self.num_lines = len(lines)
-    self.lines_without_raw_strings = CleanseRawStrings(lines)
-    for linenum in range(len(self.lines_without_raw_strings)):
-      self.lines.append(CleanseComments(
-          self.lines_without_raw_strings[linenum]))
-      elided = self._CollapseStrings(self.lines_without_raw_strings[linenum])
-      self.elided.append(CleanseComments(elided))
-
-  def NumLines(self):
-    """Returns the number of lines represented."""
-    return self.num_lines
-
-  @staticmethod
-  def _CollapseStrings(elided):
-    """Collapses strings and chars on a line to simple "" or '' blocks.
- - We nix strings first so we're not fooled by text like '"http://"' - - Args: - elided: The line being processed. - - Returns: - The line with collapsed strings. - """ - if not _RE_PATTERN_INCLUDE.match(elided): - # Remove escaped characters first to make quote/single quote collapsing - # basic. Things that look like escaped characters shouldn't occur - # outside of strings and chars. - elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) - elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided) - elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided) - return elided - - -def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar): - """Find the position just after the matching endchar. - - Args: - line: a CleansedLines line. - startpos: start searching at this position. - depth: nesting level at startpos. - startchar: expression opening character. - endchar: expression closing character. - - Returns: - On finding matching endchar: (index just after matching endchar, 0) - Otherwise: (-1, new depth at end of this line) - """ - for i in xrange(startpos, len(line)): - if line[i] == startchar: - depth += 1 - elif line[i] == endchar: - depth -= 1 - if depth == 0: - return (i + 1, 0) - return (-1, depth) - - -def CloseExpression(clean_lines, linenum, pos): - """If input points to ( or { or [ or <, finds the position that closes it. - - If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the - linenum/pos that correspond to the closing of the expression. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - pos: A position on the line. - - Returns: - A tuple (line, linenum, pos) pointer *past* the closing brace, or - (line, len(lines), -1) if we never find a close. Note we ignore - strings and comments when matching; and the line we return is the - 'cleansed' line at linenum. - """ - - line = clean_lines.elided[linenum] - startchar = line[pos] - if startchar not in '({[<': - return (line, clean_lines.NumLines(), -1) - if startchar == '(': endchar = ')' - if startchar == '[': endchar = ']' - if startchar == '{': endchar = '}' - if startchar == '<': endchar = '>' - - # Check first line - (end_pos, num_open) = FindEndOfExpressionInLine( - line, pos, 0, startchar, endchar) - if end_pos > -1: - return (line, linenum, end_pos) - - # Continue scanning forward - while linenum < clean_lines.NumLines() - 1: - linenum += 1 - line = clean_lines.elided[linenum] - (end_pos, num_open) = FindEndOfExpressionInLine( - line, 0, num_open, startchar, endchar) - if end_pos > -1: - return (line, linenum, end_pos) - - # Did not find endchar before end of file, give up - return (line, clean_lines.NumLines(), -1) - - -def FindStartOfExpressionInLine(line, endpos, depth, startchar, endchar): - """Find position at the matching startchar. - - This is almost the reverse of FindEndOfExpressionInLine, but note - that the input position and returned position differs by 1. - - Args: - line: a CleansedLines line. - endpos: start searching at this position. - depth: nesting level at endpos. - startchar: expression opening character. - endchar: expression closing character. 
-
-  Returns:
-    On finding matching startchar: (index at matching startchar, 0)
-    Otherwise: (-1, new depth at beginning of this line)
-  """
-  for i in xrange(endpos, -1, -1):
-    if line[i] == endchar:
-      depth += 1
-    elif line[i] == startchar:
-      depth -= 1
-      if depth == 0:
-        return (i, 0)
-  return (-1, depth)
-
-
-def ReverseCloseExpression(clean_lines, linenum, pos):
-  """If input points to ) or } or ] or >, finds the position that opens it.
-
-  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
-  linenum/pos that correspond to the opening of the expression.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    pos: A position on the line.
-
-  Returns:
-    A tuple (line, linenum, pos) pointer *at* the opening brace, or
-    (line, 0, -1) if we never find the matching opening brace. Note
-    we ignore strings and comments when matching; and the line we
-    return is the 'cleansed' line at linenum.
-  """
-  line = clean_lines.elided[linenum]
-  endchar = line[pos]
-  if endchar not in ')}]>':
-    return (line, 0, -1)
-  if endchar == ')': startchar = '('
-  if endchar == ']': startchar = '['
-  if endchar == '}': startchar = '{'
-  if endchar == '>': startchar = '<'
-
-  # Check last line
-  (start_pos, num_open) = FindStartOfExpressionInLine(
-      line, pos, 0, startchar, endchar)
-  if start_pos > -1:
-    return (line, linenum, start_pos)
-
-  # Continue scanning backward
-  while linenum > 0:
-    linenum -= 1
-    line = clean_lines.elided[linenum]
-    (start_pos, num_open) = FindStartOfExpressionInLine(
-        line, len(line) - 1, num_open, startchar, endchar)
-    if start_pos > -1:
-      return (line, linenum, start_pos)
-
-  # Did not find startchar before beginning of file, give up
-  return (line, 0, -1)
-
-
-def CheckForCopyright(filename, lines, error):
-  """Logs an error if no Copyright message appears at the top of the file."""
-
-  # We'll say it should occur by line 10. Don't forget there's a
-  # dummy line at the front.
-  for line in xrange(1, min(len(lines), 11)):
-    if re.search(r'Copyright', lines[line], re.I): break
-  else:    # means no copyright line was found
-    error(filename, 0, 'legal/copyright', 5,
-          'No copyright message found. '
-          'You should have a line: "Copyright [year] <Copyright Owner>"')
-
-
-def GetHeaderGuardCPPVariable(filename):
-  """Returns the CPP variable that should be used as a header guard.
-
-  Args:
-    filename: The name of a C++ header file.
-
-  Returns:
-    The CPP variable that should be used as a header guard in the
-    named file.
-
-  """
-
-  # Restores original filename in case that cpplint is invoked from Emacs's
-  # flymake.
-  filename = re.sub(r'_flymake\.h$', '.h', filename)
-  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
-
-  fileinfo = FileInfo(filename)
-  file_path_from_root = fileinfo.RepositoryName()
-  if _root:
-    file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root)
-  return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
-
-
-def CheckForHeaderGuard(filename, lines, error):
-  """Checks that the file contains a header guard.
-
-  Logs an error if no #ifndef header guard is present. For other
-  headers, checks that the full pathname is used.
-
-  Args:
-    filename: The name of the C++ header file.
-    lines: An array of strings, each representing a line of the file.
-    error: The function to call with any errors found.
- """ - - cppvar = GetHeaderGuardCPPVariable(filename) - - ifndef = None - ifndef_linenum = 0 - define = None - endif = None - endif_linenum = 0 - for linenum, line in enumerate(lines): - linesplit = line.split() - if len(linesplit) >= 2: - # find the first occurrence of #ifndef and #define, save arg - if not ifndef and linesplit[0] == '#ifndef': - # set ifndef to the header guard presented on the #ifndef line. - ifndef = linesplit[1] - ifndef_linenum = linenum - if not define and linesplit[0] == '#define': - define = linesplit[1] - # find the last occurrence of #endif, save entire line - if line.startswith('#endif'): - endif = line - endif_linenum = linenum - - if not ifndef: - error(filename, 0, 'build/header_guard', 5, - 'No #ifndef header guard found, suggested CPP variable is: %s' % - cppvar) - return - - if not define: - error(filename, 0, 'build/header_guard', 5, - 'No #define header guard found, suggested CPP variable is: %s' % - cppvar) - return - - # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ - # for backward compatibility. - if ifndef != cppvar: - error_level = 0 - if ifndef != cppvar + '_': - error_level = 5 - - ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum, - error) - error(filename, ifndef_linenum, 'build/header_guard', error_level, - '#ifndef header guard has wrong style, please use: %s' % cppvar) - - if define != ifndef: - error(filename, 0, 'build/header_guard', 5, - '#ifndef and #define don\'t match, suggested CPP variable is: %s' % - cppvar) - return - - if endif != ('#endif // %s' % cppvar): - error_level = 0 - if endif != ('#endif // %s' % (cppvar + '_')): - error_level = 5 - - ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum, - error) - error(filename, endif_linenum, 'build/header_guard', error_level, - '#endif line should be "#endif // %s"' % cppvar) - - -def CheckForBadCharacters(filename, lines, error): - """Logs an error for each line containing bad characters. - - Two kinds of bad characters: - - 1. Unicode replacement characters: These indicate that either the file - contained invalid UTF-8 (likely) or Unicode replacement characters (which - it shouldn't). Note that it's possible for this to throw off line - numbering if the invalid UTF-8 occurred adjacent to a newline. - - 2. NUL bytes. These are problematic for some tools. - - Args: - filename: The name of the current file. - lines: An array of strings, each representing a line of the file. - error: The function to call with any errors found. - """ - for linenum, line in enumerate(lines): - if u'\ufffd' in line: - error(filename, linenum, 'readability/utf8', 5, - 'Line contains invalid UTF-8 (or Unicode replacement character).') - if '\0' in line: - error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.') - - -def CheckForNewlineAtEOF(filename, lines, error): - """Logs an error if there is no newline char at the end of the file. - - Args: - filename: The name of the current file. - lines: An array of strings, each representing a line of the file. - error: The function to call with any errors found. - """ - - # The array lines() was created by adding two newlines to the - # original file (go figure), then splitting on \n. - # To verify that the file ends in \n, we just have to make sure the - # last-but-two element of lines() exists and is empty. 
- if len(lines) < 3 or lines[-2]: - error(filename, len(lines) - 2, 'whitespace/ending_newline', 5, - 'Could not find a newline character at the end of the file.') - - -def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): - """Logs an error if we see /* ... */ or "..." that extend past one line. - - /* ... */ comments are legit inside macros, for one line. - Otherwise, we prefer // comments, so it's ok to warn about the - other. Likewise, it's ok for strings to extend across multiple - lines, as long as a line continuation character (backslash) - terminates each line. Although not currently prohibited by the C++ - style guide, it's ugly and unnecessary. We don't do well with either - in this lint program, so we warn about both. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Remove all \\ (escaped backslashes) from the line. They are OK, and the - # second (escaped) slash may trigger later \" detection erroneously. - line = line.replace('\\\\', '') - - if line.count('/*') > line.count('*/'): - error(filename, linenum, 'readability/multiline_comment', 5, - 'Complex multi-line /*...*/-style comment found. ' - 'Lint may give bogus warnings. ' - 'Consider replacing these with //-style comments, ' - 'with #if 0...#endif, ' - 'or with more clearly structured multi-line comments.') - - if (line.count('"') - line.count('\\"')) % 2: - error(filename, linenum, 'readability/multiline_string', 5, - 'Multi-line string ("...") found. This lint script doesn\'t ' - 'do well with such strings, and may give bogus warnings. ' - 'Use C++11 raw strings or concatenation instead.') - - -threading_list = ( - ('asctime(', 'asctime_r('), - ('ctime(', 'ctime_r('), - ('getgrgid(', 'getgrgid_r('), - ('getgrnam(', 'getgrnam_r('), - ('getlogin(', 'getlogin_r('), - ('getpwnam(', 'getpwnam_r('), - ('getpwuid(', 'getpwuid_r('), - ('gmtime(', 'gmtime_r('), - ('localtime(', 'localtime_r('), - ('rand(', 'rand_r('), - ('strtok(', 'strtok_r('), - ('ttyname(', 'ttyname_r('), - ) - - -def CheckPosixThreading(filename, clean_lines, linenum, error): - """Checks for calls to thread-unsafe functions. - - Much code has been originally written without consideration of - multi-threading. Also, engineers are relying on their old experience; - they have learned posix before threading extensions were added. These - tests guide the engineers to use thread-safe functions (when using - posix directly). - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - for single_thread_function, multithread_safe_function in threading_list: - ix = line.find(single_thread_function) - # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison - if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and - line[ix - 1] not in ('_', '.', '>'))): - error(filename, linenum, 'runtime/threadsafe_fn', 2, - 'Consider using ' + multithread_safe_function + - '...) instead of ' + single_thread_function + - '...) for improved thread safety.') - - -def CheckVlogArguments(filename, clean_lines, linenum, error): - """Checks that VLOG() is only used for defining a logging level. - - For example, VLOG(2) is correct. 
VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and - VLOG(FATAL) are not. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line): - error(filename, linenum, 'runtime/vlog', 5, - 'VLOG() should be used with numeric verbosity level. ' - 'Use LOG() if you want symbolic severity levels.') - - -# Matches invalid increment: *count++, which moves pointer instead of -# incrementing a value. -_RE_PATTERN_INVALID_INCREMENT = re.compile( - r'^\s*\*\w+(\+\+|--);') - - -def CheckInvalidIncrement(filename, clean_lines, linenum, error): - """Checks for invalid increment *count++. - - For example following function: - void increment_counter(int* count) { - *count++; - } - is invalid, because it effectively does count++, moving pointer, and should - be replaced with ++*count, (*count)++ or *count += 1. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - if _RE_PATTERN_INVALID_INCREMENT.match(line): - error(filename, linenum, 'runtime/invalid_increment', 5, - 'Changing pointer instead of value (or unused value of operator*).') - - -class _BlockInfo(object): - """Stores information about a generic block of code.""" - - def __init__(self, seen_open_brace): - self.seen_open_brace = seen_open_brace - self.open_parentheses = 0 - self.inline_asm = _NO_ASM - - def CheckBegin(self, filename, clean_lines, linenum, error): - """Run checks that applies to text up to the opening brace. - - This is mostly for checking the text after the class identifier - and the "{", usually where the base class is specified. For other - blocks, there isn't much to check, so we always pass. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - pass - - def CheckEnd(self, filename, clean_lines, linenum, error): - """Run checks that applies to text after the closing brace. - - This is mostly used for checking end of namespace comments. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - pass - - -class _ClassInfo(_BlockInfo): - """Stores information about a class.""" - - def __init__(self, name, class_or_struct, clean_lines, linenum): - _BlockInfo.__init__(self, False) - self.name = name - self.starting_linenum = linenum - self.is_derived = False - if class_or_struct == 'struct': - self.access = 'public' - self.is_struct = True - else: - self.access = 'private' - self.is_struct = False - - # Remember initial indentation level for this class. Using raw_lines here - # instead of elided to account for leading comments. - initial_indent = Match(r'^( *)\S', clean_lines.raw_lines[linenum]) - if initial_indent: - self.class_indent = len(initial_indent.group(1)) - else: - self.class_indent = 0 - - # Try to find the end of the class. This will be confused by things like: - # class A { - # } *x = { ... - # - # But it's still good enough for CheckSectionSpacing. 
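The end-of-class scan that follows tracks a running brace depth until it returns to zero. A tiny standalone version of the same scan:

    def find_block_end(lines, start):
        # Index of the line where brace depth first returns to zero,
        # mimicking the last_line loop below.
        depth = 0
        for i in range(start, len(lines)):
            depth += lines[i].count('{') - lines[i].count('}')
            if not depth:
                return i
        return None

    assert find_block_end(['class A {', ' int x;', '};'], 0) == 2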
-    self.last_line = 0
-    depth = 0
-    for i in range(linenum, clean_lines.NumLines()):
-      line = clean_lines.elided[i]
-      depth += line.count('{') - line.count('}')
-      if not depth:
-        self.last_line = i
-        break
-
-  def CheckBegin(self, filename, clean_lines, linenum, error):
-    # Look for a bare ':'
-    if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
-      self.is_derived = True
-
-  def CheckEnd(self, filename, clean_lines, linenum, error):
-    # Check that closing brace is aligned with beginning of the class.
-    # Only do this if the closing brace is indented by only whitespaces.
-    # This means we will not check single-line class definitions.
-    indent = Match(r'^( *)\}', clean_lines.elided[linenum])
-    if indent and len(indent.group(1)) != self.class_indent:
-      if self.is_struct:
-        parent = 'struct ' + self.name
-      else:
-        parent = 'class ' + self.name
-      error(filename, linenum, 'whitespace/indent', 3,
-            'Closing brace should be aligned with beginning of %s' % parent)
-
-
-class _NamespaceInfo(_BlockInfo):
-  """Stores information about a namespace."""
-
-  def __init__(self, name, linenum):
-    _BlockInfo.__init__(self, False)
-    self.name = name or ''
-    self.starting_linenum = linenum
-
-  def CheckEnd(self, filename, clean_lines, linenum, error):
-    """Check end of namespace comments."""
-    line = clean_lines.raw_lines[linenum]
-
-    # Check how many lines is enclosed in this namespace. Don't issue
-    # warning for missing namespace comments if there aren't enough
-    # lines. However, do apply checks if there is already an end of
-    # namespace comment and it's incorrect.
-    #
-    # TODO(unknown): We always want to check end of namespace comments
-    # if a namespace is large, but sometimes we also want to apply the
-    # check if a short namespace contained nontrivial things (something
-    # other than forward declarations). There is currently no logic on
-    # deciding what these nontrivial things are, so this check is
-    # triggered by namespace size only, which works most of the time.
-    if (linenum - self.starting_linenum < 10
-        and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
-      return
-
-    # Look for matching comment at end of namespace.
-    #
-    # Note that we accept C style "/* */" comments for terminating
-    # namespaces, so that code that terminate namespaces inside
-    # preprocessor macros can be cpplint clean.
-    #
-    # We also accept stuff like "// end of namespace <namespace name>."
-    # with the period at the end.
-    #
-    # Besides these, we don't accept anything else, otherwise we might
-    # get false negatives when existing comment is a substring of the
-    # expected namespace.
- if self.name: - # Named namespace - if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) + - r'[\*/\.\\\s]*$'), - line): - error(filename, linenum, 'readability/namespace', 5, - 'Namespace should be terminated with "// namespace %s"' % - self.name) - else: - # Anonymous namespace - if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): - error(filename, linenum, 'readability/namespace', 5, - 'Namespace should be terminated with "// namespace"') - - -class _PreprocessorInfo(object): - """Stores checkpoints of nesting stacks when #if/#else is seen.""" - - def __init__(self, stack_before_if): - # The entire nesting stack before #if - self.stack_before_if = stack_before_if - - # The entire nesting stack up to #else - self.stack_before_else = [] - - # Whether we have already seen #else or #elif - self.seen_else = False - - -class _NestingState(object): - """Holds states related to parsing braces.""" - - def __init__(self): - # Stack for tracking all braces. An object is pushed whenever we - # see a "{", and popped when we see a "}". Only 3 types of - # objects are possible: - # - _ClassInfo: a class or struct. - # - _NamespaceInfo: a namespace. - # - _BlockInfo: some other type of block. - self.stack = [] - - # Stack of _PreprocessorInfo objects. - self.pp_stack = [] - - def SeenOpenBrace(self): - """Check if we have seen the opening brace for the innermost block. - - Returns: - True if we have seen the opening brace, False if the innermost - block is still expecting an opening brace. - """ - return (not self.stack) or self.stack[-1].seen_open_brace - - def InNamespaceBody(self): - """Check if we are currently one level inside a namespace body. - - Returns: - True if top of the stack is a namespace block, False otherwise. - """ - return self.stack and isinstance(self.stack[-1], _NamespaceInfo) - - def UpdatePreprocessor(self, line): - """Update preprocessor stack. - - We need to handle preprocessors due to classes like this: - #ifdef SWIG - struct ResultDetailsPageElementExtensionPoint { - #else - struct ResultDetailsPageElementExtensionPoint : public Extension { - #endif - - We make the following assumptions (good enough for most files): - - Preprocessor condition evaluates to true from #if up to first - #else/#elif/#endif. - - - Preprocessor condition evaluates to false from #else/#elif up - to #endif. We still perform lint checks on these lines, but - these do not affect nesting stack. - - Args: - line: current line to check. - """ - if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line): - # Beginning of #if block, save the nesting stack here. The saved - # stack will allow us to restore the parsing state in the #else case. - self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) - elif Match(r'^\s*#\s*(else|elif)\b', line): - # Beginning of #else block - if self.pp_stack: - if not self.pp_stack[-1].seen_else: - # This is the first #else or #elif block. Remember the - # whole nesting stack up to this point. This is what we - # keep after the #endif. - self.pp_stack[-1].seen_else = True - self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack) - - # Restore the stack to how it was before the #if - self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if) - else: - # TODO(unknown): unexpected #else, issue warning? - pass - elif Match(r'^\s*#\s*endif\b', line): - # End of #if or #else blocks. 
-      if self.pp_stack:
-        # If we saw an #else, we will need to restore the nesting
-        # stack to its former state before the #else, otherwise we
-        # will just continue from where we left off.
-        if self.pp_stack[-1].seen_else:
-          # Here we can just use a shallow copy since we are the last
-          # reference to it.
-          self.stack = self.pp_stack[-1].stack_before_else
-        # Drop the corresponding #if
-        self.pp_stack.pop()
-      else:
-        # TODO(unknown): unexpected #endif, issue warning?
-        pass
-
-  def Update(self, filename, clean_lines, linenum, error):
-    """Update nesting state with current line.
-
-    Args:
-      filename: The name of the current file.
-      clean_lines: A CleansedLines instance containing the file.
-      linenum: The number of the line to check.
-      error: The function to call with any errors found.
-    """
-    line = clean_lines.elided[linenum]
-
-    # Update pp_stack first
-    self.UpdatePreprocessor(line)
-
-    # Count parentheses. This is to avoid adding struct arguments to
-    # the nesting stack.
-    if self.stack:
-      inner_block = self.stack[-1]
-      depth_change = line.count('(') - line.count(')')
-      inner_block.open_parentheses += depth_change
-
-      # Also check if we are starting or ending an inline assembly block.
-      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
-        if (depth_change != 0 and
-            inner_block.open_parentheses == 1 and
-            _MATCH_ASM.match(line)):
-          # Enter assembly block
-          inner_block.inline_asm = _INSIDE_ASM
-        else:
-          # Not entering assembly block. If previous line was _END_ASM,
-          # we will now shift to _NO_ASM state.
-          inner_block.inline_asm = _NO_ASM
-      elif (inner_block.inline_asm == _INSIDE_ASM and
-            inner_block.open_parentheses == 0):
-        # Exit assembly block
-        inner_block.inline_asm = _END_ASM
-
-    # Consume namespace declaration at the beginning of the line. Do
-    # this in a loop so that we catch same line declarations like this:
-    #   namespace proto2 { namespace bridge { class MessageSet; } }
-    while True:
-      # Match start of namespace. The "\b\s*" below catches namespace
-      # declarations even if it weren't followed by a whitespace, this
-      # is so that we don't confuse our namespace checker. The
-      # missing spaces will be flagged by CheckSpacing.
-      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
-      if not namespace_decl_match:
-        break
-
-      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
-      self.stack.append(new_namespace)
-
-      line = namespace_decl_match.group(2)
-      if line.find('{') != -1:
-        new_namespace.seen_open_brace = True
-        line = line[line.find('{') + 1:]
-
-    # Look for a class declaration in whatever is left of the line
-    # after parsing namespaces. The regexp accounts for decorated classes
-    # such as in:
-    #   class LOCKABLE API Object {
-    #   };
-    #
-    # Templates with class arguments may confuse the parser, for example:
-    #   template <class T, class Comparator = less<T>,
-    #             class Vector = vector<T> >
-    #   class HeapQueue {
-    #
-    # Because this parser has no nesting state about templates, by the
-    # time it saw "class Comparator", it may think that it's a new class.
-    # Nested templates have a similar problem:
-    #   template <
-    #       typename ExportedType,
-    #       typename TupleType,
-    #       template <typename SingletonType> class ImplTemplate>
-    #
-    # To avoid these cases, we ignore classes that are followed by '=' or '>'
-    class_decl_match = Match(
-        r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
- r'(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)' - r'(([^=>]|<[^<>]*>|<[^<>]*<[^<>]*>\s*>)*)$', line) - if (class_decl_match and - (not self.stack or self.stack[-1].open_parentheses == 0)): - self.stack.append(_ClassInfo( - class_decl_match.group(4), class_decl_match.group(2), - clean_lines, linenum)) - line = class_decl_match.group(5) - - # If we have not yet seen the opening brace for the innermost block, - # run checks here. - if not self.SeenOpenBrace(): - self.stack[-1].CheckBegin(filename, clean_lines, linenum, error) - - # Update access control if we are inside a class/struct - if self.stack and isinstance(self.stack[-1], _ClassInfo): - classinfo = self.stack[-1] - access_match = Match( - r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?' - r':(?:[^:]|$)', - line) - if access_match: - classinfo.access = access_match.group(2) - - # Check that access keywords are indented +1 space. Skip this - # check if the keywords are not preceded by whitespaces. - indent = access_match.group(1) - if (len(indent) != classinfo.class_indent + 1 and - Match(r'^\s*$', indent)): - if classinfo.is_struct: - parent = 'struct ' + classinfo.name - else: - parent = 'class ' + classinfo.name - slots = '' - if access_match.group(3): - slots = access_match.group(3) - error(filename, linenum, 'whitespace/indent', 3, - '%s%s: should be indented +1 space inside %s' % ( - access_match.group(2), slots, parent)) - - # Consume braces or semicolons from what's left of the line - while True: - # Match first brace, semicolon, or closed parenthesis. - matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line) - if not matched: - break - - token = matched.group(1) - if token == '{': - # If namespace or class hasn't seen a opening brace yet, mark - # namespace/class head as complete. Push a new block onto the - # stack otherwise. - if not self.SeenOpenBrace(): - self.stack[-1].seen_open_brace = True - else: - self.stack.append(_BlockInfo(True)) - if _MATCH_ASM.match(line): - self.stack[-1].inline_asm = _BLOCK_ASM - elif token == ';' or token == ')': - # If we haven't seen an opening brace yet, but we already saw - # a semicolon, this is probably a forward declaration. Pop - # the stack for these. - # - # Similarly, if we haven't seen an opening brace yet, but we - # already saw a closing parenthesis, then these are probably - # function arguments with extra "class" or "struct" keywords. - # Also pop these stack for these. - if not self.SeenOpenBrace(): - self.stack.pop() - else: # token == '}' - # Perform end of block checks and pop the stack. - if self.stack: - self.stack[-1].CheckEnd(filename, clean_lines, linenum, error) - self.stack.pop() - line = matched.group(2) - - def InnermostClass(self): - """Get class info on the top of the stack. - - Returns: - A _ClassInfo object if we are inside a class, or None otherwise. - """ - for i in range(len(self.stack), 0, -1): - classinfo = self.stack[i - 1] - if isinstance(classinfo, _ClassInfo): - return classinfo - return None - - def CheckCompletedBlocks(self, filename, error): - """Checks that all classes and namespaces have been completely parsed. - - Call this when all lines in a file have been processed. - Args: - filename: The name of the current file. - error: The function to call with any errors found. - """ - # Note: This test can result in false positives if #ifdef constructs - # get in the way of brace matching. See the testBuildClass test in - # cpplint_unittest.py for an example of this. 
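CheckCompletedBlocks, whose reporting loop continues below, simply flags whatever is still on the nesting stack at EOF. A standalone miniature of that push/pop bookkeeping (a sketch that ignores braces inside strings and comments):

    def unclosed_block_lines(lines):
        # Push the line number on '{', pop on '}'; leftovers never closed.
        stack = []
        for num, line in enumerate(lines, 1):
            for ch in line:
                if ch == '{':
                    stack.append(num)
                elif ch == '}' and stack:
                    stack.pop()
        return stack

    assert unclosed_block_lines(['class A {', '};']) == []
    assert unclosed_block_lines(['namespace foo {', 'int x;']) == [1]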
-  for obj in self.stack:
-    if isinstance(obj, _ClassInfo):
-      error(filename, obj.starting_linenum, 'build/class', 5,
-            'Failed to find complete declaration of class %s' %
-            obj.name)
-    elif isinstance(obj, _NamespaceInfo):
-      error(filename, obj.starting_linenum, 'build/namespaces', 5,
-            'Failed to find complete declaration of namespace %s' %
-            obj.name)
-
-
-def CheckForNonStandardConstructs(filename, clean_lines, linenum,
-                                  nesting_state, error):
-  r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
-
-  Complain about several constructs which gcc-2 accepts, but which are
-  not standard C++. Warning about these in lint is one way to ease the
-  transition to new compilers.
-  - put storage class first (e.g. "static const" instead of "const static").
-  - "%lld" instead of %qd" in printf-type functions.
-  - "%1$d" is non-standard in printf-type functions.
-  - "\%" is an undefined character escape sequence.
-  - text after #endif is not allowed.
-  - invalid inner-style forward declaration.
-  - >? and <? operators, and their >?= and <?= cousins.
-
-  Additionally, check for constructor/destructor style violations and
-  reference members, as it is very convenient to do so while checking for
-  gcc-2 compliance.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    nesting_state: A _NestingState instance which maintains information about
-                   the current stack of nested blocks being parsed.
-    error: A callable to which errors are reported, which takes 4 arguments:
-           filename, line number, error level, and message
-  """
-
-  # Remove comments from the line, but leave in strings for now.
-  line = clean_lines.lines[linenum]
-
-  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
-    error(filename, linenum, 'runtime/printf_format', 3,
-          '%q in format strings is deprecated. Use %ll instead.')
-
-  if Search(r'printf\s*\(.*".*%\d+\$', line):
-    error(filename, linenum, 'runtime/printf_format', 2,
-          '%N$ formats are unconventional. Try rewriting to avoid them.')
-
-  # Remove escaped backslashes before looking for undefined escape sequences.
-  line = line.replace('\\\\', '')
-
-  if Search(r'("|\').*\\(%|\[|\(|{)', line):
-    error(filename, linenum, 'build/printf_format', 3,
-          '%, [, (, and { are undefined character escapes. Unescape them.')
-
-  # For the rest, work with both comments and strings removed.
-  line = clean_lines.elided[linenum]
-
-  if Search(r'\b(const|volatile|void|char|short|int|long'
-            r'|float|double|signed|unsigned'
-            r'|schar|u?int8|u?int16|u?int32|u?int64)'
-            r'\s+(register|static|extern|typedef)\b',
-            line):
-    error(filename, linenum, 'build/storage_class', 5,
-          'Storage class (static, extern, typedef, etc) should be first.')
-
-  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
-    error(filename, linenum, 'build/endif_comment', 5,
-          'Uncommented text after #endif is non-standard. Use a comment.')
-
-  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
-    error(filename, linenum, 'build/forward_decl', 5,
-          'Inner-style forward declarations are invalid. Remove this line.')
-
-  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
-            line):
-    error(filename, linenum, 'build/deprecated', 3,
-          '>? and <? (max and min) operators are non-standard and deprecated.')
-
-  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
-    # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
-    error(filename, linenum, 'runtime/member_string_references', 2,
-          'const string& members are dangerous. It is much better to use '
-          'alternatives, such as pointers or simple constants.')
-
-  # Everything else in this function operates on class declarations.
-  # Return early if the top of the nesting stack is not a class, or if
-  # the class head is not completed yet.
-  classinfo = nesting_state.InnermostClass()
-  if not classinfo or not classinfo.seen_open_brace:
-    return
-
-  # The class may have been declared with namespace or classname qualifiers.
-  # The constructor and destructor will not have those qualifiers.
-  base_classname = classinfo.name.split('::')[-1]
-
-  # Look for single-argument constructors that aren't marked explicit.
-  # Technically a valid construct, but against style.
-  args = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)'
-               % re.escape(base_classname),
-               line)
-  if (args and
-      args.group(1) != 'void' and
-      not Match(r'(const\s+)?%s(\s+const)?\s*(?:<\w+>\s*)?&'
-                % re.escape(base_classname), args.group(1).strip())):
-    error(filename, linenum, 'runtime/explicit', 5,
-          'Single-argument constructors should be marked explicit.')
-
-
-def CheckSpacingForFunctionCall(filename, line, linenum, error):
-  """Checks for the correctness of various spacing around function calls.
-
-  Args:
-    filename: The name of the current file.
-    line: The text of the line to check.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-
-  # Since function calls often occur inside if/for/while/switch
-  # expressions - which have their own, more liberal conventions - we
-  # first see if we should be looking inside such an expression for a
-  # function call, to which we can apply more strict standards.
-  fncall = line    # if there's no control flow construct, look at whole line
-  for pattern in (r'\bif\s*\((.*)\)\s*{',
-                  r'\bfor\s*\((.*)\)\s*{',
-                  r'\bwhile\s*\((.*)\)\s*[{;]',
-                  r'\bswitch\s*\((.*)\)\s*{'):
-    match = Search(pattern, line)
-    if match:
-      fncall = match.group(1)    # look inside the parens for function calls
-      break
-
-  # Except in if/for/while/switch, there should never be space
-  # immediately inside parens (eg "f( 3, 4 )"). We make an exception
-  # for nested parens ( (a+b) + c ). Likewise, there should never be
-  # a space before a ( when it's a function argument.
I assume it's a - # function argument when the char before the whitespace is legal in - # a function name (alnum + _) and we're not starting a macro. Also ignore - # pointers and references to arrays and functions coz they're too tricky: - # we use a very simple way to recognize these: - # " (something)(maybe-something)" or - # " (something)(maybe-something," or - # " (something)[something]" - # Note that we assume the contents of [] to be short enough that - # they'll never need to wrap. - if ( # Ignore control structures. - not Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b', - fncall) and - # Ignore pointers/references to functions. - not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and - # Ignore pointers/references to arrays. - not Search(r' \([^)]+\)\[[^\]]+\]', fncall)): - if Search(r'\w\s*\(\s(?!\s*\\$)', fncall): # a ( used for a fn call - error(filename, linenum, 'whitespace/parens', 4, - 'Extra space after ( in function call') - elif Search(r'\(\s+(?!(\s*\\)|\()', fncall): - error(filename, linenum, 'whitespace/parens', 2, - 'Extra space after (') - if (Search(r'\w\s+\(', fncall) and - not Search(r'#\s*define|typedef', fncall) and - not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall)): - error(filename, linenum, 'whitespace/parens', 4, - 'Extra space before ( in function call') - # If the ) is followed only by a newline or a { + newline, assume it's - # part of a control statement (if/while/etc), and don't complain - if Search(r'[^)]\s+\)\s*[^{\s]', fncall): - # If the closing parenthesis is preceded by only whitespaces, - # try to give a more descriptive error message. - if Search(r'^\s+\)', fncall): - error(filename, linenum, 'whitespace/parens', 2, - 'Closing ) should be moved to the previous line') - else: - error(filename, linenum, 'whitespace/parens', 2, - 'Extra space before )') - - -def IsBlankLine(line): - """Returns true if the given line is blank. - - We consider a line to be blank if the line is empty or consists of - only white spaces. - - Args: - line: A line of a string. - - Returns: - True, if the given line is blank. - """ - return not line or line.isspace() - - -def CheckForFunctionLengths(filename, clean_lines, linenum, - function_state, error): - """Reports for long function bodies. - - For an overview why this is done, see: - http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions - - Uses a simplistic algorithm assuming other style guidelines - (especially spacing) are followed. - Only checks unindented functions, so class members are unchecked. - Trivial bodies are unchecked, so constructors with huge initializer lists - may be missed. - Blank/comment lines are not counted so as to avoid encouraging the removal - of vertical space and comments just to get through a lint check. - NOLINT *on the last line of a function* disables this check. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - function_state: Current function name and lines in body so far. - error: The function to call with any errors found. - """ - lines = clean_lines.lines - line = lines[linenum] - raw = clean_lines.raw_lines - raw_line = raw[linenum] - joined_line = '' - - starting_func = False - regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... - match_result = Match(regexp, line) - if match_result: - # If the name is all caps and underscores, figure it's a macro and - # ignore it, unless it's TEST or TEST_F. 
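Backing up to CheckSpacingForFunctionCall for a moment: the useful trick there is narrowing the line to the inside of a control-flow construct before applying the stricter call-spacing rules. A self-contained sketch of just that step (regexes from the diff; the tiny driver is mine):

    import re

    def call_spacing_warning(line):
        fncall = line  # default: inspect the whole line
        for pattern in (r'\bif\s*\((.*)\)\s*{', r'\bfor\s*\((.*)\)\s*{',
                        r'\bwhile\s*\((.*)\)\s*[{;]', r'\bswitch\s*\((.*)\)\s*{'):
            match = re.search(pattern, line)
            if match:
                fncall = match.group(1)  # only look inside the control parens
                break
        if re.search(r'\w\s*\(\s(?!\s*\\$)', fncall):
            return 'Extra space after ( in function call'
        return None

    print(call_spacing_warning('if (Foo( 3, 4)) {'))  # flags the inner call
    print(call_spacing_warning('if (Foo(3, 4)) {'))   # None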
- function_name = match_result.group(1).split()[-1] - if function_name == 'TEST' or function_name == 'TEST_F' or ( - not Match(r'[A-Z_]+$', function_name)): - starting_func = True - - if starting_func: - body_found = False - for start_linenum in xrange(linenum, clean_lines.NumLines()): - start_line = lines[start_linenum] - joined_line += ' ' + start_line.lstrip() - if Search(r'(;|})', start_line): # Declarations and trivial functions - body_found = True - break # ... ignore - elif Search(r'{', start_line): - body_found = True - function = Search(r'((\w|:)*)\(', line).group(1) - if Match(r'TEST', function): # Handle TEST... macros - parameter_regexp = Search(r'(\(.*\))', joined_line) - if parameter_regexp: # Ignore bad syntax - function += parameter_regexp.group(1) - else: - function += '()' - function_state.Begin(function) - break - if not body_found: - # No body for the function (or evidence of a non-function) was found. - error(filename, linenum, 'readability/fn_size', 5, - 'Lint failed to find start of function body.') - elif Match(r'^\}\s*$', line): # function end - function_state.Check(error, filename, linenum) - function_state.End() - elif not Match(r'^\s*$', line): - function_state.Count() # Count non-blank/non-comment lines. - - -_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?') - - -def CheckComment(comment, filename, linenum, error): - """Checks for common mistakes in TODO comments. - - Args: - comment: The text of the comment from the line in question. - filename: The name of the current file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - match = _RE_PATTERN_TODO.match(comment) - if match: - # One whitespace is correct; zero whitespace is handled elsewhere. - leading_whitespace = match.group(1) - if len(leading_whitespace) > 1: - error(filename, linenum, 'whitespace/todo', 2, - 'Too many spaces before TODO') - - username = match.group(2) - if not username: - error(filename, linenum, 'readability/todo', 2, - 'Missing username in TODO; it should look like ' - '"// TODO(my_username): Stuff."') - - middle_whitespace = match.group(3) - # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison - if middle_whitespace != ' ' and middle_whitespace != '': - error(filename, linenum, 'whitespace/todo', 2, - 'TODO(my_username) should be followed by a space') - -def CheckAccess(filename, clean_lines, linenum, nesting_state, error): - """Checks for improper use of DISALLOW* macros. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A _NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] # get rid of comments and strings - - matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|' - r'DISALLOW_EVIL_CONSTRUCTORS|' - r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line) - if not matched: - return - if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo): - if nesting_state.stack[-1].access != 'private': - error(filename, linenum, 'readability/constructors', 3, - '%s must be in the private: section' % matched.group(1)) - - else: - # Found DISALLOW* macro outside a class declaration, or perhaps it - # was used inside a function when it should have been part of the - # class declaration. 
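As for the TODO checker above: one regex feeds both warnings, with group 1 the whitespace before "TODO", group 2 the "(username)" part, and group 3 the separator after the colon. A standalone illustration (my toy loop around the pattern quoted in the diff):

    import re

    _RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')

    for comment in ('// TODO(my_username): Stuff.',
                    '//   TODO: no username',
                    '// TODO(x):tight'):
        m = _RE_PATTERN_TODO.match(comment)
        print(comment, '->', m.groups() if m else None)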
We could issue a warning here, but it
-      # probably resulted in a compiler error already.
-      pass
-
-
-def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
-  """Find the corresponding > to close a template.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: Current line number.
-    init_suffix: Remainder of the current line after the initial <.
-
-  Returns:
-    True if a matching bracket exists.
-  """
-  line = init_suffix
-  nesting_stack = ['<']
-  while True:
-    # Find the next operator that can tell us whether < is used as an
-    # opening bracket or as a less-than operator.  We only want to
-    # warn on the latter case.
-    #
-    # We could also check all other operators and terminate the search
-    # early, e.g. if we got something like this "a<b+c", the "<" is
-    # most likely a less-than operator, but then we will get false
-    # positives for default arguments and other template expressions.
-    match = Search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', line)
-    if match:
-      # Found an operator, update nesting stack
-      operator = match.group(1)
-      line = match.group(2)
-
-      if nesting_stack[-1] == '<':
-        # Expecting closing angle bracket
-        if operator in ('<', '(', '['):
-          nesting_stack.append(operator)
-        elif operator == '>':
-          nesting_stack.pop()
-          if not nesting_stack:
-            # Found matching angle bracket
-            return True
-        elif operator == ',':
-          # Got a comma after a bracket, this is most likely a template
-          # argument.  We have not seen a closing angle bracket yet, but
-          # it's probably a few lines later if we look for it, so just
-          # return early here.
-          return True
-        else:
-          # Got some other operator.
-          return False
-
-      else:
-        # Expecting closing parenthesis or closing bracket
-        if operator in ('<', '(', '['):
-          nesting_stack.append(operator)
-        elif operator in (')', ']'):
-          # We don't bother checking for matching () or [].  If we got
-          # something like (] or [), it would have been a syntax error.
-          nesting_stack.pop()
-
-    else:
-      # Scan the next line
-      linenum += 1
-      if linenum >= len(clean_lines.elided):
-        break
-      line = clean_lines.elided[linenum]
-
-  # Exhausted all remaining lines and still no matching angle bracket.
-  # Most likely the input was incomplete, otherwise we should have
-  # seen a semicolon and returned early.
-  return True
-
-
-def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
-  """Find the corresponding < that started a template.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: Current line number.
-    init_prefix: Part of the current line before the initial >.
-
-  Returns:
-    True if a matching bracket exists.
-  """
-  line = init_prefix
-  nesting_stack = ['>']
-  while True:
-    # Find the previous operator
-    match = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', line)
-    if match:
-      # Found an operator, update nesting stack
-      operator = match.group(2)
-      line = match.group(1)
-
-      if nesting_stack[-1] == '>':
-        # Expecting opening angle bracket
-        if operator in ('>', ')', ']'):
-          nesting_stack.append(operator)
-        elif operator == '<':
-          nesting_stack.pop()
-          if not nesting_stack:
-            # Found matching angle bracket
-            return True
-        elif operator == ',':
-          # Got a comma before a bracket, this is most likely a
-          # template argument.  The opening angle bracket is probably
-          # there if we look for it, so just return early here.
-          return True
-        else:
-          # Got some other operator.
- return False - - else: - # Expecting opening parenthesis or opening bracket - if operator in ('>', ')', ']'): - nesting_stack.append(operator) - elif operator in ('(', '['): - nesting_stack.pop() - - else: - # Scan the previous line - linenum -= 1 - if linenum < 0: - break - line = clean_lines.elided[linenum] - - # Exhausted all earlier lines and still no matching angle bracket. - return False - - -def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): - """Checks for the correctness of various spacing issues in the code. - - Things we check for: spaces around operators, spaces after - if/for/while/switch, no spaces around parens in function calls, two - spaces between code and comment, don't start a block with a blank - line, don't end a function with a blank line, don't add a blank line - after public/protected/private, don't have too many blank lines in a row. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A _NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - - # Don't use "elided" lines here, otherwise we can't check commented lines. - # Don't want to use "raw" either, because we don't want to check inside C++11 - # raw strings, - raw = clean_lines.lines_without_raw_strings - line = raw[linenum] - - # Before nixing comments, check if the line is blank for no good - # reason. This includes the first line after a block is opened, and - # blank lines at the end of a function (ie, right before a line like '}' - # - # Skip all the blank line checks if we are immediately inside a - # namespace body. In other words, don't issue blank line warnings - # for this block: - # namespace { - # - # } - # - # A warning about missing end of namespace comments will be issued instead. - if IsBlankLine(line) and not nesting_state.InNamespaceBody(): - elided = clean_lines.elided - prev_line = elided[linenum - 1] - prevbrace = prev_line.rfind('{') - # TODO(unknown): Don't complain if line before blank line, and line after, - # both start with alnums and are indented the same amount. - # This ignores whitespace at the start of a namespace block - # because those are not usually indented. - if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1: - # OK, we have a blank line at the start of a code block. Before we - # complain, we check if it is an exception to the rule: The previous - # non-empty line has the parameters of a function header that are indented - # 4 spaces (because they did not fit in a 80 column line when placed on - # the same line as the function name). We also check for the case where - # the previous line is indented 6 spaces, which may happen when the - # initializers of a constructor do not fit into a 80 column line. - exception = False - if Match(r' {6}\w', prev_line): # Initializer list? - # We are looking for the opening column of initializer list, which - # should be indented 4 spaces to cause 6 space indentation afterwards. - search_position = linenum-2 - while (search_position >= 0 - and Match(r' {6}\w', elided[search_position])): - search_position -= 1 - exception = (search_position >= 0 - and elided[search_position][:5] == ' :') - else: - # Search for the function arguments or an initializer list. 
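One more note on the two angle-bracket matchers above: both are the same nesting-stack walk, just in opposite directions. A stripped-down, single-line-only version of the forward walk (my simplification; the real code also scans following lines):

    import re

    def closes_template(suffix):
        """suffix = text after an initial '<'; True if that '<' opens a template."""
        stack, line = ['<'], suffix
        while True:
            m = re.search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', line)
            if not m:
                return False  # toy version: no multi-line scan
            op, line = m.group(1), m.group(2)
            if op in ('<', '(', '['):
                stack.append(op)          # nest deeper
            elif stack[-1] == '<':
                if op == '>':
                    stack.pop()
                    if not stack:
                        return True       # found the matching '>'
                elif op == ',':
                    return True           # comma inside <...>: template args
                else:
                    return False          # ')' or ';': it was a less-than
            elif op in (')', ']'):
                stack.pop()               # close a nested () or []

    print(closes_template('int, int> > v;'))   # True  (from "map<int, int> >")
    print(closes_template(' b) { return; }'))  # False (from "if (a < b) {")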
We use a - # simple heuristic here: If the line is indented 4 spaces; and we have a - # closing paren, without the opening paren, followed by an opening brace - # or colon (for initializer lists) we assume that it is the last line of - # a function header. If we have a colon indented 4 spaces, it is an - # initializer list. - exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', - prev_line) - or Match(r' {4}:', prev_line)) - - if not exception: - error(filename, linenum, 'whitespace/blank_line', 2, - 'Redundant blank line at the start of a code block ' - 'should be deleted.') - # Ignore blank lines at the end of a block in a long if-else - # chain, like this: - # if (condition1) { - # // Something followed by a blank line - # - # } else if (condition2) { - # // Something else - # } - if linenum + 1 < clean_lines.NumLines(): - next_line = raw[linenum + 1] - if (next_line - and Match(r'\s*}', next_line) - and next_line.find('} else ') == -1): - error(filename, linenum, 'whitespace/blank_line', 3, - 'Redundant blank line at the end of a code block ' - 'should be deleted.') - - matched = Match(r'\s*(public|protected|private):', prev_line) - if matched: - error(filename, linenum, 'whitespace/blank_line', 3, - 'Do not leave a blank line after "%s:"' % matched.group(1)) - - # Next, we complain if there's a comment too near the text - commentpos = line.find('//') - if commentpos != -1: - # Check if the // may be in quotes. If so, ignore it - # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison - if (line.count('"', 0, commentpos) - - line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes - # Allow one space for new scopes, two spaces otherwise: - if (not Match(r'^\s*{ //', line) and - ((commentpos >= 1 and - line[commentpos-1] not in string.whitespace) or - (commentpos >= 2 and - line[commentpos-2] not in string.whitespace))): - error(filename, linenum, 'whitespace/comments', 2, - 'At least two spaces is best between code and comments') - # There should always be a space between the // and the comment - commentend = commentpos + 2 - if commentend < len(line) and not line[commentend] == ' ': - # but some lines are exceptions -- e.g. if they're big - # comment delimiters like: - # //---------------------------------------------------------- - # or are an empty C++ style Doxygen comment, like: - # /// - # or C++ style Doxygen comments placed after the variable: - # ///< Header comment - # //!< Header comment - # or they begin with multiple slashes followed by a space: - # //////// Header comment - match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or - Search(r'^/$', line[commentend:]) or - Search(r'^!< ', line[commentend:]) or - Search(r'^/< ', line[commentend:]) or - Search(r'^/+ ', line[commentend:])) - if not match: - error(filename, linenum, 'whitespace/comments', 4, - 'Should have a space between // and comment') - CheckComment(line[commentpos:], filename, linenum, error) - - line = clean_lines.elided[linenum] # get rid of comments and strings - - # Don't try to do spacing checks for operator methods - line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line) - - # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". - # Otherwise not. Note we only check for non-spaces on *both* sides; - # sometimes people put non-spaces on one side when aligning ='s among - # many lines (not that this is behavior that I approve of...) 
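On the comment-spacing rules above ("at least two spaces is best between code and comments"): the quote-counting guard and the whitespace test are compact enough to demo on their own. A rough sketch (mine, simplified to ignore the "{ //" new-scope exception handled in the diff):

    import string

    def comment_too_close(line):
        commentpos = line.find('//')
        if commentpos == -1:
            return False
        # ignore '//' inside a string literal: count unescaped quotes before it
        if (line.count('"', 0, commentpos) -
            line.count('\\"', 0, commentpos)) % 2 != 0:
            return False
        return not (commentpos >= 2 and
                    line[commentpos - 1] in string.whitespace and
                    line[commentpos - 2] in string.whitespace)

    print(comment_too_close('x = 1;  // ok'))        # False
    print(comment_too_close('x = 1; // too close'))  # True
    print(comment_too_close('s = "http://x";'))      # False: '//' is in a string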
- if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line): - error(filename, linenum, 'whitespace/operators', 4, - 'Missing spaces around =') - - # It's ok not to have spaces around binary operators like + - * /, but if - # there's too little whitespace, we get concerned. It's hard to tell, - # though, so we punt on this one for now. TODO. - - # You should always have whitespace around binary operators. - # - # Check <= and >= first to avoid false positives with < and >, then - # check non-include lines for spacing around < and >. - match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line) - if match: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around %s' % match.group(1)) - # We allow no-spaces around << when used like this: 10<<20, but - # not otherwise (particularly, not when used as streams) - # Also ignore using ns::operator<<; - match = Search(r'(operator|\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line) - if (match and - not (match.group(1).isdigit() and match.group(2).isdigit()) and - not (match.group(1) == 'operator' and match.group(2) == ';')): - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around <<') - elif not Match(r'#.*include', line): - # Avoid false positives on -> - reduced_line = line.replace('->', '') - - # Look for < that is not surrounded by spaces. This is only - # triggered if both sides are missing spaces, even though - # technically should should flag if at least one side is missing a - # space. This is done to avoid some false positives with shifts. - match = Search(r'[^\s<]<([^\s=<].*)', reduced_line) - if (match and - not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))): - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around <') - - # Look for > that is not surrounded by spaces. Similar to the - # above, we only trigger if both sides are missing spaces to avoid - # false positives with shifts. - match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line) - if (match and - not FindPreviousMatchingAngleBracket(clean_lines, linenum, - match.group(1))): - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around >') - - # We allow no-spaces around >> for almost anything. This is because - # C++11 allows ">>" to close nested templates, which accounts for - # most cases when ">>" is not followed by a space. - # - # We still warn on ">>" followed by alpha character, because that is - # likely due to ">>" being used for right shifts, e.g.: - # value >> alpha - # - # When ">>" is used to close templates, the alphanumeric letter that - # follows would be part of an identifier, and there should still be - # a space separating the template type and the identifier. 
-    #   type<type<type>> alpha
-    match = Search(r'>>[a-zA-Z_]', line)
-    if match:
-      error(filename, linenum, 'whitespace/operators', 3,
-            'Missing spaces around >>')
-
-  # There shouldn't be space around unary operators
-  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
-  if match:
-    error(filename, linenum, 'whitespace/operators', 4,
-          'Extra space for operator %s' % match.group(1))
-
-  # A pet peeve of mine: no spaces after an if, while, switch, or for
-  match = Search(r' (if\(|for\(|while\(|switch\()', line)
-  if match:
-    error(filename, linenum, 'whitespace/parens', 5,
-          'Missing space before ( in %s' % match.group(1))
-
-  # For if/for/while/switch, the left and right parens should be
-  # consistent about how many spaces are inside the parens, and
-  # there should either be zero or one spaces inside the parens.
-  # We don't want: "if ( foo)" or "if ( foo )".
-  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
-  match = Search(r'\b(if|for|while|switch)\s*'
-                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
-                 line)
-  if match:
-    if len(match.group(2)) != len(match.group(4)):
-      if not (match.group(3) == ';' and
-              len(match.group(2)) == 1 + len(match.group(4)) or
-              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
-        error(filename, linenum, 'whitespace/parens', 5,
-              'Mismatching spaces inside () in %s' % match.group(1))
-    if len(match.group(2)) not in [0, 1]:
-      error(filename, linenum, 'whitespace/parens', 5,
-            'Should have zero or one spaces inside ( and ) in %s' %
-            match.group(1))
-
-  # You should always have a space after a comma (either as fn arg or operator)
-  #
-  # This does not apply when the non-space character following the
-  # comma is another comma, since the only time when that happens is
-  # for empty macro arguments.
-  #
-  # We run this check in two passes: first pass on elided lines to
-  # verify that lines contain missing whitespaces, second pass on raw
-  # lines to confirm that those missing whitespaces are not due to
-  # elided comments.
-  if Search(r',[^,\s]', line) and Search(r',[^,\s]', raw[linenum]):
-    error(filename, linenum, 'whitespace/comma', 3,
-          'Missing space after ,')
-
-  # You should always have a space after a semicolon
-  # except for few corner cases
-  # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
-  # space after ;
-  if Search(r';[^\s};\\)/]', line):
-    error(filename, linenum, 'whitespace/semicolon', 3,
-          'Missing space after ;')
-
-  # Next we will look for issues with function calls.
-  CheckSpacingForFunctionCall(filename, line, linenum, error)
-
-  # Except after an opening paren, or after another opening brace (in case of
-  # an initializer list, for instance), you should have spaces before your
-  # braces. And since you should never have braces at the beginning of a line,
-  # this is an easy test.
-  match = Match(r'^(.*[^ ({]){', line)
-  if match:
-    # Try a bit harder to check for brace initialization.  This
-    # happens in one of the following forms:
-    #   Constructor() : initializer_list_{} { ... }
-    #   Constructor{}.MemberFunction()
-    #   Type variable{};
-    #   FunctionCall(type{}, ...);
-    #   LastArgument(..., type{});
-    #   LOG(INFO) << type{} << " ...";
-    #   map_of_type[{...}] = ...;
-    #
-    # We check for the character following the closing brace, and
-    # silence the warning if it's one of those listed above, i.e.
-    # "{.;,)<]".
-    #
-    # To account for nested initializer list, we allow any number of
-    # closing braces up to "{;,)<".
We can't simply silence the - # warning on first sight of closing brace, because that would - # cause false negatives for things that are not initializer lists. - # Silence this: But not this: - # Outer{ if (...) { - # Inner{...} if (...){ // Missing space before { - # }; } - # - # There is a false negative with this approach if people inserted - # spurious semicolons, e.g. "if (cond){};", but we will catch the - # spurious semicolon with a separate check. - (endline, endlinenum, endpos) = CloseExpression( - clean_lines, linenum, len(match.group(1))) - trailing_text = '' - if endpos > -1: - trailing_text = endline[endpos:] - for offset in xrange(endlinenum + 1, - min(endlinenum + 3, clean_lines.NumLines() - 1)): - trailing_text += clean_lines.elided[offset] - if not Match(r'^[\s}]*[{.;,)<\]]', trailing_text): - error(filename, linenum, 'whitespace/braces', 5, - 'Missing space before {') - - # Make sure '} else {' has spaces. - if Search(r'}else', line): - error(filename, linenum, 'whitespace/braces', 5, - 'Missing space before else') - - # You shouldn't have spaces before your brackets, except maybe after - # 'delete []' or 'new char * []'. - if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line): - error(filename, linenum, 'whitespace/braces', 5, - 'Extra space before [') - - # You shouldn't have a space before a semicolon at the end of the line. - # There's a special case for "for" since the style guide allows space before - # the semicolon there. - if Search(r':\s*;\s*$', line): - error(filename, linenum, 'whitespace/semicolon', 5, - 'Semicolon defining empty statement. Use {} instead.') - elif Search(r'^\s*;\s*$', line): - error(filename, linenum, 'whitespace/semicolon', 5, - 'Line contains only semicolon. If this should be an empty statement, ' - 'use {} instead.') - elif (Search(r'\s+;\s*$', line) and - not Search(r'\bfor\b', line)): - error(filename, linenum, 'whitespace/semicolon', 5, - 'Extra space before last semicolon. If this should be an empty ' - 'statement, use {} instead.') - - # In range-based for, we wanted spaces before and after the colon, but - # not around "::" tokens that might appear. - if (Search('for *\(.*[^:]:[^: ]', line) or - Search('for *\(.*[^: ]:[^:]', line)): - error(filename, linenum, 'whitespace/forcolon', 2, - 'Missing space around colon in range-based for loop') - - -def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): - """Checks for additional blank line issues related to sections. - - Currently the only thing checked here is blank line before protected/private. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - class_info: A _ClassInfo objects. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Skip checks if the class is small, where small means 25 lines or less. - # 25 lines seems like a good cutoff since that's the usual height of - # terminals, and any class that can't fit in one screen can't really - # be considered "small". - # - # Also skip checks if we are on the first line. This accounts for - # classes that look like - # class Foo { public: ... }; - # - # If we didn't find the end of the class, last_line would be zero, - # and the check will be skipped by the first condition. 
- if (class_info.last_line - class_info.starting_linenum <= 24 or - linenum <= class_info.starting_linenum): - return - - matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum]) - if matched: - # Issue warning if the line before public/protected/private was - # not a blank line, but don't do this if the previous line contains - # "class" or "struct". This can happen two ways: - # - We are at the beginning of the class. - # - We are forward-declaring an inner class that is semantically - # private, but needed to be public for implementation reasons. - # Also ignores cases where the previous line ends with a backslash as can be - # common when defining classes in C macros. - prev_line = clean_lines.lines[linenum - 1] - if (not IsBlankLine(prev_line) and - not Search(r'\b(class|struct)\b', prev_line) and - not Search(r'\\$', prev_line)): - # Try a bit harder to find the beginning of the class. This is to - # account for multi-line base-specifier lists, e.g.: - # class Derived - # : public Base { - end_class_head = class_info.starting_linenum - for i in range(class_info.starting_linenum, linenum): - if Search(r'\{\s*$', clean_lines.lines[i]): - end_class_head = i - break - if end_class_head < linenum - 1: - error(filename, linenum, 'whitespace/blank_line', 3, - '"%s:" should be preceded by a blank line' % matched.group(1)) - - -def GetPreviousNonBlankLine(clean_lines, linenum): - """Return the most recent non-blank line and its line number. - - Args: - clean_lines: A CleansedLines instance containing the file contents. - linenum: The number of the line to check. - - Returns: - A tuple with two elements. The first element is the contents of the last - non-blank line before the current line, or the empty string if this is the - first non-blank line. The second is the line number of that line, or -1 - if this is the first non-blank line. - """ - - prevlinenum = linenum - 1 - while prevlinenum >= 0: - prevline = clean_lines.elided[prevlinenum] - if not IsBlankLine(prevline): # if not a blank line... - return (prevline, prevlinenum) - prevlinenum -= 1 - return ('', -1) - - -def CheckBraces(filename, clean_lines, linenum, error): - """Looks for misplaced braces (e.g. at the end of line). - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - line = clean_lines.elided[linenum] # get rid of comments and strings - - if Match(r'\s*{\s*$', line): - # We allow an open brace to start a line in the case where someone is using - # braces in a block to explicitly create a new scope, which is commonly used - # to control the lifetime of stack-allocated variables. Braces are also - # used for brace initializers inside function calls. We don't detect this - # perfectly: we just don't complain if the last non-whitespace character on - # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the - # previous line starts a preprocessor block. - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if (not Search(r'[,;:}{(]\s*$', prevline) and - not Match(r'\s*#', prevline)): - error(filename, linenum, 'whitespace/braces', 4, - '{ should almost always be at the end of the previous line') - - # An else clause should be on the same line as the preceding closing brace. 
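Looking back at CheckSectionSpacing just above: the rule only fires for classes longer than roughly a terminal screen and skips the specifier that opens the class. A toy version over a plain list of lines (helper name and driver are mine):

    import re

    def needs_blank_before(lines, i):
        """True if lines[i] is an access specifier missing its blank line."""
        if i == 0 or not re.match(r'\s*(public|protected|private):', lines[i]):
            return False
        prev = lines[i - 1]
        return (bool(prev.strip()) and
                not re.search(r'\b(class|struct)\b', prev) and
                not prev.endswith('\\'))

    code = ['class Foo {',
            ' public:',      # fine: previous line declares the class
            '  void Bar();',
            ' private:',     # flagged: no blank line before it
            '  int x_;',
            '};']
    print([i for i in range(len(code)) if needs_blank_before(code, i)])  # [3]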
- if Match(r'\s*else\s*', line): - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if Match(r'\s*}\s*$', prevline): - error(filename, linenum, 'whitespace/newline', 4, - 'An else should appear on the same line as the preceding }') - - # If braces come on one side of an else, they should be on both. - # However, we have to worry about "else if" that spans multiple lines! - if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line): - if Search(r'}\s*else if([^{]*)$', line): # could be multi-line if - # find the ( after the if - pos = line.find('else if') - pos = line.find('(', pos) - if pos > 0: - (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos) - if endline[endpos:].find('{') == -1: # must be brace after if - error(filename, linenum, 'readability/braces', 5, - 'If an else has a brace on one side, it should have it on both') - else: # common case: else not followed by a multi-line if - error(filename, linenum, 'readability/braces', 5, - 'If an else has a brace on one side, it should have it on both') - - # Likewise, an else should never have the else clause on the same line - if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line): - error(filename, linenum, 'whitespace/newline', 4, - 'Else clause should never be on same line as else (use 2 lines)') - - # In the same way, a do/while should never be on one line - if Match(r'\s*do [^\s{]', line): - error(filename, linenum, 'whitespace/newline', 4, - 'do/while clauses should not be on a single line') - - # Block bodies should not be followed by a semicolon. Due to C++11 - # brace initialization, there are more places where semicolons are - # required than not, so we use a whitelist approach to check these - # rather than a blacklist. These are the places where "};" should - # be replaced by just "}": - # 1. Some flavor of block following closing parenthesis: - # for (;;) {}; - # while (...) {}; - # switch (...) {}; - # Function(...) {}; - # if (...) {}; - # if (...) else if (...) {}; - # - # 2. else block: - # if (...) else {}; - # - # 3. const member function: - # Function(...) const {}; - # - # 4. Block following some statement: - # x = 42; - # {}; - # - # 5. Block at the beginning of a function: - # Function(...) { - # {}; - # } - # - # Note that naively checking for the preceding "{" will also match - # braces inside multi-dimensional arrays, but this is fine since - # that expression will not contain semicolons. - # - # 6. Block following another block: - # while (true) {} - # {}; - # - # 7. End of namespaces: - # namespace {}; - # - # These semicolons seems far more common than other kinds of - # redundant semicolons, possibly due to people converting classes - # to namespaces. For now we do not warn for this case. - # - # Try matching case 1 first. - match = Match(r'^(.*\)\s*)\{', line) - if match: - # Matched closing parenthesis (case 1). Check the token before the - # matching opening parenthesis, and don't warn if it looks like a - # macro. 
This avoids these false positives: - # - macro that defines a base class - # - multi-line macro that defines a base class - # - macro that defines the whole class-head - # - # But we still issue warnings for macros that we know are safe to - # warn, specifically: - # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P - # - TYPED_TEST - # - INTERFACE_DEF - # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: - # - # We implement a whitelist of safe macros instead of a blacklist of - # unsafe macros, even though the latter appears less frequently in - # google code and would have been easier to implement. This is because - # the downside for getting the whitelist wrong means some extra - # semicolons, while the downside for getting the blacklist wrong - # would result in compile errors. - # - # In addition to macros, we also don't want to warn on compound - # literals. - closing_brace_pos = match.group(1).rfind(')') - opening_parenthesis = ReverseCloseExpression( - clean_lines, linenum, closing_brace_pos) - if opening_parenthesis[2] > -1: - line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]] - macro = Search(r'\b([A-Z_]+)\s*$', line_prefix) - if ((macro and - macro.group(1) not in ( - 'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST', - 'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED', - 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or - Search(r'\s+=\s*$', line_prefix)): - match = None - - else: - # Try matching cases 2-3. - match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line) - if not match: - # Try matching cases 4-6. These are always matched on separate lines. - # - # Note that we can't simply concatenate the previous line to the - # current line and do a single match, otherwise we may output - # duplicate warnings for the blank line case: - # if (cond) { - # // blank line - # } - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if prevline and Search(r'[;{}]\s*$', prevline): - match = Match(r'^(\s*)\{', line) - - # Check matching closing brace - if match: - (endline, endlinenum, endpos) = CloseExpression( - clean_lines, linenum, len(match.group(1))) - if endpos > -1 and Match(r'^\s*;', endline[endpos:]): - # Current {} pair is eligible for semicolon check, and we have found - # the redundant semicolon, output warning here. - # - # Note: because we are scanning forward for opening braces, and - # outputting warnings for the matching closing brace, if there are - # nested blocks with trailing semicolons, we will get the error - # messages in reversed order. - error(filename, endlinenum, 'readability/braces', 4, - "You don't need a ; after a }") - - -def CheckEmptyBlockBody(filename, clean_lines, linenum, error): - """Look for empty loop/conditional body with only a single semicolon. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - # Search for loop keywords at the beginning of the line. Because only - # whitespaces are allowed before the keywords, this will also ignore most - # do-while-loops, since those lines should start with closing brace. - # - # We also check "if" blocks here, since an empty conditional block - # is likely an error. 
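The whitelist rationale above is the interesting part of this check: all-caps heads are presumed to be macros and skipped, except the known test macros, because a wrong whitelist only costs a stray semicolon while a wrong blacklist costs compile errors. A single-line toy of case 1 (mine; the real code walks CloseExpression across lines):

    import re

    SAFE_MACROS = ('TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST')

    def redundant_semicolon(line):
        m = re.match(r'^\s*(\w+)\s*\(.*\)\s*(?:const\s*)?\{.*\};\s*$', line)
        if not m:
            return False
        head = m.group(1)
        # an all-caps head is assumed to be a macro and left alone,
        # unless it is known to expand to a function definition
        if re.match(r'[A-Z_]+$', head) and head not in SAFE_MACROS:
            return False
        return True

    print(redundant_semicolon('while (More()) { Next(); };'))          # True
    print(redundant_semicolon('MY_BASE_MACRO(Base) { int x; };'))      # False
    print(redundant_semicolon('TEST(Foo, Bar) { EXPECT_TRUE(1); };'))  # True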
- line = clean_lines.elided[linenum] - matched = Match(r'\s*(for|while|if)\s*\(', line) - if matched: - # Find the end of the conditional expression - (end_line, end_linenum, end_pos) = CloseExpression( - clean_lines, linenum, line.find('(')) - - # Output warning if what follows the condition expression is a semicolon. - # No warning for all other cases, including whitespace or newline, since we - # have a separate check for semicolons preceded by whitespace. - if end_pos >= 0 and Match(r';', end_line[end_pos:]): - if matched.group(1) == 'if': - error(filename, end_linenum, 'whitespace/empty_conditional_body', 5, - 'Empty conditional bodies should use {}') - else: - error(filename, end_linenum, 'whitespace/empty_loop_body', 5, - 'Empty loop bodies should use {} or continue') - - -def CheckCheck(filename, clean_lines, linenum, error): - """Checks the use of CHECK and EXPECT macros. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - # Decide the set of replacement macros that should be suggested - lines = clean_lines.elided - check_macro = None - start_pos = -1 - for macro in _CHECK_MACROS: - i = lines[linenum].find(macro) - if i >= 0: - check_macro = macro - - # Find opening parenthesis. Do a regular expression match here - # to make sure that we are matching the expected CHECK macro, as - # opposed to some other macro that happens to contain the CHECK - # substring. - matched = Match(r'^(.*\b' + check_macro + r'\s*)\(', lines[linenum]) - if not matched: - continue - start_pos = len(matched.group(1)) - break - if not check_macro or start_pos < 0: - # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT' - return - - # Find end of the boolean expression by matching parentheses - (last_line, end_line, end_pos) = CloseExpression( - clean_lines, linenum, start_pos) - if end_pos < 0: - return - if linenum == end_line: - expression = lines[linenum][start_pos + 1:end_pos - 1] - else: - expression = lines[linenum][start_pos + 1:] - for i in xrange(linenum + 1, end_line): - expression += lines[i] - expression += last_line[0:end_pos - 1] - - # Parse expression so that we can take parentheses into account. - # This avoids false positives for inputs like "CHECK((a < 4) == b)", - # which is not replaceable by CHECK_LE. - lhs = '' - rhs = '' - operator = None - while expression: - matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||' - r'==|!=|>=|>|<=|<|\()(.*)$', expression) - if matched: - token = matched.group(1) - if token == '(': - # Parenthesized operand - expression = matched.group(2) - (end, _) = FindEndOfExpressionInLine(expression, 0, 1, '(', ')') - if end < 0: - return # Unmatched parenthesis - lhs += '(' + expression[0:end] - expression = expression[end:] - elif token in ('&&', '||'): - # Logical and/or operators. This means the expression - # contains more than one term, for example: - # CHECK(42 < a && a < b); - # - # These are not replaceable with CHECK_LE, so bail out early. - return - elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'): - # Non-relational operator - lhs += token - expression = matched.group(2) - else: - # Relational operator - operator = token - rhs = matched.group(2) - break - else: - # Unparenthesized operand. Instead of appending to lhs one character - # at a time, we do another regular expression match to consume several - # characters at once if possible. 
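The expression parser in CheckCheck above exists so that CHECK(a == b) is only rewritten as CHECK_EQ(a, b) when the comparison really is the top-level operator. A much smaller toy that accepts just a single bare comparison (the mapping mirrors the _CHECK_REPLACEMENT table defined earlier in the file; the one-regex parser is mine and far weaker):

    import re

    _SUGGEST = {'==': 'CHECK_EQ', '!=': 'CHECK_NE', '>=': 'CHECK_GE',
                '>': 'CHECK_GT', '<=': 'CHECK_LE', '<': 'CHECK_LT'}

    def suggest(expr):
        m = re.match(r'^([^!=<>&|]+)(==|!=|<=|>=|<|>)([^!=<>&|]+)$', expr)
        if not m:
            return None  # parenthesized/compound expressions: keep CHECK
        return '%s(%s, %s)' % (_SUGGEST[m.group(2)],
                               m.group(1).strip(), m.group(3).strip())

    print(suggest('x == 42'))         # CHECK_EQ(x, 42)
    print(suggest('0 < a && a < b'))  # None: not a single comparison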
Trivial benchmark shows that this - # is more efficient when the operands are longer than a single - # character, which is generally the case. - matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression) - if not matched: - matched = Match(r'^(\s*\S)(.*)$', expression) - if not matched: - break - lhs += matched.group(1) - expression = matched.group(2) - - # Only apply checks if we got all parts of the boolean expression - if not (lhs and operator and rhs): - return - - # Check that rhs do not contain logical operators. We already know - # that lhs is fine since the loop above parses out && and ||. - if rhs.find('&&') > -1 or rhs.find('||') > -1: - return - - # At least one of the operands must be a constant literal. This is - # to avoid suggesting replacements for unprintable things like - # CHECK(variable != iterator) - # - # The following pattern matches decimal, hex integers, strings, and - # characters (in that order). - lhs = lhs.strip() - rhs = rhs.strip() - match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' - if Match(match_constant, lhs) or Match(match_constant, rhs): - # Note: since we know both lhs and rhs, we can provide a more - # descriptive error message like: - # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) - # Instead of: - # Consider using CHECK_EQ instead of CHECK(a == b) - # - # We are still keeping the less descriptive message because if lhs - # or rhs gets long, the error message might become unreadable. - error(filename, linenum, 'readability/check', 2, - 'Consider using %s instead of %s(a %s b)' % ( - _CHECK_REPLACEMENT[check_macro][operator], - check_macro, operator)) - - -def CheckAltTokens(filename, clean_lines, linenum, error): - """Check alternative keywords being used in boolean expressions. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Avoid preprocessor lines - if Match(r'^\s*#', line): - return - - # Last ditch effort to avoid multi-line comments. This will not help - # if the comment started before the current line or ended after the - # current line, but it catches most of the false positives. At least, - # it provides a way to workaround this warning for people who use - # multi-line comments in preprocessor macros. - # - # TODO(unknown): remove this once cpplint has better support for - # multi-line comments. - if line.find('/*') >= 0 or line.find('*/') >= 0: - return - - for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): - error(filename, linenum, 'readability/alt_tokens', 2, - 'Use operator %s instead of %s' % ( - _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1))) - - -def GetLineWidth(line): - """Determines the width of the line in column positions. - - Args: - line: A string, which may be a Unicode string. - - Returns: - The width of the line in column positions, accounting for Unicode - combining characters and wide characters. - """ - if isinstance(line, unicode): - width = 0 - for uc in unicodedata.normalize('NFC', line): - if unicodedata.east_asian_width(uc) in ('W', 'F'): - width += 2 - elif not unicodedata.combining(uc): - width += 1 - return width - else: - return len(line) - - -def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, - error): - """Checks rules from the 'C++ style rules' section of cppguide.html. 
- - Most of these rules are hard to test (naming, comment style), but we - do what we can. In particular we check for 2-space indents, line lengths, - tab usage, spaces inside code, etc. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - file_extension: The extension (without the dot) of the filename. - nesting_state: A _NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - - # Don't use "elided" lines here, otherwise we can't check commented lines. - # Don't want to use "raw" either, because we don't want to check inside C++11 - # raw strings, - raw_lines = clean_lines.lines_without_raw_strings - line = raw_lines[linenum] - - if line.find('\t') != -1: - error(filename, linenum, 'whitespace/tab', 1, - 'Tab found; better to use spaces') - - # One or three blank spaces at the beginning of the line is weird; it's - # hard to reconcile that with 2-space indents. - # NOTE: here are the conditions rob pike used for his tests. Mine aren't - # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces - # if(RLENGTH > 20) complain = 0; - # if(match($0, " +(error|private|public|protected):")) complain = 0; - # if(match(prev, "&& *$")) complain = 0; - # if(match(prev, "\\|\\| *$")) complain = 0; - # if(match(prev, "[\",=><] *$")) complain = 0; - # if(match($0, " <<")) complain = 0; - # if(match(prev, " +for \\(")) complain = 0; - # if(prevodd && match(prevprev, " +for \\(")) complain = 0; - initial_spaces = 0 - cleansed_line = clean_lines.elided[linenum] - while initial_spaces < len(line) and line[initial_spaces] == ' ': - initial_spaces += 1 - if line and line[-1].isspace(): - error(filename, linenum, 'whitespace/end_of_line', 4, - 'Line ends in whitespace. Consider deleting these extra spaces.') - # There are certain situations we allow one space, notably for section labels - elif ((initial_spaces == 1 or initial_spaces == 3) and - not Match(r'\s*\w+\s*:\s*$', cleansed_line)): - error(filename, linenum, 'whitespace/indent', 3, - 'Weird number of spaces at line-start. ' - 'Are you using a 2-space indent?') - - # Check if the line is a header guard. - is_header_guard = False - if file_extension == 'h': - cppvar = GetHeaderGuardCPPVariable(filename) - if (line.startswith('#ifndef %s' % cppvar) or - line.startswith('#define %s' % cppvar) or - line.startswith('#endif // %s' % cppvar)): - is_header_guard = True - # #include lines and header guards can be long, since there's no clean way to - # split them. - # - # URLs can be long too. It's possible to split these, but it makes them - # harder to cut&paste. - # - # The "$Id:...$" comment may also get very long without it being the - # developers fault. - if (not line.startswith('#include') and not is_header_guard and - not Match(r'^\s*//.*http(s?)://\S*$', line) and - not Match(r'^// \$Id:.*#[0-9]+ \$$', line)): - line_width = GetLineWidth(line) - extended_length = int((_line_length * 1.25)) - if line_width > extended_length: - error(filename, linenum, 'whitespace/line_length', 4, - 'Lines should very rarely be longer than %i characters' % - extended_length) - elif line_width > _line_length: - error(filename, linenum, 'whitespace/line_length', 2, - 'Lines should be <= %i characters long' % _line_length) - - if (cleansed_line.count(';') > 1 and - # for loops are allowed two ;'s (and may run over two lines). 
- cleansed_line.find('for') == -1 and - (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or - GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and - # It's ok to have many commands in a switch case that fits in 1 line - not ((cleansed_line.find('case ') != -1 or - cleansed_line.find('default:') != -1) and - cleansed_line.find('break;') != -1)): - error(filename, linenum, 'whitespace/newline', 0, - 'More than one command on the same line') - - # Some more style checks - CheckBraces(filename, clean_lines, linenum, error) - CheckEmptyBlockBody(filename, clean_lines, linenum, error) - CheckAccess(filename, clean_lines, linenum, nesting_state, error) - CheckSpacing(filename, clean_lines, linenum, nesting_state, error) - CheckCheck(filename, clean_lines, linenum, error) - CheckAltTokens(filename, clean_lines, linenum, error) - classinfo = nesting_state.InnermostClass() - if classinfo: - CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) - - -_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"') -_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') -# Matches the first component of a filename delimited by -s and _s. That is: -# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo' -# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo' -# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo' -# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo' -_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+') - - -def _DropCommonSuffixes(filename): - """Drops common suffixes like _test.cc or -inl.h from filename. - - For example: - >>> _DropCommonSuffixes('foo/foo-inl.h') - 'foo/foo' - >>> _DropCommonSuffixes('foo/bar/foo.cc') - 'foo/bar/foo' - >>> _DropCommonSuffixes('foo/foo_internal.h') - 'foo/foo' - >>> _DropCommonSuffixes('foo/foo_unusualinternal.h') - 'foo/foo_unusualinternal' - - Args: - filename: The input filename. - - Returns: - The filename with the common suffix removed. - """ - for suffix in ('test.cc', 'regtest.cc', 'unittest.cc', - 'inl.h', 'impl.h', 'internal.h'): - if (filename.endswith(suffix) and len(filename) > len(suffix) and - filename[-len(suffix) - 1] in ('-', '_')): - return filename[:-len(suffix) - 1] - return os.path.splitext(filename)[0] - - -def _IsTestFilename(filename): - """Determines if the given filename has a suffix that identifies it as a test. - - Args: - filename: The input filename. - - Returns: - True if 'filename' looks like a test, False otherwise. - """ - if (filename.endswith('_test.cc') or - filename.endswith('_unittest.cc') or - filename.endswith('_regtest.cc')): - return True - else: - return False - - -def _ClassifyInclude(fileinfo, include, is_system): - """Figures out what kind of header 'include' is. - - Args: - fileinfo: The current file cpplint is running over. A FileInfo instance. - include: The path to a #included file. - is_system: True if the #include used <> rather than "". - - Returns: - One of the _XXX_HEADER constants. - - For example: - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True) - _C_SYS_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True) - _CPP_SYS_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False) - _LIKELY_MY_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'), - ... 
'bar/foo_other_ext.h', False) - _POSSIBLE_MY_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False) - _OTHER_HEADER - """ - # This is a list of all standard c++ header files, except - # those already checked for above. - is_cpp_h = include in _CPP_HEADERS - - if is_system: - if is_cpp_h: - return _CPP_SYS_HEADER - else: - return _C_SYS_HEADER - - # If the target file and the include we're checking share a - # basename when we drop common extensions, and the include - # lives in . , then it's likely to be owned by the target file. - target_dir, target_base = ( - os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName()))) - include_dir, include_base = os.path.split(_DropCommonSuffixes(include)) - if target_base == include_base and ( - include_dir == target_dir or - include_dir == os.path.normpath(target_dir + '/../public')): - return _LIKELY_MY_HEADER - - # If the target and include share some initial basename - # component, it's possible the target is implementing the - # include, so it's allowed to be first, but we'll never - # complain if it's not there. - target_first_component = _RE_FIRST_COMPONENT.match(target_base) - include_first_component = _RE_FIRST_COMPONENT.match(include_base) - if (target_first_component and include_first_component and - target_first_component.group(0) == - include_first_component.group(0)): - return _POSSIBLE_MY_HEADER - - return _OTHER_HEADER - - - -def CheckIncludeLine(filename, clean_lines, linenum, include_state, error): - """Check rules that are applicable to #include lines. - - Strings on #include lines are NOT removed from elided line, to make - certain tasks easier. However, to prevent false positives, checks - applicable to #include lines in CheckLanguage must be put here. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - include_state: An _IncludeState instance in which the headers are inserted. - error: The function to call with any errors found. - """ - fileinfo = FileInfo(filename) - - line = clean_lines.lines[linenum] - - # "include" should use the new style "foo/bar.h" instead of just "bar.h" - if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line): - error(filename, linenum, 'build/include', 4, - 'Include the directory when naming .h files') - - # we shouldn't include a file more than once. actually, there are a - # handful of instances where doing so is okay, but in general it's - # not. - match = _RE_PATTERN_INCLUDE.search(line) - if match: - include = match.group(2) - is_system = (match.group(1) == '<') - if include in include_state: - error(filename, linenum, 'build/include', 4, - '"%s" already included at %s:%s' % - (include, filename, include_state[include])) - else: - include_state[include] = linenum - - # We want to ensure that headers appear in the right order: - # 1) for foo.cc, foo.h (preferred location) - # 2) c system files - # 3) cpp system files - # 4) for foo.cc, foo.h (deprecated location) - # 5) other google headers - # - # We classify each include statement as one of those 5 types - # using a number of techniques. The include_state object keeps - # track of the highest type seen, and complains if we see a - # lower type after that. - error_message = include_state.CheckNextIncludeOrder( - _ClassifyInclude(fileinfo, include, is_system)) - if error_message: - error(filename, linenum, 'build/include_order', 4, - '%s. Should be: %s.h, c system, c++ system, other.' 
% - (error_message, fileinfo.BaseName())) - canonical_include = include_state.CanonicalizeAlphabeticalOrder(include) - if not include_state.IsInAlphabeticalOrder( - clean_lines, linenum, canonical_include): - error(filename, linenum, 'build/include_alpha', 4, - 'Include "%s" not in alphabetical order' % include) - include_state.SetLastHeader(canonical_include) - - # Look for any of the stream classes that are part of standard C++. - match = _RE_PATTERN_INCLUDE.match(line) - if match: - include = match.group(2) - if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include): - # Many unit tests use cout, so we exempt them. - if not _IsTestFilename(filename): - error(filename, linenum, 'readability/streams', 3, - 'Streams are highly discouraged.') - - -def _GetTextInside(text, start_pattern): - r"""Retrieves all the text between matching open and close parentheses. - - Given a string of lines and a regular expression string, retrieve all the text - following the expression and between opening punctuation symbols like - (, [, or {, and the matching close-punctuation symbol. This properly nested - occurrences of the punctuations, so for the text like - printf(a(), b(c())); - a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'. - start_pattern must match string having an open punctuation symbol at the end. - - Args: - text: The lines to extract text. Its comments and strings must be elided. - It can be single line and can span multiple lines. - start_pattern: The regexp string indicating where to start extracting - the text. - Returns: - The extracted text. - None if either the opening string or ending punctuation could not be found. - """ - # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably - # rewritten to use _GetTextInside (and use inferior regexp matching today). - - # Give opening punctuations to get the matching close-punctuations. - matching_punctuation = {'(': ')', '{': '}', '[': ']'} - closing_punctuation = set(matching_punctuation.itervalues()) - - # Find the position to start extracting text. - match = re.search(start_pattern, text, re.M) - if not match: # start_pattern not found in text. - return None - start_position = match.end(0) - - assert start_position > 0, ( - 'start_pattern must ends with an opening punctuation.') - assert text[start_position - 1] in matching_punctuation, ( - 'start_pattern must ends with an opening punctuation.') - # Stack of closing punctuations we expect to have in text after position. - punctuation_stack = [matching_punctuation[text[start_position - 1]]] - position = start_position - while punctuation_stack and position < len(text): - if text[position] == punctuation_stack[-1]: - punctuation_stack.pop() - elif text[position] in closing_punctuation: - # A closing punctuation without matching opening punctuations. - return None - elif text[position] in matching_punctuation: - punctuation_stack.append(matching_punctuation[text[position]]) - position += 1 - if punctuation_stack: - # Opening punctuations left without matching close-punctuations. - return None - # punctuations match. - return text[start_position:position - 1] - - -# Patterns for matching call-by-reference parameters. -# -# Supports nested templates up to 2 levels deep using this messy pattern: -# < (?: < (?: < [^<>]* -# > -# | [^<>] )* -# > -# | [^<>] )* -# > -_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*' # =~ [[:alpha:]][[:alnum:]]* -_RE_PATTERN_TYPE = ( - r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?' 
- r'(?:\w|' - r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|' - r'::)+') -# A call-by-reference parameter ends with '& identifier'. -_RE_PATTERN_REF_PARAM = re.compile( - r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*' - r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]') -# A call-by-const-reference parameter either ends with 'const& identifier' -# or looks like 'const type& identifier' when 'type' is atomic. -_RE_PATTERN_CONST_REF_PARAM = ( - r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT + - r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')') - - -def CheckLanguage(filename, clean_lines, linenum, file_extension, - include_state, nesting_state, error): - """Checks rules from the 'C++ language rules' section of cppguide.html. - - Some of these rules are hard to test (function overloading, using - uint32 inappropriately), but we do the best we can. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - file_extension: The extension (without the dot) of the filename. - include_state: An _IncludeState instance in which the headers are inserted. - nesting_state: A _NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - # If the line is empty or consists entirely of a comment, no need to - # check it. - line = clean_lines.elided[linenum] - if not line: - return - - match = _RE_PATTERN_INCLUDE.search(line) - if match: - CheckIncludeLine(filename, clean_lines, linenum, include_state, error) - return - - # Reset include state across preprocessor directives. This is meant - # to silence warnings for conditional includes. - if Match(r'^\s*#\s*(?:ifdef|elif|else|endif)\b', line): - include_state.ResetSection() - - # Make Windows paths like Unix. - fullname = os.path.abspath(filename).replace('\\', '/') - - # TODO(unknown): figure out if they're using default arguments in fn proto. - - # Check to see if they're using a conversion function cast. - # I just try to capture the most common basic types, though there are more. - # Parameterless conversion functions, such as bool(), are allowed as they are - # probably a member operator declaration or default constructor. - match = Search( - r'(\bnew\s+)?\b' # Grab 'new' operator, if it's there - r'(int|float|double|bool|char|int32|uint32|int64|uint64)' - r'(\([^)].*)', line) - if match: - matched_new = match.group(1) - matched_type = match.group(2) - matched_funcptr = match.group(3) - - # gMock methods are defined using some variant of MOCK_METHODx(name, type) - # where type may be float(), int(string), etc. Without context they are - # virtually indistinguishable from int(x) casts. Likewise, gMock's - # MockCallback takes a template parameter of the form return_type(arg_type), - # which looks much like the cast we're trying to detect. - # - # std::function<> wrapper has a similar problem. - # - # Return types for function pointers also look like casts if they - # don't have an extra space.
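(Editor's aside: a minimal standalone sketch of the old-style-cast heuristic described above. The regex here is a simplification of the real pattern, and the name _CAST is ours; it is added only for illustration.)

    import re

    # Flag `int(x)`-style conversions; parameterless `bool()` is not matched
    # because, like the pattern above, this one requires a non-')' after '('.
    _CAST = re.compile(r'\b(int|float|double|bool|char)\(([^)].*)')

    assert _CAST.search('int x = int(f);')         # old-style cast: flagged
    assert not _CAST.search('bool operator()();')  # declaration: not flagged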
- if (matched_new is None and # If new operator, then this isn't a cast - not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or - Search(r'\bMockCallback<.*>', line) or - Search(r'\bstd::function<.*>', line)) and - not (matched_funcptr and - Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', - matched_funcptr))): - # Try a bit harder to catch gmock lines: the only place where - # something looks like an old-style cast is where we declare the - # return type of the mocked method, and the only time when we - # are missing context is if MOCK_METHOD was split across - # multiple lines. The missing MOCK_METHOD is usually one or two - # lines back, so scan back one or two lines. - # - # It's not possible for gmock macros to appear in the first 2 - # lines, since the class head + section name takes up 2 lines. - if (linenum < 2 or - not (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$', - clean_lines.elided[linenum - 1]) or - Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$', - clean_lines.elided[linenum - 2]))): - error(filename, linenum, 'readability/casting', 4, - 'Using deprecated casting style. ' - 'Use static_cast<%s>(...) instead' % - matched_type) - - CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum], - 'static_cast', - r'\((int|float|double|bool|char|u?int(16|32|64))\)', error) - - # This doesn't catch all cases. Consider (const char * const)"hello". - # - # (char *) "foo" should always be a const_cast (reinterpret_cast won't - # compile). - if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum], - 'const_cast', r'\((char\s?\*+\s?)\)\s*"', error): - pass - else: - # Check pointer casts for other than string constants - CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum], - 'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error) - - # In addition, we look for people taking the address of a cast. This - # is dangerous -- casts can assign to temporaries, so the pointer doesn't - # point where you think. - match = Search( - r'(?:&\(([^)]+)\)[\w(])|' - r'(?:&(static|dynamic|down|reinterpret)_cast\b)', line) - if match and match.group(1) != '*': - error(filename, linenum, 'runtime/casting', 4, - ('Are you taking an address of a cast? ' - 'This is dangerous: could be a temp var. ' - 'Take the address before doing the cast, rather than after')) - - # Create an extended_line, which is the concatenation of the current and - # next lines, for more effective checking of code that may span more than one - # line. - if linenum + 1 < clean_lines.NumLines(): - extended_line = line + clean_lines.elided[linenum + 1] - else: - extended_line = line - - # Check for people declaring static/global STL strings at the top level. - # This is dangerous because the C++ language does not guarantee that - # globals with constructors are initialized before the first access. - match = Match( - r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)', - line) - # Make sure it's not a function. - # Function template specialization looks like: "string foo(...". - # Class template definitions look like: "string Foo::Method(...". - # - # Also ignore things that look like operators. These are matched separately - # because operator names cross non-word boundaries. If we change the pattern - # above, we would decrease the accuracy of matching identifiers. 
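(Editor's aside: the static/global string pattern above in isolation, bound to a hypothetical name and fed a hypothetical input, to show what it captures and the rewrite the warning suggests.)

    import re

    _GLOBAL_STRING = re.compile(
        r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)')

    m = _GLOBAL_STRING.match('static const string kName = "foo";')
    assert m and m.group(2) == 'kName'
    # The runtime/string warning below would suggest a POD alternative:
    #   static const char kName[] = "foo";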
- if (match and - not Search(r'\boperator\W', line) and - not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)', match.group(3))): - error(filename, linenum, 'runtime/string', 4, - 'For a static/global string constant, use a C style string instead: ' - '"%schar %s[]".' % - (match.group(1), match.group(2))) - - if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line): - error(filename, linenum, 'runtime/init', 4, - 'You seem to be initializing a member variable with itself.') - - if file_extension == 'h': - # TODO(unknown): check that 1-arg constructors are explicit. - # How to tell it's a constructor? - # (handled in CheckForNonStandardConstructs for now) - # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS - # (level 1 error) - pass - - # Check if people are using the verboten C basic types. The only exception - # we regularly allow is "unsigned short port" for port. - if Search(r'\bshort port\b', line): - if not Search(r'\bunsigned short port\b', line): - error(filename, linenum, 'runtime/int', 4, - 'Use "unsigned short" for ports, not "short"') - else: - match = Search(r'\b(short|long(?! +double)|long long)\b', line) - if match: - error(filename, linenum, 'runtime/int', 4, - 'Use int16/int64/etc, rather than the C type %s' % match.group(1)) - - # When snprintf is used, the second argument shouldn't be a literal. - match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) - if match and match.group(2) != '0': - # If 2nd arg is zero, snprintf is used to calculate size. - error(filename, linenum, 'runtime/printf', 3, - 'If you can, use sizeof(%s) instead of %s as the 2nd arg ' - 'to snprintf.' % (match.group(1), match.group(2))) - - # Check if some verboten C functions are being used. - if Search(r'\bsprintf\b', line): - error(filename, linenum, 'runtime/printf', 5, - 'Never use sprintf. Use snprintf instead.') - match = Search(r'\b(strcpy|strcat)\b', line) - if match: - error(filename, linenum, 'runtime/printf', 4, - 'Almost always, snprintf is better than %s' % match.group(1)) - - # Check if some verboten operator overloading is going on - # TODO(unknown): catch out-of-line unary operator&: - # class X {}; - # int operator&(const X& x) { return 42; } // unary operator& - # The trick is it's hard to tell apart from binary operator&: - # class Y { int operator&(const Y& x) { return 23; } }; // binary operator& - if Search(r'\boperator\s*&\s*\(\s*\)', line): - error(filename, linenum, 'runtime/operator', 4, - 'Unary operator& is dangerous. Do not use it.') - - # Check for suspicious usage of "if" like - # } if (a == b) { - if Search(r'\}\s*if\s*\(', line): - error(filename, linenum, 'readability/braces', 4, - 'Did you mean "else if"? If not, start a new line for "if".') - - # Check for potential format string bugs like printf(foo). - # We constrain the pattern not to pick things like DocidForPrintf(foo). - # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) - # TODO(sugawarayu): Catch the following case. Need to change the calling - # convention of the whole function to process multiple line to handle it. - # printf( - # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line); - printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(') - if printf_args: - match = Match(r'([\w.\->()]+)$', printf_args) - if match and match.group(1) != '__VA_ARGS__': - function_name = re.search(r'\b((?:string)?printf)\s*\(', - line, re.I).group(1) - error(filename, linenum, 'runtime/printf', 4, - 'Potential format string bug. Do %s("%%s", %s) instead.' 
- % (function_name, match.group(1))) - - # Check for potential memset bugs like memset(buf, sizeof(buf), 0). - match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) - if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): - error(filename, linenum, 'runtime/memset', 4, - 'Did you mean "memset(%s, 0, %s)"?' - % (match.group(1), match.group(2))) - - if Search(r'\busing namespace\b', line): - error(filename, linenum, 'build/namespaces', 5, - 'Do not use namespace using-directives. ' - 'Use using-declarations instead.') - - # Detect variable-length arrays. - match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) - if (match and match.group(2) != 'return' and match.group(2) != 'delete' and - match.group(3).find(']') == -1): - # Split the size using space and arithmetic operators as delimiters. - # If any of the resulting tokens are not compile time constants then - # report the error. - tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3)) - is_const = True - skip_next = False - for tok in tokens: - if skip_next: - skip_next = False - continue - - if Search(r'sizeof\(.+\)', tok): continue - if Search(r'arraysize\(\w+\)', tok): continue - - tok = tok.lstrip('(') - tok = tok.rstrip(')') - if not tok: continue - if Match(r'\d+', tok): continue - if Match(r'0[xX][0-9a-fA-F]+', tok): continue - if Match(r'k[A-Z0-9]\w*', tok): continue - if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue - if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue - # A catch all for tricky sizeof cases, including 'sizeof expression', - # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' - # requires skipping the next token because we split on ' ' and '*'. - if tok.startswith('sizeof'): - skip_next = True - continue - is_const = False - break - if not is_const: - error(filename, linenum, 'runtime/arrays', 1, - 'Do not use variable-length arrays. Use an appropriately named ' - "('k' followed by CamelCase) compile-time constant for the size.") - - # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or - # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing - # in the class declaration. - match = Match( - (r'\s*' - r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))' - r'\(.*\);$'), - line) - if match and linenum + 1 < clean_lines.NumLines(): - next_line = clean_lines.elided[linenum + 1] - # We allow some, but not all, declarations of variables to be present - # in the statement that defines the class. The [\w\*,\s]* fragment of - # the regular expression below allows users to declare instances of - # the class or pointers to instances, but not less common types such - # as function pointers or arrays. It's a tradeoff between allowing - # reasonable code and avoiding trying to parse more C++ using regexps. - if not Search(r'^\s*}[\w\*,\s]*;', next_line): - error(filename, linenum, 'readability/constructors', 3, - match.group(1) + ' should be the last thing in the class') - - # Check for use of unnamed namespaces in header files. Registration - # macros are typically OK, so we allow use of "namespace {" on lines - # that end with backslashes. - if (file_extension == 'h' - and Search(r'\bnamespace\s*{', line) - and line[-1] != '\\'): - error(filename, linenum, 'build/namespaces', 4, - 'Do not use unnamed namespaces in header files. 
See ' - 'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces' - ' for more information.') - -def CheckForNonConstReference(filename, clean_lines, linenum, - nesting_state, error): - """Check for non-const references. - - Separate from CheckLanguage since it scans backwards from current - line, instead of scanning forward. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A _NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - # Do nothing if there is no '&' on current line. - line = clean_lines.elided[linenum] - if '&' not in line: - return - - # Long type names may be broken across multiple lines, usually in one - # of these forms: - # LongType - # ::LongTypeContinued &identifier - # LongType:: - # LongTypeContinued &identifier - # LongType< - # ...>::LongTypeContinued &identifier - # - # If we detected a type split across two lines, join the previous - # line to current line so that we can match const references - # accordingly. - # - # Note that this only scans back one line, since scanning back - # an arbitrary number of lines would be expensive. If you have a type - # that spans more than 2 lines, please use a typedef. - if linenum > 1: - previous = None - if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line): - # previous_line\n + ::current_line - previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$', - clean_lines.elided[linenum - 1]) - elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line): - # previous_line::\n + current_line - previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$', - clean_lines.elided[linenum - 1]) - if previous: - line = previous.group(1) + line.lstrip() - else: - # Check for a templated parameter that is split across multiple lines - endpos = line.rfind('>') - if endpos > -1: - (_, startline, startpos) = ReverseCloseExpression( - clean_lines, linenum, endpos) - if startpos > -1 and startline < linenum: - # Found the matching < on an earlier line, collect all - # pieces up to current line. - line = '' - for i in xrange(startline, linenum + 1): - line += clean_lines.elided[i].strip() - - # Check for non-const references in function parameters. A single '&' may - # be found in the following places: - # inside expression: binary & for bitwise AND - # inside expression: unary & for taking the address of something - # inside declarators: reference parameter - # We will exclude the first two cases by checking that we are not inside a - # function body, including one that was just introduced by a trailing '{'. - # TODO(unknown): Doesn't account for preprocessor directives. - # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare]. - check_params = False - if not nesting_state.stack: - check_params = True # top level - elif (isinstance(nesting_state.stack[-1], _ClassInfo) or - isinstance(nesting_state.stack[-1], _NamespaceInfo)): - check_params = True # within class or namespace - elif Match(r'.*{\s*$', line): - if (len(nesting_state.stack) == 1 or - isinstance(nesting_state.stack[-2], _ClassInfo) or - isinstance(nesting_state.stack[-2], _NamespaceInfo)): - check_params = True # just opened global/class/namespace block - # We allow non-const references in a few standard places, like functions - # called "swap()" or iostream operators like "<<" or ">>". Do not check - # those function parameters.
- # - # We also accept & in static_assert, which looks like a function but - # it's actually a declaration expression. - whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|' - r'operator\s*[<>][<>]|' - r'static_assert|COMPILE_ASSERT' - r')\s*\(') - if Search(whitelisted_functions, line): - check_params = False - elif not Search(r'\S+\([^)]*$', line): - # Don't see a whitelisted function on this line. Actually we - # didn't see any function name on this line, so this is likely a - # multi-line parameter list. Try a bit harder to catch this case. - for i in xrange(2): - if (linenum > i and - Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])): - check_params = False - break - - if check_params: - decls = ReplaceAll(r'{[^}]*}', ' ', line) # exclude function body - for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls): - if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter): - error(filename, linenum, 'runtime/references', 2, - 'Is this a non-const reference? ' - 'If so, make const or use a pointer: ' + - ReplaceAll(' *<', '<', parameter)) - - -def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern, - error): - """Checks for a C-style cast by looking for the pattern. - - Args: - filename: The name of the current file. - linenum: The number of the line to check. - line: The line of code to check. - raw_line: The raw line of code to check, with comments. - cast_type: The string for the C++ cast to recommend. This is either - reinterpret_cast, static_cast, or const_cast, depending. - pattern: The regular expression used to find C-style casts. - error: The function to call with any errors found. - - Returns: - True if an error was emitted. - False otherwise. - """ - match = Search(pattern, line) - if not match: - return False - - # e.g., sizeof(int) - sizeof_match = Match(r'.*sizeof\s*$', line[0:match.start(1) - 1]) - if sizeof_match: - error(filename, linenum, 'runtime/sizeof', 1, - 'Using sizeof(type). Use sizeof(varname) instead if possible') - return True - - # operator++(int) and operator--(int) - if (line[0:match.start(1) - 1].endswith(' operator++') or - line[0:match.start(1) - 1].endswith(' operator--')): - return False - - # A single unnamed argument for a function tends to look like an old - # style cast. If we see those, don't issue warnings for deprecated - # casts, instead issue warnings for unnamed arguments where - # appropriate. - # - # These are things that we want warnings for, since the style guide - # explicitly requires all parameters to be named: - # Function(int); - # Function(int) { - # ConstMember(int) const; - # ConstMember(int) const { - # ExceptionMember(int) throw (...); - # ExceptionMember(int) throw (...) { - # PureVirtual(int) = 0; - # - # These are functions of some sort, where the compiler would be fine - # if they had named parameters, but people often omit those - # identifiers to reduce clutter: - # (FunctionPointer)(int); - # (FunctionPointer)(int) = value; - # Function((function_pointer_arg)(int)) - # ; - # <(FunctionPointerTemplateArgument)(int)>; - remainder = line[match.end(0):] - if Match(r'^\s*(?:;|const\b|throw\b|=|>|\{|\))', remainder): - # Looks like an unnamed parameter. - - # Don't warn on any kind of template arguments. - if Match(r'^\s*>', remainder): - return False - - # Don't warn on assignments to function pointers, but keep warnings for - # unnamed parameters to pure virtual functions.
Note that this pattern - # will also pass on assignments of "0" to function pointers, but the - # preferred values for those would be "nullptr" or "NULL". - matched_zero = Match(r'^\s=\s*(\S+)\s*;', remainder) - if matched_zero and matched_zero.group(1) != '0': - return False - - # Don't warn on function pointer declarations. For this we need - # to check what came before the "(type)" string. - if Match(r'.*\)\s*$', line[0:match.start(0)]): - return False - - # Don't warn if the parameter is named with block comments, e.g.: - # Function(int /*unused_param*/); - if '/*' in raw_line: - return False - - # Passed all filters, issue warning here. - error(filename, linenum, 'readability/function', 3, - 'All parameters should be named in a function') - return True - - # At this point, all that should be left is actual casts. - error(filename, linenum, 'readability/casting', 4, - 'Using C-style cast. Use %s<%s>(...) instead' % - (cast_type, match.group(1))) - - return True - - -_HEADERS_CONTAINING_TEMPLATES = ( - ('<deque>', ('deque',)), - ('<functional>', ('unary_function', 'binary_function', - 'plus', 'minus', 'multiplies', 'divides', 'modulus', - 'negate', - 'equal_to', 'not_equal_to', 'greater', 'less', - 'greater_equal', 'less_equal', - 'logical_and', 'logical_or', 'logical_not', - 'unary_negate', 'not1', 'binary_negate', 'not2', - 'bind1st', 'bind2nd', - 'pointer_to_unary_function', - 'pointer_to_binary_function', - 'ptr_fun', - 'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t', - 'mem_fun_ref_t', - 'const_mem_fun_t', 'const_mem_fun1_t', - 'const_mem_fun_ref_t', 'const_mem_fun1_ref_t', - 'mem_fun_ref', - )), - ('<limits>', ('numeric_limits',)), - ('<list>', ('list',)), - ('<map>', ('map', 'multimap',)), - ('<memory>', ('allocator',)), - ('<queue>', ('queue', 'priority_queue',)), - ('<set>', ('set', 'multiset',)), - ('<stack>', ('stack',)), - ('<string>', ('char_traits', 'basic_string',)), - ('<utility>', ('pair',)), - ('<vector>', ('vector',)), - - # gcc extensions. - # Note: std::hash is their hash, ::hash is our hash - ('<hash_map>', ('hash_map', 'hash_multimap',)), - ('<hash_set>', ('hash_set', 'hash_multiset',)), - ('<slist>', ('slist',)), - ) - -_RE_PATTERN_STRING = re.compile(r'\bstring\b') - -_re_pattern_algorithm_header = [] -for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap', - 'transform'): - # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or - # type::max(). - _re_pattern_algorithm_header.append( - (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'), - _template, - '<algorithm>')) - -_re_pattern_templates = [] -for _header, _templates in _HEADERS_CONTAINING_TEMPLATES: - for _template in _templates: - _re_pattern_templates.append( - (re.compile(r'(\<|\b)' + _template + r'\s*\<'), - _template + '<>', - _header)) - - -def FilesBelongToSameModule(filename_cc, filename_h): - """Check if these two filenames belong to the same module. - - The concept of a 'module' here is as follows: - foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the - same 'module' if they are in the same directory. - some/path/public/xyzzy and some/path/internal/xyzzy are also considered - to belong to the same module here. - - If the filename_cc contains a longer path than the filename_h, for example, - '/absolute/path/to/base/sysinfo.cc', and this file would include - 'base/sysinfo.h', this function also produces the prefix needed to open the - header. This is used by the caller of this function to more robustly open the - header file. We don't have access to the real include paths in this context, - so we need this guesswork here.
- - Known bugs: tools/base/bar.cc and base/bar.h belong to the same module - according to this implementation. Because of this, this function gives - some false positives. This should be sufficiently rare in practice. - - Args: - filename_cc: is the path for the .cc file - filename_h: is the path for the header file - - Returns: - Tuple with a bool and a string: - bool: True if filename_cc and filename_h belong to the same module. - string: the additional prefix needed to open the header file. - """ - - if not filename_cc.endswith('.cc'): - return (False, '') - filename_cc = filename_cc[:-len('.cc')] - if filename_cc.endswith('_unittest'): - filename_cc = filename_cc[:-len('_unittest')] - elif filename_cc.endswith('_test'): - filename_cc = filename_cc[:-len('_test')] - filename_cc = filename_cc.replace('/public/', '/') - filename_cc = filename_cc.replace('/internal/', '/') - - if not filename_h.endswith('.h'): - return (False, '') - filename_h = filename_h[:-len('.h')] - if filename_h.endswith('-inl'): - filename_h = filename_h[:-len('-inl')] - filename_h = filename_h.replace('/public/', '/') - filename_h = filename_h.replace('/internal/', '/') - - files_belong_to_same_module = filename_cc.endswith(filename_h) - common_path = '' - if files_belong_to_same_module: - common_path = filename_cc[:-len(filename_h)] - return files_belong_to_same_module, common_path - - -def UpdateIncludeState(filename, include_state, io=codecs): - """Fill up the include_state with new includes found from the file. - - Args: - filename: the name of the header to read. - include_state: an _IncludeState instance in which the headers are inserted. - io: The io factory to use to read the file. Provided for testability. - - Returns: - True if a header was successfully added. False otherwise. - """ - headerfile = None - try: - headerfile = io.open(filename, 'r', 'utf8', 'replace') - except IOError: - return False - linenum = 0 - for line in headerfile: - linenum += 1 - clean_line = CleanseComments(line) - match = _RE_PATTERN_INCLUDE.search(clean_line) - if match: - include = match.group(2) - # The value formatting is cute, but not really used right now. - # What matters here is that the key is in include_state. - include_state.setdefault(include, '%s:%d' % (filename, linenum)) - return True - - -def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, - io=codecs): - """Reports missing STL includes. - - This function will output warnings to make sure you are including the headers - necessary for the STL containers and functions that you use. We only give one - reason to include a header. For example, if you use both equal_to<> and - less<> in a .h file, only one (the latter in the file) of these will be - reported as a reason to include the <functional>. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - include_state: An _IncludeState instance. - error: The function to call with any errors found. - io: The IO factory to use to read the header file. Provided for unittest - injection. - """ - required = {} # A map of header name to linenumber and the template entity. - # Example of required: { '<functional>': (1219, 'less<>') } - - for linenum in xrange(clean_lines.NumLines()): - line = clean_lines.elided[linenum] - if not line or line[0] == '#': - continue - - # String is special -- it is a non-templatized type in STL.
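(Editor's aside: a usage sketch for the FilesBelongToSameModule() helper above, with hypothetical paths, showing the (same_module, common_path) contract its docstring describes.)

    assert FilesBelongToSameModule(
        '/abs/path/to/base/sysinfo.cc', 'base/sysinfo.h') == (True, '/abs/path/to/')
    assert FilesBelongToSameModule('foo/bar.cc', 'foo/baz.h') == (False, '')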
- matched = _RE_PATTERN_STRING.search(line) - if matched: - # Don't warn about strings in non-STL namespaces: - # (We check only the first match per line; good enough.) - prefix = line[:matched.start()] - if prefix.endswith('std::') or not prefix.endswith('::'): - required['<string>'] = (linenum, 'string') - - for pattern, template, header in _re_pattern_algorithm_header: - if pattern.search(line): - required[header] = (linenum, template) - - # The following check is just a speed up; no semantics are changed. - if not '<' in line: # Reduces the cpu time usage by skipping lines. - continue - - for pattern, template, header in _re_pattern_templates: - if pattern.search(line): - required[header] = (linenum, template) - - # The policy is that if you #include something in foo.h you don't need to - # include it again in foo.cc. Here, we will look at possible includes. - # Let's copy the include_state so it is only messed up within this function. - include_state = include_state.copy() - - # Did we find the header for this file (if any) and successfully load it? - header_found = False - - # Use the absolute path so that matching works properly. - abs_filename = FileInfo(filename).FullName() - - # For Emacs's flymake. - # If cpplint is invoked from Emacs's flymake, a temporary file is generated - # by flymake and that file name might end with '_flymake.cc'. In that case, - # restore original file name here so that the corresponding header file can be - # found. - # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h' - # instead of 'foo_flymake.h' - abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename) - - # include_state is modified during iteration, so we iterate over a copy of - # the keys. - header_keys = include_state.keys() - for header in header_keys: - (same_module, common_path) = FilesBelongToSameModule(abs_filename, header) - fullpath = common_path + header - if same_module and UpdateIncludeState(fullpath, include_state, io): - header_found = True - - # If we can't find the header file for a .cc, assume it's because we don't - # know where to look. In that case we'll give up as we're not sure they - # didn't include it in the .h file. - # TODO(unknown): Do a better job of finding .h files so we are confident that - # not having the .h file means there isn't one. - if filename.endswith('.cc') and not header_found: - return - - # All the lines have been processed, report the errors found. - for required_header_unstripped in required: - template = required[required_header_unstripped][1] - if required_header_unstripped.strip('<>"') not in include_state: - error(filename, required[required_header_unstripped][0], - 'build/include_what_you_use', 4, - 'Add #include ' + required_header_unstripped + ' for ' + template) - - -_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<') - - -def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): - """Check that make_pair's template arguments are deduced. - - G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are - specified explicitly, and such use isn't intended in any case. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found.
- """ - line = clean_lines.elided[linenum] - match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line) - if match: - error(filename, linenum, 'build/explicit_make_pair', - 4, # 4 = high confidence - 'For C++11-compatibility, omit template arguments from make_pair' - ' OR use pair directly OR if appropriate, construct a pair directly') - - -def ProcessLine(filename, file_extension, clean_lines, line, - include_state, function_state, nesting_state, error, - extra_check_functions=[]): - """Processes a single line in the file. - - Args: - filename: Filename of the file that is being processed. - file_extension: The extension (dot not included) of the file. - clean_lines: An array of strings, each representing a line of the file, - with comments stripped. - line: Number of line being processed. - include_state: An _IncludeState instance in which the headers are inserted. - function_state: A _FunctionState instance which counts function lines, etc. - nesting_state: A _NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: A callable to which errors are reported, which takes 4 arguments: - filename, line number, error level, and message - extra_check_functions: An array of additional check functions that will be - run on each source line. Each function takes 4 - arguments: filename, clean_lines, line, error - """ - raw_lines = clean_lines.raw_lines - ParseNolintSuppressions(filename, raw_lines[line], line, error) - nesting_state.Update(filename, clean_lines, line, error) - if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM: - return - CheckForFunctionLengths(filename, clean_lines, line, function_state, error) - CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) - CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) - CheckLanguage(filename, clean_lines, line, file_extension, include_state, - nesting_state, error) - CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) - CheckForNonStandardConstructs(filename, clean_lines, line, - nesting_state, error) - CheckVlogArguments(filename, clean_lines, line, error) - CheckPosixThreading(filename, clean_lines, line, error) - CheckInvalidIncrement(filename, clean_lines, line, error) - CheckMakePairUsesDeduction(filename, clean_lines, line, error) - for check_fn in extra_check_functions: - check_fn(filename, clean_lines, line, error) - -def ProcessFileData(filename, file_extension, lines, error, - extra_check_functions=[]): - """Performs lint checks and reports any errors to the given error function. - - Args: - filename: Filename of the file that is being processed. - file_extension: The extension (dot not included) of the file. - lines: An array of strings, each representing a line of the file, with the - last element being empty if the file is terminated with a newline. - error: A callable to which errors are reported, which takes 4 arguments: - filename, line number, error level, and message - extra_check_functions: An array of additional check functions that will be - run on each source line. 
Each function takes 4 - arguments: filename, clean_lines, line, error - """ - lines = (['// marker so line numbers and indices both start at 1'] + lines + - ['// marker so line numbers end in a known way']) - - include_state = _IncludeState() - function_state = _FunctionState() - nesting_state = _NestingState() - - ResetNolintSuppressions() - - CheckForCopyright(filename, lines, error) - - if file_extension == 'h': - CheckForHeaderGuard(filename, lines, error) - - RemoveMultiLineComments(filename, lines, error) - clean_lines = CleansedLines(lines) - for line in xrange(clean_lines.NumLines()): - ProcessLine(filename, file_extension, clean_lines, line, - include_state, function_state, nesting_state, error, - extra_check_functions) - nesting_state.CheckCompletedBlocks(filename, error) - - CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) - - # We check here rather than inside ProcessLine so that we see raw - # lines rather than "cleaned" lines. - CheckForBadCharacters(filename, lines, error) - - CheckForNewlineAtEOF(filename, lines, error) - -def ProcessFile(filename, vlevel, extra_check_functions=[]): - """Does google-lint on a single file. - - Args: - filename: The name of the file to parse. - - vlevel: The level of errors to report. Every error of confidence - >= verbose_level will be reported. 0 is a good default. - - extra_check_functions: An array of additional check functions that will be - run on each source line. Each function takes 4 - arguments: filename, clean_lines, line, error - """ - - _SetVerboseLevel(vlevel) - - try: - # Support the UNIX convention of using "-" for stdin. Note that - # we are not opening the file with universal newline support - # (which codecs doesn't support anyway), so the resulting lines do - # contain trailing '\r' characters if we are reading a file that - # has CRLF endings. - # If after the split a trailing '\r' is present, it is removed - # below. If it is not expected to be present (i.e. os.linesep != - # '\r\n' as in Windows), a warning is issued below if this file - # is processed. - - if filename == '-': - lines = codecs.StreamReaderWriter(sys.stdin, - codecs.getreader('utf8'), - codecs.getwriter('utf8'), - 'replace').read().split('\n') - else: - lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n') - - carriage_return_found = False - # Remove trailing '\r'. - for linenum in range(len(lines)): - if lines[linenum].endswith('\r'): - lines[linenum] = lines[linenum].rstrip('\r') - carriage_return_found = True - - except IOError: - sys.stderr.write( - "Skipping input '%s': Can't open for reading\n" % filename) - return - - # Note, if no dot is found, this will give the entire filename as the ext. - file_extension = filename[filename.rfind('.') + 1:] - - # When reading from stdin, the extension is unknown, so no cpplint tests - # should rely on the extension. - if filename != '-' and file_extension not in _valid_extensions: - sys.stderr.write('Ignoring %s; not a valid file name ' - '(%s)\n' % (filename, ', '.join(_valid_extensions))) - else: - ProcessFileData(filename, file_extension, lines, Error, - extra_check_functions) - if carriage_return_found and os.linesep != '\r\n': - # Use 0 for linenum since outputting only one error for potentially - # several lines. 
- Error(filename, 0, 'whitespace/newline', 1, - 'One or more unexpected \\r (^M) found; ' - 'better to use only a \\n') - - sys.stderr.write('Done processing %s\n' % filename) - - -def PrintUsage(message): - """Prints a brief usage string and exits, optionally with an error message. - - Args: - message: The optional error message. - """ - sys.stderr.write(_USAGE) - if message: - sys.exit('\nFATAL ERROR: ' + message) - else: - sys.exit(1) - - -def PrintCategories(): - """Prints a list of all the error-categories used by error messages. - - These are the categories used to filter messages via --filter. - """ - sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES)) - sys.exit(0) - - -def ParseArguments(args): - """Parses the command line arguments. - - This may set the output format and verbosity level as side-effects. - - Args: - args: The command line arguments. - - Returns: - The list of filenames to lint. - """ - try: - (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=', - 'counting=', - 'filter=', - 'root=', - 'linelength=', - 'extensions=']) - except getopt.GetoptError: - PrintUsage('Invalid arguments.') - - verbosity = _VerboseLevel() - output_format = _OutputFormat() - filters = '' - counting_style = '' - - for (opt, val) in opts: - if opt == '--help': - PrintUsage(None) - elif opt == '--output': - if val not in ('emacs', 'vs7', 'eclipse'): - PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.') - output_format = val - elif opt == '--verbose': - verbosity = int(val) - elif opt == '--filter': - filters = val - if not filters: - PrintCategories() - elif opt == '--counting': - if val not in ('total', 'toplevel', 'detailed'): - PrintUsage('Valid counting options are total, toplevel, and detailed') - counting_style = val - elif opt == '--root': - global _root - _root = val - elif opt == '--linelength': - global _line_length - try: - _line_length = int(val) - except ValueError: - PrintUsage('Line length must be digits.') - elif opt == '--extensions': - global _valid_extensions - try: - _valid_extensions = set(val.split(',')) - except ValueError: - PrintUsage('Extensions must be a comma separated list.') - - if not filenames: - PrintUsage('No files were specified.') - - _SetOutputFormat(output_format) - _SetVerboseLevel(verbosity) - _SetFilters(filters) - _SetCountingStyle(counting_style) - - return filenames - - -def main(): - filenames = ParseArguments(sys.argv[1:]) - - # Change stderr to write with replacement characters so we don't die - # if we try to print something containing non-ASCII characters. - sys.stderr = codecs.StreamReaderWriter(sys.stderr, - codecs.getreader('utf8'), - codecs.getwriter('utf8'), - 'replace') - - _cpplint_state.ResetErrorCounts() - for filename in filenames: - ProcessFile(filename, _cpplint_state.verbose_level) - _cpplint_state.PrintErrorCounts() - - sys.exit(_cpplint_state.error_count > 0) - - -if __name__ == '__main__': - main() diff --git a/media/libvpx/libvpx/tools/diff.py b/media/libvpx/libvpx/tools/diff.py deleted file mode 100644 index a96c7db851db..000000000000 --- a/media/libvpx/libvpx/tools/diff.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python -## Copyright (c) 2012 The WebM project authors. All Rights Reserved. -## -## Use of this source code is governed by a BSD-style license -## that can be found in the LICENSE file in the root of the source -## tree. An additional intellectual property rights grant can be found -## in the file PATENTS.
All contributing project authors may -## be found in the AUTHORS file in the root of the source tree. -## -"""Classes for representing diff pieces.""" - -__author__ = "jkoleszar@google.com" - -import re - - -class DiffLines(object): - """A container for one half of a diff.""" - - def __init__(self, filename, offset, length): - self.filename = filename - self.offset = offset - self.length = length - self.lines = [] - self.delta_line_nums = [] - - def Append(self, line): - l = len(self.lines) - if line[0] != " ": - self.delta_line_nums.append(self.offset + l) - self.lines.append(line[1:]) - assert l+1 <= self.length - - def Complete(self): - return len(self.lines) == self.length - - def __contains__(self, item): - return item >= self.offset and item <= self.offset + self.length - 1 - - -class DiffHunk(object): - """A container for one diff hunk, consisting of two DiffLines.""" - - def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b): - self.header = header - self.left = DiffLines(file_a, start_a, len_a) - self.right = DiffLines(file_b, start_b, len_b) - self.lines = [] - - def Append(self, line): - """Adds a line to the DiffHunk and its DiffLines children.""" - if line[0] == "-": - self.left.Append(line) - elif line[0] == "+": - self.right.Append(line) - elif line[0] == " ": - self.left.Append(line) - self.right.Append(line) - elif line[0] == "\\": - # Ignore newline messages from git diff. - pass - else: - assert False, ("Unrecognized character at start of diff line " - "%r" % line[0]) - self.lines.append(line) - - def Complete(self): - return self.left.Complete() and self.right.Complete() - - def __repr__(self): - return "DiffHunk(%s, %s, len %d)" % ( - self.left.filename, self.right.filename, - max(self.left.length, self.right.length)) - - -def ParseDiffHunks(stream): - """Walk a file-like object, yielding DiffHunks as they're parsed.""" - - file_regex = re.compile(r"(\+\+\+|---) (\S+)") - range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?") - hunk = None - while True: - line = stream.readline() - if not line: - break - - if hunk is None: - # Parse file names - diff_file = file_regex.match(line) - if diff_file: - if line.startswith("---"): - a_line = line - a = diff_file.group(2) - continue - if line.startswith("+++"): - b_line = line - b = diff_file.group(2) - continue - - # Parse offset/lengths - diffrange = range_regex.match(line) - if diffrange: - if diffrange.group(2): - start_a = int(diffrange.group(1)) - len_a = int(diffrange.group(3)) - else: - start_a = 1 - len_a = int(diffrange.group(1)) - - if diffrange.group(5): - start_b = int(diffrange.group(4)) - len_b = int(diffrange.group(6)) - else: - start_b = 1 - len_b = int(diffrange.group(4)) - - header = [a_line, b_line, line] - hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b) - else: - # Add the current line to the hunk - hunk.Append(line) - - # See if the whole hunk has been parsed. If so, yield it and prepare - # for the next hunk. - if hunk.Complete(): - yield hunk - hunk = None - - # Partial hunks are a parse error - assert hunk is None diff --git a/media/libvpx/libvpx/tools/gen_authors.sh b/media/libvpx/libvpx/tools/gen_authors.sh deleted file mode 100755 index f163f663af03..000000000000 --- a/media/libvpx/libvpx/tools/gen_authors.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -# Add organization names manually. - -cat <<EOF -$(git log --pretty=format:"%aN <%aE>" | sort | uniq | grep -v corp.google \ - | grep -v noreply) -Google Inc.
-The Mozilla Foundation -The Xiph.Org Foundation -EOF diff --git a/media/libvpx/libvpx/tools/intersect-diffs.py b/media/libvpx/libvpx/tools/intersect-diffs.py deleted file mode 100755 index 4dbafa90b7db..000000000000 --- a/media/libvpx/libvpx/tools/intersect-diffs.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python -## Copyright (c) 2012 The WebM project authors. All Rights Reserved. -## -## Use of this source code is governed by a BSD-style license -## that can be found in the LICENSE file in the root of the source -## tree. An additional intellectual property rights grant can be found -## in the file PATENTS. All contributing project authors may -## be found in the AUTHORS file in the root of the source tree. -## -"""Calculates the "intersection" of two unified diffs. - -Given two diffs, A and B, it finds all hunks in B that had non-context lines -in A and prints them to stdout. This is useful to determine the hunks in B that -are relevant to A. The resulting file can be applied with patch(1) on top of A. -""" - -__author__ = "jkoleszar@google.com" - -import sys - -import diff - - -def FormatDiffHunks(hunks): - """Re-serialize a list of DiffHunks.""" - r = [] - last_header = None - for hunk in hunks: - this_header = hunk.header[0:2] - if last_header != this_header: - r.extend(hunk.header) - last_header = this_header - else: - r.extend(hunk.header[2]) - r.extend(hunk.lines) - r.append("\n") - return "".join(r) - - -def ZipHunks(rhs_hunks, lhs_hunks): - """Join two hunk lists on filename.""" - for rhs_hunk in rhs_hunks: - rhs_file = rhs_hunk.right.filename.split("/")[1:] - - for lhs_hunk in lhs_hunks: - lhs_file = lhs_hunk.left.filename.split("/")[1:] - if lhs_file != rhs_file: - continue - yield (rhs_hunk, lhs_hunk) - - -def main(): - old_hunks = [x for x in diff.ParseDiffHunks(open(sys.argv[1], "r"))] - new_hunks = [x for x in diff.ParseDiffHunks(open(sys.argv[2], "r"))] - out_hunks = [] - - # Join the right hand side of the older diff with the left hand side of the - # newer diff. - for old_hunk, new_hunk in ZipHunks(old_hunks, new_hunks): - if new_hunk in out_hunks: - continue - old_lines = old_hunk.right - new_lines = new_hunk.left - - # Determine if this hunk overlaps any non-context line from the other - for i in old_lines.delta_line_nums: - if i in new_lines: - out_hunks.append(new_hunk) - break - - if out_hunks: - print FormatDiffHunks(out_hunks) - sys.exit(1) - -if __name__ == "__main__": - main() diff --git a/media/libvpx/libvpx/tools/lint-hunks.py b/media/libvpx/libvpx/tools/lint-hunks.py deleted file mode 100755 index 6e25d93624dd..000000000000 --- a/media/libvpx/libvpx/tools/lint-hunks.py +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/python -## Copyright (c) 2012 The WebM project authors. All Rights Reserved. -## -## Use of this source code is governed by a BSD-style license -## that can be found in the LICENSE file in the root of the source -## tree. An additional intellectual property rights grant can be found -## in the file PATENTS. All contributing project authors may -## be found in the AUTHORS file in the root of the source tree.
-## -"""Performs style checking on each diff hunk.""" -import getopt -import os -import StringIO -import subprocess -import sys - -import diff - - -SHORT_OPTIONS = "h" -LONG_OPTIONS = ["help"] - -TOPLEVEL_CMD = ["git", "rev-parse", "--show-toplevel"] -DIFF_CMD = ["git", "diff"] -DIFF_INDEX_CMD = ["git", "diff-index", "-u", "HEAD", "--"] -SHOW_CMD = ["git", "show"] -CPPLINT_FILTERS = ["-readability/casting"] - - -class Usage(Exception): - pass - - -class SubprocessException(Exception): - def __init__(self, args): - msg = "Failed to execute '%s'"%(" ".join(args)) - super(SubprocessException, self).__init__(msg) - - -class Subprocess(subprocess.Popen): - """Adds the notion of an expected returncode to Popen.""" - - def __init__(self, args, expected_returncode=0, **kwargs): - self._args = args - self._expected_returncode = expected_returncode - super(Subprocess, self).__init__(args, **kwargs) - - def communicate(self, *args, **kwargs): - result = super(Subprocess, self).communicate(*args, **kwargs) - if self._expected_returncode is not None: - try: - ok = self.returncode in self._expected_returncode - except TypeError: - ok = self.returncode == self._expected_returncode - if not ok: - raise SubprocessException(self._args) - return result - - -def main(argv=None): - if argv is None: - argv = sys.argv - try: - try: - opts, args = getopt.getopt(argv[1:], SHORT_OPTIONS, LONG_OPTIONS) - except getopt.error, msg: - raise Usage(msg) - - # process options - for o, _ in opts: - if o in ("-h", "--help"): - print __doc__ - sys.exit(0) - - if args and len(args) > 1: - print __doc__ - sys.exit(0) - - # Find the fully qualified path to the root of the tree - tl = Subprocess(TOPLEVEL_CMD, stdout=subprocess.PIPE) - tl = tl.communicate()[0].strip() - - # See if we're working on the index or not. 
- if args: - diff_cmd = DIFF_CMD + [args[0] + "^!"] - else: - diff_cmd = DIFF_INDEX_CMD - - # Build the command line to execute cpplint - cpplint_cmd = [os.path.join(tl, "tools", "cpplint.py"), - "--filter=" + ",".join(CPPLINT_FILTERS), - "-"] - - # Get a list of all affected lines - file_affected_line_map = {} - p = Subprocess(diff_cmd, stdout=subprocess.PIPE) - stdout = p.communicate()[0] - for hunk in diff.ParseDiffHunks(StringIO.StringIO(stdout)): - filename = hunk.right.filename[2:] - if filename not in file_affected_line_map: - file_affected_line_map[filename] = set() - file_affected_line_map[filename].update(hunk.right.delta_line_nums) - - # Run each affected file through cpplint - lint_failed = False - for filename, affected_lines in file_affected_line_map.iteritems(): - if filename.split(".")[-1] not in ("c", "h", "cc"): - continue - - if args: - # File contents come from git - show_cmd = SHOW_CMD + [args[0] + ":" + filename] - show = Subprocess(show_cmd, stdout=subprocess.PIPE) - lint = Subprocess(cpplint_cmd, expected_returncode=(0, 1), - stdin=show.stdout, stderr=subprocess.PIPE) - lint_out = lint.communicate()[1] - else: - # File contents come from the working tree - lint = Subprocess(cpplint_cmd, expected_returncode=(0, 1), - stdin=subprocess.PIPE, stderr=subprocess.PIPE) - stdin = open(os.path.join(tl, filename)).read() - lint_out = lint.communicate(stdin)[1] - - for line in lint_out.split("\n"): - fields = line.split(":") - if fields[0] != "-": - continue - warning_line_num = int(fields[1]) - if warning_line_num in affected_lines: - print "%s:%d:%s"%(filename, warning_line_num, - ":".join(fields[2:])) - lint_failed = True - - # Set exit code if any relevant lint errors seen - if lint_failed: - return 1 - - except Usage, err: - print >>sys.stderr, err - print >>sys.stderr, "for help use --help" - return 2 - -if __name__ == "__main__": - sys.exit(main()) diff --git a/media/libvpx/libvpx/tools/tiny_ssim.c b/media/libvpx/libvpx/tools/tiny_ssim.c deleted file mode 100644 index 5e8ca02b49d7..000000000000 --- a/media/libvpx/libvpx/tools/tiny_ssim.c +++ /dev/null @@ -1,802 +0,0 @@ -/* - * Copyright (c) 2016 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <assert.h> -#include <math.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include "vpx/vpx_codec.h" -#include "vpx/vpx_integer.h" -#include "./y4minput.h" -#include "vpx_dsp/ssim.h" -#include "vpx_ports/mem.h" - -static const int64_t cc1 = 26634; // (64^2*(.01*255)^2 -static const int64_t cc2 = 239708; // (64^2*(.03*255)^2 -static const int64_t cc1_10 = 428658; // (64^2*(.01*1023)^2 -static const int64_t cc2_10 = 3857925; // (64^2*(.03*1023)^2 -static const int64_t cc1_12 = 6868593; // (64^2*(.01*4095)^2 -static const int64_t cc2_12 = 61817334; // (64^2*(.03*4095)^2 - -#if CONFIG_VP9_HIGHBITDEPTH -static uint64_t calc_plane_error16(uint16_t *orig, int orig_stride, - uint16_t *recon, int recon_stride, - unsigned int cols, unsigned int rows) { - unsigned int row, col; - uint64_t total_sse = 0; - int diff; - - for (row = 0; row < rows; row++) { - for (col = 0; col < cols; col++) { - diff = orig[col] - recon[col]; - total_sse += diff * diff; - } - - orig += orig_stride; - recon += recon_stride; - } - return total_sse; -} -#endif -static uint64_t calc_plane_error(uint8_t *orig, int orig_stride, uint8_t *recon, - int recon_stride, unsigned int cols, - unsigned int rows) { - unsigned int row, col; - uint64_t total_sse = 0; - int diff; - - for (row = 0; row < rows; row++) { - for (col = 0; col < cols; col++) { - diff = orig[col] - recon[col]; - total_sse += diff * diff; - } - - orig += orig_stride; - recon += recon_stride; - } - return total_sse; } - -#define MAX_PSNR 100 -static double mse2psnr(double samples, double peak, double mse) { - double psnr; - - if (mse > 0.0) - psnr = 10.0 * log10(peak * peak * samples / mse); - else - psnr = MAX_PSNR; // Limit to prevent / 0 - - if (psnr > MAX_PSNR) psnr = MAX_PSNR; - - return psnr; -} - -typedef enum { RAW_YUV, Y4M } input_file_type; - -typedef struct input_file { - FILE *file; - input_file_type type; - unsigned char *buf; - y4m_input y4m; - vpx_image_t img; - int w; - int h; - int bit_depth; -} input_file_t; - -// Open a file and determine if it's y4m or raw. If y4m, get the header. -static int open_input_file(const char *file_name, input_file_t *input, int w, - int h, int bit_depth) { - char y4m_buf[4]; - size_t r1; - input->type = RAW_YUV; - input->buf = NULL; - input->file = strcmp(file_name, "-") ? fopen(file_name, "rb") : stdin; - if (input->file == NULL) return -1; - r1 = fread(y4m_buf, 1, 4, input->file); - if (r1 == 4) { - if (memcmp(y4m_buf, "YUV4", 4) == 0) input->type = Y4M; - switch (input->type) { - case Y4M: - y4m_input_open(&input->y4m, input->file, y4m_buf, 4, 0); - input->w = input->y4m.pic_w; - input->h = input->y4m.pic_h; - input->bit_depth = input->y4m.bit_depth; - // Y4M allocates its own buf. Init this to avoid problems if we never - // read frames.
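(Editor's aside: a worked instance of the mse2psnr() formula above, using hypothetical numbers. Note that, as used with calc_plane_error(), the third argument is a summed SSE rather than a per-sample mean.)

    import math

    # 8-bit 16x16 block: peak = 255, samples = 256, total SSE = 1024.
    # PSNR = 10 * log10(peak^2 * samples / sse) ~= 42.11 dB
    psnr = 10.0 * math.log10(255.0 * 255.0 * 256.0 / 1024.0)
    assert abs(psnr - 42.11) < 0.01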
- memset(&input->img, 0, sizeof(input->img)); - break; - case RAW_YUV: - fseek(input->file, 0, SEEK_SET); - input->w = w; - input->h = h; - if (bit_depth < 9) - input->buf = malloc(w * h * 3 / 2); - else - input->buf = malloc(w * h * 3); - break; - } - } - return 0; -} - -static void close_input_file(input_file_t *in) { - if (in->file) fclose(in->file); - if (in->type == Y4M) { - vpx_img_free(&in->img); - } else { - free(in->buf); - } -} - -static size_t read_input_file(input_file_t *in, unsigned char **y, - unsigned char **u, unsigned char **v, int bd) { - size_t r1 = 0; - switch (in->type) { - case Y4M: - r1 = y4m_input_fetch_frame(&in->y4m, in->file, &in->img); - *y = in->img.planes[0]; - *u = in->img.planes[1]; - *v = in->img.planes[2]; - break; - case RAW_YUV: - if (bd < 9) { - r1 = fread(in->buf, in->w * in->h * 3 / 2, 1, in->file); - *y = in->buf; - *u = in->buf + in->w * in->h; - *v = in->buf + 5 * in->w * in->h / 4; - } else { - r1 = fread(in->buf, in->w * in->h * 3, 1, in->file); - *y = in->buf; - *u = in->buf + in->w * in->h / 2; - *v = *u + in->w * in->h / 2; - } - break; - } - - return r1; -} - -void ssim_parms_16x16(const uint8_t *s, int sp, const uint8_t *r, int rp, - uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, - uint32_t *sum_sq_r, uint32_t *sum_sxr) { - int i, j; - for (i = 0; i < 16; i++, s += sp, r += rp) { - for (j = 0; j < 16; j++) { - *sum_s += s[j]; - *sum_r += r[j]; - *sum_sq_s += s[j] * s[j]; - *sum_sq_r += r[j] * r[j]; - *sum_sxr += s[j] * r[j]; - } - } -} -void ssim_parms_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp, - uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, - uint32_t *sum_sq_r, uint32_t *sum_sxr) { - int i, j; - for (i = 0; i < 8; i++, s += sp, r += rp) { - for (j = 0; j < 8; j++) { - *sum_s += s[j]; - *sum_r += r[j]; - *sum_sq_s += s[j] * s[j]; - *sum_sq_r += r[j] * r[j]; - *sum_sxr += s[j] * r[j]; - } - } -} - -void highbd_ssim_parms_8x8(const uint16_t *s, int sp, const uint16_t *r, int rp, - uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, - uint32_t *sum_sq_r, uint32_t *sum_sxr) { - int i, j; - for (i = 0; i < 8; i++, s += sp, r += rp) { - for (j = 0; j < 8; j++) { - *sum_s += s[j]; - *sum_r += r[j]; - *sum_sq_s += s[j] * s[j]; - *sum_sq_r += r[j] * r[j]; - *sum_sxr += s[j] * r[j]; - } - } -} - -static double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s, - uint32_t sum_sq_r, uint32_t sum_sxr, int count, - uint32_t bd) { - int64_t ssim_n, ssim_d; - int64_t c1 = 0, c2 = 0; - if (bd == 8) { - // scale the constants by number of pixels - c1 = (cc1 * count * count) >> 12; - c2 = (cc2 * count * count) >> 12; - } else if (bd == 10) { - c1 = (cc1_10 * count * count) >> 12; - c2 = (cc2_10 * count * count) >> 12; - } else if (bd == 12) { - c1 = (cc1_12 * count * count) >> 12; - c2 = (cc2_12 * count * count) >> 12; - } else { - assert(0); - } - - ssim_n = (2 * sum_s * sum_r + c1) * - ((int64_t)2 * count * sum_sxr - (int64_t)2 * sum_s * sum_r + c2); - - ssim_d = (sum_s * sum_s + sum_r * sum_r + c1) * - ((int64_t)count * sum_sq_s - (int64_t)sum_s * sum_s + - (int64_t)count * sum_sq_r - (int64_t)sum_r * sum_r + c2); - - return ssim_n * 1.0 / ssim_d; -} - -static double ssim_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp) { - uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; - ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); - return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64, 8); -} - -static double highbd_ssim_8x8(const uint16_t *s, int sp, 
const uint16_t *r, - int rp, uint32_t bd, uint32_t shift) { - uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; - highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, - &sum_sxr); - return similarity(sum_s >> shift, sum_r >> shift, sum_sq_s >> (2 * shift), - sum_sq_r >> (2 * shift), sum_sxr >> (2 * shift), 64, bd); -} - -// We are using an 8x8 moving window, with the starting location of each 8x8 -// window on the 4x4 pixel grid. Such an arrangement allows the windows to -// overlap block boundaries to penalize blocking artifacts. -static double ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1, - int stride_img2, int width, int height) { - int i, j; - int samples = 0; - double ssim_total = 0; - - // sample points start at each 4x4 location - for (i = 0; i <= height - 8; - i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) { - for (j = 0; j <= width - 8; j += 4) { - double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2); - ssim_total += v; - samples++; - } - } - ssim_total /= samples; - return ssim_total; -} - -static double highbd_ssim2(const uint8_t *img1, const uint8_t *img2, - int stride_img1, int stride_img2, int width, - int height, uint32_t bd, uint32_t shift) { - int i, j; - int samples = 0; - double ssim_total = 0; - - // sample points start at each 4x4 location - for (i = 0; i <= height - 8; - i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) { - for (j = 0; j <= width - 8; j += 4) { - double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1, - CONVERT_TO_SHORTPTR(img2 + j), stride_img2, bd, - shift); - ssim_total += v; - samples++; - } - } - ssim_total /= samples; - return ssim_total; -} - -// traditional ssim as per: http://en.wikipedia.org/wiki/Structural_similarity -// -// Reworking the math -> -// -// ssim(x,y) = (2*mean(x)*mean(y) + c1)*(2*cov(x,y)+c2) / -// ((mean(x)^2+mean(y)^2+c1)*(var(x)+var(y)+c2)) -// -// mean(x) = sum(x) / n -// -// cov(x,y) = (n*sum(xi*yi)-sum(x)*sum(y))/(n*n) -// -// var(x) = (n*sum(xi*xi)-sum(xi)*sum(xi))/(n*n) -// -// ssim(x,y) = -// (2*sum(x)*sum(y)/(n*n) + c1)*(2*(n*sum(xi*yi)-sum(x)*sum(y))/(n*n)+c2) / -// (((sum(x)*sum(x)+sum(y)*sum(y))/(n*n) +c1) * -// ((n*sum(xi*xi) - sum(xi)*sum(xi))/(n*n)+ -// (n*sum(yi*yi) - sum(yi)*sum(yi))/(n*n)+c2))) -// -// factoring out n*n -// -// ssim(x,y) = -// (2*sum(x)*sum(y) + n*n*c1)*(2*(n*sum(xi*yi)-sum(x)*sum(y))+n*n*c2) / -// (((sum(x)*sum(x)+sum(y)*sum(y)) + n*n*c1) * -// (n*sum(xi*xi)-sum(xi)*sum(xi)+n*sum(yi*yi)-sum(yi)*sum(yi)+n*n*c2)) -// -// Replacing c1 with n*n*c1 gives the final step that leads to this code. -// The final step scales by 12 bits so we don't lose precision in the constants. - -static double ssimv_similarity(const Ssimv *sv, int64_t n) { - // Scale the constants by number of pixels. - const int64_t c1 = (cc1 * n * n) >> 12; - const int64_t c2 = (cc2 * n * n) >> 12; - - const double l = 1.0 * (2 * sv->sum_s * sv->sum_r + c1) / - (sv->sum_s * sv->sum_s + sv->sum_r * sv->sum_r + c1); - - // Since these variables are unsigned sums, convert to double so - // the math is done in double arithmetic. - const double v = (2.0 * n * sv->sum_sxr - 2 * sv->sum_s * sv->sum_r + c2) / - (n * sv->sum_sq_s - sv->sum_s * sv->sum_s + - n * sv->sum_sq_r - sv->sum_r * sv->sum_r + c2); - - return l * v; -}
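
The window arithmetic above is easy to get wrong: windows are 8x8 but advance in steps of 4, so every interior pixel is covered by up to four windows. A minimal standalone sketch (illustration only, not part of the patch; count_ssim_windows is a hypothetical helper) that reproduces the loop bounds of ssim2():

#include <stdio.h>

/* Mirrors the loop bounds in ssim2(): windows start on the 4x4 grid and
 * must fit entirely inside the plane, so the last start is at dim - 8. */
static int count_ssim_windows(int width, int height) {
  int i, j, samples = 0;
  for (i = 0; i <= height - 8; i += 4) {
    for (j = 0; j <= width - 8; j += 4) ++samples;
  }
  return samples;
}

int main(void) {
  /* A 64x48 plane yields ((64-8)/4+1) * ((48-8)/4+1) = 15 * 11 = 165. */
  printf("%d\n", count_ssim_windows(64, 48));
  return 0;
}
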
- -// The first term of the ssim metric is a luminance factor. -// -// (2*mean(x)*mean(y) + c1)/ (mean(x)^2+mean(y)^2+c1) -// -// This luminance factor is super sensitive to the dark side of luminance -// values and completely insensitive on the white side. Check out two sets, -// (1,3) and (250,252): the term gives 2*1*3/(1+9) = .60 and -// 2*250*252/(250^2+252^2) => .99997. -// -// As a result, this tweaked version of the calculation takes the luminance -// as a percentage off from the peak possible: -// -// 255 * 255 - (sum_s - sum_r) / count * (sum_s - sum_r) / count -// -static double ssimv_similarity2(const Ssimv *sv, int64_t n) { - // Scale the constants by number of pixels. - const int64_t c1 = (cc1 * n * n) >> 12; - const int64_t c2 = (cc2 * n * n) >> 12; - - const double mean_diff = (1.0 * sv->sum_s - sv->sum_r) / n; - const double l = (255 * 255 - mean_diff * mean_diff + c1) / (255 * 255 + c1); - - // Since these variables are unsigned sums, convert to double so - // the math is done in double arithmetic. - const double v = (2.0 * n * sv->sum_sxr - 2 * sv->sum_s * sv->sum_r + c2) / - (n * sv->sum_sq_s - sv->sum_s * sv->sum_s + - n * sv->sum_sq_r - sv->sum_r * sv->sum_r + c2); - - return l * v; -} -static void ssimv_parms(uint8_t *img1, int img1_pitch, uint8_t *img2, - int img2_pitch, Ssimv *sv) { - ssim_parms_8x8(img1, img1_pitch, img2, img2_pitch, &sv->sum_s, &sv->sum_r, - &sv->sum_sq_s, &sv->sum_sq_r, &sv->sum_sxr); -}
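
The numbers quoted in the comment above can be checked directly. A small standalone sketch (illustration only, not part of the patch) evaluating the classic luminance term with c1 = 0:

#include <stdio.h>

/* Classic SSIM luminance term, 2*mx*my / (mx^2 + my^2), with c1 = 0. */
static double lum_term(double mx, double my) {
  return (2.0 * mx * my) / (mx * mx + my * my);
}

int main(void) {
  printf("(1,3):     %f\n", lum_term(1, 3));     /* 0.600000  */
  printf("(250,252): %f\n", lum_term(250, 252)); /* ~0.999968 */
  return 0;
}
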
- -double get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2, - int img2_pitch, int width, int height, Ssimv *sv2, - Metrics *m, int do_inconsistency) { - double dssim_total = 0; - double ssim_total = 0; - double ssim2_total = 0; - double inconsistency_total = 0; - int i, j; - int c = 0; - double norm; - double old_ssim_total = 0; - - // We can sample points as frequently as we like; start with 1 per 4x4. - for (i = 0; i < height; - i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) { - for (j = 0; j < width; j += 4, ++c) { - Ssimv sv = { 0, 0, 0, 0, 0, 0 }; - double ssim; - double ssim2; - double dssim; - uint32_t var_new; - uint32_t var_old; - uint32_t mean_new; - uint32_t mean_old; - double ssim_new; - double ssim_old; - - // Not sure there's a great way to handle the edge pixels - // in ssim when using a window. Seems biased against edge pixels - // however you handle this. This uses only samples that are - // fully in the frame. - if (j + 8 <= width && i + 8 <= height) { - ssimv_parms(img1 + j, img1_pitch, img2 + j, img2_pitch, &sv); - } - - ssim = ssimv_similarity(&sv, 64); - ssim2 = ssimv_similarity2(&sv, 64); - - sv.ssim = ssim2; - - // dssim is calculated to use as an actual error metric and - // is scaled up to the same range as sum square error. - // Since we are subsampling every 16th point, maybe this should be - // *16? - dssim = 255 * 255 * (1 - ssim2) / 2; - - // Here I introduce a new error metric: consistency-weighted - // SSIM-inconsistency. This metric isolates frames where the - // SSIM 'suddenly' changes, e.g. if one frame in every 8 is much - // sharper or blurrier than the others. Higher values indicate a - // temporally inconsistent SSIM. There are two ideas at work: - // - // 1) 'SSIM-inconsistency': the total inconsistency value - // reflects how much SSIM values are changing between this - // source / reference frame pair and the previous pair. - // - // 2) 'consistency-weighted': weights de-emphasize areas in the - // frame where the scene content has changed. Changes in scene - // content are detected via changes in local variance and local - // mean. - // - // Thus the overall measure reflects how inconsistent the SSIM - // values are, over consistent regions of the frame. - // - // The metric has three terms: - // - // term 1 -> uses change in scene variance to weight error score - // 2 * var(Fi)*var(Fi-1) / (var(Fi)^2+var(Fi-1)^2) - // larger changes from one frame to the next mean we care - // less about consistency. - // - // term 2 -> uses change in local scene luminance to weight error - // 2 * avg(Fi)*avg(Fi-1) / (avg(Fi)^2+avg(Fi-1)^2) - // larger changes from one frame to the next mean we care - // less about consistency. - // - // term 3 -> measures inconsistency in ssim scores between frames - // 1 - (2 * ssim(Fi)*ssim(Fi-1)/(ssim(Fi)^2+ssim(Fi-1)^2)). - // - // This term compares the ssim score for the same location in 2 - // subsequent frames. - var_new = sv.sum_sq_s - sv.sum_s * sv.sum_s / 64; - var_old = sv2[c].sum_sq_s - sv2[c].sum_s * sv2[c].sum_s / 64; - mean_new = sv.sum_s; - mean_old = sv2[c].sum_s; - ssim_new = sv.ssim; - ssim_old = sv2[c].ssim; - - if (do_inconsistency) { - // We do the metric once for every 4x4 block in the image. Since - // we are scaling the error to SSE for use in a psnr calculation, - // 1.0 = 4x4x255x255, the worst error we can possibly have. - static const double kScaling = 4. * 4 * 255 * 255; - - // The constants have to be non-zero to avoid potential divide-by-zero - // issues; other than that, they act as a kind of weighting between - // the terms. No testing of what the right terms should be has been - // done. - static const double c1 = 1, c2 = 1, c3 = 1; - - // This measures how consistent the variance is between two - // consecutive source frames. 1.0 means they have exactly the same - // variance. - const double variance_term = - (2.0 * var_old * var_new + c1) / - (1.0 * var_old * var_old + 1.0 * var_new * var_new + c1); - - // This measures how consistent the local means are between two - // consecutive frames. 1.0 means they have exactly the same mean. - const double mean_term = - (2.0 * mean_old * mean_new + c2) / - (1.0 * mean_old * mean_old + 1.0 * mean_new * mean_new + c2); - - // This measures how consistent the ssims of two - // consecutive frames are. 1.0 means they are exactly the same. - double ssim_term = - pow((2.0 * ssim_old * ssim_new + c3) / - (ssim_old * ssim_old + ssim_new * ssim_new + c3), - 5); - - double this_inconsistency; - - // Floating point math sometimes makes this > 1 by a tiny bit. - // We want the metric to scale between 0 and 1.0 so we can convert - // it to an snr scaled value. - if (ssim_term > 1) ssim_term = 1; - - // This converts the consistency metric to an inconsistency metric - // (so we can scale it like psnr to something like sum square error). - // The reason for the variance and mean terms is the assumption that - // if there are big changes in the source we should penalize - // inconsistency in ssim scores less, as it will be less visible - // to the user. - this_inconsistency = (1 - ssim_term) * variance_term * mean_term; - - this_inconsistency *= kScaling; - inconsistency_total += this_inconsistency; - } - sv2[c] = sv; - ssim_total += ssim; - ssim2_total += ssim2; - dssim_total += dssim; - - old_ssim_total += ssim_old; - } - old_ssim_total += 0; - } - - norm = 1. / (width / 4) / (height / 4); - ssim_total *= norm; - ssim2_total *= norm; - m->ssim2 = ssim2_total; - m->ssim = ssim_total; - if (old_ssim_total == 0) inconsistency_total = 0; - - m->ssimc = inconsistency_total; - - m->dssim = dssim_total; - return inconsistency_total; -}
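
Callers of get_ssim_metrics() are expected to keep one Ssimv slot per 4x4 sample point alive across frames, since sv2 carries the previous frame's statistics for the inconsistency term. A rough usage sketch, assuming it is compiled alongside the functions above, with 8-bit Y planes whose stride equals width; the buffer sizing is inferred from the sampling loop, not stated by the patch:

#include <stdint.h>
#include <stdlib.h>

#include "vpx_dsp/ssim.h" /* Ssimv and Metrics types */

/* Feed n_frames pairs of Y planes through the metric, reusing one
 * zero-initialized Ssimv slot per 4x4 sample point across frames. */
static void ssim_consistency_demo(uint8_t **src, uint8_t **ref, int width,
                                  int height, int n_frames) {
  const int blocks = (width / 4) * (height / 4);
  Ssimv *history = calloc(blocks, sizeof(*history));
  Metrics metrics;
  int f;
  if (!history) return;
  for (f = 0; f < n_frames; ++f) {
    /* Skip the inconsistency term on the first frame: there is no
     * previous pair to compare against yet. */
    get_ssim_metrics(src[f], width, ref[f], width, width, height, history,
                     &metrics, f > 0);
  }
  free(history);
}
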
- -double highbd_calc_ssim(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *dest, double *weight, - uint32_t bd, uint32_t in_bd) { - double a, b, c; - double ssimv; - uint32_t shift = 0; - - assert(bd >= in_bd); - shift = bd - in_bd; - - a = highbd_ssim2(source->y_buffer, dest->y_buffer, source->y_stride, - dest->y_stride, source->y_crop_width, source->y_crop_height, - in_bd, shift); - - b = highbd_ssim2(source->u_buffer, dest->u_buffer, source->uv_stride, - dest->uv_stride, source->uv_crop_width, - source->uv_crop_height, in_bd, shift); - - c = highbd_ssim2(source->v_buffer, dest->v_buffer, source->uv_stride, - dest->uv_stride, source->uv_crop_width, - source->uv_crop_height, in_bd, shift); - - ssimv = a * .8 + .1 * (b + c); - - *weight = 1; - - return ssimv; -} - -int main(int argc, char *argv[]) { - FILE *framestats = NULL; - int bit_depth = 8; - int w = 0, h = 0, tl_skip = 0, tl_skips_remaining = 0; - double ssimavg = 0, ssimyavg = 0, ssimuavg = 0, ssimvavg = 0; - double psnrglb = 0, psnryglb = 0, psnruglb = 0, psnrvglb = 0; - double psnravg = 0, psnryavg = 0, psnruavg = 0, psnrvavg = 0; - double *ssimy = NULL, *ssimu = NULL, *ssimv = NULL; - uint64_t *psnry = NULL, *psnru = NULL, *psnrv = NULL; - size_t i, n_frames = 0, allocated_frames = 0; - int return_value = 0; - input_file_t in[2]; - double peak = 255.0; - - if (argc < 3) { - fprintf(stderr, - "Usage: %s file1.{yuv|y4m} file2.{yuv|y4m}" - " [WxH tl_skip={0,1,3} frame_stats_file bits]\n", - argv[0]); - return_value = 1; - goto clean_up; - } - - if (argc > 3) { - sscanf(argv[3], "%dx%d", &w, &h); - } - - if (argc > 6) { - sscanf(argv[6], "%d", &bit_depth); - } - - if (open_input_file(argv[1], &in[0], w, h, bit_depth) < 0) { - fprintf(stderr, "File %s can't be opened or parsed!\n", argv[1]); - goto clean_up; - } - - if (w == 0 && h == 0) { - // If a y4m is the first file and w, h are not set, grab them from the - // first file. - w = in[0].w; - h = in[0].h; - bit_depth = in[0].bit_depth; - } - if (bit_depth == 10) peak = 1023.0; - - if (bit_depth == 12) peak = 4095.0; - - if (open_input_file(argv[2], &in[1], w, h, bit_depth) < 0) { - fprintf(stderr, "File %s can't be opened or parsed!\n", argv[2]); - goto clean_up; - } - - if (in[0].w != in[1].w || in[0].h != in[1].h || in[0].w != w || - in[0].h != h || w == 0 || h == 0) { - fprintf(stderr, - "Failing: Image dimensions don't match or are unspecified!\n"); - return_value = 1; - goto clean_up; - } - - // Number of frames to skip from file1.yuv for every frame used. Normal - // values 0, 1 and 3 correspond to TL2, TL1 and TL0 respectively for a 3TL - // encoding in mode 10. 7 would be reasonable for comparing TL0 of a 4-layer - // encoding.
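
The skip pattern described above keeps one frame from file1 and then discards tl_skip frames, repeatedly. A standalone sketch (illustration only, not part of the patch) printing which file1 frame indices survive for tl_skip = 3:

#include <stdio.h>

int main(void) {
  const int tl_skip = 3; /* e.g. compare TL0 of a 3-temporal-layer encoding */
  int i, remaining = 0;
  for (i = 0; i < 16; ++i) {
    if (remaining > 0) {
      --remaining; /* frame belongs to a higher temporal layer; drop it */
      continue;
    }
    remaining = tl_skip;
    printf("%d ", i); /* prints: 0 4 8 12 */
  }
  printf("\n");
  return 0;
}
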
- if (argc > 4) { - sscanf(argv[4], "%d", &tl_skip); - if (argc > 5) { - framestats = fopen(argv[5], "w"); - if (!framestats) { - fprintf(stderr, "Could not open \"%s\" for writing: %s\n", argv[5], - strerror(errno)); - return_value = 1; - goto clean_up; - } - } - } - - if (w & 1 || h & 1) { - fprintf(stderr, "Invalid size %dx%d\n", w, h); - return_value = 1; - goto clean_up; - } - - while (1) { - size_t r1, r2; - unsigned char *y[2], *u[2], *v[2]; - - r1 = read_input_file(&in[0], &y[0], &u[0], &v[0], bit_depth); - - if (r1) { - // Reading parts of file1.yuv that were not used in temporal layer. - if (tl_skips_remaining > 0) { - --tl_skips_remaining; - continue; - } - // Use frame, but skip |tl_skip| after it. - tl_skips_remaining = tl_skip; - } - - r2 = read_input_file(&in[1], &y[1], &u[1], &v[1], bit_depth); - - if (r1 && r2 && r1 != r2) { - fprintf(stderr, "Failed to read data: %s [%d/%d]\n", strerror(errno), - (int)r1, (int)r2); - return_value = 1; - goto clean_up; - } else if (r1 == 0 || r2 == 0) { - break; - } -#if CONFIG_VP9_HIGHBITDEPTH -#define psnr_and_ssim(ssim, psnr, buf0, buf1, w, h) \ - if (bit_depth < 9) { \ - ssim = ssim2(buf0, buf1, w, w, w, h); \ - psnr = calc_plane_error(buf0, w, buf1, w, w, h); \ - } else { \ - ssim = highbd_ssim2(CONVERT_TO_BYTEPTR(buf0), CONVERT_TO_BYTEPTR(buf1), w, \ - w, w, h, bit_depth, bit_depth - 8); \ - psnr = calc_plane_error16(CAST_TO_SHORTPTR(buf0), w, \ - CAST_TO_SHORTPTR(buf1), w, w, h); \ - } -#else -#define psnr_and_ssim(ssim, psnr, buf0, buf1, w, h) \ - ssim = ssim2(buf0, buf1, w, w, w, h); \ - psnr = calc_plane_error(buf0, w, buf1, w, w, h); -#endif - - if (n_frames == allocated_frames) { - allocated_frames = allocated_frames == 0 ? 1024 : allocated_frames * 2; - ssimy = realloc(ssimy, allocated_frames * sizeof(*ssimy)); - ssimu = realloc(ssimu, allocated_frames * sizeof(*ssimu)); - ssimv = realloc(ssimv, allocated_frames * sizeof(*ssimv)); - psnry = realloc(psnry, allocated_frames * sizeof(*psnry)); - psnru = realloc(psnru, allocated_frames * sizeof(*psnru)); - psnrv = realloc(psnrv, allocated_frames * sizeof(*psnrv)); - } - psnr_and_ssim(ssimy[n_frames], psnry[n_frames], y[0], y[1], w, h); - psnr_and_ssim(ssimu[n_frames], psnru[n_frames], u[0], u[1], w / 2, h / 2); - psnr_and_ssim(ssimv[n_frames], psnrv[n_frames], v[0], v[1], w / 2, h / 2); - - n_frames++; - } - - if (framestats) { - fprintf(framestats, - "ssim,ssim-y,ssim-u,ssim-v,psnr,psnr-y,psnr-u,psnr-v\n"); - } - - for (i = 0; i < n_frames; ++i) { - double frame_ssim; - double frame_psnr, frame_psnry, frame_psnru, frame_psnrv; - - frame_ssim = 0.8 * ssimy[i] + 0.1 * (ssimu[i] + ssimv[i]); - ssimavg += frame_ssim; - ssimyavg += ssimy[i]; - ssimuavg += ssimu[i]; - ssimvavg += ssimv[i]; - - frame_psnr = - mse2psnr(w * h * 6 / 4, peak, (double)psnry[i] + psnru[i] + psnrv[i]); - frame_psnry = mse2psnr(w * h * 4 / 4, peak, (double)psnry[i]); - frame_psnru = mse2psnr(w * h * 1 / 4, peak, (double)psnru[i]); - frame_psnrv = mse2psnr(w * h * 1 / 4, peak, (double)psnrv[i]); - - psnravg += frame_psnr; - psnryavg += frame_psnry; - psnruavg += frame_psnru; - psnrvavg += frame_psnrv; - - psnryglb += psnry[i]; - psnruglb += psnru[i]; - psnrvglb += psnrv[i]; - - if (framestats) { - fprintf(framestats, "%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf\n", frame_ssim, - ssimy[i], ssimu[i], ssimv[i], frame_psnr, frame_psnry, - frame_psnru, frame_psnrv); - } - } - - ssimavg /= n_frames; - ssimyavg /= n_frames; - ssimuavg /= n_frames; - ssimvavg /= n_frames; - - printf("VpxSSIM: %lf\n", 100 * pow(ssimavg, 8.0)); - 
printf("SSIM: %lf\n", ssimavg); - printf("SSIM-Y: %lf\n", ssimyavg); - printf("SSIM-U: %lf\n", ssimuavg); - printf("SSIM-V: %lf\n", ssimvavg); - puts(""); - - psnravg /= n_frames; - psnryavg /= n_frames; - psnruavg /= n_frames; - psnrvavg /= n_frames; - - printf("AvgPSNR: %lf\n", psnravg); - printf("AvgPSNR-Y: %lf\n", psnryavg); - printf("AvgPSNR-U: %lf\n", psnruavg); - printf("AvgPSNR-V: %lf\n", psnrvavg); - puts(""); - - psnrglb = psnryglb + psnruglb + psnrvglb; - psnrglb = mse2psnr((double)n_frames * w * h * 6 / 4, peak, psnrglb); - psnryglb = mse2psnr((double)n_frames * w * h * 4 / 4, peak, psnryglb); - psnruglb = mse2psnr((double)n_frames * w * h * 1 / 4, peak, psnruglb); - psnrvglb = mse2psnr((double)n_frames * w * h * 1 / 4, peak, psnrvglb); - - printf("GlbPSNR: %lf\n", psnrglb); - printf("GlbPSNR-Y: %lf\n", psnryglb); - printf("GlbPSNR-U: %lf\n", psnruglb); - printf("GlbPSNR-V: %lf\n", psnrvglb); - puts(""); - - printf("Nframes: %d\n", (int)n_frames); - -clean_up: - - close_input_file(&in[0]); - close_input_file(&in[1]); - - if (framestats) fclose(framestats); - - free(ssimy); - free(ssimu); - free(ssimv); - - free(psnry); - free(psnru); - free(psnrv); - - return return_value; -} diff --git a/media/libvpx/libvpx/tools/wrap-commit-msg.py b/media/libvpx/libvpx/tools/wrap-commit-msg.py deleted file mode 100755 index d5b4b046b137..000000000000 --- a/media/libvpx/libvpx/tools/wrap-commit-msg.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python -## Copyright (c) 2012 The WebM project authors. All Rights Reserved. -## -## Use of this source code is governed by a BSD-style license -## that can be found in the LICENSE file in the root of the source -## tree. An additional intellectual property rights grant can be found -## in the file PATENTS. All contributing project authors may -## be found in the AUTHORS file in the root of the source tree. -## -"""Wraps paragraphs of text, preserving manual formatting - -This is like fold(1), but has the special convention of not modifying lines -that start with whitespace. This allows you to intersperse blocks with -special formatting, like code blocks, with written prose. The prose will -be wordwrapped, and the manual formatting will be preserved. - - * This won't handle the case of a bulleted (or ordered) list specially, so - manual wrapping must be done. - -Occasionally it's useful to put something with explicit formatting that -doesn't look at all like a block of text inline. - - indicator = has_leading_whitespace(line); - if (indicator) - preserve_formatting(line); - -The intent is that this docstring would make it through the transform -and still be legible and presented as it is in the source. If additional -cases are handled, update this doc to describe the effect. -""" - -__author__ = "jkoleszar@google.com" -import textwrap -import sys - -def wrap(text): - if text: - return textwrap.fill(text, break_long_words=False) + '\n' - return "" - - -def main(fileobj): - text = "" - output = "" - while True: - line = fileobj.readline() - if not line: - break - - if line.lstrip() == line: - text += line - else: - output += wrap(text) - text="" - output += line - output += wrap(text) - - # Replace the file or write to stdout. 
- if fileobj == sys.stdin: - fileobj = sys.stdout - else: - fileobj.seek(0) - fileobj.truncate(0) - fileobj.write(output) - -if __name__ == "__main__": - if len(sys.argv) > 1: - main(open(sys.argv[1], "r+")) - else: - main(sys.stdin) diff --git a/media/libvpx/libvpx/tools_common.c b/media/libvpx/libvpx/tools_common.c index 6f14c2556173..59978b7f93ab 100644 --- a/media/libvpx/libvpx/tools_common.c +++ b/media/libvpx/libvpx/tools_common.c @@ -46,6 +46,14 @@ va_end(ap); \ } while (0) +#if CONFIG_ENCODERS +/* Swallow warnings about unused results of fread/fwrite */ +static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { + return fread(ptr, size, nmemb, stream); +} +#define fread wrap_fread +#endif + FILE *set_binary_mode(FILE *stream) { (void)stream; #if defined(_WIN32) || defined(__OS2__) @@ -200,8 +208,6 @@ const VpxInterface *get_vpx_decoder_by_fourcc(uint32_t fourcc) { #endif // CONFIG_DECODERS -// TODO(dkovalev): move this function to vpx_image.{c, h}, so it will be part -// of vpx_image_t support int vpx_img_plane_width(const vpx_image_t *img, int plane) { if (plane > 0 && img->x_chroma_shift > 0) return (img->d_w + 1) >> img->x_chroma_shift; @@ -266,6 +272,88 @@ double sse_to_psnr(double samples, double peak, double sse) { } } +#if CONFIG_ENCODERS +int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) { + FILE *f = input_ctx->file; + y4m_input *y4m = &input_ctx->y4m; + int shortread = 0; + + if (input_ctx->file_type == FILE_TYPE_Y4M) { + if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0; + } else { + shortread = read_yuv_frame(input_ctx, img); + } + + return !shortread; +} + +int file_is_y4m(const char detect[4]) { + if (memcmp(detect, "YUV4", 4) == 0) { + return 1; + } + return 0; +} + +int fourcc_is_ivf(const char detect[4]) { + if (memcmp(detect, "DKIF", 4) == 0) { + return 1; + } + return 0; +} + +void open_input_file(struct VpxInputContext *input) { + /* Parse certain options from the input file, if possible */ + input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb") + : set_binary_mode(stdin); + + if (!input->file) fatal("Failed to open input file"); + + if (!fseeko(input->file, 0, SEEK_END)) { + /* Input file is seekable. Figure out how long it is, so we can get + * progress info. + */ + input->length = ftello(input->file); + rewind(input->file); + } + + /* Default to 1:1 pixel aspect ratio. */ + input->pixel_aspect_ratio.numerator = 1; + input->pixel_aspect_ratio.denominator = 1; + + /* For RAW input sources, these bytes will be applied on the first frame + * in read_frame().
+ */ + input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file); + input->detect.position = 0; + + if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) { + if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, + input->only_i420) >= 0) { + input->file_type = FILE_TYPE_Y4M; + input->width = input->y4m.pic_w; + input->height = input->y4m.pic_h; + input->pixel_aspect_ratio.numerator = input->y4m.par_n; + input->pixel_aspect_ratio.denominator = input->y4m.par_d; + input->framerate.numerator = input->y4m.fps_n; + input->framerate.denominator = input->y4m.fps_d; + input->fmt = input->y4m.vpx_fmt; + input->bit_depth = input->y4m.bit_depth; + } else { + fatal("Unsupported Y4M stream."); + } + } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) { + fatal("IVF is not supported as input."); + } else { + input->file_type = FILE_TYPE_RAW; + } +} + +void close_input_file(struct VpxInputContext *input) { + fclose(input->file); + if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m); +} +#endif + // TODO(debargha): Consolidate the functions below into a separate file. #if CONFIG_VP9_HIGHBITDEPTH static void highbd_img_upshift(vpx_image_t *dst, vpx_image_t *src, @@ -459,3 +547,225 @@ void vpx_img_downshift(vpx_image_t *dst, vpx_image_t *src, int down_shift) { } } #endif // CONFIG_VP9_HIGHBITDEPTH + +int compare_img(const vpx_image_t *const img1, const vpx_image_t *const img2) { + uint32_t l_w = img1->d_w; + uint32_t c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const uint32_t c_h = + (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; + uint32_t i; + int match = 1; + + match &= (img1->fmt == img2->fmt); + match &= (img1->d_w == img2->d_w); + match &= (img1->d_h == img2->d_h); +#if CONFIG_VP9_HIGHBITDEPTH + if (img1->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + l_w *= 2; + c_w *= 2; + } +#endif + + for (i = 0; i < img1->d_h; ++i) + match &= (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], + img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], + l_w) == 0); + + for (i = 0; i < c_h; ++i) + match &= (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], + img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], + c_w) == 0); + + for (i = 0; i < c_h; ++i) + match &= (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], + img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], + c_w) == 0); + + return match; +} + +#define mmin(a, b) ((a) < (b) ? 
(a) : (b)) + +#if CONFIG_VP9_HIGHBITDEPTH +void find_mismatch_high(const vpx_image_t *const img1, + const vpx_image_t *const img2, int yloc[4], int uloc[4], + int vloc[4]) { + uint16_t *plane1, *plane2; + uint32_t stride1, stride2; + const uint32_t bsize = 64; + const uint32_t bsizey = bsize >> img1->y_chroma_shift; + const uint32_t bsizex = bsize >> img1->x_chroma_shift; + const uint32_t c_w = + (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const uint32_t c_h = + (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; + int match = 1; + uint32_t i, j; + yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; + plane1 = (uint16_t *)img1->planes[VPX_PLANE_Y]; + plane2 = (uint16_t *)img2->planes[VPX_PLANE_Y]; + stride1 = img1->stride[VPX_PLANE_Y] / 2; + stride2 = img2->stride[VPX_PLANE_Y] / 2; + for (i = 0, match = 1; match && i < img1->d_h; i += bsize) { + for (j = 0; match && j < img1->d_w; j += bsize) { + int k, l; + const int si = mmin(i + bsize, img1->d_h) - i; + const int sj = mmin(j + bsize, img1->d_w) - j; + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(plane1 + (i + k) * stride1 + j + l) != + *(plane2 + (i + k) * stride2 + j + l)) { + yloc[0] = i + k; + yloc[1] = j + l; + yloc[2] = *(plane1 + (i + k) * stride1 + j + l); + yloc[3] = *(plane2 + (i + k) * stride2 + j + l); + match = 0; + break; + } + } + } + } + } + + uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; + plane1 = (uint16_t *)img1->planes[VPX_PLANE_U]; + plane2 = (uint16_t *)img2->planes[VPX_PLANE_U]; + stride1 = img1->stride[VPX_PLANE_U] / 2; + stride2 = img2->stride[VPX_PLANE_U] / 2; + for (i = 0, match = 1; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { + int k, l; + const int si = mmin(i + bsizey, c_h - i); + const int sj = mmin(j + bsizex, c_w - j); + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(plane1 + (i + k) * stride1 + j + l) != + *(plane2 + (i + k) * stride2 + j + l)) { + uloc[0] = i + k; + uloc[1] = j + l; + uloc[2] = *(plane1 + (i + k) * stride1 + j + l); + uloc[3] = *(plane2 + (i + k) * stride2 + j + l); + match = 0; + break; + } + } + } + } + } + + vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; + plane1 = (uint16_t *)img1->planes[VPX_PLANE_V]; + plane2 = (uint16_t *)img2->planes[VPX_PLANE_V]; + stride1 = img1->stride[VPX_PLANE_V] / 2; + stride2 = img2->stride[VPX_PLANE_V] / 2; + for (i = 0, match = 1; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { + int k, l; + const int si = mmin(i + bsizey, c_h - i); + const int sj = mmin(j + bsizex, c_w - j); + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(plane1 + (i + k) * stride1 + j + l) != + *(plane2 + (i + k) * stride2 + j + l)) { + vloc[0] = i + k; + vloc[1] = j + l; + vloc[2] = *(plane1 + (i + k) * stride1 + j + l); + vloc[3] = *(plane2 + (i + k) * stride2 + j + l); + match = 0; + break; + } + } + } + } + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +void find_mismatch(const vpx_image_t *const img1, const vpx_image_t *const img2, + int yloc[4], int uloc[4], int vloc[4]) { + const uint32_t bsize = 64; + const uint32_t bsizey = bsize >> img1->y_chroma_shift; + const uint32_t bsizex = bsize >> img1->x_chroma_shift; + const uint32_t c_w = + (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const uint32_t c_h = + (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; + int match = 1; + uint32_t i, j; + yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; + for (i = 0, match = 1; match && i < 
img1->d_h; i += bsize) { + for (j = 0; match && j < img1->d_w; j += bsize) { + int k, l; + const int si = mmin(i + bsize, img1->d_h) - i; + const int sj = mmin(j + bsize, img1->d_w) - j; + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(img1->planes[VPX_PLANE_Y] + + (i + k) * img1->stride[VPX_PLANE_Y] + j + l) != + *(img2->planes[VPX_PLANE_Y] + + (i + k) * img2->stride[VPX_PLANE_Y] + j + l)) { + yloc[0] = i + k; + yloc[1] = j + l; + yloc[2] = *(img1->planes[VPX_PLANE_Y] + + (i + k) * img1->stride[VPX_PLANE_Y] + j + l); + yloc[3] = *(img2->planes[VPX_PLANE_Y] + + (i + k) * img2->stride[VPX_PLANE_Y] + j + l); + match = 0; + break; + } + } + } + } + } + + uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; + for (i = 0, match = 1; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { + int k, l; + const int si = mmin(i + bsizey, c_h - i); + const int sj = mmin(j + bsizex, c_w - j); + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(img1->planes[VPX_PLANE_U] + + (i + k) * img1->stride[VPX_PLANE_U] + j + l) != + *(img2->planes[VPX_PLANE_U] + + (i + k) * img2->stride[VPX_PLANE_U] + j + l)) { + uloc[0] = i + k; + uloc[1] = j + l; + uloc[2] = *(img1->planes[VPX_PLANE_U] + + (i + k) * img1->stride[VPX_PLANE_U] + j + l); + uloc[3] = *(img2->planes[VPX_PLANE_U] + + (i + k) * img2->stride[VPX_PLANE_U] + j + l); + match = 0; + break; + } + } + } + } + } + vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; + for (i = 0, match = 1; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { + int k, l; + const int si = mmin(i + bsizey, c_h - i); + const int sj = mmin(j + bsizex, c_w - j); + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(img1->planes[VPX_PLANE_V] + + (i + k) * img1->stride[VPX_PLANE_V] + j + l) != + *(img2->planes[VPX_PLANE_V] + + (i + k) * img2->stride[VPX_PLANE_V] + j + l)) { + vloc[0] = i + k; + vloc[1] = j + l; + vloc[2] = *(img1->planes[VPX_PLANE_V] + + (i + k) * img1->stride[VPX_PLANE_V] + j + l); + vloc[3] = *(img2->planes[VPX_PLANE_V] + + (i + k) * img2->stride[VPX_PLANE_V] + j + l); + match = 0; + break; + } + } + } + } + } +} diff --git a/media/libvpx/libvpx/tools_common.h b/media/libvpx/libvpx/tools_common.h index e41de3195f57..4526d9f165c0 100644 --- a/media/libvpx/libvpx/tools_common.h +++ b/media/libvpx/libvpx/tools_common.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef TOOLS_COMMON_H_ -#define TOOLS_COMMON_H_ +#ifndef VPX_TOOLS_COMMON_H_ +#define VPX_TOOLS_COMMON_H_ #include <stdio.h> @@ -33,6 +33,7 @@ typedef int64_t FileOffset; #define ftello ftello64 typedef off64_t FileOffset; #elif CONFIG_OS_SUPPORT +#include <unistd.h> /* NOLINT */ typedef off_t FileOffset; /* Use 32-bit file operations in WebM file format when building ARM * executables (.axf) with RVCT.
*/ @@ -144,8 +145,6 @@ const VpxInterface *get_vpx_decoder_by_index(int i); const VpxInterface *get_vpx_decoder_by_name(const char *name); const VpxInterface *get_vpx_decoder_by_fourcc(uint32_t fourcc); -// TODO(dkovalev): move this function to vpx_image.{c, h}, so it will be part -// of vpx_image_t support int vpx_img_plane_width(const vpx_image_t *img, int plane); int vpx_img_plane_height(const vpx_image_t *img, int plane); void vpx_img_write(const vpx_image_t *img, FILE *file); @@ -153,14 +152,31 @@ int vpx_img_read(vpx_image_t *img, FILE *file); double sse_to_psnr(double samples, double peak, double mse); +#if CONFIG_ENCODERS +int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img); +int file_is_y4m(const char detect[4]); +int fourcc_is_ivf(const char detect[4]); +void open_input_file(struct VpxInputContext *input); +void close_input_file(struct VpxInputContext *input); +#endif + #if CONFIG_VP9_HIGHBITDEPTH void vpx_img_upshift(vpx_image_t *dst, vpx_image_t *src, int input_shift); void vpx_img_downshift(vpx_image_t *dst, vpx_image_t *src, int down_shift); void vpx_img_truncate_16_to_8(vpx_image_t *dst, vpx_image_t *src); #endif +int compare_img(const vpx_image_t *const img1, const vpx_image_t *const img2); +#if CONFIG_VP9_HIGHBITDEPTH +void find_mismatch_high(const vpx_image_t *const img1, + const vpx_image_t *const img2, int yloc[4], int uloc[4], + int vloc[4]); +#endif +void find_mismatch(const vpx_image_t *const img1, const vpx_image_t *const img2, + int yloc[4], int uloc[4], int vloc[4]); + #ifdef __cplusplus } /* extern "C" */ #endif -#endif // TOOLS_COMMON_H_ +#endif // VPX_TOOLS_COMMON_H_ diff --git a/media/libvpx/libvpx/usage_cx.dox b/media/libvpx/libvpx/usage_cx.dox index 92b0d34ef4d5..b2220cfddebb 100644 --- a/media/libvpx/libvpx/usage_cx.dox +++ b/media/libvpx/libvpx/usage_cx.dox @@ -8,6 +8,8 @@ \ref usage_deadline. + \if samples \ref samples + \endif */ diff --git a/media/libvpx/libvpx/usage_dx.dox b/media/libvpx/libvpx/usage_dx.dox index 883ce24926f0..85063f705b6c 100644 --- a/media/libvpx/libvpx/usage_dx.dox +++ b/media/libvpx/libvpx/usage_dx.dox @@ -11,7 +11,9 @@ \ref usage_postproc based on the amount of free CPU time. For more information on the deadline parameter, see \ref usage_deadline. + \if samples \ref samples + \endif \section usage_cb Callback Based Decoding diff --git a/media/libvpx/libvpx/video_common.h b/media/libvpx/libvpx/video_common.h index 44b27a839019..77eb9fac0cd1 100644 --- a/media/libvpx/libvpx/video_common.h +++ b/media/libvpx/libvpx/video_common.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VIDEO_COMMON_H_ -#define VIDEO_COMMON_H_ +#ifndef VPX_VIDEO_COMMON_H_ +#define VPX_VIDEO_COMMON_H_ #include "./tools_common.h" @@ -20,4 +20,4 @@ typedef struct { struct VpxRational time_base; } VpxVideoInfo; -#endif // VIDEO_COMMON_H_ +#endif // VPX_VIDEO_COMMON_H_ diff --git a/media/libvpx/libvpx/video_reader.c b/media/libvpx/libvpx/video_reader.c index a0ba2521c613..16822eff3c13 100644 --- a/media/libvpx/libvpx/video_reader.c +++ b/media/libvpx/libvpx/video_reader.c @@ -30,17 +30,37 @@ VpxVideoReader *vpx_video_reader_open(const char *filename) { char header[32]; VpxVideoReader *reader = NULL; FILE *const file = fopen(filename, "rb"); - if (!file) return NULL; // Can't open file + if (!file) { + fprintf(stderr, "%s can't be opened.\n", filename); // Can't open file + return NULL; + } - if (fread(header, 1, 32, file) != 32) return NULL; // Can't read file header + if (fread(header, 1, 32, file) != 32) { + fprintf(stderr, "File header on %s can't be read.\n", + filename); // Can't read file header + return NULL; + } + if (memcmp(kIVFSignature, header, 4) != 0) { + fprintf(stderr, "The IVF signature on %s is wrong.\n", + filename); // Wrong IVF signature - if (memcmp(kIVFSignature, header, 4) != 0) - return NULL; // Wrong IVF signature + return NULL; + } + if (mem_get_le16(header + 4) != 0) { + fprintf(stderr, "%s uses the wrong IVF version.\n", + filename); // Wrong IVF version - if (mem_get_le16(header + 4) != 0) return NULL; // Wrong IVF version + return NULL; + } reader = calloc(1, sizeof(*reader)); - if (!reader) return NULL; // Can't allocate VpxVideoReader + if (!reader) { + fprintf( + stderr, + "Can't allocate VpxVideoReader\n"); // Can't allocate VpxVideoReader + + return NULL; + } reader->file = file; reader->info.codec_fourcc = mem_get_le32(header + 8); diff --git a/media/libvpx/libvpx/video_reader.h b/media/libvpx/libvpx/video_reader.h index 73c25b00a7d9..1f5c8088bb85 100644 --- a/media/libvpx/libvpx/video_reader.h +++ b/media/libvpx/libvpx/video_reader.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VIDEO_READER_H_ -#define VIDEO_READER_H_ +#ifndef VPX_VIDEO_READER_H_ +#define VPX_VIDEO_READER_H_ #include "./video_common.h" @@ -48,4 +48,4 @@ const VpxVideoInfo *vpx_video_reader_get_info(VpxVideoReader *reader); } // extern "C" #endif -#endif // VIDEO_READER_H_ +#endif // VPX_VIDEO_READER_H_ diff --git a/media/libvpx/libvpx/video_writer.c b/media/libvpx/libvpx/video_writer.c index 56d428b0720f..6e9a848bc3db 100644 --- a/media/libvpx/libvpx/video_writer.c +++ b/media/libvpx/libvpx/video_writer.c @@ -37,11 +37,15 @@ VpxVideoWriter *vpx_video_writer_open(const char *filename, if (container == kContainerIVF) { VpxVideoWriter *writer = NULL; FILE *const file = fopen(filename, "wb"); - if (!file) return NULL; - + if (!file) { + fprintf(stderr, "%s can't be written to.\n", filename); + return NULL; + } writer = malloc(sizeof(*writer)); - if (!writer) return NULL; - + if (!writer) { + fprintf(stderr, "Can't allocate VpxVideoWriter.\n"); + return NULL; + } writer->frame_count = 0; writer->info = *info; writer->file = file; @@ -50,7 +54,7 @@ VpxVideoWriter *vpx_video_writer_open(const char *filename, return writer; } - + fprintf(stderr, "VpxVideoWriter supports only IVF.\n"); return NULL; } diff --git a/media/libvpx/libvpx/video_writer.h b/media/libvpx/libvpx/video_writer.h index a769811c4404..b4d242b920f4 100644 --- a/media/libvpx/libvpx/video_writer.h +++ b/media/libvpx/libvpx/video_writer.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VIDEO_WRITER_H_ -#define VIDEO_WRITER_H_ +#ifndef VPX_VIDEO_WRITER_H_ +#define VPX_VIDEO_WRITER_H_ #include "./video_common.h" @@ -41,4 +41,4 @@ int vpx_video_writer_write_frame(VpxVideoWriter *writer, const uint8_t *buffer, } // extern "C" #endif -#endif // VIDEO_WRITER_H_ +#endif // VPX_VIDEO_WRITER_H_ diff --git a/media/libvpx/libvpx/vp8/common/alloccommon.h b/media/libvpx/libvpx/vp8/common/alloccommon.h index 5d0840c670cd..2d376bbac312 100644 --- a/media/libvpx/libvpx/vp8/common/alloccommon.h +++ b/media/libvpx/libvpx/vp8/common/alloccommon.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_ALLOCCOMMON_H_ -#define VP8_COMMON_ALLOCCOMMON_H_ +#ifndef VPX_VP8_COMMON_ALLOCCOMMON_H_ +#define VPX_VP8_COMMON_ALLOCCOMMON_H_ #include "onyxc_int.h" @@ -21,10 +21,10 @@ void vp8_create_common(VP8_COMMON *oci); void vp8_remove_common(VP8_COMMON *oci); void vp8_de_alloc_frame_buffers(VP8_COMMON *oci); int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height); -void vp8_setup_version(VP8_COMMON *oci); +void vp8_setup_version(VP8_COMMON *cm); #ifdef __cplusplus } // extern "C" #endif -#endif // VP8_COMMON_ALLOCCOMMON_H_ +#endif // VPX_VP8_COMMON_ALLOCCOMMON_H_ diff --git a/media/libvpx/libvpx/vp8/common/arm/loopfilter_arm.c b/media/libvpx/libvpx/vp8/common/arm/loopfilter_arm.c index e12f65a04299..48a19720483d 100644 --- a/media/libvpx/libvpx/vp8/common/arm/loopfilter_arm.c +++ b/media/libvpx/libvpx/vp8/common/arm/loopfilter_arm.c @@ -8,28 +8,12 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "vpx_config.h" -#include "vp8_rtcd.h" +#include "./vpx_config.h" +#include "./vp8_rtcd.h" +#include "vp8/common/arm/loopfilter_arm.h" #include "vp8/common/loopfilter.h" #include "vp8/common/onyxc_int.h" -typedef void loopfilter_y_neon(unsigned char *src, int pitch, - unsigned char blimit, unsigned char limit, - unsigned char thresh); -typedef void loopfilter_uv_neon(unsigned char *u, int pitch, - unsigned char blimit, unsigned char limit, - unsigned char thresh, unsigned char *v); - -extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon; -extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon; -extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon; -extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon; - -extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon; -extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon; -extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon; -extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon; - /* NEON loopfilter functions */ /* Horizontal MB filtering */ void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, diff --git a/media/libvpx/libvpx/vp8/common/arm/loopfilter_arm.h b/media/libvpx/libvpx/vp8/common/arm/loopfilter_arm.h new file mode 100644 index 000000000000..6cf660d22897 --- /dev/null +++ b/media/libvpx/libvpx/vp8/common/arm/loopfilter_arm.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VP8_COMMON_ARM_LOOPFILTER_ARM_H_ +#define VPX_VP8_COMMON_ARM_LOOPFILTER_ARM_H_ + +typedef void loopfilter_y_neon(unsigned char *src, int pitch, + unsigned char blimit, unsigned char limit, + unsigned char thresh); +typedef void loopfilter_uv_neon(unsigned char *u, int pitch, + unsigned char blimit, unsigned char limit, + unsigned char thresh, unsigned char *v); + +loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon; +loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon; +loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon; +loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon; + +loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon; +loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon; +loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon; +loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon; + +#endif // VPX_VP8_COMMON_ARM_LOOPFILTER_ARM_H_ diff --git a/media/libvpx/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c b/media/libvpx/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c index 8520ab5ca016..590956dde1d2 100644 --- a/media/libvpx/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c +++ b/media/libvpx/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c @@ -10,7 +10,9 @@ #include <arm_neon.h> #include <string.h> + #include "./vpx_config.h" +#include "./vp8_rtcd.h" #include "vpx_dsp/arm/mem_neon.h" static const uint8_t bifilter4_coeff[8][2] = { { 128, 0 }, { 112, 16 }, diff --git a/media/libvpx/libvpx/vp8/common/arm/neon/copymem_neon.c b/media/libvpx/libvpx/vp8/common/arm/neon/copymem_neon.c index c1d293b58d81..c89b47d628b7 100644 --- a/media/libvpx/libvpx/vp8/common/arm/neon/copymem_neon.c +++ b/media/libvpx/libvpx/vp8/common/arm/neon/copymem_neon.c @@ -10,6 +10,8 @@ #include <arm_neon.h> +#include "./vp8_rtcd.h" + void vp8_copy_mem8x4_neon(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) { uint8x8_t vtmp; diff --git a/media/libvpx/libvpx/vp8/common/arm/neon/dequantizeb_neon.c b/media/libvpx/libvpx/vp8/common/arm/neon/dequantizeb_neon.c index 6edff3c69f33..791aaea2ae68 100644 --- a/media/libvpx/libvpx/vp8/common/arm/neon/dequantizeb_neon.c +++ b/media/libvpx/libvpx/vp8/common/arm/neon/dequantizeb_neon.c @@ -10,6 +10,7 @@ #include <arm_neon.h> +#include "./vp8_rtcd.h" #include "vp8/common/blockd.h" void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) { diff --git a/media/libvpx/libvpx/vp8/common/arm/neon/idct_blk_neon.c b/media/libvpx/libvpx/vp8/common/arm/neon/idct_blk_neon.c index d61dde86cf54..5c26ce67a434 100644 --- a/media/libvpx/libvpx/vp8/common/arm/neon/idct_blk_neon.c +++ b/media/libvpx/libvpx/vp8/common/arm/neon/idct_blk_neon.c @@ -8,15 +8,226 @@ * be found in the AUTHORS file in the root of the source tree.
*/ -#include "vpx_config.h" -#include "vp8_rtcd.h" +#include <arm_neon.h> -/* place these declarations here because we don't want to maintain them - * outside of this scope - */ -void idct_dequant_full_2x_neon(short *q, short *dq, unsigned char *dst, - int stride); -void idct_dequant_0_2x_neon(short *q, short dq, unsigned char *dst, int stride); +#include "./vp8_rtcd.h" + +static void idct_dequant_0_2x_neon(int16_t *q, int16_t dq, unsigned char *dst, + int stride) { + unsigned char *dst0; + int i, a0, a1; + int16x8x2_t q2Add; + int32x2_t d2s32 = vdup_n_s32(0), d4s32 = vdup_n_s32(0); + uint8x8_t d2u8, d4u8; + uint16x8_t q1u16, q2u16; + + a0 = ((q[0] * dq) + 4) >> 3; + a1 = ((q[16] * dq) + 4) >> 3; + q[0] = q[16] = 0; + q2Add.val[0] = vdupq_n_s16((int16_t)a0); + q2Add.val[1] = vdupq_n_s16((int16_t)a1); + + for (i = 0; i < 2; i++, dst += 4) { + dst0 = dst; + d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0); + dst0 += stride; + d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1); + dst0 += stride; + d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0); + dst0 += stride; + d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1); + + q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), + vreinterpret_u8_s32(d2s32)); + q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), + vreinterpret_u8_s32(d4s32)); + + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); + d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16)); + + d2s32 = vreinterpret_s32_u8(d2u8); + d4s32 = vreinterpret_s32_u8(d4u8); + + dst0 = dst; + vst1_lane_s32((int32_t *)dst0, d2s32, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst0, d2s32, 1); + dst0 += stride; + vst1_lane_s32((int32_t *)dst0, d4s32, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst0, d4s32, 1); + } +} + +static const int16_t cospi8sqrt2minus1 = 20091; +static const int16_t sinpi8sqrt2 = 17734; +// because the lowest bit in 0x8a8c is 0, we can pre-shift this + +static void idct_dequant_full_2x_neon(int16_t *q, int16_t *dq, + unsigned char *dst, int stride) { + unsigned char *dst0, *dst1; + int32x2_t d28, d29, d30, d31; + int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; + int16x8_t qEmpty = vdupq_n_s16(0); + int32x4x2_t q2tmp0, q2tmp1; + int16x8x2_t q2tmp2, q2tmp3; + int16x4_t dLow0, dLow1, dHigh0, dHigh1; + + d28 = d29 = d30 = d31 = vdup_n_s32(0); + + // load dq + q0 = vld1q_s16(dq); + dq += 8; + q1 = vld1q_s16(dq); + + // load q + q2 = vld1q_s16(q); + vst1q_s16(q, qEmpty); + q += 8; + q3 = vld1q_s16(q); + vst1q_s16(q, qEmpty); + q += 8; + q4 = vld1q_s16(q); + vst1q_s16(q, qEmpty); + q += 8; + q5 = vld1q_s16(q); + vst1q_s16(q, qEmpty); + + // load src from dst + dst0 = dst; + dst1 = dst + 4; + d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0); + dst0 += stride; + d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1); + dst1 += stride; + d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0); + dst0 += stride; + d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1); + dst1 += stride; + + d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0); + dst0 += stride; + d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1); + dst1 += stride; + d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0); + d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1); + + q2 = vmulq_s16(q2, q0); + q3 = vmulq_s16(q3, q1); + q4 = vmulq_s16(q4, q0); + q5 = vmulq_s16(q5, q1); + + // vswp + dLow0 = vget_low_s16(q2); + dHigh0 = vget_high_s16(q2); + dLow1 = vget_low_s16(q4); + dHigh1 = vget_high_s16(q4); + q2 = vcombine_s16(dLow0, dLow1); + q4 = vcombine_s16(dHigh0, dHigh1); + + dLow0 = vget_low_s16(q3); + dHigh0 
= vget_high_s16(q3); + dLow1 = vget_low_s16(q5); + dHigh1 = vget_high_s16(q5); + q3 = vcombine_s16(dLow0, dLow1); + q5 = vcombine_s16(dHigh0, dHigh1); + + q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2); + q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2); + q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1); + q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1); + + q10 = vqaddq_s16(q2, q3); + q11 = vqsubq_s16(q2, q3); + + q8 = vshrq_n_s16(q8, 1); + q9 = vshrq_n_s16(q9, 1); + + q4 = vqaddq_s16(q4, q8); + q5 = vqaddq_s16(q5, q9); + + q2 = vqsubq_s16(q6, q5); + q3 = vqaddq_s16(q7, q4); + + q4 = vqaddq_s16(q10, q3); + q5 = vqaddq_s16(q11, q2); + q6 = vqsubq_s16(q11, q2); + q7 = vqsubq_s16(q10, q3); + + q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); + q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); + q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), + vreinterpretq_s16_s32(q2tmp1.val[0])); + q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), + vreinterpretq_s16_s32(q2tmp1.val[1])); + + // loop 2 + q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2); + q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2); + q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1); + q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1); + + q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]); + q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]); + + q10 = vshrq_n_s16(q10, 1); + q11 = vshrq_n_s16(q11, 1); + + q10 = vqaddq_s16(q2tmp2.val[1], q10); + q11 = vqaddq_s16(q2tmp3.val[1], q11); + + q8 = vqsubq_s16(q8, q11); + q9 = vqaddq_s16(q9, q10); + + q4 = vqaddq_s16(q2, q9); + q5 = vqaddq_s16(q3, q8); + q6 = vqsubq_s16(q3, q8); + q7 = vqsubq_s16(q2, q9); + + q4 = vrshrq_n_s16(q4, 3); + q5 = vrshrq_n_s16(q5, 3); + q6 = vrshrq_n_s16(q6, 3); + q7 = vrshrq_n_s16(q7, 3); + + q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); + q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); + q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), + vreinterpretq_s16_s32(q2tmp1.val[0])); + q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), + vreinterpretq_s16_s32(q2tmp1.val[1])); + + q4 = vreinterpretq_s16_u16( + vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]), vreinterpret_u8_s32(d28))); + q5 = vreinterpretq_s16_u16( + vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]), vreinterpret_u8_s32(d29))); + q6 = vreinterpretq_s16_u16( + vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]), vreinterpret_u8_s32(d30))); + q7 = vreinterpretq_s16_u16( + vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]), vreinterpret_u8_s32(d31))); + + d28 = vreinterpret_s32_u8(vqmovun_s16(q4)); + d29 = vreinterpret_s32_u8(vqmovun_s16(q5)); + d30 = vreinterpret_s32_u8(vqmovun_s16(q6)); + d31 = vreinterpret_s32_u8(vqmovun_s16(q7)); + + dst0 = dst; + dst1 = dst + 4; + vst1_lane_s32((int32_t *)dst0, d28, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst1, d28, 1); + dst1 += stride; + vst1_lane_s32((int32_t *)dst0, d29, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst1, d29, 1); + dst1 += stride; + + vst1_lane_s32((int32_t *)dst0, d30, 0); + dst0 += stride; + vst1_lane_s32((int32_t *)dst1, d30, 1); + dst1 += stride; + vst1_lane_s32((int32_t *)dst0, d31, 0); + vst1_lane_s32((int32_t *)dst1, d31, 1); +} void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *dst, int stride, char *eobs) { @@ -43,42 +254,42 @@ void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *dst, } void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, - unsigned char *dstu, - unsigned char *dstv, int 
stride, + unsigned char *dst_u, + unsigned char *dst_v, int stride, char *eobs) { if (((short *)(eobs))[0]) { if (((short *)eobs)[0] & 0xfefe) - idct_dequant_full_2x_neon(q, dq, dstu, stride); + idct_dequant_full_2x_neon(q, dq, dst_u, stride); else - idct_dequant_0_2x_neon(q, dq[0], dstu, stride); + idct_dequant_0_2x_neon(q, dq[0], dst_u, stride); } q += 32; - dstu += 4 * stride; + dst_u += 4 * stride; if (((short *)(eobs))[1]) { if (((short *)eobs)[1] & 0xfefe) - idct_dequant_full_2x_neon(q, dq, dstu, stride); + idct_dequant_full_2x_neon(q, dq, dst_u, stride); else - idct_dequant_0_2x_neon(q, dq[0], dstu, stride); + idct_dequant_0_2x_neon(q, dq[0], dst_u, stride); } q += 32; if (((short *)(eobs))[2]) { if (((short *)eobs)[2] & 0xfefe) - idct_dequant_full_2x_neon(q, dq, dstv, stride); + idct_dequant_full_2x_neon(q, dq, dst_v, stride); else - idct_dequant_0_2x_neon(q, dq[0], dstv, stride); + idct_dequant_0_2x_neon(q, dq[0], dst_v, stride); } q += 32; - dstv += 4 * stride; + dst_v += 4 * stride; if (((short *)(eobs))[3]) { if (((short *)eobs)[3] & 0xfefe) - idct_dequant_full_2x_neon(q, dq, dstv, stride); + idct_dequant_full_2x_neon(q, dq, dst_v, stride); else - idct_dequant_0_2x_neon(q, dq[0], dstv, stride); + idct_dequant_0_2x_neon(q, dq[0], dst_v, stride); } } diff --git a/media/libvpx/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c b/media/libvpx/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c deleted file mode 100644 index c83102a5cc69..000000000000 --- a/media/libvpx/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -void idct_dequant_0_2x_neon(int16_t *q, int16_t dq, unsigned char *dst, - int stride) { - unsigned char *dst0; - int i, a0, a1; - int16x8x2_t q2Add; - int32x2_t d2s32 = vdup_n_s32(0), d4s32 = vdup_n_s32(0); - uint8x8_t d2u8, d4u8; - uint16x8_t q1u16, q2u16; - - a0 = ((q[0] * dq) + 4) >> 3; - a1 = ((q[16] * dq) + 4) >> 3; - q[0] = q[16] = 0; - q2Add.val[0] = vdupq_n_s16((int16_t)a0); - q2Add.val[1] = vdupq_n_s16((int16_t)a1); - - for (i = 0; i < 2; i++, dst += 4) { - dst0 = dst; - d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0); - dst0 += stride; - d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1); - dst0 += stride; - d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0); - dst0 += stride; - d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1); - - q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), - vreinterpret_u8_s32(d2s32)); - q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), - vreinterpret_u8_s32(d4s32)); - - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); - d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16)); - - d2s32 = vreinterpret_s32_u8(d2u8); - d4s32 = vreinterpret_s32_u8(d4u8); - - dst0 = dst; - vst1_lane_s32((int32_t *)dst0, d2s32, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d2s32, 1); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d4s32, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d4s32, 1); - } - return; -} diff --git a/media/libvpx/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c b/media/libvpx/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c deleted file mode 100644 index f30671cc3f18..000000000000 --- a/media/libvpx/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree.
- */ - -#include <arm_neon.h> - -static const int16_t cospi8sqrt2minus1 = 20091; -static const int16_t sinpi8sqrt2 = 17734; -// because the lowest bit in 0x8a8c is 0, we can pre-shift this - -void idct_dequant_full_2x_neon(int16_t *q, int16_t *dq, unsigned char *dst, - int stride) { - unsigned char *dst0, *dst1; - int32x2_t d28, d29, d30, d31; - int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; - int16x8_t qEmpty = vdupq_n_s16(0); - int32x4x2_t q2tmp0, q2tmp1; - int16x8x2_t q2tmp2, q2tmp3; - int16x4_t dLow0, dLow1, dHigh0, dHigh1; - - d28 = d29 = d30 = d31 = vdup_n_s32(0); - - // load dq - q0 = vld1q_s16(dq); - dq += 8; - q1 = vld1q_s16(dq); - - // load q - q2 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q3 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q4 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q5 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - - // load src from dst - dst0 = dst; - dst1 = dst + 4; - d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0); - dst0 += stride; - d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1); - dst1 += stride; - d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0); - dst0 += stride; - d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1); - dst1 += stride; - - d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0); - dst0 += stride; - d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1); - dst1 += stride; - d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0); - d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1); - - q2 = vmulq_s16(q2, q0); - q3 = vmulq_s16(q3, q1); - q4 = vmulq_s16(q4, q0); - q5 = vmulq_s16(q5, q1); - - // vswp - dLow0 = vget_low_s16(q2); - dHigh0 = vget_high_s16(q2); - dLow1 = vget_low_s16(q4); - dHigh1 = vget_high_s16(q4); - q2 = vcombine_s16(dLow0, dLow1); - q4 = vcombine_s16(dHigh0, dHigh1); - - dLow0 = vget_low_s16(q3); - dHigh0 = vget_high_s16(q3); - dLow1 = vget_low_s16(q5); - dHigh1 = vget_high_s16(q5); - q3 = vcombine_s16(dLow0, dLow1); - q5 = vcombine_s16(dHigh0, dHigh1); - - q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2); - q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2); - q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1); - q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1); - - q10 = vqaddq_s16(q2, q3); - q11 = vqsubq_s16(q2, q3); - - q8 = vshrq_n_s16(q8, 1); - q9 = vshrq_n_s16(q9, 1); - - q4 = vqaddq_s16(q4, q8); - q5 = vqaddq_s16(q5, q9); - - q2 = vqsubq_s16(q6, q5); - q3 = vqaddq_s16(q7, q4); - - q4 = vqaddq_s16(q10, q3); - q5 = vqaddq_s16(q11, q2); - q6 = vqsubq_s16(q11, q2); - q7 = vqsubq_s16(q10, q3); - - q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); - q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); - q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), - vreinterpretq_s16_s32(q2tmp1.val[0])); - q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), - vreinterpretq_s16_s32(q2tmp1.val[1])); - - // loop 2 - q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2); - q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2); - q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1); - q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1); - - q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]); - q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]); - - q10 = vshrq_n_s16(q10, 1); - q11 = vshrq_n_s16(q11, 1); - - q10 = vqaddq_s16(q2tmp2.val[1], q10); - q11 = vqaddq_s16(q2tmp3.val[1], q11); - - q8 = vqsubq_s16(q8, q11); - q9 = vqaddq_s16(q9, q10); - - q4 = vqaddq_s16(q2, q9); - q5 = vqaddq_s16(q3, q8); - q6 = vqsubq_s16(q3, q8); - q7 = vqsubq_s16(q2, q9); - - q4 = vrshrq_n_s16(q4, 3); - q5 = vrshrq_n_s16(q5, 3); - q6 = 
vrshrq_n_s16(q6, 3); - q7 = vrshrq_n_s16(q7, 3); - - q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); - q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); - q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), - vreinterpretq_s16_s32(q2tmp1.val[0])); - q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), - vreinterpretq_s16_s32(q2tmp1.val[1])); - - q4 = vreinterpretq_s16_u16( - vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]), vreinterpret_u8_s32(d28))); - q5 = vreinterpretq_s16_u16( - vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]), vreinterpret_u8_s32(d29))); - q6 = vreinterpretq_s16_u16( - vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]), vreinterpret_u8_s32(d30))); - q7 = vreinterpretq_s16_u16( - vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]), vreinterpret_u8_s32(d31))); - - d28 = vreinterpret_s32_u8(vqmovun_s16(q4)); - d29 = vreinterpret_s32_u8(vqmovun_s16(q5)); - d30 = vreinterpret_s32_u8(vqmovun_s16(q6)); - d31 = vreinterpret_s32_u8(vqmovun_s16(q7)); - - dst0 = dst; - dst1 = dst + 4; - vst1_lane_s32((int32_t *)dst0, d28, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d28, 1); - dst1 += stride; - vst1_lane_s32((int32_t *)dst0, d29, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d29, 1); - dst1 += stride; - - vst1_lane_s32((int32_t *)dst0, d30, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d30, 1); - dst1 += stride; - vst1_lane_s32((int32_t *)dst0, d31, 0); - vst1_lane_s32((int32_t *)dst1, d31, 1); - return; -} diff --git a/media/libvpx/libvpx/vp8/common/arm/neon/iwalsh_neon.c b/media/libvpx/libvpx/vp8/common/arm/neon/iwalsh_neon.c index 6c4bcc134b2a..91600bfc005f 100644 --- a/media/libvpx/libvpx/vp8/common/arm/neon/iwalsh_neon.c +++ b/media/libvpx/libvpx/vp8/common/arm/neon/iwalsh_neon.c @@ -10,6 +10,8 @@ #include +#include "./vp8_rtcd.h" + void vp8_short_inv_walsh4x4_neon(int16_t *input, int16_t *mb_dqcoeff) { int16x8_t q0s16, q1s16, q2s16, q3s16; int16x4_t d4s16, d5s16, d6s16, d7s16; diff --git a/media/libvpx/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c b/media/libvpx/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c index a16821970577..df983b23a3df 100644 --- a/media/libvpx/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c +++ b/media/libvpx/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c @@ -9,7 +9,9 @@ */ #include + #include "./vpx_config.h" +#include "./vp8_rtcd.h" static INLINE void vp8_loop_filter_simple_horizontal_edge_neon( unsigned char *s, int p, const unsigned char *blimit) { diff --git a/media/libvpx/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c b/media/libvpx/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c index 80a222d24801..fbc83ae290d1 100644 --- a/media/libvpx/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c +++ b/media/libvpx/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c @@ -9,7 +9,9 @@ */ #include + #include "./vpx_config.h" +#include "./vp8_rtcd.h" #include "vpx_ports/arm.h" #ifdef VPX_INCOMPATIBLE_GCC diff --git a/media/libvpx/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c b/media/libvpx/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c index 65eec300ff39..fafaf2d451c4 100644 --- a/media/libvpx/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c +++ b/media/libvpx/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c @@ -9,7 +9,9 @@ */ #include + #include "./vpx_config.h" +#include "vp8/common/arm/loopfilter_arm.h" static INLINE void vp8_mbloop_filter_neon(uint8x16_t qblimit, 
// mblimit uint8x16_t qlimit, // limit diff --git a/media/libvpx/libvpx/vp8/common/arm/neon/sixtappredict_neon.c b/media/libvpx/libvpx/vp8/common/arm/neon/sixtappredict_neon.c index aa2567df792e..48e86d327814 100644 --- a/media/libvpx/libvpx/vp8/common/arm/neon/sixtappredict_neon.c +++ b/media/libvpx/libvpx/vp8/common/arm/neon/sixtappredict_neon.c @@ -11,6 +11,7 @@ #include #include #include "./vpx_config.h" +#include "./vp8_rtcd.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_ports/mem.h" diff --git a/media/libvpx/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c b/media/libvpx/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c index d7286739dad5..ebc004a04871 100644 --- a/media/libvpx/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c +++ b/media/libvpx/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c @@ -9,7 +9,9 @@ */ #include + #include "./vpx_config.h" +#include "vp8/common/arm/loopfilter_arm.h" #include "vpx_ports/arm.h" static INLINE void vp8_loop_filter_neon(uint8x16_t qblimit, // flimit diff --git a/media/libvpx/libvpx/vp8/common/blockd.c b/media/libvpx/libvpx/vp8/common/blockd.c index f47c5bae1586..22905c10a6f4 100644 --- a/media/libvpx/libvpx/vp8/common/blockd.c +++ b/media/libvpx/libvpx/vp8/common/blockd.c @@ -11,9 +11,9 @@ #include "blockd.h" #include "vpx_mem/vpx_mem.h" -const unsigned char vp8_block2left[25] = { - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 -}; -const unsigned char vp8_block2above[25] = { - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8 -}; +const unsigned char vp8_block2left[25] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, + 2, 2, 2, 3, 3, 3, 3, 4, 4, + 5, 5, 6, 6, 7, 7, 8 }; +const unsigned char vp8_block2above[25] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, + 1, 2, 3, 0, 1, 2, 3, 4, 5, + 4, 5, 6, 7, 6, 7, 8 }; diff --git a/media/libvpx/libvpx/vp8/common/blockd.h b/media/libvpx/libvpx/vp8/common/blockd.h index 1a3aad16afda..02abe053cb4f 100644 --- a/media/libvpx/libvpx/vp8/common/blockd.h +++ b/media/libvpx/libvpx/vp8/common/blockd.h @@ -8,11 +8,12 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_BLOCKD_H_ -#define VP8_COMMON_BLOCKD_H_ +#ifndef VPX_VP8_COMMON_BLOCKD_H_ +#define VPX_VP8_COMMON_BLOCKD_H_ void vpx_log(const char *format, ...); +#include "vpx/internal/vpx_codec_internal.h" #include "vpx_config.h" #include "vpx_scale/yv12config.h" #include "mv.h" @@ -37,7 +38,9 @@ extern "C" { #define SEGMENT_DELTADATA 0 #define SEGMENT_ABSDATA 1 -typedef struct { int r, c; } POS; +typedef struct { + int r, c; +} POS; #define PLANE_TYPE_Y_NO_DC 0 #define PLANE_TYPE_Y2 1 @@ -180,6 +183,9 @@ typedef struct { unsigned int low_res_ref_frames[MAX_REF_FRAMES]; // The video frame counter value for the key frame, for lowest resolution. unsigned int key_frame_counter_value; + // Flags to signal skipped encoding of previous and base layer stream. 
+ unsigned int skip_encoding_prev_stream; + unsigned int skip_encoding_base_stream; LOWER_RES_MB_INFO *mb_info; } LOWER_RES_FRAME_INFO; #endif @@ -196,8 +202,9 @@ typedef struct blockd { union b_mode_info bmi; } BLOCKD; -typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst, - int yofst, unsigned char *dst, int dst_pitch); +typedef void (*vp8_subpix_fn_t)(unsigned char *src_ptr, int src_pixels_per_line, + int xoffset, int yoffset, + unsigned char *dst_ptr, int dst_pitch); typedef struct macroblockd { DECLARE_ALIGNED(16, unsigned char, predictor[384]); @@ -283,7 +290,9 @@ typedef struct macroblockd { int corrupted; -#if ARCH_X86 || ARCH_X86_64 + struct vpx_internal_error_info error_info; + +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 /* This is an intermediate buffer currently used in sub-pixel motion search * to keep a copy of the reference area. This buffer can be used for other * purpose. @@ -299,4 +308,4 @@ extern void vp8_setup_block_dptrs(MACROBLOCKD *x); } // extern "C" #endif -#endif // VP8_COMMON_BLOCKD_H_ +#endif // VPX_VP8_COMMON_BLOCKD_H_ diff --git a/media/libvpx/libvpx/vp8/common/coefupdateprobs.h b/media/libvpx/libvpx/vp8/common/coefupdateprobs.h index 9b01bba312d0..b342096b5523 100644 --- a/media/libvpx/libvpx/vp8/common/coefupdateprobs.h +++ b/media/libvpx/libvpx/vp8/common/coefupdateprobs.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_COEFUPDATEPROBS_H_ -#define VP8_COMMON_COEFUPDATEPROBS_H_ +#ifndef VPX_VP8_COMMON_COEFUPDATEPROBS_H_ +#define VPX_VP8_COMMON_COEFUPDATEPROBS_H_ #ifdef __cplusplus extern "C" { @@ -194,4 +194,4 @@ const vp8_prob vp8_coef_update_probs } // extern "C" #endif -#endif // VP8_COMMON_COEFUPDATEPROBS_H_ +#endif // VPX_VP8_COMMON_COEFUPDATEPROBS_H_ diff --git a/media/libvpx/libvpx/vp8/common/common.h b/media/libvpx/libvpx/vp8/common/common.h index bbfc4f39348f..2c30e8d6c519 100644 --- a/media/libvpx/libvpx/vp8/common/common.h +++ b/media/libvpx/libvpx/vp8/common/common.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_COMMON_H_ -#define VP8_COMMON_COMMON_H_ +#ifndef VPX_VP8_COMMON_COMMON_H_ +#define VPX_VP8_COMMON_COMMON_H_ #include @@ -31,18 +31,18 @@ extern "C" { /* Use this for variably-sized arrays. */ -#define vp8_copy_array(Dest, Src, N) \ - { \ - assert(sizeof(*Dest) == sizeof(*Src)); \ - memcpy(Dest, Src, N * sizeof(*Src)); \ +#define vp8_copy_array(Dest, Src, N) \ + { \ + assert(sizeof(*(Dest)) == sizeof(*(Src))); \ + memcpy(Dest, Src, (N) * sizeof(*(Src))); \ } -#define vp8_zero(Dest) memset(&Dest, 0, sizeof(Dest)); +#define vp8_zero(Dest) memset(&(Dest), 0, sizeof(Dest)); -#define vp8_zero_array(Dest, N) memset(Dest, 0, N * sizeof(*Dest)); +#define vp8_zero_array(Dest, N) memset(Dest, 0, (N) * sizeof(*(Dest))); #ifdef __cplusplus } // extern "C" #endif -#endif // VP8_COMMON_COMMON_H_ +#endif // VPX_VP8_COMMON_COMMON_H_ diff --git a/media/libvpx/libvpx/vp8/common/default_coef_probs.h b/media/libvpx/libvpx/vp8/common/default_coef_probs.h index 8c861ac87633..b25e4a45a3c6 100644 --- a/media/libvpx/libvpx/vp8/common/default_coef_probs.h +++ b/media/libvpx/libvpx/vp8/common/default_coef_probs.h @@ -6,10 +6,10 @@ * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
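The common.h hunk above exists because unparenthesized macro parameters interact badly with operator precedence at the call site. A small standalone illustration (clear_counts is a hypothetical caller, not part of libvpx):

#include <string.h>

/* Without the added parentheses, vp8_zero_array(counts, n + 1) would expand
 * to memset(counts, 0, n + 1 * sizeof(*counts)), i.e. n + sizeof(*counts),
 * because * binds tighter than +. The fixed macro expands to the intended
 * (n + 1) * sizeof(*counts). */
#define vp8_zero_array(Dest, N) memset(Dest, 0, (N) * sizeof(*(Dest)));

static void clear_counts(int *counts, int n) {
  vp8_zero_array(counts, n + 1) /* expands correctly with the fixed macro */
}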
-*/ + */ -#ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_ -#define VP8_COMMON_DEFAULT_COEF_PROBS_H_ +#ifndef VPX_VP8_COMMON_DEFAULT_COEF_PROBS_H_ +#define VPX_VP8_COMMON_DEFAULT_COEF_PROBS_H_ #ifdef __cplusplus extern "C" { @@ -157,4 +157,4 @@ static const vp8_prob default_coef_probs } // extern "C" #endif -#endif // VP8_COMMON_DEFAULT_COEF_PROBS_H_ +#endif // VPX_VP8_COMMON_DEFAULT_COEF_PROBS_H_ diff --git a/media/libvpx/libvpx/vp8/common/entropy.c b/media/libvpx/libvpx/vp8/common/entropy.c index f61fa9e8e4b4..fc4a3539fdf3 100644 --- a/media/libvpx/libvpx/vp8/common/entropy.c +++ b/media/libvpx/libvpx/vp8/common/entropy.c @@ -28,9 +28,9 @@ DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) = { - 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7 -}; +DECLARE_ALIGNED(16, const unsigned char, + vp8_coef_bands[16]) = { 0, 1, 2, 3, 6, 4, 5, 6, + 6, 6, 6, 6, 6, 6, 6, 7 }; DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]) = { @@ -41,9 +41,9 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15, }; -DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = { - 1, 2, 6, 7, 3, 5, 8, 13, 4, 9, 12, 14, 10, 11, 15, 16 -}; +DECLARE_ALIGNED(16, const short, + vp8_default_inv_zig_zag[16]) = { 1, 2, 6, 7, 3, 5, 8, 13, + 4, 9, 12, 14, 10, 11, 15, 16 }; /* vp8_default_zig_zag_mask generated with: @@ -129,9 +129,9 @@ static const vp8_tree_index cat2[4] = { 2, 2, 0, 0 }; static const vp8_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 }; static const vp8_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 }; static const vp8_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 }; -static const vp8_tree_index cat6[22] = { - 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, 16, 16, 18, 18, 20, 20, 0, 0 -}; +static const vp8_tree_index cat6[22] = { 2, 2, 4, 4, 6, 6, 8, 8, + 10, 10, 12, 12, 14, 14, 16, 16, + 18, 18, 20, 20, 0, 0 }; const vp8_extra_bit_struct vp8_extra_bits[12] = { { 0, 0, 0, 0 }, { 0, 0, 0, 1 }, { 0, 0, 0, 2 }, diff --git a/media/libvpx/libvpx/vp8/common/entropy.h b/media/libvpx/libvpx/vp8/common/entropy.h index d088560011b0..fbdb7bcfca80 100644 --- a/media/libvpx/libvpx/vp8/common/entropy.h +++ b/media/libvpx/libvpx/vp8/common/entropy.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
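The entropy.c hunks above only re-wrap the tables; the values are unchanged. The two scan tables are related by a 1-based inverse permutation, which this standalone check (illustrative, not from the patch) makes explicit:

#include <assert.h>

static const int zig_zag[16] = { 0, 1, 4,  8,  5, 2,  3,  6,
                                 9, 12, 13, 10, 7, 11, 14, 15 };
static const short inv_zig_zag[16] = { 1, 2, 6,  7,  3,  5,  8, 13,
                                       4, 9, 12, 14, 10, 11, 15, 16 };

/* inv_zig_zag maps a raster position back to its 1-based scan index, so
 * inv_zig_zag[zig_zag[i]] == i + 1 holds for every i. */
static void check_zig_zag_tables(void) {
  int i;
  for (i = 0; i < 16; ++i) assert(inv_zig_zag[zig_zag[i]] == i + 1);
}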
*/ -#ifndef VP8_COMMON_ENTROPY_H_ -#define VP8_COMMON_ENTROPY_H_ +#ifndef VPX_VP8_COMMON_ENTROPY_H_ +#define VPX_VP8_COMMON_ENTROPY_H_ #include "treecoder.h" #include "blockd.h" @@ -105,4 +105,4 @@ void vp8_coef_tree_initialize(void); } // extern "C" #endif -#endif // VP8_COMMON_ENTROPY_H_ +#endif // VPX_VP8_COMMON_ENTROPY_H_ diff --git a/media/libvpx/libvpx/vp8/common/entropymode.c b/media/libvpx/libvpx/vp8/common/entropymode.c index 239492a8cb80..f61e0c2e2bc6 100644 --- a/media/libvpx/libvpx/vp8/common/entropymode.c +++ b/media/libvpx/libvpx/vp8/common/entropymode.c @@ -75,9 +75,9 @@ const vp8_tree_index vp8_ymode_tree[8] = { -DC_PRED, 2, 4, 6, -V_PRED, -H_PRED, -TM_PRED, -B_PRED }; -const vp8_tree_index vp8_kf_ymode_tree[8] = { - -B_PRED, 2, 4, 6, -DC_PRED, -V_PRED, -H_PRED, -TM_PRED -}; +const vp8_tree_index vp8_kf_ymode_tree[8] = { -B_PRED, 2, 4, + 6, -DC_PRED, -V_PRED, + -H_PRED, -TM_PRED }; const vp8_tree_index vp8_uv_mode_tree[6] = { -DC_PRED, 2, -V_PRED, 4, -H_PRED, -TM_PRED }; @@ -99,6 +99,6 @@ void vp8_init_mbmode_probs(VP8_COMMON *x) { memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob)); } -void vp8_default_bmode_probs(vp8_prob p[VP8_BINTRAMODES - 1]) { - memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob)); +void vp8_default_bmode_probs(vp8_prob dest[VP8_BINTRAMODES - 1]) { + memcpy(dest, vp8_bmode_prob, sizeof(vp8_bmode_prob)); } diff --git a/media/libvpx/libvpx/vp8/common/entropymode.h b/media/libvpx/libvpx/vp8/common/entropymode.h index b3fad19be0eb..c772cece573b 100644 --- a/media/libvpx/libvpx/vp8/common/entropymode.h +++ b/media/libvpx/libvpx/vp8/common/entropymode.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_ENTROPYMODE_H_ -#define VP8_COMMON_ENTROPYMODE_H_ +#ifndef VPX_VP8_COMMON_ENTROPYMODE_H_ +#define VPX_VP8_COMMON_ENTROPYMODE_H_ #include "onyxc_int.h" #include "treecoder.h" @@ -85,4 +85,4 @@ void vp8_kf_default_bmode_probs( } // extern "C" #endif -#endif // VP8_COMMON_ENTROPYMODE_H_ +#endif // VPX_VP8_COMMON_ENTROPYMODE_H_ diff --git a/media/libvpx/libvpx/vp8/common/entropymv.h b/media/libvpx/libvpx/vp8/common/entropymv.h index 63730009033d..40039f5b2c33 100644 --- a/media/libvpx/libvpx/vp8/common/entropymv.h +++ b/media/libvpx/libvpx/vp8/common/entropymv.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
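The vp8_tree_index arrays reformatted above encode binary token trees: a nonnegative entry is the index of a node's pair of children, and a negative entry is a leaf storing the negated token (for example -DC_PRED). A sketch of the walk, with the arithmetic decoder replaced by a caller-supplied bit array (a simplification under stated assumptions; the real reader is vp8's boolean decoder, and tree_index stands in for vp8's own index type):

typedef int tree_index; /* stand-in for vp8's tree index type */

/* Walk a vp8-style tree using pre-extracted bits. With vp8_ymode_tree,
 * the bit sequence {1, 0, 0} walks 0 -> 2 -> 4 -> -V_PRED and returns
 * V_PRED. */
static int treed_read(const tree_index *tree, const int *bits) {
  tree_index i = 0;
  int n = 0;
  do {
    i = tree[i + bits[n++]]; /* left child at tree[i], right at tree[i+1] */
  } while (i > 0);
  return -i; /* leaves hold the negated token */
}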
*/ -#ifndef VP8_COMMON_ENTROPYMV_H_ -#define VP8_COMMON_ENTROPYMV_H_ +#ifndef VPX_VP8_COMMON_ENTROPYMV_H_ +#define VPX_VP8_COMMON_ENTROPYMV_H_ #include "treecoder.h" @@ -46,4 +46,4 @@ extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2]; } // extern "C" #endif -#endif // VP8_COMMON_ENTROPYMV_H_ +#endif // VPX_VP8_COMMON_ENTROPYMV_H_ diff --git a/media/libvpx/libvpx/vp8/common/extend.c b/media/libvpx/libvpx/vp8/common/extend.c index 2d67b516bef6..f4dbce2cd537 100644 --- a/media/libvpx/libvpx/vp8/common/extend.c +++ b/media/libvpx/libvpx/vp8/common/extend.c @@ -20,8 +20,7 @@ static void copy_and_extend_plane(unsigned char *s, /* source */ int et, /* extend top border */ int el, /* extend left border */ int eb, /* extend bottom border */ - int er /* extend right border */ - ) { + int er) { /* extend right border */ int i; unsigned char *src_ptr1, *src_ptr2; unsigned char *dest_ptr1, *dest_ptr2; diff --git a/media/libvpx/libvpx/vp8/common/extend.h b/media/libvpx/libvpx/vp8/common/extend.h index 7da5ce31dac7..586a38a4f377 100644 --- a/media/libvpx/libvpx/vp8/common/extend.h +++ b/media/libvpx/libvpx/vp8/common/extend.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_EXTEND_H_ -#define VP8_COMMON_EXTEND_H_ +#ifndef VPX_VP8_COMMON_EXTEND_H_ +#define VPX_VP8_COMMON_EXTEND_H_ #include "vpx_scale/yv12config.h" @@ -29,4 +29,4 @@ void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, } // extern "C" #endif -#endif // VP8_COMMON_EXTEND_H_ +#endif // VPX_VP8_COMMON_EXTEND_H_ diff --git a/media/libvpx/libvpx/vp8/common/filter.h b/media/libvpx/libvpx/vp8/common/filter.h index f1d5ece4a5f3..6acee22b21b8 100644 --- a/media/libvpx/libvpx/vp8/common/filter.h +++ b/media/libvpx/libvpx/vp8/common/filter.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_FILTER_H_ -#define VP8_COMMON_FILTER_H_ +#ifndef VPX_VP8_COMMON_FILTER_H_ +#define VPX_VP8_COMMON_FILTER_H_ #include "vpx_ports/mem.h" @@ -28,4 +28,4 @@ extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]); } // extern "C" #endif -#endif // VP8_COMMON_FILTER_H_ +#endif // VPX_VP8_COMMON_FILTER_H_ diff --git a/media/libvpx/libvpx/vp8/common/findnearmv.c b/media/libvpx/libvpx/vp8/common/findnearmv.c index f40d2c6bde5b..6889fdeddef9 100644 --- a/media/libvpx/libvpx/vp8/common/findnearmv.c +++ b/media/libvpx/libvpx/vp8/common/findnearmv.c @@ -21,19 +21,20 @@ const unsigned char vp8_mbsplit_offset[4][16] = { Note that we only consider one 4x4 subblock from each candidate 16x16 macroblock. 
*/ void vp8_find_near_mvs(MACROBLOCKD *xd, const MODE_INFO *here, int_mv *nearest, - int_mv *nearby, int_mv *best_mv, int cnt[4], + int_mv *nearby, int_mv *best_mv, int near_mv_ref_cnts[4], int refframe, int *ref_frame_sign_bias) { const MODE_INFO *above = here - xd->mode_info_stride; const MODE_INFO *left = here - 1; const MODE_INFO *aboveleft = above - 1; int_mv near_mvs[4]; int_mv *mv = near_mvs; - int *cntx = cnt; + int *cntx = near_mv_ref_cnts; enum { CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; /* Zero accumulators */ mv[0].as_int = mv[1].as_int = mv[2].as_int = 0; - cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0; + near_mv_ref_cnts[0] = near_mv_ref_cnts[1] = near_mv_ref_cnts[2] = + near_mv_ref_cnts[3] = 0; /* Process above */ if (above->mbmi.ref_frame != INTRA_FRAME) { @@ -63,7 +64,7 @@ void vp8_find_near_mvs(MACROBLOCKD *xd, const MODE_INFO *here, int_mv *nearest, *cntx += 2; } else { - cnt[CNT_INTRA] += 2; + near_mv_ref_cnts[CNT_INTRA] += 2; } } @@ -83,33 +84,34 @@ void vp8_find_near_mvs(MACROBLOCKD *xd, const MODE_INFO *here, int_mv *nearest, *cntx += 1; } else { - cnt[CNT_INTRA] += 1; + near_mv_ref_cnts[CNT_INTRA] += 1; } } /* If we have three distinct MV's ... */ - if (cnt[CNT_SPLITMV]) { + if (near_mv_ref_cnts[CNT_SPLITMV]) { /* See if above-left MV can be merged with NEAREST */ - if (mv->as_int == near_mvs[CNT_NEAREST].as_int) cnt[CNT_NEAREST] += 1; + if (mv->as_int == near_mvs[CNT_NEAREST].as_int) + near_mv_ref_cnts[CNT_NEAREST] += 1; } - cnt[CNT_SPLITMV] = + near_mv_ref_cnts[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV) + (left->mbmi.mode == SPLITMV)) * 2 + (aboveleft->mbmi.mode == SPLITMV); /* Swap near and nearest if necessary */ - if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { + if (near_mv_ref_cnts[CNT_NEAR] > near_mv_ref_cnts[CNT_NEAREST]) { int tmp; - tmp = cnt[CNT_NEAREST]; - cnt[CNT_NEAREST] = cnt[CNT_NEAR]; - cnt[CNT_NEAR] = tmp; + tmp = near_mv_ref_cnts[CNT_NEAREST]; + near_mv_ref_cnts[CNT_NEAREST] = near_mv_ref_cnts[CNT_NEAR]; + near_mv_ref_cnts[CNT_NEAR] = tmp; tmp = near_mvs[CNT_NEAREST].as_int; near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int; near_mvs[CNT_NEAR].as_int = tmp; } /* Use near_mvs[0] to store the "best" MV */ - if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA]) { + if (near_mv_ref_cnts[CNT_NEAREST] >= near_mv_ref_cnts[CNT_INTRA]) { near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST]; } diff --git a/media/libvpx/libvpx/vp8/common/findnearmv.h b/media/libvpx/libvpx/vp8/common/findnearmv.h index c1eaa2698009..d7db9544aa3a 100644 --- a/media/libvpx/libvpx/vp8/common/findnearmv.h +++ b/media/libvpx/libvpx/vp8/common/findnearmv.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
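For reference while reading the renamed counters: vp8_find_near_mvs gives the above and left neighbours a weight of 2 and the above-left neighbour a weight of 1, accumulating votes per candidate, and then keeps NEAREST holding the best-voted candidate. The swap step, condensed from the function above into a standalone form (int_mv reduced to a plain int for brevity):

enum { CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };

/* Keep the NEAREST slot holding the candidate with the most neighbour votes. */
static void keep_nearest_first(int cnt[4], int mv_bits[4]) {
  if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
    int tmp = cnt[CNT_NEAREST];
    cnt[CNT_NEAREST] = cnt[CNT_NEAR];
    cnt[CNT_NEAR] = tmp;
    tmp = mv_bits[CNT_NEAREST];
    mv_bits[CNT_NEAREST] = mv_bits[CNT_NEAR];
    mv_bits[CNT_NEAR] = tmp;
  }
}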
*/ -#ifndef VP8_COMMON_FINDNEARMV_H_ -#define VP8_COMMON_FINDNEARMV_H_ +#ifndef VPX_VP8_COMMON_FINDNEARMV_H_ +#define VPX_VP8_COMMON_FINDNEARMV_H_ #include "./vpx_config.h" #include "mv.h" @@ -70,7 +70,7 @@ static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge, } void vp8_find_near_mvs(MACROBLOCKD *xd, const MODE_INFO *here, int_mv *nearest, - int_mv *nearby, int_mv *best, int near_mv_ref_cts[4], + int_mv *nearby, int_mv *best_mv, int near_mv_ref_cnts[4], int refframe, int *ref_frame_sign_bias); int vp8_find_near_mvs_bias(MACROBLOCKD *xd, const MODE_INFO *here, @@ -148,4 +148,4 @@ static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, } // extern "C" #endif -#endif // VP8_COMMON_FINDNEARMV_H_ +#endif // VPX_VP8_COMMON_FINDNEARMV_H_ diff --git a/media/libvpx/libvpx/vp8/common/generic/systemdependent.c b/media/libvpx/libvpx/vp8/common/generic/systemdependent.c index 0432ee988744..75ce7ef35964 100644 --- a/media/libvpx/libvpx/vp8/common/generic/systemdependent.c +++ b/media/libvpx/libvpx/vp8/common/generic/systemdependent.c @@ -10,11 +10,11 @@ #include "vpx_config.h" #include "vp8_rtcd.h" -#if ARCH_ARM +#if VPX_ARCH_ARM #include "vpx_ports/arm.h" -#elif ARCH_X86 || ARCH_X86_64 +#elif VPX_ARCH_X86 || VPX_ARCH_X86_64 #include "vpx_ports/x86.h" -#elif ARCH_PPC +#elif VPX_ARCH_PPC #include "vpx_ports/ppc.h" #endif #include "vp8/common/onyxc_int.h" @@ -88,15 +88,16 @@ static int get_cpu_count() { void vp8_machine_specific_config(VP8_COMMON *ctx) { #if CONFIG_MULTITHREAD ctx->processor_core_count = get_cpu_count(); -#else - (void)ctx; #endif /* CONFIG_MULTITHREAD */ -#if ARCH_ARM +#if VPX_ARCH_ARM ctx->cpu_caps = arm_cpu_caps(); -#elif ARCH_X86 || ARCH_X86_64 +#elif VPX_ARCH_X86 || VPX_ARCH_X86_64 ctx->cpu_caps = x86_simd_caps(); -#elif ARCH_PPC +#elif VPX_ARCH_PPC ctx->cpu_caps = ppc_simd_caps(); +#else + // generic-gnu targets. + ctx->cpu_caps = 0; #endif } diff --git a/media/libvpx/libvpx/vp8/common/header.h b/media/libvpx/libvpx/vp8/common/header.h index 1df01fc6fa53..e64e24190852 100644 --- a/media/libvpx/libvpx/vp8/common/header.h +++ b/media/libvpx/libvpx/vp8/common/header.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
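The systemdependent.c hunk above makes cpu_caps well defined on every target: probed via the VPX_ARCH_* macros where a port exists, and explicitly 0 for generic-gnu builds. The reason it must never stay uninitialized is that feature bits in cpu_caps gate SIMD code paths, roughly in this pattern (an illustrative sketch, not the libvpx API; HAS_NEON is a hypothetical feature bit):

#define HAS_NEON 0x01 /* hypothetical feature bit for this sketch */

typedef void (*idct_add_fn)(short *input, short *dq, unsigned char *dst,
                            int stride);

/* Pick the SIMD variant only when the capability bit is set; a zeroed
 * cpu_caps therefore always falls back to the portable C path. */
static idct_add_fn pick_idct(int cpu_caps, idct_add_fn c_version,
                             idct_add_fn neon_version) {
  return (cpu_caps & HAS_NEON) ? neon_version : c_version;
}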
*/ -#ifndef VP8_COMMON_HEADER_H_ -#define VP8_COMMON_HEADER_H_ +#ifndef VPX_VP8_COMMON_HEADER_H_ +#define VPX_VP8_COMMON_HEADER_H_ #ifdef __cplusplus extern "C" { @@ -45,4 +45,4 @@ typedef struct { } // extern "C" #endif -#endif // VP8_COMMON_HEADER_H_ +#endif // VPX_VP8_COMMON_HEADER_H_ diff --git a/media/libvpx/libvpx/vp8/common/idct_blk.c b/media/libvpx/libvpx/vp8/common/idct_blk.c index ff9f3eb7f21e..ebe1774f56e1 100644 --- a/media/libvpx/libvpx/vp8/common/idct_blk.c +++ b/media/libvpx/libvpx/vp8/common/idct_blk.c @@ -12,12 +12,6 @@ #include "vp8_rtcd.h" #include "vpx_mem/vpx_mem.h" -void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *dest, - int stride); -void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred, - int pred_stride, unsigned char *dst_ptr, - int dst_stride); - void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs) { int i, j; @@ -39,40 +33,40 @@ void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, } } -void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dstu, - unsigned char *dstv, int stride, +void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, + unsigned char *dst_v, int stride, char *eobs) { int i, j; for (i = 0; i < 2; ++i) { for (j = 0; j < 2; ++j) { if (*eobs++ > 1) { - vp8_dequant_idct_add_c(q, dq, dstu, stride); + vp8_dequant_idct_add_c(q, dq, dst_u, stride); } else { - vp8_dc_only_idct_add_c(q[0] * dq[0], dstu, stride, dstu, stride); + vp8_dc_only_idct_add_c(q[0] * dq[0], dst_u, stride, dst_u, stride); memset(q, 0, 2 * sizeof(q[0])); } q += 16; - dstu += 4; + dst_u += 4; } - dstu += 4 * stride - 8; + dst_u += 4 * stride - 8; } for (i = 0; i < 2; ++i) { for (j = 0; j < 2; ++j) { if (*eobs++ > 1) { - vp8_dequant_idct_add_c(q, dq, dstv, stride); + vp8_dequant_idct_add_c(q, dq, dst_v, stride); } else { - vp8_dc_only_idct_add_c(q[0] * dq[0], dstv, stride, dstv, stride); + vp8_dc_only_idct_add_c(q[0] * dq[0], dst_v, stride, dst_v, stride); memset(q, 0, 2 * sizeof(q[0])); } q += 16; - dstv += 4; + dst_v += 4; } - dstv += 4 * stride - 8; + dst_v += 4 * stride - 8; } } diff --git a/media/libvpx/libvpx/vp8/common/invtrans.h b/media/libvpx/libvpx/vp8/common/invtrans.h index c7af32fb675e..aed7bb0600b2 100644 --- a/media/libvpx/libvpx/vp8/common/invtrans.h +++ b/media/libvpx/libvpx/vp8/common/invtrans.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_INVTRANS_H_ -#define VP8_COMMON_INVTRANS_H_ +#ifndef VPX_VP8_COMMON_INVTRANS_H_ +#define VPX_VP8_COMMON_INVTRANS_H_ #include "./vpx_config.h" #include "vp8_rtcd.h" @@ -54,4 +54,4 @@ static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd) { } // extern "C" #endif -#endif // VP8_COMMON_INVTRANS_H_ +#endif // VPX_VP8_COMMON_INVTRANS_H_ diff --git a/media/libvpx/libvpx/vp8/common/loopfilter.h b/media/libvpx/libvpx/vp8/common/loopfilter.h index 7484563e06a1..909e8df512df 100644 --- a/media/libvpx/libvpx/vp8/common/loopfilter.h +++ b/media/libvpx/libvpx/vp8/common/loopfilter.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
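In all of the idct_blk variants touched by this patch, eobs carries one end-of-block value per 4x4 subblock: a value greater than 1 means AC coefficients survived quantization, so the full dequant+IDCT runs; otherwise only the DC term is folded in and the two stored coefficients are cleared. Condensed into a single block (a sketch; the two callees are the real functions whose redundant local declarations the hunk above removes in favour of vp8_rtcd.h):

#include <string.h>

void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *dest,
                            int stride);
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred,
                            int pred_stride, unsigned char *dst_ptr,
                            int dst_stride);

static void add_one_4x4(short *q, short *dq, unsigned char *dst, int stride,
                        char eob) {
  if (eob > 1) {
    vp8_dequant_idct_add_c(q, dq, dst, stride); /* AC present: full inverse */
  } else {
    /* DC only: fold q[0] * dq[0] into the block, then clear the coeffs */
    vp8_dc_only_idct_add_c(q[0] * dq[0], dst, stride, dst, stride);
    memset(q, 0, 2 * sizeof(q[0]));
  }
}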
*/ -#ifndef VP8_COMMON_LOOPFILTER_H_ -#define VP8_COMMON_LOOPFILTER_H_ +#ifndef VPX_VP8_COMMON_LOOPFILTER_H_ +#define VPX_VP8_COMMON_LOOPFILTER_H_ #include "vpx_ports/mem.h" #include "vpx_config.h" @@ -26,7 +26,7 @@ extern "C" { typedef enum { NORMAL_LOOPFILTER = 0, SIMPLE_LOOPFILTER = 1 } LOOPFILTERTYPE; -#if ARCH_ARM +#if VPX_ARCH_ARM #define SIMD_WIDTH 1 #else #define SIMD_WIDTH 16 @@ -93,11 +93,9 @@ void vp8_loop_filter_row_normal(struct VP8Common *cm, void vp8_loop_filter_row_simple(struct VP8Common *cm, struct modeinfo *mode_info_context, int mb_row, - int post_ystride, int post_uvstride, - unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr); + int post_ystride, unsigned char *y_ptr); #ifdef __cplusplus } // extern "C" #endif -#endif // VP8_COMMON_LOOPFILTER_H_ +#endif // VPX_VP8_COMMON_LOOPFILTER_H_ diff --git a/media/libvpx/libvpx/vp8/common/loopfilter_filters.c b/media/libvpx/libvpx/vp8/common/loopfilter_filters.c index 188e290ca7ff..61a55d3c92f8 100644 --- a/media/libvpx/libvpx/vp8/common/loopfilter_filters.c +++ b/media/libvpx/libvpx/vp8/common/loopfilter_filters.c @@ -270,28 +270,32 @@ static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, *op0 = u ^ 0x80; } -void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *s, int p, +void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y_ptr, + int y_stride, const unsigned char *blimit) { signed char mask = 0; int i = 0; do { - mask = vp8_simple_filter_mask(blimit[0], s[-2 * p], s[-1 * p], s[0 * p], - s[1 * p]); - vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p); - ++s; + mask = vp8_simple_filter_mask(blimit[0], y_ptr[-2 * y_stride], + y_ptr[-1 * y_stride], y_ptr[0 * y_stride], + y_ptr[1 * y_stride]); + vp8_simple_filter(mask, y_ptr - 2 * y_stride, y_ptr - 1 * y_stride, y_ptr, + y_ptr + 1 * y_stride); + ++y_ptr; } while (++i < 16); } -void vp8_loop_filter_simple_vertical_edge_c(unsigned char *s, int p, +void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { signed char mask = 0; int i = 0; do { - mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]); - vp8_simple_filter(mask, s - 2, s - 1, s, s + 1); - s += p; + mask = vp8_simple_filter_mask(blimit[0], y_ptr[-2], y_ptr[-1], y_ptr[0], + y_ptr[1]); + vp8_simple_filter(mask, y_ptr - 2, y_ptr - 1, y_ptr, y_ptr + 1); + y_ptr += y_stride; } while (++i < 16); } diff --git a/media/libvpx/libvpx/vp8/common/mfqe.c b/media/libvpx/libvpx/vp8/common/mfqe.c index b6f8146b8447..1fe7363f177e 100644 --- a/media/libvpx/libvpx/vp8/common/mfqe.c +++ b/media/libvpx/libvpx/vp8/common/mfqe.c @@ -18,6 +18,7 @@ #include "./vp8_rtcd.h" #include "./vpx_dsp_rtcd.h" +#include "vp8/common/common.h" #include "vp8/common/postproc.h" #include "vpx_dsp/variance.h" #include "vpx_mem/vpx_mem.h" @@ -211,6 +212,7 @@ static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map) { { 0, 1, 4, 5 }, { 2, 3, 6, 7 }, { 8, 9, 12, 13 }, { 10, 11, 14, 15 } }; int i, j; + vp8_zero(*map); for (i = 0; i < 4; ++i) { map[i] = 1; for (j = 0; j < 4 && map[j]; ++j) { @@ -233,7 +235,7 @@ void vp8_multiframe_quality_enhance(VP8_COMMON *cm) { FRAME_TYPE frame_type = cm->frame_type; /* Point at base of Mb MODE_INFO list has motion vectors etc */ - const MODE_INFO *mode_info_context = cm->show_frame_mi; + const MODE_INFO *mode_info_context = cm->mi; int mb_row; int mb_col; int totmap, map[4]; diff --git a/media/libvpx/libvpx/vp8/common/mips/dspr2/idct_blk_dspr2.c 
b/media/libvpx/libvpx/vp8/common/mips/dspr2/idct_blk_dspr2.c index 899dc10ad96e..eae852d59284 100644 --- a/media/libvpx/libvpx/vp8/common/mips/dspr2/idct_blk_dspr2.c +++ b/media/libvpx/libvpx/vp8/common/mips/dspr2/idct_blk_dspr2.c @@ -35,41 +35,41 @@ void vp8_dequant_idct_add_y_block_dspr2(short *q, short *dq, unsigned char *dst, } void vp8_dequant_idct_add_uv_block_dspr2(short *q, short *dq, - unsigned char *dstu, - unsigned char *dstv, int stride, + unsigned char *dst_u, + unsigned char *dst_v, int stride, char *eobs) { int i, j; for (i = 0; i < 2; ++i) { for (j = 0; j < 2; ++j) { if (*eobs++ > 1) - vp8_dequant_idct_add_dspr2(q, dq, dstu, stride); + vp8_dequant_idct_add_dspr2(q, dq, dst_u, stride); else { - vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dstu, stride, dstu, stride); + vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dst_u, stride, dst_u, stride); ((int *)q)[0] = 0; } q += 16; - dstu += 4; + dst_u += 4; } - dstu += 4 * stride - 8; + dst_u += 4 * stride - 8; } for (i = 0; i < 2; ++i) { for (j = 0; j < 2; ++j) { if (*eobs++ > 1) - vp8_dequant_idct_add_dspr2(q, dq, dstv, stride); + vp8_dequant_idct_add_dspr2(q, dq, dst_v, stride); else { - vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dstv, stride, dstv, stride); + vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dst_v, stride, dst_v, stride); ((int *)q)[0] = 0; } q += 16; - dstv += 4; + dst_v += 4; } - dstv += 4 * stride - 8; + dst_v += 4 * stride - 8; } } diff --git a/media/libvpx/libvpx/vp8/common/mips/dspr2/vp8_loopfilter_filters_dspr2.c b/media/libvpx/libvpx/vp8/common/mips/dspr2/vp8_loopfilter_filters_dspr2.c index d2c34425156a..21446fb4132a 100644 --- a/media/libvpx/libvpx/vp8/common/mips/dspr2/vp8_loopfilter_filters_dspr2.c +++ b/media/libvpx/libvpx/vp8/common/mips/dspr2/vp8_loopfilter_filters_dspr2.c @@ -934,8 +934,8 @@ void vp8_loop_filter_uvvertical_edge_mips(unsigned char *s, int p, s4 = s3 + p; /* load quad-byte vectors - * memory is 4 byte aligned - */ + * memory is 4 byte aligned + */ p2 = *((uint32_t *)(s1 - 4)); p6 = *((uint32_t *)(s1)); p1 = *((uint32_t *)(s2 - 4)); @@ -990,8 +990,8 @@ void vp8_loop_filter_uvvertical_edge_mips(unsigned char *s, int p, :); /* if (p1 - p4 == 0) and (p2 - p3 == 0) - * mask will be zero and filtering is not needed - */ + * mask will be zero and filtering is not needed + */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); @@ -2102,8 +2102,8 @@ void vp8_mbloop_filter_uvvertical_edge_mips(unsigned char *s, int p, s4 = s3 + p; /* load quad-byte vectors - * memory is 4 byte aligned - */ + * memory is 4 byte aligned + */ p2 = *((uint32_t *)(s1 - 4)); p6 = *((uint32_t *)(s1)); p1 = *((uint32_t *)(s2 - 4)); diff --git a/media/libvpx/libvpx/vp8/common/mips/mmi/idct_blk_mmi.c b/media/libvpx/libvpx/vp8/common/mips/mmi/idct_blk_mmi.c index f6020ab46882..4fd6854c528a 100644 --- a/media/libvpx/libvpx/vp8/common/mips/mmi/idct_blk_mmi.c +++ b/media/libvpx/libvpx/vp8/common/mips/mmi/idct_blk_mmi.c @@ -12,7 +12,7 @@ #include "vpx_mem/vpx_mem.h" void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst, - int stride, int8_t *eobs) { + int stride, char *eobs) { int i, j; for (i = 0; i < 4; i++) { @@ -32,40 +32,39 @@ void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst, } } -void vp8_dequant_idct_add_uv_block_mmi(int16_t *q, int16_t *dq, uint8_t *dstu, - uint8_t *dstv, int stride, - int8_t *eobs) { +void vp8_dequant_idct_add_uv_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst_u, + uint8_t 
*dst_v, int stride, char *eobs) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { if (*eobs++ > 1) { - vp8_dequant_idct_add_mmi(q, dq, dstu, stride); + vp8_dequant_idct_add_mmi(q, dq, dst_u, stride); } else { - vp8_dc_only_idct_add_mmi(q[0] * dq[0], dstu, stride, dstu, stride); + vp8_dc_only_idct_add_mmi(q[0] * dq[0], dst_u, stride, dst_u, stride); memset(q, 0, 2 * sizeof(q[0])); } q += 16; - dstu += 4; + dst_u += 4; } - dstu += 4 * stride - 8; + dst_u += 4 * stride - 8; } for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { if (*eobs++ > 1) { - vp8_dequant_idct_add_mmi(q, dq, dstv, stride); + vp8_dequant_idct_add_mmi(q, dq, dst_v, stride); } else { - vp8_dc_only_idct_add_mmi(q[0] * dq[0], dstv, stride, dstv, stride); + vp8_dc_only_idct_add_mmi(q[0] * dq[0], dst_v, stride, dst_v, stride); memset(q, 0, 2 * sizeof(q[0])); } q += 16; - dstv += 4; + dst_v += 4; } - dstv += 4 * stride - 8; + dst_v += 4 * stride - 8; } } diff --git a/media/libvpx/libvpx/vp8/common/mips/msa/idct_msa.c b/media/libvpx/libvpx/vp8/common/mips/msa/idct_msa.c index 3d516d0f81ac..efad0c29f8a9 100644 --- a/media/libvpx/libvpx/vp8/common/mips/msa/idct_msa.c +++ b/media/libvpx/libvpx/vp8/common/mips/msa/idct_msa.c @@ -134,7 +134,7 @@ static void idct4x4_addconst_msa(int16_t in_dc, uint8_t *pred, ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dest, dest_stride); } -void vp8_short_inv_walsh4x4_msa(int16_t *input, int16_t *mb_dq_coeff) { +void vp8_short_inv_walsh4x4_msa(int16_t *input, int16_t *mb_dqcoeff) { v8i16 input0, input1, tmp0, tmp1, tmp2, tmp3, out0, out1; const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 }; const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 }; @@ -157,22 +157,22 @@ void vp8_short_inv_walsh4x4_msa(int16_t *input, int16_t *mb_dq_coeff) { ADD2(tmp0, 3, tmp1, 3, out0, out1); out0 >>= 3; out1 >>= 3; - mb_dq_coeff[0] = __msa_copy_s_h(out0, 0); - mb_dq_coeff[16] = __msa_copy_s_h(out0, 4); - mb_dq_coeff[32] = __msa_copy_s_h(out1, 0); - mb_dq_coeff[48] = __msa_copy_s_h(out1, 4); - mb_dq_coeff[64] = __msa_copy_s_h(out0, 1); - mb_dq_coeff[80] = __msa_copy_s_h(out0, 5); - mb_dq_coeff[96] = __msa_copy_s_h(out1, 1); - mb_dq_coeff[112] = __msa_copy_s_h(out1, 5); - mb_dq_coeff[128] = __msa_copy_s_h(out0, 2); - mb_dq_coeff[144] = __msa_copy_s_h(out0, 6); - mb_dq_coeff[160] = __msa_copy_s_h(out1, 2); - mb_dq_coeff[176] = __msa_copy_s_h(out1, 6); - mb_dq_coeff[192] = __msa_copy_s_h(out0, 3); - mb_dq_coeff[208] = __msa_copy_s_h(out0, 7); - mb_dq_coeff[224] = __msa_copy_s_h(out1, 3); - mb_dq_coeff[240] = __msa_copy_s_h(out1, 7); + mb_dqcoeff[0] = __msa_copy_s_h(out0, 0); + mb_dqcoeff[16] = __msa_copy_s_h(out0, 4); + mb_dqcoeff[32] = __msa_copy_s_h(out1, 0); + mb_dqcoeff[48] = __msa_copy_s_h(out1, 4); + mb_dqcoeff[64] = __msa_copy_s_h(out0, 1); + mb_dqcoeff[80] = __msa_copy_s_h(out0, 5); + mb_dqcoeff[96] = __msa_copy_s_h(out1, 1); + mb_dqcoeff[112] = __msa_copy_s_h(out1, 5); + mb_dqcoeff[128] = __msa_copy_s_h(out0, 2); + mb_dqcoeff[144] = __msa_copy_s_h(out0, 6); + mb_dqcoeff[160] = __msa_copy_s_h(out1, 2); + mb_dqcoeff[176] = __msa_copy_s_h(out1, 6); + mb_dqcoeff[192] = __msa_copy_s_h(out0, 3); + mb_dqcoeff[208] = __msa_copy_s_h(out0, 7); + mb_dqcoeff[224] = __msa_copy_s_h(out1, 3); + mb_dqcoeff[240] = __msa_copy_s_h(out1, 7); } static void dequant_idct4x4_addblk_msa(int16_t *input, int16_t *dequant_input, @@ -359,27 +359,27 @@ void vp8_dequant_idct_add_y_block_msa(int16_t *q, int16_t *dq, uint8_t *dst, } } -void vp8_dequant_idct_add_uv_block_msa(int16_t *q, int16_t *dq, uint8_t *dstu, - uint8_t *dstv, int32_t stride, +void 
vp8_dequant_idct_add_uv_block_msa(int16_t *q, int16_t *dq, uint8_t *dst_u, + uint8_t *dst_v, int32_t stride, char *eobs) { int16_t *eobs_h = (int16_t *)eobs; if (eobs_h[0]) { if (eobs_h[0] & 0xfefe) { - dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride); + dequant_idct4x4_addblk_2x_msa(q, dq, dst_u, stride); } else { - dequant_idct_addconst_2x_msa(q, dq, dstu, stride); + dequant_idct_addconst_2x_msa(q, dq, dst_u, stride); } } q += 32; - dstu += (stride * 4); + dst_u += (stride * 4); if (eobs_h[1]) { if (eobs_h[1] & 0xfefe) { - dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride); + dequant_idct4x4_addblk_2x_msa(q, dq, dst_u, stride); } else { - dequant_idct_addconst_2x_msa(q, dq, dstu, stride); + dequant_idct_addconst_2x_msa(q, dq, dst_u, stride); } } @@ -387,20 +387,20 @@ void vp8_dequant_idct_add_uv_block_msa(int16_t *q, int16_t *dq, uint8_t *dstu, if (eobs_h[2]) { if (eobs_h[2] & 0xfefe) { - dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride); + dequant_idct4x4_addblk_2x_msa(q, dq, dst_v, stride); } else { - dequant_idct_addconst_2x_msa(q, dq, dstv, stride); + dequant_idct_addconst_2x_msa(q, dq, dst_v, stride); } } q += 32; - dstv += (stride * 4); + dst_v += (stride * 4); if (eobs_h[3]) { if (eobs_h[3] & 0xfefe) { - dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride); + dequant_idct4x4_addblk_2x_msa(q, dq, dst_v, stride); } else { - dequant_idct_addconst_2x_msa(q, dq, dstv, stride); + dequant_idct_addconst_2x_msa(q, dq, dst_v, stride); } } } diff --git a/media/libvpx/libvpx/vp8/common/mips/msa/vp8_macros_msa.h b/media/libvpx/libvpx/vp8/common/mips/msa/vp8_macros_msa.h index 6bec3adec392..14f83799ffd6 100644 --- a/media/libvpx/libvpx/vp8/common/mips/msa/vp8_macros_msa.h +++ b/media/libvpx/libvpx/vp8/common/mips/msa/vp8_macros_msa.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_MIPS_MSA_VP8_MACROS_MSA_H_ -#define VP8_COMMON_MIPS_MSA_VP8_MACROS_MSA_H_ +#ifndef VPX_VP8_COMMON_MIPS_MSA_VP8_MACROS_MSA_H_ +#define VPX_VP8_COMMON_MIPS_MSA_VP8_MACROS_MSA_H_ #include @@ -1757,4 +1757,4 @@ \ tmp1_m; \ }) -#endif /* VP8_COMMON_MIPS_MSA_VP8_MACROS_MSA_H_ */ +#endif // VPX_VP8_COMMON_MIPS_MSA_VP8_MACROS_MSA_H_ diff --git a/media/libvpx/libvpx/vp8/common/modecont.c b/media/libvpx/libvpx/vp8/common/modecont.c index d6ad9bb99ae0..bab410374f6e 100644 --- a/media/libvpx/libvpx/vp8/common/modecont.c +++ b/media/libvpx/libvpx/vp8/common/modecont.c @@ -11,28 +11,16 @@ #include "entropy.h" const int vp8_mode_contexts[6][4] = { - { - /* 0 */ - 7, 1, 1, 143, - }, - { - /* 1 */ - 14, 18, 14, 107, - }, - { - /* 2 */ - 135, 64, 57, 68, - }, - { - /* 3 */ - 60, 56, 128, 65, - }, - { - /* 4 */ - 159, 134, 128, 34, - }, - { - /* 5 */ - 234, 188, 128, 28, - }, + { /* 0 */ + 7, 1, 1, 143 }, + { /* 1 */ + 14, 18, 14, 107 }, + { /* 2 */ + 135, 64, 57, 68 }, + { /* 3 */ + 60, 56, 128, 65 }, + { /* 4 */ + 159, 134, 128, 34 }, + { /* 5 */ + 234, 188, 128, 28 }, }; diff --git a/media/libvpx/libvpx/vp8/common/modecont.h b/media/libvpx/libvpx/vp8/common/modecont.h index b58c7dc2d3da..031f74f2ff67 100644 --- a/media/libvpx/libvpx/vp8/common/modecont.h +++ b/media/libvpx/libvpx/vp8/common/modecont.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_COMMON_MODECONT_H_ -#define VP8_COMMON_MODECONT_H_ +#ifndef VPX_VP8_COMMON_MODECONT_H_ +#define VPX_VP8_COMMON_MODECONT_H_ #ifdef __cplusplus extern "C" { @@ -21,4 +21,4 @@ extern const int vp8_mode_contexts[6][4]; } // extern "C" #endif -#endif // VP8_COMMON_MODECONT_H_ +#endif // VPX_VP8_COMMON_MODECONT_H_ diff --git a/media/libvpx/libvpx/vp8/common/mv.h b/media/libvpx/libvpx/vp8/common/mv.h index b6d2147af872..4cde12f201aa 100644 --- a/media/libvpx/libvpx/vp8/common/mv.h +++ b/media/libvpx/libvpx/vp8/common/mv.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_MV_H_ -#define VP8_COMMON_MV_H_ +#ifndef VPX_VP8_COMMON_MV_H_ +#define VPX_VP8_COMMON_MV_H_ #include "vpx/vpx_integer.h" #ifdef __cplusplus @@ -30,4 +30,4 @@ typedef union int_mv { } // extern "C" #endif -#endif // VP8_COMMON_MV_H_ +#endif // VPX_VP8_COMMON_MV_H_ diff --git a/media/libvpx/libvpx/vp8/common/onyx.h b/media/libvpx/libvpx/vp8/common/onyx.h index 72fba2ec56bf..05c72df3faa4 100644 --- a/media/libvpx/libvpx/vp8/common/onyx.h +++ b/media/libvpx/libvpx/vp8/common/onyx.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_ONYX_H_ -#define VP8_COMMON_ONYX_H_ +#ifndef VPX_VP8_COMMON_ONYX_H_ +#define VPX_VP8_COMMON_ONYX_H_ #ifdef __cplusplus extern "C" { @@ -247,38 +247,38 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf); void vp8_remove_compressor(struct VP8_COMP **comp); void vp8_init_config(struct VP8_COMP *onyx, VP8_CONFIG *oxcf); -void vp8_change_config(struct VP8_COMP *onyx, VP8_CONFIG *oxcf); +void vp8_change_config(struct VP8_COMP *cpi, VP8_CONFIG *oxcf); -int vp8_receive_raw_frame(struct VP8_COMP *comp, unsigned int frame_flags, +int vp8_receive_raw_frame(struct VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, - int64_t end_time_stamp); -int vp8_get_compressed_data(struct VP8_COMP *comp, unsigned int *frame_flags, + int64_t end_time); +int vp8_get_compressed_data(struct VP8_COMP *cpi, unsigned int *frame_flags, size_t *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush); -int vp8_get_preview_raw_frame(struct VP8_COMP *comp, YV12_BUFFER_CONFIG *dest, +int vp8_get_preview_raw_frame(struct VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags); -int vp8_use_as_reference(struct VP8_COMP *comp, int ref_frame_flags); -int vp8_update_reference(struct VP8_COMP *comp, int ref_frame_flags); -int vp8_get_reference(struct VP8_COMP *comp, +int vp8_use_as_reference(struct VP8_COMP *cpi, int ref_frame_flags); +int vp8_update_reference(struct VP8_COMP *cpi, int ref_frame_flags); +int vp8_get_reference(struct VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); -int vp8_set_reference(struct VP8_COMP *comp, +int vp8_set_reference(struct VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); -int vp8_update_entropy(struct VP8_COMP *comp, int update); -int vp8_set_roimap(struct VP8_COMP *comp, unsigned char *map, unsigned int rows, +int vp8_update_entropy(struct VP8_COMP *cpi, int update); +int vp8_set_roimap(struct VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]); -int vp8_set_active_map(struct VP8_COMP *comp, unsigned char *map, +int vp8_set_active_map(struct VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols); -int vp8_set_internal_size(struct VP8_COMP *comp, 
VPX_SCALING horiz_mode, +int vp8_set_internal_size(struct VP8_COMP *cpi, VPX_SCALING horiz_mode, VPX_SCALING vert_mode); -int vp8_get_quantizer(struct VP8_COMP *c); +int vp8_get_quantizer(struct VP8_COMP *cpi); #ifdef __cplusplus } #endif -#endif // VP8_COMMON_ONYX_H_ +#endif // VPX_VP8_COMMON_ONYX_H_ diff --git a/media/libvpx/libvpx/vp8/common/onyxc_int.h b/media/libvpx/libvpx/vp8/common/onyxc_int.h index 9a12c7fb67dc..ef8d00762096 100644 --- a/media/libvpx/libvpx/vp8/common/onyxc_int.h +++ b/media/libvpx/libvpx/vp8/common/onyxc_int.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_ONYXC_INT_H_ -#define VP8_COMMON_ONYXC_INT_H_ +#ifndef VPX_VP8_COMMON_ONYXC_INT_H_ +#define VPX_VP8_COMMON_ONYXC_INT_H_ #include "vpx_config.h" #include "vp8_rtcd.h" @@ -174,4 +174,4 @@ typedef struct VP8Common { } // extern "C" #endif -#endif // VP8_COMMON_ONYXC_INT_H_ +#endif // VPX_VP8_COMMON_ONYXC_INT_H_ diff --git a/media/libvpx/libvpx/vp8/common/onyxd.h b/media/libvpx/libvpx/vp8/common/onyxd.h index d3c1b0e972c5..e4e81aaac58d 100644 --- a/media/libvpx/libvpx/vp8/common/onyxd.h +++ b/media/libvpx/libvpx/vp8/common/onyxd.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_ONYXD_H_ -#define VP8_COMMON_ONYXD_H_ +#ifndef VPX_VP8_COMMON_ONYXD_H_ +#define VPX_VP8_COMMON_ONYXD_H_ /* Create/destroy static data structures. */ #ifdef __cplusplus @@ -41,23 +41,22 @@ void vp8dx_set_setting(struct VP8D_COMP *comp, VP8D_SETTING oxst, int x); int vp8dx_get_setting(struct VP8D_COMP *comp, VP8D_SETTING oxst); -int vp8dx_receive_compressed_data(struct VP8D_COMP *comp, size_t size, - const uint8_t *dest, int64_t time_stamp); -int vp8dx_get_raw_frame(struct VP8D_COMP *comp, YV12_BUFFER_CONFIG *sd, +int vp8dx_receive_compressed_data(struct VP8D_COMP *pbi, int64_t time_stamp); +int vp8dx_get_raw_frame(struct VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags); int vp8dx_references_buffer(struct VP8Common *oci, int ref_frame); -vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP *comp, +vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); -vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP *comp, +vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); -int vp8dx_get_quantizer(const struct VP8D_COMP *c); +int vp8dx_get_quantizer(const struct VP8D_COMP *pbi); #ifdef __cplusplus } #endif -#endif // VP8_COMMON_ONYXD_H_ +#endif // VPX_VP8_COMMON_ONYXD_H_ diff --git a/media/libvpx/libvpx/vp8/common/postproc.c b/media/libvpx/libvpx/vp8/common/postproc.c index 8c292d6161dd..c03b16b2f5be 100644 --- a/media/libvpx/libvpx/vp8/common/postproc.c +++ b/media/libvpx/libvpx/vp8/common/postproc.c @@ -60,8 +60,7 @@ static void vp8_de_mblock(YV12_BUFFER_CONFIG *post, int q) { } void vp8_deblock(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, - int flag) { + YV12_BUFFER_CONFIG *post, int q) { double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; int ppl = (int)(level + .5); @@ -72,8 +71,6 @@ void vp8_deblock(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source, * is a skipped block. 
*/ unsigned char *ylimits = cm->pp_limits_buffer; unsigned char *uvlimits = cm->pp_limits_buffer + 16 * cm->mb_cols; - (void)low_var_thresh; - (void)flag; if (ppl > 0) { for (mbr = 0; mbr < cm->mb_rows; ++mbr) { @@ -116,8 +113,7 @@ void vp8_deblock(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source, } } -void vp8_de_noise(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, int flag, +void vp8_de_noise(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source, int q, int uvfilter) { int mbr; double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; @@ -125,9 +121,6 @@ void vp8_de_noise(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source, int mb_rows = cm->mb_rows; int mb_cols = cm->mb_cols; unsigned char *limits = cm->pp_limits_buffer; - (void)post; - (void)low_var_thresh; - (void)flag; memset(limits, (unsigned char)ppl, 16 * mb_cols); @@ -151,124 +144,6 @@ void vp8_de_noise(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source, } #endif // CONFIG_POSTPROC -/* Blend the macro block with a solid colored square. Leave the - * edges unblended to give distinction to macro blocks in areas - * filled with the same color block. - */ -void vp8_blend_mb_inner_c(unsigned char *y, unsigned char *u, unsigned char *v, - int y_1, int u_1, int v_1, int alpha, int stride) { - int i, j; - int y1_const = y_1 * ((1 << 16) - alpha); - int u1_const = u_1 * ((1 << 16) - alpha); - int v1_const = v_1 * ((1 << 16) - alpha); - - y += 2 * stride + 2; - for (i = 0; i < 12; ++i) { - for (j = 0; j < 12; ++j) { - y[j] = (y[j] * alpha + y1_const) >> 16; - } - y += stride; - } - - stride >>= 1; - - u += stride + 1; - v += stride + 1; - - for (i = 0; i < 6; ++i) { - for (j = 0; j < 6; ++j) { - u[j] = (u[j] * alpha + u1_const) >> 16; - v[j] = (v[j] * alpha + v1_const) >> 16; - } - u += stride; - v += stride; - } -} - -/* Blend only the edge of the macro block. Leave center - * unblended to allow for other visualizations to be layered. 
- */ -void vp8_blend_mb_outer_c(unsigned char *y, unsigned char *u, unsigned char *v, - int y_1, int u_1, int v_1, int alpha, int stride) { - int i, j; - int y1_const = y_1 * ((1 << 16) - alpha); - int u1_const = u_1 * ((1 << 16) - alpha); - int v1_const = v_1 * ((1 << 16) - alpha); - - for (i = 0; i < 2; ++i) { - for (j = 0; j < 16; ++j) { - y[j] = (y[j] * alpha + y1_const) >> 16; - } - y += stride; - } - - for (i = 0; i < 12; ++i) { - y[0] = (y[0] * alpha + y1_const) >> 16; - y[1] = (y[1] * alpha + y1_const) >> 16; - y[14] = (y[14] * alpha + y1_const) >> 16; - y[15] = (y[15] * alpha + y1_const) >> 16; - y += stride; - } - - for (i = 0; i < 2; ++i) { - for (j = 0; j < 16; ++j) { - y[j] = (y[j] * alpha + y1_const) >> 16; - } - y += stride; - } - - stride >>= 1; - - for (j = 0; j < 8; ++j) { - u[j] = (u[j] * alpha + u1_const) >> 16; - v[j] = (v[j] * alpha + v1_const) >> 16; - } - u += stride; - v += stride; - - for (i = 0; i < 6; ++i) { - u[0] = (u[0] * alpha + u1_const) >> 16; - v[0] = (v[0] * alpha + v1_const) >> 16; - - u[7] = (u[7] * alpha + u1_const) >> 16; - v[7] = (v[7] * alpha + v1_const) >> 16; - - u += stride; - v += stride; - } - - for (j = 0; j < 8; ++j) { - u[j] = (u[j] * alpha + u1_const) >> 16; - v[j] = (v[j] * alpha + v1_const) >> 16; - } -} - -void vp8_blend_b_c(unsigned char *y, unsigned char *u, unsigned char *v, - int y_1, int u_1, int v_1, int alpha, int stride) { - int i, j; - int y1_const = y_1 * ((1 << 16) - alpha); - int u1_const = u_1 * ((1 << 16) - alpha); - int v1_const = v_1 * ((1 << 16) - alpha); - - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) { - y[j] = (y[j] * alpha + y1_const) >> 16; - } - y += stride; - } - - stride >>= 1; - - for (i = 0; i < 2; ++i) { - for (j = 0; j < 2; ++j) { - u[j] = (u[j] * alpha + u1_const) >> 16; - v[j] = (v[j] * alpha + v1_const) >> 16; - } - u += stride; - v += stride; - } -} - #if CONFIG_POSTPROC int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags) { @@ -325,7 +200,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vpx_clear_system_state(); if ((flags & VP8D_MFQE) && oci->postproc_state.last_frame_valid && - oci->current_video_frame >= 2 && + oci->current_video_frame > 10 && oci->postproc_state.last_base_qindex < 60 && oci->base_qindex - oci->postproc_state.last_base_qindex >= 20) { vp8_multiframe_quality_enhance(oci); @@ -334,11 +209,10 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_yv12_copy_frame(&oci->post_proc_buffer, &oci->post_proc_buffer_int); if (flags & VP8D_DEMACROBLOCK) { vp8_deblock(oci, &oci->post_proc_buffer_int, &oci->post_proc_buffer, - q + (deblock_level - 5) * 10, 1, 0); + q + (deblock_level - 5) * 10); vp8_de_mblock(&oci->post_proc_buffer, q + (deblock_level - 5) * 10); } else if (flags & VP8D_DEBLOCK) { - vp8_deblock(oci, &oci->post_proc_buffer_int, &oci->post_proc_buffer, q, - 1, 0); + vp8_deblock(oci, &oci->post_proc_buffer_int, &oci->post_proc_buffer, q); } } /* Move partially towards the base q of the previous frame */ @@ -346,12 +220,12 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, (3 * oci->postproc_state.last_base_qindex + oci->base_qindex) >> 2; } else if (flags & VP8D_DEMACROBLOCK) { vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, - q + (deblock_level - 5) * 10, 1, 0); + q + (deblock_level - 5) * 10); vp8_de_mblock(&oci->post_proc_buffer, q + (deblock_level - 5) * 10); oci->postproc_state.last_base_qindex = oci->base_qindex; } else if (flags & VP8D_DEBLOCK) { - 
vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, q, 1, 0); + vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, q); oci->postproc_state.last_base_qindex = oci->base_qindex; } else { vp8_yv12_copy_frame(oci->frame_to_show, &oci->post_proc_buffer); diff --git a/media/libvpx/libvpx/vp8/common/postproc.h b/media/libvpx/libvpx/vp8/common/postproc.h index 7be112b16357..492c52aef656 100644 --- a/media/libvpx/libvpx/vp8/common/postproc.h +++ b/media/libvpx/libvpx/vp8/common/postproc.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_POSTPROC_H_ -#define VP8_COMMON_POSTPROC_H_ +#ifndef VPX_VP8_COMMON_POSTPROC_H_ +#define VPX_VP8_COMMON_POSTPROC_H_ #include "vpx_ports/mem.h" struct postproc_state { @@ -27,14 +27,13 @@ struct postproc_state { extern "C" { #endif int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest, - vp8_ppflags_t *flags); + vp8_ppflags_t *ppflags); -void vp8_de_noise(struct VP8Common *oci, YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, int flag, +void vp8_de_noise(struct VP8Common *cm, YV12_BUFFER_CONFIG *source, int q, int uvfilter); -void vp8_deblock(struct VP8Common *oci, YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, int flag); +void vp8_deblock(struct VP8Common *cm, YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *post, int q); #define MFQE_PRECISION 4 @@ -43,4 +42,4 @@ void vp8_multiframe_quality_enhance(struct VP8Common *cm); } // extern "C" #endif -#endif // VP8_COMMON_POSTPROC_H_ +#endif // VPX_VP8_COMMON_POSTPROC_H_ diff --git a/media/libvpx/libvpx/vp8/common/ppflags.h b/media/libvpx/libvpx/vp8/common/ppflags.h index 96e3af6c9c1e..bdf08734b966 100644 --- a/media/libvpx/libvpx/vp8/common/ppflags.h +++ b/media/libvpx/libvpx/vp8/common/ppflags.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_PPFLAGS_H_ -#define VP8_COMMON_PPFLAGS_H_ +#ifndef VPX_VP8_COMMON_PPFLAGS_H_ +#define VPX_VP8_COMMON_PPFLAGS_H_ #ifdef __cplusplus extern "C" { @@ -36,4 +36,4 @@ typedef struct { } // extern "C" #endif -#endif // VP8_COMMON_PPFLAGS_H_ +#endif // VPX_VP8_COMMON_PPFLAGS_H_ diff --git a/media/libvpx/libvpx/vp8/common/quant_common.h b/media/libvpx/libvpx/vp8/common/quant_common.h index ff4203df8701..049840a27257 100644 --- a/media/libvpx/libvpx/vp8/common/quant_common.h +++ b/media/libvpx/libvpx/vp8/common/quant_common.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
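For reference, the strength that vp8_deblock and vp8_de_noise derive from the quantizer they now receive directly (including the q + (deblock_level - 5) * 10 adjustment made by the callers above) is this cubic fit from postproc.c, extracted into a standalone helper (illustration only):

/* Map a quantizer index to the postproc per-pixel limit, rounded to the
 * nearest integer, exactly as in vp8_deblock()/vp8_de_noise(). */
static int deblock_limit_from_q(int q) {
  double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
  return (int)(level + .5);
}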
*/ -#ifndef VP8_COMMON_QUANT_COMMON_H_ -#define VP8_COMMON_QUANT_COMMON_H_ +#ifndef VPX_VP8_COMMON_QUANT_COMMON_H_ +#define VPX_VP8_COMMON_QUANT_COMMON_H_ #include "string.h" #include "blockd.h" @@ -30,4 +30,4 @@ extern int vp8_ac_uv_quant(int QIndex, int Delta); } // extern "C" #endif -#endif // VP8_COMMON_QUANT_COMMON_H_ +#endif // VPX_VP8_COMMON_QUANT_COMMON_H_ diff --git a/media/libvpx/libvpx/vp8/common/reconinter.c b/media/libvpx/libvpx/vp8/common/reconinter.c index 48892c9b8e5d..2cb0709318b5 100644 --- a/media/libvpx/libvpx/vp8/common/reconinter.c +++ b/media/libvpx/libvpx/vp8/common/reconinter.c @@ -333,6 +333,13 @@ void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, unsigned char *dst_y, _16x16mv.as_mv.row &= x->fullpixel_mask; _16x16mv.as_mv.col &= x->fullpixel_mask; + if (2 * _16x16mv.as_mv.col < (x->mb_to_left_edge - (19 << 3)) || + 2 * _16x16mv.as_mv.col > x->mb_to_right_edge + (18 << 3) || + 2 * _16x16mv.as_mv.row < (x->mb_to_top_edge - (19 << 3)) || + 2 * _16x16mv.as_mv.row > x->mb_to_bottom_edge + (18 << 3)) { + return; + } + pre_stride >>= 1; offset = (_16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3); uptr = x->pre.u_buffer + offset; diff --git a/media/libvpx/libvpx/vp8/common/reconinter.h b/media/libvpx/libvpx/vp8/common/reconinter.h index 4cdd4fee0fd3..974e7ce75478 100644 --- a/media/libvpx/libvpx/vp8/common/reconinter.h +++ b/media/libvpx/libvpx/vp8/common/reconinter.h @@ -8,30 +8,29 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_RECONINTER_H_ -#define VP8_COMMON_RECONINTER_H_ +#ifndef VPX_VP8_COMMON_RECONINTER_H_ +#define VPX_VP8_COMMON_RECONINTER_H_ #ifdef __cplusplus extern "C" { #endif -extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x); -extern void vp8_build_inter16x16_predictors_mb( - MACROBLOCKD *x, unsigned char *dst_y, unsigned char *dst_u, - unsigned char *dst_v, int dst_ystride, int dst_uvstride); +void vp8_build_inter_predictors_mb(MACROBLOCKD *xd); +void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, unsigned char *dst_y, + unsigned char *dst_u, + unsigned char *dst_v, int dst_ystride, + int dst_uvstride); -extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, - unsigned char *dst_y, - int dst_ystride); -extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, - unsigned char *base_pre, - int pre_stride, vp8_subpix_fn_t sppf); +void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, unsigned char *dst_y, + int dst_ystride); +void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, + int pre_stride, vp8_subpix_fn_t sppf); -extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x); -extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x); +void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x); +void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x); #ifdef __cplusplus } // extern "C" #endif -#endif // VP8_COMMON_RECONINTER_H_ +#endif // VPX_VP8_COMMON_RECONINTER_H_ diff --git a/media/libvpx/libvpx/vp8/common/reconintra.h b/media/libvpx/libvpx/vp8/common/reconintra.h index fd7c725f35d5..029ac00a24bf 100644 --- a/media/libvpx/libvpx/vp8/common/reconintra.h +++ b/media/libvpx/libvpx/vp8/common/reconintra.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
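The reconinter.c hunk above adds an early-out before the chroma prediction: the mb_to_*_edge values are distances to the frame edges in 1/8-pel units, and the chroma motion vector doubled back to luma scale must stay within the reconstructed border, with the 19- and 18-pixel margins used in the hunk. The predicate, lifted into a standalone sketch (names are this sketch's own):

/* Returns nonzero when a 16x16 prediction would read outside the extended
 * border. mv_row2/mv_col2 are twice the chroma MV, matching the hunk above;
 * the edges are in 1/8-pel units, hence the << 3 on the pixel margins. */
static int mv_outside_border(int mv_row2, int mv_col2, int mb_to_left_edge,
                             int mb_to_right_edge, int mb_to_top_edge,
                             int mb_to_bottom_edge) {
  return mv_col2 < mb_to_left_edge - (19 << 3) ||
         mv_col2 > mb_to_right_edge + (18 << 3) ||
         mv_row2 < mb_to_top_edge - (19 << 3) ||
         mv_row2 > mb_to_bottom_edge + (18 << 3);
}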
*/ -#ifndef VP8_COMMON_RECONINTRA_H_ -#define VP8_COMMON_RECONINTRA_H_ +#ifndef VPX_VP8_COMMON_RECONINTRA_H_ +#define VPX_VP8_COMMON_RECONINTRA_H_ #include "vp8/common/blockd.h" @@ -32,4 +32,4 @@ void vp8_init_intra_predictors(void); } // extern "C" #endif -#endif // VP8_COMMON_RECONINTRA_H_ +#endif // VPX_VP8_COMMON_RECONINTRA_H_ diff --git a/media/libvpx/libvpx/vp8/common/reconintra4x4.h b/media/libvpx/libvpx/vp8/common/reconintra4x4.h index e17fc58c0121..3618ec5cbebe 100644 --- a/media/libvpx/libvpx/vp8/common/reconintra4x4.h +++ b/media/libvpx/libvpx/vp8/common/reconintra4x4.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_RECONINTRA4X4_H_ -#define VP8_COMMON_RECONINTRA4X4_H_ +#ifndef VPX_VP8_COMMON_RECONINTRA4X4_H_ +#define VPX_VP8_COMMON_RECONINTRA4X4_H_ #include "vp8/common/blockd.h" #ifdef __cplusplus @@ -31,7 +31,7 @@ static INLINE void intra_prediction_down_copy(MACROBLOCKD *xd, *dst_ptr2 = *src_ptr; } -void vp8_intra4x4_predict(unsigned char *Above, unsigned char *yleft, +void vp8_intra4x4_predict(unsigned char *above, unsigned char *yleft, int left_stride, B_PREDICTION_MODE b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); @@ -42,4 +42,4 @@ void vp8_init_intra4x4_predictors_internal(void); } // extern "C" #endif -#endif // VP8_COMMON_RECONINTRA4X4_H_ +#endif // VPX_VP8_COMMON_RECONINTRA4X4_H_ diff --git a/media/libvpx/libvpx/vp8/common/rtcd_defs.pl b/media/libvpx/libvpx/vp8/common/rtcd_defs.pl index 3df745f75a84..8452b5e85451 100644 --- a/media/libvpx/libvpx/vp8/common/rtcd_defs.pl +++ b/media/libvpx/libvpx/vp8/common/rtcd_defs.pl @@ -31,10 +31,10 @@ forward_decls qw/vp8_common_forward_decls/; # # Dequant # -add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc"; +add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *DQC"; specialize qw/vp8_dequantize_b mmx neon msa mmi/; -add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride"; +add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *dest, int stride"; specialize qw/vp8_dequant_idct_add mmx neon dspr2 msa mmi/; add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs"; @@ -46,20 +46,20 @@ specialize qw/vp8_dequant_idct_add_uv_block sse2 neon dspr2 msa mmi/; # # Loopfilter # -add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi"; specialize qw/vp8_loop_filter_mbv sse2 neon dspr2 msa mmi/; -add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi"; specialize qw/vp8_loop_filter_bv sse2 neon dspr2 msa mmi/; -add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi"; specialize qw/vp8_loop_filter_mbh sse2 neon dspr2 msa mmi/; -add_proto 
qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, struct loop_filter_info *lfi"; specialize qw/vp8_loop_filter_bh sse2 neon dspr2 msa mmi/; -add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit"; +add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y_ptr, int y_stride, const unsigned char *blimit"; specialize qw/vp8_loop_filter_simple_mbv sse2 neon msa mmi/; $vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c; $vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2; @@ -67,7 +67,7 @@ $vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon; $vp8_loop_filter_simple_mbv_msa=vp8_loop_filter_simple_vertical_edge_msa; $vp8_loop_filter_simple_mbv_mmi=vp8_loop_filter_simple_vertical_edge_mmi; -add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit"; +add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y_ptr, int y_stride, const unsigned char *blimit"; specialize qw/vp8_loop_filter_simple_mbh sse2 neon msa mmi/; $vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c; $vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2; @@ -75,7 +75,7 @@ $vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon; $vp8_loop_filter_simple_mbh_msa=vp8_loop_filter_simple_horizontal_edge_msa; $vp8_loop_filter_simple_mbh_mmi=vp8_loop_filter_simple_horizontal_edge_mmi; -add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit"; +add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y_ptr, int y_stride, const unsigned char *blimit"; specialize qw/vp8_loop_filter_simple_bv sse2 neon msa mmi/; $vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c; $vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2; @@ -83,7 +83,7 @@ $vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon; $vp8_loop_filter_simple_bv_msa=vp8_loop_filter_bvs_msa; $vp8_loop_filter_simple_bv_mmi=vp8_loop_filter_bvs_mmi; -add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit"; +add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y_ptr, int y_stride, const unsigned char *blimit"; specialize qw/vp8_loop_filter_simple_bh sse2 neon msa mmi/; $vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c; $vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2; @@ -95,31 +95,31 @@ $vp8_loop_filter_simple_bh_mmi=vp8_loop_filter_bhs_mmi; # IDCT # #idct16 -add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride"; +add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride"; specialize qw/vp8_short_idct4x4llm mmx neon dspr2 msa mmi/; #iwalsh1 -add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output"; +add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *mb_dqcoeff"; specialize qw/vp8_short_inv_walsh4x4_1 dspr2/; #iwalsh16 -add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output"; +add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *mb_dqcoeff"; specialize qw/vp8_short_inv_walsh4x4 sse2 neon dspr2 msa mmi/; #idct1_scalar_add 
-add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride"; +add_proto qw/void vp8_dc_only_idct_add/, "short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride"; specialize qw/vp8_dc_only_idct_add mmx neon dspr2 msa mmi/; # # RECON # -add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; +add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride"; specialize qw/vp8_copy_mem16x16 sse2 neon dspr2 msa mmi/; -add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; +add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride"; specialize qw/vp8_copy_mem8x8 mmx neon dspr2 msa mmi/; -add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; +add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride"; specialize qw/vp8_copy_mem8x4 mmx neon dspr2 msa mmi/; # @@ -127,11 +127,11 @@ specialize qw/vp8_copy_mem8x4 mmx neon dspr2 msa mmi/; # if (vpx_config("CONFIG_POSTPROC") eq "yes") { - add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; + add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y_1, int u_1, int v_1, int alpha, int stride"; - add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; + add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y_1, int u_1, int v_1, int alpha, int stride"; - add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; + add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y_1, int u_1, int v_1, int alpha, int stride"; add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"; specialize qw/vp8_filter_by_weight16x16 sse2 msa/; @@ -145,29 +145,29 @@ if (vpx_config("CONFIG_POSTPROC") eq "yes") { # # Subpixel # -add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"; specialize qw/vp8_sixtap_predict16x16 sse2 ssse3 neon dspr2 msa mmi/; -add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"; specialize qw/vp8_sixtap_predict8x8 sse2 ssse3 neon dspr2 msa mmi/; -add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"; specialize qw/vp8_sixtap_predict8x4 sse2 ssse3 neon dspr2 msa mmi/; -add_proto qw/void 
vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"; specialize qw/vp8_sixtap_predict4x4 mmx ssse3 neon dspr2 msa mmi/; -add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"; specialize qw/vp8_bilinear_predict16x16 sse2 ssse3 neon msa/; -add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"; specialize qw/vp8_bilinear_predict8x8 sse2 ssse3 neon msa/; -add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; -specialize qw/vp8_bilinear_predict8x4 mmx neon msa/; +add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"; +specialize qw/vp8_bilinear_predict8x4 sse2 neon msa/; -add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; -specialize qw/vp8_bilinear_predict4x4 mmx neon msa/; +add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch"; +specialize qw/vp8_bilinear_predict4x4 sse2 neon msa/; # # Encoder functions below this point. @@ -177,10 +177,8 @@ if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") { # # Block copy # -if ($opts{arch} =~ /x86/) { - add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n"; - specialize qw/vp8_copy32xn sse2 sse3/; -} +add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int src_stride, unsigned char *dst_ptr, int dst_stride, int height"; +specialize qw/vp8_copy32xn sse2 sse3/; # # Forward DCT @@ -223,7 +221,7 @@ specialize qw/vp8_full_search_sad sse3 sse4_1/; $vp8_full_search_sad_sse3=vp8_full_search_sadx3; $vp8_full_search_sad_sse4_1=vp8_full_search_sadx8; -add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"; +add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int error_per_bit, int search_range, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"; specialize qw/vp8_refining_search_sad sse2 msa/; $vp8_refining_search_sad_sse2=vp8_refining_search_sadx4; $vp8_refining_search_sad_msa=vp8_refining_search_sadx4; diff --git a/media/libvpx/libvpx/vp8/common/setupintrarecon.h b/media/libvpx/libvpx/vp8/common/setupintrarecon.h index f3ffa16607a3..903a536aed8d 100644 --- a/media/libvpx/libvpx/vp8/common/setupintrarecon.h +++ b/media/libvpx/libvpx/vp8/common/setupintrarecon.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
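The rtcd_defs.pl edits above are mostly parameter renames to match the C implementations, plus two functional changes: vp8_bilinear_predict8x4 and vp8_bilinear_predict4x4 move from MMX to SSE2 specializations, and vp8_copy32xn is no longer declared only for x86 targets. For readers unfamiliar with the run-time CPU detection (RTCD) mechanism, here is a loose, hypothetical C sketch of the dispatch that rtcd.pl generates from an add_proto/specialize pair; the real generated vp8_rtcd.h differs in naming and detail, and the SSE2 stand-in below is plain C:

    #include <stdio.h>
    #include <string.h>

    /* One definition per specialized variant. */
    static void copy16x16_c(unsigned char *src, int src_stride,
                            unsigned char *dst, int dst_stride) {
      int r;
      for (r = 0; r < 16; ++r) {
        memcpy(dst, src, 16);
        src += src_stride;
        dst += dst_stride;
      }
    }

    static void copy16x16_sse2(unsigned char *src, int src_stride,
                               unsigned char *dst, int dst_stride) {
      /* Stand-in for the assembly implementation. */
      copy16x16_c(src, src_stride, dst, dst_stride);
    }

    /* The generated header exposes a function pointer that setup assigns
     * once, based on detected CPU features. */
    static void (*vp8_copy_mem16x16_ptr)(unsigned char *, int,
                                         unsigned char *, int);

    static void rtcd_setup(int have_sse2) {
      vp8_copy_mem16x16_ptr = copy16x16_c;
      if (have_sse2) vp8_copy_mem16x16_ptr = copy16x16_sse2;
    }

    int main(void) {
      unsigned char src[16 * 16] = { 1 }, dst[16 * 16];
      rtcd_setup(1);
      vp8_copy_mem16x16_ptr(src, 16, dst, 16);
      printf("%d\n", dst[0]);
      return 0;
    }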
*/ -#ifndef VP8_COMMON_SETUPINTRARECON_H_ -#define VP8_COMMON_SETUPINTRARECON_H_ +#ifndef VPX_VP8_COMMON_SETUPINTRARECON_H_ +#define VPX_VP8_COMMON_SETUPINTRARECON_H_ #include "./vpx_config.h" #include "vpx_scale/yv12config.h" @@ -37,4 +37,4 @@ static INLINE void setup_intra_recon_left(unsigned char *y_buffer, } // extern "C" #endif -#endif // VP8_COMMON_SETUPINTRARECON_H_ +#endif // VPX_VP8_COMMON_SETUPINTRARECON_H_ diff --git a/media/libvpx/libvpx/vp8/common/swapyv12buffer.h b/media/libvpx/libvpx/vp8/common/swapyv12buffer.h index 0ee9a52ceb01..e37c471f632d 100644 --- a/media/libvpx/libvpx/vp8/common/swapyv12buffer.h +++ b/media/libvpx/libvpx/vp8/common/swapyv12buffer.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_SWAPYV12BUFFER_H_ -#define VP8_COMMON_SWAPYV12BUFFER_H_ +#ifndef VPX_VP8_COMMON_SWAPYV12BUFFER_H_ +#define VPX_VP8_COMMON_SWAPYV12BUFFER_H_ #include "vpx_scale/yv12config.h" @@ -24,4 +24,4 @@ void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, } // extern "C" #endif -#endif // VP8_COMMON_SWAPYV12BUFFER_H_ +#endif // VPX_VP8_COMMON_SWAPYV12BUFFER_H_ diff --git a/media/libvpx/libvpx/vp8/common/systemdependent.h b/media/libvpx/libvpx/vp8/common/systemdependent.h index 3d44e37cf249..83a5513aaef6 100644 --- a/media/libvpx/libvpx/vp8/common/systemdependent.h +++ b/media/libvpx/libvpx/vp8/common/systemdependent.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_SYSTEMDEPENDENT_H_ -#define VP8_COMMON_SYSTEMDEPENDENT_H_ +#ifndef VPX_VP8_COMMON_SYSTEMDEPENDENT_H_ +#define VPX_VP8_COMMON_SYSTEMDEPENDENT_H_ #include "vpx_config.h" @@ -24,4 +24,4 @@ void vp8_machine_specific_config(struct VP8Common *); } // extern "C" #endif -#endif // VP8_COMMON_SYSTEMDEPENDENT_H_ +#endif // VPX_VP8_COMMON_SYSTEMDEPENDENT_H_ diff --git a/media/libvpx/libvpx/vp8/common/threading.h b/media/libvpx/libvpx/vp8/common/threading.h index b082bf109e41..f9213693864e 100644 --- a/media/libvpx/libvpx/vp8/common/threading.h +++ b/media/libvpx/libvpx/vp8/common/threading.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_COMMON_THREADING_H_ -#define VP8_COMMON_THREADING_H_ +#ifndef VPX_VP8_COMMON_THREADING_H_ +#define VPX_VP8_COMMON_THREADING_H_ #include "./vpx_config.h" @@ -185,7 +185,7 @@ static inline int sem_destroy(sem_t *sem) { #endif -#if ARCH_X86 || ARCH_X86_64 +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 #include "vpx_ports/x86.h" #else #define x86_pause_hint() @@ -209,4 +209,4 @@ static INLINE void vp8_atomic_spin_wait( } // extern "C" #endif -#endif // VP8_COMMON_THREADING_H_ +#endif // VPX_VP8_COMMON_THREADING_H_ diff --git a/media/libvpx/libvpx/vp8/common/treecoder.c b/media/libvpx/libvpx/vp8/common/treecoder.c index 9feb40a5a7c2..f1e78f43210d 100644 --- a/media/libvpx/libvpx/vp8/common/treecoder.c +++ b/media/libvpx/libvpx/vp8/common/treecoder.c @@ -12,6 +12,7 @@ #include #include "vp8/common/treecoder.h" +#include "vpx/vpx_integer.h" static void tree2tok(struct vp8_token_struct *const p, vp8_tree t, int i, int v, int L) { @@ -79,7 +80,7 @@ void vp8_tree_probs_from_distribution(int n, /* n = size of alphabet */ vp8_prob probs[/* n-1 */], unsigned int branch_ct[/* n-1 */][2], const unsigned int num_events[/* n */], - unsigned int Pfac, int rd) { + unsigned int Pfactor, int Round) { const int tree_len = n - 1; int t = 0; @@ -89,10 +90,10 @@ void vp8_tree_probs_from_distribution(int n, /* n = size of alphabet */ const unsigned int *const c = branch_ct[t]; const unsigned int tot = c[0] + c[1]; - assert(tot < (1 << 24)); /* no overflow below */ - if (tot) { - const unsigned int p = ((c[0] * Pfac) + (rd ? tot >> 1 : 0)) / tot; + const unsigned int p = + (unsigned int)(((uint64_t)c[0] * Pfactor) + (Round ? tot >> 1 : 0)) / + tot; probs[t] = p < 256 ? (p ? p : 1) : 255; /* agree w/old version for now */ } else { probs[t] = vp8_prob_half; diff --git a/media/libvpx/libvpx/vp8/common/treecoder.h b/media/libvpx/libvpx/vp8/common/treecoder.h index d8503cf3f8fa..d7d8d0ead00c 100644 --- a/media/libvpx/libvpx/vp8/common/treecoder.h +++ b/media/libvpx/libvpx/vp8/common/treecoder.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_TREECODER_H_ -#define VP8_COMMON_TREECODER_H_ +#ifndef VPX_VP8_COMMON_TREECODER_H_ +#define VPX_VP8_COMMON_TREECODER_H_ #ifdef __cplusplus extern "C" { @@ -32,7 +32,7 @@ typedef const bool_coder_spec c_bool_coder_spec; typedef const bool_writer c_bool_writer; typedef const bool_reader c_bool_reader; -#define vp8_complement(x) (255 - x) +#define vp8_complement(x) (255 - (x)) /* We build coding trees compactly in arrays. Each node of the tree is a pair of vp8_tree_indices. @@ -79,4 +79,4 @@ void vp8bc_tree_probs_from_distribution(int n, /* n = size of alphabet */ } // extern "C" #endif -#endif // VP8_COMMON_TREECODER_H_ +#endif // VPX_VP8_COMMON_TREECODER_H_ diff --git a/media/libvpx/libvpx/vp8/common/vp8_entropymodedata.h b/media/libvpx/libvpx/vp8/common/vp8_entropymodedata.h index 9a81ebfe624d..3fc942e05094 100644 --- a/media/libvpx/libvpx/vp8/common/vp8_entropymodedata.h +++ b/media/libvpx/libvpx/vp8/common/vp8_entropymodedata.h @@ -6,10 +6,10 @@ * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
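The treecoder.c hunk above drops the assert(tot < (1 << 24)) guard and instead widens the multiply to 64 bits. A worked example of the overflow the old 32-bit expression hits at exactly that limit, assuming the typical Pfactor of 256 (the precise parenthesization in the patch may differ; this shows the intended arithmetic):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const unsigned int c0 = 1u << 24; /* 0-branch count at the old limit */
      const unsigned int tot = c0 + 1;  /* total count for this node */
      const unsigned int Pfactor = 256;
      const unsigned int Round = 1;

      /* 32-bit arithmetic: c0 * Pfactor is 2^32, which wraps to 0. */
      const unsigned int p32 = ((c0 * Pfactor) + (Round ? tot >> 1 : 0)) / tot;

      /* 64-bit arithmetic: multiply and divide before narrowing. */
      const unsigned int p64 = (unsigned int)(
          (((uint64_t)c0 * Pfactor) + (Round ? tot >> 1 : 0)) / tot);

      printf("32-bit: %u  64-bit: %u\n", p32, p64); /* prints 0 and 256 */
      return 0;
    }

The existing clamp on the next line of treecoder.c, probs[t] = p < 256 ? (p ? p : 1) : 255, then maps the 256 back into the valid probability range.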
-*/ + */ -#ifndef VP8_COMMON_VP8_ENTROPYMODEDATA_H_ -#define VP8_COMMON_VP8_ENTROPYMODEDATA_H_ +#ifndef VPX_VP8_COMMON_VP8_ENTROPYMODEDATA_H_ +#define VPX_VP8_COMMON_VP8_ENTROPYMODEDATA_H_ #ifdef __cplusplus extern "C" { @@ -169,4 +169,4 @@ const vp8_prob } // extern "C" #endif -#endif // VP8_COMMON_VP8_ENTROPYMODEDATA_H_ +#endif // VPX_VP8_COMMON_VP8_ENTROPYMODEDATA_H_ diff --git a/media/libvpx/libvpx/vp8/common/vp8_loopfilter.c b/media/libvpx/libvpx/vp8/common/vp8_loopfilter.c index 9fb1250650f0..9c9e5f351bcb 100644 --- a/media/libvpx/libvpx/vp8/common/vp8_loopfilter.c +++ b/media/libvpx/libvpx/vp8/common/vp8_loopfilter.c @@ -219,13 +219,11 @@ void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context, } void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context, - int mb_row, int post_ystride, int post_uvstride, - unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr) { + int mb_row, int post_ystride, + unsigned char *y_ptr) { int mb_col; int filter_level; loop_filter_info_n *lfi_n = &cm->lf_info; - (void)post_uvstride; for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { int skip_lf = (mode_info_context->mbmi.mode != B_PRED && @@ -258,8 +256,6 @@ void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context, } y_ptr += 16; - u_ptr += 8; - v_ptr += 8; mode_info_context++; /* step to next MB */ } diff --git a/media/libvpx/libvpx/vp8/common/vp8_skin_detection.h b/media/libvpx/libvpx/vp8/common/vp8_skin_detection.h index 4d27f5eb2ea6..ef0e4ae4fe70 100644 --- a/media/libvpx/libvpx/vp8/common/vp8_skin_detection.h +++ b/media/libvpx/libvpx/vp8/common/vp8_skin_detection.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_COMMON_SKIN_DETECTION_H_ -#define VP8_COMMON_SKIN_DETECTION_H_ +#ifndef VPX_VP8_COMMON_VP8_SKIN_DETECTION_H_ +#define VPX_VP8_COMMON_VP8_SKIN_DETECTION_H_ #include "vp8/encoder/onyx_int.h" #include "vpx/vpx_integer.h" @@ -44,4 +44,4 @@ void vp8_compute_skin_map(struct VP8_COMP *const cpi, FILE *yuv_skinmap_file); } // extern "C" #endif -#endif // VP8_COMMON_SKIN_DETECTION_H_ +#endif // VPX_VP8_COMMON_VP8_SKIN_DETECTION_H_ diff --git a/media/libvpx/libvpx/vp8/common/x86/bilinear_filter_sse2.c b/media/libvpx/libvpx/vp8/common/x86/bilinear_filter_sse2.c new file mode 100644 index 000000000000..9bf65d8045e8 --- /dev/null +++ b/media/libvpx/libvpx/vp8/common/x86/bilinear_filter_sse2.c @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include + +#include "./vp8_rtcd.h" +#include "./vpx_config.h" +#include "vp8/common/filter.h" +#include "vpx_dsp/x86/mem_sse2.h" +#include "vpx_ports/mem.h" + +static INLINE void horizontal_16x16(uint8_t *src, const int stride, + uint16_t *dst, const int xoffset) { + int h; + const __m128i zero = _mm_setzero_si128(); + + if (xoffset == 0) { + for (h = 0; h < 17; ++h) { + const __m128i a = _mm_loadu_si128((__m128i *)src); + const __m128i a_lo = _mm_unpacklo_epi8(a, zero); + const __m128i a_hi = _mm_unpackhi_epi8(a, zero); + _mm_store_si128((__m128i *)dst, a_lo); + _mm_store_si128((__m128i *)(dst + 8), a_hi); + src += stride; + dst += 16; + } + return; + } + + { + const __m128i round_factor = _mm_set1_epi16(1 << (VP8_FILTER_SHIFT - 1)); + const __m128i hfilter_0 = _mm_set1_epi16(vp8_bilinear_filters[xoffset][0]); + const __m128i hfilter_1 = _mm_set1_epi16(vp8_bilinear_filters[xoffset][1]); + + for (h = 0; h < 17; ++h) { + const __m128i a = _mm_loadu_si128((__m128i *)src); + const __m128i a_lo = _mm_unpacklo_epi8(a, zero); + const __m128i a_hi = _mm_unpackhi_epi8(a, zero); + const __m128i a_lo_filtered = _mm_mullo_epi16(a_lo, hfilter_0); + const __m128i a_hi_filtered = _mm_mullo_epi16(a_hi, hfilter_0); + + const __m128i b = _mm_loadu_si128((__m128i *)(src + 1)); + const __m128i b_lo = _mm_unpacklo_epi8(b, zero); + const __m128i b_hi = _mm_unpackhi_epi8(b, zero); + const __m128i b_lo_filtered = _mm_mullo_epi16(b_lo, hfilter_1); + const __m128i b_hi_filtered = _mm_mullo_epi16(b_hi, hfilter_1); + + const __m128i sum_lo = _mm_add_epi16(a_lo_filtered, b_lo_filtered); + const __m128i sum_hi = _mm_add_epi16(a_hi_filtered, b_hi_filtered); + + const __m128i compensated_lo = _mm_add_epi16(sum_lo, round_factor); + const __m128i compensated_hi = _mm_add_epi16(sum_hi, round_factor); + + const __m128i shifted_lo = + _mm_srai_epi16(compensated_lo, VP8_FILTER_SHIFT); + const __m128i shifted_hi = + _mm_srai_epi16(compensated_hi, VP8_FILTER_SHIFT); + + _mm_store_si128((__m128i *)dst, shifted_lo); + _mm_store_si128((__m128i *)(dst + 8), shifted_hi); + src += stride; + dst += 16; + } + } +} + +static INLINE void vertical_16x16(uint16_t *src, uint8_t *dst, const int stride, + const int yoffset) { + int h; + + if (yoffset == 0) { + for (h = 0; h < 16; ++h) { + const __m128i row_lo = _mm_load_si128((__m128i *)src); + const __m128i row_hi = _mm_load_si128((__m128i *)(src + 8)); + const __m128i packed = _mm_packus_epi16(row_lo, row_hi); + _mm_store_si128((__m128i *)dst, packed); + src += 16; + dst += stride; + } + return; + } + + { + const __m128i round_factor = _mm_set1_epi16(1 << (VP8_FILTER_SHIFT - 1)); + const __m128i vfilter_0 = _mm_set1_epi16(vp8_bilinear_filters[yoffset][0]); + const __m128i vfilter_1 = _mm_set1_epi16(vp8_bilinear_filters[yoffset][1]); + + __m128i row_0_lo = _mm_load_si128((__m128i *)src); + __m128i row_0_hi = _mm_load_si128((__m128i *)(src + 8)); + src += 16; + for (h = 0; h < 16; ++h) { + const __m128i row_0_lo_filtered = _mm_mullo_epi16(row_0_lo, vfilter_0); + const __m128i row_0_hi_filtered = _mm_mullo_epi16(row_0_hi, vfilter_0); + + const __m128i row_1_lo = _mm_load_si128((__m128i *)src); + const __m128i row_1_hi = _mm_load_si128((__m128i *)(src + 8)); + const __m128i row_1_lo_filtered = _mm_mullo_epi16(row_1_lo, vfilter_1); + const __m128i row_1_hi_filtered = _mm_mullo_epi16(row_1_hi, vfilter_1); + + const __m128i sum_lo = + _mm_add_epi16(row_0_lo_filtered, row_1_lo_filtered); + const __m128i sum_hi = + _mm_add_epi16(row_0_hi_filtered, row_1_hi_filtered); + + const 
__m128i compensated_lo = _mm_add_epi16(sum_lo, round_factor); + const __m128i compensated_hi = _mm_add_epi16(sum_hi, round_factor); + + const __m128i shifted_lo = + _mm_srai_epi16(compensated_lo, VP8_FILTER_SHIFT); + const __m128i shifted_hi = + _mm_srai_epi16(compensated_hi, VP8_FILTER_SHIFT); + + const __m128i packed = _mm_packus_epi16(shifted_lo, shifted_hi); + _mm_store_si128((__m128i *)dst, packed); + row_0_lo = row_1_lo; + row_0_hi = row_1_hi; + src += 16; + dst += stride; + } + } +} + +void vp8_bilinear_predict16x16_sse2(uint8_t *src_ptr, int src_pixels_per_line, + int xoffset, int yoffset, uint8_t *dst_ptr, + int dst_pitch) { + DECLARE_ALIGNED(16, uint16_t, FData[16 * 17]); + + assert((xoffset | yoffset) != 0); + + horizontal_16x16(src_ptr, src_pixels_per_line, FData, xoffset); + + vertical_16x16(FData, dst_ptr, dst_pitch, yoffset); +} + +static INLINE void horizontal_8xN(uint8_t *src, const int stride, uint16_t *dst, + const int xoffset, const int height) { + int h; + const __m128i zero = _mm_setzero_si128(); + + if (xoffset == 0) { + for (h = 0; h < height; ++h) { + const __m128i a = _mm_loadl_epi64((__m128i *)src); + const __m128i a_u16 = _mm_unpacklo_epi8(a, zero); + _mm_store_si128((__m128i *)dst, a_u16); + src += stride; + dst += 8; + } + return; + } + + { + const __m128i round_factor = _mm_set1_epi16(1 << (VP8_FILTER_SHIFT - 1)); + const __m128i hfilter_0 = _mm_set1_epi16(vp8_bilinear_filters[xoffset][0]); + const __m128i hfilter_1 = _mm_set1_epi16(vp8_bilinear_filters[xoffset][1]); + + // Filter horizontally. Rather than load the whole array and transpose, load + // 16 values (overreading) and shift to set up the second value. Do an + // "extra" 9th line so the vertical pass has the necessary context. + for (h = 0; h < height; ++h) { + const __m128i a = _mm_loadu_si128((__m128i *)src); + const __m128i b = _mm_srli_si128(a, 1); + const __m128i a_u16 = _mm_unpacklo_epi8(a, zero); + const __m128i b_u16 = _mm_unpacklo_epi8(b, zero); + const __m128i a_filtered = _mm_mullo_epi16(a_u16, hfilter_0); + const __m128i b_filtered = _mm_mullo_epi16(b_u16, hfilter_1); + const __m128i sum = _mm_add_epi16(a_filtered, b_filtered); + const __m128i compensated = _mm_add_epi16(sum, round_factor); + const __m128i shifted = _mm_srai_epi16(compensated, VP8_FILTER_SHIFT); + _mm_store_si128((__m128i *)dst, shifted); + src += stride; + dst += 8; + } + } +} + +static INLINE void vertical_8xN(uint16_t *src, uint8_t *dst, const int stride, + const int yoffset, const int height) { + int h; + + if (yoffset == 0) { + for (h = 0; h < height; ++h) { + const __m128i row = _mm_load_si128((__m128i *)src); + const __m128i packed = _mm_packus_epi16(row, row); + _mm_storel_epi64((__m128i *)dst, packed); + src += 8; + dst += stride; + } + return; + } + + { + const __m128i round_factor = _mm_set1_epi16(1 << (VP8_FILTER_SHIFT - 1)); + const __m128i vfilter_0 = _mm_set1_epi16(vp8_bilinear_filters[yoffset][0]); + const __m128i vfilter_1 = _mm_set1_epi16(vp8_bilinear_filters[yoffset][1]); + + __m128i row_0 = _mm_load_si128((__m128i *)src); + src += 8; + for (h = 0; h < height; ++h) { + const __m128i row_1 = _mm_load_si128((__m128i *)src); + const __m128i row_0_filtered = _mm_mullo_epi16(row_0, vfilter_0); + const __m128i row_1_filtered = _mm_mullo_epi16(row_1, vfilter_1); + const __m128i sum = _mm_add_epi16(row_0_filtered, row_1_filtered); + const __m128i compensated = _mm_add_epi16(sum, round_factor); + const __m128i shifted = _mm_srai_epi16(compensated, VP8_FILTER_SHIFT); + const __m128i packed = 
_mm_packus_epi16(shifted, shifted); + _mm_storel_epi64((__m128i *)dst, packed); + row_0 = row_1; + src += 8; + dst += stride; + } + } +} + +void vp8_bilinear_predict8x8_sse2(uint8_t *src_ptr, int src_pixels_per_line, + int xoffset, int yoffset, uint8_t *dst_ptr, + int dst_pitch) { + DECLARE_ALIGNED(16, uint16_t, FData[8 * 9]); + + assert((xoffset | yoffset) != 0); + + horizontal_8xN(src_ptr, src_pixels_per_line, FData, xoffset, 9); + + vertical_8xN(FData, dst_ptr, dst_pitch, yoffset, 8); +} + +void vp8_bilinear_predict8x4_sse2(uint8_t *src_ptr, int src_pixels_per_line, + int xoffset, int yoffset, uint8_t *dst_ptr, + int dst_pitch) { + DECLARE_ALIGNED(16, uint16_t, FData[8 * 5]); + + assert((xoffset | yoffset) != 0); + + horizontal_8xN(src_ptr, src_pixels_per_line, FData, xoffset, 5); + + vertical_8xN(FData, dst_ptr, dst_pitch, yoffset, 4); +} + +static INLINE void horizontal_4x4(uint8_t *src, const int stride, uint16_t *dst, + const int xoffset) { + int h; + const __m128i zero = _mm_setzero_si128(); + + if (xoffset == 0) { + for (h = 0; h < 5; ++h) { + const __m128i a = load_unaligned_u32(src); + const __m128i a_u16 = _mm_unpacklo_epi8(a, zero); + _mm_storel_epi64((__m128i *)dst, a_u16); + src += stride; + dst += 4; + } + return; + } + + { + const __m128i round_factor = _mm_set1_epi16(1 << (VP8_FILTER_SHIFT - 1)); + const __m128i hfilter_0 = _mm_set1_epi16(vp8_bilinear_filters[xoffset][0]); + const __m128i hfilter_1 = _mm_set1_epi16(vp8_bilinear_filters[xoffset][1]); + + for (h = 0; h < 5; ++h) { + const __m128i a = load_unaligned_u32(src); + const __m128i b = load_unaligned_u32(src + 1); + const __m128i a_u16 = _mm_unpacklo_epi8(a, zero); + const __m128i b_u16 = _mm_unpacklo_epi8(b, zero); + const __m128i a_filtered = _mm_mullo_epi16(a_u16, hfilter_0); + const __m128i b_filtered = _mm_mullo_epi16(b_u16, hfilter_1); + const __m128i sum = _mm_add_epi16(a_filtered, b_filtered); + const __m128i compensated = _mm_add_epi16(sum, round_factor); + const __m128i shifted = _mm_srai_epi16(compensated, VP8_FILTER_SHIFT); + _mm_storel_epi64((__m128i *)dst, shifted); + src += stride; + dst += 4; + } + } +} + +static INLINE void vertical_4x4(uint16_t *src, uint8_t *dst, const int stride, + const int yoffset) { + int h; + + if (yoffset == 0) { + for (h = 0; h < 4; h += 2) { + const __m128i row = _mm_load_si128((__m128i *)src); + __m128i packed = _mm_packus_epi16(row, row); + store_unaligned_u32(dst, packed); + dst += stride; + packed = _mm_srli_si128(packed, 4); + store_unaligned_u32(dst, packed); + dst += stride; + src += 8; + } + return; + } + + { + const __m128i round_factor = _mm_set1_epi16(1 << (VP8_FILTER_SHIFT - 1)); + const __m128i vfilter_0 = _mm_set1_epi16(vp8_bilinear_filters[yoffset][0]); + const __m128i vfilter_1 = _mm_set1_epi16(vp8_bilinear_filters[yoffset][1]); + + for (h = 0; h < 4; h += 2) { + const __m128i row_0 = _mm_load_si128((__m128i *)src); + const __m128i row_1 = _mm_loadu_si128((__m128i *)(src + 4)); + const __m128i row_0_filtered = _mm_mullo_epi16(row_0, vfilter_0); + const __m128i row_1_filtered = _mm_mullo_epi16(row_1, vfilter_1); + const __m128i sum = _mm_add_epi16(row_0_filtered, row_1_filtered); + const __m128i compensated = _mm_add_epi16(sum, round_factor); + const __m128i shifted = _mm_srai_epi16(compensated, VP8_FILTER_SHIFT); + __m128i packed = _mm_packus_epi16(shifted, shifted); + storeu_uint32(dst, _mm_cvtsi128_si32(packed)); + packed = _mm_srli_si128(packed, 4); + dst += stride; + storeu_uint32(dst, _mm_cvtsi128_si32(packed)); + dst += stride; + src += 8; + } + } +} 
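With the 4x4 helpers above in place, every block size in the new bilinear_filter_sse2.c follows the same two-pass scheme: a horizontal filter into an aligned 16-bit intermediate buffer with one extra row, then a vertical filter back down to 8 bits. A scalar model of that scheme, assuming the coefficient pairs of vp8/common/filter.c's vp8_bilinear_filters (each pair sums to 128) and VP8_FILTER_SHIFT == 7; the function is illustrative, not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    #define SHIFT 7
    #define ROUND (1 << (SHIFT - 1))

    /* Two-pass W x H bilinear prediction. The horizontal pass reads one
     * pixel to the right and produces one extra row, so src must extend
     * one column and one row past the block, as the real frame border
     * does. */
    static void bilinear_ref(const uint8_t *src, int src_stride,
                             uint8_t *dst, int dst_stride, int W, int H,
                             const int hf[2], const int vf[2]) {
      uint16_t tmp[17 * 16]; /* enough for up to 16x16 plus the extra row */
      int r, c;
      for (r = 0; r < H + 1; ++r) {
        for (c = 0; c < W; ++c) {
          tmp[r * W + c] = (uint16_t)(
              (src[c] * hf[0] + src[c + 1] * hf[1] + ROUND) >> SHIFT);
        }
        src += src_stride;
      }
      for (r = 0; r < H; ++r) {
        for (c = 0; c < W; ++c) {
          const int v = (tmp[r * W + c] * vf[0] +
                         tmp[(r + 1) * W + c] * vf[1] + ROUND) >> SHIFT;
          dst[c] = (uint8_t)(v > 255 ? 255 : v);
        }
        dst += dst_stride;
      }
    }

    int main(void) {
      uint8_t src[17 * 17], dst[16 * 16];
      const int hf[2] = { 112, 16 }, vf[2] = { 64, 64 }; /* offsets 1, 4 */
      int i;
      for (i = 0; i < 17 * 17; ++i) src[i] = (uint8_t)i;
      bilinear_ref(src, 17, dst, 16, 16, 16, hf, vf);
      printf("%d\n", dst[0]);
      return 0;
    }

The assert((xoffset | yoffset) != 0) in the SSE2 wrappers encodes that at least one offset is non-zero; full-pel motion presumably goes through the copy paths instead, which is also why the old MMX/SSE2 bilinear assembly removed later in this patch could be dropped entirely.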
+ +void vp8_bilinear_predict4x4_sse2(uint8_t *src_ptr, int src_pixels_per_line, + int xoffset, int yoffset, uint8_t *dst_ptr, + int dst_pitch) { + DECLARE_ALIGNED(16, uint16_t, FData[4 * 5]); + + assert((xoffset | yoffset) != 0); + + horizontal_4x4(src_ptr, src_pixels_per_line, FData, xoffset); + + vertical_4x4(FData, dst_ptr, dst_pitch, yoffset); +} diff --git a/media/libvpx/libvpx/vp8/common/x86/filter_x86.c b/media/libvpx/libvpx/vp8/common/x86/filter_x86.c deleted file mode 100644 index 2405342f02a3..000000000000 --- a/media/libvpx/libvpx/vp8/common/x86/filter_x86.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp8/common/x86/filter_x86.h" - -DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) = { - { 128, 128, 128, 128, 0, 0, 0, 0 }, { 112, 112, 112, 112, 16, 16, 16, 16 }, - { 96, 96, 96, 96, 32, 32, 32, 32 }, { 80, 80, 80, 80, 48, 48, 48, 48 }, - { 64, 64, 64, 64, 64, 64, 64, 64 }, { 48, 48, 48, 48, 80, 80, 80, 80 }, - { 32, 32, 32, 32, 96, 96, 96, 96 }, { 16, 16, 16, 16, 112, 112, 112, 112 } -}; - -DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) = { - { 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 }, - { 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 }, - { 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 }, - { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }, - { 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 }, - { 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 }, - { 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 } -}; diff --git a/media/libvpx/libvpx/vp8/common/x86/filter_x86.h b/media/libvpx/libvpx/vp8/common/x86/filter_x86.h deleted file mode 100644 index d282841bee4d..000000000000 --- a/media/libvpx/libvpx/vp8/common/x86/filter_x86.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP8_COMMON_X86_FILTER_X86_H_ -#define VP8_COMMON_X86_FILTER_X86_H_ - -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with - * duplicated values */ - -/* duplicated 4x */ -extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]); - -/* duplicated 8x */ -extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_X86_FILTER_X86_H_ diff --git a/media/libvpx/libvpx/vp8/common/x86/idct_blk_sse2.c b/media/libvpx/libvpx/vp8/common/x86/idct_blk_sse2.c index 8aefb2799702..897ed5b65272 100644 --- a/media/libvpx/libvpx/vp8/common/x86/idct_blk_sse2.c +++ b/media/libvpx/libvpx/vp8/common/x86/idct_blk_sse2.c @@ -42,43 +42,43 @@ void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst, } void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq, - unsigned char *dstu, - unsigned char *dstv, int stride, + unsigned char *dst_u, + unsigned char *dst_v, int stride, char *eobs) { if (((short *)(eobs))[0]) { if (((short *)(eobs))[0] & 0xfefe) { - vp8_idct_dequant_full_2x_sse2(q, dq, dstu, stride); + vp8_idct_dequant_full_2x_sse2(q, dq, dst_u, stride); } else { - vp8_idct_dequant_0_2x_sse2(q, dq, dstu, stride); + vp8_idct_dequant_0_2x_sse2(q, dq, dst_u, stride); } } q += 32; - dstu += stride * 4; + dst_u += stride * 4; if (((short *)(eobs))[1]) { if (((short *)(eobs))[1] & 0xfefe) { - vp8_idct_dequant_full_2x_sse2(q, dq, dstu, stride); + vp8_idct_dequant_full_2x_sse2(q, dq, dst_u, stride); } else { - vp8_idct_dequant_0_2x_sse2(q, dq, dstu, stride); + vp8_idct_dequant_0_2x_sse2(q, dq, dst_u, stride); } } q += 32; if (((short *)(eobs))[2]) { if (((short *)(eobs))[2] & 0xfefe) { - vp8_idct_dequant_full_2x_sse2(q, dq, dstv, stride); + vp8_idct_dequant_full_2x_sse2(q, dq, dst_v, stride); } else { - vp8_idct_dequant_0_2x_sse2(q, dq, dstv, stride); + vp8_idct_dequant_0_2x_sse2(q, dq, dst_v, stride); } } q += 32; - dstv += stride * 4; + dst_v += stride * 4; if (((short *)(eobs))[3]) { if (((short *)(eobs))[3] & 0xfefe) { - vp8_idct_dequant_full_2x_sse2(q, dq, dstv, stride); + vp8_idct_dequant_full_2x_sse2(q, dq, dst_v, stride); } else { - vp8_idct_dequant_0_2x_sse2(q, dq, dstv, stride); + vp8_idct_dequant_0_2x_sse2(q, dq, dst_v, stride); } } } diff --git a/media/libvpx/libvpx/vp8/common/x86/iwalsh_sse2.asm b/media/libvpx/libvpx/vp8/common/x86/iwalsh_sse2.asm index 82d7bf91a696..0043e93b0613 100644 --- a/media/libvpx/libvpx/vp8/common/x86/iwalsh_sse2.asm +++ b/media/libvpx/libvpx/vp8/common/x86/iwalsh_sse2.asm @@ -13,7 +13,7 @@ SECTION .text -;void vp8_short_inv_walsh4x4_sse2(short *input, short *output) +;void vp8_short_inv_walsh4x4_sse2(short *input, short *mb_dqcoeff) global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE sym(vp8_short_inv_walsh4x4_sse2): push rbp diff --git a/media/libvpx/libvpx/vp8/common/x86/loopfilter_x86.c b/media/libvpx/libvpx/vp8/common/x86/loopfilter_x86.c index a187d51fbe58..cfa13a2ddb23 100644 --- a/media/libvpx/libvpx/vp8/common/x86/loopfilter_x86.c +++ b/media/libvpx/libvpx/vp8/common/x86/loopfilter_x86.c @@ -22,7 +22,7 @@ #define prototype_simple_loopfilter(sym) \ void sym(unsigned char *y, int ystride, const unsigned char *blimit) -#if HAVE_SSE2 && ARCH_X86_64 +#if HAVE_SSE2 && VPX_ARCH_X86_64 prototype_loopfilter(vp8_loop_filter_bv_y_sse2); prototype_loopfilter(vp8_loop_filter_bh_y_sse2); #else @@ -68,7 +68,7 @@ void vp8_loop_filter_mbv_sse2(unsigned 
char *y_ptr, unsigned char *u_ptr, void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); #else @@ -101,7 +101,7 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); #else diff --git a/media/libvpx/libvpx/vp8/common/x86/subpixel_mmx.asm b/media/libvpx/libvpx/vp8/common/x86/subpixel_mmx.asm index 1f3a2baca00d..67bcd0cbd713 100644 --- a/media/libvpx/libvpx/vp8/common/x86/subpixel_mmx.asm +++ b/media/libvpx/libvpx/vp8/common/x86/subpixel_mmx.asm @@ -10,8 +10,6 @@ %include "vpx_ports/x86_abi_support.asm" -extern sym(vp8_bilinear_filters_x86_8) - %define BLOCK_HEIGHT_WIDTH 4 %define vp8_filter_weight 128 @@ -205,280 +203,6 @@ sym(vp8_filter_block1dc_v6_mmx): ret -;void bilinear_predict8x4_mmx -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict8x4_mmx) PRIVATE -sym(vp8_bilinear_predict8x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; - - movsxd rax, dword ptr arg(2) ;xoffset - mov rdi, arg(4) ;dst_ptr ; - - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - shl rax, 5 - - mov rsi, arg(0) ;src_ptr ; - add rax, rcx - - movsxd rdx, dword ptr arg(5) ;dst_pitch - movq mm1, [rax] ; - - movq mm2, [rax+16] ; - movsxd rax, dword ptr arg(3) ;yoffset - - pxor mm0, mm0 ; - shl rax, 5 - - add rax, rcx - lea rcx, [rdi+rdx*4] ; - - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; - - ; get the first horizontal line done ; - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - add rsi, rdx ; next line -.next_row_8x4: - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - movq mm5, mm7 ; - movq mm6, mm7 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 - - pmullw mm5, [rax] ; - pmullw mm6, [rax] ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - - pmullw mm3, [rax+16] ; - pmullw mm4, 
[rax+16] ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - packuswb mm3, mm4 - - movq [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, rdx ; next line - add rdi, dword ptr arg(5) ;dst_pitch ; -%else - movsxd r8, dword ptr arg(5) ;dst_pitch - add rsi, rdx ; next line - add rdi, r8 -%endif - cmp rdi, rcx ; - jne .next_row_8x4 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void bilinear_predict4x4_mmx -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict4x4_mmx) PRIVATE -sym(vp8_bilinear_predict4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; - - movsxd rax, dword ptr arg(2) ;xoffset - mov rdi, arg(4) ;dst_ptr ; - - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - shl rax, 5 - - add rax, rcx ; HFilter - mov rsi, arg(0) ;src_ptr ; - - movsxd rdx, dword ptr arg(5) ;ldst_pitch - movq mm1, [rax] ; - - movq mm2, [rax+16] ; - movsxd rax, dword ptr arg(3) ;yoffset - - pxor mm0, mm0 ; - shl rax, 5 - - add rax, rcx - lea rcx, [rdi+rdx*4] ; - - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; - - ; get the first horizontal line done ; - movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - - pmullw mm3, mm1 ; - movd mm5, [rsi+1] ; - - punpcklbw mm5, mm0 ; - pmullw mm5, mm2 ; - - paddw mm3, mm5 ; - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - movq mm7, mm3 ; - packuswb mm7, mm0 ; - - add rsi, rdx ; next line -.next_row_4x4: - movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - - pmullw mm3, mm1 ; - movd mm5, [rsi+1] ; - - punpcklbw mm5, mm0 ; - pmullw mm5, mm2 ; - - paddw mm3, mm5 ; - - movq mm5, mm7 ; - punpcklbw mm5, mm0 ; - - pmullw mm5, [rax] ; - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - movq mm7, mm3 ; - - packuswb mm7, mm0 ; - - pmullw mm3, [rax+16] ; - paddw mm3, mm5 ; - - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - packuswb mm3, mm0 - movd [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, rdx ; next line - add rdi, dword ptr arg(5) ;dst_pitch ; -%else - movsxd r8, dword ptr arg(5) ;dst_pitch ; - add rsi, rdx ; next line - add rdi, r8 -%endif - - cmp rdi, rcx ; - jne .next_row_4x4 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - - SECTION_RODATA align 16 rd: diff --git a/media/libvpx/libvpx/vp8/common/x86/subpixel_sse2.asm b/media/libvpx/libvpx/vp8/common/x86/subpixel_sse2.asm index 6e70f6d2e80a..51c015e3df06 100644 --- a/media/libvpx/libvpx/vp8/common/x86/subpixel_sse2.asm +++ b/media/libvpx/libvpx/vp8/common/x86/subpixel_sse2.asm @@ -10,7 +10,6 @@ %include "vpx_ports/x86_abi_support.asm" -extern sym(vp8_bilinear_filters_x86_8) %define BLOCK_HEIGHT_WIDTH 4 %define VP8_FILTER_WEIGHT 128 @@ -958,419 +957,6 @@ sym(vp8_unpack_block1d16_h6_sse2): ret -;void vp8_bilinear_predict16x16_sse2 -;( -; unsigned char *src_ptr, -; 
int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -extern sym(vp8_bilinear_filters_x86_8) -global sym(vp8_bilinear_predict16x16_sse2) PRIVATE -sym(vp8_bilinear_predict16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset] - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset] - - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - movsxd rax, dword ptr arg(2) ;xoffset - - cmp rax, 0 ;skip first_pass filter if xoffset=0 - je .b16x16_sp_only - - shl rax, 5 - add rax, rcx ;HFilter - - mov rdi, arg(4) ;dst_ptr - mov rsi, arg(0) ;src_ptr - movsxd rdx, dword ptr arg(5) ;dst_pitch - - movdqa xmm1, [rax] - movdqa xmm2, [rax+16] - - movsxd rax, dword ptr arg(3) ;yoffset - - cmp rax, 0 ;skip second_pass filter if yoffset=0 - je .b16x16_fp_only - - shl rax, 5 - add rax, rcx ;VFilter - - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line - - pxor xmm0, xmm0 - -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(5) ;dst_pitch -%endif - ; get the first horizontal line done - movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movdqa xmm4, xmm3 ; make a copy of current line - - punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 - punpckhbw xmm4, xmm0 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm1 - - movdqu xmm5, [rsi+1] - movdqa xmm6, xmm5 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - - pmullw xmm5, xmm2 - pmullw xmm6, xmm2 - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - movdqa xmm7, xmm3 - packuswb xmm7, xmm4 - - add rsi, rdx ; next line -.next_row: - movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movdqa xmm4, xmm3 ; make a copy of current line - - punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 - punpckhbw xmm4, xmm0 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm1 - - movdqu xmm5, [rsi+1] - movdqa xmm6, xmm5 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - - pmullw xmm5, xmm2 - pmullw xmm6, xmm2 - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - movdqa xmm5, xmm7 - movdqa xmm6, xmm7 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - - pmullw xmm5, [rax] - pmullw xmm6, [rax] - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - movdqa xmm7, xmm3 - packuswb xmm7, xmm4 - - pmullw xmm3, [rax+16] - pmullw xmm4, [rax+16] - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - packuswb xmm3, xmm4 - movdqa [rdi], xmm3 ; store the results in the destination - - add rsi, rdx ; next line -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(5) ;dst_pitch -%else - add rdi, r8 -%endif - - cmp rdi, rcx - jne .next_row - - jmp .done - -.b16x16_sp_only: - movsxd rax, dword ptr arg(3) ;yoffset - shl rax, 5 - add rax, rcx ;VFilter - - mov rdi, arg(4) ;dst_ptr - mov rsi, arg(0) ;src_ptr - movsxd rdx, dword ptr arg(5) ;dst_pitch - - movdqa xmm1, [rax] - movdqa xmm2, [rax+16] - - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - - pxor xmm0, xmm0 - - ; get the first horizontal line done - movdqu xmm7, [rsi] ; xx 00 01 02 03 04 05 06 
07 08 09 10 11 12 13 14 - - add rsi, rax ; next line -.next_row_spo: - movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - - movdqa xmm5, xmm7 - movdqa xmm6, xmm7 - - movdqa xmm4, xmm3 ; make a copy of current line - movdqa xmm7, xmm3 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 - punpckhbw xmm4, xmm0 - - pmullw xmm5, xmm1 - pmullw xmm6, xmm1 - pmullw xmm3, xmm2 - pmullw xmm4, xmm2 - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - packuswb xmm3, xmm4 - movdqa [rdi], xmm3 ; store the results in the destination - - add rsi, rax ; next line - add rdi, rdx ;dst_pitch - cmp rdi, rcx - jne .next_row_spo - - jmp .done - -.b16x16_fp_only: - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - pxor xmm0, xmm0 - -.next_row_fpo: - movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movdqa xmm4, xmm3 ; make a copy of current line - - punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 - punpckhbw xmm4, xmm0 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm1 - - movdqu xmm5, [rsi+1] - movdqa xmm6, xmm5 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - - pmullw xmm5, xmm2 - pmullw xmm6, xmm2 - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - packuswb xmm3, xmm4 - movdqa [rdi], xmm3 ; store the results in the destination - - add rsi, rax ; next line - add rdi, rdx ; dst_pitch - cmp rdi, rcx - jne .next_row_fpo - -.done: - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_bilinear_predict8x8_sse2 -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict8x8_sse2) PRIVATE -sym(vp8_bilinear_predict8x8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 144 ; reserve 144 bytes - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset] - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset] - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - - mov rsi, arg(0) ;src_ptr - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line - - ;Read 9-line unaligned data in and put them on stack. This gives a big - ;performance boost. 
- movdqu xmm0, [rsi] - lea rax, [rdx + rdx*2] - movdqu xmm1, [rsi+rdx] - movdqu xmm2, [rsi+rdx*2] - add rsi, rax - movdqu xmm3, [rsi] - movdqu xmm4, [rsi+rdx] - movdqu xmm5, [rsi+rdx*2] - add rsi, rax - movdqu xmm6, [rsi] - movdqu xmm7, [rsi+rdx] - - movdqa XMMWORD PTR [rsp], xmm0 - - movdqu xmm0, [rsi+rdx*2] - - movdqa XMMWORD PTR [rsp+16], xmm1 - movdqa XMMWORD PTR [rsp+32], xmm2 - movdqa XMMWORD PTR [rsp+48], xmm3 - movdqa XMMWORD PTR [rsp+64], xmm4 - movdqa XMMWORD PTR [rsp+80], xmm5 - movdqa XMMWORD PTR [rsp+96], xmm6 - movdqa XMMWORD PTR [rsp+112], xmm7 - movdqa XMMWORD PTR [rsp+128], xmm0 - - movsxd rax, dword ptr arg(2) ;xoffset - shl rax, 5 - add rax, rcx ;HFilter - - mov rdi, arg(4) ;dst_ptr - movsxd rdx, dword ptr arg(5) ;dst_pitch - - movdqa xmm1, [rax] - movdqa xmm2, [rax+16] - - movsxd rax, dword ptr arg(3) ;yoffset - shl rax, 5 - add rax, rcx ;VFilter - - lea rcx, [rdi+rdx*8] - - movdqa xmm5, [rax] - movdqa xmm6, [rax+16] - - pxor xmm0, xmm0 - - ; get the first horizontal line done - movdqa xmm3, XMMWORD PTR [rsp] - movdqa xmm4, xmm3 ; make a copy of current line - psrldq xmm4, 1 - - punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07 - punpcklbw xmm4, xmm0 ; 01 02 03 04 05 06 07 08 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm2 - - paddw xmm3, xmm4 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - movdqa xmm7, xmm3 - add rsp, 16 ; next line -.next_row8x8: - movdqa xmm3, XMMWORD PTR [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 - movdqa xmm4, xmm3 ; make a copy of current line - psrldq xmm4, 1 - - punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07 - punpcklbw xmm4, xmm0 ; 01 02 03 04 05 06 07 08 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm2 - - paddw xmm3, xmm4 - pmullw xmm7, xmm5 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - movdqa xmm4, xmm3 - - pmullw xmm3, xmm6 - paddw xmm3, xmm7 - - movdqa xmm7, xmm4 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - packuswb xmm3, xmm0 - movq [rdi], xmm3 ; store the results in the destination - - add rsp, 16 ; next line - add rdi, rdx - - cmp rdi, rcx - jne .next_row8x8 - - ;add rsp, 144 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - SECTION_RODATA align 16 rd: diff --git a/media/libvpx/libvpx/vp8/common/x86/vp8_asm_stubs.c b/media/libvpx/libvpx/vp8/common/x86/vp8_asm_stubs.c index b9d087e20df6..7fb83c2d5e20 100644 --- a/media/libvpx/libvpx/vp8/common/x86/vp8_asm_stubs.c +++ b/media/libvpx/libvpx/vp8/common/x86/vp8_asm_stubs.c @@ -11,7 +11,6 @@ #include "vpx_config.h" #include "vp8_rtcd.h" #include "vpx_ports/mem.h" -#include "filter_x86.h" extern const short vp8_six_tap_x86[8][6 * 8]; @@ -95,9 +94,7 @@ void vp8_sixtap_predict4x4_mmx(unsigned char *src_ptr, int src_pixels_per_line, void vp8_sixtap_predict16x16_sse2(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, - int dst_pitch - - ) { + int dst_pitch) { DECLARE_ALIGNED(16, unsigned short, FData2[24 * 24]); /* Temp data bufffer used in filtering */ @@ -236,9 +233,7 @@ extern void vp8_filter_block1d4_v6_ssse3(unsigned char *src_ptr, void vp8_sixtap_predict16x16_ssse3(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, - int dst_pitch - - ) { + int dst_pitch) { DECLARE_ALIGNED(16, unsigned char, FData2[24 * 24]); if (xoffset) { @@ -351,8 +346,8 @@ void 
vp8_sixtap_predict4x4_ssse3(unsigned char *src_ptr, yoffset); } else { /* ssse3 second-pass only function couldn't handle (xoffset==0 && - * yoffset==0) case correctly. Add copy function here to guarantee - * six-tap function handles all possible offsets. */ + * yoffset==0) case correctly. Add copy function here to guarantee + * six-tap function handles all possible offsets. */ int r; for (r = 0; r < 4; ++r) { diff --git a/media/libvpx/libvpx/vp8/decoder/dboolhuff.h b/media/libvpx/libvpx/vp8/decoder/dboolhuff.h index 04c027cd786f..f2a18f0d90da 100644 --- a/media/libvpx/libvpx/vp8/decoder/dboolhuff.h +++ b/media/libvpx/libvpx/vp8/decoder/dboolhuff.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_DECODER_DBOOLHUFF_H_ -#define VP8_DECODER_DBOOLHUFF_H_ +#ifndef VPX_VP8_DECODER_DBOOLHUFF_H_ +#define VPX_VP8_DECODER_DBOOLHUFF_H_ #include #include @@ -76,7 +76,7 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { } { - register int shift = vp8_norm[range]; + const unsigned char shift = vp8_norm[(unsigned char)range]; range <<= shift; value <<= shift; count -= shift; @@ -127,4 +127,4 @@ static INLINE int vp8dx_bool_error(BOOL_DECODER *br) { } // extern "C" #endif -#endif // VP8_DECODER_DBOOLHUFF_H_ +#endif // VPX_VP8_DECODER_DBOOLHUFF_H_ diff --git a/media/libvpx/libvpx/vp8/decoder/decodeframe.c b/media/libvpx/libvpx/vp8/decoder/decodeframe.c index 077bd3da268d..67c254fa14cb 100644 --- a/media/libvpx/libvpx/vp8/decoder/decodeframe.c +++ b/media/libvpx/libvpx/vp8/decoder/decodeframe.c @@ -211,7 +211,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, vp8_short_inv_walsh4x4(&b->dqcoeff[0], xd->qcoeff); memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); } else { - b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; + b->dqcoeff[0] = (short)(b->qcoeff[0] * xd->dequant_y2[0]); vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], xd->qcoeff); memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } @@ -610,8 +610,7 @@ static void decode_mb_rows(VP8D_COMP *pbi) { lf_dst[2]); } else { vp8_loop_filter_row_simple(pc, lf_mic, mb_row - 1, recon_y_stride, - recon_uv_stride, lf_dst[0], lf_dst[1], - lf_dst[2]); + lf_dst[0]); } if (mb_row > 1) { yv12_extend_frame_left_right_c(yv12_fb_new, eb_dst[0], eb_dst[1], @@ -647,8 +646,7 @@ static void decode_mb_rows(VP8D_COMP *pbi) { lf_dst[2]); } else { vp8_loop_filter_row_simple(pc, lf_mic, mb_row - 1, recon_y_stride, - recon_uv_stride, lf_dst[0], lf_dst[1], - lf_dst[2]); + lf_dst[0]); } yv12_extend_frame_left_right_c(yv12_fb_new, eb_dst[0], eb_dst[1], @@ -674,7 +672,7 @@ static unsigned int read_partition_size(VP8D_COMP *pbi, static int read_is_valid(const unsigned char *start, size_t len, const unsigned char *end) { - return (start + len > start && start + len <= end); + return len != 0 && end > start && len <= (size_t)(end - start); } static unsigned int read_available_partition_size( @@ -686,6 +684,12 @@ static unsigned int read_available_partition_size( const unsigned char *partition_size_ptr = token_part_sizes + i * 3; unsigned int partition_size = 0; ptrdiff_t bytes_left = fragment_end - fragment_start; + if (bytes_left < 0) { + vpx_internal_error( + &pc->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt partition. No bytes left %d.", + (int)bytes_left); + } /* Calculate the length of this partition. The last partition * size is implicit. 
If the partition size can't be read, then * either use the remaining data in the buffer (for EC mode) @@ -750,6 +754,9 @@ static void setup_token_decoder(VP8D_COMP *pbi, ptrdiff_t ext_first_part_size = token_part_sizes - pbi->fragments.ptrs[0] + 3 * (num_token_partitions - 1); + if (fragment_size < (unsigned int)ext_first_part_size) + vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, + "Corrupted fragment size %d", fragment_size); fragment_size -= (unsigned int)ext_first_part_size; if (fragment_size > 0) { pbi->fragments.sizes[0] = (unsigned int)ext_first_part_size; @@ -767,6 +774,9 @@ static void setup_token_decoder(VP8D_COMP *pbi, first_fragment_end, fragment_end, fragment_idx - 1, num_token_partitions); pbi->fragments.sizes[fragment_idx] = (unsigned int)partition_size; + if (fragment_size < (unsigned int)partition_size) + vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, + "Corrupted fragment size %d", fragment_size); fragment_size -= (unsigned int)partition_size; assert(fragment_idx <= num_token_partitions); if (fragment_size > 0) { @@ -1208,7 +1218,11 @@ int vp8_decode_frame(VP8D_COMP *pbi) { if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) && pc->multi_token_partition != ONE_PARTITION) { unsigned int thread; - vp8mt_decode_mb_rows(pbi, xd); + if (vp8mt_decode_mb_rows(pbi, xd)) { + vp8_decoder_remove_threads(pbi); + pbi->restart_threads = 1; + vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, NULL); + } vp8_yv12_extend_frame_borders(yv12_fb_new); for (thread = 0; thread < pbi->decoding_thread_count; ++thread) { corrupt_tokens |= pbi->mb_row_di[thread].mbd.corrupted; diff --git a/media/libvpx/libvpx/vp8/decoder/decodemv.c b/media/libvpx/libvpx/vp8/decoder/decodemv.c index 8e9600c6da9b..94373852dc10 100644 --- a/media/libvpx/libvpx/vp8/decoder/decodemv.c +++ b/media/libvpx/libvpx/vp8/decoder/decodemv.c @@ -485,10 +485,7 @@ static void read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x) { } } -static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi, - MB_MODE_INFO *mbmi) { - (void)mbmi; - +static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi) { /* Read the Macroblock segmentation map if it is being updated explicitly * this frame (reset to 0 above by default) * By default on a key frame reset all MBs to segment 0 @@ -537,7 +534,7 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) { int mb_num = mb_row * pbi->common.mb_cols + mb_col; #endif - decode_mb_mode_mvs(pbi, mi, &mi->mbmi); + decode_mb_mode_mvs(pbi, mi); #if CONFIG_ERROR_CONCEALMENT /* look for corruption. set mvs_corrupt_from_mb to the current diff --git a/media/libvpx/libvpx/vp8/decoder/decodemv.h b/media/libvpx/libvpx/vp8/decoder/decodemv.h index f33b07351d3c..504e943d8558 100644 --- a/media/libvpx/libvpx/vp8/decoder/decodemv.h +++ b/media/libvpx/libvpx/vp8/decoder/decodemv.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_DECODER_DECODEMV_H_ -#define VP8_DECODER_DECODEMV_H_ +#ifndef VPX_VP8_DECODER_DECODEMV_H_ +#define VPX_VP8_DECODER_DECODEMV_H_ #include "onyxd_int.h" @@ -23,4 +23,4 @@ void vp8_decode_mode_mvs(VP8D_COMP *); } // extern "C" #endif -#endif // VP8_DECODER_DECODEMV_H_ +#endif // VPX_VP8_DECODER_DECODEMV_H_ diff --git a/media/libvpx/libvpx/vp8/decoder/decoderthreading.h b/media/libvpx/libvpx/vp8/decoder/decoderthreading.h index c563cf6e93aa..3d49bc831746 100644 --- a/media/libvpx/libvpx/vp8/decoder/decoderthreading.h +++ b/media/libvpx/libvpx/vp8/decoder/decoderthreading.h @@ -8,15 +8,15 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_DECODER_DECODERTHREADING_H_ -#define VP8_DECODER_DECODERTHREADING_H_ +#ifndef VPX_VP8_DECODER_DECODERTHREADING_H_ +#define VPX_VP8_DECODER_DECODERTHREADING_H_ #ifdef __cplusplus extern "C" { #endif #if CONFIG_MULTITHREAD -void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd); +int vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd); void vp8_decoder_remove_threads(VP8D_COMP *pbi); void vp8_decoder_create_threads(VP8D_COMP *pbi); void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows); @@ -27,4 +27,4 @@ void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows); } // extern "C" #endif -#endif // VP8_DECODER_DECODERTHREADING_H_ +#endif // VPX_VP8_DECODER_DECODERTHREADING_H_ diff --git a/media/libvpx/libvpx/vp8/decoder/detokenize.h b/media/libvpx/libvpx/vp8/decoder/detokenize.h index f0b125444f03..410a431ba0fb 100644 --- a/media/libvpx/libvpx/vp8/decoder/detokenize.h +++ b/media/libvpx/libvpx/vp8/decoder/detokenize.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_DECODER_DETOKENIZE_H_ -#define VP8_DECODER_DETOKENIZE_H_ +#ifndef VPX_VP8_DECODER_DETOKENIZE_H_ +#define VPX_VP8_DECODER_DETOKENIZE_H_ #include "onyxd_int.h" @@ -24,4 +24,4 @@ int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *); } // extern "C" #endif -#endif // VP8_DECODER_DETOKENIZE_H_ +#endif // VPX_VP8_DECODER_DETOKENIZE_H_ diff --git a/media/libvpx/libvpx/vp8/decoder/ec_types.h b/media/libvpx/libvpx/vp8/decoder/ec_types.h index 0ab08b649ad7..84feb269df07 100644 --- a/media/libvpx/libvpx/vp8/decoder/ec_types.h +++ b/media/libvpx/libvpx/vp8/decoder/ec_types.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_DECODER_EC_TYPES_H_ -#define VP8_DECODER_EC_TYPES_H_ +#ifndef VPX_VP8_DECODER_EC_TYPES_H_ +#define VPX_VP8_DECODER_EC_TYPES_H_ #ifdef __cplusplus extern "C" { @@ -34,7 +34,9 @@ typedef struct { /* Structure used to hold all the overlaps of a macroblock. The overlaps of a * macroblock is further divided into block overlaps. */ -typedef struct { B_OVERLAP overlaps[16]; } MB_OVERLAP; +typedef struct { + B_OVERLAP overlaps[16]; +} MB_OVERLAP; /* Structure for keeping track of motion vectors and which reference frame they * refer to. Used for motion vector interpolation. 
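The decoderthreading.h hunk above changes vp8mt_decode_mb_rows() from void to int so that a corrupt frame detected on a worker thread can propagate to the caller. For readability, here is the matching decodeframe.c call site from earlier in this patch, reflowed with comments added:

/* Call-site pattern for the new int return (reflowed from the
 * decodeframe.c hunk above; comments are editorial). */
if (vp8mt_decode_mb_rows(pbi, xd)) {
  /* A worker hit an error: tear the pool down and ask for a rebuild. */
  vp8_decoder_remove_threads(pbi);
  pbi->restart_threads = 1;
  vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, NULL);
}
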
@@ -48,4 +50,4 @@ typedef struct { } // extern "C" #endif -#endif // VP8_DECODER_EC_TYPES_H_ +#endif // VPX_VP8_DECODER_EC_TYPES_H_ diff --git a/media/libvpx/libvpx/vp8/decoder/error_concealment.c b/media/libvpx/libvpx/vp8/decoder/error_concealment.c index e22141492c85..85982e4de335 100644 --- a/media/libvpx/libvpx/vp8/decoder/error_concealment.c +++ b/media/libvpx/libvpx/vp8/decoder/error_concealment.c @@ -147,8 +147,8 @@ static void calculate_overlaps_mb(B_OVERLAP *b_overlaps, union b_mode_info *bmi, } } -void vp8_calculate_overlaps(MB_OVERLAP *overlap_ul, int mb_rows, int mb_cols, - union b_mode_info *bmi, int b_row, int b_col) { +static void calculate_overlaps(MB_OVERLAP *overlap_ul, int mb_rows, int mb_cols, + union b_mode_info *bmi, int b_row, int b_col) { MB_OVERLAP *mb_overlap; int row, col, rel_row, rel_col; int new_row, new_col; @@ -280,9 +280,9 @@ static void calc_prev_mb_overlaps(MB_OVERLAP *overlaps, MODE_INFO *prev_mi, int sub_col; for (sub_row = 0; sub_row < 4; ++sub_row) { for (sub_col = 0; sub_col < 4; ++sub_col) { - vp8_calculate_overlaps(overlaps, mb_rows, mb_cols, - &(prev_mi->bmi[sub_row * 4 + sub_col]), - 4 * mb_row + sub_row, 4 * mb_col + sub_col); + calculate_overlaps(overlaps, mb_rows, mb_cols, + &(prev_mi->bmi[sub_row * 4 + sub_col]), + 4 * mb_row + sub_row, 4 * mb_col + sub_col); } } } diff --git a/media/libvpx/libvpx/vp8/decoder/error_concealment.h b/media/libvpx/libvpx/vp8/decoder/error_concealment.h index 89c78c14420a..608a79f1891b 100644 --- a/media/libvpx/libvpx/vp8/decoder/error_concealment.h +++ b/media/libvpx/libvpx/vp8/decoder/error_concealment.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_DECODER_ERROR_CONCEALMENT_H_ -#define VP8_DECODER_ERROR_CONCEALMENT_H_ +#ifndef VPX_VP8_DECODER_ERROR_CONCEALMENT_H_ +#define VPX_VP8_DECODER_ERROR_CONCEALMENT_H_ #include "onyxd_int.h" #include "ec_types.h" @@ -38,4 +38,4 @@ void vp8_interpolate_motion(MACROBLOCKD *mb, int mb_row, int mb_col, } // extern "C" #endif -#endif // VP8_DECODER_ERROR_CONCEALMENT_H_ +#endif // VPX_VP8_DECODER_ERROR_CONCEALMENT_H_ diff --git a/media/libvpx/libvpx/vp8/decoder/onyxd_if.c b/media/libvpx/libvpx/vp8/decoder/onyxd_if.c index f516eb0c78b3..765d2ec83e9c 100644 --- a/media/libvpx/libvpx/vp8/decoder/onyxd_if.c +++ b/media/libvpx/libvpx/vp8/decoder/onyxd_if.c @@ -16,6 +16,7 @@ #include "onyxd_int.h" #include "vpx_mem/vpx_mem.h" #include "vp8/common/alloccommon.h" +#include "vp8/common/common.h" #include "vp8/common/loopfilter.h" #include "vp8/common/swapyv12buffer.h" #include "vp8/common/threading.h" @@ -36,7 +37,7 @@ #if CONFIG_ERROR_CONCEALMENT #include "error_concealment.h" #endif -#if ARCH_ARM +#if VPX_ARCH_ARM #include "vpx_ports/arm.h" #endif @@ -301,12 +302,9 @@ static int check_fragments_for_errors(VP8D_COMP *pbi) { return 1; } -int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, - const uint8_t *source, int64_t time_stamp) { +int vp8dx_receive_compressed_data(VP8D_COMP *pbi, int64_t time_stamp) { VP8_COMMON *cm = &pbi->common; int retcode = -1; - (void)size; - (void)source; pbi->common.error.error_code = VPX_CODEC_OK; @@ -321,21 +319,6 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, pbi->dec_fb_ref[GOLDEN_FRAME] = &cm->yv12_fb[cm->gld_fb_idx]; pbi->dec_fb_ref[ALTREF_FRAME] = &cm->yv12_fb[cm->alt_fb_idx]; - if (setjmp(pbi->common.error.jmp)) { - /* We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer 
as corrupted. - */ - cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; - - if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) { - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; - } - goto decode_exit; - } - - pbi->common.error.setjmp = 1; - retcode = vp8_decode_frame(pbi); if (retcode < 0) { @@ -344,6 +327,12 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, } pbi->common.error.error_code = VPX_CODEC_ERROR; + // Propagate the error info. + if (pbi->mb.error_info.error_code != 0) { + pbi->common.error.error_code = pbi->mb.error_info.error_code; + memcpy(pbi->common.error.detail, pbi->mb.error_info.detail, + sizeof(pbi->mb.error_info.detail)); + } goto decode_exit; } @@ -382,7 +371,6 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, pbi->last_time_stamp = time_stamp; decode_exit: - pbi->common.error.setjmp = 0; vpx_clear_system_state(); return retcode; } @@ -445,7 +433,7 @@ int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf) { #if CONFIG_MULTITHREAD if (setjmp(fb->pbi[0]->common.error.jmp)) { vp8_remove_decoder_instances(fb); - memset(fb->pbi, 0, sizeof(fb->pbi)); + vp8_zero(fb->pbi); vpx_clear_system_state(); return VPX_CODEC_ERROR; } @@ -471,6 +459,6 @@ int vp8_remove_decoder_instances(struct frame_buffers *fb) { return VPX_CODEC_OK; } -int vp8dx_get_quantizer(const VP8D_COMP *cpi) { - return cpi->common.base_qindex; +int vp8dx_get_quantizer(const VP8D_COMP *pbi) { + return pbi->common.base_qindex; } diff --git a/media/libvpx/libvpx/vp8/decoder/onyxd_int.h b/media/libvpx/libvpx/vp8/decoder/onyxd_int.h index 5ecacdbb9723..cf2c066d9be0 100644 --- a/media/libvpx/libvpx/vp8/decoder/onyxd_int.h +++ b/media/libvpx/libvpx/vp8/decoder/onyxd_int.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_DECODER_ONYXD_INT_H_ -#define VP8_DECODER_ONYXD_INT_H_ +#ifndef VPX_VP8_DECODER_ONYXD_INT_H_ +#define VPX_VP8_DECODER_ONYXD_INT_H_ #include "vpx_config.h" #include "vp8/common/onyxd.h" @@ -31,7 +31,9 @@ typedef struct { void *ptr2; } DECODETHREAD_DATA; -typedef struct { MACROBLOCKD mbd; } MB_ROW_DEC; +typedef struct { + MACROBLOCKD mbd; +} MB_ROW_DEC; typedef struct { int enabled; @@ -116,11 +118,17 @@ typedef struct VP8D_COMP { vpx_decrypt_cb decrypt_cb; void *decrypt_state; +#if CONFIG_MULTITHREAD + // Restart threads on next frame if set to 1. + // This is set when error happens in multithreaded decoding and all threads + // are shut down. 
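The onyxd_if.c hunks above drop the per-call setjmp() guard and instead copy the per-thread error details into pbi->common.error. For context, libvpx's vpx_internal_error() records an error code and, when a setjmp guard is armed, longjmp()s back to it. A minimal self-contained sketch of that pattern, with simplified stand-in types rather than the real libvpx declarations:

#include <setjmp.h>
#include <stdio.h>

struct err_info {   /* illustrative stand-in for vpx_internal_error_info */
  int error_code;
  int setjmp_armed;
  jmp_buf jmp;
};

/* Record the code and unwind to the guard, as vpx_internal_error() does. */
static void internal_error(struct err_info *info, int code) {
  info->error_code = code;
  if (info->setjmp_armed) longjmp(info->jmp, 1);
}

static void decode_frame(struct err_info *info, int corrupt_input) {
  if (corrupt_input) internal_error(info, 4); /* e.g. "corrupt frame" */
  puts("frame decoded");
}

int main(void) {
  struct err_info info;
  info.error_code = 0;
  info.setjmp_armed = 0;
  if (setjmp(info.jmp)) {   /* re-entered via longjmp on error */
    info.setjmp_armed = 0;
    printf("decode failed, code %d\n", info.error_code);
    return 1;
  }
  info.setjmp_armed = 1;
  decode_frame(&info, 1);
  info.setjmp_armed = 0;
  return 0;
}
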
+ int restart_threads; +#endif } VP8D_COMP; void vp8cx_init_de_quantizer(VP8D_COMP *pbi); void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); -int vp8_decode_frame(VP8D_COMP *cpi); +int vp8_decode_frame(VP8D_COMP *pbi); int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf); int vp8_remove_decoder_instances(struct frame_buffers *fb); @@ -128,8 +136,8 @@ int vp8_remove_decoder_instances(struct frame_buffers *fb); #if CONFIG_DEBUG #define CHECK_MEM_ERROR(lval, expr) \ do { \ - lval = (expr); \ - if (!lval) \ + (lval) = (expr); \ + if (!(lval)) \ vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, \ "Failed to allocate " #lval " at %s:%d", __FILE__, \ __LINE__); \ @@ -137,8 +145,8 @@ int vp8_remove_decoder_instances(struct frame_buffers *fb); #else #define CHECK_MEM_ERROR(lval, expr) \ do { \ - lval = (expr); \ - if (!lval) \ + (lval) = (expr); \ + if (!(lval)) \ vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, \ "Failed to allocate " #lval); \ } while (0) @@ -148,4 +156,4 @@ int vp8_remove_decoder_instances(struct frame_buffers *fb); } // extern "C" #endif -#endif // VP8_DECODER_ONYXD_INT_H_ +#endif // VPX_VP8_DECODER_ONYXD_INT_H_ diff --git a/media/libvpx/libvpx/vp8/decoder/threading.c b/media/libvpx/libvpx/vp8/decoder/threading.c index d0213f75c12a..561922de3296 100644 --- a/media/libvpx/libvpx/vp8/decoder/threading.c +++ b/media/libvpx/libvpx/vp8/decoder/threading.c @@ -15,8 +15,8 @@ #endif #include "onyxd_int.h" #include "vpx_mem/vpx_mem.h" +#include "vp8/common/common.h" #include "vp8/common/threading.h" - #include "vp8/common/loopfilter.h" #include "vp8/common/extend.h" #include "vpx_ports/vpx_timer.h" @@ -400,16 +400,32 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset; xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset; - xd->pre.y_buffer = - ref_buffer[xd->mode_info_context->mbmi.ref_frame][0] + recon_yoffset; - xd->pre.u_buffer = - ref_buffer[xd->mode_info_context->mbmi.ref_frame][1] + recon_uvoffset; - xd->pre.v_buffer = - ref_buffer[xd->mode_info_context->mbmi.ref_frame][2] + recon_uvoffset; - /* propagate errors from reference frames */ xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; + if (xd->corrupted) { + // Move current decoding marcoblock to the end of row for all rows + // assigned to this thread, such that other threads won't be waiting. + for (; mb_row < pc->mb_rows; + mb_row += (pbi->decoding_thread_count + 1)) { + current_mb_col = &pbi->mt_current_mb_col[mb_row]; + vpx_atomic_store_release(current_mb_col, pc->mb_cols + nsync); + } + vpx_internal_error(&xd->error_info, VPX_CODEC_CORRUPT_FRAME, + "Corrupted reference frame"); + } + + if (xd->mode_info_context->mbmi.ref_frame >= LAST_FRAME) { + const MV_REFERENCE_FRAME ref = xd->mode_info_context->mbmi.ref_frame; + xd->pre.y_buffer = ref_buffer[ref][0] + recon_yoffset; + xd->pre.u_buffer = ref_buffer[ref][1] + recon_uvoffset; + xd->pre.v_buffer = ref_buffer[ref][2] + recon_uvoffset; + } else { + // ref_frame is INTRA_FRAME, pre buffer should not be used. 
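The threading.c hunk above fixes a potential deadlock: row threads synchronize through per-row atomic column counters, so a thread that abandons a row must still release everyone waiting on it. Storing pc->mb_cols + nsync publishes a column no waiter can be blocked beyond. A sketch of the protocol using the same vpx_util atomics as the hunk (the helper names here are hypothetical):

#include "vpx_util/vpx_atomics.h"

/* A row thread decoding row r spins until row r-1 is at least nsync
 * macroblock columns ahead of the column it wants to decode next. */
static void wait_for_row_above(const vpx_atomic_int *above_col, int mb_col,
                               int nsync) {
  while (mb_col + nsync > vpx_atomic_load_acquire(above_col)) {
    /* busy-wait; the real loop also inserts pause hints */
  }
}

/* On error, publish a column no waiter can require: mb_cols + nsync.
 * Every wait_for_row_above() on this row then falls through at once. */
static void release_row_waiters(vpx_atomic_int *my_col, int mb_cols,
                                int nsync) {
  vpx_atomic_store_release(my_col, mb_cols + nsync);
}
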
+ xd->pre.y_buffer = 0; + xd->pre.u_buffer = 0; + xd->pre.v_buffer = 0; + } mt_decode_macroblock(pbi, xd, 0); xd->left_available = 1; @@ -557,8 +573,9 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; } - /* signal end of frame decoding if this thread processed the last mb_row */ - if (last_mb_row == (pc->mb_rows - 1)) sem_post(&pbi->h_event_end_decoding); + /* signal end of decoding of current thread for current frame */ + if (last_mb_row + (int)pbi->decoding_thread_count + 1 >= pc->mb_rows) + sem_post(&pbi->h_event_end_decoding); } static THREAD_FUNCTION thread_decoding_proc(void *p_data) { @@ -576,7 +593,13 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) { } else { MACROBLOCKD *xd = &mbrd->mbd; xd->left_context = &mb_row_left_context; - + if (setjmp(xd->error_info.jmp)) { + xd->error_info.setjmp = 0; + // Signal the end of decoding for current thread. + sem_post(&pbi->h_event_end_decoding); + continue; + } + xd->error_info.setjmp = 1; mt_decode_mb_rows(pbi, xd, ithread + 1); } } @@ -738,25 +761,28 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) { /* Allocate memory for above_row buffers. */ CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows); - for (i = 0; i < pc->mb_rows; ++i) - CHECK_MEM_ERROR( - pbi->mt_yabove_row[i], - vpx_memalign( - 16, sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1)))); + for (i = 0; i < pc->mb_rows; ++i) { + CHECK_MEM_ERROR(pbi->mt_yabove_row[i], + vpx_memalign(16, sizeof(unsigned char) * + (width + (VP8BORDERINPIXELS << 1)))); + vp8_zero_array(pbi->mt_yabove_row[i], width + (VP8BORDERINPIXELS << 1)); + } CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows); - for (i = 0; i < pc->mb_rows; ++i) - CHECK_MEM_ERROR( - pbi->mt_uabove_row[i], - vpx_memalign(16, - sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); + for (i = 0; i < pc->mb_rows; ++i) { + CHECK_MEM_ERROR(pbi->mt_uabove_row[i], + vpx_memalign(16, sizeof(unsigned char) * + (uv_width + VP8BORDERINPIXELS))); + vp8_zero_array(pbi->mt_uabove_row[i], uv_width + VP8BORDERINPIXELS); + } CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows); - for (i = 0; i < pc->mb_rows; ++i) - CHECK_MEM_ERROR( - pbi->mt_vabove_row[i], - vpx_memalign(16, - sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); + for (i = 0; i < pc->mb_rows; ++i) { + CHECK_MEM_ERROR(pbi->mt_vabove_row[i], + vpx_memalign(16, sizeof(unsigned char) * + (uv_width + VP8BORDERINPIXELS))); + vp8_zero_array(pbi->mt_vabove_row[i], uv_width + VP8BORDERINPIXELS); + } /* Allocate memory for left_col buffers. */ CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows); @@ -812,7 +838,7 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi) { } } -void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) { +int vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) { VP8_COMMON *pc = &pbi->common; unsigned int i; int j; @@ -858,7 +884,22 @@ void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) { sem_post(&pbi->h_event_start_decoding[i]); } + if (setjmp(xd->error_info.jmp)) { + xd->error_info.setjmp = 0; + xd->corrupted = 1; + // Wait for other threads to finish. This prevents other threads decoding + // the current frame while the main thread starts decoding the next frame, + // which causes a data race. 
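The changes above and below turn h_event_end_decoding into a counting join: each of the decoding_thread_count + 1 participants (workers plus the main thread) posts the semaphore exactly once, on success or on the setjmp error path, and the main thread waits for every post before moving to the next frame. The shape of that barrier in plain POSIX, with an illustrative worker count:

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

#define NUM_WORKERS 3
static sem_t end_decoding;

static void *worker(void *arg) {
  (void)arg;
  /* ... decode the rows assigned to this thread ... */
  sem_post(&end_decoding); /* one post per participant, success or error */
  return NULL;
}

int main(void) {
  pthread_t tid[NUM_WORKERS];
  int i;
  sem_init(&end_decoding, 0, 0);
  for (i = 0; i < NUM_WORKERS; ++i)
    pthread_create(&tid[i], NULL, worker, NULL);
  /* the main thread decodes its own share of rows, then posts as well */
  sem_post(&end_decoding);
  /* collect NUM_WORKERS + 1 posts: the frame is done only when all
   * participants, including this thread, have signalled */
  for (i = 0; i < NUM_WORKERS + 1; ++i) sem_wait(&end_decoding);
  for (i = 0; i < NUM_WORKERS; ++i) pthread_join(tid[i], NULL);
  puts("frame complete");
  sem_destroy(&end_decoding);
  return 0;
}
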
+ for (i = 0; i < pbi->decoding_thread_count; ++i) sem_wait(&pbi->h_event_end_decoding); + return -1; + } + + xd->error_info.setjmp = 1; mt_decode_mb_rows(pbi, xd, 0); - sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ + for (i = 0; i < pbi->decoding_thread_count + 1; ++i) + sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ + + return 0; } diff --git a/media/libvpx/libvpx/vp8/decoder/treereader.h b/media/libvpx/libvpx/vp8/decoder/treereader.h index dd0f0986e970..4bf938a741dc 100644 --- a/media/libvpx/libvpx/vp8/decoder/treereader.h +++ b/media/libvpx/libvpx/vp8/decoder/treereader.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_DECODER_TREEREADER_H_ -#define VP8_DECODER_TREEREADER_H_ +#ifndef VPX_VP8_DECODER_TREEREADER_H_ +#define VPX_VP8_DECODER_TREEREADER_H_ #include "./vpx_config.h" #include "vp8/common/treecoder.h" @@ -30,7 +30,7 @@ typedef BOOL_DECODER vp8_reader; static INLINE int vp8_treed_read( vp8_reader *const r, /* !!! must return a 0 or 1 !!! */ vp8_tree t, const vp8_prob *const p) { - register vp8_tree_index i = 0; + vp8_tree_index i = 0; while ((i = t[i + vp8_read(r, p[i >> 1])]) > 0) { } @@ -42,4 +42,4 @@ static INLINE int vp8_treed_read( } // extern "C" #endif -#endif // VP8_DECODER_TREEREADER_H_ +#endif // VPX_VP8_DECODER_TREEREADER_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.c b/media/libvpx/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.c index c42005df6c81..6fc60805f655 100644 --- a/media/libvpx/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.c +++ b/media/libvpx/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.c @@ -9,6 +9,8 @@ */ #include <arm_neon.h> + +#include "./vp8_rtcd.h" #include "vp8/encoder/block.h" static const uint16_t inv_zig_zag[16] = { 1, 2, 6, 7, 3, 5, 8, 13, @@ -26,9 +28,11 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) { zig_zag1 = vld1q_u16(inv_zig_zag + 8); int16x8_t x0, x1, sz0, sz1, y0, y1; uint16x8_t eob0, eob1; +#ifndef __aarch64__ uint16x4_t eob_d16; uint32x2_t eob_d32; uint32x4_t eob_q32; +#endif // __arch64__ /* sign of z: z >> 15 */ sz0 = vshrq_n_s16(z0, 15); @@ -66,11 +70,17 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) { /* select the largest value */ eob0 = vmaxq_u16(eob0, eob1); +#ifdef __aarch64__ + *d->eob = (int8_t)vmaxvq_u16(eob0); +#else eob_d16 = vmax_u16(vget_low_u16(eob0), vget_high_u16(eob0)); eob_q32 = vmovl_u16(eob_d16); eob_d32 = vmax_u32(vget_low_u32(eob_q32), vget_high_u32(eob_q32)); eob_d32 = vpmax_u32(eob_d32, eob_d32); + vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0); +#endif // __aarch64__ + /* qcoeff = x */ vst1q_s16(d->qcoeff, x0); vst1q_s16(d->qcoeff + 8, x1); @@ -78,6 +88,4 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) { /* dqcoeff = x * dequant */ vst1q_s16(d->dqcoeff, vmulq_s16(dequant0, x0)); vst1q_s16(d->dqcoeff + 8, vmulq_s16(dequant1, x1)); - - vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0); } diff --git a/media/libvpx/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c b/media/libvpx/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c index 76853e65243f..99dff6b52090 100644 --- a/media/libvpx/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c +++ b/media/libvpx/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c @@ -10,6 +10,8 @@ #include <arm_neon.h> + +#include "./vp8_rtcd.h" + void vp8_short_fdct4x4_neon(int16_t *input, int16_t *output, int pitch) { int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; int16x4_t d16s16, d17s16, d26s16, dEmptys16; diff --git
a/media/libvpx/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c b/media/libvpx/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c index 8d6ea4ccbe90..02056f2f9000 100644 --- a/media/libvpx/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c +++ b/media/libvpx/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c @@ -9,6 +9,8 @@ */ #include <arm_neon.h> + +#include "./vp8_rtcd.h" #include "vpx_ports/arm.h" #ifdef VPX_INCOMPATIBLE_GCC diff --git a/media/libvpx/libvpx/vp8/encoder/bitstream.c b/media/libvpx/libvpx/vp8/encoder/bitstream.c index 8cacb6450557..3daa4e2c2a65 100644 --- a/media/libvpx/libvpx/vp8/encoder/bitstream.c +++ b/media/libvpx/libvpx/vp8/encoder/bitstream.c @@ -41,13 +41,6 @@ const int vp8cx_base_skip_false_prob[128] = { unsigned __int64 Sectionbits[500]; #endif -#ifdef VP8_ENTROPY_STATS -int intra_mode_stats[10][10][10]; -static unsigned int tree_update_hist[BLOCK_TYPES][COEF_BANDS] - [PREV_COEF_CONTEXTS][ENTROPY_NODES][2]; -extern unsigned int active_section; -#endif - #ifdef MODE_STATS int count_mb_seg[4] = { 0, 0, 0, 0 }; #endif @@ -178,7 +171,7 @@ void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount) { validate_buffer(w->buffer + w->pos, 1, w->buffer_end, w->error); - w->buffer[w->pos++] = (lowvalue >> (24 - offset)); + w->buffer[w->pos++] = (lowvalue >> (24 - offset)) & 0xff; lowvalue <<= offset; shift = count; lowvalue &= 0xffffff; @@ -428,10 +421,6 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { vp8_convert_rfct_to_prob(cpi); -#ifdef VP8_ENTROPY_STATS - active_section = 1; -#endif - if (pc->mb_no_coeff_skip) { int total_mbs = pc->mb_rows * pc->mb_cols; @@ -472,10 +461,6 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { xd->mb_to_top_edge = -((mb_row * 16) << 3); xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; -#ifdef VP8_ENTROPY_STATS - active_section = 9; -#endif - if (cpi->mb.e_mbd.update_mb_segmentation_map) { write_mb_features(w, mi, &cpi->mb.e_mbd); } @@ -486,9 +471,6 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { if (rf == INTRA_FRAME) { vp8_write(w, 0, cpi->prob_intra_coded); -#ifdef VP8_ENTROPY_STATS - active_section = 6; -#endif write_ymode(w, mode, pc->fc.ymode_prob); if (mode == B_PRED) { @@ -522,28 +504,13 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { vp8_clamp_mv2(&best_mv, xd); vp8_mv_ref_probs(mv_ref_p, ct); - -#ifdef VP8_ENTROPY_STATS - accum_mv_refs(mode, ct); -#endif } -#ifdef VP8_ENTROPY_STATS - active_section = 3; -#endif - write_mv_ref(w, mode, mv_ref_p); switch (mode) /* new, split require MVs */ { - case NEWMV: - -#ifdef VP8_ENTROPY_STATS - active_section = 5; -#endif - - write_mv(w, &mi->mv.as_mv, &best_mv, mvc); - break; + case NEWMV: write_mv(w, &mi->mv.as_mv, &best_mv, mvc); break; case SPLITMV: { int j = 0; @@ -574,9 +541,6 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) { write_sub_mv_ref(w, blockmode, vp8_sub_mv_ref_prob2[mv_contz]); if (blockmode == NEW4X4) { -#ifdef VP8_ENTROPY_STATS - active_section = 11; -#endif write_mv(w, &blockmv.as_mv, &best_mv, (const MV_CONTEXT *)mvc); } } while (++j < cpi->mb.partition_info->count); @@ -642,10 +606,6 @@ static void write_kfmodes(VP8_COMP *cpi) { const B_PREDICTION_MODE L = left_block_mode(m, i); const int bm = m->bmi[i].as_mode; -#ifdef VP8_ENTROPY_STATS - ++intra_mode_stats[A][L][bm]; -#endif - write_bmode(bc, bm, vp8_kf_bmode_prob[A][L]); } while (++i < 16); } @@ -973,10 +933,6 @@ void vp8_update_coef_probs(VP8_COMP *cpi) { vp8_write(w, u, upd); #endif -#ifdef VP8_ENTROPY_STATS - ++tree_update_hist[i][j][k][t][u];
-#endif - if (u) { /* send/use new probability */ @@ -990,16 +946,6 @@ void vp8_update_coef_probs(VP8_COMP *cpi) { } while (++t < ENTROPY_NODES); -/* Accum token counts for generation of default statistics */ -#ifdef VP8_ENTROPY_STATS - t = 0; - - do { - context_counters[i][j][k][t] += cpi->coef_counts[i][j][k][t]; - } while (++t < MAX_ENTROPY_TOKENS); - -#endif - } while (++k < PREV_COEF_CONTEXTS); } while (++j < COEF_BANDS); } while (++i < BLOCK_TYPES); @@ -1097,12 +1043,18 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, cx_data[1] = 0x01; cx_data[2] = 0x2a; + /* Pack scale and frame size into 16 bits. Store it 8 bits at a time. + * https://tools.ietf.org/html/rfc6386 + * 9.1. Uncompressed Data Chunk + * 16 bits : (2 bits Horizontal Scale << 14) | Width (14 bits) + * 16 bits : (2 bits Vertical Scale << 14) | Height (14 bits) + */ v = (pc->horiz_scale << 14) | pc->Width; - cx_data[3] = v; + cx_data[3] = v & 0xff; cx_data[4] = v >> 8; v = (pc->vert_scale << 14) | pc->Height; - cx_data[5] = v; + cx_data[5] = v & 0xff; cx_data[6] = v >> 8; extra_bytes_packed = 7; @@ -1286,15 +1238,6 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, if (pc->frame_type != KEY_FRAME) vp8_write_bit(bc, pc->refresh_last_frame); -#ifdef VP8_ENTROPY_STATS - - if (pc->frame_type == INTER_FRAME) - active_section = 0; - else - active_section = 7; - -#endif - vpx_clear_system_state(); #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING @@ -1308,25 +1251,13 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, vp8_update_coef_probs(cpi); #endif -#ifdef VP8_ENTROPY_STATS - active_section = 2; -#endif - /* Write out the mb_no_coeff_skip flag */ vp8_write_bit(bc, pc->mb_no_coeff_skip); if (pc->frame_type == KEY_FRAME) { write_kfmodes(cpi); - -#ifdef VP8_ENTROPY_STATS - active_section = 8; -#endif } else { pack_inter_mode_mvs(cpi); - -#ifdef VP8_ENTROPY_STATS - active_section = 1; -#endif } vp8_stop_encode(bc); @@ -1337,11 +1268,30 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, /* update frame tag */ { + /* Pack partition size, show frame, version and frame type into to 24 bits. + * Store it 8 bits at a time. + * https://tools.ietf.org/html/rfc6386 + * 9.1. Uncompressed Data Chunk + * The uncompressed data chunk comprises a common (for key frames and + * interframes) 3-byte frame tag that contains four fields, as follows: + * + * 1. A 1-bit frame type (0 for key frames, 1 for interframes). + * + * 2. A 3-bit version number (0 - 3 are defined as four different + * profiles with different decoding complexity; other values may be + * defined for future variants of the VP8 data format). + * + * 3. A 1-bit show_frame flag (0 when current frame is not for display, + * 1 when current frame is for display). + * + * 4. A 19-bit field containing the size of the first data partition in + * bytes + */ int v = (oh.first_partition_length_in_bytes << 5) | (oh.show_frame << 4) | (oh.version << 1) | oh.type; - dest[0] = v; - dest[1] = v >> 8; + dest[0] = v & 0xff; + dest[1] = (v >> 8) & 0xff; dest[2] = v >> 16; } @@ -1431,50 +1381,3 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, } #endif } - -#ifdef VP8_ENTROPY_STATS -void print_tree_update_probs() { - int i, j, k, l; - FILE *f = fopen("context.c", "a"); - int Sum; - fprintf(f, "\n/* Update probabilities for token entropy tree. 
*/\n\n"); - fprintf(f, - "const vp8_prob tree_update_probs[BLOCK_TYPES] [COEF_BANDS] " - "[PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {\n"); - - for (i = 0; i < BLOCK_TYPES; ++i) { - fprintf(f, " { \n"); - - for (j = 0; j < COEF_BANDS; ++j) { - fprintf(f, " {\n"); - - for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { - fprintf(f, " {"); - - for (l = 0; l < ENTROPY_NODES; ++l) { - Sum = - tree_update_hist[i][j][k][l][0] + tree_update_hist[i][j][k][l][1]; - - if (Sum > 0) { - if (((tree_update_hist[i][j][k][l][0] * 255) / Sum) > 0) - fprintf(f, "%3ld, ", - (tree_update_hist[i][j][k][l][0] * 255) / Sum); - else - fprintf(f, "%3ld, ", 1); - } else - fprintf(f, "%3ld, ", 128); - } - - fprintf(f, "},\n"); - } - - fprintf(f, " },\n"); - } - - fprintf(f, " },\n"); - } - - fprintf(f, "};\n"); - fclose(f); -} -#endif diff --git a/media/libvpx/libvpx/vp8/encoder/bitstream.h b/media/libvpx/libvpx/vp8/encoder/bitstream.h index ed45bff9e209..ee3f3e4aab8f 100644 --- a/media/libvpx/libvpx/vp8/encoder/bitstream.h +++ b/media/libvpx/libvpx/vp8/encoder/bitstream.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_BITSTREAM_H_ -#define VP8_ENCODER_BITSTREAM_H_ +#ifndef VPX_VP8_ENCODER_BITSTREAM_H_ +#define VPX_VP8_ENCODER_BITSTREAM_H_ #ifdef __cplusplus extern "C" { @@ -29,4 +29,4 @@ void vp8_update_coef_probs(struct VP8_COMP *cpi); } // extern "C" #endif -#endif // VP8_ENCODER_BITSTREAM_H_ +#endif // VPX_VP8_ENCODER_BITSTREAM_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/block.h b/media/libvpx/libvpx/vp8/encoder/block.h index f9a273bd2749..f0efd3e1e255 100644 --- a/media/libvpx/libvpx/vp8/encoder/block.h +++ b/media/libvpx/libvpx/vp8/encoder/block.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_ENCODER_BLOCK_H_ -#define VP8_ENCODER_BLOCK_H_ +#ifndef VPX_VP8_ENCODER_BLOCK_H_ +#define VPX_VP8_ENCODER_BLOCK_H_ #include "vp8/common/onyx.h" #include "vp8/common/blockd.h" @@ -166,4 +166,4 @@ typedef struct macroblock { } // extern "C" #endif -#endif // VP8_ENCODER_BLOCK_H_ +#endif // VPX_VP8_ENCODER_BLOCK_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/boolhuff.c b/media/libvpx/libvpx/vp8/encoder/boolhuff.c index 04f8db933112..819c2f22a0f2 100644 --- a/media/libvpx/libvpx/vp8/encoder/boolhuff.c +++ b/media/libvpx/libvpx/vp8/encoder/boolhuff.c @@ -15,10 +15,6 @@ unsigned __int64 Sectionbits[500]; #endif -#ifdef VP8_ENTROPY_STATS -unsigned int active_section = 0; -#endif - const unsigned int vp8_prob_cost[256] = { 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, 1129, 1099, 1072, 1046, 1023, 1000, 979, 959, 940, 922, 905, 889, 873, 858, @@ -42,26 +38,26 @@ const unsigned int vp8_prob_cost[256] = { 12, 10, 9, 7, 6, 4, 3, 1, 1 }; -void vp8_start_encode(BOOL_CODER *br, unsigned char *source, +void vp8_start_encode(BOOL_CODER *bc, unsigned char *source, unsigned char *source_end) { - br->lowvalue = 0; - br->range = 255; - br->count = -24; - br->buffer = source; - br->buffer_end = source_end; - br->pos = 0; + bc->lowvalue = 0; + bc->range = 255; + bc->count = -24; + bc->buffer = source; + bc->buffer_end = source_end; + bc->pos = 0; } -void vp8_stop_encode(BOOL_CODER *br) { +void vp8_stop_encode(BOOL_CODER *bc) { int i; - for (i = 0; i < 32; ++i) vp8_encode_bool(br, 0, 128); + for (i = 0; i < 32; ++i) vp8_encode_bool(bc, 0, 128); } -void vp8_encode_value(BOOL_CODER *br, int data, int bits) { +void vp8_encode_value(BOOL_CODER *bc, int data, int bits) { int bit; for (bit = bits - 1; bit >= 0; bit--) { - vp8_encode_bool(br, (1 & (data >> bit)), 0x80); + vp8_encode_bool(bc, (1 & (data >> bit)), 0x80); } } diff --git a/media/libvpx/libvpx/vp8/encoder/boolhuff.h b/media/libvpx/libvpx/vp8/encoder/boolhuff.h index d001eea9cd8c..8cc61bdd44e2 100644 --- a/media/libvpx/libvpx/vp8/encoder/boolhuff.h +++ b/media/libvpx/libvpx/vp8/encoder/boolhuff.h @@ -9,14 +9,14 @@ */ /**************************************************************************** -* -* Module Title : boolhuff.h -* -* Description : Bool Coder header file. -* -****************************************************************************/ -#ifndef VP8_ENCODER_BOOLHUFF_H_ -#define VP8_ENCODER_BOOLHUFF_H_ + * + * Module Title : boolhuff.h + * + * Description : Bool Coder header file. 
+ * + ****************************************************************************/ +#ifndef VPX_VP8_ENCODER_BOOLHUFF_H_ +#define VPX_VP8_ENCODER_BOOLHUFF_H_ #include "vpx_ports/mem.h" #include "vpx/internal/vpx_codec_internal.h" @@ -35,11 +35,11 @@ typedef struct { struct vpx_internal_error_info *error; } BOOL_CODER; -extern void vp8_start_encode(BOOL_CODER *bc, unsigned char *buffer, - unsigned char *buffer_end); +void vp8_start_encode(BOOL_CODER *bc, unsigned char *source, + unsigned char *source_end); -extern void vp8_encode_value(BOOL_CODER *br, int data, int bits); -extern void vp8_stop_encode(BOOL_CODER *bc); +void vp8_encode_value(BOOL_CODER *bc, int data, int bits); +void vp8_stop_encode(BOOL_CODER *bc); extern const unsigned int vp8_prob_cost[256]; DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); @@ -56,23 +56,12 @@ static int validate_buffer(const unsigned char *start, size_t len, return 0; } -static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability) { +static void vp8_encode_bool(BOOL_CODER *bc, int bit, int probability) { unsigned int split; - int count = br->count; - unsigned int range = br->range; - unsigned int lowvalue = br->lowvalue; - register int shift; - -#ifdef VP8_ENTROPY_STATS -#if defined(SECTIONBITS_OUTPUT) - - if (bit) - Sectionbits[active_section] += vp8_prob_cost[255 - probability]; - else - Sectionbits[active_section] += vp8_prob_cost[probability]; - -#endif -#endif + int count = bc->count; + unsigned int range = bc->range; + unsigned int lowvalue = bc->lowvalue; + int shift; split = 1 + (((range - 1) * probability) >> 8); @@ -80,7 +69,7 @@ static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability) { if (bit) { lowvalue += split; - range = br->range - split; + range = bc->range - split; } shift = vp8_norm[range]; @@ -92,18 +81,18 @@ static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability) { int offset = shift - count; if ((lowvalue << (offset - 1)) & 0x80000000) { - int x = br->pos - 1; + int x = bc->pos - 1; - while (x >= 0 && br->buffer[x] == 0xff) { - br->buffer[x] = (unsigned char)0; + while (x >= 0 && bc->buffer[x] == 0xff) { + bc->buffer[x] = (unsigned char)0; x--; } - br->buffer[x] += 1; + bc->buffer[x] += 1; } - validate_buffer(br->buffer + br->pos, 1, br->buffer_end, br->error); - br->buffer[br->pos++] = (lowvalue >> (24 - offset)); + validate_buffer(bc->buffer + bc->pos, 1, bc->buffer_end, bc->error); + bc->buffer[bc->pos++] = (lowvalue >> (24 - offset) & 0xff); lowvalue <<= offset; shift = count; @@ -112,13 +101,13 @@ static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability) { } lowvalue <<= shift; - br->count = count; - br->lowvalue = lowvalue; - br->range = range; + bc->count = count; + bc->lowvalue = lowvalue; + bc->range = range; } #ifdef __cplusplus } // extern "C" #endif -#endif // VP8_ENCODER_BOOLHUFF_H_ +#endif // VPX_VP8_ENCODER_BOOLHUFF_H_ diff --git a/media/libvpx/libvpx/vp8/common/copy_c.c b/media/libvpx/libvpx/vp8/encoder/copy_c.c similarity index 100% rename from media/libvpx/libvpx/vp8/common/copy_c.c rename to media/libvpx/libvpx/vp8/encoder/copy_c.c diff --git a/media/libvpx/libvpx/vp8/encoder/dct_value_cost.h b/media/libvpx/libvpx/vp8/encoder/dct_value_cost.h index 278dce73f40d..0cd6cb4e65b8 100644 --- a/media/libvpx/libvpx/vp8/encoder/dct_value_cost.h +++ b/media/libvpx/libvpx/vp8/encoder/dct_value_cost.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_ENCODER_DCT_VALUE_COST_H_ -#define VP8_ENCODER_DCT_VALUE_COST_H_ +#ifndef VPX_VP8_ENCODER_DCT_VALUE_COST_H_ +#define VPX_VP8_ENCODER_DCT_VALUE_COST_H_ #ifdef __cplusplus extern "C" { @@ -341,4 +341,4 @@ static const short dct_value_cost[2048 * 2] = { } // extern "C" #endif -#endif // VP8_ENCODER_DCT_VALUE_COST_H_ +#endif // VPX_VP8_ENCODER_DCT_VALUE_COST_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/dct_value_tokens.h b/media/libvpx/libvpx/vp8/encoder/dct_value_tokens.h index 0597deab2d4d..5cc4505f09a0 100644 --- a/media/libvpx/libvpx/vp8/encoder/dct_value_tokens.h +++ b/media/libvpx/libvpx/vp8/encoder/dct_value_tokens.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_DCT_VALUE_TOKENS_H_ -#define VP8_ENCODER_DCT_VALUE_TOKENS_H_ +#ifndef VPX_VP8_ENCODER_DCT_VALUE_TOKENS_H_ +#define VPX_VP8_ENCODER_DCT_VALUE_TOKENS_H_ #ifdef __cplusplus extern "C" { @@ -845,4 +845,4 @@ static const TOKENVALUE dct_value_tokens[2048 * 2] = { } // extern "C" #endif -#endif // VP8_ENCODER_DCT_VALUE_TOKENS_H_ +#endif // VPX_VP8_ENCODER_DCT_VALUE_TOKENS_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/defaultcoefcounts.h b/media/libvpx/libvpx/vp8/encoder/defaultcoefcounts.h index 2976325dc570..a3ab34c8a040 100644 --- a/media/libvpx/libvpx/vp8/encoder/defaultcoefcounts.h +++ b/media/libvpx/libvpx/vp8/encoder/defaultcoefcounts.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_DEFAULTCOEFCOUNTS_H_ -#define VP8_ENCODER_DEFAULTCOEFCOUNTS_H_ +#ifndef VPX_VP8_ENCODER_DEFAULTCOEFCOUNTS_H_ +#define VPX_VP8_ENCODER_DEFAULTCOEFCOUNTS_H_ #ifdef __cplusplus extern "C" { @@ -232,4 +232,4 @@ static const unsigned int default_coef_counts } // extern "C" #endif -#endif // VP8_ENCODER_DEFAULTCOEFCOUNTS_H_ +#endif // VPX_VP8_ENCODER_DEFAULTCOEFCOUNTS_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/denoising.c b/media/libvpx/libvpx/vp8/encoder/denoising.c index eb963b97e369..e54d1e9f4bdd 100644 --- a/media/libvpx/libvpx/vp8/encoder/denoising.c +++ b/media/libvpx/libvpx/vp8/encoder/denoising.c @@ -213,13 +213,12 @@ int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, return FILTER_BLOCK; } -int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg_uv, - int mc_avg_uv_stride, - unsigned char *running_avg_uv, int avg_uv_stride, +int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, + unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising) { - unsigned char *running_avg_uv_start = running_avg_uv; + unsigned char *running_avg_start = running_avg; unsigned char *sig_start = sig; int sum_diff_thresh; int r, c; @@ -259,13 +258,13 @@ int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg_uv, int adjustment = 0; int absdiff = 0; - diff = mc_running_avg_uv[c] - sig[c]; + diff = mc_running_avg[c] - sig[c]; absdiff = abs(diff); // When |diff| <= |3 + shift_inc1|, use pixel value from // last denoised raw. 
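The surrounding denoising.c hunk mostly renames parameters (mc_running_avg_uv becomes mc_running_avg, and so on), but the per-pixel rule it carries is easy to lose in the flattened diff: a small deviation from the motion-compensated running average is copied outright, while a larger one only nudges the signal by a capped step, saturated to 8 bits. A distilled sketch with illustrative step sizes (the exact thresholds and adjustments are in the surrounding code):

/* One chroma pixel of the filter: sig is the noisy input, mc_avg the
 * motion-compensated running average. Returns the denoised value. */
static unsigned char denoise_pixel(unsigned char sig, unsigned char mc_avg,
                                   int shift_inc1) {
  const int diff = mc_avg - sig;
  const int absdiff = diff < 0 ? -diff : diff;
  int adjustment, v;
  if (absdiff <= 3 + shift_inc1) return mc_avg; /* small delta: copy average */
  if (absdiff >= 4 && absdiff <= 7)
    adjustment = 3;            /* medium delta: fixed step (illustrative) */
  else
    adjustment = absdiff >> 1; /* large delta: capped step (illustrative) */
  v = diff > 0 ? sig + adjustment : sig - adjustment;
  if (v > 255) v = 255; /* saturate to the 8-bit pixel range */
  if (v < 0) v = 0;
  return (unsigned char)v;
}
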
if (absdiff <= 3 + shift_inc1) { - running_avg_uv[c] = mc_running_avg_uv[c]; + running_avg[c] = mc_running_avg[c]; sum_diff += diff; } else { if (absdiff >= 4 && absdiff <= 7) { @@ -277,16 +276,16 @@ int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg_uv, } if (diff > 0) { if ((sig[c] + adjustment) > 255) { - running_avg_uv[c] = 255; + running_avg[c] = 255; } else { - running_avg_uv[c] = sig[c] + adjustment; + running_avg[c] = sig[c] + adjustment; } sum_diff += adjustment; } else { if ((sig[c] - adjustment) < 0) { - running_avg_uv[c] = 0; + running_avg[c] = 0; } else { - running_avg_uv[c] = sig[c] - adjustment; + running_avg[c] = sig[c] - adjustment; } sum_diff -= adjustment; } @@ -294,8 +293,8 @@ int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg_uv, } /* Update pointers for next iteration. */ sig += sig_stride; - mc_running_avg_uv += mc_avg_uv_stride; - running_avg_uv += avg_uv_stride; + mc_running_avg += mc_avg_stride; + running_avg += avg_stride; } sum_diff_thresh = SUM_DIFF_THRESHOLD_UV; @@ -314,27 +313,27 @@ int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg_uv, // Only apply the adjustment for max delta up to 3. if (delta < 4) { sig -= sig_stride * 8; - mc_running_avg_uv -= mc_avg_uv_stride * 8; - running_avg_uv -= avg_uv_stride * 8; + mc_running_avg -= mc_avg_stride * 8; + running_avg -= avg_stride * 8; for (r = 0; r < 8; ++r) { for (c = 0; c < 8; ++c) { - int diff = mc_running_avg_uv[c] - sig[c]; + int diff = mc_running_avg[c] - sig[c]; int adjustment = abs(diff); if (adjustment > delta) adjustment = delta; if (diff > 0) { // Bring denoised signal down. - if (running_avg_uv[c] - adjustment < 0) { - running_avg_uv[c] = 0; + if (running_avg[c] - adjustment < 0) { + running_avg[c] = 0; } else { - running_avg_uv[c] = running_avg_uv[c] - adjustment; + running_avg[c] = running_avg[c] - adjustment; } sum_diff -= adjustment; } else if (diff < 0) { // Bring denoised signal up. - if (running_avg_uv[c] + adjustment > 255) { - running_avg_uv[c] = 255; + if (running_avg[c] + adjustment > 255) { + running_avg[c] = 255; } else { - running_avg_uv[c] = running_avg_uv[c] + adjustment; + running_avg[c] = running_avg[c] + adjustment; } sum_diff += adjustment; } @@ -342,8 +341,8 @@ int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg_uv, // TODO(marpan): Check here if abs(sum_diff) has gone below the // threshold sum_diff_thresh, and if so, we can exit the row loop. sig += sig_stride; - mc_running_avg_uv += mc_avg_uv_stride; - running_avg_uv += avg_uv_stride; + mc_running_avg += mc_avg_stride; + running_avg += avg_stride; } if (abs(sum_diff) > sum_diff_thresh) return COPY_BLOCK; } else { @@ -351,7 +350,7 @@ int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg_uv, } } - vp8_copy_mem8x8(running_avg_uv_start, avg_uv_stride, sig_start, sig_stride); + vp8_copy_mem8x8(running_avg_start, avg_stride, sig_start, sig_stride); return FILTER_BLOCK; } diff --git a/media/libvpx/libvpx/vp8/encoder/denoising.h b/media/libvpx/libvpx/vp8/encoder/denoising.h index 91d87b3a1cd3..51ae3b0ab336 100644 --- a/media/libvpx/libvpx/vp8/encoder/denoising.h +++ b/media/libvpx/libvpx/vp8/encoder/denoising.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_ENCODER_DENOISING_H_ -#define VP8_ENCODER_DENOISING_H_ +#ifndef VPX_VP8_ENCODER_DENOISING_H_ +#define VPX_VP8_ENCODER_DENOISING_H_ #include "block.h" #include "vp8/common/loopfilter.h" @@ -100,4 +100,4 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, MACROBLOCK *x, } // extern "C" #endif -#endif // VP8_ENCODER_DENOISING_H_ +#endif // VPX_VP8_ENCODER_DENOISING_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/encodeframe.c b/media/libvpx/libvpx/vp8/encoder/encodeframe.c index 9bb0df72d521..2b3d9564ce20 100644 --- a/media/libvpx/libvpx/vp8/encoder/encodeframe.c +++ b/media/libvpx/libvpx/vp8/encoder/encodeframe.c @@ -64,9 +64,9 @@ unsigned int b_modes[14] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; * Eventually this should be replaced by custom no-reference routines, * which will be faster. */ -static const unsigned char VP8_VAR_OFFS[16] = { - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 -}; +static const unsigned char VP8_VAR_OFFS[16] = { 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128 }; /* Original activity measure from Tim T's code. */ static unsigned int tt_activity_measure(VP8_COMP *cpi, MACROBLOCK *x) { diff --git a/media/libvpx/libvpx/vp8/encoder/encodeframe.h b/media/libvpx/libvpx/vp8/encoder/encodeframe.h index 5274aba41205..cc8cf4d713d7 100644 --- a/media/libvpx/libvpx/vp8/encoder/encodeframe.h +++ b/media/libvpx/libvpx/vp8/encoder/encodeframe.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_ENCODEFRAME_H_ -#define VP8_ENCODER_ENCODEFRAME_H_ +#ifndef VPX_VP8_ENCODER_ENCODEFRAME_H_ +#define VPX_VP8_ENCODER_ENCODEFRAME_H_ #include "vp8/encoder/tokenize.h" @@ -37,4 +37,4 @@ int vp8cx_encode_intra_macroblock(struct VP8_COMP *cpi, struct macroblock *x, } // extern "C" #endif -#endif // VP8_ENCODER_ENCODEFRAME_H_ +#endif // VPX_VP8_ENCODER_ENCODEFRAME_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/encodeintra.h b/media/libvpx/libvpx/vp8/encoder/encodeintra.h index 3956cf5fb1e6..021dc5ed76be 100644 --- a/media/libvpx/libvpx/vp8/encoder/encodeintra.h +++ b/media/libvpx/libvpx/vp8/encoder/encodeintra.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_ENCODEINTRA_H_ -#define VP8_ENCODER_ENCODEINTRA_H_ +#ifndef VPX_VP8_ENCODER_ENCODEINTRA_H_ +#define VPX_VP8_ENCODER_ENCODEINTRA_H_ #include "onyx_int.h" #ifdef __cplusplus @@ -25,4 +25,4 @@ void vp8_encode_intra4x4block(MACROBLOCK *x, int ib); } // extern "C" #endif -#endif // VP8_ENCODER_ENCODEINTRA_H_ +#endif // VPX_VP8_ENCODER_ENCODEINTRA_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/encodemb.h b/media/libvpx/libvpx/vp8/encoder/encodemb.h index b55ba3ac3f1c..db577ddc108f 100644 --- a/media/libvpx/libvpx/vp8/encoder/encodemb.h +++ b/media/libvpx/libvpx/vp8/encoder/encodemb.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_ENCODER_ENCODEMB_H_ -#define VP8_ENCODER_ENCODEMB_H_ +#ifndef VPX_VP8_ENCODER_ENCODEMB_H_ +#define VPX_VP8_ENCODER_ENCODEMB_H_ #include "onyx_int.h" @@ -37,4 +37,4 @@ void vp8_encode_inter16x16y(MACROBLOCK *x); } // extern "C" #endif -#endif // VP8_ENCODER_ENCODEMB_H_ +#endif // VPX_VP8_ENCODER_ENCODEMB_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/encodemv.c b/media/libvpx/libvpx/vp8/encoder/encodemv.c index ea93ccd71052..04adf105b9bc 100644 --- a/media/libvpx/libvpx/vp8/encoder/encodemv.c +++ b/media/libvpx/libvpx/vp8/encoder/encodemv.c @@ -16,10 +16,6 @@ #include <math.h> -#ifdef VP8_ENTROPY_STATS -extern unsigned int active_section; -#endif - static void encode_mvcomponent(vp8_writer *const w, const int v, const struct mv_context *mvc) { const vp8_prob *p = mvc->prob; @@ -309,9 +305,6 @@ void vp8_write_mvprobs(VP8_COMP *cpi) { vp8_writer *const w = cpi->bc; MV_CONTEXT *mvc = cpi->common.fc.mvc; int flags[2] = { 0, 0 }; -#ifdef VP8_ENTROPY_STATS - active_section = 4; -#endif write_component_probs(w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], cpi->mb.MVcount[0], 0, &flags[0]); @@ -323,8 +316,4 @@ void vp8_write_mvprobs(VP8_COMP *cpi) { vp8_build_component_cost_table( cpi->mb.mvcost, (const MV_CONTEXT *)cpi->common.fc.mvc, flags); } - -#ifdef VP8_ENTROPY_STATS - active_section = 5; -#endif } diff --git a/media/libvpx/libvpx/vp8/encoder/encodemv.h b/media/libvpx/libvpx/vp8/encoder/encodemv.h index 87db30f31054..347b9feffeae 100644 --- a/media/libvpx/libvpx/vp8/encoder/encodemv.h +++ b/media/libvpx/libvpx/vp8/encoder/encodemv.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_ENCODEMV_H_ -#define VP8_ENCODER_ENCODEMV_H_ +#ifndef VPX_VP8_ENCODER_ENCODEMV_H_ +#define VPX_VP8_ENCODER_ENCODEMV_H_ #include "onyx_int.h" @@ -26,4 +26,4 @@ void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, } // extern "C" #endif -#endif // VP8_ENCODER_ENCODEMV_H_ +#endif // VPX_VP8_ENCODER_ENCODEMV_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/ethreading.h b/media/libvpx/libvpx/vp8/encoder/ethreading.h index 95bf73d182cb..598fe60559b8 100644 --- a/media/libvpx/libvpx/vp8/encoder/ethreading.h +++ b/media/libvpx/libvpx/vp8/encoder/ethreading.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree.
*/ -#ifndef VP8_ENCODER_ETHREADING_H_ -#define VP8_ENCODER_ETHREADING_H_ +#ifndef VPX_VP8_ENCODER_ETHREADING_H_ +#define VPX_VP8_ENCODER_ETHREADING_H_ #include "vp8/encoder/onyx_int.h" @@ -29,4 +29,4 @@ void vp8cx_remove_encoder_threads(struct VP8_COMP *cpi); } #endif -#endif // VP8_ENCODER_ETHREADING_H_ +#endif // VPX_VP8_ENCODER_ETHREADING_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/firstpass.c b/media/libvpx/libvpx/vp8/encoder/firstpass.c index 70f924341039..981c0fde35fb 100644 --- a/media/libvpx/libvpx/vp8/encoder/firstpass.c +++ b/media/libvpx/libvpx/vp8/encoder/firstpass.c @@ -113,11 +113,9 @@ static int input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps) { return 1; } -static void output_stats(const VP8_COMP *cpi, - struct vpx_codec_pkt_list *pktlist, +static void output_stats(struct vpx_codec_pkt_list *pktlist, FIRSTPASS_STATS *stats) { struct vpx_codec_cx_pkt pkt; - (void)cpi; pkt.kind = VPX_CODEC_STATS_PKT; pkt.data.twopass_stats.buf = stats; pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS); @@ -371,11 +369,10 @@ void vp8_init_first_pass(VP8_COMP *cpi) { } void vp8_end_first_pass(VP8_COMP *cpi) { - output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.total_stats); + output_stats(cpi->output_pkt_list, &cpi->twopass.total_stats); } -static void zz_motion_search(VP8_COMP *cpi, MACROBLOCK *x, - YV12_BUFFER_CONFIG *raw_buffer, +static void zz_motion_search(MACROBLOCK *x, YV12_BUFFER_CONFIG *raw_buffer, int *raw_motion_err, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset) { @@ -389,7 +386,6 @@ static void zz_motion_search(VP8_COMP *cpi, MACROBLOCK *x, int raw_stride = raw_buffer->y_stride; unsigned char *ref_ptr; int ref_stride = x->e_mbd.pre.y_stride; - (void)cpi; /* Set up pointers for this macro block raw buffer */ raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset + d->offset); @@ -603,9 +599,8 @@ void vp8_first_pass(VP8_COMP *cpi) { int raw_motion_error = INT_MAX; /* Simple 0,0 motion with no mv overhead */ - zz_motion_search(cpi, x, cpi->last_frame_unscaled_source, - &raw_motion_error, lst_yv12, &motion_error, - recon_yoffset); + zz_motion_search(x, cpi->last_frame_unscaled_source, &raw_motion_error, + lst_yv12, &motion_error, recon_yoffset); d->bmi.mv.as_mv.row = 0; d->bmi.mv.as_mv.col = 0; @@ -798,7 +793,7 @@ void vp8_first_pass(VP8_COMP *cpi) { /* don't want to do output stats with a stack variable! 
*/ memcpy(&cpi->twopass.this_frame_stats, &fps, sizeof(FIRSTPASS_STATS)); - output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.this_frame_stats); + output_stats(cpi->output_pkt_list, &cpi->twopass.this_frame_stats); accumulate_stats(&cpi->twopass.total_stats, &fps); } @@ -989,11 +984,11 @@ static int estimate_max_q(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats, bits_per_mb_at_this_q = vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb; - bits_per_mb_at_this_q = (int)(.5 + - err_correction_factor * speed_correction * - cpi->twopass.est_max_qcorrection_factor * - cpi->twopass.section_max_qfactor * - (double)bits_per_mb_at_this_q); + bits_per_mb_at_this_q = + (int)(.5 + err_correction_factor * speed_correction * + cpi->twopass.est_max_qcorrection_factor * + cpi->twopass.section_max_qfactor * + (double)bits_per_mb_at_this_q); /* Mode and motion overhead */ /* As Q rises in real encode loop rd code will force overhead down @@ -1086,9 +1081,8 @@ static int estimate_cq(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats, vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb; bits_per_mb_at_this_q = - (int)(.5 + - err_correction_factor * speed_correction * clip_iifactor * - (double)bits_per_mb_at_this_q); + (int)(.5 + err_correction_factor * speed_correction * clip_iifactor * + (double)bits_per_mb_at_this_q); /* Mode and motion overhead */ /* As Q rises in real encode loop rd code will force overhead down @@ -1273,9 +1267,8 @@ void vp8_init_second_pass(VP8_COMP *cpi) { * sum duration is not. Its calculated based on the actual durations of * all frames from the first pass. */ - vp8_new_framerate(cpi, - 10000000.0 * cpi->twopass.total_stats.count / - cpi->twopass.total_stats.duration); + vp8_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count / + cpi->twopass.total_stats.duration); cpi->output_framerate = cpi->framerate; cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * @@ -1339,12 +1332,10 @@ void vp8_end_second_pass(VP8_COMP *cpi) { (void)cpi; } /* This function gives and estimate of how badly we believe the prediction * quality is decaying from frame to frame. */ -static double get_prediction_decay_rate(VP8_COMP *cpi, - FIRSTPASS_STATS *next_frame) { +static double get_prediction_decay_rate(FIRSTPASS_STATS *next_frame) { double prediction_decay_rate; double motion_decay; double motion_pct = next_frame->pcnt_motion; - (void)cpi; /* Initial basis is the % mbs inter coded */ prediction_decay_rate = next_frame->pcnt_inter; @@ -1401,7 +1392,7 @@ static int detect_transition_to_still(VP8_COMP *cpi, int frame_interval, for (j = 0; j < still_interval; ++j) { if (EOF == input_stats(cpi, &tmp_next_frame)) break; - decay_rate = get_prediction_decay_rate(cpi, &tmp_next_frame); + decay_rate = get_prediction_decay_rate(&tmp_next_frame); if (decay_rate < 0.999) break; } /* Reset file position */ @@ -1452,8 +1443,7 @@ static int detect_flash(VP8_COMP *cpi, int offset) { } /* Update the motion related elements to the GF arf boost calculation */ -static void accumulate_frame_motion_stats(VP8_COMP *cpi, - FIRSTPASS_STATS *this_frame, +static void accumulate_frame_motion_stats(FIRSTPASS_STATS *this_frame, double *this_frame_mv_in_out, double *mv_in_out_accumulator, double *abs_mv_in_out_accumulator, @@ -1461,7 +1451,6 @@ static void accumulate_frame_motion_stats(VP8_COMP *cpi, double this_frame_mvr_ratio; double this_frame_mvc_ratio; double motion_pct; - (void)cpi; /* Accumulate motion stats. 
*/ motion_pct = this_frame->pcnt_motion; @@ -1544,7 +1533,7 @@ static int calc_arf_boost(VP8_COMP *cpi, int offset, int f_frames, int b_frames, /* Update the motion related elements to the boost calculation */ accumulate_frame_motion_stats( - cpi, &this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, + &this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); /* Calculate the baseline boost number for this frame */ @@ -1559,7 +1548,7 @@ static int calc_arf_boost(VP8_COMP *cpi, int offset, int f_frames, int b_frames, /* Cumulative effect of prediction quality decay */ if (!flash_detected) { decay_accumulator = - decay_accumulator * get_prediction_decay_rate(cpi, &this_frame); + decay_accumulator * get_prediction_decay_rate(&this_frame); decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator; } boost_score += (decay_accumulator * r); @@ -1588,7 +1577,7 @@ static int calc_arf_boost(VP8_COMP *cpi, int offset, int f_frames, int b_frames, /* Update the motion related elements to the boost calculation */ accumulate_frame_motion_stats( - cpi, &this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, + &this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); /* Calculate the baseline boost number for this frame */ @@ -1603,7 +1592,7 @@ static int calc_arf_boost(VP8_COMP *cpi, int offset, int f_frames, int b_frames, /* Cumulative effect of prediction quality decay */ if (!flash_detected) { decay_accumulator = - decay_accumulator * get_prediction_decay_rate(cpi, &this_frame); + decay_accumulator * get_prediction_decay_rate(&this_frame); decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator; } @@ -1705,7 +1694,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { /* Update the motion related elements to the boost calculation */ accumulate_frame_motion_stats( - cpi, &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, + &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); /* Calculate a baseline boost number for this frame */ @@ -1713,7 +1702,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { /* Cumulative effect of prediction quality decay */ if (!flash_detected) { - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); + loop_decay_rate = get_prediction_decay_rate(&next_frame); decay_accumulator = decay_accumulator * loop_decay_rate; decay_accumulator = decay_accumulator < 0.1 ? 
0.1 : decay_accumulator; } @@ -1739,10 +1728,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { /* Dont break out very close to a key frame */ ((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) && ((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) && - (!flash_detected) && ((mv_ratio_accumulator > 100.0) || - (abs_mv_in_out_accumulator > 3.0) || - (mv_in_out_accumulator < -2.0) || - ((boost_score - old_boost_score) < 2.0)))) { + (!flash_detected) && + ((mv_ratio_accumulator > 100.0) || + (abs_mv_in_out_accumulator > 3.0) || + (mv_in_out_accumulator < -2.0) || + ((boost_score - old_boost_score) < 2.0)))) { boost_score = old_boost_score; break; } @@ -1815,8 +1805,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { (next_frame.pcnt_inter > 0.75) && ((mv_in_out_accumulator / (double)i > -0.2) || (mv_in_out_accumulator > -2.0)) && - (cpi->gfu_boost > 100) && (cpi->twopass.gf_decay_rate <= - (ARF_DECAY_THRESH + (cpi->gfu_boost / 200)))) + (cpi->gfu_boost > 100) && + (cpi->twopass.gf_decay_rate <= + (ARF_DECAY_THRESH + (cpi->gfu_boost / 200)))) #endif { int Boost; @@ -2081,9 +2072,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { * score, otherwise it may be worse off than an "un-boosted" frame */ else { + // Avoid division by 0 by clamping cpi->twopass.kf_group_error_left to 1 int alt_gf_bits = (int)((double)cpi->twopass.kf_group_bits * mod_frame_err / - DOUBLE_DIVIDE_CHECK((double)cpi->twopass.kf_group_error_left)); + (double)VPXMAX(cpi->twopass.kf_group_error_left, 1)); if (alt_gf_bits > gf_bits) { gf_bits = alt_gf_bits; @@ -2599,7 +2591,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { } /* How fast is prediction quality decaying */ - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); + loop_decay_rate = get_prediction_decay_rate(&next_frame); /* We want to know something about the recent past... rather than * as used elsewhere where we are concened with decay in prediction @@ -2781,7 +2773,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (r > RMAX) r = RMAX; /* How fast is prediction quality decaying */ - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); + loop_decay_rate = get_prediction_decay_rate(&next_frame); decay_accumulator = decay_accumulator * loop_decay_rate; decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator; diff --git a/media/libvpx/libvpx/vp8/encoder/firstpass.h b/media/libvpx/libvpx/vp8/encoder/firstpass.h index ac8a7b1bfb69..f5490f1efff0 100644 --- a/media/libvpx/libvpx/vp8/encoder/firstpass.h +++ b/media/libvpx/libvpx/vp8/encoder/firstpass.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_FIRSTPASS_H_ -#define VP8_ENCODER_FIRSTPASS_H_ +#ifndef VPX_VP8_ENCODER_FIRSTPASS_H_ +#define VPX_VP8_ENCODER_FIRSTPASS_H_ #ifdef __cplusplus extern "C" { @@ -28,4 +28,4 @@ extern size_t vp8_firstpass_stats_sz(unsigned int mb_count); } // extern "C" #endif -#endif // VP8_ENCODER_FIRSTPASS_H_ +#endif // VPX_VP8_ENCODER_FIRSTPASS_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/lookahead.h b/media/libvpx/libvpx/vp8/encoder/lookahead.h index a67f226946a5..bf0401190b11 100644 --- a/media/libvpx/libvpx/vp8/encoder/lookahead.h +++ b/media/libvpx/libvpx/vp8/encoder/lookahead.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_ENCODER_LOOKAHEAD_H_ -#define VP8_ENCODER_LOOKAHEAD_H_ +#ifndef VPX_VP8_ENCODER_LOOKAHEAD_H_ +#define VPX_VP8_ENCODER_LOOKAHEAD_H_ #include "vpx_scale/yv12config.h" #include "vpx/vpx_integer.h" @@ -74,7 +74,7 @@ int vp8_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, struct lookahead_entry *vp8_lookahead_pop(struct lookahead_ctx *ctx, int drain); #define PEEK_FORWARD 1 -#define PEEK_BACKWARD -1 +#define PEEK_BACKWARD (-1) /**\brief Get a future source buffer to encode * * \param[in] ctx Pointer to the lookahead context @@ -96,4 +96,4 @@ unsigned int vp8_lookahead_depth(struct lookahead_ctx *ctx); } // extern "C" #endif -#endif // VP8_ENCODER_LOOKAHEAD_H_ +#endif // VPX_VP8_ENCODER_LOOKAHEAD_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/mcomp.c b/media/libvpx/libvpx/vp8/encoder/mcomp.c index 970120f3b26a..9e7f5c7acefe 100644 --- a/media/libvpx/libvpx/vp8/encoder/mcomp.c +++ b/media/libvpx/libvpx/vp8/encoder/mcomp.c @@ -21,11 +21,6 @@ #include "vp8/common/common.h" #include "vpx_dsp/vpx_dsp_common.h" -#ifdef VP8_ENTROPY_STATS -static int mv_ref_ct[31][4][2]; -static int mv_mode_cts[4][2]; -#endif - int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) { /* MV costing is based on the distribution of vectors in the previous * frame and as such will tend to over state the cost of vectors. In @@ -34,19 +29,22 @@ int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) { * NEAREST for subsequent blocks. The "Weight" parameter allows, to a * limited extent, for some account to be taken of these factors. */ - return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + - mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * - Weight) >> - 7; + const int mv_idx_row = + clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals); + const int mv_idx_col = + clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals); + return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * Weight) >> 7; } static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit) { /* Ignore mv costing if mvcost is NULL */ if (mvcost) { - return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + - mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * - error_per_bit + + const int mv_idx_row = + clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals); + const int mv_idx_col = + clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals); + return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * error_per_bit + 128) >> 8; } @@ -253,7 +251,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; -#if ARCH_X86 || ARCH_X86_64 +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 MACROBLOCKD *xd = &x->e_mbd; unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; @@ -382,7 +380,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; -#if ARCH_X86 || ARCH_X86_64 +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 MACROBLOCKD *xd = &x->e_mbd; unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; @@ -678,7 +676,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; -#if ARCH_X86 || ARCH_X86_64 +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 MACROBLOCKD *xd = &x->e_mbd; unsigned char *y_0 = 
base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; @@ -839,7 +837,7 @@ static const MV next_chkpts[6][3] = { int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int_mv *best_mv, int search_param, int sad_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2], - int *mvcost[2], int_mv *center_mv) { + int_mv *center_mv) { MV hex[6] = { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } }; @@ -868,8 +866,6 @@ int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - (void)mvcost; - /* adjust ref_mv to make sure it is within MV range */ vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); @@ -1131,6 +1127,7 @@ int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } +#if HAVE_SSE2 || HAVE_MSA int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int_mv *best_mv, int search_param, int sad_per_bit, int *num00, vp8_variance_fn_ptr_t *fn_ptr, @@ -1279,6 +1276,7 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } +#endif // HAVE_SSE2 || HAVE_MSA int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, int distance, @@ -1366,6 +1364,7 @@ int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } +#if HAVE_SSSE3 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], @@ -1484,7 +1483,9 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } +#endif // HAVE_SSSE3 +#if HAVE_SSE4_1 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], @@ -1630,6 +1631,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } +#endif // HAVE_SSE4_1 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int error_per_bit, @@ -1709,6 +1711,7 @@ int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } +#if HAVE_SSE2 || HAVE_MSA int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int error_per_bit, int search_range, vp8_variance_fn_ptr_t *fn_ptr, @@ -1818,96 +1821,4 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } - -#ifdef VP8_ENTROPY_STATS -void print_mode_context(void) { - FILE *f = fopen("modecont.c", "w"); - int i, j; - - fprintf(f, "#include \"entropy.h\"\n"); - fprintf(f, "const int vp8_mode_contexts[6][4] =\n"); - fprintf(f, "{\n"); - - for (j = 0; j < 6; ++j) { - fprintf(f, " { /* %d */\n", j); - fprintf(f, " "); - - for (i 
= 0; i < 4; ++i) { - int overal_prob; - int this_prob; - int count; - - /* Overall probs */ - count = mv_mode_cts[i][0] + mv_mode_cts[i][1]; - - if (count) - overal_prob = 256 * mv_mode_cts[i][0] / count; - else - overal_prob = 128; - - if (overal_prob == 0) overal_prob = 1; - - /* context probs */ - count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; - - if (count) - this_prob = 256 * mv_ref_ct[j][i][0] / count; - else - this_prob = 128; - - if (this_prob == 0) this_prob = 1; - - fprintf(f, "%5d, ", this_prob); - } - - fprintf(f, " },\n"); - } - - fprintf(f, "};\n"); - fclose(f); -} - -/* MV ref count VP8_ENTROPY_STATS stats code */ -#ifdef VP8_ENTROPY_STATS -void init_mv_ref_counts() { - memset(mv_ref_ct, 0, sizeof(mv_ref_ct)); - memset(mv_mode_cts, 0, sizeof(mv_mode_cts)); -} - -void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) { - if (m == ZEROMV) { - ++mv_ref_ct[ct[0]][0][0]; - ++mv_mode_cts[0][0]; - } else { - ++mv_ref_ct[ct[0]][0][1]; - ++mv_mode_cts[0][1]; - - if (m == NEARESTMV) { - ++mv_ref_ct[ct[1]][1][0]; - ++mv_mode_cts[1][0]; - } else { - ++mv_ref_ct[ct[1]][1][1]; - ++mv_mode_cts[1][1]; - - if (m == NEARMV) { - ++mv_ref_ct[ct[2]][2][0]; - ++mv_mode_cts[2][0]; - } else { - ++mv_ref_ct[ct[2]][2][1]; - ++mv_mode_cts[2][1]; - - if (m == NEWMV) { - ++mv_ref_ct[ct[3]][3][0]; - ++mv_mode_cts[3][0]; - } else { - ++mv_ref_ct[ct[3]][3][1]; - ++mv_mode_cts[3][1]; - } - } - } - } -} - -#endif /* END MV ref count VP8_ENTROPY_STATS stats code */ - -#endif +#endif // HAVE_SSE2 || HAVE_MSA diff --git a/media/libvpx/libvpx/vp8/encoder/mcomp.h b/media/libvpx/libvpx/vp8/encoder/mcomp.h index b6228798ff36..57c18f523ffc 100644 --- a/media/libvpx/libvpx/vp8/encoder/mcomp.h +++ b/media/libvpx/libvpx/vp8/encoder/mcomp.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
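The vp8_mv_bit_cost()/mv_err_cost() hunks above clamp the table index to [0, MVvals] before reading mvcost[], so an extreme motion-vector difference can no longer index outside the cost tables. The pattern, sketched with hypothetical bounds parameters:

    /* clamp() as in vpx_dsp/vpx_dsp_common.h. */
    static int clamp_int(int value, int low, int high) {
      return value < low ? low : (value > high ? high : value);
    }

    /* One component of the MV cost lookup; min_index/max_index stand in for
     * the real table bounds (0 and MVvals in the hunks above). */
    static int mv_component_cost(const int *cost_table, int mv, int ref,
                                 int min_index, int max_index) {
      return cost_table[clamp_int((mv - ref) >> 1, min_index, max_index)];
    }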
*/ -#ifndef VP8_ENCODER_MCOMP_H_ -#define VP8_ENCODER_MCOMP_H_ +#ifndef VPX_VP8_ENCODER_MCOMP_H_ +#define VPX_VP8_ENCODER_MCOMP_H_ #include "block.h" #include "vpx_dsp/variance.h" @@ -18,11 +18,6 @@ extern "C" { #endif -#ifdef VP8_ENTROPY_STATS -extern void init_mv_ref_counts(); -extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); -#endif - /* The maximum number of steps in a step search given the largest allowed * initial step */ @@ -34,15 +29,14 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); /* Maximum size of the first step in full pel units */ #define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS - 1)) -extern void print_mode_context(void); -extern int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight); -extern void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride); -extern void vp8_init3smotion_compensation(MACROBLOCK *x, int stride); +int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight); +void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride); +void vp8_init3smotion_compensation(MACROBLOCK *x, int stride); -extern int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, - int_mv *best_mv, int search_param, int error_per_bit, - const vp8_variance_fn_ptr_t *vf, int *mvsadcost[2], - int *mvcost[2], int_mv *center_mv); +int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, + int_mv *best_mv, int search_param, int sad_per_bit, + const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2], + int_mv *center_mv); typedef int(fractional_mv_step_fp)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, @@ -51,10 +45,10 @@ typedef int(fractional_mv_step_fp)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int *mvcost[2], int *distortion, unsigned int *sse); -extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively; -extern fractional_mv_step_fp vp8_find_best_sub_pixel_step; -extern fractional_mv_step_fp vp8_find_best_half_pixel_step; -extern fractional_mv_step_fp vp8_skip_fractional_mv_step; +fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively; +fractional_mv_step_fp vp8_find_best_sub_pixel_step; +fractional_mv_step_fp vp8_find_best_half_pixel_step; +fractional_mv_step_fp vp8_skip_fractional_mv_step; typedef int (*vp8_full_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, @@ -78,4 +72,4 @@ typedef int (*vp8_diamond_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, } // extern "C" #endif -#endif // VP8_ENCODER_MCOMP_H_ +#endif // VPX_VP8_ENCODER_MCOMP_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/modecosts.h b/media/libvpx/libvpx/vp8/encoder/modecosts.h index dfb8989f7f91..09ee2b5520bb 100644 --- a/media/libvpx/libvpx/vp8/encoder/modecosts.h +++ b/media/libvpx/libvpx/vp8/encoder/modecosts.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_ENCODER_MODECOSTS_H_ -#define VP8_ENCODER_MODECOSTS_H_ +#ifndef VPX_VP8_ENCODER_MODECOSTS_H_ +#define VPX_VP8_ENCODER_MODECOSTS_H_ #ifdef __cplusplus extern "C" { @@ -17,10 +17,10 @@ extern "C" { struct VP8_COMP; -void vp8_init_mode_costs(struct VP8_COMP *x); +void vp8_init_mode_costs(struct VP8_COMP *c); #ifdef __cplusplus } // extern "C" #endif -#endif // VP8_ENCODER_MODECOSTS_H_ +#endif // VPX_VP8_ENCODER_MODECOSTS_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/mr_dissim.h b/media/libvpx/libvpx/vp8/encoder/mr_dissim.h index da36628afad9..58f5a9762302 100644 --- a/media/libvpx/libvpx/vp8/encoder/mr_dissim.h +++ b/media/libvpx/libvpx/vp8/encoder/mr_dissim.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_MR_DISSIM_H_ -#define VP8_ENCODER_MR_DISSIM_H_ +#ifndef VPX_VP8_ENCODER_MR_DISSIM_H_ +#define VPX_VP8_ENCODER_MR_DISSIM_H_ #include "vpx_config.h" #ifdef __cplusplus @@ -24,4 +24,4 @@ extern void vp8_store_drop_frame_info(VP8_COMP *cpi); } // extern "C" #endif -#endif // VP8_ENCODER_MR_DISSIM_H_ +#endif // VPX_VP8_ENCODER_MR_DISSIM_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/onyx_if.c b/media/libvpx/libvpx/vp8/encoder/onyx_if.c index 22431824256c..29c8cc66ccdb 100644 --- a/media/libvpx/libvpx/vp8/encoder/onyx_if.c +++ b/media/libvpx/libvpx/vp8/encoder/onyx_if.c @@ -38,7 +38,7 @@ #include "vpx_ports/system_state.h" #include "vpx_ports/vpx_timer.h" #include "vpx_util/vpx_write_yuv_frame.h" -#if ARCH_ARM +#if VPX_ARCH_ARM #include "vpx_ports/arm.h" #endif #if CONFIG_MULTI_RES_ENCODING @@ -62,12 +62,7 @@ extern int vp8_update_coef_context(VP8_COMP *cpi); #endif -extern void vp8_deblock_frame(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *post, int filt_lvl, - int low_var_thresh, int flag); -extern void print_parms(VP8_CONFIG *ocf, char *filenam); extern unsigned int vp8_get_processor_freq(); -extern void print_tree_update_probs(); int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest); @@ -101,10 +96,6 @@ extern int skip_true_count; extern int skip_false_count; #endif -#ifdef VP8_ENTROPY_STATS -extern int intra_mode_stats[10][10][10]; -#endif - #ifdef SPEEDSTATS unsigned int frames_at_speed[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; @@ -224,6 +215,8 @@ static void save_layer_context(VP8_COMP *cpi) { lc->frames_since_last_drop_overshoot = cpi->frames_since_last_drop_overshoot; lc->force_maxqp = cpi->force_maxqp; lc->last_frame_percent_intra = cpi->last_frame_percent_intra; + lc->last_q[0] = cpi->last_q[0]; + lc->last_q[1] = cpi->last_q[1]; memcpy(lc->count_mb_ref_frame_usage, cpi->mb.count_mb_ref_frame_usage, sizeof(cpi->mb.count_mb_ref_frame_usage)); @@ -261,6 +254,8 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer) { cpi->frames_since_last_drop_overshoot = lc->frames_since_last_drop_overshoot; cpi->force_maxqp = lc->force_maxqp; cpi->last_frame_percent_intra = lc->last_frame_percent_intra; + cpi->last_q[0] = lc->last_q[0]; + cpi->last_q[1] = lc->last_q[1]; memcpy(cpi->mb.count_mb_ref_frame_usage, lc->count_mb_ref_frame_usage, sizeof(cpi->mb.count_mb_ref_frame_usage)); @@ -689,8 +684,8 @@ static void set_default_lf_deltas(VP8_COMP *cpi) { /* Convenience macros for mapping speed and mode into a continuous * range */ -#define GOOD(x) (x + 1) -#define RT(x) (x + 7) +#define GOOD(x) ((x) + 1) +#define RT(x) ((x) + 7) static int speed_map(int speed, const int *map) { int res; @@ -743,9 +738,9 @@ static const int mode_check_freq_map_zn2[] = { 0, RT(10), 1 << 1, RT(11), 
1 << 2, RT(12), 1 << 3, INT_MAX }; -static const int mode_check_freq_map_vhbpred[] = { - 0, GOOD(5), 2, RT(0), 0, RT(3), 2, RT(5), 4, INT_MAX -}; +static const int mode_check_freq_map_vhbpred[] = { 0, GOOD(5), 2, RT(0), + 0, RT(3), 2, RT(5), + 4, INT_MAX }; static const int mode_check_freq_map_near2[] = { 0, GOOD(5), 2, RT(0), 0, RT(3), 2, @@ -761,13 +756,13 @@ static const int mode_check_freq_map_new2[] = { 0, GOOD(5), 4, RT(0), 1 << 3, RT(11), 1 << 4, RT(12), 1 << 5, INT_MAX }; -static const int mode_check_freq_map_split1[] = { - 0, GOOD(2), 2, GOOD(3), 7, RT(1), 2, RT(2), 7, INT_MAX -}; +static const int mode_check_freq_map_split1[] = { 0, GOOD(2), 2, GOOD(3), + 7, RT(1), 2, RT(2), + 7, INT_MAX }; -static const int mode_check_freq_map_split2[] = { - 0, GOOD(1), 2, GOOD(2), 4, GOOD(3), 15, RT(1), 4, RT(2), 15, INT_MAX -}; +static const int mode_check_freq_map_split2[] = { 0, GOOD(1), 2, GOOD(2), + 4, GOOD(3), 15, RT(1), + 4, RT(2), 15, INT_MAX }; void vp8_set_speed_features(VP8_COMP *cpi) { SPEED_FEATURES *sf = &cpi->sf; @@ -1534,6 +1529,8 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) { } } + cpi->ext_refresh_frame_flags_pending = 0; + cpi->baseline_gf_interval = cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL; @@ -1893,10 +1890,6 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) { CHECK_MEM_ERROR(cpi->consec_zero_last_mvbias, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); -#ifdef VP8_ENTROPY_STATS - init_context_counters(); -#endif - /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90 << 12; @@ -2005,10 +1998,6 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) { cpi->mb.rd_thresh_mult[i] = 128; } -#ifdef VP8_ENTROPY_STATS - init_mv_ref_counts(); -#endif - #if CONFIG_MULTITHREAD if (vp8cx_create_encoder_threads(cpi)) { vp8_remove_compressor(&cpi); @@ -2051,7 +2040,7 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) { cpi->fn_ptr[BLOCK_4X4].sdx8f = vpx_sad4x4x8; cpi->fn_ptr[BLOCK_4X4].sdx4df = vpx_sad4x4x4d; -#if ARCH_X86 || ARCH_X86_64 +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn; cpi->fn_ptr[BLOCK_16X8].copymem = vp8_copy32xn; cpi->fn_ptr[BLOCK_8X16].copymem = vp8_copy32xn; @@ -2106,8 +2095,8 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) { return cpi; } -void vp8_remove_compressor(VP8_COMP **ptr) { - VP8_COMP *cpi = *ptr; +void vp8_remove_compressor(VP8_COMP **comp) { + VP8_COMP *cpi = *comp; if (!cpi) return; @@ -2120,12 +2109,6 @@ void vp8_remove_compressor(VP8_COMP **ptr) { #endif -#ifdef VP8_ENTROPY_STATS - print_context_counters(); - print_tree_update_probs(); - print_mode_context(); -#endif - #if CONFIG_INTERNAL_STATS if (cpi->pass != 1) { @@ -2252,40 +2235,6 @@ void vp8_remove_compressor(VP8_COMP **ptr) { } #endif -#ifdef VP8_ENTROPY_STATS - { - int i, j, k; - FILE *fmode = fopen("modecontext.c", "w"); - - fprintf(fmode, "\n#include \"entropymode.h\"\n\n"); - fprintf(fmode, "const unsigned int vp8_kf_default_bmode_counts "); - fprintf(fmode, - "[VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES] =\n{\n"); - - for (i = 0; i < 10; ++i) { - fprintf(fmode, " { /* Above Mode : %d */\n", i); - - for (j = 0; j < 10; ++j) { - fprintf(fmode, " {"); - - for (k = 0; k < 10; ++k) { - if (!intra_mode_stats[i][j][k]) - fprintf(fmode, " %5d, ", 1); - else - fprintf(fmode, " %5d, ", intra_mode_stats[i][j][k]); - } - - fprintf(fmode, "}, /* left_mode %d */\n", j); - } - - fprintf(fmode, " },\n"); - } - - fprintf(fmode, "};\n"); - fclose(fmode); - } 
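The reworked GOOD()/RT() definitions above are the usual macro-hygiene fix: without parentheses around the parameter, an expression argument binds to the + instead of being evaluated first. The speed maps here only ever pass integer literals, so this is defensive hardening; a minimal illustration with hypothetical names:

    #include <stdio.h>

    #define GOOD_OLD(x) (x + 1)   /* old form */
    #define GOOD_NEW(x) ((x) + 1) /* form this patch switches to */

    int main(void) {
      int f = 1;
      /* GOOD_OLD(f ? 2 : 4) expands to (f ? 2 : 4 + 1): the + is absorbed by
       * the last ?: operand, yielding 2 here instead of the intended 3. */
      printf("%d %d\n", GOOD_OLD(f ? 2 : 4), GOOD_NEW(f ? 2 : 4)); /* 2 3 */
      return 0;
    }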
-#endif - #if defined(SECTIONBITS_OUTPUT) if (0) { @@ -2326,7 +2275,7 @@ void vp8_remove_compressor(VP8_COMP **ptr) { vp8_remove_common(&cpi->common); vpx_free(cpi); - *ptr = 0; + *comp = 0; #ifdef OUTPUT_YUV_SRC fclose(yuv_file); @@ -2464,6 +2413,7 @@ int vp8_update_reference(VP8_COMP *cpi, int ref_frame_flags) { if (ref_frame_flags & VP8_ALTR_FRAME) cpi->common.refresh_alt_ref_frame = 1; + cpi->ext_refresh_frame_flags_pending = 1; return 0; } @@ -2819,13 +2769,8 @@ static int decide_key_frame(VP8_COMP *cpi) { return code_key_frame; } -static void Pass1Encode(VP8_COMP *cpi, size_t *size, unsigned char *dest, - unsigned int *frame_flags) { - (void)size; - (void)dest; - (void)frame_flags; +static void Pass1Encode(VP8_COMP *cpi) { vp8_set_quantizer(cpi, 26); - vp8_first_pass(cpi); } #endif @@ -2862,7 +2807,6 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) fclose(yframe); } #endif -/* return of 0 means drop frame */ #if !CONFIG_REALTIME_ONLY /* Function to test for conditions that indeicate we should loop @@ -3364,11 +3308,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, (LOWER_RES_FRAME_INFO *)cpi->oxcf.mr_low_res_mode_info; if (cpi->oxcf.mr_encoder_id) { - // TODO(marpan): This constraint shouldn't be needed, as we would like - // to allow for key frame setting (forced or periodic) defined per - // spatial layer. For now, keep this in. - cm->frame_type = low_res_frame_info->frame_type; - // Check if lower resolution is available for motion vector reuse. if (cm->frame_type != KEY_FRAME) { cpi->mr_low_res_mv_avail = 1; @@ -3393,7 +3332,16 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, == low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]); */ } + // Disable motion vector reuse (i.e., disable any usage of the low_res) + // if the previous lower stream is skipped/disabled. + if (low_res_frame_info->skip_encoding_prev_stream) { + cpi->mr_low_res_mv_avail = 0; + } } + // This stream is not skipped (i.e., it's being encoded), so set this skip + // flag to 0. This is needed for the next stream (i.e., which is the next + // frame to be encoded). + low_res_frame_info->skip_encoding_prev_stream = 0; // On a key frame: For the lowest resolution, keep track of the key frame // counter value. For the higher resolutions, reset the current video @@ -3559,6 +3507,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, cm->current_video_frame++; cpi->frames_since_key++; + cpi->ext_refresh_frame_flags_pending = 0; // We advance the temporal pattern for dropped frames. cpi->temporal_pattern_counter++; @@ -3600,6 +3549,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, #endif cm->current_video_frame++; cpi->frames_since_key++; + cpi->ext_refresh_frame_flags_pending = 0; // We advance the temporal pattern for dropped frames. cpi->temporal_pattern_counter++; return; @@ -3799,7 +3749,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, /* Setup background Q adjustment for error resilient mode. * For multi-layer encodes only enable this for the base layer. - */ + */ if (cpi->cyclic_refresh_mode_enabled) { // Special case for screen_content_mode with golden frame updates. int disable_cr_gf = @@ -3833,8 +3783,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, // (temporal denoising) mode. 
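The onyx_if.c hunks above thread a new ext_refresh_frame_flags_pending flag through the encoder: vp8_update_reference() raises it, every dropped-frame early return clears it, and the automatic golden-to-ARF copy is suppressed while it is set, so a user-requested refresh is honored exactly once. A minimal model of that life cycle, with hypothetical, shortened names (error-resilient gating omitted):

    struct enc_state {
      int refresh_golden_frame;
      int copy_buffer_to_arf;
      int ext_refresh_pending; /* cpi->ext_refresh_frame_flags_pending */
    };

    static void user_requests_refresh(struct enc_state *e) {
      e->ext_refresh_pending = 1; /* as in vp8_update_reference() */
    }

    static void finish_frame(struct enc_state *e) {
      /* The GF->ARF copy stays an encoder-internal decision only. */
      if (e->refresh_golden_frame && !e->ext_refresh_pending)
        e->copy_buffer_to_arf = 2;
      e->ext_refresh_pending = 0; /* consumed on every path, drops included */
    }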
if (cpi->oxcf.noise_sensitivity >= 3) { if (cpi->denoiser.denoise_pars.spatial_blur != 0) { - vp8_de_noise(cm, cpi->Source, cpi->Source, - cpi->denoiser.denoise_pars.spatial_blur, 1, 0, 0); + vp8_de_noise(cm, cpi->Source, cpi->denoiser.denoise_pars.spatial_blur, 1); } } #endif @@ -3855,9 +3804,9 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, } if (cm->frame_type == KEY_FRAME) { - vp8_de_noise(cm, cpi->Source, cpi->Source, l, 1, 0, 1); + vp8_de_noise(cm, cpi->Source, l, 1); } else { - vp8_de_noise(cm, cpi->Source, cpi->Source, l, 1, 0, 1); + vp8_de_noise(cm, cpi->Source, l, 1); src = cpi->Source->y_buffer; @@ -4000,7 +3949,13 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, vp8_encode_frame(cpi); if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - if (vp8_drop_encodedframe_overshoot(cpi, Q)) return; + if (vp8_drop_encodedframe_overshoot(cpi, Q)) { + vpx_clear_system_state(); + return; + } + if (cm->frame_type != KEY_FRAME) + cpi->last_pred_err_mb = + (int)(cpi->mb.prediction_error / cpi->common.MBs); } cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi); @@ -4283,6 +4238,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, cpi->common.current_video_frame++; cpi->frames_since_key++; cpi->drop_frame_count++; + cpi->ext_refresh_frame_flags_pending = 0; // We advance the temporal pattern for dropped frames. cpi->temporal_pattern_counter++; return; @@ -4391,8 +4347,10 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, /* For inter frames the current default behavior is that when * cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer * This is purely an encoder decision at present. + * Avoid this behavior when refresh flags are set by the user. */ - if (!cpi->oxcf.error_resilient_mode && cm->refresh_golden_frame) { + if (!cpi->oxcf.error_resilient_mode && cm->refresh_golden_frame && + !cpi->ext_refresh_frame_flags_pending) { cm->copy_buffer_to_arf = 2; } else { cm->copy_buffer_to_arf = 0; @@ -4699,6 +4657,8 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, #endif + cpi->ext_refresh_frame_flags_pending = 0; + if (cm->refresh_golden_frame == 1) { cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN; } else { @@ -4782,8 +4742,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, cpi->temporal_pattern_counter++; } -/* reset to normal state now that we are done. */ - #if 0 { char filename[512]; @@ -4866,14 +4824,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, cm = &cpi->common; - if (setjmp(cpi->common.error.jmp)) { - cpi->common.error.setjmp = 0; - vpx_clear_system_state(); - return VPX_CODEC_CORRUPT_FRAME; - } - - cpi->common.error.setjmp = 1; - vpx_usec_timer_start(&cmptimer); cpi->source = NULL; @@ -4999,10 +4949,13 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, // be received for that high layer, which will yield an incorrect // frame rate (from time-stamp adjustment in above calculation). if (cpi->oxcf.mr_encoder_id) { - cpi->ref_framerate = low_res_frame_info->low_res_framerate; + if (!low_res_frame_info->skip_encoding_base_stream) + cpi->ref_framerate = low_res_frame_info->low_res_framerate; } else { // Keep track of frame rate for lowest resolution. low_res_frame_info->low_res_framerate = cpi->ref_framerate; + // The base stream is being encoded so set skip flag to 0. 
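The multi-res hunks in this region make stream skipping explicit: skip_encoding_prev_stream disables motion-vector reuse when the resolution below was skipped, skip_encoding_base_stream guards the frame-rate hand-off from the base stream, and each stream that actually encodes lowers its own flag for the stream above. Condensed into a sketch with hypothetical types:

    typedef struct {
      int skip_encoding_prev_stream; /* was the stream below skipped? */
    } lower_res_info;

    static void encode_one_stream(lower_res_info *info, int encoder_id,
                                  int *mr_low_res_mv_avail) {
      if (encoder_id && info->skip_encoding_prev_stream) {
        *mr_low_res_mv_avail = 0; /* MVs from the skipped stream are stale */
      }
      info->skip_encoding_prev_stream = 0; /* this stream is being encoded */
    }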
+ low_res_frame_info->skip_encoding_base_stream = 0; } } #endif @@ -5108,7 +5061,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, } switch (cpi->pass) { #if !CONFIG_REALTIME_ONLY - case 1: Pass1Encode(cpi, size, dest, frame_flags); break; + case 1: Pass1Encode(cpi); break; case 2: Pass2Encode(cpi, size, dest, dest_end, frame_flags); break; #endif // !CONFIG_REALTIME_ONLY default: @@ -5227,7 +5180,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, double weight = 0; vp8_deblock(cm, cm->frame_to_show, &cm->post_proc_buffer, - cm->filter_level * 10 / 6, 1, 0); + cm->filter_level * 10 / 6); vpx_clear_system_state(); ye = calc_plane_error(orig->y_buffer, orig->y_stride, pp->y_buffer, diff --git a/media/libvpx/libvpx/vp8/encoder/onyx_int.h b/media/libvpx/libvpx/vp8/encoder/onyx_int.h index c489b46c2d3a..50a750da3101 100644 --- a/media/libvpx/libvpx/vp8/encoder/onyx_int.h +++ b/media/libvpx/libvpx/vp8/encoder/onyx_int.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_ONYX_INT_H_ -#define VP8_ENCODER_ONYX_INT_H_ +#ifndef VPX_VP8_ENCODER_ONYX_INT_H_ +#define VPX_VP8_ENCODER_ONYX_INT_H_ #include #include "vpx_config.h" @@ -57,6 +57,9 @@ extern "C" { #define VP8_TEMPORAL_ALT_REF !CONFIG_REALTIME_ONLY +/* vp8 uses 10,000,000 ticks/second as time stamp */ +#define TICKS_PER_SEC 10000000 + typedef struct { int kf_indicated; unsigned int frames_since_key; @@ -257,6 +260,7 @@ typedef struct { int count_mb_ref_frame_usage[MAX_REF_FRAMES]; + int last_q[2]; } LAYER_CONTEXT; typedef struct VP8_COMP { @@ -510,6 +514,7 @@ typedef struct VP8_COMP { int force_maxqp; int frames_since_last_drop_overshoot; + int last_pred_err_mb; // GF update for 1 pass cbr. int gf_update_onepass_cbr; @@ -695,6 +700,8 @@ typedef struct VP8_COMP { // Use the static threshold from ROI settings. 
int use_roi_static_threshold; + + int ext_refresh_frame_flags_pending; } VP8_COMP; void vp8_initialize_enc(void); @@ -714,8 +721,8 @@ void vp8_set_speed_features(VP8_COMP *cpi); #if CONFIG_DEBUG #define CHECK_MEM_ERROR(lval, expr) \ do { \ - lval = (expr); \ - if (!lval) \ + (lval) = (expr); \ + if (!(lval)) \ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, \ "Failed to allocate " #lval " at %s:%d", __FILE__, \ __LINE__); \ @@ -723,8 +730,8 @@ void vp8_set_speed_features(VP8_COMP *cpi); #else #define CHECK_MEM_ERROR(lval, expr) \ do { \ - lval = (expr); \ - if (!lval) \ + (lval) = (expr); \ + if (!(lval)) \ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, \ "Failed to allocate " #lval); \ } while (0) @@ -733,4 +740,4 @@ void vp8_set_speed_features(VP8_COMP *cpi); } // extern "C" #endif -#endif // VP8_ENCODER_ONYX_INT_H_ +#endif // VPX_VP8_ENCODER_ONYX_INT_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/pickinter.c b/media/libvpx/libvpx/vp8/encoder/pickinter.c index a9943eb6ab9f..04f68c32454a 100644 --- a/media/libvpx/libvpx/vp8/encoder/pickinter.c +++ b/media/libvpx/libvpx/vp8/encoder/pickinter.c @@ -173,9 +173,8 @@ static int get_prediction_error(BLOCK *be, BLOCKD *b) { static int pick_intra4x4block(MACROBLOCK *x, int ib, B_PREDICTION_MODE *best_mode, - const int *mode_costs, - - int *bestrate, int *bestdistortion) { + const int *mode_costs, int *bestrate, + int *bestdistortion) { BLOCKD *b = &x->e_mbd.block[ib]; BLOCK *be = &x->block[ib]; int dst_stride = x->e_mbd.dst.y_stride; @@ -564,7 +563,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO best_mbmode; - int_mv best_ref_mv_sb[2]; + int_mv best_ref_mv_sb[2] = { { 0 }, { 0 } }; int_mv mode_mv_sb[2][MB_MODE_COUNT]; int_mv best_ref_mv; int_mv *mode_mv; @@ -602,7 +601,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, /* search range got from mv_pred(). It uses step_param levels. (0-7) */ int sr = 0; - unsigned char *plane[4][3]; + unsigned char *plane[4][3] = { { 0, 0 } }; int ref_frame_map[4]; int sign_bias = 0; int dot_artifact_candidate = 0; @@ -631,13 +630,16 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, } } #endif + assert(plane[LAST_FRAME][0] != NULL); dot_artifact_candidate = check_dot_artifact_candidate( cpi, x, target_y, stride, plane[LAST_FRAME][0], mb_row, mb_col, 0); // If not found in Y channel, check UV channel. if (!dot_artifact_candidate) { + assert(plane[LAST_FRAME][1] != NULL); dot_artifact_candidate = check_dot_artifact_candidate( cpi, x, target_u, stride_uv, plane[LAST_FRAME][1], mb_row, mb_col, 1); if (!dot_artifact_candidate) { + assert(plane[LAST_FRAME][2] != NULL); dot_artifact_candidate = check_dot_artifact_candidate( cpi, x, target_v, stride_uv, plane[LAST_FRAME][2], mb_row, mb_col, 2); @@ -741,10 +743,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; /* If the frame has big static background and current MB is in low - * motion area, its mode decision is biased to ZEROMV mode. - * No adjustment if cpu_used is <= -12 (i.e., cpi->Speed >= 12). - * At such speed settings, ZEROMV is already heavily favored. - */ + * motion area, its mode decision is biased to ZEROMV mode. + * No adjustment if cpu_used is <= -12 (i.e., cpi->Speed >= 12). + * At such speed settings, ZEROMV is already heavily favored. 
+ */ if (cpi->Speed < 12) { calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment); } @@ -1016,7 +1018,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, #endif bestsme = vp8_hex_search(x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &cpi->fn_ptr[BLOCK_16X16], - x->mvsadcost, x->mvcost, &best_ref_mv); + x->mvsadcost, &best_ref_mv); mode_mv[NEWMV].as_int = d->bmi.mv.as_int; } else { bestsme = cpi->diamond_search_sad( @@ -1068,10 +1070,12 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, cpi->mb.mvcost, 128); } + // fall through case NEARESTMV: case NEARMV: if (mode_mv[this_mode].as_int == 0) continue; + // fall through case ZEROMV: @@ -1301,9 +1305,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, update_mvcount(x, &best_ref_mv); } -void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_) { +void vp8_pick_intra_mode(MACROBLOCK *x, int *rate) { int error4x4, error16x16 = INT_MAX; - int rate, best_rate = 0, distortion, best_sse; + int rate_, best_rate = 0, distortion, best_sse; MB_PREDICTION_MODE mode, best_mode = DC_PRED; int this_rd; unsigned int sse; @@ -1321,23 +1325,23 @@ void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_) { xd->predictor, 16); distortion = vpx_variance16x16(*(b->base_src), b->src_stride, xd->predictor, 16, &sse); - rate = x->mbmode_cost[xd->frame_type][mode]; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + rate_ = x->mbmode_cost[xd->frame_type][mode]; + this_rd = RDCOST(x->rdmult, x->rddiv, rate_, distortion); if (error16x16 > this_rd) { error16x16 = this_rd; best_mode = mode; best_sse = sse; - best_rate = rate; + best_rate = rate_; } } xd->mode_info_context->mbmi.mode = best_mode; - error4x4 = pick_intra4x4mby_modes(x, &rate, &best_sse); + error4x4 = pick_intra4x4mby_modes(x, &rate_, &best_sse); if (error4x4 < error16x16) { xd->mode_info_context->mbmi.mode = B_PRED; - best_rate = rate; + best_rate = rate_; } - *rate_ = best_rate; + *rate = best_rate; } diff --git a/media/libvpx/libvpx/vp8/encoder/pickinter.h b/media/libvpx/libvpx/vp8/encoder/pickinter.h index bf1d0c97499e..392fb41593f5 100644 --- a/media/libvpx/libvpx/vp8/encoder/pickinter.h +++ b/media/libvpx/libvpx/vp8/encoder/pickinter.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_PICKINTER_H_ -#define VP8_ENCODER_PICKINTER_H_ +#ifndef VPX_VP8_ENCODER_PICKINTER_H_ +#define VPX_VP8_ENCODER_PICKINTER_H_ #include "vpx_config.h" #include "vp8/common/onyxc_int.h" @@ -30,4 +30,4 @@ extern int vp8_get_inter_mbpred_error(MACROBLOCK *mb, } // extern "C" #endif -#endif // VP8_ENCODER_PICKINTER_H_ +#endif // VPX_VP8_ENCODER_PICKINTER_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/picklpf.c b/media/libvpx/libvpx/vp8/encoder/picklpf.c index b1b712db9a7e..387ac9788bee 100644 --- a/media/libvpx/libvpx/vp8/encoder/picklpf.c +++ b/media/libvpx/libvpx/vp8/encoder/picklpf.c @@ -18,7 +18,7 @@ #include "vpx_scale/vpx_scale.h" #include "vp8/common/alloccommon.h" #include "vp8/common/loopfilter.h" -#if ARCH_ARM +#if VPX_ARCH_ARM #include "vpx_ports/arm.h" #endif diff --git a/media/libvpx/libvpx/vp8/encoder/picklpf.h b/media/libvpx/libvpx/vp8/encoder/picklpf.h index e6ad0dbf2654..03597e5427a5 100644 --- a/media/libvpx/libvpx/vp8/encoder/picklpf.h +++ b/media/libvpx/libvpx/vp8/encoder/picklpf.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_ENCODER_PICKLPF_H_ -#define VP8_ENCODER_PICKLPF_H_ +#ifndef VPX_VP8_ENCODER_PICKLPF_H_ +#define VPX_VP8_ENCODER_PICKLPF_H_ #ifdef __cplusplus extern "C" { @@ -27,4 +27,4 @@ void vp8cx_pick_filter_level(struct yv12_buffer_config *sd, VP8_COMP *cpi); } #endif -#endif // VP8_ENCODER_PICKLPF_H_ +#endif // VPX_VP8_ENCODER_PICKLPF_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/quantize.h b/media/libvpx/libvpx/vp8/encoder/quantize.h index 267150f99fba..78746c0c20bf 100644 --- a/media/libvpx/libvpx/vp8/encoder/quantize.h +++ b/media/libvpx/libvpx/vp8/encoder/quantize.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_QUANTIZE_H_ -#define VP8_ENCODER_QUANTIZE_H_ +#ifndef VPX_VP8_ENCODER_QUANTIZE_H_ +#define VPX_VP8_ENCODER_QUANTIZE_H_ #ifdef __cplusplus extern "C" { @@ -31,4 +31,4 @@ extern void vp8cx_init_quantizer(struct VP8_COMP *cpi); } // extern "C" #endif -#endif // VP8_ENCODER_QUANTIZE_H_ +#endif // VPX_VP8_ENCODER_QUANTIZE_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/ratectrl.c b/media/libvpx/libvpx/vp8/encoder/ratectrl.c index e58c31098027..dbd76edad05e 100644 --- a/media/libvpx/libvpx/vp8/encoder/ratectrl.c +++ b/media/libvpx/libvpx/vp8/encoder/ratectrl.c @@ -996,7 +996,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi) { * bits on this frame even if it is a contructed arf. * The active maximum quantizer insures that an appropriate * number of bits will be spent if needed for contstructed ARFs. - */ + */ cpi->this_frame_target = 0; } @@ -1052,9 +1052,8 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) { * overflow when values are large */ projected_size_based_on_q = - (int)(((.5 + - rate_correction_factor * - vp8_bits_per_mb[cpi->common.frame_type][Q]) * + (int)(((.5 + rate_correction_factor * + vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) / (1 << BPER_MB_NORMBITS)); @@ -1126,6 +1125,14 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) { } } +static int limit_q_cbr_inter(int last_q, int current_q) { + int limit_down = 12; + if (last_q - current_q > limit_down) + return (last_q - limit_down); + else + return current_q; +} + int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) { int Q = cpi->active_worst_quality; @@ -1265,6 +1272,12 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) { } } + // Limit decrease in Q for 1 pass CBR screen content mode. 
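A quick worked example of the limit_q_cbr_inter() helper above, whose limit_down of 12 caps how far Q may fall between consecutive inter frames in screen-content CBR mode (the per-layer last_q[] persisted earlier in this patch keeps the comparison valid across temporal layers):

    /* limit_q_cbr_inter(40, 20): 40 - 20 = 20 > 12, so it returns 40 - 12 = 28.
     * limit_q_cbr_inter(40, 35): 40 - 35 = 5 <= 12, so 35 passes through.
     * Q can therefore drop by at most 12 from one inter frame to the next. */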
+ if (cpi->common.frame_type != KEY_FRAME && cpi->pass == 0 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER && + cpi->oxcf.screen_content_mode) + Q = limit_q_cbr_inter(cpi->last_q[1], Q); + return Q; } @@ -1465,7 +1478,7 @@ int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) { (cpi->oxcf.screen_content_mode == 2 || (cpi->drop_frames_allowed && (force_drop_overshoot || - (cpi->rate_correction_factor < (4.0f * MIN_BPB_FACTOR) && + (cpi->rate_correction_factor < (8.0f * MIN_BPB_FACTOR) && cpi->frames_since_last_drop_overshoot > (int)cpi->framerate))))) { // Note: the "projected_frame_size" from encode_frame() only gives estimate // of mode/motion vector rate (in non-rd mode): so below we only require @@ -1485,7 +1498,8 @@ int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) { if (cpi->drop_frames_allowed && pred_err_mb > (thresh_pred_err_mb << 4)) thresh_rate = thresh_rate >> 3; if ((Q < thresh_qp && cpi->projected_frame_size > thresh_rate && - pred_err_mb > thresh_pred_err_mb) || + pred_err_mb > thresh_pred_err_mb && + pred_err_mb > 2 * cpi->last_pred_err_mb) || force_drop_overshoot) { unsigned int i; double new_correction_factor; diff --git a/media/libvpx/libvpx/vp8/encoder/ratectrl.h b/media/libvpx/libvpx/vp8/encoder/ratectrl.h index 249de4e706c3..844c72cb8614 100644 --- a/media/libvpx/libvpx/vp8/encoder/ratectrl.h +++ b/media/libvpx/libvpx/vp8/encoder/ratectrl.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_RATECTRL_H_ -#define VP8_ENCODER_RATECTRL_H_ +#ifndef VPX_VP8_ENCODER_RATECTRL_H_ +#define VPX_VP8_ENCODER_RATECTRL_H_ #include "onyx_int.h" @@ -37,4 +37,4 @@ extern int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q); } // extern "C" #endif -#endif // VP8_ENCODER_RATECTRL_H_ +#endif // VPX_VP8_ENCODER_RATECTRL_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/rdopt.c b/media/libvpx/libvpx/vp8/encoder/rdopt.c index e210b441055f..79a858e4370e 100644 --- a/media/libvpx/libvpx/vp8/encoder/rdopt.c +++ b/media/libvpx/libvpx/vp8/encoder/rdopt.c @@ -770,9 +770,9 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate, vp8_quantize_mbuv(x); rate_to = rd_cost_mbuv(x); - this_rate = rate_to + - x->intra_uv_mode_cost[xd->frame_type] - [xd->mode_info_context->mbmi.uv_mode]; + this_rate = + rate_to + x->intra_uv_mode_cost[xd->frame_type] + [xd->mode_info_context->mbmi.uv_mode]; this_distortion = vp8_mbuverror(x) / 4; @@ -989,7 +989,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi, br += rate; for (i = 0; i < label_count; ++i) { - int_mv mode_mv[B_MODE_COUNT]; + int_mv mode_mv[B_MODE_COUNT] = { { 0 }, { 0 } }; int best_label_rd = INT_MAX; B_PREDICTION_MODE mode_selected = ZERO4X4; int bestlabelyrate = 0; @@ -1767,7 +1767,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, /* search range got from mv_pred(). It uses step_param levels. (0-7) */ int sr = 0; - unsigned char *plane[4][3]; + unsigned char *plane[4][3] = { { 0, 0 } }; int ref_frame_map[4]; int sign_bias = 0; @@ -1779,6 +1779,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, best_rd_sse = UINT_MAX; #endif + // _uv variables are not set consistantly before calling update_best_mode. 
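The ratectrl.c overshoot hunks above move in two directions at once: widening the rate_correction_factor gate from 4x to 8x MIN_BPB_FACTOR makes more frames eligible for the drop check, while the new pred_err_mb > 2 * cpi->last_pred_err_mb term only fires when the per-macroblock prediction error has at least doubled since the last encoded frame (last_pred_err_mb is refreshed in onyx_if.c above), i.e. a real content change rather than steady-state noise. The final predicate, condensed into a hypothetical helper:

    static int should_drop_for_overshoot(int Q, int thresh_qp, int frame_size,
                                         int thresh_rate, int pred_err_mb,
                                         int thresh_pred_err_mb,
                                         int last_pred_err_mb, int force_drop) {
      return force_drop ||
             (Q < thresh_qp && frame_size > thresh_rate &&
              pred_err_mb > thresh_pred_err_mb &&
              pred_err_mb > 2 * last_pred_err_mb /* new in this patch */);
    }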
+ rd.rate_uv = 0; + rd.distortion_uv = 0; + mode_mv = mode_mv_sb[sign_bias]; best_ref_mv.as_int = 0; best_mode.rd = INT_MAX; @@ -1846,6 +1850,9 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, /* everything but intra */ if (x->e_mbd.mode_info_context->mbmi.ref_frame) { + assert(plane[this_ref_frame][0] != NULL && + plane[this_ref_frame][1] != NULL && + plane[this_ref_frame][2] != NULL); x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; @@ -1940,6 +1947,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, rd.distortion2 += distortion; if (tmp_rd < best_mode.yrd) { + assert(uv_intra_done); rd.rate2 += uv_intra_rate; rd.rate_uv = uv_intra_rate_tokenonly; rd.distortion2 += uv_intra_distortion; @@ -2000,6 +2008,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, rd.distortion2 += distortion; rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type] [x->e_mbd.mode_info_context->mbmi.mode]; + assert(uv_intra_done); rd.rate2 += uv_intra_rate; rd.rate_uv = uv_intra_rate_tokenonly; rd.distortion2 += uv_intra_distortion; @@ -2131,6 +2140,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96); } + // fall through case NEARESTMV: case NEARMV: @@ -2147,6 +2157,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, (mode_mv[this_mode].as_int == 0)) { continue; } + // fall through case ZEROMV: @@ -2352,11 +2363,11 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, rd_update_mvcount(x, &best_ref_mv); } -void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_) { +void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate) { int error4x4, error16x16; int rate4x4, rate16x16 = 0, rateuv; int dist4x4, dist16x16, distuv; - int rate; + int rate_; int rate4x4_tokenonly = 0; int rate16x16_tokenonly = 0; int rateuv_tokenonly = 0; @@ -2364,7 +2375,7 @@ void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_) { x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv); - rate = rateuv; + rate_ = rateuv; error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly, &dist16x16); @@ -2374,10 +2385,10 @@ void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_) { if (error4x4 < error16x16) { x->e_mbd.mode_info_context->mbmi.mode = B_PRED; - rate += rate4x4; + rate_ += rate4x4; } else { - rate += rate16x16; + rate_ += rate16x16; } - *rate_ = rate; + *rate = rate_; } diff --git a/media/libvpx/libvpx/vp8/encoder/rdopt.h b/media/libvpx/libvpx/vp8/encoder/rdopt.h index 960bd8f1cdfa..cc3db8197c7f 100644 --- a/media/libvpx/libvpx/vp8/encoder/rdopt.h +++ b/media/libvpx/libvpx/vp8/encoder/rdopt.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_ENCODER_RDOPT_H_ -#define VP8_ENCODER_RDOPT_H_ +#ifndef VPX_VP8_ENCODER_RDOPT_H_ +#define VPX_VP8_ENCODER_RDOPT_H_ #include "./vpx_config.h" @@ -63,12 +63,12 @@ static INLINE void insertsortsad(int arr[], int idx[], int len) { } } -extern void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue); -extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, - int recon_yoffset, int recon_uvoffset, - int *returnrate, int *returndistortion, - int *returnintra, int mb_row, int mb_col); -extern void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate); +void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue); +void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, + int recon_uvoffset, int *returnrate, + int *returndistortion, int *returnintra, int mb_row, + int mb_col); +void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate); static INLINE void get_plane_pointers(const YV12_BUFFER_CONFIG *fb, unsigned char *plane[3], @@ -110,9 +110,9 @@ static INLINE void get_reference_search_order(const VP8_COMP *cpi, for (; i < 4; ++i) ref_frame_map[i] = -1; } -extern void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here, - int_mv *mvp, int refframe, int *ref_frame_sign_bias, - int *sr, int near_sadidx[]); +void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here, + int_mv *mvp, int refframe, int *ref_frame_sign_bias, int *sr, + int near_sadidx[]); void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]); int VP8_UVSSE(MACROBLOCK *x); @@ -123,4 +123,4 @@ void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv); } // extern "C" #endif -#endif // VP8_ENCODER_RDOPT_H_ +#endif // VPX_VP8_ENCODER_RDOPT_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/segmentation.h b/media/libvpx/libvpx/vp8/encoder/segmentation.h index 1395a3411856..4ddbdbbd265e 100644 --- a/media/libvpx/libvpx/vp8/encoder/segmentation.h +++ b/media/libvpx/libvpx/vp8/encoder/segmentation.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_SEGMENTATION_H_ -#define VP8_ENCODER_SEGMENTATION_H_ +#ifndef VPX_VP8_ENCODER_SEGMENTATION_H_ +#define VPX_VP8_ENCODER_SEGMENTATION_H_ #include "string.h" #include "vp8/common/blockd.h" @@ -26,4 +26,4 @@ extern void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, } // extern "C" #endif -#endif // VP8_ENCODER_SEGMENTATION_H_ +#endif // VPX_VP8_ENCODER_SEGMENTATION_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/temporal_filter.c b/media/libvpx/libvpx/vp8/encoder/temporal_filter.c index 0a7d25fb0a78..1c1a55fde6a6 100644 --- a/media/libvpx/libvpx/vp8/encoder/temporal_filter.c +++ b/media/libvpx/libvpx/vp8/encoder/temporal_filter.c @@ -158,7 +158,8 @@ static int vp8_temporal_filter_find_matching_mb_c(VP8_COMP *cpi, /* Ignore mv costing by sending NULL cost arrays */ bestsme = vp8_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.mv, step_param, sadpb, - &cpi->fn_ptr[BLOCK_16X16], NULL, NULL, &best_ref_mv1); + &cpi->fn_ptr[BLOCK_16X16], NULL, &best_ref_mv1); + (void)bestsme; // Ignore unused return value. #if ALT_REF_SUBPEL_ENABLED /* Try sub-pixel MC? */ diff --git a/media/libvpx/libvpx/vp8/encoder/temporal_filter.h b/media/libvpx/libvpx/vp8/encoder/temporal_filter.h index 865d909fb653..fd39f5cb8731 100644 --- a/media/libvpx/libvpx/vp8/encoder/temporal_filter.h +++ b/media/libvpx/libvpx/vp8/encoder/temporal_filter.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_ENCODER_TEMPORAL_FILTER_H_ -#define VP8_ENCODER_TEMPORAL_FILTER_H_ +#ifndef VPX_VP8_ENCODER_TEMPORAL_FILTER_H_ +#define VPX_VP8_ENCODER_TEMPORAL_FILTER_H_ #ifdef __cplusplus extern "C" { @@ -23,4 +23,4 @@ void vp8_temporal_filter_prepare_c(struct VP8_COMP *cpi, int distance); } #endif -#endif // VP8_ENCODER_TEMPORAL_FILTER_H_ +#endif // VPX_VP8_ENCODER_TEMPORAL_FILTER_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/tokenize.c b/media/libvpx/libvpx/vp8/encoder/tokenize.c index ca5f0e3d8929..c3d70266074d 100644 --- a/media/libvpx/libvpx/vp8/encoder/tokenize.c +++ b/media/libvpx/libvpx/vp8/encoder/tokenize.c @@ -19,10 +19,6 @@ /* Global event counters used for accumulating statistics across several compressions, then generating context.c = initial stats. */ -#ifdef VP8_ENTROPY_STATS -_int64 context_counters[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] - [MAX_ENTROPY_TOKENS]; -#endif void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t); void vp8_fix_contexts(MACROBLOCKD *x); @@ -383,72 +379,6 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) { tokenize1st_order_b(x, t, plane_type, cpi); } -#ifdef VP8_ENTROPY_STATS - -void init_context_counters(void) { - memset(context_counters, 0, sizeof(context_counters)); -} - -void print_context_counters() { - int type, band, pt, t; - - FILE *const f = fopen("context.c", "w"); - - fprintf(f, "#include \"entropy.h\"\n"); - - fprintf(f, "\n/* *** GENERATED FILE: DO NOT EDIT *** */\n\n"); - - fprintf(f, - "int Contexts[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] " - "[MAX_ENTROPY_TOKENS];\n\n"); - - fprintf(f, - "const int default_contexts[BLOCK_TYPES] [COEF_BANDS] " - "[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {"); - -#define Comma(X) (X ? "," : "") - - type = 0; - - do { - fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); - - band = 0; - - do { - fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); - - pt = 0; - - do { - fprintf(f, "%s\n {", Comma(pt)); - - t = 0; - - do { - const _int64 x = context_counters[type][band][pt][t]; - const int y = (int)x; - - assert(x == (_int64)y); /* no overflow handling yet */ - fprintf(f, "%s %d", Comma(t), y); - - } while (++t < MAX_ENTROPY_TOKENS); - - fprintf(f, "}"); - } while (++pt < PREV_COEF_CONTEXTS); - - fprintf(f, "\n }"); - - } while (++band < COEF_BANDS); - - fprintf(f, "\n }"); - } while (++type < BLOCK_TYPES); - - fprintf(f, "\n};\n"); - fclose(f); -} -#endif - static void stuff2nd_order_b(TOKENEXTRA **tp, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, VP8_COMP *cpi, MACROBLOCK *x) { int pt; /* near block/prev token context index */ diff --git a/media/libvpx/libvpx/vp8/encoder/tokenize.h b/media/libvpx/libvpx/vp8/encoder/tokenize.h index e5dbdfc5af4e..47b5be17f1f4 100644 --- a/media/libvpx/libvpx/vp8/encoder/tokenize.h +++ b/media/libvpx/libvpx/vp8/encoder/tokenize.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP8_ENCODER_TOKENIZE_H_ -#define VP8_ENCODER_TOKENIZE_H_ +#ifndef VPX_VP8_ENCODER_TOKENIZE_H_ +#define VPX_VP8_ENCODER_TOKENIZE_H_ #include "vp8/common/entropy.h" #include "block.h" @@ -34,14 +34,6 @@ typedef struct { int rd_cost_mby(MACROBLOCKD *); -#ifdef VP8_ENTROPY_STATS -void init_context_counters(); -void print_context_counters(); - -extern _int64 context_counters[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] - [MAX_ENTROPY_TOKENS]; -#endif - extern const short *const vp8_dct_value_cost_ptr; /* TODO: The Token field should be broken out into a separate char array to * improve cache locality, since it's needed for costing when the rest of the @@ -53,4 +45,4 @@ extern const TOKENVALUE *const vp8_dct_value_tokens_ptr; } // extern "C" #endif -#endif // VP8_ENCODER_TOKENIZE_H_ +#endif // VPX_VP8_ENCODER_TOKENIZE_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/treewriter.h b/media/libvpx/libvpx/vp8/encoder/treewriter.h index dadbbe3f80d7..c02683a58b78 100644 --- a/media/libvpx/libvpx/vp8/encoder/treewriter.h +++ b/media/libvpx/libvpx/vp8/encoder/treewriter.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP8_ENCODER_TREEWRITER_H_ -#define VP8_ENCODER_TREEWRITER_H_ +#ifndef VPX_VP8_ENCODER_TREEWRITER_H_ +#define VPX_VP8_ENCODER_TREEWRITER_H_ /* Trees map alphabets into huffman-like codes suitable for an arithmetic bit coder. Timothy S Murphy 11 October 2004 */ @@ -56,8 +56,7 @@ static INLINE unsigned int vp8_cost_branch(const unsigned int ct[2], static void vp8_treed_write(vp8_writer *const w, vp8_tree t, const vp8_prob *const p, int v, - int n /* number of bits in v, assumed nonzero */ - ) { + int n) { /* number of bits in v, assumed nonzero */ vp8_tree_index i = 0; do { @@ -73,8 +72,7 @@ static INLINE void vp8_write_token(vp8_writer *const w, vp8_tree t, } static int vp8_treed_cost(vp8_tree t, const vp8_prob *const p, int v, - int n /* number of bits in v, assumed nonzero */ - ) { + int n) { /* number of bits in v, assumed nonzero */ int c = 0; vp8_tree_index i = 0; @@ -93,12 +91,12 @@ static INLINE int vp8_cost_token(vp8_tree t, const vp8_prob *const p, /* Fill array of costs for all possible token values. 
*/ -void vp8_cost_tokens(int *Costs, const vp8_prob *, vp8_tree); +void vp8_cost_tokens(int *c, const vp8_prob *, vp8_tree); -void vp8_cost_tokens2(int *Costs, const vp8_prob *, vp8_tree, int); +void vp8_cost_tokens2(int *c, const vp8_prob *, vp8_tree, int); #ifdef __cplusplus } // extern "C" #endif -#endif // VP8_ENCODER_TREEWRITER_H_ +#endif // VPX_VP8_ENCODER_TREEWRITER_H_ diff --git a/media/libvpx/libvpx/vp8/encoder/vp8_quantize.c b/media/libvpx/libvpx/vp8/encoder/vp8_quantize.c index ff6e04eaada4..5b8955510819 100644 --- a/media/libvpx/libvpx/vp8/encoder/vp8_quantize.c +++ b/media/libvpx/libvpx/vp8/encoder/vp8_quantize.c @@ -174,8 +174,6 @@ static void invert_quant(int improved_quant, short *quant, short *shift, } else { *quant = (1 << 16) / d; *shift = 0; - /* use multiplication and constant shift by 16 */ - *shift = 1 << (16 - *shift); } } diff --git a/media/libvpx/libvpx/vp8/encoder/x86/encodeopt.asm b/media/libvpx/libvpx/vp8/encoder/x86/block_error_sse2.asm similarity index 100% rename from media/libvpx/libvpx/vp8/encoder/x86/encodeopt.asm rename to media/libvpx/libvpx/vp8/encoder/x86/block_error_sse2.asm diff --git a/media/libvpx/libvpx/vp8/common/x86/copy_sse2.asm b/media/libvpx/libvpx/vp8/encoder/x86/copy_sse2.asm similarity index 100% rename from media/libvpx/libvpx/vp8/common/x86/copy_sse2.asm rename to media/libvpx/libvpx/vp8/encoder/x86/copy_sse2.asm diff --git a/media/libvpx/libvpx/vp8/common/x86/copy_sse3.asm b/media/libvpx/libvpx/vp8/encoder/x86/copy_sse3.asm similarity index 100% rename from media/libvpx/libvpx/vp8/common/x86/copy_sse3.asm rename to media/libvpx/libvpx/vp8/encoder/x86/copy_sse3.asm diff --git a/media/libvpx/libvpx/vp8/encoder/x86/quantize_sse4.c b/media/libvpx/libvpx/vp8/encoder/x86/quantize_sse4.c index 6f2c1634926e..389c16705d4a 100644 --- a/media/libvpx/libvpx/vp8/encoder/x86/quantize_sse4.c +++ b/media/libvpx/libvpx/vp8/encoder/x86/quantize_sse4.c @@ -11,28 +11,29 @@ #include /* SSE4.1 */ #include "./vp8_rtcd.h" -#include "vp8/encoder/block.h" #include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */ +#include "vp8/encoder/block.h" -#define SELECT_EOB(i, z, x, y, q) \ - do { \ - short boost = *zbin_boost_ptr; \ - short x_z = _mm_extract_epi16(x, z); \ - short y_z = _mm_extract_epi16(y, z); \ - int cmp = (x_z < boost) | (y_z == 0); \ - zbin_boost_ptr++; \ - if (cmp) break; \ - q = _mm_insert_epi16(q, y_z, z); \ - eob = i; \ - zbin_boost_ptr = b->zrun_zbin_boost; \ +#define SELECT_EOB(i, z, x, y, q) \ + do { \ + short boost = *zbin_boost_ptr; \ + /* Technically _mm_extract_epi16() returns an int: */ \ + /* https://bugs.llvm.org/show_bug.cgi?id=41657 */ \ + short x_z = (short)_mm_extract_epi16(x, z); \ + short y_z = (short)_mm_extract_epi16(y, z); \ + int cmp = (x_z < boost) | (y_z == 0); \ + zbin_boost_ptr++; \ + if (cmp) break; \ + q = _mm_insert_epi16(q, y_z, z); \ + eob = i; \ + zbin_boost_ptr = b->zrun_zbin_boost; \ } while (0) void vp8_regular_quantize_b_sse4_1(BLOCK *b, BLOCKD *d) { char eob = 0; short *zbin_boost_ptr = b->zrun_zbin_boost; - __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1, dqcoeff0, - dqcoeff1; + __m128i x0, x1, y0, y1, x_minus_zbin0, x_minus_zbin1, dqcoeff0, dqcoeff1; __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8)); __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); @@ -53,15 +54,9 @@ void vp8_regular_quantize_b_sse4_1(BLOCK *b, BLOCKD *d) { zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0); zbin_extra = 
_mm_unpacklo_epi16(zbin_extra, zbin_extra); - /* Sign of z: z >> 15 */ - sz0 = _mm_srai_epi16(z0, 15); - sz1 = _mm_srai_epi16(z1, 15); - - /* x = abs(z): (z ^ sz) - sz */ - x0 = _mm_xor_si128(z0, sz0); - x1 = _mm_xor_si128(z1, sz1); - x0 = _mm_sub_epi16(x0, sz0); - x1 = _mm_sub_epi16(x1, sz1); + /* x = abs(z) */ + x0 = _mm_abs_epi16(z0); + x1 = _mm_abs_epi16(z1); /* zbin[] + zbin_extra */ zbin0 = _mm_add_epi16(zbin0, zbin_extra); @@ -89,11 +84,9 @@ void vp8_regular_quantize_b_sse4_1(BLOCK *b, BLOCKD *d) { y0 = _mm_mulhi_epi16(y0, quant_shift0); y1 = _mm_mulhi_epi16(y1, quant_shift1); - /* Return the sign: (y ^ sz) - sz */ - y0 = _mm_xor_si128(y0, sz0); - y1 = _mm_xor_si128(y1, sz1); - y0 = _mm_sub_epi16(y0, sz0); - y1 = _mm_sub_epi16(y1, sz1); + /* Restore the sign. */ + y0 = _mm_sign_epi16(y0, z0); + y1 = _mm_sign_epi16(y1, z1); /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */ SELECT_EOB(1, 0, x_minus_zbin0, y0, qcoeff0); diff --git a/media/libvpx/libvpx/vp8/encoder/x86/vp8_quantize_ssse3.c b/media/libvpx/libvpx/vp8/encoder/x86/vp8_quantize_ssse3.c index d5474501544c..147c30cc3539 100644 --- a/media/libvpx/libvpx/vp8/encoder/x86/vp8_quantize_ssse3.c +++ b/media/libvpx/libvpx/vp8/encoder/x86/vp8_quantize_ssse3.c @@ -52,9 +52,9 @@ void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d) { __m128i sz0, sz1, x, x0, x1, y0, y1, zeros, abs0, abs1; - DECLARE_ALIGNED(16, const uint8_t, pshufb_zig_zag_mask[16]) = { - 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 - }; + DECLARE_ALIGNED(16, const uint8_t, + pshufb_zig_zag_mask[16]) = { 0, 1, 4, 8, 5, 2, 3, 6, + 9, 12, 13, 10, 7, 11, 14, 15 }; __m128i zig_zag = _mm_load_si128((const __m128i *)pshufb_zig_zag_mask); /* sign of z: z >> 15 */ diff --git a/media/libvpx/libvpx/vp8/vp8_common.mk b/media/libvpx/libvpx/vp8/vp8_common.mk index 246fe6a6772c..286a93a05650 100644 --- a/media/libvpx/libvpx/vp8/vp8_common.mk +++ b/media/libvpx/libvpx/vp8/vp8_common.mk @@ -15,7 +15,6 @@ VP8_COMMON_SRCS-yes += common/onyxd.h VP8_COMMON_SRCS-yes += common/alloccommon.c VP8_COMMON_SRCS-yes += common/blockd.c VP8_COMMON_SRCS-yes += common/coefupdateprobs.h -VP8_COMMON_SRCS-yes += common/copy_c.c # VP8_COMMON_SRCS-yes += common/debugmodes.c VP8_COMMON_SRCS-yes += common/default_coef_probs.h VP8_COMMON_SRCS-yes += common/dequantize.c @@ -70,10 +69,8 @@ VP8_COMMON_SRCS-yes += common/vp8_entropymodedata.h VP8_COMMON_SRCS-yes += common/treecoder.c -VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.c -VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.h -VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c -VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c +VP8_COMMON_SRCS-$(VPX_ARCH_X86)$(VPX_ARCH_X86_64) += common/x86/vp8_asm_stubs.c +VP8_COMMON_SRCS-$(VPX_ARCH_X86)$(VPX_ARCH_X86_64) += common/x86/loopfilter_x86.c VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/mfqe.c VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c @@ -82,21 +79,20 @@ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idct_blk_mmx.c VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm -VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/copy_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm 
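The SSE4.1 quantizer above drops the classic xor/subtract sign dance in favor of the SSSE3 _mm_abs_epi16 / _mm_sign_epi16 pair, which is safe because SSE4.1 targets always have SSSE3. A scalar sketch of why the two formulations agree per lane:

    #include <stdint.h>

    /* Old route: sz = z >> 15 (all ones for negative z), abs = (z ^ sz) - sz. */
    static int16_t abs16(int16_t z) {
      const int16_t sz = z >> 15;
      return (int16_t)((z ^ sz) - sz); /* matches _mm_abs_epi16 per lane */
    }

    /* _mm_sign_epi16(y, z): negate y where z < 0, zero where z == 0, else y.
     * The zeroing case is harmless here since y is zero whenever z is. */
    static int16_t sign16(int16_t y, int16_t z) {
      return (int16_t)(z < 0 ? -y : (z == 0 ? 0 : y));
    }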
+VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/bilinear_filter_sse2.c VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm -VP8_COMMON_SRCS-$(HAVE_SSE3) += common/x86/copy_sse3.asm VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm ifeq ($(CONFIG_POSTPROC),yes) VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm endif -ifeq ($(ARCH_X86_64),yes) +ifeq ($(VPX_ARCH_X86_64),yes) VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2_x86_64.asm endif @@ -130,14 +126,13 @@ endif # common (neon intrinsics) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/loopfilter_arm.c +VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/loopfilter_arm.h VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_blk_neon.c -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon.c -VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_full_2x_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iwalsh_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_loopfilter_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon.c diff --git a/media/libvpx/libvpx/vp8/vp8_cx_iface.c b/media/libvpx/libvpx/vp8/vp8_cx_iface.c index 92b4dc744223..ca2124179842 100644 --- a/media/libvpx/libvpx/vp8/vp8_cx_iface.c +++ b/media/libvpx/libvpx/vp8/vp8_cx_iface.c @@ -16,7 +16,9 @@ #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/system_state.h" #include "vpx_ports/vpx_once.h" +#include "vpx_util/vpx_timestamp.h" #include "vp8/encoder/onyx_int.h" #include "vpx/vp8cx.h" #include "vp8/encoder/firstpass.h" @@ -49,7 +51,7 @@ static struct vp8_extracfg default_extracfg = { #if !(CONFIG_REALTIME_ONLY) 0, /* cpu_used */ #else - 4, /* cpu_used */ + 4, /* cpu_used */ #endif 0, /* enable_auto_alt_ref */ 0, /* noise_sensitivity */ @@ -74,6 +76,9 @@ struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_enc_cfg_t cfg; struct vp8_extracfg vp8_cfg; + vpx_rational64_t timestamp_ratio; + vpx_codec_pts_t pts_offset; + unsigned char pts_offset_initialized; VP8_CONFIG oxcf; struct VP8_COMP *cpi; unsigned char *cx_data; @@ -105,10 +110,10 @@ static vpx_codec_err_t update_error_state( return VPX_CODEC_INVALID_PARAM; \ } while (0) -#define RANGE_CHECK(p, memb, lo, hi) \ - do { \ - if (!(((p)->memb == lo || (p)->memb > (lo)) && (p)->memb <= hi)) \ - ERROR(#memb " out of range [" #lo ".." #hi "]"); \ +#define RANGE_CHECK(p, memb, lo, hi) \ + do { \ + if (!(((p)->memb == (lo) || (p)->memb > (lo)) && (p)->memb <= (hi))) \ + ERROR(#memb " out of range [" #lo ".." #hi "]"); \ } while (0) #define RANGE_CHECK_HI(p, memb, hi) \ @@ -126,6 +131,22 @@ static vpx_codec_err_t update_error_state( if (!!((p)->memb) != (p)->memb) ERROR(#memb " expected boolean"); \ } while (0) +#if defined(_MSC_VER) +#define COMPILE_TIME_ASSERT(boolexp) \ + do { \ + char compile_time_assert[(boolexp) ? 
1 : -1]; \ + (void)compile_time_assert; \ + } while (0) +#else /* !_MSC_VER */ +#define COMPILE_TIME_ASSERT(boolexp) \ + do { \ + struct { \ + unsigned int compile_time_assert : (boolexp) ? 1 : -1; \ + } compile_time_assert; \ + (void)compile_time_assert; \ + } while (0) +#endif /* _MSC_VER */ + static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg, const struct vp8_extracfg *vp8_cfg, @@ -258,9 +279,7 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, const vpx_image_t *img) { switch (img->fmt) { case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_I420: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: break; + case VPX_IMG_FMT_I420: break; default: ERROR("Invalid image format. Only YV12 and I420 images are supported"); } @@ -484,6 +503,9 @@ static vpx_codec_err_t update_extracfg(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t set_cpu_used(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.cpu_used = CAST(VP8E_SET_CPUUSED, args); + // Use fastest speed setting (speed 16 or -16) if it's set beyond the range. + extra_cfg.cpu_used = VPXMIN(16, extra_cfg.cpu_used); + extra_cfg.cpu_used = VPXMAX(-16, extra_cfg.cpu_used); return update_extracfg(ctx, &extra_cfg); } @@ -577,7 +599,7 @@ static vpx_codec_err_t set_screen_content_mode(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t vp8e_mr_alloc_mem(const vpx_codec_enc_cfg_t *cfg, void **mem_loc) { - vpx_codec_err_t res = 0; + vpx_codec_err_t res = VPX_CODEC_OK; #if CONFIG_MULTI_RES_ENCODING LOWER_RES_FRAME_INFO *shared_mem_loc; @@ -586,12 +608,13 @@ static vpx_codec_err_t vp8e_mr_alloc_mem(const vpx_codec_enc_cfg_t *cfg, shared_mem_loc = calloc(1, sizeof(LOWER_RES_FRAME_INFO)); if (!shared_mem_loc) { - res = VPX_CODEC_MEM_ERROR; + return VPX_CODEC_MEM_ERROR; } shared_mem_loc->mb_info = calloc(mb_rows * mb_cols, sizeof(LOWER_RES_MB_INFO)); if (!(shared_mem_loc->mb_info)) { + free(shared_mem_loc); res = VPX_CODEC_MEM_ERROR; } else { *mem_loc = (void *)shared_mem_loc; @@ -655,6 +678,12 @@ static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, res = validate_config(priv, &priv->cfg, &priv->vp8_cfg, 0); if (!res) { + priv->pts_offset_initialized = 0; + priv->timestamp_ratio.den = priv->cfg.g_timebase.den; + priv->timestamp_ratio.num = (int64_t)priv->cfg.g_timebase.num; + priv->timestamp_ratio.num *= TICKS_PER_SEC; + reduce_ratio(&priv->timestamp_ratio); + set_vp8e_config(&priv->oxcf, priv->cfg, priv->vp8_cfg, mr_cfg); priv->cpi = vp8_create_compressor(&priv->oxcf); if (!priv->cpi) res = VPX_CODEC_MEM_ERROR; @@ -719,12 +748,14 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx, new_qc = MODE_BESTQUALITY; if (deadline) { + /* Convert duration parameter from stream timebase to microseconds */ uint64_t duration_us; - /* Convert duration parameter from stream timebase to microseconds */ - duration_us = (uint64_t)duration * 1000000 * - (uint64_t)ctx->cfg.g_timebase.num / - (uint64_t)ctx->cfg.g_timebase.den; + COMPILE_TIME_ASSERT(TICKS_PER_SEC > 1000000 && + (TICKS_PER_SEC % 1000000) == 0); + + duration_us = duration * (uint64_t)ctx->timestamp_ratio.num / + (ctx->timestamp_ratio.den * (TICKS_PER_SEC / 1000000)); /* If the deadline is more that the duration this frame is to be shown, * use good quality mode. Otherwise use realtime mode. 
@@ -798,16 +829,38 @@ static vpx_codec_err_t set_reference_and_update(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, const vpx_image_t *img, vpx_codec_pts_t pts, unsigned long duration, - vpx_enc_frame_flags_t flags, + vpx_enc_frame_flags_t enc_flags, unsigned long deadline) { - vpx_codec_err_t res = VPX_CODEC_OK; + volatile vpx_codec_err_t res = VPX_CODEC_OK; + // Make a copy as volatile to avoid -Wclobbered with longjmp. + volatile vpx_enc_frame_flags_t flags = enc_flags; + volatile vpx_codec_pts_t pts_val = pts; - if (!ctx->cfg.rc_target_bitrate) return res; + if (!ctx->cfg.rc_target_bitrate) { +#if CONFIG_MULTI_RES_ENCODING + if (!ctx->cpi) return VPX_CODEC_ERROR; + if (ctx->cpi->oxcf.mr_total_resolutions > 1) { + LOWER_RES_FRAME_INFO *low_res_frame_info = + (LOWER_RES_FRAME_INFO *)ctx->cpi->oxcf.mr_low_res_mode_info; + if (!low_res_frame_info) return VPX_CODEC_ERROR; + low_res_frame_info->skip_encoding_prev_stream = 1; + if (ctx->cpi->oxcf.mr_encoder_id == 0) + low_res_frame_info->skip_encoding_base_stream = 1; + } +#endif + return res; + } if (img) res = validate_img(ctx, img); if (!res) res = validate_config(ctx, &ctx->cfg, &ctx->vp8_cfg, 1); + if (!ctx->pts_offset_initialized) { + ctx->pts_offset = pts_val; + ctx->pts_offset_initialized = 1; + } + pts_val -= ctx->pts_offset; + pick_quickcompress_mode(ctx, duration, deadline); vpx_codec_pkt_list_init(&ctx->pkt_list); @@ -829,6 +882,12 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, } } + if (setjmp(ctx->cpi->common.error.jmp)) { + ctx->cpi->common.error.setjmp = 0; + vpx_clear_system_state(); + return VPX_CODEC_CORRUPT_FRAME; + } + /* Initialize the encoder instance on the first frame*/ if (!res && ctx->cpi) { unsigned int lib_flags; @@ -851,11 +910,10 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, /* Convert API flags to internal codec lib flags */ lib_flags = (flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0; - /* vp8 use 10,000,000 ticks/second as time stamp */ dst_time_stamp = - pts * 10000000 * ctx->cfg.g_timebase.num / ctx->cfg.g_timebase.den; - dst_end_time_stamp = (pts + duration) * 10000000 * ctx->cfg.g_timebase.num / - ctx->cfg.g_timebase.den; + pts_val * ctx->timestamp_ratio.num / ctx->timestamp_ratio.den; + dst_end_time_stamp = (pts_val + (int64_t)duration) * + ctx->timestamp_ratio.num / ctx->timestamp_ratio.den; if (img != NULL) { res = image2yuvconfig(img, &sd); @@ -884,6 +942,8 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, cx_data_end = ctx->cx_data + cx_data_sz; lib_flags = 0; + ctx->cpi->common.error.setjmp = 1; + while (cx_data_sz >= ctx->cx_data_sz / 2) { comp_data_state = vp8_get_compressed_data( ctx->cpi, &lib_flags, &size, cx_data, cx_data_end, &dst_time_stamp, @@ -901,16 +961,21 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, VP8_COMP *cpi = (VP8_COMP *)ctx->cpi; /* Add the frame packet to the list of returned packets. 
*/ - round = (vpx_codec_pts_t)10000000 * ctx->cfg.g_timebase.num / 2 - 1; + round = (vpx_codec_pts_t)ctx->timestamp_ratio.num / 2; + if (round > 0) --round; delta = (dst_end_time_stamp - dst_time_stamp); pkt.kind = VPX_CODEC_CX_FRAME_PKT; pkt.data.frame.pts = - (dst_time_stamp * ctx->cfg.g_timebase.den + round) / - ctx->cfg.g_timebase.num / 10000000; + (dst_time_stamp * ctx->timestamp_ratio.den + round) / + ctx->timestamp_ratio.num + + ctx->pts_offset; pkt.data.frame.duration = - (unsigned long)((delta * ctx->cfg.g_timebase.den + round) / - ctx->cfg.g_timebase.num / 10000000); + (unsigned long)((delta * ctx->timestamp_ratio.den + round) / + ctx->timestamp_ratio.num); pkt.data.frame.flags = lib_flags << 16; + pkt.data.frame.width[0] = cpi->common.Width; + pkt.data.frame.height[0] = cpi->common.Height; + pkt.data.frame.spatial_layer_encoded[0] = 1; if (lib_flags & FRAMEFLAGS_KEY) { pkt.data.frame.flags |= VPX_FRAME_IS_KEY; @@ -925,9 +990,9 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, * Invisible frames have no duration. */ pkt.data.frame.pts = - ((cpi->last_time_stamp_seen * ctx->cfg.g_timebase.den + round) / - ctx->cfg.g_timebase.num / 10000000) + - 1; + ((cpi->last_time_stamp_seen * ctx->timestamp_ratio.den + round) / + ctx->timestamp_ratio.num) + + ctx->pts_offset + 1; pkt.data.frame.duration = 0; } @@ -1185,7 +1250,7 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = { static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = { { 0, { - 0, /* g_usage */ + 0, /* g_usage (unused) */ 0, /* g_threads */ 0, /* g_profile */ @@ -1268,6 +1333,9 @@ CODEC_INTERFACE(vpx_codec_vp8_cx) = { vp8e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t cfg_maps; */ vp8e_encode, /* vpx_codec_encode_fn_t encode; */ vp8e_get_cxdata, /* vpx_codec_get_cx_data_fn_t get_cx_data; */ - vp8e_set_config, NULL, vp8e_get_preview, vp8e_mr_alloc_mem, + vp8e_set_config, + NULL, + vp8e_get_preview, + vp8e_mr_alloc_mem, } /* encoder functions */ }; diff --git a/media/libvpx/libvpx/vp8/vp8_dx_iface.c b/media/libvpx/libvpx/vp8/vp8_dx_iface.c index f20283c1e1b9..12e5781f551f 100644 --- a/media/libvpx/libvpx/vp8/vp8_dx_iface.c +++ b/media/libvpx/libvpx/vp8/vp8_dx_iface.c @@ -38,13 +38,19 @@ typedef vpx_codec_stream_info_t vp8_stream_info_t; /* Structures for handling memory allocations */ typedef enum { VP8_SEG_ALG_PRIV = 256, VP8_SEG_MAX } mem_seg_id_t; -#define NELEMENTS(x) ((int)(sizeof(x) / sizeof(x[0]))) +#define NELEMENTS(x) ((int)(sizeof(x) / sizeof((x)[0]))) struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_dec_cfg_t cfg; vp8_stream_info_t si; int decoder_init; +#if CONFIG_MULTITHREAD + // Restart threads on next frame if set to 1. + // This is set when error happens in multithreaded decoding and all threads + // are shut down. 
+ int restart_threads; +#endif int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; vpx_decrypt_cb decrypt_cb; @@ -200,9 +206,9 @@ static vpx_codec_err_t update_error_state( static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, void *user_priv) { /** vpx_img_wrap() doesn't allow specifying independent strides for - * the Y, U, and V planes, nor other alignment adjustments that - * might be representable by a YV12_BUFFER_CONFIG, so we just - * initialize all the fields.*/ + * the Y, U, and V planes, nor other alignment adjustments that + * might be representable by a YV12_BUFFER_CONFIG, so we just + * initialize all the fields.*/ img->fmt = VPX_IMG_FMT_I420; img->w = yv12->y_stride; img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15; @@ -268,7 +274,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline) { volatile vpx_codec_err_t res; - unsigned int resolution_change = 0; + volatile unsigned int resolution_change = 0; unsigned int w, h; if (!ctx->fragments.enabled && (data == NULL && data_sz == 0)) { @@ -298,6 +304,27 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, if ((ctx->si.h != h) || (ctx->si.w != w)) resolution_change = 1; +#if CONFIG_MULTITHREAD + if (!res && ctx->restart_threads) { + struct frame_buffers *fb = &ctx->yv12_frame_buffers; + VP8D_COMP *pbi = ctx->yv12_frame_buffers.pbi[0]; + VP8_COMMON *const pc = &pbi->common; + if (setjmp(pbi->common.error.jmp)) { + vp8_remove_decoder_instances(fb); + vp8_zero(fb->pbi); + vpx_clear_system_state(); + return VPX_CODEC_ERROR; + } + pbi->common.error.setjmp = 1; + pbi->max_threads = ctx->cfg.threads; + vp8_decoder_create_threads(pbi); + if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) { + vp8mt_alloc_temp_buffers(pbi, pc->Width, pc->mb_rows); + } + ctx->restart_threads = 0; + pbi->common.error.setjmp = 0; + } +#endif /* Initialize the decoder instance on the first frame*/ if (!res && !ctx->decoder_init) { VP8D_CONFIG oxcf; @@ -335,8 +362,8 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, if (!res) { VP8D_COMP *pbi = ctx->yv12_frame_buffers.pbi[0]; + VP8_COMMON *const pc = &pbi->common; if (resolution_change) { - VP8_COMMON *const pc = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; #if CONFIG_MULTITHREAD int i; @@ -428,11 +455,37 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, pbi->common.fb_idx_ref_cnt[0] = 0; } + if (setjmp(pbi->common.error.jmp)) { + /* We do not know if the missing frame(s) was supposed to update + * any of the reference buffers, but we act conservative and + * mark only the last buffer as corrupted. 
+ */ + pc->yv12_fb[pc->lst_fb_idx].corrupted = 1; + + if (pc->fb_idx_ref_cnt[pc->new_fb_idx] > 0) { + pc->fb_idx_ref_cnt[pc->new_fb_idx]--; + } + pc->error.setjmp = 0; +#if CONFIG_MULTITHREAD + if (pbi->restart_threads) { + ctx->si.w = 0; + ctx->si.h = 0; + ctx->restart_threads = 1; + } +#endif + res = update_error_state(ctx, &pbi->common.error); + return res; + } + + pbi->common.error.setjmp = 1; + /* update the pbi fragment data */ pbi->fragments = ctx->fragments; - +#if CONFIG_MULTITHREAD + pbi->restart_threads = 0; +#endif ctx->user_priv = user_priv; - if (vp8dx_receive_compressed_data(pbi, data_sz, data, deadline)) { + if (vp8dx_receive_compressed_data(pbi, deadline)) { res = update_error_state(ctx, &pbi->common.error); } diff --git a/media/libvpx/libvpx/vp8/vp8cx.mk b/media/libvpx/libvpx/vp8/vp8cx.mk index 0dac0169d5f9..3a8f8ea45a9a 100644 --- a/media/libvpx/libvpx/vp8/vp8cx.mk +++ b/media/libvpx/libvpx/vp8/vp8cx.mk @@ -23,6 +23,7 @@ VP8_CX_SRCS-yes += vp8_cx_iface.c VP8_CX_SRCS-yes += encoder/defaultcoefcounts.h VP8_CX_SRCS-yes += encoder/bitstream.c VP8_CX_SRCS-yes += encoder/boolhuff.c +VP8_CX_SRCS-yes += encoder/copy_c.c VP8_CX_SRCS-yes += encoder/dct.c VP8_CX_SRCS-yes += encoder/encodeframe.c VP8_CX_SRCS-yes += encoder/encodeframe.h @@ -82,6 +83,8 @@ VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.c VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.h endif +VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/copy_sse2.asm +VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/copy_sse3.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_quantize_sse2.c @@ -92,9 +95,9 @@ ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c endif +VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/block_error_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c -VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm ifeq ($(CONFIG_REALTIME_ONLY),yes) VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm diff --git a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht16x16_add_neon.c b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht16x16_add_neon.c new file mode 100644 index 000000000000..219ff63cb8ff --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht16x16_add_neon.c @@ -0,0 +1,446 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> + +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_enums.h" +#include "vp9/common/arm/neon/vp9_iht_neon.h" +#include "vpx_dsp/arm/highbd_idct_neon.h" +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/arm/transpose_neon.h" +#include "vpx_dsp/inv_txfm.h" + +// Use macros to make sure argument lane is passed in as a constant integer.
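The comment above deserves a brief note: the NEON lane intrinsics (vmull_lane_s32() and friends) require the lane argument to be an integer constant expression, so a lane index cannot be forwarded through an ordinary function parameter; only a macro can paste the literal into each expansion. A minimal standalone sketch of the constraint (widen_lane0 is an illustrative name, not libvpx API):

/* Illustrative sketch, not part of the patch. */
#include <arm_neon.h>

/* Fine: the lane index is a literal, known at compile time. */
int64x2_t widen_lane0(int32x2_t v, int32x2_t cospi_pair) {
  return vmull_lane_s32(v, cospi_pair, 0);
}

/* Would not compile: a runtime value cannot select the lane.
 *
 * int64x2_t widen_lane(int32x2_t v, int32x2_t c, int lane) {
 *   return vmull_lane_s32(v, c, lane);  // error: lane must be a constant
 * }
 */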
+ +#define vmull_lane_s32_dual(in, c, lane, out) \ + do { \ + out[0].val[0] = vmull_lane_s32(vget_low_s32(in.val[0]), c, lane); \ + out[0].val[1] = vmull_lane_s32(vget_low_s32(in.val[1]), c, lane); \ + out[1].val[0] = vmull_lane_s32(vget_high_s32(in.val[0]), c, lane); \ + out[1].val[1] = vmull_lane_s32(vget_high_s32(in.val[1]), c, lane); \ + } while (0) + +#define vmlal_lane_s32_dual(in, c, lane, out) \ + do { \ + out[0].val[0] = \ + vmlal_lane_s32(out[0].val[0], vget_low_s32(in.val[0]), c, lane); \ + out[0].val[1] = \ + vmlal_lane_s32(out[0].val[1], vget_low_s32(in.val[1]), c, lane); \ + out[1].val[0] = \ + vmlal_lane_s32(out[1].val[0], vget_high_s32(in.val[0]), c, lane); \ + out[1].val[1] = \ + vmlal_lane_s32(out[1].val[1], vget_high_s32(in.val[1]), c, lane); \ + } while (0) + +#define vmlsl_lane_s32_dual(in, c, lane, out) \ + do { \ + out[0].val[0] = \ + vmlsl_lane_s32(out[0].val[0], vget_low_s32(in.val[0]), c, lane); \ + out[0].val[1] = \ + vmlsl_lane_s32(out[0].val[1], vget_low_s32(in.val[1]), c, lane); \ + out[1].val[0] = \ + vmlsl_lane_s32(out[1].val[0], vget_high_s32(in.val[0]), c, lane); \ + out[1].val[1] = \ + vmlsl_lane_s32(out[1].val[1], vget_high_s32(in.val[1]), c, lane); \ + } while (0) + +static INLINE int32x4x2_t +highbd_dct_const_round_shift_low_8(const int64x2x2_t *const in) { + int32x4x2_t out; + out.val[0] = vcombine_s32(vrshrn_n_s64(in[0].val[0], DCT_CONST_BITS), + vrshrn_n_s64(in[1].val[0], DCT_CONST_BITS)); + out.val[1] = vcombine_s32(vrshrn_n_s64(in[0].val[1], DCT_CONST_BITS), + vrshrn_n_s64(in[1].val[1], DCT_CONST_BITS)); + return out; +} + +#define highbd_iadst_half_butterfly(in, c, lane, out) \ + do { \ + int64x2x2_t t[2]; \ + vmull_lane_s32_dual(in, c, lane, t); \ + out = highbd_dct_const_round_shift_low_8(t); \ + } while (0) + +#define highbd_iadst_butterfly(in0, in1, c, lane0, lane1, s0, s1) \ + do { \ + vmull_lane_s32_dual(in0, c, lane0, s0); \ + vmull_lane_s32_dual(in0, c, lane1, s1); \ + vmlal_lane_s32_dual(in1, c, lane1, s0); \ + vmlsl_lane_s32_dual(in1, c, lane0, s1); \ + } while (0) + +static INLINE int32x4x2_t vaddq_s32_dual(const int32x4x2_t in0, + const int32x4x2_t in1) { + int32x4x2_t out; + out.val[0] = vaddq_s32(in0.val[0], in1.val[0]); + out.val[1] = vaddq_s32(in0.val[1], in1.val[1]); + return out; +} + +static INLINE int64x2x2_t vaddq_s64_dual(const int64x2x2_t in0, + const int64x2x2_t in1) { + int64x2x2_t out; + out.val[0] = vaddq_s64(in0.val[0], in1.val[0]); + out.val[1] = vaddq_s64(in0.val[1], in1.val[1]); + return out; +} + +static INLINE int32x4x2_t vsubq_s32_dual(const int32x4x2_t in0, + const int32x4x2_t in1) { + int32x4x2_t out; + out.val[0] = vsubq_s32(in0.val[0], in1.val[0]); + out.val[1] = vsubq_s32(in0.val[1], in1.val[1]); + return out; +} + +static INLINE int64x2x2_t vsubq_s64_dual(const int64x2x2_t in0, + const int64x2x2_t in1) { + int64x2x2_t out; + out.val[0] = vsubq_s64(in0.val[0], in1.val[0]); + out.val[1] = vsubq_s64(in0.val[1], in1.val[1]); + return out; +} + +static INLINE int32x4x2_t vcombine_s32_dual(const int32x2x2_t in0, + const int32x2x2_t in1) { + int32x4x2_t out; + out.val[0] = vcombine_s32(in0.val[0], in1.val[0]); + out.val[1] = vcombine_s32(in0.val[1], in1.val[1]); + return out; +} + +static INLINE int32x4x2_t highbd_add_dct_const_round_shift_low_8( + const int64x2x2_t *const in0, const int64x2x2_t *const in1) { + const int64x2x2_t sum_lo = vaddq_s64_dual(in0[0], in1[0]); + const int64x2x2_t sum_hi = vaddq_s64_dual(in0[1], in1[1]); + int32x2x2_t out_lo, out_hi; + + out_lo.val[0] = vrshrn_n_s64(sum_lo.val[0], 
DCT_CONST_BITS); + out_lo.val[1] = vrshrn_n_s64(sum_lo.val[1], DCT_CONST_BITS); + out_hi.val[0] = vrshrn_n_s64(sum_hi.val[0], DCT_CONST_BITS); + out_hi.val[1] = vrshrn_n_s64(sum_hi.val[1], DCT_CONST_BITS); + return vcombine_s32_dual(out_lo, out_hi); +} + +static INLINE int32x4x2_t highbd_sub_dct_const_round_shift_low_8( + const int64x2x2_t *const in0, const int64x2x2_t *const in1) { + const int64x2x2_t sub_lo = vsubq_s64_dual(in0[0], in1[0]); + const int64x2x2_t sub_hi = vsubq_s64_dual(in0[1], in1[1]); + int32x2x2_t out_lo, out_hi; + + out_lo.val[0] = vrshrn_n_s64(sub_lo.val[0], DCT_CONST_BITS); + out_lo.val[1] = vrshrn_n_s64(sub_lo.val[1], DCT_CONST_BITS); + out_hi.val[0] = vrshrn_n_s64(sub_hi.val[0], DCT_CONST_BITS); + out_hi.val[1] = vrshrn_n_s64(sub_hi.val[1], DCT_CONST_BITS); + return vcombine_s32_dual(out_lo, out_hi); +} + +static INLINE int32x4x2_t vnegq_s32_dual(const int32x4x2_t in) { + int32x4x2_t out; + out.val[0] = vnegq_s32(in.val[0]); + out.val[1] = vnegq_s32(in.val[1]); + return out; +} + +static void highbd_iadst16_neon(const int32_t *input, int32_t *output, + uint16_t *dest, const int stride, + const int bd) { + const int32x4_t c_1_31_5_27 = + create_s32x4_neon(cospi_1_64, cospi_31_64, cospi_5_64, cospi_27_64); + const int32x4_t c_9_23_13_19 = + create_s32x4_neon(cospi_9_64, cospi_23_64, cospi_13_64, cospi_19_64); + const int32x4_t c_17_15_21_11 = + create_s32x4_neon(cospi_17_64, cospi_15_64, cospi_21_64, cospi_11_64); + const int32x4_t c_25_7_29_3 = + create_s32x4_neon(cospi_25_64, cospi_7_64, cospi_29_64, cospi_3_64); + const int32x4_t c_4_28_20_12 = + create_s32x4_neon(cospi_4_64, cospi_28_64, cospi_20_64, cospi_12_64); + const int32x4_t c_16_n16_8_24 = + create_s32x4_neon(cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64); + int32x4x2_t in[16], out[16]; + int32x4x2_t x[16], t[12]; + int64x2x2_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2]; + int64x2x2_t s8[2], s9[2], s10[2], s11[2], s12[2], s13[2], s14[2], s15[2]; + + // Load input (16x8) + in[0].val[0] = vld1q_s32(input); + in[0].val[1] = vld1q_s32(input + 4); + input += 8; + in[8].val[0] = vld1q_s32(input); + in[8].val[1] = vld1q_s32(input + 4); + input += 8; + in[1].val[0] = vld1q_s32(input); + in[1].val[1] = vld1q_s32(input + 4); + input += 8; + in[9].val[0] = vld1q_s32(input); + in[9].val[1] = vld1q_s32(input + 4); + input += 8; + in[2].val[0] = vld1q_s32(input); + in[2].val[1] = vld1q_s32(input + 4); + input += 8; + in[10].val[0] = vld1q_s32(input); + in[10].val[1] = vld1q_s32(input + 4); + input += 8; + in[3].val[0] = vld1q_s32(input); + in[3].val[1] = vld1q_s32(input + 4); + input += 8; + in[11].val[0] = vld1q_s32(input); + in[11].val[1] = vld1q_s32(input + 4); + input += 8; + in[4].val[0] = vld1q_s32(input); + in[4].val[1] = vld1q_s32(input + 4); + input += 8; + in[12].val[0] = vld1q_s32(input); + in[12].val[1] = vld1q_s32(input + 4); + input += 8; + in[5].val[0] = vld1q_s32(input); + in[5].val[1] = vld1q_s32(input + 4); + input += 8; + in[13].val[0] = vld1q_s32(input); + in[13].val[1] = vld1q_s32(input + 4); + input += 8; + in[6].val[0] = vld1q_s32(input); + in[6].val[1] = vld1q_s32(input + 4); + input += 8; + in[14].val[0] = vld1q_s32(input); + in[14].val[1] = vld1q_s32(input + 4); + input += 8; + in[7].val[0] = vld1q_s32(input); + in[7].val[1] = vld1q_s32(input + 4); + input += 8; + in[15].val[0] = vld1q_s32(input); + in[15].val[1] = vld1q_s32(input + 4); + + // Transpose + transpose_s32_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], + &in[7]); + transpose_s32_8x8(&in[8], &in[9], 
&in[10], &in[11], &in[12], &in[13], &in[14], + &in[15]); + + x[0] = in[15]; + x[1] = in[0]; + x[2] = in[13]; + x[3] = in[2]; + x[4] = in[11]; + x[5] = in[4]; + x[6] = in[9]; + x[7] = in[6]; + x[8] = in[7]; + x[9] = in[8]; + x[10] = in[5]; + x[11] = in[10]; + x[12] = in[3]; + x[13] = in[12]; + x[14] = in[1]; + x[15] = in[14]; + + // stage 1 + highbd_iadst_butterfly(x[0], x[1], vget_low_s32(c_1_31_5_27), 0, 1, s0, s1); + highbd_iadst_butterfly(x[2], x[3], vget_high_s32(c_1_31_5_27), 0, 1, s2, s3); + highbd_iadst_butterfly(x[4], x[5], vget_low_s32(c_9_23_13_19), 0, 1, s4, s5); + highbd_iadst_butterfly(x[6], x[7], vget_high_s32(c_9_23_13_19), 0, 1, s6, s7); + highbd_iadst_butterfly(x[8], x[9], vget_low_s32(c_17_15_21_11), 0, 1, s8, s9); + highbd_iadst_butterfly(x[10], x[11], vget_high_s32(c_17_15_21_11), 0, 1, s10, + s11); + highbd_iadst_butterfly(x[12], x[13], vget_low_s32(c_25_7_29_3), 0, 1, s12, + s13); + highbd_iadst_butterfly(x[14], x[15], vget_high_s32(c_25_7_29_3), 0, 1, s14, + s15); + + x[0] = highbd_add_dct_const_round_shift_low_8(s0, s8); + x[1] = highbd_add_dct_const_round_shift_low_8(s1, s9); + x[2] = highbd_add_dct_const_round_shift_low_8(s2, s10); + x[3] = highbd_add_dct_const_round_shift_low_8(s3, s11); + x[4] = highbd_add_dct_const_round_shift_low_8(s4, s12); + x[5] = highbd_add_dct_const_round_shift_low_8(s5, s13); + x[6] = highbd_add_dct_const_round_shift_low_8(s6, s14); + x[7] = highbd_add_dct_const_round_shift_low_8(s7, s15); + x[8] = highbd_sub_dct_const_round_shift_low_8(s0, s8); + x[9] = highbd_sub_dct_const_round_shift_low_8(s1, s9); + x[10] = highbd_sub_dct_const_round_shift_low_8(s2, s10); + x[11] = highbd_sub_dct_const_round_shift_low_8(s3, s11); + x[12] = highbd_sub_dct_const_round_shift_low_8(s4, s12); + x[13] = highbd_sub_dct_const_round_shift_low_8(s5, s13); + x[14] = highbd_sub_dct_const_round_shift_low_8(s6, s14); + x[15] = highbd_sub_dct_const_round_shift_low_8(s7, s15); + + // stage 2 + t[0] = x[0]; + t[1] = x[1]; + t[2] = x[2]; + t[3] = x[3]; + t[4] = x[4]; + t[5] = x[5]; + t[6] = x[6]; + t[7] = x[7]; + highbd_iadst_butterfly(x[8], x[9], vget_low_s32(c_4_28_20_12), 0, 1, s8, s9); + highbd_iadst_butterfly(x[10], x[11], vget_high_s32(c_4_28_20_12), 0, 1, s10, + s11); + highbd_iadst_butterfly(x[13], x[12], vget_low_s32(c_4_28_20_12), 1, 0, s13, + s12); + highbd_iadst_butterfly(x[15], x[14], vget_high_s32(c_4_28_20_12), 1, 0, s15, + s14); + + x[0] = vaddq_s32_dual(t[0], t[4]); + x[1] = vaddq_s32_dual(t[1], t[5]); + x[2] = vaddq_s32_dual(t[2], t[6]); + x[3] = vaddq_s32_dual(t[3], t[7]); + x[4] = vsubq_s32_dual(t[0], t[4]); + x[5] = vsubq_s32_dual(t[1], t[5]); + x[6] = vsubq_s32_dual(t[2], t[6]); + x[7] = vsubq_s32_dual(t[3], t[7]); + x[8] = highbd_add_dct_const_round_shift_low_8(s8, s12); + x[9] = highbd_add_dct_const_round_shift_low_8(s9, s13); + x[10] = highbd_add_dct_const_round_shift_low_8(s10, s14); + x[11] = highbd_add_dct_const_round_shift_low_8(s11, s15); + x[12] = highbd_sub_dct_const_round_shift_low_8(s8, s12); + x[13] = highbd_sub_dct_const_round_shift_low_8(s9, s13); + x[14] = highbd_sub_dct_const_round_shift_low_8(s10, s14); + x[15] = highbd_sub_dct_const_round_shift_low_8(s11, s15); + + // stage 3 + t[0] = x[0]; + t[1] = x[1]; + t[2] = x[2]; + t[3] = x[3]; + highbd_iadst_butterfly(x[4], x[5], vget_high_s32(c_16_n16_8_24), 0, 1, s4, + s5); + highbd_iadst_butterfly(x[7], x[6], vget_high_s32(c_16_n16_8_24), 1, 0, s7, + s6); + t[8] = x[8]; + t[9] = x[9]; + t[10] = x[10]; + t[11] = x[11]; + highbd_iadst_butterfly(x[12], x[13], 
vget_high_s32(c_16_n16_8_24), 0, 1, s12, + s13); + highbd_iadst_butterfly(x[15], x[14], vget_high_s32(c_16_n16_8_24), 1, 0, s15, + s14); + + x[0] = vaddq_s32_dual(t[0], t[2]); + x[1] = vaddq_s32_dual(t[1], t[3]); + x[2] = vsubq_s32_dual(t[0], t[2]); + x[3] = vsubq_s32_dual(t[1], t[3]); + x[4] = highbd_add_dct_const_round_shift_low_8(s4, s6); + x[5] = highbd_add_dct_const_round_shift_low_8(s5, s7); + x[6] = highbd_sub_dct_const_round_shift_low_8(s4, s6); + x[7] = highbd_sub_dct_const_round_shift_low_8(s5, s7); + x[8] = vaddq_s32_dual(t[8], t[10]); + x[9] = vaddq_s32_dual(t[9], t[11]); + x[10] = vsubq_s32_dual(t[8], t[10]); + x[11] = vsubq_s32_dual(t[9], t[11]); + x[12] = highbd_add_dct_const_round_shift_low_8(s12, s14); + x[13] = highbd_add_dct_const_round_shift_low_8(s13, s15); + x[14] = highbd_sub_dct_const_round_shift_low_8(s12, s14); + x[15] = highbd_sub_dct_const_round_shift_low_8(s13, s15); + + // stage 4 + { + const int32x4x2_t sum = vaddq_s32_dual(x[2], x[3]); + const int32x4x2_t sub = vsubq_s32_dual(x[2], x[3]); + highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 1, x[2]); + highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[3]); + } + { + const int32x4x2_t sum = vaddq_s32_dual(x[7], x[6]); + const int32x4x2_t sub = vsubq_s32_dual(x[7], x[6]); + highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 0, x[6]); + highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[7]); + } + { + const int32x4x2_t sum = vaddq_s32_dual(x[11], x[10]); + const int32x4x2_t sub = vsubq_s32_dual(x[11], x[10]); + highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 0, x[10]); + highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[11]); + } + { + const int32x4x2_t sum = vaddq_s32_dual(x[14], x[15]); + const int32x4x2_t sub = vsubq_s32_dual(x[14], x[15]); + highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 1, x[14]); + highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[15]); + } + + out[0] = x[0]; + out[1] = vnegq_s32_dual(x[8]); + out[2] = x[12]; + out[3] = vnegq_s32_dual(x[4]); + out[4] = x[6]; + out[5] = x[14]; + out[6] = x[10]; + out[7] = x[2]; + out[8] = x[3]; + out[9] = x[11]; + out[10] = x[15]; + out[11] = x[7]; + out[12] = x[5]; + out[13] = vnegq_s32_dual(x[13]); + out[14] = x[9]; + out[15] = vnegq_s32_dual(x[1]); + + if (output) { + highbd_idct16x16_store_pass1(out, output); + } else { + highbd_idct16x16_add_store(out, dest, stride, bd); + } +} + +typedef void (*highbd_iht_1d)(const int32_t *input, int32_t *output, + uint16_t *dest, const int stride, const int bd); + +typedef struct { + highbd_iht_1d cols, rows; // vertical and horizontal +} highbd_iht_2d; + +void vp9_highbd_iht16x16_256_add_neon(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + if (bd == 8) { + static const iht_2d IHT_16[] = { + { vpx_idct16x16_256_add_half1d, + vpx_idct16x16_256_add_half1d }, // DCT_DCT = 0 + { vpx_iadst16x16_256_add_half1d, + vpx_idct16x16_256_add_half1d }, // ADST_DCT = 1 + { vpx_idct16x16_256_add_half1d, + vpx_iadst16x16_256_add_half1d }, // DCT_ADST = 2 + { vpx_iadst16x16_256_add_half1d, + vpx_iadst16x16_256_add_half1d } // ADST_ADST = 3 + }; + const iht_2d ht = IHT_16[tx_type]; + int16_t row_output[16 * 16]; + + // pass 1 + ht.rows(input, row_output, dest, stride, 1); // upper 8 rows + ht.rows(input + 8 * 16, row_output + 8, dest, stride, 1); // lower 8 rows + + // pass 2 + ht.cols(row_output, NULL, dest, stride, 1); // left 8 columns + ht.cols(row_output + 16 * 8, NULL, dest + 8, 
stride, 1); // right 8 columns + } else { + static const highbd_iht_2d IHT_16[] = { + { vpx_highbd_idct16x16_256_add_half1d, + vpx_highbd_idct16x16_256_add_half1d }, // DCT_DCT = 0 + { highbd_iadst16_neon, + vpx_highbd_idct16x16_256_add_half1d }, // ADST_DCT = 1 + { vpx_highbd_idct16x16_256_add_half1d, + highbd_iadst16_neon }, // DCT_ADST = 2 + { highbd_iadst16_neon, highbd_iadst16_neon } // ADST_ADST = 3 + }; + const highbd_iht_2d ht = IHT_16[tx_type]; + int32_t row_output[16 * 16]; + + // pass 1 + ht.rows(input, row_output, dest, stride, bd); // upper 8 rows + ht.rows(input + 8 * 16, row_output + 8, dest, stride, bd); // lower 8 rows + + // pass 2 + ht.cols(row_output, NULL, dest, stride, bd); // left 8 columns + ht.cols(row_output + 8 * 16, NULL, dest + 8, stride, + bd); // right 8 columns + } +} diff --git a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c new file mode 100644 index 000000000000..52c4f1937dfb --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> +#include <assert.h> + +#include "./vp9_rtcd.h" +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/arm/neon/vp9_iht_neon.h" +#include "vpx_dsp/arm/highbd_idct_neon.h" +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/arm/mem_neon.h" +#include "vpx_dsp/txfm_common.h" + +static INLINE void highbd_iadst4(int32x4_t *const io) { + const int32_t sinpis[4] = { sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9 }; + const int32x4_t sinpi = vld1q_s32(sinpis); + int64x2x2_t s[7], t[4]; + int32x4_t s7; + + s[0].val[0] = vmull_lane_s32(vget_low_s32(io[0]), vget_low_s32(sinpi), 0); + s[0].val[1] = vmull_lane_s32(vget_high_s32(io[0]), vget_low_s32(sinpi), 0); + s[1].val[0] = vmull_lane_s32(vget_low_s32(io[0]), vget_low_s32(sinpi), 1); + s[1].val[1] = vmull_lane_s32(vget_high_s32(io[0]), vget_low_s32(sinpi), 1); + s[2].val[0] = vmull_lane_s32(vget_low_s32(io[1]), vget_high_s32(sinpi), 0); + s[2].val[1] = vmull_lane_s32(vget_high_s32(io[1]), vget_high_s32(sinpi), 0); + s[3].val[0] = vmull_lane_s32(vget_low_s32(io[2]), vget_high_s32(sinpi), 1); + s[3].val[1] = vmull_lane_s32(vget_high_s32(io[2]), vget_high_s32(sinpi), 1); + s[4].val[0] = vmull_lane_s32(vget_low_s32(io[2]), vget_low_s32(sinpi), 0); + s[4].val[1] = vmull_lane_s32(vget_high_s32(io[2]), vget_low_s32(sinpi), 0); + s[5].val[0] = vmull_lane_s32(vget_low_s32(io[3]), vget_low_s32(sinpi), 1); + s[5].val[1] = vmull_lane_s32(vget_high_s32(io[3]), vget_low_s32(sinpi), 1); + s[6].val[0] = vmull_lane_s32(vget_low_s32(io[3]), vget_high_s32(sinpi), 1); + s[6].val[1] = vmull_lane_s32(vget_high_s32(io[3]), vget_high_s32(sinpi), 1); + s7 = vsubq_s32(io[0], io[2]); + s7 = vaddq_s32(s7, io[3]); + + s[0].val[0] = vaddq_s64(s[0].val[0], s[3].val[0]); + s[0].val[1] = vaddq_s64(s[0].val[1], s[3].val[1]); + s[0].val[0] = vaddq_s64(s[0].val[0], s[5].val[0]); + s[0].val[1] = vaddq_s64(s[0].val[1], s[5].val[1]); + s[1].val[0] = vsubq_s64(s[1].val[0], s[4].val[0]); + s[1].val[1] = vsubq_s64(s[1].val[1], s[4].val[1]); + s[1].val[0]
= vsubq_s64(s[1].val[0], s[6].val[0]); + s[1].val[1] = vsubq_s64(s[1].val[1], s[6].val[1]); + s[3] = s[2]; + s[2].val[0] = vmull_lane_s32(vget_low_s32(s7), vget_high_s32(sinpi), 0); + s[2].val[1] = vmull_lane_s32(vget_high_s32(s7), vget_high_s32(sinpi), 0); + + t[0].val[0] = vaddq_s64(s[0].val[0], s[3].val[0]); + t[0].val[1] = vaddq_s64(s[0].val[1], s[3].val[1]); + t[1].val[0] = vaddq_s64(s[1].val[0], s[3].val[0]); + t[1].val[1] = vaddq_s64(s[1].val[1], s[3].val[1]); + t[2] = s[2]; + t[3].val[0] = vaddq_s64(s[0].val[0], s[1].val[0]); + t[3].val[1] = vaddq_s64(s[0].val[1], s[1].val[1]); + t[3].val[0] = vsubq_s64(t[3].val[0], s[3].val[0]); + t[3].val[1] = vsubq_s64(t[3].val[1], s[3].val[1]); + io[0] = vcombine_s32(vrshrn_n_s64(t[0].val[0], DCT_CONST_BITS), + vrshrn_n_s64(t[0].val[1], DCT_CONST_BITS)); + io[1] = vcombine_s32(vrshrn_n_s64(t[1].val[0], DCT_CONST_BITS), + vrshrn_n_s64(t[1].val[1], DCT_CONST_BITS)); + io[2] = vcombine_s32(vrshrn_n_s64(t[2].val[0], DCT_CONST_BITS), + vrshrn_n_s64(t[2].val[1], DCT_CONST_BITS)); + io[3] = vcombine_s32(vrshrn_n_s64(t[3].val[0], DCT_CONST_BITS), + vrshrn_n_s64(t[3].val[1], DCT_CONST_BITS)); +} + +void vp9_highbd_iht4x4_16_add_neon(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + const int16x8_t max = vdupq_n_s16((1 << bd) - 1); + int16x8_t a[2]; + int32x4_t c[4]; + + c[0] = vld1q_s32(input); + c[1] = vld1q_s32(input + 4); + c[2] = vld1q_s32(input + 8); + c[3] = vld1q_s32(input + 12); + + if (bd == 8) { + a[0] = vcombine_s16(vmovn_s32(c[0]), vmovn_s32(c[1])); + a[1] = vcombine_s16(vmovn_s32(c[2]), vmovn_s32(c[3])); + transpose_s16_4x4q(&a[0], &a[1]); + + switch (tx_type) { + case DCT_DCT: + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + transpose_s16_4x4q(&a[0], &a[1]); + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + break; + + case ADST_DCT: + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + transpose_s16_4x4q(&a[0], &a[1]); + iadst4(a); + break; + + case DCT_ADST: + iadst4(a); + transpose_s16_4x4q(&a[0], &a[1]); + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + break; + + default: + assert(tx_type == ADST_ADST); + iadst4(a); + transpose_s16_4x4q(&a[0], &a[1]); + iadst4(a); + break; + } + a[0] = vrshrq_n_s16(a[0], 4); + a[1] = vrshrq_n_s16(a[1], 4); + } else { + switch (tx_type) { + case DCT_DCT: { + const int32x4_t cospis = vld1q_s32(kCospi32); + + if (bd == 10) { + idct4x4_16_kernel_bd10(cospis, c); + idct4x4_16_kernel_bd10(cospis, c); + } else { + idct4x4_16_kernel_bd12(cospis, c); + idct4x4_16_kernel_bd12(cospis, c); + } + break; + } + + case ADST_DCT: { + const int32x4_t cospis = vld1q_s32(kCospi32); + + if (bd == 10) { + idct4x4_16_kernel_bd10(cospis, c); + } else { + idct4x4_16_kernel_bd12(cospis, c); + } + transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]); + highbd_iadst4(c); + break; + } + + case DCT_ADST: { + const int32x4_t cospis = vld1q_s32(kCospi32); + + transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]); + highbd_iadst4(c); + if (bd == 10) { + idct4x4_16_kernel_bd10(cospis, c); + } else { + idct4x4_16_kernel_bd12(cospis, c); + } + break; + } + + default: { + assert(tx_type == ADST_ADST); + transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]); + highbd_iadst4(c); + transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]); + highbd_iadst4(c); + break; + } + } + a[0] = vcombine_s16(vqrshrn_n_s32(c[0], 4), vqrshrn_n_s32(c[1], 4)); + a[1] = 
vcombine_s16(vqrshrn_n_s32(c[2], 4), vqrshrn_n_s32(c[3], 4)); + } + + highbd_idct4x4_1_add_kernel1(&dest, stride, a[0], max); + highbd_idct4x4_1_add_kernel1(&dest, stride, a[1], max); +} diff --git a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht8x8_add_neon.c b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht8x8_add_neon.c new file mode 100644 index 000000000000..2232c6841c70 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_highbd_iht8x8_add_neon.c @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> + +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_enums.h" +#include "vp9/common/arm/neon/vp9_iht_neon.h" +#include "vpx_dsp/arm/highbd_idct_neon.h" +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/arm/transpose_neon.h" +#include "vpx_dsp/inv_txfm.h" + +static INLINE void highbd_iadst_half_butterfly_neon(int32x4_t *const x, + const int32x2_t c) { + const int32x4_t sum = vaddq_s32(x[0], x[1]); + const int32x4_t sub = vsubq_s32(x[0], x[1]); + const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(sum), c, 0); + const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(sub), c, 0); + const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(sum), c, 0); + const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(sub), c, 0); + const int32x2_t out0_lo = vrshrn_n_s64(t0_lo, DCT_CONST_BITS); + const int32x2_t out1_lo = vrshrn_n_s64(t1_lo, DCT_CONST_BITS); + const int32x2_t out0_hi = vrshrn_n_s64(t0_hi, DCT_CONST_BITS); + const int32x2_t out1_hi = vrshrn_n_s64(t1_hi, DCT_CONST_BITS); + + x[0] = vcombine_s32(out0_lo, out0_hi); + x[1] = vcombine_s32(out1_lo, out1_hi); +} + +static INLINE void highbd_iadst_butterfly_lane_0_1_neon(const int32x4_t in0, + const int32x4_t in1, + const int32x2_t c, + int64x2_t *const s0, + int64x2_t *const s1) { + const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(in0), c, 0); + const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(in0), c, 1); + const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(in0), c, 0); + const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(in0), c, 1); + + s0[0] = vmlal_lane_s32(t0_lo, vget_low_s32(in1), c, 1); + s1[0] = vmlsl_lane_s32(t1_lo, vget_low_s32(in1), c, 0); + s0[1] = vmlal_lane_s32(t0_hi, vget_high_s32(in1), c, 1); + s1[1] = vmlsl_lane_s32(t1_hi, vget_high_s32(in1), c, 0); +} + +static INLINE void highbd_iadst_butterfly_lane_1_0_neon(const int32x4_t in0, + const int32x4_t in1, + const int32x2_t c, + int64x2_t *const s0, + int64x2_t *const s1) { + const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(in0), c, 1); + const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(in0), c, 0); + const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(in0), c, 1); + const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(in0), c, 0); + + s0[0] = vmlal_lane_s32(t0_lo, vget_low_s32(in1), c, 0); + s1[0] = vmlsl_lane_s32(t1_lo, vget_low_s32(in1), c, 1); + s0[1] = vmlal_lane_s32(t0_hi, vget_high_s32(in1), c, 0); + s1[1] = vmlsl_lane_s32(t1_hi, vget_high_s32(in1), c, 1); +} + +static INLINE int32x4_t highbd_add_dct_const_round_shift_low_8( + const int64x2_t *const in0, const int64x2_t *const in1) { + const int64x2_t sum_lo = vaddq_s64(in0[0], in1[0]); + const
int64x2_t sum_hi = vaddq_s64(in0[1], in1[1]); + const int32x2_t out_lo = vrshrn_n_s64(sum_lo, DCT_CONST_BITS); + const int32x2_t out_hi = vrshrn_n_s64(sum_hi, DCT_CONST_BITS); + return vcombine_s32(out_lo, out_hi); +} + +static INLINE int32x4_t highbd_sub_dct_const_round_shift_low_8( + const int64x2_t *const in0, const int64x2_t *const in1) { + const int64x2_t sub_lo = vsubq_s64(in0[0], in1[0]); + const int64x2_t sub_hi = vsubq_s64(in0[1], in1[1]); + const int32x2_t out_lo = vrshrn_n_s64(sub_lo, DCT_CONST_BITS); + const int32x2_t out_hi = vrshrn_n_s64(sub_hi, DCT_CONST_BITS); + return vcombine_s32(out_lo, out_hi); +} + +static INLINE void highbd_iadst8(int32x4_t *const io0, int32x4_t *const io1, + int32x4_t *const io2, int32x4_t *const io3, + int32x4_t *const io4, int32x4_t *const io5, + int32x4_t *const io6, int32x4_t *const io7) { + const int32x4_t c0 = + create_s32x4_neon(cospi_2_64, cospi_30_64, cospi_10_64, cospi_22_64); + const int32x4_t c1 = + create_s32x4_neon(cospi_18_64, cospi_14_64, cospi_26_64, cospi_6_64); + const int32x4_t c2 = + create_s32x4_neon(cospi_16_64, 0, cospi_8_64, cospi_24_64); + int32x4_t x[8], t[4]; + int64x2_t s[8][2]; + + x[0] = *io7; + x[1] = *io0; + x[2] = *io5; + x[3] = *io2; + x[4] = *io3; + x[5] = *io4; + x[6] = *io1; + x[7] = *io6; + + // stage 1 + highbd_iadst_butterfly_lane_0_1_neon(x[0], x[1], vget_low_s32(c0), s[0], + s[1]); + highbd_iadst_butterfly_lane_0_1_neon(x[2], x[3], vget_high_s32(c0), s[2], + s[3]); + highbd_iadst_butterfly_lane_0_1_neon(x[4], x[5], vget_low_s32(c1), s[4], + s[5]); + highbd_iadst_butterfly_lane_0_1_neon(x[6], x[7], vget_high_s32(c1), s[6], + s[7]); + + x[0] = highbd_add_dct_const_round_shift_low_8(s[0], s[4]); + x[1] = highbd_add_dct_const_round_shift_low_8(s[1], s[5]); + x[2] = highbd_add_dct_const_round_shift_low_8(s[2], s[6]); + x[3] = highbd_add_dct_const_round_shift_low_8(s[3], s[7]); + x[4] = highbd_sub_dct_const_round_shift_low_8(s[0], s[4]); + x[5] = highbd_sub_dct_const_round_shift_low_8(s[1], s[5]); + x[6] = highbd_sub_dct_const_round_shift_low_8(s[2], s[6]); + x[7] = highbd_sub_dct_const_round_shift_low_8(s[3], s[7]); + + // stage 2 + t[0] = x[0]; + t[1] = x[1]; + t[2] = x[2]; + t[3] = x[3]; + highbd_iadst_butterfly_lane_0_1_neon(x[4], x[5], vget_high_s32(c2), s[4], + s[5]); + highbd_iadst_butterfly_lane_1_0_neon(x[7], x[6], vget_high_s32(c2), s[7], + s[6]); + + x[0] = vaddq_s32(t[0], t[2]); + x[1] = vaddq_s32(t[1], t[3]); + x[2] = vsubq_s32(t[0], t[2]); + x[3] = vsubq_s32(t[1], t[3]); + x[4] = highbd_add_dct_const_round_shift_low_8(s[4], s[6]); + x[5] = highbd_add_dct_const_round_shift_low_8(s[5], s[7]); + x[6] = highbd_sub_dct_const_round_shift_low_8(s[4], s[6]); + x[7] = highbd_sub_dct_const_round_shift_low_8(s[5], s[7]); + + // stage 3 + highbd_iadst_half_butterfly_neon(x + 2, vget_low_s32(c2)); + highbd_iadst_half_butterfly_neon(x + 6, vget_low_s32(c2)); + + *io0 = x[0]; + *io1 = vnegq_s32(x[4]); + *io2 = x[6]; + *io3 = vnegq_s32(x[2]); + *io4 = x[3]; + *io5 = vnegq_s32(x[7]); + *io6 = x[5]; + *io7 = vnegq_s32(x[1]); +} + +void vp9_highbd_iht8x8_64_add_neon(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + int32x4_t a[16]; + int16x8_t c[8]; + + a[0] = vld1q_s32(input); + a[1] = vld1q_s32(input + 4); + a[2] = vld1q_s32(input + 8); + a[3] = vld1q_s32(input + 12); + a[4] = vld1q_s32(input + 16); + a[5] = vld1q_s32(input + 20); + a[6] = vld1q_s32(input + 24); + a[7] = vld1q_s32(input + 28); + a[8] = vld1q_s32(input + 32); + a[9] = vld1q_s32(input + 36); + a[10] = vld1q_s32(input + 
40); + a[11] = vld1q_s32(input + 44); + a[12] = vld1q_s32(input + 48); + a[13] = vld1q_s32(input + 52); + a[14] = vld1q_s32(input + 56); + a[15] = vld1q_s32(input + 60); + + if (bd == 8) { + c[0] = vcombine_s16(vmovn_s32(a[0]), vmovn_s32(a[1])); + c[1] = vcombine_s16(vmovn_s32(a[2]), vmovn_s32(a[3])); + c[2] = vcombine_s16(vmovn_s32(a[4]), vmovn_s32(a[5])); + c[3] = vcombine_s16(vmovn_s32(a[6]), vmovn_s32(a[7])); + c[4] = vcombine_s16(vmovn_s32(a[8]), vmovn_s32(a[9])); + c[5] = vcombine_s16(vmovn_s32(a[10]), vmovn_s32(a[11])); + c[6] = vcombine_s16(vmovn_s32(a[12]), vmovn_s32(a[13])); + c[7] = vcombine_s16(vmovn_s32(a[14]), vmovn_s32(a[15])); + + switch (tx_type) { + case DCT_DCT: { + const int16x8_t cospis = vld1q_s16(kCospi); + const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 + const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 + + idct8x8_64_1d_bd8(cospis0, cospis1, c); + idct8x8_64_1d_bd8(cospis0, cospis1, c); + break; + } + + case ADST_DCT: { + const int16x8_t cospis = vld1q_s16(kCospi); + const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 + const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 + + idct8x8_64_1d_bd8(cospis0, cospis1, c); + transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6], + &c[7]); + iadst8(c); + break; + } + + case DCT_ADST: { + const int16x8_t cospis = vld1q_s16(kCospi); + const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 + const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 + + transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6], + &c[7]); + iadst8(c); + idct8x8_64_1d_bd8(cospis0, cospis1, c); + break; + } + + default: { + transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6], + &c[7]); + iadst8(c); + transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6], + &c[7]); + iadst8(c); + break; + } + } + + c[0] = vrshrq_n_s16(c[0], 5); + c[1] = vrshrq_n_s16(c[1], 5); + c[2] = vrshrq_n_s16(c[2], 5); + c[3] = vrshrq_n_s16(c[3], 5); + c[4] = vrshrq_n_s16(c[4], 5); + c[5] = vrshrq_n_s16(c[5], 5); + c[6] = vrshrq_n_s16(c[6], 5); + c[7] = vrshrq_n_s16(c[7], 5); + } else { + switch (tx_type) { + case DCT_DCT: { + const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24 + const int32x4_t cospis1 = + vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28 + + if (bd == 10) { + idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], + &a[4], &a[5], &a[6], &a[7]); + idct8x8_64_half1d_bd10(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11], + &a[12], &a[13], &a[14], &a[15]); + idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9], + &a[2], &a[10], &a[3], &a[11]); + idct8x8_64_half1d_bd10(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13], + &a[6], &a[14], &a[7], &a[15]); + } else { + idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], + &a[4], &a[5], &a[6], &a[7]); + idct8x8_64_half1d_bd12(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11], + &a[12], &a[13], &a[14], &a[15]); + idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9], + &a[2], &a[10], &a[3], &a[11]); + idct8x8_64_half1d_bd12(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13], + &a[6], &a[14], &a[7], &a[15]); + } + break; + } + + case ADST_DCT: { + const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24 + const int32x4_t cospis1 = + vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28 + + idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], + &a[4], &a[5], &a[6], &a[7]); + 
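A note on the four-call idct8x8_64_half1d_bd10()/bd12() patterns in these cases: each call runs a 1-D pass over one 8x4 half of the block, so the first pair of calls is one full 1-D pass and the second pair (over re-arranged registers, or after an explicit transpose for the ADST cases) is the second pass. That separable row/column structure, with tx_type picking DCT or ADST independently per direction, is the shape of every iht function in this patch. A standalone scalar sketch of the same dispatch (all names are illustrative stand-ins, not libvpx kernels):

/* Illustrative sketch, not part of the patch. */
#include <stdio.h>

enum { DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST };

typedef void (*iht1d)(const int in[4], int out[4]);

/* Trivial stand-ins for real 1-D inverse DCT/ADST kernels. */
static void fake_idct1d(const int in[4], int out[4]) {
  for (int i = 0; i < 4; ++i) out[i] = in[i] + 1;
}
static void fake_iadst1d(const int in[4], int out[4]) {
  for (int i = 0; i < 4; ++i) out[i] = in[i] + 100;
}

static void iht4x4(int tx_type, int block[4][4]) {
  /* Mirrors the IHT tables above: .cols is vertical, .rows horizontal. */
  static const struct { iht1d cols, rows; } iht[4] = {
    { fake_idct1d, fake_idct1d },   /* DCT_DCT */
    { fake_iadst1d, fake_idct1d },  /* ADST_DCT */
    { fake_idct1d, fake_iadst1d },  /* DCT_ADST */
    { fake_iadst1d, fake_iadst1d }, /* ADST_ADST */
  };
  int tmp[4];
  /* pass 1: transform each row. */
  for (int r = 0; r < 4; ++r) {
    iht[tx_type].rows(block[r], tmp);
    for (int c = 0; c < 4; ++c) block[r][c] = tmp[c];
  }
  /* pass 2: transform each column. */
  for (int c = 0; c < 4; ++c) {
    int col[4];
    for (int r = 0; r < 4; ++r) col[r] = block[r][c];
    iht[tx_type].cols(col, tmp);
    for (int r = 0; r < 4; ++r) block[r][c] = tmp[r];
  }
}

int main(void) {
  int block[4][4] = { { 0 } };
  iht4x4(ADST_DCT, block);
  printf("%d\n", block[0][0]); /* 101: DCT across rows, ADST down columns */
  return 0;
}

The real NEON code avoids strided column access by transposing between passes, but the dispatch is the same as in the IHT_16[] tables.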
idct8x8_64_half1d_bd12(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11], + &a[12], &a[13], &a[14], &a[15]); + transpose_s32_8x4(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], + &a[11]); + highbd_iadst8(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]); + transpose_s32_8x4(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], + &a[15]); + highbd_iadst8(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], + &a[15]); + break; + } + + case DCT_ADST: { + const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24 + const int32x4_t cospis1 = + vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28 + + transpose_s32_8x4(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], + &a[7]); + highbd_iadst8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); + transpose_s32_8x4(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], + &a[15]); + highbd_iadst8(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], + &a[15]); + idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9], + &a[2], &a[10], &a[3], &a[11]); + idct8x8_64_half1d_bd12(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13], + &a[6], &a[14], &a[7], &a[15]); + break; + } + + default: { + assert(tx_type == ADST_ADST); + transpose_s32_8x4(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], + &a[7]); + highbd_iadst8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); + transpose_s32_8x4(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], + &a[15]); + highbd_iadst8(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], + &a[15]); + transpose_s32_8x4(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], + &a[11]); + highbd_iadst8(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]); + transpose_s32_8x4(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], + &a[15]); + highbd_iadst8(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], + &a[15]); + break; + } + } + + c[0] = vcombine_s16(vrshrn_n_s32(a[0], 5), vrshrn_n_s32(a[4], 5)); + c[1] = vcombine_s16(vrshrn_n_s32(a[8], 5), vrshrn_n_s32(a[12], 5)); + c[2] = vcombine_s16(vrshrn_n_s32(a[1], 5), vrshrn_n_s32(a[5], 5)); + c[3] = vcombine_s16(vrshrn_n_s32(a[9], 5), vrshrn_n_s32(a[13], 5)); + c[4] = vcombine_s16(vrshrn_n_s32(a[2], 5), vrshrn_n_s32(a[6], 5)); + c[5] = vcombine_s16(vrshrn_n_s32(a[10], 5), vrshrn_n_s32(a[14], 5)); + c[6] = vcombine_s16(vrshrn_n_s32(a[3], 5), vrshrn_n_s32(a[7], 5)); + c[7] = vcombine_s16(vrshrn_n_s32(a[11], 5), vrshrn_n_s32(a[15], 5)); + } + highbd_add8x8(c, dest, stride, bd); +} diff --git a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht16x16_add_neon.c b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht16x16_add_neon.c new file mode 100644 index 000000000000..db72ff11618f --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht16x16_add_neon.c @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <arm_neon.h> +#include <assert.h> + +#include "./vp9_rtcd.h" +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/arm/neon/vp9_iht_neon.h" +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/arm/mem_neon.h" +#include "vpx_dsp/arm/transpose_neon.h" + +void vpx_iadst16x16_256_add_half1d(const void *const input, int16_t *output, + void *const dest, const int stride, + const int highbd_flag) { + int16x8_t in[16], out[16]; + const int16x4_t c_1_31_5_27 = + create_s16x4_neon(cospi_1_64, cospi_31_64, cospi_5_64, cospi_27_64); + const int16x4_t c_9_23_13_19 = + create_s16x4_neon(cospi_9_64, cospi_23_64, cospi_13_64, cospi_19_64); + const int16x4_t c_17_15_21_11 = + create_s16x4_neon(cospi_17_64, cospi_15_64, cospi_21_64, cospi_11_64); + const int16x4_t c_25_7_29_3 = + create_s16x4_neon(cospi_25_64, cospi_7_64, cospi_29_64, cospi_3_64); + const int16x4_t c_4_28_20_12 = + create_s16x4_neon(cospi_4_64, cospi_28_64, cospi_20_64, cospi_12_64); + const int16x4_t c_16_n16_8_24 = + create_s16x4_neon(cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64); + int16x8_t x[16], t[12]; + int32x4_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2]; + int32x4_t s8[2], s9[2], s10[2], s11[2], s12[2], s13[2], s14[2], s15[2]; + + // Load input (16x8) + if (output) { + const tran_low_t *inputT = (const tran_low_t *)input; + in[0] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[8] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[1] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[9] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[2] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[10] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[3] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[11] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[4] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[12] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[5] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[13] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[6] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[14] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[7] = load_tran_low_to_s16q(inputT); + inputT += 8; + in[15] = load_tran_low_to_s16q(inputT); + } else { + const int16_t *inputT = (const int16_t *)input; + in[0] = vld1q_s16(inputT); + inputT += 8; + in[8] = vld1q_s16(inputT); + inputT += 8; + in[1] = vld1q_s16(inputT); + inputT += 8; + in[9] = vld1q_s16(inputT); + inputT += 8; + in[2] = vld1q_s16(inputT); + inputT += 8; + in[10] = vld1q_s16(inputT); + inputT += 8; + in[3] = vld1q_s16(inputT); + inputT += 8; + in[11] = vld1q_s16(inputT); + inputT += 8; + in[4] = vld1q_s16(inputT); + inputT += 8; + in[12] = vld1q_s16(inputT); + inputT += 8; + in[5] = vld1q_s16(inputT); + inputT += 8; + in[13] = vld1q_s16(inputT); + inputT += 8; + in[6] = vld1q_s16(inputT); + inputT += 8; + in[14] = vld1q_s16(inputT); + inputT += 8; + in[7] = vld1q_s16(inputT); + inputT += 8; + in[15] = vld1q_s16(inputT); + } + + // Transpose + transpose_s16_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], + &in[7]); + transpose_s16_8x8(&in[8], &in[9], &in[10], &in[11], &in[12], &in[13], &in[14], + &in[15]); + + x[0] = in[15]; + x[1] = in[0]; + x[2] = in[13]; + x[3] = in[2]; + x[4] = in[11]; + x[5] = in[4]; + x[6] = in[9]; + x[7] = in[6]; + x[8] = in[7]; + x[9] = in[8]; + x[10] = in[5]; + x[11] = in[10]; + x[12] = in[3]; + x[13] = in[12]; + x[14] = in[1]; + x[15] = in[14]; + + // stage 1 + iadst_butterfly_lane_0_1_neon(x[0], x[1], c_1_31_5_27, s0, s1); +
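Each stage-1 call here is the vectorized form of a scalar butterfly: s0 = x0*c0 + x1*c1 and s1 = x0*c1 - x1*c0, accumulated in a wider type and rounded back down by DCT_CONST_BITS. A standalone scalar sketch follows (the cospi constants are libvpx's 14-bit values from txfm_common.h; the helper names are illustrative, and unlike the real code, which defers rounding until after the add/sub that combines two butterflies, this rounds immediately for brevity):

/* Illustrative sketch, not part of the patch. */
#include <stdint.h>
#include <stdio.h>

#define DCT_CONST_BITS 14
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

static int32_t dct_const_round_shift(int64_t input) {
  return (int32_t)ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
}

/* One butterfly: out0 = x0*c0 + x1*c1, out1 = x0*c1 - x1*c0. */
static void iadst_butterfly(int32_t x0, int32_t x1, int32_t c0, int32_t c1,
                            int32_t *out0, int32_t *out1) {
  *out0 = dct_const_round_shift((int64_t)x0 * c0 + (int64_t)x1 * c1);
  *out1 = dct_const_round_shift((int64_t)x0 * c1 - (int64_t)x1 * c0);
}

int main(void) {
  /* cospi_1_64 and cospi_31_64, i.e. round(16384 * cos(k*pi/64)). */
  const int32_t c0 = 16364, c1 = 804;
  int32_t s0, s1;
  iadst_butterfly(100, -50, c0, c1, &s0, &s1);
  printf("s0=%d s1=%d\n", s0, s1); /* prints: s0=97 s1=55 */
  return 0;
}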
iadst_butterfly_lane_2_3_neon(x[2], x[3], c_1_31_5_27, s2, s3); + iadst_butterfly_lane_0_1_neon(x[4], x[5], c_9_23_13_19, s4, s5); + iadst_butterfly_lane_2_3_neon(x[6], x[7], c_9_23_13_19, s6, s7); + iadst_butterfly_lane_0_1_neon(x[8], x[9], c_17_15_21_11, s8, s9); + iadst_butterfly_lane_2_3_neon(x[10], x[11], c_17_15_21_11, s10, s11); + iadst_butterfly_lane_0_1_neon(x[12], x[13], c_25_7_29_3, s12, s13); + iadst_butterfly_lane_2_3_neon(x[14], x[15], c_25_7_29_3, s14, s15); + + x[0] = add_dct_const_round_shift_low_8(s0, s8); + x[1] = add_dct_const_round_shift_low_8(s1, s9); + x[2] = add_dct_const_round_shift_low_8(s2, s10); + x[3] = add_dct_const_round_shift_low_8(s3, s11); + x[4] = add_dct_const_round_shift_low_8(s4, s12); + x[5] = add_dct_const_round_shift_low_8(s5, s13); + x[6] = add_dct_const_round_shift_low_8(s6, s14); + x[7] = add_dct_const_round_shift_low_8(s7, s15); + x[8] = sub_dct_const_round_shift_low_8(s0, s8); + x[9] = sub_dct_const_round_shift_low_8(s1, s9); + x[10] = sub_dct_const_round_shift_low_8(s2, s10); + x[11] = sub_dct_const_round_shift_low_8(s3, s11); + x[12] = sub_dct_const_round_shift_low_8(s4, s12); + x[13] = sub_dct_const_round_shift_low_8(s5, s13); + x[14] = sub_dct_const_round_shift_low_8(s6, s14); + x[15] = sub_dct_const_round_shift_low_8(s7, s15); + + // stage 2 + t[0] = x[0]; + t[1] = x[1]; + t[2] = x[2]; + t[3] = x[3]; + t[4] = x[4]; + t[5] = x[5]; + t[6] = x[6]; + t[7] = x[7]; + iadst_butterfly_lane_0_1_neon(x[8], x[9], c_4_28_20_12, s8, s9); + iadst_butterfly_lane_2_3_neon(x[10], x[11], c_4_28_20_12, s10, s11); + iadst_butterfly_lane_1_0_neon(x[13], x[12], c_4_28_20_12, s13, s12); + iadst_butterfly_lane_3_2_neon(x[15], x[14], c_4_28_20_12, s15, s14); + + x[0] = vaddq_s16(t[0], t[4]); + x[1] = vaddq_s16(t[1], t[5]); + x[2] = vaddq_s16(t[2], t[6]); + x[3] = vaddq_s16(t[3], t[7]); + x[4] = vsubq_s16(t[0], t[4]); + x[5] = vsubq_s16(t[1], t[5]); + x[6] = vsubq_s16(t[2], t[6]); + x[7] = vsubq_s16(t[3], t[7]); + x[8] = add_dct_const_round_shift_low_8(s8, s12); + x[9] = add_dct_const_round_shift_low_8(s9, s13); + x[10] = add_dct_const_round_shift_low_8(s10, s14); + x[11] = add_dct_const_round_shift_low_8(s11, s15); + x[12] = sub_dct_const_round_shift_low_8(s8, s12); + x[13] = sub_dct_const_round_shift_low_8(s9, s13); + x[14] = sub_dct_const_round_shift_low_8(s10, s14); + x[15] = sub_dct_const_round_shift_low_8(s11, s15); + + // stage 3 + t[0] = x[0]; + t[1] = x[1]; + t[2] = x[2]; + t[3] = x[3]; + iadst_butterfly_lane_2_3_neon(x[4], x[5], c_16_n16_8_24, s4, s5); + iadst_butterfly_lane_3_2_neon(x[7], x[6], c_16_n16_8_24, s7, s6); + t[8] = x[8]; + t[9] = x[9]; + t[10] = x[10]; + t[11] = x[11]; + iadst_butterfly_lane_2_3_neon(x[12], x[13], c_16_n16_8_24, s12, s13); + iadst_butterfly_lane_3_2_neon(x[15], x[14], c_16_n16_8_24, s15, s14); + + x[0] = vaddq_s16(t[0], t[2]); + x[1] = vaddq_s16(t[1], t[3]); + x[2] = vsubq_s16(t[0], t[2]); + x[3] = vsubq_s16(t[1], t[3]); + x[4] = add_dct_const_round_shift_low_8(s4, s6); + x[5] = add_dct_const_round_shift_low_8(s5, s7); + x[6] = sub_dct_const_round_shift_low_8(s4, s6); + x[7] = sub_dct_const_round_shift_low_8(s5, s7); + x[8] = vaddq_s16(t[8], t[10]); + x[9] = vaddq_s16(t[9], t[11]); + x[10] = vsubq_s16(t[8], t[10]); + x[11] = vsubq_s16(t[9], t[11]); + x[12] = add_dct_const_round_shift_low_8(s12, s14); + x[13] = add_dct_const_round_shift_low_8(s13, s15); + x[14] = sub_dct_const_round_shift_low_8(s12, s14); + x[15] = sub_dct_const_round_shift_low_8(s13, s15); + + // stage 4 + iadst_half_butterfly_neg_neon(&x[3], &x[2], 
c_16_n16_8_24); + iadst_half_butterfly_pos_neon(&x[7], &x[6], c_16_n16_8_24); + iadst_half_butterfly_pos_neon(&x[11], &x[10], c_16_n16_8_24); + iadst_half_butterfly_neg_neon(&x[15], &x[14], c_16_n16_8_24); + + out[0] = x[0]; + out[1] = vnegq_s16(x[8]); + out[2] = x[12]; + out[3] = vnegq_s16(x[4]); + out[4] = x[6]; + out[5] = x[14]; + out[6] = x[10]; + out[7] = x[2]; + out[8] = x[3]; + out[9] = x[11]; + out[10] = x[15]; + out[11] = x[7]; + out[12] = x[5]; + out[13] = vnegq_s16(x[13]); + out[14] = x[9]; + out[15] = vnegq_s16(x[1]); + + if (output) { + idct16x16_store_pass1(out, output); + } else { + if (highbd_flag) { + idct16x16_add_store_bd8(out, dest, stride); + } else { + idct16x16_add_store(out, dest, stride); + } + } +} + +void vp9_iht16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, + int stride, int tx_type) { + static const iht_2d IHT_16[] = { + { vpx_idct16x16_256_add_half1d, + vpx_idct16x16_256_add_half1d }, // DCT_DCT = 0 + { vpx_iadst16x16_256_add_half1d, + vpx_idct16x16_256_add_half1d }, // ADST_DCT = 1 + { vpx_idct16x16_256_add_half1d, + vpx_iadst16x16_256_add_half1d }, // DCT_ADST = 2 + { vpx_iadst16x16_256_add_half1d, + vpx_iadst16x16_256_add_half1d } // ADST_ADST = 3 + }; + const iht_2d ht = IHT_16[tx_type]; + int16_t row_output[16 * 16]; + + // pass 1 + ht.rows(input, row_output, dest, stride, 0); // upper 8 rows + ht.rows(input + 8 * 16, row_output + 8, dest, stride, 0); // lower 8 rows + + // pass 2 + ht.cols(row_output, NULL, dest, stride, 0); // left 8 columns + ht.cols(row_output + 16 * 8, NULL, dest + 8, stride, 0); // right 8 columns +} diff --git a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c index 025254c3f337..4f0a90f21566 100644 --- a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c +++ b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c @@ -14,206 +14,63 @@ #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vp9/common/vp9_common.h" +#include "vp9/common/arm/neon/vp9_iht_neon.h" +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/txfm_common.h" -static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) { - int32x4_t q8s32, q9s32; - int16x4x2_t d0x2s16, d1x2s16; - int32x4x2_t q0x2s32; - - d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16)); - d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16)); - - q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1])); - q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1])); - q0x2s32 = vtrnq_s32(q8s32, q9s32); - - *q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]); - *q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]); -} - -static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16, - int16x4_t *d2s16) { - *d0s16 = vdup_n_s16(cospi_8_64); - *d1s16 = vdup_n_s16(cospi_16_64); - *d2s16 = vdup_n_s16(cospi_24_64); -} - -static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16, - int16x4_t *d5s16, int16x8_t *q3s16) { - *d3s16 = vdup_n_s16(sinpi_1_9); - *d4s16 = vdup_n_s16(sinpi_2_9); - *q3s16 = vdupq_n_s16(sinpi_3_9); - *d5s16 = vdup_n_s16(sinpi_4_9); -} - -static INLINE void IDCT4x4_1D(int16x4_t *d0s16, int16x4_t *d1s16, - int16x4_t *d2s16, int16x8_t *q8s16, - int16x8_t *q9s16) { - int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16; - int16x4_t d26s16, d27s16, d28s16, d29s16; - int32x4_t q10s32, q13s32, q14s32, q15s32; - int16x8_t q13s16, q14s16; - - d16s16 = 
vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - - d23s16 = vadd_s16(d16s16, d18s16); - d24s16 = vsub_s16(d16s16, d18s16); - - q15s32 = vmull_s16(d17s16, *d2s16); - q10s32 = vmull_s16(d17s16, *d0s16); - q13s32 = vmull_s16(d23s16, *d1s16); - q14s32 = vmull_s16(d24s16, *d1s16); - q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16); - q10s32 = vmlal_s16(q10s32, d19s16, *d2s16); - - d26s16 = vrshrn_n_s32(q13s32, 14); - d27s16 = vrshrn_n_s32(q14s32, 14); - d29s16 = vrshrn_n_s32(q15s32, 14); - d28s16 = vrshrn_n_s32(q10s32, 14); - - q13s16 = vcombine_s16(d26s16, d27s16); - q14s16 = vcombine_s16(d28s16, d29s16); - *q8s16 = vaddq_s16(q13s16, q14s16); - *q9s16 = vsubq_s16(q13s16, q14s16); - *q9s16 = vcombine_s16(vget_high_s16(*q9s16), vget_low_s16(*q9s16)); // vswp -} - -static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16, - int16x4_t *d5s16, int16x8_t *q3s16, - int16x8_t *q8s16, int16x8_t *q9s16) { - int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16; - int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; - - d6s16 = vget_low_s16(*q3s16); - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - - q10s32 = vmull_s16(*d3s16, d16s16); - q11s32 = vmull_s16(*d4s16, d16s16); - q12s32 = vmull_s16(d6s16, d17s16); - q13s32 = vmull_s16(*d5s16, d18s16); - q14s32 = vmull_s16(*d3s16, d18s16); - q15s32 = vmovl_s16(d16s16); - q15s32 = vaddw_s16(q15s32, d19s16); - q8s32 = vmull_s16(*d4s16, d19s16); - q15s32 = vsubw_s16(q15s32, d18s16); - q9s32 = vmull_s16(*d5s16, d19s16); - - q10s32 = vaddq_s32(q10s32, q13s32); - q10s32 = vaddq_s32(q10s32, q8s32); - q11s32 = vsubq_s32(q11s32, q14s32); - q8s32 = vdupq_n_s32(sinpi_3_9); - q11s32 = vsubq_s32(q11s32, q9s32); - q15s32 = vmulq_s32(q15s32, q8s32); - - q13s32 = vaddq_s32(q10s32, q12s32); - q10s32 = vaddq_s32(q10s32, q11s32); - q14s32 = vaddq_s32(q11s32, q12s32); - q10s32 = vsubq_s32(q10s32, q12s32); - - d16s16 = vrshrn_n_s32(q13s32, 14); - d17s16 = vrshrn_n_s32(q14s32, 14); - d18s16 = vrshrn_n_s32(q15s32, 14); - d19s16 = vrshrn_n_s32(q10s32, 14); - - *q8s16 = vcombine_s16(d16s16, d17s16); - *q9s16 = vcombine_s16(d18s16, d19s16); -} - void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { - uint8x8_t d26u8, d27u8; - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16; - uint32x2_t d26u32, d27u32; - int16x8_t q3s16, q8s16, q9s16; - uint16x8_t q8u16, q9u16; + int16x8_t a[2]; + uint8x8_t s[2], d[2]; + uint16x8_t sum[2]; - d26u32 = d27u32 = vdup_n_u32(0); + assert(!((intptr_t)dest % sizeof(uint32_t))); + assert(!(stride % sizeof(uint32_t))); - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - - TRANSPOSE4X4(&q8s16, &q9s16); + a[0] = load_tran_low_to_s16q(input); + a[1] = load_tran_low_to_s16q(input + 8); + transpose_s16_4x4q(&a[0], &a[1]); switch (tx_type) { - case 0: // idct_idct is not supported. 
Fall back to C - vp9_iht4x4_16_add_c(input, dest, stride, tx_type); - return; - case 1: // iadst_idct - // generate constants - GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); - GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); - - // first transform rows - IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); - - // transpose the matrix - TRANSPOSE4X4(&q8s16, &q9s16); - - // then transform columns - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + case DCT_DCT: + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + transpose_s16_4x4q(&a[0], &a[1]); + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); break; - case 2: // idct_iadst - // generate constantsyy - GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); - GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); - // first transform rows - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - - // transpose the matrix - TRANSPOSE4X4(&q8s16, &q9s16); - - // then transform columns - IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); + case ADST_DCT: + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + transpose_s16_4x4q(&a[0], &a[1]); + iadst4(a); break; - case 3: // iadst_iadst - // generate constants - GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); - // first transform rows - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - - // transpose the matrix - TRANSPOSE4X4(&q8s16, &q9s16); - - // then transform columns - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + case DCT_ADST: + iadst4(a); + transpose_s16_4x4q(&a[0], &a[1]); + idct4x4_16_kernel_bd8(a); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); break; - default: // iadst_idct - assert(0); + + default: + assert(tx_type == ADST_ADST); + iadst4(a); + transpose_s16_4x4q(&a[0], &a[1]); + iadst4(a); break; } - q8s16 = vrshrq_n_s16(q8s16, 4); - q9s16 = vrshrq_n_s16(q9s16, 4); - - d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0); - dest += stride; - d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1); - dest += stride; - d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0); - dest += stride; - d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1); - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32)); - - d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1); - dest -= stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0); - dest -= stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1); - dest -= stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0); + a[0] = vrshrq_n_s16(a[0], 4); + a[1] = vrshrq_n_s16(a[1], 4); + s[0] = load_u8(dest, stride); + s[1] = load_u8(dest + 2 * stride, stride); + sum[0] = vaddw_u8(vreinterpretq_u16_s16(a[0]), s[0]); + sum[1] = vaddw_u8(vreinterpretq_u16_s16(a[1]), s[1]); + d[0] = vqmovun_s16(vreinterpretq_s16_u16(sum[0])); + d[1] = vqmovun_s16(vreinterpretq_s16_u16(sum[1])); + store_u8(dest, stride, d[0]); + store_u8(dest + 2 * stride, stride, d[1]); } diff --git a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c index 1c739861c38a..46ee632e018b 100644 --- 
a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c +++ b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c @@ -14,527 +14,55 @@ #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vp9/common/vp9_common.h" +#include "vp9/common/arm/neon/vp9_iht_neon.h" +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/transpose_neon.h" -static int16_t cospi_2_64 = 16305; -static int16_t cospi_4_64 = 16069; -static int16_t cospi_6_64 = 15679; -static int16_t cospi_8_64 = 15137; -static int16_t cospi_10_64 = 14449; -static int16_t cospi_12_64 = 13623; -static int16_t cospi_14_64 = 12665; -static int16_t cospi_16_64 = 11585; -static int16_t cospi_18_64 = 10394; -static int16_t cospi_20_64 = 9102; -static int16_t cospi_22_64 = 7723; -static int16_t cospi_24_64 = 6270; -static int16_t cospi_26_64 = 4756; -static int16_t cospi_28_64 = 3196; -static int16_t cospi_30_64 = 1606; - -static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16, - int16x8_t *q10s16, int16x8_t *q11s16, - int16x8_t *q12s16, int16x8_t *q13s16, - int16x8_t *q14s16, int16x8_t *q15s16) { - int16x4_t d0s16, d1s16, d2s16, d3s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; - - d0s16 = vdup_n_s16(cospi_28_64); - d1s16 = vdup_n_s16(cospi_4_64); - d2s16 = vdup_n_s16(cospi_12_64); - d3s16 = vdup_n_s16(cospi_20_64); - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - q2s32 = vmull_s16(d18s16, d0s16); - q3s32 = vmull_s16(d19s16, d0s16); - q5s32 = vmull_s16(d26s16, d2s16); - q6s32 = vmull_s16(d27s16, d2s16); - - q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); - q5s32 = vmlsl_s16(q5s32, d22s16, d3s16); - q6s32 = vmlsl_s16(q6s32, d23s16, d3s16); - - d8s16 = vrshrn_n_s32(q2s32, 14); - d9s16 = vrshrn_n_s32(q3s32, 14); - d10s16 = vrshrn_n_s32(q5s32, 14); - d11s16 = vrshrn_n_s32(q6s32, 14); - q4s16 = vcombine_s16(d8s16, d9s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - q2s32 = vmull_s16(d18s16, d1s16); - q3s32 = vmull_s16(d19s16, d1s16); - q9s32 = vmull_s16(d26s16, d3s16); - q13s32 = vmull_s16(d27s16, d3s16); - - q2s32 = vmlal_s16(q2s32, d30s16, d0s16); - q3s32 = vmlal_s16(q3s32, d31s16, d0s16); - q9s32 = vmlal_s16(q9s32, d22s16, d2s16); - q13s32 = vmlal_s16(q13s32, d23s16, d2s16); - - d14s16 = vrshrn_n_s32(q2s32, 14); - d15s16 = vrshrn_n_s32(q3s32, 14); - d12s16 = vrshrn_n_s32(q9s32, 14); - d13s16 = vrshrn_n_s32(q13s32, 14); - q6s16 = vcombine_s16(d12s16, d13s16); - q7s16 = vcombine_s16(d14s16, d15s16); - - d0s16 = vdup_n_s16(cospi_16_64); - - q2s32 = vmull_s16(d16s16, d0s16); - q3s32 = vmull_s16(d17s16, d0s16); - q13s32 = vmull_s16(d16s16, d0s16); - q15s32 = vmull_s16(d17s16, d0s16); - - q2s32 = 
vmlal_s16(q2s32, d24s16, d0s16); - q3s32 = vmlal_s16(q3s32, d25s16, d0s16); - q13s32 = vmlsl_s16(q13s32, d24s16, d0s16); - q15s32 = vmlsl_s16(q15s32, d25s16, d0s16); - - d0s16 = vdup_n_s16(cospi_24_64); - d1s16 = vdup_n_s16(cospi_8_64); - - d18s16 = vrshrn_n_s32(q2s32, 14); - d19s16 = vrshrn_n_s32(q3s32, 14); - d22s16 = vrshrn_n_s32(q13s32, 14); - d23s16 = vrshrn_n_s32(q15s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - *q11s16 = vcombine_s16(d22s16, d23s16); - - q2s32 = vmull_s16(d20s16, d0s16); - q3s32 = vmull_s16(d21s16, d0s16); - q8s32 = vmull_s16(d20s16, d1s16); - q12s32 = vmull_s16(d21s16, d1s16); - - q2s32 = vmlsl_s16(q2s32, d28s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d29s16, d1s16); - q8s32 = vmlal_s16(q8s32, d28s16, d0s16); - q12s32 = vmlal_s16(q12s32, d29s16, d0s16); - - d26s16 = vrshrn_n_s32(q2s32, 14); - d27s16 = vrshrn_n_s32(q3s32, 14); - d30s16 = vrshrn_n_s32(q8s32, 14); - d31s16 = vrshrn_n_s32(q12s32, 14); - *q13s16 = vcombine_s16(d26s16, d27s16); - *q15s16 = vcombine_s16(d30s16, d31s16); - - q0s16 = vaddq_s16(*q9s16, *q15s16); - q1s16 = vaddq_s16(*q11s16, *q13s16); - q2s16 = vsubq_s16(*q11s16, *q13s16); - q3s16 = vsubq_s16(*q9s16, *q15s16); - - *q13s16 = vsubq_s16(q4s16, q5s16); - q4s16 = vaddq_s16(q4s16, q5s16); - *q14s16 = vsubq_s16(q7s16, q6s16); - q7s16 = vaddq_s16(q7s16, q6s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - - d16s16 = vdup_n_s16(cospi_16_64); - - q9s32 = vmull_s16(d28s16, d16s16); - q10s32 = vmull_s16(d29s16, d16s16); - q11s32 = vmull_s16(d28s16, d16s16); - q12s32 = vmull_s16(d29s16, d16s16); - - q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); - q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); - q11s32 = vmlal_s16(q11s32, d26s16, d16s16); - q12s32 = vmlal_s16(q12s32, d27s16, d16s16); - - d10s16 = vrshrn_n_s32(q9s32, 14); - d11s16 = vrshrn_n_s32(q10s32, 14); - d12s16 = vrshrn_n_s32(q11s32, 14); - d13s16 = vrshrn_n_s32(q12s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - *q8s16 = vaddq_s16(q0s16, q7s16); - *q9s16 = vaddq_s16(q1s16, q6s16); - *q10s16 = vaddq_s16(q2s16, q5s16); - *q11s16 = vaddq_s16(q3s16, q4s16); - *q12s16 = vsubq_s16(q3s16, q4s16); - *q13s16 = vsubq_s16(q2s16, q5s16); - *q14s16 = vsubq_s16(q1s16, q6s16); - *q15s16 = vsubq_s16(q0s16, q7s16); -} - -static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16, - int16x8_t *q10s16, int16x8_t *q11s16, - int16x8_t *q12s16, int16x8_t *q13s16, - int16x8_t *q14s16, int16x8_t *q15s16) { - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int16x8_t q2s16, q4s16, q5s16, q6s16; - int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32; - int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - 
d31s16 = vget_high_s16(*q15s16); - - d14s16 = vdup_n_s16(cospi_2_64); - d15s16 = vdup_n_s16(cospi_30_64); - - q1s32 = vmull_s16(d30s16, d14s16); - q2s32 = vmull_s16(d31s16, d14s16); - q3s32 = vmull_s16(d30s16, d15s16); - q4s32 = vmull_s16(d31s16, d15s16); - - d30s16 = vdup_n_s16(cospi_18_64); - d31s16 = vdup_n_s16(cospi_14_64); - - q1s32 = vmlal_s16(q1s32, d16s16, d15s16); - q2s32 = vmlal_s16(q2s32, d17s16, d15s16); - q3s32 = vmlsl_s16(q3s32, d16s16, d14s16); - q4s32 = vmlsl_s16(q4s32, d17s16, d14s16); - - q5s32 = vmull_s16(d22s16, d30s16); - q6s32 = vmull_s16(d23s16, d30s16); - q7s32 = vmull_s16(d22s16, d31s16); - q8s32 = vmull_s16(d23s16, d31s16); - - q5s32 = vmlal_s16(q5s32, d24s16, d31s16); - q6s32 = vmlal_s16(q6s32, d25s16, d31s16); - q7s32 = vmlsl_s16(q7s32, d24s16, d30s16); - q8s32 = vmlsl_s16(q8s32, d25s16, d30s16); - - q11s32 = vaddq_s32(q1s32, q5s32); - q12s32 = vaddq_s32(q2s32, q6s32); - q1s32 = vsubq_s32(q1s32, q5s32); - q2s32 = vsubq_s32(q2s32, q6s32); - - d22s16 = vrshrn_n_s32(q11s32, 14); - d23s16 = vrshrn_n_s32(q12s32, 14); - *q11s16 = vcombine_s16(d22s16, d23s16); - - q12s32 = vaddq_s32(q3s32, q7s32); - q15s32 = vaddq_s32(q4s32, q8s32); - q3s32 = vsubq_s32(q3s32, q7s32); - q4s32 = vsubq_s32(q4s32, q8s32); - - d2s16 = vrshrn_n_s32(q1s32, 14); - d3s16 = vrshrn_n_s32(q2s32, 14); - d24s16 = vrshrn_n_s32(q12s32, 14); - d25s16 = vrshrn_n_s32(q15s32, 14); - d6s16 = vrshrn_n_s32(q3s32, 14); - d7s16 = vrshrn_n_s32(q4s32, 14); - *q12s16 = vcombine_s16(d24s16, d25s16); - - d0s16 = vdup_n_s16(cospi_10_64); - d1s16 = vdup_n_s16(cospi_22_64); - q4s32 = vmull_s16(d26s16, d0s16); - q5s32 = vmull_s16(d27s16, d0s16); - q2s32 = vmull_s16(d26s16, d1s16); - q6s32 = vmull_s16(d27s16, d1s16); - - d30s16 = vdup_n_s16(cospi_26_64); - d31s16 = vdup_n_s16(cospi_6_64); - - q4s32 = vmlal_s16(q4s32, d20s16, d1s16); - q5s32 = vmlal_s16(q5s32, d21s16, d1s16); - q2s32 = vmlsl_s16(q2s32, d20s16, d0s16); - q6s32 = vmlsl_s16(q6s32, d21s16, d0s16); - - q0s32 = vmull_s16(d18s16, d30s16); - q13s32 = vmull_s16(d19s16, d30s16); - - q0s32 = vmlal_s16(q0s32, d28s16, d31s16); - q13s32 = vmlal_s16(q13s32, d29s16, d31s16); - - q10s32 = vmull_s16(d18s16, d31s16); - q9s32 = vmull_s16(d19s16, d31s16); - - q10s32 = vmlsl_s16(q10s32, d28s16, d30s16); - q9s32 = vmlsl_s16(q9s32, d29s16, d30s16); - - q14s32 = vaddq_s32(q2s32, q10s32); - q15s32 = vaddq_s32(q6s32, q9s32); - q2s32 = vsubq_s32(q2s32, q10s32); - q6s32 = vsubq_s32(q6s32, q9s32); - - d28s16 = vrshrn_n_s32(q14s32, 14); - d29s16 = vrshrn_n_s32(q15s32, 14); - d4s16 = vrshrn_n_s32(q2s32, 14); - d5s16 = vrshrn_n_s32(q6s32, 14); - *q14s16 = vcombine_s16(d28s16, d29s16); - - q9s32 = vaddq_s32(q4s32, q0s32); - q10s32 = vaddq_s32(q5s32, q13s32); - q4s32 = vsubq_s32(q4s32, q0s32); - q5s32 = vsubq_s32(q5s32, q13s32); - - d30s16 = vdup_n_s16(cospi_8_64); - d31s16 = vdup_n_s16(cospi_24_64); - - d18s16 = vrshrn_n_s32(q9s32, 14); - d19s16 = vrshrn_n_s32(q10s32, 14); - d8s16 = vrshrn_n_s32(q4s32, 14); - d9s16 = vrshrn_n_s32(q5s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - - q5s32 = vmull_s16(d2s16, d30s16); - q6s32 = vmull_s16(d3s16, d30s16); - q7s32 = vmull_s16(d2s16, d31s16); - q0s32 = vmull_s16(d3s16, d31s16); - - q5s32 = vmlal_s16(q5s32, d6s16, d31s16); - q6s32 = vmlal_s16(q6s32, d7s16, d31s16); - q7s32 = vmlsl_s16(q7s32, d6s16, d30s16); - q0s32 = vmlsl_s16(q0s32, d7s16, d30s16); - - q1s32 = vmull_s16(d4s16, d30s16); - q3s32 = vmull_s16(d5s16, d30s16); - q10s32 = vmull_s16(d4s16, d31s16); - q2s32 = vmull_s16(d5s16, d31s16); - - q1s32 = vmlsl_s16(q1s32, d8s16, d31s16); - 
q3s32 = vmlsl_s16(q3s32, d9s16, d31s16); - q10s32 = vmlal_s16(q10s32, d8s16, d30s16); - q2s32 = vmlal_s16(q2s32, d9s16, d30s16); - - *q8s16 = vaddq_s16(*q11s16, *q9s16); - *q11s16 = vsubq_s16(*q11s16, *q9s16); - q4s16 = vaddq_s16(*q12s16, *q14s16); - *q12s16 = vsubq_s16(*q12s16, *q14s16); - - q14s32 = vaddq_s32(q5s32, q1s32); - q15s32 = vaddq_s32(q6s32, q3s32); - q5s32 = vsubq_s32(q5s32, q1s32); - q6s32 = vsubq_s32(q6s32, q3s32); - - d18s16 = vrshrn_n_s32(q14s32, 14); - d19s16 = vrshrn_n_s32(q15s32, 14); - d10s16 = vrshrn_n_s32(q5s32, 14); - d11s16 = vrshrn_n_s32(q6s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - - q1s32 = vaddq_s32(q7s32, q10s32); - q3s32 = vaddq_s32(q0s32, q2s32); - q7s32 = vsubq_s32(q7s32, q10s32); - q0s32 = vsubq_s32(q0s32, q2s32); - - d28s16 = vrshrn_n_s32(q1s32, 14); - d29s16 = vrshrn_n_s32(q3s32, 14); - d14s16 = vrshrn_n_s32(q7s32, 14); - d15s16 = vrshrn_n_s32(q0s32, 14); - *q14s16 = vcombine_s16(d28s16, d29s16); - - d30s16 = vdup_n_s16(cospi_16_64); - - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - q2s32 = vmull_s16(d22s16, d30s16); - q3s32 = vmull_s16(d23s16, d30s16); - q13s32 = vmull_s16(d22s16, d30s16); - q1s32 = vmull_s16(d23s16, d30s16); - - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - q2s32 = vmlal_s16(q2s32, d24s16, d30s16); - q3s32 = vmlal_s16(q3s32, d25s16, d30s16); - q13s32 = vmlsl_s16(q13s32, d24s16, d30s16); - q1s32 = vmlsl_s16(q1s32, d25s16, d30s16); - - d4s16 = vrshrn_n_s32(q2s32, 14); - d5s16 = vrshrn_n_s32(q3s32, 14); - d24s16 = vrshrn_n_s32(q13s32, 14); - d25s16 = vrshrn_n_s32(q1s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - *q12s16 = vcombine_s16(d24s16, d25s16); - - q13s32 = vmull_s16(d10s16, d30s16); - q1s32 = vmull_s16(d11s16, d30s16); - q11s32 = vmull_s16(d10s16, d30s16); - q0s32 = vmull_s16(d11s16, d30s16); - - q13s32 = vmlal_s16(q13s32, d14s16, d30s16); - q1s32 = vmlal_s16(q1s32, d15s16, d30s16); - q11s32 = vmlsl_s16(q11s32, d14s16, d30s16); - q0s32 = vmlsl_s16(q0s32, d15s16, d30s16); - - d20s16 = vrshrn_n_s32(q13s32, 14); - d21s16 = vrshrn_n_s32(q1s32, 14); - d12s16 = vrshrn_n_s32(q11s32, 14); - d13s16 = vrshrn_n_s32(q0s32, 14); - *q10s16 = vcombine_s16(d20s16, d21s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - q5s16 = vdupq_n_s16(0); - - *q9s16 = vsubq_s16(q5s16, *q9s16); - *q11s16 = vsubq_s16(q5s16, q2s16); - *q13s16 = vsubq_s16(q5s16, q6s16); - *q15s16 = vsubq_s16(q5s16, q4s16); -} - void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { - int i; - uint8_t *d1, *d2; - uint8x8_t d0u8, d1u8, d2u8, d3u8; - uint64x1_t d0u64, d1u64, d2u64, d3u64; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - uint16x8_t q8u16, q9u16, q10u16, q11u16; + const int16x8_t cospis = vld1q_s16(kCospi); + const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 + const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 + int16x8_t a[8]; - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - q10s16 = vld1q_s16(input + 8 * 2); - q11s16 = vld1q_s16(input + 8 * 3); - q12s16 = vld1q_s16(input + 8 * 4); - q13s16 = vld1q_s16(input + 8 * 5); - q14s16 = vld1q_s16(input + 8 * 6); - q15s16 = vld1q_s16(input + 8 * 7); + a[0] = load_tran_low_to_s16q(input + 0 * 8); + a[1] = load_tran_low_to_s16q(input + 1 * 8); + a[2] = load_tran_low_to_s16q(input + 2 * 8); + a[3] = load_tran_low_to_s16q(input + 3 * 8); + a[4] = load_tran_low_to_s16q(input + 4 * 8); + a[5] = load_tran_low_to_s16q(input + 5 * 8); + a[6] = 
load_tran_low_to_s16q(input + 6 * 8); + a[7] = load_tran_low_to_s16q(input + 7 * 8); - transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, - &q15s16); + transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); switch (tx_type) { - case 0: // idct_idct is not supported. Fall back to C - vp9_iht8x8_64_add_c(input, dest, stride, tx_type); - return; - case 1: // iadst_idct - // generate IDCT constants - // GENERATE_IDCT_CONSTANTS - - // first transform rows - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, - &q15s16); - - // transpose the matrix - transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, - &q14s16, &q15s16); - - // generate IADST constants - // GENERATE_IADST_CONSTANTS - - // then transform columns - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, - &q15s16); + case DCT_DCT: + idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a); + transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); + idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a); break; - case 2: // idct_iadst - // generate IADST constants - // GENERATE_IADST_CONSTANTS - // first transform rows - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, - &q15s16); - - // transpose the matrix - transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, - &q14s16, &q15s16); - - // generate IDCT constants - // GENERATE_IDCT_CONSTANTS - - // then transform columns - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, - &q15s16); + case ADST_DCT: + idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a); + transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); + iadst8(a); break; - case 3: // iadst_iadst - // generate IADST constants - // GENERATE_IADST_CONSTANTS - // first transform rows - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, - &q15s16); - - // transpose the matrix - transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, - &q14s16, &q15s16); - - // then transform columns - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16, - &q15s16); + case DCT_ADST: + iadst8(a); + transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); + idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a); break; - default: // iadst_idct - assert(0); + + default: + assert(tx_type == ADST_ADST); + iadst8(a); + transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); + iadst8(a); break; } - q8s16 = vrshrq_n_s16(q8s16, 5); - q9s16 = vrshrq_n_s16(q9s16, 5); - q10s16 = vrshrq_n_s16(q10s16, 5); - q11s16 = vrshrq_n_s16(q11s16, 5); - q12s16 = vrshrq_n_s16(q12s16, 5); - q13s16 = vrshrq_n_s16(q13s16, 5); - q14s16 = vrshrq_n_s16(q14s16, 5); - q15s16 = vrshrq_n_s16(q15s16, 5); - - for (d1 = d2 = dest, i = 0; i < 2; i++) { - if (i != 0) { - q8s16 = q12s16; - q9s16 = q13s16; - q10s16 = q14s16; - q11s16 = q15s16; - } - - d0u64 = vld1_u64((uint64_t *)d1); - d1 += stride; - d1u64 = vld1_u64((uint64_t *)d1); - d1 += stride; - d2u64 = vld1_u64((uint64_t *)d1); - d1 += stride; - d3u64 = vld1_u64((uint64_t *)d1); - d1 += stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64)); - q10u16 = - vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64)); - q11u16 = - vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64)); - - d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d1u8 
= vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); - d2 += stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += stride; - } + idct8x8_add8x8_neon(a, dest, stride); } diff --git a/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht_neon.h b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht_neon.h new file mode 100644 index 000000000000..c64822e27c60 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/arm/neon/vp9_iht_neon.h @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_ +#define VPX_VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_ + +#include + +#include "./vp9_rtcd.h" +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/arm/mem_neon.h" +#include "vpx_dsp/txfm_common.h" + +static INLINE void iadst4(int16x8_t *const io) { + const int32x4_t c3 = vdupq_n_s32(sinpi_3_9); + int16x4_t x[4]; + int32x4_t s[8], output[4]; + const int16x4_t c = + create_s16x4_neon(sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9); + + x[0] = vget_low_s16(io[0]); + x[1] = vget_low_s16(io[1]); + x[2] = vget_high_s16(io[0]); + x[3] = vget_high_s16(io[1]); + + s[0] = vmull_lane_s16(x[0], c, 0); + s[1] = vmull_lane_s16(x[0], c, 1); + s[2] = vmull_lane_s16(x[1], c, 2); + s[3] = vmull_lane_s16(x[2], c, 3); + s[4] = vmull_lane_s16(x[2], c, 0); + s[5] = vmull_lane_s16(x[3], c, 1); + s[6] = vmull_lane_s16(x[3], c, 3); + s[7] = vaddl_s16(x[0], x[3]); + s[7] = vsubw_s16(s[7], x[2]); + + s[0] = vaddq_s32(s[0], s[3]); + s[0] = vaddq_s32(s[0], s[5]); + s[1] = vsubq_s32(s[1], s[4]); + s[1] = vsubq_s32(s[1], s[6]); + s[3] = s[2]; + s[2] = vmulq_s32(c3, s[7]); + + output[0] = vaddq_s32(s[0], s[3]); + output[1] = vaddq_s32(s[1], s[3]); + output[2] = s[2]; + output[3] = vaddq_s32(s[0], s[1]); + output[3] = vsubq_s32(output[3], s[3]); + dct_const_round_shift_low_8_dual(output, &io[0], &io[1]); +} + +static INLINE void iadst_half_butterfly_neon(int16x8_t *const x, + const int16x4_t c) { + // Don't add/sub before multiply, which will overflow in iadst8. + const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(x[0]), c, 0); + const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(x[0]), c, 0); + const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(x[1]), c, 0); + const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(x[1]), c, 0); + int32x4_t t0[2], t1[2]; + + t0[0] = vaddq_s32(x0_lo, x1_lo); + t0[1] = vaddq_s32(x0_hi, x1_hi); + t1[0] = vsubq_s32(x0_lo, x1_lo); + t1[1] = vsubq_s32(x0_hi, x1_hi); + x[0] = dct_const_round_shift_low_8(t0); + x[1] = dct_const_round_shift_low_8(t1); +} + +static INLINE void iadst_half_butterfly_neg_neon(int16x8_t *const x0, + int16x8_t *const x1, + const int16x4_t c) { + // Don't add/sub before multiply, which will overflow in iadst8. 
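// (Editorial aside, illustrative only -- not part of the upstream patch.)
// The comment above is about intermediate precision: NEON vaddq_s16 wraps on
// overflow rather than saturating, and the iadst8 intermediates can already
// sit near +/-2^15, so adding the two inputs in 16 bits before the scale
// could wrap. vmull_lane_s16 widens to 32 bits first, so the sum stays
// exact. A scalar model of one output lane, assuming DCT_CONST_BITS == 14
// from vpx_dsp/txfm_common.h:
//
//   int32_t t = (int32_t)x0 * c + (int32_t)x1 * c;   // widen, then add
//   int16_t y = (int16_t)ROUND_POWER_OF_TWO(t, 14);  // dct_const_round_shift
//
// With |x0|, |x1| <= 2^15 and |c| < 2^14, |t| < 2^30, comfortably in int32.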
+ const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 1); + const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(*x0), c, 1); + const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 1); + const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(*x1), c, 1); + int32x4_t t0[2], t1[2]; + + t0[0] = vaddq_s32(x0_lo, x1_lo); + t0[1] = vaddq_s32(x0_hi, x1_hi); + t1[0] = vsubq_s32(x0_lo, x1_lo); + t1[1] = vsubq_s32(x0_hi, x1_hi); + *x1 = dct_const_round_shift_low_8(t0); + *x0 = dct_const_round_shift_low_8(t1); +} + +static INLINE void iadst_half_butterfly_pos_neon(int16x8_t *const x0, + int16x8_t *const x1, + const int16x4_t c) { + // Don't add/sub before multiply, which will overflow in iadst8. + const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 0); + const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(*x0), c, 0); + const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 0); + const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(*x1), c, 0); + int32x4_t t0[2], t1[2]; + + t0[0] = vaddq_s32(x0_lo, x1_lo); + t0[1] = vaddq_s32(x0_hi, x1_hi); + t1[0] = vsubq_s32(x0_lo, x1_lo); + t1[1] = vsubq_s32(x0_hi, x1_hi); + *x1 = dct_const_round_shift_low_8(t0); + *x0 = dct_const_round_shift_low_8(t1); +} + +static INLINE void iadst_butterfly_lane_0_1_neon(const int16x8_t in0, + const int16x8_t in1, + const int16x4_t c, + int32x4_t *const s0, + int32x4_t *const s1) { + s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 0); + s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 0); + s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 1); + s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 1); + + s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 1); + s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 1); + s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 0); + s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 0); +} + +static INLINE void iadst_butterfly_lane_2_3_neon(const int16x8_t in0, + const int16x8_t in1, + const int16x4_t c, + int32x4_t *const s0, + int32x4_t *const s1) { + s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 2); + s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 2); + s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 3); + s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 3); + + s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 3); + s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 3); + s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 2); + s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 2); +} + +static INLINE void iadst_butterfly_lane_1_0_neon(const int16x8_t in0, + const int16x8_t in1, + const int16x4_t c, + int32x4_t *const s0, + int32x4_t *const s1) { + s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 1); + s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 1); + s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 0); + s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 0); + + s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 0); + s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 0); + s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 1); + s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 1); +} + +static INLINE void iadst_butterfly_lane_3_2_neon(const int16x8_t in0, + const int16x8_t in1, + const int16x4_t c, + int32x4_t *const s0, + int32x4_t *const s1) { + s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 3); + s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 3); + s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 2); + s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 2); + + s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 2); + s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 2); + 
s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 3); + s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 3); +} + +static INLINE int16x8_t add_dct_const_round_shift_low_8( + const int32x4_t *const in0, const int32x4_t *const in1) { + int32x4_t sum[2]; + + sum[0] = vaddq_s32(in0[0], in1[0]); + sum[1] = vaddq_s32(in0[1], in1[1]); + return dct_const_round_shift_low_8(sum); +} + +static INLINE int16x8_t sub_dct_const_round_shift_low_8( + const int32x4_t *const in0, const int32x4_t *const in1) { + int32x4_t sum[2]; + + sum[0] = vsubq_s32(in0[0], in1[0]); + sum[1] = vsubq_s32(in0[1], in1[1]); + return dct_const_round_shift_low_8(sum); +} + +static INLINE void iadst8(int16x8_t *const io) { + const int16x4_t c0 = + create_s16x4_neon(cospi_2_64, cospi_30_64, cospi_10_64, cospi_22_64); + const int16x4_t c1 = + create_s16x4_neon(cospi_18_64, cospi_14_64, cospi_26_64, cospi_6_64); + const int16x4_t c2 = + create_s16x4_neon(cospi_16_64, 0, cospi_8_64, cospi_24_64); + int16x8_t x[8], t[4]; + int32x4_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2]; + + x[0] = io[7]; + x[1] = io[0]; + x[2] = io[5]; + x[3] = io[2]; + x[4] = io[3]; + x[5] = io[4]; + x[6] = io[1]; + x[7] = io[6]; + + // stage 1 + iadst_butterfly_lane_0_1_neon(x[0], x[1], c0, s0, s1); + iadst_butterfly_lane_2_3_neon(x[2], x[3], c0, s2, s3); + iadst_butterfly_lane_0_1_neon(x[4], x[5], c1, s4, s5); + iadst_butterfly_lane_2_3_neon(x[6], x[7], c1, s6, s7); + + x[0] = add_dct_const_round_shift_low_8(s0, s4); + x[1] = add_dct_const_round_shift_low_8(s1, s5); + x[2] = add_dct_const_round_shift_low_8(s2, s6); + x[3] = add_dct_const_round_shift_low_8(s3, s7); + x[4] = sub_dct_const_round_shift_low_8(s0, s4); + x[5] = sub_dct_const_round_shift_low_8(s1, s5); + x[6] = sub_dct_const_round_shift_low_8(s2, s6); + x[7] = sub_dct_const_round_shift_low_8(s3, s7); + + // stage 2 + t[0] = x[0]; + t[1] = x[1]; + t[2] = x[2]; + t[3] = x[3]; + iadst_butterfly_lane_2_3_neon(x[4], x[5], c2, s4, s5); + iadst_butterfly_lane_3_2_neon(x[7], x[6], c2, s7, s6); + + x[0] = vaddq_s16(t[0], t[2]); + x[1] = vaddq_s16(t[1], t[3]); + x[2] = vsubq_s16(t[0], t[2]); + x[3] = vsubq_s16(t[1], t[3]); + x[4] = add_dct_const_round_shift_low_8(s4, s6); + x[5] = add_dct_const_round_shift_low_8(s5, s7); + x[6] = sub_dct_const_round_shift_low_8(s4, s6); + x[7] = sub_dct_const_round_shift_low_8(s5, s7); + + // stage 3 + iadst_half_butterfly_neon(x + 2, c2); + iadst_half_butterfly_neon(x + 6, c2); + + io[0] = x[0]; + io[1] = vnegq_s16(x[4]); + io[2] = x[6]; + io[3] = vnegq_s16(x[2]); + io[4] = x[3]; + io[5] = vnegq_s16(x[7]); + io[6] = x[5]; + io[7] = vnegq_s16(x[1]); +} + +void vpx_iadst16x16_256_add_half1d(const void *const input, int16_t *output, + void *const dest, const int stride, + const int highbd_flag); + +typedef void (*iht_1d)(const void *const input, int16_t *output, + void *const dest, const int stride, + const int highbd_flag); + +typedef struct { + iht_1d cols, rows; // vertical and horizontal +} iht_2d; + +#endif // VPX_VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_ diff --git a/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c b/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c index 3e3530116d1b..c031322806b6 100644 --- a/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c +++ b/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c @@ -10,6 +10,7 @@ #include +#include "./vp9_rtcd.h" #include "vp9/common/vp9_enums.h" #include "vpx_dsp/mips/inv_txfm_msa.h" diff --git a/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c 
b/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c
index 786fbdb794c3..aaccd5ca7b91 100644
--- a/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c
+++ b/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c
@@ -10,6 +10,7 @@
 #include <assert.h>

+#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_enums.h"
 #include "vpx_dsp/mips/inv_txfm_msa.h"
diff --git a/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct8x8_msa.c b/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct8x8_msa.c
index e4166775da7b..76d15ff8c0d2 100644
--- a/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct8x8_msa.c
+++ b/media/libvpx/libvpx/vp9/common/mips/msa/vp9_idct8x8_msa.c
@@ -10,6 +10,7 @@
 #include <assert.h>

+#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_enums.h"
 #include "vpx_dsp/mips/inv_txfm_msa.h"
diff --git a/media/libvpx/libvpx/vp9/common/ppc/vp9_idct_vsx.c b/media/libvpx/libvpx/vp9/common/ppc/vp9_idct_vsx.c
new file mode 100644
index 000000000000..e861596ad48f
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/common/ppc/vp9_idct_vsx.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vp9_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/ppc/inv_txfm_vsx.h"
+#include "vpx_dsp/ppc/bitdepth_conversion_vsx.h"
+
+#include "vp9/common/vp9_enums.h"
+
+void vp9_iht4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int stride,
+                           int tx_type) {
+  int16x8_t in[2], out[2];
+
+  in[0] = load_tran_low(0, input);
+  in[1] = load_tran_low(8 * sizeof(*input), input);
+
+  switch (tx_type) {
+    case DCT_DCT:
+      vpx_idct4_vsx(in, out);
+      vpx_idct4_vsx(out, in);
+      break;
+    case ADST_DCT:
+      vpx_idct4_vsx(in, out);
+      vp9_iadst4_vsx(out, in);
+      break;
+    case DCT_ADST:
+      vp9_iadst4_vsx(in, out);
+      vpx_idct4_vsx(out, in);
+      break;
+    default:
+      assert(tx_type == ADST_ADST);
+      vp9_iadst4_vsx(in, out);
+      vp9_iadst4_vsx(out, in);
+      break;
+  }
+
+  vpx_round_store4x4_vsx(in, out, dest, stride);
+}
+
+void vp9_iht8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, int stride,
+                           int tx_type) {
+  int16x8_t in[8], out[8];
+
+  // load input data
+  in[0] = load_tran_low(0, input);
+  in[1] = load_tran_low(8 * sizeof(*input), input);
+  in[2] = load_tran_low(2 * 8 * sizeof(*input), input);
+  in[3] = load_tran_low(3 * 8 * sizeof(*input), input);
+  in[4] = load_tran_low(4 * 8 * sizeof(*input), input);
+  in[5] = load_tran_low(5 * 8 * sizeof(*input), input);
+  in[6] = load_tran_low(6 * 8 * sizeof(*input), input);
+  in[7] = load_tran_low(7 * 8 * sizeof(*input), input);
+
+  switch (tx_type) {
+    case DCT_DCT:
+      vpx_idct8_vsx(in, out);
+      vpx_idct8_vsx(out, in);
+      break;
+    case ADST_DCT:
+      vpx_idct8_vsx(in, out);
+      vp9_iadst8_vsx(out, in);
+      break;
+    case DCT_ADST:
+      vp9_iadst8_vsx(in, out);
+      vpx_idct8_vsx(out, in);
+      break;
+    default:
+      assert(tx_type == ADST_ADST);
+      vp9_iadst8_vsx(in, out);
+      vp9_iadst8_vsx(out, in);
+      break;
+  }
+
+  vpx_round_store8x8_vsx(in, dest, stride);
+}
+
+void vp9_iht16x16_256_add_vsx(const tran_low_t *input, uint8_t *dest,
+                              int stride, int tx_type) {
+  int16x8_t in0[16], in1[16];
+
+  LOAD_INPUT16(load_tran_low, input, 0, 8 * sizeof(*input), in0);
+  LOAD_INPUT16(load_tran_low, input, 8 * 8 * 2 * sizeof(*input),
+               8 * 
sizeof(*input), in1); + + switch (tx_type) { + case DCT_DCT: + vpx_idct16_vsx(in0, in1); + vpx_idct16_vsx(in0, in1); + break; + case ADST_DCT: + vpx_idct16_vsx(in0, in1); + vpx_iadst16_vsx(in0, in1); + break; + case DCT_ADST: + vpx_iadst16_vsx(in0, in1); + vpx_idct16_vsx(in0, in1); + break; + default: + assert(tx_type == ADST_ADST); + vpx_iadst16_vsx(in0, in1); + vpx_iadst16_vsx(in0, in1); + break; + } + + vpx_round_store16x16_vsx(in0, in1, dest, stride); +} diff --git a/media/libvpx/libvpx/vp9/common/vp9_alloccommon.c b/media/libvpx/libvpx/vp9/common/vp9_alloccommon.c index 7345e259b6e8..5702dca718e5 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_alloccommon.c +++ b/media/libvpx/libvpx/vp9/common/vp9_alloccommon.c @@ -17,17 +17,26 @@ #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_onyxc_int.h" -void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) { +void vp9_set_mi_size(int *mi_rows, int *mi_cols, int *mi_stride, int width, + int height) { const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); + *mi_cols = aligned_width >> MI_SIZE_LOG2; + *mi_rows = aligned_height >> MI_SIZE_LOG2; + *mi_stride = calc_mi_size(*mi_cols); +} - cm->mi_cols = aligned_width >> MI_SIZE_LOG2; - cm->mi_rows = aligned_height >> MI_SIZE_LOG2; - cm->mi_stride = calc_mi_size(cm->mi_cols); +void vp9_set_mb_size(int *mb_rows, int *mb_cols, int *mb_num, int mi_rows, + int mi_cols) { + *mb_cols = (mi_cols + 1) >> 1; + *mb_rows = (mi_rows + 1) >> 1; + *mb_num = (*mb_rows) * (*mb_cols); +} - cm->mb_cols = (cm->mi_cols + 1) >> 1; - cm->mb_rows = (cm->mi_rows + 1) >> 1; - cm->MBs = cm->mb_rows * cm->mb_cols; +void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) { + vp9_set_mi_size(&cm->mi_rows, &cm->mi_cols, &cm->mi_stride, width, height); + vp9_set_mb_size(&cm->mb_rows, &cm->mb_cols, &cm->MBs, cm->mi_rows, + cm->mi_cols); } static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) { diff --git a/media/libvpx/libvpx/vp9/common/vp9_alloccommon.h b/media/libvpx/libvpx/vp9/common/vp9_alloccommon.h index a3a1638572d6..90cbb093d716 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_alloccommon.h +++ b/media/libvpx/libvpx/vp9/common/vp9_alloccommon.h @@ -8,10 +8,10 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_ -#define VP9_COMMON_VP9_ALLOCCOMMON_H_ +#ifndef VPX_VP9_COMMON_VP9_ALLOCCOMMON_H_ +#define VPX_VP9_COMMON_VP9_ALLOCCOMMON_H_ -#define INVALID_IDX -1 // Invalid buffer index. +#define INVALID_IDX (-1) // Invalid buffer index. 
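/* Editorial aside, illustrative only -- not part of the patch. What the new
   vp9_set_mi_size()/vp9_set_mb_size() helpers above compute, assuming
   MI_SIZE_LOG2 == 3 (one mode-info unit covers 8x8 pixels) as defined in
   vp9_enums.h. For a 1920x1080 frame:

     mi_cols = ALIGN_POWER_OF_TWO(1920, 3) >> 3 = 240
     mi_rows = ALIGN_POWER_OF_TWO(1080, 3) >> 3 = 135
     mb_cols = (240 + 1) >> 1 = 120
     mb_rows = (135 + 1) >> 1 = 68
     MBs     = 120 * 68 = 8160

   The (mi + 1) >> 1 step rounds up when an MI dimension is odd, since one
   16x16 macroblock spans two mode-info units in each direction. */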
#ifdef __cplusplus extern "C" { @@ -33,6 +33,11 @@ void vp9_free_postproc_buffers(struct VP9Common *cm); int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height); void vp9_free_state_buffers(struct VP9Common *cm); +void vp9_set_mi_size(int *mi_rows, int *mi_cols, int *mi_stride, int width, + int height); +void vp9_set_mb_size(int *mb_rows, int *mb_cols, int *mb_num, int mi_rows, + int mi_cols); + void vp9_set_mb_mi(struct VP9Common *cm, int width, int height); void vp9_swap_current_and_last_seg_map(struct VP9Common *cm); @@ -41,4 +46,4 @@ void vp9_swap_current_and_last_seg_map(struct VP9Common *cm); } // extern "C" #endif -#endif // VP9_COMMON_VP9_ALLOCCOMMON_H_ +#endif // VPX_VP9_COMMON_VP9_ALLOCCOMMON_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_blockd.h b/media/libvpx/libvpx/vp9/common/vp9_blockd.h index 780b29208bca..6ef8127a5929 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_blockd.h +++ b/media/libvpx/libvpx/vp9/common/vp9_blockd.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_BLOCKD_H_ -#define VP9_COMMON_VP9_BLOCKD_H_ +#ifndef VPX_VP9_COMMON_VP9_BLOCKD_H_ +#define VPX_VP9_COMMON_VP9_BLOCKD_H_ #include "./vpx_config.h" @@ -54,12 +54,14 @@ typedef struct { // decoder implementation modules critically rely on the defined entry values // specified herein. They should be refactored concurrently. -#define NONE -1 +#define NONE (-1) #define INTRA_FRAME 0 #define LAST_FRAME 1 #define GOLDEN_FRAME 2 #define ALTREF_FRAME 3 #define MAX_REF_FRAMES 4 +#define MAX_INTER_REF_FRAMES 3 + typedef int8_t MV_REFERENCE_FRAME; // This structure now relates to 8x8 block regions. @@ -130,6 +132,8 @@ struct macroblockd_plane { // encoder const int16_t *dequant; + + int *eob; }; #define BLOCK_OFFSET(x, i) ((x) + (i)*16) @@ -173,7 +177,7 @@ typedef struct macroblockd { FRAME_CONTEXT *fc; /* pointers to reference frames */ - RefBuffer *block_refs[2]; + const RefBuffer *block_refs[2]; /* pointer to current frame */ const YV12_BUFFER_CONFIG *cur_buf; @@ -193,6 +197,8 @@ typedef struct macroblockd { int corrupted; struct vpx_internal_error_info *error_info; + + PARTITION_TYPE *partition; } MACROBLOCKD; static INLINE PLANE_TYPE get_plane_type(int plane) { @@ -281,8 +287,30 @@ void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, int aoff, int loff); +#if CONFIG_MISMATCH_DEBUG +#define TX_UNIT_SIZE_LOG2 2 +static INLINE void mi_to_pixel_loc(int *pixel_c, int *pixel_r, int mi_col, + int mi_row, int tx_blk_col, int tx_blk_row, + int subsampling_x, int subsampling_y) { + *pixel_c = ((mi_col << MI_SIZE_LOG2) >> subsampling_x) + + (tx_blk_col << TX_UNIT_SIZE_LOG2); + *pixel_r = ((mi_row << MI_SIZE_LOG2) >> subsampling_y) + + (tx_blk_row << TX_UNIT_SIZE_LOG2); +} + +static INLINE int get_block_width(BLOCK_SIZE bsize) { + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + return 4 * num_4x4_w; +} + +static INLINE int get_block_height(BLOCK_SIZE bsize) { + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; + return 4 * num_4x4_h; +} +#endif + #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_COMMON_VP9_BLOCKD_H_ +#endif // VPX_VP9_COMMON_VP9_BLOCKD_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_common.h b/media/libvpx/libvpx/vp9/common/vp9_common.h index 666c3beaf037..e3c5535ddbaa 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_common.h +++ b/media/libvpx/libvpx/vp9/common/vp9_common.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the 
root of the source tree. */ -#ifndef VP9_COMMON_VP9_COMMON_H_ -#define VP9_COMMON_VP9_COMMON_H_ +#ifndef VPX_VP9_COMMON_VP9_COMMON_H_ +#define VPX_VP9_COMMON_VP9_COMMON_H_ /* Interface header for common constant data structures and lookup tables */ @@ -33,14 +33,14 @@ extern "C" { } // Use this for variably-sized arrays. -#define vp9_copy_array(dest, src, n) \ - { \ - assert(sizeof(*dest) == sizeof(*src)); \ - memcpy(dest, src, n * sizeof(*src)); \ +#define vp9_copy_array(dest, src, n) \ + { \ + assert(sizeof(*(dest)) == sizeof(*(src))); \ + memcpy(dest, src, (n) * sizeof(*(src))); \ } #define vp9_zero(dest) memset(&(dest), 0, sizeof(dest)) -#define vp9_zero_array(dest, n) memset(dest, 0, n * sizeof(*dest)) +#define vp9_zero_array(dest, n) memset(dest, 0, (n) * sizeof(*(dest))) static INLINE int get_unsigned_bits(unsigned int num_values) { return num_values > 0 ? get_msb(num_values) + 1 : 0; @@ -49,8 +49,8 @@ static INLINE int get_unsigned_bits(unsigned int num_values) { #if CONFIG_DEBUG #define CHECK_MEM_ERROR(cm, lval, expr) \ do { \ - lval = (expr); \ - if (!lval) \ + (lval) = (expr); \ + if (!(lval)) \ vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR, \ "Failed to allocate " #lval " at %s:%d", __FILE__, \ __LINE__); \ @@ -58,8 +58,8 @@ static INLINE int get_unsigned_bits(unsigned int num_values) { #else #define CHECK_MEM_ERROR(cm, lval, expr) \ do { \ - lval = (expr); \ - if (!lval) \ + (lval) = (expr); \ + if (!(lval)) \ vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR, \ "Failed to allocate " #lval); \ } while (0) @@ -75,4 +75,4 @@ static INLINE int get_unsigned_bits(unsigned int num_values) { } // extern "C" #endif -#endif // VP9_COMMON_VP9_COMMON_H_ +#endif // VPX_VP9_COMMON_VP9_COMMON_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_common_data.c b/media/libvpx/libvpx/vp9/common/vp9_common_data.c index 4a10833229d0..809d7317cef8 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_common_data.c +++ b/media/libvpx/libvpx/vp9/common/vp9_common_data.c @@ -28,7 +28,7 @@ const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 2, 2, const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8 }; -// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize))) +// VPXMIN(3, VPXMIN(b_width_log2_lookup(bsize), b_height_log2_lookup(bsize))) const uint8_t size_group_lookup[BLOCK_SIZES] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; diff --git a/media/libvpx/libvpx/vp9/common/vp9_common_data.h b/media/libvpx/libvpx/vp9/common/vp9_common_data.h index 5c6a7e8ff3ea..a533c5f058d7 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_common_data.h +++ b/media/libvpx/libvpx/vp9/common/vp9_common_data.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_COMMON_VP9_COMMON_DATA_H_ -#define VP9_COMMON_VP9_COMMON_DATA_H_ +#ifndef VPX_VP9_COMMON_VP9_COMMON_DATA_H_ +#define VPX_VP9_COMMON_VP9_COMMON_DATA_H_ #include "vp9/common/vp9_enums.h" #include "vpx/vpx_integer.h" @@ -42,4 +42,4 @@ extern const uint8_t need_top_left[INTRA_MODES]; } // extern "C" #endif -#endif // VP9_COMMON_VP9_COMMON_DATA_H_ +#endif // VPX_VP9_COMMON_VP9_COMMON_DATA_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_entropy.c b/media/libvpx/libvpx/vp9/common/vp9_entropy.c index a575bda729c7..430b917b8fb9 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_entropy.c +++ b/media/libvpx/libvpx/vp9/common/vp9_entropy.c @@ -42,6 +42,7 @@ const vpx_prob vp9_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254, 177, 153, 140, 133, 130, 129 }; #endif +/* clang-format off */ const uint8_t vp9_coefband_trans_8x8plus[1024] = { 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, // beyond MAXBAND_INDEX+1 all values are filled as 5 @@ -85,6 +86,7 @@ const uint8_t vp9_coefband_trans_8x8plus[1024] = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, }; +/* clang-format on */ const uint8_t vp9_coefband_trans_4x4[16] = { 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, diff --git a/media/libvpx/libvpx/vp9/common/vp9_entropy.h b/media/libvpx/libvpx/vp9/common/vp9_entropy.h index 1da49116687d..d026651df761 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_entropy.h +++ b/media/libvpx/libvpx/vp9/common/vp9_entropy.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_ENTROPY_H_ -#define VP9_COMMON_VP9_ENTROPY_H_ +#ifndef VPX_VP9_COMMON_VP9_ENTROPY_H_ +#define VPX_VP9_COMMON_VP9_ENTROPY_H_ #include "vpx/vpx_integer.h" #include "vpx_dsp/prob.h" @@ -137,7 +137,6 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) { // 128 lists of probabilities are stored for the following ONE node probs: // 1, 3, 5, 7, ..., 253, 255 // In between probabilities are interpolated linearly - #define COEFF_PROB_MODELS 255 #define UNCONSTRAINED_NODES 3 @@ -195,4 +194,4 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, } // extern "C" #endif -#endif // VP9_COMMON_VP9_ENTROPY_H_ +#endif // VPX_VP9_COMMON_VP9_ENTROPY_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_entropymode.c b/media/libvpx/libvpx/vp9/common/vp9_entropymode.c index 47cd63e94f92..bda824de3c26 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_entropymode.c +++ b/media/libvpx/libvpx/vp9/common/vp9_entropymode.c @@ -179,29 +179,32 @@ static const vpx_prob default_if_uv_probs[INTRA_MODES][INTRA_MODES - 1] = { { 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm }; -const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] = - { - // 8x8 -> 4x4 - { 158, 97, 94 }, // a/l both not split - { 93, 24, 99 }, // a split, l not split - { 85, 119, 44 }, // l split, a not split - { 62, 59, 67 }, // a/l both split - // 16x16 -> 8x8 - { 149, 53, 53 }, // a/l both not split - { 94, 20, 48 }, // a split, l not split - { 83, 53, 24 }, // l split, a not split - { 52, 18, 18 }, // a/l both split - // 32x32 -> 16x16 - { 150, 40, 39 }, // a/l both not split - { 78, 12, 26 }, // a split, l not split - { 67, 33, 11 }, // l split, a not split - { 24, 7, 5 }, // a/l both split - // 64x64 -> 32x32 - { 174, 35, 49 }, // a/l both not split - { 68, 11, 27 }, // a split, l not split - { 57, 15, 9 }, // l split, a not split - { 12, 3, 3 }, // a/l both split - }; 
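/* Editorial aside, illustrative only -- not part of the patch: the
   replacement table below is a clang-format re-indentation; the probability
   values are unchanged. The 16 rows are the 4 block sizes being split
   (8x8 ... 64x64) times 4 neighbor states (above/left split or not), and
   each row holds the 3 node probabilities for vp9_partition_tree, visible in
   a later hunk of this file. A hedged sketch of the corresponding tree walk,
   assuming the vpx_reader interface from vpx_dsp/bitreader.h:

     static PARTITION_TYPE read_partition_type(vpx_reader *r,
                                               const vpx_prob probs[3]) {
       if (!vpx_read(r, probs[0])) return PARTITION_NONE;
       if (!vpx_read(r, probs[1])) return PARTITION_HORZ;
       return vpx_read(r, probs[2]) ? PARTITION_SPLIT : PARTITION_VERT;
     }
*/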
+const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] + [PARTITION_TYPES - 1] = { + // 8x8 -> 4x4 + { 158, 97, 94 }, // a/l both not split + { 93, 24, 99 }, // a split, l not split + { 85, 119, 44 }, // l split, a not split + { 62, 59, 67 }, // a/l both split + + // 16x16 -> 8x8 + { 149, 53, 53 }, // a/l both not split + { 94, 20, 48 }, // a split, l not split + { 83, 53, 24 }, // l split, a not split + { 52, 18, 18 }, // a/l both split + + // 32x32 -> 16x16 + { 150, 40, 39 }, // a/l both not split + { 78, 12, 26 }, // a split, l not split + { 67, 33, 11 }, // l split, a not split + { 24, 7, 5 }, // a/l both split + + // 64x64 -> 32x32 + { 174, 35, 49 }, // a/l both not split + { 68, 11, 27 }, // a split, l not split + { 57, 15, 9 }, // l split, a not split + { 12, 3, 3 }, // a/l both split + }; static const vpx_prob default_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] = { @@ -260,13 +263,13 @@ const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)] = { -PARTITION_NONE, 2, -PARTITION_HORZ, 4, -PARTITION_VERT, -PARTITION_SPLIT }; -static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { - 9, 102, 187, 225 -}; +static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { 9, 102, + 187, + 225 }; -static const vpx_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = { - 239, 183, 119, 96, 41 -}; +static const vpx_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = { 239, 183, + 119, 96, + 41 }; static const vpx_prob default_comp_ref_p[REF_CONTEXTS] = { 50, 126, 123, 221, 226 }; @@ -331,8 +334,8 @@ static void init_mode_probs(FRAME_CONTEXT *fc) { vp9_copy(fc->inter_mode_probs, default_inter_mode_probs); } -const vpx_tree_index vp9_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)] = - { -EIGHTTAP, 2, -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP }; +const vpx_tree_index vp9_switchable_interp_tree[TREE_SIZE( + SWITCHABLE_FILTERS)] = { -EIGHTTAP, 2, -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP }; void vp9_adapt_mode_probs(VP9_COMMON *cm) { int i, j; diff --git a/media/libvpx/libvpx/vp9/common/vp9_entropymode.h b/media/libvpx/libvpx/vp9/common/vp9_entropymode.h index 0ee663fe8835..a756c8d0b8c5 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_entropymode.h +++ b/media/libvpx/libvpx/vp9/common/vp9_entropymode.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
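// The /* clang-format off */ ... /* clang-format on */ guards added around
// vp9_coefband_trans_8x8plus stop clang-format from reflowing a table whose
// column alignment carries meaning. The same idiom protects any hand-laid
// initializer; a small sketch reusing the 4x4 band table's values:
#include <stdint.h>
/* clang-format off */
static const uint8_t coefband_4x4_sketch[16] = {
  0, 1, 1, 2, 2, 2, 3, 3,
  3, 3, 4, 4, 4, 5, 5, 5
};
/* clang-format on */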
*/ -#ifndef VP9_COMMON_VP9_ENTROPYMODE_H_ -#define VP9_COMMON_VP9_ENTROPYMODE_H_ +#ifndef VPX_VP9_COMMON_VP9_ENTROPYMODE_H_ +#define VPX_VP9_COMMON_VP9_ENTROPYMODE_H_ #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymv.h" @@ -104,4 +104,4 @@ void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, } // extern "C" #endif -#endif // VP9_COMMON_VP9_ENTROPYMODE_H_ +#endif // VPX_VP9_COMMON_VP9_ENTROPYMODE_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_entropymv.c b/media/libvpx/libvpx/vp9/common/vp9_entropymv.c index a18a290cfd03..b6f052d088e7 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_entropymv.c +++ b/media/libvpx/libvpx/vp9/common/vp9_entropymv.c @@ -22,9 +22,7 @@ const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = { 18, -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10, }; -const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { - -0, -1, -}; +const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { -0, -1 }; const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1, 4, -2, -3 }; diff --git a/media/libvpx/libvpx/vp9/common/vp9_entropymv.h b/media/libvpx/libvpx/vp9/common/vp9_entropymv.h index e2fe37a327af..ee9d37973ff7 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_entropymv.h +++ b/media/libvpx/libvpx/vp9/common/vp9_entropymv.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_ENTROPYMV_H_ -#define VP9_COMMON_VP9_ENTROPYMV_H_ +#ifndef VPX_VP9_COMMON_VP9_ENTROPYMV_H_ +#define VPX_VP9_COMMON_VP9_ENTROPYMV_H_ #include "./vpx_config.h" @@ -25,7 +25,7 @@ struct VP9Common; void vp9_init_mv_probs(struct VP9Common *cm); -void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp); +void vp9_adapt_mv_probs(struct VP9Common *cm, int allow_hp); static INLINE int use_mv_hp(const MV *ref) { const int kMvRefThresh = 64; // threshold for use of high-precision 1/8 mv @@ -127,10 +127,10 @@ typedef struct { nmv_component_counts comps[2]; } nmv_context_counts; -void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx); +void vp9_inc_mv(const MV *mv, nmv_context_counts *counts); #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_COMMON_VP9_ENTROPYMV_H_ +#endif // VPX_VP9_COMMON_VP9_ENTROPYMV_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_enums.h b/media/libvpx/libvpx/vp9/common/vp9_enums.h index 056b298b3dbf..b33a3a2978ce 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_enums.h +++ b/media/libvpx/libvpx/vp9/common/vp9_enums.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_COMMON_VP9_ENUMS_H_ -#define VP9_COMMON_VP9_ENUMS_H_ +#ifndef VPX_VP9_COMMON_VP9_ENUMS_H_ +#define VPX_VP9_COMMON_VP9_ENUMS_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" @@ -41,6 +41,8 @@ typedef enum BITSTREAM_PROFILE { MAX_PROFILES } BITSTREAM_PROFILE; +typedef enum PARSE_RECON_FLAG { PARSE = 1, RECON = 2 } PARSE_RECON_FLAG; + #define BLOCK_4X4 0 #define BLOCK_4X8 1 #define BLOCK_8X4 2 @@ -140,4 +142,4 @@ typedef uint8_t PREDICTION_MODE; } // extern "C" #endif -#endif // VP9_COMMON_VP9_ENUMS_H_ +#endif // VPX_VP9_COMMON_VP9_ENUMS_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_filter.c b/media/libvpx/libvpx/vp9/common/vp9_filter.c index 6c43af8ce802..adbda6c825b0 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_filter.c +++ b/media/libvpx/libvpx/vp9/common/vp9_filter.c @@ -63,6 +63,20 @@ DECLARE_ALIGNED(256, static const InterpKernel, { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 } }; -const InterpKernel *vp9_filter_kernels[4] = { - sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters +// 4-tap filter +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_4[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 }, + { 0, 0, -6, 120, 18, -4, 0, 0 }, { 0, 0, -8, 114, 28, -6, 0, 0 }, + { 0, 0, -10, 108, 36, -6, 0, 0 }, { 0, 0, -12, 102, 46, -8, 0, 0 }, + { 0, 0, -12, 94, 56, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 }, + { 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 }, + { 0, 0, -10, 56, 94, -12, 0, 0 }, { 0, 0, -8, 46, 102, -12, 0, 0 }, + { 0, 0, -6, 36, 108, -10, 0, 0 }, { 0, 0, -6, 28, 114, -8, 0, 0 }, + { 0, 0, -4, 18, 120, -6, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 } +}; + +const InterpKernel *vp9_filter_kernels[5] = { + sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters, + sub_pel_filters_4 }; diff --git a/media/libvpx/libvpx/vp9/common/vp9_filter.h b/media/libvpx/libvpx/vp9/common/vp9_filter.h index 9d2b8e1dbf27..0382c88e7c06 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_filter.h +++ b/media/libvpx/libvpx/vp9/common/vp9_filter.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_FILTER_H_ -#define VP9_COMMON_VP9_FILTER_H_ +#ifndef VPX_VP9_COMMON_VP9_FILTER_H_ +#define VPX_VP9_COMMON_VP9_FILTER_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" @@ -25,6 +25,7 @@ extern "C" { #define EIGHTTAP_SHARP 2 #define SWITCHABLE_FILTERS 3 /* Number of switchable filters */ #define BILINEAR 3 +#define FOURTAP 4 // The codec can operate in four possible inter prediction filter mode: // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three. #define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) @@ -32,10 +33,10 @@ extern "C" { typedef uint8_t INTERP_FILTER; -extern const InterpKernel *vp9_filter_kernels[4]; +extern const InterpKernel *vp9_filter_kernels[5]; #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_COMMON_VP9_FILTER_H_ +#endif // VPX_VP9_COMMON_VP9_FILTER_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_frame_buffers.h b/media/libvpx/libvpx/vp9/common/vp9_frame_buffers.h index e2cfe61b6621..11be838c0205 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_frame_buffers.h +++ b/media/libvpx/libvpx/vp9/common/vp9_frame_buffers.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
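// Each row of the new sub_pel_filters_4 table above is still a full 8-tap
// InterpKernel whose taps sum to 128 (unity gain in VP9's Q7 fixed point);
// only taps 2..5 can be non-zero, so the existing 8-tap convolve paths can
// run the 4-tap filter unmodified. A small sanity check (assuming
// InterpKernel is int16_t[8], as in vpx_dsp/vpx_filter.h):
#include <assert.h>
#include <stdint.h>
typedef int16_t InterpKernel[8];
static void check_4tap(const InterpKernel *kernels, int n) {
  int i, j;
  for (i = 0; i < n; ++i) {
    int sum = 0;
    for (j = 0; j < 8; ++j) sum += kernels[i][j];
    assert(sum == 128);                               /* unity gain */
    assert(kernels[i][0] == 0 && kernels[i][1] == 0); /* outer taps unused */
    assert(kernels[i][6] == 0 && kernels[i][7] == 0);
  }
}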
 */
-#ifndef VP9_COMMON_VP9_FRAME_BUFFERS_H_
-#define VP9_COMMON_VP9_FRAME_BUFFERS_H_
+#ifndef VPX_VP9_COMMON_VP9_FRAME_BUFFERS_H_
+#define VPX_VP9_COMMON_VP9_FRAME_BUFFERS_H_

 #include "vpx/vpx_frame_buffer.h"
 #include "vpx/vpx_integer.h"
@@ -50,4 +50,4 @@ int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb);
 } // extern "C"
 #endif

-#endif // VP9_COMMON_VP9_FRAME_BUFFERS_H_
+#endif // VPX_VP9_COMMON_VP9_FRAME_BUFFERS_H_
diff --git a/media/libvpx/libvpx/vp9/common/vp9_idct.h b/media/libvpx/libvpx/vp9/common/vp9_idct.h
index 3e83b8402de0..94eeaf599e10 100644
--- a/media/libvpx/libvpx/vp9/common/vp9_idct.h
+++ b/media/libvpx/libvpx/vp9/common/vp9_idct.h
@@ -8,8 +8,8 @@
  * be found in the AUTHORS file in the root of the source tree.
  */

-#ifndef VP9_COMMON_VP9_IDCT_H_
-#define VP9_COMMON_VP9_IDCT_H_
+#ifndef VPX_VP9_COMMON_VP9_IDCT_H_
+#define VPX_VP9_COMMON_VP9_IDCT_H_

 #include <assert.h>
@@ -78,4 +78,4 @@ void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
 } // extern "C"
 #endif

-#endif // VP9_COMMON_VP9_IDCT_H_
+#endif // VPX_VP9_COMMON_VP9_IDCT_H_
diff --git a/media/libvpx/libvpx/vp9/common/vp9_loopfilter.c b/media/libvpx/libvpx/vp9/common/vp9_loopfilter.c
index c7c343aed5d7..95d6029f3b53 100644
--- a/media/libvpx/libvpx/vp9/common/vp9_loopfilter.c
+++ b/media/libvpx/libvpx/vp9/common/vp9_loopfilter.c
@@ -880,12 +880,12 @@ void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
 // This function sets up the bit masks for the entire 64x64 region represented
 // by mi_row, mi_col.
 void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
-                    MODE_INFO **mi, const int mode_info_stride,
+                    MODE_INFO **mi8x8, const int mode_info_stride,
                     LOOP_FILTER_MASK *lfm) {
   int idx_32, idx_16, idx_8;
   const loop_filter_info_n *const lfi_n = &cm->lf_info;
-  MODE_INFO **mip = mi;
-  MODE_INFO **mip2 = mi;
+  MODE_INFO **mip = mi8x8;
+  MODE_INFO **mip2 = mi8x8;

   // These are offsets to the next mi in the 64x64 block. It is what gets
   // added to the mi ptr as we go through each loop. It helps us to avoid
@@ -1087,13 +1087,19 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
   const int row_step_stride = cm->mi_stride * row_step;
   struct buf_2d *const dst = &plane->dst;
   uint8_t *const dst0 = dst->buf;
-  unsigned int mask_16x16[MI_BLOCK_SIZE] = { 0 };
-  unsigned int mask_8x8[MI_BLOCK_SIZE] = { 0 };
-  unsigned int mask_4x4[MI_BLOCK_SIZE] = { 0 };
-  unsigned int mask_4x4_int[MI_BLOCK_SIZE] = { 0 };
+  unsigned int mask_16x16[MI_BLOCK_SIZE];
+  unsigned int mask_8x8[MI_BLOCK_SIZE];
+  unsigned int mask_4x4[MI_BLOCK_SIZE];
+  unsigned int mask_4x4_int[MI_BLOCK_SIZE];
   uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
   int r, c;

+  vp9_zero(mask_16x16);
+  vp9_zero(mask_8x8);
+  vp9_zero(mask_4x4);
+  vp9_zero(mask_4x4_int);
+  vp9_zero(lfl);
+
   for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
     unsigned int mask_16x16_c = 0;
     unsigned int mask_8x8_c = 0;
@@ -1174,7 +1180,7 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
     }

     // Disable filtering on the leftmost column
-    border_mask = ~(mi_col == 0);
+    border_mask = ~(mi_col == 0 ?
1 : 0); #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { highbd_filter_selectively_vert( @@ -1330,6 +1336,8 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; uint16_t mask_4x4_int = lfm->int_4x4_uv; + vp9_zero(lfl_uv); + assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); // Vertical pass: do 2 rows at one time diff --git a/media/libvpx/libvpx/vp9/common/vp9_loopfilter.h b/media/libvpx/libvpx/vp9/common/vp9_loopfilter.h index 481a6cdc63cd..39648a72c324 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_loopfilter.h +++ b/media/libvpx/libvpx/vp9/common/vp9_loopfilter.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_LOOPFILTER_H_ -#define VP9_COMMON_VP9_LOOPFILTER_H_ +#ifndef VPX_VP9_COMMON_VP9_LOOPFILTER_H_ +#define VPX_VP9_COMMON_VP9_LOOPFILTER_H_ #include "vpx_ports/mem.h" #include "./vpx_config.h" @@ -97,7 +97,7 @@ struct VP9LfSyncData; // This function sets up the bit masks for the entire 64x64 region represented // by mi_row, mi_col. void vp9_setup_mask(struct VP9Common *const cm, const int mi_row, - const int mi_col, MODE_INFO **mi_8x8, + const int mi_col, MODE_INFO **mi8x8, const int mode_info_stride, LOOP_FILTER_MASK *lfm); void vp9_filter_block_plane_ss00(struct VP9Common *const cm, @@ -120,7 +120,7 @@ void vp9_loop_filter_init(struct VP9Common *cm); void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl); void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm, - struct macroblockd *mbd, int filter_level, + struct macroblockd *xd, int frame_filter_level, int y_only, int partial_frame); // Get the superblock lfm for a given mi_row, mi_col. @@ -157,4 +157,4 @@ int vp9_loop_filter_worker(void *arg1, void *unused); } // extern "C" #endif -#endif // VP9_COMMON_VP9_LOOPFILTER_H_ +#endif // VPX_VP9_COMMON_VP9_LOOPFILTER_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_mfqe.h b/media/libvpx/libvpx/vp9/common/vp9_mfqe.h index dfff8c23d658..f53e1c2f9d9e 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_mfqe.h +++ b/media/libvpx/libvpx/vp9/common/vp9_mfqe.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_MFQE_H_ -#define VP9_COMMON_VP9_MFQE_H_ +#ifndef VPX_VP9_COMMON_VP9_MFQE_H_ +#define VPX_VP9_COMMON_VP9_MFQE_H_ #ifdef __cplusplus extern "C" { @@ -28,4 +28,4 @@ void vp9_mfqe(struct VP9Common *cm); } // extern "C" #endif -#endif // VP9_COMMON_VP9_MFQE_H_ +#endif // VPX_VP9_COMMON_VP9_MFQE_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_mv.h b/media/libvpx/libvpx/vp9/common/vp9_mv.h index 4c8eac7213de..76f93cf0baf7 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_mv.h +++ b/media/libvpx/libvpx/vp9/common/vp9_mv.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
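// The border_mask rewrite above is value-preserving: (mi_col == 0) already
// evaluates to 0 or 1, so on a 32-bit int both forms yield ~1 = 0xfffffffe
// at the frame's left edge (clearing the column-0 bit) and ~0 = 0xffffffff
// elsewhere. The ternary presumably just silences compilers that warn about
// applying '~' to a boolean-valued expression (e.g. gcc's -Wbool-operation):
static unsigned int left_border_mask_sketch(int mi_col) {
  return ~(mi_col == 0 ? 1 : 0); /* 0xfffffffe only when mi_col == 0 */
}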
*/ -#ifndef VP9_COMMON_VP9_MV_H_ -#define VP9_COMMON_VP9_MV_H_ +#ifndef VPX_VP9_COMMON_VP9_MV_H_ +#define VPX_VP9_COMMON_VP9_MV_H_ #include "vpx/vpx_integer.h" @@ -19,6 +19,8 @@ extern "C" { #endif +#define INVALID_MV 0x80008000 + typedef struct mv { int16_t row; int16_t col; @@ -52,4 +54,4 @@ static INLINE void clamp_mv(MV *mv, int min_col, int max_col, int min_row, } // extern "C" #endif -#endif // VP9_COMMON_VP9_MV_H_ +#endif // VPX_VP9_COMMON_VP9_MV_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_mvref_common.h b/media/libvpx/libvpx/vp9/common/vp9_mvref_common.h index 2b2c1ba9eed0..5db6772dca51 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_mvref_common.h +++ b/media/libvpx/libvpx/vp9/common/vp9_mvref_common.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_ -#define VP9_COMMON_VP9_MVREF_COMMON_H_ +#ifndef VPX_VP9_COMMON_VP9_MVREF_COMMON_H_ +#define VPX_VP9_COMMON_VP9_MVREF_COMMON_H_ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_blockd.h" @@ -263,10 +263,10 @@ static INLINE int_mv scale_mv(const MODE_INFO *mi, int ref, mv_ref_list, Done) \ do { \ if (is_inter_block(mbmi)) { \ - if ((mbmi)->ref_frame[0] != ref_frame) \ + if ((mbmi)->ref_frame[0] != (ref_frame)) \ ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ refmv_count, mv_ref_list, Done); \ - if (has_second_ref(mbmi) && (mbmi)->ref_frame[1] != ref_frame && \ + if (has_second_ref(mbmi) && (mbmi)->ref_frame[1] != (ref_frame) && \ (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ refmv_count, mv_ref_list, Done); \ @@ -320,4 +320,4 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int block, } // extern "C" #endif -#endif // VP9_COMMON_VP9_MVREF_COMMON_H_ +#endif // VPX_VP9_COMMON_VP9_MVREF_COMMON_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_onyxc_int.h b/media/libvpx/libvpx/vp9/common/vp9_onyxc_int.h index 1d96d92c2448..f3942a8f070f 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_onyxc_int.h +++ b/media/libvpx/libvpx/vp9/common/vp9_onyxc_int.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_ONYXC_INT_H_ -#define VP9_COMMON_VP9_ONYXC_INT_H_ +#ifndef VPX_VP9_COMMON_VP9_ONYXC_INT_H_ +#define VPX_VP9_COMMON_VP9_ONYXC_INT_H_ #include "./vpx_config.h" #include "vpx/internal/vpx_codec_internal.h" @@ -37,10 +37,9 @@ extern "C" { #define REF_FRAMES_LOG2 3 #define REF_FRAMES (1 << REF_FRAMES_LOG2) -// 1 scratch frame for the new frame, 3 for scaled references on the encoder. -// TODO(jkoleszar): These 3 extra references could probably come from the -// normal reference pool. -#define FRAME_BUFFERS (REF_FRAMES + 4) +// 1 scratch frame for the new frame, REFS_PER_FRAME for scaled references on +// the encoder. 
+#define FRAME_BUFFERS (REF_FRAMES + 1 + REFS_PER_FRAME) #define FRAME_CONTEXTS_LOG2 2 #define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) @@ -70,6 +69,7 @@ typedef struct { int mi_rows; int mi_cols; uint8_t released; + int frame_index; vpx_codec_frame_buffer_t raw_frame_buffer; YV12_BUFFER_CONFIG buf; } RefCntBuffer; @@ -128,6 +128,8 @@ typedef struct VP9Common { int new_fb_idx; + int cur_show_frame_fb_idx; + #if CONFIG_VP9_POSTPROC YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG post_proc_buffer_int; @@ -242,22 +244,50 @@ typedef struct VP9Common { int byte_alignment; int skip_loop_filter; - // Private data associated with the frame buffer callbacks. - void *cb_priv; - vpx_get_frame_buffer_cb_fn_t get_fb_cb; - vpx_release_frame_buffer_cb_fn_t release_fb_cb; - - // Handles memory for the codec. - InternalFrameBufferList int_frame_buffers; - // External BufferPool passed from outside. BufferPool *buffer_pool; PARTITION_CONTEXT *above_seg_context; ENTROPY_CONTEXT *above_context; int above_context_alloc_cols; + + int lf_row; } VP9_COMMON; +typedef struct { + int frame_width; + int frame_height; + int render_frame_width; + int render_frame_height; + int mi_rows; + int mi_cols; + int mb_rows; + int mb_cols; + int num_mbs; + vpx_bit_depth_t bit_depth; +} FRAME_INFO; + +static INLINE void init_frame_info(FRAME_INFO *frame_info, + const VP9_COMMON *cm) { + frame_info->frame_width = cm->width; + frame_info->frame_height = cm->height; + frame_info->render_frame_width = cm->render_width; + frame_info->render_frame_height = cm->render_height; + frame_info->mi_cols = cm->mi_cols; + frame_info->mi_rows = cm->mi_rows; + frame_info->mb_cols = cm->mb_cols; + frame_info->mb_rows = cm->mb_rows; + frame_info->num_mbs = cm->MBs; + frame_info->bit_depth = cm->bit_depth; + // TODO(angiebird): Figure out how to get subsampling_x/y here +} + +static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) { + if (index < 0 || index >= FRAME_BUFFERS) return NULL; + if (cm->error.error_code != VPX_CODEC_OK) return NULL; + return &cm->buffer_pool->frame_bufs[index].buf; +} + static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP9_COMMON *cm, int index) { if (index < 0 || index >= REF_FRAMES) return NULL; if (cm->ref_frame_map[index] < 0) return NULL; @@ -405,4 +435,4 @@ static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row, } // extern "C" #endif -#endif // VP9_COMMON_VP9_ONYXC_INT_H_ +#endif // VPX_VP9_COMMON_VP9_ONYXC_INT_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_postproc.c b/media/libvpx/libvpx/vp9/common/vp9_postproc.c index dfc315eeacf8..d2c8535b01c6 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_postproc.c +++ b/media/libvpx/libvpx/vp9/common/vp9_postproc.c @@ -183,7 +183,8 @@ void vp9_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch, int rows, int cols, } #endif // CONFIG_VP9_HIGHBITDEPTH -static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, +static void deblock_and_de_macro_block(VP9_COMMON *cm, + YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, int flag, uint8_t *limits) { @@ -216,7 +217,7 @@ static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, source->uv_height, source->uv_width, ppl); } else { #endif // CONFIG_VP9_HIGHBITDEPTH - vp9_deblock(source, post, q, limits); + vp9_deblock(cm, source, post, q, limits); vpx_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); vpx_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, @@ -226,8 +227,8 @@ 
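// The FRAME_BUFFERS rewrite above changes the derivation, not the value:
// with REF_FRAMES = 1 << 3 = 8 and VP9's REFS_PER_FRAME = 3 (LAST, GOLDEN
// and ALTREF), both forms come to 12, but the new one spells out where the
// extras go: one scratch buffer for the frame being coded plus one scaled
// copy per active reference. A compile-time check of the equivalence:
typedef char frame_buffers_unchanged[(8 + 4 == 8 + 1 + 3) ? 1 : -1];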
static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source, #endif // CONFIG_VP9_HIGHBITDEPTH } -void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, - uint8_t *limits) { +void vp9_deblock(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits) { const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q + 0.0065 + 0.5); #if CONFIG_VP9_HIGHBITDEPTH @@ -252,9 +253,8 @@ void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, } else { #endif // CONFIG_VP9_HIGHBITDEPTH int mbr; - const int mb_rows = src->y_height / 16; - const int mb_cols = src->y_width / 16; - + const int mb_rows = cm->mb_rows; + const int mb_cols = cm->mb_cols; memset(limits, (unsigned char)ppl, 16 * mb_cols); for (mbr = 0; mbr < mb_rows; mbr++) { @@ -276,9 +276,9 @@ void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, #endif // CONFIG_VP9_HIGHBITDEPTH } -void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, - uint8_t *limits) { - vp9_deblock(src, dst, q, limits); +void vp9_denoise(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits) { + vp9_deblock(cm, src, dst, q, limits); } static void swap_mi_and_prev_mi(VP9_COMMON *cm) { @@ -293,7 +293,7 @@ static void swap_mi_and_prev_mi(VP9_COMMON *cm) { } int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, - vp9_ppflags_t *ppflags) { + vp9_ppflags_t *ppflags, int unscaled_width) { const int q = VPXMIN(105, cm->lf.filter_level * 2); const int flags = ppflags->post_proc_flag; YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer; @@ -359,7 +359,7 @@ int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, if (flags & (VP9D_DEMACROBLOCK | VP9D_DEBLOCK)) { if (!cm->postproc_state.limits) { cm->postproc_state.limits = - vpx_calloc(cm->width, sizeof(*cm->postproc_state.limits)); + vpx_calloc(unscaled_width, sizeof(*cm->postproc_state.limits)); } } @@ -383,21 +383,21 @@ int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, vpx_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int); } if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) { - deblock_and_de_macro_block(&cm->post_proc_buffer_int, ppbuf, + deblock_and_de_macro_block(cm, &cm->post_proc_buffer_int, ppbuf, q + (ppflags->deblocking_level - 5) * 10, 1, 0, cm->postproc_state.limits); } else if (flags & VP9D_DEBLOCK) { - vp9_deblock(&cm->post_proc_buffer_int, ppbuf, q, + vp9_deblock(cm, &cm->post_proc_buffer_int, ppbuf, q, cm->postproc_state.limits); } else { vpx_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf); } } else if (flags & VP9D_DEMACROBLOCK) { - deblock_and_de_macro_block(cm->frame_to_show, ppbuf, + deblock_and_de_macro_block(cm, cm->frame_to_show, ppbuf, q + (ppflags->deblocking_level - 5) * 10, 1, 0, cm->postproc_state.limits); } else if (flags & VP9D_DEBLOCK) { - vp9_deblock(cm->frame_to_show, ppbuf, q, cm->postproc_state.limits); + vp9_deblock(cm, cm->frame_to_show, ppbuf, q, cm->postproc_state.limits); } else { vpx_yv12_copy_frame(cm->frame_to_show, ppbuf); } diff --git a/media/libvpx/libvpx/vp9/common/vp9_postproc.h b/media/libvpx/libvpx/vp9/common/vp9_postproc.h index 6059094114b9..bbe3aed8353f 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_postproc.h +++ b/media/libvpx/libvpx/vp9/common/vp9_postproc.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
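// Threading cm through vp9_deblock()/vp9_denoise() above lets the deblocker
// read mb_rows/mb_cols from codec state rather than re-deriving them from
// buffer dimensions, and sizing the limits allocation by unscaled_width
// presumably keeps one byte per pixel column available for the widest frame
// postproc will see, even when cm->width reflects a scaled-down output.
// The usage pattern, in outline:
//
//   limits = vpx_calloc(unscaled_width, sizeof(*limits));
//   memset(limits, (unsigned char)ppl, 16 * mb_cols); /* one MB row */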
*/ -#ifndef VP9_COMMON_VP9_POSTPROC_H_ -#define VP9_COMMON_VP9_POSTPROC_H_ +#ifndef VPX_VP9_COMMON_VP9_POSTPROC_H_ +#define VPX_VP9_COMMON_VP9_POSTPROC_H_ #include "vpx_ports/mem.h" #include "vpx_scale/yv12config.h" @@ -38,16 +38,16 @@ struct VP9Common; #define MFQE_PRECISION 4 int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, - vp9_ppflags_t *flags); + vp9_ppflags_t *ppflags, int unscaled_width); -void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, - uint8_t *limits); +void vp9_denoise(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits); -void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, - uint8_t *limits); +void vp9_deblock(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits); #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_COMMON_VP9_POSTPROC_H_ +#endif // VPX_VP9_COMMON_VP9_POSTPROC_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_ppflags.h b/media/libvpx/libvpx/vp9/common/vp9_ppflags.h index b8b647bf18d8..a0e301762691 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_ppflags.h +++ b/media/libvpx/libvpx/vp9/common/vp9_ppflags.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_PPFLAGS_H_ -#define VP9_COMMON_VP9_PPFLAGS_H_ +#ifndef VPX_VP9_COMMON_VP9_PPFLAGS_H_ +#define VPX_VP9_COMMON_VP9_PPFLAGS_H_ #ifdef __cplusplus extern "C" { @@ -33,4 +33,4 @@ typedef struct { } // extern "C" #endif -#endif // VP9_COMMON_VP9_PPFLAGS_H_ +#endif // VPX_VP9_COMMON_VP9_PPFLAGS_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_pred_common.c b/media/libvpx/libvpx/vp9/common/vp9_pred_common.c index a7ddc0b95151..375cb4d76c79 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_pred_common.c +++ b/media/libvpx/libvpx/vp9/common/vp9_pred_common.c @@ -13,6 +13,32 @@ #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_seg_common.h" +int vp9_compound_reference_allowed(const VP9_COMMON *cm) { + int i; + for (i = 1; i < REFS_PER_FRAME; ++i) + if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) return 1; + + return 0; +} + +void vp9_setup_compound_reference_mode(VP9_COMMON *cm) { + if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[GOLDEN_FRAME]) { + cm->comp_fixed_ref = ALTREF_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = GOLDEN_FRAME; + } else if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[ALTREF_FRAME]) { + cm->comp_fixed_ref = GOLDEN_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } else { + cm->comp_fixed_ref = LAST_FRAME; + cm->comp_var_ref[0] = GOLDEN_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } +} + int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd) { int ctx; @@ -229,9 +255,8 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { else pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); } else { - pred_context = 1 + - 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || - edge_mi->ref_frame[1] == GOLDEN_FRAME); + pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || + edge_mi->ref_frame[1] == GOLDEN_FRAME); } } else { // inter/inter const int above_has_second = has_second_ref(above_mi); diff --git a/media/libvpx/libvpx/vp9/common/vp9_pred_common.h b/media/libvpx/libvpx/vp9/common/vp9_pred_common.h index 8400bd70f1d5..ee59669359c5 100644 --- 
a/media/libvpx/libvpx/vp9/common/vp9_pred_common.h +++ b/media/libvpx/libvpx/vp9/common/vp9_pred_common.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_PRED_COMMON_H_ -#define VP9_COMMON_VP9_PRED_COMMON_H_ +#ifndef VPX_VP9_COMMON_VP9_PRED_COMMON_H_ +#define VPX_VP9_COMMON_VP9_PRED_COMMON_H_ #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_onyxc_int.h" @@ -145,6 +145,10 @@ static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm, return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p2(xd)][1]; } +int vp9_compound_reference_allowed(const VP9_COMMON *cm); + +void vp9_setup_compound_reference_mode(VP9_COMMON *cm); + // Returns a context number for the given MB prediction signal // The mode info data structure has a one element border above and to the // left of the entries corresponding to real blocks. @@ -176,12 +180,6 @@ static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx, } } -static INLINE const vpx_prob *get_tx_probs2(TX_SIZE max_tx_size, - const MACROBLOCKD *xd, - const struct tx_probs *tx_probs) { - return get_tx_probs(max_tx_size, get_tx_size_context(xd), tx_probs); -} - static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx, struct tx_counts *tx_counts) { switch (max_tx_size) { @@ -196,4 +194,4 @@ static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx, } // extern "C" #endif -#endif // VP9_COMMON_VP9_PRED_COMMON_H_ +#endif // VPX_VP9_COMMON_VP9_PRED_COMMON_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_quant_common.h b/media/libvpx/libvpx/vp9/common/vp9_quant_common.h index 4bae4a89677a..ec8b9f4c6a74 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_quant_common.h +++ b/media/libvpx/libvpx/vp9/common/vp9_quant_common.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_COMMON_VP9_QUANT_COMMON_H_ -#define VP9_COMMON_VP9_QUANT_COMMON_H_ +#ifndef VPX_VP9_COMMON_VP9_QUANT_COMMON_H_ +#define VPX_VP9_COMMON_VP9_QUANT_COMMON_H_ #include "vpx/vpx_codec.h" #include "vp9/common/vp9_seg_common.h" @@ -33,4 +33,4 @@ int vp9_get_qindex(const struct segmentation *seg, int segment_id, } // extern "C" #endif -#endif // VP9_COMMON_VP9_QUANT_COMMON_H_ +#endif // VPX_VP9_COMMON_VP9_QUANT_COMMON_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_reconinter.c b/media/libvpx/libvpx/vp9/common/vp9_reconinter.c index a108a65153bf..ff59ff50423d 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_reconinter.c +++ b/media/libvpx/libvpx/vp9/common/vp9_reconinter.c @@ -63,14 +63,14 @@ static INLINE int round_mv_comp_q4(int value) { } static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) { - MV res = { - round_mv_comp_q4( - mi->bmi[0].as_mv[idx].as_mv.row + mi->bmi[1].as_mv[idx].as_mv.row + - mi->bmi[2].as_mv[idx].as_mv.row + mi->bmi[3].as_mv[idx].as_mv.row), - round_mv_comp_q4( - mi->bmi[0].as_mv[idx].as_mv.col + mi->bmi[1].as_mv[idx].as_mv.col + - mi->bmi[2].as_mv[idx].as_mv.col + mi->bmi[3].as_mv[idx].as_mv.col) - }; + MV res = { round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.row + + mi->bmi[1].as_mv[idx].as_mv.row + + mi->bmi[2].as_mv[idx].as_mv.row + + mi->bmi[3].as_mv[idx].as_mv.row), + round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col + + mi->bmi[1].as_mv[idx].as_mv.col + + mi->bmi[2].as_mv[idx].as_mv.col + + mi->bmi[3].as_mv[idx].as_mv.col) }; return res; } @@ -96,8 +96,8 @@ MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, int bw, const int spel_right = spel_left - SUBPEL_SHIFTS; const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS; const int spel_bottom = spel_top - SUBPEL_SHIFTS; - MV clamped_mv = { src_mv->row * (1 << (1 - ss_y)), - src_mv->col * (1 << (1 - ss_x)) }; + MV clamped_mv = { (short)(src_mv->row * (1 << (1 - ss_y))), + (short)(src_mv->col * (1 << (1 - ss_x))) }; assert(ss_x <= 1); assert(ss_y <= 1); @@ -136,7 +136,7 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, const struct scale_factors *const sf = &xd->block_refs[ref]->sf; struct buf_2d *const pre_buf = &pd->pre[ref]; struct buf_2d *const dst_buf = &pd->dst; - uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; + uint8_t *const dst = dst_buf->buf + (int64_t)dst_buf->stride * y + x; const MV mv = mi->sb_type < BLOCK_8X8 ? average_split_mvs(pd, mi, ref, block) : mi->mv[ref].as_mv; @@ -178,7 +178,7 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, xs = sf->x_step_q4; ys = sf->y_step_q4; } else { - pre = pre_buf->buf + (y * pre_buf->stride + x); + pre = pre_buf->buf + ((int64_t)y * pre_buf->stride + x); scaled_mv.row = mv_q4.row; scaled_mv.col = mv_q4.col; xs = ys = 16; diff --git a/media/libvpx/libvpx/vp9/common/vp9_reconinter.h b/media/libvpx/libvpx/vp9/common/vp9_reconinter.h index bb9291a26464..12b545831a89 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_reconinter.h +++ b/media/libvpx/libvpx/vp9/common/vp9_reconinter.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
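// The (int64_t) casts added to the reconinter offset arithmetic above guard
// against 'stride * y' wrapping in 32-bit int before the result ever reaches
// pointer arithmetic. A minimal sketch with values that overflow:
#include <stdint.h>
static uint8_t *row_ptr_sketch(uint8_t *buf, int stride, int y) {
  /* e.g. stride = 65536, y = 40000: the product 65536 * 40000            */
  /* (2,621,440,000) exceeds INT32_MAX; the cast keeps it in 64 bits.     */
  return buf + (int64_t)stride * y;
}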
*/ -#ifndef VP9_COMMON_VP9_RECONINTER_H_ -#define VP9_COMMON_VP9_RECONINTER_H_ +#ifndef VPX_VP9_COMMON_VP9_RECONINTER_H_ +#define VPX_VP9_COMMON_VP9_RECONINTER_H_ #include "vp9/common/vp9_filter.h" #include "vp9/common/vp9_onyxc_int.h" @@ -61,24 +61,25 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, const MV *mv_q3, + int dst_stride, const MV *src_mv, const struct scale_factors *sf, int w, int h, - int do_avg, const InterpKernel *kernel, + int ref, const InterpKernel *kernel, enum mv_precision precision, int x, int y); #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_build_inter_predictor( const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, - const MV *mv_q3, const struct scale_factors *sf, int w, int h, int do_avg, + const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref, const InterpKernel *kernel, enum mv_precision precision, int x, int y, int bd); #endif -static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride, - const struct scale_factors *sf) { +static INLINE int64_t scaled_buffer_offset(int x_offset, int y_offset, + int stride, + const struct scale_factors *sf) { const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset; const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset; - return y * stride + x; + return (int64_t)y * stride + x; } static INLINE void setup_pred_plane(struct buf_2d *dst, uint8_t *src, @@ -103,4 +104,4 @@ void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, } // extern "C" #endif -#endif // VP9_COMMON_VP9_RECONINTER_H_ +#endif // VPX_VP9_COMMON_VP9_RECONINTER_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_reconintra.h b/media/libvpx/libvpx/vp9/common/vp9_reconintra.h index 78e41c881182..426a35ebfa2e 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_reconintra.h +++ b/media/libvpx/libvpx/vp9/common/vp9_reconintra.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_RECONINTRA_H_ -#define VP9_COMMON_VP9_RECONINTRA_H_ +#ifndef VPX_VP9_COMMON_VP9_RECONINTRA_H_ +#define VPX_VP9_COMMON_VP9_RECONINTRA_H_ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" @@ -28,4 +28,4 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, TX_SIZE tx_size, } // extern "C" #endif -#endif // VP9_COMMON_VP9_RECONINTRA_H_ +#endif // VPX_VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl b/media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl index 22b67ecacee2..6980b9b7fbd1 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl +++ b/media/libvpx/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -62,18 +62,18 @@ add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, i add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; -add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type"; +add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { # Note that there are more specializations appended when # CONFIG_VP9_HIGHBITDEPTH is off. 
- specialize qw/vp9_iht4x4_16_add sse2/; - specialize qw/vp9_iht8x8_64_add sse2/; - specialize qw/vp9_iht16x16_256_add sse2/; + specialize qw/vp9_iht4x4_16_add neon sse2 vsx/; + specialize qw/vp9_iht8x8_64_add neon sse2 vsx/; + specialize qw/vp9_iht16x16_256_add neon sse2 vsx/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") { # Note that these specializations are appended to the above ones. - specialize qw/vp9_iht4x4_16_add neon dspr2 msa/; - specialize qw/vp9_iht8x8_64_add neon dspr2 msa/; + specialize qw/vp9_iht4x4_16_add dspr2 msa/; + specialize qw/vp9_iht8x8_64_add dspr2 msa/; specialize qw/vp9_iht16x16_256_add dspr2 msa/; } } @@ -100,7 +100,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd"; - add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *output, int pitch, int tx_type, int bd"; + add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd"; + + if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { + specialize qw/vp9_highbd_iht4x4_16_add neon sse4_1/; + specialize qw/vp9_highbd_iht8x8_64_add neon sse4_1/; + specialize qw/vp9_highbd_iht16x16_256_add neon sse4_1/; + } } # @@ -123,28 +129,22 @@ add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_ add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size"; add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; -specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64"; +specialize qw/vp9_quantize_fp neon sse2 avx2 vsx/, "$ssse3_x86_64"; add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; -specialize qw/vp9_quantize_fp_32x32 neon/, "$ssse3_x86_64"; - -add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; +specialize qw/vp9_quantize_fp_32x32 neon vsx/, "$ssse3_x86_64"; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_block_error avx2 sse2/; specialize qw/vp9_block_error_fp avx2 sse2/; - specialize qw/vp9_fdct8x8_quant neon ssse3/; - add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; specialize qw/vp9_highbd_block_error sse2/; } else { specialize qw/vp9_block_error avx2 msa sse2/; specialize qw/vp9_block_error_fp neon avx2 sse2/; - - specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/; } # fdct functions @@ -177,11 +177,20 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") { add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct 
vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; specialize qw/vp9_diamond_search_sad avx/; +# +# Apply temporal filter +# if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") { -add_proto qw/void vp9_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count"; -specialize qw/vp9_temporal_filter_apply sse4_1/; +add_proto qw/void vp9_apply_temporal_filter/, "const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator, uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count"; +specialize qw/vp9_apply_temporal_filter sse4_1/; + + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vp9_highbd_apply_temporal_filter/, "const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count"; + specialize qw/vp9_highbd_apply_temporal_filter sse4_1/; + } } + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # ENCODEMB INVOKE @@ -199,7 +208,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; - add_proto qw/void vp9_highbd_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count"; + add_proto qw/void vp9_highbd_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int *blk_fw, int use_32x32, uint32_t *accumulator, uint16_t *count"; } # End vp9_high encoder functions diff --git a/media/libvpx/libvpx/vp9/common/vp9_scale.h b/media/libvpx/libvpx/vp9/common/vp9_scale.h index ada8dbaad5c9..2f3b609483e2 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_scale.h +++ b/media/libvpx/libvpx/vp9/common/vp9_scale.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
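// Context for the add_proto/specialize churn above: vp9_rtcd_defs.pl feeds
// libvpx's rtcd generator, which emits vp9_rtcd.h binding every prototype to
// the best ISA variant detected at runtime. Schematically (names abridged;
// the generated header carries the full signatures):
//
//   extern int (*vp9_diamond_search_sad)(/* args from add_proto */);
//   static void setup_rtcd_internal(void) {
//     int flags = x86_simd_caps(); /* vpx_ports/x86.h */
//     vp9_diamond_search_sad = vp9_diamond_search_sad_c;
//     if (flags & HAS_AVX) vp9_diamond_search_sad = vp9_diamond_search_sad_avx;
//   }
//
// Adding 'neon sse2 vsx' or 'avx2' to a specialize line therefore only
// widens the set of candidates that table of function pointers may select.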
*/ -#ifndef VP9_COMMON_VP9_SCALE_H_ -#define VP9_COMMON_VP9_SCALE_H_ +#ifndef VPX_VP9_COMMON_VP9_SCALE_H_ +#define VPX_VP9_COMMON_VP9_SCALE_H_ #include "vp9/common/vp9_mv.h" #include "vpx_dsp/vpx_convolve.h" @@ -20,7 +20,7 @@ extern "C" { #define REF_SCALE_SHIFT 14 #define REF_NO_SCALE (1 << REF_SCALE_SHIFT) -#define REF_INVALID_SCALE -1 +#define REF_INVALID_SCALE (-1) struct scale_factors { int x_scale_fp; // horizontal fixed point scale factor @@ -42,7 +42,7 @@ MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf); #if CONFIG_VP9_HIGHBITDEPTH void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, int other_h, int this_w, int this_h, - int use_high); + int use_highbd); #else void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, int other_h, int this_w, int this_h); @@ -68,4 +68,4 @@ static INLINE int valid_ref_frame_size(int ref_width, int ref_height, } // extern "C" #endif -#endif // VP9_COMMON_VP9_SCALE_H_ +#endif // VPX_VP9_COMMON_VP9_SCALE_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_scan.h b/media/libvpx/libvpx/vp9/common/vp9_scan.h index b3520e7dccb5..72a9a5ec472c 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_scan.h +++ b/media/libvpx/libvpx/vp9/common/vp9_scan.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_SCAN_H_ -#define VP9_COMMON_VP9_SCAN_H_ +#ifndef VPX_VP9_COMMON_VP9_SCAN_H_ +#define VPX_VP9_COMMON_VP9_SCAN_H_ #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" @@ -55,4 +55,4 @@ static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, } // extern "C" #endif -#endif // VP9_COMMON_VP9_SCAN_H_ +#endif // VPX_VP9_COMMON_VP9_SCAN_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_seg_common.h b/media/libvpx/libvpx/vp9/common/vp9_seg_common.h index b9bf75d5802b..b63e4f499902 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_seg_common.h +++ b/media/libvpx/libvpx/vp9/common/vp9_seg_common.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_SEG_COMMON_H_ -#define VP9_COMMON_VP9_SEG_COMMON_H_ +#ifndef VPX_VP9_COMMON_VP9_SEG_COMMON_H_ +#define VPX_VP9_COMMON_VP9_SEG_COMMON_H_ #include "vpx_dsp/prob.h" @@ -78,4 +78,4 @@ extern const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)]; } // extern "C" #endif -#endif // VP9_COMMON_VP9_SEG_COMMON_H_ +#endif // VPX_VP9_COMMON_VP9_SEG_COMMON_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_thread_common.c b/media/libvpx/libvpx/vp9/common/vp9_thread_common.c index 8d44e91f2e52..b3d50162b2cb 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_thread_common.c +++ b/media/libvpx/libvpx/vp9/common/vp9_thread_common.c @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
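// Orientation for the vp9_thread_common.c changes that follow: loop-filter
// workers stride across superblock rows, so with N active workers, worker i
// filters SB rows i, i + N, i + 2N, ... Each row publishes progress through
// cur_sb_col[r] (sync_write), and before filtering column c of row r a
// worker waits until row r - 1 is at least nsync superblocks ahead:
//
//   while (c > cur_sb_col[r - 1] - nsync)
//     pthread_cond_wait(&cond[r - 1], &mutex[r - 1]);
//
// Capping N at VPXMIN(num_tile_cols, sb_rows) keeps that per-row indexing
// valid when a mid-stream resize leaves fewer SB rows than workers.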
 */

+#include <assert.h>
+#include <limits.h>
 #include "./vpx_config.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 #include "vpx_mem/vpx_mem.h"
@@ -38,11 +40,11 @@ static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) {
   const int nsync = lf_sync->sync_range;

   if (r && !(c & (nsync - 1))) {
-    pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1];
+    pthread_mutex_t *const mutex = &lf_sync->mutex[r - 1];
     mutex_lock(mutex);

     while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
-      pthread_cond_wait(&lf_sync->cond_[r - 1], mutex);
+      pthread_cond_wait(&lf_sync->cond[r - 1], mutex);
     }
     pthread_mutex_unlock(mutex);
   }
@@ -69,12 +71,12 @@ static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c,
   }

   if (sig) {
-    mutex_lock(&lf_sync->mutex_[r]);
+    mutex_lock(&lf_sync->mutex[r]);

     lf_sync->cur_sb_col[r] = cur;

-    pthread_cond_signal(&lf_sync->cond_[r]);
-    pthread_mutex_unlock(&lf_sync->mutex_[r]);
+    pthread_cond_signal(&lf_sync->cond[r]);
+    pthread_mutex_unlock(&lf_sync->mutex[r]);
   }
 #else
   (void)lf_sync;
@@ -91,6 +93,7 @@ static INLINE void thread_loop_filter_rows(
     int y_only, VP9LfSync *const lf_sync) {
   const int num_planes = y_only ? 1 : MAX_MB_PLANE;
   const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
+  const int num_active_workers = lf_sync->num_active_workers;
   int mi_row, mi_col;
   enum lf_path path;
   if (y_only)
@@ -102,8 +105,10 @@
   else
     path = LF_PATH_SLOW;

+  assert(num_active_workers > 0);
+
   for (mi_row = start; mi_row < stop;
-       mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
+       mi_row += num_active_workers * MI_BLOCK_SIZE) {
     MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
     LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0);

@@ -157,10 +162,12 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm,
   const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
   // Number of superblock rows and cols
   const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
-  // Decoder may allocate more threads than number of tiles based on user's
-  // input.
-  const int tile_cols = 1 << cm->log2_tile_cols;
-  const int num_workers = VPXMIN(nworkers, tile_cols);
+  const int num_tile_cols = 1 << cm->log2_tile_cols;
+  // Limit the number of workers to prevent changes in frame dimensions from
+  // causing incorrect sync calculations when sb_rows < threads/tile_cols.
+  // Further restrict them by the number of tile columns should the user
+  // request more as this implementation doesn't scale well beyond that.
+  const int num_workers = VPXMIN(nworkers, VPXMIN(num_tile_cols, sb_rows));
   int i;

   if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
@@ -168,6 +175,7 @@
     vp9_loop_filter_dealloc(lf_sync);
     vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
   }
+  lf_sync->num_active_workers = num_workers;

   // Initialize cur_sb_col to -1 for all SB rows.
memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); @@ -231,6 +239,28 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm, workers, num_workers, lf_sync); } +void vp9_lpf_mt_init(VP9LfSync *lf_sync, VP9_COMMON *cm, int frame_filter_level, + int num_workers) { + const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + + if (!frame_filter_level) return; + + if (!lf_sync->sync_range || sb_rows != lf_sync->rows || + num_workers > lf_sync->num_workers) { + vp9_loop_filter_dealloc(lf_sync); + vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); + } + + // Initialize cur_sb_col to -1 for all SB rows. + memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); + + lf_sync->corrupted = 0; + + memset(lf_sync->num_tiles_done, 0, + sizeof(*lf_sync->num_tiles_done) * sb_rows); + cm->lf_row = 0; +} + // Set up nsync by width. static INLINE int get_sync_range(int width) { // nsync numbers are picked by testing. For example, for 4k @@ -253,19 +283,41 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, { int i; - CHECK_MEM_ERROR(cm, lf_sync->mutex_, - vpx_malloc(sizeof(*lf_sync->mutex_) * rows)); - if (lf_sync->mutex_) { + CHECK_MEM_ERROR(cm, lf_sync->mutex, + vpx_malloc(sizeof(*lf_sync->mutex) * rows)); + if (lf_sync->mutex) { for (i = 0; i < rows; ++i) { - pthread_mutex_init(&lf_sync->mutex_[i], NULL); + pthread_mutex_init(&lf_sync->mutex[i], NULL); } } - CHECK_MEM_ERROR(cm, lf_sync->cond_, - vpx_malloc(sizeof(*lf_sync->cond_) * rows)); - if (lf_sync->cond_) { + CHECK_MEM_ERROR(cm, lf_sync->cond, + vpx_malloc(sizeof(*lf_sync->cond) * rows)); + if (lf_sync->cond) { for (i = 0; i < rows; ++i) { - pthread_cond_init(&lf_sync->cond_[i], NULL); + pthread_cond_init(&lf_sync->cond[i], NULL); + } + } + + CHECK_MEM_ERROR(cm, lf_sync->lf_mutex, + vpx_malloc(sizeof(*lf_sync->lf_mutex))); + pthread_mutex_init(lf_sync->lf_mutex, NULL); + + CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex, + vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows)); + if (lf_sync->recon_done_mutex) { + int i; + for (i = 0; i < rows; ++i) { + pthread_mutex_init(&lf_sync->recon_done_mutex[i], NULL); + } + } + + CHECK_MEM_ERROR(cm, lf_sync->recon_done_cond, + vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows)); + if (lf_sync->recon_done_cond) { + int i; + for (i = 0; i < rows; ++i) { + pthread_cond_init(&lf_sync->recon_done_cond[i], NULL); } } } @@ -274,39 +326,170 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, CHECK_MEM_ERROR(cm, lf_sync->lfdata, vpx_malloc(num_workers * sizeof(*lf_sync->lfdata))); lf_sync->num_workers = num_workers; + lf_sync->num_active_workers = lf_sync->num_workers; CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); + CHECK_MEM_ERROR(cm, lf_sync->num_tiles_done, + vpx_malloc(sizeof(*lf_sync->num_tiles_done) * + mi_cols_aligned_to_sb(cm->mi_rows) >> + MI_BLOCK_SIZE_LOG2)); + // Set up nsync. 
lf_sync->sync_range = get_sync_range(width); } // Deallocate lf synchronization related mutex and data void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { - if (lf_sync != NULL) { -#if CONFIG_MULTITHREAD - int i; + assert(lf_sync != NULL); - if (lf_sync->mutex_ != NULL) { - for (i = 0; i < lf_sync->rows; ++i) { - pthread_mutex_destroy(&lf_sync->mutex_[i]); - } - vpx_free(lf_sync->mutex_); +#if CONFIG_MULTITHREAD + if (lf_sync->mutex != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_mutex_destroy(&lf_sync->mutex[i]); } - if (lf_sync->cond_ != NULL) { - for (i = 0; i < lf_sync->rows; ++i) { - pthread_cond_destroy(&lf_sync->cond_[i]); - } - vpx_free(lf_sync->cond_); - } -#endif // CONFIG_MULTITHREAD - vpx_free(lf_sync->lfdata); - vpx_free(lf_sync->cur_sb_col); - // clear the structure as the source of this call may be a resize in which - // case this call will be followed by an _alloc() which may fail. - vp9_zero(*lf_sync); + vpx_free(lf_sync->mutex); } + if (lf_sync->cond != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_cond_destroy(&lf_sync->cond[i]); + } + vpx_free(lf_sync->cond); + } + if (lf_sync->recon_done_mutex != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]); + } + vpx_free(lf_sync->recon_done_mutex); + } + + if (lf_sync->lf_mutex != NULL) { + pthread_mutex_destroy(lf_sync->lf_mutex); + vpx_free(lf_sync->lf_mutex); + } + if (lf_sync->recon_done_cond != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_cond_destroy(&lf_sync->recon_done_cond[i]); + } + vpx_free(lf_sync->recon_done_cond); + } +#endif // CONFIG_MULTITHREAD + + vpx_free(lf_sync->lfdata); + vpx_free(lf_sync->cur_sb_col); + vpx_free(lf_sync->num_tiles_done); + // clear the structure as the source of this call may be a resize in which + // case this call will be followed by an _alloc() which may fail. + vp9_zero(*lf_sync); +} + +static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) { + int return_val = -1; + int cur_row; + const int max_rows = cm->mi_rows; + +#if CONFIG_MULTITHREAD + const int tile_cols = 1 << cm->log2_tile_cols; + + pthread_mutex_lock(lf_sync->lf_mutex); + if (cm->lf_row < max_rows) { + cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2; + return_val = cm->lf_row; + cm->lf_row += MI_BLOCK_SIZE; + if (cm->lf_row < max_rows) { + /* If this is not the last row, make sure the next row is also decoded. + * This is because the intra predict has to happen before loop filter */ + cur_row += 1; + } + } + pthread_mutex_unlock(lf_sync->lf_mutex); + + if (return_val == -1) return return_val; + + pthread_mutex_lock(&lf_sync->recon_done_mutex[cur_row]); + if (lf_sync->num_tiles_done[cur_row] < tile_cols) { + pthread_cond_wait(&lf_sync->recon_done_cond[cur_row], + &lf_sync->recon_done_mutex[cur_row]); + } + pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]); + pthread_mutex_lock(lf_sync->lf_mutex); + if (lf_sync->corrupted) { + int row = return_val >> MI_BLOCK_SIZE_LOG2; + pthread_mutex_lock(&lf_sync->mutex[row]); + lf_sync->cur_sb_col[row] = INT_MAX; + pthread_cond_signal(&lf_sync->cond[row]); + pthread_mutex_unlock(&lf_sync->mutex[row]); + return_val = -1; + } + pthread_mutex_unlock(lf_sync->lf_mutex); +#else + (void)lf_sync; + if (cm->lf_row < max_rows) { + cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2; + return_val = cm->lf_row; + cm->lf_row += MI_BLOCK_SIZE; + if (cm->lf_row < max_rows) { + /* If this is not the last row, make sure the next row is also decoded. 
+ * This is because the intra predict has to happen before loop filter */ + cur_row += 1; + } + } +#endif // CONFIG_MULTITHREAD + + return return_val; +} + +void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync) { + int mi_row; + VP9_COMMON *cm = lf_data->cm; + + while ((mi_row = get_next_row(cm, lf_sync)) != -1 && mi_row < cm->mi_rows) { + lf_data->start = mi_row; + lf_data->stop = mi_row + MI_BLOCK_SIZE; + + thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, + lf_data->start, lf_data->stop, lf_data->y_only, + lf_sync); + } +} + +void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row, + int corrupted) { +#if CONFIG_MULTITHREAD + pthread_mutex_lock(lf_sync->lf_mutex); + lf_sync->corrupted |= corrupted; + pthread_mutex_unlock(lf_sync->lf_mutex); + pthread_mutex_lock(&lf_sync->recon_done_mutex[row]); + lf_sync->num_tiles_done[row] += 1; + if (num_tiles == lf_sync->num_tiles_done[row]) { + if (is_last_row) { + /* The last 2 rows wait on the last row to be done. + * So, we have to broadcast the signal in this case. + */ + pthread_cond_broadcast(&lf_sync->recon_done_cond[row]); + } else { + pthread_cond_signal(&lf_sync->recon_done_cond[row]); + } + } + pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]); +#else + (void)lf_sync; + (void)num_tiles; + (void)row; + (void)is_last_row; + (void)corrupted; +#endif // CONFIG_MULTITHREAD +} + +void vp9_loopfilter_job(LFWorkerData *lf_data, VP9LfSync *lf_sync) { + thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, + lf_data->start, lf_data->stop, lf_data->y_only, + lf_sync); } // Accumulate frame counts. diff --git a/media/libvpx/libvpx/vp9/common/vp9_thread_common.h b/media/libvpx/libvpx/vp9/common/vp9_thread_common.h index 0f7c3ff74883..5df0117f124b 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_thread_common.h +++ b/media/libvpx/libvpx/vp9/common/vp9_thread_common.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_THREAD_COMMON_H_ -#define VP9_COMMON_VP9_THREAD_COMMON_H_ +#ifndef VPX_VP9_COMMON_VP9_THREAD_COMMON_H_ +#define VPX_VP9_COMMON_VP9_THREAD_COMMON_H_ #include "./vpx_config.h" #include "vp9/common/vp9_loopfilter.h" #include "vpx_util/vpx_thread.h" @@ -24,8 +24,8 @@ struct FRAME_COUNTS; // Loopfilter row synchronization typedef struct VP9LfSyncData { #if CONFIG_MULTITHREAD - pthread_mutex_t *mutex_; - pthread_cond_t *cond_; + pthread_mutex_t *mutex; + pthread_cond_t *cond; #endif // Allocate memory to store the loop-filtered superblock index in each row. int *cur_sb_col; @@ -36,7 +36,16 @@ typedef struct VP9LfSyncData { // Row-based parallel loopfilter data LFWorkerData *lfdata; - int num_workers; + int num_workers; // number of allocated workers. + int num_active_workers; // number of scheduled workers. + +#if CONFIG_MULTITHREAD + pthread_mutex_t *lf_mutex; + pthread_mutex_t *recon_done_mutex; + pthread_cond_t *recon_done_cond; +#endif + int *num_tiles_done; + int corrupted; } VP9LfSync; // Allocate memory for loopfilter row synchronization. 
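// The vp9_lpf_mt_init/vp9_loopfilter_rows/vp9_set_row additions above form
// a producer/consumer handshake between tile reconstruction and in-loop
// filtering; a sketch of the presumed flow:
//
//   tile worker, after reconstructing SB row r:
//     vp9_set_row(lf_sync, num_tiles, r, is_last_row, corrupted);
//     /* bumps num_tiles_done[r] and signals recon_done_cond[r] once all */
//     /* tile columns have reported in                                   */
//
//   loop-filter worker:
//     while ((mi_row = get_next_row(cm, lf_sync)) != -1) { /* filter row */ }
//     /* get_next_row blocks until row r + 1 is reconstructed, since the */
//     /* next row's intra prediction must read unfiltered pixels         */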
@@ -53,6 +62,17 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm, int partial_frame, VPxWorker *workers, int num_workers, VP9LfSync *lf_sync); +// Multi-threaded loopfilter initialisations +void vp9_lpf_mt_init(VP9LfSync *lf_sync, struct VP9Common *cm, + int frame_filter_level, int num_workers); + +void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync); + +void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row, + int corrupted); + +void vp9_loopfilter_job(LFWorkerData *lf_data, VP9LfSync *lf_sync); + void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum, const struct FRAME_COUNTS *counts, int is_dec); @@ -60,4 +80,4 @@ void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum, } // extern "C" #endif -#endif // VP9_COMMON_VP9_THREAD_COMMON_H_ +#endif // VPX_VP9_COMMON_VP9_THREAD_COMMON_H_ diff --git a/media/libvpx/libvpx/vp9/common/vp9_tile_common.h b/media/libvpx/libvpx/vp9/common/vp9_tile_common.h index 1b11c2680ded..4ccf0a3d5f62 100644 --- a/media/libvpx/libvpx/vp9/common/vp9_tile_common.h +++ b/media/libvpx/libvpx/vp9/common/vp9_tile_common.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_TILE_COMMON_H_ -#define VP9_COMMON_VP9_TILE_COMMON_H_ +#ifndef VPX_VP9_COMMON_VP9_TILE_COMMON_H_ +#define VPX_VP9_COMMON_VP9_TILE_COMMON_H_ #ifdef __cplusplus extern "C" { @@ -37,4 +37,4 @@ void vp9_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols, } // extern "C" #endif -#endif // VP9_COMMON_VP9_TILE_COMMON_H_ +#endif // VPX_VP9_COMMON_VP9_TILE_COMMON_H_ diff --git a/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c b/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c new file mode 100644 index 000000000000..57b79a732dac --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c @@ -0,0 +1,419 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vp9_rtcd.h" +#include "vp9/common/vp9_idct.h" +#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h" +#include "vpx_dsp/x86/inv_txfm_sse2.h" +#include "vpx_dsp/x86/transpose_sse2.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" + +static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in, + const int c, + __m128i *const s) { + const __m128i pair_c = pair_set_epi32(4 * c, 0); + __m128i x[2]; + + extend_64bit(in, x); + s[0] = _mm_mul_epi32(pair_c, x[0]); + s[1] = _mm_mul_epi32(pair_c, x[1]); +} + +static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0, + const __m128i in1, + const int c0, const int c1, + __m128i *const s0, + __m128i *const s1) { + const __m128i pair_c0 = pair_set_epi32(4 * c0, 0); + const __m128i pair_c1 = pair_set_epi32(4 * c1, 0); + __m128i t00[2], t01[2], t10[2], t11[2]; + __m128i x0[2], x1[2]; + + extend_64bit(in0, x0); + extend_64bit(in1, x1); + t00[0] = _mm_mul_epi32(pair_c0, x0[0]); + t00[1] = _mm_mul_epi32(pair_c0, x0[1]); + t01[0] = _mm_mul_epi32(pair_c0, x1[0]); + t01[1] = _mm_mul_epi32(pair_c0, x1[1]); + t10[0] = _mm_mul_epi32(pair_c1, x0[0]); + t10[1] = _mm_mul_epi32(pair_c1, x0[1]); + t11[0] = _mm_mul_epi32(pair_c1, x1[0]); + t11[1] = _mm_mul_epi32(pair_c1, x1[1]); + + s0[0] = _mm_add_epi64(t00[0], t11[0]); + s0[1] = _mm_add_epi64(t00[1], t11[1]); + s1[0] = _mm_sub_epi64(t10[0], t01[0]); + s1[1] = _mm_sub_epi64(t10[1], t01[1]); +} + +static void highbd_iadst16_4col_sse4_1(__m128i *const io /*io[16]*/) { + __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2], s8[2], s9[2], + s10[2], s11[2], s12[2], s13[2], s14[2], s15[2]; + __m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2], x8[2], x9[2], + x10[2], x11[2], x12[2], x13[2], x14[2], x15[2]; + + // stage 1 + highbd_iadst_butterfly_sse4_1(io[15], io[0], cospi_1_64, cospi_31_64, s0, s1); + highbd_iadst_butterfly_sse4_1(io[13], io[2], cospi_5_64, cospi_27_64, s2, s3); + highbd_iadst_butterfly_sse4_1(io[11], io[4], cospi_9_64, cospi_23_64, s4, s5); + highbd_iadst_butterfly_sse4_1(io[9], io[6], cospi_13_64, cospi_19_64, s6, s7); + highbd_iadst_butterfly_sse4_1(io[7], io[8], cospi_17_64, cospi_15_64, s8, s9); + highbd_iadst_butterfly_sse4_1(io[5], io[10], cospi_21_64, cospi_11_64, s10, + s11); + highbd_iadst_butterfly_sse4_1(io[3], io[12], cospi_25_64, cospi_7_64, s12, + s13); + highbd_iadst_butterfly_sse4_1(io[1], io[14], cospi_29_64, cospi_3_64, s14, + s15); + + x0[0] = _mm_add_epi64(s0[0], s8[0]); + x0[1] = _mm_add_epi64(s0[1], s8[1]); + x1[0] = _mm_add_epi64(s1[0], s9[0]); + x1[1] = _mm_add_epi64(s1[1], s9[1]); + x2[0] = _mm_add_epi64(s2[0], s10[0]); + x2[1] = _mm_add_epi64(s2[1], s10[1]); + x3[0] = _mm_add_epi64(s3[0], s11[0]); + x3[1] = _mm_add_epi64(s3[1], s11[1]); + x4[0] = _mm_add_epi64(s4[0], s12[0]); + x4[1] = _mm_add_epi64(s4[1], s12[1]); + x5[0] = _mm_add_epi64(s5[0], s13[0]); + x5[1] = _mm_add_epi64(s5[1], s13[1]); + x6[0] = _mm_add_epi64(s6[0], s14[0]); + x6[1] = _mm_add_epi64(s6[1], s14[1]); + x7[0] = _mm_add_epi64(s7[0], s15[0]); + x7[1] = _mm_add_epi64(s7[1], s15[1]); + x8[0] = _mm_sub_epi64(s0[0], s8[0]); + x8[1] = _mm_sub_epi64(s0[1], s8[1]); + x9[0] = _mm_sub_epi64(s1[0], s9[0]); + x9[1] = _mm_sub_epi64(s1[1], s9[1]); + x10[0] = _mm_sub_epi64(s2[0], s10[0]); + x10[1] = _mm_sub_epi64(s2[1], s10[1]); + x11[0] = _mm_sub_epi64(s3[0], s11[0]); + x11[1] = _mm_sub_epi64(s3[1], s11[1]); + x12[0] = _mm_sub_epi64(s4[0], s12[0]); + x12[1] = _mm_sub_epi64(s4[1], s12[1]); + x13[0] = _mm_sub_epi64(s5[0], s13[0]); + x13[1] = _mm_sub_epi64(s5[1], s13[1]); + 
x14[0] = _mm_sub_epi64(s6[0], s14[0]); + x14[1] = _mm_sub_epi64(s6[1], s14[1]); + x15[0] = _mm_sub_epi64(s7[0], s15[0]); + x15[1] = _mm_sub_epi64(s7[1], s15[1]); + + x0[0] = dct_const_round_shift_64bit(x0[0]); + x0[1] = dct_const_round_shift_64bit(x0[1]); + x1[0] = dct_const_round_shift_64bit(x1[0]); + x1[1] = dct_const_round_shift_64bit(x1[1]); + x2[0] = dct_const_round_shift_64bit(x2[0]); + x2[1] = dct_const_round_shift_64bit(x2[1]); + x3[0] = dct_const_round_shift_64bit(x3[0]); + x3[1] = dct_const_round_shift_64bit(x3[1]); + x4[0] = dct_const_round_shift_64bit(x4[0]); + x4[1] = dct_const_round_shift_64bit(x4[1]); + x5[0] = dct_const_round_shift_64bit(x5[0]); + x5[1] = dct_const_round_shift_64bit(x5[1]); + x6[0] = dct_const_round_shift_64bit(x6[0]); + x6[1] = dct_const_round_shift_64bit(x6[1]); + x7[0] = dct_const_round_shift_64bit(x7[0]); + x7[1] = dct_const_round_shift_64bit(x7[1]); + x8[0] = dct_const_round_shift_64bit(x8[0]); + x8[1] = dct_const_round_shift_64bit(x8[1]); + x9[0] = dct_const_round_shift_64bit(x9[0]); + x9[1] = dct_const_round_shift_64bit(x9[1]); + x10[0] = dct_const_round_shift_64bit(x10[0]); + x10[1] = dct_const_round_shift_64bit(x10[1]); + x11[0] = dct_const_round_shift_64bit(x11[0]); + x11[1] = dct_const_round_shift_64bit(x11[1]); + x12[0] = dct_const_round_shift_64bit(x12[0]); + x12[1] = dct_const_round_shift_64bit(x12[1]); + x13[0] = dct_const_round_shift_64bit(x13[0]); + x13[1] = dct_const_round_shift_64bit(x13[1]); + x14[0] = dct_const_round_shift_64bit(x14[0]); + x14[1] = dct_const_round_shift_64bit(x14[1]); + x15[0] = dct_const_round_shift_64bit(x15[0]); + x15[1] = dct_const_round_shift_64bit(x15[1]); + x0[0] = pack_4(x0[0], x0[1]); + x1[0] = pack_4(x1[0], x1[1]); + x2[0] = pack_4(x2[0], x2[1]); + x3[0] = pack_4(x3[0], x3[1]); + x4[0] = pack_4(x4[0], x4[1]); + x5[0] = pack_4(x5[0], x5[1]); + x6[0] = pack_4(x6[0], x6[1]); + x7[0] = pack_4(x7[0], x7[1]); + x8[0] = pack_4(x8[0], x8[1]); + x9[0] = pack_4(x9[0], x9[1]); + x10[0] = pack_4(x10[0], x10[1]); + x11[0] = pack_4(x11[0], x11[1]); + x12[0] = pack_4(x12[0], x12[1]); + x13[0] = pack_4(x13[0], x13[1]); + x14[0] = pack_4(x14[0], x14[1]); + x15[0] = pack_4(x15[0], x15[1]); + + // stage 2 + s0[0] = x0[0]; + s1[0] = x1[0]; + s2[0] = x2[0]; + s3[0] = x3[0]; + s4[0] = x4[0]; + s5[0] = x5[0]; + s6[0] = x6[0]; + s7[0] = x7[0]; + x0[0] = _mm_add_epi32(s0[0], s4[0]); + x1[0] = _mm_add_epi32(s1[0], s5[0]); + x2[0] = _mm_add_epi32(s2[0], s6[0]); + x3[0] = _mm_add_epi32(s3[0], s7[0]); + x4[0] = _mm_sub_epi32(s0[0], s4[0]); + x5[0] = _mm_sub_epi32(s1[0], s5[0]); + x6[0] = _mm_sub_epi32(s2[0], s6[0]); + x7[0] = _mm_sub_epi32(s3[0], s7[0]); + + highbd_iadst_butterfly_sse4_1(x8[0], x9[0], cospi_4_64, cospi_28_64, s8, s9); + highbd_iadst_butterfly_sse4_1(x10[0], x11[0], cospi_20_64, cospi_12_64, s10, + s11); + highbd_iadst_butterfly_sse4_1(x13[0], x12[0], cospi_28_64, cospi_4_64, s13, + s12); + highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_12_64, cospi_20_64, s15, + s14); + + x8[0] = _mm_add_epi64(s8[0], s12[0]); + x8[1] = _mm_add_epi64(s8[1], s12[1]); + x9[0] = _mm_add_epi64(s9[0], s13[0]); + x9[1] = _mm_add_epi64(s9[1], s13[1]); + x10[0] = _mm_add_epi64(s10[0], s14[0]); + x10[1] = _mm_add_epi64(s10[1], s14[1]); + x11[0] = _mm_add_epi64(s11[0], s15[0]); + x11[1] = _mm_add_epi64(s11[1], s15[1]); + x12[0] = _mm_sub_epi64(s8[0], s12[0]); + x12[1] = _mm_sub_epi64(s8[1], s12[1]); + x13[0] = _mm_sub_epi64(s9[0], s13[0]); + x13[1] = _mm_sub_epi64(s9[1], s13[1]); + x14[0] = _mm_sub_epi64(s10[0], s14[0]); + x14[1] = 
_mm_sub_epi64(s10[1], s14[1]); + x15[0] = _mm_sub_epi64(s11[0], s15[0]); + x15[1] = _mm_sub_epi64(s11[1], s15[1]); + x8[0] = dct_const_round_shift_64bit(x8[0]); + x8[1] = dct_const_round_shift_64bit(x8[1]); + x9[0] = dct_const_round_shift_64bit(x9[0]); + x9[1] = dct_const_round_shift_64bit(x9[1]); + x10[0] = dct_const_round_shift_64bit(x10[0]); + x10[1] = dct_const_round_shift_64bit(x10[1]); + x11[0] = dct_const_round_shift_64bit(x11[0]); + x11[1] = dct_const_round_shift_64bit(x11[1]); + x12[0] = dct_const_round_shift_64bit(x12[0]); + x12[1] = dct_const_round_shift_64bit(x12[1]); + x13[0] = dct_const_round_shift_64bit(x13[0]); + x13[1] = dct_const_round_shift_64bit(x13[1]); + x14[0] = dct_const_round_shift_64bit(x14[0]); + x14[1] = dct_const_round_shift_64bit(x14[1]); + x15[0] = dct_const_round_shift_64bit(x15[0]); + x15[1] = dct_const_round_shift_64bit(x15[1]); + x8[0] = pack_4(x8[0], x8[1]); + x9[0] = pack_4(x9[0], x9[1]); + x10[0] = pack_4(x10[0], x10[1]); + x11[0] = pack_4(x11[0], x11[1]); + x12[0] = pack_4(x12[0], x12[1]); + x13[0] = pack_4(x13[0], x13[1]); + x14[0] = pack_4(x14[0], x14[1]); + x15[0] = pack_4(x15[0], x15[1]); + + // stage 3 + s0[0] = x0[0]; + s1[0] = x1[0]; + s2[0] = x2[0]; + s3[0] = x3[0]; + highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5); + highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6); + s8[0] = x8[0]; + s9[0] = x9[0]; + s10[0] = x10[0]; + s11[0] = x11[0]; + highbd_iadst_butterfly_sse4_1(x12[0], x13[0], cospi_8_64, cospi_24_64, s12, + s13); + highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_24_64, cospi_8_64, s15, + s14); + + x0[0] = _mm_add_epi32(s0[0], s2[0]); + x1[0] = _mm_add_epi32(s1[0], s3[0]); + x2[0] = _mm_sub_epi32(s0[0], s2[0]); + x3[0] = _mm_sub_epi32(s1[0], s3[0]); + x4[0] = _mm_add_epi64(s4[0], s6[0]); + x4[1] = _mm_add_epi64(s4[1], s6[1]); + x5[0] = _mm_add_epi64(s5[0], s7[0]); + x5[1] = _mm_add_epi64(s5[1], s7[1]); + x6[0] = _mm_sub_epi64(s4[0], s6[0]); + x6[1] = _mm_sub_epi64(s4[1], s6[1]); + x7[0] = _mm_sub_epi64(s5[0], s7[0]); + x7[1] = _mm_sub_epi64(s5[1], s7[1]); + x4[0] = dct_const_round_shift_64bit(x4[0]); + x4[1] = dct_const_round_shift_64bit(x4[1]); + x5[0] = dct_const_round_shift_64bit(x5[0]); + x5[1] = dct_const_round_shift_64bit(x5[1]); + x6[0] = dct_const_round_shift_64bit(x6[0]); + x6[1] = dct_const_round_shift_64bit(x6[1]); + x7[0] = dct_const_round_shift_64bit(x7[0]); + x7[1] = dct_const_round_shift_64bit(x7[1]); + x4[0] = pack_4(x4[0], x4[1]); + x5[0] = pack_4(x5[0], x5[1]); + x6[0] = pack_4(x6[0], x6[1]); + x7[0] = pack_4(x7[0], x7[1]); + x8[0] = _mm_add_epi32(s8[0], s10[0]); + x9[0] = _mm_add_epi32(s9[0], s11[0]); + x10[0] = _mm_sub_epi32(s8[0], s10[0]); + x11[0] = _mm_sub_epi32(s9[0], s11[0]); + x12[0] = _mm_add_epi64(s12[0], s14[0]); + x12[1] = _mm_add_epi64(s12[1], s14[1]); + x13[0] = _mm_add_epi64(s13[0], s15[0]); + x13[1] = _mm_add_epi64(s13[1], s15[1]); + x14[0] = _mm_sub_epi64(s12[0], s14[0]); + x14[1] = _mm_sub_epi64(s12[1], s14[1]); + x15[0] = _mm_sub_epi64(s13[0], s15[0]); + x15[1] = _mm_sub_epi64(s13[1], s15[1]); + x12[0] = dct_const_round_shift_64bit(x12[0]); + x12[1] = dct_const_round_shift_64bit(x12[1]); + x13[0] = dct_const_round_shift_64bit(x13[0]); + x13[1] = dct_const_round_shift_64bit(x13[1]); + x14[0] = dct_const_round_shift_64bit(x14[0]); + x14[1] = dct_const_round_shift_64bit(x14[1]); + x15[0] = dct_const_round_shift_64bit(x15[0]); + x15[1] = dct_const_round_shift_64bit(x15[1]); + x12[0] = pack_4(x12[0], x12[1]); + x13[0] = pack_4(x13[0], x13[1]); 
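/* Illustrative note, not part of the patch: every stage above and below
 * follows the same widen-multiply-round-narrow idiom.  extend_64bit()
 * widens the 32-bit coefficients to 64 bits across a register pair,
 * _mm_mul_epi32() multiplies them against the cosine constants,
 * dct_const_round_shift_64bit() applies the DCT rounding shift to each
 * 64-bit half, and pack_4() narrows the pair back into one 32-bit vector.
 * Per lane this is roughly the scalar
 *   out = (int32_t)ROUND_POWER_OF_TWO_64(acc, DCT_CONST_BITS); */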
+ x14[0] = pack_4(x14[0], x14[1]); + x15[0] = pack_4(x15[0], x15[1]); + + // stage 4 + s2[0] = _mm_add_epi32(x2[0], x3[0]); + s3[0] = _mm_sub_epi32(x2[0], x3[0]); + s6[0] = _mm_add_epi32(x7[0], x6[0]); + s7[0] = _mm_sub_epi32(x7[0], x6[0]); + s10[0] = _mm_add_epi32(x11[0], x10[0]); + s11[0] = _mm_sub_epi32(x11[0], x10[0]); + s14[0] = _mm_add_epi32(x14[0], x15[0]); + s15[0] = _mm_sub_epi32(x14[0], x15[0]); + highbd_iadst_half_butterfly_sse4_1(s2[0], -cospi_16_64, s2); + highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3); + highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6); + highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7); + highbd_iadst_half_butterfly_sse4_1(s10[0], cospi_16_64, s10); + highbd_iadst_half_butterfly_sse4_1(s11[0], cospi_16_64, s11); + highbd_iadst_half_butterfly_sse4_1(s14[0], -cospi_16_64, s14); + highbd_iadst_half_butterfly_sse4_1(s15[0], cospi_16_64, s15); + + x2[0] = dct_const_round_shift_64bit(s2[0]); + x2[1] = dct_const_round_shift_64bit(s2[1]); + x3[0] = dct_const_round_shift_64bit(s3[0]); + x3[1] = dct_const_round_shift_64bit(s3[1]); + x6[0] = dct_const_round_shift_64bit(s6[0]); + x6[1] = dct_const_round_shift_64bit(s6[1]); + x7[0] = dct_const_round_shift_64bit(s7[0]); + x7[1] = dct_const_round_shift_64bit(s7[1]); + x10[0] = dct_const_round_shift_64bit(s10[0]); + x10[1] = dct_const_round_shift_64bit(s10[1]); + x11[0] = dct_const_round_shift_64bit(s11[0]); + x11[1] = dct_const_round_shift_64bit(s11[1]); + x14[0] = dct_const_round_shift_64bit(s14[0]); + x14[1] = dct_const_round_shift_64bit(s14[1]); + x15[0] = dct_const_round_shift_64bit(s15[0]); + x15[1] = dct_const_round_shift_64bit(s15[1]); + x2[0] = pack_4(x2[0], x2[1]); + x3[0] = pack_4(x3[0], x3[1]); + x6[0] = pack_4(x6[0], x6[1]); + x7[0] = pack_4(x7[0], x7[1]); + x10[0] = pack_4(x10[0], x10[1]); + x11[0] = pack_4(x11[0], x11[1]); + x14[0] = pack_4(x14[0], x14[1]); + x15[0] = pack_4(x15[0], x15[1]); + + io[0] = x0[0]; + io[1] = _mm_sub_epi32(_mm_setzero_si128(), x8[0]); + io[2] = x12[0]; + io[3] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]); + io[4] = x6[0]; + io[5] = x14[0]; + io[6] = x10[0]; + io[7] = x2[0]; + io[8] = x3[0]; + io[9] = x11[0]; + io[10] = x15[0]; + io[11] = x7[0]; + io[12] = x5[0]; + io[13] = _mm_sub_epi32(_mm_setzero_si128(), x13[0]); + io[14] = x9[0]; + io[15] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]); +} + +void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + int i; + __m128i out[16], *in; + + if (bd == 8) { + __m128i l[16], r[16]; + + in = l; + for (i = 0; i < 2; i++) { + highbd_load_pack_transpose_32bit_8x8(&input[0], 16, &in[0]); + highbd_load_pack_transpose_32bit_8x8(&input[8], 16, &in[8]); + if (tx_type == DCT_DCT || tx_type == ADST_DCT) { + idct16_8col(in, in); + } else { + vpx_iadst16_8col_sse2(in); + } + in = r; + input += 128; + } + + for (i = 0; i < 16; i += 8) { + int j; + transpose_16bit_8x8(l + i, out); + transpose_16bit_8x8(r + i, out + 8); + if (tx_type == DCT_DCT || tx_type == DCT_ADST) { + idct16_8col(out, out); + } else { + vpx_iadst16_8col_sse2(out); + } + + for (j = 0; j < 16; ++j) { + highbd_write_buffer_8(dest + j * stride, out[j], bd); + } + dest += 8; + } + } else { + __m128i all[4][16]; + + for (i = 0; i < 4; i++) { + in = all[i]; + highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]); + highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]); + if (tx_type == DCT_DCT || tx_type == ADST_DCT) { + vpx_highbd_idct16_4col_sse4_1(in); + } else { + highbd_iadst16_4col_sse4_1(in); 
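/* Illustrative note, not part of the patch: each 1-D pass decodes tx_type
 * into a per-dimension transform.  The dispatch here and in the second
 * pass below is equivalent to:
 *   pass 1: (tx_type == DCT_DCT || tx_type == ADST_DCT) ? idct16 : iadst16
 *   pass 2: (tx_type == DCT_DCT || tx_type == DCT_ADST) ? idct16 : iadst16
 * so ADST_ADST takes the iadst path in both passes. */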
+ } + input += 4 * 16; + } + + for (i = 0; i < 16; i += 4) { + int j; + transpose_32bit_4x4(all[0] + i, out + 0); + transpose_32bit_4x4(all[1] + i, out + 4); + transpose_32bit_4x4(all[2] + i, out + 8); + transpose_32bit_4x4(all[3] + i, out + 12); + if (tx_type == DCT_DCT || tx_type == DCT_ADST) { + vpx_highbd_idct16_4col_sse4_1(out); + } else { + highbd_iadst16_4col_sse4_1(out); + } + + for (j = 0; j < 16; ++j) { + highbd_write_buffer_4(dest + j * stride, out[j], bd); + } + dest += 4; + } + } +} diff --git a/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c b/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c new file mode 100644 index 000000000000..af158536f926 --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vp9_rtcd.h" +#include "vp9/common/vp9_idct.h" +#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h" +#include "vpx_dsp/x86/inv_txfm_sse2.h" +#include "vpx_dsp/x86/transpose_sse2.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" + +static INLINE void highbd_iadst4_sse4_1(__m128i *const io) { + const __m128i pair_c1 = pair_set_epi32(4 * sinpi_1_9, 0); + const __m128i pair_c2 = pair_set_epi32(4 * sinpi_2_9, 0); + const __m128i pair_c3 = pair_set_epi32(4 * sinpi_3_9, 0); + const __m128i pair_c4 = pair_set_epi32(4 * sinpi_4_9, 0); + __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], t0[2], t1[2], t2[2]; + __m128i temp[2]; + + transpose_32bit_4x4(io, io); + + extend_64bit(io[0], temp); + s0[0] = _mm_mul_epi32(pair_c1, temp[0]); + s0[1] = _mm_mul_epi32(pair_c1, temp[1]); + s1[0] = _mm_mul_epi32(pair_c2, temp[0]); + s1[1] = _mm_mul_epi32(pair_c2, temp[1]); + + extend_64bit(io[1], temp); + s2[0] = _mm_mul_epi32(pair_c3, temp[0]); + s2[1] = _mm_mul_epi32(pair_c3, temp[1]); + + extend_64bit(io[2], temp); + s3[0] = _mm_mul_epi32(pair_c4, temp[0]); + s3[1] = _mm_mul_epi32(pair_c4, temp[1]); + s4[0] = _mm_mul_epi32(pair_c1, temp[0]); + s4[1] = _mm_mul_epi32(pair_c1, temp[1]); + + extend_64bit(io[3], temp); + s5[0] = _mm_mul_epi32(pair_c2, temp[0]); + s5[1] = _mm_mul_epi32(pair_c2, temp[1]); + s6[0] = _mm_mul_epi32(pair_c4, temp[0]); + s6[1] = _mm_mul_epi32(pair_c4, temp[1]); + + t0[0] = _mm_add_epi64(s0[0], s3[0]); + t0[1] = _mm_add_epi64(s0[1], s3[1]); + t0[0] = _mm_add_epi64(t0[0], s5[0]); + t0[1] = _mm_add_epi64(t0[1], s5[1]); + t1[0] = _mm_sub_epi64(s1[0], s4[0]); + t1[1] = _mm_sub_epi64(s1[1], s4[1]); + t1[0] = _mm_sub_epi64(t1[0], s6[0]); + t1[1] = _mm_sub_epi64(t1[1], s6[1]); + temp[0] = _mm_sub_epi32(io[0], io[2]); + temp[0] = _mm_add_epi32(temp[0], io[3]); + extend_64bit(temp[0], temp); + t2[0] = _mm_mul_epi32(pair_c3, temp[0]); + t2[1] = _mm_mul_epi32(pair_c3, temp[1]); + + s0[0] = _mm_add_epi64(t0[0], s2[0]); + s0[1] = _mm_add_epi64(t0[1], s2[1]); + s1[0] = _mm_add_epi64(t1[0], s2[0]); + s1[1] = _mm_add_epi64(t1[1], s2[1]); + s3[0] = _mm_add_epi64(t0[0], t1[0]); + s3[1] = _mm_add_epi64(t0[1], t1[1]); + s3[0] = _mm_sub_epi64(s3[0], s2[0]); + s3[1] = _mm_sub_epi64(s3[1], s2[1]); + + s0[0] = dct_const_round_shift_64bit(s0[0]); + s0[1] = dct_const_round_shift_64bit(s0[1]); + s1[0] = 
dct_const_round_shift_64bit(s1[0]); + s1[1] = dct_const_round_shift_64bit(s1[1]); + s2[0] = dct_const_round_shift_64bit(t2[0]); + s2[1] = dct_const_round_shift_64bit(t2[1]); + s3[0] = dct_const_round_shift_64bit(s3[0]); + s3[1] = dct_const_round_shift_64bit(s3[1]); + io[0] = pack_4(s0[0], s0[1]); + io[1] = pack_4(s1[0], s1[1]); + io[2] = pack_4(s2[0], s2[1]); + io[3] = pack_4(s3[0], s3[1]); +} + +void vp9_highbd_iht4x4_16_add_sse4_1(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + __m128i io[4]; + + io[0] = _mm_load_si128((const __m128i *)(input + 0)); + io[1] = _mm_load_si128((const __m128i *)(input + 4)); + io[2] = _mm_load_si128((const __m128i *)(input + 8)); + io[3] = _mm_load_si128((const __m128i *)(input + 12)); + + if (bd == 8) { + __m128i io_short[2]; + + io_short[0] = _mm_packs_epi32(io[0], io[1]); + io_short[1] = _mm_packs_epi32(io[2], io[3]); + if (tx_type == DCT_DCT || tx_type == ADST_DCT) { + idct4_sse2(io_short); + } else { + iadst4_sse2(io_short); + } + if (tx_type == DCT_DCT || tx_type == DCT_ADST) { + idct4_sse2(io_short); + } else { + iadst4_sse2(io_short); + } + io_short[0] = _mm_add_epi16(io_short[0], _mm_set1_epi16(8)); + io_short[1] = _mm_add_epi16(io_short[1], _mm_set1_epi16(8)); + io[0] = _mm_srai_epi16(io_short[0], 4); + io[1] = _mm_srai_epi16(io_short[1], 4); + } else { + if (tx_type == DCT_DCT || tx_type == ADST_DCT) { + highbd_idct4_sse4_1(io); + } else { + highbd_iadst4_sse4_1(io); + } + if (tx_type == DCT_DCT || tx_type == DCT_ADST) { + highbd_idct4_sse4_1(io); + } else { + highbd_iadst4_sse4_1(io); + } + io[0] = wraplow_16bit_shift4(io[0], io[1], _mm_set1_epi32(8)); + io[1] = wraplow_16bit_shift4(io[2], io[3], _mm_set1_epi32(8)); + } + + recon_and_store_4x4(io, dest, stride, bd); +} diff --git a/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c b/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c new file mode 100644 index 000000000000..7d949b6dbcee --- /dev/null +++ b/media/libvpx/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vp9_rtcd.h" +#include "vp9/common/vp9_idct.h" +#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h" +#include "vpx_dsp/x86/inv_txfm_sse2.h" +#include "vpx_dsp/x86/transpose_sse2.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" + +static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in, + const int c, + __m128i *const s) { + const __m128i pair_c = pair_set_epi32(4 * c, 0); + __m128i x[2]; + + extend_64bit(in, x); + s[0] = _mm_mul_epi32(pair_c, x[0]); + s[1] = _mm_mul_epi32(pair_c, x[1]); +} + +static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0, + const __m128i in1, + const int c0, const int c1, + __m128i *const s0, + __m128i *const s1) { + const __m128i pair_c0 = pair_set_epi32(4 * c0, 0); + const __m128i pair_c1 = pair_set_epi32(4 * c1, 0); + __m128i t00[2], t01[2], t10[2], t11[2]; + __m128i x0[2], x1[2]; + + extend_64bit(in0, x0); + extend_64bit(in1, x1); + t00[0] = _mm_mul_epi32(pair_c0, x0[0]); + t00[1] = _mm_mul_epi32(pair_c0, x0[1]); + t01[0] = _mm_mul_epi32(pair_c0, x1[0]); + t01[1] = _mm_mul_epi32(pair_c0, x1[1]); + t10[0] = _mm_mul_epi32(pair_c1, x0[0]); + t10[1] = _mm_mul_epi32(pair_c1, x0[1]); + t11[0] = _mm_mul_epi32(pair_c1, x1[0]); + t11[1] = _mm_mul_epi32(pair_c1, x1[1]); + + s0[0] = _mm_add_epi64(t00[0], t11[0]); + s0[1] = _mm_add_epi64(t00[1], t11[1]); + s1[0] = _mm_sub_epi64(t10[0], t01[0]); + s1[1] = _mm_sub_epi64(t10[1], t01[1]); +} + +static void highbd_iadst8_sse4_1(__m128i *const io) { + __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2]; + __m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2]; + + transpose_32bit_4x4x2(io, io); + + // stage 1 + highbd_iadst_butterfly_sse4_1(io[7], io[0], cospi_2_64, cospi_30_64, s0, s1); + highbd_iadst_butterfly_sse4_1(io[3], io[4], cospi_18_64, cospi_14_64, s4, s5); + x0[0] = _mm_add_epi64(s0[0], s4[0]); + x0[1] = _mm_add_epi64(s0[1], s4[1]); + x1[0] = _mm_add_epi64(s1[0], s5[0]); + x1[1] = _mm_add_epi64(s1[1], s5[1]); + x4[0] = _mm_sub_epi64(s0[0], s4[0]); + x4[1] = _mm_sub_epi64(s0[1], s4[1]); + x5[0] = _mm_sub_epi64(s1[0], s5[0]); + x5[1] = _mm_sub_epi64(s1[1], s5[1]); + + highbd_iadst_butterfly_sse4_1(io[5], io[2], cospi_10_64, cospi_22_64, s2, s3); + highbd_iadst_butterfly_sse4_1(io[1], io[6], cospi_26_64, cospi_6_64, s6, s7); + x2[0] = _mm_add_epi64(s2[0], s6[0]); + x2[1] = _mm_add_epi64(s2[1], s6[1]); + x3[0] = _mm_add_epi64(s3[0], s7[0]); + x3[1] = _mm_add_epi64(s3[1], s7[1]); + x6[0] = _mm_sub_epi64(s2[0], s6[0]); + x6[1] = _mm_sub_epi64(s2[1], s6[1]); + x7[0] = _mm_sub_epi64(s3[0], s7[0]); + x7[1] = _mm_sub_epi64(s3[1], s7[1]); + + x0[0] = dct_const_round_shift_64bit(x0[0]); + x0[1] = dct_const_round_shift_64bit(x0[1]); + x1[0] = dct_const_round_shift_64bit(x1[0]); + x1[1] = dct_const_round_shift_64bit(x1[1]); + x2[0] = dct_const_round_shift_64bit(x2[0]); + x2[1] = dct_const_round_shift_64bit(x2[1]); + x3[0] = dct_const_round_shift_64bit(x3[0]); + x3[1] = dct_const_round_shift_64bit(x3[1]); + x4[0] = dct_const_round_shift_64bit(x4[0]); + x4[1] = dct_const_round_shift_64bit(x4[1]); + x5[0] = dct_const_round_shift_64bit(x5[0]); + x5[1] = dct_const_round_shift_64bit(x5[1]); + x6[0] = dct_const_round_shift_64bit(x6[0]); + x6[1] = dct_const_round_shift_64bit(x6[1]); + x7[0] = dct_const_round_shift_64bit(x7[0]); + x7[1] = dct_const_round_shift_64bit(x7[1]); + s0[0] = pack_4(x0[0], x0[1]); // s0 = x0; + s1[0] = pack_4(x1[0], x1[1]); // s1 = x1; + s2[0] = pack_4(x2[0], x2[1]); // s2 = x2; + s3[0] = pack_4(x3[0], x3[1]); // s3 = x3; + x4[0] = pack_4(x4[0], x4[1]); + 
x5[0] = pack_4(x5[0], x5[1]); + x6[0] = pack_4(x6[0], x6[1]); + x7[0] = pack_4(x7[0], x7[1]); + + // stage 2 + x0[0] = _mm_add_epi32(s0[0], s2[0]); + x1[0] = _mm_add_epi32(s1[0], s3[0]); + x2[0] = _mm_sub_epi32(s0[0], s2[0]); + x3[0] = _mm_sub_epi32(s1[0], s3[0]); + + highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5); + highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6); + + x4[0] = _mm_add_epi64(s4[0], s6[0]); + x4[1] = _mm_add_epi64(s4[1], s6[1]); + x5[0] = _mm_add_epi64(s5[0], s7[0]); + x5[1] = _mm_add_epi64(s5[1], s7[1]); + x6[0] = _mm_sub_epi64(s4[0], s6[0]); + x6[1] = _mm_sub_epi64(s4[1], s6[1]); + x7[0] = _mm_sub_epi64(s5[0], s7[0]); + x7[1] = _mm_sub_epi64(s5[1], s7[1]); + x4[0] = dct_const_round_shift_64bit(x4[0]); + x4[1] = dct_const_round_shift_64bit(x4[1]); + x5[0] = dct_const_round_shift_64bit(x5[0]); + x5[1] = dct_const_round_shift_64bit(x5[1]); + x6[0] = dct_const_round_shift_64bit(x6[0]); + x6[1] = dct_const_round_shift_64bit(x6[1]); + x7[0] = dct_const_round_shift_64bit(x7[0]); + x7[1] = dct_const_round_shift_64bit(x7[1]); + x4[0] = pack_4(x4[0], x4[1]); + x5[0] = pack_4(x5[0], x5[1]); + x6[0] = pack_4(x6[0], x6[1]); + x7[0] = pack_4(x7[0], x7[1]); + + // stage 3 + s2[0] = _mm_add_epi32(x2[0], x3[0]); + s3[0] = _mm_sub_epi32(x2[0], x3[0]); + s6[0] = _mm_add_epi32(x6[0], x7[0]); + s7[0] = _mm_sub_epi32(x6[0], x7[0]); + highbd_iadst_half_butterfly_sse4_1(s2[0], cospi_16_64, s2); + highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3); + highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6); + highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7); + + x2[0] = dct_const_round_shift_64bit(s2[0]); + x2[1] = dct_const_round_shift_64bit(s2[1]); + x3[0] = dct_const_round_shift_64bit(s3[0]); + x3[1] = dct_const_round_shift_64bit(s3[1]); + x6[0] = dct_const_round_shift_64bit(s6[0]); + x6[1] = dct_const_round_shift_64bit(s6[1]); + x7[0] = dct_const_round_shift_64bit(s7[0]); + x7[1] = dct_const_round_shift_64bit(s7[1]); + x2[0] = pack_4(x2[0], x2[1]); + x3[0] = pack_4(x3[0], x3[1]); + x6[0] = pack_4(x6[0], x6[1]); + x7[0] = pack_4(x7[0], x7[1]); + + io[0] = x0[0]; + io[1] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]); + io[2] = x6[0]; + io[3] = _mm_sub_epi32(_mm_setzero_si128(), x2[0]); + io[4] = x3[0]; + io[5] = _mm_sub_epi32(_mm_setzero_si128(), x7[0]); + io[6] = x5[0]; + io[7] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]); +} + +void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest, + int stride, int tx_type, int bd) { + __m128i io[16]; + + io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0)); + io[4] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 4)); + io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0)); + io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4)); + io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0)); + io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4)); + io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0)); + io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4)); + io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0)); + io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4)); + io[9] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 0)); + io[13] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 4)); + io[10] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 0)); + io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4)); + io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0)); + io[15] = 
_mm_load_si128((const __m128i *)(input + 7 * 8 + 4)); + + if (bd == 8) { + __m128i io_short[8]; + + io_short[0] = _mm_packs_epi32(io[0], io[4]); + io_short[1] = _mm_packs_epi32(io[1], io[5]); + io_short[2] = _mm_packs_epi32(io[2], io[6]); + io_short[3] = _mm_packs_epi32(io[3], io[7]); + io_short[4] = _mm_packs_epi32(io[8], io[12]); + io_short[5] = _mm_packs_epi32(io[9], io[13]); + io_short[6] = _mm_packs_epi32(io[10], io[14]); + io_short[7] = _mm_packs_epi32(io[11], io[15]); + + if (tx_type == DCT_DCT || tx_type == ADST_DCT) { + vpx_idct8_sse2(io_short); + } else { + iadst8_sse2(io_short); + } + if (tx_type == DCT_DCT || tx_type == DCT_ADST) { + vpx_idct8_sse2(io_short); + } else { + iadst8_sse2(io_short); + } + round_shift_8x8(io_short, io); + } else { + __m128i temp[4]; + + if (tx_type == DCT_DCT || tx_type == ADST_DCT) { + vpx_highbd_idct8x8_half1d_sse4_1(io); + vpx_highbd_idct8x8_half1d_sse4_1(&io[8]); + } else { + highbd_iadst8_sse4_1(io); + highbd_iadst8_sse4_1(&io[8]); + } + + temp[0] = io[4]; + temp[1] = io[5]; + temp[2] = io[6]; + temp[3] = io[7]; + io[4] = io[8]; + io[5] = io[9]; + io[6] = io[10]; + io[7] = io[11]; + + if (tx_type == DCT_DCT || tx_type == DCT_ADST) { + vpx_highbd_idct8x8_half1d_sse4_1(io); + io[8] = temp[0]; + io[9] = temp[1]; + io[10] = temp[2]; + io[11] = temp[3]; + vpx_highbd_idct8x8_half1d_sse4_1(&io[8]); + } else { + highbd_iadst8_sse4_1(io); + io[8] = temp[0]; + io[9] = temp[1]; + io[10] = temp[2]; + io[11] = temp[3]; + highbd_iadst8_sse4_1(&io[8]); + } + highbd_idct8x8_final_round(io); + } + recon_and_store_8x8(io, dest, stride, bd); +} diff --git a/media/libvpx/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/media/libvpx/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c index 6996260e261a..ad693718c0ed 100644 --- a/media/libvpx/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/media/libvpx/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -10,8 +10,6 @@ #include "./vp9_rtcd.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" -#include "vpx_dsp/x86/txfm_common_sse2.h" -#include "vpx_ports/mem.h" void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { @@ -22,23 +20,23 @@ void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, in[1] = load_input_data8(input + 8); switch (tx_type) { - case 0: // DCT_DCT + case DCT_DCT: idct4_sse2(in); idct4_sse2(in); break; - case 1: // ADST_DCT + case ADST_DCT: idct4_sse2(in); iadst4_sse2(in); break; - case 2: // DCT_ADST + case DCT_ADST: iadst4_sse2(in); idct4_sse2(in); break; - case 3: // ADST_ADST + default: + assert(tx_type == ADST_ADST); iadst4_sse2(in); iadst4_sse2(in); break; - default: assert(0); break; } // Final round and shift @@ -67,23 +65,23 @@ void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, in[7] = load_input_data8(input + 8 * 7); switch (tx_type) { - case 0: // DCT_DCT - idct8_sse2(in); - idct8_sse2(in); + case DCT_DCT: + vpx_idct8_sse2(in); + vpx_idct8_sse2(in); break; - case 1: // ADST_DCT - idct8_sse2(in); + case ADST_DCT: + vpx_idct8_sse2(in); iadst8_sse2(in); break; - case 2: // DCT_ADST + case DCT_ADST: iadst8_sse2(in); - idct8_sse2(in); + vpx_idct8_sse2(in); break; - case 3: // ADST_ADST + default: + assert(tx_type == ADST_ADST); iadst8_sse2(in); iadst8_sse2(in); break; - default: assert(0); break; } // Final rounding and shift @@ -201,23 +199,23 @@ void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, load_buffer_8x16(input, in1); switch (tx_type) { - case 0: // DCT_DCT + case DCT_DCT: idct16_sse2(in0, in1); 
idct16_sse2(in0, in1); break; - case 1: // ADST_DCT + case ADST_DCT: idct16_sse2(in0, in1); iadst16_sse2(in0, in1); break; - case 2: // DCT_ADST + case DCT_ADST: iadst16_sse2(in0, in1); idct16_sse2(in0, in1); break; - case 3: // ADST_ADST + default: + assert(tx_type == ADST_ADST); iadst16_sse2(in0, in1); iadst16_sse2(in0, in1); break; - default: assert(0); break; } write_buffer_8x16(dest, in0, stride); diff --git a/media/libvpx/libvpx/vp9/decoder/vp9_decodeframe.c b/media/libvpx/libvpx/vp9/decoder/vp9_decodeframe.c index d0e896c13fce..e8b38699478a 100644 --- a/media/libvpx/libvpx/vp9/decoder/vp9_decodeframe.c +++ b/media/libvpx/libvpx/vp9/decoder/vp9_decodeframe.c @@ -23,6 +23,9 @@ #include "vpx_ports/mem_ops.h" #include "vpx_scale/vpx_scale.h" #include "vpx_util/vpx_thread.h" +#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG +#include "vpx_util/vpx_debug_util.h" +#endif // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_common.h" @@ -42,34 +45,15 @@ #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_dsubexp.h" +#include "vp9/decoder/vp9_job_queue.h" #define MAX_VP9_HEADER_SIZE 80 -static int is_compound_reference_allowed(const VP9_COMMON *cm) { - int i; - for (i = 1; i < REFS_PER_FRAME; ++i) - if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) return 1; +typedef int (*predict_recon_func)(TileWorkerData *twd, MODE_INFO *const mi, + int plane, int row, int col, TX_SIZE tx_size); - return 0; -} - -static void setup_compound_reference_mode(VP9_COMMON *cm) { - if (cm->ref_frame_sign_bias[LAST_FRAME] == - cm->ref_frame_sign_bias[GOLDEN_FRAME]) { - cm->comp_fixed_ref = ALTREF_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = GOLDEN_FRAME; - } else if (cm->ref_frame_sign_bias[LAST_FRAME] == - cm->ref_frame_sign_bias[ALTREF_FRAME]) { - cm->comp_fixed_ref = GOLDEN_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = ALTREF_FRAME; - } else { - cm->comp_fixed_ref = LAST_FRAME; - cm->comp_var_ref[0] = GOLDEN_FRAME; - cm->comp_var_ref[1] = ALTREF_FRAME; - } -} +typedef void (*intra_recon_func)(TileWorkerData *twd, MODE_INFO *const mi, + int plane, int row, int col, TX_SIZE tx_size); static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) { return len != 0 && len <= (size_t)(end - start); @@ -118,7 +102,7 @@ static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) { static REFERENCE_MODE read_frame_reference_mode(const VP9_COMMON *cm, vpx_reader *r) { - if (is_compound_reference_allowed(cm)) { + if (vp9_compound_reference_allowed(cm)) { return vpx_read_bit(r) ? (vpx_read_bit(r) ? REFERENCE_MODE_SELECT : COMPOUND_REFERENCE) : SINGLE_REFERENCE; @@ -351,20 +335,121 @@ static void predict_and_reconstruct_intra_block(TileWorkerData *twd, } } +static void parse_intra_block_row_mt(TileWorkerData *twd, MODE_INFO *const mi, + int plane, int row, int col, + TX_SIZE tx_size) { + MACROBLOCKD *const xd = &twd->xd; + PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode; + + if (mi->sb_type < BLOCK_8X8) + if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; + + if (!mi->skip) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const TX_TYPE tx_type = + (plane || xd->lossless) ? DCT_DCT : intra_mode_to_tx_type_lookup[mode]; + const scan_order *sc = (plane || xd->lossless) + ? 
&vp9_default_scan_orders[tx_size] + : &vp9_scan_orders[tx_size][tx_type]; + *pd->eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size, + mi->segment_id); + /* Keep the alignment to 16 */ + pd->dqcoeff += (16 << (tx_size << 1)); + pd->eob++; + } +} + +static void predict_and_reconstruct_intra_block_row_mt(TileWorkerData *twd, + MODE_INFO *const mi, + int plane, int row, + int col, + TX_SIZE tx_size) { + MACROBLOCKD *const xd = &twd->xd; + struct macroblockd_plane *const pd = &xd->plane[plane]; + PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode; + uint8_t *dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; + + if (mi->sb_type < BLOCK_8X8) + if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; + + vp9_predict_intra_block(xd, pd->n4_wl, tx_size, mode, dst, pd->dst.stride, + dst, pd->dst.stride, col, row, plane); + + if (!mi->skip) { + const TX_TYPE tx_type = + (plane || xd->lossless) ? DCT_DCT : intra_mode_to_tx_type_lookup[mode]; + if (*pd->eob > 0) { + inverse_transform_block_intra(xd, plane, tx_type, tx_size, dst, + pd->dst.stride, *pd->eob); + } + /* Keep the alignment to 16 */ + pd->dqcoeff += (16 << (tx_size << 1)); + pd->eob++; + } +} + static int reconstruct_inter_block(TileWorkerData *twd, MODE_INFO *const mi, - int plane, int row, int col, - TX_SIZE tx_size) { + int plane, int row, int col, TX_SIZE tx_size, + int mi_row, int mi_col) { + MACROBLOCKD *const xd = &twd->xd; + struct macroblockd_plane *const pd = &xd->plane[plane]; + const scan_order *sc = &vp9_default_scan_orders[tx_size]; + const int eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size, + mi->segment_id); + uint8_t *dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; + + if (eob > 0) { + inverse_transform_block_inter(xd, plane, tx_size, dst, pd->dst.stride, eob); + } +#if CONFIG_MISMATCH_DEBUG + { + int pixel_c, pixel_r; + int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2); + int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2); + mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row, + pd->subsampling_x, pd->subsampling_y); + mismatch_check_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r, blk_w, + blk_h, xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); + } +#else + (void)mi_row; + (void)mi_col; +#endif + return eob; +} + +static int parse_inter_block_row_mt(TileWorkerData *twd, MODE_INFO *const mi, + int plane, int row, int col, + TX_SIZE tx_size) { MACROBLOCKD *const xd = &twd->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; const scan_order *sc = &vp9_default_scan_orders[tx_size]; const int eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size, mi->segment_id); + *pd->eob = eob; + pd->dqcoeff += (16 << (tx_size << 1)); + pd->eob++; + + return eob; +} + +static int reconstruct_inter_block_row_mt(TileWorkerData *twd, + MODE_INFO *const mi, int plane, + int row, int col, TX_SIZE tx_size) { + MACROBLOCKD *const xd = &twd->xd; + struct macroblockd_plane *const pd = &xd->plane[plane]; + const int eob = *pd->eob; + + (void)mi; if (eob > 0) { inverse_transform_block_inter( xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], pd->dst.stride, eob); } + pd->dqcoeff += (16 << (tx_size << 1)); + pd->eob++; + return eob; } @@ -444,16 +529,15 @@ static void high_build_mc_border(const uint8_t *src8, int src_stride, #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH -static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, - int x0, int y0, int b_w, int b_h, - int frame_width, int frame_height, +static void 
extend_and_predict(TileWorkerData *twd, const uint8_t *buf_ptr1, + int pre_buf_stride, int x0, int y0, int b_w, + int b_h, int frame_width, int frame_height, int border_offset, uint8_t *const dst, int dst_buf_stride, int subpel_x, int subpel_y, const InterpKernel *kernel, const struct scale_factors *sf, MACROBLOCKD *xd, int w, int h, int ref, int xs, int ys) { - DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]); - + uint16_t *mc_buf_high = twd->extend_and_predict_buf; if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, x0, y0, b_w, b_h, frame_width, frame_height); @@ -469,15 +553,15 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, } } #else -static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, - int x0, int y0, int b_w, int b_h, - int frame_width, int frame_height, +static void extend_and_predict(TileWorkerData *twd, const uint8_t *buf_ptr1, + int pre_buf_stride, int x0, int y0, int b_w, + int b_h, int frame_width, int frame_height, int border_offset, uint8_t *const dst, int dst_buf_stride, int subpel_x, int subpel_y, const InterpKernel *kernel, const struct scale_factors *sf, int w, int h, int ref, int xs, int ys) { - DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); + uint8_t *mc_buf = (uint8_t *)twd->extend_and_predict_buf; const uint8_t *buf_ptr; build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, x0, y0, b_w, b_h, @@ -490,8 +574,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, #endif // CONFIG_VP9_HIGHBITDEPTH static void dec_build_inter_predictors( - MACROBLOCKD *xd, int plane, int bw, int bh, int x, int y, int w, int h, - int mi_x, int mi_y, const InterpKernel *kernel, + TileWorkerData *twd, MACROBLOCKD *xd, int plane, int bw, int bh, int x, + int y, int w, int h, int mi_x, int mi_y, const InterpKernel *kernel, const struct scale_factors *sf, struct buf_2d *pre_buf, struct buf_2d *dst_buf, const MV *mv, RefCntBuffer *ref_frame_buf, int is_scaled, int ref) { @@ -602,9 +686,9 @@ static void dec_build_inter_predictors( const int b_h = y1 - y0 + 1; const int border_offset = y_pad * 3 * b_w + x_pad * 3; - extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h, frame_width, - frame_height, border_offset, dst, dst_buf->stride, - subpel_x, subpel_y, kernel, sf, + extend_and_predict(twd, buf_ptr1, buf_stride, x0, y0, b_w, b_h, + frame_width, frame_height, border_offset, dst, + dst_buf->stride, subpel_x, subpel_y, kernel, sf, #if CONFIG_VP9_HIGHBITDEPTH xd, #endif @@ -627,7 +711,8 @@ static void dec_build_inter_predictors( #endif // CONFIG_VP9_HIGHBITDEPTH } -static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, +static void dec_build_inter_predictors_sb(TileWorkerData *twd, + VP9Decoder *const pbi, MACROBLOCKD *xd, int mi_row, int mi_col) { int plane; @@ -670,10 +755,10 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, for (y = 0; y < num_4x4_h; ++y) { for (x = 0; x < num_4x4_w; ++x) { const MV mv = average_split_mvs(pd, mi, ref, i++); - dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 4 * x, 4 * y, - 4, 4, mi_x, mi_y, kernel, sf, pre_buf, - dst_buf, &mv, ref_frame_buf, is_scaled, - ref); + dec_build_inter_predictors(twd, xd, plane, n4w_x4, n4h_x4, 4 * x, + 4 * y, 4, 4, mi_x, mi_y, kernel, sf, + pre_buf, dst_buf, &mv, ref_frame_buf, + is_scaled, ref); } } } @@ -687,7 +772,7 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, const int n4w_x4 = 4 * num_4x4_w; const int n4h_x4 
= 4 * num_4x4_h; struct buf_2d *const pre_buf = &pd->pre[ref]; - dec_build_inter_predictors(xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4, + dec_build_inter_predictors(twd, xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel, sf, pre_buf, dst_buf, &mv, ref_frame_buf, is_scaled, ref); } @@ -715,6 +800,25 @@ static void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, int bwl, } } +static MODE_INFO *set_offsets_recon(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int mi_row, int mi_col, int bw, int bh, + int bwl, int bhl) { + const int offset = mi_row * cm->mi_stride + mi_col; + const TileInfo *const tile = &xd->tile; + xd->mi = cm->mi_grid_visible + offset; + + set_plane_n4(xd, bw, bh, bwl, bhl); + + set_skip_context(xd, mi_row, mi_col); + + // Distance of Mb to the various image edges. These are specified to 8th pel + // as they are always compared to values that are in 1/8th pel units + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); + + vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); + return xd->mi[0]; +} + static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, BLOCK_SIZE bsize, int mi_row, int mi_col, int bw, int bh, int x_mis, int y_mis, int bwl, int bhl) { @@ -744,6 +848,66 @@ static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, return xd->mi[0]; } +static INLINE int predict_recon_inter(MACROBLOCKD *xd, MODE_INFO *mi, + TileWorkerData *twd, + predict_recon_func func) { + int eobtotal = 0; + int plane; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int step = (1 << tx_size); + int row, col; + const int max_blocks_wide = + num_4x4_w + (xd->mb_to_right_edge >= 0 + ? 0 + : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = + num_4x4_h + (xd->mb_to_bottom_edge >= 0 + ? 0 + : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; + xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; + + for (row = 0; row < max_blocks_high; row += step) + for (col = 0; col < max_blocks_wide; col += step) + eobtotal += func(twd, mi, plane, row, col, tx_size); + } + return eobtotal; +} + +static INLINE void predict_recon_intra(MACROBLOCKD *xd, MODE_INFO *mi, + TileWorkerData *twd, + intra_recon_func func) { + int plane; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int step = (1 << tx_size); + int row, col; + const int max_blocks_wide = + num_4x4_w + (xd->mb_to_right_edge >= 0 + ? 0 + : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = + num_4x4_h + (xd->mb_to_bottom_edge >= 0 + ? 0 + : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; + xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 
0 : max_blocks_high; + + for (row = 0; row < max_blocks_high; row += step) + for (col = 0; col < max_blocks_wide; col += step) + func(twd, mi, plane, row, col, tx_size); + } +} + static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, int mi_col, BLOCK_SIZE bsize, int bwl, int bhl) { VP9_COMMON *const cm = &pbi->common; @@ -800,7 +964,25 @@ static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, } } else { // Prediction - dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); + dec_build_inter_predictors_sb(twd, pbi, xd, mi_row, mi_col); +#if CONFIG_MISMATCH_DEBUG + { + int plane; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *pd = &xd->plane[plane]; + int pixel_c, pixel_r; + const BLOCK_SIZE plane_bsize = + get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), &xd->plane[plane]); + const int bw = get_block_width(plane_bsize); + const int bh = get_block_height(plane_bsize); + mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, + pd->subsampling_x, pd->subsampling_y); + mismatch_check_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c, + pixel_r, bw, bh, + xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); + } + } +#endif // Reconstruction if (!mi->skip) { @@ -829,8 +1011,8 @@ static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, for (row = 0; row < max_blocks_high; row += step) for (col = 0; col < max_blocks_wide; col += step) - eobtotal += - reconstruct_inter_block(twd, mi, plane, row, col, tx_size); + eobtotal += reconstruct_inter_block(twd, mi, plane, row, col, + tx_size, mi_row, mi_col); } if (!less8x8 && eobtotal == 0) mi->skip = 1; // skip loopfilter @@ -844,6 +1026,98 @@ static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, } } +static void recon_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, + int mi_col, BLOCK_SIZE bsize, int bwl, int bhl) { + VP9_COMMON *const cm = &pbi->common; + const int bw = 1 << (bwl - 1); + const int bh = 1 << (bhl - 1); + MACROBLOCKD *const xd = &twd->xd; + + MODE_INFO *mi = set_offsets_recon(cm, xd, mi_row, mi_col, bw, bh, bwl, bhl); + + if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { + const BLOCK_SIZE uv_subsize = + ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y]; + if (uv_subsize == BLOCK_INVALID) + vpx_internal_error(xd->error_info, VPX_CODEC_CORRUPT_FRAME, + "Invalid block size."); + } + + if (!is_inter_block(mi)) { + predict_recon_intra(xd, mi, twd, + predict_and_reconstruct_intra_block_row_mt); + } else { + // Prediction + dec_build_inter_predictors_sb(twd, pbi, xd, mi_row, mi_col); + + // Reconstruction + if (!mi->skip) { + predict_recon_inter(xd, mi, twd, reconstruct_inter_block_row_mt); + } + } + + vp9_build_mask(cm, mi, mi_row, mi_col, bw, bh); +} + +static void parse_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, + int mi_col, BLOCK_SIZE bsize, int bwl, int bhl) { + VP9_COMMON *const cm = &pbi->common; + const int bw = 1 << (bwl - 1); + const int bh = 1 << (bhl - 1); + const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); + const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); + vpx_reader *r = &twd->bit_reader; + MACROBLOCKD *const xd = &twd->xd; + + MODE_INFO *mi = set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, + y_mis, bwl, bhl); + + if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { + const BLOCK_SIZE uv_subsize = + ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y]; + if (uv_subsize == BLOCK_INVALID) + 
vpx_internal_error(xd->error_info, VPX_CODEC_CORRUPT_FRAME,
+                         "Invalid block size.");
+  }
+
+  vp9_read_mode_info(twd, pbi, mi_row, mi_col, x_mis, y_mis);
+
+  if (mi->skip) {
+    dec_reset_skip_context(xd);
+  }
+
+  if (!is_inter_block(mi)) {
+    predict_recon_intra(xd, mi, twd, parse_intra_block_row_mt);
+  } else {
+    if (!mi->skip) {
+      tran_low_t *dqcoeff[MAX_MB_PLANE];
+      int *eob[MAX_MB_PLANE];
+      int plane;
+      int eobtotal;
+      // Based on eobtotal and bsize, mi->skip may be set to true here. In
+      // that case dqcoeff and eob need to be backed up and restored, as
+      // recon_block will not increment these pointers for skip cases.
+      for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+        const struct macroblockd_plane *const pd = &xd->plane[plane];
+        dqcoeff[plane] = pd->dqcoeff;
+        eob[plane] = pd->eob;
+      }
+      eobtotal = predict_recon_inter(xd, mi, twd, parse_inter_block_row_mt);
+
+      if (bsize >= BLOCK_8X8 && eobtotal == 0) {
+        mi->skip = 1;  // skip loopfilter
+        for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+          struct macroblockd_plane *pd = &xd->plane[plane];
+          pd->dqcoeff = dqcoeff[plane];
+          pd->eob = eob[plane];
+        }
+      }
+    }
+  }
+
+  xd->corrupted |= vpx_reader_has_error(r);
+}
+
 static INLINE int dec_partition_plane_context(TileWorkerData *twd, int mi_row,
                                               int mi_col, int bsl) {
   const PARTITION_CONTEXT *above_ctx = twd->xd.above_seg_context + mi_col;
@@ -950,6 +1224,75 @@ static void decode_partition(TileWorkerData *twd, VP9Decoder *const pbi,
   dec_update_partition_context(twd, mi_row, mi_col, subsize, num_8x8_wh);
 }
 
+static void process_partition(TileWorkerData *twd, VP9Decoder *const pbi,
+                              int mi_row, int mi_col, BLOCK_SIZE bsize,
+                              int n4x4_l2, int parse_recon_flag,
+                              process_block_fn_t process_block) {
+  VP9_COMMON *const cm = &pbi->common;
+  const int n8x8_l2 = n4x4_l2 - 1;
+  const int num_8x8_wh = 1 << n8x8_l2;
+  const int hbs = num_8x8_wh >> 1;
+  PARTITION_TYPE partition;
+  BLOCK_SIZE subsize;
+  const int has_rows = (mi_row + hbs) < cm->mi_rows;
+  const int has_cols = (mi_col + hbs) < cm->mi_cols;
+  MACROBLOCKD *const xd = &twd->xd;
+
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+
+  if (parse_recon_flag & PARSE) {
+    *xd->partition =
+        read_partition(twd, mi_row, mi_col, has_rows, has_cols, n8x8_l2);
+  }
+
+  partition = *xd->partition;
+  xd->partition++;
+
+  subsize = get_subsize(bsize, partition);
+  if (!hbs) {
+    // calculate bmode block dimensions (log 2)
+    xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT);
+    xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ);
+    process_block(twd, pbi, mi_row, mi_col, subsize, 1, 1);
+  } else {
+    switch (partition) {
+      case PARTITION_NONE:
+        process_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n4x4_l2);
+        break;
+      case PARTITION_HORZ:
+        process_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n8x8_l2);
+        if (has_rows)
+          process_block(twd, pbi, mi_row + hbs, mi_col, subsize, n4x4_l2,
+                        n8x8_l2);
+        break;
+      case PARTITION_VERT:
+        process_block(twd, pbi, mi_row, mi_col, subsize, n8x8_l2, n4x4_l2);
+        if (has_cols)
+          process_block(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2,
+                        n4x4_l2);
+        break;
+      case PARTITION_SPLIT:
+        process_partition(twd, pbi, mi_row, mi_col, subsize, n8x8_l2,
+                          parse_recon_flag, process_block);
+        process_partition(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2,
+                          parse_recon_flag, process_block);
+        process_partition(twd, pbi, mi_row + hbs, mi_col, subsize, n8x8_l2,
+                          parse_recon_flag, process_block);
+        process_partition(twd, pbi, mi_row + hbs, mi_col + hbs, subsize,
+                          n8x8_l2, parse_recon_flag,
process_block); + break; + default: assert(0 && "Invalid partition type"); + } + } + + if (parse_recon_flag & PARSE) { + // update partition context + if ((bsize == BLOCK_8X8 || partition != PARTITION_SPLIT) && + bsize >= BLOCK_8X8) + dec_update_partition_context(twd, mi_row, mi_col, subsize, num_8x8_wh); + } +} + static void setup_token_decoder(const uint8_t *data, const uint8_t *data_end, size_t read_size, struct vpx_internal_error_info *error_info, @@ -1148,9 +1491,15 @@ static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { // Allocations in vp9_alloc_context_buffers() depend on individual // dimensions as well as the overall size. if (new_mi_cols > cm->mi_cols || new_mi_rows > cm->mi_rows) { - if (vp9_alloc_context_buffers(cm, width, height)) + if (vp9_alloc_context_buffers(cm, width, height)) { + // The cm->mi_* values have been cleared and any existing context + // buffers have been freed. Clear cm->width and cm->height to be + // consistent and to force a realloc next time. + cm->width = 0; + cm->height = 0; vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate context buffers"); + } } else { vp9_set_mb_mi(cm, width, height); } @@ -1348,6 +1697,321 @@ static void get_tile_buffers(VP9Decoder *pbi, const uint8_t *data, } } +static void map_write(RowMTWorkerData *const row_mt_worker_data, int map_idx, + int sync_idx) { +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&row_mt_worker_data->recon_sync_mutex[sync_idx]); + row_mt_worker_data->recon_map[map_idx] = 1; + pthread_cond_signal(&row_mt_worker_data->recon_sync_cond[sync_idx]); + pthread_mutex_unlock(&row_mt_worker_data->recon_sync_mutex[sync_idx]); +#else + (void)row_mt_worker_data; + (void)map_idx; + (void)sync_idx; +#endif // CONFIG_MULTITHREAD +} + +static void map_read(RowMTWorkerData *const row_mt_worker_data, int map_idx, + int sync_idx) { +#if CONFIG_MULTITHREAD + volatile int8_t *map = row_mt_worker_data->recon_map + map_idx; + pthread_mutex_t *const mutex = + &row_mt_worker_data->recon_sync_mutex[sync_idx]; + pthread_mutex_lock(mutex); + while (!(*map)) { + pthread_cond_wait(&row_mt_worker_data->recon_sync_cond[sync_idx], mutex); + } + pthread_mutex_unlock(mutex); +#else + (void)row_mt_worker_data; + (void)map_idx; + (void)sync_idx; +#endif // CONFIG_MULTITHREAD +} + +static int lpf_map_write_check(VP9LfSync *lf_sync, int row, int num_tile_cols) { + int return_val = 0; +#if CONFIG_MULTITHREAD + int corrupted; + pthread_mutex_lock(lf_sync->lf_mutex); + corrupted = lf_sync->corrupted; + pthread_mutex_unlock(lf_sync->lf_mutex); + if (!corrupted) { + pthread_mutex_lock(&lf_sync->recon_done_mutex[row]); + lf_sync->num_tiles_done[row] += 1; + if (num_tile_cols == lf_sync->num_tiles_done[row]) return_val = 1; + pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]); + } +#else + (void)lf_sync; + (void)row; + (void)num_tile_cols; +#endif + return return_val; +} + +static void vp9_tile_done(VP9Decoder *pbi) { +#if CONFIG_MULTITHREAD + int terminate; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + const int all_parse_done = 1 << pbi->common.log2_tile_cols; + pthread_mutex_lock(&row_mt_worker_data->recon_done_mutex); + row_mt_worker_data->num_tiles_done++; + terminate = all_parse_done == row_mt_worker_data->num_tiles_done; + pthread_mutex_unlock(&row_mt_worker_data->recon_done_mutex); + if (terminate) { + vp9_jobq_terminate(&row_mt_worker_data->jobq); + } +#else + (void)pbi; +#endif +} + +static void vp9_jobq_alloc(VP9Decoder *pbi) { + VP9_COMMON *const cm = &pbi->common; + 
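/* Illustrative note, not part of the patch: the queue below is sized as
 * (tile_cols * sb_rows * 2 + sb_rows) jobs, which presumably accounts for
 * one PARSE_JOB and one RECON_JOB per superblock row of each tile column,
 * plus one LPF_JOB per superblock row of the frame. */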
RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); + const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2; + const int tile_cols = 1 << cm->log2_tile_cols; + const size_t jobq_size = (tile_cols * sb_rows * 2 + sb_rows) * sizeof(Job); + + if (jobq_size > row_mt_worker_data->jobq_size) { + vpx_free(row_mt_worker_data->jobq_buf); + CHECK_MEM_ERROR(cm, row_mt_worker_data->jobq_buf, vpx_calloc(1, jobq_size)); + vp9_jobq_init(&row_mt_worker_data->jobq, row_mt_worker_data->jobq_buf, + jobq_size); + row_mt_worker_data->jobq_size = jobq_size; + } +} + +static void recon_tile_row(TileWorkerData *tile_data, VP9Decoder *pbi, + int mi_row, int is_last_row, VP9LfSync *lf_sync, + int cur_tile_col) { + VP9_COMMON *const cm = &pbi->common; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + const int tile_cols = 1 << cm->log2_tile_cols; + const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2; + const int cur_sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; + int mi_col_start = tile_data->xd.tile.mi_col_start; + int mi_col_end = tile_data->xd.tile.mi_col_end; + int mi_col; + + vp9_zero(tile_data->xd.left_context); + vp9_zero(tile_data->xd.left_seg_context); + for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) { + const int c = mi_col >> MI_BLOCK_SIZE_LOG2; + int plane; + const int sb_num = (cur_sb_row * (aligned_cols >> MI_BLOCK_SIZE_LOG2) + c); + + // Top Dependency + if (cur_sb_row) { + map_read(row_mt_worker_data, ((cur_sb_row - 1) * sb_cols) + c, + ((cur_sb_row - 1) * tile_cols) + cur_tile_col); + } + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + tile_data->xd.plane[plane].eob = + row_mt_worker_data->eob[plane] + (sb_num << EOBS_PER_SB_LOG2); + tile_data->xd.plane[plane].dqcoeff = + row_mt_worker_data->dqcoeff[plane] + (sb_num << DQCOEFFS_PER_SB_LOG2); + } + tile_data->xd.partition = + row_mt_worker_data->partition + (sb_num * PARTITIONS_PER_SB); + process_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4, RECON, + recon_block); + if (cm->lf.filter_level && !cm->skip_loop_filter) { + // Queue LPF_JOB + int is_lpf_job_ready = 0; + + if (mi_col + MI_BLOCK_SIZE >= mi_col_end) { + // Checks if this row has been decoded in all tiles + is_lpf_job_ready = lpf_map_write_check(lf_sync, cur_sb_row, tile_cols); + + if (is_lpf_job_ready) { + Job lpf_job; + lpf_job.job_type = LPF_JOB; + if (cur_sb_row > 0) { + lpf_job.row_num = mi_row - MI_BLOCK_SIZE; + vp9_jobq_queue(&row_mt_worker_data->jobq, &lpf_job, + sizeof(lpf_job)); + } + if (is_last_row) { + lpf_job.row_num = mi_row; + vp9_jobq_queue(&row_mt_worker_data->jobq, &lpf_job, + sizeof(lpf_job)); + } + } + } + } + map_write(row_mt_worker_data, (cur_sb_row * sb_cols) + c, + (cur_sb_row * tile_cols) + cur_tile_col); + } +} + +static void parse_tile_row(TileWorkerData *tile_data, VP9Decoder *pbi, + int mi_row, int cur_tile_col, uint8_t **data_end) { + int mi_col; + VP9_COMMON *const cm = &pbi->common; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + TileInfo *tile = &tile_data->xd.tile; + TileBuffer *const buf = &pbi->tile_buffers[cur_tile_col]; + const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); + + vp9_zero(tile_data->dqcoeff); + vp9_tile_init(tile, cm, 0, cur_tile_col); + + /* Update reader only at the beginning of each row in a tile */ + if (mi_row == 0) { + setup_token_decoder(buf->data, *data_end, buf->size, &tile_data->error_info, + 
&tile_data->bit_reader, pbi->decrypt_cb, + pbi->decrypt_state); + } + vp9_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff); + tile_data->xd.error_info = &tile_data->error_info; + + vp9_zero(tile_data->xd.left_context); + vp9_zero(tile_data->xd.left_seg_context); + for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; + mi_col += MI_BLOCK_SIZE) { + const int r = mi_row >> MI_BLOCK_SIZE_LOG2; + const int c = mi_col >> MI_BLOCK_SIZE_LOG2; + int plane; + const int sb_num = (r * (aligned_cols >> MI_BLOCK_SIZE_LOG2) + c); + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + tile_data->xd.plane[plane].eob = + row_mt_worker_data->eob[plane] + (sb_num << EOBS_PER_SB_LOG2); + tile_data->xd.plane[plane].dqcoeff = + row_mt_worker_data->dqcoeff[plane] + (sb_num << DQCOEFFS_PER_SB_LOG2); + } + tile_data->xd.partition = + row_mt_worker_data->partition + sb_num * PARTITIONS_PER_SB; + process_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4, PARSE, + parse_block); + } +} + +static int row_decode_worker_hook(void *arg1, void *arg2) { + ThreadData *const thread_data = (ThreadData *)arg1; + uint8_t **data_end = (uint8_t **)arg2; + VP9Decoder *const pbi = thread_data->pbi; + VP9_COMMON *const cm = &pbi->common; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); + const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2; + const int tile_cols = 1 << cm->log2_tile_cols; + Job job; + LFWorkerData *lf_data = thread_data->lf_data; + VP9LfSync *lf_sync = thread_data->lf_sync; + volatile int corrupted = 0; + TileWorkerData *volatile tile_data_recon = NULL; + + while (!vp9_jobq_dequeue(&row_mt_worker_data->jobq, &job, sizeof(job), 1)) { + int mi_col; + const int mi_row = job.row_num; + + if (job.job_type == LPF_JOB) { + lf_data->start = mi_row; + lf_data->stop = lf_data->start + MI_BLOCK_SIZE; + + if (cm->lf.filter_level && !cm->skip_loop_filter && + mi_row < cm->mi_rows) { + vp9_loopfilter_job(lf_data, lf_sync); + } + } else if (job.job_type == RECON_JOB) { + const int cur_sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; + const int is_last_row = sb_rows - 1 == cur_sb_row; + int mi_col_start, mi_col_end; + if (!tile_data_recon) + CHECK_MEM_ERROR(cm, tile_data_recon, + vpx_memalign(32, sizeof(TileWorkerData))); + + tile_data_recon->xd = pbi->mb; + vp9_tile_init(&tile_data_recon->xd.tile, cm, 0, job.tile_col); + vp9_init_macroblockd(cm, &tile_data_recon->xd, tile_data_recon->dqcoeff); + mi_col_start = tile_data_recon->xd.tile.mi_col_start; + mi_col_end = tile_data_recon->xd.tile.mi_col_end; + + if (setjmp(tile_data_recon->error_info.jmp)) { + const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2; + tile_data_recon->error_info.setjmp = 0; + corrupted = 1; + for (mi_col = mi_col_start; mi_col < mi_col_end; + mi_col += MI_BLOCK_SIZE) { + const int c = mi_col >> MI_BLOCK_SIZE_LOG2; + map_write(row_mt_worker_data, (cur_sb_row * sb_cols) + c, + (cur_sb_row * tile_cols) + job.tile_col); + } + if (is_last_row) { + vp9_tile_done(pbi); + } + continue; + } + + tile_data_recon->error_info.setjmp = 1; + tile_data_recon->xd.error_info = &tile_data_recon->error_info; + + recon_tile_row(tile_data_recon, pbi, mi_row, is_last_row, lf_sync, + job.tile_col); + + if (corrupted) + vpx_internal_error(&tile_data_recon->error_info, + VPX_CODEC_CORRUPT_FRAME, + "Failed to decode tile data"); + + if (is_last_row) { + vp9_tile_done(pbi); + } + } else if (job.job_type == PARSE_JOB) { + 
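One detail worth calling out in the RECON_JOB error path above: when setjmp() fires, the worker still walks the entire failed row calling map_write() for every superblock before moving on. Peer workers may already be parked in map_read() waiting on those entries, and skipping the writes would deadlock the pipeline. A stripped-down sketch of that discipline (all names hypothetical):

#include <setjmp.h>
#include <stdio.h>

static jmp_buf row_jmp;

static void reconstruct_sb(int fail) {
  if (fail) longjmp(row_jmp, 1); /* deep bitstream error */
}

/* Returns 1 on success, 0 on corruption. Either way, every superblock in
 * the row is marked complete so rows depending on it can make progress. */
static int process_row(int sb_cols, int fail_at) {
  int c;
  if (setjmp(row_jmp)) {
    /* Corrupt row: publish every flag anyway to unblock waiters. */
    for (c = 0; c < sb_cols; ++c) printf("post sb %d (corrupt)\n", c);
    return 0;
  }
  for (c = 0; c < sb_cols; ++c) {
    reconstruct_sb(c == fail_at);
    printf("post sb %d\n", c);
  }
  return 1;
}

int main(void) {
  process_row(4, 2); /* fails at superblock 2, but still posts all flags */
  return 0;
}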
TileWorkerData *const tile_data = &pbi->tile_worker_data[job.tile_col]; + + if (setjmp(tile_data->error_info.jmp)) { + tile_data->error_info.setjmp = 0; + corrupted = 1; + vp9_tile_done(pbi); + continue; + } + + tile_data->xd = pbi->mb; + tile_data->xd.counts = + cm->frame_parallel_decoding_mode ? 0 : &tile_data->counts; + + tile_data->error_info.setjmp = 1; + + parse_tile_row(tile_data, pbi, mi_row, job.tile_col, data_end); + + corrupted |= tile_data->xd.corrupted; + if (corrupted) + vpx_internal_error(&tile_data->error_info, VPX_CODEC_CORRUPT_FRAME, + "Failed to decode tile data"); + + /* Queue in the recon_job for this row */ + { + Job recon_job; + recon_job.row_num = mi_row; + recon_job.tile_col = job.tile_col; + recon_job.job_type = RECON_JOB; + vp9_jobq_queue(&row_mt_worker_data->jobq, &recon_job, + sizeof(recon_job)); + } + + /* Queue next parse job */ + if (mi_row + MI_BLOCK_SIZE < cm->mi_rows) { + Job parse_job; + parse_job.row_num = mi_row + MI_BLOCK_SIZE; + parse_job.tile_col = job.tile_col; + parse_job.job_type = PARSE_JOB; + vp9_jobq_queue(&row_mt_worker_data->jobq, &parse_job, + sizeof(parse_job)); + } + } + } + + vpx_free(tile_data_recon); + return !corrupted; +} + static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; @@ -1426,7 +2090,29 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data, vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); + if (pbi->row_mt == 1) { + int plane; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + tile_data->xd.plane[plane].eob = row_mt_worker_data->eob[plane]; + tile_data->xd.plane[plane].dqcoeff = + row_mt_worker_data->dqcoeff[plane]; + } + tile_data->xd.partition = row_mt_worker_data->partition; + process_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4, + PARSE, parse_block); + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + tile_data->xd.plane[plane].eob = row_mt_worker_data->eob[plane]; + tile_data->xd.plane[plane].dqcoeff = + row_mt_worker_data->dqcoeff[plane]; + } + tile_data->xd.partition = row_mt_worker_data->partition; + process_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4, + RECON, recon_block); + } else { + decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); + } } pbi->mb.corrupted |= tile_data->xd.corrupted; if (pbi->mb.corrupted) @@ -1471,6 +2157,25 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data, return vpx_reader_find_end(&tile_data->bit_reader); } +static void set_rows_after_error(VP9LfSync *lf_sync, int start_row, int mi_rows, + int num_tiles_left, int total_num_tiles) { + do { + int mi_row; + const int aligned_rows = mi_cols_aligned_to_sb(mi_rows); + const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2); + const int corrupted = 1; + for (mi_row = start_row; mi_row < mi_rows; mi_row += MI_BLOCK_SIZE) { + const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2); + vp9_set_row(lf_sync, total_num_tiles, mi_row >> MI_BLOCK_SIZE_LOG2, + is_last_row, corrupted); + } + /* If there are multiple tiles, the second tile should start marking row + * progress from row 0. 
+ */ + start_row = 0; + } while (num_tiles_left--); +} + // On entry 'tile_data->data_end' points to the end of the input frame, on exit // it is updated to reflect the bitreader position of the final tile column if // present in the tile buffer group or NULL otherwise. @@ -1481,6 +2186,12 @@ static int tile_worker_hook(void *arg1, void *arg2) { TileInfo *volatile tile = &tile_data->xd.tile; const int final_col = (1 << pbi->common.log2_tile_cols) - 1; const uint8_t *volatile bit_reader_end = NULL; + VP9_COMMON *cm = &pbi->common; + + LFWorkerData *lf_data = tile_data->lf_data; + VP9LfSync *lf_sync = tile_data->lf_sync; + + volatile int mi_row = 0; volatile int n = tile_data->buf_start; tile_data->error_info.setjmp = 1; @@ -1488,14 +2199,26 @@ static int tile_worker_hook(void *arg1, void *arg2) { tile_data->error_info.setjmp = 0; tile_data->xd.corrupted = 1; tile_data->data_end = NULL; + if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) { + const int num_tiles_left = tile_data->buf_end - n; + const int mi_row_start = mi_row; + set_rows_after_error(lf_sync, mi_row_start, cm->mi_rows, num_tiles_left, + 1 << cm->log2_tile_cols); + } return 0; } tile_data->xd.corrupted = 0; do { - int mi_row, mi_col; + int mi_col; const TileBuffer *const buf = pbi->tile_buffers + n; + + /* Initialize to 0 is safe since we do not deal with streams that have + * more than one row of tiles. (So tile->mi_row_start will be 0) + */ + assert(cm->log2_tile_rows == 0); + mi_row = 0; vp9_zero(tile_data->dqcoeff); vp9_tile_init(tile, &pbi->common, 0, buf->col); setup_token_decoder(buf->data, tile_data->data_end, buf->size, @@ -1513,6 +2236,14 @@ static int tile_worker_hook(void *arg1, void *arg2) { mi_col += MI_BLOCK_SIZE) { decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); } + if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) { + const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); + const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2); + const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2); + vp9_set_row(lf_sync, 1 << cm->log2_tile_cols, + mi_row >> MI_BLOCK_SIZE_LOG2, is_last_row, + tile_data->xd.corrupted); + } } if (buf->col == final_col) { @@ -1520,31 +2251,38 @@ static int tile_worker_hook(void *arg1, void *arg2) { } } while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end); + if (pbi->lpf_mt_opt && n < tile_data->buf_end && cm->lf.filter_level && + !cm->skip_loop_filter) { + /* This was not incremented in the tile loop, so increment before tiles left + * calculation + */ + ++n; + set_rows_after_error(lf_sync, 0, cm->mi_rows, tile_data->buf_end - n, + 1 << cm->log2_tile_cols); + } + + if (pbi->lpf_mt_opt && !tile_data->xd.corrupted && cm->lf.filter_level && + !cm->skip_loop_filter) { + vp9_loopfilter_rows(lf_data, lf_sync); + } + tile_data->data_end = bit_reader_end; return !tile_data->xd.corrupted; } // sorts in descending order static int compare_tile_buffers(const void *a, const void *b) { - const TileBuffer *const buf1 = (const TileBuffer *)a; - const TileBuffer *const buf2 = (const TileBuffer *)b; - return (int)(buf2->size - buf1->size); + const TileBuffer *const buf_a = (const TileBuffer *)a; + const TileBuffer *const buf_b = (const TileBuffer *)b; + return (buf_a->size < buf_b->size) - (buf_a->size > buf_b->size); } -static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, - const uint8_t *data_end) { - VP9_COMMON *const cm = &pbi->common; - const VPxWorkerInterface *const winterface = 
vpx_get_worker_interface(); - const uint8_t *bit_reader_end = NULL; - const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); - const int tile_cols = 1 << cm->log2_tile_cols; - const int tile_rows = 1 << cm->log2_tile_rows; - const int num_workers = VPXMIN(pbi->max_threads, tile_cols); +static INLINE void init_mt(VP9Decoder *pbi) { int n; - - assert(tile_cols <= (1 << 6)); - assert(tile_rows == 1); - (void)tile_rows; + VP9_COMMON *const cm = &pbi->common; + VP9LfSync *lf_row_sync = &pbi->lf_row_sync; + const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); if (pbi->num_tile_workers == 0) { const int num_threads = pbi->max_threads; @@ -1562,12 +2300,173 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, } } + // Initialize LPF + if ((pbi->lpf_mt_opt || pbi->row_mt) && cm->lf.filter_level && + !cm->skip_loop_filter) { + vp9_lpf_mt_init(lf_row_sync, cm, cm->lf.filter_level, + pbi->num_tile_workers); + } + + // Note: this memset assumes above_context[0], [1] and [2] + // are allocated as part of the same buffer. + memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); + + memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_mi_cols); + + vp9_reset_lfm(cm); +} + +static const uint8_t *decode_tiles_row_wise_mt(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { + VP9_COMMON *const cm = &pbi->common; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + const int num_workers = pbi->max_threads; + int i, n; + int col; + int corrupted = 0; + const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; + VP9LfSync *lf_row_sync = &pbi->lf_row_sync; + YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); + + assert(tile_cols <= (1 << 6)); + assert(tile_rows == 1); + (void)tile_rows; + + memset(row_mt_worker_data->recon_map, 0, + sb_rows * sb_cols * sizeof(*row_mt_worker_data->recon_map)); + + init_mt(pbi); + + // Reset tile decoding hook + for (n = 0; n < num_workers; ++n) { + VPxWorker *const worker = &pbi->tile_workers[n]; + ThreadData *const thread_data = &pbi->row_mt_worker_data->thread_data[n]; + winterface->sync(worker); + + if (cm->lf.filter_level && !cm->skip_loop_filter) { + thread_data->lf_sync = lf_row_sync; + thread_data->lf_data = &thread_data->lf_sync->lfdata[n]; + vp9_loop_filter_data_reset(thread_data->lf_data, new_fb, cm, + pbi->mb.plane); + } + + thread_data->pbi = pbi; + + worker->hook = row_decode_worker_hook; + worker->data1 = thread_data; + worker->data2 = (void *)&row_mt_worker_data->data_end; + } + + for (col = 0; col < tile_cols; ++col) { + TileWorkerData *const tile_data = &pbi->tile_worker_data[col]; + tile_data->xd = pbi->mb; + tile_data->xd.counts = + cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts; + } + + /* Reset the jobq to start of the jobq buffer */ + vp9_jobq_reset(&row_mt_worker_data->jobq); + row_mt_worker_data->num_tiles_done = 0; + row_mt_worker_data->data_end = NULL; + + // Load tile data into tile_buffers + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, + &pbi->tile_buffers); + + // Initialize thread frame counts. 
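The memset note carried into init_mt() above documents a layout assumption rather than a behavior: clearing cm->above_context[0] with a single memset only works because all three plane pointers alias one contiguous allocation. A minimal sketch of that layout (sizes hypothetical, ENTROPY_CONTEXT stands in for the libvpx typedef):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define MAX_MB_PLANE 3
typedef int8_t ENTROPY_CONTEXT;

int main(void) {
  const int aligned_mi_cols = 256; /* hypothetical aligned width in MIs */
  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
  ENTROPY_CONTEXT *buf =
      malloc(sizeof(*buf) * MAX_MB_PLANE * 2 * aligned_mi_cols);
  int p;
  if (!buf) return 1;
  for (p = 0; p < MAX_MB_PLANE; ++p)
    above_context[p] = buf + p * 2 * aligned_mi_cols;
  /* One memset clears all three planes precisely because they share buf. */
  memset(above_context[0], 0,
         sizeof(*buf) * MAX_MB_PLANE * 2 * aligned_mi_cols);
  free(buf);
  return 0;
}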
+ if (!cm->frame_parallel_decoding_mode) { + for (col = 0; col < tile_cols; ++col) { + TileWorkerData *const tile_data = &pbi->tile_worker_data[col]; + vp9_zero(tile_data->counts); + } + } + + // queue parse jobs for 0th row of every tile + for (col = 0; col < tile_cols; ++col) { + Job parse_job; + parse_job.row_num = 0; + parse_job.tile_col = col; + parse_job.job_type = PARSE_JOB; + vp9_jobq_queue(&row_mt_worker_data->jobq, &parse_job, sizeof(parse_job)); + } + + for (i = 0; i < num_workers; ++i) { + VPxWorker *const worker = &pbi->tile_workers[i]; + worker->had_error = 0; + if (i == num_workers - 1) { + winterface->execute(worker); + } else { + winterface->launch(worker); + } + } + + for (; n > 0; --n) { + VPxWorker *const worker = &pbi->tile_workers[n - 1]; + // TODO(jzern): The tile may have specific error data associated with + // its vpx_internal_error_info which could be propagated to the main info + // in cm. Additionally once the threads have been synced and an error is + // detected, there's no point in continuing to decode tiles. + corrupted |= !winterface->sync(worker); + } + + pbi->mb.corrupted = corrupted; + + { + /* Set data end */ + TileWorkerData *const tile_data = &pbi->tile_worker_data[tile_cols - 1]; + row_mt_worker_data->data_end = vpx_reader_find_end(&tile_data->bit_reader); + } + + // Accumulate thread frame counts. + if (!cm->frame_parallel_decoding_mode) { + for (i = 0; i < tile_cols; ++i) { + TileWorkerData *const tile_data = &pbi->tile_worker_data[i]; + vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1); + } + } + + return row_mt_worker_data->data_end; +} + +static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, + const uint8_t *data_end) { + VP9_COMMON *const cm = &pbi->common; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + const uint8_t *bit_reader_end = NULL; + VP9LfSync *lf_row_sync = &pbi->lf_row_sync; + YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + const int num_workers = VPXMIN(pbi->max_threads, tile_cols); + int n; + + assert(tile_cols <= (1 << 6)); + assert(tile_rows == 1); + (void)tile_rows; + + init_mt(pbi); + // Reset tile decoding hook for (n = 0; n < num_workers; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; TileWorkerData *const tile_data = &pbi->tile_worker_data[n + pbi->total_tiles]; winterface->sync(worker); + + if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) { + tile_data->lf_sync = lf_row_sync; + tile_data->lf_data = &tile_data->lf_sync->lfdata[n]; + vp9_loop_filter_data_reset(tile_data->lf_data, new_fb, cm, pbi->mb.plane); + tile_data->lf_data->y_only = 0; + } + tile_data->xd = pbi->mb; tile_data->xd.counts = cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts; @@ -1576,15 +2475,6 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, worker->data2 = pbi; } - // Note: this memset assumes above_context[0], [1] and [2] - // are allocated as part of the same buffer. 
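The worker fan-out above follows the usual libvpx shape: launch() hands the hook to num_workers - 1 background threads, execute() runs the final hook synchronously so the calling thread drains the job queue instead of idling, and sync() joins each worker and reports its status. A rough pthreads equivalent of that shape (the real VPxWorker interface also handles thread reuse and error capture, which this sketch omits):

#include <pthread.h>

#define NUM_WORKERS 4

/* Stand-in for row_decode_worker_hook(): each invocation dequeues and
 * processes jobs until the queue is terminated. */
static void *worker_hook(void *arg) {
  (void)arg;
  return NULL;
}

int main(void) {
  pthread_t tid[NUM_WORKERS - 1];
  int i;
  for (i = 0; i < NUM_WORKERS - 1; ++i)             /* winterface->launch() */
    pthread_create(&tid[i], NULL, worker_hook, NULL);
  worker_hook(NULL);                                /* winterface->execute() */
  for (i = 0; i < NUM_WORKERS - 1; ++i)             /* winterface->sync()   */
    pthread_join(tid[i], NULL);
  return 0;
}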
- memset(cm->above_context, 0, - sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); - memset(cm->above_seg_context, 0, - sizeof(*cm->above_seg_context) * aligned_mi_cols); - - vp9_reset_lfm(cm); - // Load tile data into tile_buffers get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, &pbi->tile_buffers); @@ -1724,6 +2614,22 @@ static void read_bitdepth_colorspace_sampling(VP9_COMMON *cm, } } +static INLINE void flush_all_fb_on_key(VP9_COMMON *cm) { + if (cm->frame_type == KEY_FRAME && cm->current_video_frame > 0) { + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + BufferPool *const pool = cm->buffer_pool; + int i; + for (i = 0; i < FRAME_BUFFERS; ++i) { + if (i == cm->new_fb_idx) continue; + frame_bufs[i].ref_count = 0; + if (!frame_bufs[i].released) { + pool->release_fb_cb(pool->cb_priv, &frame_bufs[i].raw_frame_buffer); + frame_bufs[i].released = 1; + } + } + } +} + static size_t read_uncompressed_header(VP9Decoder *pbi, struct vpx_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; @@ -1788,6 +2694,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, setup_frame_size(cm, rb); if (pbi->need_resync) { memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + flush_all_fb_on_key(cm); pbi->need_resync = 0; } } else { @@ -1911,6 +2818,35 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, setup_segmentation_dequant(cm); setup_tile_info(cm, rb); + if (pbi->row_mt == 1) { + int num_sbs = 1; + const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); + const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2; + const int num_jobs = sb_rows << cm->log2_tile_cols; + + if (pbi->row_mt_worker_data == NULL) { + CHECK_MEM_ERROR(cm, pbi->row_mt_worker_data, + vpx_calloc(1, sizeof(*pbi->row_mt_worker_data))); +#if CONFIG_MULTITHREAD + pthread_mutex_init(&pbi->row_mt_worker_data->recon_done_mutex, NULL); +#endif + } + + if (pbi->max_threads > 1) { + const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2; + + num_sbs = sb_cols * sb_rows; + } + + if (num_sbs > pbi->row_mt_worker_data->num_sbs || + num_jobs > pbi->row_mt_worker_data->num_jobs) { + vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data); + vp9_dec_alloc_row_mt_mem(pbi->row_mt_worker_data, cm, num_sbs, + pbi->max_threads, num_jobs); + } + vp9_jobq_alloc(pbi); + } sz = vpx_rb_read_literal(rb, 16); if (sz == 0) @@ -1953,7 +2889,7 @@ static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, cm->reference_mode = read_frame_reference_mode(cm, &r); if (cm->reference_mode != SINGLE_REFERENCE) - setup_compound_reference_mode(cm); + vp9_setup_compound_reference_mode(cm); read_frame_reference_mode_probs(cm, &r); for (j = 0; j < BLOCK_SIZE_GROUPS; j++) @@ -2021,6 +2957,12 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data, const int tile_rows = 1 << cm->log2_tile_rows; const int tile_cols = 1 << cm->log2_tile_cols; YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); +#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG + bitstream_queue_set_frame_read(cm->current_video_frame * 2 + cm->show_frame); +#endif +#if CONFIG_MISMATCH_DEBUG + mismatch_move_frame_idx_r(); +#endif xd->cur_buf = new_fb; if (!first_partition_size) { @@ -2069,20 +3011,28 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data, pbi->total_tiles = tile_rows * tile_cols; } - if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) { - // Multi-threaded tile decoder - *p_data_end = decode_tiles_mt(pbi, data + 
first_partition_size, data_end); - if (!xd->corrupted) { - if (!cm->skip_loop_filter) { - // If multiple threads are used to decode tiles, then we use those - // threads to do parallel loopfiltering. - vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level, - 0, 0, pbi->tile_workers, pbi->num_tile_workers, - &pbi->lf_row_sync); - } + if (pbi->max_threads > 1 && tile_rows == 1 && + (tile_cols > 1 || pbi->row_mt == 1)) { + if (pbi->row_mt == 1) { + *p_data_end = + decode_tiles_row_wise_mt(pbi, data + first_partition_size, data_end); } else { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Decode failed. Frame data is corrupted."); + // Multi-threaded tile decoder + *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); + if (!pbi->lpf_mt_opt) { + if (!xd->corrupted) { + if (!cm->skip_loop_filter) { + // If multiple threads are used to decode tiles, then we use those + // threads to do parallel loopfiltering. + vp9_loop_filter_frame_mt( + new_fb, cm, pbi->mb.plane, cm->lf.filter_level, 0, 0, + pbi->tile_workers, pbi->num_tile_workers, &pbi->lf_row_sync); + } + } else { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Decode failed. Frame data is corrupted."); + } + } } } else { *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); diff --git a/media/libvpx/libvpx/vp9/decoder/vp9_decodeframe.h b/media/libvpx/libvpx/vp9/decoder/vp9_decodeframe.h index 44717f546a5b..ba95e72344b0 100644 --- a/media/libvpx/libvpx/vp9/decoder/vp9_decodeframe.h +++ b/media/libvpx/libvpx/vp9/decoder/vp9_decodeframe.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_DECODER_VP9_DECODEFRAME_H_ -#define VP9_DECODER_VP9_DECODEFRAME_H_ +#ifndef VPX_VP9_DECODER_VP9_DECODEFRAME_H_ +#define VPX_VP9_DECODER_VP9_DECODEFRAME_H_ #ifdef __cplusplus extern "C" { @@ -32,4 +32,4 @@ void vp9_decode_frame(struct VP9Decoder *pbi, const uint8_t *data, } // extern "C" #endif -#endif // VP9_DECODER_VP9_DECODEFRAME_H_ +#endif // VPX_VP9_DECODER_VP9_DECODEFRAME_H_ diff --git a/media/libvpx/libvpx/vp9/decoder/vp9_decodemv.c b/media/libvpx/libvpx/vp9/decoder/vp9_decodemv.c index 0a781413b17d..49c6753948bc 100644 --- a/media/libvpx/libvpx/vp9/decoder/vp9_decodemv.c +++ b/media/libvpx/libvpx/vp9/decoder/vp9_decodemv.c @@ -444,17 +444,6 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, } } -static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv, - int refmv_count) { - int i; - - // Make sure all the candidates are properly clamped etc - for (i = 0; i < refmv_count; ++i) { - lower_mv_precision(&mvlist[i].as_mv, allow_hp); - *best_mv = mvlist[i]; - } -} - // This macro is used to add a motion vector mv_ref list if it isn't // already in the list. If it's the second motion vector or early_break // it will also skip all additional processing and jump to Done! 
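The rewritten dispatch in vp9_decode_frame() adds row-MT as the preferred threaded path and keeps the legacy tile-MT path, whose follow-up vp9_loop_filter_frame_mt() call now runs only when lpf_mt_opt has not already folded loopfiltering into the tile workers. The selection itself reduces to a few comparisons; a compact model, assuming the same field names:

#include <stdio.h>

static const char *pick_decode_path(int max_threads, int tile_rows,
                                    int tile_cols, int row_mt) {
  if (max_threads > 1 && tile_rows == 1 && (tile_cols > 1 || row_mt == 1))
    return row_mt == 1 ? "decode_tiles_row_wise_mt" : "decode_tiles_mt";
  return "decode_tiles"; /* serial fallback */
}

int main(void) {
  printf("%s\n", pick_decode_path(8, 1, 4, 0)); /* decode_tiles_mt */
  printf("%s\n", pick_decode_path(8, 1, 1, 1)); /* decode_tiles_row_wise_mt */
  printf("%s\n", pick_decode_path(1, 1, 4, 0)); /* decode_tiles */
  return 0;
}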
@@ -494,7 +483,7 @@ static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame, const POSITION *const mv_ref_search, int_mv *mv_ref_list, int mi_row, int mi_col, - int block, int is_sub8x8) { + int block) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; int different_ref_found = 0; @@ -511,7 +500,7 @@ static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); i = 0; - if (is_sub8x8) { + if (block >= 0) { // If the size < 8x8 we get the mv from the bmi substructure for the // nearest two blocks. for (i = 0; i < 2; ++i) { @@ -628,19 +617,22 @@ static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, assert(MAX_MV_REF_CANDIDATES == 2); - refmv_count = - dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search, - mv_list, mi_row, mi_col, block, 1); - switch (block) { - case 0: best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; break; + case 0: + refmv_count = + dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search, + mv_list, mi_row, mi_col, block); + best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; + break; case 1: case 2: if (b_mode == NEARESTMV) { best_sub8x8->as_int = bmi[0].as_mv[ref].as_int; } else { + dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search, + mv_list, mi_row, mi_col, block); best_sub8x8->as_int = 0; - for (n = 0; n < refmv_count; ++n) + for (n = 0; n < 2; ++n) if (bmi[0].as_mv[ref].as_int != mv_list[n].as_int) { best_sub8x8->as_int = mv_list[n].as_int; break; @@ -651,15 +643,20 @@ static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, if (b_mode == NEARESTMV) { best_sub8x8->as_int = bmi[2].as_mv[ref].as_int; } else { - int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; - candidates[0] = bmi[1].as_mv[ref]; - candidates[1] = bmi[0].as_mv[ref]; - candidates[2] = mv_list[0]; - candidates[3] = mv_list[1]; best_sub8x8->as_int = 0; - for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) - if (bmi[2].as_mv[ref].as_int != candidates[n].as_int) { - best_sub8x8->as_int = candidates[n].as_int; + if (bmi[2].as_mv[ref].as_int != bmi[1].as_mv[ref].as_int) { + best_sub8x8->as_int = bmi[1].as_mv[ref].as_int; + break; + } + if (bmi[2].as_mv[ref].as_int != bmi[0].as_mv[ref].as_int) { + best_sub8x8->as_int = bmi[0].as_mv[ref].as_int; + break; + } + dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search, + mv_list, mi_row, mi_col, block); + for (n = 0; n < 2; ++n) + if (bmi[2].as_mv[ref].as_int != mv_list[n].as_int) { + best_sub8x8->as_int = mv_list[n].as_int; break; } } @@ -696,7 +693,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, VP9_COMMON *const cm = &pbi->common; const BLOCK_SIZE bsize = mi->sb_type; const int allow_hp = cm->allow_high_precision_mv; - int_mv best_ref_mvs[2]; + int_mv best_ref_mvs[2] = { { 0 }, { 0 } }; int ref, is_compound; uint8_t inter_mode_ctx; const POSITION *const mv_ref_search = mv_ref_blocks[bsize]; @@ -715,26 +712,6 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, } else { if (bsize >= BLOCK_8X8) mi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx); - else - // Sub 8x8 blocks use the nearestmv as a ref_mv if the b_mode is NEWMV. - // Setting mode to NEARESTMV forces the search to stop after the nearestmv - // has been found. After b_modes have been read, mode will be overwritten - // by the last b_mode. 
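The restructured append_sub8x8_mvs_for_idx() in this hunk reorders the block-3 search so the two in-block neighbours (bmi[1], bmi[0]) are tested before the comparatively expensive dec_find_mv_refs() spatial scan, which now runs only when both neighbours equal bmi[2]. A scalar sketch of that short-circuit, using plain ints as stand-ins for int_mv:

/* Returns the first candidate differing from bmi2, trying the cheap
 * in-block neighbours before the spatial-neighbour scan. */
static int scan_spatial_refs(int cur, const int *mv_list) {
  int n;
  for (n = 0; n < 2; ++n)
    if (mv_list[n] != cur) return mv_list[n];
  return 0; /* zero-MV fallback */
}

static int best_sub8x8_for_block3(int bmi0, int bmi1, int bmi2,
                                  const int *mv_list) {
  if (bmi2 != bmi1) return bmi1; /* cheap checks first */
  if (bmi2 != bmi0) return bmi0;
  return scan_spatial_refs(bmi2, mv_list); /* only when still tied */
}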
- mi->mode = NEARESTMV; - - if (mi->mode != ZEROMV) { - for (ref = 0; ref < 1 + is_compound; ++ref) { - int_mv tmp_mvs[MAX_MV_REF_CANDIDATES]; - const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; - int refmv_count; - - refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, - tmp_mvs, mi_row, mi_col, -1, 0); - - dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref], - refmv_count); - } - } } mi->interp_filter = (cm->interp_filter == SWITCHABLE) @@ -746,6 +723,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, const int num_4x4_h = 1 << xd->bmode_blocks_hl; int idx, idy; PREDICTION_MODE b_mode; + int got_mv_refs_for_new = 0; int_mv best_sub8x8[2]; const uint32_t invalid_mv = 0x80008000; // Initialize the 2nd element as even though it won't be used meaningfully @@ -760,6 +738,18 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, for (ref = 0; ref < 1 + is_compound; ++ref) append_sub8x8_mvs_for_idx(cm, xd, mv_ref_search, b_mode, j, ref, mi_row, mi_col, &best_sub8x8[ref]); + } else if (b_mode == NEWMV && !got_mv_refs_for_new) { + for (ref = 0; ref < 1 + is_compound; ++ref) { + int_mv tmp_mvs[MAX_MV_REF_CANDIDATES]; + const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; + + dec_find_mv_refs(cm, xd, NEWMV, frame, mv_ref_search, tmp_mvs, + mi_row, mi_col, -1); + + lower_mv_precision(&tmp_mvs[0].as_mv, allow_hp); + best_ref_mvs[ref] = tmp_mvs[0]; + got_mv_refs_for_new = 1; + } } if (!assign_mv(cm, xd, b_mode, mi->bmi[j].as_mv, best_ref_mvs, @@ -777,6 +767,17 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, copy_mv_pair(mi->mv, mi->bmi[3].as_mv); } else { + if (mi->mode != ZEROMV) { + for (ref = 0; ref < 1 + is_compound; ++ref) { + int_mv tmp_mvs[MAX_MV_REF_CANDIDATES]; + const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; + int refmv_count = + dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, tmp_mvs, + mi_row, mi_col, -1); + lower_mv_precision(&tmp_mvs[refmv_count - 1].as_mv, allow_hp); + best_ref_mvs[ref] = tmp_mvs[refmv_count - 1]; + } + } xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs, best_ref_mvs, is_compound, allow_hp, r); } diff --git a/media/libvpx/libvpx/vp9/decoder/vp9_decodemv.h b/media/libvpx/libvpx/vp9/decoder/vp9_decodemv.h index b460cb8fb17b..11b45ace063e 100644 --- a/media/libvpx/libvpx/vp9/decoder/vp9_decodemv.h +++ b/media/libvpx/libvpx/vp9/decoder/vp9_decodemv.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_DECODER_VP9_DECODEMV_H_ -#define VP9_DECODER_VP9_DECODEMV_H_ +#ifndef VPX_VP9_DECODER_VP9_DECODEMV_H_ +#define VPX_VP9_DECODER_VP9_DECODEMV_H_ #include "vpx_dsp/bitreader.h" @@ -26,4 +26,4 @@ void vp9_read_mode_info(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, } // extern "C" #endif -#endif // VP9_DECODER_VP9_DECODEMV_H_ +#endif // VPX_VP9_DECODER_VP9_DECODEMV_H_ diff --git a/media/libvpx/libvpx/vp9/decoder/vp9_decoder.c b/media/libvpx/libvpx/vp9/decoder/vp9_decoder.c index a913fa560cfe..0aed3d717c56 100644 --- a/media/libvpx/libvpx/vp9/decoder/vp9_decoder.c +++ b/media/libvpx/libvpx/vp9/decoder/vp9_decoder.c @@ -55,6 +55,94 @@ static void vp9_dec_setup_mi(VP9_COMMON *cm) { cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base)); } +void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data, + VP9_COMMON *cm, int num_sbs, int max_threads, + int num_jobs) { + int plane; + const size_t dqcoeff_size = (num_sbs << DQCOEFFS_PER_SB_LOG2) * + sizeof(*row_mt_worker_data->dqcoeff[0]); + row_mt_worker_data->num_jobs = num_jobs; +#if CONFIG_MULTITHREAD + { + int i; + CHECK_MEM_ERROR( + cm, row_mt_worker_data->recon_sync_mutex, + vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_mutex) * num_jobs)); + if (row_mt_worker_data->recon_sync_mutex) { + for (i = 0; i < num_jobs; ++i) { + pthread_mutex_init(&row_mt_worker_data->recon_sync_mutex[i], NULL); + } + } + + CHECK_MEM_ERROR( + cm, row_mt_worker_data->recon_sync_cond, + vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_cond) * num_jobs)); + if (row_mt_worker_data->recon_sync_cond) { + for (i = 0; i < num_jobs; ++i) { + pthread_cond_init(&row_mt_worker_data->recon_sync_cond[i], NULL); + } + } + } +#endif + row_mt_worker_data->num_sbs = num_sbs; + for (plane = 0; plane < 3; ++plane) { + CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane], + vpx_memalign(16, dqcoeff_size)); + memset(row_mt_worker_data->dqcoeff[plane], 0, dqcoeff_size); + CHECK_MEM_ERROR(cm, row_mt_worker_data->eob[plane], + vpx_calloc(num_sbs << EOBS_PER_SB_LOG2, + sizeof(*row_mt_worker_data->eob[plane]))); + } + CHECK_MEM_ERROR(cm, row_mt_worker_data->partition, + vpx_calloc(num_sbs * PARTITIONS_PER_SB, + sizeof(*row_mt_worker_data->partition))); + CHECK_MEM_ERROR(cm, row_mt_worker_data->recon_map, + vpx_calloc(num_sbs, sizeof(*row_mt_worker_data->recon_map))); + + // allocate memory for thread_data + if (row_mt_worker_data->thread_data == NULL) { + const size_t thread_size = + max_threads * sizeof(*row_mt_worker_data->thread_data); + CHECK_MEM_ERROR(cm, row_mt_worker_data->thread_data, + vpx_memalign(32, thread_size)); + } +} + +void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data) { + if (row_mt_worker_data != NULL) { + int plane; +#if CONFIG_MULTITHREAD + int i; + if (row_mt_worker_data->recon_sync_mutex != NULL) { + for (i = 0; i < row_mt_worker_data->num_jobs; ++i) { + pthread_mutex_destroy(&row_mt_worker_data->recon_sync_mutex[i]); + } + vpx_free(row_mt_worker_data->recon_sync_mutex); + row_mt_worker_data->recon_sync_mutex = NULL; + } + if (row_mt_worker_data->recon_sync_cond != NULL) { + for (i = 0; i < row_mt_worker_data->num_jobs; ++i) { + pthread_cond_destroy(&row_mt_worker_data->recon_sync_cond[i]); + } + vpx_free(row_mt_worker_data->recon_sync_cond); + row_mt_worker_data->recon_sync_cond = NULL; + } +#endif + for (plane = 0; plane < 3; ++plane) { + vpx_free(row_mt_worker_data->eob[plane]); + row_mt_worker_data->eob[plane] = NULL; + vpx_free(row_mt_worker_data->dqcoeff[plane]); + row_mt_worker_data->dqcoeff[plane] = 
NULL; + } + vpx_free(row_mt_worker_data->partition); + row_mt_worker_data->partition = NULL; + vpx_free(row_mt_worker_data->recon_map); + row_mt_worker_data->recon_map = NULL; + vpx_free(row_mt_worker_data->thread_data); + row_mt_worker_data->thread_data = NULL; + } +} + static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) { cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip)); if (!cm->mip) return 1; @@ -69,6 +157,7 @@ static void vp9_dec_free_mi(VP9_COMMON *cm) { cm->mip = NULL; vpx_free(cm->mi_grid_base); cm->mi_grid_base = NULL; + cm->mi_alloc_size = 0; } VP9Decoder *vp9_decoder_create(BufferPool *const pool) { @@ -139,6 +228,18 @@ void vp9_decoder_remove(VP9Decoder *pbi) { vp9_loop_filter_dealloc(&pbi->lf_row_sync); } + if (pbi->row_mt == 1) { + vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data); + if (pbi->row_mt_worker_data != NULL) { + vp9_jobq_deinit(&pbi->row_mt_worker_data->jobq); + vpx_free(pbi->row_mt_worker_data->jobq_buf); +#if CONFIG_MULTITHREAD + pthread_mutex_destroy(&pbi->row_mt_worker_data->recon_done_mutex); +#endif + } + vpx_free(pbi->row_mt_worker_data); + } + vp9_remove_common(&pbi->common); vpx_free(pbi); } @@ -260,6 +361,44 @@ static void swap_frame_buffers(VP9Decoder *pbi) { cm->frame_refs[ref_index].idx = -1; } +static void release_fb_on_decoder_exit(VP9Decoder *pbi) { + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + VP9_COMMON *volatile const cm = &pbi->common; + BufferPool *volatile const pool = cm->buffer_pool; + RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs; + int i; + + // Synchronize all threads immediately as a subsequent decode call may + // cause a resize invalidating some allocations. + winterface->sync(&pbi->lf_worker); + for (i = 0; i < pbi->num_tile_workers; ++i) { + winterface->sync(&pbi->tile_workers[i]); + } + + // Release all the reference buffers if worker thread is holding them. + if (pbi->hold_ref_buf == 1) { + int ref_index = 0, mask; + for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { + const int old_idx = cm->ref_frame_map[ref_index]; + // Current thread releases the holding of reference frame. + decrease_ref_count(old_idx, frame_bufs, pool); + + // Release the reference frame in reference map. + if (mask & 1) { + decrease_ref_count(old_idx, frame_bufs, pool); + } + ++ref_index; + } + + // Current thread releases the holding of reference frame. + for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { + const int old_idx = cm->ref_frame_map[ref_index]; + decrease_ref_count(old_idx, frame_bufs, pool); + } + pbi->hold_ref_buf = 0; + } +} + int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, const uint8_t **psource) { VP9_COMMON *volatile const cm = &pbi->common; @@ -297,6 +436,9 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, // Find a free frame buffer. Return error if can not find any. 
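release_fb_on_decoder_exit() above factors the longjmp cleanup out of vp9_receive_compressed_data() so the out-of-buffers path can reuse it. The double decrease_ref_count() per refresh bit reflects two holders: the decoding thread's temporary hold on every mapped reference, plus the reference map's own count on slots about to be overwritten. A simplified model of that walk (ignoring the idx >= 0 and show_existing_frame guards in the real code):

#define REF_FRAMES 8

typedef struct { int ref_count; } RefCntBuffer;

static void drop_ref(RefCntBuffer *buf) {
  if (buf->ref_count > 0) --buf->ref_count;
  /* at zero, the raw buffer would go back to the pool release callback */
}

static void release_held_refs(RefCntBuffer *bufs, const int *ref_frame_map,
                              int refresh_frame_flags) {
  int ref_index = 0, mask;
  for (mask = refresh_frame_flags; mask; mask >>= 1) {
    drop_ref(&bufs[ref_frame_map[ref_index]]);   /* thread's hold   */
    if (mask & 1)
      drop_ref(&bufs[ref_frame_map[ref_index]]); /* map slot's hold */
    ++ref_index;
  }
  for (; ref_index < REF_FRAMES; ++ref_index)    /* untouched slots */
    drop_ref(&bufs[ref_frame_map[ref_index]]);
}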
cm->new_fb_idx = get_free_fb(cm); if (cm->new_fb_idx == INVALID_IDX) { + pbi->ready_for_new_data = 1; + release_fb_on_decoder_exit(pbi); + vpx_clear_system_state(); vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Unable to find free frame buffer"); return cm->error.error_code; @@ -309,44 +451,11 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; if (setjmp(cm->error.jmp)) { - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - int i; - cm->error.setjmp = 0; pbi->ready_for_new_data = 1; - - // Synchronize all threads immediately as a subsequent decode call may - // cause a resize invalidating some allocations. - winterface->sync(&pbi->lf_worker); - for (i = 0; i < pbi->num_tile_workers; ++i) { - winterface->sync(&pbi->tile_workers[i]); - } - - // Release all the reference buffers if worker thread is holding them. - if (pbi->hold_ref_buf == 1) { - int ref_index = 0, mask; - for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { - const int old_idx = cm->ref_frame_map[ref_index]; - // Current thread releases the holding of reference frame. - decrease_ref_count(old_idx, frame_bufs, pool); - - // Release the reference frame in reference map. - if (mask & 1) { - decrease_ref_count(old_idx, frame_bufs, pool); - } - ++ref_index; - } - - // Current thread releases the holding of reference frame. - for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { - const int old_idx = cm->ref_frame_map[ref_index]; - decrease_ref_count(old_idx, frame_bufs, pool); - } - pbi->hold_ref_buf = 0; - } + release_fb_on_decoder_exit(pbi); // Release current frame. decrease_ref_count(cm->new_fb_idx, frame_bufs, pool); - vpx_clear_system_state(); return -1; } @@ -364,6 +473,8 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, if (cm->seg.enabled) vp9_swap_current_and_last_seg_map(cm); } + if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx; + // Update progress in frame parallel decode. cm->last_width = cm->width; cm->last_height = cm->height; @@ -394,7 +505,7 @@ int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, #if CONFIG_VP9_POSTPROC if (!cm->show_existing_frame) { - ret = vp9_post_proc_frame(cm, sd, flags); + ret = vp9_post_proc_frame(cm, sd, flags, cm->width); } else { *sd = *cm->frame_to_show; ret = 0; diff --git a/media/libvpx/libvpx/vp9/decoder/vp9_decoder.h b/media/libvpx/libvpx/vp9/decoder/vp9_decoder.h index 4b26c314d37b..b0ef83c73d37 100644 --- a/media/libvpx/libvpx/vp9/decoder/vp9_decoder.h +++ b/media/libvpx/libvpx/vp9/decoder/vp9_decoder.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_DECODER_VP9_DECODER_H_ -#define VP9_DECODER_VP9_DECODER_H_ +#ifndef VPX_VP9_DECODER_VP9_DECODER_H_ +#define VPX_VP9_DECODER_VP9_DECODER_H_ #include "./vpx_config.h" @@ -21,11 +21,24 @@ #include "vp9/common/vp9_thread_common.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_ppflags.h" +#include "./vp9_job_queue.h" #ifdef __cplusplus extern "C" { #endif +#define EOBS_PER_SB_LOG2 8 +#define DQCOEFFS_PER_SB_LOG2 12 +#define PARTITIONS_PER_SB 85 + +typedef enum JobType { PARSE_JOB, RECON_JOB, LPF_JOB } JobType; + +typedef struct ThreadData { + struct VP9Decoder *pbi; + LFWorkerData *lf_data; + VP9LfSync *lf_sync; +} ThreadData; + typedef struct TileBuffer { const uint8_t *data; size_t size; @@ -37,12 +50,47 @@ typedef struct TileWorkerData { int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive vpx_reader bit_reader; FRAME_COUNTS counts; + LFWorkerData *lf_data; + VP9LfSync *lf_sync; DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* dqcoeff are shared by all the planes. So planes must be decoded serially */ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); + DECLARE_ALIGNED(16, uint16_t, extend_and_predict_buf[80 * 2 * 80 * 2]); struct vpx_internal_error_info error_info; } TileWorkerData; +typedef void (*process_block_fn_t)(TileWorkerData *twd, + struct VP9Decoder *const pbi, int mi_row, + int mi_col, BLOCK_SIZE bsize, int bwl, + int bhl); + +typedef struct RowMTWorkerData { + int num_sbs; + int *eob[MAX_MB_PLANE]; + PARTITION_TYPE *partition; + tran_low_t *dqcoeff[MAX_MB_PLANE]; + int8_t *recon_map; + const uint8_t *data_end; + uint8_t *jobq_buf; + JobQueueRowMt jobq; + size_t jobq_size; + int num_tiles_done; + int num_jobs; +#if CONFIG_MULTITHREAD + pthread_mutex_t recon_done_mutex; + pthread_mutex_t *recon_sync_mutex; + pthread_cond_t *recon_sync_cond; +#endif + ThreadData *thread_data; +} RowMTWorkerData; + +/* Structure to queue and dequeue row decode jobs */ +typedef struct Job { + int row_num; + int tile_col; + JobType job_type; +} Job; + typedef struct VP9Decoder { DECLARE_ALIGNED(16, MACROBLOCKD, mb); @@ -72,10 +120,14 @@ typedef struct VP9Decoder { int inv_tile_order; int need_resync; // wait for key/intra-only frame. int hold_ref_buf; // hold the reference buffer. 
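The three new constants at the top of this header size the row-MT scratch buffers per 64x64 superblock: 1 << DQCOEFFS_PER_SB_LOG2 = 4096 matches the 64 * 64 coefficients a superblock can carry, 1 << EOBS_PER_SB_LOG2 = 256 covers one eob per 4x4 transform block, and PARTITIONS_PER_SB = 85 is the full quad-tree of 1 + 4 + 16 + 64 partition decisions. Addressing a superblock's slice then reduces to a shift of its raster index, as in parse_tile_row()/recon_tile_row(); a sketch with short standing in for tran_low_t:

#define EOBS_PER_SB_LOG2 8
#define DQCOEFFS_PER_SB_LOG2 12
#define PARTITIONS_PER_SB 85

static int *sb_eob_slice(int *eob_base, int sb_num) {
  return eob_base + (sb_num << EOBS_PER_SB_LOG2); /* 256 eobs per SB */
}

static short *sb_dqcoeff_slice(short *dq_base, int sb_num) {
  return dq_base + (sb_num << DQCOEFFS_PER_SB_LOG2); /* 4096 coeffs per SB */
}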
+ + int row_mt; + int lpf_mt_opt; + RowMTWorkerData *row_mt_worker_data; } VP9Decoder; int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size, - const uint8_t **dest); + const uint8_t **psource); int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, vp9_ppflags_t *flags); @@ -109,6 +161,11 @@ struct VP9Decoder *vp9_decoder_create(BufferPool *const pool); void vp9_decoder_remove(struct VP9Decoder *pbi); +void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data, + VP9_COMMON *cm, int num_sbs, int max_threads, + int num_jobs); +void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data); + static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, BufferPool *const pool) { if (idx >= 0 && frame_bufs[idx].ref_count > 0) { @@ -129,4 +186,4 @@ static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, } // extern "C" #endif -#endif // VP9_DECODER_VP9_DECODER_H_ +#endif // VPX_VP9_DECODER_VP9_DECODER_H_ diff --git a/media/libvpx/libvpx/vp9/decoder/vp9_detokenize.c b/media/libvpx/libvpx/vp9/decoder/vp9_detokenize.c index 4bd016dc7dbd..c2e6b3d545bf 100644 --- a/media/libvpx/libvpx/vp9/decoder/vp9_detokenize.c +++ b/media/libvpx/libvpx/vp9/decoder/vp9_detokenize.c @@ -33,6 +33,20 @@ static INLINE int read_bool(vpx_reader *r, int prob, BD_VALUE *value, int *count, unsigned int *range) { const unsigned int split = (*range * prob + (256 - prob)) >> CHAR_BIT; const BD_VALUE bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT); +#if CONFIG_BITSTREAM_DEBUG + const int queue_r = bitstream_queue_get_read(); + const int frame_idx = bitstream_queue_get_frame_read(); + int ref_result, ref_prob; + bitstream_queue_pop(&ref_result, &ref_prob); + if (prob != ref_prob) { + fprintf(stderr, + "\n *** [bit] prob error, frame_idx_r %d prob %d ref_prob %d " + "queue_r %d\n", + frame_idx, prob, ref_prob, queue_r); + + assert(0); + } +#endif if (*count < 0) { r->value = *value; @@ -51,6 +65,20 @@ static INLINE int read_bool(vpx_reader *r, int prob, BD_VALUE *value, *value <<= shift; *count -= shift; } +#if CONFIG_BITSTREAM_DEBUG + { + const int bit = 1; + if (bit != ref_result) { + fprintf( + stderr, + "\n *** [bit] result error, frame_idx_r %d bit %d ref_result %d " + "queue_r %d\n", + frame_idx, bit, ref_result, queue_r); + + assert(0); + } + } +#endif return 1; } *range = split; @@ -60,6 +88,19 @@ static INLINE int read_bool(vpx_reader *r, int prob, BD_VALUE *value, *value <<= shift; *count -= shift; } +#if CONFIG_BITSTREAM_DEBUG + { + const int bit = 0; + if (bit != ref_result) { + fprintf(stderr, + "\n *** [bit] result error, frame_idx_r %d bit %d ref_result %d " + "queue_r %d\n", + frame_idx, bit, ref_result, queue_r); + + assert(0); + } + } +#endif return 0; } @@ -202,9 +243,9 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type, #endif // CONFIG_VP9_HIGHBITDEPTH #else if (read_bool(r, 128, &value, &count, &range)) { - dqcoeff[scan[c]] = -v; + dqcoeff[scan[c]] = (tran_low_t)-v; } else { - dqcoeff[scan[c]] = v; + dqcoeff[scan[c]] = (tran_low_t)v; } #endif // CONFIG_COEFFICIENT_RANGE_CHECKING ++c; diff --git a/media/libvpx/libvpx/vp9/decoder/vp9_detokenize.h b/media/libvpx/libvpx/vp9/decoder/vp9_detokenize.h index 7b0d876016d2..a32052fffffc 100644 --- a/media/libvpx/libvpx/vp9/decoder/vp9_detokenize.h +++ b/media/libvpx/libvpx/vp9/decoder/vp9_detokenize.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_DECODER_VP9_DETOKENIZE_H_ -#define VP9_DECODER_VP9_DETOKENIZE_H_ +#ifndef VPX_VP9_DECODER_VP9_DETOKENIZE_H_ +#define VPX_VP9_DECODER_VP9_DETOKENIZE_H_ #include "vpx_dsp/bitreader.h" #include "vp9/decoder/vp9_decoder.h" @@ -27,4 +27,4 @@ int vp9_decode_block_tokens(TileWorkerData *twd, int plane, } // extern "C" #endif -#endif // VP9_DECODER_VP9_DETOKENIZE_H_ +#endif // VPX_VP9_DECODER_VP9_DETOKENIZE_H_ diff --git a/media/libvpx/libvpx/vp9/decoder/vp9_dsubexp.h b/media/libvpx/libvpx/vp9/decoder/vp9_dsubexp.h index 5a8ec8300c17..b0c7750736b5 100644 --- a/media/libvpx/libvpx/vp9/decoder/vp9_dsubexp.h +++ b/media/libvpx/libvpx/vp9/decoder/vp9_dsubexp.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_DECODER_VP9_DSUBEXP_H_ -#define VP9_DECODER_VP9_DSUBEXP_H_ +#ifndef VPX_VP9_DECODER_VP9_DSUBEXP_H_ +#define VPX_VP9_DECODER_VP9_DSUBEXP_H_ #include "vpx_dsp/bitreader.h" @@ -23,4 +23,4 @@ void vp9_diff_update_prob(vpx_reader *r, vpx_prob *p); } // extern "C" #endif -#endif // VP9_DECODER_VP9_DSUBEXP_H_ +#endif // VPX_VP9_DECODER_VP9_DSUBEXP_H_ diff --git a/media/libvpx/libvpx/vp9/decoder/vp9_job_queue.c b/media/libvpx/libvpx/vp9/decoder/vp9_job_queue.c new file mode 100644 index 000000000000..9a31f5a6d09b --- /dev/null +++ b/media/libvpx/libvpx/vp9/decoder/vp9_job_queue.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "vpx/vpx_integer.h" + +#include "vp9/decoder/vp9_job_queue.h" + +void vp9_jobq_init(JobQueueRowMt *jobq, uint8_t *buf, size_t buf_size) { +#if CONFIG_MULTITHREAD + pthread_mutex_init(&jobq->mutex, NULL); + pthread_cond_init(&jobq->cond, NULL); +#endif + jobq->buf_base = buf; + jobq->buf_wr = buf; + jobq->buf_rd = buf; + jobq->buf_end = buf + buf_size; + jobq->terminate = 0; +} + +void vp9_jobq_reset(JobQueueRowMt *jobq) { +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&jobq->mutex); +#endif + jobq->buf_wr = jobq->buf_base; + jobq->buf_rd = jobq->buf_base; + jobq->terminate = 0; +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(&jobq->mutex); +#endif +} + +void vp9_jobq_deinit(JobQueueRowMt *jobq) { + vp9_jobq_reset(jobq); +#if CONFIG_MULTITHREAD + pthread_mutex_destroy(&jobq->mutex); + pthread_cond_destroy(&jobq->cond); +#endif +} + +void vp9_jobq_terminate(JobQueueRowMt *jobq) { +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&jobq->mutex); +#endif + jobq->terminate = 1; +#if CONFIG_MULTITHREAD + pthread_cond_broadcast(&jobq->cond); + pthread_mutex_unlock(&jobq->mutex); +#endif +} + +int vp9_jobq_queue(JobQueueRowMt *jobq, void *job, size_t job_size) { + int ret = 0; +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&jobq->mutex); +#endif + if (jobq->buf_end >= jobq->buf_wr + job_size) { + memcpy(jobq->buf_wr, job, job_size); + jobq->buf_wr = jobq->buf_wr + job_size; +#if CONFIG_MULTITHREAD + pthread_cond_signal(&jobq->cond); +#endif + ret = 0; + } else { + /* Wrap around case is not supported */ + assert(0); + ret = 1; + } +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(&jobq->mutex); +#endif + return ret; +} + +int vp9_jobq_dequeue(JobQueueRowMt *jobq, void *job, size_t job_size, + int blocking) { + int ret = 0; +#if 
CONFIG_MULTITHREAD + pthread_mutex_lock(&jobq->mutex); +#endif + if (jobq->buf_end >= jobq->buf_rd + job_size) { + while (1) { + if (jobq->buf_wr >= jobq->buf_rd + job_size) { + memcpy(job, jobq->buf_rd, job_size); + jobq->buf_rd = jobq->buf_rd + job_size; + ret = 0; + break; + } else { + /* If all the entries have been dequeued, then break and return */ + if (jobq->terminate == 1) { + ret = 1; + break; + } + if (blocking == 1) { +#if CONFIG_MULTITHREAD + pthread_cond_wait(&jobq->cond, &jobq->mutex); +#endif + } else { + /* If there is no job available, + * and this is non blocking call then return fail */ + ret = 1; + break; + } + } + } + } else { + /* Wrap around case is not supported */ + ret = 1; + } +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(&jobq->mutex); +#endif + + return ret; +} diff --git a/media/libvpx/libvpx/vp9/decoder/vp9_job_queue.h b/media/libvpx/libvpx/vp9/decoder/vp9_job_queue.h new file mode 100644 index 000000000000..bc23bf9c2c17 --- /dev/null +++ b/media/libvpx/libvpx/vp9/decoder/vp9_job_queue.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_DECODER_VP9_JOB_QUEUE_H_ +#define VPX_VP9_DECODER_VP9_JOB_QUEUE_H_ + +#include "vpx_util/vpx_thread.h" + +typedef struct { + // Pointer to buffer base which contains the jobs + uint8_t *buf_base; + + // Pointer to current address where new job can be added + uint8_t *volatile buf_wr; + + // Pointer to current address from where next job can be obtained + uint8_t *volatile buf_rd; + + // Pointer to end of job buffer + uint8_t *buf_end; + + int terminate; + +#if CONFIG_MULTITHREAD + pthread_mutex_t mutex; + pthread_cond_t cond; +#endif +} JobQueueRowMt; + +void vp9_jobq_init(JobQueueRowMt *jobq, uint8_t *buf, size_t buf_size); +void vp9_jobq_reset(JobQueueRowMt *jobq); +void vp9_jobq_deinit(JobQueueRowMt *jobq); +void vp9_jobq_terminate(JobQueueRowMt *jobq); +int vp9_jobq_queue(JobQueueRowMt *jobq, void *job, size_t job_size); +int vp9_jobq_dequeue(JobQueueRowMt *jobq, void *job, size_t job_size, + int blocking); + +#endif // VPX_VP9_DECODER_VP9_JOB_QUEUE_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c b/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c deleted file mode 100644 index 513718e7cb12..000000000000 --- a/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include - -#include "./vp9_rtcd.h" -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" - -#include "vp9/common/vp9_blockd.h" -#include "vpx_dsp/txfm_common.h" -#include "vpx_dsp/vpx_dsp_common.h" - -void vp9_fdct8x8_quant_neon(const int16_t *input, int stride, - tran_low_t *coeff_ptr, intptr_t n_coeffs, - int skip_block, const int16_t *round_ptr, - const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, - const int16_t *iscan_ptr) { - tran_low_t temp_buffer[64]; - (void)coeff_ptr; - - vpx_fdct8x8_neon(input, temp_buffer, stride); - vp9_quantize_fp_neon(temp_buffer, n_coeffs, skip_block, round_ptr, quant_ptr, - qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan_ptr, - iscan_ptr); -} diff --git a/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c b/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c index 97a09bdff6f5..d75a4817964c 100644 --- a/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c +++ b/media/libvpx/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c @@ -26,6 +26,22 @@ #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/vpx_dsp_common.h" +static INLINE void calculate_dqcoeff_and_store(const int16x8_t qcoeff, + const int16x8_t dequant, + tran_low_t *dqcoeff) { + const int32x4_t dqcoeff_0 = + vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant)); + const int32x4_t dqcoeff_1 = + vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant)); + +#if CONFIG_VP9_HIGHBITDEPTH + vst1q_s32(dqcoeff, dqcoeff_0); + vst1q_s32(dqcoeff + 4, dqcoeff_1); +#else + vst1q_s16(dqcoeff, vcombine_s16(vmovn_s32(dqcoeff_0), vmovn_s32(dqcoeff_1))); +#endif // CONFIG_VP9_HIGHBITDEPTH +} + void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, @@ -55,7 +71,8 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16x8_t v_iscan = vld1q_s16(&iscan[0]); const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); - const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); + const int16x8_t v_abs = vabsq_s16(v_coeff); + const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round); const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); const int32x4_t v_tmp_hi = @@ -67,10 +84,9 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); - const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); + calculate_dqcoeff_and_store(v_qcoeff, v_dequant, dqcoeff_ptr); v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); store_s16q_to_tran_low(qcoeff_ptr, v_qcoeff); - store_s16q_to_tran_low(dqcoeff_ptr, v_dqcoeff); v_round = vmovq_n_s16(round_ptr[1]); v_quant = vmovq_n_s16(quant_ptr[1]); v_dequant = vmovq_n_s16(dequant_ptr[1]); @@ -80,7 +96,8 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16x8_t v_iscan = vld1q_s16(&iscan[i]); const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr + i); const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); - const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); + const int16x8_t v_abs = vabsq_s16(v_coeff); + const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round); const int32x4_t v_tmp_lo 
= vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); const int32x4_t v_tmp_hi = @@ -92,11 +109,13 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); - const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); + calculate_dqcoeff_and_store(v_qcoeff, v_dequant, dqcoeff_ptr + i); v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); store_s16q_to_tran_low(qcoeff_ptr + i, v_qcoeff); - store_s16q_to_tran_low(dqcoeff_ptr + i, v_dqcoeff); } +#ifdef __aarch64__ + *eob_ptr = vmaxvq_s16(v_eobmax_76543210); +#else { const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210), vget_high_s16(v_eobmax_76543210)); @@ -111,6 +130,7 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0); } +#endif // __aarch64__ } static INLINE int32x4_t extract_sign_bit(int32x4_t a) { @@ -122,7 +142,7 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan_ptr) { + const int16_t *scan, const int16_t *iscan) { const int16x8_t one = vdupq_n_s16(1); const int16x8_t neg_one = vdupq_n_s16(-1); @@ -134,17 +154,16 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16x8_t dequant_thresh = vshrq_n_s16(vld1q_s16(dequant_ptr), 2); // Process dc and the first seven ac coeffs. - const uint16x8_t iscan = - vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one)); + const uint16x8_t v_iscan = + vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); const int16x8_t coeff_abs = vabsq_s16(coeff); const int16x8_t dequant_mask = vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh)); - int16x8_t qcoeff = vaddq_s16(coeff_abs, round); + int16x8_t qcoeff = vqaddq_s16(coeff_abs, round); int32x4_t dqcoeff_0, dqcoeff_1; - int16x8_t dqcoeff; uint16x8_t eob_max; (void)scan; (void)count; @@ -166,15 +185,19 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, // Add 1 if negative to round towards zero because the C uses division. dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0)); dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1)); +#if CONFIG_VP9_HIGHBITDEPTH + vst1q_s32(dqcoeff_ptr, vshrq_n_s32(dqcoeff_0, 1)); + vst1q_s32(dqcoeff_ptr + 4, vshrq_n_s32(dqcoeff_1, 1)); +#else + store_s16q_to_tran_low(dqcoeff_ptr, vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), + vshrn_n_s32(dqcoeff_1, 1))); +#endif - dqcoeff = vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1)); - - eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), iscan); + eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan); store_s16q_to_tran_low(qcoeff_ptr, qcoeff); - store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff); - iscan_ptr += 8; + iscan += 8; coeff_ptr += 8; qcoeff_ptr += 8; dqcoeff_ptr += 8; @@ -188,17 +211,16 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, // Process the rest of the ac coeffs. 
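  // The first eight coefficients (dc + 7 ac) were handled above, so this
  // loop covers the remaining 32 * 32 - 8 = 1016 coefficients in 127 groups
  // of eight.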
for (i = 8; i < 32 * 32; i += 8) { - const uint16x8_t iscan = - vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one)); + const uint16x8_t v_iscan = + vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); const int16x8_t coeff_abs = vabsq_s16(coeff); const int16x8_t dequant_mask = vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh)); - int16x8_t qcoeff = vaddq_s16(coeff_abs, round); + int16x8_t qcoeff = vqaddq_s16(coeff_abs, round); int32x4_t dqcoeff_0, dqcoeff_1; - int16x8_t dqcoeff; qcoeff = vqdmulhq_s16(qcoeff, quant); qcoeff = veorq_s16(qcoeff, coeff_sign); @@ -211,21 +233,29 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0)); dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1)); - dqcoeff = - vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1)); +#if CONFIG_VP9_HIGHBITDEPTH + vst1q_s32(dqcoeff_ptr, vshrq_n_s32(dqcoeff_0, 1)); + vst1q_s32(dqcoeff_ptr + 4, vshrq_n_s32(dqcoeff_1, 1)); +#else + store_s16q_to_tran_low( + dqcoeff_ptr, + vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1))); +#endif eob_max = - vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), iscan)); + vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan)); store_s16q_to_tran_low(qcoeff_ptr, qcoeff); - store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff); - iscan_ptr += 8; + iscan += 8; coeff_ptr += 8; qcoeff_ptr += 8; dqcoeff_ptr += 8; } +#ifdef __aarch64__ + *eob_ptr = vmaxvq_u16(eob_max); +#else { const uint16x4_t eob_max_0 = vmax_u16(vget_low_u16(eob_max), vget_high_u16(eob_max)); @@ -233,5 +263,6 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1); vst1_lane_u16(eob_ptr, eob_max_2, 0); } +#endif // __aarch64__ } } diff --git a/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_error_msa.c b/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_error_msa.c index 188d04d8f652..61786d8f6623 100644 --- a/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_error_msa.c +++ b/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_error_msa.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vpx_config.h" #include "./vp9_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" @@ -79,6 +80,7 @@ return err; \ } +#if !CONFIG_VP9_HIGHBITDEPTH BLOCK_ERROR_BLOCKSIZE_MSA(16); BLOCK_ERROR_BLOCKSIZE_MSA(64); BLOCK_ERROR_BLOCKSIZE_MSA(256); @@ -103,3 +105,4 @@ int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr, return err; } +#endif // !CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c b/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c index 0831e5914886..efbbe830dbd7 100644 --- a/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c +++ b/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c @@ -10,6 +10,7 @@ #include +#include "./vp9_rtcd.h" #include "vp9/common/vp9_enums.h" #include "vp9/encoder/mips/msa/vp9_fdct_msa.h" #include "vpx_dsp/mips/fwd_txfm_msa.h" diff --git a/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c b/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c index fa36f09ab856..9c5cc12ef04d 100644 --- a/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c +++ b/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c @@ -10,6 +10,7 @@ #include +#include "./vp9_rtcd.h" #include "vp9/common/vp9_enums.h" #include "vp9/encoder/mips/msa/vp9_fdct_msa.h" diff --git a/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c b/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c index 604db853c418..26d81aa9ef89 100644 --- a/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c +++ b/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c @@ -10,6 +10,7 @@ #include +#include "./vp9_rtcd.h" #include "vp9/common/vp9_enums.h" #include "vp9/encoder/mips/msa/vp9_fdct_msa.h" diff --git a/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h b/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h index 794bec70b64e..fa1af2fc5710 100644 --- a/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h +++ b/media/libvpx/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ -#define VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ +#ifndef VPX_VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ +#define VPX_VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ #include "vpx_dsp/mips/fwd_txfm_msa.h" #include "vpx_dsp/mips/txfm_macros_msa.h" @@ -113,4 +113,4 @@ PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, s3_m, s3_m, \ out0, out1, out2, out3); \ } -#endif /* VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ */ +#endif // VPX_VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c b/media/libvpx/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c new file mode 100644 index 000000000000..4f88b8fff6ff --- /dev/null +++ b/media/libvpx/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" + +#include "./vp9_rtcd.h" +#include "vpx_dsp/ppc/types_vsx.h" + +// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit +// integers, and return the high 16 bits of the intermediate integers. 
+// (a * b) >> 16 +// Note: Because this is done in 2 operations, a and b cannot both be INT16_MIN +static INLINE int16x8_t vec_mulhi(int16x8_t a, int16x8_t b) { + // madds does ((A * B) >> 15) + C, we need >> 16, so we perform an extra right + // shift. + return vec_sra(vec_madds(a, b, vec_zeros_s16), vec_ones_u16); +} + +// Negate 16-bit integers in a when the corresponding signed 16-bit +// integer in b is negative. +static INLINE int16x8_t vec_sign(int16x8_t a, int16x8_t b) { + const int16x8_t mask = vec_sra(b, vec_shift_sign_s16); + return vec_xor(vec_add(a, mask), mask); +} + +// Compare packed 16-bit integers across a, and return the maximum value in +// every element. Returns a vector containing the biggest value across vector a. +static INLINE int16x8_t vec_max_across(int16x8_t a) { + a = vec_max(a, vec_perm(a, a, vec_perm64)); + a = vec_max(a, vec_perm(a, a, vec_perm32)); + return vec_max(a, vec_perm(a, a, vec_perm16)); +} + +void vp9_quantize_fp_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { + int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob; + bool16x8_t zero_coeff0, zero_coeff1; + + int16x8_t round = vec_vsx_ld(0, round_ptr); + int16x8_t quant = vec_vsx_ld(0, quant_ptr); + int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); + int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr); + int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); + int16x8_t scan0 = vec_vsx_ld(0, iscan); + int16x8_t scan1 = vec_vsx_ld(16, iscan); + + (void)scan; + (void)skip_block; + assert(!skip_block); + + // First set of 8 coeff starts with DC + 7 AC + qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant); + zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); + qcoeff0 = vec_sign(qcoeff0, coeff0); + vec_vsx_st(qcoeff0, 0, qcoeff_ptr); + + dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16); + vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr); + + // Remove DC value from round and quant + round = vec_splat(round, 1); + quant = vec_splat(quant, 1); + + // Remove DC value from dequant + dequant = vec_splat(dequant, 1); + + // Second set of 8 coeff starts with (all AC) + qcoeff1 = vec_mulhi(vec_vaddshs(vec_abs(coeff1), round), quant); + zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16); + qcoeff1 = vec_sign(qcoeff1, coeff1); + vec_vsx_st(qcoeff1, 16, qcoeff_ptr); + + dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16); + vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr); + + eob = vec_max(vec_or(scan0, zero_coeff0), vec_or(scan1, zero_coeff1)); + + // We quantize 16 coeff up front (enough for a 4x4) and process 24 coeff per + // loop iteration.
+ // for 8x8: 16 + 2 x 24 = 64 + // for 16x16: 16 + 10 x 24 = 256 + if (n_coeffs > 16) { + int16x8_t coeff2, qcoeff2, dqcoeff2, eob2, scan2; + bool16x8_t zero_coeff2; + + int index = 16; + int off0 = 32; + int off1 = 48; + int off2 = 64; + + do { + coeff0 = vec_vsx_ld(off0, coeff_ptr); + coeff1 = vec_vsx_ld(off1, coeff_ptr); + coeff2 = vec_vsx_ld(off2, coeff_ptr); + scan0 = vec_vsx_ld(off0, iscan); + scan1 = vec_vsx_ld(off1, iscan); + scan2 = vec_vsx_ld(off2, iscan); + + qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant); + zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); + qcoeff0 = vec_sign(qcoeff0, coeff0); + vec_vsx_st(qcoeff0, off0, qcoeff_ptr); + dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16); + vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr); + + qcoeff1 = vec_mulhi(vec_vaddshs(vec_abs(coeff1), round), quant); + zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16); + qcoeff1 = vec_sign(qcoeff1, coeff1); + vec_vsx_st(qcoeff1, off1, qcoeff_ptr); + dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16); + vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr); + + qcoeff2 = vec_mulhi(vec_vaddshs(vec_abs(coeff2), round), quant); + zero_coeff2 = vec_cmpeq(qcoeff2, vec_zeros_s16); + qcoeff2 = vec_sign(qcoeff2, coeff2); + vec_vsx_st(qcoeff2, off2, qcoeff_ptr); + dqcoeff2 = vec_mladd(qcoeff2, dequant, vec_zeros_s16); + vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr); + + eob = vec_max(eob, vec_or(scan0, zero_coeff0)); + eob2 = vec_max(vec_or(scan1, zero_coeff1), vec_or(scan2, zero_coeff2)); + eob = vec_max(eob, eob2); + + index += 24; + off0 += 48; + off1 += 48; + off2 += 48; + } while (index < n_coeffs); + } + + eob = vec_max_across(eob); + *eob_ptr = eob[0] + 1; +} + +// Sets each 32-bit integer to 1 when the corresponding value in a is +// negative. +static INLINE int32x4_t vec_is_neg(int32x4_t a) { + return vec_sr(a, vec_shift_sign_s32); +} + +// Dequantization function used for 32x32 blocks. Quantized coeff of 32x32 +// blocks are twice as big as for other block sizes. As such, using +// vec_mladd results in overflow. +static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff, + int16x8_t dequant) { + int32x4_t dqcoeffe = vec_mule(qcoeff, dequant); + int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant); + // Add 1 if negative to round towards zero because the C uses division.
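+  // Worked example: if qcoeff * dequant is -7, the C reference computes
+  // -7 / 2 = -3, but an arithmetic shift alone gives -7 >> 1 = -4; adding
+  // the extracted sign bit first gives (-7 + 1) >> 1 = -3, matching the
+  // truncating division.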
+ dqcoeffe = vec_add(dqcoeffe, vec_is_neg(dqcoeffe)); + dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo)); + dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32); + dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32); + return (int16x8_t)vec_perm(dqcoeffe, dqcoeffo, vec_perm_odd_even_pack); +} + +void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan) { + // In stage 1, we quantize 16 coeffs (DC + 15 AC) + // In stage 2, we loop 42 times and quantize 24 coeffs per iteration + // (32 * 32 - 16) / 24 = 42 + int num_itr = 42; + // Offsets are in bytes, 16 coeffs = 32 bytes + int off0 = 32; + int off1 = 48; + int off2 = 64; + + int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob; + bool16x8_t mask0, mask1, zero_coeff0, zero_coeff1; + + int16x8_t round = vec_vsx_ld(0, round_ptr); + int16x8_t quant = vec_vsx_ld(0, quant_ptr); + int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); + int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr); + int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); + int16x8_t scan0 = vec_vsx_ld(0, iscan); + int16x8_t scan1 = vec_vsx_ld(16, iscan); + int16x8_t thres = vec_sra(dequant, vec_splats((uint16_t)2)); + int16x8_t abs_coeff0 = vec_abs(coeff0); + int16x8_t abs_coeff1 = vec_abs(coeff1); + + (void)scan; + (void)skip_block; + (void)n_coeffs; + assert(!skip_block); + + mask0 = vec_cmpge(abs_coeff0, thres); + round = vec_sra(vec_add(round, vec_ones_s16), vec_ones_u16); + // First set of 8 coeff starts with DC + 7 AC + qcoeff0 = vec_madds(vec_vaddshs(abs_coeff0, round), quant, vec_zeros_s16); + qcoeff0 = vec_and(qcoeff0, mask0); + zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); + qcoeff0 = vec_sign(qcoeff0, coeff0); + vec_vsx_st(qcoeff0, 0, qcoeff_ptr); + + dqcoeff0 = dequantize_coeff_32(qcoeff0, dequant); + vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr); + + // Remove DC value from thres, round, quant and dequant + thres = vec_splat(thres, 1); + round = vec_splat(round, 1); + quant = vec_splat(quant, 1); + dequant = vec_splat(dequant, 1); + + mask1 = vec_cmpge(abs_coeff1, thres); + + // Second set of 8 coeff starts with (all AC) + qcoeff1 = + vec_madds(vec_vaddshs(vec_abs(coeff1), round), quant, vec_zeros_s16); + qcoeff1 = vec_and(qcoeff1, mask1); + zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16); + qcoeff1 = vec_sign(qcoeff1, coeff1); + vec_vsx_st(qcoeff1, 16, qcoeff_ptr); + + dqcoeff1 = dequantize_coeff_32(qcoeff1, dequant); + vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr); + + eob = vec_max(vec_or(scan0, zero_coeff0), vec_or(scan1, zero_coeff1)); + + do { + int16x8_t coeff2, abs_coeff2, qcoeff2, dqcoeff2, eob2, scan2; + bool16x8_t zero_coeff2, mask2; + coeff0 = vec_vsx_ld(off0, coeff_ptr); + coeff1 = vec_vsx_ld(off1, coeff_ptr); + coeff2 = vec_vsx_ld(off2, coeff_ptr); + scan0 = vec_vsx_ld(off0, iscan); + scan1 = vec_vsx_ld(off1, iscan); + scan2 = vec_vsx_ld(off2, iscan); + + abs_coeff0 = vec_abs(coeff0); + abs_coeff1 = vec_abs(coeff1); + abs_coeff2 = vec_abs(coeff2); + + qcoeff0 = vec_madds(vec_vaddshs(abs_coeff0, round), quant, vec_zeros_s16); + qcoeff1 = vec_madds(vec_vaddshs(abs_coeff1, round), quant, vec_zeros_s16); + qcoeff2 = vec_madds(vec_vaddshs(abs_coeff2, round), quant, vec_zeros_s16); + + mask0 = vec_cmpge(abs_coeff0, thres); + mask1 = vec_cmpge(abs_coeff1, thres); + mask2 = vec_cmpge(abs_coeff2, thres); + + qcoeff0 = vec_and(qcoeff0, mask0); + qcoeff1 = vec_and(qcoeff1, 
mask1); + qcoeff2 = vec_and(qcoeff2, mask2); + + zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); + zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16); + zero_coeff2 = vec_cmpeq(qcoeff2, vec_zeros_s16); + + qcoeff0 = vec_sign(qcoeff0, coeff0); + qcoeff1 = vec_sign(qcoeff1, coeff1); + qcoeff2 = vec_sign(qcoeff2, coeff2); + + vec_vsx_st(qcoeff0, off0, qcoeff_ptr); + vec_vsx_st(qcoeff1, off1, qcoeff_ptr); + vec_vsx_st(qcoeff2, off2, qcoeff_ptr); + + dqcoeff0 = dequantize_coeff_32(qcoeff0, dequant); + dqcoeff1 = dequantize_coeff_32(qcoeff1, dequant); + dqcoeff2 = dequantize_coeff_32(qcoeff2, dequant); + + vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr); + vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr); + vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr); + + eob = vec_max(eob, vec_or(scan0, zero_coeff0)); + eob2 = vec_max(vec_or(scan1, zero_coeff1), vec_or(scan2, zero_coeff2)); + eob = vec_max(eob, eob2); + + off0 += 48; + off1 += 48; + off2 += 48; + num_itr--; + } while (num_itr != 0); + + eob = vec_max_across(eob); + *eob_ptr = eob[0] + 1; +} diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_alt_ref_aq.h b/media/libvpx/libvpx/vp9/encoder/vp9_alt_ref_aq.h index e508cb44ac2d..22a657e035c9 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_alt_ref_aq.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_alt_ref_aq.h @@ -15,8 +15,8 @@ * for altref frames. Go to alt_ref_aq_private.h for implmentation details. */ -#ifndef VP9_ENCODER_VP9_ALT_REF_AQ_H_ -#define VP9_ENCODER_VP9_ALT_REF_AQ_H_ +#ifndef VPX_VP9_ENCODER_VP9_ALT_REF_AQ_H_ +#define VPX_VP9_ENCODER_VP9_ALT_REF_AQ_H_ #include "vpx/vpx_integer.h" @@ -124,4 +124,4 @@ void vp9_alt_ref_aq_destroy(struct ALT_REF_AQ *const self); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_ALT_REF_AQ_H_ +#endif // VPX_VP9_ENCODER_VP9_ALT_REF_AQ_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_aq_360.h b/media/libvpx/libvpx/vp9/encoder/vp9_aq_360.h index b1b56561d827..749d3c198ab9 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_aq_360.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_aq_360.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_AQ_360_H_ -#define VP9_ENCODER_VP9_AQ_360_H_ +#ifndef VPX_VP9_ENCODER_VP9_AQ_360_H_ +#define VPX_VP9_ENCODER_VP9_AQ_360_H_ #include "vp9/encoder/vp9_encoder.h" @@ -24,4 +24,4 @@ void vp9_360aq_frame_setup(VP9_COMP *cpi); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_AQ_VARIANCE_H_ +#endif // VPX_VP9_ENCODER_VP9_AQ_360_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_aq_complexity.h b/media/libvpx/libvpx/vp9/encoder/vp9_aq_complexity.h index a00d34e70254..d3cb34c0132c 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_aq_complexity.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_aq_complexity.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ -#define VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ +#ifndef VPX_VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ +#define VPX_VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ #ifdef __cplusplus extern "C" { @@ -33,4 +33,4 @@ void vp9_setup_in_frame_q_adj(struct VP9_COMP *cpi); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ +#endif // VPX_VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/media/libvpx/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c index 2f2f0055a7c9..858a41654605 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -21,6 +21,14 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_segmentation.h" +static const uint8_t VP9_VAR_OFFS[64] = { + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 +}; + CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { size_t last_coded_q_map_size; CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); @@ -39,13 +47,16 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { } assert(MAXQ <= 255); memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size); + cr->counter_encode_maxq_scene_change = 0; return cr; } void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { - vpx_free(cr->map); - vpx_free(cr->last_coded_q_map); - vpx_free(cr); + if (cr != NULL) { + vpx_free(cr->map); + vpx_free(cr->last_coded_q_map); + vpx_free(cr); + } } // Check if this coding block, of size bsize, should be considered for refresh @@ -176,7 +187,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, MODE_INFO *const mi, // If this block is labeled for refresh, check if we should reset the // segment_id. - if (cyclic_refresh_segment_id_boosted(mi->segment_id)) { + if (cpi->sf.use_nonrd_pick_mode && + cyclic_refresh_segment_id_boosted(mi->segment_id)) { mi->segment_id = refresh_this_block; // Reset segment_id if it will be skipped. if (skip) mi->segment_id = CR_SEGMENT_ID_BASE; @@ -318,6 +330,28 @@ void vp9_cyclic_refresh_set_golden_update(VP9_COMP *const cpi) { rc->baseline_gf_interval = 10; } +static int is_superblock_flat_static(VP9_COMP *const cpi, int sb_row_index, + int sb_col_index) { + unsigned int source_variance; + const uint8_t *src_y = cpi->Source->y_buffer; + const int ystride = cpi->Source->y_stride; + unsigned int sse; + const BLOCK_SIZE bsize = BLOCK_64X64; + src_y += (sb_row_index << 6) * ystride + (sb_col_index << 6); + source_variance = + cpi->fn_ptr[bsize].vf(src_y, ystride, VP9_VAR_OFFS, 0, &sse); + if (source_variance == 0) { + uint64_t block_sad; + const uint8_t *last_src_y = cpi->Last_Source->y_buffer; + const int last_ystride = cpi->Last_Source->y_stride; + last_src_y += (sb_row_index << 6) * last_ystride + (sb_col_index << 6); + block_sad = + cpi->fn_ptr[bsize].sdf(src_y, ystride, last_src_y, last_ystride); + if (block_sad == 0) return 1; + } + return 0; +} + // Update the segmentation map, and related quantities: cyclic refresh map, // refresh sb_index, and target number of blocks to be refreshed.
// The map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or to @@ -368,8 +402,17 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { int sb_col_index = i - sb_row_index * sb_cols; int mi_row = sb_row_index * MI_BLOCK_SIZE; int mi_col = sb_col_index * MI_BLOCK_SIZE; + int flat_static_blocks = 0; + int compute_content = 1; assert(mi_row >= 0 && mi_row < cm->mi_rows); assert(mi_col >= 0 && mi_col < cm->mi_cols); +#if CONFIG_VP9_HIGHBITDEPTH + if (cpi->common.use_highbitdepth) compute_content = 0; +#endif + if (cpi->Last_Source == NULL || + cpi->Last_Source->y_width != cpi->Source->y_width || + cpi->Last_Source->y_height != cpi->Source->y_height) + compute_content = 0; bl_index = mi_row * cm->mi_cols + mi_col; // Loop through all 8x8 blocks in superblock and update map. xmis = @@ -400,11 +443,21 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { // Enforce constant segment over superblock. // If segment is at least half of superblock, set to 1. if (sum_map >= xmis * ymis / 2) { - for (y = 0; y < ymis; y++) - for (x = 0; x < xmis; x++) { - seg_map[bl_index + y * cm->mi_cols + x] = CR_SEGMENT_ID_BOOST1; - } - cr->target_num_seg_blocks += xmis * ymis; + // This superblock is a candidate for refresh: + // compute spatial variance and exclude blocks that are spatially flat + // and stationary. Note: this is currently only done for screen content + // mode. + if (compute_content && cr->skip_flat_static_blocks) + flat_static_blocks = + is_superblock_flat_static(cpi, sb_row_index, sb_col_index); + if (!flat_static_blocks) { + // Label this superblock as segment 1. + for (y = 0; y < ymis; y++) + for (x = 0; x < xmis; x++) { + seg_map[bl_index + y * cm->mi_cols + x] = CR_SEGMENT_ID_BOOST1; + } + cr->target_num_seg_blocks += xmis * ymis; + } } i++; if (i == sbs_in_frame) { @@ -413,7 +466,8 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { } while (cr->target_num_seg_blocks < block_count && i != cr->sb_index); cr->sb_index = i; cr->reduce_refresh = 0; - if (count_sel<(3 * count_tot)>> 2) cr->reduce_refresh = 1; + if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) + if (count_sel<(3 * count_tot)>> 2) cr->reduce_refresh = 1; } // Set cyclic refresh parameters. @@ -425,11 +479,20 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { int target_refresh = 0; double weight_segment_target = 0; double weight_segment = 0; - int thresh_low_motion = (cm->width < 720) ? 55 : 20; + int thresh_low_motion = 20; + int qp_thresh = VPXMIN((cpi->oxcf.content == VP9E_CONTENT_SCREEN) ? 35 : 20, + rc->best_quality << 1); + int qp_max_thresh = 117 * MAXQ >> 7; cr->apply_cyclic_refresh = 1; - if (cm->frame_type == KEY_FRAME || cpi->svc.temporal_layer_id > 0 || + if (frame_is_intra_only(cm) || cpi->svc.temporal_layer_id > 0 || + is_lossless_requested(&cpi->oxcf) || + rc->avg_frame_qindex[INTER_FRAME] < qp_thresh || + (cpi->use_svc && + cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) || (!cpi->use_svc && rc->avg_frame_low_motion < thresh_low_motion && - rc->frames_since_key > 40)) { + rc->frames_since_key > 40) || + (!cpi->use_svc && rc->avg_frame_qindex[INTER_FRAME] > qp_max_thresh && + rc->frames_since_key > 20)) { cr->apply_cyclic_refresh = 0; return; } @@ -454,20 +517,32 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { cr->rate_boost_fac = 13; } } + // For screen-content: keep rate_ratio_qdelta to 2.0 (segment#1 boost) and + // percent_refresh (refresh rate) to 10.
But reduce rate boost for segment#2 + (rate_boost_fac = 10 disables segment#2). + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) { + // Only enable feature of skipping flat_static blocks for top layer + // under screen content mode. + if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) + cr->skip_flat_static_blocks = 1; + cr->percent_refresh = (cr->skip_flat_static_blocks) ? 5 : 10; + // Increase the amount of refresh on scene change that is encoded at max Q, + // increase for a few cycles of the refresh period (~100 / percent_refresh). + if (cr->counter_encode_maxq_scene_change < 30) + cr->percent_refresh = (cr->skip_flat_static_blocks) ? 10 : 15; + cr->rate_ratio_qdelta = 2.0; + cr->rate_boost_fac = 10; + } // Adjust some parameters for low resolutions. - if (cm->width <= 352 && cm->height <= 288) { + if (cm->width * cm->height <= 352 * 288) { if (rc->avg_frame_bandwidth < 3000) { - cr->motion_thresh = 16; + cr->motion_thresh = 64; cr->rate_boost_fac = 13; } else { cr->max_qdelta_perc = 70; cr->rate_ratio_qdelta = VPXMAX(cr->rate_ratio_qdelta, 2.5); } } - if (cpi->svc.spatial_layer_id > 0) { - cr->motion_thresh = 4; - cr->rate_boost_fac = 12; - } if (cpi->oxcf.rc_mode == VPX_VBR) { // To be adjusted for VBR mode, e.g., based on gf period and boost. // For now use smaller qp-delta (than CBR), no second boosted seg, and @@ -492,6 +567,13 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { num8x8bl; if (weight_segment_target < 7 * weight_segment / 8) weight_segment = weight_segment_target; + // For screen-content: don't include target for the weight segment, + // since for all flat areas the segment is reset, so it's more accurate + // to just use the previous actual number of seg blocks for the weight. + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) + weight_segment = + (double)(cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) / + num8x8bl; cr->weight_segment = weight_segment; } @@ -501,23 +583,31 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { const RATE_CONTROL *const rc = &cpi->rc; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; struct segmentation *const seg = &cm->seg; + int scene_change_detected = + cpi->rc.high_source_sad || + (cpi->use_svc && cpi->svc.high_source_sad_superframe); if (cm->current_video_frame == 0) cr->low_content_avg = 0.0; - if (!cr->apply_cyclic_refresh || (cpi->force_update_segmentation)) { + // Reset if resolution change has occurred. + if (cpi->resize_pending != 0) vp9_cyclic_refresh_reset_resize(cpi); + if (!cr->apply_cyclic_refresh || (cpi->force_update_segmentation) || + scene_change_detected) { // Set segmentation map to 0 and disable. unsigned char *const seg_map = cpi->segmentation_map; memset(seg_map, 0, cm->mi_rows * cm->mi_cols); vp9_disable_segmentation(&cm->seg); - if (cm->frame_type == KEY_FRAME) { + if (cm->frame_type == KEY_FRAME || scene_change_detected) { memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); cr->sb_index = 0; cr->reduce_refresh = 0; + cr->counter_encode_maxq_scene_change = 0; } return; } else { int qindex_delta = 0; int qindex2; const double q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth); + cr->counter_encode_maxq_scene_change++; vpx_clear_system_state(); // Set rate threshold to some multiple (set to 2 for now) of the target // rate (target is given by sb64_target_rate and scaled by 256).
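The QP gating added in vp9_cyclic_refresh_update_parameters() above is easier to read with concrete numbers: qp_max_thresh scales MAXQ by 117/128, so with libvpx's usual MAXQ of 255 cyclic refresh shuts off once the running inter-frame QP index climbs above 233, roughly the top tenth of the range. A minimal standalone sketch of that arithmetic (illustrative only, not patch content; the printf wrapper is ours):

#include <stdio.h>

int main(void) {
  const int MAXQ = 255;                      /* libvpx's maximum q index */
  const int qp_max_thresh = 117 * MAXQ >> 7; /* (117 * 255) / 128 = 233 */
  /* The patch skips cyclic refresh when rc->avg_frame_qindex[INTER_FRAME]
   * stays above this threshold well after a key frame. */
  printf("cyclic refresh disabled above q index %d\n", qp_max_thresh);
  return 0;
}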
@@ -567,9 +657,6 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { cr->qindex_delta[2] = qindex_delta; vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta); - // Reset if resoluton change has occurred. - if (cpi->resize_pending != 0) vp9_cyclic_refresh_reset_resize(cpi); - // Update the segmentation and refresh map. cyclic_refresh_update_map(cpi); } @@ -583,8 +670,19 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; memset(cr->map, 0, cm->mi_rows * cm->mi_cols); - memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols); + memset(cr->last_coded_q_map, MAXQ, + cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); cr->sb_index = 0; cpi->refresh_golden_frame = 1; cpi->refresh_alt_ref_frame = 1; + cr->counter_encode_maxq_scene_change = 0; +} + +void vp9_cyclic_refresh_limit_q(const VP9_COMP *cpi, int *q) { + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + // For now apply hard limit to frame-level decrease in q, if the cyclic + // refresh is active (percent_refresh > 0). + if (cr->percent_refresh > 0 && cpi->rc.q_1_frame - *q > 8) { + *q = cpi->rc.q_1_frame - 8; + } } diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/media/libvpx/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h index 77fa67c9e16e..b6d7fdeae77a 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ -#define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ +#ifndef VPX_VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ +#define VPX_VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" @@ -68,6 +68,8 @@ struct CYCLIC_REFRESH { int reduce_refresh; double weight_segment; int apply_cyclic_refresh; + int counter_encode_maxq_scene_change; + int skip_flat_static_blocks; }; struct VP9_COMP; @@ -102,10 +104,6 @@ void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi, int mi_row, int mi_col, BLOCK_SIZE bsize); -// Update the segmentation map, and related quantities: cyclic refresh map, -// refresh sb_index, and target number of blocks to be refreshed. -void vp9_cyclic_refresh_update__map(struct VP9_COMP *const cpi); - // From the just encoded frame: update the actual number of blocks that were // applied the segment delta q, and the amount of low motion in the frame. 
// Also check conditions for forcing golden update, or preventing golden @@ -139,8 +137,10 @@ static INLINE int cyclic_refresh_segment_id(int segment_id) { return CR_SEGMENT_ID_BASE; } +void vp9_cyclic_refresh_limit_q(const struct VP9_COMP *cpi, int *q); + #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ +#endif // VPX_VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_aq_variance.c b/media/libvpx/libvpx/vp9/encoder/vp9_aq_variance.c index 477f62ba5adc..1f9ce2354ce3 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_aq_variance.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_aq_variance.c @@ -19,6 +19,7 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rd.h" +#include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_segmentation.h" #define ENERGY_MIN (-4) @@ -108,7 +109,7 @@ static void aq_variance(const uint8_t *a, int a_stride, const uint8_t *b, #if CONFIG_VP9_HIGHBITDEPTH static void aq_highbd_variance64(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, int w, int h, - uint64_t *sse, uint64_t *sum) { + uint64_t *sse, int64_t *sum) { int i, j; uint16_t *a = CONVERT_TO_SHORTPTR(a8); @@ -127,15 +128,6 @@ static void aq_highbd_variance64(const uint8_t *a8, int a_stride, } } -static void aq_highbd_8_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, int w, int h, - unsigned int *sse, int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - aq_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sse = (unsigned int)sse_long; - *sum = (int)sum_long; -} #endif // CONFIG_VP9_HIGHBITDEPTH static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, @@ -153,11 +145,13 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, int avg; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - aq_highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride, + uint64_t sse64 = 0; + int64_t sum64 = 0; + aq_highbd_variance64(x->plane[0].src.buf, x->plane[0].src.stride, CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh, - &sse, &avg); - sse >>= 2 * (xd->bd - 8); - avg >>= (xd->bd - 8); + &sse64, &sum64); + sse = (unsigned int)(sse64 >> (2 * (xd->bd - 8))); + avg = (int)(sum64 >> (xd->bd - 8)); } else { aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0, bw, bh, &sse, &avg); @@ -192,6 +186,40 @@ double vp9_log_block_var(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { return log(var + 1.0); } +// Get the range of sub block energy values; +void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, + int mi_col, BLOCK_SIZE bsize, int *min_e, + int *max_e) { + VP9_COMMON *const cm = &cpi->common; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + int x, y; + + if (xmis < bw || ymis < bh) { + vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col); + *min_e = vp9_block_energy(cpi, mb, bsize); + *max_e = *min_e; + } else { + int energy; + *min_e = ENERGY_MAX; + *max_e = ENERGY_MIN; + + for (y = 0; y < ymis; ++y) { + for (x = 0; x < xmis; ++x) { + vp9_setup_src_planes(mb, cpi->Source, mi_row + y, mi_col + x); + energy = vp9_block_energy(cpi, mb, BLOCK_8X8); + *min_e = VPXMIN(*min_e, energy); + *max_e = VPXMAX(*max_e, energy); + } + } + } + + // Re-instate source pointers back to what they should have been on entry. 
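+  // (The 8x8 scan above repointed the source planes at each sub-block in
+  // turn; without this call the caller would be left with src pointers
+  // offset to the last sub-block visited.)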
+ vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col); +} + #define DEFAULT_E_MIDPOINT 10.0 int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { double energy; diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_aq_variance.h b/media/libvpx/libvpx/vp9/encoder/vp9_aq_variance.h index 211a69f392cb..a4f872879d70 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_aq_variance.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_aq_variance.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_AQ_VARIANCE_H_ -#define VP9_ENCODER_VP9_AQ_VARIANCE_H_ +#ifndef VPX_VP9_ENCODER_VP9_AQ_VARIANCE_H_ +#define VPX_VP9_ENCODER_VP9_AQ_VARIANCE_H_ #include "vp9/encoder/vp9_encoder.h" @@ -20,11 +20,15 @@ extern "C" { unsigned int vp9_vaq_segment_id(int energy); void vp9_vaq_frame_setup(VP9_COMP *cpi); +void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, + int mi_col, BLOCK_SIZE bsize, int *min_e, + int *max_e); int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); + double vp9_log_block_var(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_AQ_VARIANCE_H_ +#endif // VPX_VP9_ENCODER_VP9_AQ_VARIANCE_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_bitstream.c b/media/libvpx/libvpx/vp9/encoder/vp9_bitstream.c index d346cd57aa04..3eff4ce830d1 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_bitstream.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_bitstream.c @@ -18,6 +18,9 @@ #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem_ops.h" #include "vpx_ports/system_state.h" +#if CONFIG_BITSTREAM_DEBUG +#include "vpx_util/vpx_debug_util.h" +#endif // CONFIG_BITSTREAM_DEBUG #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" @@ -39,8 +42,10 @@ static const struct vp9_token intra_mode_encodings[INTRA_MODES] = { { 0, 1 }, { 6, 3 }, { 28, 5 }, { 30, 5 }, { 58, 6 }, { 59, 6 }, { 126, 7 }, { 127, 7 }, { 62, 6 }, { 2, 2 } }; -static const struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS] = - { { 0, 1 }, { 2, 2 }, { 3, 2 } }; +static const struct vp9_token + switchable_interp_encodings[SWITCHABLE_FILTERS] = { { 0, 1 }, + { 2, 2 }, + { 3, 2 } }; static const struct vp9_token partition_encodings[PARTITION_TYPES] = { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 } }; @@ -86,7 +91,7 @@ static void write_selected_tx_size(const VP9_COMMON *cm, BLOCK_SIZE bsize = xd->mi[0]->sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; const vpx_prob *const tx_probs = - get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); + get_tx_probs(max_tx_size, get_tx_size_context(xd), &cm->fc->tx_probs); vpx_write(w, tx_size != TX_4X4, tx_probs[0]); if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { vpx_write(w, tx_size != TX_8X8, tx_probs[1]); @@ -217,7 +222,8 @@ static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *const xd, } if (is_compound) { - vpx_write(w, mi->ref_frame[0] == GOLDEN_FRAME, + const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + vpx_write(w, mi->ref_frame[!idx] == cm->comp_var_ref[1], vp9_get_pred_prob_comp_ref_p(cm, xd)); } else { const int bit0 = mi->ref_frame[0] != LAST_FRAME; @@ -459,7 +465,8 @@ static void write_modes_sb( write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs, max_mv_magnitude, interp_filter_selected); break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col, subsize, max_mv_magnitude, 
interp_filter_selected); write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs, @@ -469,7 +476,6 @@ static void write_modes_sb( write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col + bs, subsize, max_mv_magnitude, interp_filter_selected); break; - default: assert(0); } } @@ -618,9 +624,10 @@ static void update_coef_probs_common(vpx_writer *const bc, VP9_COMP *cpi, return; } - case ONE_LOOP_REDUCED: { + default: { int updates = 0; int noupdates_before_first = 0; + assert(cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED); for (i = 0; i < PLANE_TYPES; ++i) { for (j = 0; j < REF_TYPES; ++j) { for (k = 0; k < COEF_BANDS; ++k) { @@ -670,7 +677,6 @@ static void update_coef_probs_common(vpx_writer *const bc, VP9_COMP *cpi, } return; } - default: assert(0); } } @@ -909,10 +915,24 @@ int vp9_get_refresh_mask(VP9_COMP *cpi) { (cpi->refresh_golden_frame << cpi->alt_fb_idx); } else { int arf_idx = cpi->alt_fb_idx; - if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - arf_idx = gf_group->arf_update_idx[gf_group->index]; + GF_GROUP *const gf_group = &cpi->twopass.gf_group; + + if (cpi->multi_layer_arf) { + for (arf_idx = 0; arf_idx < REF_FRAMES; ++arf_idx) { + if (arf_idx != cpi->alt_fb_idx && arf_idx != cpi->lst_fb_idx && + arf_idx != cpi->gld_fb_idx) { + int idx; + for (idx = 0; idx < gf_group->stack_size; ++idx) + if (arf_idx == gf_group->arf_index_stack[idx]) break; + if (idx == gf_group->stack_size) break; + } + } } + cpi->twopass.gf_group.top_arf_idx = arf_idx; + + if (cpi->use_svc && cpi->svc.use_set_ref_frame_config && + cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) + return cpi->svc.update_buffer_slot[cpi->svc.spatial_layer_id]; return (cpi->refresh_last_frame << cpi->lst_fb_idx) | (cpi->refresh_golden_frame << cpi->gld_fb_idx) | (cpi->refresh_alt_ref_frame << arf_idx); @@ -1117,11 +1137,7 @@ static void write_frame_size_with_refs(VP9_COMP *cpi, ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || (cpi->svc.number_spatial_layers > 1 && - cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) || - (is_two_pass_svc(cpi) && - cpi->svc.encode_empty_frame_state == ENCODING && - cpi->svc.layer_context[0].frames_from_key_frame < - cpi->svc.number_temporal_layers + 1))) { + cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame))) { found = 0; } else if (cfg != NULL) { found = @@ -1153,8 +1169,10 @@ static void write_profile(BITSTREAM_PROFILE profile, case PROFILE_0: vpx_wb_write_literal(wb, 0, 2); break; case PROFILE_1: vpx_wb_write_literal(wb, 2, 2); break; case PROFILE_2: vpx_wb_write_literal(wb, 1, 2); break; - case PROFILE_3: vpx_wb_write_literal(wb, 6, 3); break; - default: assert(0); + default: + assert(profile == PROFILE_3); + vpx_wb_write_literal(wb, 6, 3); + break; } } @@ -1191,7 +1209,13 @@ static void write_uncompressed_header(VP9_COMP *cpi, write_profile(cm->profile, wb); - vpx_wb_write_bit(wb, 0); // show_existing_frame + // If to use show existing frame. 
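+  // In the VP9 uncompressed header a set show_existing_frame flag is
+  // followed only by a 3-bit index of the reference buffer to display;
+  // no further header fields or compressed data are coded, hence the
+  // early return below.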
+ vpx_wb_write_bit(wb, cm->show_existing_frame); + if (cm->show_existing_frame) { + vpx_wb_write_literal(wb, cpi->alt_fb_idx, 3); + return; + } + vpx_wb_write_bit(wb, cm->frame_type); vpx_wb_write_bit(wb, cm->show_frame); vpx_wb_write_bit(wb, cm->error_resilient_mode); @@ -1201,14 +1225,6 @@ static void write_uncompressed_header(VP9_COMP *cpi, write_bitdepth_colorspace_sampling(cm, wb); write_frame_size(cm, wb); } else { - // In spatial svc if it's not error_resilient_mode then we need to code all - // visible frames as invisible. But we need to keep the show_frame flag so - // that the publisher could know whether it is supposed to be visible. - // So we will code the show_frame flag as it is. Then code the intra_only - // bit here. This will make the bitstream incompatible. In the player we - // will change to show_frame flag to 0, then add an one byte frame with - // show_existing_frame flag which tells the decoder which frame we want to - // show. if (!cm->show_frame) vpx_wb_write_bit(wb, cm->intra_only); if (!cm->error_resilient_mode) @@ -1340,7 +1356,20 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) { struct vpx_write_bit_buffer wb = { data, 0 }; struct vpx_write_bit_buffer saved_wb; +#if CONFIG_BITSTREAM_DEBUG + bitstream_queue_reset_write(); +#endif + write_uncompressed_header(cpi, &wb); + + // Skip the rest coding process if use show existing frame. + if (cpi->common.show_existing_frame) { + uncompressed_hdr_size = vpx_wb_bytes_written(&wb); + data += uncompressed_hdr_size; + *size = data - dest; + return; + } + saved_wb = wb; vpx_wb_write_literal(&wb, 0, 16); // don't know in advance first part. size diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_bitstream.h b/media/libvpx/libvpx/vp9/encoder/vp9_bitstream.h index 339c3fecb13f..208651dc227c 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_bitstream.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_bitstream.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_BITSTREAM_H_ -#define VP9_ENCODER_VP9_BITSTREAM_H_ +#ifndef VPX_VP9_ENCODER_VP9_BITSTREAM_H_ +#define VPX_VP9_ENCODER_VP9_BITSTREAM_H_ #ifdef __cplusplus extern "C" { @@ -38,16 +38,12 @@ void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi); void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size); static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) { - return !cpi->multi_arf_allowed && cpi->refresh_golden_frame && - cpi->rc.is_src_frame_alt_ref && - (!cpi->use_svc || // Add spatial svc base layer case here - (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id == 0 && - cpi->svc.layer_context[0].gold_ref_idx >= 0 && - cpi->oxcf.ss_enable_auto_arf[0])); + return cpi->refresh_golden_frame && cpi->rc.is_src_frame_alt_ref && + !cpi->use_svc; } #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_BITSTREAM_H_ +#endif // VPX_VP9_ENCODER_VP9_BITSTREAM_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_block.h b/media/libvpx/libvpx/vp9/encoder/vp9_block.h index 724205dd578f..37a4605ad8ca 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_block.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_block.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_ENCODER_VP9_BLOCK_H_ -#define VP9_ENCODER_VP9_BLOCK_H_ +#ifndef VPX_VP9_ENCODER_VP9_BLOCK_H_ +#define VPX_VP9_ENCODER_VP9_BLOCK_H_ #include "vpx_util/vpx_thread.h" @@ -34,8 +34,8 @@ struct macroblock_plane { struct buf_2d src; // Quantizer setings + DECLARE_ALIGNED(16, int16_t, round_fp[8]); int16_t *quant_fp; - int16_t *round_fp; int16_t *quant; int16_t *quant_shift; int16_t *zbin; @@ -92,6 +92,8 @@ struct macroblock { int sadperbit4; int rddiv; int rdmult; + int cb_rdmult; + int segment_id; int mb_energy; // These are set to their default values at the beginning, and then adjusted @@ -115,6 +117,12 @@ struct macroblock { int *nmvsadcost_hp[2]; int **mvsadcost; + // sharpness is used to disable skip mode and change rd_mult + int sharpness; + + // aq mode is used to adjust rd based on segment. + int adjust_rdmult_by_segment; + // These define limits to motion vector components to prevent them // from extending outside the UMV borders MvLimits mv_limits; @@ -180,6 +188,8 @@ struct macroblock { int sb_pickmode_part; + int zero_temp_sad_source; + // For each superblock: saves the content value (e.g., low/high sad/sumdiff) // based on source sad, prior to encoding the frame. uint8_t content_state_sb; @@ -199,10 +209,13 @@ struct macroblock { void (*highbd_inv_txfm_add)(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd); #endif + DECLARE_ALIGNED(16, uint8_t, est_pred[64 * 64]); + + struct scale_factors *me_sf; }; #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_BLOCK_H_ +#endif // VPX_VP9_ENCODER_VP9_BLOCK_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_blockiness.c b/media/libvpx/libvpx/vp9/encoder/vp9_blockiness.c index 9ab57b57c761..da68a3c3c3dc 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_blockiness.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_blockiness.c @@ -11,6 +11,7 @@ #include "vpx/vpx_integer.h" #include "vpx_ports/system_state.h" +#include "vp9/encoder/vp9_blockiness.h" static int horizontal_filter(const uint8_t *s) { return (s[1] - s[-2]) * 2 + (s[-1] - s[0]) * 6; diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_blockiness.h b/media/libvpx/libvpx/vp9/encoder/vp9_blockiness.h new file mode 100644 index 000000000000..e840cb251806 --- /dev/null +++ b/media/libvpx/libvpx/vp9/encoder/vp9_blockiness.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VP9_ENCODER_VP9_BLOCKINESS_H_ +#define VPX_VP9_ENCODER_VP9_BLOCKINESS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +double vp9_get_blockiness(const uint8_t *img1, int img1_pitch, + const uint8_t *img2, int img2_pitch, int width, + int height); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_ENCODER_VP9_BLOCKINESS_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_context_tree.c b/media/libvpx/libvpx/vp9/encoder/vp9_context_tree.c index 2f7e54433256..b74b9027cab8 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_context_tree.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_context_tree.c @@ -12,7 +12,10 @@ #include "vp9/encoder/vp9_encoder.h" static const BLOCK_SIZE square[] = { - BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64, + BLOCK_8X8, + BLOCK_16X16, + BLOCK_32X32, + BLOCK_64X64, }; static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk, @@ -136,17 +139,22 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, ThreadData *td) { } void vp9_free_pc_tree(ThreadData *td) { - const int tree_nodes = 64 + 16 + 4 + 1; int i; - // Set up all 4x4 mode contexts - for (i = 0; i < 64; ++i) free_mode_context(&td->leaf_tree[i]); + if (td == NULL) return; - // Sets up all the leaf nodes in the tree. - for (i = 0; i < tree_nodes; ++i) free_tree_contexts(&td->pc_tree[i]); + if (td->leaf_tree != NULL) { + // Set up all 4x4 mode contexts + for (i = 0; i < 64; ++i) free_mode_context(&td->leaf_tree[i]); + vpx_free(td->leaf_tree); + td->leaf_tree = NULL; + } - vpx_free(td->pc_tree); - td->pc_tree = NULL; - vpx_free(td->leaf_tree); - td->leaf_tree = NULL; + if (td->pc_tree != NULL) { + const int tree_nodes = 64 + 16 + 4 + 1; + // Sets up all the leaf nodes in the tree. + for (i = 0; i < tree_nodes; ++i) free_tree_contexts(&td->pc_tree[i]); + vpx_free(td->pc_tree); + td->pc_tree = NULL; + } } diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_context_tree.h b/media/libvpx/libvpx/vp9/encoder/vp9_context_tree.h index 73423c075874..4e301cc17dfd 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_context_tree.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_context_tree.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_CONTEXT_TREE_H_ -#define VP9_ENCODER_VP9_CONTEXT_TREE_H_ +#ifndef VPX_VP9_ENCODER_VP9_CONTEXT_TREE_H_ +#define VPX_VP9_ENCODER_VP9_CONTEXT_TREE_H_ #include "vp9/common/vp9_blockd.h" #include "vp9/encoder/vp9_block.h" @@ -56,6 +56,7 @@ typedef struct { // scope of refactoring. int rate; int64_t dist; + int64_t rdcost; #if CONFIG_VP9_TEMPORAL_DENOISING unsigned int newmv_sse; @@ -75,6 +76,8 @@ typedef struct { // Used for the machine learning-based early termination int32_t sum_y_eobs; + // Skip certain ref frames during RD search of rectangular partitions. + uint8_t skip_ref_frame_mask; } PICK_MODE_CONTEXT; typedef struct PC_TREE { @@ -88,6 +91,9 @@ typedef struct PC_TREE { struct PC_TREE *split[4]; PICK_MODE_CONTEXT *leaf_split[4]; }; + // Obtained from a simple motion search. Used by the ML based partition search + // speed feature. 
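+  // (MV is the int16_t {row, col} pair defined in vp9/common/vp9_mv.h.)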
+ MV mv; } PC_TREE; void vp9_setup_pc_tree(struct VP9Common *cm, struct ThreadData *td); @@ -97,4 +103,4 @@ void vp9_free_pc_tree(struct ThreadData *td); } // extern "C" #endif -#endif /* VP9_ENCODER_VP9_CONTEXT_TREE_H_ */ +#endif // VPX_VP9_ENCODER_VP9_CONTEXT_TREE_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_cost.h b/media/libvpx/libvpx/vp9/encoder/vp9_cost.h index 70a1a2e0e935..638d72a91616 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_cost.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_cost.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_COST_H_ -#define VP9_ENCODER_VP9_COST_H_ +#ifndef VPX_VP9_ENCODER_VP9_COST_H_ +#define VPX_VP9_ENCODER_VP9_COST_H_ #include "vpx_dsp/prob.h" #include "vpx/vpx_integer.h" @@ -55,4 +55,4 @@ void vp9_cost_tokens_skip(int *costs, const vpx_prob *probs, vpx_tree tree); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_COST_H_ +#endif // VPX_VP9_ENCODER_VP9_COST_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_dct.c b/media/libvpx/libvpx/vp9/encoder/vp9_dct.c index 5c66562a569f..2f42c6afc223 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_dct.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_dct.c @@ -554,109 +554,6 @@ void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, } } -void vp9_fdct8x8_quant_c(const int16_t *input, int stride, - tran_low_t *coeff_ptr, intptr_t n_coeffs, - int skip_block, const int16_t *round_ptr, - const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan) { - int eob = -1; - - int i, j; - tran_low_t intermediate[64]; - - (void)iscan; - - // Transform columns - { - tran_low_t *output = intermediate; - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 - tran_high_t t0, t1, t2, t3; // needs32 - tran_high_t x0, x1, x2, x3; // canbe16 - - int i; - for (i = 0; i < 8; i++) { - // stage 1 - s0 = (input[0 * stride] + input[7 * stride]) * 4; - s1 = (input[1 * stride] + input[6 * stride]) * 4; - s2 = (input[2 * stride] + input[5 * stride]) * 4; - s3 = (input[3 * stride] + input[4 * stride]) * 4; - s4 = (input[3 * stride] - input[4 * stride]) * 4; - s5 = (input[2 * stride] - input[5 * stride]) * 4; - s6 = (input[1 * stride] - input[6 * stride]) * 4; - s7 = (input[0 * stride] - input[7 * stride]) * 4; - - // fdct4(step, step); - x0 = s0 + s3; - x1 = s1 + s2; - x2 = s1 - s2; - x3 = s0 - s3; - t0 = (x0 + x1) * cospi_16_64; - t1 = (x0 - x1) * cospi_16_64; - t2 = x2 * cospi_24_64 + x3 * cospi_8_64; - t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; - output[0 * 8] = (tran_low_t)fdct_round_shift(t0); - output[2 * 8] = (tran_low_t)fdct_round_shift(t2); - output[4 * 8] = (tran_low_t)fdct_round_shift(t1); - output[6 * 8] = (tran_low_t)fdct_round_shift(t3); - - // Stage 2 - t0 = (s6 - s5) * cospi_16_64; - t1 = (s6 + s5) * cospi_16_64; - t2 = fdct_round_shift(t0); - t3 = fdct_round_shift(t1); - - // Stage 3 - x0 = s4 + t2; - x1 = s4 - t2; - x2 = s7 - t3; - x3 = s7 + t3; - - // Stage 4 - t0 = x0 * cospi_28_64 + x3 * cospi_4_64; - t1 = x1 * cospi_12_64 + x2 * cospi_20_64; - t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; - t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - output[1 * 8] = (tran_low_t)fdct_round_shift(t0); - output[3 * 8] = (tran_low_t)fdct_round_shift(t2); - output[5 * 8] = (tran_low_t)fdct_round_shift(t1); - output[7 * 8] = (tran_low_t)fdct_round_shift(t3); - input++; - output++; - } - } - - // Rows - for (i = 0; i < 8; ++i) { - fdct8(&intermediate[i * 8], 
&coeff_ptr[i * 8]); - for (j = 0; j < 8; ++j) coeff_ptr[j + i * 8] /= 2; - } - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - // Quantization pass: All coefficients with index >= zero_flag are - // skippable. Note: zero_flag can be zero. - for (i = 0; i < n_coeffs; i++) { - const int rc = scan[i]; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - - int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); - tmp = (tmp * quant_ptr[rc != 0]) >> 16; - - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; - - if (tmp) eob = i; - } - } - *eob_ptr = eob + 1; -} - void vp9_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { if (tx_type == DCT_DCT) { diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_denoiser.c b/media/libvpx/libvpx/vp9/encoder/vp9_denoiser.c index b08ccaa66ca8..2885223b59e4 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_denoiser.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_denoiser.c @@ -189,7 +189,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation( int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx, int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv, int num_spatial_layers, int width, int lst_fb_idx, int gld_fb_idx, - int use_svc, int spatial_layer) { + int use_svc, int spatial_layer, int use_gf_temporal_ref) { const int sse_diff = (ctx->newmv_sse == UINT_MAX) ? 0 : ((int)ctx->zeromv_sse - (int)ctx->newmv_sse); @@ -201,7 +201,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation( int i; struct buf_2d saved_dst[MAX_MB_PLANE]; struct buf_2d saved_pre[MAX_MB_PLANE]; - RefBuffer *saved_block_refs[2]; + const RefBuffer *saved_block_refs[2]; MV_REFERENCE_FRAME saved_frame; frame = ctx->best_reference_frame; @@ -219,8 +219,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation( // If the best reference frame uses inter-prediction and there is enough of a // difference in sum-squared-error, use it. - if (frame != INTRA_FRAME && frame != ALTREF_FRAME && - (frame != GOLDEN_FRAME || num_spatial_layers == 1) && + if (frame != INTRA_FRAME && frame != ALTREF_FRAME && frame != GOLDEN_FRAME && sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) { mi->ref_frame[0] = ctx->best_reference_frame; mi->mode = ctx->best_sse_inter_mode; @@ -230,7 +229,9 @@ static VP9_DENOISER_DECISION perform_motion_compensation( frame = ctx->best_zeromv_reference_frame; ctx->newmv_sse = ctx->zeromv_sse; // Bias to last reference. - if (num_spatial_layers > 1 || frame == ALTREF_FRAME || + if ((num_spatial_layers > 1 && !use_gf_temporal_ref) || + frame == ALTREF_FRAME || + (frame == GOLDEN_FRAME && use_gf_temporal_ref) || (frame != LAST_FRAME && ((ctx->zeromv_lastref_sse<(5 * ctx->zeromv_sse)>> 2) || denoiser->denoising_level >= kDenHigh))) { @@ -261,6 +262,14 @@ static VP9_DENOISER_DECISION perform_motion_compensation( denoise_layer_idx = num_spatial_layers - spatial_layer - 1; } + // Force copy (no denoise, copy source in denoised buffer) if + // running_avg_y[frame] is NULL. 
+ if (denoiser->running_avg_y[frame].buffer_alloc == NULL) { + // Restore everything to its original state + *mi = saved_mi; + return COPY_BLOCK; + } + if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) { // Restore everything to its original state *mi = saved_mi; @@ -326,7 +335,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation( void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx, - VP9_DENOISER_DECISION *denoiser_decision) { + VP9_DENOISER_DECISION *denoiser_decision, + int use_gf_temporal_ref) { int mv_col, mv_row; int motion_magnitude = 0; int zeromv_filter = 0; @@ -349,6 +359,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, int is_skin = 0; int increase_denoising = 0; int consec_zeromv = 0; + int last_is_reference = cpi->ref_frame_flags & VP9_LAST_FLAG; mv_col = ctx->best_sse_mv.as_mv.col; mv_row = ctx->best_sse_mv.as_mv.row; motion_magnitude = mv_row * mv_row + mv_col * mv_col; @@ -379,7 +390,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, // zero/small motion in skin detection is high, i.e, > 4). if (consec_zeromv < 4) { i = ymis; - j = xmis; + break; } } } @@ -392,12 +403,18 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, } if (!is_skin && denoiser->denoising_level == kDenHigh) increase_denoising = 1; - if (denoiser->denoising_level >= kDenLow && !ctx->sb_skip_denoising) + // Copy block if LAST_FRAME is not a reference. + // Last doesn't always exist when SVC layers are dynamically changed, e.g. top + // spatial layer doesn't have last reference when it's brought up for the + // first time on the fly. + if (last_is_reference && denoiser->denoising_level >= kDenLow && + !ctx->sb_skip_denoising) decision = perform_motion_compensation( &cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx, motion_magnitude, is_skin, &zeromv_filter, consec_zeromv, cpi->svc.number_spatial_layers, cpi->Source->y_width, cpi->lst_fb_idx, - cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id); + cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id, + use_gf_temporal_ref); if (decision == FILTER_BLOCK) { decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start, @@ -445,16 +462,16 @@ static void swap_frame_buffer(YV12_BUFFER_CONFIG *const dest, } void vp9_denoiser_update_frame_info( - VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, - int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame, - int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized, - int svc_base_is_key, int second_spatial_layer) { + VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, struct SVC *svc, + FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame, + int refresh_last_frame, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, + int resized, int svc_refresh_denoiser_buffers, int second_spatial_layer) { const int shift = second_spatial_layer ? denoiser->num_ref_frames : 0; // Copy source into denoised reference buffers on KEY_FRAME or // if the just encoded frame was resized. For SVC, copy source if the base // spatial layer was key frame. 
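Editor's note on the denoiser flow above: perform_motion_compensation() only switches from the zero-motion running average to the motion-compensated candidate when that candidate wins by a clear sum-squared-error margin; otherwise it biases toward LAST. A minimal sketch of the core decision, with made-up threshold and SSE values (sse_diff_thresh() in the real code scales with block size, denoising strength, and motion magnitude):

    /* Sketch only: use_motion_compensation() and its threshold are
     * illustrative, not libvpx API. Mirrors the ctx->newmv_sse == UINT_MAX
     * "no inter candidate" sentinel used above. */
    #include <limits.h>

    static int use_motion_compensation(unsigned int zeromv_sse,
                                       unsigned int newmv_sse,
                                       int sse_diff_thresh_example) {
      const int sse_diff =
          (newmv_sse == UINT_MAX) ? 0 : (int)zeromv_sse - (int)newmv_sse;
      /* Only pay for motion compensation when it clearly beats zero motion. */
      return sse_diff > sse_diff_thresh_example;
    }

With zeromv_sse = 9000, newmv_sse = 2500, and a threshold of 4000, sse_diff = 6500 and the motion-compensated reference is used for denoising; anything closer falls back to the cheaper zero-motion path.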
if (frame_type == KEY_FRAME || resized != 0 || denoiser->reset || - svc_base_is_key) { + svc_refresh_denoiser_buffers) { int i; // Start at 1 so as not to overwrite the INTRA_FRAME for (i = 1; i < denoiser->num_ref_frames; ++i) { @@ -465,32 +482,43 @@ void vp9_denoiser_update_frame_info( return; } - // If more than one refresh occurs, must copy frame buffer. - if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > 1) { - if (refresh_alt_ref_frame) { - copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1 + shift], - &denoiser->running_avg_y[INTRA_FRAME + shift]); - } - if (refresh_golden_frame) { - copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1 + shift], - &denoiser->running_avg_y[INTRA_FRAME + shift]); - } - if (refresh_last_frame) { - copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1 + shift], - &denoiser->running_avg_y[INTRA_FRAME + shift]); + if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + svc->use_set_ref_frame_config) { + int i; + for (i = 0; i < REF_FRAMES; i++) { + if (svc->update_buffer_slot[svc->spatial_layer_id] & (1 << i)) + copy_frame(&denoiser->running_avg_y[i + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); } } else { - if (refresh_alt_ref_frame) { - swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1 + shift], - &denoiser->running_avg_y[INTRA_FRAME + shift]); - } - if (refresh_golden_frame) { - swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1 + shift], - &denoiser->running_avg_y[INTRA_FRAME + shift]); - } - if (refresh_last_frame) { - swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1 + shift], - &denoiser->running_avg_y[INTRA_FRAME + shift]); + // If more than one refresh occurs, must copy frame buffer. + if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > + 1) { + if (refresh_alt_ref_frame) { + copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); + } + if (refresh_golden_frame) { + copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); + } + if (refresh_last_frame) { + copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); + } + } else { + if (refresh_alt_ref_frame) { + swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); + } + if (refresh_golden_frame) { + swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); + } + if (refresh_last_frame) { + swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); + } } } } @@ -539,26 +567,38 @@ static int vp9_denoiser_realloc_svc_helper(VP9_COMMON *cm, } int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser, - int svc_buf_shift, int refresh_alt, - int refresh_gld, int refresh_lst, int alt_fb_idx, - int gld_fb_idx, int lst_fb_idx) { + struct SVC *svc, int svc_buf_shift, + int refresh_alt, int refresh_gld, int refresh_lst, + int alt_fb_idx, int gld_fb_idx, int lst_fb_idx) { int fail = 0; - if (refresh_alt) { - // Increase the frame buffer index by 1 to map it to the buffer index in the - // denoiser. 
- fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, - alt_fb_idx + 1 + svc_buf_shift); - if (fail) return 1; - } - if (refresh_gld) { - fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, - gld_fb_idx + 1 + svc_buf_shift); - if (fail) return 1; - } - if (refresh_lst) { - fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, - lst_fb_idx + 1 + svc_buf_shift); - if (fail) return 1; + if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + svc->use_set_ref_frame_config) { + int i; + for (i = 0; i < REF_FRAMES; i++) { + if (cm->frame_type == KEY_FRAME || + svc->update_buffer_slot[svc->spatial_layer_id] & (1 << i)) { + fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, + i + 1 + svc_buf_shift); + } + } + } else { + if (refresh_alt) { + // Increase the frame buffer index by 1 to map it to the buffer index in + // the denoiser. + fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, + alt_fb_idx + 1 + svc_buf_shift); + if (fail) return 1; + } + if (refresh_gld) { + fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, + gld_fb_idx + 1 + svc_buf_shift); + if (fail) return 1; + } + if (refresh_lst) { + fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, + lst_fb_idx + 1 + svc_buf_shift); + if (fail) return 1; + } } return 0; } @@ -648,9 +688,10 @@ int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser, make_grayscale(&denoiser->running_avg_y[i]); #endif denoiser->frame_buffer_initialized = 1; - denoiser->denoising_level = kDenLow; - denoiser->prev_denoising_level = kDenLow; + denoiser->denoising_level = kDenMedium; + denoiser->prev_denoising_level = kDenMedium; denoiser->reset = 0; + denoiser->current_denoiser_frame = 0; return 0; } @@ -675,13 +716,29 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser) { vpx_free_frame_buffer(&denoiser->last_source); } -void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level) { +static void force_refresh_longterm_ref(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + // If long term reference is used, force refresh of that slot, so + // denoiser buffer for long term reference stays in sync. + if (svc->use_gf_temporal_ref_current_layer) { + int index = svc->spatial_layer_id; + if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1; + assert(index >= 0); + cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx; + cpi->refresh_alt_ref_frame = 1; + } +} + +void vp9_denoiser_set_noise_level(VP9_COMP *const cpi, int noise_level) { + VP9_DENOISER *const denoiser = &cpi->denoiser; denoiser->denoising_level = noise_level; if (denoiser->denoising_level > kDenLowLow && - denoiser->prev_denoising_level == kDenLowLow) + denoiser->prev_denoising_level == kDenLowLow) { denoiser->reset = 1; - else + force_refresh_longterm_ref(cpi); + } else { denoiser->reset = 0; + } denoiser->prev_denoising_level = denoiser->denoising_level; } @@ -713,6 +770,56 @@ int64_t vp9_scale_acskip_thresh(int64_t threshold, return threshold; } +void vp9_denoiser_reset_on_first_frame(VP9_COMP *const cpi) { + if (vp9_denoise_svc_non_key(cpi) && + cpi->denoiser.current_denoiser_frame == 0) { + cpi->denoiser.reset = 1; + force_refresh_longterm_ref(cpi); + } +} + +void vp9_denoiser_update_ref_frame(VP9_COMP *const cpi) { + VP9_COMMON *const cm = &cpi->common; + SVC *const svc = &cpi->svc; + + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && + cpi->denoiser.denoising_level > kDenLowLow) { + int svc_refresh_denoiser_buffers = 0; + int denoise_svc_second_layer = 0; + FRAME_TYPE frame_type = cm->intra_only ? 
KEY_FRAME : cm->frame_type; + cpi->denoiser.current_denoiser_frame++; + if (cpi->use_svc) { + const int svc_buf_shift = + svc->number_spatial_layers - svc->spatial_layer_id == 2 + ? cpi->denoiser.num_ref_frames + : 0; + int layer = + LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id, + svc->number_temporal_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + svc_refresh_denoiser_buffers = + lc->is_key_frame || svc->spatial_layer_sync[svc->spatial_layer_id]; + denoise_svc_second_layer = + svc->number_spatial_layers - svc->spatial_layer_id == 2 ? 1 : 0; + // Check if we need to allocate extra buffers in the denoiser + // for refreshed frames. + if (vp9_denoiser_realloc_svc(cm, &cpi->denoiser, svc, svc_buf_shift, + cpi->refresh_alt_ref_frame, + cpi->refresh_golden_frame, + cpi->refresh_last_frame, cpi->alt_fb_idx, + cpi->gld_fb_idx, cpi->lst_fb_idx)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to re-allocate denoiser for SVC"); + } + vp9_denoiser_update_frame_info( + &cpi->denoiser, *cpi->Source, svc, frame_type, + cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame, + cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx, + cpi->lst_fb_idx, cpi->resize_pending, svc_refresh_denoiser_buffers, + denoise_svc_second_layer); + } +} + #ifdef OUTPUT_YUV_DENOISED static void make_grayscale(YV12_BUFFER_CONFIG *yuv) { int r, c; diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_denoiser.h b/media/libvpx/libvpx/vp9/encoder/vp9_denoiser.h index f4da24cbf616..1973e989886d 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_denoiser.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_denoiser.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_DENOISER_H_ -#define VP9_ENCODER_DENOISER_H_ +#ifndef VPX_VP9_ENCODER_VP9_DENOISER_H_ +#define VPX_VP9_ENCODER_VP9_DENOISER_H_ #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_skin_detection.h" @@ -50,6 +50,7 @@ typedef struct vp9_denoiser { int reset; int num_ref_frames; int num_layers; + unsigned int current_denoiser_frame; VP9_DENOISER_LEVEL denoising_level; VP9_DENOISER_LEVEL prev_denoising_level; } VP9_DENOISER; @@ -70,14 +71,15 @@ struct VP9_COMP; struct SVC; void vp9_denoiser_update_frame_info( - VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, - int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame, - int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized, - int svc_base_is_key, int second_spatial_layer); + VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, struct SVC *svc, + FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame, + int refresh_last_frame, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, + int resized, int svc_refresh_denoiser_buffers, int second_spatial_layer); void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx, - VP9_DENOISER_DECISION *denoiser_decision); + VP9_DENOISER_DECISION *denoiser_decision, + int use_gf_temporal_ref); void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx); @@ -86,9 +88,9 @@ void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse, PICK_MODE_CONTEXT *ctx); int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser, - int svc_buf_shift, int refresh_alt, - int refresh_gld, int refresh_lst, int alt_fb_idx, - int gld_fb_idx, int lst_fb_idx); + struct SVC *svc, int svc_buf_shift, + int refresh_alt, int refresh_gld, int refresh_lst, + int alt_fb_idx, int 
gld_fb_idx, int lst_fb_idx); int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser, int use_svc, int noise_sen, int width, int height, @@ -110,7 +112,9 @@ static INLINE int total_adj_strong_thresh(BLOCK_SIZE bs, void vp9_denoiser_free(VP9_DENOISER *denoiser); -void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level); +void vp9_denoiser_set_noise_level(struct VP9_COMP *const cpi, int noise_level); + +void vp9_denoiser_reset_on_first_frame(struct VP9_COMP *const cpi); int64_t vp9_scale_part_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level, int content_state, int temporal_layer_id); @@ -119,8 +123,10 @@ int64_t vp9_scale_acskip_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level, int abs_sumdiff, int temporal_layer_id); +void vp9_denoiser_update_ref_frame(struct VP9_COMP *const cpi); + #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_DENOISER_H_ +#endif // VPX_VP9_ENCODER_VP9_DENOISER_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_encodeframe.c b/media/libvpx/libvpx/vp9/encoder/vp9_encodeframe.c index 682477df18b5..9eddf545eeda 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_encodeframe.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <float.h> #include <limits.h> #include <math.h> #include <stdio.h> @@ -21,6 +22,10 @@ #include "vpx_ports/vpx_timer.h" #include "vpx_ports/system_state.h" +#if CONFIG_MISMATCH_DEBUG +#include "vpx_util/vpx_debug_util.h" +#endif // CONFIG_MISMATCH_DEBUG + #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" @@ -32,16 +37,21 @@ #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_tile_common.h" - +#if !CONFIG_REALTIME_ONLY #include "vp9/encoder/vp9_aq_360.h" #include "vp9/encoder/vp9_aq_complexity.h" +#endif #include "vp9/encoder/vp9_aq_cyclicrefresh.h" +#if !CONFIG_REALTIME_ONLY #include "vp9/encoder/vp9_aq_variance.h" +#endif #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_ethread.h" #include "vp9/encoder/vp9_extend.h" +#include "vp9/encoder/vp9_multi_thread.h" +#include "vp9/encoder/vp9_partition_models.h" #include "vp9/encoder/vp9_pickmode.h" #include "vp9/encoder/vp9_rd.h" #include "vp9/encoder/vp9_rdopt.h" @@ -52,33 +62,6 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); -// Machine learning-based early termination parameters.
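The block being deleted here (train_mean, train_stdm, and classifiers, continued just below) held the parameters of a hand-rolled linear classifier for partition-search early termination; later in this patch it is superseded by ml_pruning_partition(), which pairs a linear model (vp9_partition_linear_weights) with a small neural net evaluated by nn_predict(). The retired scoring reduced to a standardized dot product, sketched here with our own names (the removed compute_score() further down also folds the feature standardization into the same expression):

    /* Sketch of the removed linear early-termination score: seven features,
     * per-block-size weights clf[0..6], bias clf[7]. feat[] is assumed to be
     * pre-standardized with the train_mean/train_stdm tables. */
    static double linear_early_term_score(const double clf[8],
                                          const double feat[7]) {
      double score = clf[7]; /* bias */
      int i;
      for (i = 0; i < 7; ++i) score += clf[i] * feat[i];
      return score;
    }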
-static const double train_mean[24] = { - 303501.697372, 3042630.372158, 24.694696, 1.392182, - 689.413511, 162.027012, 1.478213, 0.0, - 135382.260230, 912738.513263, 28.845217, 1.515230, - 544.158492, 131.807995, 1.436863, 0.0, - 43682.377587, 208131.711766, 28.084737, 1.356677, - 138.254122, 119.522553, 1.252322, 0.0 -}; - -static const double train_stdm[24] = { - 673689.212982, 5996652.516628, 0.024449, 1.989792, - 985.880847, 0.014638, 2.001898, 0.0, - 208798.775332, 1812548.443284, 0.018693, 1.838009, - 396.986910, 0.015657, 1.332541, 0.0, - 55888.847031, 448587.962714, 0.017900, 1.904776, - 98.652832, 0.016598, 1.320992, 0.0 -}; - -// Error tolerance: 0.01%-0.0.05%-0.1% -static const double classifiers[24] = { - 0.111736, 0.289977, 0.042219, 0.204765, 0.120410, -0.143863, - 0.282376, 0.847811, 0.637161, 0.131570, 0.018636, 0.202134, - 0.112797, 0.028162, 0.182450, 1.124367, 0.386133, 0.083700, - 0.050028, 0.150873, 0.061119, 0.109318, 0.127255, 0.625211 -}; - // This is used as a reference when computing the source variance for the // purpose of activity masking. // Eventually this should be replaced by custom no-reference routines, @@ -176,6 +159,7 @@ unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi, } #endif // CONFIG_VP9_HIGHBITDEPTH +#if !CONFIG_REALTIME_ONLY static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi, const struct buf_2d *ref, int mi_row, int mi_col, @@ -204,6 +188,72 @@ static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x, else return BLOCK_8X8; } +#endif // !CONFIG_REALTIME_ONLY + +static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row, + int mi_col, BLOCK_SIZE bsize, int segment_index) { + VP9_COMMON *const cm = &cpi->common; + const struct segmentation *const seg = &cm->seg; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *mi = xd->mi[0]; + + const AQ_MODE aq_mode = cpi->oxcf.aq_mode; + const uint8_t *const map = + seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; + + // Initialize the segmentation index as 0. + mi->segment_id = 0; + + // Skip the rest if AQ mode is disabled. + if (!seg->enabled) return; + + switch (aq_mode) { + case CYCLIC_REFRESH_AQ: + mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); + break; +#if !CONFIG_REALTIME_ONLY + case VARIANCE_AQ: + if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || + cpi->force_update_segmentation || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { + int min_energy; + int max_energy; + // Get sub block energy range + if (bsize >= BLOCK_32X32) { + vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, + &max_energy); + } else { + min_energy = bsize <= BLOCK_16X16 ? x->mb_energy + : vp9_block_energy(cpi, x, bsize); + } + mi->segment_id = vp9_vaq_segment_id(min_energy); + } else { + mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); + } + break; + case EQUATOR360_AQ: + if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation) + mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows); + else + mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); + break; +#endif + case LOOKAHEAD_AQ: + mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); + break; + case PSNR_AQ: mi->segment_id = segment_index; break; + case PERCEPTUAL_AQ: mi->segment_id = x->segment_id; break; + default: + // NO_AQ or PSNR_AQ + break; + } + + // Set segment index from ROI map if it's enabled. 
+ if (cpi->roi.enabled) + mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); + + vp9_init_plane_quantizers(cpi, x); +} // Lighter version of set_offsets that only sets the mode info // pointers. @@ -217,23 +267,57 @@ static INLINE void set_mode_info_offsets(VP9_COMMON *const cm, x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); } +static void set_ssim_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, + const BLOCK_SIZE bsize, const int mi_row, + const int mi_col, int *const rdmult) { + const VP9_COMMON *const cm = &cpi->common; + + const int bsize_base = BLOCK_16X16; + const int num_8x8_w = num_8x8_blocks_wide_lookup[bsize_base]; + const int num_8x8_h = num_8x8_blocks_high_lookup[bsize_base]; + const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w; + const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h; + const int num_bcols = + (num_8x8_blocks_wide_lookup[bsize] + num_8x8_w - 1) / num_8x8_w; + const int num_brows = + (num_8x8_blocks_high_lookup[bsize] + num_8x8_h - 1) / num_8x8_h; + int row, col; + double num_of_mi = 0.0; + double geom_mean_of_scale = 0.0; + + assert(cpi->oxcf.tuning == VP8_TUNE_SSIM); + + for (row = mi_row / num_8x8_w; + row < num_rows && row < mi_row / num_8x8_w + num_brows; ++row) { + for (col = mi_col / num_8x8_h; + col < num_cols && col < mi_col / num_8x8_h + num_bcols; ++col) { + const int index = row * num_cols + col; + geom_mean_of_scale += log(cpi->mi_ssim_rdmult_scaling_factors[index]); + num_of_mi += 1.0; + } + } + geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi); + + *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale); + *rdmult = VPXMAX(*rdmult, 0); + set_error_per_bit(x, *rdmult); + vpx_clear_system_state(); +} + static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *const x, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *mi; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; - const struct segmentation *const seg = &cm->seg; MvLimits *const mv_limits = &x->mv_limits; set_skip_context(xd, mi_row, mi_col); set_mode_info_offsets(cm, x, xd, mi_row, mi_col); - mi = xd->mi[0]; - // Set up destination pointers. vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); @@ -255,21 +339,8 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, // R/D setup. x->rddiv = cpi->rd.RDDIV; x->rdmult = cpi->rd.RDMULT; - - // Setup segment ID. - if (seg->enabled) { - if (cpi->oxcf.aq_mode != VARIANCE_AQ && cpi->oxcf.aq_mode != LOOKAHEAD_AQ && - cpi->oxcf.aq_mode != EQUATOR360_AQ) { - const uint8_t *const map = - seg->update_map ? 
cpi->segmentation_map : cm->last_frame_seg_map; - mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); - } - vp9_init_plane_quantizers(cpi, x); - - x->encode_breakout = cpi->segment_encode_breakout[mi->segment_id]; - } else { - mi->segment_id = 0; - x->encode_breakout = cpi->encode_breakout; + if (oxcf->tuning == VP8_TUNE_SSIM) { + set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); } // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs() @@ -385,16 +456,13 @@ static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { node->split[i] = &vt->split[i].part_variances.none; break; } - case BLOCK_4X4: { + default: { v4x4 *vt = (v4x4 *)data; + assert(bsize == BLOCK_4X4); node->part_variances = &vt->part_variances; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i]; break; } - default: { - assert(0); - break; - } } } @@ -408,7 +476,8 @@ static void fill_variance(uint32_t s2, int32_t s, int c, var *v) { static void get_variance(var *v) { v->variance = (int)(256 * (v->sum_square_error - - ((v->sum_error * v->sum_error) >> v->log2_count)) >> + (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> + v->log2_count)) >> v->log2_count); } @@ -450,7 +519,7 @@ static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x, // No check for vert/horiz split as too few samples for variance. if (bsize == bsize_min) { // Variance already computed to set the force_split. - if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none); + if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); if (mi_col + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows && vt.part_variances->none.variance < threshold) { @@ -460,9 +529,9 @@ static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x, return 0; } else if (bsize > bsize_min) { // Variance already computed to set the force_split. - if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none); + if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); // For key frame: take split for bsize above 32X32 or very high variance. - if (cm->frame_type == KEY_FRAME && + if (frame_is_intra_only(cm) && (bsize > BLOCK_32X32 || vt.part_variances->none.variance > (threshold << 4))) { return 0; @@ -534,8 +603,9 @@ static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, int content_state) { VP9_COMMON *const cm = &cpi->common; - const int is_key_frame = (cm->frame_type == KEY_FRAME); - const int threshold_multiplier = is_key_frame ? 20 : 1; + const int is_key_frame = frame_is_intra_only(cm); + const int threshold_multiplier = + is_key_frame ? 
20 : cpi->sf.variance_part_thresh_mult; int64_t threshold_base = (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]); @@ -586,6 +656,7 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, } else { thresholds[1] = (5 * threshold_base) >> 1; } + if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX; } } @@ -593,7 +664,7 @@ void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q, int content_state) { VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; - const int is_key_frame = (cm->frame_type == KEY_FRAME); + const int is_key_frame = frame_is_intra_only(cm); if (sf->partition_search_type != VAR_BASED_PARTITION && sf->partition_search_type != REFERENCE_PARTITION) { return; @@ -620,6 +691,11 @@ void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q, cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000 ? (cpi->y_dequant[q][1] << 3) : 8000; + if (cpi->rc.high_source_sad || + (cpi->use_svc && cpi->svc.high_source_sad_superframe)) { + cpi->vbp_threshold_sad = 0; + cpi->vbp_threshold_copy = 0; + } } cpi->vbp_threshold_minmax = 15 + (q >> 3); } @@ -885,13 +961,13 @@ static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x, set_block_size(cpi, x, xd, mi_row, mi_col, subsize); set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize); break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col); copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col); copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs); copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs); break; - default: assert(0); } } } @@ -940,18 +1016,20 @@ static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, const int has_rows = (mi_row_high + bs_high) < cm->mi_rows; const int has_cols = (mi_col_high + bs_high) < cm->mi_cols; - const int row_boundary_block_scale_factor[BLOCK_SIZES] = { - 13, 13, 13, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0 - }; - const int col_boundary_block_scale_factor[BLOCK_SIZES] = { - 13, 13, 13, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0 - }; + const int row_boundary_block_scale_factor[BLOCK_SIZES] = { 13, 13, 13, 1, 0, + 1, 1, 0, 1, 1, + 0, 1, 0 }; + const int col_boundary_block_scale_factor[BLOCK_SIZES] = { 13, 13, 13, 2, 2, + 0, 2, 2, 0, 2, + 2, 0, 0 }; int start_pos; BLOCK_SIZE bsize_low; PARTITION_TYPE partition_high; if (mi_row_high >= cm->mi_rows || mi_col_high >= cm->mi_cols) return 0; - if (mi_row >= (cm->mi_rows >> 1) || mi_col >= (cm->mi_cols >> 1)) return 0; + if (mi_row >= svc->mi_rows[svc->spatial_layer_id - 1] || + mi_col >= svc->mi_cols[svc->spatial_layer_id - 1]) + return 0; // Find corresponding (mi_col/mi_row) block down-scaled by 2x2. 
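Worth calling out from the hunks above: get_variance() now widens sum_error to 64 bits before squaring. For a 64x64 block of 8-bit samples the accumulated error can reach 64 * 64 * 255 = 1,044,480, and 1,044,480^2 = 1,090,938,470,400, far beyond INT32_MAX (2,147,483,647), so the old 32-bit product could wrap and corrupt the computed variance that drives the partitioning thresholds. A standalone illustration (names ours, not libvpx's):

    #include <assert.h>
    #include <stdint.h>

    /* Demonstrates the widening cast that the get_variance() fix applies. */
    static void sum_error_square_demo(void) {
      const int32_t sum_error = 64 * 64 * 255; /* worst case, 64x64 block */
      const int64_t sq = (int64_t)sum_error * sum_error; /* no overflow */
      assert(sq == INT64_C(1090938470400));
    }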
start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col; @@ -1004,7 +1082,8 @@ static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs_high, subsize_high); break; - case PARTITION_SPLIT: + default: + assert(partition_high == PARTITION_SPLIT); if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col, mi_row_high, mi_col_high)) return 1; @@ -1020,7 +1099,6 @@ static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, mi_col_high + bs_high)) return 1; break; - default: assert(0); } } @@ -1067,13 +1145,13 @@ static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, prev_part[start_pos] = subsize; if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); update_partition_svc(cpi, subsize, mi_row, mi_col); update_partition_svc(cpi, subsize, mi_row + bs, mi_col); update_partition_svc(cpi, subsize, mi_row, mi_col + bs); update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs); break; - default: assert(0); } } } @@ -1108,13 +1186,13 @@ static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize, prev_part[start_pos] = subsize; if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); update_prev_partition_helper(cpi, subsize, mi_row, mi_col); update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col); update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs); update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs); break; - default: assert(0); } } } @@ -1136,8 +1214,8 @@ static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize, if (is_key_frame) return; - // For speed >= 8, avoid the chroma check if y_sad is above threshold. - if (cpi->oxcf.speed >= 8) { + // For speed > 8, avoid the chroma check if y_sad is above threshold. + if (cpi->oxcf.speed > 8) { if (y_sad > cpi->vbp_thresholds[1] && (!cpi->noise_estimate.enabled || vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium)) @@ -1206,6 +1284,7 @@ static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, cpi->content_state_sb_fd[sb_offset] = 0; } } + if (tmp_sad == 0) x->zero_temp_sad_source = 1; return tmp_sad; } @@ -1241,21 +1320,40 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, int pixels_wide = 64, pixels_high = 64; int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] }; + int force_64_split = cpi->rc.high_source_sad || + (cpi->use_svc && cpi->svc.high_source_sad_superframe) || + (cpi->oxcf.content == VP9E_CONTENT_SCREEN && + cpi->compute_source_sad_onepass && + cpi->sf.use_source_sad && !x->zero_temp_sad_source); // For the variance computation under SVC mode, we treat the frame as key if // the reference (base layer frame) is key frame (i.e., is_key_frame == 1). - const int is_key_frame = - (cm->frame_type == KEY_FRAME || + int is_key_frame = + (frame_is_intra_only(cm) || (is_one_pass_cbr_svc(cpi) && cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)); // Always use 4x4 partition for key frame. 
- const int use_4x4_partition = cm->frame_type == KEY_FRAME; + const int use_4x4_partition = frame_is_intra_only(cm); const int low_res = (cm->width <= 352 && cm->height <= 288); int variance4x4downsample[16]; int segment_id; int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3); + // For SVC: check if LAST frame is NULL or if the resolution of LAST is + // different than the current frame resolution, and if so, treat this frame + // as a key frame, for the purpose of the superblock partitioning. + // LAST == NULL can happen in some cases where enhancement spatial layers are + // enabled dynamically in the stream and the only reference is the spatial + // reference (GOLDEN). + if (cpi->use_svc) { + const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, LAST_FRAME); + if (ref == NULL || ref->y_crop_height != cm->height || + ref->y_crop_width != cm->width) + is_key_frame = 1; + } + + set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); + set_segment_index(cpi, x, mi_row, mi_col, BLOCK_64X64, 0); segment_id = xd->mi[0]->segment_id; if (cpi->oxcf.speed >= 8 || (cpi->use_svc && cpi->svc.non_reference_frame)) @@ -1289,6 +1387,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } // If source_sad is low copy the partition without computing the y_sad. if (x->skip_low_source_sad && cpi->sf.copy_partition_flag && + !force_64_split && copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) { x->sb_use_mv_part = 1; if (cpi->sf.svc_use_lowres_part && @@ -1305,6 +1404,11 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } else { set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state); } + // Decrease 32x32 split threshold for screen on base layer, for scene + // change/high motion frames. + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && + cpi->svc.spatial_layer_id == 0 && force_64_split) + thresholds[1] = 3 * thresholds[1] >> 2; // For non keyframes, disable 4x4 average for low resolution when speed = 8 threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX; @@ -1317,7 +1421,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, // 5-20 for the 16x16 blocks. - force_split[0] = 0; + force_split[0] = force_64_split; if (!is_key_frame) { // In the case of spatial/temporal scalable coding, the assumption here is @@ -1333,7 +1437,8 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, assert(yv12 != NULL); - if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) { + if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) || + cpi->svc.use_gf_temporal_ref_current_layer) { // For now, GOLDEN will not be used for non-zero spatial layers, since // it may not be a temporal reference.
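A substitution that recurs through choose_partitioning() and the threshold setters above: cm->frame_type == KEY_FRAME becomes frame_is_intra_only(cm). VP9 also has intra-only frames that are not key frames (inter frames coded with intra_only set and no usable references), and those should take the intra partitioning paths too; the denoiser hunk earlier makes the same point with cm->intra_only ? KEY_FRAME : cm->frame_type. To the best of our reading, the common helper is equivalent to the sketch below (paraphrased, not copied from vp9_onyxc_int.h):

    /* Paraphrase of frame_is_intra_only(); field names match the VP9_COMMON
     * usage visible elsewhere in this patch. */
    static INLINE int frame_is_intra_only_sketch(const VP9_COMMON *cm) {
      return cm->frame_type == KEY_FRAME || cm->intra_only;
    }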
yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); @@ -1374,10 +1479,28 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); } else { - y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); + const MV dummy_mv = { 0, 0 }; + y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col, + &dummy_mv); x->sb_use_mv_part = 1; x->sb_mvcol_part = mi->mv[0].as_mv.col; x->sb_mvrow_part = mi->mv[0].as_mv.row; + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && + cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode && + cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source && + cm->width > 640 && cm->height > 480) { + // Disable split below 16x16 block size when scroll motion (horz or + // vert) is detected. + // TODO(marpan/jianj): Improve this condition: issue is that search + // range is hard-coded/limited in vp9_int_pro_motion_estimation() so + // scroll motion may not be detected here. + if (((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) || + (abs(x->sb_mvcol_part) >= 48 && abs(x->sb_mvrow_part) <= 8)) && + y_sad < 100000) { + compute_minmax_variance = 0; + thresholds[2] = INT64_MAX; + } + } } y_sad_last = y_sad; @@ -1513,9 +1636,9 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } } } - if (is_key_frame || (low_res && - vt.split[i].split[j].part_variances.none.variance > - threshold_4x4avg)) { + if (is_key_frame || + (low_res && vt.split[i].split[j].part_variances.none.variance > + threshold_4x4avg)) { force_split[split_index] = 0; // Go down to 4x4 down-sampling for variance. variance4x4downsample[i2 + j] = 1; @@ -1648,11 +1771,11 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } } - if (cm->frame_type != KEY_FRAME && cpi->sf.copy_partition_flag) { + if (!frame_is_intra_only(cm) && cpi->sf.copy_partition_flag) { update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); } - if (cm->frame_type != KEY_FRAME && cpi->sf.svc_use_lowres_part && + if (!frame_is_intra_only(cm) && cpi->sf.svc_use_lowres_part && cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); @@ -1666,6 +1789,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, return 0; } +#if !CONFIG_REALTIME_ONLY static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, BLOCK_SIZE bsize, int output_enabled) { @@ -1794,6 +1918,7 @@ static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx, } } } +#endif // !CONFIG_REALTIME_ONLY void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col) { @@ -1836,20 +1961,41 @@ static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, vp9_rd_cost_init(rd_cost); } -static int set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, - int8_t segment_id) { - int segment_qindex; +#if !CONFIG_REALTIME_ONLY +static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, + int mi_row, int mi_col, BLOCK_SIZE bsize, + AQ_MODE aq_mode) { VP9_COMMON *const cm = &cpi->common; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; + const uint8_t *const map = + cm->seg.update_map ? 
cpi->segmentation_map : cm->last_frame_seg_map; + vp9_init_plane_quantizers(cpi, x); vpx_clear_system_state(); - segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); - return vp9_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q); + + if (aq_mode == NO_AQ || aq_mode == PSNR_AQ) { + if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult; + } else if (aq_mode == PERCEPTUAL_AQ) { + x->rdmult = x->cb_rdmult; + } else if (aq_mode == CYCLIC_REFRESH_AQ) { + // If segment is boosted, use rdmult for that segment. + if (cyclic_refresh_segment_id_boosted( + get_segment_id(cm, map, bsize, mi_row, mi_col))) + x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); + } else { + x->rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); + } + + if (oxcf->tuning == VP8_TUNE_SSIM) { + set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); + } } static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *const x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx, int64_t best_rd) { + PICK_MODE_CONTEXT *ctx, int rate_in_best_rd, + int64_t dist_in_best_rd) { VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCKD *const xd = &x->e_mbd; @@ -1858,6 +2004,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, struct macroblockd_plane *const pd = xd->plane; const AQ_MODE aq_mode = cpi->oxcf.aq_mode; int i, orig_rdmult; + int64_t best_rd = INT64_MAX; vpx_clear_system_state(); @@ -1914,43 +2061,11 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt; } - if (aq_mode == VARIANCE_AQ) { - const int energy = - bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize); - - if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || - cpi->force_update_segmentation || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - mi->segment_id = vp9_vaq_segment_id(energy); - } else { - const uint8_t *const map = - cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); - } - x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id); - } else if (aq_mode == LOOKAHEAD_AQ) { - const uint8_t *const map = cpi->segmentation_map; - - // I do not change rdmult here consciously. - mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); - } else if (aq_mode == EQUATOR360_AQ) { - if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation) { - mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows); - } else { - const uint8_t *const map = - cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); - } - x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id); - } else if (aq_mode == COMPLEXITY_AQ) { - x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id); - } else if (aq_mode == CYCLIC_REFRESH_AQ) { - const uint8_t *const map = - cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - // If segment is boosted, use rdmult for that segment. 
- if (cyclic_refresh_segment_id_boosted( - get_segment_id(cm, map, bsize, mi_row, mi_col))) - x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); + set_segment_index(cpi, x, mi_row, mi_col, bsize, 0); + set_segment_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode); + if (rate_in_best_rd < INT_MAX && dist_in_best_rd < INT64_MAX) { + best_rd = vp9_calculate_rd_cost(x->rdmult, x->rddiv, rate_in_best_rd, + dist_in_best_rd); } // Find best coding mode & reconstruct the MB so it is available @@ -1979,15 +2094,19 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate); } - x->rdmult = orig_rdmult; - // TODO(jingning) The rate-distortion optimization flow needs to be // refactored to provide proper exit/return handle. - if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX; + if (rd_cost->rate == INT_MAX || rd_cost->dist == INT64_MAX) + rd_cost->rdcost = INT64_MAX; + else + rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); + + x->rdmult = orig_rdmult; ctx->rate = rd_cost->rate; ctx->dist = rd_cost->dist; } +#endif // !CONFIG_REALTIME_ONLY static void update_stats(VP9_COMMON *cm, ThreadData *td) { const MACROBLOCK *x = &td->mb; @@ -2013,8 +2132,10 @@ static void update_stats(VP9_COMMON *cm, ThreadData *td) { [has_second_ref(mi)]++; if (has_second_ref(mi)) { - counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)] - [ref0 == GOLDEN_FRAME]++; + const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); + const int bit = mi->ref_frame[!idx] == cm->comp_var_ref[1]; + counts->comp_ref[ctx][bit]++; } else { counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0] [ref0 != LAST_FRAME]++; @@ -2046,6 +2167,7 @@ static void update_stats(VP9_COMMON *cm, ThreadData *td) { } } +#if !CONFIG_REALTIME_ONLY static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], @@ -2110,6 +2232,16 @@ static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td, PICK_MODE_CONTEXT *ctx) { MACROBLOCK *const x = &td->mb; set_offsets(cpi, tile, x, mi_row, mi_col, bsize); + + if (cpi->sf.enable_tpl_model && + (cpi->oxcf.aq_mode == NO_AQ || cpi->oxcf.aq_mode == PERCEPTUAL_AQ)) { + const VP9EncoderConfig *const oxcf = &cpi->oxcf; + x->rdmult = x->cb_rdmult; + if (oxcf->tuning == VP8_TUNE_SSIM) { + set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); + } + } + update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled); encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); @@ -2168,7 +2300,8 @@ static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile, subsize, &pc_tree->horizontal[1]); } break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); if (bsize == BLOCK_8X8) { encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, pc_tree->leaf_split[0]); @@ -2183,12 +2316,12 @@ static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile, subsize, pc_tree->split[3]); } break; - default: assert(0 && "Invalid partition type."); break; } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) update_partition_context(xd, mi_row, mi_col, subsize, bsize); } +#endif // !CONFIG_REALTIME_ONLY // Check to see if the given partition size is allowed for a specified number // of 8x8 block rows and columns remaining in the image. 
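Editor's note on the rd_pick_sb_modes() rework above: callers now hand in the best-so-far cost as a separate (rate, distortion) pair, and the function rebuilds best_rd with vp9_calculate_rd_cost() only after set_segment_rdmult() has fixed the block's own rdmult (segment deltas, TPL's cb_rdmult, and SSIM tuning can all change it). The Lagrangian being minimized is J = lambda * R + D; the RDCOST macro used for rd_cost->rdcost implements it in fixed point, roughly as below (the shift widths follow an older vp9 form and are illustrative, not copied from vp9_rd.h):

    /* Hedged sketch of VP9's RD cost: rate weighted by the rdmult Lagrange
     * multiplier in Q8 fixed point, distortion scaled by rddiv. */
    static int64_t rdcost_sketch(int rdmult, int rddiv, int rate,
                                 int64_t dist) {
      return ((128 + (int64_t)rate * rdmult) >> 8) + (dist << rddiv);
    }

Re-deriving the threshold under the block's final rdmult keeps early termination consistent with how that block's own candidate modes are scored.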
@@ -2393,17 +2526,15 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td, *(xd->mi[0]) = ctx->mic; *(x->mbmi_ext) = ctx->mbmi_ext; - if (seg->enabled && cpi->oxcf.aq_mode != NO_AQ) { - // For in frame complexity AQ or variance AQ, copy segment_id from - // segmentation_map. - if (cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) { + if (seg->enabled && (cpi->oxcf.aq_mode != NO_AQ || cpi->roi.enabled)) { + // Setting segmentation map for cyclic_refresh. + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize, + ctx->rate, ctx->dist, x->skip, p); + } else { const uint8_t *const map = seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); - } else { - // Setting segmentation map for cyclic_refresh. - vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize, - ctx->rate, ctx->dist, x->skip, p); } vp9_init_plane_quantizers(cpi, x); } @@ -2441,7 +2572,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td, } x->skip = ctx->skip; - x->skip_txfm[0] = mi->segment_id ? 0 : ctx->skip_txfm[0]; + x->skip_txfm[0] = (mi->segment_id || xd->lossless) ? 0 : ctx->skip_txfm[0]; } static void encode_b_rt(VP9_COMP *cpi, ThreadData *td, @@ -2509,7 +2640,8 @@ static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td, subsize, &pc_tree->horizontal[1]); } break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); subsize = get_subsize(bsize, PARTITION_SPLIT); encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, pc_tree->split[0]); @@ -2520,13 +2652,13 @@ static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td, encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled, subsize, pc_tree->split[3]); break; - default: assert(0 && "Invalid partition type."); break; } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) update_partition_context(xd, mi_row, mi_col, subsize, bsize); } +#if !CONFIG_REALTIME_ONLY static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, MODE_INFO **mi_8x8, TOKENEXTRA **tp, int mi_row, int mi_col, @@ -2595,7 +2727,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, mi_col + (mi_step >> 1) < cm->mi_cols) { pc_tree->partitioning = PARTITION_NONE; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx, - INT64_MAX); + INT_MAX, INT64_MAX); pl = partition_plane_context(xd, mi_row, mi_col, bsize); @@ -2614,11 +2746,12 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, switch (partition) { case PARTITION_NONE: rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize, - ctx, INT64_MAX); + ctx, INT_MAX, INT64_MAX); break; case PARTITION_HORZ: + pc_tree->horizontal[0].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, - subsize, &pc_tree->horizontal[0], INT64_MAX); + subsize, &pc_tree->horizontal[0], INT_MAX, INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { RD_COST tmp_rdc; @@ -2626,8 +2759,10 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, vp9_rd_cost_init(&tmp_rdc); update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); + pc_tree->horizontal[1].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col, - &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX); + &tmp_rdc, subsize, 
&pc_tree->horizontal[1], INT_MAX, + INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp9_rd_cost_reset(&last_part_rdc); break; @@ -2638,8 +2773,9 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, } break; case PARTITION_VERT: + pc_tree->vertical[0].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, - subsize, &pc_tree->vertical[0], INT64_MAX); + subsize, &pc_tree->vertical[0], INT_MAX, INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { RD_COST tmp_rdc; @@ -2647,9 +2783,10 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, vp9_rd_cost_init(&tmp_rdc); update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), - &tmp_rdc, subsize, - &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX); + pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0; + rd_pick_sb_modes( + cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), &tmp_rdc, + subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT_MAX, INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp9_rd_cost_reset(&last_part_rdc); break; @@ -2659,10 +2796,11 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, last_part_rdc.rdcost += tmp_rdc.rdcost; } break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); if (bsize == BLOCK_8X8) { rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, - subsize, pc_tree->leaf_split[0], INT64_MAX); + subsize, pc_tree->leaf_split[0], INT_MAX, INT64_MAX); break; } last_part_rdc.rate = 0; @@ -2689,7 +2827,6 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, last_part_rdc.dist += tmp_rdc.dist; } break; - default: assert(0); break; } pl = partition_plane_context(xd, mi_row, mi_col, bsize); @@ -2727,7 +2864,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, pc_tree->split[i]->partitioning = PARTITION_NONE; rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc, split_subsize, &pc_tree->split[i]->none, - INT64_MAX); + INT_MAX, INT64_MAX); restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); @@ -2961,6 +3098,7 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row, *min_bs = min_size; *max_bs = max_size; } +#endif // !CONFIG_REALTIME_ONLY static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv)); @@ -2975,15 +3113,15 @@ const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4 }; const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4 }; -const int qindex_skip_threshold_lookup[BLOCK_SIZES] = { - 0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120 -}; -const int qindex_split_threshold_lookup[BLOCK_SIZES] = { - 0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120 -}; -const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6 -}; +const int qindex_skip_threshold_lookup[BLOCK_SIZES] = { 0, 10, 10, 30, 40, + 40, 60, 80, 80, 90, + 100, 100, 120 }; +const int qindex_split_threshold_lookup[BLOCK_SIZES] = { 0, 3, 3, 7, 15, + 15, 30, 40, 40, 60, + 80, 80, 120 }; +const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = { 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + 4, 4, 6 }; typedef enum { MV_ZERO = 0, @@ -3018,14 +3156,60 @@ static INLINE int 
get_motion_inconsistency(MOTION_DIRECTION this_mv, } #endif -// Calculate the score used in machine-learning based partition search early -// termination. -static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd, - PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - const double *clf; - const double *mean; - const double *sd; +// Calculate prediction based on the given input features and neural net config. +// Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden +// layer. +static void nn_predict(const float *features, const NN_CONFIG *nn_config, + float *output) { + int num_input_nodes = nn_config->num_inputs; + int buf_index = 0; + float buf[2][NN_MAX_NODES_PER_LAYER]; + const float *input_nodes = features; + + // Propagate hidden layers. + const int num_layers = nn_config->num_hidden_layers; + int layer, node, i; + assert(num_layers <= NN_MAX_HIDDEN_LAYERS); + for (layer = 0; layer < num_layers; ++layer) { + const float *weights = nn_config->weights[layer]; + const float *bias = nn_config->bias[layer]; + float *output_nodes = buf[buf_index]; + const int num_output_nodes = nn_config->num_hidden_nodes[layer]; + assert(num_output_nodes < NN_MAX_NODES_PER_LAYER); + for (node = 0; node < num_output_nodes; ++node) { + float val = 0.0f; + for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i]; + val += bias[node]; + // ReLU as activation function. + val = VPXMAX(val, 0.0f); + output_nodes[node] = val; + weights += num_input_nodes; + } + num_input_nodes = num_output_nodes; + input_nodes = output_nodes; + buf_index = 1 - buf_index; + } + + // Final output layer. + { + const float *weights = nn_config->weights[num_layers]; + for (node = 0; node < nn_config->num_outputs; ++node) { + const float *bias = nn_config->bias[num_layers]; + float val = 0.0f; + for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i]; + output[node] = val + bias[node]; + weights += num_input_nodes; + } + } +} + +#if !CONFIG_REALTIME_ONLY +#define FEATURES 7 +// Machine-learning based partition search early termination. +// Return 1 to skip split and rect partitions. 
+static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, + PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, + BLOCK_SIZE bsize) { const int mag_mv = abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row); const int left_in_image = !!xd->left_mi; @@ -3035,11 +3219,32 @@ static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd, int above_par = 0; // above_partitioning int left_par = 0; // left_partitioning int last_par = 0; // last_partitioning - BLOCK_SIZE context_size; - double score; int offset = 0; + int i; + BLOCK_SIZE context_size; + const NN_CONFIG *nn_config = NULL; + const float *mean, *sd, *linear_weights; + float nn_score, linear_score; + float features[FEATURES]; assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); + vpx_clear_system_state(); + + switch (bsize) { + case BLOCK_64X64: + offset = 0; + nn_config = &vp9_partition_nnconfig_64x64; + break; + case BLOCK_32X32: + offset = 8; + nn_config = &vp9_partition_nnconfig_32x32; + break; + case BLOCK_16X16: + offset = 16; + nn_config = &vp9_partition_nnconfig_16x16; + break; + default: assert(0 && "Unexpected block size."); return 0; + } if (above_in_image) { context_size = xd->above_mi->sb_type; @@ -3065,36 +3270,550 @@ static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd, last_par = 1; } - if (bsize == BLOCK_64X64) - offset = 0; - else if (bsize == BLOCK_32X32) - offset = 8; - else if (bsize == BLOCK_16X16) - offset = 16; + mean = &vp9_partition_feature_mean[offset]; + sd = &vp9_partition_feature_std[offset]; + features[0] = ((float)ctx->rate - mean[0]) / sd[0]; + features[1] = ((float)ctx->dist - mean[1]) / sd[1]; + features[2] = ((float)mag_mv / 2 - mean[2]) * sd[2]; + features[3] = ((float)(left_par + above_par) / 2 - mean[3]) * sd[3]; + features[4] = ((float)ctx->sum_y_eobs - mean[4]) / sd[4]; + features[5] = ((float)cm->base_qindex - mean[5]) * sd[5]; + features[6] = ((float)last_par - mean[6]) * sd[6]; - // early termination score calculation - clf = &classifiers[offset]; - mean = &train_mean[offset]; - sd = &train_stdm[offset]; - score = clf[0] * (((double)ctx->rate - mean[0]) / sd[0]) + - clf[1] * (((double)ctx->dist - mean[1]) / sd[1]) + - clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) + - clf[3] * (((double)(left_par + above_par) / 2 - mean[3]) * sd[3]) + - clf[4] * (((double)ctx->sum_y_eobs - mean[4]) / sd[4]) + - clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) + - clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7]; - return score; + // Predict using linear model. + linear_weights = &vp9_partition_linear_weights[offset]; + linear_score = linear_weights[FEATURES]; + for (i = 0; i < FEATURES; ++i) + linear_score += linear_weights[i] * features[i]; + if (linear_score > 0.1f) return 0; + + // Predict using neural net model. + nn_predict(features, nn_config, &nn_score); + + if (linear_score < -0.0f && nn_score < 0.1f) return 1; + if (nn_score < -0.0f && linear_score < 0.1f) return 1; + return 0; +} +#undef FEATURES + +#define FEATURES 4 +// ML-based partition search breakout. +static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize, + const MACROBLOCK *const x, + const RD_COST *const rd_cost) { + DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 }; + const VP9_COMMON *const cm = &cpi->common; + float features[FEATURES]; + const float *linear_weights = NULL; // Linear model weights. + float linear_score = 0.0f; + const int qindex = cm->base_qindex; + const int q_ctx = qindex >= 200 ? 0 : (qindex >= 150 ? 
1 : 2); + const int is_720p_or_larger = VPXMIN(cm->width, cm->height) >= 720; + const int resolution_ctx = is_720p_or_larger ? 1 : 0; + + switch (bsize) { + case BLOCK_64X64: + linear_weights = vp9_partition_breakout_weights_64[resolution_ctx][q_ctx]; + break; + case BLOCK_32X32: + linear_weights = vp9_partition_breakout_weights_32[resolution_ctx][q_ctx]; + break; + case BLOCK_16X16: + linear_weights = vp9_partition_breakout_weights_16[resolution_ctx][q_ctx]; + break; + case BLOCK_8X8: + linear_weights = vp9_partition_breakout_weights_8[resolution_ctx][q_ctx]; + break; + default: assert(0 && "Unexpected block size."); return 0; + } + if (!linear_weights) return 0; + + { // Generate feature values. +#if CONFIG_VP9_HIGHBITDEPTH + const int ac_q = + vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8); +#else + const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth); +#endif // CONFIG_VP9_HIGHBITDEPTH + const int num_pels_log2 = num_pels_log2_lookup[bsize]; + int feature_index = 0; + unsigned int var, sse; + float rate_f, dist_f; + +#if CONFIG_VP9_HIGHBITDEPTH + if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + var = + vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, x->e_mbd.bd); + } else { + var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, + vp9_64_zeros, 0, &sse); + } +#else + var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, + vp9_64_zeros, 0, &sse); +#endif + var = var >> num_pels_log2; + + vpx_clear_system_state(); + + rate_f = (float)VPXMIN(rd_cost->rate, INT_MAX); + dist_f = (float)(VPXMIN(rd_cost->dist, INT_MAX) >> num_pels_log2); + rate_f = + ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) * + rate_f; + + features[feature_index++] = rate_f; + features[feature_index++] = dist_f; + features[feature_index++] = (float)var; + features[feature_index++] = (float)ac_q; + assert(feature_index == FEATURES); + } + + { // Calculate the output score. + int i; + linear_score = linear_weights[FEATURES]; + for (i = 0; i < FEATURES; ++i) + linear_score += linear_weights[i] * features[i]; + } + + return linear_score >= cpi->sf.rd_ml_partition.search_breakout_thresh[q_ctx]; +} +#undef FEATURES + +#define FEATURES 8 +#define LABELS 4 +static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, + BLOCK_SIZE bsize, + const PC_TREE *const pc_tree, + int *allow_horz, int *allow_vert, + int64_t ref_rd) { + const NN_CONFIG *nn_config = NULL; + float score[LABELS] = { + 0.0f, + }; + int thresh = -1; + int i; + (void)x; + + if (ref_rd <= 0 || ref_rd > 1000000000) return; + + switch (bsize) { + case BLOCK_8X8: break; + case BLOCK_16X16: + nn_config = &vp9_rect_part_nnconfig_16; + thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[1]; + break; + case BLOCK_32X32: + nn_config = &vp9_rect_part_nnconfig_32; + thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[2]; + break; + case BLOCK_64X64: + nn_config = &vp9_rect_part_nnconfig_64; + thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[3]; + break; + default: assert(0 && "Unexpected block size."); return; + } + if (!nn_config || thresh < 0) return; + + // Feature extraction and model score calculation. 
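// (The FEATURES = 8 inputs assembled below are, in order: log(dc_q + 1); a
// flag for whether the current partitioning is PARTITION_NONE; the log of
// ref_rd normalized by block area; the NONE-partition RD cost as a fraction
// of ref_rd, capped at 2.0 and defaulting to 2.0 when that cost is invalid;
// and the four split sub-block RD costs as fractions of ref_rd, each
// defaulting to 1.0 when the sub-block RD is unavailable.)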
+ { + const VP9_COMMON *const cm = &cpi->common; +#if CONFIG_VP9_HIGHBITDEPTH + const int dc_q = + vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8); +#else + const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); +#endif // CONFIG_VP9_HIGHBITDEPTH + const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; + int feature_index = 0; + float features[FEATURES]; + + features[feature_index++] = logf((float)dc_q + 1.0f); + features[feature_index++] = + (float)(pc_tree->partitioning == PARTITION_NONE); + features[feature_index++] = logf((float)ref_rd / bs / bs + 1.0f); + + { + const float norm_factor = 1.0f / ((float)ref_rd + 1.0f); + const int64_t none_rdcost = pc_tree->none.rdcost; + float rd_ratio = 2.0f; + if (none_rdcost > 0 && none_rdcost < 1000000000) + rd_ratio = (float)none_rdcost * norm_factor; + features[feature_index++] = VPXMIN(rd_ratio, 2.0f); + + for (i = 0; i < 4; ++i) { + const int64_t this_rd = pc_tree->split[i]->none.rdcost; + const int rd_valid = this_rd > 0 && this_rd < 1000000000; + // Ratio between sub-block RD and whole block RD. + features[feature_index++] = + rd_valid ? (float)this_rd * norm_factor : 1.0f; + } + } + + assert(feature_index == FEATURES); + nn_predict(features, nn_config, score); + } + + // Make decisions based on the model score. + { + int max_score = -1000; + int horz = 0, vert = 0; + int int_score[LABELS]; + for (i = 0; i < LABELS; ++i) { + int_score[i] = (int)(100 * score[i]); + max_score = VPXMAX(int_score[i], max_score); + } + thresh = max_score - thresh; + for (i = 0; i < LABELS; ++i) { + if (int_score[i] >= thresh) { + if ((i >> 0) & 1) horz = 1; + if ((i >> 1) & 1) vert = 1; + } + } + *allow_horz = *allow_horz && horz; + *allow_vert = *allow_vert && vert; + } +} +#undef FEATURES +#undef LABELS + +// Perform fast and coarse motion search for the given block. This is a +// pre-processing step for the ML based partition search speedup. +static void simple_motion_search(const VP9_COMP *const cpi, MACROBLOCK *const x, + BLOCK_SIZE bsize, int mi_row, int mi_col, + MV ref_mv, MV_REFERENCE_FRAME ref, + uint8_t *const pred_buf) { + const VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mi = xd->mi[0]; + const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_buffer(cpi, ref); + const int step_param = 1; + const MvLimits tmp_mv_limits = x->mv_limits; + const SEARCH_METHODS search_method = NSTEP; + const int sadpb = x->sadperbit16; + MV ref_mv_full = { ref_mv.row >> 3, ref_mv.col >> 3 }; + MV best_mv = { 0, 0 }; + int cost_list[5]; + + assert(yv12 != NULL); + if (!yv12) return; + vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, + &cm->frame_refs[ref - 1].sf); + mi->ref_frame[0] = ref; + mi->ref_frame[1] = NONE; + mi->sb_type = bsize; + vp9_set_mv_search_range(&x->mv_limits, &ref_mv); + vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, search_method, + sadpb, cond_cost_list(cpi, cost_list), &ref_mv, + &best_mv, 0, 0); + best_mv.row *= 8; + best_mv.col *= 8; + x->mv_limits = tmp_mv_limits; + mi->mv[0].as_mv = best_mv; + + set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); + xd->plane[0].dst.buf = pred_buf; + xd->plane[0].dst.stride = 64; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); } +// Use a neural net model to prune partition-none and partition-split search. +// Features used: QP; spatial block size contexts; variance of prediction +// residue after simple_motion_search. 
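// (On ml_prune_rect_partition above: the four labels form a 2-bit mask over
// the rectangular partitions, bit 0 = HORZ and bit 1 = VERT, so label 0
// votes to prune both, 1 keeps HORZ only, 2 keeps VERT only, and 3 keeps
// both. A label contributes its bits whenever its integer score is within
// the speed-feature threshold of the best score; equivalently:
//
//   for (i = 0; i < 4; ++i)
//     if (int_score[i] >= max_score - prune_rect_thresh) {
//       horz |= (i >> 0) & 1;  /* labels 1 and 3 allow PARTITION_HORZ */
//       vert |= (i >> 1) & 1;  /* labels 2 and 3 allow PARTITION_VERT */
//     }
// )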
+#define FEATURES 12 +static void ml_predict_var_rd_paritioning(const VP9_COMP *const cpi, + MACROBLOCK *const x, + PC_TREE *const pc_tree, + BLOCK_SIZE bsize, int mi_row, + int mi_col, int *none, int *split) { + const VP9_COMMON *const cm = &cpi->common; + const NN_CONFIG *nn_config = NULL; +#if CONFIG_VP9_HIGHBITDEPTH + MACROBLOCKD *xd = &x->e_mbd; + DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]); + uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + ? (CONVERT_TO_BYTEPTR(pred_buffer)) + : pred_buffer; +#else + DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64]); + uint8_t *const pred_buf = pred_buffer; +#endif // CONFIG_VP9_HIGHBITDEPTH + const int speed = cpi->oxcf.speed; + float thresh = 0.0f; + + switch (bsize) { + case BLOCK_64X64: + nn_config = &vp9_part_split_nnconfig_64; + thresh = speed > 0 ? 2.8f : 3.0f; + break; + case BLOCK_32X32: + nn_config = &vp9_part_split_nnconfig_32; + thresh = speed > 0 ? 3.5f : 3.0f; + break; + case BLOCK_16X16: + nn_config = &vp9_part_split_nnconfig_16; + thresh = speed > 0 ? 3.8f : 4.0f; + break; + case BLOCK_8X8: + nn_config = &vp9_part_split_nnconfig_8; + if (cm->width >= 720 && cm->height >= 720) + thresh = speed > 0 ? 2.5f : 2.0f; + else + thresh = speed > 0 ? 3.8f : 2.0f; + break; + default: assert(0 && "Unexpected block size."); return; + } + + if (!nn_config) return; + + // Do a simple single motion search to find a prediction for current block. + // The variance of the residue will be used as input features. + { + MV ref_mv; + const MV_REFERENCE_FRAME ref = + cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; + // If bsize is 64x64, use zero MV as reference; otherwise, use MV result + // of previous(larger) block as reference. + if (bsize == BLOCK_64X64) + ref_mv.row = ref_mv.col = 0; + else + ref_mv = pc_tree->mv; + vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); + simple_motion_search(cpi, x, bsize, mi_row, mi_col, ref_mv, ref, pred_buf); + pc_tree->mv = x->e_mbd.mi[0]->mv[0].as_mv; + } + + vpx_clear_system_state(); + + { + float features[FEATURES] = { 0.0f }; +#if CONFIG_VP9_HIGHBITDEPTH + const int dc_q = + vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (xd->bd - 8); +#else + const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); +#endif // CONFIG_VP9_HIGHBITDEPTH + int feature_idx = 0; + float score; + + // Generate model input features. + features[feature_idx++] = logf((float)dc_q + 1.0f); + + // Get the variance of the residue as input features. + { + const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; + const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); + const uint8_t *pred = pred_buf; + const uint8_t *src = x->plane[0].src.buf; + const int src_stride = x->plane[0].src.stride; + const int pred_stride = 64; + unsigned int sse; + // Variance of whole block. + const unsigned int var = + cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); + const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); + const MACROBLOCKD *const xd = &x->e_mbd; + const int has_above = !!xd->above_mi; + const int has_left = !!xd->left_mi; + const BLOCK_SIZE above_bsize = has_above ? xd->above_mi->sb_type : bsize; + const BLOCK_SIZE left_bsize = has_left ? 
xd->left_mi->sb_type : bsize; + int i; + + features[feature_idx++] = (float)has_above; + features[feature_idx++] = (float)b_width_log2_lookup[above_bsize]; + features[feature_idx++] = (float)b_height_log2_lookup[above_bsize]; + features[feature_idx++] = (float)has_left; + features[feature_idx++] = (float)b_width_log2_lookup[left_bsize]; + features[feature_idx++] = (float)b_height_log2_lookup[left_bsize]; + features[feature_idx++] = logf((float)var + 1.0f); + for (i = 0; i < 4; ++i) { + const int x_idx = (i & 1) * bs / 2; + const int y_idx = (i >> 1) * bs / 2; + const int src_offset = y_idx * src_stride + x_idx; + const int pred_offset = y_idx * pred_stride + x_idx; + // Variance of quarter block. + const unsigned int sub_var = + cpi->fn_ptr[subsize].vf(src + src_offset, src_stride, + pred + pred_offset, pred_stride, &sse); + const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var; + features[feature_idx++] = var_ratio; + } + } + assert(feature_idx == FEATURES); + + // Feed the features into the model to get the confidence score. + nn_predict(features, nn_config, &score); + + // Higher score means that the model has higher confidence that the split + // partition is better than the non-split partition. So if the score is + // high enough, we skip the none-split partition search; if the score is + // low enough, we skip the split partition search. + if (score > thresh) *none = 0; + if (score < -thresh) *split = 0; + } +} +#undef FEATURES +#endif // !CONFIG_REALTIME_ONLY + +static double log_wiener_var(int64_t wiener_variance) { + return log(1.0 + wiener_variance) / log(2.0); +} + +static void build_kmeans_segmentation(VP9_COMP *cpi) { + VP9_COMMON *cm = &cpi->common; + BLOCK_SIZE bsize = BLOCK_64X64; + KMEANS_DATA *kmeans_data; + + vp9_disable_segmentation(&cm->seg); + if (cm->show_frame) { + int mi_row, mi_col; + cpi->kmeans_data_size = 0; + cpi->kmeans_ctr_num = 8; + + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + int mb_row_start = mi_row >> 1; + int mb_col_start = mi_col >> 1; + int mb_row_end = VPXMIN( + (mi_row + num_8x8_blocks_high_lookup[bsize]) >> 1, cm->mb_rows); + int mb_col_end = VPXMIN( + (mi_col + num_8x8_blocks_wide_lookup[bsize]) >> 1, cm->mb_cols); + int row, col; + int64_t wiener_variance = 0; + + for (row = mb_row_start; row < mb_row_end; ++row) + for (col = mb_col_start; col < mb_col_end; ++col) + wiener_variance += cpi->mb_wiener_variance[row * cm->mb_cols + col]; + + wiener_variance /= + (mb_row_end - mb_row_start) * (mb_col_end - mb_col_start); + +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&cpi->kmeans_mutex); +#endif // CONFIG_MULTITHREAD + + kmeans_data = &cpi->kmeans_data_arr[cpi->kmeans_data_size++]; + kmeans_data->value = log_wiener_var(wiener_variance); + kmeans_data->pos = mi_row * cpi->kmeans_data_stride + mi_col; +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(&cpi->kmeans_mutex); +#endif // CONFIG_MULTITHREAD + } + } + + vp9_kmeans(cpi->kmeans_ctr_ls, cpi->kmeans_boundary_ls, + cpi->kmeans_count_ls, cpi->kmeans_ctr_num, cpi->kmeans_data_arr, + cpi->kmeans_data_size); + + vp9_perceptual_aq_mode_setup(cpi, &cm->seg); + } +} + +#if !CONFIG_REALTIME_ONLY +static int wiener_var_segment(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, + int mi_col) { + VP9_COMMON *cm = &cpi->common; + int mb_row_start = mi_row >> 1; + int mb_col_start = mi_col >> 1; + int mb_row_end = + VPXMIN((mi_row + num_8x8_blocks_high_lookup[bsize]) >> 1, cm->mb_rows); + int mb_col_end = + 
VPXMIN((mi_col + num_8x8_blocks_wide_lookup[bsize]) >> 1, cm->mb_cols); + int row, col, idx; + int64_t wiener_variance = 0; + int segment_id; + int8_t seg_hist[MAX_SEGMENTS] = { 0 }; + int8_t max_count = 0, max_index = -1; + + vpx_clear_system_state(); + + assert(cpi->norm_wiener_variance > 0); + + for (row = mb_row_start; row < mb_row_end; ++row) { + for (col = mb_col_start; col < mb_col_end; ++col) { + wiener_variance = cpi->mb_wiener_variance[row * cm->mb_cols + col]; + segment_id = + vp9_get_group_idx(log_wiener_var(wiener_variance), + cpi->kmeans_boundary_ls, cpi->kmeans_ctr_num); + ++seg_hist[segment_id]; + } + } + + for (idx = 0; idx < cpi->kmeans_ctr_num; ++idx) { + if (seg_hist[idx] > max_count) { + max_count = seg_hist[idx]; + max_index = idx; + } + } + + assert(max_index >= 0); + segment_id = max_index; + + return segment_id; +} + +static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, + int mi_col, int orig_rdmult) { + const int gf_group_index = cpi->twopass.gf_group.index; + TplDepFrame *tpl_frame = &cpi->tpl_stats[gf_group_index]; + TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; + int tpl_stride = tpl_frame->stride; + int64_t intra_cost = 0; + int64_t mc_dep_cost = 0; + int mi_wide = num_8x8_blocks_wide_lookup[bsize]; + int mi_high = num_8x8_blocks_high_lookup[bsize]; + int row, col; + + int dr = 0; + int count = 0; + double r0, rk, beta; + + if (tpl_frame->is_valid == 0) return orig_rdmult; + + if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult; + + if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult; + + for (row = mi_row; row < mi_row + mi_high; ++row) { + for (col = mi_col; col < mi_col + mi_wide; ++col) { + TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; + + if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue; + + intra_cost += this_stats->intra_cost; + mc_dep_cost += this_stats->mc_dep_cost; + + ++count; + } + } + + vpx_clear_system_state(); + + r0 = cpi->rd.r0; + rk = (double)intra_cost / mc_dep_cost; + beta = r0 / rk; + dr = vp9_get_adaptive_rdmult(cpi, beta); + + dr = VPXMIN(dr, orig_rdmult * 3 / 2); + dr = VPXMAX(dr, orig_rdmult * 1 / 2); + + dr = VPXMAX(1, dr); + + return dr; +} +#endif // !CONFIG_REALTIME_ONLY + +#if !CONFIG_REALTIME_ONLY // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are // unlikely to be selected depending on previous rate-distortion optimization // results, for encoding speed-up. 
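// (On get_rdmult_delta above: r0 is the frame-level ratio of total intra
// cost to total motion-compensated dependency cost from the TPL stats, rk is
// the same ratio over just this block, and beta = r0 / rk is large for
// blocks that many future blocks depend on. For example, intra_cost = 800
// and mc_dep_cost = 3200 give rk = 0.25; with r0 = 0.5 that makes beta = 2,
// and vp9_get_adaptive_rdmult turns that into a lower rdmult so the block
// gets more bits, clamped to [orig_rdmult / 2, orig_rdmult * 3 / 2] and to
// at least 1. The inverse scaling with beta is assumed from its use in
// vp9_rd.c rather than stated here.)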
-static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, - TileDataEnc *tile_data, TOKENEXTRA **tp, - int mi_row, int mi_col, BLOCK_SIZE bsize, - RD_COST *rd_cost, int64_t best_rd, - PC_TREE *pc_tree) { +static int rd_pick_partition(VP9_COMP *cpi, ThreadData *td, + TileDataEnc *tile_data, TOKENEXTRA **tp, + int mi_row, int mi_col, BLOCK_SIZE bsize, + RD_COST *rd_cost, RD_COST best_rdc, + PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; @@ -3102,11 +3821,11 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; TOKENEXTRA *tp_orig = *tp; - PICK_MODE_CONTEXT *ctx = &pc_tree->none; + PICK_MODE_CONTEXT *const ctx = &pc_tree->none; int i; const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); BLOCK_SIZE subsize; - RD_COST this_rdc, sum_rdc, best_rdc; + RD_COST this_rdc, sum_rdc; int do_split = bsize >= BLOCK_8X8; int do_rect = 1; INTERP_FILTER pred_interp_filter; @@ -3133,24 +3852,35 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist; int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate; + int must_split = 0; + int should_encode_sb = 0; + + // Ref frames picked in the [i_th] quarter subblock during square partition + // RD search. It may be used to prune ref frame selection of rect partitions. + uint8_t ref_frames_used[4] = { 0, 0, 0, 0 }; + + int partition_mul = x->cb_rdmult; (void)*tp_orig; assert(num_8x8_blocks_wide_lookup[bsize] == num_8x8_blocks_high_lookup[bsize]); - // Adjust dist breakout threshold according to the partition size. dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); + rate_breakout_thr *= num_pels_log2_lookup[bsize]; vp9_rd_cost_init(&this_rdc); vp9_rd_cost_init(&sum_rdc); - vp9_rd_cost_reset(&best_rdc); - best_rdc.rdcost = best_rd; set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); + if (oxcf->tuning == VP8_TUNE_SSIM) { + set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &partition_mul); + } + vp9_rd_cost_update(partition_mul, x->rddiv, &best_rdc); + if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ && cpi->oxcf.aq_mode != LOOKAHEAD_AQ) x->mb_energy = vp9_block_energy(cpi, x, bsize); @@ -3165,10 +3895,18 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size); } + // Get sub block energy range + if (bsize >= BLOCK_16X16) { + int min_energy, max_energy; + vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, + &max_energy); + must_split = (min_energy < -3) && (max_energy - min_energy > 2); + } + // Determine partition types in search according to the speed features. // The threshold set here has to be of square block size. 
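// (A worked example of the breakout-threshold scaling earlier in this
// function, assuming the standard vp9 lookup tables: for BLOCK_32X32,
// b_width_log2 = b_height_log2 = 3, so dist_breakout_thr >>= 8 - 6 = 2,
// i.e. the distortion threshold is a quarter of its 64x64 value, while
// num_pels_log2 = 10 gives rate_breakout_thr *= 10.)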
if (cpi->sf.auto_min_max_partition_size) { - partition_none_allowed &= (bsize <= max_size && bsize >= min_size); + partition_none_allowed &= (bsize <= max_size); partition_horz_allowed &= ((bsize <= max_size && bsize > min_size) || force_horz_split); partition_vert_allowed &= @@ -3177,7 +3915,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } if (cpi->sf.use_square_partition_only && - bsize > cpi->sf.use_square_only_threshold) { + (bsize > cpi->sf.use_square_only_thresh_high || + bsize < cpi->sf.use_square_only_thresh_low)) { if (cpi->use_svc) { if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) partition_horz_allowed &= force_horz_split; @@ -3250,48 +3989,84 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } #endif + pc_tree->partitioning = PARTITION_NONE; + + if (cpi->sf.rd_ml_partition.var_pruning && !frame_is_intra_only(cm)) { + const int do_rd_ml_partition_var_pruning = + partition_none_allowed && do_split && + mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows && + mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols; + if (do_rd_ml_partition_var_pruning) { + ml_predict_var_rd_paritioning(cpi, x, pc_tree, bsize, mi_row, mi_col, + &partition_none_allowed, &do_split); + } else { + vp9_zero(pc_tree->mv); + } + if (bsize > BLOCK_8X8) { // Store MV result as reference for subblocks. + for (i = 0; i < 4; ++i) pc_tree->split[i]->mv = pc_tree->mv; + } + } + // PARTITION_NONE if (partition_none_allowed) { rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx, - best_rdc.rdcost); + best_rdc.rate, best_rdc.dist); + ctx->rdcost = this_rdc.rdcost; if (this_rdc.rate != INT_MAX) { + if (cpi->sf.prune_ref_frame_for_rect_partitions) { + const int ref1 = ctx->mic.ref_frame[0]; + const int ref2 = ctx->mic.ref_frame[1]; + for (i = 0; i < 4; ++i) { + ref_frames_used[i] |= (1 << ref1); + if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); + } + } if (bsize >= BLOCK_8X8) { this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; - this_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); + vp9_rd_cost_update(partition_mul, x->rddiv, &this_rdc); } if (this_rdc.rdcost < best_rdc.rdcost) { MODE_INFO *mi = xd->mi[0]; best_rdc = this_rdc; + should_encode_sb = 1; if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; - if (!cpi->sf.ml_partition_search_early_termination) { - // If all y, u, v transform blocks in this partition are skippable, - // and the dist & rate are within the thresholds, the partition search - // is terminated for current branch of the partition search tree. - if (!x->e_mbd.lossless && ctx->skippable && - ((best_rdc.dist < (dist_breakout_thr >> 2)) || - (best_rdc.dist < dist_breakout_thr && - best_rdc.rate < rate_breakout_thr))) { - do_split = 0; - do_rect = 0; - } - } else { + if (cpi->sf.rd_ml_partition.search_early_termination) { // Currently, the machine-learning based partition search early // termination is only used while bsize is 16x16, 32x32 or 64x64, // VPXMIN(cm->width, cm->height) >= 480, and speed = 0. 
if (!x->e_mbd.lossless && !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) && ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) { - if (compute_score(cm, xd, ctx, mi_row, mi_col, bsize) < 0.0) { + if (ml_pruning_partition(cm, xd, ctx, mi_row, mi_col, bsize)) { do_split = 0; do_rect = 0; } } } + if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) { + const int use_ml_based_breakout = + cpi->sf.rd_ml_partition.search_breakout && cm->base_qindex >= 100; + if (use_ml_based_breakout) { + if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) { + do_split = 0; + do_rect = 0; + } + } else { + if (!cpi->sf.rd_ml_partition.search_early_termination) { + if ((best_rdc.dist < (dist_breakout_thr >> 2)) || + (best_rdc.dist < dist_breakout_thr && + best_rdc.rate < rate_breakout_thr)) { + do_split = 0; + do_rect = 0; + } + } + } + } + #if CONFIG_FP_MB_STATS // Check if every 16x16 first pass block statistics has zero // motion and the corresponding first pass residue is small enough. @@ -3341,10 +4116,13 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); + } else { + vp9_zero(ctx->pred_mv); + ctx->mic.interp_filter = EIGHTTAP; } // store estimated motion vector - if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx); + store_pred_mv(x, ctx); // If the interp_filter is marked as SWITCHABLE_FILTERS, it was for an // intra block and used for context purposes. @@ -3357,113 +4135,192 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, // PARTITION_SPLIT // TODO(jingning): use the motion vectors given by the above search as // the starting point of motion search in the following partition type check. - if (do_split) { + pc_tree->split[0]->none.rdcost = 0; + pc_tree->split[1]->none.rdcost = 0; + pc_tree->split[2]->none.rdcost = 0; + pc_tree->split[3]->none.rdcost = 0; + if (do_split || must_split) { subsize = get_subsize(bsize, PARTITION_SPLIT); + load_pred_mv(x, ctx); if (bsize == BLOCK_8X8) { i = 4; if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed) pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, - pc_tree->leaf_split[0], best_rdc.rdcost); - - if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX; + pc_tree->leaf_split[0], best_rdc.rate, best_rdc.dist); + if (sum_rdc.rate == INT_MAX) { + sum_rdc.rdcost = INT64_MAX; + } else { + if (cpi->sf.prune_ref_frame_for_rect_partitions) { + const int ref1 = pc_tree->leaf_split[0]->mic.ref_frame[0]; + const int ref2 = pc_tree->leaf_split[0]->mic.ref_frame[1]; + for (i = 0; i < 4; ++i) { + ref_frames_used[i] |= (1 << ref1); + if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); + } + } + } } else { - for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) { + for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split); + ++i) { const int x_idx = (i & 1) * mi_step; const int y_idx = (i >> 1) * mi_step; + int found_best_rd = 0; + RD_COST best_rdc_split; + vp9_rd_cost_reset(&best_rdc_split); + + if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) { + // A must split test here increases the number of sub + // partitions but hurts metrics results quite a bit, + // so this extra test is commented out pending + // further tests on whether it adds much in terms of + // visual quality. + // (must_split) ? best_rdc.rate + // : best_rdc.rate - sum_rdc.rate, + // (must_split) ? 
best_rdc.dist + // : best_rdc.dist - sum_rdc.dist, + best_rdc_split.rate = best_rdc.rate - sum_rdc.rate; + best_rdc_split.dist = best_rdc.dist - sum_rdc.dist; + } if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; - if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); - pc_tree->split[i]->index = i; - rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, - mi_col + x_idx, subsize, &this_rdc, - best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]); + if (cpi->sf.prune_ref_frame_for_rect_partitions) + pc_tree->split[i]->none.rate = INT_MAX; + found_best_rd = rd_pick_partition( + cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, + &this_rdc, best_rdc_split, pc_tree->split[i]); - if (this_rdc.rate == INT_MAX) { + if (found_best_rd == 0) { sum_rdc.rdcost = INT64_MAX; break; } else { + if (cpi->sf.prune_ref_frame_for_rect_partitions && + pc_tree->split[i]->none.rate != INT_MAX) { + const int ref1 = pc_tree->split[i]->none.mic.ref_frame[0]; + const int ref2 = pc_tree->split[i]->none.mic.ref_frame[1]; + ref_frames_used[i] |= (1 << ref1); + if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); + } sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; - sum_rdc.rdcost += this_rdc.rdcost; + vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); } } } - if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) { + if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) { sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); - if (sum_rdc.rdcost < best_rdc.rdcost) { + if ((sum_rdc.rdcost < best_rdc.rdcost) || + (must_split && (sum_rdc.dist < best_rdc.dist))) { best_rdc = sum_rdc; + should_encode_sb = 1; pc_tree->partitioning = PARTITION_SPLIT; // Rate and distortion based partition search termination clause. 
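// (On the split loop above: each sub-partition is searched with a budget of
// best_rdc minus what sum_rdc has already spent; the ref frames it selects
// are recorded in ref_frames_used[i] so the rectangular searches below can
// mask out untried references via skip_ref_frame_mask; and must_split,
// derived from the sub-block energy range, can keep the split branch alive
// even while its RD cost is not yet the best.)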
- if (!cpi->sf.ml_partition_search_early_termination && - !x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) || - (best_rdc.dist < dist_breakout_thr && - best_rdc.rate < rate_breakout_thr))) { + if (!cpi->sf.rd_ml_partition.search_early_termination && + !x->e_mbd.lossless && + ((best_rdc.dist < (dist_breakout_thr >> 2)) || + (best_rdc.dist < dist_breakout_thr && + best_rdc.rate < rate_breakout_thr))) { do_rect = 0; } } } else { // skip rectangular partition test when larger block size // gives better rd cost - if ((cpi->sf.less_rectangular_check) && - ((bsize > cpi->sf.use_square_only_threshold) || - (best_rdc.dist < dist_breakout_thr))) + if (cpi->sf.less_rectangular_check && + (bsize > cpi->sf.use_square_only_thresh_high || + best_rdc.dist < dist_breakout_thr)) do_rect &= !partition_none_allowed; } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } + pc_tree->horizontal[0].skip_ref_frame_mask = 0; + pc_tree->horizontal[1].skip_ref_frame_mask = 0; + pc_tree->vertical[0].skip_ref_frame_mask = 0; + pc_tree->vertical[1].skip_ref_frame_mask = 0; + if (cpi->sf.prune_ref_frame_for_rect_partitions) { + uint8_t used_frames; + used_frames = ref_frames_used[0] | ref_frames_used[1]; + if (used_frames) { + pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames & 0xff; + } + used_frames = ref_frames_used[2] | ref_frames_used[3]; + if (used_frames) { + pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames & 0xff; + } + used_frames = ref_frames_used[0] | ref_frames_used[2]; + if (used_frames) { + pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames & 0xff; + } + used_frames = ref_frames_used[1] | ref_frames_used[3]; + if (used_frames) { + pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames & 0xff; + } + } + + { + const int do_ml_rect_partition_pruning = + !frame_is_intra_only(cm) && !force_horz_split && !force_vert_split && + (partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8; + if (do_ml_rect_partition_pruning) { + ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed, + &partition_vert_allowed, best_rdc.rdcost); + } + } + // PARTITION_HORZ if (partition_horz_allowed && (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) { + const int part_mode_rate = cpi->partition_cost[pl][PARTITION_HORZ]; subsize = get_subsize(bsize, PARTITION_HORZ); - if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); + load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, - &pc_tree->horizontal[0], best_rdc.rdcost); + &pc_tree->horizontal[0], best_rdc.rate - part_mode_rate, + best_rdc.dist); + if (sum_rdc.rdcost < INT64_MAX) { + sum_rdc.rate += part_mode_rate; + vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); + } if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows && bsize > BLOCK_8X8) { PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); - - if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, subsize, &pc_tree->horizontal[1], - best_rdc.rdcost - sum_rdc.rdcost); + best_rdc.rate - 
sum_rdc.rate, + best_rdc.dist - sum_rdc.dist); if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; - sum_rdc.rdcost += this_rdc.rdcost; + vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); } } if (sum_rdc.rdcost < best_rdc.rdcost) { - sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ]; - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); - if (sum_rdc.rdcost < best_rdc.rdcost) { - best_rdc = sum_rdc; - pc_tree->partitioning = PARTITION_HORZ; + best_rdc = sum_rdc; + should_encode_sb = 1; + pc_tree->partitioning = PARTITION_HORZ; - if ((cpi->sf.less_rectangular_check) && - (bsize > cpi->sf.use_square_only_threshold)) - do_rect = 0; - } + if (cpi->sf.less_rectangular_check && + bsize > cpi->sf.use_square_only_thresh_high) + do_rect = 0; } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } @@ -3471,56 +4328,52 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, // PARTITION_VERT if (partition_vert_allowed && (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) { + const int part_mode_rate = cpi->partition_cost[pl][PARTITION_VERT]; subsize = get_subsize(bsize, PARTITION_VERT); - - if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); + load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->vertical[0].pred_interp_filter = pred_interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, - &pc_tree->vertical[0], best_rdc.rdcost); + &pc_tree->vertical[0], best_rdc.rate - part_mode_rate, + best_rdc.dist); + if (sum_rdc.rdcost < INT64_MAX) { + sum_rdc.rate += part_mode_rate; + vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); + } + if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols && bsize > BLOCK_8X8) { update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, &pc_tree->vertical[0]); - - if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->vertical[1].pred_interp_filter = pred_interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, subsize, &pc_tree->vertical[1], - best_rdc.rdcost - sum_rdc.rdcost); + best_rdc.rate - sum_rdc.rate, + best_rdc.dist - sum_rdc.dist); if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; - sum_rdc.rdcost += this_rdc.rdcost; + vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); } } if (sum_rdc.rdcost < best_rdc.rdcost) { - sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT]; - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); - if (sum_rdc.rdcost < best_rdc.rdcost) { - best_rdc = sum_rdc; - pc_tree->partitioning = PARTITION_VERT; - } + best_rdc = sum_rdc; + should_encode_sb = 1; + pc_tree->partitioning = PARTITION_VERT; } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } - // TODO(jbb): This code added so that we avoid static analysis - // warning related to the fact that best_rd isn't used after this - // point. This code should be refactored so that the duplicate - // checks occur in some sub function and thus are used... 
- (void)best_rd; *rd_cost = best_rdc; - if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && - pc_tree->index != 3) { + if (should_encode_sb && pc_tree->index != 3) { int output_enabled = (bsize == BLOCK_64X64); encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); @@ -3533,6 +4386,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } else { assert(tp_orig == *tp); } + + return should_encode_sb; } static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td, @@ -3564,10 +4419,12 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td, RD_COST dummy_rdc; int i; int seg_skip = 0; + int orig_rdmult = cpi->rd.RDMULT; const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **mi = cm->mi_grid_visible + idx_str; + vp9_rd_cost_reset(&dummy_rdc); (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, sb_col_in_tile); @@ -3582,7 +4439,10 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td, } } - vp9_zero(x->pred_mv); + for (i = 0; i < MAX_REF_FRAMES; ++i) { + x->pred_mv[i].row = INT16_MAX; + x->pred_mv[i].col = INT16_MAX; + } td->pc_root->index = 0; if (seg->enabled) { @@ -3593,6 +4453,9 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td, } x->source_variance = UINT_MAX; + + x->cb_rdmult = orig_rdmult; + if (sf->partition_search_type == FIXED_PARTITION || seg_skip) { const BLOCK_SIZE bsize = seg_skip ? BLOCK_64X64 : sf->always_this_block_size; @@ -3613,19 +4476,33 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td, rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); } else { + if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) { + int dr = + get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult); + x->cb_rdmult = dr; + } + + if (cpi->oxcf.aq_mode == PERCEPTUAL_AQ && cm->show_frame) { + x->segment_id = wiener_var_segment(cpi, BLOCK_64X64, mi_row, mi_col); + x->cb_rdmult = vp9_compute_rd_mult( + cpi, vp9_get_qindex(&cm->seg, x->segment_id, cm->base_qindex)); + } + // If required set upper and lower partition size limits if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, &x->min_partition_size, &x->max_partition_size); } + td->pc_root->none.rdcost = 0; rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rdc, INT64_MAX, td->pc_root); + &dummy_rdc, dummy_rdc, td->pc_root); } (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, sb_col_in_tile, num_sb_cols); } } +#endif // !CONFIG_REALTIME_ONLY static void init_encode_frame_mb_context(VP9_COMP *cpi) { MACROBLOCK *const x = &cpi->td.mb; @@ -3703,6 +4580,36 @@ static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x, vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx); } +static void hybrid_search_svc_baseiskey(VP9_COMP *cpi, MACROBLOCK *const x, + RD_COST *rd_cost, BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx, + TileDataEnc *tile_data, int mi_row, + int mi_col) { + if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) { + vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); + } else { + if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF) + vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx); + else if (bsize >= BLOCK_8X8) + vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, + ctx); + else + vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx); + } +} + 
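// The hybrid helpers here and below choose between the RD and non-RD mode
// search on frames that behave like key frames. As read from the code, the
// base-is-key variant dispatches roughly as:
//
//   sf->nonrd_keyframe off && bsize <= 8x8  ->  full RD intra search
//   inter-layer prediction fully off        ->  fast non-RD intra pick
//   bsize >= 8x8                            ->  non-RD inter pick
//   otherwise (sub-8x8)                     ->  non-RD sub-8x8 inter pick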
+static void hybrid_search_scene_change(VP9_COMP *cpi, MACROBLOCK *const x, + RD_COST *rd_cost, BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx, + TileDataEnc *tile_data, int mi_row, + int mi_col) { + if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) { + vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); + } else { + vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx); + } +} + static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *const x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, @@ -3718,6 +4625,9 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, int plane; set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); + + set_segment_index(cpi, x, mi_row, mi_col, bsize, 0); + mi = xd->mi[0]; mi->sb_type = bsize; @@ -3733,14 +4643,23 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, if (cyclic_refresh_segment_id_boosted(mi->segment_id)) x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); - if (cm->frame_type == KEY_FRAME) + if (frame_is_intra_only(cm)) hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx); + else if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) + hybrid_search_svc_baseiskey(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row, + mi_col); else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize); - else if (bsize >= BLOCK_8X8) - vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx); - else + else if (bsize >= BLOCK_8X8) { + if (cpi->rc.hybrid_intra_scene_change) + hybrid_search_scene_change(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row, + mi_col); + else + vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, + ctx); + } else { vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx); + } duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); @@ -3830,6 +4749,76 @@ static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) { } } +#define FEATURES 6 +#define LABELS 2 +static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, int mi_row, + int mi_col) { + VP9_COMMON *const cm = &cpi->common; + const NN_CONFIG *nn_config = NULL; + + switch (bsize) { + case BLOCK_64X64: nn_config = &vp9_var_part_nnconfig_64; break; + case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break; + case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break; + case BLOCK_8X8: break; + default: assert(0 && "Unexpected block size."); return -1; + } + + if (!nn_config) return -1; + + vpx_clear_system_state(); + + { + const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f; + float features[FEATURES] = { 0.0f }; + const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); + int feature_idx = 0; + float score[LABELS]; + + features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f); + vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); + { + const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; + const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); + const int sb_offset_row = 8 * (mi_row & 7); + const int sb_offset_col = 8 * (mi_col & 7); + const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col; + const uint8_t *src = x->plane[0].src.buf; + const int src_stride = x->plane[0].src.stride; + const int pred_stride = 64; + unsigned int sse; + int i; + // Variance of whole block. 
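// (The whole-block variance below is computed against x->est_pred, the
// coarse 64x64 prediction built by get_estimated_pred further down. The six
// features mirror the RD-path model: log(dc_q^2 / 256 + 1), log(var + 1),
// then the four quarter-block ratios sub_var / var; score[0] is afterwards
// thresholded in both directions to force PARTITION_SPLIT or PARTITION_NONE,
// with -1 meaning the model abstains.)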
+ const unsigned int var = + cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); + const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); + + features[feature_idx++] = logf((float)var + 1.0f); + for (i = 0; i < 4; ++i) { + const int x_idx = (i & 1) * bs / 2; + const int y_idx = (i >> 1) * bs / 2; + const int src_offset = y_idx * src_stride + x_idx; + const int pred_offset = y_idx * pred_stride + x_idx; + // Variance of quarter block. + const unsigned int sub_var = + cpi->fn_ptr[subsize].vf(src + src_offset, src_stride, + pred + pred_offset, pred_stride, &sse); + const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var; + features[feature_idx++] = var_ratio; + } + } + + assert(feature_idx == FEATURES); + nn_predict(features, nn_config, score); + if (score[0] > thresh) return PARTITION_SPLIT; + if (score[0] < -thresh) return PARTITION_NONE; + return -1; + } +} +#undef FEATURES +#undef LABELS + static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -3859,6 +4848,9 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, !force_vert_split && yss <= xss && bsize >= BLOCK_8X8; int partition_vert_allowed = !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; + const int use_ml_based_partitioning = + sf->partition_search_type == ML_BASED_PARTITION; + (void)*tp_orig; // Avoid checking for rectangular partitions for speed >= 6. @@ -3889,6 +4881,18 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, partition_vert_allowed &= force_vert_split; } + if (use_ml_based_partitioning) { + if (partition_none_allowed || do_split) do_rect = 0; + if (partition_none_allowed && do_split) { + const int ml_predicted_partition = + ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col); + if (ml_predicted_partition == PARTITION_NONE) do_split = 0; + if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0; + } + } + + if (!partition_none_allowed && !do_split) do_rect = 1; + ctx->pred_pixel_ready = !(partition_vert_allowed || partition_horz_allowed || do_split); @@ -3902,26 +4906,25 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, ctx->skip = x->skip; if (this_rdc.rate != INT_MAX) { - int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); if (this_rdc.rdcost < best_rdc.rdcost) { - int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist; - int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate; - - dist_breakout_thr >>= - 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); - - rate_breakout_thr *= num_pels_log2_lookup[bsize]; - best_rdc = this_rdc; if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; - if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr && - this_rdc.dist < dist_breakout_thr) { - do_split = 0; - do_rect = 0; + if (!use_ml_based_partitioning) { + int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist; + int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate; + dist_breakout_thr >>= + 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); + rate_breakout_thr *= num_pels_log2_lookup[bsize]; + if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr && + this_rdc.dist < dist_breakout_thr) { + do_split = 0; + do_rect = 0; + } 
} } } @@ -3969,7 +4972,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, // PARTITION_HORZ if (partition_horz_allowed && do_rect) { subsize = get_subsize(bsize, PARTITION_HORZ); - if (sf->adaptive_motion_search) load_pred_mv(x, ctx); + load_pred_mv(x, ctx); pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->horizontal[0]); @@ -4013,7 +5016,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, // PARTITION_VERT if (partition_vert_allowed && do_rect) { subsize = get_subsize(bsize, PARTITION_VERT); - if (sf->adaptive_motion_search) load_pred_mv(x, ctx); + load_pred_mv(x, ctx); pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->vertical[0]); @@ -4173,7 +5176,8 @@ static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td, } } break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); subsize = get_subsize(bsize, PARTITION_SPLIT); nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize, output_enabled, rd_cost, @@ -4203,7 +5207,6 @@ static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td, rd_cost->dist += this_rdc.dist; } break; - default: assert(0 && "Invalid partition type."); break; } } @@ -4292,7 +5295,8 @@ static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td, output_enabled, subsize, &pc_tree->horizontal[1]); } break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); subsize = get_subsize(bsize, PARTITION_SPLIT); if (bsize == BLOCK_8X8) { nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, @@ -4313,13 +5317,110 @@ static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td, dummy_cost, pc_tree->split[3]); } break; - default: assert(0 && "Invalid partition type."); break; } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) update_partition_context(xd, mi_row, mi_col, subsize, bsize); } +// Get a prediction(stored in x->est_pred) for the whole 64x64 superblock. +static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile, + MACROBLOCK *x, int mi_row, int mi_col) { + VP9_COMMON *const cm = &cpi->common; + const int is_key_frame = frame_is_intra_only(cm); + MACROBLOCKD *xd = &x->e_mbd; + + set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); + + if (!is_key_frame) { + MODE_INFO *mi = xd->mi[0]; + YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); + const YV12_BUFFER_CONFIG *yv12_g = NULL; + const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + + (mi_row + 4 < cm->mi_rows); + unsigned int y_sad_g, y_sad_thr; + unsigned int y_sad = UINT_MAX; + + assert(yv12 != NULL); + + if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) || + cpi->svc.use_gf_temporal_ref_current_layer) { + // For now, GOLDEN will not be used for non-zero spatial layers, since + // it may not be a temporal reference. + yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + } + + // Only compute y_sad_g (sad for golden reference) for speed < 8. 
+ if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 && + (cpi->ref_frame_flags & VP9_GOLD_FLAG)) { + vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, + &cm->frame_refs[GOLDEN_FRAME - 1].sf); + y_sad_g = cpi->fn_ptr[bsize].sdf( + x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, + xd->plane[0].pre[0].stride); + } else { + y_sad_g = UINT_MAX; + } + + if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && + cpi->rc.is_src_frame_alt_ref) { + yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME); + vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, + &cm->frame_refs[ALTREF_FRAME - 1].sf); + mi->ref_frame[0] = ALTREF_FRAME; + y_sad_g = UINT_MAX; + } else { + vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, + &cm->frame_refs[LAST_FRAME - 1].sf); + mi->ref_frame[0] = LAST_FRAME; + } + mi->ref_frame[1] = NONE; + mi->sb_type = BLOCK_64X64; + mi->mv[0].as_int = 0; + mi->interp_filter = BILINEAR; + + { + const MV dummy_mv = { 0, 0 }; + y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col, + &dummy_mv); + x->sb_use_mv_part = 1; + x->sb_mvcol_part = mi->mv[0].as_mv.col; + x->sb_mvrow_part = mi->mv[0].as_mv.row; + } + + // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad + // are close if short_circuit_low_temp_var is on. + y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad; + if (y_sad_g < y_sad_thr) { + vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, + &cm->frame_refs[GOLDEN_FRAME - 1].sf); + mi->ref_frame[0] = GOLDEN_FRAME; + mi->mv[0].as_int = 0; + } else { + x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; + } + + set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); + xd->plane[0].dst.buf = x->est_pred; + xd->plane[0].dst.stride = 64; + vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); + } else { +#if CONFIG_VP9_HIGHBITDEPTH + switch (xd->bd) { + case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break; + case 10: + memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0])); + break; + case 12: + memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0])); + break; + } +#else + memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); +#endif // CONFIG_VP9_HIGHBITDEPTH + } +} + static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, int mi_row, TOKENEXTRA **tp) { @@ -4350,6 +5451,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type; BLOCK_SIZE bsize = BLOCK_64X64; int seg_skip = 0; + int i; (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, sb_col_in_tile); @@ -4359,7 +5461,10 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, } x->source_variance = UINT_MAX; - vp9_zero(x->pred_mv); + for (i = 0; i < MAX_REF_FRAMES; ++i) { + x->pred_mv[i].row = INT16_MAX; + x->pred_mv[i].col = INT16_MAX; + } vp9_rd_cost_init(&dummy_rdc); x->color_sensitivity[0] = 0; x->color_sensitivity[1] = 0; @@ -4367,6 +5472,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, x->skip_low_source_sad = 0; x->lowvar_highsumdiff = 0; x->content_state_sb = 0; + x->zero_temp_sad_source = 0; x->sb_use_mv_part = 0; x->sb_mvcol_part = 0; x->sb_mvrow_part = 0; @@ -4406,6 +5512,15 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rdc, td->pc_root); break; + case ML_BASED_PARTITION: + get_estimated_pred(cpi, tile_info, x, mi_row, mi_col); + 
x->max_partition_size = BLOCK_64X64; + x->min_partition_size = BLOCK_8X8; + x->sb_pickmode_part = 1; + nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, + BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, + td->pc_root); + break; case SOURCE_VAR_BASED_PARTITION: set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col); nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, @@ -4417,14 +5532,15 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rdc, td->pc_root); break; - case REFERENCE_PARTITION: + default: + assert(partition_search_type == REFERENCE_PARTITION); x->sb_pickmode_part = 1; set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); // Use nonrd_pick_partition on scene-cut for VBR mode. // nonrd_pick_partition does not support 4x4 partition, so avoid it // on key frame for now. if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad && - cpi->oxcf.speed < 6 && cm->frame_type != KEY_FRAME && + cpi->oxcf.speed < 6 && !frame_is_intra_only(cm) && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { // Use lower max_partition_size for low resolutions. if (cm->width <= 352 && cm->height <= 288) @@ -4440,7 +5556,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, // TODO(marpan): Seems like nonrd_select_partition does not support // 4x4 partition. Since 4x4 is used on key frame, use this switch // for now. - if (cm->frame_type == KEY_FRAME) + if (frame_is_intra_only(cm)) nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rdc, td->pc_root); else @@ -4449,7 +5565,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, } break; - default: assert(0); break; } // Update ref_frame usage for inter frame if this group is ARF group.
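// Taken together, the new ML_BASED_PARTITION path handles a 64x64 superblock
// roughly as follows (names as in this patch):
//
//   get_estimated_pred(cpi, tile_info, x, mi_row, mi_col); /* coarse pred */
//   x->max_partition_size = BLOCK_64X64;  /* allow the full range */
//   x->min_partition_size = BLOCK_8X8;
//   nonrd_pick_partition(...);  /* consults ml_predict_var_paritioning at
//                                  each square block to drop NONE or SPLIT */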
@@ -4516,16 +5631,12 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) { &var16->sse, &var16->sum); var16->var = variance_highbd(var16); break; - case VPX_BITS_12: + default: + assert(cm->bit_depth == VPX_BITS_12); vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); var16->var = variance_highbd(var16); break; - default: - assert(0 && - "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10" - " or VPX_BITS_12"); - return -1; } } else { vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, @@ -4620,8 +5731,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) { if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) { if (cpi->tile_data != NULL) vpx_free(cpi->tile_data); - CHECK_MEM_ERROR(cm, cpi->tile_data, vpx_malloc(tile_cols * tile_rows * - sizeof(*cpi->tile_data))); + CHECK_MEM_ERROR( + cm, cpi->tile_data, + vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data))); cpi->allocated_tiles = tile_cols * tile_rows; for (tile_row = 0; tile_row < tile_rows; ++tile_row) @@ -4632,6 +5744,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) { for (i = 0; i < BLOCK_SIZES; ++i) { for (j = 0; j < MAX_MODES; ++j) { tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT; +#if CONFIG_CONSISTENT_RECODE + tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT; +#endif tile_data->mode_map[i][j] = j; } } @@ -4645,6 +5760,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) { for (tile_col = 0; tile_col < tile_cols; ++tile_col) { TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; TileInfo *tile_info = &this_tile->tile_info; + if (cpi->sf.adaptive_rd_thresh_row_mt && + this_tile->row_base_thresh_freq_fact == NULL) + vp9_row_mt_alloc_rd_thresh(cpi, this_tile); vp9_tile_init(tile_info, cm, tile_row, tile_col); cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok; @@ -4675,8 +5793,10 @@ void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row, if (cpi->sf.use_nonrd_pick_mode) encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok); +#if !CONFIG_REALTIME_ONLY else encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); +#endif cpi->tplist[tile_row][tile_col][tile_sb_row].stop = tok; cpi->tplist[tile_row][tile_col][tile_sb_row].count = @@ -4729,16 +5849,117 @@ static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats, } #endif +static int compare_kmeans_data(const void *a, const void *b) { + if (((const KMEANS_DATA *)a)->value > ((const KMEANS_DATA *)b)->value) { + return 1; + } else if (((const KMEANS_DATA *)a)->value < + ((const KMEANS_DATA *)b)->value) { + return -1; + } else { + return 0; + } +} + +static void compute_boundary_ls(const double *ctr_ls, int k, + double *boundary_ls) { + // boundary_ls[j] is the upper bound of data centered at ctr_ls[j] + int j; + for (j = 0; j < k - 1; ++j) { + boundary_ls[j] = (ctr_ls[j] + ctr_ls[j + 1]) / 2.; + } + boundary_ls[k - 1] = DBL_MAX; +} + +int vp9_get_group_idx(double value, double *boundary_ls, int k) { + int group_idx = 0; + while (value >= boundary_ls[group_idx]) { + ++group_idx; + if (group_idx == k - 1) { + break; + } + } + return group_idx; +} + +void vp9_kmeans(double *ctr_ls, double *boundary_ls, int *count_ls, int k, + KMEANS_DATA *arr, int size) { + int i, j; + int itr; + int group_idx; + double sum[MAX_KMEANS_GROUPS]; + int count[MAX_KMEANS_GROUPS]; + + vpx_clear_system_state(); + + assert(k >= 2 && k <= MAX_KMEANS_GROUPS); + + qsort(arr, size, sizeof(*arr), compare_kmeans_data); + + // initialize the center points + for (j = 0; j < k; ++j) { + ctr_ls[j] 
= arr[(size * (2 * j + 1)) / (2 * k)].value; + } + + for (itr = 0; itr < 10; ++itr) { + compute_boundary_ls(ctr_ls, k, boundary_ls); + for (i = 0; i < MAX_KMEANS_GROUPS; ++i) { + sum[i] = 0; + count[i] = 0; + } + + // Both the data and centers are sorted in ascending order. + // As each data point is processed in order, its corresponding group index + // can only increase. So we only need to reset the group index to zero here. + group_idx = 0; + for (i = 0; i < size; ++i) { + while (arr[i].value >= boundary_ls[group_idx]) { + // place samples into clusters + ++group_idx; + if (group_idx == k - 1) { + break; + } + } + sum[group_idx] += arr[i].value; + ++count[group_idx]; + } + + for (group_idx = 0; group_idx < k; ++group_idx) { + if (count[group_idx] > 0) + ctr_ls[group_idx] = sum[group_idx] / count[group_idx]; + + sum[group_idx] = 0; + count[group_idx] = 0; + } + } + + // compute group_idx, boundary_ls and count_ls + for (j = 0; j < k; ++j) { + count_ls[j] = 0; + } + compute_boundary_ls(ctr_ls, k, boundary_ls); + group_idx = 0; + for (i = 0; i < size; ++i) { + while (arr[i].value >= boundary_ls[group_idx]) { + ++group_idx; + if (group_idx == k - 1) { + break; + } + } + arr[i].group_idx = group_idx; + ++count_ls[group_idx]; + } +} + static void encode_frame_internal(VP9_COMP *cpi) { SPEED_FEATURES *const sf = &cpi->sf; ThreadData *const td = &cpi->td; MACROBLOCK *const x = &td->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; + const int gf_group_index = cpi->twopass.gf_group.index; xd->mi = cm->mi_grid_visible; xd->mi[0] = cm->mi; - vp9_zero(*td->counts); vp9_zero(cpi->td.rd_counts); @@ -4756,8 +5977,12 @@ static void encode_frame_internal(VP9_COMP *cpi) { x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; #endif // CONFIG_VP9_HIGHBITDEPTH x->inv_txfm_add = xd->lossless ? 
vp9_iwht4x4_add : vp9_idct4x4_add; - +#if CONFIG_CONSISTENT_RECODE + x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1; +#endif if (xd->lossless) x->optimize = 0; + x->sharpness = cpi->oxcf.sharpness; + x->adjust_rdmult_by_segment = (cpi->oxcf.aq_mode == VARIANCE_AQ); cm->tx_mode = select_tx_mode(cpi, xd); @@ -4799,8 +6024,33 @@ static void encode_frame_internal(VP9_COMP *cpi) { if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION) source_var_based_partition_search_method(cpi); + } else if (gf_group_index && gf_group_index < MAX_ARF_GOP_SIZE && + cpi->sf.enable_tpl_model) { + TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index]; + TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; + + int tpl_stride = tpl_frame->stride; + int64_t intra_cost_base = 0; + int64_t mc_dep_cost_base = 0; + int row, col; + + for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) { + for (col = 0; col < cm->mi_cols; ++col) { + TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; + intra_cost_base += this_stats->intra_cost; + mc_dep_cost_base += this_stats->mc_dep_cost; + } + } + + vpx_clear_system_state(); + + if (tpl_frame->is_valid) + cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base; } + // Frame segmentation + if (cpi->oxcf.aq_mode == PERCEPTUAL_AQ) build_kmeans_segmentation(cpi); + { struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); @@ -4881,9 +6131,52 @@ static int compute_frame_aq_offset(struct VP9_COMP *cpi) { return sum_delta / (cm->mi_rows * cm->mi_cols); } +#if CONFIG_CONSISTENT_RECODE +static void restore_encode_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + int tile_col, tile_row; + int i, j; + RD_OPT *rd_opt = &cpi->rd; + for (i = 0; i < MAX_REF_FRAMES; i++) { + for (j = 0; j < REFERENCE_MODES; j++) + rd_opt->prediction_type_threshes[i][j] = + rd_opt->prediction_type_threshes_prev[i][j]; + + for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++) + rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j]; + } + + if (cpi->tile_data != NULL) { + for (tile_row = 0; tile_row < tile_rows; ++tile_row) + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + TileDataEnc *tile_data = + &cpi->tile_data[tile_row * tile_cols + tile_col]; + for (i = 0; i < BLOCK_SIZES; ++i) { + for (j = 0; j < MAX_MODES; ++j) { + tile_data->thresh_freq_fact[i][j] = + tile_data->thresh_freq_fact_prev[i][j]; + } + } + } + } + + cm->interp_filter = cpi->sf.default_interp_filter; +} +#endif + void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; +#if CONFIG_CONSISTENT_RECODE + restore_encode_params(cpi); +#endif + +#if CONFIG_MISMATCH_DEBUG + mismatch_reset_frame(MAX_MB_PLANE); +#endif + // In the longer term the encoder should be generalized to match the // decoder such that we allow compound where one of the 3 buffers has a // different sign bias and that buffer is then the fixed ref. However, this @@ -4891,16 +6184,11 @@ void vp9_encode_frame(VP9_COMP *cpi) { // side behavior is where the ALT ref buffer has opposite sign bias to // the other two. 
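A note on the hunk below: the open-coded sign-bias test is folded into the new vp9_compound_reference_allowed() / vp9_setup_compound_reference_mode() helpers. Judging from the lines being removed, the rule is that compound inter-inter prediction needs one reference whose sign bias differs from the other two; roughly (illustrative sketch only, not part of the patch):

    /* Compound prediction is allowed only when ALTREF's sign bias differs
     * from both GOLDEN's and LAST's; ALTREF then serves as the fixed ref
     * and LAST/GOLDEN as the two variable refs. */
    static int compound_allowed_sketch(const VP9_COMMON *cm) {
      return cm->ref_frame_sign_bias[ALTREF_FRAME] !=
                 cm->ref_frame_sign_bias[GOLDEN_FRAME] &&
             cm->ref_frame_sign_bias[ALTREF_FRAME] !=
                 cm->ref_frame_sign_bias[LAST_FRAME];
    }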
if (!frame_is_intra_only(cm)) { - if ((cm->ref_frame_sign_bias[ALTREF_FRAME] == - cm->ref_frame_sign_bias[GOLDEN_FRAME]) || - (cm->ref_frame_sign_bias[ALTREF_FRAME] == - cm->ref_frame_sign_bias[LAST_FRAME])) { - cpi->allow_comp_inter_inter = 0; - } else { + if (vp9_compound_reference_allowed(cm)) { cpi->allow_comp_inter_inter = 1; - cm->comp_fixed_ref = ALTREF_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = GOLDEN_FRAME; + vp9_setup_compound_reference_mode(cm); + } else { + cpi->allow_comp_inter_inter = 0; } } @@ -5064,7 +6352,8 @@ static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi, for (y = 0; y < ymis; y++) for (x = 0; x < xmis; x++) { int map_offset = block_index + y * cm->mi_cols + x; - if (is_inter_block(mi) && mi->segment_id <= CR_SEGMENT_ID_BOOST2) { + if (mi->ref_frame[0] == LAST_FRAME && is_inter_block(mi) && + mi->segment_id <= CR_SEGMENT_ID_BOOST2) { if (abs(mv.row) < 8 && abs(mv.col) < 8) { if (cpi->consec_zero_mv[map_offset] < 255) cpi->consec_zero_mv[map_offset]++; @@ -5131,7 +6420,27 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, VPXMAX(bsize, BLOCK_8X8)); - vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8)); +#if CONFIG_MISMATCH_DEBUG + if (output_enabled) { + int plane; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *pd = &xd->plane[plane]; + int pixel_c, pixel_r; + const BLOCK_SIZE plane_bsize = + get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), &xd->plane[plane]); + const int bw = get_block_width(plane_bsize); + const int bh = get_block_height(plane_bsize); + mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, + pd->subsampling_x, pd->subsampling_y); + + mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c, + pixel_r, bw, bh, + xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); + } + } +#endif + + vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8), mi_row, mi_col, output_enabled); vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, VPXMAX(bsize, BLOCK_8X8)); } @@ -5159,7 +6468,11 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])]; if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize); - if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0) + if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 && + (!cpi->use_svc || + (cpi->use_svc && + !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && + cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1))) update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize); } } diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_encodeframe.h b/media/libvpx/libvpx/vp9/encoder/vp9_encodeframe.h index cf5ae3d8ac7a..fd0a9c517ef0 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_encodeframe.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_encodeframe.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_ENCODER_VP9_ENCODEFRAME_H_ -#define VP9_ENCODER_VP9_ENCODEFRAME_H_ +#ifndef VPX_VP9_ENCODER_VP9_ENCODEFRAME_H_ +#define VPX_VP9_ENCODER_VP9_ENCODEFRAME_H_ #include "vpx/vpx_integer.h" @@ -45,8 +45,13 @@ void vp9_encode_sb_row(struct VP9_COMP *cpi, struct ThreadData *td, void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q, int content_state); +struct KMEANS_DATA; +void vp9_kmeans(double *ctr_ls, double *boundary_ls, int *count_ls, int k, + struct KMEANS_DATA *arr, int size); +int vp9_get_group_idx(double value, double *boundary_ls, int k); + #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_ENCODEFRAME_H_ +#endif // VPX_VP9_ENCODER_VP9_ENCODEFRAME_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_encodemb.c b/media/libvpx/libvpx/vp9/encoder/vp9_encodemb.c index f3c17f2559f4..7630a811038f 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_encodemb.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_encodemb.c @@ -16,6 +16,10 @@ #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" +#if CONFIG_MISMATCH_DEBUG +#include "vpx_util/vpx_debug_util.h" +#endif + #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" @@ -50,12 +54,13 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { } static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = { - { 10, 6 }, { 8, 5 }, + { 10, 6 }, + { 8, 5 }, }; // 'num' can be negative, but 'shift' must be non-negative. #define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \ - ((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)) + (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift))) int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, int ctx) { @@ -76,13 +81,19 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, const scan_order *const so = get_scan(xd, tx_size, plane_type, block); const int16_t *const scan = so->scan; const int16_t *const nb = so->neighbors; + const MODE_INFO *mbmi = xd->mi[0]; + const int sharpness = mb->sharpness; + const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type]; const int64_t rdmult = - ((int64_t)mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1; + (sharpness == 0 ? rdadj >> 1 + : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4); + const int64_t rddiv = mb->rddiv; int64_t rd_cost0, rd_cost1; int64_t rate0, rate1; int16_t t0, t1; int i, final_eob; + int count_high_values_after_eob = 0; #if CONFIG_VP9_HIGHBITDEPTH const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); #else @@ -200,9 +211,9 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, const int band_next = band_translate[i + 1]; const int token_next = (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN; - unsigned int( - *const token_costs_next)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = - token_costs + band_next; + unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS] + [ENTROPY_TOKENS] = + token_costs + band_next; token_cache[rc] = vp9_pt_energy_class[t0]; ctx_next = get_coef_context(nb, token_cache, i + 1); token_tree_sel_next = (x == 0); @@ -262,6 +273,7 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, assert(distortion0 <= distortion_for_zero); token_cache[rc] = vp9_pt_energy_class[t0]; } + if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++; assert(accu_error >= 0); x_prev = qcoeff[rc]; // Update based on selected quantized value. 
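A note on the two vp9_optimize_b() hunks around this point: with the new sharpness control, the trellis Lagrangian computed above scales as rdadj >> 1 when sharpness is 0 (the legacy behavior) and as (rdadj * (8 - sharpness + segment_id)) >> 4 otherwise, so a higher sharpness weights rate less and lets more coefficients survive. A worked sketch (values illustrative, not part of the patch):

    #include <stdint.h>
    /* Mirrors the rdmult expression in vp9_optimize_b() above. */
    static int64_t sharpness_rdmult(int64_t rdadj, int sharpness,
                                    int segment_id) {
      return sharpness == 0 ? rdadj >> 1
                            : (rdadj * (8 - sharpness + segment_id)) >> 4;
    }
    /* sharpness_rdmult(1600, 0, 0) == 800  -- legacy rdadj / 2
     * sharpness_rdmult(1600, 4, 0) == 400  -- rate cost halved again
     * sharpness_rdmult(1600, 7, 0) == 100  -- distortion dominates */

The count_high_values_after_eob counter introduced alongside it, and consumed in the next hunk, then stops the final EOB trim from discarding trailing coefficients with magnitude above 1 when sharpness is enabled.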
@@ -272,6 +284,7 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, if (best_eob_cost_cur < best_block_rd_cost) { best_block_rd_cost = best_eob_cost_cur; final_eob = i + 1; + count_high_values_after_eob = 0; if (use_x1) { before_best_eob_qc = x1; before_best_eob_dqc = dqc1; @@ -283,19 +296,31 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, } } } - assert(final_eob <= eob); - if (final_eob > 0) { - int rc; - assert(before_best_eob_qc != 0); - i = final_eob - 1; - rc = scan[i]; - qcoeff[rc] = before_best_eob_qc; - dqcoeff[rc] = before_best_eob_dqc; - } - for (i = final_eob; i < eob; i++) { - int rc = scan[i]; - qcoeff[rc] = 0; - dqcoeff[rc] = 0; + if (count_high_values_after_eob > 0) { + final_eob = eob - 1; + for (; final_eob >= 0; final_eob--) { + const int rc = scan[final_eob]; + const int x = qcoeff[rc]; + if (x) { + break; + } + } + final_eob++; + } else { + assert(final_eob <= eob); + if (final_eob > 0) { + int rc; + assert(before_best_eob_qc != 0); + i = final_eob - 1; + rc = scan[i]; + qcoeff[rc] = before_best_eob_qc; + dqcoeff[rc] = before_best_eob_dqc; + } + for (i = final_eob; i < eob; i++) { + int rc = scan[i]; + qcoeff[rc] = 0; + dqcoeff[rc] = 0; + } } mb->plane[plane].eobs[block] = final_eob; return final_eob; @@ -357,13 +382,13 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - default: assert(0); } return; } @@ -383,17 +408,19 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, scan_order->iscan); break; case TX_8X8: - vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block, - p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, - eob, scan_order->scan, scan_order->iscan); + vpx_fdct8x8(src_diff, coeff, diff_stride); + vp9_quantize_fp(coeff, 64, x->skip_block, p->round_fp, p->quant_fp, + qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, + scan_order->iscan); + break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - default: assert(0); break; } } @@ -433,13 +460,13 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; - default: assert(0); } return; } @@ -461,12 +488,12 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col, vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; - default: assert(0); break; } } @@ -510,14 +537,14 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int 
col, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - default: assert(0); } return; } @@ -543,19 +570,24 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - default: assert(0); break; } } static void encode_block(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args *const args = arg; +#if CONFIG_MISMATCH_DEBUG + int mi_row = args->mi_row; + int mi_col = args->mi_col; + int output_enabled = args->output_enabled; +#endif MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = &x->plane[plane]; @@ -572,7 +604,11 @@ static void encode_block(int plane, int block, int row, int col, if (x->zcoeff_blk[tx_size][block] && plane == 0) { p->eobs[block] = 0; *a = *l = 0; +#if CONFIG_MISMATCH_DEBUG + goto encode_block_end; +#else return; +#endif } if (!x->skip_recode) { @@ -582,7 +618,11 @@ static void encode_block(int plane, int block, int row, int col, // skip forward transform p->eobs[block] = 0; *a = *l = 0; +#if CONFIG_MISMATCH_DEBUG + goto encode_block_end; +#else return; +#endif } else { vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size); } @@ -599,7 +639,11 @@ static void encode_block(int plane, int block, int row, int col, // skip forward transform p->eobs[block] = 0; *a = *l = 0; +#if CONFIG_MISMATCH_DEBUG + goto encode_block_end; +#else return; +#endif } } else { vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); @@ -616,7 +660,13 @@ static void encode_block(int plane, int block, int row, int col, if (p->eobs[block]) *(args->skip) = 0; - if (x->skip_encode || p->eobs[block] == 0) return; + if (x->skip_encode || p->eobs[block] == 0) { +#if CONFIG_MISMATCH_DEBUG + goto encode_block_end; +#else + return; +#endif + } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst); @@ -633,16 +683,20 @@ static void encode_block(int plane, int block, int row, int col, vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], xd->bd); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. 
x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], xd->bd); break; - default: assert(0 && "Invalid transform size"); } +#if CONFIG_MISMATCH_DEBUG + goto encode_block_end; +#else return; +#endif } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -656,14 +710,27 @@ static void encode_block(int plane, int block, int row, int col, case TX_8X8: vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; - default: assert(0 && "Invalid transform size"); break; } +#if CONFIG_MISMATCH_DEBUG +encode_block_end: + if (output_enabled) { + int pixel_c, pixel_r; + int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2); + int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2); + mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row, + pd->subsampling_x, pd->subsampling_y); + mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r, + blk_w, blk_h, + xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); + } +#endif } static void encode_block_pass1(int plane, int block, int row, int col, @@ -697,12 +764,21 @@ void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { encode_block_pass1, x); } -void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { +void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, + int output_enabled) { MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; MODE_INFO *mi = xd->mi[0]; - struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip }; int plane; +#if CONFIG_MISMATCH_DEBUG + struct encode_b_args arg = { x, 1, NULL, NULL, + &mi->skip, mi_row, mi_col, output_enabled }; +#else + struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip }; + (void)mi_row; + (void)mi_col; + (void)output_enabled; +#endif mi->skip = 1; @@ -847,7 +923,8 @@ void vp9_encode_block_intra(int plane, int block, int row, int col, xd->bd); } break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); if (!x->skip_recode) { vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst, dst_stride, xd->bd); @@ -875,7 +952,6 @@ void vp9_encode_block_intra(int plane, int block, int row, int col, } } break; - default: assert(0); return; } if (*eob) *(args->skip) = 0; return; @@ -929,7 +1005,8 @@ void vp9_encode_block_intra(int plane, int block, int row, int col, if (!x->skip_encode && *eob) vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); if (!x->skip_recode) { vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst, dst_stride); @@ -954,7 +1031,6 @@ void vp9_encode_block_intra(int plane, int block, int row, int col, vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type); } break; - default: assert(0); break; } if (*eob) *(args->skip) = 0; } @@ -963,8 +1039,16 @@ void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane, int enable_optimize_b) { const MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; +#if CONFIG_MISMATCH_DEBUG + // TODO(angiebird): make mismatch_debug support intra mode + struct encode_b_args arg = { + x, enable_optimize_b, ctx.ta[plane], ctx.tl[plane], &xd->mi[0]->skip, 0, 0, + 0 + }; +#else struct encode_b_args arg = { x, enable_optimize_b, ctx.ta[plane], ctx.tl[plane], &xd->mi[0]->skip }; +#endif if (enable_optimize_b && x->optimize && (!x->skip_recode || 
!x->skip_optimize)) { diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_encodemb.h b/media/libvpx/libvpx/vp9/encoder/vp9_encodemb.h index cf943bedfdf0..1975ee73acd4 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_encodemb.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_encodemb.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_ENCODEMB_H_ -#define VP9_ENCODER_VP9_ENCODEMB_H_ +#ifndef VPX_VP9_ENCODER_VP9_ENCODEMB_H_ +#define VPX_VP9_ENCODER_VP9_ENCODEMB_H_ #include "./vpx_config.h" #include "vp9/encoder/vp9_block.h" @@ -24,10 +24,16 @@ struct encode_b_args { ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; int8_t *skip; +#if CONFIG_MISMATCH_DEBUG + int mi_row; + int mi_col; + int output_enabled; +#endif }; int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, int ctx); -void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize); +void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, + int output_enabled); void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize); void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); @@ -48,4 +54,4 @@ void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane, } // extern "C" #endif -#endif // VP9_ENCODER_VP9_ENCODEMB_H_ +#endif // VPX_VP9_ENCODER_VP9_ENCODEMB_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_encodemv.h b/media/libvpx/libvpx/vp9/encoder/vp9_encodemv.h index 9fc7ab8dc45b..2f1be4b233f5 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_encodemv.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_encodemv.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_ENCODEMV_H_ -#define VP9_ENCODER_VP9_ENCODEMV_H_ +#ifndef VPX_VP9_ENCODER_VP9_ENCODEMV_H_ +#define VPX_VP9_ENCODER_VP9_ENCODEMV_H_ #include "vp9/encoder/vp9_encoder.h" @@ -27,7 +27,7 @@ void vp9_encode_mv(VP9_COMP *cpi, vpx_writer *w, const MV *mv, const MV *ref, unsigned int *const max_mv_magnitude); void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], - const nmv_context *mvctx, int usehp); + const nmv_context *ctx, int usehp); void vp9_update_mv_count(ThreadData *td); @@ -35,4 +35,4 @@ void vp9_update_mv_count(ThreadData *td); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_ENCODEMV_H_ +#endif // VPX_VP9_ENCODER_VP9_ENCODEMV_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_encoder.c b/media/libvpx/libvpx/vp9/encoder/vp9_encoder.c index 2ae59dd9812c..4a37816e2070 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_encoder.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_encoder.c @@ -8,9 +8,10 @@ * be found in the AUTHORS file in the root of the source tree. 
 */

+#include <limits.h>
 #include <math.h>
 #include <stdio.h>
-#include <limits.h>
+#include <stdlib.h>

 #include "./vp9_rtcd.h"
 #include "./vpx_config.h"
@@ -25,31 +26,49 @@
 #include "vpx_ports/mem.h"
 #include "vpx_ports/system_state.h"
 #include "vpx_ports/vpx_timer.h"
+#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
+#include "vpx_util/vpx_debug_util.h"
+#endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG

 #include "vp9/common/vp9_alloccommon.h"
 #include "vp9/common/vp9_filter.h"
 #include "vp9/common/vp9_idct.h"
+#if CONFIG_NON_GREEDY_MV
+#include "vp9/common/vp9_mvref_common.h"
+#endif
 #if CONFIG_VP9_POSTPROC
 #include "vp9/common/vp9_postproc.h"
 #endif
 #include "vp9/common/vp9_reconinter.h"
 #include "vp9/common/vp9_reconintra.h"
 #include "vp9/common/vp9_tile_common.h"
+#include "vp9/common/vp9_scan.h"
+#if !CONFIG_REALTIME_ONLY
 #include "vp9/encoder/vp9_alt_ref_aq.h"
 #include "vp9/encoder/vp9_aq_360.h"
 #include "vp9/encoder/vp9_aq_complexity.h"
+#endif
 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
+#if !CONFIG_REALTIME_ONLY
 #include "vp9/encoder/vp9_aq_variance.h"
+#endif
 #include "vp9/encoder/vp9_bitstream.h"
+#if CONFIG_INTERNAL_STATS
+#include "vp9/encoder/vp9_blockiness.h"
+#endif
 #include "vp9/encoder/vp9_context_tree.h"
 #include "vp9/encoder/vp9_encodeframe.h"
+#include "vp9/encoder/vp9_encodemb.h"
 #include "vp9/encoder/vp9_encodemv.h"
 #include "vp9/encoder/vp9_encoder.h"
-#include "vp9/encoder/vp9_extend.h"
 #include "vp9/encoder/vp9_ethread.h"
+#include "vp9/encoder/vp9_extend.h"
 #include "vp9/encoder/vp9_firstpass.h"
 #include "vp9/encoder/vp9_mbgraph.h"
+#if CONFIG_NON_GREEDY_MV
+#include "vp9/encoder/vp9_mcomp.h"
+#endif
 #include "vp9/encoder/vp9_multi_thread.h"
 #include "vp9/encoder/vp9_noise_estimate.h"
 #include "vp9/encoder/vp9_picklpf.h"
@@ -61,16 +80,17 @@
 #include "vp9/encoder/vp9_speed_features.h"
 #include "vp9/encoder/vp9_svc_layercontext.h"
 #include "vp9/encoder/vp9_temporal_filter.h"
+#include "vp9/vp9_cx_iface.h"

 #define AM_SEGMENT_ID_INACTIVE 7
 #define AM_SEGMENT_ID_ACTIVE 0

-#define ALTREF_HIGH_PRECISION_MV 1     // Whether to use high precision mv
-                                       // for altref computation.
-#define HIGH_PRECISION_MV_QTHRESH 200  // Q threshold for high precision
-                                       // mv. Choose a very high value for
-                                       // now so that HIGH_PRECISION is always
-                                       // chosen.
+// Whether to use high precision mv for altref computation.
+#define ALTREF_HIGH_PRECISION_MV 1
+
+// Q threshold for high precision mv. Choose a very high value for now so that
+// HIGH_PRECISION is always chosen.
+#define HIGH_PRECISION_MV_QTHRESH 200

 #define FRAME_SIZE_FACTOR 128  // empirical params for context model threshold
 #define FRAME_RATE_FACTOR 8
@@ -84,6 +104,9 @@ static FILE *yuv_skinmap_file = NULL;
 #ifdef OUTPUT_YUV_REC
 FILE *yuv_rec_file;
 #endif
+#ifdef OUTPUT_YUV_SVC_SRC
+FILE *yuv_svc_src[3] = { NULL, NULL, NULL };
+#endif

 #if 0
 FILE *framepsnr;
@@ -102,6 +125,14 @@ static int is_spatial_denoise_enabled(VP9_COMP *cpi) {
 }
 #endif

+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+                         TX_SIZE tx_size);
+#endif
+void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+                  TX_SIZE tx_size);
+
+#if !CONFIG_REALTIME_ONLY
 // compute adaptive threshold for skip recoding
 static int compute_context_model_thresh(const VP9_COMP *const cpi) {
   const VP9_COMMON *const cm = &cpi->common;
@@ -426,10 +457,11 @@ static int compute_context_model_diff(const VP9_COMMON *const cm) {
   return -diff;
 }
+#endif  // !CONFIG_REALTIME_ONLY

 // Test for whether to calculate metrics for the frame.
-static int is_psnr_calc_enabled(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
+static int is_psnr_calc_enabled(const VP9_COMP *cpi) {
+  const VP9_COMMON *const cm = &cpi->common;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;

   return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame;
@@ -483,15 +515,11 @@ static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
       *hr = 3;
       *hs = 5;
       break;
-    case ONETWO:
+    default:
+      assert(mode == ONETWO);
       *hr = 1;
       *hs = 2;
       break;
-    default:
-      *hr = 1;
-      *hs = 1;
-      assert(0);
-      break;
   }
 }

@@ -547,6 +575,74 @@ static void apply_active_map(VP9_COMP *cpi) {
   }
 }

+static void apply_roi_map(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+  struct segmentation *const seg = &cm->seg;
+  vpx_roi_map_t *roi = &cpi->roi;
+  const int *delta_q = roi->delta_q;
+  const int *delta_lf = roi->delta_lf;
+  const int *skip = roi->skip;
+  int ref_frame[8];
+  int internal_delta_q[MAX_SEGMENTS];
+  int i;
+  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+                                    VP9_ALT_FLAG };
+
+  // TODO(jianj): Investigate why ROI is not working in speed < 5 or in
+  // non-realtime mode.
+  if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
+  if (!roi->enabled) return;
+
+  memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));
+
+  vp9_enable_segmentation(seg);
+  vp9_clearall_segfeatures(seg);
+  // Select delta coding method.
+  seg->abs_delta = SEGMENT_DELTADATA;
+
+  memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));
+
+  for (i = 0; i < MAX_SEGMENTS; ++i) {
+    // Translate the external delta q values to internal values.
+    internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
+    if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
+    vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
+    vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
+    if (internal_delta_q[i] != 0) {
+      vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
+      vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
+    }
+    if (delta_lf[i] != 0) {
+      vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
+      vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
+    }
+    if (skip[i] != 0) {
+      vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
+      vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
+    }
+    if (ref_frame[i] >= 0) {
+      int valid_ref = 1;
+      // ALTREF is not used as reference for nonrd_pickmode with 0 lag.
+      if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
+        valid_ref = 0;
+      // If GOLDEN is selected, make sure it's set as reference.
+      if (ref_frame[i] == GOLDEN_FRAME &&
+          !(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
+        valid_ref = 0;
+      }
+      // GOLDEN was updated in the previous encoded frame, so GOLDEN and LAST
+      // are the same reference.
+      if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
+        ref_frame[i] = LAST_FRAME;
+      if (valid_ref) {
+        vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
+        vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
+      }
+    }
+  }
+  roi->enabled = 1;
+}
+
 static void init_level_info(Vp9LevelInfo *level_info) {
   Vp9LevelStats *const level_stats = &level_info->level_stats;
   Vp9LevelSpec *const level_spec = &level_info->level_spec;
@@ -557,6 +653,13 @@ static void init_level_info(Vp9LevelInfo *level_info) {
   level_spec->min_altref_distance = INT_MAX;
 }

+static int check_seg_range(int seg_data[8], int range) {
+  return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
+           abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
+           abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
+           abs(seg_data[6]) > range || abs(seg_data[7]) > range);
+}
+
 VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
   int i;
   const Vp9LevelSpec *this_level;
@@ -583,6 +686,61 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
   return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
 }

+int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
+                    unsigned int cols, int delta_q[8], int delta_lf[8],
+                    int skip[8], int ref_frame[8]) {
+  VP9_COMMON *cm = &cpi->common;
+  vpx_roi_map_t *roi = &cpi->roi;
+  const int range = 63;
+  const int ref_frame_range = 3;  // Alt-ref
+  const int skip_range = 1;
+  const int frame_rows = cpi->common.mi_rows;
+  const int frame_cols = cpi->common.mi_cols;
+
+  // Check that the number of rows and columns match.
+  if (frame_rows != (int)rows || frame_cols != (int)cols) {
+    return -1;
+  }
+
+  if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
+      !check_seg_range(ref_frame, ref_frame_range) ||
+      !check_seg_range(skip, skip_range))
+    return -1;
+
+  // Also disable segmentation if no deltas are specified.
+  if (!map ||
+      (!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
+         delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
+         delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
+         delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
+         skip[5] | skip[6] | skip[7]) &&
+       (ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
+        ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
+        ref_frame[6] == -1 && ref_frame[7] == -1))) {
+    vp9_disable_segmentation(&cm->seg);
+    cpi->roi.enabled = 0;
+    return 0;
+  }
+
+  if (roi->roi_map) {
+    vpx_free(roi->roi_map);
+    roi->roi_map = NULL;
+  }
+  CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
+
+  // Copy to the ROI structure in the compressor.
+  memcpy(roi->roi_map, map, rows * cols);
+  memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
+  memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
+  memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
+  memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
+  roi->enabled = 1;
+  roi->rows = rows;
+  roi->cols = cols;
+
+  return 0;
+}
+
 int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
                        int cols) {
   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
@@ -660,8 +818,37 @@ static void setup_frame(VP9_COMP *cpi) {
     if (!cpi->use_svc) cm->frame_context_idx = cpi->refresh_alt_ref_frame;
   }

+  // TODO(jingning): Overwrite the frame_context_idx in the multi-layer ARF
+  // case. Further investigation is needed into whether we could apply this
+  // to the single-layer ARF case as well.
+  if (cpi->multi_layer_arf && !cpi->use_svc) {
+    GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+    const int gf_group_index = gf_group->index;
+    const int boost_frame =
+        !cpi->rc.is_src_frame_alt_ref &&
+        (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
+
+    // frame_context_idx       Frame Type
+    //        0          Intra only frame, base layer ARF
+    //        1          ARFs with layer depth = 2,3
+    //        2          ARFs with layer depth > 3
+    //        3          Non-boosted frames
+    if (frame_is_intra_only(cm)) {
+      cm->frame_context_idx = 0;
+    } else if (boost_frame) {
+      if (gf_group->rf_level[gf_group_index] == GF_ARF_STD)
+        cm->frame_context_idx = 0;
+      else if (gf_group->layer_depth[gf_group_index] <= 3)
+        cm->frame_context_idx = 1;
+      else
+        cm->frame_context_idx = 2;
+    } else {
+      cm->frame_context_idx = 3;
+    }
+  }
+
   if (cm->frame_type == KEY_FRAME) {
-    if (!is_two_pass_svc(cpi)) cpi->refresh_golden_frame = 1;
+    cpi->refresh_golden_frame = 1;
     cpi->refresh_alt_ref_frame = 1;
     vp9_zero(cpi->interp_filter_selected);
   } else {
@@ -713,12 +900,17 @@ static void vp9_enc_free_mi(VP9_COMMON *cm) {
   cm->mi_grid_base = NULL;
   vpx_free(cm->prev_mi_grid_base);
   cm->prev_mi_grid_base = NULL;
+  cm->mi_alloc_size = 0;
 }

 static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
   // Current mip will be the prev_mip for the next frame.
   MODE_INFO **temp_base = cm->prev_mi_grid_base;
   MODE_INFO *temp = cm->prev_mip;
+
+  // Skip updating prev_mi in show_existing_frame mode.
+  if (cm->show_existing_frame) return;
+
   cm->prev_mip = cm->mip;
   cm->mip = temp;

@@ -817,9 +1009,18 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
   vpx_free(cpi->active_map.map);
   cpi->active_map.map = NULL;

+  vpx_free(cpi->roi.roi_map);
+  cpi->roi.roi_map = NULL;
+
   vpx_free(cpi->consec_zero_mv);
   cpi->consec_zero_mv = NULL;

+  vpx_free(cpi->mb_wiener_variance);
+  cpi->mb_wiener_variance = NULL;
+
+  vpx_free(cpi->mi_ssim_rdmult_scaling_factors);
+  cpi->mi_ssim_rdmult_scaling_factors = NULL;
+
   vp9_free_ref_frame_buffers(cm->buffer_pool);
 #if CONFIG_VP9_POSTPROC
   vp9_free_postproc_buffers(cm);
@@ -1121,8 +1322,9 @@ static void alloc_util_frame_buffers(VP9_COMP *cpi) {
   // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
   // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
-  // target of 1/4x1/4.
-  if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc) {
+  // target of 1/4x1/4. number_spatial_layers must be greater than 2.
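On the comment above: when an SVC stream has more than two spatial layers, the bottom layer is 1/4x1/4 of the top one, and the encoder reaches it through two 1:2 stages, with svc.scaled_temp (allocated below at cm->width >> 1, cm->height >> 1) holding the intermediate result. A dimension sketch (sizes illustrative, not part of the patch):

    /* Two-stage 1:2 downscale toward a 1/4x1/4 spatial layer, e.g.
     * 1280x720 -> 640x360 (scaled_temp) -> 320x180 (bottom layer). */
    static void two_stage_scaled_dims(int top_w, int top_h, int *tmp_w,
                                      int *tmp_h, int *out_w, int *out_h) {
      *tmp_w = top_w >> 1; /* first 1:2 stage */
      *tmp_h = top_h >> 1;
      *out_w = *tmp_w >> 1; /* second 1:2 stage */
      *out_h = *tmp_h >> 1;
    }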
+ if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc && + cpi->svc.number_spatial_layers > 2) { cpi->svc.scaled_temp_is_alloc = 1; if (vpx_realloc_frame_buffer( &cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1, @@ -1213,15 +1415,9 @@ static void set_tile_limits(VP9_COMP *cpi) { int min_log2_tile_cols, max_log2_tile_cols; vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); - if (is_two_pass_svc(cpi) && (cpi->svc.encode_empty_frame_state == ENCODING || - cpi->svc.number_spatial_layers > 1)) { - cm->log2_tile_cols = 0; - cm->log2_tile_rows = 0; - } else { - cm->log2_tile_cols = - clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols); - cm->log2_tile_rows = cpi->oxcf.tile_rows; - } + cm->log2_tile_cols = + clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols); + cm->log2_tile_rows = cpi->oxcf.tile_rows; if (cpi->oxcf.target_level == LEVEL_AUTO) { const int level_tile_cols = @@ -1244,31 +1440,23 @@ static void update_frame_size(VP9_COMP *cpi) { cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base)); set_tile_limits(cpi); - - if (is_two_pass_svc(cpi)) { - if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, - NULL, NULL, NULL)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to reallocate alt_ref_buffer"); - } } static void init_buffer_indices(VP9_COMP *cpi) { - cpi->lst_fb_idx = 0; - cpi->gld_fb_idx = 1; - cpi->alt_fb_idx = 2; + int ref_frame; + + for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) + cpi->ref_fb_idx[ref_frame] = ref_frame; + + cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1]; + cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1]; + cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1]; } static void init_level_constraint(LevelConstraint *lc) { lc->level_index = -1; lc->max_cpb_size = INT_MAX; lc->max_frame_size = INT_MAX; - lc->rc_config_updated = 0; lc->fail_flag = 0; } @@ -1280,7 +1468,7 @@ static void set_level_constraint(LevelConstraint *ls, int8_t level_index) { } } -static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { +static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { VP9_COMMON *const cm = &cpi->common; cpi->oxcf = *oxcf; @@ -1345,13 +1533,15 @@ static void set_rc_buffer_sizes(RATE_CONTROL *rc, } #if CONFIG_VP9_HIGHBITDEPTH +// TODO(angiebird): make sdx8f available for highbitdepth if needed #define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \ cpi->fn_ptr[BT].sdf = SDF; \ cpi->fn_ptr[BT].sdaf = SDAF; \ cpi->fn_ptr[BT].vf = VF; \ cpi->fn_ptr[BT].svf = SVF; \ cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; + cpi->fn_ptr[BT].sdx4df = SDX4DF; \ + cpi->fn_ptr[BT].sdx8f = NULL; #define MAKE_BFP_SAD_WRAPPER(fnname) \ static unsigned int fnname##_bits8(const uint8_t *src_ptr, \ @@ -1610,7 +1800,8 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { vpx_highbd_sad4x4x4d_bits10) break; - case VPX_BITS_12: + default: + assert(cm->bit_depth == VPX_BITS_12); HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12, vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16, vpx_highbd_12_sub_pixel_variance32x16, @@ -1689,11 +1880,6 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { vpx_highbd_12_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits12) break; - - default: - assert(0 && - "cm->bit_depth should be VPX_BITS_8, " - "VPX_BITS_10 or 
VPX_BITS_12"); } } } @@ -1757,6 +1943,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { int last_w = cpi->oxcf.width; int last_h = cpi->oxcf.height; + vp9_init_quantizer(cpi); if (cm->profile != oxcf->profile) cm->profile = oxcf->profile; cm->bit_depth = oxcf->bit_depth; cm->color_space = oxcf->color_space; @@ -1972,7 +2159,112 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { } while (++i <= MV_MAX); } -VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, +static void init_ref_frame_bufs(VP9_COMMON *cm) { + int i; + BufferPool *const pool = cm->buffer_pool; + cm->new_fb_idx = INVALID_IDX; + for (i = 0; i < REF_FRAMES; ++i) { + cm->ref_frame_map[i] = INVALID_IDX; + } + for (i = 0; i < FRAME_BUFFERS; ++i) { + pool->frame_bufs[i].ref_count = 0; + } +} + +static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth, + int subsampling_x, int subsampling_y) { + VP9_COMMON *const cm = &cpi->common; +#if !CONFIG_VP9_HIGHBITDEPTH + (void)use_highbitdepth; + assert(use_highbitdepth == 0); +#endif + + if (!cpi->initial_width || +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth != use_highbitdepth || +#endif + cm->subsampling_x != subsampling_x || + cm->subsampling_y != subsampling_y) { + cm->subsampling_x = subsampling_x; + cm->subsampling_y = subsampling_y; +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth = use_highbitdepth; +#endif + alloc_util_frame_buffers(cpi); + cpi->initial_width = cm->width; + cpi->initial_height = cm->height; + cpi->initial_mbs = cm->MBs; + } +} + +// TODO(angiebird): Check whether we can move this function to vpx_image.c +static INLINE void vpx_img_chroma_subsampling(vpx_img_fmt_t fmt, + unsigned int *subsampling_x, + unsigned int *subsampling_y) { + switch (fmt) { + case VPX_IMG_FMT_I420: + case VPX_IMG_FMT_YV12: + case VPX_IMG_FMT_I422: + case VPX_IMG_FMT_I42016: + case VPX_IMG_FMT_I42216: *subsampling_x = 1; break; + default: *subsampling_x = 0; break; + } + + switch (fmt) { + case VPX_IMG_FMT_I420: + case VPX_IMG_FMT_I440: + case VPX_IMG_FMT_YV12: + case VPX_IMG_FMT_I42016: + case VPX_IMG_FMT_I44016: *subsampling_y = 1; break; + default: *subsampling_y = 0; break; + } +} + +// TODO(angiebird): Check whether we can move this function to vpx_image.c +static INLINE int vpx_img_use_highbitdepth(vpx_img_fmt_t fmt) { + return fmt & VPX_IMG_FMT_HIGHBITDEPTH; +} + +#if CONFIG_VP9_TEMPORAL_DENOISING +static void setup_denoiser_buffer(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if (cpi->oxcf.noise_sensitivity > 0 && + !cpi->denoiser.frame_buffer_initialized) { + if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc, + cpi->oxcf.noise_sensitivity, cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_ENC_BORDER_IN_PIXELS)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate denoiser"); + } +} +#endif + +void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt) { + const VP9EncoderConfig *oxcf = &cpi->oxcf; + unsigned int subsampling_x, subsampling_y; + const int use_highbitdepth = vpx_img_use_highbitdepth(img_fmt); + vpx_img_chroma_subsampling(img_fmt, &subsampling_x, &subsampling_y); + + update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y); +#if CONFIG_VP9_TEMPORAL_DENOISING + setup_denoiser_buffer(cpi); +#endif + + assert(cpi->lookahead == NULL); + cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, subsampling_x, + subsampling_y, +#if 
CONFIG_VP9_HIGHBITDEPTH + use_highbitdepth, +#endif + oxcf->lag_in_frames); + alloc_raw_frame_buffers(cpi); +} + +VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf, BufferPool *const pool) { unsigned int i; VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP)); @@ -2005,10 +2297,13 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->resize_buffer_underflow = 0; cpi->use_skin_detection = 0; cpi->common.buffer_pool = pool; + init_ref_frame_bufs(cm); cpi->force_update_segmentation = 0; init_config(cpi, oxcf); + cpi->frame_info = vp9_get_frame_info(oxcf); + vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc); cm->current_video_frame = 0; @@ -2017,10 +2312,13 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, realloc_segmentation_maps(cpi); - CHECK_MEM_ERROR(cm, cpi->skin_map, vpx_calloc(cm->mi_rows * cm->mi_cols, - sizeof(cpi->skin_map[0]))); + CHECK_MEM_ERROR( + cm, cpi->skin_map, + vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0]))); +#if !CONFIG_REALTIME_ONLY CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create()); +#endif CHECK_MEM_ERROR( cm, cpi->consec_zero_mv, @@ -2062,8 +2360,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, #endif cpi->refresh_alt_ref_frame = 0; - cpi->multi_arf_last_grp_enabled = 0; - cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; init_level_info(&cpi->level_info); @@ -2104,9 +2400,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, if (cpi->b_calculate_consistency) { CHECK_MEM_ERROR(cm, cpi->ssim_vars, - vpx_malloc(sizeof(*cpi->ssim_vars) * 4 * - cpi->common.mi_rows * cpi->common.mi_cols)); + vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols, + sizeof(*cpi->ssim_vars) * 4)); cpi->worst_consistency = 100.0; + } else { + cpi->ssim_vars = NULL; } #endif @@ -2141,6 +2439,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, #ifdef OUTPUT_YUV_REC yuv_rec_file = fopen("rec.yuv", "wb"); #endif +#ifdef OUTPUT_YUV_SVC_SRC + yuv_svc_src[0] = fopen("svc_src_0.yuv", "wb"); + yuv_svc_src[1] = fopen("svc_src_1.yuv", "wb"); + yuv_svc_src[2] = fopen("svc_src_2.yuv", "wb"); +#endif #if 0 framepsnr = fopen("framepsnr.stt", "a"); @@ -2168,6 +2471,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, const int layer_id = (int)last_packet_for_layer->spatial_layer_id; const int packets_in_layer = (int)last_packet_for_layer->count + 1; if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) { + int num_frames; LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id]; vpx_free(lc->rc_twopass_stats_in.buf); @@ -2179,6 +2483,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, lc->twopass.stats_in = lc->twopass.stats_in_start; lc->twopass.stats_in_end = lc->twopass.stats_in_start + packets_in_layer - 1; + // Note the last packet is cumulative first pass stats. 
+ // So the number of frames is packet number minus one + num_frames = packets_in_layer - 1; + fps_init_first_pass_info(&lc->twopass.first_pass_info, + lc->rc_twopass_stats_in.buf, num_frames); stats_copy[layer_id] = lc->rc_twopass_stats_in.buf; } } @@ -2194,6 +2503,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, vp9_init_second_pass_spatial_svc(cpi); } else { + int num_frames; #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { const size_t psz = cpi->common.MBs * sizeof(uint8_t); @@ -2210,75 +2520,106 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; cpi->twopass.stats_in = cpi->twopass.stats_in_start; cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; + // Note the last packet is cumulative first pass stats. + // So the number of frames is packet number minus one + num_frames = packets - 1; + fps_init_first_pass_info(&cpi->twopass.first_pass_info, + oxcf->two_pass_stats_in.buf, num_frames); vp9_init_second_pass(cpi); } } #endif // !CONFIG_REALTIME_ONLY - vp9_set_speed_features_framesize_independent(cpi); - vp9_set_speed_features_framesize_dependent(cpi); + cpi->mb_wiener_var_cols = 0; + cpi->mb_wiener_var_rows = 0; + cpi->mb_wiener_variance = NULL; + + vp9_set_speed_features_framesize_independent(cpi, oxcf->speed); + vp9_set_speed_features_framesize_dependent(cpi, oxcf->speed); + + { + const int bsize = BLOCK_16X16; + const int w = num_8x8_blocks_wide_lookup[bsize]; + const int h = num_8x8_blocks_high_lookup[bsize]; + const int num_cols = (cm->mi_cols + w - 1) / w; + const int num_rows = (cm->mi_rows + h - 1) / h; + CHECK_MEM_ERROR(cm, cpi->mi_ssim_rdmult_scaling_factors, + vpx_calloc(num_rows * num_cols, + sizeof(*cpi->mi_ssim_rdmult_scaling_factors))); + } + + cpi->kmeans_data_arr_alloc = 0; +#if CONFIG_NON_GREEDY_MV + cpi->tpl_ready = 0; +#endif // CONFIG_NON_GREEDY_MV + for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL; // Allocate memory to store variances for a frame. 
CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff))); cpi->source_var_thresh = 0; cpi->frames_till_next_var_check = 0; +#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \ + cpi->fn_ptr[BT].sdf = SDF; \ + cpi->fn_ptr[BT].sdaf = SDAF; \ + cpi->fn_ptr[BT].vf = VF; \ + cpi->fn_ptr[BT].svf = SVF; \ + cpi->fn_ptr[BT].svaf = SVAF; \ + cpi->fn_ptr[BT].sdx4df = SDX4DF; \ + cpi->fn_ptr[BT].sdx8f = SDX8F; -#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \ - cpi->fn_ptr[BT].sdf = SDF; \ - cpi->fn_ptr[BT].sdaf = SDAF; \ - cpi->fn_ptr[BT].vf = VF; \ - cpi->fn_ptr[BT].svf = SVF; \ - cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; - + // TODO(angiebird): make sdx8f available for every block size BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16, vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16, - vpx_sad32x16x4d) + vpx_sad32x16x4d, NULL) BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32, vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32, - vpx_sad16x32x4d) + vpx_sad16x32x4d, NULL) BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32, vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32, - vpx_sad64x32x4d) + vpx_sad64x32x4d, NULL) BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64, vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64, - vpx_sad32x64x4d) + vpx_sad32x64x4d, NULL) BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32, vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32, - vpx_sad32x32x4d) + vpx_sad32x32x4d, vpx_sad32x32x8) BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64, vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64, - vpx_sad64x64x4d) + vpx_sad64x64x4d, NULL) BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16, vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16, - vpx_sad16x16x4d) + vpx_sad16x16x4d, vpx_sad16x16x8) BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8, vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8, - vpx_sad16x8x4d) + vpx_sad16x8x4d, vpx_sad16x8x8) BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16, vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16, - vpx_sad8x16x4d) + vpx_sad8x16x4d, vpx_sad8x16x8) BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8, - vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d) + vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d, + vpx_sad8x8x8) BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4, - vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d) + vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d, + NULL) BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8, - vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d) + vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d, + NULL) BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4, - vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d) + vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d, + vpx_sad4x4x8) #if CONFIG_VP9_HIGHBITDEPTH highbd_set_var_fns(cpi); @@ -2293,8 +2634,23 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, vp9_loop_filter_init(cm); + // Set up the unit scaling factor used during motion search. 
+#if CONFIG_VP9_HIGHBITDEPTH + vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height, + cm->width, cm->height, + cm->use_highbitdepth); +#else + vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height, + cm->width, cm->height); +#endif // CONFIG_VP9_HIGHBITDEPTH + cpi->td.mb.me_sf = &cpi->me_sf; + cm->error.setjmp = 0; +#if CONFIG_RATE_CTRL + encode_command_init(&cpi->encode_command); +#endif + return cpi; } @@ -2305,6 +2661,8 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V)) #endif // CONFIG_INTERNAL_STATS +static void free_tpl_buffer(VP9_COMP *cpi); + void vp9_remove_compressor(VP9_COMP *cpi) { VP9_COMMON *cm; unsigned int i; @@ -2312,6 +2670,10 @@ void vp9_remove_compressor(VP9_COMP *cpi) { if (!cpi) return; +#if CONFIG_INTERNAL_STATS + vpx_free(cpi->ssim_vars); +#endif + cm = &cpi->common; if (cm->current_video_frame > 0) { #if CONFIG_INTERNAL_STATS @@ -2376,14 +2738,20 @@ void vp9_remove_compressor(VP9_COMP *cpi) { SNPRINT2(results, "\t%7.3f", cpi->worst_consistency); } - fprintf(f, "%s\t Time\tRcErr\tAbsErr\n", headings); - fprintf(f, "%s\t%8.0f\t%7.2f\t%7.2f\n", results, total_encode_time, - rate_err, fabs(rate_err)); + SNPRINT(headings, "\t Time\tRcErr\tAbsErr"); + SNPRINT2(results, "\t%8.0f", total_encode_time); + SNPRINT2(results, "\t%7.2f", rate_err); + SNPRINT2(results, "\t%7.2f", fabs(rate_err)); + + fprintf(f, "%s\tAPsnr611\n", headings); + fprintf( + f, "%s\t%7.3f\n", results, + (6 * cpi->psnr.stat[Y] + cpi->psnr.stat[U] + cpi->psnr.stat[V]) / + (cpi->count * 8)); } fclose(f); } - #endif #if 0 @@ -2402,6 +2770,15 @@ void vp9_remove_compressor(VP9_COMP *cpi) { vp9_denoiser_free(&(cpi->denoiser)); #endif + if (cpi->kmeans_data_arr_alloc) { +#if CONFIG_MULTITHREAD + pthread_mutex_destroy(&cpi->kmeans_mutex); +#endif + vpx_free(cpi->kmeans_data_arr); + } + + free_tpl_buffer(cpi); + for (t = 0; t < cpi->num_workers; ++t) { VPxWorker *const worker = &cpi->workers[t]; EncWorkerData *const thread_data = &cpi->tile_thr_data[t]; @@ -2425,7 +2802,9 @@ void vp9_remove_compressor(VP9_COMP *cpi) { vp9_bitstream_encode_tiles_buffer_dealloc(cpi); } +#if !CONFIG_REALTIME_ONLY vp9_alt_ref_aq_destroy(cpi->alt_ref_aq); +#endif dealloc_compressor_data(cpi); @@ -2459,6 +2838,11 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #ifdef OUTPUT_YUV_REC fclose(yuv_rec_file); #endif +#ifdef OUTPUT_YUV_SVC_SRC + fclose(yuv_svc_src[0]); + fclose(yuv_svc_src[1]); + fclose(yuv_svc_src[2]); +#endif #if 0 @@ -2474,30 +2858,19 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #endif } -static void generate_psnr_packet(VP9_COMP *cpi) { - struct vpx_codec_cx_pkt pkt; - int i; - PSNR_STATS psnr; +int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr) { + if (is_psnr_calc_enabled(cpi)) { #if CONFIG_VP9_HIGHBITDEPTH - vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr, - cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth); + vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr, + cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth); #else - vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr); + vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr); #endif - - for (i = 0; i < 4; ++i) { - pkt.data.psnr.samples[i] = psnr.samples[i]; - pkt.data.psnr.sse[i] = psnr.sse[i]; - pkt.data.psnr.psnr[i] = psnr.psnr[i]; + return 1; + } else { + vp9_zero(*psnr); + return 0; } - pkt.kind = VPX_CODEC_PSNR_PKT; - if (cpi->use_svc) - cpi->svc - 
.layer_context[cpi->svc.spatial_layer_id * - cpi->svc.number_temporal_layers] - .psnr_pkt = pkt.data.psnr; - else - vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); } int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) { @@ -2707,6 +3080,7 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, } #endif // CONFIG_VP9_HIGHBITDEPTH +#if !CONFIG_REALTIME_ONLY static int scale_down(VP9_COMP *cpi, int q) { RATE_CONTROL *const rc = &cpi->rc; GF_GROUP *const gf_group = &cpi->twopass.gf_group; @@ -2754,11 +3128,14 @@ static int big_rate_miss(VP9_COMP *cpi) { // test in two pass for the first static int two_pass_first_group_inter(VP9_COMP *cpi) { - TWO_PASS *const twopass = &cpi->twopass; - GF_GROUP *const gf_group = &twopass->gf_group; - if ((cpi->oxcf.pass == 2) && - (gf_group->index == gf_group->first_inter_index)) { - return 1; + if (cpi->oxcf.pass == 2) { + TWO_PASS *const twopass = &cpi->twopass; + GF_GROUP *const gf_group = &twopass->gf_group; + const int gfg_index = gf_group->index; + + if (gfg_index == 0) return gf_group->update_type[gfg_index] == LF_UPDATE; + return gf_group->update_type[gfg_index - 1] != LF_UPDATE && + gf_group->update_type[gfg_index] == LF_UPDATE; } else { return 0; } @@ -2807,10 +3184,24 @@ static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q, } return force_recode; } +#endif // !CONFIG_REALTIME_ONLY -void vp9_update_reference_frames(VP9_COMP *cpi) { +static void update_ref_frames(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; BufferPool *const pool = cm->buffer_pool; + GF_GROUP *const gf_group = &cpi->twopass.gf_group; + + if (cpi->rc.show_arf_as_gld) { + int tmp = cpi->alt_fb_idx; + cpi->alt_fb_idx = cpi->gld_fb_idx; + cpi->gld_fb_idx = tmp; + } else if (cm->show_existing_frame) { + // Pop ARF. + cpi->lst_fb_idx = cpi->alt_fb_idx; + cpi->alt_fb_idx = + stack_pop(gf_group->arf_index_stack, gf_group->stack_size); + --gf_group->stack_size; + } // At this point the new frame has been encoded. // If any buffer copy / swapping is signaled it should be done here. @@ -2836,23 +3227,23 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { tmp = cpi->alt_fb_idx; cpi->alt_fb_idx = cpi->gld_fb_idx; cpi->gld_fb_idx = tmp; - - if (is_two_pass_svc(cpi)) { - cpi->svc.layer_context[0].gold_ref_idx = cpi->gld_fb_idx; - cpi->svc.layer_context[0].alt_ref_idx = cpi->alt_fb_idx; - } } else { /* For non key/golden frames */ if (cpi->refresh_alt_ref_frame) { - int arf_idx = cpi->alt_fb_idx; - if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - arf_idx = gf_group->arf_update_idx[gf_group->index]; - } + int arf_idx = gf_group->top_arf_idx; + + // Push new ARF into stack. 
+ stack_push(gf_group->arf_index_stack, cpi->alt_fb_idx, + gf_group->stack_size); + ++gf_group->stack_size; + + assert(arf_idx < REF_FRAMES); ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx); memcpy(cpi->interp_filter_selected[ALTREF_FRAME], cpi->interp_filter_selected[0], sizeof(cpi->interp_filter_selected[0])); + + cpi->alt_fb_idx = arf_idx; } if (cpi->refresh_golden_frame) { @@ -2877,69 +3268,39 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { cpi->interp_filter_selected[0], sizeof(cpi->interp_filter_selected[0])); } + + if (gf_group->update_type[gf_group->index] == MID_OVERLAY_UPDATE) { + cpi->alt_fb_idx = + stack_pop(gf_group->arf_index_stack, gf_group->stack_size); + --gf_group->stack_size; + } +} + +void vp9_update_reference_frames(VP9_COMP *cpi) { + update_ref_frames(cpi); + #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && - cpi->denoiser.denoising_level > kDenLowLow) { - int svc_base_is_key = 0; - int denoise_svc_second_layer = 0; - if (cpi->use_svc) { - int realloc_fail = 0; - const int svc_buf_shift = - cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 - ? cpi->denoiser.num_ref_frames - : 0; - int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, - cpi->svc.temporal_layer_id, - cpi->svc.number_temporal_layers); - LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; - svc_base_is_key = lc->is_key_frame; - denoise_svc_second_layer = - cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 ? 1 - : 0; - // Check if we need to allocate extra buffers in the denoiser - // for - // refreshed frames. - realloc_fail = vp9_denoiser_realloc_svc( - cm, &cpi->denoiser, svc_buf_shift, cpi->refresh_alt_ref_frame, - cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx, - cpi->gld_fb_idx, cpi->lst_fb_idx); - if (realloc_fail) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to re-allocate denoiser for SVC"); - } - vp9_denoiser_update_frame_info( - &cpi->denoiser, *cpi->Source, cpi->common.frame_type, - cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame, - cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx, - cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key, - denoise_svc_second_layer); - } + vp9_denoiser_update_ref_frame(cpi); #endif - if (is_one_pass_cbr_svc(cpi)) { - // Keep track of frame index for each reference frame. 
- SVC *const svc = &cpi->svc; - if (cm->frame_type == KEY_FRAME) { - svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe; - svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe; - svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe; - } else { - if (cpi->refresh_last_frame) - svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe; - if (cpi->refresh_golden_frame) - svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe; - if (cpi->refresh_alt_ref_frame) - svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe; - } - } + + if (is_one_pass_cbr_svc(cpi)) vp9_svc_update_ref_frame(cpi); } static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { MACROBLOCKD *xd = &cpi->td.mb.e_mbd; struct loopfilter *lf = &cm->lf; - - const int is_reference_frame = + int is_reference_frame = (cm->frame_type == KEY_FRAME || cpi->refresh_last_frame || cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame); + if (cpi->use_svc && + cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) + is_reference_frame = !cpi->svc.non_reference_frame; + + // Skip loop filter in show_existing_frame mode. + if (cm->show_existing_frame) { + lf->filter_level = 0; + return; + } if (xd->lossless) { lf->filter_level = 0; @@ -3066,8 +3427,8 @@ void vp9_scale_references(VP9_COMP *cpi) { if (cpi->oxcf.pass == 0 && !cpi->use_svc) { // Check for release of scaled reference. buf_idx = cpi->scaled_ref_idx[ref_frame - 1]; - buf = (buf_idx != INVALID_IDX) ? &pool->frame_bufs[buf_idx] : NULL; - if (buf != NULL) { + if (buf_idx != INVALID_IDX) { + buf = &pool->frame_bufs[buf_idx]; --buf->ref_count; cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX; } @@ -3098,22 +3459,21 @@ static void release_scaled_references(VP9_COMP *cpi) { refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0; for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { const int idx = cpi->scaled_ref_idx[i - 1]; - RefCntBuffer *const buf = - idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[idx] : NULL; - const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i); - if (buf != NULL && - (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width && - buf->buf.y_crop_height == ref->y_crop_height))) { - --buf->ref_count; - cpi->scaled_ref_idx[i - 1] = INVALID_IDX; + if (idx != INVALID_IDX) { + RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx]; + const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i); + if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width && + buf->buf.y_crop_height == ref->y_crop_height)) { + --buf->ref_count; + cpi->scaled_ref_idx[i - 1] = INVALID_IDX; + } } } } else { - for (i = 0; i < MAX_REF_FRAMES; ++i) { + for (i = 0; i < REFS_PER_FRAME; ++i) { const int idx = cpi->scaled_ref_idx[i]; - RefCntBuffer *const buf = - idx != INVALID_IDX ? 
&cm->buffer_pool->frame_bufs[idx] : NULL; - if (buf != NULL) { + if (idx != INVALID_IDX) { + RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx]; --buf->ref_count; cpi->scaled_ref_idx[i] = INVALID_IDX; } @@ -3172,11 +3532,9 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { case VPX_BITS_10: dc_quant_devisor = 16.0; break; - case VPX_BITS_12: - dc_quant_devisor = 64.0; - break; default: - assert(0 && "bit_depth must be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); + assert(cm->bit_depth == VPX_BITS_12); + dc_quant_devisor = 64.0; break; } #else @@ -3292,7 +3650,7 @@ static void set_mv_search_params(VP9_COMP *cpi) { } static void set_size_independent_vars(VP9_COMP *cpi) { - vp9_set_speed_features_framesize_independent(cpi); + vp9_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed); vp9_set_rd_speed_thresholds(cpi); vp9_set_rd_speed_thresholds_sub8x8(cpi); cpi->common.interp_filter = cpi->sf.default_interp_filter; @@ -3303,11 +3661,16 @@ static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index, VP9_COMMON *const cm = &cpi->common; // Setup variables that depend on the dimensions of the frame. - vp9_set_speed_features_framesize_dependent(cpi); + vp9_set_speed_features_framesize_dependent(cpi, cpi->oxcf.speed); // Decide q and q bounds. *q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index); + if (cpi->oxcf.rc_mode == VPX_CBR && cpi->rc.force_max_q) { + *q = cpi->rc.worst_quality; + cpi->rc.force_max_q = 0; + } + if (!frame_is_intra_only(cm)) { vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH); } @@ -3337,29 +3700,12 @@ static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index, vpx_calloc(cpi->un_scaled_source->y_width, sizeof(*cpi->common.postproc_state.limits)); } - vp9_denoise(cpi->Source, cpi->Source, l, cpi->common.postproc_state.limits); + vp9_denoise(&cpi->common, cpi->Source, cpi->Source, l, + cpi->common.postproc_state.limits); } #endif // CONFIG_VP9_POSTPROC } -#if CONFIG_VP9_TEMPORAL_DENOISING -static void setup_denoiser_buffer(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if (cpi->oxcf.noise_sensitivity > 0 && - !cpi->denoiser.frame_buffer_initialized) { - if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc, - cpi->oxcf.noise_sensitivity, cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate denoiser"); - } -} -#endif - static void init_motion_estimation(VP9_COMP *cpi) { int y_stride = cpi->scaled_source.y_stride; @@ -3415,9 +3761,7 @@ static void set_frame_size(VP9_COMP *cpi) { #endif } - if ((oxcf->pass == 2) && - (!cpi->use_svc || (is_two_pass_svc(cpi) && - cpi->svc.encode_empty_frame_state != ENCODING))) { + if ((oxcf->pass == 2) && !cpi->use_svc) { vp9_set_target_rate(cpi); } @@ -3464,19 +3808,76 @@ static void set_frame_size(VP9_COMP *cpi) { set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); } -static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, - uint8_t *dest) { +#if CONFIG_CONSISTENT_RECODE +static void save_encode_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - int q = 0, bottom_index = 0, top_index = 0; // Dummy variables. 
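+  // Snapshot the prediction-type and filter thresholds, plus each tile's
+  // thresh_freq_fact tables, into their *_prev counterparts so that a later
+  // recode of this frame starts from the same adaptive state.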
+ const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + int tile_col, tile_row; + int i, j; + RD_OPT *rd_opt = &cpi->rd; + for (i = 0; i < MAX_REF_FRAMES; i++) { + for (j = 0; j < REFERENCE_MODES; j++) + rd_opt->prediction_type_threshes_prev[i][j] = + rd_opt->prediction_type_threshes[i][j]; + + for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++) + rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j]; + } + + if (cpi->tile_data != NULL) { + for (tile_row = 0; tile_row < tile_rows; ++tile_row) + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + TileDataEnc *tile_data = + &cpi->tile_data[tile_row * tile_cols + tile_col]; + for (i = 0; i < BLOCK_SIZES; ++i) { + for (j = 0; j < MAX_MODES; ++j) { + tile_data->thresh_freq_fact_prev[i][j] = + tile_data->thresh_freq_fact[i][j]; + } + } + } + } +} +#endif + +static INLINE void set_raw_source_frame(VP9_COMP *cpi) { +#ifdef ENABLE_KF_DENOISE + if (is_spatial_denoise_enabled(cpi)) { + cpi->raw_source_frame = vp9_scale_if_required( + cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source, + (oxcf->pass == 0), EIGHTTAP, 0); + } else { + cpi->raw_source_frame = cpi->Source; + } +#else + cpi->raw_source_frame = cpi->Source; +#endif +} + +static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, + uint8_t *dest) { + VP9_COMMON *const cm = &cpi->common; + SVC *const svc = &cpi->svc; + int q = 0, bottom_index = 0, top_index = 0; + int no_drop_scene_change = 0; const INTERP_FILTER filter_scaler = (is_one_pass_cbr_svc(cpi)) - ? cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] + ? svc->downsample_filter_type[svc->spatial_layer_id] : EIGHTTAP; const int phase_scaler = (is_one_pass_cbr_svc(cpi)) - ? cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id] + ? svc->downsample_filter_phase[svc->spatial_layer_id] : 0; + if (cm->show_existing_frame) { + cpi->rc.this_frame_target = 0; + if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi); + return 1; + } + + svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe; + // Flag to check if its valid to compute the source sad (used for // scene detection and for superblock content state in CBR mode). // The flag may get reset below based on SVC or resizing state. @@ -3489,30 +3890,36 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, if (is_one_pass_cbr_svc(cpi) && cpi->un_scaled_source->y_width == cm->width << 2 && cpi->un_scaled_source->y_height == cm->height << 2 && - cpi->svc.scaled_temp.y_width == cm->width << 1 && - cpi->svc.scaled_temp.y_height == cm->height << 1) { + svc->scaled_temp.y_width == cm->width << 1 && + svc->scaled_temp.y_height == cm->height << 1) { // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2 // result will be saved in scaled_temp and might be used later. 
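  // In effect: un_scaled_source (4x) -> scaled_temp (2x) -> Source (1x), so
  // the following 1/2x1/2 spatial layer can consume scaled_temp directly
  // instead of scaling the full-resolution source a second time.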
- const INTERP_FILTER filter_scaler2 = cpi->svc.downsample_filter_type[1]; - const int phase_scaler2 = cpi->svc.downsample_filter_phase[1]; + const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1]; + const int phase_scaler2 = svc->downsample_filter_phase[1]; cpi->Source = vp9_svc_twostage_scale( - cm, cpi->un_scaled_source, &cpi->scaled_source, &cpi->svc.scaled_temp, + cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp, filter_scaler, phase_scaler, filter_scaler2, phase_scaler2); - cpi->svc.scaled_one_half = 1; + svc->scaled_one_half = 1; } else if (is_one_pass_cbr_svc(cpi) && cpi->un_scaled_source->y_width == cm->width << 1 && cpi->un_scaled_source->y_height == cm->height << 1 && - cpi->svc.scaled_one_half) { + svc->scaled_one_half) { // If the spatial layer is 1/2x1/2 and the scaling is already done in the // two-stage scaling, use the result directly. - cpi->Source = &cpi->svc.scaled_temp; - cpi->svc.scaled_one_half = 0; + cpi->Source = &svc->scaled_temp; + svc->scaled_one_half = 0; } else { cpi->Source = vp9_scale_if_required( cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0), filter_scaler, phase_scaler); } +#ifdef OUTPUT_YUV_SVC_SRC + // Write out at most 3 spatial layers. + if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) { + vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source); + } +#endif // Unfiltered raw source used in metrics calculation if the source // has been filtered. if (is_psnr_calc_enabled(cpi)) { @@ -3530,9 +3937,9 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, } if ((cpi->use_svc && - (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1 || - cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1 || - cpi->svc.current_superframe < 1)) || + (svc->spatial_layer_id < svc->number_spatial_layers - 1 || + svc->temporal_layer_id < svc->number_temporal_layers - 1 || + svc->current_superframe < 1)) || cpi->resize_pending || cpi->resize_state || cpi->external_resize || cpi->resize_state != ORIG) { cpi->compute_source_sad_onepass = 0; @@ -3562,53 +3969,102 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, cpi->Last_Source->y_height != cpi->Source->y_height) cpi->compute_source_sad_onepass = 0; - if (cm->frame_type == KEY_FRAME || cpi->resize_pending != 0) { + if (frame_is_intra_only(cm) || cpi->resize_pending != 0) { memset(cpi->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv)); } - vp9_update_noise_estimate(cpi); +#if CONFIG_VP9_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0 && cpi->use_svc) + vp9_denoiser_reset_on_first_frame(cpi); +#endif // Scene detection is always used for VBR mode or screen-content case. // For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now // (need to check encoding time cost for doing this for speed 8). 
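  // E.g. with the condition below, a speed-6 real-time CBR encode runs the
  // detection on every shown frame, while speed 8 skips it unless the rate
  // mode is VBR or the content is flagged as screen.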
cpi->rc.high_source_sad = 0; - if (cpi->compute_source_sad_onepass && cm->show_frame && + cpi->rc.hybrid_intra_scene_change = 0; + cpi->rc.re_encode_maxq_scene_change = 0; + if (cm->show_frame && cpi->oxcf.mode == REALTIME && (cpi->oxcf.rc_mode == VPX_VBR || cpi->oxcf.content == VP9E_CONTENT_SCREEN || - (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8 && !cpi->use_svc))) + (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8))) vp9_scene_detection_onepass(cpi); + if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) { + svc->high_source_sad_superframe = cpi->rc.high_source_sad; + svc->high_num_blocks_with_motion = cpi->rc.high_num_blocks_with_motion; + // On scene change reset temporal layer pattern to TL0. + // Note that if the base/lower spatial layers are skipped: instead of + // inserting base layer here, we force max-q for the next superframe + // with lower spatial layers: this is done in vp9_encodedframe_overshoot() + // when max-q is decided for the current layer. + // Only do this reset for bypass/flexible mode. + if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 && + svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + // rc->high_source_sad will get reset so copy it to restore it. + int tmp_high_source_sad = cpi->rc.high_source_sad; + vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME); + cpi->rc.high_source_sad = tmp_high_source_sad; + } + } + + vp9_update_noise_estimate(cpi); + + // For 1 pass CBR, check if we are dropping this frame. + // Never drop on key frame, if base layer is key for svc, + // on scene change, or if superframe has layer sync. + if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) && + !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0])) + no_drop_scene_change = 1; + if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && + !frame_is_intra_only(cm) && !no_drop_scene_change && + !svc->superframe_has_layer_sync && + (!cpi->use_svc || + !svc->layer_context[svc->temporal_layer_id].is_key_frame)) { + if (vp9_rc_drop_frame(cpi)) return 0; + } + // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can // avoid this frame-level upsampling (for non intra_only frames). if (frame_is_intra_only(cm) == 0 && - !(is_one_pass_cbr_svc(cpi) && cpi->svc.force_zero_mode_spatial_ref)) { + !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref)) { vp9_scale_references(cpi); } set_size_independent_vars(cpi); set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); + // search method and step parameter might be changed in speed settings. + init_motion_estimation(cpi); + if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi); if (cpi->sf.svc_use_lowres_part && - cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) { - if (cpi->svc.prev_partition_svc == NULL) { + svc->spatial_layer_id == svc->number_spatial_layers - 2) { + if (svc->prev_partition_svc == NULL) { CHECK_MEM_ERROR( - cm, cpi->svc.prev_partition_svc, + cm, svc->prev_partition_svc, (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows, - sizeof(*cpi->svc.prev_partition_svc))); + sizeof(*svc->prev_partition_svc))); } } - if (cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 && + // TODO(jianj): Look into issue of skin detection with high bitdepth. 
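+  // Until that is resolved, skin detection is limited to 8-bit, speed >= 5,
+  // 1-pass CBR encodes with cyclic-refresh AQ, excluding screen content.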
+ if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.content != VP9E_CONTENT_SCREEN && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { cpi->use_skin_detection = 1; } + // Enable post encode frame dropping for CBR on non key frame, when + // ext_use_post_encode_drop is specified by user. + cpi->rc.use_post_encode_drop = cpi->rc.ext_use_post_encode_drop && + cpi->oxcf.rc_mode == VPX_CBR && + cm->frame_type != KEY_FRAME; + vp9_set_quantizer(cm, q); vp9_set_variance_partition_thresholds(cpi, q, 0); @@ -3616,6 +4072,34 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, suppress_active_map(cpi); + if (cpi->use_svc) { + // On non-zero spatial layer, check for disabling inter-layer + // prediction. + if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi); + vp9_svc_assert_constraints_pattern(cpi); + } + + if (cpi->rc.last_post_encode_dropped_scene_change) { + cpi->rc.high_source_sad = 1; + svc->high_source_sad_superframe = 1; + // For now disable use_source_sad since Last_Source will not be the previous + // encoded but the dropped one. + cpi->sf.use_source_sad = 0; + cpi->rc.last_post_encode_dropped_scene_change = 0; + } + // Check if this high_source_sad (scene/slide change) frame should be + // encoded at high/max QP, and if so, set the q and adjust some rate + // control parameters. + if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ && + (cpi->rc.high_source_sad || + (cpi->use_svc && svc->high_source_sad_superframe))) { + if (vp9_encodedframe_overshoot(cpi, -1, &q)) { + vp9_set_quantizer(cm, q); + vp9_set_variance_partition_thresholds(cpi, q, 0); + } + } + +#if !CONFIG_REALTIME_ONLY // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { @@ -3624,24 +4108,32 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, vp9_360aq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { vp9_setup_in_frame_q_adj(cpi); - } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - vp9_cyclic_refresh_setup(cpi); } else if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) { // it may be pretty bad for rate-control, // and I should handle it somehow vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi); + } else { +#endif + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_cyclic_refresh_setup(cpi); + } else if (cpi->roi.enabled && !frame_is_intra_only(cm)) { + apply_roi_map(cpi); + } +#if !CONFIG_REALTIME_ONLY } +#endif apply_active_map(cpi); vp9_encode_frame(cpi); - // Check if we should drop this frame because of high overshoot. - // Only for frames where high temporal-source SAD is detected. - if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && - cpi->resize_state == ORIG && cm->frame_type != KEY_FRAME && - cpi->oxcf.content == VP9E_CONTENT_SCREEN && - cpi->rc.high_source_sad == 1) { + // Check if we should re-encode this frame at high Q because of high + // overshoot based on the encoded frame size. Only for frames where + // high temporal-source SAD is detected. + // For SVC: all spatial layers are checked for re-encoding. + if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ && + (cpi->rc.high_source_sad || + (cpi->use_svc && svc->high_source_sad_superframe))) { int frame_size = 0; // Get an estimate of the encoded frame size. 
save_coding_context(cpi); @@ -3657,8 +4149,12 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, suppress_active_map(cpi); // Turn-off cyclic refresh for re-encoded frame. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; unsigned char *const seg_map = cpi->segmentation_map; memset(seg_map, 0, cm->mi_rows * cm->mi_cols); + memset(cr->last_coded_q_map, MAXQ, + cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); + cr->sb_index = 0; vp9_disable_segmentation(&cm->seg); } apply_active_map(cpi); @@ -3668,15 +4164,17 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, // Update some stats from cyclic refresh, and check for golden frame update. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && - cm->frame_type != KEY_FRAME) + !frame_is_intra_only(cm)) vp9_cyclic_refresh_postencode(cpi); // Update the skip mb flag probabilities based on the distribution // seen in the last encoder iteration. // update_base_skip_probs(cpi); vpx_clear_system_state(); + return 1; } +#if !CONFIG_REALTIME_ONLY #define MAX_QSTEP_ADJ 4 static int get_qstep_adj(int rate_excess, int rate_limit) { int qstep = @@ -3703,11 +4201,17 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, int qrange_adj = 1; #endif + if (cm->show_existing_frame) { + rc->this_frame_target = 0; + if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi); + return; + } + set_size_independent_vars(cpi); - enable_acl = cpi->sf.allow_acl - ? (cm->frame_type == KEY_FRAME) || (cm->show_frame == 0) - : 0; + enable_acl = cpi->sf.allow_acl ? (cm->frame_type == KEY_FRAME) || + (cpi->twopass.gf_group.index == 1) + : 0; do { vpx_clear_system_state(); @@ -3782,6 +4286,14 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, vp9_scale_references(cpi); } +#if CONFIG_RATE_CTRL + // TODO(angiebird): This is a hack for making sure the encoder use the + // external_quantize_index exactly. Avoid this kind of hack later. + if (cpi->encode_command.use_external_quantize_index) { + q = cpi->encode_command.external_quantize_index; + } +#endif + vp9_set_quantizer(cm, q); if (loop_count == 0) setup_frame(cpi); @@ -3796,6 +4308,8 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, vp9_setup_in_frame_q_adj(cpi); } else if (oxcf->aq_mode == LOOKAHEAD_AQ) { vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi); + } else if (oxcf->aq_mode == PSNR_AQ) { + vp9_psnr_aq_mode_setup(&cm->seg); } vp9_encode_frame(cpi); @@ -3818,6 +4332,16 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; } +#if CONFIG_RATE_CTRL + // This part needs to be after save_coding_context() because + // restore_coding_context will be called in the end of this function. + // TODO(angiebird): This is a hack for making sure the encoder use the + // external_quantize_index exactly. Avoid this kind of hack later. + if (cpi->encode_command.use_external_quantize_index) { + break; + } +#endif + if (oxcf->rc_mode == VPX_Q) { loop = 0; } else { @@ -3900,8 +4424,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, // Special case if the projected size is > the max allowed. 
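        // That is, even q_high cannot contain the overshoot: clamp the
        // target to max_frame_bandwidth / the big-rate-miss threshold and
        // derive a higher Q from that clamped rate below.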
if ((q == q_high) && ((rc->projected_frame_size >= rc->max_frame_bandwidth) || - (rc->projected_frame_size >= - big_rate_miss_high_threshold(cpi)))) { + (!rc->is_src_frame_alt_ref && + (rc->projected_frame_size >= + big_rate_miss_high_threshold(cpi))))) { int max_rate = VPXMAX(1, VPXMIN(rc->max_frame_bandwidth, big_rate_miss_high_threshold(cpi))); double q_val_high; @@ -3993,7 +4518,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, } if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) - if (loop || !enable_acl) restore_coding_context(cpi); + if (loop) restore_coding_context(cpi); } while (loop); #ifdef AGGRESSIVE_VBR @@ -4006,7 +4531,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, #endif // Have we been forced to adapt Q outside the expected range by an extreme // rate miss. If so adjust the active maxQ for the subsequent frames. - if (q > cpi->twopass.active_worst_quality) { + if (!rc->is_src_frame_alt_ref && (q > cpi->twopass.active_worst_quality)) { cpi->twopass.active_worst_quality = q; } else if (oxcf->vbr_corpus_complexity && q == q_low && rc->projected_frame_size < rc->this_frame_target) { @@ -4019,23 +4544,16 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, // Skip recoding, if model diff is below threshold const int thresh = compute_context_model_thresh(cpi); const int diff = compute_context_model_diff(cm); - if (diff < thresh) { - vpx_clear_system_state(); - restore_coding_context(cpi); - return; + if (diff >= thresh) { + vp9_encode_frame(cpi); } - - vp9_encode_frame(cpi); + } + if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { vpx_clear_system_state(); restore_coding_context(cpi); - vp9_pack_bitstream(cpi, dest, size); - - vp9_encode_frame(cpi); - vpx_clear_system_state(); - - restore_coding_context(cpi); } } +#endif // !CONFIG_REALTIME_ONLY static int get_ref_frame_flags(const VP9_COMP *cpi) { const int *const map = cpi->common.ref_frame_map; @@ -4131,20 +4649,21 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required( } } -static void set_arf_sign_bias(VP9_COMP *cpi) { +static void set_ref_sign_bias(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - int arf_sign_bias; + RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx); + const int cur_frame_index = ref_buffer->frame_index; + MV_REFERENCE_FRAME ref_frame; - if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - arf_sign_bias = cpi->rc.source_alt_ref_active && - (!cpi->refresh_alt_ref_frame || - (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)); - } else { - arf_sign_bias = - (cpi->rc.source_alt_ref_active && !cpi->refresh_alt_ref_frame); + for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) { + const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); + const RefCntBuffer *const ref_cnt_buf = + get_ref_cnt_buffer(&cpi->common, buf_idx); + if (ref_cnt_buf) { + cm->ref_frame_sign_bias[ref_frame] = + cur_frame_index < ref_cnt_buf->frame_index; + } } - cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias; } static int setup_interp_filter_search_mask(VP9_COMP *cpi) { @@ -4328,6 +4847,7 @@ static void spatial_denoise_frame(VP9_COMP *cpi) { } #endif // ENABLE_KF_DENOISE +#if !CONFIG_REALTIME_ONLY static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size, uint8_t *dest) { if (cpi->common.seg.enabled) @@ -4351,6 +4871,207 @@ static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size, vp9_enable_segmentation(&cpi->common.seg); } } +#endif + +static void 
set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
+  RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
+
+  if (ref_buffer) {
+    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+    ref_buffer->frame_index =
+        cm->current_video_frame + gf_group->arf_src_offset[gf_group->index];
+  }
+}
+
+static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+  ThreadData *td = &cpi->td;
+  MACROBLOCK *x = &td->mb;
+  MACROBLOCKD *xd = &x->e_mbd;
+  uint8_t *y_buffer = cpi->Source->y_buffer;
+  const int y_stride = cpi->Source->y_stride;
+  const int block_size = BLOCK_16X16;
+
+  const int num_8x8_w = num_8x8_blocks_wide_lookup[block_size];
+  const int num_8x8_h = num_8x8_blocks_high_lookup[block_size];
+  const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w;
+  const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h;
+  double log_sum = 0.0;
+  int row, col;
+
+  // Loop through each 16x16 block.
+  for (row = 0; row < num_rows; ++row) {
+    for (col = 0; col < num_cols; ++col) {
+      int mi_row, mi_col;
+      double var = 0.0, num_of_var = 0.0;
+      const int index = row * num_cols + col;
+
+      for (mi_row = row * num_8x8_h;
+           mi_row < cm->mi_rows && mi_row < (row + 1) * num_8x8_h; ++mi_row) {
+        for (mi_col = col * num_8x8_w;
+             mi_col < cm->mi_cols && mi_col < (col + 1) * num_8x8_w;
+             ++mi_col) {
+          struct buf_2d buf;
+          const int row_offset_y = mi_row << 3;
+          const int col_offset_y = mi_col << 3;
+
+          buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
+          buf.stride = y_stride;
+
+          // In order to keep SSIM_VAR_SCALE on the same scale for both 8 bit
+          // and high bit depth videos, the variance needs to be divided by
+          // 2.0 or 64.0 accordingly.
+          // TODO(sdeng): need to tune for 12bit videos.
+#if CONFIG_VP9_HIGHBITDEPTH
+          if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
+            var += vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd);
+          else
+#endif
+            var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8);
+
+          num_of_var += 1.0;
+        }
+      }
+      var = var / num_of_var / 64.0;
+
+      // Curve fitting with an exponential model on all 16x16 blocks from the
+      // Midres dataset.
+      var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222;
+      cpi->mi_ssim_rdmult_scaling_factors[index] = var;
+      log_sum += log(var);
+    }
+  }
+  log_sum = exp(log_sum / (double)(num_rows * num_cols));
+
+  for (row = 0; row < num_rows; ++row) {
+    for (col = 0; col < num_cols; ++col) {
+      const int index = row * num_cols + col;
+      cpi->mi_ssim_rdmult_scaling_factors[index] /= log_sum;
+    }
+  }
+
+  (void)xd;
+}
+
+// Process the Wiener variance on a 16x16 block basis.
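+//
+// Per 16x16 block (coeff_count = 256): take a Hadamard transform of the
+// source against a zero prediction, estimate the noise level m as the median
+// absolute AC coefficient, shrink each AC coefficient c to
+// c' = c^3 / (c^2 + m^2), and accumulate sum(c'^2) / coeff_count as the
+// block's Wiener variance.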
+static int qsort_comp(const void *elem1, const void *elem2) { + int a = *((const int *)elem1); + int b = *((const int *)elem2); + if (a > b) return 1; + if (a < b) return -1; + return 0; +} + +static void init_mb_wiener_var_buffer(VP9_COMP *cpi) { + VP9_COMMON *cm = &cpi->common; + + if (cpi->mb_wiener_variance && cpi->mb_wiener_var_rows >= cm->mb_rows && + cpi->mb_wiener_var_cols >= cm->mb_cols) + return; + + vpx_free(cpi->mb_wiener_variance); + cpi->mb_wiener_variance = NULL; + + CHECK_MEM_ERROR( + cm, cpi->mb_wiener_variance, + vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->mb_wiener_variance))); + cpi->mb_wiener_var_rows = cm->mb_rows; + cpi->mb_wiener_var_cols = cm->mb_cols; +} + +static void set_mb_wiener_variance(VP9_COMP *cpi) { + VP9_COMMON *cm = &cpi->common; + uint8_t *buffer = cpi->Source->y_buffer; + int buf_stride = cpi->Source->y_stride; + +#if CONFIG_VP9_HIGHBITDEPTH + ThreadData *td = &cpi->td; + MACROBLOCK *x = &td->mb; + MACROBLOCKD *xd = &x->e_mbd; + DECLARE_ALIGNED(16, uint16_t, zero_pred16[32 * 32]); + DECLARE_ALIGNED(16, uint8_t, zero_pred8[32 * 32]); + uint8_t *zero_pred; +#else + DECLARE_ALIGNED(16, uint8_t, zero_pred[32 * 32]); +#endif + + DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]); + DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]); + + int mb_row, mb_col, count = 0; + // Hard coded operating block size + const int block_size = 16; + const int coeff_count = block_size * block_size; + const TX_SIZE tx_size = TX_16X16; + +#if CONFIG_VP9_HIGHBITDEPTH + xd->cur_buf = cpi->Source; + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + zero_pred = CONVERT_TO_BYTEPTR(zero_pred16); + memset(zero_pred16, 0, sizeof(*zero_pred16) * coeff_count); + } else { + zero_pred = zero_pred8; + memset(zero_pred8, 0, sizeof(*zero_pred8) * coeff_count); + } +#else + memset(zero_pred, 0, sizeof(*zero_pred) * coeff_count); +#endif + + cpi->norm_wiener_variance = 0; + + for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { + for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { + int idx; + int16_t median_val = 0; + uint8_t *mb_buffer = + buffer + mb_row * block_size * buf_stride + mb_col * block_size; + int64_t wiener_variance = 0; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vpx_highbd_subtract_block(block_size, block_size, src_diff, block_size, + mb_buffer, buf_stride, zero_pred, block_size, + xd->bd); + highbd_wht_fwd_txfm(src_diff, block_size, coeff, tx_size); + } else { + vpx_subtract_block(block_size, block_size, src_diff, block_size, + mb_buffer, buf_stride, zero_pred, block_size); + wht_fwd_txfm(src_diff, block_size, coeff, tx_size); + } +#else + vpx_subtract_block(block_size, block_size, src_diff, block_size, + mb_buffer, buf_stride, zero_pred, block_size); + wht_fwd_txfm(src_diff, block_size, coeff, tx_size); +#endif // CONFIG_VP9_HIGHBITDEPTH + + coeff[0] = 0; + for (idx = 1; idx < coeff_count; ++idx) coeff[idx] = abs(coeff[idx]); + + qsort(coeff, coeff_count - 1, sizeof(*coeff), qsort_comp); + + // Noise level estimation + median_val = coeff[coeff_count / 2]; + + // Wiener filter + for (idx = 1; idx < coeff_count; ++idx) { + int64_t sqr_coeff = (int64_t)coeff[idx] * coeff[idx]; + int64_t tmp_coeff = (int64_t)coeff[idx]; + if (median_val) { + tmp_coeff = (sqr_coeff * coeff[idx]) / + (sqr_coeff + (int64_t)median_val * median_val); + } + wiener_variance += tmp_coeff * tmp_coeff; + } + cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col] = + wiener_variance / coeff_count; + cpi->norm_wiener_variance += + cpi->mb_wiener_variance[mb_row 
* cm->mb_cols + mb_col]; + ++count; + } + } + + if (count) cpi->norm_wiener_variance /= count; + cpi->norm_wiener_variance = VPXMAX(1, cpi->norm_wiener_variance); +} static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, uint8_t *dest, @@ -4360,6 +5081,27 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, struct segmentation *const seg = &cm->seg; TX_SIZE t; + // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0. + // No need to set svc.skip_enhancement_layer if whole superframe will be + // dropped. + if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 && + cpi->oxcf.target_bandwidth == 0 && + !(cpi->svc.framedrop_mode != LAYER_DROP && + (cpi->svc.framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP || + cpi->svc + .force_drop_constrained_from_above[cpi->svc.number_spatial_layers - + 1]) && + cpi->svc.drop_spatial_layer[0])) { + cpi->svc.skip_enhancement_layer = 1; + vp9_rc_postencode_update_drop_frame(cpi); + cpi->ext_refresh_frame_flags_pending = 0; + cpi->last_frame_dropped = 1; + cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1; + cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1; + vp9_inc_frame_in_layer(cpi); + return; + } + set_ext_overrides(cpi); vpx_clear_system_state(); @@ -4368,8 +5110,13 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, if (is_spatial_denoise_enabled(cpi)) spatial_denoise_frame(cpi); #endif - // Set the arf sign bias for this frame. - set_arf_sign_bias(cpi); + if (cm->show_existing_frame == 0) { + // Update frame index + set_frame_index(cpi, cm); + + // Set the arf sign bias for this frame. + set_ref_sign_bias(cpi); + } // Set default state for segment based loop filter update flags. cm->lf.mode_ref_delta_update = 0; @@ -4404,66 +5151,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, cm->reset_frame_context = 2; } } - if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) { - // Use context 0 for intra only empty frame, but the last frame context - // for other empty frames. - if (cpi->svc.encode_empty_frame_state == ENCODING) { - if (cpi->svc.encode_intra_empty_frame != 0) - cm->frame_context_idx = 0; - else - cm->frame_context_idx = FRAME_CONTEXTS - 1; - } else { - cm->frame_context_idx = - cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + - cpi->svc.temporal_layer_id; - } - cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode; + if (oxcf->tuning == VP8_TUNE_SSIM) set_mb_ssim_rdmult_scaling(cpi); - // The probs will be updated based on the frame type of its previous - // frame if frame_parallel_decoding_mode is 0. The type may vary for - // the frame after a key frame in base layer since we may drop enhancement - // layers. So set frame_parallel_decoding_mode to 1 in this case. - if (cm->frame_parallel_decoding_mode == 0) { - if (cpi->svc.number_temporal_layers == 1) { - if (cpi->svc.spatial_layer_id == 0 && - cpi->svc.layer_context[0].last_frame_type == KEY_FRAME) - cm->frame_parallel_decoding_mode = 1; - } else if (cpi->svc.spatial_layer_id == 0) { - // Find the 2nd frame in temporal base layer and 1st frame in temporal - // enhancement layers from the key frame. - int i; - for (i = 0; i < cpi->svc.number_temporal_layers; ++i) { - if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) { - cm->frame_parallel_decoding_mode = 1; - break; - } - } - } - } - } - - // For 1 pass CBR, check if we are dropping this frame. 
- // For spatial layers, for now only check for frame-dropping on first spatial - // layer, and if decision is to drop, we drop whole super-frame. - if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR && - cm->frame_type != KEY_FRAME) { - if (vp9_rc_drop_frame(cpi) || - (is_one_pass_cbr_svc(cpi) && cpi->svc.rc_drop_superframe == 1)) { - vp9_rc_postencode_update_drop_frame(cpi); - ++cm->current_video_frame; - cpi->ext_refresh_frame_flags_pending = 0; - cpi->svc.rc_drop_superframe = 1; - cpi->last_frame_dropped = 1; - // TODO(marpan): Advancing the svc counters on dropped frames can break - // the referencing scheme for the fixed svc patterns defined in - // vp9_one_pass_cbr_svc_start_layer(). Look into fixing this issue, but - // for now, don't advance the svc frame counters on dropped frame. - // if (cpi->use_svc) - // vp9_inc_frame_in_layer(cpi); - - return; - } + if (oxcf->aq_mode == PERCEPTUAL_AQ) { + init_mb_wiener_var_buffer(cpi); + set_mb_wiener_variance(cpi); } vpx_clear_system_state(); @@ -4472,18 +5165,33 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, memset(cpi->mode_chosen_counts, 0, MAX_MODES * sizeof(*cpi->mode_chosen_counts)); #endif +#if CONFIG_CONSISTENT_RECODE + // Backup to ensure consistency between recodes + save_encode_params(cpi); +#endif if (cpi->sf.recode_loop == DISALLOW_RECODE) { - encode_without_recode_loop(cpi, size, dest); + if (!encode_without_recode_loop(cpi, size, dest)) return; } else { +#if !CONFIG_REALTIME_ONLY encode_with_recode_loop(cpi, size, dest); +#endif } - cpi->last_frame_dropped = 0; + // TODO(jingning): When using show existing frame mode, we assume that the + // current ARF will be directly used as the final reconstructed frame. This is + // an encoder control scheme. One could in principle explore other + // possibilities to arrange the reference frame buffer and their coding order. + if (cm->show_existing_frame) { + ref_cnt_fb(cm->buffer_pool->frame_bufs, &cm->new_fb_idx, + cm->ref_frame_map[cpi->alt_fb_idx]); + } +#if !CONFIG_REALTIME_ONLY // Disable segmentation if it decrease rate/distortion ratio if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) vp9_try_disable_lookahead_aq(cpi, size, dest); +#endif #if CONFIG_VP9_TEMPORAL_DENOISING #ifdef OUTPUT_YUV_DENOISED @@ -4527,9 +5235,33 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, // Pick the loop filter level for the frame. loopfilter_frame(cpi, cm); + if (cpi->rc.use_post_encode_drop) save_coding_context(cpi); + // build the bitstream vp9_pack_bitstream(cpi, dest, size); + if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality && + cpi->svc.spatial_layer_id == 0 && post_encode_drop_cbr(cpi, size)) { + restore_coding_context(cpi); + return; + } + + cpi->last_frame_dropped = 0; + cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0; + if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) + cpi->svc.num_encoded_top_layer++; + + // Keep track of the frame buffer index updated/refreshed for the + // current encoded TL0 superframe. 
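+  // E.g. if TL0 of spatial layer s refreshes LAST, fb_idx_upd_tl0[s] records
+  // lst_fb_idx so later frames can find the buffer that TL0 just wrote.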
+ if (cpi->svc.temporal_layer_id == 0) { + if (cpi->refresh_last_frame) + cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx; + else if (cpi->refresh_golden_frame) + cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx; + else if (cpi->refresh_alt_ref_frame) + cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx; + } + if (cm->seg.update_map) update_reference_segmentation_map(cpi); if (frame_is_intra_only(cm) == 0) { @@ -4537,17 +5269,18 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, } vp9_update_reference_frames(cpi); - for (t = TX_4X4; t <= TX_32X32; t++) - full_to_model_counts(cpi->td.counts->coef[t], - cpi->td.rd_counts.coef_counts[t]); + if (!cm->show_existing_frame) { + for (t = TX_4X4; t <= TX_32X32; ++t) { + full_to_model_counts(cpi->td.counts->coef[t], + cpi->td.rd_counts.coef_counts[t]); + } - if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) - vp9_adapt_coef_probs(cm); - - if (!frame_is_intra_only(cm)) { if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { - vp9_adapt_mode_probs(cm); - vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); + if (!frame_is_intra_only(cm)) { + vp9_adapt_mode_probs(cm); + vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); + } + vp9_adapt_coef_probs(cm); } } @@ -4567,8 +5300,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, cm->last_frame_type = cm->frame_type; - if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING)) - vp9_rc_postencode_update(cpi, *size); + vp9_rc_postencode_update(cpi, *size); + + *size = VPXMAX(1, *size); #if 0 output_frame_level_debug_stats(cpi); @@ -4592,7 +5326,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, cm->last_height = cm->height; // reset to normal state now that we are done. - if (!cm->show_existing_frame) cm->last_show_frame = cm->show_frame; + if (!cm->show_existing_frame) { + cm->last_show_frame = cm->show_frame; + cm->prev_frame = cm->cur_frame; + } if (cm->show_frame) { vp9_swap_mi_and_prev_mi(cm); @@ -4601,19 +5338,26 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, ++cm->current_video_frame; if (cpi->use_svc) vp9_inc_frame_in_layer(cpi); } - cm->prev_frame = cm->cur_frame; - if (cpi->use_svc) + if (cpi->use_svc) { cpi->svc .layer_context[cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id] .last_frame_type = cm->frame_type; + // Reset layer_sync back to 0 for next frame. 
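+    // (spatial_layer_sync is a one-shot request set by the caller for this
+    // superframe only, so it is cleared once the frame has been encoded.)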
+ cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0; + } cpi->force_update_segmentation = 0; +#if !CONFIG_REALTIME_ONLY if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) vp9_alt_ref_aq_unset_all(cpi->alt_ref_aq, cpi); +#endif + + cpi->svc.previous_frame_is_intra_only = cm->intra_only; + cpi->svc.set_intra_only_frame = 0; } static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, @@ -4636,54 +5380,13 @@ static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags) { cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; +#if CONFIG_MISMATCH_DEBUG + mismatch_move_frame_idx_w(); +#endif encode_frame_to_data_rate(cpi, size, dest, frame_flags); - - if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING)) - vp9_twopass_postencode_update(cpi); } #endif // !CONFIG_REALTIME_ONLY -static void init_ref_frame_bufs(VP9_COMMON *cm) { - int i; - BufferPool *const pool = cm->buffer_pool; - cm->new_fb_idx = INVALID_IDX; - for (i = 0; i < REF_FRAMES; ++i) { - cm->ref_frame_map[i] = INVALID_IDX; - pool->frame_bufs[i].ref_count = 0; - } -} - -static void check_initial_width(VP9_COMP *cpi, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int subsampling_x, int subsampling_y) { - VP9_COMMON *const cm = &cpi->common; - - if (!cpi->initial_width || -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth != use_highbitdepth || -#endif - cm->subsampling_x != subsampling_x || - cm->subsampling_y != subsampling_y) { - cm->subsampling_x = subsampling_x; - cm->subsampling_y = subsampling_y; -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth = use_highbitdepth; -#endif - - alloc_raw_frame_buffers(cpi); - init_ref_frame_bufs(cm); - alloc_util_frame_buffers(cpi); - - init_motion_estimation(cpi); // TODO(agrange) This can be removed. 
- - cpi->initial_width = cm->width; - cpi->initial_height = cm->height; - cpi->initial_mbs = cm->MBs; - } -} - int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time) { @@ -4694,24 +5397,21 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags, const int subsampling_y = sd->subsampling_y; #if CONFIG_VP9_HIGHBITDEPTH const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0; +#else + const int use_highbitdepth = 0; #endif -#if CONFIG_VP9_HIGHBITDEPTH - check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y); -#else - check_initial_width(cpi, subsampling_x, subsampling_y); -#endif // CONFIG_VP9_HIGHBITDEPTH - + update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y); #if CONFIG_VP9_TEMPORAL_DENOISING setup_denoiser_buffer(cpi); #endif + + alloc_raw_frame_buffers(cpi); + vpx_usec_timer_start(&timer); if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, -#if CONFIG_VP9_HIGHBITDEPTH - use_highbitdepth, -#endif // CONFIG_VP9_HIGHBITDEPTH - frame_flags)) + use_highbitdepth, frame_flags)) res = -1; vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); @@ -4822,10 +5522,6 @@ static void check_src_altref(VP9_COMP *cpi, } #if CONFIG_INTERNAL_STATS -extern double vp9_get_blockiness(const uint8_t *img1, int img1_pitch, - const uint8_t *img2, int img2_pitch, int width, - int height); - static void adjust_image_stat(double y, double u, double v, double all, ImageStat *s) { s->stat[Y] += y; @@ -5065,9 +5761,1373 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) { } } +typedef struct GF_PICTURE { + YV12_BUFFER_CONFIG *frame; + int ref_frame[3]; + FRAME_UPDATE_TYPE update_type; +} GF_PICTURE; + +static void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture, + const GF_GROUP *gf_group, int *tpl_group_frames) { + VP9_COMMON *cm = &cpi->common; + int frame_idx = 0; + int i; + int gld_index = -1; + int alt_index = -1; + int lst_index = -1; + int arf_index_stack[MAX_ARF_LAYERS]; + int arf_stack_size = 0; + int extend_frame_count = 0; + int pframe_qindex = cpi->tpl_stats[2].base_qindex; + int frame_gop_offset = 0; + + RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs; + int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS]; + + memset(recon_frame_index, -1, sizeof(recon_frame_index)); + stack_init(arf_index_stack, MAX_ARF_LAYERS); + + // TODO(jingning): To be used later for gf frame type parsing. + (void)gf_group; + + for (i = 0; i < FRAME_BUFFERS; ++i) { + if (frame_bufs[i].ref_count == 0) { + alloc_frame_mvs(cm, i); + if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, + NULL, NULL, NULL)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffer"); + + recon_frame_index[frame_idx] = i; + ++frame_idx; + + if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break; + } + } + + for (i = 0; i < REFS_PER_FRAME + 1; ++i) { + assert(recon_frame_index[i] >= 0); + cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf; + } + + *tpl_group_frames = 0; + + // Initialize Golden reference frame. 
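+  // Slot 0 holds the already-reconstructed golden frame carried over from
+  // the previous GOP (all three ref_frame[] entries stay -1); slot 1 below
+  // holds the raw source that is coded as the base-layer ARF and predicts
+  // from slot 0.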
+ gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1; + gf_picture[0].update_type = gf_group->update_type[0]; + gld_index = 0; + ++*tpl_group_frames; + + // Initialize base layer ARF frame + gf_picture[1].frame = cpi->Source; + gf_picture[1].ref_frame[0] = gld_index; + gf_picture[1].ref_frame[1] = lst_index; + gf_picture[1].ref_frame[2] = alt_index; + gf_picture[1].update_type = gf_group->update_type[1]; + alt_index = 1; + ++*tpl_group_frames; + + // Initialize P frames + for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) { + struct lookahead_entry *buf; + frame_gop_offset = gf_group->frame_gop_index[frame_idx]; + buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1); + + if (buf == NULL) break; + + gf_picture[frame_idx].frame = &buf->img; + gf_picture[frame_idx].ref_frame[0] = gld_index; + gf_picture[frame_idx].ref_frame[1] = lst_index; + gf_picture[frame_idx].ref_frame[2] = alt_index; + gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx]; + + switch (gf_group->update_type[frame_idx]) { + case ARF_UPDATE: + stack_push(arf_index_stack, alt_index, arf_stack_size); + ++arf_stack_size; + alt_index = frame_idx; + break; + case LF_UPDATE: lst_index = frame_idx; break; + case OVERLAY_UPDATE: + gld_index = frame_idx; + alt_index = stack_pop(arf_index_stack, arf_stack_size); + --arf_stack_size; + break; + case USE_BUF_FRAME: + lst_index = alt_index; + alt_index = stack_pop(arf_index_stack, arf_stack_size); + --arf_stack_size; + break; + default: break; + } + + ++*tpl_group_frames; + + // The length of group of pictures is baseline_gf_interval, plus the + // beginning golden frame from last GOP, plus the last overlay frame in + // the same GOP. + if (frame_idx == gf_group->gf_group_size) break; + } + + alt_index = -1; + ++frame_idx; + ++frame_gop_offset; + + // Extend two frames outside the current gf group. + for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) { + struct lookahead_entry *buf = + vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1); + + if (buf == NULL) break; + + cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex; + + gf_picture[frame_idx].frame = &buf->img; + gf_picture[frame_idx].ref_frame[0] = gld_index; + gf_picture[frame_idx].ref_frame[1] = lst_index; + gf_picture[frame_idx].ref_frame[2] = alt_index; + gf_picture[frame_idx].update_type = LF_UPDATE; + lst_index = frame_idx; + ++*tpl_group_frames; + ++extend_frame_count; + ++frame_gop_offset; + } +} + +static void init_tpl_stats(VP9_COMP *cpi) { + int frame_idx; + for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) { + TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; + memset(tpl_frame->tpl_stats_ptr, 0, + tpl_frame->height * tpl_frame->width * + sizeof(*tpl_frame->tpl_stats_ptr)); + tpl_frame->is_valid = 0; + } +} + +#if CONFIG_NON_GREEDY_MV +static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td, + MotionField *motion_field, + int frame_idx, uint8_t *cur_frame_buf, + uint8_t *ref_frame_buf, int stride, + BLOCK_SIZE bsize, int mi_row, + int mi_col, MV *mv) { + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; + int step_param; + uint32_t bestsme = UINT_MAX; + const MvLimits tmp_mv_limits = x->mv_limits; + // lambda is used to adjust the importance of motion vector consitency. + // TODO(angiebird): Figure out lambda's proper value. 
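+  // Roughly, the diamond search below minimizes
+  //   SAD(mv) + lambda * consistency_cost(mv, nb_full_mvs),
+  // so a larger lambda pulls each block's vector toward its neighbors and
+  // produces a smoother motion field.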
+ const int lambda = cpi->tpl_stats[frame_idx].lambda; + int_mv nb_full_mvs[NB_MVS_NUM]; + int nb_full_mv_num; + + MV best_ref_mv1 = { 0, 0 }; + MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ + + best_ref_mv1_full.col = best_ref_mv1.col >> 3; + best_ref_mv1_full.row = best_ref_mv1.row >> 3; + + // Setup frame pointers + x->plane[0].src.buf = cur_frame_buf; + x->plane[0].src.stride = stride; + xd->plane[0].pre[0].buf = ref_frame_buf; + xd->plane[0].pre[0].stride = stride; + + step_param = mv_sf->reduce_first_step_size; + step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); + + vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); + + nb_full_mv_num = + vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs); + vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param, + lambda, 1, nb_full_mvs, nb_full_mv_num, mv); + + /* restore UMV window */ + x->mv_limits = tmp_mv_limits; + + return bestsme; +} + +static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td, + uint8_t *cur_frame_buf, + uint8_t *ref_frame_buf, int stride, + BLOCK_SIZE bsize, MV *mv) { + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; + uint32_t bestsme = UINT_MAX; + uint32_t distortion; + uint32_t sse; + int cost_list[5]; + + MV best_ref_mv1 = { 0, 0 }; + + // Setup frame pointers + x->plane[0].src.buf = cur_frame_buf; + x->plane[0].src.stride = stride; + xd->plane[0].pre[0].buf = ref_frame_buf; + xd->plane[0].pre[0].stride = stride; + + // TODO(yunqing): may use higher tap interp filter than 2 taps. + // Ignore mv costing by sending NULL pointer instead of cost array + bestsme = cpi->find_fractional_mv_step( + x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, + &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level, + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0, + USE_2_TAPS); + + return bestsme; +} + +#else // CONFIG_NON_GREEDY_MV +static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, + uint8_t *cur_frame_buf, + uint8_t *ref_frame_buf, + int stride, BLOCK_SIZE bsize, + MV *mv) { + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; + const SEARCH_METHODS search_method = NSTEP; + int step_param; + int sadpb = x->sadperbit16; + uint32_t bestsme = UINT_MAX; + uint32_t distortion; + uint32_t sse; + int cost_list[5]; + const MvLimits tmp_mv_limits = x->mv_limits; + + MV best_ref_mv1 = { 0, 0 }; + MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ + + best_ref_mv1_full.col = best_ref_mv1.col >> 3; + best_ref_mv1_full.row = best_ref_mv1.row >> 3; + + // Setup frame pointers + x->plane[0].src.buf = cur_frame_buf; + x->plane[0].src.stride = stride; + xd->plane[0].pre[0].buf = ref_frame_buf; + xd->plane[0].pre[0].stride = stride; + + step_param = mv_sf->reduce_first_step_size; + step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); + + vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); + + vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param, + search_method, sadpb, cond_cost_list(cpi, cost_list), + &best_ref_mv1, mv, 0, 0); + + /* restore UMV window */ + x->mv_limits = tmp_mv_limits; + + // TODO(yunqing): may use higher tap interp filter than 2 taps. 
+ // Ignore mv costing by sending NULL pointer instead of cost array + bestsme = cpi->find_fractional_mv_step( + x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, + &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level, + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0, + USE_2_TAPS); + + return bestsme; +} +#endif + +static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row, + int ref_pos_col, int block, BLOCK_SIZE bsize) { + int width = 0, height = 0; + int bw = 4 << b_width_log2_lookup[bsize]; + int bh = 4 << b_height_log2_lookup[bsize]; + + switch (block) { + case 0: + width = grid_pos_col + bw - ref_pos_col; + height = grid_pos_row + bh - ref_pos_row; + break; + case 1: + width = ref_pos_col + bw - grid_pos_col; + height = grid_pos_row + bh - ref_pos_row; + break; + case 2: + width = grid_pos_col + bw - ref_pos_col; + height = ref_pos_row + bh - grid_pos_row; + break; + case 3: + width = ref_pos_col + bw - grid_pos_col; + height = ref_pos_row + bh - grid_pos_row; + break; + default: assert(0); + } + + return width * height; +} + +static int round_floor(int ref_pos, int bsize_pix) { + int round; + if (ref_pos < 0) + round = -(1 + (-ref_pos - 1) / bsize_pix); + else + round = ref_pos / bsize_pix; + + return round; +} + +static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col, + BLOCK_SIZE bsize, int stride) { + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col]; + int idx, idy; + + for (idy = 0; idy < mi_height; ++idy) { + for (idx = 0; idx < mi_width; ++idx) { + TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx]; + const int64_t mc_flow = tpl_ptr->mc_flow; + const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost; + *tpl_ptr = *src_stats; + tpl_ptr->mc_flow = mc_flow; + tpl_ptr->mc_ref_cost = mc_ref_cost; + tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow; + } + } +} + +static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats, + int mi_row, int mi_col, const BLOCK_SIZE bsize) { + TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index]; + TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr; + MV mv = tpl_stats->mv.as_mv; + int mv_row = mv.row >> 3; + int mv_col = mv.col >> 3; + + int ref_pos_row = mi_row * MI_SIZE + mv_row; + int ref_pos_col = mi_col * MI_SIZE + mv_col; + + const int bw = 4 << b_width_log2_lookup[bsize]; + const int bh = 4 << b_height_log2_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int pix_num = bw * bh; + + // top-left on grid block location in pixel + int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh; + int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw; + int block; + + for (block = 0; block < 4; ++block) { + int grid_pos_row = grid_pos_row_base + bh * (block >> 1); + int grid_pos_col = grid_pos_col_base + bw * (block & 0x01); + + if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE && + grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) { + int overlap_area = get_overlap_area( + grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize); + int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height; + int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width; + + int64_t mc_flow = tpl_stats->mc_dep_cost - + 
(tpl_stats->mc_dep_cost * tpl_stats->inter_cost) / + tpl_stats->intra_cost; + + int idx, idy; + + for (idy = 0; idy < mi_height; ++idy) { + for (idx = 0; idx < mi_width; ++idx) { + TplDepStats *des_stats = + &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride + + (ref_mi_col + idx)]; + + des_stats->mc_flow += (mc_flow * overlap_area) / pix_num; + des_stats->mc_ref_cost += + ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) / + pix_num; + assert(overlap_area >= 0); + } + } + } + } +} + +static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats, + int mi_row, int mi_col, const BLOCK_SIZE bsize) { + int idx, idy; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + + for (idy = 0; idy < mi_height; ++idy) { + for (idx = 0; idx < mi_width; ++idx) { + TplDepStats *tpl_ptr = + &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)]; + tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx, + BLOCK_8X8); + } + } +} + +static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff, + tran_low_t *qcoeff, tran_low_t *dqcoeff, + TX_SIZE tx_size, int64_t *recon_error, + int64_t *sse) { + MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblock_plane *const p = &x->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const scan_order *const scan_order = &vp9_default_scan_orders[tx_size]; + uint16_t eob; + int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]; + const int shift = tx_size == TX_32X32 ? 0 : 2; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, + p->quant_fp, qcoeff, dqcoeff, pd->dequant, + &eob, scan_order->scan, scan_order->iscan); + } else { + vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, + p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob, + scan_order->scan, scan_order->iscan); + } +#else + vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp, + qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan, + scan_order->iscan); +#endif // CONFIG_VP9_HIGHBITDEPTH + + *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift; + *recon_error = VPXMAX(*recon_error, 1); + + *sse = (*sse) >> shift; + *sse = VPXMAX(*sse, 1); +} + +#if CONFIG_VP9_HIGHBITDEPTH +void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, + TX_SIZE tx_size) { + // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms. 
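
tpl_model_update_b() above back-propagates each block's dependency cost: the share of mc_dep_cost not already explained by inter coding, mc_dep_cost * (1 - inter_cost / intra_cost), is split over the up-to-four grid-aligned reference blocks that the motion-compensated prediction overlaps, weighted by overlap_area / pix_num. A self-contained sketch of that arithmetic with invented costs; only the top-left overlap (get_overlap_area() case 0) is computed here:

    #include <stdio.h>

    /* Toy version of the propagation arithmetic in tpl_model_update_b().
     * All numbers are invented for illustration. */
    int main(void) {
      const long long mc_dep_cost = 1000, inter_cost = 300, intra_cost = 600;
      const int bw = 16, bh = 16, pix_num = bw * bh;

      /* Fraction of the dependency cost not explained by inter coding. */
      long long mc_flow = mc_dep_cost - (mc_dep_cost * inter_cost) / intra_cost;

      /* A prediction landing at pixel offset (6, 10) in the reference frame
       * straddles four 16x16 grid blocks; the top-left overlap follows
       * get_overlap_area() case 0, measured from the grid-aligned corner. */
      const int ref_pos_row = 6, ref_pos_col = 10;
      const int grid_pos_row = 0, grid_pos_col = 0; /* round_floor(...) * bh/bw */
      int width = grid_pos_col + bw - ref_pos_col;  /* 16 - 10 = 6 */
      int height = grid_pos_row + bh - ref_pos_row; /* 16 - 6 = 10 */
      int overlap_area = width * height;            /* 60 of 256 pixels */

      printf("mc_flow total %lld, to top-left block %lld\n", mc_flow,
             (mc_flow * overlap_area) / pix_num); /* 500 total, 117 propagated */
      return 0;
    }
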
+ switch (tx_size) { + case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break; + case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break; + case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break; + default: assert(0); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, + TX_SIZE tx_size) { + switch (tx_size) { + case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break; + case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break; + case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break; + default: assert(0); + } +} + +static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row, + int mi_col) { + x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND)); + x->mv_limits.row_max = + (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND); + x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND)); + x->mv_limits.col_max = + ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND); +} + +static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, + struct scale_factors *sf, GF_PICTURE *gf_picture, + int frame_idx, TplDepFrame *tpl_frame, + int16_t *src_diff, tran_low_t *coeff, + tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row, + int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size, + YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor, + int64_t *recon_error, int64_t *sse) { + VP9_COMMON *cm = &cpi->common; + ThreadData *td = &cpi->td; + + const int bw = 4 << b_width_log2_lookup[bsize]; + const int bh = 4 << b_height_log2_lookup[bsize]; + const int pix_num = bw * bh; + int best_rf_idx = -1; + int_mv best_mv; + int64_t best_inter_cost = INT64_MAX; + int64_t inter_cost; + int rf_idx; + const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP]; + + int64_t best_intra_cost = INT64_MAX; + int64_t intra_cost; + PREDICTION_MODE mode; + int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; + MODE_INFO mi_above, mi_left; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + TplDepStats *tpl_stats = + &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; + + xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); + xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8; + xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); + xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8; + xd->above_mi = (mi_row > 0) ? &mi_above : NULL; + xd->left_mi = (mi_col > 0) ? 
&mi_left : NULL; + + // Intra prediction search + for (mode = DC_PRED; mode <= TM_PRED; ++mode) { + uint8_t *src, *dst; + int src_stride, dst_stride; + + src = xd->cur_buf->y_buffer + mb_y_offset; + src_stride = xd->cur_buf->y_stride; + + dst = &predictor[0]; + dst_stride = bw; + + xd->mi[0]->sb_type = bsize; + xd->mi[0]->ref_frame[0] = INTRA_FRAME; + + vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src, + src_stride, dst, dst_stride, 0, 0, 0); + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, + dst_stride, xd->bd); + highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size); + intra_cost = vpx_highbd_satd(coeff, pix_num); + } else { + vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, + dst_stride); + wht_fwd_txfm(src_diff, bw, coeff, tx_size); + intra_cost = vpx_satd(coeff, pix_num); + } +#else + vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride); + wht_fwd_txfm(src_diff, bw, coeff, tx_size); + intra_cost = vpx_satd(coeff, pix_num); +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (intra_cost < best_intra_cost) best_intra_cost = intra_cost; + } + + // Motion compensated prediction + best_mv.as_int = 0; + + set_mv_limits(cm, x, mi_row, mi_col); + + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { + int_mv mv; +#if CONFIG_NON_GREEDY_MV + MotionField *motion_field; +#endif + if (ref_frame[rf_idx] == NULL) continue; + +#if CONFIG_NON_GREEDY_MV + (void)td; + motion_field = vp9_motion_field_info_get_motion_field( + &cpi->motion_field_info, frame_idx, rf_idx, bsize); + mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col); +#else + motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset, + ref_frame[rf_idx]->y_buffer + mb_y_offset, + xd->cur_buf->y_stride, bsize, &mv.as_mv); +#endif + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_highbd_build_inter_predictor( + CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset), + ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw, + &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, + mi_row * MI_SIZE, xd->bd); + vpx_highbd_subtract_block( + bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, + xd->cur_buf->y_stride, &predictor[0], bw, xd->bd); + highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size); + inter_cost = vpx_highbd_satd(coeff, pix_num); + } else { + vp9_build_inter_predictor( + ref_frame[rf_idx]->y_buffer + mb_y_offset, + ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh, + 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); + vpx_subtract_block(bh, bw, src_diff, bw, + xd->cur_buf->y_buffer + mb_y_offset, + xd->cur_buf->y_stride, &predictor[0], bw); + wht_fwd_txfm(src_diff, bw, coeff, tx_size); + inter_cost = vpx_satd(coeff, pix_num); + } +#else + vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset, + ref_frame[rf_idx]->y_stride, &predictor[0], bw, + &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, + mi_col * MI_SIZE, mi_row * MI_SIZE); + vpx_subtract_block(bh, bw, src_diff, bw, + xd->cur_buf->y_buffer + mb_y_offset, + xd->cur_buf->y_stride, &predictor[0], bw); + wht_fwd_txfm(src_diff, bw, coeff, tx_size); + inter_cost = vpx_satd(coeff, pix_num); +#endif + + if (inter_cost < best_inter_cost) { + best_rf_idx = rf_idx; + best_inter_cost = inter_cost; + best_mv.as_int = mv.as_int; + get_quantize_error(x, 0, coeff, qcoeff, 
dqcoeff, tx_size, recon_error, + sse); + } + } + best_intra_cost = VPXMAX(best_intra_cost, 1); + best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost); + tpl_stats->inter_cost = VPXMAX( + 1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width)); + tpl_stats->intra_cost = VPXMAX( + 1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width)); + tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx]; + tpl_stats->mv.as_int = best_mv.as_int; +} + +#if CONFIG_NON_GREEDY_MV +static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture, + int frame_idx, int rf_idx, int mi_row, + int mi_col, struct buf_2d *src, + struct buf_2d *pre) { + const int mb_y_offset = + mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; + YV12_BUFFER_CONFIG *ref_frame = NULL; + int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx]; + if (ref_frame_idx != -1) { + ref_frame = gf_picture[ref_frame_idx].frame; + src->buf = xd->cur_buf->y_buffer + mb_y_offset; + src->stride = xd->cur_buf->y_stride; + pre->buf = ref_frame->y_buffer + mb_y_offset; + pre->stride = ref_frame->y_stride; + assert(src->stride == pre->stride); + return 1; + } else { + printf("invalid ref_frame_idx"); + assert(ref_frame_idx != -1); + return 0; + } +} + +#define kMvPreCheckLines 5 +#define kMvPreCheckSize 15 + +#define MV_REF_POS_NUM 3 +POSITION mv_ref_pos[MV_REF_POS_NUM] = { + { -1, 0 }, + { 0, -1 }, + { -1, -1 }, +}; + +static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row, + int mi_col) { + return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col]; +} + +static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + int i; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + int_mv nearest_mv, near_mv, invalid_mv; + nearest_mv.as_int = INVALID_MV; + near_mv.as_int = INVALID_MV; + invalid_mv.as_int = INVALID_MV; + for (i = 0; i < MV_REF_POS_NUM; ++i) { + int nb_row = mi_row + mv_ref_pos[i].row * mi_height; + int nb_col = mi_col + mv_ref_pos[i].col * mi_width; + assert(mv_ref_pos[i].row <= 0); + assert(mv_ref_pos[i].col <= 0); + if (nb_row >= 0 && nb_col >= 0) { + if (nearest_mv.as_int == INVALID_MV) { + nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col); + } else { + int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col); + if (mv.as_int == nearest_mv.as_int) { + continue; + } else { + near_mv = mv; + break; + } + } + } + } + if (nearest_mv.as_int == INVALID_MV) { + nearest_mv.as_mv.row = 0; + nearest_mv.as_mv.col = 0; + } + if (near_mv.as_int == INVALID_MV) { + near_mv.as_mv.row = 0; + near_mv.as_mv.col = 0; + } + if (mv_mode == NEAREST_MV_MODE) { + return nearest_mv; + } + if (mv_mode == NEAR_MV_MODE) { + return near_mv; + } + assert(0); + return invalid_mv; +} + +static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi, + MotionField *motion_field, + TplDepFrame *tpl_frame, BLOCK_SIZE bsize, + int mi_row, int mi_col) { + int_mv mv; + switch (mv_mode) { + case ZERO_MV_MODE: + mv.as_mv.row = 0; + mv.as_mv.col = 0; + break; + case NEW_MV_MODE: + mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col); + break; + case NEAREST_MV_MODE: + mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col); + break; + case NEAR_MV_MODE: + mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col); + break; + default: + mv.as_int = INVALID_MV; + assert(0); + break; + } + return mv; +} + 
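
find_ref_mv() above derives its NEAREST/NEAR candidates by scanning the three causal neighbours in mv_ref_pos[] order (above, left, above-left): the first MV seen becomes NEAREST, and the first differing MV becomes NEAR. The same scan over a toy grid of packed MVs (the grid contents are invented):

    #include <stdio.h>

    #define INVALID 0x7fffffff

    typedef struct { int row, col; } ToyPos;

    /* Same neighbour order as mv_ref_pos[]: above, left, above-left. */
    static const ToyPos kNeighbors[3] = { { -1, 0 }, { 0, -1 }, { -1, -1 } };

    int main(void) {
      int grid[3][3] = { { 7, 5, 0 }, { 5, 0, 0 }, { 0, 0, 0 } }; /* packed MVs */
      const int row = 1, col = 1;
      int nearest_mv = INVALID, near_mv = INVALID, i;

      for (i = 0; i < 3; ++i) {
        const int r = row + kNeighbors[i].row, c = col + kNeighbors[i].col;
        if (r < 0 || c < 0) continue;
        if (nearest_mv == INVALID) {
          nearest_mv = grid[r][c]; /* first candidate becomes NEAREST */
        } else if (grid[r][c] != nearest_mv) {
          near_mv = grid[r][c]; /* first distinct candidate becomes NEAR */
          break;
        }
      }
      printf("nearest %d near %d\n", nearest_mv, near_mv); /* 5 and 7 */
      return 0;
    }
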
+static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd, + GF_PICTURE *gf_picture, MotionField *motion_field, + int frame_idx, TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int_mv *mv) { + uint32_t sse; + struct buf_2d src; + struct buf_2d pre; + MV full_mv; + *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize, + mi_row, mi_col); + full_mv = get_full_mv(&mv->as_mv); + if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col, + &src, &pre)) { + // TODO(angiebird): Consider subpixel when computing the sse. + cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv), + pre.stride, &sse); + return (double)(sse << VP9_DIST_SCALE_LOG2); + } else { + assert(0); + return 0; + } +} + +static int get_mv_mode_cost(int mv_mode) { + // TODO(angiebird): The probabilities are roughly inferred from + // default_inter_mode_probs. Check if there is a better way to set the + // probabilities. + const int zero_mv_prob = 16; + const int new_mv_prob = 24 * 1; + const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob; + assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256); + switch (mv_mode) { + case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break; + case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break; + case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break; + case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break; + default: assert(0); return -1; + } +} + +static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) { + double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) + + log2(1 + abs(new_mv->col - ref_mv->col)); + mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT); + return mv_diff_cost; +} +static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field, + TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row, + int mi_col) { + double mv_cost = get_mv_mode_cost(mv_mode); + if (mv_mode == NEW_MV_MODE) { + MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, + bsize, mi_row, mi_col) + .as_mv; + MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field, + tpl_frame, bsize, mi_row, mi_col) + .as_mv; + MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame, + bsize, mi_row, mi_col) + .as_mv; + double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv); + double near_cost = get_mv_diff_cost(&new_mv, &near_mv); + mv_cost += nearest_cost < near_cost ? 
nearest_cost : near_cost;
+ }
+ return mv_cost;
+}
+
+static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
+ GF_PICTURE *gf_picture, MotionField *motion_field,
+ int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int_mv *mv) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ double mv_dist =
+ get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
+ tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
+ double mv_cost =
+ get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
+ double mult = 180;
+
+ return mv_cost + mult * log2f(1 + mv_dist);
+}
+
+static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
+ GF_PICTURE *gf_picture,
+ MotionField *motion_field, int frame_idx,
+ TplDepFrame *tpl_frame, int rf_idx,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ double *rd, int_mv *mv) {
+ int best_mv_mode = ZERO_MV_MODE;
+ int update = 0;
+ int mv_mode;
+ *rd = 0;
+ for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) {
+ double this_rd;
+ int_mv this_mv;
+ if (mv_mode == NEW_MV_MODE) {
+ continue;
+ }
+ this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
+ tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
+ if (update == 0) {
+ *rd = this_rd;
+ *mv = this_mv;
+ best_mv_mode = mv_mode;
+ update = 1;
+ } else {
+ if (this_rd < *rd) {
+ *rd = this_rd;
+ *mv = this_mv;
+ best_mv_mode = mv_mode;
+ }
+ }
+ }
+ return best_mv_mode;
+}
+
+static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
+ GF_PICTURE *gf_picture, MotionField *motion_field,
+ int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
+ BLOCK_SIZE bsize, int mi_row, int mi_col) {
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ int tmp_mv_mode_arr[kMvPreCheckSize];
+ int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
+ double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx];
+ int_mv *select_mv_arr = cpi->select_mv_arr;
+ int_mv tmp_select_mv_arr[kMvPreCheckSize];
+ int stride = tpl_frame->stride;
+ double new_mv_rd = 0;
+ double no_new_mv_rd = 0;
+ double this_new_mv_rd = 0;
+ double this_no_new_mv_rd = 0;
+ int idx;
+ int tmp_idx;
+ assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1);
+
+ // no new mv
+ // diagonal scan order
+ tmp_idx = 0;
+ for (idx = 0; idx < kMvPreCheckLines; ++idx) {
+ int r;
+ for (r = 0; r <= idx; ++r) {
+ int c = idx - r;
+ int nb_row = mi_row + r * mi_height;
+ int nb_col = mi_col + c * mi_width;
+ if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
+ double this_rd;
+ int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
+ mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
+ cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
+ bsize, nb_row, nb_col, &this_rd, mv);
+ if (r == 0 && c == 0) {
+ this_no_new_mv_rd = this_rd;
+ }
+ no_new_mv_rd += this_rd;
+ tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
+ tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
+ ++tmp_idx;
+ }
+ }
+ }
+
+ // new mv
+ mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
+ this_new_mv_rd = eval_mv_mode(
+ NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
+ rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
+ new_mv_rd = this_new_mv_rd;
+ // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
+ // beforehand.
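
The pre-check region that predict_mv_mode() sums over is the leading triangle of a diagonal scan, so its size is the triangular number 5 * 6 / 2 = 15, which is exactly what the kMvPreCheckSize assert encodes. The enumeration in isolation:

    #include <assert.h>
    #include <stdio.h>

    int main(void) {
      const int lines = 5; /* kMvPreCheckLines */
      int idx, r, count = 0;
      for (idx = 0; idx < lines; ++idx) {
        for (r = 0; r <= idx; ++r) {
          int c = idx - r;
          printf("(%d,%d) ", r, c); /* block offsets on diagonal idx */
          ++count;
        }
        printf("\n");
      }
      assert(count == (lines * (lines + 1)) / 2); /* 15 == kMvPreCheckSize */
      return 0;
    }
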
+ for (idx = 1; idx < kMvPreCheckLines; ++idx) { + int r; + for (r = 0; r <= idx; ++r) { + int c = idx - r; + int nb_row = mi_row + r * mi_height; + int nb_col = mi_col + c * mi_width; + if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { + double this_rd; + int_mv *mv = &select_mv_arr[nb_row * stride + nb_col]; + mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode( + cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx, + bsize, nb_row, nb_col, &this_rd, mv); + new_mv_rd += this_rd; + } + } + } + + // update best_mv_mode + tmp_idx = 0; + if (no_new_mv_rd < new_mv_rd) { + for (idx = 0; idx < kMvPreCheckLines; ++idx) { + int r; + for (r = 0; r <= idx; ++r) { + int c = idx - r; + int nb_row = mi_row + r * mi_height; + int nb_col = mi_col + c * mi_width; + if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { + mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx]; + select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx]; + ++tmp_idx; + } + } + } + rd_diff_arr[mi_row * stride + mi_col] = 0; + } else { + rd_diff_arr[mi_row * stride + mi_col] = + (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd); + } +} + +static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x, + GF_PICTURE *gf_picture, + MotionField *motion_field, int frame_idx, + TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize) { + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int unit_rows = tpl_frame->mi_rows / mi_height; + const int unit_cols = tpl_frame->mi_cols / mi_width; + const int max_diagonal_lines = unit_rows + unit_cols - 1; + int idx; + for (idx = 0; idx < max_diagonal_lines; ++idx) { + int r; + for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1); + ++r) { + int c = idx - r; + int mi_row = r * mi_height; + int mi_col = c * mi_width; + assert(c >= 0 && c < unit_cols); + assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows); + assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols); + predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, + rf_idx, bsize, mi_row, mi_col); + } + } +} + +static void do_motion_search(VP9_COMP *cpi, ThreadData *td, + MotionField *motion_field, int frame_idx, + YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize, + int mi_row, int mi_col) { + VP9_COMMON *cm = &cpi->common; + MACROBLOCK *x = &td->mb; + MACROBLOCKD *xd = &x->e_mbd; + const int mb_y_offset = + mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; + assert(ref_frame != NULL); + set_mv_limits(cm, x, mi_row, mi_col); + { + int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col); + uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset; + uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset; + const int stride = xd->cur_buf->y_stride; + full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf, + ref_frame_buf, stride, bsize, mi_row, mi_col, + &mv.as_mv); + sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride, + bsize, &mv.as_mv); + vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv); + } +} + +static void build_motion_field( + VP9_COMP *cpi, int frame_idx, + YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) { + VP9_COMMON *cm = &cpi->common; + ThreadData *td = &cpi->td; + TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int 
pw = num_4x4_blocks_wide_lookup[bsize] << 2;
+ const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
+ int mi_row, mi_col;
+ int rf_idx;
+
+ tpl_frame->lambda = (pw * ph) >> 2;
+ assert(pw * ph == tpl_frame->lambda << 2);
+
+ for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
+ MotionField *motion_field = vp9_motion_field_info_get_motion_field(
+ &cpi->motion_field_info, frame_idx, rf_idx, bsize);
+ if (ref_frame[rf_idx] == NULL) {
+ continue;
+ }
+ vp9_motion_field_reset_mvs(motion_field);
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+ do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
+ bsize, mi_row, mi_col);
+ }
+ }
+ }
+}
+#endif  // CONFIG_NON_GREEDY_MV
+
+static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
+ int frame_idx, BLOCK_SIZE bsize) {
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+ YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
+ YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };
+
+ VP9_COMMON *cm = &cpi->common;
+ struct scale_factors sf;
+ int rdmult, idx;
+ ThreadData *td = &cpi->td;
+ MACROBLOCK *x = &td->mb;
+ MACROBLOCKD *xd = &x->e_mbd;
+ int mi_row, mi_col;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
+ DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
+ uint8_t *predictor;
+#else
+ DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
+#endif
+ DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
+ DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
+ DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
+ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
+
+ const TX_SIZE tx_size = max_txsize_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ int64_t recon_error, sse;
+#if CONFIG_NON_GREEDY_MV
+ int square_block_idx;
+ int rf_idx;
+#endif
+
+ // Setup scaling factor
+#if CONFIG_VP9_HIGHBITDEPTH
+ vp9_setup_scale_factors_for_frame(
+ &sf, this_frame->y_crop_width, this_frame->y_crop_height,
+ this_frame->y_crop_width, this_frame->y_crop_height,
+ cpi->common.use_highbitdepth);
+
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ predictor = CONVERT_TO_BYTEPTR(predictor16);
+ else
+ predictor = predictor8;
+#else
+ vp9_setup_scale_factors_for_frame(
+ &sf, this_frame->y_crop_width, this_frame->y_crop_height,
+ this_frame->y_crop_width, this_frame->y_crop_height);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+ // Prepare reference frame pointers. If any reference frame slot is
+ // unavailable, the pointer will be set to NULL.
+ for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
+ int rf_idx = gf_picture[frame_idx].ref_frame[idx];
+ if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
+ }
+
+ xd->mi = cm->mi_grid_visible;
+ xd->mi[0] = cm->mi;
+ xd->cur_buf = this_frame;
+
+ // Get rd multiplier set up.
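
The geometry lookups used throughout this file are all powers of two derived from bsize: pixel dimensions are 4 << b_{width,height}_log2_lookup[bsize] (equivalently num_4x4_blocks_*_lookup << 2), and the 8x8 mode-info footprint is the pixel size divided by 8. Worked out by hand for the 32x32 TPL block size, including the lambda computed in build_motion_field() above (the log2 values are written out rather than taken from the real lookup tables):

    #include <stdio.h>

    int main(void) {
      /* For BLOCK_32X32 the log2 lookups are 3 (32 = 4 << 3). */
      const int b_width_log2 = 3, b_height_log2 = 3;
      const int bw = 4 << b_width_log2;   /* 32 pixels */
      const int bh = 4 << b_height_log2;  /* 32 pixels */
      const int pix_num = bw * bh;        /* 1024 */
      const int mi_width = bw / 8;        /* 4 8x8 mode-info units */
      const int mi_height = bh / 8;       /* 4 */
      const int lambda = (bw * bh) >> 2;  /* 256, as in build_motion_field() */

      printf("bw %d bh %d pix %d mi %dx%d lambda %d\n", bw, bh, pix_num,
             mi_width, mi_height, lambda);
      return 0;
    }
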
+ rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex); + set_error_per_bit(&cpi->td.mb, rdmult); + vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex); + + tpl_frame->is_valid = 1; + + cm->base_qindex = tpl_frame->base_qindex; + vp9_frame_init_quantizer(cpi); + +#if CONFIG_NON_GREEDY_MV + for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES; + ++square_block_idx) { + BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx); + build_motion_field(cpi, frame_idx, ref_frame, square_bsize); + } + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { + int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx]; + if (ref_frame_idx != -1) { + MotionField *motion_field = vp9_motion_field_info_get_motion_field( + &cpi->motion_field_info, frame_idx, rf_idx, bsize); + predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx, + tpl_frame, rf_idx, bsize); + } + } +#endif + + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { + mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame, + src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize, + tx_size, ref_frame, predictor, &recon_error, &sse); + // Motion flow dependency dispenser. + tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize, + tpl_frame->stride); + + tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col, + bsize); + } + } +} + +#if CONFIG_NON_GREEDY_MV +#define DUMP_TPL_STATS 0 +#if DUMP_TPL_STATS +static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) { + int i, j; + printf("%d %d\n", h, w); + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { + printf("%d ", buf[(row + i) * stride + col + j]); + } + } + printf("\n"); +} + +static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) { + dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height, + frame_buf->y_width); + dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0, + frame_buf->uv_height, frame_buf->uv_width); + dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0, + frame_buf->uv_height, frame_buf->uv_width); +} + +static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames, + const GF_GROUP *gf_group, + const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) { + int frame_idx; + const VP9_COMMON *cm = &cpi->common; + int rf_idx; + for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) { + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { + const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; + int mi_row, mi_col; + int ref_frame_idx; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx]; + if (ref_frame_idx != -1) { + YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame; + const int gf_frame_offset = gf_group->frame_gop_index[frame_idx]; + const int ref_gf_frame_offset = + gf_group->frame_gop_index[ref_frame_idx]; + printf("=\n"); + printf( + "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d " + "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n", + frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE, + ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset); + for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) { + for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { + if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) { + int_mv mv = 
vp9_motion_field_info_get_mv(&cpi->motion_field_info, + frame_idx, rf_idx, bsize, + mi_row, mi_col); + printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row, + mv.as_mv.col); + } + } + } + for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) { + for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { + if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) { + const TplDepStats *tpl_ptr = + &tpl_frame + ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; + printf("%f ", tpl_ptr->feature_score); + } + } + } + printf("\n"); + + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { + const int mv_mode = + tpl_frame + ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col]; + printf("%d ", mv_mode); + } + } + printf("\n"); + + dump_frame_buf(gf_picture[frame_idx].frame); + dump_frame_buf(ref_frame_buf); + } + } + } +} +#endif // DUMP_TPL_STATS +#endif // CONFIG_NON_GREEDY_MV + +static void init_tpl_buffer(VP9_COMP *cpi) { + VP9_COMMON *cm = &cpi->common; + int frame; + + const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); +#if CONFIG_NON_GREEDY_MV + int rf_idx; + + vpx_free(cpi->select_mv_arr); + CHECK_MEM_ERROR( + cm, cpi->select_mv_arr, + vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr))); +#endif + + // TODO(jingning): Reduce the actual memory use for tpl model build up. + for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { + if (cpi->tpl_stats[frame].width >= mi_cols && + cpi->tpl_stats[frame].height >= mi_rows && + cpi->tpl_stats[frame].tpl_stats_ptr) + continue; + +#if CONFIG_NON_GREEDY_MV + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { + vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]); + CHECK_MEM_ERROR( + cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx], + vpx_calloc(mi_rows * mi_cols * 4, + sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx]))); + vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]); + CHECK_MEM_ERROR( + cm, cpi->tpl_stats[frame].rd_diff_arr[rf_idx], + vpx_calloc(mi_rows * mi_cols * 4, + sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx]))); + } +#endif + vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); + CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr, + vpx_calloc(mi_rows * mi_cols, + sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr))); + cpi->tpl_stats[frame].is_valid = 0; + cpi->tpl_stats[frame].width = mi_cols; + cpi->tpl_stats[frame].height = mi_rows; + cpi->tpl_stats[frame].stride = mi_cols; + cpi->tpl_stats[frame].mi_rows = cm->mi_rows; + cpi->tpl_stats[frame].mi_cols = cm->mi_cols; + } + + for (frame = 0; frame < REF_FRAMES; ++frame) { + cpi->enc_frame_buf[frame].mem_valid = 0; + cpi->enc_frame_buf[frame].released = 1; + } +} + +static void free_tpl_buffer(VP9_COMP *cpi) { + int frame; +#if CONFIG_NON_GREEDY_MV + vp9_free_motion_field_info(&cpi->motion_field_info); + vpx_free(cpi->select_mv_arr); +#endif + for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { +#if CONFIG_NON_GREEDY_MV + int rf_idx; + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { + vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]); + vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]); + } +#endif + vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); + cpi->tpl_stats[frame].is_valid = 0; + } +} + +static void setup_tpl_stats(VP9_COMP *cpi) { + GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE]; + const GF_GROUP *gf_group = &cpi->twopass.gf_group; + int tpl_group_frames = 0; + int frame_idx; + cpi->tpl_bsize = BLOCK_32X32; + + 
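
init_tpl_buffer() above follows a grow-only allocation pattern: a frame's buffers are kept when they already cover the superblock-aligned mi grid, otherwise freed and re-calloc'ed, with failures routed through CHECK_MEM_ERROR. A minimal sketch of the same pattern, with abort() standing in for the encoder's error handler:

    #include <stdlib.h>

    typedef struct {
      int width, height; /* allocated grid size */
      double *stats;
    } ToyTplFrame;

    /* Grow-only (re)allocation, mirroring the init_tpl_buffer() shape. */
    static void ensure_tpl_frame(ToyTplFrame *f, int mi_rows, int mi_cols) {
      if (f->width >= mi_cols && f->height >= mi_rows && f->stats != NULL)
        return; /* existing buffer is large enough; keep it */
      free(f->stats);
      f->stats = calloc((size_t)mi_rows * mi_cols, sizeof(*f->stats));
      if (f->stats == NULL) abort(); /* CHECK_MEM_ERROR() analogue */
      f->width = mi_cols;
      f->height = mi_rows;
    }

    int main(void) {
      ToyTplFrame frame = { 0, 0, NULL };
      ensure_tpl_frame(&frame, 64, 64); /* allocates */
      ensure_tpl_frame(&frame, 32, 32); /* no-op: buffer already big enough */
      free(frame.stats);
      return 0;
    }
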
init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);
+
+ init_tpl_stats(cpi);
+
+ // Backward propagation from tpl_group_frames to 1.
+ for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
+ if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
+ mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize);
+ }
+#if CONFIG_NON_GREEDY_MV
+ cpi->tpl_ready = 1;
+#if DUMP_TPL_STATS
+ dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
+#endif  // DUMP_TPL_STATS
+#endif  // CONFIG_NON_GREEDY_MV
+}
+
+static void init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
+ encode_frame_result->show_idx = -1;  // Actual encoding doesn't happen.
+}
+
+#if !CONFIG_REALTIME_ONLY
+static void update_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result,
+ int show_idx,
+ FRAME_UPDATE_TYPE update_type,
+ const YV12_BUFFER_CONFIG *source_frame,
+ const YV12_BUFFER_CONFIG *coded_frame,
+ int quantize_index, uint32_t bit_depth,
+ uint32_t input_bit_depth) {
+ PSNR_STATS psnr;
+#if CONFIG_VP9_HIGHBITDEPTH
+ vpx_calc_highbd_psnr(source_frame, coded_frame, &psnr, bit_depth,
+ input_bit_depth);
+#else
+ (void)bit_depth;
+ (void)input_bit_depth;
+ vpx_calc_psnr(source_frame, coded_frame, &psnr);
+#endif
+ encode_frame_result->psnr = psnr.psnr[0];
+ encode_frame_result->sse = psnr.sse[0];
+ encode_frame_result->show_idx = show_idx;
+ encode_frame_result->update_type = update_type;
+ encode_frame_result->quantize_index = quantize_index;
+}
+#endif  // !CONFIG_REALTIME_ONLY
+
 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 size_t *size, uint8_t *dest, int64_t *time_stamp,
- int64_t *time_end, int flush) {
+ int64_t *time_end, int flush,
+ ENCODE_FRAME_RESULT *encode_frame_result) {
 const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 VP9_COMMON *const cm = &cpi->common;
 BufferPool *const pool = cm->buffer_pool;
@@ -5077,17 +7137,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 struct lookahead_entry *last_source = NULL;
 struct lookahead_entry *source = NULL;
 int arf_src_index;
+ const int gf_group_index = cpi->twopass.gf_group.index;
 int i;
+ init_encode_frame_result(encode_frame_result);
- if (is_two_pass_svc(cpi)) {
-#if CONFIG_SPATIAL_SVC
- vp9_svc_start_frame(cpi);
- // Use a small empty frame instead of a real frame
- if (cpi->svc.encode_empty_frame_state == ENCODING)
- source = &cpi->svc.empty_frame;
-#endif
- if (oxcf->pass == 2) vp9_restore_layer_context(cpi);
- } else if (is_one_pass_cbr_svc(cpi)) {
+ if (is_one_pass_cbr_svc(cpi)) {
 vp9_one_pass_cbr_svc_start_layer(cpi);
 }
@@ -5098,10 +7152,12 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 // Is multi-arf enabled.
 // Note that at the moment multi_arf is only configured for 2 pass VBR and
 // will not work properly with svc.
- if ((oxcf->pass == 2) && !cpi->use_svc && (cpi->oxcf.enable_auto_arf > 1))
- cpi->multi_arf_allowed = 1;
+ // Enable Jingning's new "multi_layer_arf" code if "enable_auto_arf"
+ // is greater than or equal to 2.
+ if ((oxcf->pass == 2) && !cpi->use_svc && (cpi->oxcf.enable_auto_arf >= 2))
+ cpi->multi_layer_arf = 1;
 else
- cpi->multi_arf_allowed = 0;
+ cpi->multi_layer_arf = 0;
 // Normal defaults
 cm->reset_frame_context = 0;
@@ -5115,9 +7171,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 // Should we encode an arf frame.
arf_src_index = get_arf_src_index(cpi); - // Skip alt frame if we encode the empty frame - if (is_two_pass_svc(cpi) && source != NULL) arf_src_index = 0; - if (arf_src_index) { for (i = 0; i <= arf_src_index; ++i) { struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i); @@ -5132,25 +7185,17 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } } + // Clear arf index stack before group of pictures processing starts. + if (gf_group_index == 1) { + stack_init(cpi->twopass.gf_group.arf_index_stack, MAX_LAG_BUFFERS * 2); + cpi->twopass.gf_group.stack_size = 0; + } + if (arf_src_index) { assert(arf_src_index <= rc->frames_to_key); - if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) { cpi->alt_ref_source = source; -#if CONFIG_SPATIAL_SVC - if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0) { - int i; - // Reference a hidden frame from a lower layer - for (i = cpi->svc.spatial_layer_id - 1; i >= 0; --i) { - if (oxcf->ss_enable_auto_arf[i]) { - cpi->gld_fb_idx = cpi->svc.layer_context[i].alt_ref_idx; - break; - } - } - } - cpi->svc.layer_context[cpi->svc.spatial_layer_id].has_alt_frame = 1; -#endif #if !CONFIG_REALTIME_ONLY if ((oxcf->mode != REALTIME) && (oxcf->arnr_max_frames > 0) && (oxcf->arnr_strength > 0)) { @@ -5192,7 +7237,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Read in the source frame. - if (cpi->use_svc) + if (cpi->use_svc || cpi->svc.set_intra_only_frame) source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush); else source = vp9_lookahead_pop(cpi->lookahead, flush); @@ -5200,9 +7245,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (source != NULL) { cm->show_frame = 1; cm->intra_only = 0; - // if the flags indicate intra frame, but if the current picture is for - // non-zero spatial layer, it should not be an intra picture. - if ((source->flags & VPX_EFLAG_FORCE_KF) && + // If the flags indicate intra frame, but if the current picture is for + // spatial layer above first_spatial_layer_to_encode, it should not be an + // intra picture. + if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc && cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) { source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF); } @@ -5227,15 +7273,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, *time_stamp = source->ts_start; *time_end = source->ts_end; *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? 
FRAMEFLAGS_KEY : 0; - } else { *size = 0; -#if !CONFIG_REALTIME_ONLY - if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) { - vp9_end_first_pass(cpi); /* get last stats packet */ - cpi->twopass.first_pass_done = 1; - } -#endif // !CONFIG_REALTIME_ONLY return -1; } @@ -5249,7 +7288,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // adjust frame rates based on timestamps given if (cm->show_frame) { - adjust_frame_rate(cpi, source); + if (cpi->use_svc && cpi->svc.use_set_ref_frame_config && + cpi->svc.duration[cpi->svc.spatial_layer_id] > 0) + vp9_svc_adjust_frame_rate(cpi); + else + adjust_frame_rate(cpi, source); } if (is_one_pass_cbr_svc(cpi)) { @@ -5268,24 +7311,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; - if (!cpi->use_svc && cpi->multi_arf_allowed) { - if (cm->frame_type == KEY_FRAME) { - init_buffer_indices(cpi); - } else if (oxcf->pass == 2) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - cpi->alt_fb_idx = gf_group->arf_ref_idx[gf_group->index]; - } - } - // Start with a 0 size frame. *size = 0; cpi->frame_flags = *frame_flags; #if !CONFIG_REALTIME_ONLY - if ((oxcf->pass == 2) && - (!cpi->use_svc || (is_two_pass_svc(cpi) && - cpi->svc.encode_empty_frame_state != ENCODING))) { + if ((oxcf->pass == 2) && !cpi->use_svc) { vp9_rc_get_second_pass_params(cpi); } else if (oxcf->pass == 1) { set_frame_size(cpi); @@ -5297,9 +7329,52 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, level_rc_framerate(cpi, arf_src_index); if (cpi->oxcf.pass != 0 || cpi->use_svc || frame_is_intra_only(cm) == 1) { - for (i = 0; i < MAX_REF_FRAMES; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX; + for (i = 0; i < REFS_PER_FRAME; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX; } + if (cpi->kmeans_data_arr_alloc == 0) { + const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); +#if CONFIG_MULTITHREAD + pthread_mutex_init(&cpi->kmeans_mutex, NULL); +#endif + CHECK_MEM_ERROR( + cm, cpi->kmeans_data_arr, + vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->kmeans_data_arr))); + cpi->kmeans_data_stride = mi_cols; + cpi->kmeans_data_arr_alloc = 1; + } + +#if CONFIG_NON_GREEDY_MV + { + const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); + Status status = vp9_alloc_motion_field_info( + &cpi->motion_field_info, MAX_ARF_GOP_SIZE, mi_rows, mi_cols); + if (status == STATUS_FAILED) { + vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR, + "vp9_alloc_motion_field_info failed"); + } + } +#endif // CONFIG_NON_GREEDY_MV + + if (gf_group_index == 1 && + cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE && + cpi->sf.enable_tpl_model) { + init_tpl_buffer(cpi); + vp9_estimate_qp_gop(cpi); + setup_tpl_stats(cpi); + } + +#if CONFIG_BITSTREAM_DEBUG + assert(cpi->oxcf.max_threads == 0 && + "bitstream debug tool does not support multithreading"); + bitstream_queue_record_write(); +#endif +#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG + bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame); +#endif + cpi->td.mb.fp_src_pred = 0; #if CONFIG_REALTIME_ONLY if (cpi->use_svc) { @@ -5309,7 +7384,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, Pass0Encode(cpi, size, dest, frame_flags); } #else // !CONFIG_REALTIME_ONLY - if (oxcf->pass == 1 && (!cpi->use_svc || is_two_pass_svc(cpi))) { + if (oxcf->pass == 1 && 
!cpi->use_svc) {
 const int lossless = is_lossless_requested(oxcf);
#if CONFIG_VP9_HIGHBITDEPTH
 if (cpi->oxcf.use_highbitdepth)
@@ -5324,8 +7399,27 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
#endif  // CONFIG_VP9_HIGHBITDEPTH
 cpi->td.mb.inv_txfm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
 vp9_first_pass(cpi, source);
- } else if (oxcf->pass == 2 && (!cpi->use_svc || is_two_pass_svc(cpi))) {
+ } else if (oxcf->pass == 2 && !cpi->use_svc) {
 Pass2Encode(cpi, size, dest, frame_flags);
+ // update_encode_frame_result() depends on twopass.gf_group.index,
+ // cm->new_fb_idx and cpi->Source being properly updated for the current
+ // frame and not yet updated for the next frame.
+ // The update locations are as follows.
+ // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero()
+ // for the first frame in the gf_group and is updated for the next frame at
+ // vp9_twopass_postencode_update().
+ // 2) cpi->Source is updated at the beginning of this function, i.e.
+ // vp9_get_compressed_data()
+ // 3) cm->new_fb_idx is updated at the beginning of this function by
+ // get_free_fb(cm)
+ // TODO(angiebird): Improve the codebase to make the update of frame
+ // dependent variables more robust.
+ update_encode_frame_result(
+ encode_frame_result, source->show_idx,
+ cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
+ cpi->Source, get_frame_new_buffer(cm), vp9_get_quantizer(cpi),
+ cpi->oxcf.input_bit_depth, cm->bit_depth);
+ vp9_twopass_postencode_update(cpi);
 } else if (cpi->use_svc) {
 SvcEncode(cpi, size, dest, frame_flags);
 } else {
@@ -5334,6 +7428,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 }
#endif  // CONFIG_REALTIME_ONLY
+ if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx;
+
 if (cm->refresh_frame_context)
 cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
@@ -5356,9 +7452,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 vpx_usec_timer_mark(&cmptimer);
 cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
- // Should we calculate metrics for the frame.
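
update_encode_frame_result() above stores psnr.psnr[0] and sse[0] as produced by vpx_calc_psnr(). For reference, PSNR relates to the summed squared error by the standard 10 * log10(peak^2 * samples / sse); a sketch of that formula, assuming the 8-bit peak of 255 (the capped value for sse == 0 is a common convention, not taken from this patch):

    #include <math.h>
    #include <stdio.h>

    /* Standard PSNR from summed squared error over `samples` 8-bit samples. */
    static double psnr_from_sse(double sse, double samples) {
      const double peak = 255.0;
      if (sse == 0.0) return 100.0; /* conventional cap for a perfect match */
      return 10.0 * log10(peak * peak * samples / sse);
    }

    int main(void) {
      /* e.g. a 352x288 luma plane with mean squared error 4 per pixel */
      double samples = 352.0 * 288.0;
      printf("%.2f dB\n", psnr_from_sse(4.0 * samples, samples)); /* ~42.11 */
      return 0;
    }
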
- if (is_psnr_calc_enabled(cpi)) generate_psnr_packet(cpi); - if (cpi->keep_level_stats && oxcf->pass != 1) update_level_info(cpi, size, arf_src_index); @@ -5416,7 +7509,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, ppflags.post_proc_flag = VP9D_DEBLOCK; ppflags.deblocking_level = 0; // not used in vp9_post_proc_frame() ppflags.noise_level = 0; // not used in vp9_post_proc_frame() - vp9_post_proc_frame(cm, pp, &ppflags); + vp9_post_proc_frame(cm, pp, &ppflags, + cpi->un_scaled_source->y_width); } #endif vpx_clear_system_state(); @@ -5462,11 +7556,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->summedp_quality += frame_ssim2 * weight; cpi->summedp_weights += weight; #if 0 - { + if (cm->show_frame) { FILE *f = fopen("q_used.stt", "a"); fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n", - cpi->common.current_video_frame, y2, u2, v2, - frame_psnr2, frame_ssim2); + cpi->common.current_video_frame, psnr2.psnr[1], + psnr2.psnr[2], psnr2.psnr[3], psnr2.psnr[0], frame_ssim2); fclose(f); } #endif @@ -5525,21 +7619,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #endif - if (is_two_pass_svc(cpi)) { - if (cpi->svc.encode_empty_frame_state == ENCODING) { - cpi->svc.encode_empty_frame_state = ENCODED; - cpi->svc.encode_intra_empty_frame = 0; - } - - if (cm->show_frame) { - ++cpi->svc.spatial_layer_to_encode; - if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers) - cpi->svc.spatial_layer_to_encode = 0; - - // May need the empty frame after an visible frame. - cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE; - } - } else if (is_one_pass_cbr_svc(cpi)) { + if (is_one_pass_cbr_svc(cpi)) { if (cm->show_frame) { ++cpi->svc.spatial_layer_to_encode; if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers) @@ -5563,7 +7643,7 @@ int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, } else { int ret; #if CONFIG_VP9_POSTPROC - ret = vp9_post_proc_frame(cm, dest, flags); + ret = vp9_post_proc_frame(cm, dest, flags, cpi->un_scaled_source->y_width); #else if (cm->frame_to_show) { *dest = *cm->frame_to_show; @@ -5608,15 +7688,15 @@ int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, unsigned int height) { VP9_COMMON *cm = &cpi->common; #if CONFIG_VP9_HIGHBITDEPTH - check_initial_width(cpi, cm->use_highbitdepth, 1, 1); + update_initial_width(cpi, cm->use_highbitdepth, 1, 1); #else - check_initial_width(cpi, 1, 1); + update_initial_width(cpi, 0, 1, 1); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_TEMPORAL_DENOISING setup_denoiser_buffer(cpi); #endif - + alloc_raw_frame_buffers(cpi); if (width) { cm->width = width; if (cm->width > cpi->initial_width) { @@ -5645,7 +7725,7 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc) { return; } -int vp9_get_quantizer(VP9_COMP *cpi) { return cpi->common.base_qindex; } +int vp9_get_quantizer(const VP9_COMP *cpi) { return cpi->common.base_qindex; } void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) { if (flags & diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_encoder.h b/media/libvpx/libvpx/vp9/encoder/vp9_encoder.h index d723d93cbc16..0a8623efb65f 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_encoder.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_encoder.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_ENCODER_VP9_ENCODER_H_ -#define VP9_ENCODER_VP9_ENCODER_H_ +#ifndef VPX_VP9_ENCODER_VP9_ENCODER_H_ +#define VPX_VP9_ENCODER_VP9_ENCODER_H_ #include @@ -20,8 +20,10 @@ #include "vpx_dsp/ssim.h" #endif #include "vpx_dsp/variance.h" +#include "vpx_dsp/psnr.h" #include "vpx_ports/system_state.h" #include "vpx_util/vpx_thread.h" +#include "vpx_util/vpx_timestamp.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_ppflags.h" @@ -29,7 +31,9 @@ #include "vp9/common/vp9_thread_common.h" #include "vp9/common/vp9_onyxc_int.h" +#if !CONFIG_REALTIME_ONLY #include "vp9/encoder/vp9_alt_ref_aq.h" +#endif #include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/encoder/vp9_context_tree.h" #include "vp9/encoder/vp9_encodemb.h" @@ -119,9 +123,11 @@ typedef enum { COMPLEXITY_AQ = 2, CYCLIC_REFRESH_AQ = 3, EQUATOR360_AQ = 4, + PERCEPTUAL_AQ = 5, + PSNR_AQ = 6, // AQ based on lookahead temporal // variance (only valid for altref frames) - LOOKAHEAD_AQ = 5, + LOOKAHEAD_AQ = 7, AQ_MODE_COUNT // This should always be the last member of the enum } AQ_MODE; @@ -148,7 +154,10 @@ typedef struct VP9EncoderConfig { int height; // height of data passed to the compressor unsigned int input_bit_depth; // Input bit depth. double init_framerate; // set to passed in framerate - int64_t target_bandwidth; // bandwidth to be used in bits per second + vpx_rational_t g_timebase; // equivalent to g_timebase in vpx_codec_enc_cfg_t + vpx_rational64_t g_timebase_in_ts; // g_timebase * TICKS_PER_SEC + + int64_t target_bandwidth; // bandwidth to be used in bits per second int noise_sensitivity; // pre processing blur: recommendation 0 int sharpness; // sharpening output: recommendation 0: @@ -248,12 +257,13 @@ typedef struct VP9EncoderConfig { int tile_columns; int tile_rows; + int enable_tpl_model; + int max_threads; unsigned int target_level; vpx_fixed_buf_t two_pass_stats_in; - struct vpx_codec_pkt_list *output_pkt_list; #if CONFIG_FP_MB_STATS vpx_fixed_buf_t firstpass_mb_stats_in; @@ -278,11 +288,52 @@ static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0; } +typedef struct TplDepStats { + int64_t intra_cost; + int64_t inter_cost; + int64_t mc_flow; + int64_t mc_dep_cost; + int64_t mc_ref_cost; + + int ref_frame_index; + int_mv mv; +} TplDepStats; + +#if CONFIG_NON_GREEDY_MV + +#define ZERO_MV_MODE 0 +#define NEW_MV_MODE 1 +#define NEAREST_MV_MODE 2 +#define NEAR_MV_MODE 3 +#define MAX_MV_MODE 4 +#endif + +typedef struct TplDepFrame { + uint8_t is_valid; + TplDepStats *tpl_stats_ptr; + int stride; + int width; + int height; + int mi_rows; + int mi_cols; + int base_qindex; +#if CONFIG_NON_GREEDY_MV + int lambda; + int *mv_mode_arr[3]; + double *rd_diff_arr[3]; +#endif +} TplDepFrame; + +#define TPL_DEP_COST_SCALE_LOG2 4 + // TODO(jingning) All spatially adaptive variables should go to TileDataEnc. 
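
TPL_DEP_COST_SCALE_LOG2 above fixes the fixed-point scale of the TplDepStats costs: mode_estimation() stores each block cost shifted left by 4 and normalized per 8x8 mode-info unit, i.e. (cost << 4) / (mi_height * mi_width). The same arithmetic with invented SATD values for one 32x32 block:

    #include <stdio.h>

    #define TPL_DEP_COST_SCALE_LOG2 4

    int main(void) {
      /* Invented SATD costs for one 32x32 block (4x4 = 16 8x8 units). */
      long long best_inter_cost = 900, best_intra_cost = 1500;
      const int mi_width = 4, mi_height = 4;

      long long inter = (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) /
                        (mi_height * mi_width); /* 900 * 16 / 16 = 900 */
      long long intra = (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) /
                        (mi_height * mi_width); /* 1500 */

      /* Scaled, per-8x8 costs as stored in TplDepStats. */
      printf("inter %lld intra %lld\n", inter, intra);
      return 0;
    }
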
typedef struct TileDataEnc { TileInfo tile_info; int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; - int mode_map[BLOCK_SIZES][MAX_MODES]; +#if CONFIG_CONSISTENT_RECODE + int thresh_freq_fact_prev[BLOCK_SIZES][MAX_MODES]; +#endif + int8_t mode_map[BLOCK_SIZES][MAX_MODES]; FIRSTPASS_DATA fp_data; VP9RowMTSync row_mt_sync; @@ -436,7 +487,6 @@ typedef enum { typedef struct { int8_t level_index; - uint8_t rc_config_updated; uint8_t fail_flag; int max_frame_size; // in bits double max_cpb_size; // in bits @@ -450,7 +500,49 @@ typedef struct ARNRFilterData { struct scale_factors sf; } ARNRFilterData; +typedef struct EncFrameBuf { + int mem_valid; + int released; + YV12_BUFFER_CONFIG frame; +} EncFrameBuf; + +// Maximum operating frame buffer size needed for a GOP using ARF reference. +#define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS) +#define MAX_KMEANS_GROUPS 8 + +typedef struct KMEANS_DATA { + double value; + int pos; + int group_idx; +} KMEANS_DATA; + +#if CONFIG_RATE_CTRL +typedef struct ENCODE_COMMAND { + int use_external_quantize_index; + int external_quantize_index; +} ENCODE_COMMAND; + +static INLINE void encode_command_init(ENCODE_COMMAND *encode_command) { + vp9_zero(*encode_command); + encode_command->use_external_quantize_index = 0; + encode_command->external_quantize_index = -1; +} + +static INLINE void encode_command_set_external_quantize_index( + ENCODE_COMMAND *encode_command, int quantize_index) { + encode_command->use_external_quantize_index = 1; + encode_command->external_quantize_index = quantize_index; +} + +static INLINE void encode_command_reset_external_quantize_index( + ENCODE_COMMAND *encode_command) { + encode_command->use_external_quantize_index = 0; + encode_command->external_quantize_index = -1; +} +#endif // CONFIG_RATE_CTRL + typedef struct VP9_COMP { + FRAME_INFO frame_info; QUANTS quants; ThreadData td; MB_MODE_INFO_EXT *mbmi_ext_base; @@ -473,17 +565,40 @@ typedef struct VP9_COMP { #endif YV12_BUFFER_CONFIG *raw_source_frame; + BLOCK_SIZE tpl_bsize; + TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE]; + YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES]; + EncFrameBuf enc_frame_buf[REF_FRAMES]; +#if CONFIG_MULTITHREAD + pthread_mutex_t kmeans_mutex; +#endif + int kmeans_data_arr_alloc; + KMEANS_DATA *kmeans_data_arr; + int kmeans_data_size; + int kmeans_data_stride; + double kmeans_ctr_ls[MAX_KMEANS_GROUPS]; + double kmeans_boundary_ls[MAX_KMEANS_GROUPS]; + int kmeans_count_ls[MAX_KMEANS_GROUPS]; + int kmeans_ctr_num; +#if CONFIG_NON_GREEDY_MV + MotionFieldInfo motion_field_info; + int tpl_ready; + int_mv *select_mv_arr; +#endif + TileDataEnc *tile_data; int allocated_tiles; // Keep track of memory allocated for tiles. // For a still frame, this flag is set to 1 to skip partition search. 
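
The CONFIG_RATE_CTRL block above introduces a small external-control API: set an explicit quantize index before encoding a frame, then reset to hand control back to the normal rate controller. A standalone sketch of that set/use/reset cycle; the struct and init are copied from the hunk with vp9_zero() replaced by memset() so it builds on its own, and the exact point where the encoder consults the command is assumed rather than shown in this patch:

    #include <stdio.h>
    #include <string.h>

    /* Local copy of the CONFIG_RATE_CTRL type from the hunk above. */
    typedef struct ENCODE_COMMAND {
      int use_external_quantize_index;
      int external_quantize_index;
    } ENCODE_COMMAND;

    static void encode_command_init(ENCODE_COMMAND *cmd) {
      memset(cmd, 0, sizeof(*cmd)); /* vp9_zero() analogue */
      cmd->use_external_quantize_index = 0;
      cmd->external_quantize_index = -1;
    }

    int main(void) {
      ENCODE_COMMAND cmd;
      encode_command_init(&cmd);

      /* Force a fixed q for one frame (45 is an arbitrary example index). */
      cmd.use_external_quantize_index = 1;
      cmd.external_quantize_index = 45;
      if (cmd.use_external_quantize_index) /* encoder-side check, assumed */
        printf("encode frame at qindex %d\n", cmd.external_quantize_index);

      /* Return control to the normal rate controller. */
      cmd.use_external_quantize_index = 0;
      cmd.external_quantize_index = -1;
      return 0;
    }
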
int partition_search_skippable_frame; - int scaled_ref_idx[MAX_REF_FRAMES]; + int scaled_ref_idx[REFS_PER_FRAME]; int lst_fb_idx; int gld_fb_idx; int alt_fb_idx; + int ref_fb_idx[REF_FRAMES]; + int refresh_last_frame; int refresh_golden_frame; int refresh_alt_ref_frame; @@ -496,10 +611,15 @@ typedef struct VP9_COMP { int ext_refresh_frame_context_pending; int ext_refresh_frame_context; + int64_t norm_wiener_variance; + int64_t *mb_wiener_variance; + int mb_wiener_var_rows; + int mb_wiener_var_cols; + double *mi_ssim_rdmult_scaling_factors; + YV12_BUFFER_CONFIG last_frame_uf; TOKENEXTRA *tile_tok[4][1 << 6]; - uint32_t tok_count[4][1 << 6]; TOKENLIST *tplist[4][1 << 6]; // Ambient reconstruction err target for force key frames @@ -521,7 +641,7 @@ typedef struct VP9_COMP { RATE_CONTROL rc; double framerate; - int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]; + int interp_filter_selected[REF_FRAMES][SWITCHABLE]; struct vpx_codec_pkt_list *output_pkt_list; @@ -555,6 +675,7 @@ typedef struct VP9_COMP { ActiveMap active_map; fractional_mv_step_fp *find_fractional_mv_step; + struct scale_factors me_sf; vp9_diamond_search_fn_t diamond_search_sad; vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES]; uint64_t time_receive_data; @@ -645,10 +766,8 @@ typedef struct VP9_COMP { int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES]; - - int multi_arf_allowed; - int multi_arf_enabled; - int multi_arf_last_grp_enabled; + // Indices are: max_tx_size-1, tx_size_ctx, tx_size + int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES]; #if CONFIG_VP9_TEMPORAL_DENOISING VP9_DENOISER denoiser; @@ -723,11 +842,26 @@ typedef struct VP9_COMP { uint8_t *count_arf_frame_usage; uint8_t *count_lastgolden_frame_usage; + + int multi_layer_arf; + vpx_roi_map_t roi; +#if CONFIG_RATE_CTRL + ENCODE_COMMAND encode_command; +#endif } VP9_COMP; +typedef struct ENCODE_FRAME_RESULT { + int show_idx; + FRAME_UPDATE_TYPE update_type; + double psnr; + uint64_t sse; + int quantize_index; +} ENCODE_FRAME_RESULT; + void vp9_initialize_enc(void); -struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, +void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt); +struct VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf, BufferPool *const pool); void vp9_remove_compressor(VP9_COMP *cpi); @@ -737,11 +871,12 @@ void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf); // frame is made and not just a copy of the pointer.. 
int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, - int64_t end_time_stamp); + int64_t end_time); int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, - int64_t *time_end, int flush); + int64_t *time_end, int flush, + ENCODE_FRAME_RESULT *encode_frame_result); int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *flags); @@ -758,9 +893,11 @@ int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, int vp9_update_entropy(VP9_COMP *cpi, int update); -int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols); +int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows, + int cols); -int vp9_get_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols); +int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows, + int cols); int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode, VPX_SCALING vert_mode); @@ -770,7 +907,28 @@ int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, void vp9_set_svc(VP9_COMP *cpi, int use_svc); -int vp9_get_quantizer(struct VP9_COMP *cpi); +static INLINE int stack_pop(int *stack, int stack_size) { + int idx; + const int r = stack[0]; + for (idx = 1; idx < stack_size; ++idx) stack[idx - 1] = stack[idx]; + + return r; +} + +static INLINE int stack_top(const int *stack) { return stack[0]; } + +static INLINE void stack_push(int *stack, int new_item, int stack_size) { + int idx; + for (idx = stack_size; idx > 0; --idx) stack[idx] = stack[idx - 1]; + stack[0] = new_item; +} + +static INLINE void stack_init(int *stack, int length) { + int idx; + for (idx = 0; idx < length; ++idx) stack[idx] = -1; +} + +int vp9_get_quantizer(const VP9_COMP *cpi); static INLINE int frame_is_kf_gf_arf(const VP9_COMP *cpi) { return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || @@ -795,9 +953,13 @@ static INLINE int get_ref_frame_buf_idx(const VP9_COMP *const cpi, return (map_idx != INVALID_IDX) ? cm->ref_frame_map[map_idx] : INVALID_IDX; } +static INLINE RefCntBuffer *get_ref_cnt_buffer(VP9_COMMON *cm, int fb_idx) { + return fb_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[fb_idx] : NULL; +} + static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( - VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { - VP9_COMMON *const cm = &cpi->common; + const VP9_COMP *const cpi, MV_REFERENCE_FRAME ref_frame) { + const VP9_COMMON *const cm = &cpi->common; const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); return buf_idx != INVALID_IDX ? 
&cm->buffer_pool->frame_bufs[buf_idx].buf : NULL; @@ -858,19 +1020,14 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required( void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); -static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) { - return cpi->use_svc && cpi->oxcf.pass != 0; -} - static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) { return (cpi->use_svc && cpi->oxcf.pass == 0); } #if CONFIG_VP9_TEMPORAL_DENOISING static INLINE int denoise_svc(const struct VP9_COMP *const cpi) { - return (!cpi->use_svc || - (cpi->use_svc && - cpi->svc.spatial_layer_id >= cpi->svc.first_layer_denoise)); + return (!cpi->use_svc || (cpi->use_svc && cpi->svc.spatial_layer_id >= + cpi->svc.first_layer_denoise)); } #endif @@ -878,12 +1035,10 @@ static INLINE int denoise_svc(const struct VP9_COMP *const cpi) { static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { return !(cpi->oxcf.mode == REALTIME && cpi->oxcf.rc_mode == VPX_CBR) && cpi->oxcf.lag_in_frames >= MIN_LOOKAHEAD_FOR_ARFS && - (cpi->oxcf.enable_auto_arf && - (!is_two_pass_svc(cpi) || - cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id])); + cpi->oxcf.enable_auto_arf; } -static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, +static INLINE void set_ref_ptrs(const VP9_COMMON *const cm, MACROBLOCKD *xd, MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) { xd->block_refs[0] = @@ -938,14 +1093,20 @@ static INLINE int log_tile_cols_from_picsize_level(uint32_t width, VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec); +int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows, + unsigned int cols, int delta_q[8], int delta_lf[8], + int skip[8], int ref_frame[8]); + void vp9_new_framerate(VP9_COMP *cpi, double framerate); void vp9_set_row_mt(VP9_COMP *cpi); +int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr); + #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl)) #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_ENCODER_H_ +#endif // VPX_VP9_ENCODER_VP9_ENCODER_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_ethread.c b/media/libvpx/libvpx/vp9/encoder/vp9_ethread.c index 0bd2e21451a9..e7f8a537d475 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_ethread.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_ethread.c @@ -270,19 +270,19 @@ void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, VP9_COMMON *cm, { int i; - CHECK_MEM_ERROR(cm, row_mt_sync->mutex_, - vpx_malloc(sizeof(*row_mt_sync->mutex_) * rows)); - if (row_mt_sync->mutex_) { + CHECK_MEM_ERROR(cm, row_mt_sync->mutex, + vpx_malloc(sizeof(*row_mt_sync->mutex) * rows)); + if (row_mt_sync->mutex) { for (i = 0; i < rows; ++i) { - pthread_mutex_init(&row_mt_sync->mutex_[i], NULL); + pthread_mutex_init(&row_mt_sync->mutex[i], NULL); } } - CHECK_MEM_ERROR(cm, row_mt_sync->cond_, - vpx_malloc(sizeof(*row_mt_sync->cond_) * rows)); - if (row_mt_sync->cond_) { + CHECK_MEM_ERROR(cm, row_mt_sync->cond, + vpx_malloc(sizeof(*row_mt_sync->cond) * rows)); + if (row_mt_sync->cond) { for (i = 0; i < rows; ++i) { - pthread_cond_init(&row_mt_sync->cond_[i], NULL); + pthread_cond_init(&row_mt_sync->cond[i], NULL); } } } @@ -301,17 +301,17 @@ void vp9_row_mt_sync_mem_dealloc(VP9RowMTSync *row_mt_sync) { #if CONFIG_MULTITHREAD int i; - if (row_mt_sync->mutex_ != NULL) { + if (row_mt_sync->mutex != NULL) { for (i = 0; i < row_mt_sync->rows; ++i) { - pthread_mutex_destroy(&row_mt_sync->mutex_[i]); + pthread_mutex_destroy(&row_mt_sync->mutex[i]); } - vpx_free(row_mt_sync->mutex_); + 
vpx_free(row_mt_sync->mutex); } - if (row_mt_sync->cond_ != NULL) { + if (row_mt_sync->cond != NULL) { for (i = 0; i < row_mt_sync->rows; ++i) { - pthread_cond_destroy(&row_mt_sync->cond_[i]); + pthread_cond_destroy(&row_mt_sync->cond[i]); } - vpx_free(row_mt_sync->cond_); + vpx_free(row_mt_sync->cond); } #endif // CONFIG_MULTITHREAD vpx_free(row_mt_sync->cur_col); @@ -327,11 +327,11 @@ void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c) { const int nsync = row_mt_sync->sync_range; if (r && !(c & (nsync - 1))) { - pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1]; + pthread_mutex_t *const mutex = &row_mt_sync->mutex[r - 1]; pthread_mutex_lock(mutex); while (c > row_mt_sync->cur_col[r - 1] - nsync + 1) { - pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex); + pthread_cond_wait(&row_mt_sync->cond[r - 1], mutex); } pthread_mutex_unlock(mutex); } @@ -365,12 +365,12 @@ void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c, } if (sig) { - pthread_mutex_lock(&row_mt_sync->mutex_[r]); + pthread_mutex_lock(&row_mt_sync->mutex[r]); row_mt_sync->cur_col[r] = cur; - pthread_cond_signal(&row_mt_sync->cond_[r]); - pthread_mutex_unlock(&row_mt_sync->mutex_[r]); + pthread_cond_signal(&row_mt_sync->cond[r]); + pthread_mutex_unlock(&row_mt_sync->mutex[r]); } #else (void)row_mt_sync; @@ -390,8 +390,9 @@ void vp9_row_mt_sync_write_dummy(VP9RowMTSync *const row_mt_sync, int r, int c, } #if !CONFIG_REALTIME_ONLY -static int first_pass_worker_hook(EncWorkerData *const thread_data, - MultiThreadHandle *multi_thread_ctxt) { +static int first_pass_worker_hook(void *arg1, void *arg2) { + EncWorkerData *const thread_data = (EncWorkerData *)arg1; + MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2; VP9_COMP *const cpi = thread_data->cpi; const VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; @@ -470,8 +471,8 @@ void vp9_encode_fp_row_mt(VP9_COMP *cpi) { } } - launch_enc_workers(cpi, (VPxWorkerHook)first_pass_worker_hook, - multi_thread_ctxt, num_workers); + launch_enc_workers(cpi, first_pass_worker_hook, multi_thread_ctxt, + num_workers); first_tile_col = &cpi->tile_data[0]; for (i = 1; i < tile_cols; i++) { @@ -480,8 +481,9 @@ void vp9_encode_fp_row_mt(VP9_COMP *cpi) { } } -static int temporal_filter_worker_hook(EncWorkerData *const thread_data, - MultiThreadHandle *multi_thread_ctxt) { +static int temporal_filter_worker_hook(void *arg1, void *arg2) { + EncWorkerData *const thread_data = (EncWorkerData *)arg1; + MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2; VP9_COMP *const cpi = thread_data->cpi; const VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; @@ -508,8 +510,8 @@ static int temporal_filter_worker_hook(EncWorkerData *const thread_data, tile_col = proc_job->tile_col_id; tile_row = proc_job->tile_row_id; this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; - mb_col_start = (this_tile->tile_info.mi_col_start) >> 1; - mb_col_end = (this_tile->tile_info.mi_col_end + 1) >> 1; + mb_col_start = (this_tile->tile_info.mi_col_start) >> TF_SHIFT; + mb_col_end = (this_tile->tile_info.mi_col_end + TF_ROUND) >> TF_SHIFT; mb_row = proc_job->vert_unit_row_num; vp9_temporal_filter_iterate_row_c(cpi, thread_data->td, mb_row, @@ -553,13 +555,14 @@ void vp9_temporal_filter_row_mt(VP9_COMP *cpi) { } } - launch_enc_workers(cpi, (VPxWorkerHook)temporal_filter_worker_hook, - multi_thread_ctxt, num_workers); + launch_enc_workers(cpi, temporal_filter_worker_hook, 
multi_thread_ctxt, + num_workers); } #endif // !CONFIG_REALTIME_ONLY -static int enc_row_mt_worker_hook(EncWorkerData *const thread_data, - MultiThreadHandle *multi_thread_ctxt) { +static int enc_row_mt_worker_hook(void *arg1, void *arg2) { + EncWorkerData *const thread_data = (EncWorkerData *)arg1; + MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2; VP9_COMP *const cpi = thread_data->cpi; const VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; @@ -648,8 +651,8 @@ void vp9_encode_tiles_row_mt(VP9_COMP *cpi) { } } - launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook, - multi_thread_ctxt, num_workers); + launch_enc_workers(cpi, enc_row_mt_worker_hook, multi_thread_ctxt, + num_workers); for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_ethread.h b/media/libvpx/libvpx/vp9/encoder/vp9_ethread.h index a396e621d7dc..cda0293bcf0d 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_ethread.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_ethread.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_ETHREAD_H_ -#define VP9_ENCODER_VP9_ETHREAD_H_ +#ifndef VPX_VP9_ENCODER_VP9_ETHREAD_H_ +#define VPX_VP9_ENCODER_VP9_ETHREAD_H_ #ifdef __cplusplus extern "C" { @@ -33,8 +33,8 @@ typedef struct EncWorkerData { // Encoder row synchronization typedef struct VP9RowMTSyncData { #if CONFIG_MULTITHREAD - pthread_mutex_t *mutex_; - pthread_cond_t *cond_; + pthread_mutex_t *mutex; + pthread_cond_t *cond; #endif // Allocate memory to store the sb/mb block index in each row. int *cur_col; @@ -69,4 +69,4 @@ void vp9_temporal_filter_row_mt(struct VP9_COMP *cpi); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_ETHREAD_H_ +#endif // VPX_VP9_ENCODER_VP9_ETHREAD_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_extend.h b/media/libvpx/libvpx/vp9/encoder/vp9_extend.h index c0dd75715965..4ba7fc95e3dd 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_extend.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_extend.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_ENCODER_VP9_EXTEND_H_ -#define VP9_ENCODER_VP9_EXTEND_H_ +#ifndef VPX_VP9_ENCODER_VP9_EXTEND_H_ +#define VPX_VP9_ENCODER_VP9_EXTEND_H_ #include "vpx_scale/yv12config.h" #include "vpx/vpx_integer.h" @@ -28,4 +28,4 @@ void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, } // extern "C" #endif -#endif // VP9_ENCODER_VP9_EXTEND_H_ +#endif // VPX_VP9_ENCODER_VP9_EXTEND_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_firstpass.c b/media/libvpx/libvpx/vp9/encoder/vp9_firstpass.c index fb6b132a5b44..57ab583cfcf4 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_firstpass.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_firstpass.c @@ -44,15 +44,11 @@ #define COMPLEXITY_STATS_OUTPUT 0 #define FIRST_PASS_Q 10.0 -#define INTRA_MODE_PENALTY 1024 -#define MIN_ARF_GF_BOOST 240 +#define NORMAL_BOOST 100 +#define MIN_ARF_GF_BOOST 250 #define MIN_DECAY_FACTOR 0.01 #define NEW_MV_MODE_PENALTY 32 #define DARK_THRESH 64 -#define DEFAULT_GRP_WEIGHT 1.0 -#define RC_FACTOR_MIN 0.75 -#define RC_FACTOR_MAX 1.75 -#define SECTION_NOISE_DEF 250.0 #define LOW_I_THRESH 24000 #define NCOUNT_INTRA_THRESH 8192 @@ -88,14 +84,8 @@ static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) { return 1; } -static void output_stats(FIRSTPASS_STATS *stats, - struct vpx_codec_pkt_list *pktlist) { - struct vpx_codec_cx_pkt pkt; - pkt.kind = VPX_CODEC_STATS_PKT; - pkt.data.twopass_stats.buf = stats; - pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS); - vpx_codec_pkt_list_add(pktlist, &pkt); - +static void output_stats(FIRSTPASS_STATS *stats) { + (void)stats; // TEMP debug code #if OUTPUT_FPF { @@ -105,7 +95,7 @@ static void output_stats(FIRSTPASS_STATS *stats, fprintf(fpfile, "%12.0lf %12.4lf %12.2lf %12.2lf %12.2lf %12.0lf %12.4lf %12.4lf" "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf" - "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.0lf %12.0lf %12.0lf" + "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.0lf %12.4lf %12.0lf" "%12.4lf" "\n", stats->frame, stats->weight, stats->intra_error, stats->coded_error, @@ -224,14 +214,14 @@ static void subtract_stats(FIRSTPASS_STATS *section, // bars and partially discounts other 0 energy areas. #define MIN_ACTIVE_AREA 0.5 #define MAX_ACTIVE_AREA 1.0 -static double calculate_active_area(const VP9_COMP *cpi, +static double calculate_active_area(const FRAME_INFO *frame_info, const FIRSTPASS_STATS *this_frame) { double active_pct; active_pct = 1.0 - ((this_frame->intra_skip_pct / 2) + - ((this_frame->inactive_zone_rows * 2) / (double)cpi->common.mb_rows)); + ((this_frame->inactive_zone_rows * 2) / (double)frame_info->mb_rows)); return fclamp(active_pct, MIN_ACTIVE_AREA, MAX_ACTIVE_AREA); } @@ -264,17 +254,16 @@ static double calculate_mod_frame_score(const VP9_COMP *cpi, // remaining active MBs. The correction here assumes that coding // 0.5N blocks of complexity 2X is a little easier than coding N // blocks of complexity X. 
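[Note] calculate_active_area(), refactored above to depend only on FRAME_INFO, clamps the weighting to [MIN_ACTIVE_AREA, MAX_ACTIVE_AREA] = [0.5, 1.0]. A worked instance with illustrative numbers:

    /* 10% of MBs intra-skipped, 8 inactive rows in a 45-MB-row frame. */
    const double intra_skip_pct = 0.10;
    const double inactive_zone_rows = 8.0;
    const double mb_rows = 45.0;
    const double active =
        1.0 - ((intra_skip_pct / 2) + ((inactive_zone_rows * 2) / mb_rows));
    /* active ~= 0.594, inside [0.5, 1.0], so fclamp() leaves it alone. */
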
- modified_score *= - pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION); + modified_score *= pow(calculate_active_area(&cpi->frame_info, this_frame), + ACT_AREA_CORRECTION); return modified_score; } -static double calculate_norm_frame_score(const VP9_COMP *cpi, - const TWO_PASS *twopass, - const VP9EncoderConfig *oxcf, - const FIRSTPASS_STATS *this_frame, - const double av_err) { +static double calc_norm_frame_score(const VP9EncoderConfig *oxcf, + const FRAME_INFO *frame_info, + const FIRSTPASS_STATS *this_frame, + double mean_mod_score, double av_err) { double modified_score = av_err * pow(this_frame->coded_error * this_frame->weight / DOUBLE_DIVIDE_CHECK(av_err), @@ -289,14 +278,22 @@ static double calculate_norm_frame_score(const VP9_COMP *cpi, // 0.5N blocks of complexity 2X is a little easier than coding N // blocks of complexity X. modified_score *= - pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION); + pow(calculate_active_area(frame_info, this_frame), ACT_AREA_CORRECTION); // Normalize to a midpoint score. - modified_score /= DOUBLE_DIVIDE_CHECK(twopass->mean_mod_score); - + modified_score /= DOUBLE_DIVIDE_CHECK(mean_mod_score); return fclamp(modified_score, min_score, max_score); } +static double calculate_norm_frame_score(const VP9_COMP *cpi, + const TWO_PASS *twopass, + const VP9EncoderConfig *oxcf, + const FIRSTPASS_STATS *this_frame, + const double av_err) { + return calc_norm_frame_score(oxcf, &cpi->frame_info, this_frame, + twopass->mean_mod_score, av_err); +} + // This function returns the maximum target rate per frame. static int frame_max_bits(const RATE_CONTROL *rc, const VP9EncoderConfig *oxcf) { @@ -316,16 +313,8 @@ void vp9_init_first_pass(VP9_COMP *cpi) { } void vp9_end_first_pass(VP9_COMP *cpi) { - if (is_two_pass_svc(cpi)) { - int i; - for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { - output_stats(&cpi->svc.layer_context[i].twopass.total_stats, - cpi->output_pkt_list); - } - } else { - output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); - } - + output_stats(&cpi->twopass.total_stats); + cpi->twopass.first_pass_done = 1; vpx_free(cpi->twopass.fp_mb_float_stats); cpi->twopass.fp_mb_float_stats = NULL; } @@ -503,11 +492,10 @@ static int scale_sse_threshold(VP9_COMMON *cm, int thresh) { switch (cm->bit_depth) { case VPX_BITS_8: ret_val = thresh; break; case VPX_BITS_10: ret_val = thresh << 4; break; - case VPX_BITS_12: ret_val = thresh << 8; break; default: - assert(0 && - "cm->bit_depth should be VPX_BITS_8, " - "VPX_BITS_10 or VPX_BITS_12"); + assert(cm->bit_depth == VPX_BITS_12); + ret_val = thresh << 8; + break; } } #else @@ -529,11 +517,10 @@ static int get_ul_intra_threshold(VP9_COMMON *cm) { switch (cm->bit_depth) { case VPX_BITS_8: ret_val = UL_INTRA_THRESH; break; case VPX_BITS_10: ret_val = UL_INTRA_THRESH << 2; break; - case VPX_BITS_12: ret_val = UL_INTRA_THRESH << 4; break; default: - assert(0 && - "cm->bit_depth should be VPX_BITS_8, " - "VPX_BITS_10 or VPX_BITS_12"); + assert(cm->bit_depth == VPX_BITS_12); + ret_val = UL_INTRA_THRESH << 4; + break; } } #else @@ -550,11 +537,10 @@ static int get_smooth_intra_threshold(VP9_COMMON *cm) { switch (cm->bit_depth) { case VPX_BITS_8: ret_val = SMOOTH_INTRA_THRESH; break; case VPX_BITS_10: ret_val = SMOOTH_INTRA_THRESH << 4; break; - case VPX_BITS_12: ret_val = SMOOTH_INTRA_THRESH << 8; break; default: - assert(0 && - "cm->bit_depth should be VPX_BITS_8, " - "VPX_BITS_10 or VPX_BITS_12"); + assert(cm->bit_depth == VPX_BITS_12); + ret_val = SMOOTH_INTRA_THRESH << 8; 
+ break; } } #else @@ -564,7 +550,7 @@ static int get_smooth_intra_threshold(VP9_COMMON *cm) { } #define FP_DN_THRESH 8 -#define FP_MAX_DN_THRESH 16 +#define FP_MAX_DN_THRESH 24 #define KERNEL_SIZE 3 // Baseline Kernal weights for first pass noise metric @@ -731,9 +717,8 @@ static void first_pass_stat_calc(VP9_COMP *cpi, FIRSTPASS_STATS *fps, // Exclude any image dead zone if (fp_acc_data->image_data_start_row > 0) { fp_acc_data->intra_skip_count = - VPXMAX(0, - fp_acc_data->intra_skip_count - - (fp_acc_data->image_data_start_row * cm->mb_cols * 2)); + VPXMAX(0, fp_acc_data->intra_skip_count - + (fp_acc_data->image_data_start_row * cm->mb_cols * 2)); } fp_acc_data->intra_factor = fp_acc_data->intra_factor / (double)num_mbs; @@ -825,6 +810,8 @@ static void accumulate_fp_mb_row_stat(TileDataEnc *this_tile, fp_acc_data->image_data_start_row); } +#define NZ_MOTION_PENALTY 128 +#define INTRA_MODE_PENALTY 1024 void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, FIRSTPASS_DATA *fp_acc_data, TileDataEnc *tile_data, MV *best_ref_mv, @@ -834,6 +821,8 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; TileInfo tile = tile_data->tile_info; + const int mb_col_start = ROUND_POWER_OF_TWO(tile.mi_col_start, 1); + const int mb_col_end = ROUND_POWER_OF_TWO(tile.mi_col_end, 1); struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; const PICK_MODE_CONTEXT *ctx = &td->pc_root->none; @@ -850,40 +839,19 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; - LAYER_CONTEXT *const lc = - is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id] - : NULL; MODE_INFO mi_above, mi_left; double mb_intra_factor; double mb_brightness_factor; double mb_neutral_count; + int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH); // First pass code requires valid last and new frame buffers. assert(new_yv12 != NULL); - assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL)); + assert(frame_is_intra_only(cm) || (lst_yv12 != NULL)); - if (lc != NULL) { - // Use either last frame or alt frame for motion search. - if (cpi->ref_frame_flags & VP9_LAST_FLAG) { - first_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME); - if (first_ref_buf == NULL) - first_ref_buf = get_ref_frame_buffer(cpi, LAST_FRAME); - } - - if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { - gld_yv12 = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME); - if (gld_yv12 == NULL) { - gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); - } - } else { - gld_yv12 = NULL; - } - } - - xd->mi = cm->mi_grid_visible + xd->mi_stride * (mb_row << 1) + - (tile.mi_col_start >> 1); - xd->mi[0] = cm->mi + xd->mi_stride * (mb_row << 1) + (tile.mi_col_start >> 1); + xd->mi = cm->mi_grid_visible + xd->mi_stride * (mb_row << 1) + mb_col_start; + xd->mi[0] = cm->mi + xd->mi_stride * (mb_row << 1) + mb_col_start; for (i = 0; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][1]; @@ -897,10 +865,9 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height); // Reset above block coeffs. 
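[Note] ROUND_POWER_OF_TWO(v, 1), used earlier in this hunk for mb_col_start and mb_col_end, expands to (v + 1) >> 1, i.e. halving with round-up (the macro lives in vpx_dsp/vpx_dsp_common.h):

    /* ROUND_POWER_OF_TWO(value, n) == ((value) + (1 << ((n)-1))) >> (n) */
    mb_col_start = (tile.mi_col_start + 1) >> 1; /* e.g. 24 -> 12 */
    mb_col_end = (tile.mi_col_end + 1) >> 1;     /* e.g. 45 -> 23 */

Tile columns begin on even mi columns, so in practice only the (possibly odd) right edge actually rounds.
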
- recon_yoffset = - (mb_row * recon_y_stride * 16) + (tile.mi_col_start >> 1) * 16; - recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height) + - (tile.mi_col_start >> 1) * uv_mb_height; + recon_yoffset = (mb_row * recon_y_stride * 16) + mb_col_start * 16; + recon_uvoffset = + (mb_row * recon_uv_stride * uv_mb_height) + mb_col_start * uv_mb_height; // Set up limit values for motion vectors to prevent them extending // outside the UMV borders. @@ -908,8 +875,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, x->mv_limits.row_max = ((cm->mb_rows - 1 - mb_row) * 16) + BORDER_MV_PIXELS_B16; - for (mb_col = tile.mi_col_start >> 1, c = 0; mb_col < (tile.mi_col_end >> 1); - ++mb_col, c++) { + for (mb_col = mb_col_start, c = 0; mb_col < mb_col_end; ++mb_col, c++) { int this_error; int this_intra_error; const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); @@ -955,7 +921,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, x->skip_encode = 0; x->fp_src_pred = 0; // Do intra prediction based on source pixels for tile boundaries - if ((mb_col == (tile.mi_col_start >> 1)) && mb_col != 0) { + if (mb_col == mb_col_start && mb_col != 0) { xd->left_mi = &mi_left; x->fp_src_pred = 1; } @@ -1002,12 +968,10 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, switch (cm->bit_depth) { case VPX_BITS_8: break; case VPX_BITS_10: this_error >>= 4; break; - case VPX_BITS_12: this_error >>= 8; break; default: - assert(0 && - "cm->bit_depth should be VPX_BITS_8, " - "VPX_BITS_10 or VPX_BITS_12"); - return; + assert(cm->bit_depth == VPX_BITS_12); + this_error >>= 8; + break; } } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -1073,30 +1037,34 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16; // Other than for the first frame do a motion search. - if ((lc == NULL && cm->current_video_frame > 0) || - (lc != NULL && lc->current_video_frame_in_layer > 0)) { - int tmp_err, motion_error, raw_motion_error; + if (cm->current_video_frame > 0) { + int tmp_err, motion_error, this_motion_error, raw_motion_error; // Assume 0,0 motion with no mv overhead. MV mv = { 0, 0 }, tmp_mv = { 0, 0 }; struct buf_2d unscaled_last_source_buf_2d; + vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { motion_error = highbd_get_prediction_error( bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd); + this_motion_error = highbd_get_prediction_error( + bsize, &x->plane[0].src, &xd->plane[0].pre[0], 8); } else { motion_error = get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]); + this_motion_error = motion_error; } #else motion_error = get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]); + this_motion_error = motion_error; #endif // CONFIG_VP9_HIGHBITDEPTH // Compute the motion error of the 0,0 motion using the last source // frame as the reference. Skip the further motion search on - // reconstructed frame if this error is small. + // reconstructed frame if this error is very small. 
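[Note] In the continuation of this hunk, the reconstructed-frame search is gated on the raw source-to-source 0,0 error exceeding NZ_MOTION_PENALTY (128, replacing the old hard-coded 25). A condensed restatement of the control flow:

    if (raw_motion_error > NZ_MOTION_PENALTY) {
      /* Full diamond search against the last reference. */
      first_pass_motion_search(cpi, x, best_ref_mv, &mv, &motion_error);
      this_motion_error = vp9_get_mvpred_var(x, &mv, best_ref_mv, &v_fn_ptr, 0);
      /* ...optionally retried from 0,0 and against the golden frame... */
    } /* else: keep the 0,0 error and skip the searches entirely. */
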
unscaled_last_source_buf_2d.buf = cpi->unscaled_last_source->y_buffer + recon_yoffset; unscaled_last_source_buf_2d.stride = cpi->unscaled_last_source->y_stride; @@ -1113,12 +1081,20 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, &unscaled_last_source_buf_2d); #endif // CONFIG_VP9_HIGHBITDEPTH - // TODO(pengchong): Replace the hard-coded threshold - if (raw_motion_error > 25 || lc != NULL) { + if (raw_motion_error > NZ_MOTION_PENALTY) { // Test last reference frame using the previous best mv as the // starting point (best reference) for the search. first_pass_motion_search(cpi, x, best_ref_mv, &mv, &motion_error); + v_fn_ptr.vf = get_block_variance_fn(bsize); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, 8); + } +#endif // CONFIG_VP9_HIGHBITDEPTH + this_motion_error = + vp9_get_mvpred_var(x, &mv, best_ref_mv, &v_fn_ptr, 0); + // If the current best reference mv is not centered on 0,0 then do a // 0,0 based search as well. if (!is_zero_mv(best_ref_mv)) { @@ -1128,13 +1104,13 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, if (tmp_err < motion_error) { motion_error = tmp_err; mv = tmp_mv; + this_motion_error = + vp9_get_mvpred_var(x, &tmp_mv, &zero_mv, &v_fn_ptr, 0); } } // Search in an older reference frame. - if (((lc == NULL && cm->current_video_frame > 1) || - (lc != NULL && lc->current_video_frame_in_layer > 1)) && - gld_yv12 != NULL) { + if ((cm->current_video_frame > 1) && gld_yv12 != NULL) { // Assume 0,0 motion with no mv overhead. int gf_motion_error; @@ -1280,7 +1256,6 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, } } #endif - // Does the row vector point inwards or outwards? if (mb_row < cm->mb_rows / 2) { if (mv.row > 0) @@ -1306,17 +1281,16 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, else if (mv.col < 0) --(fp_acc_data->sum_in_vectors); } - fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF; - } else if (this_intra_error < scale_sse_threshold(cm, LOW_I_THRESH)) { + } + if (this_intra_error < scaled_low_intra_thresh) { fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize); - } else { // 0,0 mv but high error + } else { fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF; } } else { // Intra < inter error - int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH); if (this_intra_error < scaled_low_intra_thresh) { fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize); - if (motion_error < scaled_low_intra_thresh) { + if (this_motion_error < scaled_low_intra_thresh) { fp_acc_data->intra_count_low += 1.0; } else { fp_acc_data->intra_count_high += 1.0; @@ -1335,7 +1309,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, recon_uvoffset += uv_mb_height; // Accumulate row level stats to the corresponding tile stats - if (cpi->row_mt && mb_col == (tile.mi_col_end >> 1) - 1) + if (cpi->row_mt && mb_col == mb_col_end - 1) accumulate_fp_mb_row_stat(tile_data, fp_acc_data); (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, mb_row, c, @@ -1372,9 +1346,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; - LAYER_CONTEXT *const lc = - is_two_pass_svc(cpi) ? 
&cpi->svc.layer_context[cpi->svc.spatial_layer_id] - : NULL; BufferPool *const pool = cm->buffer_pool; FIRSTPASS_DATA fp_temp_data; @@ -1386,7 +1357,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { // First pass code requires valid last and new frame buffers. assert(new_yv12 != NULL); - assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL)); + assert(frame_is_intra_only(cm) || (lst_yv12 != NULL)); #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { @@ -1397,50 +1368,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { set_first_pass_params(cpi); vp9_set_quantizer(cm, find_fp_qindex(cm->bit_depth)); - if (lc != NULL) { - twopass = &lc->twopass; - - cpi->lst_fb_idx = cpi->svc.spatial_layer_id; - cpi->ref_frame_flags = VP9_LAST_FLAG; - - if (cpi->svc.number_spatial_layers + cpi->svc.spatial_layer_id < - REF_FRAMES) { - cpi->gld_fb_idx = - cpi->svc.number_spatial_layers + cpi->svc.spatial_layer_id; - cpi->ref_frame_flags |= VP9_GOLD_FLAG; - cpi->refresh_golden_frame = (lc->current_video_frame_in_layer == 0); - } else { - cpi->refresh_golden_frame = 0; - } - - if (lc->current_video_frame_in_layer == 0) cpi->ref_frame_flags = 0; - - vp9_scale_references(cpi); - - // Use either last frame or alt frame for motion search. - if (cpi->ref_frame_flags & VP9_LAST_FLAG) { - first_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME); - if (first_ref_buf == NULL) - first_ref_buf = get_ref_frame_buffer(cpi, LAST_FRAME); - } - - if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { - gld_yv12 = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME); - if (gld_yv12 == NULL) { - gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); - } - } else { - gld_yv12 = NULL; - } - - set_ref_ptrs(cm, xd, - (cpi->ref_frame_flags & VP9_LAST_FLAG) ? LAST_FRAME : NONE, - (cpi->ref_frame_flags & VP9_GOLD_FLAG) ? GOLDEN_FRAME : NONE); - - cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source, - &cpi->scaled_source, 0, EIGHTTAP, 0); - } - vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); vp9_setup_src_planes(x, cpi->Source, 0, 0); @@ -1496,7 +1423,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { // Don't want to do output stats with a stack variable! twopass->this_frame_stats = fps; - output_stats(&twopass->this_frame_stats, cpi->output_pkt_list); + output_stats(&twopass->this_frame_stats); accumulate_stats(&twopass->total_stats, &fps); #if CONFIG_FP_MB_STATS @@ -1524,18 +1451,13 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { vpx_extend_frame_borders(new_yv12); - if (lc != NULL) { - vp9_update_reference_frames(cpi); - } else { - // The frame we just compressed now becomes the last frame. - ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], - cm->new_fb_idx); - } + // The frame we just compressed now becomes the last frame. + ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], + cm->new_fb_idx); // Special case for the first frame. Copy into the GF buffer as a second // reference. 
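[Note] ref_cnt_fb(), called above to make the just-coded frame the new LAST (and, for frame 0, GOLDEN) reference, retargets one reference-map slot while keeping the buffer pool's reference counts balanced. Its definition is paraphrased below; verify against vp9/common/vp9_onyxc_int.h:

    static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
      const int ref_index = *idx;
      if (ref_index >= 0 && bufs[ref_index].ref_count > 0)
        bufs[ref_index].ref_count--;   /* release the old reference */
      *idx = new_idx;                  /* repoint the map slot */
      bufs[new_idx].ref_count++;       /* retain the new buffer */
    }
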
- if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX && - lc == NULL) { + if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->ref_frame_map[cpi->lst_fb_idx]); } @@ -1560,9 +1482,9 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { if (cpi->use_svc) vp9_inc_frame_in_layer(cpi); } -static const double q_pow_term[(QINDEX_RANGE >> 5) + 1] = { - 0.65, 0.70, 0.75, 0.85, 0.90, 0.90, 0.90, 1.00, 1.25 -}; +static const double q_pow_term[(QINDEX_RANGE >> 5) + 1] = { 0.65, 0.70, 0.75, + 0.85, 0.90, 0.90, + 0.90, 1.00, 1.25 }; static double calc_correction_factor(double err_per_mb, double err_divisor, int q) { @@ -1583,7 +1505,26 @@ static double calc_correction_factor(double err_per_mb, double err_divisor, return fclamp(pow(error_term, power_term), 0.05, 5.0); } -#define ERR_DIVISOR 115.0 +static double wq_err_divisor(VP9_COMP *cpi) { + const VP9_COMMON *const cm = &cpi->common; + unsigned int screen_area = (cm->width * cm->height); + + // Use a different error per mb factor for calculating boost for + // different formats. + if (screen_area <= 640 * 360) { + return 115.0; + } else if (screen_area < 1280 * 720) { + return 125.0; + } else if (screen_area <= 1920 * 1080) { + return 130.0; + } else if (screen_area < 3840 * 2160) { + return 150.0; + } + + // Fall through to here only for 4K and above. + return 200.0; +} + #define NOISE_FACTOR_MIN 0.9 #define NOISE_FACTOR_MAX 1.1 static int get_twopass_worst_quality(VP9_COMP *cpi, const double section_err, @@ -1643,7 +1584,7 @@ static int get_twopass_worst_quality(VP9_COMP *cpi, const double section_err, // content at the given rate. for (q = rc->best_quality; q < rc->worst_quality; ++q) { const double factor = - calc_correction_factor(av_err_per_mb, ERR_DIVISOR, q); + calc_correction_factor(av_err_per_mb, wq_err_divisor(cpi), q); const int bits_per_mb = vp9_rc_bits_per_mb( INTER_FRAME, q, factor * speed_term * cpi->twopass.bpm_factor * noise_factor, @@ -1690,14 +1631,9 @@ void calculate_coded_size(VP9_COMP *cpi, int *scaled_frame_width, } void vp9_init_second_pass(VP9_COMP *cpi) { - SVC *const svc = &cpi->svc; VP9EncoderConfig *const oxcf = &cpi->oxcf; - const int is_two_pass_svc = - (svc->number_spatial_layers > 1) || (svc->number_temporal_layers > 1); RATE_CONTROL *const rc = &cpi->rc; - TWO_PASS *const twopass = - is_two_pass_svc ? &svc->layer_context[svc->spatial_layer_id].twopass - : &cpi->twopass; + TWO_PASS *const twopass = &cpi->twopass; double frame_rate; FIRSTPASS_STATS *stats; @@ -1774,18 +1710,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) { // encoded in the second pass is a guess. However, the sum duration is not. // It is calculated based on the actual durations of all frames from the // first pass. - - if (is_two_pass_svc) { - vp9_update_spatial_layer_framerate(cpi, frame_rate); - twopass->bits_left = - (int64_t)(stats->duration * - svc->layer_context[svc->spatial_layer_id].target_bandwidth / - 10000000.0); - } else { - vp9_new_framerate(cpi, frame_rate); - twopass->bits_left = - (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0); - } + vp9_new_framerate(cpi, frame_rate); + twopass->bits_left = + (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0); // This variable monitors how far behind the second ref update is lagging. 
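[Note] stats->duration is accumulated in encoder ticks, which elsewhere in the encoder run at 10,000,000 per second; the 10000000.0 divisor converts it to seconds before multiplying by the target bandwidth in bits/s. Assuming that timebase, a worked example:

    /* 60 s clip at 1,000,000 bps: duration = 60 * 1e7 = 6.0e8 ticks. */
    const int64_t bits_left = (int64_t)(6.0e8 * 1000000.0 / 10000000.0);
    /* = 60,000,000 bits, the whole-clip budget at the target rate. */
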
twopass->sr_update_lag = 1; @@ -1822,15 +1749,16 @@ void vp9_init_second_pass(VP9_COMP *cpi) { #define LOW_CODED_ERR_PER_MB 10.0 #define NCOUNT_FRAME_II_THRESH 6.0 -static double get_sr_decay_rate(const VP9_COMP *cpi, +static double get_sr_decay_rate(const FRAME_INFO *frame_info, const FIRSTPASS_STATS *frame) { double sr_diff = (frame->sr_coded_error - frame->coded_error); double sr_decay = 1.0; double modified_pct_inter; double modified_pcnt_intra; const double motion_amplitude_part = - frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / - (cpi->initial_height + cpi->initial_width)); + frame->pcnt_motion * + ((frame->mvc_abs + frame->mvr_abs) / + (frame_info->frame_height + frame_info->frame_width)); modified_pct_inter = frame->pcnt_inter; if ((frame->coded_error > LOW_CODED_ERR_PER_MB) && @@ -1851,72 +1779,73 @@ static double get_sr_decay_rate(const VP9_COMP *cpi, // This function gives an estimate of how badly we believe the prediction // quality is decaying from frame to frame. -static double get_zero_motion_factor(const VP9_COMP *cpi, - const FIRSTPASS_STATS *frame) { - const double zero_motion_pct = frame->pcnt_inter - frame->pcnt_motion; - double sr_decay = get_sr_decay_rate(cpi, frame); +static double get_zero_motion_factor(const FRAME_INFO *frame_info, + const FIRSTPASS_STATS *frame_stats) { + const double zero_motion_pct = + frame_stats->pcnt_inter - frame_stats->pcnt_motion; + double sr_decay = get_sr_decay_rate(frame_info, frame_stats); return VPXMIN(sr_decay, zero_motion_pct); } #define ZM_POWER_FACTOR 0.75 -static double get_prediction_decay_rate(const VP9_COMP *cpi, - const FIRSTPASS_STATS *next_frame) { - const double sr_decay_rate = get_sr_decay_rate(cpi, next_frame); +static double get_prediction_decay_rate(const FRAME_INFO *frame_info, + const FIRSTPASS_STATS *frame_stats) { + const double sr_decay_rate = get_sr_decay_rate(frame_info, frame_stats); const double zero_motion_factor = - (0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion), + (0.95 * pow((frame_stats->pcnt_inter - frame_stats->pcnt_motion), ZM_POWER_FACTOR)); return VPXMAX(zero_motion_factor, (sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor))); } +static int get_show_idx(const TWO_PASS *twopass) { + return (int)(twopass->stats_in - twopass->stats_in_start); +} // Function to test for a condition where a complex transition is followed // by a static section. For example in slide shows where there is a fade // between slides. This is to help with more optimal kf and gf positioning. -static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, - int still_interval, - double loop_decay_rate, - double last_decay_rate) { - TWO_PASS *const twopass = &cpi->twopass; - RATE_CONTROL *const rc = &cpi->rc; - - // Break clause to detect very still sections after motion - // For example a static image after a fade or other transition - // instead of a clean scene cut. - if (frame_interval > rc->min_gf_interval && loop_decay_rate >= 0.999 && - last_decay_rate < 0.9) { - int j; - - // Look ahead a few frames to see if static condition persists... - for (j = 0; j < still_interval; ++j) { - const FIRSTPASS_STATS *stats = &twopass->stats_in[j]; - if (stats >= twopass->stats_in_end) break; - - if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break; - } - - // Only if it does do we signal a transition to still. 
- return j == still_interval; +static int check_transition_to_still(const FIRST_PASS_INFO *first_pass_info, + int show_idx, int still_interval) { + int j; + int num_frames = fps_get_num_frames(first_pass_info); + if (show_idx + still_interval > num_frames) { + return 0; } - return 0; + // Look ahead a few frames to see if static condition persists... + for (j = 0; j < still_interval; ++j) { + const FIRSTPASS_STATS *stats = + fps_get_frame_stats(first_pass_info, show_idx + j); + if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break; + } + + // Only if it does do we signal a transition to still. + return j == still_interval; } // This function detects a flash through the high relative pcnt_second_ref // score in the frame following a flash frame. The offset passed in should // reflect this. -static int detect_flash(const TWO_PASS *twopass, int offset) { - const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset); - + static int detect_flash_from_frame_stats(const FIRSTPASS_STATS *frame_stats) { // What we are looking for here is a situation where there is a // brief break in prediction (such as a flash) but subsequent frames // are reasonably well predicted by an earlier (pre flash) frame. // The recovery after a flash is indicated by a high pcnt_second_ref - // compared to pcnt_inter. - return next_frame != NULL && - next_frame->pcnt_second_ref > next_frame->pcnt_inter && - next_frame->pcnt_second_ref >= 0.5; + // usage or a second ref coded error notably lower than the last + // frame coded error. + if (frame_stats == NULL) { + return 0; + } + return (frame_stats->sr_coded_error < frame_stats->coded_error) || + ((frame_stats->pcnt_second_ref > frame_stats->pcnt_inter) && + (frame_stats->pcnt_second_ref >= 0.5)); +} + +static int detect_flash(const TWO_PASS *twopass, int offset) { + const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset); + return detect_flash_from_frame_stats(next_frame); } // Update the motion related elements to the GF arf boost calculation. @@ -1949,13 +1878,15 @@ static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats, #define BASELINE_ERR_PER_MB 12500.0 #define GF_MAX_BOOST 96.0 -static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame, +static double calc_frame_boost(const FRAME_INFO *frame_info, + const FIRSTPASS_STATS *this_frame, + int avg_frame_qindex, double this_frame_mv_in_out) { double frame_boost; - const double lq = vp9_convert_qindex_to_q( - cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth); + const double lq = + vp9_convert_qindex_to_q(avg_frame_qindex, frame_info->bit_depth); const double boost_q_correction = VPXMIN((0.5 + (lq * 0.015)), 1.5); - const double active_area = calculate_active_area(cpi, this_frame); + const double active_area = calculate_active_area(frame_info, this_frame); // Underlying boost factor is based on inter error ratio. frame_boost = (BASELINE_ERR_PER_MB * active_area) / @@ -1971,7 +1902,20 @@ static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame, return VPXMIN(frame_boost, GF_MAX_BOOST * boost_q_correction); } -#define KF_BASELINE_ERR_PER_MB 12500.0 +static double kf_err_per_mb(VP9_COMP *cpi) { + const VP9_COMMON *const cm = &cpi->common; + unsigned int screen_area = (cm->width * cm->height); + + // Use a different error per mb factor for calculating boost for + // different formats. 
+ if (screen_area < 1280 * 720) { + return 2000.0; + } else if (screen_area < 1920 * 1080) { + return 500.0; + } + return 250.0; +} + static double calc_kf_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame, double *sr_accumulator, @@ -1981,10 +1925,11 @@ static double calc_kf_frame_boost(VP9_COMP *cpi, const double lq = vp9_convert_qindex_to_q( cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth); const double boost_q_correction = VPXMIN((0.50 + (lq * 0.015)), 2.00); - const double active_area = calculate_active_area(cpi, this_frame); + const double active_area = + calculate_active_area(&cpi->frame_info, this_frame); // Underlying boost factor is based on inter error ratio. - frame_boost = (KF_BASELINE_ERR_PER_MB * active_area) / + frame_boost = (kf_err_per_mb(cpi) * active_area) / DOUBLE_DIVIDE_CHECK(this_frame->coded_error + *sr_accumulator); // Update the accumulator for second ref error difference. @@ -1997,14 +1942,19 @@ static double calc_kf_frame_boost(VP9_COMP *cpi, if (this_frame_mv_in_out > 0.0) frame_boost += frame_boost * (this_frame_mv_in_out * 2.0); - // Q correction and scalling - frame_boost = frame_boost * boost_q_correction; + // Q correction and scaling + // The 40.0 value here is an experimentally derived baseline minimum. + // This value is in line with the minimum per frame boost in the alt_ref + // boost calculation. + frame_boost = ((frame_boost + 40.0) * boost_q_correction); return VPXMIN(frame_boost, max_boost * boost_q_correction); } -static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { - TWO_PASS *const twopass = &cpi->twopass; +static int compute_arf_boost(const FRAME_INFO *frame_info, + const FIRST_PASS_INFO *first_pass_info, + int arf_show_idx, int f_frames, int b_frames, + int avg_frame_qindex) { int i; double boost_score = 0.0; double mv_ratio_accumulator = 0.0; @@ -2017,7 +1967,10 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { // Search forward from the proposed arf/next gf position. for (i = 0; i < f_frames; ++i) { - const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i); + const FIRSTPASS_STATS *this_frame = + fps_get_frame_stats(first_pass_info, arf_show_idx + i); + const FIRSTPASS_STATS *next_frame = + fps_get_frame_stats(first_pass_info, arf_show_idx + i + 1); if (this_frame == NULL) break; // Update the motion related elements to the boost calculation. @@ -2027,17 +1980,19 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { // We want to discount the flash frame itself and the recovery // frame that follows as both will have poor scores. - flash_detected = detect_flash(twopass, i) || detect_flash(twopass, i + 1); + flash_detected = detect_flash_from_frame_stats(this_frame) || + detect_flash_from_frame_stats(next_frame); // Accumulate the effect of prediction quality decay. if (!flash_detected) { - decay_accumulator *= get_prediction_decay_rate(cpi, this_frame); + decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? MIN_DECAY_FACTOR : decay_accumulator; } - boost_score += decay_accumulator * - calc_frame_boost(cpi, this_frame, this_frame_mv_in_out); + boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame, + avg_frame_qindex, + this_frame_mv_in_out); } arf_boost = (int)boost_score; @@ -2052,7 +2007,10 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { // Search backward towards last gf position. 
for (i = -1; i >= -b_frames; --i) { - const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i); + const FIRSTPASS_STATS *this_frame = + fps_get_frame_stats(first_pass_info, arf_show_idx + i); + const FIRSTPASS_STATS *next_frame = + fps_get_frame_stats(first_pass_info, arf_show_idx + i + 1); if (this_frame == NULL) break; // Update the motion related elements to the boost calculation. @@ -2062,17 +2020,19 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { // We want to discount the the flash frame itself and the recovery // frame that follows as both will have poor scores. - flash_detected = detect_flash(twopass, i) || detect_flash(twopass, i + 1); + flash_detected = detect_flash_from_frame_stats(this_frame) || + detect_flash_from_frame_stats(next_frame); // Cumulative effect of prediction quality decay. if (!flash_detected) { - decay_accumulator *= get_prediction_decay_rate(cpi, this_frame); + decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? MIN_DECAY_FACTOR : decay_accumulator; } - boost_score += decay_accumulator * - calc_frame_boost(cpi, this_frame, this_frame_mv_in_out); + boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame, + avg_frame_qindex, + this_frame_mv_in_out); } arf_boost += (int)boost_score; @@ -2083,6 +2043,15 @@ static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { return arf_boost; } +static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) { + const FRAME_INFO *frame_info = &cpi->frame_info; + TWO_PASS *const twopass = &cpi->twopass; + const int avg_inter_frame_qindex = cpi->rc.avg_frame_qindex[INTER_FRAME]; + int arf_show_idx = get_show_idx(twopass); + return compute_arf_boost(frame_info, &twopass->first_pass_info, arf_show_idx, + f_frames, b_frames, avg_inter_frame_qindex); +} + // Calculate a section intra ratio used in setting max loop filter. static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin, const FIRSTPASS_STATS *end, @@ -2105,15 +2074,31 @@ static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin, // Calculate the total bits to allocate in this GF/ARF group. static int64_t calculate_total_gf_group_bits(VP9_COMP *cpi, double gf_group_err) { + VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; const TWO_PASS *const twopass = &cpi->twopass; const int max_bits = frame_max_bits(rc, &cpi->oxcf); int64_t total_group_bits; + const int is_key_frame = frame_is_intra_only(cm); + const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active; + int gop_frames = + rc->baseline_gf_interval + rc->source_alt_ref_pending - arf_active_or_kf; // Calculate the bits to be allocated to the group as a whole. 
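[Note] The code that follows adds a distance-from-keyframe bias on top of the error-proportional share, so groups further from the next key frame receive slightly more bits. With illustrative numbers and no pending ARF:

    /* 128-frame KF interval, 16-frame GOP, 80 frames still to the KF: */
    /* distance_from_next_key_frame = 80 - 16 = 64                     */
    const double bias_bits =
        64.0 / 128.0 * rc->avg_frame_bandwidth * (16.0 / 16.0);
    /* i.e. half of one average frame's budget is added to this group. */
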
if ((twopass->kf_group_bits > 0) && (twopass->kf_group_error_left > 0.0)) { + int key_frame_interval = rc->frames_since_key + rc->frames_to_key; + int distance_from_next_key_frame = + rc->frames_to_key - + (rc->baseline_gf_interval + rc->source_alt_ref_pending); + int max_gf_bits_bias = rc->avg_frame_bandwidth; + double gf_interval_bias_bits_normalize_factor = + (double)rc->baseline_gf_interval / 16; total_group_bits = (int64_t)(twopass->kf_group_bits * (gf_group_err / twopass->kf_group_error_left)); + // TODO(ravi): Experiment with different values of max_gf_bits_bias + total_group_bits += + (int64_t)((double)distance_from_next_key_frame / key_frame_interval * + max_gf_bits_bias * gf_interval_bias_bits_normalize_factor); } else { total_group_bits = 0; } @@ -2126,8 +2111,8 @@ static int64_t calculate_total_gf_group_bits(VP9_COMP *cpi, : total_group_bits; // Clip based on user supplied data rate variability limit. - if (total_group_bits > (int64_t)max_bits * rc->baseline_gf_interval) - total_group_bits = (int64_t)max_bits * rc->baseline_gf_interval; + if (total_group_bits > (int64_t)max_bits * gop_frames) + total_group_bits = (int64_t)max_bits * gop_frames; return total_group_bits; } @@ -2140,7 +2125,7 @@ static int calculate_boost_bits(int frame_count, int boost, // return 0 for invalid inputs (could arise e.g. through rounding errors) if (!boost || (total_group_bits <= 0) || (frame_count < 0)) return 0; - allocation_chunks = (frame_count * 100) + boost; + allocation_chunks = (frame_count * NORMAL_BOOST) + boost; // Prevent overflow. if (boost > 1023) { @@ -2154,18 +2139,6 @@ static int calculate_boost_bits(int frame_count, int boost, 0); } -// Current limit on maximum number of active arfs in a GF/ARF group. -#define MAX_ACTIVE_ARFS 2 -#define ARF_SLOT1 2 -#define ARF_SLOT2 3 -// This function indirects the choice of buffers for arfs. -// At the moment the values are fixed but this may change as part of -// the integration process with other codec features that swap buffers around. -static void get_arf_buffer_indices(unsigned char *arf_buffer_indices) { - arf_buffer_indices[0] = ARF_SLOT1; - arf_buffer_indices[1] = ARF_SLOT2; -} - // Used in corpus vbr: Calculates the total normalized group complexity score // for a given number of frames starting at the current position in the stats // file. 
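[Note] In calculate_boost_bits() above, each regular frame now weighs NORMAL_BOOST (100) allocation chunks against the boosted frame's own boost value, so the boosted frame's share is boost / (frame_count * 100 + boost). Worked example with illustrative numbers:

    const int frame_count = 15, boost = 400;
    const int64_t total_group_bits = 1500000;
    const int chunks = frame_count * NORMAL_BOOST + boost; /* 1900 */
    const int gf_bits = (int)((int64_t)boost * total_group_bits / chunks);
    /* gf_bits ~= 315789; the remaining ~1184211 bits go to the 15 frames. */
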
@@ -2185,11 +2158,129 @@ static double calculate_group_score(VP9_COMP *cpi, double av_score, ++s; ++i; } - assert(i == frame_count); return score_total; } +static void find_arf_order(VP9_COMP *cpi, GF_GROUP *gf_group, + int *index_counter, int depth, int start, int end) { + TWO_PASS *twopass = &cpi->twopass; + const FIRSTPASS_STATS *const start_pos = twopass->stats_in; + FIRSTPASS_STATS fpf_frame; + const int mid = (start + end + 1) >> 1; + const int min_frame_interval = 2; + int idx; + + // Process regular P frames + if ((end - start < min_frame_interval) || + (depth > gf_group->allowed_max_layer_depth)) { + for (idx = start; idx <= end; ++idx) { + gf_group->update_type[*index_counter] = LF_UPDATE; + gf_group->arf_src_offset[*index_counter] = 0; + gf_group->frame_gop_index[*index_counter] = idx; + gf_group->rf_level[*index_counter] = INTER_NORMAL; + gf_group->layer_depth[*index_counter] = depth; + gf_group->gfu_boost[*index_counter] = NORMAL_BOOST; + ++(*index_counter); + } + gf_group->max_layer_depth = VPXMAX(gf_group->max_layer_depth, depth); + return; + } + + assert(abs(mid - start) >= 1 && abs(mid - end) >= 1); + + // Process ARF frame + gf_group->layer_depth[*index_counter] = depth; + gf_group->update_type[*index_counter] = ARF_UPDATE; + gf_group->arf_src_offset[*index_counter] = mid - start; + gf_group->frame_gop_index[*index_counter] = mid; + gf_group->rf_level[*index_counter] = GF_ARF_LOW; + + for (idx = 0; idx <= mid; ++idx) + if (EOF == input_stats(twopass, &fpf_frame)) break; + + gf_group->gfu_boost[*index_counter] = + VPXMAX(MIN_ARF_GF_BOOST, + calc_arf_boost(cpi, end - mid + 1, mid - start) >> depth); + + reset_fpf_position(twopass, start_pos); + + ++(*index_counter); + + find_arf_order(cpi, gf_group, index_counter, depth + 1, start, mid - 1); + + gf_group->update_type[*index_counter] = USE_BUF_FRAME; + gf_group->arf_src_offset[*index_counter] = 0; + gf_group->frame_gop_index[*index_counter] = mid; + gf_group->rf_level[*index_counter] = INTER_NORMAL; + gf_group->layer_depth[*index_counter] = depth; + ++(*index_counter); + + find_arf_order(cpi, gf_group, index_counter, depth + 1, mid + 1, end); +} + +static INLINE void set_gf_overlay_frame_type(GF_GROUP *gf_group, + int frame_index, + int source_alt_ref_active) { + if (source_alt_ref_active) { + gf_group->update_type[frame_index] = OVERLAY_UPDATE; + gf_group->rf_level[frame_index] = INTER_NORMAL; + gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS - 1; + gf_group->gfu_boost[frame_index] = NORMAL_BOOST; + } else { + gf_group->update_type[frame_index] = GF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_STD; + gf_group->layer_depth[frame_index] = 0; + } +} + +static void define_gf_group_structure(VP9_COMP *cpi) { + RATE_CONTROL *const rc = &cpi->rc; + TWO_PASS *const twopass = &cpi->twopass; + GF_GROUP *const gf_group = &twopass->gf_group; + int frame_index = 0; + int key_frame = cpi->common.frame_type == KEY_FRAME; + int layer_depth = 1; + int gop_frames = + rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending); + + gf_group->frame_start = cpi->common.current_video_frame; + gf_group->frame_end = gf_group->frame_start + rc->baseline_gf_interval; + gf_group->max_layer_depth = 0; + gf_group->allowed_max_layer_depth = 0; + + // For key frames the frame target rate is already set and it + // is also the golden frame. 
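[Note] find_arf_order(), added above, lays out the multi-layer ARF pyramid by recursive bisection: the midpoint of [start, end] becomes an ARF one layer deeper, the left half is coded, the midpoint is then shown from its buffer (USE_BUF_FRAME), and finally the right half. For a 7-frame span [1..7] with sufficient allowed depth this gives the coding order, schematically:

    ARF(4,d)  ARF(2,d+1)  P(1)  SHOW(2)  P(3)  SHOW(4)
              ARF(6,d+1)  P(5)  SHOW(6)  P(7)

Spans shorter than min_frame_interval (2), or deeper than allowed_max_layer_depth, degrade to plain LF_UPDATE frames carrying NORMAL_BOOST.
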
+ // === [frame_index == 0] === + if (!key_frame) + set_gf_overlay_frame_type(gf_group, frame_index, rc->source_alt_ref_active); + + ++frame_index; + + // === [frame_index == 1] === + if (rc->source_alt_ref_pending) { + gf_group->update_type[frame_index] = ARF_UPDATE; + gf_group->rf_level[frame_index] = GF_ARF_STD; + gf_group->layer_depth[frame_index] = layer_depth; + gf_group->arf_src_offset[frame_index] = + (unsigned char)(rc->baseline_gf_interval - 1); + gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval; + gf_group->max_layer_depth = 1; + ++frame_index; + ++layer_depth; + gf_group->allowed_max_layer_depth = cpi->oxcf.enable_auto_arf; + } + + find_arf_order(cpi, gf_group, &frame_index, layer_depth, 1, gop_frames); + + set_gf_overlay_frame_type(gf_group, frame_index, rc->source_alt_ref_pending); + gf_group->arf_src_offset[frame_index] = 0; + gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval; + + // Set the frame ops number. + gf_group->gf_group_size = frame_index; +} + static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, int gf_arf_bits) { VP9EncoderConfig *const oxcf = &cpi->oxcf; @@ -2198,17 +2289,12 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, GF_GROUP *const gf_group = &twopass->gf_group; FIRSTPASS_STATS frame_stats; int i; - int frame_index = 1; + int frame_index = 0; int target_frame_size; int key_frame; const int max_bits = frame_max_bits(&cpi->rc, oxcf); int64_t total_group_bits = gf_group_bits; - int mid_boost_bits = 0; int mid_frame_idx; - unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS]; - int alt_frame_index = frame_index; - int has_temporal_layers = - is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1; int normal_frames; int normal_frame_bits; int last_frame_reduction = 0; @@ -2216,81 +2302,97 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, double tot_norm_frame_score = 1.0; double this_frame_score = 1.0; - // Only encode alt reference frame in temporal base layer. - if (has_temporal_layers) alt_frame_index = cpi->svc.number_temporal_layers; + // Define the GF structure and specify + int gop_frames = gf_group->gf_group_size; - key_frame = - cpi->common.frame_type == KEY_FRAME || vp9_is_upper_layer_key_frame(cpi); - - get_arf_buffer_indices(arf_buffer_indices); + key_frame = cpi->common.frame_type == KEY_FRAME; // For key frames the frame target rate is already set and it // is also the golden frame. + // === [frame_index == 0] === if (!key_frame) { - if (rc->source_alt_ref_active) { - gf_group->update_type[0] = OVERLAY_UPDATE; - gf_group->rf_level[0] = INTER_NORMAL; - gf_group->bit_allocation[0] = 0; - } else { - gf_group->update_type[0] = GF_UPDATE; - gf_group->rf_level[0] = GF_ARF_STD; - gf_group->bit_allocation[0] = gf_arf_bits; - } - gf_group->arf_update_idx[0] = arf_buffer_indices[0]; - gf_group->arf_ref_idx[0] = arf_buffer_indices[0]; - - // Step over the golden frame / overlay frame - if (EOF == input_stats(twopass, &frame_stats)) return; + gf_group->bit_allocation[frame_index] = + rc->source_alt_ref_active ? 0 : gf_arf_bits; } // Deduct the boost bits for arf (or gf if it is not a key frame) // from the group total. if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits; + ++frame_index; + + // === [frame_index == 1] === // Store the bits to spend on the ARF if there is one. 
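[Note] Group construction is now split in two passes: define_gf_group_structure() fixes update_type, layer_depth and boost per slot, and allocate_gf_group_bits() revisits the same slots to attach budgets, as the lines below do for the primary ARF. Illustrative layout for a non-key group with a pending ARF:

    /* slot 0: GF_UPDATE or OVERLAY_UPDATE -> gf_arf_bits or 0         */
    /* slot 1: ARF_UPDATE, arf_src_offset = baseline_gf_interval - 1   */
    /*         -> gf_arf_bits                                          */
    /* slots 2..: LF_UPDATE / ARF_UPDATE / USE_BUF_FRAME as laid out   */
    /*            by find_arf_order()                                  */
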
if (rc->source_alt_ref_pending) { - gf_group->update_type[alt_frame_index] = ARF_UPDATE; - gf_group->rf_level[alt_frame_index] = GF_ARF_STD; - gf_group->bit_allocation[alt_frame_index] = gf_arf_bits; + gf_group->bit_allocation[frame_index] = gf_arf_bits; - if (has_temporal_layers) - gf_group->arf_src_offset[alt_frame_index] = - (unsigned char)(rc->baseline_gf_interval - - cpi->svc.number_temporal_layers); - else - gf_group->arf_src_offset[alt_frame_index] = - (unsigned char)(rc->baseline_gf_interval - 1); - - gf_group->arf_update_idx[alt_frame_index] = arf_buffer_indices[0]; - gf_group->arf_ref_idx[alt_frame_index] = - arf_buffer_indices[cpi->multi_arf_last_grp_enabled && - rc->source_alt_ref_active]; - if (!has_temporal_layers) ++frame_index; - - if (cpi->multi_arf_enabled) { - // Set aside a slot for a level 1 arf. - gf_group->update_type[frame_index] = ARF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_LOW; - gf_group->arf_src_offset[frame_index] = - (unsigned char)((rc->baseline_gf_interval >> 1) - 1); - gf_group->arf_update_idx[frame_index] = arf_buffer_indices[1]; - gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0]; - ++frame_index; - } + ++frame_index; } - // Note index of the first normal inter frame int eh group (not gf kf arf) - gf_group->first_inter_index = frame_index; - // Define middle frame mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1; - normal_frames = (rc->baseline_gf_interval - rc->source_alt_ref_pending); + normal_frames = (rc->baseline_gf_interval - 1); if (normal_frames > 1) normal_frame_bits = (int)(total_group_bits / normal_frames); else normal_frame_bits = (int)total_group_bits; + gf_group->gfu_boost[1] = rc->gfu_boost; + + if (cpi->multi_layer_arf) { + int idx; + int arf_depth_bits[MAX_ARF_LAYERS] = { 0 }; + int arf_depth_count[MAX_ARF_LAYERS] = { 0 }; + int arf_depth_boost[MAX_ARF_LAYERS] = { 0 }; + int total_arfs = 1; // Account for the base layer ARF. + + for (idx = 0; idx < gop_frames; ++idx) { + if (gf_group->update_type[idx] == ARF_UPDATE) { + arf_depth_boost[gf_group->layer_depth[idx]] += gf_group->gfu_boost[idx]; + ++arf_depth_count[gf_group->layer_depth[idx]]; + } + } + + for (idx = 2; idx < MAX_ARF_LAYERS; ++idx) { + if (arf_depth_boost[idx] == 0) break; + arf_depth_bits[idx] = calculate_boost_bits( + rc->baseline_gf_interval - total_arfs - arf_depth_count[idx], + arf_depth_boost[idx], total_group_bits); + + total_group_bits -= arf_depth_bits[idx]; + total_arfs += arf_depth_count[idx]; + } + + // offset the base layer arf + normal_frames -= (total_arfs - 1); + if (normal_frames > 1) + normal_frame_bits = (int)(total_group_bits / normal_frames); + else + normal_frame_bits = (int)total_group_bits; + + target_frame_size = normal_frame_bits; + target_frame_size = + clamp(target_frame_size, 0, VPXMIN(max_bits, (int)total_group_bits)); + + // The first layer ARF has its bit allocation assigned. 
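[Note] For the multi_layer_arf path, boost is first pooled per layer depth (arf_depth_boost[]) and a per-depth budget carved out via calculate_boost_bits(); the loop that follows then gives each ARF its pro-rata share of its layer's pool:

    /* One ARF's bits at layer `depth` (restating the switch below): */
    bits = (int)(((int64_t)arf_depth_bits[depth] * gfu_boost) /
                 arf_depth_boost[depth]);
    /* Two ARFs at the same depth with equal boost split the pool evenly. */
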
+ for (idx = frame_index; idx < gop_frames; ++idx) { + switch (gf_group->update_type[idx]) { + case ARF_UPDATE: + gf_group->bit_allocation[idx] = + (int)(((int64_t)arf_depth_bits[gf_group->layer_depth[idx]] * + gf_group->gfu_boost[idx]) / + arf_depth_boost[gf_group->layer_depth[idx]]); + break; + case USE_BUF_FRAME: gf_group->bit_allocation[idx] = 0; break; + default: gf_group->bit_allocation[idx] = target_frame_size; break; + } + } + gf_group->bit_allocation[idx] = 0; + + return; + } + if (oxcf->vbr_corpus_complexity) { av_score = get_distribution_av_err(cpi, twopass); tot_norm_frame_score = calculate_group_score(cpi, av_score, normal_frames); @@ -2298,13 +2400,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, // Allocate bits to the other frames in the group. for (i = 0; i < normal_frames; ++i) { - int arf_idx = 0; if (EOF == input_stats(twopass, &frame_stats)) break; - - if (has_temporal_layers && frame_index == alt_frame_index) { - ++frame_index; - } - if (oxcf->vbr_corpus_complexity) { this_frame_score = calculate_norm_frame_score(cpi, twopass, oxcf, &frame_stats, av_score); @@ -2318,21 +2414,9 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, target_frame_size -= last_frame_reduction; } - if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) { - mid_boost_bits += (target_frame_size >> 4); - target_frame_size -= (target_frame_size >> 4); - - if (frame_index <= mid_frame_idx) arf_idx = 1; - } - gf_group->arf_update_idx[frame_index] = arf_buffer_indices[arf_idx]; - gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx]; - target_frame_size = clamp(target_frame_size, 0, VPXMIN(max_bits, (int)total_group_bits)); - gf_group->update_type[frame_index] = LF_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; - gf_group->bit_allocation[frame_index] = target_frame_size; ++frame_index; } @@ -2344,27 +2428,6 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, // We need to configure the frame at the end of the sequence + 1 that will be // the start frame for the next group. Otherwise prior to the call to // vp9_rc_get_second_pass_params() the data will be undefined. - gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0]; - gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0]; - - if (rc->source_alt_ref_pending) { - gf_group->update_type[frame_index] = OVERLAY_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; - - // Final setup for second arf and its overlay. - if (cpi->multi_arf_enabled) { - gf_group->bit_allocation[2] = - gf_group->bit_allocation[mid_frame_idx] + mid_boost_bits; - gf_group->update_type[mid_frame_idx] = OVERLAY_UPDATE; - gf_group->bit_allocation[mid_frame_idx] = 0; - } - } else { - gf_group->update_type[frame_index] = GF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_STD; - } - - // Note whether multi-arf was enabled this group for next time. - cpi->multi_arf_last_grp_enabled = cpi->multi_arf_enabled; } // Adjusts the ARNF filter for a GF group. 
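Stepping back to the bit-allocation switch above: its ARF_UPDATE arm reduces to a small piece of integer arithmetic in which every alt-ref at a given layer depth takes a slice of that depth's budget proportional to its own boost. The helper below is a hedged restatement for readers, not code from the patch.

    #include <stdint.h>
    /* Share of a layer's budget for one ARF: arf_depth_bits scaled by this
     * frame's gfu_boost relative to the total boost at its layer depth. */
    int arf_frame_bits(int arf_depth_bits, int frame_gfu_boost,
                       int arf_depth_boost_total) {
      return (int)(((int64_t)arf_depth_bits * frame_gfu_boost) /
                   arf_depth_boost_total);
    }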
@@ -2376,23 +2439,223 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise, twopass->arnr_strength_adjustment = 0; - if ((section_zeromv < 0.10) || (section_noise <= (SECTION_NOISE_DEF * 0.75))) + if (section_noise < 150) { twopass->arnr_strength_adjustment -= 1; + if (section_noise < 75) twopass->arnr_strength_adjustment -= 1; + } else if (section_noise > 250) + twopass->arnr_strength_adjustment += 1; + if (section_zeromv > 0.50) twopass->arnr_strength_adjustment += 1; } // Analyse and define a gf/arf group. -#define ARF_DECAY_BREAKOUT 0.10 #define ARF_ABS_ZOOM_THRESH 4.0 -static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { +#define MAX_GF_BOOST 5400 + +typedef struct RANGE { + int min; + int max; +} RANGE; + +static int get_gop_coding_frame_num( + int *use_alt_ref, const FRAME_INFO *frame_info, + const FIRST_PASS_INFO *first_pass_info, const RATE_CONTROL *rc, + int gf_start_show_idx, const RANGE *active_gf_interval, + double gop_intra_factor, int lag_in_frames) { + double loop_decay_rate = 1.00; + double mv_ratio_accumulator = 0.0; + double this_frame_mv_in_out = 0.0; + double mv_in_out_accumulator = 0.0; + double abs_mv_in_out_accumulator = 0.0; + double sr_accumulator = 0.0; + // Motion breakout threshold for loop below depends on image size. + double mv_ratio_accumulator_thresh = + (frame_info->frame_height + frame_info->frame_width) / 4.0; + double zero_motion_accumulator = 1.0; + int gop_coding_frames; + + *use_alt_ref = 1; + gop_coding_frames = 0; + while (gop_coding_frames < rc->static_scene_max_gf_interval && + gop_coding_frames < rc->frames_to_key) { + const FIRSTPASS_STATS *next_next_frame; + const FIRSTPASS_STATS *next_frame; + int flash_detected; + ++gop_coding_frames; + + next_frame = fps_get_frame_stats(first_pass_info, + gf_start_show_idx + gop_coding_frames); + if (next_frame == NULL) { + break; + } + + // Test for the case where there is a brief flash but the prediction + // quality back to an earlier frame is then restored. + next_next_frame = fps_get_frame_stats( + first_pass_info, gf_start_show_idx + gop_coding_frames + 1); + flash_detected = detect_flash_from_frame_stats(next_next_frame); + + // Update the motion related elements to the boost calculation. + accumulate_frame_motion_stats( + next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, + &abs_mv_in_out_accumulator, &mv_ratio_accumulator); + + // Monitor for static sections. + if ((rc->frames_since_key + gop_coding_frames - 1) > 1) { + zero_motion_accumulator = + VPXMIN(zero_motion_accumulator, + get_zero_motion_factor(frame_info, next_frame)); + } + + // Accumulate the effect of prediction quality decay. + if (!flash_detected) { + double last_loop_decay_rate = loop_decay_rate; + loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame); + + // Break clause to detect very still sections after motion. For example, + // a static image after a fade or other transition. + if (gop_coding_frames > rc->min_gf_interval && loop_decay_rate >= 0.999 && + last_loop_decay_rate < 0.9) { + int still_interval = 5; + if (check_transition_to_still(first_pass_info, + gf_start_show_idx + gop_coding_frames, + still_interval)) { + *use_alt_ref = 0; + break; + } + } + + // Update the accumulator for second ref error difference. + // This is intended to give an indication of how much the coded error is + // increasing over time. 
+      if (gop_coding_frames == 1) {
+        sr_accumulator += next_frame->coded_error;
+      } else {
+        sr_accumulator +=
+            (next_frame->sr_coded_error - next_frame->coded_error);
+      }
+    }
+
+    // Break out conditions.
+    // Break at active_gf_interval->max unless almost totally static.
+    //
+    // Note that the addition of a test of rc->source_alt_ref_active is
+    // deliberate. The effect of this is that after a normal altref group even
+    // if the material is static there will be one normal length GF group
+    // before allowing longer GF groups. The reason for this is that in cases
+    // such as slide shows where slides are separated by a complex transition
+    // such as a fade, the arf group spanning the transition may not be coded
+    // at a very high quality and hence this frame (with its overlay) is a
+    // poor golden frame to use for an extended group.
+    if ((gop_coding_frames >= active_gf_interval->max) &&
+        ((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) {
+      break;
+    }
+    if (
+        // Don't break out with a very short interval.
+        (gop_coding_frames >= active_gf_interval->min) &&
+        // If possible don't break very close to a kf
+        ((rc->frames_to_key - gop_coding_frames) >= rc->min_gf_interval) &&
+        (gop_coding_frames & 0x01) && (!flash_detected) &&
+        ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
+         (abs_mv_in_out_accumulator > ARF_ABS_ZOOM_THRESH) ||
+         (sr_accumulator > gop_intra_factor * next_frame->intra_error))) {
+      break;
+    }
+  }
+  *use_alt_ref &= zero_motion_accumulator < 0.995;
+  *use_alt_ref &= gop_coding_frames < lag_in_frames;
+  *use_alt_ref &= gop_coding_frames >= rc->min_gf_interval;
+  return gop_coding_frames;
+}
+
+static RANGE get_active_gf_inverval_range(
+    const FRAME_INFO *frame_info, const RATE_CONTROL *rc, int arf_active_or_kf,
+    int gf_start_show_idx, int active_worst_quality, int last_boosted_qindex) {
+  RANGE active_gf_interval;
+#if CONFIG_RATE_CTRL
+  (void)frame_info;
+  (void)gf_start_show_idx;
+  (void)active_worst_quality;
+  (void)last_boosted_qindex;
+  active_gf_interval.min = rc->min_gf_interval + arf_active_or_kf + 2;
+
+  active_gf_interval.max = 16 + arf_active_or_kf;
+
+  if ((active_gf_interval.max <= rc->frames_to_key) &&
+      (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) {
+    active_gf_interval.min = rc->frames_to_key / 2;
+    active_gf_interval.max = rc->frames_to_key / 2;
+  }
+#else
+  int int_max_q = (int)(vp9_convert_qindex_to_q(active_worst_quality,
+                                                frame_info->bit_depth));
+  int q_term = (gf_start_show_idx == 0)
+                   ? int_max_q / 32
+                   : (int)(vp9_convert_qindex_to_q(last_boosted_qindex,
+                                                   frame_info->bit_depth) /
+                           6);
+  active_gf_interval.min =
+      rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200);
+  active_gf_interval.min =
+      VPXMIN(active_gf_interval.min, rc->max_gf_interval + arf_active_or_kf);
+
+  // The value chosen depends on the active Q range. At low Q we have
+  // bits to spare and are better with a smaller interval and smaller boost.
+  // At high Q when there are few bits to spare we are better with a longer
+  // interval to spread the cost of the GF.
+  active_gf_interval.max = 11 + arf_active_or_kf + VPXMIN(5, q_term);
+
+  // Force max GF interval to be odd.
+  active_gf_interval.max = active_gf_interval.max | 0x01;
+
+  // We have: active_gf_interval.min <=
+  // rc->max_gf_interval + arf_active_or_kf.
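To make this bound concrete, here is a non-normative walk-through of the non-CONFIG_RATE_CTRL arithmetic with invented inputs (the real values come from vp9_convert_qindex_to_q() and rate-control state), before the clamps against max_gf_interval and frames_to_key that follow.

    #include <stdio.h>
    #define VPXMIN(a, b) ((a) < (b) ? (a) : (b))
    int main(void) {
      /* Invented inputs, illustration only. */
      const int min_gf_interval = 4, arf_active_or_kf = 0;
      const int int_max_q = 200; /* Q at the active worst quality index. */
      const int q_term = 48 / 6; /* Q at last_boosted_qindex, divided by 6. */
      int min = min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200);
      int max = 11 + arf_active_or_kf + VPXMIN(5, q_term);
      max |= 0x01; /* Force the maximum interval to be odd. */
      printf("active_gf_interval = [%d, %d]\n", min, max); /* [5, 17] */
      return 0;
    }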
+  if (active_gf_interval.max < active_gf_interval.min) {
+    active_gf_interval.max = active_gf_interval.min;
+  } else {
+    active_gf_interval.max =
+        VPXMIN(active_gf_interval.max, rc->max_gf_interval + arf_active_or_kf);
+  }
+
+  // Would the active max drop us out just before or near the next kf?
+  if ((active_gf_interval.max <= rc->frames_to_key) &&
+      (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) {
+    active_gf_interval.max = rc->frames_to_key / 2;
+  }
+  active_gf_interval.max =
+      VPXMAX(active_gf_interval.max, active_gf_interval.min);
+#endif
+  return active_gf_interval;
+}
+
+static int get_arf_layers(int multi_layer_arf, int max_layers,
+                          int coding_frame_num) {
+  assert(max_layers <= MAX_ARF_LAYERS);
+  if (multi_layer_arf) {
+    int layers = 0;
+    int i;
+    for (i = coding_frame_num; i > 0; i >>= 1) {
+      ++layers;
+    }
+    layers = VPXMIN(max_layers, layers);
+    return layers;
+  } else {
+    return 1;
+  }
+}
+
+static void define_gf_group(VP9_COMP *cpi, int gf_start_show_idx) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
   VP9EncoderConfig *const oxcf = &cpi->oxcf;
   TWO_PASS *const twopass = &cpi->twopass;
-  FIRSTPASS_STATS next_frame;
+  const FRAME_INFO *frame_info = &cpi->frame_info;
+  const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;
   const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
-  int i;
+  int gop_coding_frames;

   double gf_group_err = 0.0;
   double gf_group_raw_error = 0.0;
@@ -2401,30 +2664,21 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   double gf_group_inactive_zone_rows = 0.0;
   double gf_group_inter = 0.0;
   double gf_group_motion = 0.0;
-  double gf_first_frame_err = 0.0;
-  double mod_frame_err = 0.0;
-  double mv_ratio_accumulator = 0.0;
-  double zero_motion_accumulator = 1.0;
-  double loop_decay_rate = 1.00;
-  double last_loop_decay_rate = 1.00;
+  int allow_alt_ref = is_altref_enabled(cpi);
+  int use_alt_ref;

-  double this_frame_mv_in_out = 0.0;
-  double mv_in_out_accumulator = 0.0;
-  double abs_mv_in_out_accumulator = 0.0;
-  double mv_ratio_accumulator_thresh;
-  double abs_mv_in_out_thresh;
-  double sr_accumulator = 0.0;
-  const double av_err = get_distribution_av_err(cpi, twopass);
-  unsigned int allow_alt_ref = is_altref_enabled(cpi);
-
-  int flash_detected;
-  int active_max_gf_interval;
-  int active_min_gf_interval;
   int64_t gf_group_bits;
   int gf_arf_bits;
   const int is_key_frame = frame_is_intra_only(cm);
+  // If this is a key frame or the overlay from a previous arf then
+  // the error score / cost of this frame has already been accounted for.
   const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
+  int is_alt_ref_flash = 0;
+
+  double gop_intra_factor;
+  int gop_frames;
+  RANGE active_gf_interval;

   // Reset the GF group data structures unless this is a key
   // frame in which case it will already have been done.
@@ -2433,221 +2687,155 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   }

   vpx_clear_system_state();
-  vp9_zero(next_frame);

-  // Load stats for the current frame.
-  mod_frame_err =
-      calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
+  active_gf_interval = get_active_gf_inverval_range(
+      frame_info, rc, arf_active_or_kf, gf_start_show_idx,
+      twopass->active_worst_quality, rc->last_boosted_qindex);

-  // Note the error of the frame at the start of the group. This will be
-  // the GF frame error if we code a normal gf.
- gf_first_frame_err = mod_frame_err; - - // If this is a key frame or the overlay from a previous arf then - // the error score / cost of this frame has already been accounted for. - if (arf_active_or_kf) { - gf_group_err -= gf_first_frame_err; - gf_group_raw_error -= this_frame->coded_error; - gf_group_noise -= this_frame->frame_noise_energy; - gf_group_skip_pct -= this_frame->intra_skip_pct; - gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows; - gf_group_inter -= this_frame->pcnt_inter; - gf_group_motion -= this_frame->pcnt_motion; + if (cpi->multi_layer_arf) { + int arf_layers = get_arf_layers(cpi->multi_layer_arf, oxcf->enable_auto_arf, + active_gf_interval.max); + gop_intra_factor = 1.0 + 0.25 * arf_layers; + } else { + gop_intra_factor = 1.0; } - // Motion breakout threshold for loop below depends on image size. - mv_ratio_accumulator_thresh = - (cpi->initial_height + cpi->initial_width) / 4.0; - abs_mv_in_out_thresh = ARF_ABS_ZOOM_THRESH; - - // Set a maximum and minimum interval for the GF group. - // If the image appears almost completely static we can extend beyond this. { - int int_max_q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality, - cpi->common.bit_depth)); - int int_lbq = (int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex, - cpi->common.bit_depth)); - active_min_gf_interval = - rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200); - active_min_gf_interval = - VPXMIN(active_min_gf_interval, rc->max_gf_interval + arf_active_or_kf); - - if (cpi->multi_arf_allowed) { - active_max_gf_interval = rc->max_gf_interval; - } else { - // The value chosen depends on the active Q range. At low Q we have - // bits to spare and are better with a smaller interval and smaller boost. - // At high Q when there are few bits to spare we are better with a longer - // interval to spread the cost of the GF. - active_max_gf_interval = 12 + arf_active_or_kf + VPXMIN(4, (int_lbq / 6)); - - // We have: active_min_gf_interval <= - // rc->max_gf_interval + arf_active_or_kf. - if (active_max_gf_interval < active_min_gf_interval) { - active_max_gf_interval = active_min_gf_interval; - } else { - active_max_gf_interval = VPXMIN(active_max_gf_interval, - rc->max_gf_interval + arf_active_or_kf); - } - - // Would the active max drop us out just before the near the next kf? - if ((active_max_gf_interval <= rc->frames_to_key) && - (active_max_gf_interval >= (rc->frames_to_key - rc->min_gf_interval))) - active_max_gf_interval = rc->frames_to_key / 2; - } - } - - i = 0; - while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) { - ++i; - - // Accumulate error score of frames in this gf group. - mod_frame_err = - calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err); - gf_group_err += mod_frame_err; - gf_group_raw_error += this_frame->coded_error; - gf_group_noise += this_frame->frame_noise_energy; - gf_group_skip_pct += this_frame->intra_skip_pct; - gf_group_inactive_zone_rows += this_frame->inactive_zone_rows; - gf_group_inter += this_frame->pcnt_inter; - gf_group_motion += this_frame->pcnt_motion; - - if (EOF == input_stats(twopass, &next_frame)) break; - - // Test for the case where there is a brief flash but the prediction - // quality back to an earlier frame is then restored. - flash_detected = detect_flash(twopass, 0); - - // Update the motion related elements to the boost calculation. 
- accumulate_frame_motion_stats( - &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator); - - // Accumulate the effect of prediction quality decay. - if (!flash_detected) { - last_loop_decay_rate = loop_decay_rate; - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - - // Monitor for static sections. - zero_motion_accumulator = VPXMIN( - zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame)); - - // Break clause to detect very still sections after motion. For example, - // a static image after a fade or other transition. - if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, - last_loop_decay_rate)) { - allow_alt_ref = 0; - break; - } - - // Update the accumulator for second ref error difference. - // This is intended to give an indication of how much the coded error is - // increasing over time. - if (i == 1) { - sr_accumulator += next_frame.coded_error; - } else { - sr_accumulator += (next_frame.sr_coded_error - next_frame.coded_error); - } - } - - // Break out conditions. - if ( - // Break at active_max_gf_interval unless almost totally static. - ((i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) || - ( - // Don't break out with a very short interval. - (i >= active_min_gf_interval) && - // If possible dont break very close to a kf - ((rc->frames_to_key - i) >= rc->min_gf_interval) && - (!flash_detected) && - ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) || - (abs_mv_in_out_accumulator > abs_mv_in_out_thresh) || - (sr_accumulator > next_frame.intra_error)))) { - break; - } - - *this_frame = next_frame; + gop_coding_frames = get_gop_coding_frame_num( + &use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx, + &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames); + use_alt_ref &= allow_alt_ref; } // Was the group length constrained by the requirement for a new KF? - rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0; + rc->constrained_gf_group = (gop_coding_frames >= rc->frames_to_key) ? 1 : 0; // Should we use the alternate reference frame. - if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) && - (i >= rc->min_gf_interval)) { - const int forward_frames = (rc->frames_to_key - i >= i - 1) - ? i - 1 - : VPXMAX(0, rc->frames_to_key - i); + if (use_alt_ref) { + const int f_frames = + (rc->frames_to_key - gop_coding_frames >= gop_coding_frames - 1) + ? gop_coding_frames - 1 + : VPXMAX(0, rc->frames_to_key - gop_coding_frames); + const int b_frames = gop_coding_frames - 1; + const int avg_inter_frame_qindex = rc->avg_frame_qindex[INTER_FRAME]; + // TODO(angiebird): figure out why arf's location is assigned this way + const int arf_show_idx = VPXMIN(gf_start_show_idx + gop_coding_frames + 1, + fps_get_num_frames(first_pass_info)); // Calculate the boost for alt ref. - rc->gfu_boost = calc_arf_boost(cpi, forward_frames, (i - 1)); + rc->gfu_boost = + compute_arf_boost(frame_info, first_pass_info, arf_show_idx, f_frames, + b_frames, avg_inter_frame_qindex); rc->source_alt_ref_pending = 1; - - // Test to see if multi arf is appropriate. - cpi->multi_arf_enabled = - (cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) && - (zero_motion_accumulator < 0.995)) - ? 
1 - : 0; } else { - rc->gfu_boost = calc_arf_boost(cpi, 0, (i - 1)); + const int f_frames = gop_coding_frames - 1; + const int b_frames = 0; + const int avg_inter_frame_qindex = rc->avg_frame_qindex[INTER_FRAME]; + // TODO(angiebird): figure out why arf's location is assigned this way + const int gld_show_idx = + VPXMIN(gf_start_show_idx + 1, fps_get_num_frames(first_pass_info)); + const int arf_boost = + compute_arf_boost(frame_info, first_pass_info, gld_show_idx, f_frames, + b_frames, avg_inter_frame_qindex); + rc->gfu_boost = VPXMIN(MAX_GF_BOOST, arf_boost); rc->source_alt_ref_pending = 0; } +#define LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR 0.2 + rc->arf_active_best_quality_adjustment_factor = 1.0; + rc->arf_increase_active_best_quality = 0; + + if (!is_lossless_requested(&cpi->oxcf)) { + if (rc->frames_since_key >= rc->frames_to_key) { + // Increase the active best quality in the second half of key frame + // interval. + rc->arf_active_best_quality_adjustment_factor = + LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR + + (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) * + (rc->frames_to_key - gop_coding_frames) / + (VPXMAX(1, ((rc->frames_to_key + rc->frames_since_key) / 2 - + gop_coding_frames))); + rc->arf_increase_active_best_quality = 1; + } else if ((rc->frames_to_key - gop_coding_frames) > 0) { + // Reduce the active best quality in the first half of key frame interval. + rc->arf_active_best_quality_adjustment_factor = + LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR + + (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) * + (rc->frames_since_key + gop_coding_frames) / + (VPXMAX(1, (rc->frames_to_key + rc->frames_since_key) / 2 + + gop_coding_frames)); + rc->arf_increase_active_best_quality = -1; + } + } + #ifdef AGGRESSIVE_VBR // Limit maximum boost based on interval length. - rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 140); + rc->gfu_boost = VPXMIN((int)rc->gfu_boost, gop_coding_frames * 140); #else - rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 200); + rc->gfu_boost = VPXMIN((int)rc->gfu_boost, gop_coding_frames * 200); #endif - // Set the interval until the next gf. - rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending); + // Cap the ARF boost when perceptual quality AQ mode is enabled. This is + // designed to improve the perceptual quality of high value content and to + // make consistent quality across consecutive frames. It will hurt objective + // quality. + if (oxcf->aq_mode == PERCEPTUAL_AQ) + rc->gfu_boost = VPXMIN(rc->gfu_boost, MIN_ARF_GF_BOOST); - // Only encode alt reference frame in temporal base layer. So - // baseline_gf_interval should be multiple of a temporal layer group - // (typically the frame distance between two base layer frames) - if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) { - int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1; - int new_gf_interval = (rc->baseline_gf_interval + count) & (~count); + rc->baseline_gf_interval = gop_coding_frames - rc->source_alt_ref_pending; + + if (rc->source_alt_ref_pending) + is_alt_ref_flash = detect_flash(twopass, rc->baseline_gf_interval); + + { + const double av_err = get_distribution_av_err(cpi, twopass); + const double mean_mod_score = twopass->mean_mod_score; + // If the first frame is a key frame or the overlay from a previous arf then + // the error score / cost of this frame has already been accounted for. + int start_idx = arf_active_or_kf ? 
1 : 0;
     int j;
-    for (j = 0; j < new_gf_interval - rc->baseline_gf_interval; ++j) {
-      if (EOF == input_stats(twopass, this_frame)) break;
-      gf_group_err +=
-          calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
-      gf_group_raw_error += this_frame->coded_error;
-      gf_group_noise += this_frame->frame_noise_energy;
-      gf_group_skip_pct += this_frame->intra_skip_pct;
-      gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
-      gf_group_inter += this_frame->pcnt_inter;
-      gf_group_motion += this_frame->pcnt_motion;
+    for (j = start_idx; j < gop_coding_frames; ++j) {
+      int show_idx = gf_start_show_idx + j;
+      const FIRSTPASS_STATS *frame_stats =
+          fps_get_frame_stats(first_pass_info, show_idx);
+      // Accumulate error score of frames in this gf group.
+      gf_group_err += calc_norm_frame_score(oxcf, frame_info, frame_stats,
                                            mean_mod_score, av_err);
+      gf_group_raw_error += frame_stats->coded_error;
+      gf_group_noise += frame_stats->frame_noise_energy;
+      gf_group_skip_pct += frame_stats->intra_skip_pct;
+      gf_group_inactive_zone_rows += frame_stats->inactive_zone_rows;
+      gf_group_inter += frame_stats->pcnt_inter;
+      gf_group_motion += frame_stats->pcnt_motion;
     }
-    rc->baseline_gf_interval = new_gf_interval;
   }

-  rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-
-  // Reset the file position.
-  reset_fpf_position(twopass, start_pos);
-
   // Calculate the bits to be allocated to the gf/arf group as a whole
   gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);

+  gop_frames =
+      rc->baseline_gf_interval + rc->source_alt_ref_pending - arf_active_or_kf;
+
+  // Store the average noise level measured for the group
+  // TODO(any): Experiment with removal of else condition (gop_frames = 0) so
+  // that consumption of group noise energy is based on previous gf group
+  if (gop_frames > 0)
+    twopass->gf_group.group_noise_energy = (int)(gf_group_noise / gop_frames);
+  else
+    twopass->gf_group.group_noise_energy = 0;
+
   // Calculate an estimate of the maxq needed for the group.
   // We are more aggressive about correcting for sections
   // where there could be significant overshoot than for easier
   // sections where we do not wish to risk creating an overshoot
   // of the allocated bit budget.
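As a quick restatement of the accounting above: gop_frames counts coding frames, so the per-group averages that feed the maxq estimate below divide by the show frames plus any pending alt-ref, minus the frame already charged to the previous group. A minimal sketch, with invented example values in the comment:

    /* e.g. an 8-frame interval with an alt-ref pending that does not follow
     * a KF/overlay gives 8 + 1 - 0 = 9 coding frames; following one, 8. */
    int gop_frame_count(int baseline_gf_interval, int source_alt_ref_pending,
                        int arf_active_or_kf) {
      return baseline_gf_interval + source_alt_ref_pending - arf_active_or_kf;
    }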
   if ((cpi->oxcf.rc_mode != VPX_Q) && (rc->baseline_gf_interval > 1)) {
-    const int vbr_group_bits_per_frame =
-        (int)(gf_group_bits / rc->baseline_gf_interval);
-    const double group_av_err = gf_group_raw_error / rc->baseline_gf_interval;
-    const double group_av_noise = gf_group_noise / rc->baseline_gf_interval;
-    const double group_av_skip_pct =
-        gf_group_skip_pct / rc->baseline_gf_interval;
-    const double group_av_inactive_zone =
-        ((gf_group_inactive_zone_rows * 2) /
-         (rc->baseline_gf_interval * (double)cm->mb_rows));
+    const int vbr_group_bits_per_frame = (int)(gf_group_bits / gop_frames);
+    const double group_av_err = gf_group_raw_error / gop_frames;
+    const double group_av_noise = gf_group_noise / gop_frames;
+    const double group_av_skip_pct = gf_group_skip_pct / gop_frames;
+    const double group_av_inactive_zone = ((gf_group_inactive_zone_rows * 2) /
+                                           (gop_frames * (double)cm->mb_rows));
     int tmp_q = get_twopass_worst_quality(
         cpi, group_av_err, (group_av_skip_pct + group_av_inactive_zone),
         group_av_noise, vbr_group_bits_per_frame);
@@ -2663,20 +2851,23 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {

   // Context Adjustment of ARNR filter strength
   if (rc->baseline_gf_interval > 1) {
-    adjust_group_arnr_filter(cpi, (gf_group_noise / rc->baseline_gf_interval),
-                             (gf_group_inter / rc->baseline_gf_interval),
-                             (gf_group_motion / rc->baseline_gf_interval));
+    adjust_group_arnr_filter(cpi, (gf_group_noise / gop_frames),
+                             (gf_group_inter / gop_frames),
+                             (gf_group_motion / gop_frames));
   } else {
     twopass->arnr_strength_adjustment = 0;
   }

   // Calculate the extra bits to be used for boosted frame(s)
-  gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, rc->gfu_boost,
-                                     gf_group_bits);
+  gf_arf_bits = calculate_boost_bits((rc->baseline_gf_interval - 1),
+                                     rc->gfu_boost, gf_group_bits);

   // Adjust KF group bits and error remaining.
   twopass->kf_group_error_left -= gf_group_err;

+  // Decide GOP structure.
+  define_gf_group_structure(cpi);
+
   // Allocate bits to each of the frames in the GF group.
   allocate_gf_group_bits(cpi, gf_group_bits, gf_arf_bits);

@@ -2684,10 +2875,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   reset_fpf_position(twopass, start_pos);

   // Calculate a section intra ratio used in setting max loop filter.
-  if (cpi->common.frame_type != KEY_FRAME) {
-    twopass->section_intra_rating = calculate_section_intra_ratio(
-        start_pos, twopass->stats_in_end, rc->baseline_gf_interval);
-  }
+  twopass->section_intra_rating = calculate_section_intra_ratio(
+      start_pos, twopass->stats_in_end, rc->baseline_gf_interval);

   if (oxcf->resize_mode == RESIZE_DYNAMIC) {
     // Default to starting GF groups at normal frame size.
@@ -2698,19 +2887,82 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   twopass->rolling_arf_group_target_bits = 0;
   twopass->rolling_arf_group_actual_bits = 0;
 #endif
+  rc->preserve_arf_as_gld = rc->preserve_next_arf_as_gld;
+  rc->preserve_next_arf_as_gld = 0;
+  // If the alt ref frame is a flash, do not set preserve_arf_as_gld
+  if (!is_lossless_requested(&cpi->oxcf) && !cpi->use_svc &&
+      cpi->oxcf.aq_mode == NO_AQ && cpi->multi_layer_arf && !is_alt_ref_flash)
+    rc->preserve_next_arf_as_gld = 1;
+}
+
+// Intra / Inter threshold very low
+#define VERY_LOW_II 1.5
+// In clean slide transitions we expect a sharp single frame spike in error.
+#define ERROR_SPIKE 5.0
+
+// Slide show transition detection.
+// Tests for the case where there is very low error on either side of the
+// current frame but much higher just for this frame. This can help detect key
+// frames in slide shows even where the slides are pictures of different sizes.
+// Also requires that intra and inter errors are very similar to help eliminate
+// harmful false positives.
+// It will not help if the transition is a fade or other multi-frame effect.
+static int slide_transition(const FIRSTPASS_STATS *this_frame,
+                            const FIRSTPASS_STATS *last_frame,
+                            const FIRSTPASS_STATS *next_frame) {
+  return (this_frame->intra_error < (this_frame->coded_error * VERY_LOW_II)) &&
+         (this_frame->coded_error > (last_frame->coded_error * ERROR_SPIKE)) &&
+         (this_frame->coded_error > (next_frame->coded_error * ERROR_SPIKE));
+}
+
+// This test looks for anomalous changes in the nature of the intra signal
+// related to the previous and next frame as an indicator for coding a key
+// frame. This test serves to detect some additional scene cuts,
+// especially in lowish motion and low contrast sections, that are missed
+// by the other tests.
+static int intra_step_transition(const FIRSTPASS_STATS *this_frame,
+                                 const FIRSTPASS_STATS *last_frame,
+                                 const FIRSTPASS_STATS *next_frame) {
+  double last_ii_ratio;
+  double this_ii_ratio;
+  double next_ii_ratio;
+  double last_pcnt_intra = 1.0 - last_frame->pcnt_inter;
+  double this_pcnt_intra = 1.0 - this_frame->pcnt_inter;
+  double next_pcnt_intra = 1.0 - next_frame->pcnt_inter;
+  double mod_this_intra = this_pcnt_intra + this_frame->pcnt_neutral;
+
+  // Calculate the ii ratio for this frame, the last frame and the next frame.
+  last_ii_ratio =
+      last_frame->intra_error / DOUBLE_DIVIDE_CHECK(last_frame->coded_error);
+  this_ii_ratio =
+      this_frame->intra_error / DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
+  next_ii_ratio =
+      next_frame->intra_error / DOUBLE_DIVIDE_CHECK(next_frame->coded_error);
+
+  // Return true if the intra/inter ratio for the current frame is
+  // low but better in the next and previous frame and the relative usage of
+  // intra in the current frame is markedly higher than the last and next frame.
+  if ((this_ii_ratio < 2.0) && (last_ii_ratio > 2.25) &&
+      (next_ii_ratio > 2.25) && (this_pcnt_intra > (3 * last_pcnt_intra)) &&
+      (this_pcnt_intra > (3 * next_pcnt_intra)) &&
+      ((this_pcnt_intra > 0.075) || (mod_this_intra > 0.85))) {
+    return 1;
+    // Very low inter intra ratio (i.e. not much gain from inter coding), most
+    // blocks neutral on coding method and better inter prediction on either
+    // side
+  } else if ((this_ii_ratio < 1.25) && (mod_this_intra > 0.85) &&
+             (this_ii_ratio < last_ii_ratio * 0.9) &&
+             (this_ii_ratio < next_ii_ratio * 0.9)) {
+    return 1;
+  } else {
+    return 0;
+  }
+}

-
-// Threshold for use of the lagging second reference frame. High second ref
-// usage may point to a transient event like a flash or occlusion rather than
-// a real scene cut.
-#define SECOND_REF_USEAGE_THRESH 0.1
 // Minimum % intra coding observed in first pass (1.0 = 100%)
 #define MIN_INTRA_LEVEL 0.25
-// Minimum ratio between the % of intra coding and inter coding in the first
-// pass after discounting neutral blocks (discounting neutral blocks in this
-// way helps catch scene cuts in clips with very flat areas or letter box
-// format clips with image padding.
-#define INTRA_VS_INTER_THRESH 2.0
+// Threshold for use of the lagging second reference frame. Scene cuts do not
+// usually have a high second ref usage.
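Before the key-frame thresholds that follow, a worked check of slide_transition() with invented first-pass error values may help; the numbers are illustrative only and assume the VERY_LOW_II and ERROR_SPIKE constants defined above.

    #include <assert.h>
    int main(void) {
      /* Invented errors around a clean slide change. */
      const double last_coded = 100.0, this_coded = 900.0, next_coded = 120.0;
      const double this_intra = 1200.0;
      assert(this_intra < this_coded * 1.5); /* intra ~ inter (VERY_LOW_II) */
      assert(this_coded > last_coded * 5.0); /* spike vs last (ERROR_SPIKE) */
      assert(this_coded > next_coded * 5.0); /* spike vs next (ERROR_SPIKE) */
      return 0; /* All three clauses hold, so the frame is flagged. */
    }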
+#define SECOND_REF_USEAGE_THRESH 0.2 // Hard threshold where the first pass chooses intra for almost all blocks. // In such a case even if the frame is not a scene cut coding a key frame // may be a good option. @@ -2718,83 +2970,75 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Maximum threshold for the relative ratio of intra error score vs best // inter error score. #define KF_II_ERR_THRESHOLD 2.5 -// In real scene cuts there is almost always a sharp change in the intra -// or inter error score. -#define ERR_CHANGE_THRESHOLD 0.4 -// For real scene cuts we expect an improvment in the intra inter error -// ratio in the next frame. -#define II_IMPROVEMENT_THRESHOLD 3.5 #define KF_II_MAX 128.0 #define II_FACTOR 12.5 // Test for very low intra complexity which could cause false key frames #define V_LOW_INTRA 0.5 -static int test_candidate_kf(TWO_PASS *twopass, - const FIRSTPASS_STATS *last_frame, - const FIRSTPASS_STATS *this_frame, - const FIRSTPASS_STATS *next_frame) { +static int test_candidate_kf(const FIRST_PASS_INFO *first_pass_info, + int show_idx) { + const FIRSTPASS_STATS *last_frame = + fps_get_frame_stats(first_pass_info, show_idx - 1); + const FIRSTPASS_STATS *this_frame = + fps_get_frame_stats(first_pass_info, show_idx); + const FIRSTPASS_STATS *next_frame = + fps_get_frame_stats(first_pass_info, show_idx + 1); int is_viable_kf = 0; double pcnt_intra = 1.0 - this_frame->pcnt_inter; - double modified_pcnt_inter = - this_frame->pcnt_inter - this_frame->pcnt_neutral; // Does the frame satisfy the primary criteria of a key frame? // See above for an explanation of the test criteria. // If so, then examine how well it predicts subsequent frames. - if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && - (next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && + detect_flash_from_frame_stats(next_frame); + if (!detect_flash_from_frame_stats(this_frame) && + !detect_flash_from_frame_stats(next_frame) && + (this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && ((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) || - ((pcnt_intra > MIN_INTRA_LEVEL) && - (pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) && + (slide_transition(this_frame, last_frame, next_frame)) || + (intra_step_transition(this_frame, last_frame, next_frame)) || + (((this_frame->coded_error > (next_frame->coded_error * 1.2)) && + (this_frame->coded_error > (last_frame->coded_error * 1.2))) && + (pcnt_intra > MIN_INTRA_LEVEL) && + ((pcnt_intra + this_frame->pcnt_neutral) > 0.5) && ((this_frame->intra_error / DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) < - KF_II_ERR_THRESHOLD) && - ((fabs(last_frame->coded_error - this_frame->coded_error) / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error) > - ERR_CHANGE_THRESHOLD) || - (fabs(last_frame->intra_error - this_frame->intra_error) / - DOUBLE_DIVIDE_CHECK(this_frame->intra_error) > - ERR_CHANGE_THRESHOLD) || - ((next_frame->intra_error / - DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > - II_IMPROVEMENT_THRESHOLD))))) { + KF_II_ERR_THRESHOLD)))) { int i; - const FIRSTPASS_STATS *start_pos = twopass->stats_in; - FIRSTPASS_STATS local_next_frame = *next_frame; double boost_score = 0.0; double old_boost_score = 0.0; double decay_accumulator = 1.0; // Examine how well the key frame predicts subsequent frames. 
for (i = 0; i < 16; ++i) { - double next_iiratio = (II_FACTOR * local_next_frame.intra_error / - DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error)); + const FIRSTPASS_STATS *frame_stats = + fps_get_frame_stats(first_pass_info, show_idx + 1 + i); + double next_iiratio = (II_FACTOR * frame_stats->intra_error / + DOUBLE_DIVIDE_CHECK(frame_stats->coded_error)); if (next_iiratio > KF_II_MAX) next_iiratio = KF_II_MAX; // Cumulative effect of decay in prediction quality. - if (local_next_frame.pcnt_inter > 0.85) - decay_accumulator *= local_next_frame.pcnt_inter; + if (frame_stats->pcnt_inter > 0.85) + decay_accumulator *= frame_stats->pcnt_inter; else - decay_accumulator *= (0.85 + local_next_frame.pcnt_inter) / 2.0; + decay_accumulator *= (0.85 + frame_stats->pcnt_inter) / 2.0; // Keep a running total. boost_score += (decay_accumulator * next_iiratio); // Test various breakout clauses. - if ((local_next_frame.pcnt_inter < 0.05) || (next_iiratio < 1.5) || - (((local_next_frame.pcnt_inter - local_next_frame.pcnt_neutral) < - 0.20) && + if ((frame_stats->pcnt_inter < 0.05) || (next_iiratio < 1.5) || + (((frame_stats->pcnt_inter - frame_stats->pcnt_neutral) < 0.20) && (next_iiratio < 3.0)) || ((boost_score - old_boost_score) < 3.0) || - (local_next_frame.intra_error < V_LOW_INTRA)) { + (frame_stats->intra_error < V_LOW_INTRA)) { break; } old_boost_score = boost_score; // Get the next frame details - if (EOF == input_stats(twopass, &local_next_frame)) break; + if (show_idx + 1 + i == fps_get_num_frames(first_pass_info) - 1) break; } // If there is tolerable prediction for at least the next 3 frames then @@ -2802,9 +3046,6 @@ static int test_candidate_kf(TWO_PASS *twopass, if (boost_score > 30.0 && (i > 3)) { is_viable_kf = 1; } else { - // Reset the file position - reset_fpf_position(twopass, start_pos); - is_viable_kf = 0; } } @@ -2814,7 +3055,10 @@ static int test_candidate_kf(TWO_PASS *twopass, #define FRAMES_TO_CHECK_DECAY 8 #define MIN_KF_TOT_BOOST 300 -#define KF_BOOST_SCAN_MAX_FRAMES 32 +#define DEFAULT_SCAN_FRAMES_FOR_KF_BOOST 32 +#define MAX_SCAN_FRAMES_FOR_KF_BOOST 48 +#define MIN_SCAN_FRAMES_FOR_KF_BOOST 32 +#define KF_ABS_ZOOM_THRESH 6.0 #ifdef AGGRESSIVE_VBR #define KF_MAX_FRAME_BOOST 80.0 @@ -2824,28 +3068,99 @@ static int test_candidate_kf(TWO_PASS *twopass, #define MAX_KF_TOT_BOOST 5400 #endif -static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - int i, j; +int vp9_get_frames_to_next_key(const VP9EncoderConfig *oxcf, + const FRAME_INFO *frame_info, + const FIRST_PASS_INFO *first_pass_info, + int kf_show_idx, int min_gf_interval) { + double recent_loop_decay[FRAMES_TO_CHECK_DECAY]; + int j; + int frames_to_key; + int max_frames_to_key = first_pass_info->num_frames - kf_show_idx; + max_frames_to_key = VPXMIN(max_frames_to_key, oxcf->key_freq); + + // Initialize the decay rates for the recent frames to check + for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0; + // Find the next keyframe. + if (!oxcf->auto_key) { + frames_to_key = max_frames_to_key; + } else { + frames_to_key = 1; + while (frames_to_key < max_frames_to_key) { + // Provided that we are not at the end of the file... + if (kf_show_idx + frames_to_key + 1 < first_pass_info->num_frames) { + double loop_decay_rate; + double decay_accumulator; + const FIRSTPASS_STATS *next_frame = fps_get_frame_stats( + first_pass_info, kf_show_idx + frames_to_key + 1); + + // Check for a scene cut. 
+ if (test_candidate_kf(first_pass_info, kf_show_idx + frames_to_key)) + break; + + // How fast is the prediction quality decaying? + loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame); + + // We want to know something about the recent past... rather than + // as used elsewhere where we are concerned with decay in prediction + // quality since the last GF or KF. + recent_loop_decay[(frames_to_key - 1) % FRAMES_TO_CHECK_DECAY] = + loop_decay_rate; + decay_accumulator = 1.0; + for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) + decay_accumulator *= recent_loop_decay[j]; + + // Special check for transition or high motion followed by a + // static scene. + if ((frames_to_key - 1) > min_gf_interval && loop_decay_rate >= 0.999 && + decay_accumulator < 0.9) { + int still_interval = oxcf->key_freq - (frames_to_key - 1); + // TODO(angiebird): Figure out why we use "+1" here + int show_idx = kf_show_idx + frames_to_key; + if (check_transition_to_still(first_pass_info, show_idx, + still_interval)) { + break; + } + } + } + ++frames_to_key; + } + } + return frames_to_key; +} + +static void find_next_key_frame(VP9_COMP *cpi, int kf_show_idx) { + int i; RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = &cpi->twopass; GF_GROUP *const gf_group = &twopass->gf_group; const VP9EncoderConfig *const oxcf = &cpi->oxcf; - const FIRSTPASS_STATS first_frame = *this_frame; + const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info; + const FRAME_INFO *frame_info = &cpi->frame_info; const FIRSTPASS_STATS *const start_position = twopass->stats_in; + const FIRSTPASS_STATS *keyframe_stats = + fps_get_frame_stats(first_pass_info, kf_show_idx); FIRSTPASS_STATS next_frame; - FIRSTPASS_STATS last_frame; int kf_bits = 0; - double decay_accumulator = 1.0; + int64_t max_kf_bits; double zero_motion_accumulator = 1.0; + double zero_motion_sum = 0.0; + double zero_motion_avg; + double motion_compensable_sum = 0.0; + double motion_compensable_avg; + int num_frames = 0; + int kf_boost_scan_frames = DEFAULT_SCAN_FRAMES_FOR_KF_BOOST; double boost_score = 0.0; double kf_mod_err = 0.0; + double kf_raw_err = 0.0; double kf_group_err = 0.0; - double recent_loop_decay[FRAMES_TO_CHECK_DECAY]; double sr_accumulator = 0.0; + double abs_mv_in_out_accumulator = 0.0; const double av_err = get_distribution_av_err(cpi, twopass); + const double mean_mod_score = twopass->mean_mod_score; vp9_zero(next_frame); cpi->common.frame_type = KEY_FRAME; + rc->frames_since_key = 0; // Reset the GF group data structures. vp9_zero(*gf_group); @@ -2856,7 +3171,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Clear the alt ref active flag and last group multi arf flags as they // can never be set for a key frame. rc->source_alt_ref_active = 0; - cpi->multi_arf_last_grp_enabled = 0; // KF is always a GF so clear frames till next gf counter. rc->frames_till_gf_update_due = 0; @@ -2866,107 +3180,29 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->kf_group_bits = 0; // Total bits available to kf group twopass->kf_group_error_left = 0.0; // Group modified error score. - kf_mod_err = - calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err); + kf_raw_err = keyframe_stats->intra_error; + kf_mod_err = calc_norm_frame_score(oxcf, frame_info, keyframe_stats, + mean_mod_score, av_err); - // Initialize the decay rates for the recent frames to check - for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0; - - // Find the next keyframe. 
- i = 0; - while (twopass->stats_in < twopass->stats_in_end && - rc->frames_to_key < cpi->oxcf.key_freq) { - // Accumulate kf group error. - kf_group_err += - calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err); - - // Load the next frame's stats. - last_frame = *this_frame; - input_stats(twopass, this_frame); - - // Provided that we are not at the end of the file... - if (cpi->oxcf.auto_key && twopass->stats_in < twopass->stats_in_end) { - double loop_decay_rate; - - // Check for a scene cut. - if (test_candidate_kf(twopass, &last_frame, this_frame, - twopass->stats_in)) - break; - - // How fast is the prediction quality decaying? - loop_decay_rate = get_prediction_decay_rate(cpi, twopass->stats_in); - - // We want to know something about the recent past... rather than - // as used elsewhere where we are concerned with decay in prediction - // quality since the last GF or KF. - recent_loop_decay[i % FRAMES_TO_CHECK_DECAY] = loop_decay_rate; - decay_accumulator = 1.0; - for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) - decay_accumulator *= recent_loop_decay[j]; - - // Special check for transition or high motion followed by a - // static scene. - if (detect_transition_to_still(cpi, i, cpi->oxcf.key_freq - i, - loop_decay_rate, decay_accumulator)) - break; - - // Step on to the next frame. - ++rc->frames_to_key; - - // If we don't have a real key frame within the next two - // key_freq intervals then break out of the loop. - if (rc->frames_to_key >= 2 * cpi->oxcf.key_freq) break; - } else { - ++rc->frames_to_key; - } - ++i; - } + rc->frames_to_key = vp9_get_frames_to_next_key( + oxcf, frame_info, first_pass_info, kf_show_idx, rc->min_gf_interval); // If there is a max kf interval set by the user we must obey it. // We already breakout of the loop above at 2x max. // This code centers the extra kf if the actual natural interval // is between 1x and 2x. - if (cpi->oxcf.auto_key && rc->frames_to_key > cpi->oxcf.key_freq) { - FIRSTPASS_STATS tmp_frame = first_frame; - - rc->frames_to_key /= 2; - - // Reset to the start of the group. - reset_fpf_position(twopass, start_position); - - kf_group_err = 0.0; - - // Rescan to get the correct error data for the forced kf group. - for (i = 0; i < rc->frames_to_key; ++i) { - kf_group_err += - calculate_norm_frame_score(cpi, twopass, oxcf, &tmp_frame, av_err); - input_stats(twopass, &tmp_frame); - } - rc->next_key_frame_forced = 1; - } else if (twopass->stats_in == twopass->stats_in_end || - rc->frames_to_key >= cpi->oxcf.key_freq) { + if (rc->frames_to_key >= cpi->oxcf.key_freq) { rc->next_key_frame_forced = 1; } else { rc->next_key_frame_forced = 0; } - if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) { - int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1; - int new_frame_to_key = (rc->frames_to_key + count) & (~count); - int j; - for (j = 0; j < new_frame_to_key - rc->frames_to_key; ++j) { - if (EOF == input_stats(twopass, this_frame)) break; - kf_group_err += - calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err); - } - rc->frames_to_key = new_frame_to_key; - } - - // Special case for the last key frame of the file. - if (twopass->stats_in >= twopass->stats_in_end) { + for (i = 0; i < rc->frames_to_key; ++i) { + const FIRSTPASS_STATS *frame_stats = + fps_get_frame_stats(first_pass_info, kf_show_idx + i); // Accumulate kf group error. 
-    kf_group_err +=
-        calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
+    kf_group_err += calc_norm_frame_score(oxcf, frame_info, frame_stats,
+                                          mean_mod_score, av_err);
   }

   // Calculate the number of bits that should be assigned to the kf group.
@@ -2991,23 +3227,51 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   }
   twopass->kf_group_bits = VPXMAX(0, twopass->kf_group_bits);

-  // Reset the first pass file position.
-  reset_fpf_position(twopass, start_position);
-
   // Scan through the kf group collating various stats used to determine
   // how many bits to spend on it.
   boost_score = 0.0;

+  for (i = 0; i < VPXMIN(MAX_SCAN_FRAMES_FOR_KF_BOOST, (rc->frames_to_key - 1));
+       ++i) {
+    if (EOF == input_stats(twopass, &next_frame)) break;
+
+    zero_motion_sum += next_frame.pcnt_inter - next_frame.pcnt_motion;
+    motion_compensable_sum +=
+        1 - (double)next_frame.coded_error / next_frame.intra_error;
+    num_frames++;
+  }
+
+  if (num_frames >= MIN_SCAN_FRAMES_FOR_KF_BOOST) {
+    zero_motion_avg = zero_motion_sum / num_frames;
+    motion_compensable_avg = motion_compensable_sum / num_frames;
+    kf_boost_scan_frames = (int)(VPXMAX(64 * zero_motion_avg - 16,
+                                        160 * motion_compensable_avg - 112));
+    kf_boost_scan_frames =
+        VPXMAX(VPXMIN(kf_boost_scan_frames, MAX_SCAN_FRAMES_FOR_KF_BOOST),
+               MIN_SCAN_FRAMES_FOR_KF_BOOST);
+  }
+  reset_fpf_position(twopass, start_position);
+
   for (i = 0; i < (rc->frames_to_key - 1); ++i) {
     if (EOF == input_stats(twopass, &next_frame)) break;

-    if (i <= KF_BOOST_SCAN_MAX_FRAMES) {
+    // The zero motion test here ensures that if we mark a kf group as static
+    // it is static throughout, not just the first KF_BOOST_SCAN_MAX_FRAMES.
+    // It also allows for a larger boost on long static groups.
+    if ((i <= kf_boost_scan_frames) || (zero_motion_accumulator >= 0.99)) {
       double frame_boost;
       double zm_factor;

       // Monitor for static sections.
-      zero_motion_accumulator = VPXMIN(
-          zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+      // For the first frame in the kf group the second ref indicator is
+      // invalid.
+      if (i > 0) {
+        zero_motion_accumulator =
+            VPXMIN(zero_motion_accumulator,
+                   get_zero_motion_factor(&cpi->frame_info, &next_frame));
+      } else {
+        zero_motion_accumulator =
+            next_frame.pcnt_inter - next_frame.pcnt_motion;
+      }

       // Factor 0.75-1.25 based on how much of frame is static.
       zm_factor = (0.75 + (zero_motion_accumulator / 2.0));
@@ -3021,7 +3285,15 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
                          KF_MAX_FRAME_BOOST * zm_factor);

       boost_score += frame_boost;
-      if (frame_boost < 25.00) break;
+
+      // Measure of zoom. Large zoom tends to indicate reduced boost.
+      abs_mv_in_out_accumulator +=
+          fabs(next_frame.mv_in_out_count * next_frame.pcnt_motion);
+
+      if ((frame_boost < 25.00) ||
+          (abs_mv_in_out_accumulator > KF_ABS_ZOOM_THRESH) ||
+          (sr_accumulator > (kf_raw_err * 1.50)))
+        break;
     } else {
       break;
     }
@@ -3033,17 +3305,30 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0);

   // Calculate a section intra ratio used in setting max loop filter.
-  twopass->section_intra_rating = calculate_section_intra_ratio(
+  twopass->key_frame_section_intra_rating = calculate_section_intra_ratio(
       start_position, twopass->stats_in_end, rc->frames_to_key);

-  // Apply various clamps for min and max boost
-  rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
-  rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
-  rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
+  // Special case for static / slide show content but don't apply
+  // if the kf group is very short.
+  if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) {
+    rc->kf_boost = MAX_KF_TOT_BOOST;
+  } else {
+    // Apply various clamps for min and max boost
+    rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
+    rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
+    rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
+  }

   // Work out how many bits to allocate for the key frame itself.
   kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost,
                                  twopass->kf_group_bits);
+  // Based on the spatial complexity, increase the bits allocated to the key
+  // frame.
+  kf_bits +=
+      (int)((twopass->kf_group_bits - kf_bits) * (kf_mod_err / kf_group_err));
+  max_kf_bits =
+      twopass->kf_group_bits - (rc->frames_to_key - 1) * FRAME_OVERHEAD_BITS;
+  max_kf_bits = lclamp(max_kf_bits, 0, INT_MAX);
+  kf_bits = VPXMIN(kf_bits, (int)max_kf_bits);

   twopass->kf_group_bits -= kf_bits;

@@ -3051,6 +3336,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   gf_group->bit_allocation[0] = kf_bits;
   gf_group->update_type[0] = KF_UPDATE;
   gf_group->rf_level[0] = KF_STD;
+  gf_group->layer_depth[0] = 0;

   // Note the total error score of the kf group minus the key frame itself.
   twopass->kf_group_error_left = (kf_group_err - kf_mod_err);

@@ -3066,60 +3352,12 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   }
 }

-// Define the reference buffers that will be updated post encode.
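The key-frame budget cap added above can be restated in one line: the key frame may take at most the group budget minus a per-frame overhead reserved for the remaining frames. A hedged sketch (FRAME_OVERHEAD_BITS is the patch's constant; the helper name is invented):

    #include <stdint.h>
    int64_t kf_bits_ceiling(int64_t kf_group_bits, int frames_to_key,
                            int frame_overhead_bits) {
      int64_t m =
          kf_group_bits - (int64_t)(frames_to_key - 1) * frame_overhead_bits;
      return m < 0 ? 0 : m; /* the patch also caps this at INT_MAX via lclamp */
    }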
-static void configure_buffer_updates(VP9_COMP *cpi) { - TWO_PASS *const twopass = &cpi->twopass; - - cpi->rc.is_src_frame_alt_ref = 0; - switch (twopass->gf_group.update_type[twopass->gf_group.index]) { - case KF_UPDATE: - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 1; - cpi->refresh_alt_ref_frame = 1; - break; - case LF_UPDATE: - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 0; - cpi->refresh_alt_ref_frame = 0; - break; - case GF_UPDATE: - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 1; - cpi->refresh_alt_ref_frame = 0; - break; - case OVERLAY_UPDATE: - cpi->refresh_last_frame = 0; - cpi->refresh_golden_frame = 1; - cpi->refresh_alt_ref_frame = 0; - cpi->rc.is_src_frame_alt_ref = 1; - break; - case ARF_UPDATE: - cpi->refresh_last_frame = 0; - cpi->refresh_golden_frame = 0; - cpi->refresh_alt_ref_frame = 1; - break; - default: assert(0); break; - } - if (is_two_pass_svc(cpi)) { - if (cpi->svc.temporal_layer_id > 0) { - cpi->refresh_last_frame = 0; - cpi->refresh_golden_frame = 0; - } - if (cpi->svc.layer_context[cpi->svc.spatial_layer_id].gold_ref_idx < 0) - cpi->refresh_golden_frame = 0; - if (cpi->alt_ref_source == NULL) cpi->refresh_alt_ref_frame = 0; - } -} - static int is_skippable_frame(const VP9_COMP *cpi) { // If the current frame does not have non-zero motion vector detected in the // first pass, and so do its previous and forward frames, then this frame // can be skipped for partition check, and the partition size is assigned // according to the variance - const SVC *const svc = &cpi->svc; - const TWO_PASS *const twopass = - is_two_pass_svc(cpi) ? &svc->layer_context[svc->spatial_layer_id].twopass - : &cpi->twopass; + const TWO_PASS *const twopass = &cpi->twopass; return (!frame_is_intra_only(&cpi->common) && twopass->stats_in - 2 > twopass->stats_in_start && @@ -3139,11 +3377,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { TWO_PASS *const twopass = &cpi->twopass; GF_GROUP *const gf_group = &twopass->gf_group; FIRSTPASS_STATS this_frame; - - int target_rate; - LAYER_CONTEXT *const lc = - is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id] - : 0; + const int show_idx = cm->current_video_frame; if (!twopass->stats_in) return; @@ -3151,30 +3385,32 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // advance the input pointer as we already have what we need. if (gf_group->update_type[gf_group->index] == ARF_UPDATE) { int target_rate; - configure_buffer_updates(cpi); + + vp9_zero(this_frame); + this_frame = + cpi->twopass.stats_in_start[cm->current_video_frame + + gf_group->arf_src_offset[gf_group->index]]; + + vp9_configure_buffer_updates(cpi, gf_group->index); + target_rate = gf_group->bit_allocation[gf_group->index]; target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate); rc->base_frame_target = target_rate; cm->frame_type = INTER_FRAME; - if (lc != NULL) { - if (cpi->svc.spatial_layer_id == 0) { - lc->is_key_frame = 0; - } else { - lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame; - - if (lc->is_key_frame) cpi->ref_frame_flags &= (~VP9_LAST_FLAG); - } - } - // Do the firstpass stats indicate that this frame is skippable for the // partition search? if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 && - (!cpi->use_svc || is_two_pass_svc(cpi))) { + !cpi->use_svc) { cpi->partition_search_skippable_frame = is_skippable_frame(cpi); } + // The multiplication by 256 reverses a scaling factor of (>> 8) + // applied when combining MB error values for the frame. 
+ twopass->mb_av_energy = log((this_frame.intra_error * 256.0) + 1.0); + twopass->mb_smooth_pct = this_frame.intra_smooth_pct; + return; } @@ -3182,12 +3418,9 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { if (cpi->oxcf.rc_mode == VPX_Q) { twopass->active_worst_quality = cpi->oxcf.cq_level; - } else if (cm->current_video_frame == 0 || - (lc != NULL && lc->current_video_frame_in_layer == 0)) { + } else if (cm->current_video_frame == 0) { const int frames_left = - (int)(twopass->total_stats.count - - ((lc != NULL) ? lc->current_video_frame_in_layer - : cm->current_video_frame)); + (int)(twopass->total_stats.count - cm->current_video_frame); // Special case code for first frame. const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); @@ -3227,68 +3460,42 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Keyframe and section processing. if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) { - FIRSTPASS_STATS this_frame_copy; - this_frame_copy = this_frame; // Define next KF group and assign bits to it. - find_next_key_frame(cpi, &this_frame); - this_frame = this_frame_copy; + find_next_key_frame(cpi, show_idx); } else { cm->frame_type = INTER_FRAME; } - if (lc != NULL) { - if (cpi->svc.spatial_layer_id == 0) { - lc->is_key_frame = (cm->frame_type == KEY_FRAME); - if (lc->is_key_frame) { - cpi->ref_frame_flags &= - (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); - lc->frames_from_key_frame = 0; - // Encode an intra only empty frame since we have a key frame. - cpi->svc.encode_intra_empty_frame = 1; - } - } else { - cm->frame_type = INTER_FRAME; - lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame; - - if (lc->is_key_frame) { - cpi->ref_frame_flags &= (~VP9_LAST_FLAG); - lc->frames_from_key_frame = 0; - } - } - } - // Define a new GF/ARF group. (Should always enter here for key frames). if (rc->frames_till_gf_update_due == 0) { - define_gf_group(cpi, &this_frame); + define_gf_group(cpi, show_idx); rc->frames_till_gf_update_due = rc->baseline_gf_interval; - if (lc != NULL) cpi->refresh_golden_frame = 1; #if ARF_STATS_OUTPUT { FILE *fpfile; fpfile = fopen("arf.stt", "a"); ++arf_count; - fprintf(fpfile, "%10d %10ld %10d %10d %10ld\n", cm->current_video_frame, - rc->frames_till_gf_update_due, rc->kf_boost, arf_count, - rc->gfu_boost); + fprintf(fpfile, "%10d %10ld %10d %10d %10ld %10ld\n", + cm->current_video_frame, rc->frames_till_gf_update_due, + rc->kf_boost, arf_count, rc->gfu_boost, cm->frame_type); fclose(fpfile); } #endif } - configure_buffer_updates(cpi); + vp9_configure_buffer_updates(cpi, gf_group->index); // Do the firstpass stats indicate that this frame is skippable for the // partition search? if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 && - (!cpi->use_svc || is_two_pass_svc(cpi))) { + !cpi->use_svc) { cpi->partition_search_skippable_frame = is_skippable_frame(cpi); } - target_rate = gf_group->bit_allocation[gf_group->index]; - rc->base_frame_target = target_rate; + rc->base_frame_target = gf_group->bit_allocation[gf_group->index]; // The multiplication by 256 reverses a scaling factor of (>> 8) // applied when combining MB error values for the frame. 
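One detail worth noting in the ARF early-return path of vp9_rc_get_second_pass_params() above: the first-pass stats for an alt-ref are fetched by indexing ahead of the current display frame rather than by advancing the stats pointer. A hedged, standalone restatement (the Stats stand-in type and helper name are invented):

    /* The ARF's source stats sit arf_src_offset show frames ahead of the
     * current display position; stats[] models twopass->stats_in_start. */
    typedef struct { double intra_error; } Stats;
    const Stats *arf_source_stats(const Stats *stats, int current_video_frame,
                                  int arf_src_offset) {
      return &stats[current_video_frame + arf_src_offset];
    }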
@@ -3329,8 +3536,7 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { rc->rate_error_estimate = 0; } - if (cpi->common.frame_type != KEY_FRAME && - !vp9_is_upper_layer_key_frame(cpi)) { + if (cpi->common.frame_type != KEY_FRAME) { twopass->kf_group_bits -= bits_used; twopass->last_kfgroup_zeromotion_pct = twopass->kf_zeromotion_pct; } @@ -3350,7 +3556,8 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { // Extend min or max Q range to account for imbalance from the base // value when using AQ. - if (cpi->oxcf.aq_mode != NO_AQ) { + if (cpi->oxcf.aq_mode != NO_AQ && cpi->oxcf.aq_mode != PSNR_AQ && + cpi->oxcf.aq_mode != PERCEPTUAL_AQ) { if (cm->seg.aq_av_offset < 0) { // The balance of the AQ map tends towards lowering the average Q. aq_extend_min = 0; @@ -3418,3 +3625,70 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { } } } + +#if CONFIG_RATE_CTRL +// Under CONFIG_RATE_CTRL, once the first_pass_info is ready, the number of +// coding frames (including show frames and alt refs) can be determined. +int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf, + const FRAME_INFO *frame_info, + const FIRST_PASS_INFO *first_pass_info, + int multi_layer_arf, int allow_alt_ref) { + int coding_frame_num = 0; + RATE_CONTROL rc; + RANGE active_gf_interval; + int arf_layers; + double gop_intra_factor; + int use_alt_ref; + int gop_coding_frames; + int gop_show_frames; + int show_idx = 0; + int arf_active_or_kf = 1; + rc.static_scene_max_gf_interval = 250; + vp9_rc_init(oxcf, 1, &rc); + + while (show_idx < first_pass_info->num_frames) { + if (rc.frames_to_key == 0) { + rc.frames_to_key = vp9_get_frames_to_next_key( + oxcf, frame_info, first_pass_info, show_idx, rc.min_gf_interval); + arf_active_or_kf = 1; + } else { + } + + { + int dummy = 0; + active_gf_interval = get_active_gf_inverval_range( + frame_info, &rc, arf_active_or_kf, show_idx, dummy, dummy); + } + + arf_layers = get_arf_layers(multi_layer_arf, oxcf->enable_auto_arf, + active_gf_interval.max); + if (multi_layer_arf) { + gop_intra_factor = 1.0 + 0.25 * arf_layers; + } else { + gop_intra_factor = 1.0; + } + + gop_coding_frames = get_gop_coding_frame_num( + &use_alt_ref, frame_info, first_pass_info, &rc, show_idx, + &active_gf_interval, gop_intra_factor, oxcf->lag_in_frames); + + use_alt_ref &= allow_alt_ref; + + rc.source_alt_ref_active = use_alt_ref; + arf_active_or_kf = use_alt_ref; + gop_show_frames = gop_coding_frames - use_alt_ref; + rc.frames_to_key -= gop_show_frames; + rc.frames_since_key += gop_show_frames; + show_idx += gop_show_frames; + coding_frame_num += gop_show_frames + use_alt_ref; + } + return coding_frame_num; +} +#endif + +FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass) { + return twopass->this_frame_stats; +} +FIRSTPASS_STATS vp9_get_total_stats(const TWO_PASS *twopass) { + return twopass->total_stats; +} diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_firstpass.h b/media/libvpx/libvpx/vp9/encoder/vp9_firstpass.h index 000ecd77926b..cfbc143c3046 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_firstpass.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_firstpass.h @@ -8,9 +8,12 @@ * be found in the AUTHORS file in the root of the source tree.
*/ -#ifndef VP9_ENCODER_VP9_FIRSTPASS_H_ -#define VP9_ENCODER_VP9_FIRSTPASS_H_ +#ifndef VPX_VP9_ENCODER_VP9_FIRSTPASS_H_ +#define VPX_VP9_ENCODER_VP9_FIRSTPASS_H_ +#include + +#include "vp9/common/vp9_onyxc_int.h" #include "vp9/encoder/vp9_lookahead.h" #include "vp9/encoder/vp9_ratectrl.h" @@ -39,7 +42,10 @@ typedef struct { } FIRSTPASS_MB_STATS; #endif -#define INVALID_ROW -1 +#define INVALID_ROW (-1) + +#define MAX_ARF_LAYERS 6 +#define SECTION_NOISE_DEF 250.0 typedef struct { double frame_mb_intra_factor; @@ -107,7 +113,9 @@ typedef enum { GF_UPDATE = 2, ARF_UPDATE = 3, OVERLAY_UPDATE = 4, - FRAME_UPDATE_TYPES = 5 + MID_OVERLAY_UPDATE = 5, + USE_BUF_FRAME = 6, // Use show existing frame, no ref buffer update + FRAME_UPDATE_TYPES = 7 } FRAME_UPDATE_TYPE; #define FC_ANIMATION_THRESH 0.15 @@ -119,22 +127,59 @@ typedef enum { typedef struct { unsigned char index; - unsigned char first_inter_index; - RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1]; - FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1]; - unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1]; - unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1]; - unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1]; - int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1]; + RATE_FACTOR_LEVEL rf_level[MAX_STATIC_GF_GROUP_LENGTH + 2]; + FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 2]; + unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 2]; + unsigned char layer_depth[MAX_STATIC_GF_GROUP_LENGTH + 2]; + unsigned char frame_gop_index[MAX_STATIC_GF_GROUP_LENGTH + 2]; + int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 2]; + int gfu_boost[MAX_STATIC_GF_GROUP_LENGTH + 2]; + + int frame_start; + int frame_end; + // TODO(jingning): The array size of arf_stack could be reduced. + int arf_index_stack[MAX_LAG_BUFFERS * 2]; + int top_arf_idx; + int stack_size; + int gf_group_size; + int max_layer_depth; + int allowed_max_layer_depth; + int group_noise_energy; } GF_GROUP; +typedef struct { + const FIRSTPASS_STATS *stats; + int num_frames; +} FIRST_PASS_INFO; + +static INLINE void fps_init_first_pass_info(FIRST_PASS_INFO *first_pass_info, + const FIRSTPASS_STATS *stats, + int num_frames) { + first_pass_info->stats = stats; + first_pass_info->num_frames = num_frames; +} + +static INLINE int fps_get_num_frames(const FIRST_PASS_INFO *first_pass_info) { + return first_pass_info->num_frames; +} + +static INLINE const FIRSTPASS_STATS *fps_get_frame_stats( + const FIRST_PASS_INFO *first_pass_info, int show_idx) { + if (show_idx < 0 || show_idx >= first_pass_info->num_frames) { + return NULL; + } + return &first_pass_info->stats[show_idx]; +} + typedef struct { unsigned int section_intra_rating; + unsigned int key_frame_section_intra_rating; FIRSTPASS_STATS total_stats; FIRSTPASS_STATS this_frame_stats; const FIRSTPASS_STATS *stats_in; const FIRSTPASS_STATS *stats_in_start; const FIRSTPASS_STATS *stats_in_end; + FIRST_PASS_INFO first_pass_info; FIRSTPASS_STATS total_left_stats; int first_pass_done; int64_t bits_left; @@ -173,6 +218,7 @@ typedef struct { int extend_maxq; int extend_minq_fast; int arnr_strength_adjustment; + int last_qindex_of_arf_layer[MAX_ARF_LAYERS]; GF_GROUP gf_group; } TWO_PASS; @@ -182,7 +228,6 @@ struct ThreadData; struct TileDataEnc; void vp9_init_first_pass(struct VP9_COMP *cpi); -void vp9_rc_get_first_pass_params(struct VP9_COMP *cpi); void vp9_first_pass(struct VP9_COMP *cpi, const struct lookahead_entry *source); void vp9_end_first_pass(struct VP9_COMP *cpi); @@ -194,7 +239,6 @@ void 
vp9_first_pass_encode_tile_mb_row(struct VP9_COMP *cpi, void vp9_init_second_pass(struct VP9_COMP *cpi); void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi); -void vp9_twopass_postencode_update(struct VP9_COMP *cpi); // Post encode update of the rate control parameters for 2-pass void vp9_twopass_postencode_update(struct VP9_COMP *cpi); @@ -202,8 +246,23 @@ void vp9_twopass_postencode_update(struct VP9_COMP *cpi); void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width, int *scaled_frame_height); +struct VP9EncoderConfig; +int vp9_get_frames_to_next_key(const struct VP9EncoderConfig *oxcf, + const FRAME_INFO *frame_info, + const FIRST_PASS_INFO *first_pass_info, + int kf_show_idx, int min_gf_interval); +#if CONFIG_RATE_CTRL +int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf, + const FRAME_INFO *frame_info, + const FIRST_PASS_INFO *first_pass_info, + int multi_layer_arf, int allow_alt_ref); +#endif + +FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *two_pass); +FIRSTPASS_STATS vp9_get_total_stats(const TWO_PASS *two_pass); + #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_FIRSTPASS_H_ +#endif // VPX_VP9_ENCODER_VP9_FIRSTPASS_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_job_queue.h b/media/libvpx/libvpx/vp9/encoder/vp9_job_queue.h index 89c08f207a42..ad09c11198aa 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_job_queue.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_job_queue.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_JOB_QUEUE_H_ -#define VP9_ENCODER_VP9_JOB_QUEUE_H_ +#ifndef VPX_VP9_ENCODER_VP9_JOB_QUEUE_H_ +#define VPX_VP9_ENCODER_VP9_JOB_QUEUE_H_ typedef enum { FIRST_PASS_JOB, @@ -43,4 +43,4 @@ typedef struct { int num_jobs_acquired; } JobQueueHandle; -#endif // VP9_ENCODER_VP9_JOB_QUEUE_H_ +#endif // VPX_VP9_ENCODER_VP9_JOB_QUEUE_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_lookahead.c b/media/libvpx/libvpx/vp9/encoder/vp9_lookahead.c index 392cd5d418a1..97838c38e61a 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_lookahead.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_lookahead.c @@ -64,6 +64,7 @@ struct lookahead_ctx *vp9_lookahead_init(unsigned int width, unsigned int i; ctx->max_sz = depth; ctx->buf = calloc(depth, sizeof(*ctx->buf)); + ctx->next_show_idx = 0; if (!ctx->buf) goto bail; for (i = 0; i < depth; i++) if (vpx_alloc_frame_buffer( @@ -81,12 +82,16 @@ bail: } #define USE_PARTIAL_COPY 0 +int vp9_lookahead_full(const struct lookahead_ctx *ctx) { + return ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz; +} + +int vp9_lookahead_next_show_idx(const struct lookahead_ctx *ctx) { + return ctx->next_show_idx; +} int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, - int64_t ts_start, int64_t ts_end, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif + int64_t ts_start, int64_t ts_end, int use_highbitdepth, vpx_enc_frame_flags_t flags) { struct lookahead_entry *buf; #if USE_PARTIAL_COPY @@ -101,8 +106,12 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int subsampling_x = src->subsampling_x; int subsampling_y = src->subsampling_y; int larger_dimensions, new_dimensions; +#if !CONFIG_VP9_HIGHBITDEPTH + (void)use_highbitdepth; + assert(use_highbitdepth == 0); +#endif - if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1; + if (vp9_lookahead_full(ctx)) return 1; ctx->sz++; buf = pop(ctx, &ctx->write_idx); @@ -184,6 +193,8 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG 
*src, buf->ts_start = ts_start; buf->ts_end = ts_end; buf->flags = flags; + buf->show_idx = ctx->next_show_idx; + ++ctx->next_show_idx; return 0; } diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_lookahead.h b/media/libvpx/libvpx/vp9/encoder/vp9_lookahead.h index 88be0ffcd58f..dbbe3af58401 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_lookahead.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_lookahead.h @@ -8,17 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_LOOKAHEAD_H_ -#define VP9_ENCODER_VP9_LOOKAHEAD_H_ +#ifndef VPX_VP9_ENCODER_VP9_LOOKAHEAD_H_ +#define VPX_VP9_ENCODER_VP9_LOOKAHEAD_H_ #include "vpx_scale/yv12config.h" #include "vpx/vpx_encoder.h" #include "vpx/vpx_integer.h" -#if CONFIG_SPATIAL_SVC -#include "vpx/vp8cx.h" -#endif - #ifdef __cplusplus extern "C" { #endif @@ -29,6 +25,7 @@ struct lookahead_entry { YV12_BUFFER_CONFIG img; int64_t ts_start; int64_t ts_end; + int show_idx; /*The show_idx of this frame*/ vpx_enc_frame_flags_t flags; }; @@ -36,10 +33,12 @@ struct lookahead_entry { #define MAX_PRE_FRAMES 1 struct lookahead_ctx { - int max_sz; /* Absolute size of the queue */ - int sz; /* Number of buffers currently in the queue */ - int read_idx; /* Read index */ - int write_idx; /* Write index */ + int max_sz; /* Absolute size of the queue */ + int sz; /* Number of buffers currently in the queue */ + int read_idx; /* Read index */ + int write_idx; /* Write index */ + int next_show_idx; /* The show_idx that will be assigned to the next frame + being pushed in the queue*/ struct lookahead_entry *buf; /* Buffer list */ }; @@ -61,6 +60,23 @@ struct lookahead_ctx *vp9_lookahead_init(unsigned int width, */ void vp9_lookahead_destroy(struct lookahead_ctx *ctx); +/**\brief Check if lookahead is full + * + * \param[in] ctx Pointer to the lookahead context + * + * Return 1 if lookahead is full, otherwise return 0. + */ +int vp9_lookahead_full(const struct lookahead_ctx *ctx); + +/**\brief Return the next_show_idx + * + * \param[in] ctx Pointer to the lookahead context + * + * Return the show_idx that will be assigned to the next + * frame pushed by vp9_lookahead_push() + */ +int vp9_lookahead_next_show_idx(const struct lookahead_ctx *ctx); + /**\brief Enqueue a source buffer * * This function will copy the source image into a new framebuffer with @@ -77,10 +93,7 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx); * \param[in] active_map Map that specifies which macroblock is active */ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, - int64_t ts_start, int64_t ts_end, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif + int64_t ts_start, int64_t ts_end, int use_highbitdepth, vpx_enc_frame_flags_t flags); /**\brief Get the next source buffer to encode @@ -115,4 +128,4 @@ unsigned int vp9_lookahead_depth(struct lookahead_ctx *ctx); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_LOOKAHEAD_H_ +#endif // VPX_VP9_ENCODER_VP9_LOOKAHEAD_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_mbgraph.c b/media/libvpx/libvpx/vp9/encoder/vp9_mbgraph.c index 46d626def173..831c79c17535 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_mbgraph.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_mbgraph.c @@ -57,11 +57,12 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv, { uint32_t distortion; uint32_t sse; + // TODO(yunqing): may use higher tap interp filter than 2 taps if needed. 
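The TODO above concerns the new tap-level argument (passed as USE_2_TAPS in the call that follows): use_accurate_subpel_search decides which interpolation kernel the sub-pel search uses when building predictions, with level 0 keeping the fast bilinear svf/svaf path. A sketch of the mapping applied later in vp9_find_best_sub_pixel_tree; the enum names and numeric values here are stand-in assumptions, not the library's definitions:

/* Sketch: stand-in enums modelling the speed-feature levels. */
enum subpel_level { LEVEL_2_TAPS = 0, LEVEL_4_TAPS, LEVEL_8_TAPS, LEVEL_8_TAPS_SHARP };
enum kernel_choice { K_BILINEAR, K_FOURTAP, K_EIGHTTAP, K_EIGHTTAP_SHARP };

static enum kernel_choice kernel_for_level(enum subpel_level level) {
  if (level == LEVEL_4_TAPS) return K_FOURTAP;
  if (level == LEVEL_8_TAPS) return K_EIGHTTAP;
  if (level == LEVEL_8_TAPS_SHARP) return K_EIGHTTAP_SHARP;
  return K_BILINEAR; /* LEVEL_2_TAPS: the fast svf/svaf path */
}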
cpi->find_fractional_mv_step( x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, - &v_fn_ptr, 0, mv_sf->subpel_iters_per_step, + &v_fn_ptr, 0, mv_sf->subpel_search_level, cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, - 0); + 0, USE_2_TAPS); } xd->mi[0]->mode = NEWMV; diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_mbgraph.h b/media/libvpx/libvpx/vp9/encoder/vp9_mbgraph.h index df2fb98efa65..7b629861d517 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_mbgraph.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_mbgraph.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_MBGRAPH_H_ -#define VP9_ENCODER_VP9_MBGRAPH_H_ +#ifndef VPX_VP9_ENCODER_VP9_MBGRAPH_H_ +#define VPX_VP9_ENCODER_VP9_MBGRAPH_H_ #ifdef __cplusplus extern "C" { @@ -25,7 +25,9 @@ typedef struct { } ref[MAX_REF_FRAMES]; } MBGRAPH_MB_STATS; -typedef struct { MBGRAPH_MB_STATS *mb_stats; } MBGRAPH_FRAME_STATS; +typedef struct { + MBGRAPH_MB_STATS *mb_stats; +} MBGRAPH_FRAME_STATS; struct VP9_COMP; @@ -35,4 +37,4 @@ void vp9_update_mbgraph_stats(struct VP9_COMP *cpi); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_MBGRAPH_H_ +#endif // VPX_VP9_ENCODER_VP9_MBGRAPH_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_mcomp.c b/media/libvpx/libvpx/vp9/encoder/vp9_mcomp.c index 44f01be25a02..ac29f36ec185 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_mcomp.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_mcomp.c @@ -29,11 +29,6 @@ // #define NEW_DIAMOND_SEARCH -static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, - const MV *mv) { - return &buf->buf[mv->row * buf->stride + mv->col]; -} - void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) { int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); @@ -263,27 +258,6 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { } \ } -// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of -// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten -// later in the same way. 
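The macro removed just below, SECOND_LEVEL_CHECKS_BEST, refined the sub-pel result by probing in the direction of the last accepted step; the same logic reappears inline in vp9_find_best_sub_pixel_tree later in this patch. A sketch of the offset derivation as a hypothetical standalone helper:

/* Sketch: (tr, tc) is the previous centre, (br, bc) the new best point.
   kr/kc arrive holding the current half-step and are redirected along
   the winning axis; callers then evaluate (br + kr, bc), (br, bc + kc)
   and, when the best point moved, the diagonal (br + kr, bc + kc). */
static void second_level_offsets(int tr, int tc, int br, int bc,
                                 int *kr, int *kc) {
  if (tr == br && tc != bc) {
    *kc = bc - tc; /* the accepted step was horizontal */
  } else if (tr != br && tc == bc) {
    *kr = br - tr; /* the accepted step was vertical */
  }
}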
-#define SECOND_LEVEL_CHECKS_BEST \ - { \ - unsigned int second; \ - int br0 = br; \ - int bc0 = bc; \ - assert(tr == br || tc == bc); \ - if (tr == br && tc != bc) { \ - kc = bc - tc; \ - } else if (tr != br && tc == bc) { \ - kr = br - tr; \ - } \ - CHECK_BETTER(second, br0 + kr, bc0); \ - CHECK_BETTER(second, br0, bc0 + kc); \ - if (br0 != br || bc0 != bc) { \ - CHECK_BETTER(second, br0 + kr, bc0 + kc); \ - } \ - } - #define SETUP_SUBPEL_SEARCH \ const uint8_t *const z = x->plane[0].src.buf; \ const int src_stride = x->plane[0].src.stride; \ @@ -329,8 +303,8 @@ static unsigned int setup_center_error( if (second_pred != NULL) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); - vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, - y_stride); + vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w, + h, CONVERT_TO_SHORTPTR(y + offset), y_stride); besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1); } else { @@ -388,14 +362,12 @@ static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) { *ir = (int)divide_and_round(x1 * b, y1); } -uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, - const MV *ref_mv, int allow_hp, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int forced_stop, int iters_per_step, - int *cost_list, int *mvjcost, int *mvcost[2], - uint32_t *distortion, uint32_t *sse1, - const uint8_t *second_pred, int w, int h) { +uint32_t vp9_skip_sub_pixel_tree( + const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, + int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, + int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], + uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, + int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, @@ -418,6 +390,7 @@ uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, (void)sse; (void)thismse; (void)cost_list; + (void)use_accurate_subpel_search; return besterr; } @@ -427,7 +400,7 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, @@ -439,6 +412,7 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( (void)allow_hp; (void)forced_stop; (void)hstep; + (void)use_accurate_subpel_search; if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && @@ -492,8 +466,10 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_more( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; + (void)use_accurate_subpel_search; + besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); @@ -552,8 +528,10 @@ uint32_t 
vp9_find_best_sub_pixel_tree_pruned( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; + (void)use_accurate_subpel_search; + besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); @@ -638,12 +616,119 @@ static const MV search_step_table[12] = { }; /* clang-format on */ +static int accurate_sub_pel_search( + const MACROBLOCKD *xd, const MV *this_mv, const struct scale_factors *sf, + const InterpKernel *kernel, const vp9_variance_fn_ptr_t *vfp, + const uint8_t *const src_address, const int src_stride, + const uint8_t *const pre_address, int y_stride, const uint8_t *second_pred, + int w, int h, uint32_t *sse) { +#if CONFIG_VP9_HIGHBITDEPTH + uint64_t besterr; + assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16); + assert(w != 0 && h != 0); + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]); + vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(pre_address), y_stride, + pred16, w, this_mv, sf, w, h, 0, kernel, + MV_PRECISION_Q3, 0, 0, xd->bd); + if (second_pred != NULL) { + DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); + vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w, + h, pred16, w); + besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src_address, + src_stride, sse); + } else { + besterr = + vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src_address, src_stride, sse); + } + } else { + DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); + vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h, + 0, kernel, MV_PRECISION_Q3, 0, 0); + if (second_pred != NULL) { + DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); + vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w); + besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse); + } else { + besterr = vfp->vf(pred, w, src_address, src_stride, sse); + } + } + if (besterr >= UINT_MAX) return UINT_MAX; + return (int)besterr; +#else + int besterr; + DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); + assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16); + assert(w != 0 && h != 0); + (void)xd; + + vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h, + 0, kernel, MV_PRECISION_Q3, 0, 0); + if (second_pred != NULL) { + DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); + vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w); + besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse); + } else { + besterr = vfp->vf(pred, w, src_address, src_stride, sse); + } + return besterr; +#endif // CONFIG_VP9_HIGHBITDEPTH +} + +// TODO(yunqing): this part can be further refactored. 
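In the high-bitdepth CHECK_BETTER1 variant that follows, the interpolated distortion and the MV rate cost are combined in 64-bit arithmetic and clamped, so a large SSE from deep content cannot wrap a 32-bit total. A sketch of just that saturating combine, with a hypothetical helper name:

#include <stdint.h>
#include <limits.h>

/* Sketch: form the cost in 64 bits, saturate to INT_MAX (the macro's
   "not better" sentinel) before narrowing back to 32 bits. */
static uint32_t combine_subpel_cost(int64_t distortion, int64_t mv_cost) {
  const int64_t total = distortion + mv_cost;
  return (total >= INT_MAX) ? (uint32_t)INT_MAX : (uint32_t)total;
}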
+#if CONFIG_VP9_HIGHBITDEPTH +/* checks if (r, c) has better score than previous best */ +#define CHECK_BETTER1(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + int64_t tmpmse; \ + const MV mv = { r, c }; \ + const MV ref_mv = { rr, rc }; \ + thismse = \ + accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \ + y, y_stride, second_pred, w, h, &sse); \ + tmpmse = thismse; \ + tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \ + if (tmpmse >= INT_MAX) { \ + v = INT_MAX; \ + } else if ((v = (uint32_t)tmpmse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } +#else +/* checks if (r, c) has better score than previous best */ +#define CHECK_BETTER1(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + const MV mv = { r, c }; \ + const MV ref_mv = { rr, rc }; \ + thismse = \ + accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \ + y, y_stride, second_pred, w, h, &sse); \ + if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \ + thismse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } + +#endif + uint32_t vp9_find_best_sub_pixel_tree( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { const uint8_t *const z = x->plane[0].src.buf; const uint8_t *const src_address = z; const int src_stride = x->plane[0].src.stride; @@ -671,6 +756,17 @@ uint32_t vp9_find_best_sub_pixel_tree( int kr, kc; MvLimits subpel_mv_limits; + // TODO(yunqing): need to add 4-tap filter optimization to speed up the + // encoder. + const InterpKernel *kernel = + (use_accurate_subpel_search > 0) + ? ((use_accurate_subpel_search == USE_4_TAPS) + ? vp9_filter_kernels[FOURTAP] + : ((use_accurate_subpel_search == USE_8_TAPS) + ? 
vp9_filter_kernels[EIGHTTAP] + : vp9_filter_kernels[EIGHTTAP_SHARP])) + : vp9_filter_kernels[BILINEAR]; + vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); minc = subpel_mv_limits.col_min; maxc = subpel_mv_limits.col_max; @@ -695,16 +791,25 @@ uint32_t vp9_find_best_sub_pixel_tree( tr = br + search_step[idx].row; tc = bc + search_step[idx].col; if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { - const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); MV this_mv; this_mv.row = tr; this_mv.col = tc; - if (second_pred == NULL) - thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, - src_stride, &sse); - else - thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), - src_address, src_stride, &sse, second_pred); + + if (use_accurate_subpel_search) { + thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp, + src_address, src_stride, y, + y_stride, second_pred, w, h, &sse); + } else { + const uint8_t *const pre_address = + y + (tr >> 3) * y_stride + (tc >> 3); + if (second_pred == NULL) + thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), + src_address, src_stride, &sse); + else + thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), + src_address, src_stride, &sse, second_pred); + } + cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -726,14 +831,21 @@ uint32_t vp9_find_best_sub_pixel_tree( tc = bc + kc; tr = br + kr; if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { - const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); MV this_mv = { tr, tc }; - if (second_pred == NULL) - thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, - src_stride, &sse); - else - thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, - src_stride, &sse, second_pred); + if (use_accurate_subpel_search) { + thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp, + src_address, src_stride, y, y_stride, + second_pred, w, h, &sse); + } else { + const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); + if (second_pred == NULL) + thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, + src_stride, &sse); + else + thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), + src_address, src_stride, &sse, second_pred); + } + cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -755,10 +867,48 @@ uint32_t vp9_find_best_sub_pixel_tree( bc = tc; } - if (iters_per_step > 1 && best_idx != -1) SECOND_LEVEL_CHECKS_BEST; + if (iters_per_step > 0 && best_idx != -1) { + unsigned int second; + const int br0 = br; + const int bc0 = bc; + assert(tr == br || tc == bc); - tr = br; - tc = bc; + if (tr == br && tc != bc) { + kc = bc - tc; + if (iters_per_step == 1) { + if (use_accurate_subpel_search) { + CHECK_BETTER1(second, br0, bc0 + kc); + } else { + CHECK_BETTER(second, br0, bc0 + kc); + } + } + } else if (tr != br && tc == bc) { + kr = br - tr; + if (iters_per_step == 1) { + if (use_accurate_subpel_search) { + CHECK_BETTER1(second, br0 + kr, bc0); + } else { + CHECK_BETTER(second, br0 + kr, bc0); + } + } + } + + if (iters_per_step > 1) { + if (use_accurate_subpel_search) { + CHECK_BETTER1(second, br0 + kr, bc0); + CHECK_BETTER1(second, br0, bc0 + kc); + if (br0 != br || bc0 != bc) { + CHECK_BETTER1(second, br0 + kr, bc0 + kc); + } + } else { + CHECK_BETTER(second, br0 + kr, bc0); + CHECK_BETTER(second, br0, bc0 + kc); + if (br0 != br || bc0 != bc) { 
+ CHECK_BETTER(second, br0 + kr, bc0 + kc); + } + } + } + } search_step += 4; hstep >>= 1; @@ -780,6 +930,7 @@ uint32_t vp9_find_best_sub_pixel_tree( } #undef CHECK_BETTER +#undef CHECK_BETTER1 static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col, int range) { @@ -1490,7 +1641,7 @@ static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param, // Exhaustive motion search around a given centre position with a given // step size. -static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, +static int exhaustive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, int range, int step, int sad_per_bit, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv) { @@ -1576,6 +1727,361 @@ static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, return best_sad; } +#define MIN_RANGE 7 +#define MAX_RANGE 256 +#define MIN_INTERVAL 1 +#if CONFIG_NON_GREEDY_MV +static int64_t exhaustive_mesh_search_multi_step( + MV *best_mv, const MV *center_mv, int range, int step, + const struct buf_2d *src, const struct buf_2d *pre, int lambda, + const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits, + const vp9_variance_fn_ptr_t *fn_ptr) { + int64_t best_sad; + int r, c; + int start_col, end_col, start_row, end_row; + *best_mv = *center_mv; + best_sad = + ((int64_t)fn_ptr->sdf(src->buf, src->stride, + get_buf_from_mv(pre, center_mv), pre->stride) + << LOG2_PRECISION) + + lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num); + start_row = VPXMAX(center_mv->row - range, mv_limits->row_min); + start_col = VPXMAX(center_mv->col - range, mv_limits->col_min); + end_row = VPXMIN(center_mv->row + range, mv_limits->row_max); + end_col = VPXMIN(center_mv->col + range, mv_limits->col_max); + for (r = start_row; r <= end_row; r += step) { + for (c = start_col; c <= end_col; c += step) { + const MV mv = { r, c }; + int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride, + get_buf_from_mv(pre, &mv), pre->stride) + << LOG2_PRECISION; + if (sad < best_sad) { + sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } + } + } + } + return best_sad; +} + +static int64_t exhaustive_mesh_search_single_step( + MV *best_mv, const MV *center_mv, int range, const struct buf_2d *src, + const struct buf_2d *pre, int lambda, const int_mv *nb_full_mvs, + int full_mv_num, const MvLimits *mv_limits, + const vp9_variance_fn_ptr_t *fn_ptr) { + int64_t best_sad; + int r, c, i; + int start_col, end_col, start_row, end_row; + + *best_mv = *center_mv; + best_sad = + ((int64_t)fn_ptr->sdf(src->buf, src->stride, + get_buf_from_mv(pre, center_mv), pre->stride) + << LOG2_PRECISION) + + lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num); + start_row = VPXMAX(center_mv->row - range, mv_limits->row_min); + start_col = VPXMAX(center_mv->col - range, mv_limits->col_min); + end_row = VPXMIN(center_mv->row + range, mv_limits->row_max); + end_col = VPXMIN(center_mv->col + range, mv_limits->col_max); + for (r = start_row; r <= end_row; r += 1) { + c = start_col; + // sdx8f may not be available for some block sizes + if (fn_ptr->sdx8f) { + while (c + 7 <= end_col) { + unsigned int sads[8]; + const MV mv = { r, c }; + const uint8_t *buf = get_buf_from_mv(pre, &mv); + fn_ptr->sdx8f(src->buf, src->stride, buf, pre->stride, sads); + + for (i = 0; i < 8; ++i) { + int64_t sad = (int64_t)sads[i] << LOG2_PRECISION; + if (sad < best_sad) { + const MV mv = { r, c + i
}; + sad += lambda * + vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } + } + } + c += 8; + } + } + while (c + 3 <= end_col) { + unsigned int sads[4]; + const uint8_t *addrs[4]; + for (i = 0; i < 4; ++i) { + const MV mv = { r, c + i }; + addrs[i] = get_buf_from_mv(pre, &mv); + } + fn_ptr->sdx4df(src->buf, src->stride, addrs, pre->stride, sads); + + for (i = 0; i < 4; ++i) { + int64_t sad = (int64_t)sads[i] << LOG2_PRECISION; + if (sad < best_sad) { + const MV mv = { r, c + i }; + sad += + lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } + } + } + c += 4; + } + while (c <= end_col) { + const MV mv = { r, c }; + int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride, + get_buf_from_mv(pre, &mv), pre->stride) + << LOG2_PRECISION; + if (sad < best_sad) { + sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } + } + c += 1; + } + } + return best_sad; +} + +static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv, + int range, int step, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *center_mv, int lambda, + const int_mv *nb_full_mvs, + int full_mv_num) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *src = &x->plane[0].src; + const struct buf_2d *pre = &xd->plane[0].pre[0]; + assert(step >= 1); + assert(is_mv_in(&x->mv_limits, center_mv)); + if (step == 1) { + return exhaustive_mesh_search_single_step( + best_mv, center_mv, range, src, pre, lambda, nb_full_mvs, full_mv_num, + &x->mv_limits, fn_ptr); + } + return exhaustive_mesh_search_multi_step(best_mv, center_mv, range, step, src, + pre, lambda, nb_full_mvs, + full_mv_num, &x->mv_limits, fn_ptr); +} + +static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x, + MV *centre_mv_full, + const vp9_variance_fn_ptr_t *fn_ptr, + MV *dst_mv, int lambda, + const int_mv *nb_full_mvs, + int full_mv_num) { + const SPEED_FEATURES *const sf = &cpi->sf; + MV temp_mv = { centre_mv_full->row, centre_mv_full->col }; + int64_t bestsme; + int i; + int interval = sf->mesh_patterns[0].interval; + int range = sf->mesh_patterns[0].range; + int baseline_interval_divisor; + + // Trap illegal values for interval and range for this function. + if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) || + (interval > range)) { + printf("ERROR: invalid range\n"); + assert(0); + } + + baseline_interval_divisor = range / interval; + + // Check size of proposed first range against magnitude of the centre + // value used as a starting point. + range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4); + range = VPXMIN(range, MAX_RANGE); + interval = VPXMAX(interval, range / baseline_interval_divisor); + + // initial search + bestsme = + exhaustive_mesh_search_new(x, &temp_mv, range, interval, fn_ptr, &temp_mv, + lambda, nb_full_mvs, full_mv_num); + + if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { + // Progressive searches with range and step size decreasing each time + // till we reach a step size of 1. Then break out. 
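The progressive passes described above walk a shrinking set of (range, interval) pairs taken from sf->mesh_patterns, re-centring on the best MV after each pass and stopping once a pass with interval 1 has run. An illustrative schedule (the concrete values are an assumption, not the encoder's configured defaults):

/* Sketch: each pass searches +/- range around the current best MV,
   sampling every `interval` pels; a pass with interval 1 ends the loop. */
typedef struct {
  int range;
  int interval;
} mesh_pattern_sketch;

static const mesh_pattern_sketch mesh_schedule[4] = {
  { 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 } /* illustrative values */
};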
+ for (i = 1; i < MAX_MESH_STEP; ++i) { + // First pass with coarser step and longer range + bestsme = exhaustive_mesh_search_new( + x, &temp_mv, sf->mesh_patterns[i].range, + sf->mesh_patterns[i].interval, fn_ptr, &temp_mv, lambda, nb_full_mvs, + full_mv_num); + + if (sf->mesh_patterns[i].interval == 1) break; + } + } + + *dst_mv = temp_mv; + + return bestsme; +} + +static int64_t diamond_search_sad_new(const MACROBLOCK *x, + const search_site_config *cfg, + const MV *init_full_mv, MV *best_full_mv, + int search_param, int lambda, int *num00, + const vp9_variance_fn_ptr_t *fn_ptr, + const int_mv *nb_full_mvs, + int full_mv_num) { + int i, j, step; + + const MACROBLOCKD *const xd = &x->e_mbd; + uint8_t *what = x->plane[0].src.buf; + const int what_stride = x->plane[0].src.stride; + const uint8_t *in_what; + const int in_what_stride = xd->plane[0].pre[0].stride; + const uint8_t *best_address; + + int64_t bestsad; + int best_site = -1; + int last_site = -1; + + // search_param determines the length of the initial step and hence the number + // of iterations. + // 0 = initial step (MAX_FIRST_STEP) pel + // 1 = (MAX_FIRST_STEP/2) pel, + // 2 = (MAX_FIRST_STEP/4) pel... + // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; + const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step]; + const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step]; + const int tot_steps = cfg->total_steps - search_param; + vpx_clear_system_state(); + + *best_full_mv = *init_full_mv; + clamp_mv(best_full_mv, x->mv_limits.col_min, x->mv_limits.col_max, + x->mv_limits.row_min, x->mv_limits.row_max); + *num00 = 0; + + // Work out the start point for the search + in_what = xd->plane[0].pre[0].buf + best_full_mv->row * in_what_stride + + best_full_mv->col; + best_address = in_what; + + // Check the starting position + { + const int64_t mv_dist = + (int64_t)fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + << LOG2_PRECISION; + const int64_t mv_cost = + vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); + bestsad = mv_dist + lambda * mv_cost; + } + + i = 0; + + for (step = 0; step < tot_steps; step++) { + int all_in = 1, t; + + // All_in is true if every one of the points we are checking are within + // the bounds of the image. + all_in &= ((best_full_mv->row + ss_mv[i].row) > x->mv_limits.row_min); + all_in &= ((best_full_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max); + all_in &= ((best_full_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min); + all_in &= ((best_full_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max); + + // If all the pixels are within the bounds we don't check whether the + // search point is valid in this loop, otherwise we check each point + // for validity.. 
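When the pre-check described above succeeds, the branch that follows skips per-point validity tests and feeds candidates to sdx4df four at a time. A generalized sketch of that bounds test; the real code only tests the extreme offsets of the diamond pattern, which suffices for that geometry:

/* Sketch: strict inequalities match the row_min/row_max conventions of
   the surrounding code; returns nonzero when batching is safe. */
static int all_candidates_in_bounds(int row, int col, const int dr[4],
                                    const int dc[4], int row_min, int row_max,
                                    int col_min, int col_max) {
  int all_in = 1, t;
  for (t = 0; t < 4; ++t) {
    all_in &= (row + dr[t] > row_min) & (row + dr[t] < row_max) &
              (col + dc[t] > col_min) & (col + dc[t] < col_max);
  }
  return all_in;
}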
+ if (all_in) { + unsigned int sad_array[4]; + + for (j = 0; j < cfg->searches_per_step; j += 4) { + unsigned char const *block_offset[4]; + + for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address; + + fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, + sad_array); + + for (t = 0; t < 4; t++, i++) { + const int64_t mv_dist = (int64_t)sad_array[t] << LOG2_PRECISION; + if (mv_dist < bestsad) { + const MV this_mv = { best_full_mv->row + ss_mv[i].row, + best_full_mv->col + ss_mv[i].col }; + const int64_t mv_cost = + vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); + const int64_t thissad = mv_dist + lambda * mv_cost; + if (thissad < bestsad) { + bestsad = thissad; + best_site = i; + } + } + } + } + } else { + for (j = 0; j < cfg->searches_per_step; j++) { + // Trap illegal vectors + const MV this_mv = { best_full_mv->row + ss_mv[i].row, + best_full_mv->col + ss_mv[i].col }; + + if (is_mv_in(&x->mv_limits, &this_mv)) { + const uint8_t *const check_here = ss_os[i] + best_address; + const int64_t mv_dist = + (int64_t)fn_ptr->sdf(what, what_stride, check_here, + in_what_stride) + << LOG2_PRECISION; + if (mv_dist < bestsad) { + const int64_t mv_cost = + vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); + const int64_t thissad = mv_dist + lambda * mv_cost; + if (thissad < bestsad) { + bestsad = thissad; + best_site = i; + } + } + } + i++; + } + } + if (best_site != last_site) { + best_full_mv->row += ss_mv[best_site].row; + best_full_mv->col += ss_mv[best_site].col; + best_address += ss_os[best_site]; + last_site = best_site; + } else if (best_address == in_what) { + (*num00)++; + } + } + return bestsad; +} + +int vp9_prepare_nb_full_mvs(const MotionField *motion_field, int mi_row, + int mi_col, int_mv *nb_full_mvs) { + const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize]; + const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize]; + const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; + int nb_full_mv_num = 0; + int i; + assert(mi_row % mi_height == 0); + assert(mi_col % mi_width == 0); + for (i = 0; i < NB_MVS_NUM; ++i) { + int r = dirs[i][0]; + int c = dirs[i][1]; + int brow = mi_row / mi_height + r; + int bcol = mi_col / mi_width + c; + if (brow >= 0 && brow < motion_field->block_rows && bcol >= 0 && + bcol < motion_field->block_cols) { + if (vp9_motion_field_is_mv_set(motion_field, brow, bcol)) { + int_mv mv = vp9_motion_field_get_mv(motion_field, brow, bcol); + nb_full_mvs[nb_full_mv_num].as_mv = get_full_mv(&mv.as_mv); + ++nb_full_mv_num; + } + } + } + return nb_full_mv_num; +} +#endif // CONFIG_NON_GREEDY_MV + int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, @@ -1785,12 +2291,15 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) { } static const MV search_pos[4] = { - { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 }, + { -1, 0 }, + { 0, -1 }, + { 0, 1 }, + { 1, 0 }, }; unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, - int mi_col) { + int mi_col, const MV *ref_mv) { MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } }; @@ -1812,6 +2321,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, const int norm_factor = 3 + (bw >> 5); const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]); + MvLimits 
subpel_mv_limits; if (scaled_ref_frame) { int i; @@ -1876,7 +2386,10 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, { const uint8_t *const pos[4] = { - ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride, + ref_buf - ref_stride, + ref_buf - 1, + ref_buf + 1, + ref_buf + ref_stride, }; cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad); @@ -1911,6 +2424,10 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, tmp_mv->row *= 8; tmp_mv->col *= 8; + vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); + clamp_mv(tmp_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max, + subpel_mv_limits.row_min, subpel_mv_limits.row_max); + if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } @@ -1919,11 +2436,92 @@ return best_sad; } +static int get_exhaustive_threshold(int exhaustive_searches_thresh, + BLOCK_SIZE bsize) { + return exhaustive_searches_thresh >> + (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize])); +} + +#if CONFIG_NON_GREEDY_MV // Runs a sequence of diamond searches in smaller steps for RD. /* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ -static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, +int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, MV *mvp_full, int step_param, + int lambda, int do_refine, + const int_mv *nb_full_mvs, int full_mv_num, + MV *best_mv) { + const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; + const SPEED_FEATURES *const sf = &cpi->sf; + int n, num00 = 0; + int thissme; + int bestsme; + const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param; + const MV center_mv = { 0, 0 }; + vpx_clear_system_state(); + diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param, lambda, + &n, fn_ptr, nb_full_mvs, full_mv_num); + + bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0); + + // If there won't be more n-step search, check to see if refining search is + // needed. + if (n > further_steps) do_refine = 0; + + while (n < further_steps) { + ++n; + if (num00) { + num00--; + } else { + MV temp_mv; + diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv, + step_param + n, lambda, &num00, fn_ptr, + nb_full_mvs, full_mv_num); + thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0); + // check to see if refining search is needed.
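Both this function and vp9_full_pixel_search later in the patch gate their mesh fallback with get_exhaustive_threshold, defined above: the 64x64 threshold from the speed features is shifted down so that the trigger scales with block area. b_width_log2_lookup and b_height_log2_lookup give log2 of the dimensions in 4-pel units, so a 64x64 block yields 4 + 4 = 8 (no shift) while a 16x16 block yields 2 + 2 = 4 (threshold / 16). A worked sketch of that arithmetic:

/* Sketch: the exhaustive-search trigger is proportional to the pixel
   count of the block; thresh_64x64 is the value sized for a 64x64 block. */
static long long exhaustive_threshold(long long thresh_64x64,
                                      int bw_log2_4pel, int bh_log2_4pel) {
  return thresh_64x64 >> (8 - (bw_log2_4pel + bh_log2_4pel));
}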
+ if (num00 > further_steps - n) do_refine = 0; + + if (thissme < bestsme) { + bestsme = thissme; + *best_mv = temp_mv; + } + } + } + + // final 1-away diamond refining search + if (do_refine) { + const int search_range = 8; + MV temp_mv = *best_mv; + vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range, fn_ptr, + nb_full_mvs, full_mv_num); + thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0); + if (thissme < bestsme) { + bestsme = thissme; + *best_mv = temp_mv; + } + } + + if (sf->exhaustive_searches_thresh < INT_MAX && + !cpi->rc.is_src_frame_alt_ref) { + const int64_t exhaustive_thr = + get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize); + if (bestsme > exhaustive_thr) { + full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda, + nb_full_mvs, full_mv_num); + bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0); + } + } + return bestsme; +} +#endif // CONFIG_NON_GREEDY_MV + +// Runs a sequence of diamond searches in smaller steps for RD. +/* do_refine: If last step (1-away) of n-step search doesn't pick the center + point as the best match, we will do a final 1-away diamond + refining search */ +static int full_pixel_diamond(const VP9_COMP *const cpi, + const MACROBLOCK *const x, MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, int *cost_list, const vp9_variance_fn_ptr_t *fn_ptr, @@ -1983,13 +2581,11 @@ static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, return bestsme; } -#define MIN_RANGE 7 -#define MAX_RANGE 256 -#define MIN_INTERVAL 1 // Runs a limited range exhaustive mesh search using a pattern set // according to the encode speed profile. -static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x, - MV *centre_mv_full, int sadpb, int *cost_list, +static int full_pixel_exhaustive(const VP9_COMP *const cpi, + const MACROBLOCK *const x, MV *centre_mv_full, + int sadpb, int *cost_list, const vp9_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv) { const SPEED_FEATURES *const sf = &cpi->sf; @@ -2015,7 +2611,7 @@ static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x, interval = VPXMAX(interval, range / baseline_interval_divisor); // initial search - bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval, + bestsme = exhaustive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval, sadpb, fn_ptr, &temp_mv); if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { @@ -2023,7 +2619,7 @@ // Progressive searches with range and step size decreasing each time // till we reach a step size of 1. Then break out.
for (i = 1; i < MAX_MESH_STEP; ++i) { // First pass with coarser step and longer range - bestsme = exhuastive_mesh_search( + bestsme = exhaustive_mesh_search( x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range, sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv); @@ -2042,6 +2638,91 @@ static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x, return bestsme; } +#if CONFIG_NON_GREEDY_MV +int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, + int lambda, int search_range, + const vp9_variance_fn_ptr_t *fn_ptr, + const int_mv *nb_full_mvs, + int full_mv_num) { + const MACROBLOCKD *const xd = &x->e_mbd; + const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv); + int64_t best_sad; + int i, j; + vpx_clear_system_state(); + { + const int64_t mv_dist = (int64_t)fn_ptr->sdf(what->buf, what->stride, + best_address, in_what->stride) + << LOG2_PRECISION; + const int64_t mv_cost = + vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); + best_sad = mv_dist + lambda * mv_cost; + } + + for (i = 0; i < search_range; i++) { + int best_site = -1; + const int all_in = ((best_full_mv->row - 1) > x->mv_limits.row_min) & + ((best_full_mv->row + 1) < x->mv_limits.row_max) & + ((best_full_mv->col - 1) > x->mv_limits.col_min) & + ((best_full_mv->col + 1) < x->mv_limits.col_max); + + if (all_in) { + unsigned int sads[4]; + const uint8_t *const positions[4] = { best_address - in_what->stride, + best_address - 1, best_address + 1, + best_address + in_what->stride }; + + fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); + + for (j = 0; j < 4; ++j) { + const MV mv = { best_full_mv->row + neighbors[j].row, + best_full_mv->col + neighbors[j].col }; + const int64_t mv_dist = (int64_t)sads[j] << LOG2_PRECISION; + const int64_t mv_cost = + vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + const int64_t thissad = mv_dist + lambda * mv_cost; + if (thissad < best_sad) { + best_sad = thissad; + best_site = j; + } + } + } else { + for (j = 0; j < 4; ++j) { + const MV mv = { best_full_mv->row + neighbors[j].row, + best_full_mv->col + neighbors[j].col }; + + if (is_mv_in(&x->mv_limits, &mv)) { + const int64_t mv_dist = + (int64_t)fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), + in_what->stride) + << LOG2_PRECISION; + const int64_t mv_cost = + vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + const int64_t thissad = mv_dist + lambda * mv_cost; + if (thissad < best_sad) { + best_sad = thissad; + best_site = j; + } + } + } + } + + if (best_site == -1) { + break; + } else { + best_full_mv->row += neighbors[best_site].row; + best_full_mv->col += neighbors[best_site].col; + best_address = get_buf_from_mv(in_what, best_full_mv); + } + } + + return best_sad; +} +#endif // CONFIG_NON_GREEDY_MV + int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, @@ -2167,14 +2848,16 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, return best_sad; } -int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, - MV *mvp_full, int step_param, int search_method, - int error_per_bit, int *cost_list, const MV *ref_mv, - MV *tmp_mv, int var_max, int rd) { +int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK 
*const x, + BLOCK_SIZE bsize, MV *mvp_full, int step_param, + int search_method, int error_per_bit, int *cost_list, + const MV *ref_mv, MV *tmp_mv, int var_max, int rd) { const SPEED_FEATURES *const sf = &cpi->sf; const SEARCH_METHODS method = (SEARCH_METHODS)search_method; - vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; + const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; int var = 0; + int run_exhaustive_search = 0; + if (cost_list) { cost_list[0] = INT_MAX; cost_list[1] = INT_MAX; @@ -2205,35 +2888,39 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, fn_ptr, 1, ref_mv, tmp_mv); break; case NSTEP: + case MESH: var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, MAX_MVSEARCH_STEPS - 1 - step_param, 1, cost_list, fn_ptr, ref_mv, tmp_mv); - - // Should we allow a follow on exhaustive search? - if ((sf->exhaustive_searches_thresh < INT_MAX) && - !cpi->rc.is_src_frame_alt_ref) { - int64_t exhuastive_thr = sf->exhaustive_searches_thresh; - exhuastive_thr >>= - 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); - - // Threshold variance for an exhaustive full search. - if (var > exhuastive_thr) { - int var_ex; - MV tmp_mv_ex; - var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, - cost_list, fn_ptr, ref_mv, &tmp_mv_ex); - - if (var_ex < var) { - var = var_ex; - *tmp_mv = tmp_mv_ex; - } - } - } break; - default: assert(0 && "Invalid search method."); + default: assert(0 && "Unknown search method"); } - if (method != NSTEP && rd && var < var_max) + if (method == NSTEP) { + if (sf->exhaustive_searches_thresh < INT_MAX && + !cpi->rc.is_src_frame_alt_ref) { + const int64_t exhaustive_thr = + get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize); + if (var > exhaustive_thr) { + run_exhaustive_search = 1; + } + } + } else if (method == MESH) { + run_exhaustive_search = 1; + } + + if (run_exhaustive_search) { + int var_ex; + MV tmp_mv_ex; + var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, cost_list, + fn_ptr, ref_mv, &tmp_mv_ex); + if (var_ex < var) { + var = var_ex; + *tmp_mv = tmp_mv_ex; + } + } + + if (method != NSTEP && method != MESH && rd && var < var_max) var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1); return var; @@ -2274,7 +2961,8 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, (void)tc; \ (void)sse; \ (void)thismse; \ - (void)cost_list; + (void)cost_list; \ + (void)use_accurate_subpel_search; // Return the maximum MV. 
uint32_t vp9_return_max_sub_pixel_mv( @@ -2282,7 +2970,7 @@ uint32_t vp9_return_max_sub_pixel_mv( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { COMMON_MV_TEST; (void)minr; @@ -2304,7 +2992,7 @@ uint32_t vp9_return_min_sub_pixel_mv( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { COMMON_MV_TEST; (void)maxr; diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_mcomp.h b/media/libvpx/libvpx/vp9/encoder/vp9_mcomp.h index b8db2c353688..0c4d8f23c6a8 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_mcomp.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_mcomp.h @@ -8,10 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_MCOMP_H_ -#define VP9_ENCODER_VP9_MCOMP_H_ +#ifndef VPX_VP9_ENCODER_VP9_MCOMP_H_ +#define VPX_VP9_ENCODER_VP9_MCOMP_H_ #include "vp9/encoder/vp9_block.h" +#if CONFIG_NON_GREEDY_MV +#include "vp9/encoder/vp9_non_greedy_mv.h" +#endif // CONFIG_NON_GREEDY_MV #include "vpx_dsp/variance.h" #ifdef __cplusplus @@ -38,6 +41,11 @@ typedef struct search_site_config { int total_steps; } search_site_config; +static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, + const MV *mv) { + return &buf->buf[mv->row * buf->stride + mv->col]; +} + void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride); void vp9_init3smotion_compensation(search_site_config *cfg, int stride); @@ -59,14 +67,15 @@ struct SPEED_FEATURES; int vp9_init_search_range(int size); int vp9_refining_search_sad(const struct macroblock *x, struct mv *ref_mv, - int sad_per_bit, int distance, + int error_per_bit, int search_range, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); // Perform integral projection based motion estimation. unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, - int mi_row, int mi_col); + int mi_row, int mi_col, + const MV *ref_mv); typedef uint32_t(fractional_mv_step_fp)( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, @@ -74,7 +83,7 @@ typedef uint32_t(fractional_mv_step_fp)( int forced_stop, // 0 - full, 1 - qtr only, 2 - half only int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h); + int h, int use_accurate_subpel_search); extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned; @@ -106,7 +115,11 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, struct VP9_COMP; -int vp9_full_pixel_search(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, +// "mvp_full" is the MV search starting point; +// "ref_mv" is the context reference MV; +// "tmp_mv" is the searched best MV. 
+int vp9_full_pixel_search(const struct VP9_COMP *const cpi, + const MACROBLOCK *const x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int search_method, int error_per_bit, int *cost_list, const MV *ref_mv, MV *tmp_mv, int var_max, int rd); @@ -115,8 +128,55 @@ void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits, const MvLimits *umv_window_limits, const MV *ref_mv); +#if CONFIG_NON_GREEDY_MV +struct TplDepStats; +int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, + int lambda, int search_range, + const vp9_variance_fn_ptr_t *fn_ptr, + const int_mv *nb_full_mvs, int full_mv_num); + +int vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, MV *mvp_full, int step_param, + int lambda, int do_refine, + const int_mv *nb_full_mvs, int full_mv_num, + MV *best_mv); + +static INLINE MV get_full_mv(const MV *mv) { + MV out_mv; + out_mv.row = mv->row >> 3; + out_mv.col = mv->col >> 3; + return out_mv; +} +struct TplDepFrame; +int vp9_prepare_nb_full_mvs(const struct MotionField *motion_field, int mi_row, + int mi_col, int_mv *nb_full_mvs); + +static INLINE BLOCK_SIZE get_square_block_size(BLOCK_SIZE bsize) { + BLOCK_SIZE square_bsize; + switch (bsize) { + case BLOCK_4X4: + case BLOCK_4X8: + case BLOCK_8X4: square_bsize = BLOCK_4X4; break; + case BLOCK_8X8: + case BLOCK_8X16: + case BLOCK_16X8: square_bsize = BLOCK_8X8; break; + case BLOCK_16X16: + case BLOCK_16X32: + case BLOCK_32X16: square_bsize = BLOCK_16X16; break; + case BLOCK_32X32: + case BLOCK_32X64: + case BLOCK_64X32: + case BLOCK_64X64: square_bsize = BLOCK_32X32; break; + default: + square_bsize = BLOCK_INVALID; + assert(0 && "ERROR: invalid block size"); + break; + } + return square_bsize; +} +#endif // CONFIG_NON_GREEDY_MV #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_MCOMP_H_ +#endif // VPX_VP9_ENCODER_VP9_MCOMP_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_multi_thread.c b/media/libvpx/libvpx/vp9/encoder/vp9_multi_thread.c index da06fb151d8b..c66c03549289 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_multi_thread.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_multi_thread.c @@ -13,6 +13,7 @@ #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_ethread.h" #include "vp9/encoder/vp9_multi_thread.h" +#include "vp9/encoder/vp9_temporal_filter.h" void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt, int tile_id) { @@ -50,6 +51,20 @@ void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt, return job_info; } +void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi, + TileDataEnc *const this_tile) { + VP9_COMMON *const cm = &cpi->common; + const int sb_rows = + (mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1; + int i; + + this_tile->row_base_thresh_freq_fact = + (int *)vpx_calloc(sb_rows * BLOCK_SIZES * MAX_MODES, + sizeof(*(this_tile->row_base_thresh_freq_fact))); + for (i = 0; i < sb_rows * BLOCK_SIZES * MAX_MODES; i++) + this_tile->row_base_thresh_freq_fact[i] = RD_THRESH_INIT_FACT; +} + void vp9_row_mt_mem_alloc(VP9_COMP *cpi) { struct VP9Common *cm = &cpi->common; MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt; @@ -59,6 +74,8 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) { const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; int jobs_per_tile_col, total_jobs; + // Allocate memory that is large enough for all row_mt stages. First pass + // uses 16x16 block size. 
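The allocation sizes to the larger of two row counts because first-pass jobs are rows of 16x16 macroblocks while encode jobs are rows of 64x64 superblocks. A simplified sketch of the two unit conversions, assuming mi units of 8x8 and folding in the align-to-superblock rounding:

/* Sketch: derive both job-row counts from mi_rows (8x8 mi units). */
static int row_mt_job_rows(int mi_rows) {
  const int mb_rows = (mi_rows + 1) >> 1; /* 16x16 macroblock rows */
  const int sb_rows = (mi_rows + 7) >> 3; /* 64x64 superblock rows */
  return mb_rows > sb_rows ? mb_rows : sb_rows;
}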
jobs_per_tile_col = VPXMAX(cm->mb_rows, sb_rows); // Calculate the total number of jobs total_jobs = jobs_per_tile_col * tile_cols; @@ -83,14 +100,11 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) { TileDataEnc *this_tile = &cpi->tile_data[tile_col]; vp9_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, jobs_per_tile_col); if (cpi->sf.adaptive_rd_thresh_row_mt) { - const int sb_rows = - (mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1; - int i; - this_tile->row_base_thresh_freq_fact = - (int *)vpx_calloc(sb_rows * BLOCK_SIZES * MAX_MODES, - sizeof(*(this_tile->row_base_thresh_freq_fact))); - for (i = 0; i < sb_rows * BLOCK_SIZES * MAX_MODES; i++) - this_tile->row_base_thresh_freq_fact[i] = RD_THRESH_INIT_FACT; + if (this_tile->row_base_thresh_freq_fact != NULL) { + vpx_free(this_tile->row_base_thresh_freq_fact); + this_tile->row_base_thresh_freq_fact = NULL; + } + vp9_row_mt_alloc_rd_thresh(cpi, this_tile); } } @@ -146,11 +160,9 @@ void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) { TileDataEnc *this_tile = &cpi->tile_data[tile_row * multi_thread_ctxt->allocated_tile_cols + tile_col]; - if (cpi->sf.adaptive_rd_thresh_row_mt) { - if (this_tile->row_base_thresh_freq_fact != NULL) { - vpx_free(this_tile->row_base_thresh_freq_fact); - this_tile->row_base_thresh_freq_fact = NULL; - } + if (this_tile->row_base_thresh_freq_fact != NULL) { + vpx_free(this_tile->row_base_thresh_freq_fact); + this_tile->row_base_thresh_freq_fact = NULL; } } } @@ -219,11 +231,19 @@ void vp9_prepare_job_queue(VP9_COMP *cpi, JOB_TYPE job_type) { MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt; JobQueue *job_queue = multi_thread_ctxt->job_queue; const int tile_cols = 1 << cm->log2_tile_cols; - int job_row_num, jobs_per_tile, jobs_per_tile_col, total_jobs; + int job_row_num, jobs_per_tile, jobs_per_tile_col = 0, total_jobs; const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; int tile_col, i; - jobs_per_tile_col = (job_type != ENCODE_JOB) ? cm->mb_rows : sb_rows; + switch (job_type) { + case ENCODE_JOB: jobs_per_tile_col = sb_rows; break; + case FIRST_PASS_JOB: jobs_per_tile_col = cm->mb_rows; break; + case ARNR_JOB: + jobs_per_tile_col = ((cm->mi_rows + TF_ROUND) >> TF_SHIFT); + break; + default: assert(0); + } + total_jobs = jobs_per_tile_col * tile_cols; multi_thread_ctxt->jobs_per_tile_col = jobs_per_tile_col; diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_multi_thread.h b/media/libvpx/libvpx/vp9/encoder/vp9_multi_thread.h index bfc0c0ae4f69..a2276f4fe63e 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_multi_thread.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_multi_thread.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_ENCODER_VP9_MULTI_THREAD_H -#define VP9_ENCODER_VP9_MULTI_THREAD_H +#ifndef VPX_VP9_ENCODER_VP9_MULTI_THREAD_H_ +#define VPX_VP9_ENCODER_VP9_MULTI_THREAD_H_ #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_job_queue.h" @@ -29,10 +29,13 @@ void vp9_multi_thread_tile_init(VP9_COMP *cpi); void vp9_row_mt_mem_alloc(VP9_COMP *cpi); +void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi, + TileDataEnc *const this_tile); + void vp9_row_mt_mem_dealloc(VP9_COMP *cpi); int vp9_get_tiles_proc_status(MultiThreadHandle *multi_thread_ctxt, int *tile_completion_status, int *cur_tile_id, int tile_cols); -#endif // VP9_ENCODER_VP9_MULTI_THREAD_H +#endif // VPX_VP9_ENCODER_VP9_MULTI_THREAD_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_noise_estimate.c b/media/libvpx/libvpx/vp9/encoder/vp9_noise_estimate.c index 276a0c785259..9696529c5041 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_noise_estimate.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_noise_estimate.c @@ -32,7 +32,7 @@ static INLINE int noise_est_svc(const struct VP9_COMP *const cpi) { void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) { ne->enabled = 0; - ne->level = kLowLow; + ne->level = (width * height < 1280 * 720) ? kLowLow : kLow; ne->value = 0; ne->count = 0; ne->thresh = 90; @@ -46,6 +46,7 @@ void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) { ne->thresh = 115; } ne->num_frames_estimate = 15; + ne->adapt_thresh = (3 * ne->thresh) >> 1; } static int enable_noise_estimation(VP9_COMP *const cpi) { @@ -97,7 +98,7 @@ NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne) { } else { if (ne->value > ne->thresh) noise_level = kMedium; - else if (ne->value > ((9 * ne->thresh) >> 4)) + else if (ne->value > (ne->thresh >> 1)) noise_level = kLow; else noise_level = kLowLow; @@ -112,10 +113,6 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { // Estimate of noise level every frame_period frames. int frame_period = 8; int thresh_consec_zeromv = 6; - unsigned int thresh_sum_diff = 100; - unsigned int thresh_sum_spatial = (200 * 200) << 8; - unsigned int thresh_spatial_var = (32 * 32) << 8; - int min_blocks_estimate = cm->mi_rows * cm->mi_cols >> 7; int frame_counter = cm->current_video_frame; // Estimate is between current source and last source. YV12_BUFFER_CONFIG *last_source = cpi->Last_Source; @@ -124,11 +121,8 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { last_source = &cpi->denoiser.last_source; // Tune these thresholds for different resolutions when denoising is // enabled. - if (cm->width > 640 && cm->width < 1920) { - thresh_consec_zeromv = 4; - thresh_sum_diff = 200; - thresh_sum_spatial = (120 * 120) << 8; - thresh_spatial_var = (48 * 48) << 8; + if (cm->width > 640 && cm->width <= 1920) { + thresh_consec_zeromv = 2; } } #endif @@ -148,8 +142,10 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { ne->last_h = cm->height; } return; - } else if (cm->current_video_frame > 60 && - cpi->rc.avg_frame_low_motion < (low_res ? 70 : 50)) { + } else if (frame_counter > 60 && cpi->svc.num_encoded_top_layer > 1 && + cpi->rc.frames_since_key > cpi->svc.number_spatial_layers && + cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 && + cpi->rc.avg_frame_low_motion < (low_res ? 60 : 40)) { // Force noise estimation to 0 and denoiser off if content has high motion. 
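The noise-estimate changes above lower the kLow boundary from 9*thresh/16 to thresh/2 and precompute adapt_thresh = 3*thresh/2 for the sudden-noise check introduced later in this patch. A minimal standalone sketch of the revised mapping (the kHigh branch sits outside this hunk, so it is omitted here):

#include <stdio.h>

typedef enum { kLowLow, kLow, kMedium, kHigh } NOISE_LEVEL;

static NOISE_LEVEL extract_level(int value, int thresh) {
  /* Mirrors the non-denoising branch of the hunk above. */
  if (value > thresh) return kMedium;
  if (value > (thresh >> 1)) return kLow;
  return kLowLow;
}

int main(void) {
  const int thresh = 90; /* default set in vp9_noise_estimate_init() */
  printf("value 100 -> level %d (kMedium)\n", extract_level(100, thresh));
  printf("value  50 -> level %d (kLow)\n", extract_level(50, thresh));
  printf("value  30 -> level %d (kLowLow)\n", extract_level(30, thresh));
  return 0;
}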
ne->level = kLowLow; ne->count = 0; @@ -157,17 +153,19 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) && cpi->svc.current_superframe > 1) { - vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level); + vp9_denoiser_set_noise_level(cpi, ne->level); copy_frame(&cpi->denoiser.last_source, cpi->Source); } #endif return; } else { - int num_samples = 0; - uint64_t avg_est = 0; + unsigned int bin_size = 100; + unsigned int hist[MAX_VAR_HIST_BINS] = { 0 }; + unsigned int hist_avg[MAX_VAR_HIST_BINS]; + unsigned int max_bin = 0; + unsigned int max_bin_count = 0; + unsigned int bin_cnt; int bsize = BLOCK_16X16; - static const unsigned char const_source[16] = { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }; // Loop over sub-sample of 16x16 blocks of frame, and for blocks that have // been encoded as zero/small mv at least x consecutive frames, compute // the variance to update estimate of noise in the source. @@ -207,8 +205,11 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { // Only consider blocks that are likely steady background. i.e, have // been encoded as zero/low motion x (= thresh_consec_zeromv) frames // in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all - // 4 sub-blocks for 16x16 block. Also, avoid skin blocks. - if (frame_low_motion && consec_zeromv > thresh_consec_zeromv) { + // 4 sub-blocks for 16x16 block. And exclude this frame if + // high_source_sad is true (i.e., scene/content change). + if (frame_low_motion && consec_zeromv > thresh_consec_zeromv && + !cpi->rc.high_source_sad && + !cpi->svc.high_source_sad_superframe) { int is_skin = 0; if (cpi->use_skin_detection) { is_skin = @@ -217,25 +218,15 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { } if (!is_skin) { unsigned int sse; - // Compute variance. + // Compute variance between co-located blocks from current and + // last input frames. unsigned int variance = cpi->fn_ptr[bsize].vf( src_y, src_ystride, last_src_y, last_src_ystride, &sse); - // Only consider this block as valid for noise measurement if the - // average term (sse - variance = N * avg^{2}, N = 16X16) of the - // temporal residual is small (avoid effects from lighting - // change). - if ((sse - variance) < thresh_sum_diff) { - unsigned int sse2; - const unsigned int spatial_variance = cpi->fn_ptr[bsize].vf( - src_y, src_ystride, const_source, 0, &sse2); - // Avoid blocks with high brightness and high spatial variance. - if ((sse2 - spatial_variance) < thresh_sum_spatial && - spatial_variance < thresh_spatial_var) { - avg_est += low_res ? variance >> 4 - : variance / ((spatial_variance >> 9) + 1); - num_samples++; - } - } + unsigned int hist_index = variance / bin_size; + if (hist_index < MAX_VAR_HIST_BINS) + hist[hist_index]++; + else if (hist_index < 3 * (MAX_VAR_HIST_BINS >> 1)) + hist[MAX_VAR_HIST_BINS - 1]++; // Account for the tail } } } @@ -251,26 +242,58 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { } ne->last_w = cm->width; ne->last_h = cm->height; - // Update noise estimate if we have at a minimum number of block samples, - // and avg_est > 0 (avg_est == 0 can happen if the application inputs - // duplicate frames). - if (num_samples > min_blocks_estimate && avg_est > 0) { - // Normalize. - avg_est = avg_est / num_samples; - // Update noise estimate. 
- ne->value = (int)((15 * ne->value + avg_est) >> 4); - ne->count++; - if (ne->count == ne->num_frames_estimate) { - // Reset counter and check noise level condition. - ne->num_frames_estimate = 30; - ne->count = 0; - ne->level = vp9_noise_estimate_extract_level(ne); -#if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) - vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level); -#endif + // Adjust histogram to account for effect that histogram flattens + // and shifts to zero as scene darkens. + if (hist[0] > 10 && (hist[MAX_VAR_HIST_BINS - 1] > hist[0] >> 2)) { + hist[0] = 0; + hist[1] >>= 2; + hist[2] >>= 2; + hist[3] >>= 2; + hist[4] >>= 1; + hist[5] >>= 1; + hist[6] = 3 * hist[6] >> 1; + hist[MAX_VAR_HIST_BINS - 1] >>= 1; + } + + // Average hist[] and find largest bin + for (bin_cnt = 0; bin_cnt < MAX_VAR_HIST_BINS; bin_cnt++) { + if (bin_cnt == 0) + hist_avg[bin_cnt] = (hist[0] + hist[1] + hist[2]) / 3; + else if (bin_cnt == MAX_VAR_HIST_BINS - 1) + hist_avg[bin_cnt] = hist[MAX_VAR_HIST_BINS - 1] >> 2; + else if (bin_cnt == MAX_VAR_HIST_BINS - 2) + hist_avg[bin_cnt] = (hist[bin_cnt - 1] + 2 * hist[bin_cnt] + + (hist[bin_cnt + 1] >> 1) + 2) >> + 2; + else + hist_avg[bin_cnt] = + (hist[bin_cnt - 1] + 2 * hist[bin_cnt] + hist[bin_cnt + 1] + 2) >> + 2; + + if (hist_avg[bin_cnt] > max_bin_count) { + max_bin_count = hist_avg[bin_cnt]; + max_bin = bin_cnt; } } + + // Scale by 40 to work with existing thresholds + ne->value = (int)((3 * ne->value + max_bin * 40) >> 2); + // Quickly increase VNR strength when the noise level increases suddenly. + if (ne->level < kMedium && ne->value > ne->adapt_thresh) { + ne->count = ne->num_frames_estimate; + } else { + ne->count++; + } + if (ne->count == ne->num_frames_estimate) { + // Reset counter and check noise level condition. + ne->num_frames_estimate = 30; + ne->count = 0; + ne->level = vp9_noise_estimate_extract_level(ne); +#if CONFIG_VP9_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) + vp9_denoiser_set_noise_level(cpi, ne->level); +#endif + } } #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_noise_estimate.h b/media/libvpx/libvpx/vp9/encoder/vp9_noise_estimate.h index 335cdbe64342..7fc94ff8c95a 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_noise_estimate.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_noise_estimate.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_ENCODER_NOISE_ESTIMATE_H_ -#define VP9_ENCODER_NOISE_ESTIMATE_H_ +#ifndef VPX_VP9_ENCODER_VP9_NOISE_ESTIMATE_H_ +#define VPX_VP9_ENCODER_VP9_NOISE_ESTIMATE_H_ #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_skin_detection.h" @@ -23,6 +23,8 @@ extern "C" { #endif +#define MAX_VAR_HIST_BINS 20 + typedef enum noise_level { kLowLow, kLow, kMedium, kHigh } NOISE_LEVEL; typedef struct noise_estimate { @@ -30,6 +32,7 @@ typedef struct noise_estimate { NOISE_LEVEL level; int value; int thresh; + int adapt_thresh; int count; int last_w; int last_h; @@ -48,4 +51,4 @@ void vp9_update_noise_estimate(struct VP9_COMP *const cpi); } // extern "C" #endif -#endif // VP9_ENCODER_NOISE_ESTIMATE_H_ +#endif // VPX_VP9_ENCODER_VP9_NOISE_ESTIMATE_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_non_greedy_mv.c b/media/libvpx/libvpx/vp9/encoder/vp9_non_greedy_mv.c new file mode 100644 index 000000000000..4679d6c49c4b --- /dev/null +++ b/media/libvpx/libvpx/vp9/encoder/vp9_non_greedy_mv.c @@ -0,0 +1,533 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_mv.h" +#include "vp9/encoder/vp9_non_greedy_mv.h" +// TODO(angiebird): move non_greedy_mv related functions to this file + +#define LOG2_TABLE_SIZE 1024 +static const int log2_table[LOG2_TABLE_SIZE] = { + 0, // This is a dummy value + 0, 1048576, 1661954, 2097152, 2434718, 2710530, 2943725, + 3145728, 3323907, 3483294, 3627477, 3759106, 3880192, 3992301, + 4096672, 4194304, 4286015, 4372483, 4454275, 4531870, 4605679, + 4676053, 4743299, 4807682, 4869436, 4928768, 4985861, 5040877, + 5093962, 5145248, 5194851, 5242880, 5289431, 5334591, 5378443, + 5421059, 5462508, 5502851, 5542146, 5580446, 5617800, 5654255, + 5689851, 5724629, 5758625, 5791875, 5824409, 5856258, 5887450, + 5918012, 5947969, 5977344, 6006160, 6034437, 6062195, 6089453, + 6116228, 6142538, 6168398, 6193824, 6218829, 6243427, 6267632, + 6291456, 6314910, 6338007, 6360756, 6383167, 6405252, 6427019, + 6448477, 6469635, 6490501, 6511084, 6531390, 6551427, 6571202, + 6590722, 6609993, 6629022, 6647815, 6666376, 6684713, 6702831, + 6720734, 6738427, 6755916, 6773205, 6790299, 6807201, 6823917, + 6840451, 6856805, 6872985, 6888993, 6904834, 6920510, 6936026, + 6951384, 6966588, 6981641, 6996545, 7011304, 7025920, 7040397, + 7054736, 7068940, 7083013, 7096956, 7110771, 7124461, 7138029, + 7151476, 7164804, 7178017, 7191114, 7204100, 7216974, 7229740, + 7242400, 7254954, 7267405, 7279754, 7292003, 7304154, 7316208, + 7328167, 7340032, 7351805, 7363486, 7375079, 7386583, 7398000, + 7409332, 7420579, 7431743, 7442826, 7453828, 7464751, 7475595, + 7486362, 7497053, 7507669, 7518211, 7528680, 7539077, 7549404, + 7559660, 7569847, 7579966, 7590017, 7600003, 7609923, 7619778, + 7629569, 7639298, 7648964, 7658569, 7668114, 7677598, 7687023, + 7696391, 7705700, 7714952, 7724149, 7733289, 7742375, 7751407, + 7760385, 7769310, 7778182, 7787003, 7795773, 7804492, 7813161, + 7821781, 7830352, 7838875, 7847350, 7855777, 7864158, 7872493, + 7880782, 7889027, 7897226, 7905381, 7913492, 7921561, 7929586, + 7937569, 7945510, 7953410, 7961268, 7969086, 7976864, 7984602, + 7992301, 7999960, 8007581, 8015164, 
8022709, 8030217, 8037687, + 8045121, 8052519, 8059880, 8067206, 8074496, 8081752, 8088973, + 8096159, 8103312, 8110431, 8117516, 8124569, 8131589, 8138576, + 8145532, 8152455, 8159347, 8166208, 8173037, 8179836, 8186605, + 8193343, 8200052, 8206731, 8213380, 8220001, 8226593, 8233156, + 8239690, 8246197, 8252676, 8259127, 8265550, 8271947, 8278316, + 8284659, 8290976, 8297266, 8303530, 8309768, 8315981, 8322168, + 8328330, 8334467, 8340579, 8346667, 8352730, 8358769, 8364784, + 8370775, 8376743, 8382687, 8388608, 8394506, 8400381, 8406233, + 8412062, 8417870, 8423655, 8429418, 8435159, 8440878, 8446576, + 8452252, 8457908, 8463542, 8469155, 8474748, 8480319, 8485871, + 8491402, 8496913, 8502404, 8507875, 8513327, 8518759, 8524171, + 8529564, 8534938, 8540293, 8545629, 8550947, 8556245, 8561525, + 8566787, 8572031, 8577256, 8582464, 8587653, 8592825, 8597980, + 8603116, 8608236, 8613338, 8618423, 8623491, 8628542, 8633576, + 8638593, 8643594, 8648579, 8653547, 8658499, 8663434, 8668354, + 8673258, 8678145, 8683017, 8687874, 8692715, 8697540, 8702350, + 8707145, 8711925, 8716690, 8721439, 8726174, 8730894, 8735599, + 8740290, 8744967, 8749628, 8754276, 8758909, 8763528, 8768134, + 8772725, 8777302, 8781865, 8786415, 8790951, 8795474, 8799983, + 8804478, 8808961, 8813430, 8817886, 8822328, 8826758, 8831175, + 8835579, 8839970, 8844349, 8848715, 8853068, 8857409, 8861737, + 8866053, 8870357, 8874649, 8878928, 8883195, 8887451, 8891694, + 8895926, 8900145, 8904353, 8908550, 8912734, 8916908, 8921069, + 8925220, 8929358, 8933486, 8937603, 8941708, 8945802, 8949885, + 8953957, 8958018, 8962068, 8966108, 8970137, 8974155, 8978162, + 8982159, 8986145, 8990121, 8994086, 8998041, 9001986, 9005920, + 9009844, 9013758, 9017662, 9021556, 9025440, 9029314, 9033178, + 9037032, 9040877, 9044711, 9048536, 9052352, 9056157, 9059953, + 9063740, 9067517, 9071285, 9075044, 9078793, 9082533, 9086263, + 9089985, 9093697, 9097400, 9101095, 9104780, 9108456, 9112123, + 9115782, 9119431, 9123072, 9126704, 9130328, 9133943, 9137549, + 9141146, 9144735, 9148316, 9151888, 9155452, 9159007, 9162554, + 9166092, 9169623, 9173145, 9176659, 9180165, 9183663, 9187152, + 9190634, 9194108, 9197573, 9201031, 9204481, 9207923, 9211357, + 9214784, 9218202, 9221613, 9225017, 9228412, 9231800, 9235181, + 9238554, 9241919, 9245277, 9248628, 9251971, 9255307, 9258635, + 9261956, 9265270, 9268577, 9271876, 9275169, 9278454, 9281732, + 9285002, 9288266, 9291523, 9294773, 9298016, 9301252, 9304481, + 9307703, 9310918, 9314126, 9317328, 9320523, 9323711, 9326892, + 9330067, 9333235, 9336397, 9339552, 9342700, 9345842, 9348977, + 9352106, 9355228, 9358344, 9361454, 9364557, 9367654, 9370744, + 9373828, 9376906, 9379978, 9383043, 9386102, 9389155, 9392202, + 9395243, 9398278, 9401306, 9404329, 9407345, 9410356, 9413360, + 9416359, 9419351, 9422338, 9425319, 9428294, 9431263, 9434226, + 9437184, 9440136, 9443082, 9446022, 9448957, 9451886, 9454809, + 9457726, 9460638, 9463545, 9466446, 9469341, 9472231, 9475115, + 9477994, 9480867, 9483735, 9486597, 9489454, 9492306, 9495152, + 9497993, 9500828, 9503659, 9506484, 9509303, 9512118, 9514927, + 9517731, 9520530, 9523324, 9526112, 9528895, 9531674, 9534447, + 9537215, 9539978, 9542736, 9545489, 9548237, 9550980, 9553718, + 9556451, 9559179, 9561903, 9564621, 9567335, 9570043, 9572747, + 9575446, 9578140, 9580830, 9583514, 9586194, 9588869, 9591540, + 9594205, 9596866, 9599523, 9602174, 9604821, 9607464, 9610101, + 9612735, 9615363, 9617987, 9620607, 9623222, 9625832, 9628438, + 9631040, 
9633637, 9636229, 9638818, 9641401, 9643981, 9646556, + 9649126, 9651692, 9654254, 9656812, 9659365, 9661914, 9664459, + 9666999, 9669535, 9672067, 9674594, 9677118, 9679637, 9682152, + 9684663, 9687169, 9689672, 9692170, 9694665, 9697155, 9699641, + 9702123, 9704601, 9707075, 9709545, 9712010, 9714472, 9716930, + 9719384, 9721834, 9724279, 9726721, 9729159, 9731593, 9734024, + 9736450, 9738872, 9741291, 9743705, 9746116, 9748523, 9750926, + 9753326, 9755721, 9758113, 9760501, 9762885, 9765266, 9767642, + 9770015, 9772385, 9774750, 9777112, 9779470, 9781825, 9784175, + 9786523, 9788866, 9791206, 9793543, 9795875, 9798204, 9800530, + 9802852, 9805170, 9807485, 9809797, 9812104, 9814409, 9816710, + 9819007, 9821301, 9823591, 9825878, 9828161, 9830441, 9832718, + 9834991, 9837261, 9839527, 9841790, 9844050, 9846306, 9848559, + 9850808, 9853054, 9855297, 9857537, 9859773, 9862006, 9864235, + 9866462, 9868685, 9870904, 9873121, 9875334, 9877544, 9879751, + 9881955, 9884155, 9886352, 9888546, 9890737, 9892925, 9895109, + 9897291, 9899469, 9901644, 9903816, 9905985, 9908150, 9910313, + 9912473, 9914629, 9916783, 9918933, 9921080, 9923225, 9925366, + 9927504, 9929639, 9931771, 9933900, 9936027, 9938150, 9940270, + 9942387, 9944502, 9946613, 9948721, 9950827, 9952929, 9955029, + 9957126, 9959219, 9961310, 9963398, 9965484, 9967566, 9969645, + 9971722, 9973796, 9975866, 9977934, 9980000, 9982062, 9984122, + 9986179, 9988233, 9990284, 9992332, 9994378, 9996421, 9998461, + 10000498, 10002533, 10004565, 10006594, 10008621, 10010644, 10012665, + 10014684, 10016700, 10018713, 10020723, 10022731, 10024736, 10026738, + 10028738, 10030735, 10032729, 10034721, 10036710, 10038697, 10040681, + 10042662, 10044641, 10046617, 10048591, 10050562, 10052530, 10054496, + 10056459, 10058420, 10060379, 10062334, 10064287, 10066238, 10068186, + 10070132, 10072075, 10074016, 10075954, 10077890, 10079823, 10081754, + 10083682, 10085608, 10087532, 10089453, 10091371, 10093287, 10095201, + 10097112, 10099021, 10100928, 10102832, 10104733, 10106633, 10108529, + 10110424, 10112316, 10114206, 10116093, 10117978, 10119861, 10121742, + 10123620, 10125495, 10127369, 10129240, 10131109, 10132975, 10134839, + 10136701, 10138561, 10140418, 10142273, 10144126, 10145976, 10147825, + 10149671, 10151514, 10153356, 10155195, 10157032, 10158867, 10160699, + 10162530, 10164358, 10166184, 10168007, 10169829, 10171648, 10173465, + 10175280, 10177093, 10178904, 10180712, 10182519, 10184323, 10186125, + 10187925, 10189722, 10191518, 10193311, 10195103, 10196892, 10198679, + 10200464, 10202247, 10204028, 10205806, 10207583, 10209357, 10211130, + 10212900, 10214668, 10216435, 10218199, 10219961, 10221721, 10223479, + 10225235, 10226989, 10228741, 10230491, 10232239, 10233985, 10235728, + 10237470, 10239210, 10240948, 10242684, 10244417, 10246149, 10247879, + 10249607, 10251333, 10253057, 10254779, 10256499, 10258217, 10259933, + 10261647, 10263360, 10265070, 10266778, 10268485, 10270189, 10271892, + 10273593, 10275292, 10276988, 10278683, 10280376, 10282068, 10283757, + 10285444, 10287130, 10288814, 10290495, 10292175, 10293853, 10295530, + 10297204, 10298876, 10300547, 10302216, 10303883, 10305548, 10307211, + 10308873, 10310532, 10312190, 10313846, 10315501, 10317153, 10318804, + 10320452, 10322099, 10323745, 10325388, 10327030, 10328670, 10330308, + 10331944, 10333578, 10335211, 10336842, 10338472, 10340099, 10341725, + 10343349, 10344971, 10346592, 10348210, 10349828, 10351443, 10353057, + 10354668, 10356279, 10357887, 10359494, 10361099, 
10362702, 10364304, + 10365904, 10367502, 10369099, 10370694, 10372287, 10373879, 10375468, + 10377057, 10378643, 10380228, 10381811, 10383393, 10384973, 10386551, + 10388128, 10389703, 10391276, 10392848, 10394418, 10395986, 10397553, + 10399118, 10400682, 10402244, 10403804, 10405363, 10406920, 10408476, + 10410030, 10411582, 10413133, 10414682, 10416230, 10417776, 10419320, + 10420863, 10422404, 10423944, 10425482, 10427019, 10428554, 10430087, + 10431619, 10433149, 10434678, 10436206, 10437731, 10439256, 10440778, + 10442299, 10443819, 10445337, 10446854, 10448369, 10449882, 10451394, + 10452905, 10454414, 10455921, 10457427, 10458932, 10460435, 10461936, + 10463436, 10464935, 10466432, 10467927, 10469422, 10470914, 10472405, + 10473895, 10475383, 10476870, 10478355, 10479839, 10481322, 10482802, + 10484282, +}; + +static int mi_size_to_block_size(int mi_bsize, int mi_num) { + return (mi_num % mi_bsize) ? mi_num / mi_bsize + 1 : mi_num / mi_bsize; +} + +Status vp9_alloc_motion_field_info(MotionFieldInfo *motion_field_info, + int frame_num, int mi_rows, int mi_cols) { + int frame_idx, rf_idx, square_block_idx; + if (motion_field_info->allocated) { + // TODO(angiebird): Avoid re-allocate buffer if possible + vp9_free_motion_field_info(motion_field_info); + } + motion_field_info->frame_num = frame_num; + motion_field_info->motion_field_array = + vpx_calloc(frame_num, sizeof(*motion_field_info->motion_field_array)); + for (frame_idx = 0; frame_idx < frame_num; ++frame_idx) { + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { + for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES; + ++square_block_idx) { + BLOCK_SIZE bsize = square_block_idx_to_bsize(square_block_idx); + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int block_rows = mi_size_to_block_size(mi_height, mi_rows); + const int block_cols = mi_size_to_block_size(mi_width, mi_cols); + MotionField *motion_field = + &motion_field_info + ->motion_field_array[frame_idx][rf_idx][square_block_idx]; + Status status = + vp9_alloc_motion_field(motion_field, bsize, block_rows, block_cols); + if (status == STATUS_FAILED) { + return STATUS_FAILED; + } + } + } + } + motion_field_info->allocated = 1; + return STATUS_OK; +} + +Status vp9_alloc_motion_field(MotionField *motion_field, BLOCK_SIZE bsize, + int block_rows, int block_cols) { + Status status = STATUS_OK; + motion_field->ready = 0; + motion_field->bsize = bsize; + motion_field->block_rows = block_rows; + motion_field->block_cols = block_cols; + motion_field->block_num = block_rows * block_cols; + motion_field->mf = + vpx_calloc(motion_field->block_num, sizeof(*motion_field->mf)); + if (motion_field->mf == NULL) { + status = STATUS_FAILED; + } + motion_field->set_mv = + vpx_calloc(motion_field->block_num, sizeof(*motion_field->set_mv)); + if (motion_field->set_mv == NULL) { + vpx_free(motion_field->mf); + motion_field->mf = NULL; + status = STATUS_FAILED; + } + motion_field->local_structure = vpx_calloc( + motion_field->block_num, sizeof(*motion_field->local_structure)); + if (motion_field->local_structure == NULL) { + vpx_free(motion_field->mf); + motion_field->mf = NULL; + vpx_free(motion_field->set_mv); + motion_field->set_mv = NULL; + status = STATUS_FAILED; + } + return status; +} + +void vp9_free_motion_field(MotionField *motion_field) { + vpx_free(motion_field->mf); + vpx_free(motion_field->set_mv); + vpx_free(motion_field->local_structure); + vp9_zero(*motion_field); +} + +void 
vp9_free_motion_field_info(MotionFieldInfo *motion_field_info) { + if (motion_field_info->allocated) { + int frame_idx, rf_idx, square_block_idx; + for (frame_idx = 0; frame_idx < motion_field_info->frame_num; ++frame_idx) { + for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { + for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES; + ++square_block_idx) { + MotionField *motion_field = + &motion_field_info + ->motion_field_array[frame_idx][rf_idx][square_block_idx]; + vp9_free_motion_field(motion_field); + } + } + } + vpx_free(motion_field_info->motion_field_array); + motion_field_info->motion_field_array = NULL; + motion_field_info->frame_num = 0; + motion_field_info->allocated = 0; + } +} + +MotionField *vp9_motion_field_info_get_motion_field( + MotionFieldInfo *motion_field_info, int frame_idx, int rf_idx, + BLOCK_SIZE bsize) { + int square_block_idx = get_square_block_idx(bsize); + assert(frame_idx < motion_field_info->frame_num); + assert(motion_field_info->allocated == 1); + return &motion_field_info + ->motion_field_array[frame_idx][rf_idx][square_block_idx]; +} + +int vp9_motion_field_is_mv_set(const MotionField *motion_field, int brow, + int bcol) { + assert(brow >= 0 && brow < motion_field->block_rows); + assert(bcol >= 0 && bcol < motion_field->block_cols); + return motion_field->set_mv[brow * motion_field->block_cols + bcol]; +} + +int_mv vp9_motion_field_get_mv(const MotionField *motion_field, int brow, + int bcol) { + assert(brow >= 0 && brow < motion_field->block_rows); + assert(bcol >= 0 && bcol < motion_field->block_cols); + return motion_field->mf[brow * motion_field->block_cols + bcol]; +} + +int_mv vp9_motion_field_mi_get_mv(const MotionField *motion_field, int mi_row, + int mi_col) { + const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize]; + const int brow = mi_row / mi_height; + const int bcol = mi_col / mi_width; + assert(mi_row % mi_height == 0); + assert(mi_col % mi_width == 0); + return vp9_motion_field_get_mv(motion_field, brow, bcol); +} + +void vp9_motion_field_mi_set_mv(MotionField *motion_field, int mi_row, + int mi_col, int_mv mv) { + const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize]; + const int brow = mi_row / mi_height; + const int bcol = mi_col / mi_width; + assert(mi_row % mi_height == 0); + assert(mi_col % mi_width == 0); + assert(brow >= 0 && brow < motion_field->block_rows); + assert(bcol >= 0 && bcol < motion_field->block_cols); + motion_field->mf[brow * motion_field->block_cols + bcol] = mv; + motion_field->set_mv[brow * motion_field->block_cols + bcol] = 1; +} + +void vp9_motion_field_reset_mvs(MotionField *motion_field) { + memset(motion_field->set_mv, 0, + motion_field->block_num * sizeof(*motion_field->set_mv)); +} + +static int64_t log2_approximation(int64_t v) { + assert(v > 0); + if (v < LOG2_TABLE_SIZE) { + return log2_table[v]; + } else { + // use linear approximation when v >= 2^10 + const int slope = + 1477; // slope = 1 / (log(2) * 1024) * (1 << LOG2_PRECISION) + assert(LOG2_TABLE_SIZE == 1 << 10); + + return slope * (v - LOG2_TABLE_SIZE) + (10 << LOG2_PRECISION); + } +} + +int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs, + int mv_num) { + // The behavior of this function is to compute log2 of mv difference, + // i.e. min log2(1 + row_diff * row_diff + col_diff * col_diff) + // against available neighbor mvs. 
+ // Since the log2 is monotonically increasing, we can compute + // min row_diff * row_diff + col_diff * col_diff first + // then apply log2 in the end. + int i; + int64_t min_abs_diff = INT64_MAX; + int cnt = 0; + assert(mv_num <= NB_MVS_NUM); + for (i = 0; i < mv_num; ++i) { + MV nb_mv = nb_full_mvs[i].as_mv; + const int64_t row_diff = abs(mv->row - nb_mv.row); + const int64_t col_diff = abs(mv->col - nb_mv.col); + const int64_t abs_diff = row_diff * row_diff + col_diff * col_diff; + assert(nb_full_mvs[i].as_int != INVALID_MV); + min_abs_diff = VPXMIN(abs_diff, min_abs_diff); + ++cnt; + } + if (cnt) { + return log2_approximation(1 + min_abs_diff); + } + return 0; +} + +static FloatMV get_smooth_motion_vector(const FloatMV scaled_search_mv, + const FloatMV *tmp_mf, + const int (*M)[MF_LOCAL_STRUCTURE_SIZE], + int rows, int cols, int row, int col, + float alpha) { + const FloatMV tmp_mv = tmp_mf[row * cols + col]; + int idx_row, idx_col; + FloatMV avg_nb_mv = { 0.0f, 0.0f }; + FloatMV mv = { 0.0f, 0.0f }; + float filter[3][3] = { { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f }, + { 1.0f / 6.0f, 0.0f, 1.0f / 6.0f }, + { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f } }; + for (idx_row = 0; idx_row < 3; ++idx_row) { + int nb_row = row + idx_row - 1; + for (idx_col = 0; idx_col < 3; ++idx_col) { + int nb_col = col + idx_col - 1; + if (nb_row < 0 || nb_col < 0 || nb_row >= rows || nb_col >= cols) { + avg_nb_mv.row += (tmp_mv.row) * filter[idx_row][idx_col]; + avg_nb_mv.col += (tmp_mv.col) * filter[idx_row][idx_col]; + } else { + const FloatMV nb_mv = tmp_mf[nb_row * cols + nb_col]; + avg_nb_mv.row += (nb_mv.row) * filter[idx_row][idx_col]; + avg_nb_mv.col += (nb_mv.col) * filter[idx_row][idx_col]; + } + } + } + { + // M is the local variance of reference frame + float M00 = M[row * cols + col][0]; + float M01 = M[row * cols + col][1]; + float M10 = M[row * cols + col][2]; + float M11 = M[row * cols + col][3]; + + float det = (M00 + alpha) * (M11 + alpha) - M01 * M10; + + float inv_M00 = (M11 + alpha) / det; + float inv_M01 = -M01 / det; + float inv_M10 = -M10 / det; + float inv_M11 = (M00 + alpha) / det; + + float inv_MM00 = inv_M00 * M00 + inv_M01 * M10; + float inv_MM01 = inv_M00 * M01 + inv_M01 * M11; + float inv_MM10 = inv_M10 * M00 + inv_M11 * M10; + float inv_MM11 = inv_M10 * M01 + inv_M11 * M11; + + mv.row = inv_M00 * avg_nb_mv.row * alpha + inv_M01 * avg_nb_mv.col * alpha + + inv_MM00 * scaled_search_mv.row + inv_MM01 * scaled_search_mv.col; + mv.col = inv_M10 * avg_nb_mv.row * alpha + inv_M11 * avg_nb_mv.col * alpha + + inv_MM10 * scaled_search_mv.row + inv_MM11 * scaled_search_mv.col; + } + return mv; +} + +void vp9_get_smooth_motion_field(const MV *search_mf, + const int (*M)[MF_LOCAL_STRUCTURE_SIZE], + int rows, int cols, BLOCK_SIZE bsize, + float alpha, int num_iters, MV *smooth_mf) { + // M is the local variation of reference frame + // build two buffers + FloatMV *input = (FloatMV *)malloc(rows * cols * sizeof(FloatMV)); + FloatMV *output = (FloatMV *)malloc(rows * cols * sizeof(FloatMV)); + int idx; + int row, col; + int bw = 4 << b_width_log2_lookup[bsize]; + int bh = 4 << b_height_log2_lookup[bsize]; + // copy search results to input buffer + for (idx = 0; idx < rows * cols; ++idx) { + input[idx].row = (float)search_mf[idx].row / bh; + input[idx].col = (float)search_mf[idx].col / bw; + } + for (idx = 0; idx < num_iters; ++idx) { + FloatMV *tmp; + for (row = 0; row < rows; ++row) { + for (col = 0; col < cols; ++col) { + // note: the scaled_search_mf and smooth_mf are all scaled by 
macroblock
+        // size
+        const MV search_mv = search_mf[row * cols + col];
+        FloatMV scaled_search_mv = { (float)search_mv.row / bh,
+                                     (float)search_mv.col / bw };
+        output[row * cols + col] = get_smooth_motion_vector(
+            scaled_search_mv, input, M, rows, cols, row, col, alpha);
+      }
+    }
+    // swap buffers
+    tmp = input;
+    input = output;
+    output = tmp;
+  }
+  // copy smoothed results to output
+  for (idx = 0; idx < rows * cols; ++idx) {
+    smooth_mf[idx].row = (int)(input[idx].row * bh);
+    smooth_mf[idx].col = (int)(input[idx].col * bw);
+  }
+  free(input);
+  free(output);
+}
+
+void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame,
+                             const YV12_BUFFER_CONFIG *ref_frame,
+                             const MV *search_mf,
+                             const vp9_variance_fn_ptr_t *fn_ptr, int rows,
+                             int cols, BLOCK_SIZE bsize,
+                             int (*M)[MF_LOCAL_STRUCTURE_SIZE]) {
+  const int bw = 4 << b_width_log2_lookup[bsize];
+  const int bh = 4 << b_height_log2_lookup[bsize];
+  const int cur_stride = cur_frame->y_stride;
+  const int ref_stride = ref_frame->y_stride;
+  const int width = ref_frame->y_width;
+  const int height = ref_frame->y_height;
+  int row, col;
+  for (row = 0; row < rows; ++row) {
+    for (col = 0; col < cols; ++col) {
+      int cur_offset = row * bh * cur_stride + col * bw;
+      uint8_t *center = cur_frame->y_buffer + cur_offset;
+      int ref_h = row * bh + search_mf[row * cols + col].row;
+      int ref_w = col * bw + search_mf[row * cols + col].col;
+      int ref_offset;
+      uint8_t *target;
+      uint8_t *nb;
+      int search_dist;
+      int nb_dist;
+      int I_row = 0, I_col = 0;
+      // TODO(Dan): handle the case when the reference frame block is beyond
+      // the boundary
+      ref_h = ref_h < 0 ? 0 : (ref_h >= height - bh ? height - bh - 1 : ref_h);
+      ref_w = ref_w < 0 ? 0 : (ref_w >= width - bw ? width - bw - 1 : ref_w);
+      // compute search results distortion
+      // TODO(Dan): maybe use a vp9 function to find the reference block; for
+      // now the block is located directly so that the results can be compared
+      // with the reference python code
+      ref_offset = ref_h * ref_stride + ref_w;
+      target = ref_frame->y_buffer + ref_offset;
+      search_dist = fn_ptr->sdf(center, cur_stride, target, ref_stride);
+      // compute target's neighbors' distortions
+      // TODO(Dan): if using padding, the boundary condition may vary
+      // up
+      if (ref_h - bh >= 0) {
+        nb = target - ref_stride * bh;
+        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+        I_row += nb_dist - search_dist;
+      }
+      // down
+      if (ref_h + bh < height - bh) {
+        nb = target + ref_stride * bh;
+        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+        I_row += nb_dist - search_dist;
+      }
+      if (ref_h - bh >= 0 && ref_h + bh < height - bh) {
+        I_row /= 2;
+      }
+      I_row /= (bw * bh);
+      // left
+      if (ref_w - bw >= 0) {
+        nb = target - bw;
+        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+        I_col += nb_dist - search_dist;
+      }
+      // right
+      if (ref_w + bw < width - bw) {
+        nb = target + bw;
+        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+        I_col += nb_dist - search_dist;
+      }
+      if (ref_w - bw >= 0 && ref_w + bw < width - bw) {
+        I_col /= 2;
+      }
+      I_col /= (bw * bh);
+      M[row * cols + col][0] = I_row * I_row;
+      M[row * cols + col][1] = I_row * I_col;
+      M[row * cols + col][2] = I_col * I_row;
+      M[row * cols + col][3] = I_col * I_col;
+    }
+  }
+}
diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_non_greedy_mv.h b/media/libvpx/libvpx/vp9/encoder/vp9_non_greedy_mv.h
new file mode 100644
index 000000000000..c2bd69722ac8
--- /dev/null
+++ b/media/libvpx/libvpx/vp9/encoder/vp9_non_greedy_mv.h
@@ -0,0 +1,129 @@
+/*
+ * 
Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_ +#define VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_ + +#include "vp9/common/vp9_enums.h" +#include "vp9/common/vp9_blockd.h" +#include "vpx_scale/yv12config.h" +#include "vpx_dsp/variance.h" + +#ifdef __cplusplus +extern "C" { +#endif +#define NB_MVS_NUM 4 +#define LOG2_PRECISION 20 +#define MF_LOCAL_STRUCTURE_SIZE 4 +#define SQUARE_BLOCK_SIZES 4 + +typedef enum Status { STATUS_OK = 0, STATUS_FAILED = 1 } Status; + +typedef struct MotionField { + int ready; + BLOCK_SIZE bsize; + int block_rows; + int block_cols; + int block_num; // block_num == block_rows * block_cols + int (*local_structure)[MF_LOCAL_STRUCTURE_SIZE]; + int_mv *mf; + int *set_mv; + int mv_log_scale; +} MotionField; + +typedef struct MotionFieldInfo { + int frame_num; + int allocated; + MotionField (*motion_field_array)[MAX_INTER_REF_FRAMES][SQUARE_BLOCK_SIZES]; +} MotionFieldInfo; + +typedef struct { + float row, col; +} FloatMV; + +static INLINE int get_square_block_idx(BLOCK_SIZE bsize) { + if (bsize == BLOCK_4X4) { + return 0; + } + if (bsize == BLOCK_8X8) { + return 1; + } + if (bsize == BLOCK_16X16) { + return 2; + } + if (bsize == BLOCK_32X32) { + return 3; + } + assert(0 && "ERROR: non-square block size"); + return -1; +} + +static INLINE BLOCK_SIZE square_block_idx_to_bsize(int square_block_idx) { + if (square_block_idx == 0) { + return BLOCK_4X4; + } + if (square_block_idx == 1) { + return BLOCK_8X8; + } + if (square_block_idx == 2) { + return BLOCK_16X16; + } + if (square_block_idx == 3) { + return BLOCK_32X32; + } + assert(0 && "ERROR: invalid square_block_idx"); + return BLOCK_INVALID; +} + +Status vp9_alloc_motion_field_info(MotionFieldInfo *motion_field_info, + int frame_num, int mi_rows, int mi_cols); + +Status vp9_alloc_motion_field(MotionField *motion_field, BLOCK_SIZE bsize, + int block_rows, int block_cols); + +void vp9_free_motion_field(MotionField *motion_field); + +void vp9_free_motion_field_info(MotionFieldInfo *motion_field_info); + +int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs, + int mv_num); + +void vp9_get_smooth_motion_field(const MV *search_mf, + const int (*M)[MF_LOCAL_STRUCTURE_SIZE], + int rows, int cols, BLOCK_SIZE bize, + float alpha, int num_iters, MV *smooth_mf); + +void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame, + const YV12_BUFFER_CONFIG *ref_frame, + const MV *search_mf, + const vp9_variance_fn_ptr_t *fn_ptr, int rows, + int cols, BLOCK_SIZE bsize, + int (*M)[MF_LOCAL_STRUCTURE_SIZE]); + +MotionField *vp9_motion_field_info_get_motion_field( + MotionFieldInfo *motion_field_info, int frame_idx, int rf_idx, + BLOCK_SIZE bsize); + +void vp9_motion_field_mi_set_mv(MotionField *motion_field, int mi_row, + int mi_col, int_mv mv); + +void vp9_motion_field_reset_mvs(MotionField *motion_field); + +int_mv vp9_motion_field_get_mv(const MotionField *motion_field, int brow, + int bcol); +int_mv vp9_motion_field_mi_get_mv(const MotionField *motion_field, int mi_row, + int mi_col); +int vp9_motion_field_is_mv_set(const MotionField *motion_field, int brow, + int bcol); + +#ifdef __cplusplus +} // extern "C" +#endif 
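Two standalone sketches for the header above, illustration only. First, the square-block index mapping round-trips by construction; the reduced BLOCK_SIZE enum here is a stand-in for the vp9_enums.h definitions:

#include <assert.h>
#include <stdio.h>

/* Reduced stand-in for the BLOCK_SIZE values used by the header above. */
typedef enum {
  BLOCK_4X4,
  BLOCK_8X8,
  BLOCK_16X16,
  BLOCK_32X32,
  BLOCK_INVALID
} BLOCK_SIZE;

static int get_square_block_idx(BLOCK_SIZE bsize) {
  if (bsize == BLOCK_4X4) return 0;
  if (bsize == BLOCK_8X8) return 1;
  if (bsize == BLOCK_16X16) return 2;
  if (bsize == BLOCK_32X32) return 3;
  return -1; /* non-square sizes are rejected in the real helper */
}

static BLOCK_SIZE square_block_idx_to_bsize(int idx) {
  static const BLOCK_SIZE map[4] = { BLOCK_4X4, BLOCK_8X8, BLOCK_16X16,
                                     BLOCK_32X32 };
  return (idx >= 0 && idx < 4) ? map[idx] : BLOCK_INVALID;
}

int main(void) {
  int idx;
  for (idx = 0; idx < 4; ++idx)
    assert(get_square_block_idx(square_block_idx_to_bsize(idx)) == idx);
  printf("square-block index mapping round-trips for all 4 sizes\n");
  return 0;
}

Second, the Q20 fixed-point log2 behind LOG2_PRECISION and vp9_nb_mvs_inconsistency(): values below 1024 come from the table in vp9_non_greedy_mv.c, and the tail is a tangent-line extension at v = 1024 with slope round(2^20 / (ln(2) * 1024)) = 1477, so it is exact at v = 1024 and drifts upward for larger v. A sketch of the tail only, checked against libm:

#include <math.h>
#include <stdint.h>
#include <stdio.h>

#define LOG2_PRECISION 20
#define LOG2_TABLE_SIZE 1024

/* Tangent-line tail of the table-based log2 (valid for v >= 1024). */
static int64_t log2_tail_q20(int64_t v) {
  const int64_t slope = 1477; /* round(2^20 / (ln(2) * 1024)) */
  return slope * (v - LOG2_TABLE_SIZE) + ((int64_t)10 << LOG2_PRECISION);
}

int main(void) {
  int64_t v;
  for (v = 1024; v <= 2048; v += 512) {
    const double approx = (double)log2_tail_q20(v) / (1 << LOG2_PRECISION);
    printf("v=%4lld approx=%7.4f exact=%7.4f\n", (long long)v, approx,
           log2((double)v));
  }
  return 0;
}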
+#endif // VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_partition_models.h b/media/libvpx/libvpx/vp9/encoder/vp9_partition_models.h new file mode 100644 index 000000000000..09c0e30a47d6 --- /dev/null +++ b/media/libvpx/libvpx/vp9/encoder/vp9_partition_models.h @@ -0,0 +1,975 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_ENCODER_VP9_PARTITION_MODELS_H_ +#define VPX_VP9_ENCODER_VP9_PARTITION_MODELS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#define NN_MAX_HIDDEN_LAYERS 10 +#define NN_MAX_NODES_PER_LAYER 128 + +// Neural net model config. It defines the layout of a neural net model, such as +// the number of inputs/outputs, number of layers, the number of nodes in each +// layer, as well as the weights and bias of each node. +typedef struct { + int num_inputs; // Number of input nodes, i.e. features. + int num_outputs; // Number of output nodes. + int num_hidden_layers; // Number of hidden layers, maximum 10. + // Number of nodes for each hidden layer. + int num_hidden_nodes[NN_MAX_HIDDEN_LAYERS]; + // Weight parameters, indexed by layer. + const float *weights[NN_MAX_HIDDEN_LAYERS + 1]; + // Bias parameters, indexed by layer. + const float *bias[NN_MAX_HIDDEN_LAYERS + 1]; +} NN_CONFIG; + +// Partition search breakout model. +#define FEATURES 4 +#define Q_CTX 3 +#define RESOLUTION_CTX 2 +static const float + vp9_partition_breakout_weights_64[RESOLUTION_CTX][Q_CTX][FEATURES + 1] = { + { + { + -0.016673f, + -0.001025f, + -0.000032f, + 0.000833f, + 1.94261885f - 2.1f, + }, + { + -0.160867f, + -0.002101f, + 0.000011f, + 0.002448f, + 1.65738142f - 2.5f, + }, + { + -0.628934f, + -0.011459f, + -0.000009f, + 0.013833f, + 1.47982645f - 1.6f, + }, + }, + { + { + -0.064309f, + -0.006121f, + 0.000232f, + 0.005778f, + 0.7989465f - 5.0f, + }, + { + -0.314957f, + -0.009346f, + -0.000225f, + 0.010072f, + 2.80695581f - 5.5f, + }, + { + -0.635535f, + -0.015135f, + 0.000091f, + 0.015247f, + 2.90381241f - 5.0f, + }, + }, + }; + +static const float + vp9_partition_breakout_weights_32[RESOLUTION_CTX][Q_CTX][FEATURES + 1] = { + { + { + -0.010554f, + -0.003081f, + -0.000134f, + 0.004491f, + 1.68445992f - 3.5f, + }, + { + -0.051489f, + -0.007609f, + 0.000016f, + 0.009792f, + 1.28089404f - 2.5f, + }, + { + -0.163097f, + -0.013081f, + 0.000022f, + 0.019006f, + 1.36129403f - 3.2f, + }, + }, + { + { + -0.024629f, + -0.006492f, + -0.000254f, + 0.004895f, + 1.27919173f - 4.5f, + }, + { + -0.083936f, + -0.009827f, + -0.000200f, + 0.010399f, + 2.73731065f - 4.5f, + }, + { + -0.279052f, + -0.013334f, + 0.000289f, + 0.023203f, + 2.43595719f - 3.5f, + }, + }, + }; + +static const float + vp9_partition_breakout_weights_16[RESOLUTION_CTX][Q_CTX][FEATURES + 1] = { + { + { + -0.013154f, + -0.002404f, + -0.000977f, + 0.008450f, + 2.57404566f - 5.5f, + }, + { + -0.019146f, + -0.004018f, + 0.000064f, + 0.008187f, + 2.15043926f - 2.5f, + }, + { + -0.075755f, + -0.010858f, + 0.000030f, + 0.024505f, + 2.06848121f - 2.5f, + }, + }, + { + { + -0.007636f, + -0.002751f, + -0.000682f, + 0.005968f, + 0.19225763f - 4.5f, + }, + { + -0.047306f, + -0.009113f, + -0.000518f, + 0.016007f, + 2.61068869f - 4.0f, + }, + { + 
-0.069336f, + -0.010448f, + -0.001120f, + 0.023083f, + 1.47591054f - 5.5f, + }, + }, + }; + +static const float vp9_partition_breakout_weights_8[RESOLUTION_CTX][Q_CTX] + [FEATURES + 1] = { + { + { + -0.011807f, + -0.009873f, + -0.000931f, + 0.034768f, + 1.32254851f - 2.0f, + }, + { + -0.003861f, + -0.002701f, + 0.000100f, + 0.013876f, + 1.96755111f - 1.5f, + }, + { + -0.013522f, + -0.008677f, + -0.000562f, + 0.034468f, + 1.53440356f - 1.5f, + }, + }, + { + { + -0.003221f, + -0.002125f, + 0.000993f, + 0.012768f, + 0.03541421f - 2.0f, + }, + { + -0.006069f, + -0.007335f, + 0.000229f, + 0.026104f, + 0.17135315f - 1.5f, + }, + { + -0.039894f, + -0.011419f, + 0.000070f, + 0.061817f, + 0.6739977f - 1.5f, + }, + }, + }; +#undef FEATURES +#undef Q_CTX +#undef RESOLUTION_CTX + +// Rectangular partition search pruning model. +#define FEATURES 8 +#define LABELS 4 +#define NODES 16 +static const float vp9_rect_part_nn_weights_16_layer0[FEATURES * NODES] = { + -0.432522f, 0.133070f, -0.169187f, 0.768340f, 0.891228f, 0.554458f, + 0.356000f, 0.403621f, 0.809165f, 0.778214f, -0.520357f, 0.301451f, + -0.386972f, -0.314402f, 0.021878f, 1.148746f, -0.462258f, -0.175524f, + -0.344589f, -0.475159f, -0.232322f, 0.471147f, -0.489948f, 0.467740f, + -0.391550f, 0.208601f, 0.054138f, 0.076859f, -0.309497f, -0.095927f, + 0.225917f, 0.011582f, -0.520730f, -0.585497f, 0.174036f, 0.072521f, + 0.120771f, -0.517234f, -0.581908f, -0.034003f, -0.694722f, -0.364368f, + 0.290584f, 0.038373f, 0.685654f, 0.394019f, 0.759667f, 1.257502f, + -0.610516f, -0.185434f, 0.211997f, -0.172458f, 0.044605f, 0.145316f, + -0.182525f, -0.147376f, 0.578742f, 0.312412f, -0.446135f, -0.389112f, + 0.454033f, 0.260490f, 0.664285f, 0.395856f, -0.231827f, 0.215228f, + 0.014856f, -0.395462f, 0.479646f, -0.391445f, -0.357788f, 0.166238f, + -0.056818f, -0.027783f, 0.060880f, -1.604710f, 0.531268f, 0.282184f, + 0.714944f, 0.093523f, -0.218312f, -0.095546f, -0.285621f, -0.190871f, + -0.448340f, -0.016611f, 0.413913f, -0.286720f, -0.158828f, -0.092635f, + -0.279551f, 0.166509f, -0.088162f, 0.446543f, -0.276830f, -0.065642f, + -0.176346f, -0.984754f, 0.338738f, 0.403809f, 0.738065f, 1.154439f, + 0.750764f, 0.770959f, -0.269403f, 0.295651f, -0.331858f, 0.367144f, + 0.279279f, 0.157419f, -0.348227f, -0.168608f, -0.956000f, -0.647136f, + 0.250516f, 0.858084f, 0.809802f, 0.492408f, 0.804841f, 0.282802f, + 0.079395f, -0.291771f, -0.024382f, -1.615880f, -0.445166f, -0.407335f, + -0.483044f, 0.141126f, +}; + +static const float vp9_rect_part_nn_bias_16_layer0[NODES] = { + 0.275384f, -0.053745f, 0.000000f, 0.000000f, -0.178103f, 0.513965f, + -0.161352f, 0.228551f, 0.000000f, 1.013712f, 0.000000f, 0.000000f, + -1.144009f, -0.000006f, -0.241727f, 2.048764f, +}; + +static const float vp9_rect_part_nn_weights_16_layer1[NODES * LABELS] = { + -1.435278f, 2.204691f, -0.410718f, 0.202708f, 0.109208f, 1.059142f, + -0.306360f, 0.845906f, 0.489654f, -1.121915f, -0.169133f, -0.003385f, + 0.660590f, -0.018711f, 1.227158f, -2.967504f, 1.407345f, -1.293243f, + -0.386921f, 0.300492f, 0.338824f, -0.083250f, -0.069454f, -1.001827f, + -0.327891f, 0.899353f, 0.367397f, -0.118601f, -0.171936f, -0.420646f, + -0.803319f, 2.029634f, 0.940268f, -0.664484f, 0.339916f, 0.315944f, + 0.157374f, -0.402482f, -0.491695f, 0.595827f, 0.015031f, 0.255887f, + -0.466327f, -0.212598f, 0.136485f, 0.033363f, -0.796921f, 1.414304f, + -0.282185f, -2.673571f, -0.280994f, 0.382658f, -0.350902f, 0.227926f, + 0.062602f, -1.000199f, 0.433731f, 1.176439f, -0.163216f, -0.229015f, + -0.640098f, -0.438852f, 
-0.947700f, 2.203434f, +}; + +static const float vp9_rect_part_nn_bias_16_layer1[LABELS] = { + -0.875510f, + 0.982408f, + 0.560854f, + -0.415209f, +}; + +static const NN_CONFIG vp9_rect_part_nnconfig_16 = { + FEATURES, // num_inputs + LABELS, // num_outputs + 1, // num_hidden_layers + { + NODES, + }, // num_hidden_nodes + { + vp9_rect_part_nn_weights_16_layer0, + vp9_rect_part_nn_weights_16_layer1, + }, + { + vp9_rect_part_nn_bias_16_layer0, + vp9_rect_part_nn_bias_16_layer1, + }, +}; + +static const float vp9_rect_part_nn_weights_32_layer0[FEATURES * NODES] = { + -0.147312f, -0.753248f, 0.540206f, 0.661415f, 0.484117f, -0.341609f, + 0.016183f, 0.064177f, 0.781580f, 0.902232f, -0.505342f, 0.325183f, + -0.231072f, -0.120107f, -0.076216f, 0.120038f, 0.403695f, -0.463301f, + -0.192158f, 0.407442f, 0.106633f, 1.072371f, -0.446779f, 0.467353f, + 0.318812f, -0.505996f, -0.008768f, -0.239598f, 0.085480f, 0.284640f, + -0.365045f, -0.048083f, -0.112090f, -0.067089f, 0.304138f, -0.228809f, + 0.383651f, -0.196882f, 0.477039f, -0.217978f, -0.506931f, -0.125675f, + 0.050456f, 1.086598f, 0.732128f, 0.326941f, 0.103952f, 0.121769f, + -0.154487f, -0.255514f, 0.030591f, -0.382797f, -0.019981f, -0.326570f, + 0.149691f, -0.435633f, -0.070795f, 0.167691f, 0.251413f, -0.153405f, + 0.160347f, 0.455107f, -0.968580f, -0.575879f, 0.623115f, -0.069793f, + -0.379768f, -0.965807f, -0.062057f, 0.071312f, 0.457098f, 0.350372f, + -0.460659f, -0.985393f, 0.359963f, -0.093677f, 0.404272f, -0.326896f, + -0.277752f, 0.609322f, -0.114193f, -0.230701f, 0.089208f, 0.645381f, + 0.494485f, 0.467876f, -0.166187f, 0.251044f, -0.394661f, 0.192895f, + -0.344777f, -0.041893f, -0.111163f, 0.066347f, 0.378158f, -0.455465f, + 0.339839f, -0.418207f, -0.356515f, -0.227536f, -0.211091f, -0.122945f, + 0.361772f, -0.338095f, 0.004564f, -0.398510f, 0.060876f, -2.132504f, + -0.086776f, -0.029166f, 0.039241f, 0.222534f, -0.188565f, -0.288792f, + -0.160789f, -0.123905f, 0.397916f, -0.063779f, 0.167210f, -0.445004f, + 0.056889f, 0.207280f, 0.000101f, 0.384507f, -1.721239f, -2.036402f, + -2.084403f, -2.060483f, +}; + +static const float vp9_rect_part_nn_bias_32_layer0[NODES] = { + -0.859251f, -0.109938f, 0.091838f, 0.187817f, -0.728265f, 0.253080f, + 0.000000f, -0.357195f, -0.031290f, -1.373237f, -0.761086f, 0.000000f, + -0.024504f, 1.765711f, 0.000000f, 1.505390f, +}; + +static const float vp9_rect_part_nn_weights_32_layer1[NODES * LABELS] = { + 0.680940f, 1.367178f, 0.403075f, 0.029957f, 0.500917f, 1.407776f, + -0.354002f, 0.011667f, 1.663767f, 0.959155f, 0.428323f, -0.205345f, + -0.081850f, -3.920103f, -0.243802f, -4.253933f, -0.034020f, -1.361057f, + 0.128236f, -0.138422f, -0.025790f, -0.563518f, -0.148715f, -0.344381f, + -1.677389f, -0.868332f, -0.063792f, 0.052052f, 0.359591f, 2.739808f, + -0.414304f, 3.036597f, -0.075368f, -1.019680f, 0.642501f, 0.209779f, + -0.374539f, -0.718294f, -0.116616f, -0.043212f, -1.787809f, -0.773262f, + 0.068734f, 0.508309f, 0.099334f, 1.802239f, -0.333538f, 2.708645f, + -0.447682f, -2.355555f, -0.506674f, -0.061028f, -0.310305f, -0.375475f, + 0.194572f, 0.431788f, -0.789624f, -0.031962f, 0.358353f, 0.382937f, + 0.232002f, 2.321813f, -0.037523f, 2.104652f, +}; + +static const float vp9_rect_part_nn_bias_32_layer1[LABELS] = { + -0.693383f, + 0.773661f, + 0.426878f, + -0.070619f, +}; + +static const NN_CONFIG vp9_rect_part_nnconfig_32 = { + FEATURES, // num_inputs + LABELS, // num_outputs + 1, // num_hidden_layers + { + NODES, + }, // num_hidden_nodes + { + vp9_rect_part_nn_weights_32_layer0, + 
vp9_rect_part_nn_weights_32_layer1, + }, + { + vp9_rect_part_nn_bias_32_layer0, + vp9_rect_part_nn_bias_32_layer1, + }, +}; +#undef NODES + +#define NODES 24 +static const float vp9_rect_part_nn_weights_64_layer0[FEATURES * NODES] = { + 0.024671f, -0.220610f, -0.284362f, -0.069556f, -0.315700f, 0.187861f, + 0.139782f, 0.063110f, 0.796561f, 0.172868f, -0.662194f, -1.393074f, + 0.085003f, 0.393381f, 0.358477f, -0.187268f, -0.370745f, 0.218287f, + 0.027271f, -0.254089f, -0.048236f, -0.459137f, 0.253171f, 0.122598f, + -0.550107f, -0.568456f, 0.159866f, -0.246534f, 0.096384f, -0.255460f, + 0.077864f, -0.334837f, 0.026921f, -0.697252f, 0.345262f, 1.343578f, + 0.815984f, 1.118211f, 1.574016f, 0.578476f, -0.285967f, -0.508672f, + 0.118137f, 0.037695f, 1.540510f, 1.256648f, 1.163819f, 1.172027f, + 0.661551f, -0.111980f, -0.434204f, -0.894217f, 0.570524f, 0.050292f, + -0.113680f, 0.000784f, -0.211554f, -0.369394f, 0.158306f, -0.512505f, + -0.238696f, 0.091498f, -0.448490f, -0.491268f, -0.353112f, -0.303315f, + -0.428438f, 0.127998f, -0.406790f, -0.401786f, -0.279888f, -0.384223f, + 0.026100f, 0.041621f, -0.315818f, -0.087888f, 0.353497f, 0.163123f, + -0.380128f, -0.090334f, -0.216647f, -0.117849f, -0.173502f, 0.301871f, + 0.070854f, 0.114627f, -0.050545f, -0.160381f, 0.595294f, 0.492696f, + -0.453858f, -1.154139f, 0.126000f, 0.034550f, 0.456665f, -0.236618f, + -0.112640f, 0.050759f, -0.449162f, 0.110059f, 0.147116f, 0.249358f, + -0.049894f, 0.063351f, -0.004467f, 0.057242f, -0.482015f, -0.174335f, + -0.085617f, -0.333808f, -0.358440f, -0.069006f, 0.099260f, -1.243430f, + -0.052963f, 0.112088f, -2.661115f, -2.445893f, -2.688174f, -2.624232f, + 0.030494f, 0.161311f, 0.012136f, 0.207564f, -2.776856f, -2.791940f, + -2.623962f, -2.918820f, 1.231619f, -0.376692f, -0.698078f, 0.110336f, + -0.285378f, 0.258367f, -0.180159f, -0.376608f, -0.034348f, -0.130206f, + 0.160020f, 0.852977f, 0.580573f, 1.450782f, 1.357596f, 0.787382f, + -0.544004f, -0.014795f, 0.032121f, -0.557696f, 0.159994f, -0.540908f, + 0.180380f, -0.398045f, 0.705095f, 0.515103f, -0.511521f, -1.271374f, + -0.231019f, 0.423647f, 0.064907f, -0.255338f, -0.877748f, -0.667205f, + 0.267847f, 0.135229f, 0.617844f, 1.349849f, 1.012623f, 0.730506f, + -0.078571f, 0.058401f, 0.053221f, -2.426146f, -0.098808f, -0.138508f, + -0.153299f, 0.149116f, -0.444243f, 0.301807f, 0.065066f, 0.092929f, + -0.372784f, -0.095540f, 0.192269f, 0.237894f, 0.080228f, -0.214074f, + -0.011426f, -2.352367f, -0.085394f, -0.190361f, -0.001177f, 0.089197f, +}; + +static const float vp9_rect_part_nn_bias_64_layer0[NODES] = { + 0.000000f, -0.057652f, -0.175413f, -0.175389f, -1.084097f, -1.423801f, + -0.076307f, -0.193803f, 0.000000f, -0.066474f, -0.050318f, -0.019832f, + -0.038814f, -0.144184f, 2.652451f, 2.415006f, 0.197464f, -0.729842f, + -0.173774f, 0.239171f, 0.486425f, 2.463304f, -0.175279f, 2.352637f, +}; + +static const float vp9_rect_part_nn_weights_64_layer1[NODES * LABELS] = { + -0.063237f, 1.925696f, -0.182145f, -0.226687f, 0.602941f, -0.941140f, + 0.814598f, -0.117063f, 0.282988f, 0.066369f, 0.096951f, 1.049735f, + -0.188188f, -0.281227f, -4.836746f, -5.047797f, 0.892358f, 0.417145f, + -0.279849f, 1.335945f, 0.660338f, -2.757938f, -0.115714f, -1.862183f, + -0.045980f, -1.597624f, -0.586822f, -0.615589f, -0.330537f, 1.068496f, + -0.167290f, 0.141290f, -0.112100f, 0.232761f, 0.252307f, -0.399653f, + 0.353118f, 0.241583f, 2.635241f, 4.026119f, -1.137327f, -0.052446f, + -0.139814f, -1.104256f, -0.759391f, 2.508457f, -0.526297f, 2.095348f, + -0.444473f, -1.090452f, 
0.584122f, 0.468729f, -0.368865f, 1.041425f, + -1.079504f, 0.348837f, 0.390091f, 0.416191f, 0.212906f, -0.660255f, + 0.053630f, 0.209476f, 3.595525f, 2.257293f, -0.514030f, 0.074203f, + -0.375862f, -1.998307f, -0.930310f, 1.866686f, -0.247137f, 1.087789f, + 0.100186f, 0.298150f, 0.165265f, 0.050478f, 0.249167f, 0.371789f, + -0.294497f, 0.202954f, 0.037310f, 0.193159f, 0.161551f, 0.301597f, + 0.299286f, 0.185946f, 0.822976f, 2.066130f, -1.724588f, 0.055977f, + -0.330747f, -0.067747f, -0.475801f, 1.555958f, -0.025808f, -0.081516f, +}; + +static const float vp9_rect_part_nn_bias_64_layer1[LABELS] = { + -0.090723f, + 0.894968f, + 0.844754f, + -3.496194f, +}; + +static const NN_CONFIG vp9_rect_part_nnconfig_64 = { + FEATURES, // num_inputs + LABELS, // num_outputs + 1, // num_hidden_layers + { + NODES, + }, // num_hidden_nodes + { + vp9_rect_part_nn_weights_64_layer0, + vp9_rect_part_nn_weights_64_layer1, + }, + { + vp9_rect_part_nn_bias_64_layer0, + vp9_rect_part_nn_bias_64_layer1, + }, +}; +#undef FEATURES +#undef LABELS +#undef NODES + +#define FEATURES 7 +// Partition pruning model(neural nets). +static const float vp9_partition_nn_weights_64x64_layer0[FEATURES * 8] = { + -3.571348f, 0.014835f, -3.255393f, -0.098090f, -0.013120f, 0.000221f, + 0.056273f, 0.190179f, -0.268130f, -1.828242f, -0.010655f, 0.937244f, + -0.435120f, 0.512125f, 1.610679f, 0.190816f, -0.799075f, -0.377348f, + -0.144232f, 0.614383f, -0.980388f, 1.754150f, -0.185603f, -0.061854f, + -0.807172f, 1.240177f, 1.419531f, -0.438544f, -5.980774f, 0.139045f, + -0.032359f, -0.068887f, -1.237918f, 0.115706f, 0.003164f, 2.924212f, + 1.246838f, -0.035833f, 0.810011f, -0.805894f, 0.010966f, 0.076463f, + -4.226380f, -2.437764f, -0.010619f, -0.020935f, -0.451494f, 0.300079f, + -0.168961f, -3.326450f, -2.731094f, 0.002518f, 0.018840f, -1.656815f, + 0.068039f, 0.010586f, +}; + +static const float vp9_partition_nn_bias_64x64_layer0[8] = { + -3.469882f, 0.683989f, 0.194010f, 0.313782f, + -3.153335f, 2.245849f, -1.946190f, -3.740020f, +}; + +static const float vp9_partition_nn_weights_64x64_layer1[8] = { + -8.058566f, 0.108306f, -0.280620f, -0.818823f, + -6.445117f, 0.865364f, -1.127127f, -8.808660f, +}; + +static const float vp9_partition_nn_bias_64x64_layer1[1] = { + 6.46909416f, +}; + +static const NN_CONFIG vp9_partition_nnconfig_64x64 = { + FEATURES, // num_inputs + 1, // num_outputs + 1, // num_hidden_layers + { + 8, + }, // num_hidden_nodes + { + vp9_partition_nn_weights_64x64_layer0, + vp9_partition_nn_weights_64x64_layer1, + }, + { + vp9_partition_nn_bias_64x64_layer0, + vp9_partition_nn_bias_64x64_layer1, + }, +}; + +static const float vp9_partition_nn_weights_32x32_layer0[FEATURES * 8] = { + -0.295437f, -4.002648f, -0.205399f, -0.060919f, 0.708037f, 0.027221f, + -0.039137f, -0.907724f, -3.151662f, 0.007106f, 0.018726f, -0.534928f, + 0.022744f, 0.000159f, -1.717189f, -3.229031f, -0.027311f, 0.269863f, + -0.400747f, -0.394366f, -0.108878f, 0.603027f, 0.455369f, -0.197170f, + 1.241746f, -1.347820f, -0.575636f, -0.462879f, -2.296426f, 0.196696f, + -0.138347f, -0.030754f, -0.200774f, 0.453795f, 0.055625f, -3.163116f, + -0.091003f, -0.027028f, -0.042984f, -0.605185f, 0.143240f, -0.036439f, + -0.801228f, 0.313409f, -0.159942f, 0.031267f, 0.886454f, -1.531644f, + -0.089655f, 0.037683f, -0.163441f, -0.130454f, -0.058344f, 0.060011f, + 0.275387f, 1.552226f, +}; + +static const float vp9_partition_nn_bias_32x32_layer0[8] = { + -0.838372f, -2.609089f, -0.055763f, 1.329485f, + -1.297638f, -2.636622f, -0.826909f, 1.012644f, +}; + +static const 
float vp9_partition_nn_weights_32x32_layer1[8] = { + -1.792632f, -7.322353f, -0.683386f, 0.676564f, + -1.488118f, -7.527719f, 1.240163f, 0.614309f, +}; + +static const float vp9_partition_nn_bias_32x32_layer1[1] = { + 4.97422546f, +}; + +static const NN_CONFIG vp9_partition_nnconfig_32x32 = { + FEATURES, // num_inputs + 1, // num_outputs + 1, // num_hidden_layers + { + 8, + }, // num_hidden_nodes + { + vp9_partition_nn_weights_32x32_layer0, + vp9_partition_nn_weights_32x32_layer1, + }, + { + vp9_partition_nn_bias_32x32_layer0, + vp9_partition_nn_bias_32x32_layer1, + }, +}; + +static const float vp9_partition_nn_weights_16x16_layer0[FEATURES * 8] = { + -1.717673f, -4.718130f, -0.125725f, -0.183427f, -0.511764f, 0.035328f, + 0.130891f, -3.096753f, 0.174968f, -0.188769f, -0.640796f, 1.305661f, + 1.700638f, -0.073806f, -4.006781f, -1.630999f, -0.064863f, -0.086410f, + -0.148617f, 0.172733f, -0.018619f, 2.152595f, 0.778405f, -0.156455f, + 0.612995f, -0.467878f, 0.152022f, -0.236183f, 0.339635f, -0.087119f, + -3.196610f, -1.080401f, -0.637704f, -0.059974f, 1.706298f, -0.793705f, + -6.399260f, 0.010624f, -0.064199f, -0.650621f, 0.338087f, -0.001531f, + 1.023655f, -3.700272f, -0.055281f, -0.386884f, 0.375504f, -0.898678f, + 0.281156f, -0.314611f, 0.863354f, -0.040582f, -0.145019f, 0.029329f, + -2.197880f, -0.108733f, +}; + +static const float vp9_partition_nn_bias_16x16_layer0[8] = { + 0.411516f, -2.143737f, -3.693192f, 2.123142f, + -1.356910f, -3.561016f, -0.765045f, -2.417082f, +}; + +static const float vp9_partition_nn_weights_16x16_layer1[8] = { + -0.619755f, -2.202391f, -4.337171f, 0.611319f, + 0.377677f, -4.998723f, -1.052235f, 1.949922f, +}; + +static const float vp9_partition_nn_bias_16x16_layer1[1] = { + 3.20981717f, +}; + +static const NN_CONFIG vp9_partition_nnconfig_16x16 = { + FEATURES, // num_inputs + 1, // num_outputs + 1, // num_hidden_layers + { + 8, + }, // num_hidden_nodes + { + vp9_partition_nn_weights_16x16_layer0, + vp9_partition_nn_weights_16x16_layer1, + }, + { + vp9_partition_nn_bias_16x16_layer0, + vp9_partition_nn_bias_16x16_layer1, + }, +}; +#undef FEATURES + +#define FEATURES 6 +static const float vp9_var_part_nn_weights_64_layer0[FEATURES * 8] = { + -0.249572f, 0.205532f, -2.175608f, 1.094836f, -2.986370f, 0.193160f, + -0.143823f, 0.378511f, -1.997788f, -2.166866f, -1.930158f, -1.202127f, + -0.611875f, -0.506422f, -0.432487f, 0.071205f, 0.578172f, -0.154285f, + -0.051830f, 0.331681f, -1.457177f, -2.443546f, -2.000302f, -1.389283f, + 0.372084f, -0.464917f, 2.265235f, 2.385787f, 2.312722f, 2.127868f, + -0.403963f, -0.177860f, -0.436751f, -0.560539f, 0.254903f, 0.193976f, + -0.305611f, 0.256632f, 0.309388f, -0.437439f, 1.702640f, -5.007069f, + -0.323450f, 0.294227f, 1.267193f, 1.056601f, 0.387181f, -0.191215f, +}; + +static const float vp9_var_part_nn_bias_64_layer0[8] = { + -0.044396f, -0.938166f, 0.000000f, -0.916375f, + 1.242299f, 0.000000f, -0.405734f, 0.014206f, +}; + +static const float vp9_var_part_nn_weights_64_layer1[8] = { + 1.635945f, 0.979557f, 0.455315f, 1.197199f, + -2.251024f, -0.464953f, 1.378676f, -0.111927f, +}; + +static const float vp9_var_part_nn_bias_64_layer1[1] = { + -0.37972447f, +}; + +static const NN_CONFIG vp9_var_part_nnconfig_64 = { + FEATURES, // num_inputs + 1, // num_outputs + 1, // num_hidden_layers + { + 8, + }, // num_hidden_nodes + { + vp9_var_part_nn_weights_64_layer0, + vp9_var_part_nn_weights_64_layer1, + }, + { + vp9_var_part_nn_bias_64_layer0, + vp9_var_part_nn_bias_64_layer1, + }, +}; + +static const float 
vp9_var_part_nn_weights_32_layer0[FEATURES * 8] = { + 0.067243f, -0.083598f, -2.191159f, 2.726434f, -3.324013f, 3.477977f, + 0.323736f, -0.510199f, 2.960693f, 2.937661f, 2.888476f, 2.938315f, + -0.307602f, -0.503353f, -0.080725f, -0.473909f, -0.417162f, 0.457089f, + 0.665153f, -0.273210f, 0.028279f, 0.972220f, -0.445596f, 1.756611f, + -0.177892f, -0.091758f, 0.436661f, -0.521506f, 0.133786f, 0.266743f, + 0.637367f, -0.160084f, -1.396269f, 1.020841f, -1.112971f, 0.919496f, + -0.235883f, 0.651954f, 0.109061f, -0.429463f, 0.740839f, -0.962060f, + 0.299519f, -0.386298f, 1.550231f, 2.464915f, 1.311969f, 2.561612f, +}; + +static const float vp9_var_part_nn_bias_32_layer0[8] = { + 0.368242f, 0.736617f, 0.000000f, 0.757287f, + 0.000000f, 0.613248f, -0.776390f, 0.928497f, +}; + +static const float vp9_var_part_nn_weights_32_layer1[8] = { + 0.939884f, -2.420850f, -0.410489f, -0.186690f, + 0.063287f, -0.522011f, 0.484527f, -0.639625f, +}; + +static const float vp9_var_part_nn_bias_32_layer1[1] = { + -0.6455006f, +}; + +static const NN_CONFIG vp9_var_part_nnconfig_32 = { + FEATURES, // num_inputs + 1, // num_outputs + 1, // num_hidden_layers + { + 8, + }, // num_hidden_nodes + { + vp9_var_part_nn_weights_32_layer0, + vp9_var_part_nn_weights_32_layer1, + }, + { + vp9_var_part_nn_bias_32_layer0, + vp9_var_part_nn_bias_32_layer1, + }, +}; + +static const float vp9_var_part_nn_weights_16_layer0[FEATURES * 8] = { + 0.742567f, -0.580624f, -0.244528f, 0.331661f, -0.113949f, -0.559295f, + -0.386061f, 0.438653f, 1.467463f, 0.211589f, 0.513972f, 1.067855f, + -0.876679f, 0.088560f, -0.687483f, -0.380304f, -0.016412f, 0.146380f, + 0.015318f, 0.000351f, -2.764887f, 3.269717f, 2.752428f, -2.236754f, + 0.561539f, -0.852050f, -0.084667f, 0.202057f, 0.197049f, 0.364922f, + -0.463801f, 0.431790f, 1.872096f, -0.091887f, -0.055034f, 2.443492f, + -0.156958f, -0.189571f, -0.542424f, -0.589804f, -0.354422f, 0.401605f, + 0.642021f, -0.875117f, 2.040794f, 1.921070f, 1.792413f, 1.839727f, +}; + +static const float vp9_var_part_nn_bias_16_layer0[8] = { + 2.901234f, -1.940932f, -0.198970f, -0.406524f, + 0.059422f, -1.879207f, -0.232340f, 2.979821f, +}; + +static const float vp9_var_part_nn_weights_16_layer1[8] = { + -0.528731f, 0.375234f, -0.088422f, 0.668629f, + 0.870449f, 0.578735f, 0.546103f, -1.957207f, +}; + +static const float vp9_var_part_nn_bias_16_layer1[1] = { + -1.95769405f, +}; + +static const NN_CONFIG vp9_var_part_nnconfig_16 = { + FEATURES, // num_inputs + 1, // num_outputs + 1, // num_hidden_layers + { + 8, + }, // num_hidden_nodes + { + vp9_var_part_nn_weights_16_layer0, + vp9_var_part_nn_weights_16_layer1, + }, + { + vp9_var_part_nn_bias_16_layer0, + vp9_var_part_nn_bias_16_layer1, + }, +}; +#undef FEATURES + +#define FEATURES 12 +#define LABELS 1 +#define NODES 8 +static const float vp9_part_split_nn_weights_64_layer0[FEATURES * NODES] = { + -0.609728f, -0.409099f, -0.472449f, 0.183769f, -0.457740f, 0.081089f, + 0.171003f, 0.578696f, -0.019043f, -0.856142f, 0.557369f, -1.779424f, + -0.274044f, -0.320632f, -0.392531f, -0.359462f, -0.404106f, -0.288357f, + 0.200620f, 0.038013f, -0.430093f, 0.235083f, -0.487442f, 0.424814f, + -0.232758f, -0.442943f, 0.229397f, -0.540301f, -0.648421f, -0.649747f, + -0.171638f, 0.603824f, 0.468497f, -0.421580f, 0.178840f, -0.533838f, + -0.029471f, -0.076296f, 0.197426f, -0.187908f, -0.003950f, -0.065740f, + 0.085165f, -0.039674f, -5.640702f, 1.909538f, -1.434604f, 3.294606f, + -0.788812f, 0.196864f, 0.057012f, -0.019757f, 0.336233f, 0.075378f, + 0.081503f, 0.491864f, -1.899470f, 
-1.764173f, -1.888137f, -1.762343f, + 0.845542f, 0.202285f, 0.381948f, -0.150996f, 0.556893f, -0.305354f, + 0.561482f, -0.021974f, -0.703117f, 0.268638f, -0.665736f, 1.191005f, + -0.081568f, -0.115653f, 0.272029f, -0.140074f, 0.072683f, 0.092651f, + -0.472287f, -0.055790f, -0.434425f, 0.352055f, 0.048246f, 0.372865f, + 0.111499f, -0.338304f, 0.739133f, 0.156519f, -0.594644f, 0.137295f, + 0.613350f, -0.165102f, -1.003731f, 0.043070f, -0.887896f, -0.174202f, +}; + +static const float vp9_part_split_nn_bias_64_layer0[NODES] = { + 1.182714f, 0.000000f, 0.902019f, 0.953115f, + -1.372486f, -1.288740f, -0.155144f, -3.041362f, +}; + +static const float vp9_part_split_nn_weights_64_layer1[NODES * LABELS] = { + 0.841214f, 0.456016f, 0.869270f, 1.692999f, + -1.700494f, -0.911761f, 0.030111f, -1.447548f, +}; + +static const float vp9_part_split_nn_bias_64_layer1[LABELS] = { + 1.17782545f, +}; + +static const NN_CONFIG vp9_part_split_nnconfig_64 = { + FEATURES, // num_inputs + LABELS, // num_outputs + 1, // num_hidden_layers + { + NODES, + }, // num_hidden_nodes + { + vp9_part_split_nn_weights_64_layer0, + vp9_part_split_nn_weights_64_layer1, + }, + { + vp9_part_split_nn_bias_64_layer0, + vp9_part_split_nn_bias_64_layer1, + }, +}; + +static const float vp9_part_split_nn_weights_32_layer0[FEATURES * NODES] = { + -0.105488f, -0.218662f, 0.010980f, -0.226979f, 0.028076f, 0.743430f, + 0.789266f, 0.031907f, -1.464200f, 0.222336f, -1.068493f, -0.052712f, + -0.176181f, -0.102654f, -0.973932f, -0.182637f, -0.198000f, 0.335977f, + 0.271346f, 0.133005f, 1.674203f, 0.689567f, 0.657133f, 0.283524f, + 0.115529f, 0.738327f, 0.317184f, -0.179736f, 0.403691f, 0.679350f, + 0.048925f, 0.271338f, -1.538921f, -0.900737f, -1.377845f, 0.084245f, + 0.803122f, -0.107806f, 0.103045f, -0.023335f, -0.098116f, -0.127809f, + 0.037665f, -0.523225f, 1.622185f, 1.903999f, 1.358889f, 1.680785f, + 0.027743f, 0.117906f, -0.158810f, 0.057775f, 0.168257f, 0.062414f, + 0.086228f, -0.087381f, -3.066082f, 3.021855f, -4.092155f, 2.550104f, + -0.230022f, -0.207445f, -0.000347f, 0.034042f, 0.097057f, 0.220088f, + -0.228841f, -0.029405f, -1.507174f, -1.455184f, 2.624904f, 2.643355f, + 0.319912f, 0.585531f, -1.018225f, -0.699606f, 1.026490f, 0.169952f, + -0.093579f, -0.142352f, -0.107256f, 0.059598f, 0.043190f, 0.507543f, + -0.138617f, 0.030197f, 0.059574f, -0.634051f, -0.586724f, -0.148020f, + -0.334380f, 0.459547f, 1.620600f, 0.496850f, 0.639480f, -0.465715f, +}; + +static const float vp9_part_split_nn_bias_32_layer0[NODES] = { + -1.125885f, 0.753197f, -0.825808f, 0.004839f, + 0.583920f, 0.718062f, 0.976741f, 0.796188f, +}; + +static const float vp9_part_split_nn_weights_32_layer1[NODES * LABELS] = { + -0.458745f, 0.724624f, -0.479720f, -2.199872f, + 1.162661f, 1.194153f, -0.716896f, 0.824080f, +}; + +static const float vp9_part_split_nn_bias_32_layer1[LABELS] = { + 0.71644074f, +}; + +static const NN_CONFIG vp9_part_split_nnconfig_32 = { + FEATURES, // num_inputs + LABELS, // num_outputs + 1, // num_hidden_layers + { + NODES, + }, // num_hidden_nodes + { + vp9_part_split_nn_weights_32_layer0, + vp9_part_split_nn_weights_32_layer1, + }, + { + vp9_part_split_nn_bias_32_layer0, + vp9_part_split_nn_bias_32_layer1, + }, +}; + +static const float vp9_part_split_nn_weights_16_layer0[FEATURES * NODES] = { + -0.003629f, -0.046852f, 0.220428f, -0.033042f, 0.049365f, 0.112818f, + -0.306149f, -0.005872f, 1.066947f, -2.290226f, 2.159505f, -0.618714f, + -0.213294f, 0.451372f, -0.199459f, 0.223730f, -0.321709f, 0.063364f, + 0.148704f, -0.293371f, 0.077225f, 
-0.421947f, -0.515543f, -0.240975f, + -0.418516f, 1.036523f, -0.009165f, 0.032484f, 1.086549f, 0.220322f, + -0.247585f, -0.221232f, -0.225050f, 0.993051f, 0.285907f, 1.308846f, + 0.707456f, 0.335152f, 0.234556f, 0.264590f, -0.078033f, 0.542226f, + 0.057777f, 0.163471f, 0.039245f, -0.725960f, 0.963780f, -0.972001f, + 0.252237f, -0.192745f, -0.836571f, -0.460539f, -0.528713f, -0.160198f, + -0.621108f, 0.486405f, -0.221923f, 1.519426f, -0.857871f, 0.411595f, + 0.947188f, 0.203339f, 0.174526f, 0.016382f, 0.256879f, 0.049818f, + 0.057836f, -0.659096f, 0.459894f, 0.174695f, 0.379359f, 0.062530f, + -0.210201f, -0.355788f, -0.208432f, -0.401723f, -0.115373f, 0.191336f, + -0.109342f, 0.002455f, -0.078746f, -0.391871f, 0.149892f, -0.239615f, + -0.520709f, 0.118568f, -0.437975f, 0.118116f, -0.565426f, -0.206446f, + 0.113407f, 0.558894f, 0.534627f, 1.154350f, -0.116833f, 1.723311f, +}; + +static const float vp9_part_split_nn_bias_16_layer0[NODES] = { + 0.013109f, -0.034341f, 0.679845f, -0.035781f, + -0.104183f, 0.098055f, -0.041130f, 0.160107f, +}; + +static const float vp9_part_split_nn_weights_16_layer1[NODES * LABELS] = { + 1.499564f, -0.403259f, 1.366532f, -0.469868f, + 0.482227f, -2.076697f, 0.527691f, 0.540495f, +}; + +static const float vp9_part_split_nn_bias_16_layer1[LABELS] = { + 0.01134653f, +}; + +static const NN_CONFIG vp9_part_split_nnconfig_16 = { + FEATURES, // num_inputs + LABELS, // num_outputs + 1, // num_hidden_layers + { + NODES, + }, // num_hidden_nodes + { + vp9_part_split_nn_weights_16_layer0, + vp9_part_split_nn_weights_16_layer1, + }, + { + vp9_part_split_nn_bias_16_layer0, + vp9_part_split_nn_bias_16_layer1, + }, +}; + +static const float vp9_part_split_nn_weights_8_layer0[FEATURES * NODES] = { + -0.668875f, -0.159078f, -0.062663f, -0.483785f, -0.146814f, -0.608975f, + -0.589145f, 0.203704f, -0.051007f, -0.113769f, -0.477511f, -0.122603f, + -1.329890f, 1.403386f, 0.199636f, -0.161139f, 2.182090f, -0.014307f, + 0.015755f, -0.208468f, 0.884353f, 0.815920f, 0.632464f, 0.838225f, + 1.369483f, -0.029068f, 0.570213f, -0.573546f, 0.029617f, 0.562054f, + -0.653093f, -0.211910f, -0.661013f, -0.384418f, -0.574038f, -0.510069f, + 0.173047f, -0.274231f, -1.044008f, -0.422040f, -0.810296f, 0.144069f, + -0.406704f, 0.411230f, -0.144023f, 0.745651f, -0.595091f, 0.111787f, + 0.840651f, 0.030123f, -0.242155f, 0.101486f, -0.017889f, -0.254467f, + -0.285407f, -0.076675f, -0.549542f, -0.013544f, -0.686566f, -0.755150f, + 1.623949f, -0.286369f, 0.170976f, 0.016442f, -0.598353f, -0.038540f, + 0.202597f, -0.933582f, 0.599510f, 0.362273f, 0.577722f, 0.477603f, + 0.767097f, 0.431532f, 0.457034f, 0.223279f, 0.381349f, 0.033777f, + 0.423923f, -0.664762f, 0.385662f, 0.075744f, 0.182681f, 0.024118f, + 0.319408f, -0.528864f, 0.976537f, -0.305971f, -0.189380f, -0.241689f, + -1.318092f, 0.088647f, -0.109030f, -0.945654f, 1.082797f, 0.184564f, +}; + +static const float vp9_part_split_nn_bias_8_layer0[NODES] = { + -0.237472f, 2.051396f, 0.297062f, -0.730194f, + 0.060472f, -0.565959f, 0.560869f, -0.395448f, +}; + +static const float vp9_part_split_nn_weights_8_layer1[NODES * LABELS] = { + 0.568121f, 1.575915f, -0.544309f, 0.751595f, + -0.117911f, -1.340730f, -0.739671f, 0.661216f, +}; + +static const float vp9_part_split_nn_bias_8_layer1[LABELS] = { + -0.63375306f, +}; + +static const NN_CONFIG vp9_part_split_nnconfig_8 = { + FEATURES, // num_inputs + LABELS, // num_outputs + 1, // num_hidden_layers + { + NODES, + }, // num_hidden_nodes + { + vp9_part_split_nn_weights_8_layer0, + 
vp9_part_split_nn_weights_8_layer1,
+  },
+  {
+      vp9_part_split_nn_bias_8_layer0,
+      vp9_part_split_nn_bias_8_layer1,
+  },
+};
+#undef NODES
+#undef FEATURES
+#undef LABELS
+
+// Partition pruning model (linear).
+static const float vp9_partition_feature_mean[24] = {
+  303501.697372f, 3042630.372158f, 24.694696f, 1.392182f,
+  689.413511f, 162.027012f, 1.478213f, 0.0f,
+  135382.260230f, 912738.513263f, 28.845217f, 1.515230f,
+  544.158492f, 131.807995f, 1.436863f, 0.0f,
+  43682.377587f, 208131.711766f, 28.084737f, 1.356677f,
+  138.254122f, 119.522553f, 1.252322f, 0.0f,
+};
+
+static const float vp9_partition_feature_std[24] = {
+  673689.212982f, 5996652.516628f, 0.024449f, 1.989792f,
+  985.880847f, 0.014638f, 2.001898f, 0.0f,
+  208798.775332f, 1812548.443284f, 0.018693f, 1.838009f,
+  396.986910f, 0.015657f, 1.332541f, 0.0f,
+  55888.847031f, 448587.962714f, 0.017900f, 1.904776f,
+  98.652832f, 0.016598f, 1.320992f, 0.0f,
+};
+
+// Error tolerance: 0.01%-0.05%-0.1%
+static const float vp9_partition_linear_weights[24] = {
+  0.111736f, 0.289977f, 0.042219f, 0.204765f, 0.120410f, -0.143863f,
+  0.282376f, 0.847811f, 0.637161f, 0.131570f, 0.018636f, 0.202134f,
+  0.112797f, 0.028162f, 0.182450f, 1.124367f, 0.386133f, 0.083700f,
+  0.050028f, 0.150873f, 0.061119f, 0.109318f, 0.127255f, 0.625211f,
+};
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VPX_VP9_ENCODER_VP9_PARTITION_MODELS_H_
diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_picklpf.c b/media/libvpx/libvpx/vp9/encoder/vp9_picklpf.c
index 1c2c55b9e4ba..3a620df693cd 100644
--- a/media/libvpx/libvpx/vp9/encoder/vp9_picklpf.c
+++ b/media/libvpx/libvpx/vp9/encoder/vp9_picklpf.c
@@ -24,10 +24,20 @@
 #include "vp9/encoder/vp9_picklpf.h"
 #include "vp9/encoder/vp9_quantize.h"
 
+static unsigned int get_section_intra_rating(const VP9_COMP *cpi) {
+  unsigned int section_intra_rating;
+
+  section_intra_rating = (cpi->common.frame_type == KEY_FRAME)
+                             ? cpi->twopass.key_frame_section_intra_rating
+                             : cpi->twopass.section_intra_rating;
+
+  return section_intra_rating;
+}
+
 static int get_max_filter_level(const VP9_COMP *cpi) {
   if (cpi->oxcf.pass == 2) {
-    return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
-                                                 : MAX_LOOP_FILTER;
+    unsigned int section_intra_rating = get_section_intra_rating(cpi);
+    return section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4 : MAX_LOOP_FILTER;
   } else {
     return MAX_LOOP_FILTER;
   }
@@ -81,6 +91,7 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
   int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
   // Sum squared error at each filter level
   int64_t ss_err[MAX_LOOP_FILTER + 1];
+  unsigned int section_intra_rating = get_section_intra_rating(cpi);
 
   // Set each entry to -1
   memset(ss_err, 0xFF, sizeof(ss_err));
@@ -99,8 +110,8 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
       // Bias against raising loop filter in favor of lowering it.
       int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
 
-      if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20))
-        bias = (bias * cpi->twopass.section_intra_rating) / 20;
+      if ((cpi->oxcf.pass == 2) && (section_intra_rating < 20))
+        bias = (bias * section_intra_rating) / 20;
 
       // yx, bias less for large block size
       if (cm->tx_mode != ONLY_4X4) bias >>= 1;
@@ -150,7 +161,7 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
   VP9_COMMON *const cm = &cpi->common;
   struct loopfilter *const lf = &cm->lf;
 
-  lf->sharpness_level = cm->frame_type == KEY_FRAME ?
0 : cpi->oxcf.sharpness; + lf->sharpness_level = 0; if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) { lf->filter_level = 0; @@ -169,20 +180,17 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, case VPX_BITS_10: filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20); break; - case VPX_BITS_12: + default: + assert(cm->bit_depth == VPX_BITS_12); filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22); break; - default: - assert(0 && - "bit_depth should be VPX_BITS_8, VPX_BITS_10 " - "or VPX_BITS_12"); - return; } #else int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18); #endif // CONFIG_VP9_HIGHBITDEPTH if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && + (cm->base_qindex < 200 || cm->width * cm->height > 320 * 240) && cpi->oxcf.content != VP9E_CONTENT_SCREEN && cm->frame_type != KEY_FRAME) filt_guess = 5 * filt_guess >> 3; diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_picklpf.h b/media/libvpx/libvpx/vp9/encoder/vp9_picklpf.h index cecca058b47d..8881b44daaa3 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_picklpf.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_picklpf.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_PICKLPF_H_ -#define VP9_ENCODER_VP9_PICKLPF_H_ +#ifndef VPX_VP9_ENCODER_VP9_PICKLPF_H_ +#define VPX_VP9_ENCODER_VP9_PICKLPF_H_ #ifdef __cplusplus extern "C" { @@ -26,4 +26,4 @@ void vp9_pick_filter_level(const struct yv12_buffer_config *sd, } // extern "C" #endif -#endif // VP9_ENCODER_VP9_PICKLPF_H_ +#endif // VPX_VP9_ENCODER_VP9_PICKLPF_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_pickmode.c b/media/libvpx/libvpx/vp9/encoder/vp9_pickmode.c index f2f323a282d2..9b2e48505293 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_pickmode.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_pickmode.c @@ -41,6 +41,17 @@ typedef struct { int in_use; } PRED_BUFFER; +typedef struct { + PRED_BUFFER *best_pred; + PREDICTION_MODE best_mode; + TX_SIZE best_tx_size; + TX_SIZE best_intra_tx_size; + MV_REFERENCE_FRAME best_ref_frame; + MV_REFERENCE_FRAME best_second_ref_frame; + uint8_t best_mode_skip_txfm; + INTERP_FILTER best_pred_filter; +} BEST_PICKMODE; + static const int pos_shift_16x16[4][4] = { { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 } }; @@ -222,13 +233,22 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } if (rv && search_subpel) { - int subpel_force_stop = cpi->sf.mv.subpel_force_stop; - if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2; + SUBPEL_FORCE_STOP subpel_force_stop = cpi->sf.mv.subpel_force_stop; + if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = HALF_PEL; + if (cpi->sf.mv.enable_adaptive_subpel_force_stop) { + const int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh; + if (abs(tmp_mv->as_mv.row) >= mv_thresh || + abs(tmp_mv->as_mv.col) >= mv_thresh) + subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above; + else + subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_below; + } cpi->find_fractional_mv_step( x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0); + cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), + x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 
NULL, 0, 0, + cpi->sf.use_accurate_subpel_search); *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } @@ -326,6 +346,82 @@ static int ac_thr_factor(const int speed, const int width, const int height, return 1; } +static TX_SIZE calculate_tx_size(VP9_COMP *const cpi, BLOCK_SIZE bsize, + MACROBLOCKD *const xd, unsigned int var, + unsigned int sse, int64_t ac_thr, + unsigned int source_variance, int is_intra) { + // TODO(marpan): Tune selection for intra-modes, screen content, etc. + TX_SIZE tx_size; + unsigned int var_thresh = is_intra ? (unsigned int)ac_thr : 1; + int limit_tx = 1; + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + (source_variance == 0 || var < var_thresh)) + limit_tx = 0; + if (cpi->common.tx_mode == TX_MODE_SELECT) { + if (sse > (var << 2)) + tx_size = VPXMIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + else + tx_size = TX_8X8; + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && limit_tx && + cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id)) + tx_size = TX_8X8; + else if (tx_size > TX_16X16 && limit_tx) + tx_size = TX_16X16; + // For screen-content force 4X4 tx_size over 8X8, for large variance. + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && tx_size == TX_8X8 && + bsize <= BLOCK_16X16 && ((var >> 5) > (unsigned int)ac_thr)) + tx_size = TX_4X4; + } else { + tx_size = VPXMIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + } + return tx_size; +} + +static void compute_intra_yprediction(PREDICTION_MODE mode, BLOCK_SIZE bsize, + MACROBLOCK *x, MACROBLOCKD *xd) { + struct macroblockd_plane *const pd = &xd->plane[0]; + struct macroblock_plane *const p = &x->plane[0]; + uint8_t *const src_buf_base = p->src.buf; + uint8_t *const dst_buf_base = pd->dst.buf; + const int src_stride = p->src.stride; + const int dst_stride = pd->dst.stride; + // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") + // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 + const TX_SIZE tx_size = max_txsize_lookup[bsize]; + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; + int row, col; + // If mb_to_right_edge is < 0 we are in a situation in which + // the current block size extends into the UMV and we won't + // visit the sub blocks that are wholly within the UMV. + const int max_blocks_wide = + num_4x4_w + (xd->mb_to_right_edge >= 0 + ? 0 + : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = + num_4x4_h + (xd->mb_to_bottom_edge >= 0 + ? 0 + : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + // Keep track of the row and column of the blocks we use so that we know + // if we are in the unrestricted motion border. + for (row = 0; row < max_blocks_high; row += (1 << tx_size)) { + // Skip visiting the sub blocks that are wholly within the UMV. + for (col = 0; col < max_blocks_wide; col += (1 << tx_size)) { + p->src.buf = &src_buf_base[4 * (row * (int64_t)src_stride + col)]; + pd->dst.buf = &dst_buf_base[4 * (row * (int64_t)dst_stride + col)]; + vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, + x->skip_encode ? p->src.buf : pd->dst.buf, + x->skip_encode ? 
src_stride : dst_stride,
+                            pd->dst.buf, dst_stride, col, row, 0);
+    }
+  }
+  p->src.buf = src_buf_base;
+  pd->dst.buf = dst_buf_base;
+}
+
 static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                     MACROBLOCK *x, MACROBLOCKD *xd,
                                     int *out_rate_sum, int64_t *out_dist_sum,
@@ -342,7 +438,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
   struct macroblockd_plane *const pd = &xd->plane[0];
   const uint32_t dc_quant = pd->dequant[0];
   const uint32_t ac_quant = pd->dequant[1];
-  const int64_t dc_thr = dc_quant * dc_quant >> 6;
+  int64_t dc_thr = dc_quant * dc_quant >> 6;
   int64_t ac_thr = ac_quant * ac_quant >> 6;
   unsigned int var;
   int sum;
@@ -386,26 +482,17 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
                     cpi->common.height, abs(sum) >> (bw + bh));
 #endif
 
-  if (cpi->common.tx_mode == TX_MODE_SELECT) {
-    if (sse > (var << 2))
-      tx_size = VPXMIN(max_txsize_lookup[bsize],
-                       tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
-    else
-      tx_size = TX_8X8;
-
-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
-        cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
-      tx_size = TX_8X8;
-    else if (tx_size > TX_16X16)
-      tx_size = TX_16X16;
-  } else {
-    tx_size = VPXMIN(max_txsize_lookup[bsize],
-                     tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
-  }
-
-  assert(tx_size >= TX_8X8);
+  tx_size = calculate_tx_size(cpi, bsize, xd, var, sse, ac_thr,
+                              x->source_variance, 0);
+  // The code below for setting skip flag assumes transform size of at least 8x8,
+  // so force this lower limit on transform.
+  if (tx_size < TX_8X8) tx_size = TX_8X8;
   xd->mi[0]->tx_size = tx_size;
 
+  if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && x->zero_temp_sad_source &&
+      x->source_variance == 0)
+    dc_thr = dc_thr << 1;
+
   // Evaluate if the partition block is a skippable block in Y plane.
   {
     unsigned int sse16x16[16] = { 0 };
@@ -473,33 +560,29 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
   // Transform skipping test in UV planes.
for (i = 1; i <= 2; i++) { - if (cpi->oxcf.speed < 8 || x->color_sensitivity[i - 1]) { - struct macroblock_plane *const p = &x->plane[i]; - struct macroblockd_plane *const pd = &xd->plane[i]; - const TX_SIZE uv_tx_size = get_uv_tx_size(xd->mi[0], pd); - const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size]; - const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd); - const int uv_bw = b_width_log2_lookup[uv_bsize]; - const int uv_bh = b_height_log2_lookup[uv_bsize]; - const int sf = (uv_bw - b_width_log2_lookup[unit_size]) + - (uv_bh - b_height_log2_lookup[unit_size]); - const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf); - const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf); - int j = i - 1; + struct macroblock_plane *const p = &x->plane[i]; + struct macroblockd_plane *const pd = &xd->plane[i]; + const TX_SIZE uv_tx_size = get_uv_tx_size(xd->mi[0], pd); + const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size]; + const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd); + const int uv_bw = b_width_log2_lookup[uv_bsize]; + const int uv_bh = b_height_log2_lookup[uv_bsize]; + const int sf = (uv_bw - b_width_log2_lookup[unit_size]) + + (uv_bh - b_height_log2_lookup[unit_size]); + const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf); + const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf); + int j = i - 1; - vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i); - flag_preduv_computed[i - 1] = 1; - var_uv[j] = cpi->fn_ptr[uv_bsize].vf( - p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse_uv[j]); + vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i); + flag_preduv_computed[i - 1] = 1; + var_uv[j] = cpi->fn_ptr[uv_bsize].vf( + p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse_uv[j]); - if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) && - (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j])) - skip_uv[j] = 1; - else - break; - } else { - skip_uv[i - 1] = 1; - } + if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) && + (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j])) + skip_uv[j] = 1; + else + break; } // If the transform in YUV planes are skippable, the mode search checks @@ -543,7 +626,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, int *out_rate_sum, int64_t *out_dist_sum, unsigned int *var_y, - unsigned int *sse_y) { + unsigned int *sse_y, int is_intra) { // Note our transform coeffs are 8 times an orthogonal transform. // Hence quantizer step is also 8 times. To get effective quantizer // we need to divide by 8 before sending to modeling function. 
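
The comment retained above is the contract behind this hunk: the libvpx forward transforms produce coefficients scaled by 8 relative to an orthonormal transform, so the step sizes stored in the dequant tables are likewise 8x too large for the rate-distortion model and are divided by 8 before modeling. A minimal sketch of that rescaling; the helper name is illustrative and not part of the patch, and dequant[0]/dequant[1] are assumed to hold the DC/AC step sizes as in the hunks above:

    #include <stdint.h>

    /* Sketch only: convert 8x-scaled dequant steps into the effective
     * quantizer steps expected by the RD model. */
    static void effective_qsteps(const int16_t dequant[2], int *dc_q,
                                 int *ac_q) {
      *dc_q = dequant[0] >> 3; /* divide by 8 before modeling rate/dist */
      *ac_q = dequant[1] >> 3;
    }

The skip thresholds seen earlier (dc_thr = dc_quant * dc_quant >> 6) follow the same convention: q * q >> 6 equals (q >> 3) squared up to integer truncation, i.e. the square of the effective step size.
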
@@ -563,24 +646,8 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, *var_y = var; *sse_y = sse; - if (cpi->common.tx_mode == TX_MODE_SELECT) { - if (sse > (var << 2)) - xd->mi[0]->tx_size = - VPXMIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - else - xd->mi[0]->tx_size = TX_8X8; - - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && - cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id)) - xd->mi[0]->tx_size = TX_8X8; - else if (xd->mi[0]->tx_size > TX_16X16) - xd->mi[0]->tx_size = TX_16X16; - } else { - xd->mi[0]->tx_size = - VPXMIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - } + xd->mi[0]->tx_size = calculate_tx_size(cpi, bsize, xd, var, sse, ac_thr, + x->source_variance, is_intra); // Evaluate if the partition block is a skippable block in Y plane. { @@ -641,7 +708,7 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, int *skippable, int64_t *sse, BLOCK_SIZE bsize, - TX_SIZE tx_size, int rd_computed) { + TX_SIZE tx_size, int rd_computed, int is_intra) { MACROBLOCKD *xd = &x->e_mbd; const struct macroblockd_plane *pd = &xd->plane[0]; struct macroblock_plane *const p = &x->plane[0]; @@ -658,25 +725,6 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; -#if CONFIG_VP9_HIGHBITDEPTH - // TODO(jingning): Implement the high bit-depth Hadamard transforms and - // remove this check condition. - // TODO(marpan): Use this path (model_rd) for 8bit under certain conditions - // for now, as the vp9_quantize_fp below for highbitdepth build is slow. - if (xd->bd != 8 || - (cpi->oxcf.speed > 5 && cpi->common.frame_type != KEY_FRAME && - bsize < BLOCK_32X32)) { - unsigned int var_y, sse_y; - (void)tx_size; - if (!rd_computed) - model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, - &var_y, &sse_y); - *sse = INT_MAX; - *skippable = 0; - return; - } -#endif - if (cpi->sf.use_simple_block_yrd && cpi->common.frame_type != KEY_FRAME && (bsize < BLOCK_32X32 || (cpi->use_svc && @@ -685,7 +733,7 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, (void)tx_size; if (!rd_computed) model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, - &var_y, &sse_y); + &var_y, &sse_y, is_intra); *sse = INT_MAX; *skippable = 0; return; @@ -695,9 +743,19 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, // The max tx_size passed in is TX_16X16. assert(tx_size != TX_32X32); - +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, + p->src.stride, pd->dst.buf, pd->dst.stride, + x->e_mbd.bd); + } else { + vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride); + } +#else vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride); +#endif *skippable = 1; // Keep track of the row and column of the blocks we use so that we know // if we are in the unrestricted motion border. 
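
With this change both model_rd_for_sb_y_large and model_rd_for_sb_y route through calculate_tx_size (added earlier in this file by the patch), so the transform-size rule lives in one place. Read in isolation, the rule reduces to roughly the sketch below; the names are illustrative, the thresholds are copied from the hunks above, and the screen-content TX_4X4 override is omitted:

    /* Sketch of the consolidated transform-size choice. max_tx is the
     * largest size allowed by the block size and tx_mode; limit_tx mirrors
     * the cyclic-refresh gating added by the patch. */
    typedef enum { SK_TX_4X4, SK_TX_8X8, SK_TX_16X16, SK_TX_32X32 } SketchTxSize;

    static SketchTxSize sketch_tx_size(unsigned int sse, unsigned int var,
                                       SketchTxSize max_tx, int boosted_seg,
                                       int limit_tx) {
      /* Residual energy well above its variance means a mostly-DC residual,
       * which a larger transform codes cheaply. */
      SketchTxSize tx = (sse > (var << 2)) ? max_tx : SK_TX_8X8;
      if (limit_tx && boosted_seg) return SK_TX_8X8; /* boosted CR segment */
      if (limit_tx && tx > SK_TX_16X16) return SK_TX_16X16;
      return tx;
    }
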
@@ -726,13 +784,13 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - default: assert(0); break; } *skippable &= (*eob == 0); eob_cost += 1; @@ -876,6 +934,7 @@ static void encode_breakout_test( // Skipping threshold for dc. unsigned int thresh_dc; int motion_low = 1; + if (cpi->use_svc && ref_frame == GOLDEN_FRAME) return; if (mi->mv[0].as_mv.row > 64 || mi->mv[0].as_mv.row < -64 || mi->mv[0].as_mv.col > 64 || mi->mv[0].as_mv.col < -64) @@ -981,8 +1040,8 @@ static void estimate_block_intra(int plane, int block, int row, int col, VP9_COMP *const cpi = args->cpi; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - struct macroblock_plane *const p = &x->plane[0]; - struct macroblockd_plane *const pd = &xd->plane[0]; + struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size]; uint8_t *const src_buf_base = p->src.buf; uint8_t *const dst_buf_base = pd->dst.buf; @@ -992,8 +1051,8 @@ static void estimate_block_intra(int plane, int block, int row, int col, (void)block; - p->src.buf = &src_buf_base[4 * (row * src_stride + col)]; - pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)]; + p->src.buf = &src_buf_base[4 * (row * (int64_t)src_stride + col)]; + pd->dst.buf = &dst_buf_base[4 * (row * (int64_t)dst_stride + col)]; // Use source buffer as an approximation for the fully reconstructed buffer. vp9_predict_intra_block(xd, b_width_log2_lookup[plane_bsize], tx_size, args->mode, x->skip_encode ? p->src.buf : pd->dst.buf, @@ -1002,13 +1061,12 @@ static void estimate_block_intra(int plane, int block, int row, int col, if (plane == 0) { int64_t this_sse = INT64_MAX; - // TODO(jingning): This needs further refactoring. 
block_yrd(cpi, x, &this_rdc, &args->skippable, &this_sse, bsize_tx, - VPXMIN(tx_size, TX_16X16), 0); + VPXMIN(tx_size, TX_16X16), 0, 1); } else { unsigned int var = 0; unsigned int sse = 0; - model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &this_rdc, &var, &sse, plane, + model_rd_for_sb_uv(cpi, bsize_tx, x, xd, &this_rdc, &var, &sse, plane, plane); } @@ -1292,18 +1350,16 @@ static void vp9_pickmode_ctx_den_update( VP9_PICKMODE_CTX_DEN *ctx_den, int64_t zero_last_cost_orig, int ref_frame_cost[MAX_REF_FRAMES], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int reuse_inter_pred, - TX_SIZE best_tx_size, PREDICTION_MODE best_mode, - MV_REFERENCE_FRAME best_ref_frame, INTERP_FILTER best_pred_filter, - uint8_t best_mode_skip_txfm) { + BEST_PICKMODE *bp) { ctx_den->zero_last_cost_orig = zero_last_cost_orig; ctx_den->ref_frame_cost = ref_frame_cost; ctx_den->frame_mv = frame_mv; ctx_den->reuse_inter_pred = reuse_inter_pred; - ctx_den->best_tx_size = best_tx_size; - ctx_den->best_mode = best_mode; - ctx_den->best_ref_frame = best_ref_frame; - ctx_den->best_pred_filter = best_pred_filter; - ctx_den->best_mode_skip_txfm = best_mode_skip_txfm; + ctx_den->best_tx_size = bp->best_tx_size; + ctx_den->best_mode = bp->best_mode; + ctx_den->best_ref_frame = bp->best_ref_frame; + ctx_den->best_pred_filter = bp->best_pred_filter; + ctx_den->best_mode_skip_txfm = bp->best_mode_skip_txfm; } static void recheck_zeromv_after_denoising( @@ -1322,6 +1378,7 @@ static void recheck_zeromv_after_denoising( cpi->svc.number_spatial_layers == 1 && decision == FILTER_ZEROMV_BLOCK))) { // Check if we should pick ZEROMV on denoised signal. + VP9_COMMON *const cm = &cpi->common; int rate = 0; int64_t dist = 0; uint32_t var_y = UINT_MAX; @@ -1330,11 +1387,13 @@ static void recheck_zeromv_after_denoising( mi->mode = ZEROMV; mi->ref_frame[0] = LAST_FRAME; mi->ref_frame[1] = NONE; + set_ref_ptrs(cm, xd, mi->ref_frame[0], NONE); mi->mv[0].as_int = 0; mi->interp_filter = EIGHTTAP; + if (cpi->sf.default_interp_filter == BILINEAR) mi->interp_filter = BILINEAR; xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0]; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y, 0); this_rdc.rate = rate + ctx_den->ref_frame_cost[LAST_FRAME] + cpi->inter_mode_cost[x->mbmi_ext->mode_context[LAST_FRAME]] [INTER_OFFSET(ZEROMV)]; @@ -1346,6 +1405,7 @@ static void recheck_zeromv_after_denoising( this_rdc = *best_rdc; mi->mode = ctx_den->best_mode; mi->ref_frame[0] = ctx_den->best_ref_frame; + set_ref_ptrs(cm, xd, mi->ref_frame[0], NONE); mi->interp_filter = ctx_den->best_pred_filter; if (ctx_den->best_ref_frame == INTRA_FRAME) { mi->mv[0].as_int = INVALID_MV; @@ -1416,27 +1476,223 @@ static INLINE int get_force_skip_low_temp_var(uint8_t *variance_low, int mi_row, return force_skip_low_temp_var; } +static void search_filter_ref(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, + int mi_row, int mi_col, PRED_BUFFER *tmp, + BLOCK_SIZE bsize, int reuse_inter_pred, + PRED_BUFFER **this_mode_pred, unsigned int *var_y, + unsigned int *sse_y, int force_smooth_filter, + int *this_early_term, int *flag_preduv_computed, + int use_model_yrd_large) { + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mi = xd->mi[0]; + struct macroblockd_plane *const pd = &xd->plane[0]; + const int bw = num_4x4_blocks_wide_lookup[bsize] << 2; + + int pf_rate[3] = { 0 }; + int64_t pf_dist[3] = { 0 }; + int curr_rate[3] = { 0 }; + unsigned int 
pf_var[3] = { 0 }; + unsigned int pf_sse[3] = { 0 }; + TX_SIZE pf_tx_size[3] = { 0 }; + int64_t best_cost = INT64_MAX; + INTERP_FILTER best_filter = SWITCHABLE, filter; + PRED_BUFFER *current_pred = *this_mode_pred; + uint8_t skip_txfm = SKIP_TXFM_NONE; + int best_early_term = 0; + int best_flag_preduv_computed[2] = { 0 }; + INTERP_FILTER filter_start = force_smooth_filter ? EIGHTTAP_SMOOTH : EIGHTTAP; + INTERP_FILTER filter_end = EIGHTTAP_SMOOTH; + for (filter = filter_start; filter <= filter_end; ++filter) { + int64_t cost; + mi->interp_filter = filter; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + // For large partition blocks, extra testing is done. + if (use_model_yrd_large) + model_rd_for_sb_y_large(cpi, bsize, x, xd, &pf_rate[filter], + &pf_dist[filter], &pf_var[filter], + &pf_sse[filter], mi_row, mi_col, this_early_term, + flag_preduv_computed); + else + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter], + &pf_var[filter], &pf_sse[filter], 0); + curr_rate[filter] = pf_rate[filter]; + pf_rate[filter] += vp9_get_switchable_rate(cpi, xd); + cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]); + pf_tx_size[filter] = mi->tx_size; + if (cost < best_cost) { + best_filter = filter; + best_cost = cost; + skip_txfm = x->skip_txfm[0]; + best_early_term = *this_early_term; + best_flag_preduv_computed[0] = flag_preduv_computed[0]; + best_flag_preduv_computed[1] = flag_preduv_computed[1]; + + if (reuse_inter_pred) { + if (*this_mode_pred != current_pred) { + free_pred_buffer(*this_mode_pred); + *this_mode_pred = current_pred; + } + if (filter != filter_end) { + current_pred = &tmp[get_pred_buffer(tmp, 3)]; + pd->dst.buf = current_pred->data; + pd->dst.stride = bw; + } + } + } + } + + if (reuse_inter_pred && *this_mode_pred != current_pred) + free_pred_buffer(current_pred); + + mi->interp_filter = best_filter; + mi->tx_size = pf_tx_size[best_filter]; + this_rdc->rate = curr_rate[best_filter]; + this_rdc->dist = pf_dist[best_filter]; + *var_y = pf_var[best_filter]; + *sse_y = pf_sse[best_filter]; + x->skip_txfm[0] = skip_txfm; + *this_early_term = best_early_term; + flag_preduv_computed[0] = best_flag_preduv_computed[0]; + flag_preduv_computed[1] = best_flag_preduv_computed[1]; + if (reuse_inter_pred) { + pd->dst.buf = (*this_mode_pred)->data; + pd->dst.stride = (*this_mode_pred)->stride; + } else if (best_filter < filter_end) { + mi->interp_filter = best_filter; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + } +} + +static int search_new_mv(VP9_COMP *cpi, MACROBLOCK *x, + int_mv frame_mv[][MAX_REF_FRAMES], + MV_REFERENCE_FRAME ref_frame, int gf_temporal_ref, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int best_pred_sad, int *rate_mv, + unsigned int best_sse_sofar, RD_COST *best_rdc) { + SVC *const svc = &cpi->svc; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mi = xd->mi[0]; + SPEED_FEATURES *const sf = &cpi->sf; + + if (ref_frame > LAST_FRAME && gf_temporal_ref && + cpi->oxcf.rc_mode == VPX_CBR) { + int tmp_sad; + uint32_t dis; + int cost_list[5] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX }; + + if (bsize < BLOCK_16X16) return -1; + + tmp_sad = vp9_int_pro_motion_estimation( + cpi, x, bsize, mi_row, mi_col, + &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv); + + if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) return -1; + if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad) return -1; + + frame_mv[NEWMV][ref_frame].as_int = mi->mv[0].as_int; + *rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv, + 
&x->mbmi_ext->ref_mvs[ref_frame][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + frame_mv[NEWMV][ref_frame].as_mv.row >>= 3; + frame_mv[NEWMV][ref_frame].as_mv.col >>= 3; + + cpi->find_fractional_mv_step( + x, &frame_mv[NEWMV][ref_frame].as_mv, + &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv, + cpi->common.allow_high_precision_mv, x->errorperbit, + &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), + x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0, + cpi->sf.use_accurate_subpel_search); + } else if (svc->use_base_mv && svc->spatial_layer_id) { + if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV) { + const int pre_stride = xd->plane[0].pre[0].stride; + unsigned int base_mv_sse = UINT_MAX; + int scale = (cpi->rc.avg_frame_low_motion > 60) ? 2 : 4; + const uint8_t *const pre_buf = + xd->plane[0].pre[0].buf + + (frame_mv[NEWMV][ref_frame].as_mv.row >> 3) * pre_stride + + (frame_mv[NEWMV][ref_frame].as_mv.col >> 3); + cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, + pre_buf, pre_stride, &base_mv_sse); + + // Exit NEWMV search if base_mv is (0,0) && bsize < BLOCK_16x16, + // for SVC encoding. + if (cpi->use_svc && svc->use_base_mv && bsize < BLOCK_16X16 && + frame_mv[NEWMV][ref_frame].as_mv.row == 0 && + frame_mv[NEWMV][ref_frame].as_mv.col == 0) + return -1; + + // Exit NEWMV search if base_mv_sse is large. + if (sf->base_mv_aggressive && base_mv_sse > (best_sse_sofar << scale)) + return -1; + if (base_mv_sse < (best_sse_sofar << 1)) { + // Base layer mv is good. + // Exit NEWMV search if the base_mv is (0, 0) and sse is low, since + // (0, 0) mode is already tested. + unsigned int base_mv_sse_normalized = + base_mv_sse >> + (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); + if (sf->base_mv_aggressive && base_mv_sse <= best_sse_sofar && + base_mv_sse_normalized < 400 && + frame_mv[NEWMV][ref_frame].as_mv.row == 0 && + frame_mv[NEWMV][ref_frame].as_mv.col == 0) + return -1; + if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame], rate_mv, + best_rdc->rdcost, 1)) { + return -1; + } + } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame], rate_mv, + best_rdc->rdcost, 0)) { + return -1; + } + } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame], rate_mv, + best_rdc->rdcost, 0)) { + return -1; + } + } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame], rate_mv, + best_rdc->rdcost, 0)) { + return -1; + } + + return 0; +} + +static INLINE void init_best_pickmode(BEST_PICKMODE *bp) { + bp->best_mode = ZEROMV; + bp->best_ref_frame = LAST_FRAME; + bp->best_tx_size = TX_SIZES; + bp->best_intra_tx_size = TX_SIZES; + bp->best_pred_filter = EIGHTTAP; + bp->best_mode_skip_txfm = SKIP_TXFM_NONE; + bp->best_second_ref_frame = NONE; + bp->best_pred = NULL; +} + void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; - const SVC *const svc = &cpi->svc; + SVC *const svc = &cpi->svc; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; struct macroblockd_plane *const pd = &xd->plane[0]; - PREDICTION_MODE best_mode = ZEROMV; - MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME; + + BEST_PICKMODE best_pickmode; + + 
MV_REFERENCE_FRAME ref_frame; MV_REFERENCE_FRAME usable_ref_frame, second_ref_frame; - TX_SIZE best_tx_size = TX_SIZES; - INTERP_FILTER best_pred_filter = EIGHTTAP; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; uint8_t mode_checked[MB_MODE_COUNT][MAX_REF_FRAMES]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; RD_COST this_rdc, best_rdc; - uint8_t skip_txfm = SKIP_TXFM_NONE, best_mode_skip_txfm = SKIP_TXFM_NONE; // var_y and sse_y are saved to be used in skipping checking unsigned int var_y = UINT_MAX; unsigned int sse_y = UINT_MAX; @@ -1451,15 +1707,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, (cpi->sf.adaptive_rd_thresh_row_mt) ? &(tile_data->row_base_thresh_freq_fact[thresh_freq_fact_idx]) : tile_data->thresh_freq_fact[bsize]; - +#if CONFIG_VP9_TEMPORAL_DENOISING + const int denoise_recheck_zeromv = 1; +#endif INTERP_FILTER filter_ref; - const int bsl = mi_width_log2_lookup[bsize]; - const int pred_filter_search = - cm->interp_filter == SWITCHABLE - ? (((mi_row + mi_col) >> bsl) + - get_chessboard_index(cm->current_video_frame)) & - 0x1 - : 0; + int pred_filter_search = cm->interp_filter == SWITCHABLE; int const_motion[MAX_REF_FRAMES] = { 0 }; const int bh = num_4x4_blocks_high_lookup[bsize] << 2; const int bw = num_4x4_blocks_wide_lookup[bsize] << 2; @@ -1467,12 +1719,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // process. // tmp[3] points to dst buffer, and the other 3 point to allocated buffers. PRED_BUFFER tmp[4]; - DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64] VPX_UNINITIALIZED); #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64]); + DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64] VPX_UNINITIALIZED); #endif struct buf_2d orig_dst = pd->dst; - PRED_BUFFER *best_pred = NULL; PRED_BUFFER *this_mode_pred = NULL; const int pixels_in_block = bh * bw; int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready; @@ -1488,22 +1739,84 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int skip_ref_find_pred[4] = { 0 }; unsigned int sse_zeromv_normalized = UINT_MAX; unsigned int best_sse_sofar = UINT_MAX; - unsigned int thresh_svc_skip_golden = 500; + int gf_temporal_ref = 0; + int force_test_gf_zeromv = 0; #if CONFIG_VP9_TEMPORAL_DENOISING VP9_PICKMODE_CTX_DEN ctx_den; int64_t zero_last_cost_orig = INT64_MAX; int denoise_svc_pickmode = 1; #endif INTERP_FILTER filter_gf_svc = EIGHTTAP; - MV_REFERENCE_FRAME best_second_ref_frame = NONE; + MV_REFERENCE_FRAME inter_layer_ref = GOLDEN_FRAME; + const struct segmentation *const seg = &cm->seg; int comp_modes = 0; int num_inter_modes = (cpi->use_svc) ? RT_INTER_MODES_SVC : RT_INTER_MODES; int flag_svc_subpel = 0; int svc_mv_col = 0; int svc_mv_row = 0; + int no_scaling = 0; + int large_block = 0; + int use_model_yrd_large = 0; + unsigned int thresh_svc_skip_golden = 500; + unsigned int thresh_skip_golden = 500; + int force_smooth_filter = cpi->sf.force_smooth_interpol; + int scene_change_detected = + cpi->rc.high_source_sad || + (cpi->use_svc && cpi->svc.high_source_sad_superframe); + + init_best_pickmode(&best_pickmode); + + x->encode_breakout = seg->enabled + ? 
cpi->segment_encode_breakout[mi->segment_id]
+                            : cpi->encode_breakout;
+
+  x->source_variance = UINT_MAX;
+  if (cpi->sf.default_interp_filter == BILINEAR) {
+    best_pickmode.best_pred_filter = BILINEAR;
+    filter_gf_svc = BILINEAR;
+  }
+  if (cpi->use_svc && svc->spatial_layer_id > 0) {
+    int layer =
+        LAYER_IDS_TO_IDX(svc->spatial_layer_id - 1, svc->temporal_layer_id,
+                         svc->number_temporal_layers);
+    LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+    if (lc->scaling_factor_num == lc->scaling_factor_den) no_scaling = 1;
+  }
+  if (svc->spatial_layer_id > 0 &&
+      (svc->high_source_sad_superframe || no_scaling))
+    thresh_svc_skip_golden = 0;
+  // Lower the skip threshold if lower spatial layer is better quality relative
+  // to current layer.
+  else if (svc->spatial_layer_id > 0 && cm->base_qindex > 150 &&
+           cm->base_qindex > svc->lower_layer_qindex + 15)
+    thresh_svc_skip_golden = 100;
+  // Increase skip threshold if lower spatial layer is lower quality relative
+  // to current layer.
+  else if (svc->spatial_layer_id > 0 && cm->base_qindex < 140 &&
+           cm->base_qindex < svc->lower_layer_qindex - 20)
+    thresh_svc_skip_golden = 1000;
+
+  if (!cpi->use_svc ||
+      (svc->use_gf_temporal_ref_current_layer &&
+       !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
+    struct scale_factors *const sf_last = &cm->frame_refs[LAST_FRAME - 1].sf;
+    struct scale_factors *const sf_golden =
+        &cm->frame_refs[GOLDEN_FRAME - 1].sf;
+    gf_temporal_ref = 1;
+    // For temporal long term prediction, check that the golden reference
+    // is same scale as last reference, otherwise disable.
+    if ((sf_last->x_scale_fp != sf_golden->x_scale_fp) ||
+        (sf_last->y_scale_fp != sf_golden->y_scale_fp)) {
+      gf_temporal_ref = 0;
+    } else {
+      if (cpi->rc.avg_frame_low_motion > 70)
+        thresh_svc_skip_golden = 500;
+      else
+        thresh_svc_skip_golden = 0;
+    }
+  }
   init_ref_frame_cost(cm, xd, ref_frame_cost);
-
   memset(&mode_checked[0][0], 0, MB_MODE_COUNT * MAX_REF_FRAMES);
   if (reuse_inter_pred) {
@@ -1528,16 +1841,25 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
   x->skip = 0;
 
+  if (cpi->sf.cb_pred_filter_search) {
+    const int bsl = mi_width_log2_lookup[bsize];
+    pred_filter_search = cm->interp_filter == SWITCHABLE
+                             ? (((mi_row + mi_col) >> bsl) +
+                                get_chessboard_index(cm->current_video_frame)) &
+                                   0x1
+                             : 0;
+  }
   // Instead of using vp9_get_pred_context_switchable_interp(xd) to assign
   // filter_ref, we use a less strict condition on assigning filter_ref.
   // This is to reduce the probability of entering the flow of not assigning
   // filter_ref and then skipping the filter search.
- if (xd->above_mi && is_inter_block(xd->above_mi)) - filter_ref = xd->above_mi->interp_filter; - else if (xd->left_mi && is_inter_block(xd->left_mi)) - filter_ref = xd->left_mi->interp_filter; - else - filter_ref = cm->interp_filter; + filter_ref = cm->interp_filter; + if (cpi->sf.default_interp_filter != BILINEAR) { + if (xd->above_mi && is_inter_block(xd->above_mi)) + filter_ref = xd->above_mi->interp_filter; + else if (xd->left_mi && is_inter_block(xd->left_mi)) + filter_ref = xd->left_mi->interp_filter; + } // initialize mode decisions vp9_rd_cost_reset(&best_rdc); @@ -1558,23 +1880,24 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, #endif // CONFIG_VP9_HIGHBITDEPTH x->source_variance = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); + + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && + cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && mi->segment_id > 0 && + x->zero_temp_sad_source && x->source_variance == 0) { + mi->segment_id = 0; + vp9_init_plane_quantizers(cpi, x); + } } #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) { - if (cpi->use_svc) { - int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, - cpi->svc.temporal_layer_id, - cpi->svc.number_temporal_layers); - LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; - denoise_svc_pickmode = denoise_svc(cpi) && !lc->is_key_frame; - } + if (cpi->use_svc) denoise_svc_pickmode = vp9_denoise_svc_non_key(cpi); if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode) vp9_denoiser_reset_frame_stats(ctx); } #endif - if (cpi->rc.frames_since_golden == 0 && !cpi->use_svc && + if (cpi->rc.frames_since_golden == 0 && gf_temporal_ref && !cpi->rc.alt_ref_gf_group && !cpi->rc.last_frame_is_src_altref) { usable_ref_frame = LAST_FRAME; } else { @@ -1601,14 +1924,20 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // For svc mode, on spatial_layer_id > 0: if the reference has different scale // constrain the inter mode to only test zero motion. 
if (cpi->use_svc && svc->force_zero_mode_spatial_ref && - cpi->svc.spatial_layer_id > 0) { + svc->spatial_layer_id > 0 && !gf_temporal_ref) { if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) { struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf; - if (vp9_is_scaled(sf)) svc_force_zero_mode[LAST_FRAME - 1] = 1; + if (vp9_is_scaled(sf)) { + svc_force_zero_mode[LAST_FRAME - 1] = 1; + inter_layer_ref = LAST_FRAME; + } } if (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) { struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf; - if (vp9_is_scaled(sf)) svc_force_zero_mode[GOLDEN_FRAME - 1] = 1; + if (vp9_is_scaled(sf)) { + svc_force_zero_mode[GOLDEN_FRAME - 1] = 1; + inter_layer_ref = GOLDEN_FRAME; + } } } @@ -1624,6 +1953,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } } + if (sf->disable_golden_ref && (x->content_state_sb != kVeryHighSad || + cpi->rc.avg_frame_low_motion < 60)) + usable_ref_frame = LAST_FRAME; + if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && !svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var)) use_golden_nonzeromv = 0; @@ -1638,7 +1971,21 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, cpi->sf.use_compound_nonrd_pickmode && usable_ref_frame == ALTREF_FRAME) comp_modes = 2; + // If the segment reference frame feature is enabled and it's set to GOLDEN + // reference, then make sure we don't skip checking GOLDEN, this is to + // prevent possibility of not picking any mode. + if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) && + get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) == GOLDEN_FRAME) { + usable_ref_frame = GOLDEN_FRAME; + skip_ref_find_pred[GOLDEN_FRAME] = 0; + thresh_svc_skip_golden = 0; + } + for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) { + // Skip find_predictor if the reference frame is not in the + // ref_frame_flags (i.e., not used as a reference for this frame). + skip_ref_find_pred[ref_frame] = + !(cpi->ref_frame_flags & flag_list[ref_frame]); if (!skip_ref_find_pred[ref_frame]) { find_predictors(cpi, x, ref_frame, frame_mv, const_motion, &ref_frame_skip_mask, flag_list, tile_data, mi_row, @@ -1652,16 +1999,37 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // Set the flag_svc_subpel to 1 for SVC if the lower spatial layer used // an averaging filter for downsampling (phase = 8). If so, we will test - // a nonzero motion mode on the spatial (goldeen) reference. + // a nonzero motion mode on the spatial reference. // The nonzero motion is half pixel shifted to left and top (-4, -4). - if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 && - svc_force_zero_mode[GOLDEN_FRAME - 1] && - cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id - 1] == 8) { + if (cpi->use_svc && svc->spatial_layer_id > 0 && + svc_force_zero_mode[inter_layer_ref - 1] && + svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8 && + !gf_temporal_ref) { svc_mv_col = -4; svc_mv_row = -4; flag_svc_subpel = 1; } + // For SVC with quality layers, when QP of lower layer is lower + // than current layer: force check of GF-ZEROMV before early exit + // due to skip flag. + if (svc->spatial_layer_id > 0 && no_scaling && + (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && + cm->base_qindex > svc->lower_layer_qindex + 10) + force_test_gf_zeromv = 1; + + // For low motion content use x->sb_is_skin in addition to VeryHighSad + // for setting large_block. 
+ large_block = (x->content_state_sb == kVeryHighSad || + (x->sb_is_skin && cpi->rc.avg_frame_low_motion > 70) || + cpi->oxcf.speed < 7) + ? bsize > BLOCK_32X32 + : bsize >= BLOCK_32X32; + use_model_yrd_large = + cpi->oxcf.rc_mode == VPX_CBR && large_block && + !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) && + cm->base_qindex; + for (idx = 0; idx < num_inter_modes + comp_modes; ++idx) { int rate_mv = 0; int mode_rd_thresh; @@ -1675,7 +2043,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int inter_mv_mode = 0; int skip_this_mv = 0; int comp_pred = 0; - int force_gf_mv = 0; + int force_mv_inter_layer = 0; PREDICTION_MODE this_mode; second_ref_frame = NONE; @@ -1699,8 +2067,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (ref_frame > usable_ref_frame) continue; if (skip_ref_find_pred[ref_frame]) continue; - if (flag_svc_subpel && ref_frame == GOLDEN_FRAME) { - force_gf_mv = 1; + if (svc->previous_frame_is_intra_only) { + if (ref_frame != LAST_FRAME || frame_mv[this_mode][ref_frame].as_int != 0) + continue; + } + + // If the segment reference frame feature is enabled then do nothing if the + // current ref frame is not allowed. + if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) && + get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) + continue; + + if (flag_svc_subpel && ref_frame == inter_layer_ref) { + force_mv_inter_layer = 1; // Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row), // otherwise set NEWMV to (svc_mv_col, svc_mv_row). if (this_mode == NEWMV) { @@ -1713,7 +2092,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } if (comp_pred) { - const struct segmentation *const seg = &cm->seg; if (!cpi->allow_comp_inter_inter) continue; // Skip compound inter modes if ARF is not available. if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue; @@ -1722,15 +2100,33 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) continue; } - // For SVC, skip the golden (spatial) reference search if sse of zeromv_last - // is below threshold. - if (cpi->use_svc && ref_frame == GOLDEN_FRAME && - sse_zeromv_normalized < thresh_svc_skip_golden) + // For CBR mode: skip the golden reference search if sse of zeromv_last is + // below threshold. + if (ref_frame == GOLDEN_FRAME && cpi->oxcf.rc_mode == VPX_CBR && + ((cpi->use_svc && sse_zeromv_normalized < thresh_svc_skip_golden) || + (!cpi->use_svc && sse_zeromv_normalized < thresh_skip_golden))) continue; - if (sf->short_circuit_flat_blocks && x->source_variance == 0 && - this_mode != NEARESTMV) { - continue; + if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; + + // For screen content. If zero_temp_sad source is computed: skip + // non-zero motion check for stationary blocks. If the superblock is + // non-stationary then for flat blocks skip the zero last check (keep golden + // as it may be inter-layer reference). Otherwise (if zero_temp_sad_source + // is not computed) skip non-zero motion check for flat blocks. + // TODO(marpan): Compute zero_temp_sad_source per coding block. 
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) { + if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) { + if ((frame_mv[this_mode][ref_frame].as_int != 0 && + x->zero_temp_sad_source) || + (frame_mv[this_mode][ref_frame].as_int == 0 && + x->source_variance == 0 && ref_frame == LAST_FRAME && + !x->zero_temp_sad_source)) + continue; + } else if (frame_mv[this_mode][ref_frame].as_int != 0 && + x->source_variance == 0) { + continue; + } } if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue; @@ -1759,14 +2155,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, continue; } - if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; - if (const_motion[ref_frame] && this_mode == NEARMV) continue; // Skip non-zeromv mode search for golden frame if force_skip_low_temp_var // is set. If nearestmv for golden frame is 0, zeromv mode will be skipped // later. - if (!force_gf_mv && force_skip_low_temp_var && ref_frame == GOLDEN_FRAME && + if (!force_mv_inter_layer && force_skip_low_temp_var && + ref_frame == GOLDEN_FRAME && frame_mv[this_mode][ref_frame].as_int != 0) { continue; } @@ -1780,34 +2175,39 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } if (cpi->use_svc) { - if (!force_gf_mv && svc_force_zero_mode[ref_frame - 1] && + if (!force_mv_inter_layer && svc_force_zero_mode[ref_frame - 1] && frame_mv[this_mode][ref_frame].as_int != 0) continue; } - if (sf->reference_masking && - !(frame_mv[this_mode][ref_frame].as_int == 0 && - ref_frame == LAST_FRAME)) { - if (usable_ref_frame < ALTREF_FRAME) { - if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) { - i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME; - if ((cpi->ref_frame_flags & flag_list[i])) - if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1)) - ref_frame_skip_mask |= (1 << ref_frame); + // Disable this drop out case if the ref frame segment level feature is + // enabled for this segment. This is to prevent the possibility that we end + // up unable to pick any mode. + if (!segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) { + if (sf->reference_masking && + !(frame_mv[this_mode][ref_frame].as_int == 0 && + ref_frame == LAST_FRAME)) { + if (usable_ref_frame < ALTREF_FRAME) { + if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) { + i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME; + if ((cpi->ref_frame_flags & flag_list[i])) + if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1)) + ref_frame_skip_mask |= (1 << ref_frame); + } + } else if (!cpi->rc.is_src_frame_alt_ref && + !(frame_mv[this_mode][ref_frame].as_int == 0 && + ref_frame == ALTREF_FRAME)) { + int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME; + int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME : ALTREF_FRAME; + if (((cpi->ref_frame_flags & flag_list[ref1]) && + (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) || + ((cpi->ref_frame_flags & flag_list[ref2]) && + (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1)))) + ref_frame_skip_mask |= (1 << ref_frame); } - } else if (!cpi->rc.is_src_frame_alt_ref && - !(frame_mv[this_mode][ref_frame].as_int == 0 && - ref_frame == ALTREF_FRAME)) { - int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME; - int ref2 = (ref_frame == ALTREF_FRAME) ? 
LAST_FRAME : ALTREF_FRAME; - if (((cpi->ref_frame_flags & flag_list[ref1]) && - (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) || - ((cpi->ref_frame_flags & flag_list[ref2]) && - (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1)))) - ref_frame_skip_mask |= (1 << ref_frame); } + if (ref_frame_skip_mask & (1 << ref_frame)) continue; } - if (ref_frame_skip_mask & (1 << ref_frame)) continue; // Select prediction reference frames. for (i = 0; i < MAX_MB_PLANE; i++) { @@ -1820,8 +2220,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)]; - mode_rd_thresh = best_mode_skip_txfm ? rd_threshes[mode_index] << 1 - : rd_threshes[mode_index]; + mode_rd_thresh = best_pickmode.best_mode_skip_txfm + ? rd_threshes[mode_index] << 1 + : rd_threshes[mode_index]; // Increase mode_rd_thresh value for GOLDEN_FRAME for improved encoding // speed with little/no subjective quality loss. @@ -1835,92 +2236,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, (!cpi->sf.adaptive_rd_thresh_row_mt && rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, &rd_thresh_freq_fact[mode_index]))) - continue; + if (frame_mv[this_mode][ref_frame].as_int != 0) continue; - if (this_mode == NEWMV && !force_gf_mv) { - if (ref_frame > LAST_FRAME && !cpi->use_svc && - cpi->oxcf.rc_mode == VPX_CBR) { - int tmp_sad; - uint32_t dis; - int cost_list[5] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX }; - - if (bsize < BLOCK_16X16) continue; - - tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); - - if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) continue; - if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad) - continue; - - frame_mv[NEWMV][ref_frame].as_int = mi->mv[0].as_int; - rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv, - &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - frame_mv[NEWMV][ref_frame].as_mv.row >>= 3; - frame_mv[NEWMV][ref_frame].as_mv.col >>= 3; - - cpi->find_fractional_mv_step( - x, &frame_mv[NEWMV][ref_frame].as_mv, - &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv, - cpi->common.allow_high_precision_mv, x->errorperbit, - &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, - 0); - } else if (svc->use_base_mv && svc->spatial_layer_id) { - if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV) { - const int pre_stride = xd->plane[0].pre[0].stride; - unsigned int base_mv_sse = UINT_MAX; - int scale = (cpi->rc.avg_frame_low_motion > 60) ? 2 : 4; - const uint8_t *const pre_buf = - xd->plane[0].pre[0].buf + - (frame_mv[NEWMV][ref_frame].as_mv.row >> 3) * pre_stride + - (frame_mv[NEWMV][ref_frame].as_mv.col >> 3); - cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, - pre_buf, pre_stride, &base_mv_sse); - - // Exit NEWMV search if base_mv is (0,0) && bsize < BLOCK_16x16, - // for SVC encoding. - if (cpi->use_svc && cpi->svc.use_base_mv && bsize < BLOCK_16X16 && - frame_mv[NEWMV][ref_frame].as_mv.row == 0 && - frame_mv[NEWMV][ref_frame].as_mv.col == 0) - continue; - - // Exit NEWMV search if base_mv_sse is large. - if (sf->base_mv_aggressive && base_mv_sse > (best_sse_sofar << scale)) - continue; - if (base_mv_sse < (best_sse_sofar << 1)) { - // Base layer mv is good. 
- // Exit NEWMV search if the base_mv is (0, 0) and sse is low, since - // (0, 0) mode is already tested. - unsigned int base_mv_sse_normalized = - base_mv_sse >> - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); - if (sf->base_mv_aggressive && base_mv_sse <= best_sse_sofar && - base_mv_sse_normalized < 400 && - frame_mv[NEWMV][ref_frame].as_mv.row == 0 && - frame_mv[NEWMV][ref_frame].as_mv.col == 0) - continue; - if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame], &rate_mv, - best_rdc.rdcost, 1)) { - continue; - } - } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame], - &rate_mv, best_rdc.rdcost, 0)) { - continue; - } - } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame], - &rate_mv, best_rdc.rdcost, 0)) { - continue; - } - } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame], &rate_mv, - best_rdc.rdcost, 0)) { + if (this_mode == NEWMV && !force_mv_inter_layer) { + if (search_new_mv(cpi, x, frame_mv, ref_frame, gf_temporal_ref, bsize, + mi_row, mi_col, best_pred_sad, &rate_mv, best_sse_sofar, + &best_rdc)) continue; - } } // TODO(jianj): Skipping the testing of (duplicate) non-zero motion vector @@ -1978,70 +2300,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search && (ref_frame == LAST_FRAME || - (ref_frame == GOLDEN_FRAME && !force_gf_mv && + (ref_frame == GOLDEN_FRAME && !force_mv_inter_layer && (cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) && (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) { - int pf_rate[3]; - int64_t pf_dist[3]; - int curr_rate[3]; - unsigned int pf_var[3]; - unsigned int pf_sse[3]; - TX_SIZE pf_tx_size[3]; - int64_t best_cost = INT64_MAX; - INTERP_FILTER best_filter = SWITCHABLE, filter; - PRED_BUFFER *current_pred = this_mode_pred; rd_computed = 1; - - for (filter = EIGHTTAP; filter <= EIGHTTAP_SMOOTH; ++filter) { - int64_t cost; - mi->interp_filter = filter; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter], - &pf_var[filter], &pf_sse[filter]); - curr_rate[filter] = pf_rate[filter]; - pf_rate[filter] += vp9_get_switchable_rate(cpi, xd); - cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]); - pf_tx_size[filter] = mi->tx_size; - if (cost < best_cost) { - best_filter = filter; - best_cost = cost; - skip_txfm = x->skip_txfm[0]; - - if (reuse_inter_pred) { - if (this_mode_pred != current_pred) { - free_pred_buffer(this_mode_pred); - this_mode_pred = current_pred; - } - current_pred = &tmp[get_pred_buffer(tmp, 3)]; - pd->dst.buf = current_pred->data; - pd->dst.stride = bw; - } - } - } - - if (reuse_inter_pred && this_mode_pred != current_pred) - free_pred_buffer(current_pred); - - mi->interp_filter = best_filter; - mi->tx_size = pf_tx_size[best_filter]; - this_rdc.rate = curr_rate[best_filter]; - this_rdc.dist = pf_dist[best_filter]; - var_y = pf_var[best_filter]; - sse_y = pf_sse[best_filter]; - x->skip_txfm[0] = skip_txfm; - if (reuse_inter_pred) { - pd->dst.buf = this_mode_pred->data; - pd->dst.stride = this_mode_pred->stride; - } + search_filter_ref(cpi, x, &this_rdc, mi_row, mi_col, tmp, bsize, + reuse_inter_pred, &this_mode_pred, &var_y, &sse_y, + force_smooth_filter, &this_early_term, + flag_preduv_computed, use_model_yrd_large); } else { - // For low motion 
content use x->sb_is_skin in addition to VeryHighSad - // for setting large_block. - const int large_block = - (x->content_state_sb == kVeryHighSad || - (x->sb_is_skin && cpi->rc.avg_frame_low_motion > 70) || - cpi->oxcf.speed < 7) - ? bsize > BLOCK_32X32 - : bsize >= BLOCK_32X32; mi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref; if (cpi->use_svc && ref_frame == GOLDEN_FRAME && @@ -2051,19 +2318,18 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); // For large partition blocks, extra testing is done. - if (cpi->oxcf.rc_mode == VPX_CBR && large_block && - !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) && - cm->base_qindex) { + if (use_model_yrd_large) { + rd_computed = 1; model_rd_for_sb_y_large(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, &var_y, &sse_y, mi_row, mi_col, &this_early_term, flag_preduv_computed); } else { rd_computed = 1; model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, - &var_y, &sse_y); + &var_y, &sse_y, 0); } // Save normalized sse (between current and last frame) for (0, 0) motion. - if (cpi->use_svc && ref_frame == LAST_FRAME && + if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0) { sse_zeromv_normalized = sse_y >> (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); @@ -2074,8 +2340,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (!this_early_term) { this_sse = (int64_t)sse_y; block_yrd(cpi, x, &this_rdc, &is_skippable, &this_sse, bsize, - VPXMIN(mi->tx_size, TX_16X16), rd_computed); - + VPXMIN(mi->tx_size, TX_16X16), rd_computed, 0); x->skip_txfm[0] = is_skippable; if (is_skippable) { this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); @@ -2095,9 +2360,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, this_rdc.rate += vp9_get_switchable_rate(cpi, xd); } } else { - this_rdc.rate += cm->interp_filter == SWITCHABLE - ? vp9_get_switchable_rate(cpi, xd) - : 0; + if (cm->interp_filter == SWITCHABLE) { + if ((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) + this_rdc.rate += vp9_get_switchable_rate(cpi, xd); + } this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); } @@ -2138,7 +2404,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // Skipping checking: test to see if this block can be reconstructed by // prediction only. - if (cpi->allow_encode_breakout) { + if (cpi->allow_encode_breakout && !xd->lossless && !scene_change_detected && + !svc->high_num_blocks_with_motion) { encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame, this_mode, var_y, sse_y, yv12_mb, &this_rdc.rate, &this_rdc.dist, flag_preduv_computed); @@ -2149,6 +2416,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } } + // On spatially flat blocks for screen content: bias against zero-last + // if the sse_y is non-zero. Only on scene change or high motion frames.
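// The bias is a plain left-shift, i.e. the RD cost is quadrupled
// (rdcost << 2 == 4 * rdcost), which makes zero-mv LAST much less likely
// to win the mode decision on such blocks.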
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && + (scene_change_detected || svc->high_num_blocks_with_motion) && + ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0 && + svc->spatial_layer_id == 0 && x->source_variance == 0 && sse_y > 0) { + this_rdc.rdcost = this_rdc.rdcost << 2; + } + #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc_pickmode && cpi->denoiser.denoising_level > kDenLowLow) { @@ -2165,71 +2441,86 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (this_rdc.rdcost < best_rdc.rdcost || x->skip) { best_rdc = this_rdc; - best_mode = this_mode; - best_pred_filter = mi->interp_filter; - best_tx_size = mi->tx_size; - best_ref_frame = ref_frame; - best_mode_skip_txfm = x->skip_txfm[0]; best_early_term = this_early_term; - best_second_ref_frame = second_ref_frame; + best_pickmode.best_mode = this_mode; + best_pickmode.best_pred_filter = mi->interp_filter; + best_pickmode.best_tx_size = mi->tx_size; + best_pickmode.best_ref_frame = ref_frame; + best_pickmode.best_mode_skip_txfm = x->skip_txfm[0]; + best_pickmode.best_second_ref_frame = second_ref_frame; if (reuse_inter_pred) { - free_pred_buffer(best_pred); - best_pred = this_mode_pred; + free_pred_buffer(best_pickmode.best_pred); + best_pickmode.best_pred = this_mode_pred; } } else { if (reuse_inter_pred) free_pred_buffer(this_mode_pred); } - if (x->skip) break; + if (x->skip && + (!force_test_gf_zeromv || mode_checked[ZEROMV][GOLDEN_FRAME])) + break; // If early termination flag is 1 and at least 2 modes are checked, // the mode search is terminated. - if (best_early_term && idx > 0) { + if (best_early_term && idx > 0 && !scene_change_detected && + (!force_test_gf_zeromv || mode_checked[ZEROMV][GOLDEN_FRAME])) { x->skip = 1; break; } } - mi->mode = best_mode; - mi->interp_filter = best_pred_filter; - mi->tx_size = best_tx_size; - mi->ref_frame[0] = best_ref_frame; - mi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int; + mi->mode = best_pickmode.best_mode; + mi->interp_filter = best_pickmode.best_pred_filter; + mi->tx_size = best_pickmode.best_tx_size; + mi->ref_frame[0] = best_pickmode.best_ref_frame; + mi->mv[0].as_int = + frame_mv[best_pickmode.best_mode][best_pickmode.best_ref_frame].as_int; xd->mi[0]->bmi[0].as_mv[0].as_int = mi->mv[0].as_int; - x->skip_txfm[0] = best_mode_skip_txfm; - mi->ref_frame[1] = best_second_ref_frame; + x->skip_txfm[0] = best_pickmode.best_mode_skip_txfm; + mi->ref_frame[1] = best_pickmode.best_second_ref_frame; // For spatial enhancement layer: perform intra prediction only if base // layer is chosen as the reference. Always perform intra prediction if - // LAST is the only reference or is_key_frame is set. + // LAST is the only reference, or is_key_frame is set, or on base + // temporal layer.
+ if (svc->spatial_layer_id && !gf_temporal_ref) { perform_intra_pred = - cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame || + svc->temporal_layer_id == 0 || + svc->layer_context[svc->temporal_layer_id].is_key_frame || !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) || - (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && - svc_force_zero_mode[best_ref_frame - 1]); + (!svc->layer_context[svc->temporal_layer_id].is_key_frame && + svc_force_zero_mode[best_pickmode.best_ref_frame - 1]); inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh; } - if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && - cpi->rc.is_src_frame_alt_ref) + if ((cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && + cpi->rc.is_src_frame_alt_ref) || + svc->previous_frame_is_intra_only) perform_intra_pred = 0; + + // If the segment reference frame feature is enabled and set then + // skip the intra prediction. + if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) && + get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) > 0) + perform_intra_pred = 0; + // Perform intra prediction search, if the best SAD is above a certain // threshold. if (best_rdc.rdcost == INT64_MAX || + (cpi->oxcf.content == VP9E_CONTENT_SCREEN && x->source_variance == 0) || + (scene_change_detected && perform_intra_pred) || ((!force_skip_low_temp_var || bsize < BLOCK_32X32 || x->content_state_sb == kVeryHighSad) && perform_intra_pred && !x->skip && best_rdc.rdcost > inter_mode_thresh && bsize <= cpi->sf.max_intra_bsize && !x->skip_low_source_sad && !x->lowvar_highsumdiff)) { struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 }; + int64_t this_sse = INT64_MAX; int i; - TX_SIZE best_intra_tx_size = TX_SIZES; + PRED_BUFFER *const best_pred = best_pickmode.best_pred; TX_SIZE intra_tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && intra_tx_size > TX_16X16) - intra_tx_size = TX_16X16; if (reuse_inter_pred && best_pred != NULL) { if (best_pred->data == orig_dst.buf) { @@ -2249,7 +2540,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, this_mode_pred->data, this_mode_pred->stride, NULL, 0, 0, 0, 0, bw, bh); #endif // CONFIG_VP9_HIGHBITDEPTH - best_pred = this_mode_pred; + best_pickmode.best_pred = this_mode_pred; } } pd->dst = orig_dst; @@ -2258,21 +2549,34 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, const PREDICTION_MODE this_mode = intra_mode_list[i]; THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)]; int mode_rd_thresh = rd_threshes[mode_index]; + // For spatially flat blocks, under short_circuit_flat_blocks flag: + // only check DC mode for stationary blocks, otherwise also check + // H and V mode. 
if (sf->short_circuit_flat_blocks && x->source_variance == 0 && - this_mode != DC_PRED) { + ((x->zero_temp_sad_source && this_mode != DC_PRED) || i > 2)) { continue; } if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize])) continue; + if (cpi->sf.rt_intra_dc_only_low_content && this_mode != DC_PRED && + x->content_state_sb != kVeryHighSad) + continue; + if ((cpi->sf.adaptive_rd_thresh_row_mt && rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh, &rd_thresh_freq_fact[mode_index])) || (!cpi->sf.adaptive_rd_thresh_row_mt && rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, - &rd_thresh_freq_fact[mode_index]))) - continue; + &rd_thresh_freq_fact[mode_index]))) { + // Avoid this early exit for screen on base layer, for scene + // changes or high motion frames. + if (cpi->oxcf.content != VP9E_CONTENT_SCREEN || + svc->spatial_layer_id > 0 || + (!scene_change_detected && !svc->high_num_blocks_with_motion)) + continue; + } mi->mode = this_mode; mi->ref_frame[0] = INTRA_FRAME; @@ -2281,8 +2585,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, args.skippable = 1; args.rdc = &this_rdc; mi->tx_size = intra_tx_size; - vp9_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra, - &args); + + compute_intra_yprediction(this_mode, bsize, x, xd); + model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, + &var_y, &sse_y, 1); + block_yrd(cpi, x, &this_rdc, &args.skippable, &this_sse, bsize, + VPXMIN(mi->tx_size, TX_16X16), 1, 1); + // Check skip cost here since skippable is not set for uv; this // mirrors the behavior used by inter. if (args.skippable) { @@ -2309,36 +2618,37 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (this_rdc.rdcost < best_rdc.rdcost) { best_rdc = this_rdc; - best_mode = this_mode; - best_intra_tx_size = mi->tx_size; - best_ref_frame = INTRA_FRAME; - best_second_ref_frame = NONE; + best_pickmode.best_mode = this_mode; + best_pickmode.best_intra_tx_size = mi->tx_size; + best_pickmode.best_ref_frame = INTRA_FRAME; + best_pickmode.best_second_ref_frame = NONE; mi->uv_mode = this_mode; mi->mv[0].as_int = INVALID_MV; mi->mv[1].as_int = INVALID_MV; - best_mode_skip_txfm = x->skip_txfm[0]; + best_pickmode.best_mode_skip_txfm = x->skip_txfm[0]; } } // Reset mb_mode_info to the best inter mode. - if (best_ref_frame != INTRA_FRAME) { - mi->tx_size = best_tx_size; + if (best_pickmode.best_ref_frame != INTRA_FRAME) { + mi->tx_size = best_pickmode.best_tx_size; } else { - mi->tx_size = best_intra_tx_size; + mi->tx_size = best_pickmode.best_intra_tx_size; } } pd->dst = orig_dst; - mi->mode = best_mode; - mi->ref_frame[0] = best_ref_frame; - mi->ref_frame[1] = best_second_ref_frame; - x->skip_txfm[0] = best_mode_skip_txfm; + mi->mode = best_pickmode.best_mode; + mi->ref_frame[0] = best_pickmode.best_ref_frame; + mi->ref_frame[1] = best_pickmode.best_second_ref_frame; + x->skip_txfm[0] = best_pickmode.best_mode_skip_txfm; if (!is_inter_block(mi)) { mi->interp_filter = SWITCHABLE_FILTERS; } - if (reuse_inter_pred && best_pred != NULL) { + if (reuse_inter_pred && best_pickmode.best_pred != NULL) { + PRED_BUFFER *const best_pred = best_pickmode.best_pred; if (best_pred->data != orig_dst.buf && is_inter_mode(mi->mode)) { #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) @@ -2367,25 +2677,27 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // Remove this condition when the issue is resolved.
if (x->sb_pickmode_part) ctx->sb_skip_denoising = 1; vp9_pickmode_ctx_den_update(&ctx_den, zero_last_cost_orig, ref_frame_cost, - frame_mv, reuse_inter_pred, best_tx_size, - best_mode, best_ref_frame, best_pred_filter, - best_mode_skip_txfm); - vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision); - recheck_zeromv_after_denoising(cpi, mi, x, xd, decision, &ctx_den, yv12_mb, - &best_rdc, bsize, mi_row, mi_col); - best_ref_frame = ctx_den.best_ref_frame; + frame_mv, reuse_inter_pred, &best_pickmode); + vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision, + gf_temporal_ref); + if (denoise_recheck_zeromv) + recheck_zeromv_after_denoising(cpi, mi, x, xd, decision, &ctx_den, + yv12_mb, &best_rdc, bsize, mi_row, mi_col); + best_pickmode.best_ref_frame = ctx_den.best_ref_frame; } #endif - if (best_ref_frame == ALTREF_FRAME || best_second_ref_frame == ALTREF_FRAME) + if (best_pickmode.best_ref_frame == ALTREF_FRAME || + best_pickmode.best_second_ref_frame == ALTREF_FRAME) x->arf_frame_usage++; - else if (best_ref_frame != INTRA_FRAME) + else if (best_pickmode.best_ref_frame != INTRA_FRAME) x->lastgolden_frame_usage++; if (cpi->sf.adaptive_rd_thresh) { - THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mi->mode)]; + THR_MODES best_mode_idx = + mode_idx[best_pickmode.best_ref_frame][mode_offset(mi->mode)]; - if (best_ref_frame == INTRA_FRAME) { + if (best_pickmode.best_ref_frame == INTRA_FRAME) { // Only consider the modes that are included in the intra_mode_list. int intra_modes = sizeof(intra_mode_list) / sizeof(PREDICTION_MODE); int i; @@ -2405,7 +2717,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } else { for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) { PREDICTION_MODE this_mode; - if (best_ref_frame != ref_frame) continue; + if (best_pickmode.best_ref_frame != ref_frame) continue; for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { if (cpi->sf.adaptive_rd_thresh_row_mt) update_thresh_freq_fact_row_mt(cpi, tile_data, x->source_variance, @@ -2585,9 +2897,10 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, x, &tmp_mv, &mbmi_ext->ref_mvs[ref_frame][0].as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, - &dummy_dist, &x->pred_sse[ref_frame], NULL, 0, 0); + cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), + x->nmvjointcost, x->mvcost, &dummy_dist, + &x->pred_sse[ref_frame], NULL, 0, 0, + cpi->sf.use_accurate_subpel_search); xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv; } else { @@ -2620,7 +2933,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, #endif model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, - &var_y, &sse_y); + &var_y, &sse_y, 0); this_rdc.rate += b_rate; this_rdc.rdcost = diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_pickmode.h b/media/libvpx/libvpx/vp9/encoder/vp9_pickmode.h index 9aa00c4fabfa..15207e6cf4dc 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_pickmode.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_pickmode.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_ENCODER_VP9_PICKMODE_H_ -#define VP9_ENCODER_VP9_PICKMODE_H_ +#ifndef VPX_VP9_ENCODER_VP9_PICKMODE_H_ +#define VPX_VP9_ENCODER_VP9_PICKMODE_H_ #include "vp9/encoder/vp9_encoder.h" @@ -32,4 +32,4 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, } // extern "C" #endif -#endif // VP9_ENCODER_VP9_PICKMODE_H_ +#endif // VPX_VP9_ENCODER_VP9_PICKMODE_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_quantize.c b/media/libvpx/libvpx/vp9/encoder/vp9_quantize.c index 09f61ead2636..26d1434c3492 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_quantize.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_quantize.c @@ -204,10 +204,9 @@ static int get_qzbin_factor(int q, vpx_bit_depth_t bit_depth) { switch (bit_depth) { case VPX_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80); case VPX_BITS_10: return q == 0 ? 64 : (quant < 592 ? 84 : 80); - case VPX_BITS_12: return q == 0 ? 64 : (quant < 2368 ? 84 : 80); default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; + assert(bit_depth == VPX_BITS_12); + return q == 0 ? 64 : (quant < 2368 ? 84 : 80); } #else (void)bit_depth; @@ -221,13 +220,20 @@ void vp9_init_quantizer(VP9_COMP *cpi) { int i, q, quant; for (q = 0; q < QINDEX_RANGE; q++) { - const int qzbin_factor = get_qzbin_factor(q, cm->bit_depth); - const int qrounding_factor = q == 0 ? 64 : 48; + int qzbin_factor = get_qzbin_factor(q, cm->bit_depth); + int qrounding_factor = q == 0 ? 64 : 48; + const int sharpness_adjustment = 16 * (7 - cpi->oxcf.sharpness) / 7; + + if (cpi->oxcf.sharpness > 0 && q > 0) { + qzbin_factor = 64 + sharpness_adjustment; + qrounding_factor = 64 - sharpness_adjustment; + } for (i = 0; i < 2; ++i) { int qrounding_factor_fp = i == 0 ? 48 : 42; if (q == 0) qrounding_factor_fp = 64; - + if (cpi->oxcf.sharpness > 0) + qrounding_factor_fp = 64 - sharpness_adjustment; // y quant = i == 0 ? 
vp9_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth) : vp9_ac_quant(q, 0, cm->bit_depth); @@ -282,12 +288,12 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { // Y x->plane[0].quant = quants->y_quant[qindex]; x->plane[0].quant_fp = quants->y_quant_fp[qindex]; - x->plane[0].round_fp = quants->y_round_fp[qindex]; + memcpy(x->plane[0].round_fp, quants->y_round_fp[qindex], + 8 * sizeof(*(x->plane[0].round_fp))); x->plane[0].quant_shift = quants->y_quant_shift[qindex]; x->plane[0].zbin = quants->y_zbin[qindex]; x->plane[0].round = quants->y_round[qindex]; xd->plane[0].dequant = cpi->y_dequant[qindex]; - x->plane[0].quant_thred[0] = x->plane[0].zbin[0] * x->plane[0].zbin[0]; x->plane[0].quant_thred[1] = x->plane[0].zbin[1] * x->plane[0].zbin[1]; @@ -295,12 +301,12 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { for (i = 1; i < 3; i++) { x->plane[i].quant = quants->uv_quant[qindex]; x->plane[i].quant_fp = quants->uv_quant_fp[qindex]; - x->plane[i].round_fp = quants->uv_round_fp[qindex]; + memcpy(x->plane[i].round_fp, quants->uv_round_fp[qindex], + 8 * sizeof(*(x->plane[i].round_fp))); x->plane[i].quant_shift = quants->uv_quant_shift[qindex]; x->plane[i].zbin = quants->uv_zbin[qindex]; x->plane[i].round = quants->uv_round[qindex]; xd->plane[i].dequant = cpi->uv_dequant[qindex]; - x->plane[i].quant_thred[0] = x->plane[i].zbin[0] * x->plane[i].zbin[0]; x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1]; } diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_quantize.h b/media/libvpx/libvpx/vp9/encoder/vp9_quantize.h index 61320361b6c2..ed9b84958499 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_quantize.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_quantize.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_QUANTIZE_H_ -#define VP9_ENCODER_VP9_QUANTIZE_H_ +#ifndef VPX_VP9_ENCODER_VP9_QUANTIZE_H_ +#define VPX_VP9_ENCODER_VP9_QUANTIZE_H_ #include "./vpx_config.h" #include "vp9/encoder/vp9_block.h" @@ -59,4 +59,4 @@ int vp9_qindex_to_quantizer(int qindex); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_QUANTIZE_H_ +#endif // VPX_VP9_ENCODER_VP9_QUANTIZE_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_ratectrl.c b/media/libvpx/libvpx/vp9/encoder/vp9_ratectrl.c index b7f3a0e897b9..cbafbf7b9af4 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_ratectrl.c @@ -31,10 +31,13 @@ #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_ratectrl.h" -// Max rate target for 1080P and below encodes under normal circumstances -// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB +// Max rate per frame for 1080P and below encodes if no level requirement given. +// For larger formats limit to MAX_MB_RATE bits per MB. +// 4Mbits is derived from the level requirement for level 4 (1080P 30) which +// requires that HW can sustain a rate of 16Mbits over a 4-frame group. +// If a lower level requirement is specified then it may override this value.
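// Worked out: 16,000,000 bits / 4 frames = 4,000,000 bits per frame, hence
// MAXRATE_1080P below. For comparison, the per-MB cap amounts to roughly
// (1920 / 16) * (1080 / 16) * 250 = 2,025,000 bits per frame at 1080p,
// which was the previous MAXRATE_1080P default.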
#define MAX_MB_RATE 250 -#define MAXRATE_1080P 2025000 +#define MAXRATE_1080P 4000000 #define DEFAULT_KF_BOOST 2000 #define DEFAULT_GF_BOOST 2000 @@ -45,18 +48,16 @@ #define MAX_BPB_FACTOR 50 #if CONFIG_VP9_HIGHBITDEPTH -#define ASSIGN_MINQ_TABLE(bit_depth, name) \ - do { \ - switch (bit_depth) { \ - case VPX_BITS_8: name = name##_8; break; \ - case VPX_BITS_10: name = name##_10; break; \ - case VPX_BITS_12: name = name##_12; break; \ - default: \ - assert(0 && \ - "bit_depth should be VPX_BITS_8, VPX_BITS_10" \ - " or VPX_BITS_12"); \ - name = NULL; \ - } \ +#define ASSIGN_MINQ_TABLE(bit_depth, name) \ + do { \ + switch (bit_depth) { \ + case VPX_BITS_8: name = name##_8; break; \ + case VPX_BITS_10: name = name##_10; break; \ + default: \ + assert(bit_depth == VPX_BITS_12); \ + name = name##_12; \ + break; \ + } \ } while (0) #else #define ASSIGN_MINQ_TABLE(bit_depth, name) \ @@ -97,8 +98,8 @@ static int kf_low = 400; #else static int gf_high = 2000; static int gf_low = 400; -static int kf_high = 5000; -static int kf_low = 400; +static int kf_high = 4800; +static int kf_low = 300; #endif // Functions to compute the active minq lookup table entries based on a @@ -128,7 +129,7 @@ static void init_minq_luts(int *kf_low_m, int *kf_high_m, int *arfgf_low, for (i = 0; i < QINDEX_RANGE; i++) { const double maxq = vp9_convert_qindex_to_q(i, bit_depth); kf_low_m[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.150, bit_depth); - kf_high_m[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55, bit_depth); + kf_high_m[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.45, bit_depth); #ifdef AGGRESSIVE_VBR arfgf_low[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.275, bit_depth); inter[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.80, bit_depth); @@ -164,10 +165,9 @@ double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth) { switch (bit_depth) { case VPX_BITS_8: return vp9_ac_quant(qindex, 0, bit_depth) / 4.0; case VPX_BITS_10: return vp9_ac_quant(qindex, 0, bit_depth) / 16.0; - case VPX_BITS_12: return vp9_ac_quant(qindex, 0, bit_depth) / 64.0; default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1.0; + assert(bit_depth == VPX_BITS_12); + return vp9_ac_quant(qindex, 0, bit_depth) / 64.0; } #else return vp9_ac_quant(qindex, 0, bit_depth) / 4.0; @@ -211,17 +211,15 @@ int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { const RATE_CONTROL *rc = &cpi->rc; const VP9EncoderConfig *oxcf = &cpi->oxcf; - if (cpi->oxcf.pass != 2) { - const int min_frame_target = - VPXMAX(rc->min_frame_bandwidth, rc->avg_frame_bandwidth >> 5); - if (target < min_frame_target) target = min_frame_target; - if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) { - // If there is an active ARF at this location use the minimum - // bits on this frame even if it is a constructed arf. - // The active maximum quantizer insures that an appropriate - // number of bits will be spent if needed for constructed ARFs. - target = min_frame_target; - } + const int min_frame_target = + VPXMAX(rc->min_frame_bandwidth, rc->avg_frame_bandwidth >> 5); + if (target < min_frame_target) target = min_frame_target; + if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) { + // If there is an active ARF at this location use the minimum + // bits on this frame even if it is a constructed arf. + // The active maximum quantizer insures that an appropriate + // number of bits will be spent if needed for constructed ARFs. 
+ target = min_frame_target; } // Clip the frame target to the maximum allowed value. @@ -247,20 +245,68 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { return target; } +// TODO(marpan/jianj): bits_off_target and buffer_level are used in the same +// way for CBR mode, for the buffering updates below. Look into removing one +// of these (i.e., bits_off_target). +// Update the buffer level before encoding with the per-frame-bandwidth. +static void update_buffer_level_preencode(VP9_COMP *cpi) { + RATE_CONTROL *const rc = &cpi->rc; + rc->bits_off_target += rc->avg_frame_bandwidth; + // Clip the buffer level to the maximum specified buffer size. + rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size); + rc->buffer_level = rc->bits_off_target; +} + +// Update the buffer level before encoding with the per-frame-bandwidth +// for SVC. The current and all upper temporal layers are updated, needed +// for the layered rate control which involves cumulative buffer levels for +// the temporal layers. Allow for using the timestamp(pts) delta for the +// framerate when the set_ref_frame_config is used. +static void update_buffer_level_svc_preencode(VP9_COMP *cpi) { + SVC *const svc = &cpi->svc; + int i; + // Set this to 1 to use timestamp delta for "framerate" under + // ref_frame_config usage. + int use_timestamp = 1; + const int64_t ts_delta = + svc->time_stamp_superframe - svc->time_stamp_prev[svc->spatial_layer_id]; + for (i = svc->temporal_layer_id; i < svc->number_temporal_layers; ++i) { + const int layer = + LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, svc->number_temporal_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + if (use_timestamp && cpi->svc.use_set_ref_frame_config && + svc->number_temporal_layers == 1 && ts_delta > 0 && + svc->current_superframe > 0) { + // TODO(marpan): This may need to be modified for temporal layers. + const double framerate_pts = 10000000.0 / ts_delta; + lrc->bits_off_target += (int)(lc->target_bandwidth / framerate_pts); + } else { + lrc->bits_off_target += (int)(lc->target_bandwidth / lc->framerate); + } + // Clip buffer level to maximum buffer size for the layer. + lrc->bits_off_target = + VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size); + lrc->buffer_level = lrc->bits_off_target; + if (i == svc->temporal_layer_id) { + cpi->rc.bits_off_target = lrc->bits_off_target; + cpi->rc.buffer_level = lrc->buffer_level; + } + } +} + // Update the buffer level for higher temporal layers, given the encoded current // temporal layer. -static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { +static void update_layer_buffer_level_postencode(SVC *svc, + int encoded_frame_size) { int i = 0; - int current_temporal_layer = svc->temporal_layer_id; + const int current_temporal_layer = svc->temporal_layer_id; for (i = current_temporal_layer + 1; i < svc->number_temporal_layers; ++i) { const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, svc->number_temporal_layers); LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; - int bits_off_for_this_layer = - (int)(lc->target_bandwidth / lc->framerate - encoded_frame_size); - lrc->bits_off_target += bits_off_for_this_layer; - + lrc->bits_off_target -= encoded_frame_size; // Clip buffer level to maximum buffer size for the layer.
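// Leaky-bucket arithmetic in the pre-encode updates above, with
// illustrative numbers: at target_bandwidth = 1,000,000 bps and a framerate
// of 30, each frame credits about 1,000,000 / 30 = 33,333 bits to the
// bucket, while the post-encode update debits the actual encoded frame size.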
lrc->bits_off_target = VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size); @@ -268,21 +314,13 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { } } -// Update the buffer level: leaky bucket model. -static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { - const VP9_COMMON *const cm = &cpi->common; +// Update the buffer level after encoding with encoded frame size. +static void update_buffer_level_postencode(VP9_COMP *cpi, + int encoded_frame_size) { RATE_CONTROL *const rc = &cpi->rc; - - // Non-viewable frames are a special case and are treated as pure overhead. - if (!cm->show_frame) { - rc->bits_off_target -= encoded_frame_size; - } else { - rc->bits_off_target += rc->avg_frame_bandwidth - encoded_frame_size; - } - + rc->bits_off_target -= encoded_frame_size; // Clip the buffer level to the maximum specified buffer size. rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size); - // For screen-content mode, and if frame-dropper is off, don't let buffer // level go below threshold, given here as -rc->maximum_buffer_size. if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && @@ -292,7 +330,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { rc->buffer_level = rc->bits_off_target; if (is_one_pass_cbr_svc(cpi)) { - update_layer_buffer_level(&cpi->svc, encoded_frame_size); + update_layer_buffer_level_postencode(&cpi->svc, encoded_frame_size); } } @@ -355,6 +393,9 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { rc->high_source_sad = 0; rc->reset_high_source_sad = 0; rc->high_source_sad_lagindex = -1; + rc->high_num_blocks_with_motion = 0; + rc->hybrid_intra_scene_change = 0; + rc->re_encode_maxq_scene_change = 0; rc->alt_ref_gf_group = 0; rc->last_frame_is_src_altref = 0; rc->fac_active_worst_inter = 150; @@ -377,6 +418,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { rc->rate_correction_factors[i] = 1.0; + rc->damped_adjustment[i] = 0; } rc->min_gf_interval = oxcf->min_gf_interval; @@ -388,27 +430,115 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { rc->max_gf_interval = vp9_rc_get_default_max_gf_interval( oxcf->init_framerate, rc->min_gf_interval); rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2; + + rc->force_max_q = 0; + rc->last_post_encode_dropped_scene_change = 0; + rc->use_post_encode_drop = 0; + rc->ext_use_post_encode_drop = 0; + rc->arf_active_best_quality_adjustment_factor = 1.0; + rc->arf_increase_active_best_quality = 0; + rc->preserve_arf_as_gld = 0; + rc->preserve_next_arf_as_gld = 0; + rc->show_arf_as_gld = 0; } -int vp9_rc_drop_frame(VP9_COMP *cpi) { +static int check_buffer_above_thresh(VP9_COMP *cpi, int drop_mark) { + SVC *svc = &cpi->svc; + if (!cpi->use_svc || cpi->svc.framedrop_mode != FULL_SUPERFRAME_DROP) { + RATE_CONTROL *const rc = &cpi->rc; + return (rc->buffer_level > drop_mark); + } else { + int i; + // For SVC in FULL_SUPERFRAME_DROP mode: the condition on + // buffer (if it's above threshold, so no drop) is checked on current and + // upper spatial layers. If any spatial layer is not above threshold then + // we return 0.
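// Illustrative numbers for the loop below: with framedrop_thresh[i] = 30
// and optimal_buffer_level = 600,000 bits, drop_mark_layer works out to
// 30 * 600,000 / 100 = 180,000 bits, and any layer at or below that level
// vetoes the above-threshold result for the whole superframe.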
+ for (i = svc->spatial_layer_id; i < svc->number_spatial_layers; ++i) { + const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id, + svc->number_temporal_layers); + LAYER_CONTEXT *lc = &svc->layer_context[layer]; + RATE_CONTROL *lrc = &lc->rc; + // Exclude check for layer whose bitrate is 0. + if (lc->target_bandwidth > 0) { + const int drop_mark_layer = (int)(cpi->svc.framedrop_thresh[i] * + lrc->optimal_buffer_level / 100); + if (!(lrc->buffer_level > drop_mark_layer)) return 0; + } + } + return 1; + } +} + +static int check_buffer_below_thresh(VP9_COMP *cpi, int drop_mark) { + SVC *svc = &cpi->svc; + if (!cpi->use_svc || cpi->svc.framedrop_mode == LAYER_DROP) { + RATE_CONTROL *const rc = &cpi->rc; + return (rc->buffer_level <= drop_mark); + } else { + int i; + // For SVC in the constrained framedrop mode (svc->framedrop_mode = + // CONSTRAINED_LAYER_DROP or FULL_SUPERFRAME_DROP): the condition on + // buffer (if it's below threshold, so drop frame) is checked on current + // and upper spatial layers. For FULL_SUPERFRAME_DROP mode if any + // spatial layer is <= threshold, then we return 1 (drop). + for (i = svc->spatial_layer_id; i < svc->number_spatial_layers; ++i) { + const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id, + svc->number_temporal_layers); + LAYER_CONTEXT *lc = &svc->layer_context[layer]; + RATE_CONTROL *lrc = &lc->rc; + // Exclude check for layer whose bitrate is 0. + if (lc->target_bandwidth > 0) { + const int drop_mark_layer = (int)(cpi->svc.framedrop_thresh[i] * + lrc->optimal_buffer_level / 100); + if (cpi->svc.framedrop_mode == FULL_SUPERFRAME_DROP) { + if (lrc->buffer_level <= drop_mark_layer) return 1; + } else { + if (!(lrc->buffer_level <= drop_mark_layer)) return 0; + } + } + } + if (cpi->svc.framedrop_mode == FULL_SUPERFRAME_DROP) + return 0; + else + return 1; + } +} + +int vp9_test_drop(VP9_COMP *cpi) { const VP9EncoderConfig *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; - if (!oxcf->drop_frames_water_mark || - (is_one_pass_cbr_svc(cpi) && - cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode)) { + SVC *svc = &cpi->svc; + int drop_frames_water_mark = oxcf->drop_frames_water_mark; + if (cpi->use_svc) { + // If we have dropped max_consec_drop frames, then we don't + // drop this spatial layer, and reset counter to 0. + if (svc->drop_count[svc->spatial_layer_id] == svc->max_consec_drop) { + svc->drop_count[svc->spatial_layer_id] = 0; + return 0; + } else { + drop_frames_water_mark = svc->framedrop_thresh[svc->spatial_layer_id]; + } + } + if (!drop_frames_water_mark || + (svc->spatial_layer_id > 0 && + svc->framedrop_mode == FULL_SUPERFRAME_DROP)) { return 0; } else { - if (rc->buffer_level < 0) { + if ((rc->buffer_level < 0 && svc->framedrop_mode != FULL_SUPERFRAME_DROP) || + (check_buffer_below_thresh(cpi, -1) && + svc->framedrop_mode == FULL_SUPERFRAME_DROP)) { // Always drop if buffer is below 0. return 1; } else { // If buffer is below drop_mark, for now just drop every other frame // (starting with the next frame) until it increases back over drop_mark.
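// The every-other-frame behaviour is driven by decimation_factor together
// with its companion frame counter (assumed to be maintained further down
// in this function, as elsewhere in this file): a factor of 1 arms the
// dropper, and frames then alternate between dropped and encoded while the
// buffer stays below drop_mark.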
int drop_mark = - (int)(oxcf->drop_frames_water_mark * rc->optimal_buffer_level / 100); - if ((rc->buffer_level > drop_mark) && (rc->decimation_factor > 0)) { + (int)(drop_frames_water_mark * rc->optimal_buffer_level / 100); + if (check_buffer_above_thresh(cpi, drop_mark) && + (rc->decimation_factor > 0)) { --rc->decimation_factor; - } else if (rc->buffer_level <= drop_mark && rc->decimation_factor == 0) { + } else if (check_buffer_below_thresh(cpi, drop_mark) && + rc->decimation_factor == 0) { rc->decimation_factor = 1; } if (rc->decimation_factor > 0) { @@ -427,11 +557,134 @@ } } +int post_encode_drop_cbr(VP9_COMP *cpi, size_t *size) { + size_t frame_size = *size << 3; + int64_t new_buffer_level = + cpi->rc.buffer_level + cpi->rc.avg_frame_bandwidth - (int64_t)frame_size; + + // For now we drop if new buffer level (given the encoded frame size) goes + // below 0. + if (new_buffer_level < 0) { + *size = 0; + vp9_rc_postencode_update_drop_frame(cpi); + // Update flag to use for next frame. + if (cpi->rc.high_source_sad || + (cpi->use_svc && cpi->svc.high_source_sad_superframe)) + cpi->rc.last_post_encode_dropped_scene_change = 1; + // Force max_q on next frame. + cpi->rc.force_max_q = 1; + cpi->rc.avg_frame_qindex[INTER_FRAME] = cpi->rc.worst_quality; + cpi->last_frame_dropped = 1; + cpi->ext_refresh_frame_flags_pending = 0; + if (cpi->use_svc) { + SVC *svc = &cpi->svc; + int sl = 0; + int tl = 0; + svc->last_layer_dropped[svc->spatial_layer_id] = 1; + svc->drop_spatial_layer[svc->spatial_layer_id] = 1; + svc->drop_count[svc->spatial_layer_id]++; + svc->skip_enhancement_layer = 1; + // Postencode drop is only checked on base spatial layer, + // for now if max-q is set on base we force it on all layers. + for (sl = 0; sl < svc->number_spatial_layers; ++sl) { + for (tl = 0; tl < svc->number_temporal_layers; ++tl) { + const int layer = + LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers); + LAYER_CONTEXT *lc = &svc->layer_context[layer]; + RATE_CONTROL *lrc = &lc->rc; + lrc->force_max_q = 1; + lrc->avg_frame_qindex[INTER_FRAME] = cpi->rc.worst_quality; + } + } + } + return 1; + } + + cpi->rc.force_max_q = 0; + cpi->rc.last_post_encode_dropped_scene_change = 0; + return 0; +} + +int vp9_rc_drop_frame(VP9_COMP *cpi) { + SVC *svc = &cpi->svc; + int svc_prev_layer_dropped = 0; + // In the constrained or full_superframe framedrop mode for svc + // (framedrop_mode is neither LAYER_DROP nor CONSTRAINED_FROM_ABOVE_DROP), + // if the previous spatial layer was dropped, drop the current spatial layer.
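// Sketch of the cascade, assuming three spatial layers S0..S2: if S0 is
// dropped in one of those constrained modes, S1 and S2 are dropped with it,
// so the decoder never receives a superframe whose base layer is missing.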
+ if (cpi->use_svc && svc->spatial_layer_id > 0 && + svc->drop_spatial_layer[svc->spatial_layer_id - 1]) + svc_prev_layer_dropped = 1; + if ((svc_prev_layer_dropped && svc->framedrop_mode != LAYER_DROP && + svc->framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP) || + svc->force_drop_constrained_from_above[svc->spatial_layer_id] || + vp9_test_drop(cpi)) { + vp9_rc_postencode_update_drop_frame(cpi); + cpi->ext_refresh_frame_flags_pending = 0; + cpi->last_frame_dropped = 1; + if (cpi->use_svc) { + svc->last_layer_dropped[svc->spatial_layer_id] = 1; + svc->drop_spatial_layer[svc->spatial_layer_id] = 1; + svc->drop_count[svc->spatial_layer_id]++; + svc->skip_enhancement_layer = 1; + if (svc->framedrop_mode == LAYER_DROP || + (svc->framedrop_mode == CONSTRAINED_FROM_ABOVE_DROP && + svc->force_drop_constrained_from_above[svc->number_spatial_layers - + 1] == 0) || + svc->drop_spatial_layer[0] == 0) { + // For the case of constrained drop mode where full superframe is + // dropped, we don't increment the svc frame counters. + // In particular the temporal layer counter (which is incremented in + // vp9_inc_frame_in_layer()) won't be incremented, so on a dropped + // frame we try the same temporal_layer_id on next incoming frame. + // This is to avoid an issue with temporal alignment with full + // superframe dropping. + vp9_inc_frame_in_layer(cpi); + } + if (svc->spatial_layer_id == svc->number_spatial_layers - 1) { + int i; + int all_layers_drop = 1; + for (i = 0; i < svc->spatial_layer_id; i++) { + if (svc->drop_spatial_layer[i] == 0) { + all_layers_drop = 0; + break; + } + } + if (all_layers_drop == 1) svc->skip_enhancement_layer = 0; + } + } + return 1; + } + return 0; +} + +static int adjust_q_cbr(const VP9_COMP *cpi, int q) { + // This makes sure q is between oscillating Qs to prevent resonance. + if (!cpi->rc.reset_high_source_sad && + (!cpi->oxcf.gf_cbr_boost_pct || + !(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) && + (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) && + cpi->rc.q_1_frame != cpi->rc.q_2_frame) { + int qclamp = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame), + VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame)); + // If the previous frame had overshoot and the current q needs to increase + // above the clamped value, reduce the clamp for faster reaction to + // overshoot. + if (cpi->rc.rc_1_frame == -1 && q > qclamp) + q = (q + qclamp) >> 1; + else + q = qclamp; + } + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) + vp9_cyclic_refresh_limit_q(cpi, &q); + return VPXMAX(VPXMIN(q, cpi->rc.worst_quality), cpi->rc.best_quality); +} + static double get_rate_correction_factor(const VP9_COMP *cpi) { const RATE_CONTROL *const rc = &cpi->rc; + const VP9_COMMON *const cm = &cpi->common; double rcf; - if (cpi->common.frame_type == KEY_FRAME) { + if (frame_is_intra_only(cm)) { rcf = rc->rate_correction_factors[KF_STD]; } else if (cpi->oxcf.pass == 2) { RATE_FACTOR_LEVEL rf_lvl = @@ -451,13 +704,14 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) { static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { RATE_CONTROL *const rc = &cpi->rc; + const VP9_COMMON *const cm = &cpi->common; // Normalize RCF to account for the size-dependent scaling factor.
factor /= rcf_mult[cpi->rc.frame_size_selector]; factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR); - if (cpi->common.frame_type == KEY_FRAME) { + if (frame_is_intra_only(cm)) { rc->rate_correction_factors[KF_STD] = factor; } else if (cpi->oxcf.pass == 2) { RATE_FACTOR_LEVEL rf_lvl = @@ -478,6 +732,8 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) { int correction_factor = 100; double rate_correction_factor = get_rate_correction_factor(cpi); double adjustment_limit; + RATE_FACTOR_LEVEL rf_lvl = + cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index]; int projected_size_based_on_q = 0; @@ -494,8 +750,9 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) { projected_size_based_on_q = vp9_cyclic_refresh_estimate_bits_at_q(cpi, rate_correction_factor); } else { + FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type; projected_size_based_on_q = - vp9_estimate_bits_at_q(cpi->common.frame_type, cm->base_qindex, cm->MBs, + vp9_estimate_bits_at_q(frame_type, cm->base_qindex, cm->MBs, rate_correction_factor, cm->bit_depth); } // Work out a size correction factor. @@ -503,10 +760,16 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) { correction_factor = (int)((100 * (int64_t)cpi->rc.projected_frame_size) / projected_size_based_on_q); - // More heavily damped adjustment used if we have been oscillating either side - // of target. - adjustment_limit = - 0.25 + 0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor))); + // Do not use damped adjustment for the first frame of each frame type + if (!cpi->rc.damped_adjustment[rf_lvl]) { + adjustment_limit = 1.0; + cpi->rc.damped_adjustment[rf_lvl] = 1; + } else { + // More heavily damped adjustment used if we have been oscillating either + // side of target. + adjustment_limit = + 0.25 + 0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor))); + } cpi->rc.q_2_frame = cpi->rc.q_1_frame; cpi->rc.q_1_frame = cm->base_qindex; @@ -569,8 +832,9 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, bits_per_mb_at_this_q = (int)vp9_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor); } else { + FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type; bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb( - cm->frame_type, i, correction_factor, cm->bit_depth); + frame_type, i, correction_factor, cm->bit_depth); } if (bits_per_mb_at_this_q <= target_bits_per_mb) { @@ -585,16 +849,9 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, } } while (++i <= active_worst_quality); - // In CBR mode, this makes sure q is between oscillating Qs to prevent - // resonance. - if (cpi->oxcf.rc_mode == VPX_CBR && !cpi->rc.reset_high_source_sad && - (!cpi->oxcf.gf_cbr_boost_pct || - !(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) && - (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) && - cpi->rc.q_1_frame != cpi->rc.q_2_frame) { - q = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame), - VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame)); - } + // Adjustment to q for CBR mode. 
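// For reference, adjust_q_cbr() above clamps q between the previous two
// frame Qs when their rate errors had opposite signs
// (rc_1_frame * rc_2_frame == -1): e.g. q_1_frame = 40, q_2_frame = 50
// pins q to [40, 50]; on fresh overshoot (rc_1_frame == -1) the clamp is
// relaxed to (q + qclamp) / 2 for a faster reaction.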
+ if (cpi->oxcf.rc_mode == VPX_CBR) return adjust_q_cbr(cpi, q); + return q; } @@ -623,13 +880,19 @@ static int get_kf_active_quality(const RATE_CONTROL *const rc, int q, kf_low_motion_minq, kf_high_motion_minq); } -static int get_gf_active_quality(const RATE_CONTROL *const rc, int q, +static int get_gf_active_quality(const VP9_COMP *const cpi, int q, vpx_bit_depth_t bit_depth) { + const GF_GROUP *const gf_group = &cpi->twopass.gf_group; + const RATE_CONTROL *const rc = &cpi->rc; + int *arfgf_low_motion_minq; int *arfgf_high_motion_minq; + const int gfu_boost = cpi->multi_layer_arf + ? gf_group->gfu_boost[gf_group->index] + : rc->gfu_boost; ASSIGN_MINQ_TABLE(bit_depth, arfgf_low_motion_minq); ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq); - return get_active_quality(q, rc->gfu_boost, gf_low, gf_high, + return get_active_quality(q, gfu_boost, gf_low, gf_high, arfgf_low_motion_minq, arfgf_high_motion_minq); } @@ -674,7 +937,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { int active_worst_quality; int ambient_qp; unsigned int num_frames_weight_key = 5 * cpi->svc.number_temporal_layers; - if (cm->frame_type == KEY_FRAME || rc->reset_high_source_sad) + if (frame_is_intra_only(cm) || rc->reset_high_source_sad || rc->force_max_q) return rc->worst_quality; // For ambient_qp we use minimum of avg_frame_qindex[KEY_FRAME/INTER_FRAME] // for the first few frames following key frame. These are both initialized @@ -685,6 +948,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { ? VPXMIN(rc->avg_frame_qindex[INTER_FRAME], rc->avg_frame_qindex[KEY_FRAME]) : rc->avg_frame_qindex[INTER_FRAME]; + active_worst_quality = VPXMIN(rc->worst_quality, (ambient_qp * 5) >> 2); // For SVC if the current base spatial layer was key frame, use the QP from // that base layer for ambient_qp. if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) { @@ -694,13 +958,15 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { if (lc->is_key_frame) { const RATE_CONTROL *lrc = &lc->rc; ambient_qp = VPXMIN(ambient_qp, lrc->last_q[KEY_FRAME]); + active_worst_quality = VPXMIN(rc->worst_quality, (ambient_qp * 9) >> 3); } } - active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 >> 2); if (rc->buffer_level > rc->optimal_buffer_level) { // Adjust down. - // Maximum limit for down adjustment, ~30%. + // Maximum limit for down adjustment ~30%; make it lower for screen content. int max_adjustment_down = active_worst_quality / 3; + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) + max_adjustment_down = active_worst_quality >> 3; if (max_adjustment_down) { buff_lvl_step = ((rc->maximum_buffer_size - rc->optimal_buffer_level) / max_adjustment_down); @@ -769,6 +1035,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, vp9_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth); } } else if (!rc->is_src_frame_alt_ref && !cpi->use_svc && + cpi->oxcf.gf_cbr_boost_pct && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { // Use the lower of active_worst_quality and recent // average Q as basis for GF/ARF best Q limit unless last frame was @@ -779,7 +1046,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, } else { q = active_worst_quality; } - active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth); + active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth); } else { // Use the lower of active_worst_quality and recent/average Q. 
if (cm->current_video_frame > 1) { @@ -804,21 +1071,8 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, *top_index = active_worst_quality; *bottom_index = active_best_quality; -#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY - // Limit Q range for the adaptive loop. - if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced && - !(cm->current_video_frame == 0)) { - int qdelta = 0; - vpx_clear_system_state(); - qdelta = vp9_compute_qdelta_by_rate( - &cpi->rc, cm->frame_type, active_worst_quality, 2.0, cm->bit_depth); - *top_index = active_worst_quality + qdelta; - *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index; - } -#endif - // Special case code to try and match quality with forced key frames - if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) { + if (frame_is_intra_only(cm) && rc->this_key_frame_forced) { q = rc->last_boosted_qindex; } else { q = vp9_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality, @@ -831,6 +1085,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, q = *top_index; } } + assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality); assert(*bottom_index <= rc->worst_quality && *bottom_index >= rc->best_quality); @@ -939,7 +1194,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, if (oxcf->rc_mode == VPX_CQ) { if (q < cq_level) q = cq_level; - active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth); + active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth); // Constrained quality use slightly lower active best. active_best_quality = active_best_quality * 15 / 16; @@ -954,7 +1209,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, delta_qindex = vp9_compute_qdelta(rc, q, q * 0.50, cm->bit_depth); active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); } else { - active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth); + active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth); } } else { if (oxcf->rc_mode == VPX_Q) { @@ -1045,19 +1300,122 @@ int vp9_frame_type_qdelta(const VP9_COMP *cpi, int rf_level, int q) { 1.75, // GF_ARF_STD 2.00, // KF_STD }; - static const FRAME_TYPE frame_type[RATE_FACTOR_LEVELS] = { - INTER_FRAME, INTER_FRAME, INTER_FRAME, INTER_FRAME, KEY_FRAME - }; const VP9_COMMON *const cm = &cpi->common; - int qdelta = - vp9_compute_qdelta_by_rate(&cpi->rc, frame_type[rf_level], q, - rate_factor_deltas[rf_level], cm->bit_depth); + + int qdelta = vp9_compute_qdelta_by_rate( + &cpi->rc, cm->frame_type, q, rate_factor_deltas[rf_level], cm->bit_depth); return qdelta; } #define STATIC_MOTION_THRESH 95 + +static void pick_kf_q_bound_two_pass(const VP9_COMP *cpi, int *bottom_index, + int *top_index) { + const VP9_COMMON *const cm = &cpi->common; + const RATE_CONTROL *const rc = &cpi->rc; + int active_best_quality; + int active_worst_quality = cpi->twopass.active_worst_quality; + + if (rc->this_key_frame_forced) { + // Handle the special case for key frames forced when we have reached + // the maximum key frame interval. Here force the Q to a range + // based on the ambient Q to reduce the risk of popping. 
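// Concretely: in the near-static case below the worst-quality ceiling is
// pushed up to roughly last_boosted_q * 1.25; otherwise the best-quality
// floor is pulled down to roughly last_boosted_q * 0.75, bracketing the
// forced key frame around the last boosted Q.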
+    double last_boosted_q;
+    int delta_qindex;
+    int qindex;
+
+    if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
+      qindex = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
+      active_best_quality = qindex;
+      last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
+      delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
+                                        last_boosted_q * 1.25, cm->bit_depth);
+      active_worst_quality =
+          VPXMIN(qindex + delta_qindex, active_worst_quality);
+    } else {
+      qindex = rc->last_boosted_qindex;
+      last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
+      delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
+                                        last_boosted_q * 0.75, cm->bit_depth);
+      active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
+    }
+  } else {
+    // Not forced keyframe.
+    double q_adj_factor = 1.0;
+    double q_val;
+    // Baseline value derived from cpi->active_worst_quality and kf boost.
+    active_best_quality =
+        get_kf_active_quality(rc, active_worst_quality, cm->bit_depth);
+    if (cpi->twopass.kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) {
+      active_best_quality /= 4;
+    }
+
+    // Don't allow the active min to be lossless (q0) unless the max q
+    // already indicates lossless.
+    active_best_quality =
+        VPXMIN(active_worst_quality, VPXMAX(1, active_best_quality));
+
+    // Allow somewhat lower kf minq with small image formats.
+    if ((cm->width * cm->height) <= (352 * 288)) {
+      q_adj_factor -= 0.25;
+    }
+
+    // Make a further adjustment based on the kf zero motion measure.
+    q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct);
+
+    // Convert the adjustment factor to a qindex delta
+    // on active_best_quality.
+    q_val = vp9_convert_qindex_to_q(active_best_quality, cm->bit_depth);
+    active_best_quality +=
+        vp9_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
+  }
+  *top_index = active_worst_quality;
+  *bottom_index = active_best_quality;
+}
+
+static int rc_constant_q(const VP9_COMP *cpi, int *bottom_index, int *top_index,
+                         int gf_group_index) {
+  const VP9_COMMON *const cm = &cpi->common;
+  const RATE_CONTROL *const rc = &cpi->rc;
+  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
+  const int is_intra_frame = frame_is_intra_only(cm);
+
+  const int cq_level = get_active_cq_level_two_pass(&cpi->twopass, rc, oxcf);
+
+  int q = cq_level;
+  int active_best_quality = cq_level;
+  int active_worst_quality = cq_level;
+
+  // Key frame qp decision
+  if (is_intra_frame && rc->frames_to_key > 1)
+    pick_kf_q_bound_two_pass(cpi, &active_best_quality, &active_worst_quality);
+
+  // ARF / GF qp decision
+  if (!is_intra_frame && !rc->is_src_frame_alt_ref &&
+      cpi->refresh_alt_ref_frame) {
+    active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
+
+    // Modify best quality for second level arfs. For mode VPX_Q this
+    // becomes the baseline frame q.
+    if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) {
+      const int layer_depth = gf_group->layer_depth[gf_group_index];
+      // Linearly fit the frame q depending on the layer depth index from
+      // the base layer ARF.
+ active_best_quality = ((layer_depth - 1) * cq_level + + active_best_quality + layer_depth / 2) / + layer_depth; + } + } + + q = active_best_quality; + *top_index = active_worst_quality; + *bottom_index = active_best_quality; + return q; +} + static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, - int *top_index) { + int *top_index, int gf_group_index) { const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; @@ -1067,56 +1425,20 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, int active_worst_quality = cpi->twopass.active_worst_quality; int q; int *inter_minq; + int arf_active_best_quality_hl; + int *arfgf_high_motion_minq, *arfgf_low_motion_minq; + const int boost_frame = + !rc->is_src_frame_alt_ref && + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame); + ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq); - if (frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) { - // Handle the special case for key frames forced when we have reached - // the maximum key frame interval. Here force the Q to a range - // based on the ambient Q to reduce the risk of popping. - if (rc->this_key_frame_forced) { - double last_boosted_q; - int delta_qindex; - int qindex; + if (oxcf->rc_mode == VPX_Q) + return rc_constant_q(cpi, bottom_index, top_index, gf_group_index); - if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) { - qindex = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex); - active_best_quality = qindex; - last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); - delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, - last_boosted_q * 1.25, cm->bit_depth); - active_worst_quality = - VPXMIN(qindex + delta_qindex, active_worst_quality); - } else { - qindex = rc->last_boosted_qindex; - last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); - delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, - last_boosted_q * 0.75, cm->bit_depth); - active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); - } - } else { - // Not forced keyframe. - double q_adj_factor = 1.0; - double q_val; - // Baseline value derived from cpi->active_worst_quality and kf boost. - active_best_quality = - get_kf_active_quality(rc, active_worst_quality, cm->bit_depth); - - // Allow somewhat lower kf minq with small image formats. - if ((cm->width * cm->height) <= (352 * 288)) { - q_adj_factor -= 0.25; - } - - // Make a further adjustment based on the kf zero motion measure. - q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct); - - // Convert the adjustment factor to a qindex delta - // on active_best_quality. - q_val = vp9_convert_qindex_to_q(active_best_quality, cm->bit_depth); - active_best_quality += - vp9_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth); - } - } else if (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { + if (frame_is_intra_only(cm)) { + pick_kf_q_bound_two_pass(cpi, &active_best_quality, &active_worst_quality); + } else if (boost_frame) { // Use the lower of active_worst_quality and recent // average Q as basis for GF/ARF best Q limit unless last frame was // a key frame. 
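Editor's note, not part of the patch: the layer-depth expression used in rc_constant_q() above (and again in the two-pass ARF path below) is a rounded weighted average that pulls deeper ARF layers toward the base q. A minimal standalone sketch in C, with hypothetical q values:

#include <stdio.h>

/* Sketch of the layer-depth weighting: the frame q is the rounded
 * weighted average of the base q and the ARF active-best q, weighted
 * (layer_depth - 1) : 1, so deeper layers land closer to base_q. */
static int interpolate_arf_q(int layer_depth, int base_q, int arf_best_q) {
  return ((layer_depth - 1) * base_q + arf_best_q + layer_depth / 2) /
         layer_depth;
}

int main(void) {
  /* Hypothetical inputs: base_q = 60, arf_best_q = 40. */
  printf("%d\n", interpolate_arf_q(2, 60, 40)); /* (60 + 40 + 1) / 2 = 50 */
  printf("%d\n", interpolate_arf_q(4, 60, 40)); /* (180 + 40 + 2) / 4 = 55 */
  return 0;
}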
@@ -1129,63 +1451,78 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
     // For constrained quality don't allow Q less than the cq level
     if (oxcf->rc_mode == VPX_CQ) {
       if (q < cq_level) q = cq_level;
+    }
+    active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
+    arf_active_best_quality_hl = active_best_quality;
 
-      active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+    if (rc->arf_increase_active_best_quality == 1) {
+      ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_high_motion_minq);
+      arf_active_best_quality_hl = arfgf_high_motion_minq[q];
+    } else if (rc->arf_increase_active_best_quality == -1) {
+      ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_low_motion_minq);
+      arf_active_best_quality_hl = arfgf_low_motion_minq[q];
+    }
+    active_best_quality =
+        (int)((double)active_best_quality *
+                  rc->arf_active_best_quality_adjustment_factor +
+              (double)arf_active_best_quality_hl *
+                  (1.0 - rc->arf_active_best_quality_adjustment_factor));
 
-      // Constrained quality use slightly lower active best.
-      active_best_quality = active_best_quality * 15 / 16;
-
-    } else if (oxcf->rc_mode == VPX_Q) {
-      if (!cpi->refresh_alt_ref_frame) {
-        active_best_quality = cq_level;
-      } else {
-        active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
-
-        // Modify best quality for second level arfs. For mode VPX_Q this
-        // becomes the baseline frame q.
-        if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)
-          active_best_quality = (active_best_quality + cq_level + 1) / 2;
-      }
-    } else {
-      active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+    // Modify best quality for second level arfs. For mode VPX_Q this
+    // becomes the baseline frame q.
+    if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) {
+      const int layer_depth = gf_group->layer_depth[gf_group_index];
+      // Linearly fit the frame q depending on the layer depth index from
+      // the base layer ARF.
+      active_best_quality =
+          ((layer_depth - 1) * q + active_best_quality + layer_depth / 2) /
+          layer_depth;
+    }
   } else {
-    if (oxcf->rc_mode == VPX_Q) {
-      active_best_quality = cq_level;
-    } else {
-      active_best_quality = inter_minq[active_worst_quality];
+    active_best_quality = inter_minq[active_worst_quality];
 
-      // For the constrained quality mode we don't want
-      // q to fall below the cq level.
-      if ((oxcf->rc_mode == VPX_CQ) && (active_best_quality < cq_level)) {
-        active_best_quality = cq_level;
-      }
+    // For the constrained quality mode we don't want
+    // q to fall below the cq level.
+    if ((oxcf->rc_mode == VPX_CQ) && (active_best_quality < cq_level)) {
+      active_best_quality = cq_level;
     }
   }
 
   // Extension to max or min Q if undershoot or overshoot is outside
   // the permitted range.
- if (cpi->oxcf.rc_mode != VPX_Q) { - if (frame_is_intra_only(cm) || - (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { - active_best_quality -= - (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast); - active_worst_quality += (cpi->twopass.extend_maxq / 2); - } else { - active_best_quality -= - (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2; - active_worst_quality += cpi->twopass.extend_maxq; + if (frame_is_intra_only(cm) || boost_frame) { + const int layer_depth = gf_group->layer_depth[gf_group_index]; + active_best_quality -= + (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast); + active_worst_quality += (cpi->twopass.extend_maxq / 2); + + if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) { + assert(layer_depth > 1); + active_best_quality = + VPXMAX(active_best_quality, + cpi->twopass.last_qindex_of_arf_layer[layer_depth - 1]); } + } else { + const int max_layer_depth = gf_group->max_layer_depth; + assert(max_layer_depth > 0); + + active_best_quality -= + (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2; + active_worst_quality += cpi->twopass.extend_maxq; + + // For normal frames do not allow an active minq lower than the q used for + // the last boosted frame. + active_best_quality = + VPXMAX(active_best_quality, + cpi->twopass.last_qindex_of_arf_layer[max_layer_depth - 1]); } #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY vpx_clear_system_state(); // Static forced key frames Q restrictions dealt with elsewhere. - if (!((frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi))) || - !rc->this_key_frame_forced || - (cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) { - int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group->index], + if (!frame_is_intra_only(cm) || !rc->this_key_frame_forced || + cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH) { + int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group_index], active_worst_quality); active_worst_quality = VPXMAX(active_worst_quality + qdelta, active_best_quality); @@ -1205,17 +1542,15 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, active_worst_quality = clamp(active_worst_quality, active_best_quality, rc->worst_quality); - if (oxcf->rc_mode == VPX_Q) { - q = active_best_quality; - // Special case code to try and match quality with forced key frames. - } else if ((frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) && - rc->this_key_frame_forced) { + if (frame_is_intra_only(cm) && rc->this_key_frame_forced) { // If static since last kf use better of last boosted and last kf q. 
if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) { q = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex); } else { q = rc->last_boosted_qindex; } + } else if (frame_is_intra_only(cm) && !rc->this_key_frame_forced) { + q = active_best_quality; } else { q = vp9_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality, active_worst_quality); @@ -1242,13 +1577,15 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, int *bottom_index, int *top_index) { int q; + const int gf_group_index = cpi->twopass.gf_group.index; if (cpi->oxcf.pass == 0) { if (cpi->oxcf.rc_mode == VPX_CBR) q = rc_pick_q_and_bounds_one_pass_cbr(cpi, bottom_index, top_index); else q = rc_pick_q_and_bounds_one_pass_vbr(cpi, bottom_index, top_index); } else { - q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index); + q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index, + gf_group_index); } if (cpi->sf.use_nonrd_pick_mode) { if (cpi->sf.force_frame_boost == 1) q -= cpi->sf.max_delta_qindex; @@ -1261,6 +1598,89 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, int *bottom_index, return q; } +void vp9_configure_buffer_updates(VP9_COMP *cpi, int gf_group_index) { + VP9_COMMON *cm = &cpi->common; + TWO_PASS *const twopass = &cpi->twopass; + + cpi->rc.is_src_frame_alt_ref = 0; + cm->show_existing_frame = 0; + cpi->rc.show_arf_as_gld = 0; + switch (twopass->gf_group.update_type[gf_group_index]) { + case KF_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 1; + break; + case LF_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 0; + cpi->refresh_alt_ref_frame = 0; + break; + case GF_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 0; + break; + case OVERLAY_UPDATE: + cpi->refresh_last_frame = 0; + cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 0; + cpi->rc.is_src_frame_alt_ref = 1; + if (cpi->rc.preserve_arf_as_gld) { + cpi->rc.show_arf_as_gld = 1; + cpi->refresh_golden_frame = 0; + cm->show_existing_frame = 1; + cm->refresh_frame_context = 0; + } + break; + case MID_OVERLAY_UPDATE: + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 0; + cpi->refresh_alt_ref_frame = 0; + cpi->rc.is_src_frame_alt_ref = 1; + break; + case USE_BUF_FRAME: + cpi->refresh_last_frame = 0; + cpi->refresh_golden_frame = 0; + cpi->refresh_alt_ref_frame = 0; + cpi->rc.is_src_frame_alt_ref = 1; + cm->show_existing_frame = 1; + cm->refresh_frame_context = 0; + break; + default: + assert(twopass->gf_group.update_type[gf_group_index] == ARF_UPDATE); + cpi->refresh_last_frame = 0; + cpi->refresh_golden_frame = 0; + cpi->refresh_alt_ref_frame = 1; + break; + } +} + +void vp9_estimate_qp_gop(VP9_COMP *cpi) { + int gop_length = cpi->twopass.gf_group.gf_group_size; + int bottom_index, top_index; + int idx; + const int gf_index = cpi->twopass.gf_group.index; + const int is_src_frame_alt_ref = cpi->rc.is_src_frame_alt_ref; + const int refresh_frame_context = cpi->common.refresh_frame_context; + + for (idx = 1; idx <= gop_length; ++idx) { + TplDepFrame *tpl_frame = &cpi->tpl_stats[idx]; + int target_rate = cpi->twopass.gf_group.bit_allocation[idx]; + cpi->twopass.gf_group.index = idx; + vp9_rc_set_frame_target(cpi, target_rate); + vp9_configure_buffer_updates(cpi, idx); + tpl_frame->base_qindex = + rc_pick_q_and_bounds_two_pass(cpi, &bottom_index, &top_index, idx); + tpl_frame->base_qindex = 
VPXMAX(tpl_frame->base_qindex, 1);
+  }
+  // Reset the actual index and frame update
+  cpi->twopass.gf_group.index = gf_index;
+  cpi->rc.is_src_frame_alt_ref = is_src_frame_alt_ref;
+  cpi->common.refresh_frame_context = refresh_frame_context;
+  vp9_configure_buffer_updates(cpi, gf_index);
+}
+
 void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, int frame_target,
                                       int *frame_under_shoot_limit,
                                       int *frame_over_shoot_limit) {
@@ -1333,6 +1753,15 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
     if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;
 
     rc->frames_since_golden++;
+
+    if (rc->show_arf_as_gld) {
+      rc->frames_since_golden = 0;
+      // If we are not using alt ref in the upcoming group clear the arf
+      // active flag. In multi arf group case, if the index is not 0 then
+      // we are overlaying a mid group arf so should not reset the flag.
+      if (!rc->source_alt_ref_pending && (cpi->twopass.gf_group.index == 0))
+        rc->source_alt_ref_active = 0;
+    }
   }
 }
 
@@ -1367,7 +1796,8 @@ static void compute_frame_low_motion(VP9_COMP *const cpi) {
   int cnt_zeromv = 0;
   for (mi_row = 0; mi_row < rows; mi_row++) {
     for (mi_col = 0; mi_col < cols; mi_col++) {
-      if (abs(mi[0]->mv[0].as_mv.row) < 16 && abs(mi[0]->mv[0].as_mv.col) < 16)
+      if (mi[0]->ref_frame[0] == LAST_FRAME &&
+          abs(mi[0]->mv[0].as_mv.row) < 16 && abs(mi[0]->mv[0].as_mv.col) < 16)
         cnt_zeromv++;
       mi++;
     }
@@ -1381,7 +1811,11 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
   const VP9_COMMON *const cm = &cpi->common;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   RATE_CONTROL *const rc = &cpi->rc;
+  SVC *const svc = &cpi->svc;
   const int qindex = cm->base_qindex;
+  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
+  const int gf_group_index = cpi->twopass.gf_group.index;
+  const int layer_depth = gf_group->layer_depth[gf_group_index];
 
   // Update rate control heuristics
   rc->projected_frame_size = (int)(bytes_used << 3);
@@ -1390,7 +1824,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
   vp9_rc_update_rate_correction_factors(cpi);
 
   // Keep a record of last Q and ambient average Q.
-  if (cm->frame_type == KEY_FRAME) {
+  if (frame_is_intra_only(cm)) {
     rc->last_q[KEY_FRAME] = qindex;
     rc->avg_frame_qindex[KEY_FRAME] =
         ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
@@ -1423,6 +1857,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
     }
   }
 
+  if (cpi->use_svc) vp9_svc_adjust_avg_frame_qindex(cpi);
+
   // Keep record of last boosted (KF/GF/ARF) Q value.
   // If the current frame is coded at a lower Q then we also update it.
   // If all mbs in this group are skipped only update if the Q value is
   // better than that already stored.
@@ -1434,13 +1870,22 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
        (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
     rc->last_boosted_qindex = qindex;
   }
-  if (cm->frame_type == KEY_FRAME) rc->last_kf_qindex = qindex;
-  update_buffer_level(cpi, rc->projected_frame_size);
+  if ((qindex < cpi->twopass.last_qindex_of_arf_layer[layer_depth]) ||
+      (cm->frame_type == KEY_FRAME) ||
+      (!rc->constrained_gf_group &&
+       (cpi->refresh_alt_ref_frame ||
+        (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
+    cpi->twopass.last_qindex_of_arf_layer[layer_depth] = qindex;
+  }
+
+  if (frame_is_intra_only(cm)) rc->last_kf_qindex = qindex;
+
+  update_buffer_level_postencode(cpi, rc->projected_frame_size);
 
   // Rolling monitors of whether we are over or underspending used to help
   // regulate min and Max Q in two pass.
- if (cm->frame_type != KEY_FRAME) { + if (!frame_is_intra_only(cm)) { rc->rolling_target_bits = ROUND_POWER_OF_TWO( rc->rolling_target_bits * 3 + rc->this_frame_target, 2); rc->rolling_actual_bits = ROUND_POWER_OF_TWO( @@ -1457,9 +1902,9 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits; - if (!cpi->use_svc || is_two_pass_svc(cpi)) { + if (!cpi->use_svc) { if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame && - (cm->frame_type != KEY_FRAME)) + (!frame_is_intra_only(cm))) // Update the alternate reference frame stats as appropriate. update_alt_ref_frame_stats(cpi); else @@ -1467,7 +1912,28 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { update_golden_frame_stats(cpi); } - if (cm->frame_type == KEY_FRAME) rc->frames_since_key = 0; + // If second (long term) temporal reference is used for SVC, + // update the golden frame counter, only for base temporal layer. + if (cpi->use_svc && svc->use_gf_temporal_ref_current_layer && + svc->temporal_layer_id == 0) { + int i = 0; + if (cpi->refresh_golden_frame) + rc->frames_since_golden = 0; + else + rc->frames_since_golden++; + // Decrement count down till next gf + if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--; + // Update the frames_since_golden for all upper temporal layers. + for (i = 1; i < svc->number_temporal_layers; ++i) { + const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, + svc->number_temporal_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + lrc->frames_since_golden = rc->frames_since_golden; + } + } + + if (frame_is_intra_only(cm)) rc->frames_since_key = 0; if (cm->show_frame) { rc->frames_since_key++; rc->frames_to_key--; @@ -1481,24 +1947,53 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { } if (oxcf->pass == 0) { - if (cm->frame_type != KEY_FRAME) { + if (!frame_is_intra_only(cm) && + (!cpi->use_svc || + (cpi->use_svc && + !svc->layer_context[svc->temporal_layer_id].is_key_frame && + svc->spatial_layer_id == svc->number_spatial_layers - 1))) { compute_frame_low_motion(cpi); if (cpi->sf.use_altref_onepass) update_altref_usage(cpi); } + // For SVC: set avg_frame_low_motion (only computed on top spatial layer) + // to all lower spatial layers. + if (cpi->use_svc && + svc->spatial_layer_id == svc->number_spatial_layers - 1) { + int i; + for (i = 0; i < svc->number_spatial_layers - 1; ++i) { + const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id, + svc->number_temporal_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + lrc->avg_frame_low_motion = rc->avg_frame_low_motion; + } + } cpi->rc.last_frame_is_src_altref = cpi->rc.is_src_frame_alt_ref; } - if (cm->frame_type != KEY_FRAME) rc->reset_high_source_sad = 0; + if (!frame_is_intra_only(cm)) rc->reset_high_source_sad = 0; rc->last_avg_frame_bandwidth = rc->avg_frame_bandwidth; + if (cpi->use_svc && svc->spatial_layer_id < svc->number_spatial_layers - 1) + svc->lower_layer_qindex = cm->base_qindex; } void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) { - // Update buffer level with zero size, update frame counters, and return. 
-  update_buffer_level(cpi, 0);
+  cpi->common.current_video_frame++;
   cpi->rc.frames_since_key++;
   cpi->rc.frames_to_key--;
   cpi->rc.rc_2_frame = 0;
   cpi->rc.rc_1_frame = 0;
+  cpi->rc.last_avg_frame_bandwidth = cpi->rc.avg_frame_bandwidth;
+  // For SVC on dropped frame when framedrop_mode != LAYER_DROP:
+  // in this mode the whole superframe may be dropped if only a single layer
+  // has buffer underflow (below threshold). Since this can then lead to
+  // increasing buffer levels/overflow for certain layers even though whole
+  // superframe is dropped, we cap buffer level if it's already stable.
+  if (cpi->use_svc && cpi->svc.framedrop_mode != LAYER_DROP &&
+      cpi->rc.buffer_level > cpi->rc.optimal_buffer_level) {
+    cpi->rc.buffer_level = cpi->rc.optimal_buffer_level;
+    cpi->rc.bits_off_target = cpi->rc.optimal_buffer_level;
+  }
 }
 
 static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
@@ -1544,10 +2039,9 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
   int target;
-  // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
   if (!cpi->refresh_alt_ref_frame &&
       (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
-       rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) {
+       rc->frames_to_key == 0)) {
     cm->frame_type = KEY_FRAME;
     rc->this_key_frame_forced =
         cm->current_video_frame != 0 && rc->frames_to_key == 0;
@@ -1582,9 +2076,8 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
       // Adjust boost and af_ratio based on avg_frame_low_motion, which varies
       // between 0 and 100 (stationary, 100% zero/small motion).
       rc->gfu_boost =
-          VPXMAX(500,
-                 DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
-                     (rc->avg_frame_low_motion + 100));
+          VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
                           (rc->avg_frame_low_motion + 100));
      rc->af_ratio_onepass_vbr = VPXMIN(15, VPXMAX(5, 3 * rc->gfu_boost / 400));
    }
    adjust_gfint_frame_constraint(cpi, rc->frames_to_key);
@@ -1684,30 +2177,80 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
   return vp9_rc_clamp_iframe_target_size(cpi, target);
 }
 
+static void set_intra_only_frame(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  SVC *const svc = &cpi->svc;
+  // Don't allow intra_only frame for bypass/flexible SVC mode, or if number
+  // of spatial layers is 1 or if number of spatial or temporal layers > 3.
+  // Also if intra-only is inserted on very first frame, don't allow if
+  // number of temporal layers > 1. This is because on intra-only frame
+  // only 3 reference buffers can be updated, but for temporal layers > 1
+  // we generally need to use buffer slots 4 and 5.
+  if ((cm->current_video_frame == 0 && svc->number_temporal_layers > 1) ||
+      svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS ||
+      svc->number_spatial_layers > 3 || svc->number_temporal_layers > 3 ||
+      svc->number_spatial_layers == 1)
+    return;
+  cm->show_frame = 0;
+  cm->intra_only = 1;
+  cm->frame_type = INTER_FRAME;
+  cpi->ext_refresh_frame_flags_pending = 1;
+  cpi->ext_refresh_last_frame = 1;
+  cpi->ext_refresh_golden_frame = 1;
+  cpi->ext_refresh_alt_ref_frame = 1;
+  if (cm->current_video_frame == 0) {
+    cpi->lst_fb_idx = 0;
+    cpi->gld_fb_idx = 1;
+    cpi->alt_fb_idx = 2;
+  } else {
+    int i;
+    int count = 0;
+    cpi->lst_fb_idx = -1;
+    cpi->gld_fb_idx = -1;
+    cpi->alt_fb_idx = -1;
+    // For intra-only frame we need to refresh all slots that were
+    // being used for the base layer (fb_idx_base[i] == 1).
+    // Start with assigning last first, then golden and then alt.
+    for (i = 0; i < REF_FRAMES; ++i) {
+      if (svc->fb_idx_base[i] == 1) count++;
+      if (count == 1 && cpi->lst_fb_idx == -1) cpi->lst_fb_idx = i;
+      if (count == 2 && cpi->gld_fb_idx == -1) cpi->gld_fb_idx = i;
+      if (count == 3 && cpi->alt_fb_idx == -1) cpi->alt_fb_idx = i;
+    }
+    // If golden or alt is not being used for base layer, then set them
+    // to the lst_fb_idx.
+    if (cpi->gld_fb_idx == -1) cpi->gld_fb_idx = cpi->lst_fb_idx;
+    if (cpi->alt_fb_idx == -1) cpi->alt_fb_idx = cpi->lst_fb_idx;
+  }
+}
+
 void vp9_rc_get_svc_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
+  SVC *const svc = &cpi->svc;
   int target = rc->avg_frame_bandwidth;
-  int layer =
-      LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, cpi->svc.temporal_layer_id,
-                       cpi->svc.number_temporal_layers);
+  int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
+                               svc->number_temporal_layers);
+  if (svc->first_spatial_layer_to_encode)
+    svc->layer_context[svc->temporal_layer_id].is_key_frame = 0;
   // Periodic key frames are based on the super-frame counter
   // (svc.current_superframe); only the base spatial layer is a key frame.
+  // Key frame is set for any of the following: very first frame, frame flags
+  // indicate key, superframe counter hits key frequency, or (non-intra) sync
+  // flag is set for spatial layer 0.
-  if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
+  if ((cm->current_video_frame == 0 && !svc->previous_frame_is_intra_only) ||
+      (cpi->frame_flags & FRAMEFLAGS_KEY) ||
       (cpi->oxcf.auto_key &&
-       (cpi->svc.current_superframe % cpi->oxcf.key_freq == 0) &&
-       cpi->svc.spatial_layer_id == 0)) {
+       (svc->current_superframe % cpi->oxcf.key_freq == 0) &&
+       !svc->previous_frame_is_intra_only && svc->spatial_layer_id == 0) ||
+      (svc->spatial_layer_sync[0] == 1 && svc->spatial_layer_id == 0)) {
     cm->frame_type = KEY_FRAME;
     rc->source_alt_ref_active = 0;
-    if (is_two_pass_svc(cpi)) {
-      cpi->svc.layer_context[layer].is_key_frame = 1;
-      cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
-    } else if (is_one_pass_cbr_svc(cpi)) {
-      if (cm->current_video_frame > 0) vp9_svc_reset_key_frame(cpi);
-      layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
-                               cpi->svc.temporal_layer_id,
-                               cpi->svc.number_temporal_layers);
-      cpi->svc.layer_context[layer].is_key_frame = 1;
+    if (is_one_pass_cbr_svc(cpi)) {
+      if (cm->current_video_frame > 0) vp9_svc_reset_temporal_layers(cpi, 1);
+      layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
+                               svc->number_temporal_layers);
+      svc->layer_context[layer].is_key_frame = 1;
      cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
      // Assumption here is that LAST_FRAME is being updated for a keyframe.
      // Thus no change in update flags.
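Editor's note, not part of the patch: the key-frame test above now has four clauses, and the periodic one is easy to misread. A minimal C sketch of just that clause (field names abbreviated, values hypothetical): the period is counted in superframes, only spatial layer 0 can start a key frame, and the test is suppressed right after an intra-only frame.

#include <stdio.h>

/* Sketch of the periodic key-frame clause from vp9_rc_get_svc_params(). */
static int periodic_key(int auto_key, int current_superframe, int key_freq,
                        int prev_intra_only, int spatial_layer_id) {
  return auto_key && (current_superframe % key_freq == 0) &&
         !prev_intra_only && spatial_layer_id == 0;
}

int main(void) {
  printf("%d\n", periodic_key(1, 120, 60, 0, 0)); /* 1: key superframe */
  printf("%d\n", periodic_key(1, 120, 60, 0, 1)); /* 0: upper spatial layer */
  return 0;
}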
@@ -1715,48 +2258,127 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
     }
   } else {
     cm->frame_type = INTER_FRAME;
-    if (is_two_pass_svc(cpi)) {
-      LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
-      if (cpi->svc.spatial_layer_id == 0) {
-        lc->is_key_frame = 0;
-      } else {
-        lc->is_key_frame =
-            cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame;
-        if (lc->is_key_frame) cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
-      }
-      cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
-    } else if (is_one_pass_cbr_svc(cpi)) {
-      LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
-      if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode) {
-        lc->is_key_frame = 0;
-      } else {
-        lc->is_key_frame =
-            cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame;
-      }
+    if (is_one_pass_cbr_svc(cpi)) {
+      LAYER_CONTEXT *lc = &svc->layer_context[layer];
+      // Add condition current_video_frame > 0 for the case where first frame
+      // is intra only followed by overlay/copy frame. In this case we don't
+      // want to reset is_key_frame to 0 on overlay/copy frame.
+      lc->is_key_frame =
+          (svc->spatial_layer_id == 0 && cm->current_video_frame > 0)
+              ? 0
+              : svc->layer_context[svc->temporal_layer_id].is_key_frame;
       target = calc_pframe_target_size_one_pass_cbr(cpi);
     }
   }
 
+  if (svc->simulcast_mode) {
+    if (svc->spatial_layer_id > 0 &&
+        svc->layer_context[layer].is_key_frame == 1) {
+      cm->frame_type = KEY_FRAME;
+      cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
+      target = calc_iframe_target_size_one_pass_cbr(cpi);
+    }
+    // Set the buffer idx and refresh flags for key frames in simulcast mode.
+    // Note the buffer slot for long-term reference is set below (line 2255),
+    // and alt_ref is used for that on key frame. So use last and golden for
+    // the other two normal slots.
+    if (cm->frame_type == KEY_FRAME) {
+      if (svc->number_spatial_layers == 2) {
+        if (svc->spatial_layer_id == 0) {
+          cpi->lst_fb_idx = 0;
+          cpi->gld_fb_idx = 2;
+          cpi->alt_fb_idx = 6;
+        } else if (svc->spatial_layer_id == 1) {
+          cpi->lst_fb_idx = 1;
+          cpi->gld_fb_idx = 3;
+          cpi->alt_fb_idx = 6;
+        }
+      } else if (svc->number_spatial_layers == 3) {
+        if (svc->spatial_layer_id == 0) {
+          cpi->lst_fb_idx = 0;
+          cpi->gld_fb_idx = 3;
+          cpi->alt_fb_idx = 6;
+        } else if (svc->spatial_layer_id == 1) {
+          cpi->lst_fb_idx = 1;
+          cpi->gld_fb_idx = 4;
+          cpi->alt_fb_idx = 6;
+        } else if (svc->spatial_layer_id == 2) {
+          cpi->lst_fb_idx = 2;
+          cpi->gld_fb_idx = 5;
+          cpi->alt_fb_idx = 7;
+        }
+      }
+      cpi->ext_refresh_last_frame = 1;
+      cpi->ext_refresh_golden_frame = 1;
+      cpi->ext_refresh_alt_ref_frame = 1;
+    }
+  }
+
+  // Check if superframe contains a sync layer request.
+  vp9_svc_check_spatial_layer_sync(cpi);
+
+  // If long term temporal feature is enabled, set the period of the update.
+  // The update/refresh of this reference frame is always on base temporal
+  // layer frame.
+  if (svc->use_gf_temporal_ref_current_layer) {
+    // Only use gf long-term prediction on non-key superframes.
+    if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+      // Use golden for this reference, which will be used for prediction.
+      int index = svc->spatial_layer_id;
+      if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
+      assert(index >= 0);
+      cpi->gld_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
+      // Enable prediction off LAST (last reference) and golden (which will
+      // generally be further behind/long-term reference).
+      cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+    }
+    // Check for update/refresh of reference: only refresh on base temporal
+    // layer.
+    if (svc->temporal_layer_id == 0) {
+      if (svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+        // On key frame we update the buffer index used for long term reference.
+        // Use the alt_ref since it is not used or updated on key frames.
+        int index = svc->spatial_layer_id;
+        if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
+        assert(index >= 0);
+        cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
+        cpi->ext_refresh_alt_ref_frame = 1;
+      } else if (rc->frames_till_gf_update_due == 0) {
+        // Set period of next update. Make it a multiple of 10, as the cyclic
+        // refresh is typically ~10%, and we'd like the update to happen after
+        // a few cycles of the refresh (so it is a better quality frame). Note
+        // the cyclic refresh for SVC only operates on base temporal layer
+        // frames. Choose 20 as the period for now (2 cycles).
+        rc->baseline_gf_interval = 20;
+        rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+        cpi->ext_refresh_golden_frame = 1;
+        rc->gfu_boost = DEFAULT_GF_BOOST;
+      }
+    }
+  } else if (!svc->use_gf_temporal_ref) {
+    rc->frames_till_gf_update_due = INT_MAX;
+    rc->baseline_gf_interval = INT_MAX;
+  }
+  if (svc->set_intra_only_frame) {
+    set_intra_only_frame(cpi);
+    target = calc_iframe_target_size_one_pass_cbr(cpi);
+  }
   // Any update/change of global cyclic refresh parameters (amount/delta-qp)
   // should be done here, before the frame qp is selected.
   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
     vp9_cyclic_refresh_update_parameters(cpi);
 
   vp9_rc_set_frame_target(cpi, target);
-  rc->frames_till_gf_update_due = INT_MAX;
-  rc->baseline_gf_interval = INT_MAX;
+  if (cm->show_frame) update_buffer_level_svc_preencode(cpi);
 }
 
 void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
   int target;
-  // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
-  if ((cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
-       rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) {
+  if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
+      (cpi->oxcf.auto_key && rc->frames_to_key == 0)) {
     cm->frame_type = KEY_FRAME;
-    rc->this_key_frame_forced =
-        cm->current_video_frame != 0 && rc->frames_to_key == 0;
     rc->frames_to_key = cpi->oxcf.key_freq;
     rc->kf_boost = DEFAULT_KF_BOOST;
     rc->source_alt_ref_active = 0;
@@ -1782,12 +2404,15 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
     vp9_cyclic_refresh_update_parameters(cpi);
 
-  if (cm->frame_type == KEY_FRAME)
+  if (frame_is_intra_only(cm))
     target = calc_iframe_target_size_one_pass_cbr(cpi);
   else
     target = calc_pframe_target_size_one_pass_cbr(cpi);
 
   vp9_rc_set_frame_target(cpi, target);
+
+  if (cm->show_frame) update_buffer_level_preencode(cpi);
+
   if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC)
     cpi->resize_pending = vp9_resize_one_pass_cbr(cpi);
   else
@@ -1852,20 +2477,26 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
   // Set Maximum gf/arf interval
   rc->max_gf_interval = oxcf->max_gf_interval;
   rc->min_gf_interval = oxcf->min_gf_interval;
+#if CONFIG_RATE_CTRL
+  if (rc->min_gf_interval == 0) {
+    rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
+        oxcf->width, oxcf->height, oxcf->init_framerate);
+  }
+  if (rc->max_gf_interval == 0) {
+    rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
+        oxcf->init_framerate, rc->min_gf_interval);
+  }
+#else
   if (rc->min_gf_interval == 0)
     rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
         oxcf->width, oxcf->height, cpi->framerate);
   if (rc->max_gf_interval == 0)
     rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
         cpi->framerate, rc->min_gf_interval);
+#endif
 
-  // Extended interval for genuinely static scenes
-  rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
-
-  if (is_altref_enabled(cpi)) {
-    if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
-      rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
-  }
+  // Extended max interval for genuinely static scenes like slide shows.
+  rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;
 
   if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
     rc->max_gf_interval = rc->static_scene_max_gf_interval;
@@ -1909,12 +2540,12 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) {
       VPXMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
 
   // A maximum bitrate for a frame is defined.
-  // The baseline for this aligns with HW implementations that
-  // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
-  // per 16x16 MB (averaged over a frame). However this limit is extended if
-  // a very high rate is given on the command line or the the rate cannnot
-  // be acheived because of a user specificed max q (e.g. when the user
-  // specifies lossless encode.
+  // However this limit is extended if a very high rate is given on the
+  // command line or the rate cannot be achieved because of a user specified
+  // max q (e.g. when the user specifies lossless encode).
+  //
+  // If a level is specified that requires a lower maximum rate, then the
+  // level value takes precedence.
vbr_max_bits = (int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) / 100); @@ -2271,30 +2902,56 @@ static void adjust_gf_boost_lag_one_pass_vbr(VP9_COMP *cpi, void vp9_scene_detection_onepass(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; + YV12_BUFFER_CONFIG const *unscaled_src = cpi->un_scaled_source; + YV12_BUFFER_CONFIG const *unscaled_last_src = cpi->unscaled_last_source; + uint8_t *src_y; + int src_ystride; + int src_width; + int src_height; + uint8_t *last_src_y; + int last_src_ystride; + int last_src_width; + int last_src_height; + if (cpi->un_scaled_source == NULL || cpi->unscaled_last_source == NULL || + (cpi->use_svc && cpi->svc.current_superframe == 0)) + return; + src_y = unscaled_src->y_buffer; + src_ystride = unscaled_src->y_stride; + src_width = unscaled_src->y_width; + src_height = unscaled_src->y_height; + last_src_y = unscaled_last_src->y_buffer; + last_src_ystride = unscaled_last_src->y_stride; + last_src_width = unscaled_last_src->y_width; + last_src_height = unscaled_last_src->y_height; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) return; #endif rc->high_source_sad = 0; - if (cpi->Last_Source != NULL && - cpi->Last_Source->y_width == cpi->Source->y_width && - cpi->Last_Source->y_height == cpi->Source->y_height) { + rc->high_num_blocks_with_motion = 0; + // For SVC: scene detection is only checked on first spatial layer of + // the superframe using the original/unscaled resolutions. + if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode && + src_width == last_src_width && src_height == last_src_height) { YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL }; - uint8_t *src_y = cpi->Source->y_buffer; - int src_ystride = cpi->Source->y_stride; - uint8_t *last_src_y = cpi->Last_Source->y_buffer; - int last_src_ystride = cpi->Last_Source->y_stride; + int num_mi_cols = cm->mi_cols; + int num_mi_rows = cm->mi_rows; int start_frame = 0; int frames_to_buffer = 1; int frame = 0; int scene_cut_force_key_frame = 0; + int num_zero_temp_sad = 0; uint64_t avg_sad_current = 0; - uint32_t min_thresh = 4000; + uint32_t min_thresh = 10000; float thresh = 8.0f; uint32_t thresh_key = 140000; if (cpi->oxcf.speed <= 5) thresh_key = 240000; - if (cpi->oxcf.rc_mode == VPX_VBR) { - min_thresh = 65000; - thresh = 2.1f; + if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) min_thresh = 65000; + if (cpi->oxcf.rc_mode == VPX_VBR) thresh = 2.1f; + if (cpi->use_svc && cpi->svc.number_spatial_layers > 1) { + const int aligned_width = ALIGN_POWER_OF_TWO(src_width, MI_SIZE_LOG2); + const int aligned_height = ALIGN_POWER_OF_TWO(src_height, MI_SIZE_LOG2); + num_mi_cols = aligned_width >> MI_SIZE_LOG2; + num_mi_rows = aligned_height >> MI_SIZE_LOG2; } if (cpi->oxcf.lag_in_frames > 0) { frames_to_buffer = (cm->current_video_frame == 1) @@ -2342,14 +2999,15 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) { uint64_t avg_sad = 0; uint64_t tmp_sad = 0; int num_samples = 0; - int sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; - int sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + int sb_cols = (num_mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + int sb_rows = (num_mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; if (cpi->oxcf.lag_in_frames > 0) { src_y = frames[frame]->y_buffer; src_ystride = frames[frame]->y_stride; last_src_y = frames[frame + 1]->y_buffer; last_src_ystride = frames[frame + 1]->y_stride; } + num_zero_temp_sad = 0; for (sbi_row = 0; sbi_row < sb_rows; ++sbi_row) { for (sbi_col = 0; 
sbi_col < sb_cols; ++sbi_col) {
       // Checker-board pattern, ignore boundary.
@@ -2361,6 +3019,7 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
                                             last_src_ystride);
           avg_sad += tmp_sad;
           num_samples++;
+          if (tmp_sad == 0) num_zero_temp_sad++;
         }
         src_y += 64;
         last_src_y += 64;
@@ -2377,7 +3036,8 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
       if (avg_sad >
               VPXMAX(min_thresh,
                      (unsigned int)(rc->avg_source_sad[0] * thresh)) &&
-          rc->frames_since_key > 1)
+          rc->frames_since_key > 1 + cpi->svc.number_spatial_layers &&
+          num_zero_temp_sad < 3 * (num_samples >> 2))
        rc->high_source_sad = 1;
      else
        rc->high_source_sad = 0;
@@ -2388,6 +3048,8 @@
       } else {
         rc->avg_source_sad[lagframe_idx] = avg_sad;
       }
+      if (num_zero_temp_sad < (3 * num_samples >> 2))
+        rc->high_num_blocks_with_motion = 1;
     }
   }
   // For CBR non-screen content mode, check if we should reset the rate
@@ -2407,6 +3069,19 @@
     if (cm->frame_type != KEY_FRAME && rc->reset_high_source_sad)
       rc->this_frame_target = rc->avg_frame_bandwidth;
   }
+  // For SVC the new (updated) avg_source_sad[0] for the current superframe
+  // updates the setting for all layers.
+  if (cpi->use_svc) {
+    int sl, tl;
+    SVC *const svc = &cpi->svc;
+    for (sl = 0; sl < svc->number_spatial_layers; ++sl)
+      for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
+        int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
+        LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+        RATE_CONTROL *const lrc = &lc->rc;
+        lrc->avg_source_sad[0] = rc->avg_source_sad[0];
+      }
+  }
   // For VBR, under scene change/high content change, force golden refresh.
   if (cpi->oxcf.rc_mode == VPX_VBR && cm->frame_type != KEY_FRAME &&
       rc->high_source_sad && rc->frames_to_key > 3 &&
@@ -2437,12 +3112,26 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
 // Test if encoded frame will significantly overshoot the target bitrate, and
 // if so, set the QP, reset/adjust some rate control parameters, and return 1.
+// frame_size = -1 means frame has not been encoded.
 int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
-  int thresh_qp = 3 * (rc->worst_quality >> 2);
-  int thresh_rate = rc->avg_frame_bandwidth * 10;
-  if (cm->base_qindex < thresh_qp && frame_size > thresh_rate) {
+  SPEED_FEATURES *const sf = &cpi->sf;
+  int thresh_qp = 7 * (rc->worst_quality >> 3);
+  int thresh_rate = rc->avg_frame_bandwidth << 3;
+  // Lower thresh_qp for video (more overshoot at lower Q) to be
+  // more conservative.
+  if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
+    thresh_qp = 3 * (rc->worst_quality >> 2);
+  // If this decision is not based on an encoded frame size but just on
+  // scene/slide change detection (i.e., overshoot_detection_cbr_rt ==
+  // FAST_DETECTION_MAXQ), for now skip the (frame_size > thresh_rate)
+  // condition in this case.
+  // TODO(marpan): Use a better size/rate condition for this case and
+  // adjust thresholds.
+  if ((sf->overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ ||
+       frame_size > thresh_rate) &&
+      cm->base_qindex < thresh_qp) {
     double rate_correction_factor =
         cpi->rc.rate_correction_factors[INTER_NORMAL];
     const int target_size = cpi->rc.avg_frame_bandwidth;
@@ -2452,6 +3141,29 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
     int enumerator;
     // Force a re-encode, and for now use max-QP.
    *q = cpi->rc.worst_quality;
+    cpi->cyclic_refresh->counter_encode_maxq_scene_change = 0;
+    cpi->rc.re_encode_maxq_scene_change = 1;
+    // If the frame_size is much larger than the threshold (big content change)
+    // and the encoded frame used a lot of Intra modes, then force hybrid_intra
+    // encoding for the re-encode on this scene change. hybrid_intra will
+    // use rd-based intra mode selection for small blocks.
+    if (sf->overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
+        frame_size > (thresh_rate << 1) && cpi->svc.spatial_layer_id == 0) {
+      MODE_INFO **mi = cm->mi_grid_visible;
+      int sum_intra_usage = 0;
+      int mi_row, mi_col;
+      int tot = 0;
+      for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
+        for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
+          if (mi[0]->ref_frame[0] == INTRA_FRAME) sum_intra_usage++;
+          tot++;
+          mi++;
+        }
+        mi += 8;
+      }
+      sum_intra_usage = 100 * sum_intra_usage / (cm->mi_rows * cm->mi_cols);
+      if (sum_intra_usage > 60) cpi->rc.hybrid_intra_scene_change = 1;
+    }
     // Adjust avg_frame_qindex, buffer_level, and rate correction factors, as
     // these parameters will affect QP selection for subsequent frames. If they
     // have settled down to a very different (low QP) state, then not adjusting
     // them may cause next frame to select low QP and overshoot again.
@@ -2479,21 +3191,27 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
       cpi->rc.rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
     }
     // For temporal layers, reset the rate control parameters across all
-    // temporal layers.
+    // temporal layers. If the first_spatial_layer_to_encode > 0, then this
+    // superframe has skipped lower base layers. So in this case we should also
+    // reset and force max-q for spatial layers < first_spatial_layer_to_encode.
     if (cpi->use_svc) {
-      int i = 0;
+      int tl = 0;
+      int sl = 0;
       SVC *svc = &cpi->svc;
-      for (i = 0; i < svc->number_temporal_layers; ++i) {
-        const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i,
-                                           svc->number_temporal_layers);
-        LAYER_CONTEXT *lc = &svc->layer_context[layer];
-        RATE_CONTROL *lrc = &lc->rc;
-        lrc->avg_frame_qindex[INTER_FRAME] = *q;
-        lrc->buffer_level = rc->optimal_buffer_level;
-        lrc->bits_off_target = rc->optimal_buffer_level;
-        lrc->rc_1_frame = 0;
-        lrc->rc_2_frame = 0;
-        lrc->rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
+      for (sl = 0; sl < svc->first_spatial_layer_to_encode; ++sl) {
+        for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
+          const int layer =
+              LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
+          LAYER_CONTEXT *lc = &svc->layer_context[layer];
+          RATE_CONTROL *lrc = &lc->rc;
+          lrc->avg_frame_qindex[INTER_FRAME] = *q;
+          lrc->buffer_level = lrc->optimal_buffer_level;
+          lrc->bits_off_target = lrc->optimal_buffer_level;
+          lrc->rc_1_frame = 0;
+          lrc->rc_2_frame = 0;
+          lrc->rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
+          lrc->force_max_q = 1;
+        }
       }
     }
     return 1;
diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_ratectrl.h b/media/libvpx/libvpx/vp9/encoder/vp9_ratectrl.h
index c1b210677e20..7dbe17dc56d9 100644
--- a/media/libvpx/libvpx/vp9/encoder/vp9_ratectrl.h
+++ b/media/libvpx/libvpx/vp9/encoder/vp9_ratectrl.h
@@ -8,8 +8,8 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef VP9_ENCODER_VP9_RATECTRL_H_
-#define VP9_ENCODER_VP9_RATECTRL_H_
+#ifndef VPX_VP9_ENCODER_VP9_RATECTRL_H_
+#define VPX_VP9_ENCODER_VP9_RATECTRL_H_
 
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
@@ -34,6 +34,14 @@ extern "C" {
 
 #define FRAME_OVERHEAD_BITS 200
 
+// Threshold used to define a KF group as static (e.g. a slide show).
+// Essentially this means that no frame in the group has more than 1% of MBs +// that are not marked as coded with 0,0 motion in the first pass. +#define STATIC_KF_GROUP_THRESH 99 + +// The maximum duration of a GF group that is static (for example a slide show). +#define MAX_STATIC_GF_GROUP_LENGTH 250 + typedef enum { INTER_NORMAL = 0, INTER_HIGH = 1, @@ -167,15 +175,34 @@ typedef struct { uint64_t avg_source_sad[MAX_LAG_BUFFERS]; uint64_t prev_avg_source_sad_lag; int high_source_sad_lagindex; + int high_num_blocks_with_motion; int alt_ref_gf_group; int last_frame_is_src_altref; int high_source_sad; int count_last_scene_change; + int hybrid_intra_scene_change; + int re_encode_maxq_scene_change; int avg_frame_low_motion; int af_ratio_onepass_vbr; int force_qpmin; int reset_high_source_sad; double perc_arf_usage; + int force_max_q; + // Last frame was dropped post encode on scene change. + int last_post_encode_dropped_scene_change; + // Enable post encode frame dropping for screen content. Only enabled when + // ext_use_post_encode_drop is enabled by user. + int use_post_encode_drop; + // External flag to enable post encode frame dropping, controlled by user. + int ext_use_post_encode_drop; + + int damped_adjustment[RATE_FACTOR_LEVELS]; + double arf_active_best_quality_adjustment_factor; + int arf_increase_active_best_quality; + + int preserve_arf_as_gld; + int preserve_next_arf_as_gld; + int show_arf_as_gld; } RATE_CONTROL; struct VP9_COMP; @@ -184,7 +211,7 @@ struct VP9EncoderConfig; void vp9_rc_init(const struct VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc); -int vp9_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs, +int vp9_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs, double correction_factor, vpx_bit_depth_t bit_depth); double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth); @@ -195,9 +222,9 @@ void vp9_rc_init_minq_luts(void); int vp9_rc_get_default_min_gf_interval(int width, int height, double framerate); // Note vp9_rc_get_default_max_gf_interval() requires the min_gf_interval to -// be passed in to ensure that the max_gf_interval returned is at least as bis +// be passed in to ensure that the max_gf_interval returned is at least as big // as that. -int vp9_rc_get_default_max_gf_interval(double framerate, int min_frame_rate); +int vp9_rc_get_default_max_gf_interval(double framerate, int min_gf_interval); // Generally at the high level, the following flow is expected // to be enforced for rate control: @@ -237,13 +264,18 @@ void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi); // Changes only the rate correction factors in the rate control structure. void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi); +// Post encode drop for CBR mode. +int post_encode_drop_cbr(struct VP9_COMP *cpi, size_t *size); + +int vp9_test_drop(struct VP9_COMP *cpi); + // Decide if we should drop this frame: For 1-pass CBR. // Changes only the decimation count in the rate control structure int vp9_rc_drop_frame(struct VP9_COMP *cpi); // Computes frame size bounds. 
void vp9_rc_compute_frame_size_bounds(const struct VP9_COMP *cpi, - int this_frame_target, + int frame_target, int *frame_under_shoot_limit, int *frame_over_shoot_limit); @@ -294,8 +326,12 @@ void vp9_scene_detection_onepass(struct VP9_COMP *cpi); int vp9_encodedframe_overshoot(struct VP9_COMP *cpi, int frame_size, int *q); +void vp9_configure_buffer_updates(struct VP9_COMP *cpi, int gf_group_index); + +void vp9_estimate_qp_gop(struct VP9_COMP *cpi); + #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_RATECTRL_H_ +#endif // VPX_VP9_ENCODER_VP9_RATECTRL_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_rd.c b/media/libvpx/libvpx/vp9/encoder/vp9_rd.c index 6b2306ce9b0c..34c74424ce0e 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_rd.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_rd.c @@ -57,6 +57,30 @@ void vp9_rd_cost_init(RD_COST *rd_cost) { rd_cost->rdcost = 0; } +int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist) { + assert(mult >= 0); + assert(div > 0); + if (rate >= 0 && dist >= 0) { + return RDCOST(mult, div, rate, dist); + } + if (rate >= 0 && dist < 0) { + return RDCOST_NEG_D(mult, div, rate, -dist); + } + if (rate < 0 && dist >= 0) { + return RDCOST_NEG_R(mult, div, -rate, dist); + } + return -RDCOST(mult, div, -rate, -dist); +} + +void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost) { + if (rd_cost->rate < INT_MAX && rd_cost->dist < INT64_MAX) { + rd_cost->rdcost = + vp9_calculate_rd_cost(mult, div, rd_cost->rate, rd_cost->dist); + } else { + vp9_rd_cost_reset(rd_cost); + } +} + // The baseline rd thresholds for breaking out of the rd loop for // certain modes are assumed to be based on 8x8 blocks. // This table is used to correct for block size. @@ -69,10 +93,12 @@ static void fill_mode_costs(VP9_COMP *cpi) { const FRAME_CONTEXT *const fc = cpi->common.fc; int i, j; - for (i = 0; i < INTRA_MODES; ++i) - for (j = 0; j < INTRA_MODES; ++j) + for (i = 0; i < INTRA_MODES; ++i) { + for (j = 0; j < INTRA_MODES; ++j) { vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j], vp9_intra_mode_tree); + } + } vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree); for (i = 0; i < INTRA_MODES; ++i) { @@ -82,9 +108,28 @@ static void fill_mode_costs(VP9_COMP *cpi) { fc->uv_mode_prob[i], vp9_intra_mode_tree); } - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) { vp9_cost_tokens(cpi->switchable_interp_costs[i], fc->switchable_interp_prob[i], vp9_switchable_interp_tree); + } + + for (i = TX_8X8; i < TX_SIZES; ++i) { + for (j = 0; j < TX_SIZE_CONTEXTS; ++j) { + const vpx_prob *tx_probs = get_tx_probs(i, j, &fc->tx_probs); + int k; + for (k = 0; k <= i; ++k) { + int cost = 0; + int m; + for (m = 0; m <= k - (k == i); ++m) { + if (m == k) + cost += vp9_cost_zero(tx_probs[m]); + else + cost += vp9_cost_one(tx_probs[m]); + } + cpi->tx_size_cost[i - 1][j][k] = cost; + } + } + } } static void fill_token_costs(vp9_coeff_cost *c, @@ -143,40 +188,74 @@ void vp9_init_me_luts(void) { static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12, 8, 8, 4, 4, 2, 2, 1, 0 }; -static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128, - 128, 144 }; -int64_t vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) { - const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth); -#if CONFIG_VP9_HIGHBITDEPTH - int64_t rdmult = 0; - switch (cpi->common.bit_depth) { - case VPX_BITS_8: rdmult = 88 * q * q / 24; break; - case VPX_BITS_10: rdmult = 
ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break; - case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break; - default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; +// Note that the element below for frame type "USE_BUF_FRAME", which indicates +// that the show frame flag is set, should not be used as no real frame +// is encoded so we should not reach here. However, a dummy value +// is inserted here to make sure the data structure has the right number +// of values assigned. +static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128, + 128, 144, 144 }; + +int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) { + // largest dc_quant is 21387, therefore rdmult should always fit in int32_t + const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth); + uint32_t rdmult = q * q; + + if (cpi->common.frame_type != KEY_FRAME) { + if (qindex < 128) + rdmult = rdmult * 4; + else if (qindex < 190) + rdmult = rdmult * 4 + rdmult / 2; + else + rdmult = rdmult * 3; + } else { + if (qindex < 64) + rdmult = rdmult * 4; + else if (qindex <= 128) + rdmult = rdmult * 3 + rdmult / 2; + else if (qindex < 190) + rdmult = rdmult * 4 + rdmult / 2; + else + rdmult = rdmult * 7 + rdmult / 2; + } +#if CONFIG_VP9_HIGHBITDEPTH + switch (cpi->common.bit_depth) { + case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break; + case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break; + default: break; } -#else - int64_t rdmult = 88 * q * q / 24; #endif // CONFIG_VP9_HIGHBITDEPTH - return rdmult; + return rdmult > 0 ? rdmult : 1; } -int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) { - int64_t rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex); - +static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) { + int64_t rdmult_64 = rdmult; if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { const GF_GROUP *const gf_group = &cpi->twopass.gf_group; const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index]; - const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100)); + const int gfu_boost = cpi->multi_layer_arf + ? gf_group->gfu_boost[gf_group->index] + : cpi->rc.gfu_boost; + const int boost_index = VPXMIN(15, (gfu_boost / 100)); - rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7; - rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7); + rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7; + rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7); } - if (rdmult < 1) rdmult = 1; - return (int)rdmult; + return (int)rdmult_64; +} + +int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) { + int rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex); + return modulate_rdmult(cpi, rdmult); +} + +int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) { + int rdmult = + vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex); + rdmult = (int)((double)rdmult / beta); + rdmult = rdmult > 0 ? 
rdmult : 1; + return modulate_rdmult(cpi, rdmult); } static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) { @@ -185,10 +264,10 @@ static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) { switch (bit_depth) { case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break; case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break; - case VPX_BITS_12: q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0; break; default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; + assert(bit_depth == VPX_BITS_12); + q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0; + break; } #else (void)bit_depth; @@ -209,12 +288,11 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) { x->sadperbit16 = sad_per_bit16lut_10[qindex]; x->sadperbit4 = sad_per_bit4lut_10[qindex]; break; - case VPX_BITS_12: + default: + assert(cpi->common.bit_depth == VPX_BITS_12); x->sadperbit16 = sad_per_bit16lut_12[qindex]; x->sadperbit4 = sad_per_bit4lut_12[qindex]; break; - default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); } #else (void)cpi; @@ -255,6 +333,15 @@ static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) { } } +void vp9_build_inter_mode_cost(VP9_COMP *cpi) { + const VP9_COMMON *const cm = &cpi->common; + int i; + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) { + vp9_cost_tokens((int *)cpi->inter_mode_cost[i], cm->fc->inter_mode_probs[i], + vp9_inter_mode_tree); + } +} + void vp9_initialize_rd_consts(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->td.mb; @@ -303,10 +390,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { x->nmvjointcost, cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, &cm->fc->nmvc, cm->allow_high_precision_mv); - - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - vp9_cost_tokens((int *)cpi->inter_mode_cost[i], - cm->fc->inter_mode_probs[i], vp9_inter_mode_tree); + vp9_build_inter_mode_cost(cpi); } } } @@ -471,13 +555,13 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, for (i = 0; i < num_4x4_h; i += 4) t_left[i] = !!*(const uint32_t *)&left[i]; break; - case TX_32X32: + default: + assert(tx_size == TX_32X32); for (i = 0; i < num_4x4_w; i += 8) t_above[i] = !!*(const uint64_t *)&above[i]; for (i = 0; i < num_4x4_h; i += 8) t_left[i] = !!*(const uint64_t *)&left[i]; break; - default: assert(0 && "Invalid transform size."); break; } } @@ -493,8 +577,7 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, uint8_t *src_y_ptr = x->plane[0].src.buf; uint8_t *ref_y_ptr; const int num_mv_refs = - MAX_MV_REF_CANDIDATES + - (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size); + MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size); MV pred_mv[3]; pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv; @@ -504,11 +587,12 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int == x->mbmi_ext->ref_mvs[ref_frame][1].as_int; + // Get the sad for each candidate reference mv. 
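
The rewritten vp9_compute_rd_mult_based_on_qindex() above replaces the old 88 * q * q / 24 formula with qindex-dependent integer multipliers. A minimal standalone sketch of that piecewise scaling (q is taken as a direct input here instead of coming from vp9_dc_quant(), and the sample values are made up):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the piecewise scaling above: rdmult = q * q times a
 * qindex-dependent factor (4, 4.5, 3, ... written in integer math),
 * clamped to at least 1. */
static uint32_t rd_mult_sketch(int q, int qindex, int is_key_frame) {
  uint32_t rdmult = (uint32_t)(q * q);
  if (!is_key_frame) {
    if (qindex < 128)
      rdmult = rdmult * 4;
    else if (qindex < 190)
      rdmult = rdmult * 4 + rdmult / 2;
    else
      rdmult = rdmult * 3;
  } else {
    if (qindex < 64)
      rdmult = rdmult * 4;
    else if (qindex <= 128)
      rdmult = rdmult * 3 + rdmult / 2;
    else if (qindex < 190)
      rdmult = rdmult * 4 + rdmult / 2;
    else
      rdmult = rdmult * 7 + rdmult / 2;
  }
  return rdmult > 0 ? rdmult : 1;
}

int main(void) {
  /* Made-up (q, qindex) pairs, chosen to hit different branches. */
  printf("%u\n", (unsigned)rd_mult_sketch(8, 60, 0));    /* 64 * 4 = 256 */
  printf("%u\n", (unsigned)rd_mult_sketch(40, 150, 0));  /* 1600 * 4.5 = 7200 */
  printf("%u\n", (unsigned)rd_mult_sketch(100, 200, 1)); /* 10000 * 7.5 = 75000 */
  return 0;
}

vp9_get_adaptive_rdmult() above then simply divides this base value by a per-block beta and clamps to 1 before applying the same gf-group boost modulation.
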
for (i = 0; i < num_mv_refs; ++i) { const MV *this_mv = &pred_mv[i]; int fp_row, fp_col; - + if (this_mv->row == INT16_MAX || this_mv->col == INT16_MAX) continue; if (i == 1 && near_same_nearest) continue; fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3; fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3; @@ -573,6 +657,7 @@ YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, const VP9_COMMON *const cm = &cpi->common; const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1]; const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame); + assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME); return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ? &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL; diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_rd.h b/media/libvpx/libvpx/vp9/encoder/vp9_rd.h index 59022c106e2b..908989c071e0 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_rd.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_rd.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_RD_H_ -#define VP9_ENCODER_VP9_RD_H_ +#ifndef VPX_VP9_ENCODER_VP9_RD_H_ +#define VPX_VP9_ENCODER_VP9_RD_H_ #include @@ -27,14 +27,17 @@ extern "C" { #define RD_EPB_SHIFT 6 #define RDCOST(RM, DM, R, D) \ - (ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM)) + ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), VP9_PROB_COST_SHIFT) + ((D) << (DM)) +#define RDCOST_NEG_R(RM, DM, R, D) \ + ((D) << (DM)) - ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), VP9_PROB_COST_SHIFT) +#define RDCOST_NEG_D(RM, DM, R, D) \ + ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), VP9_PROB_COST_SHIFT) - ((D) << (DM)) + #define QIDX_SKIP_THRESH 115 #define MV_COST_WEIGHT 108 #define MV_COST_WEIGHT_SUB 120 -#define INVALID_MV 0x80008000 - #define MAX_MODES 30 #define MAX_REFS 6 @@ -42,6 +45,9 @@ extern "C" { #define RD_THRESH_MAX_FACT 64 #define RD_THRESH_INC 1 +#define VP9_DIST_SCALE_LOG2 4 +#define VP9_DIST_SCALE (1 << VP9_DIST_SCALE_LOG2) + // This enumerator type needs to be kept aligned with the mode order in // const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code. typedef enum { @@ -98,8 +104,8 @@ typedef enum { typedef struct RD_OPT { // Thresh_mult is used to set a threshold for the rd score. A higher value // means that we will accept the best mode so far more often. This number - // is used in combination with the current block size, and thresh_freq_fact - // to pick a threshold. + // is used in combination with the current block size, and thresh_freq_fact to + // pick a threshold. int thresh_mult[MAX_MODES]; int thresh_mult_sub8x8[MAX_REFS]; @@ -108,9 +114,14 @@ typedef struct RD_OPT { int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; +#if CONFIG_CONSISTENT_RECODE + int64_t prediction_type_threshes_prev[MAX_REF_FRAMES][REFERENCE_MODES]; + int64_t filter_threshes_prev[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; +#endif int RDMULT; int RDDIV; + double r0; } RD_OPT; typedef struct RD_COST { @@ -123,22 +134,27 @@ typedef struct RD_COST { void vp9_rd_cost_reset(RD_COST *rd_cost); // Initialize the rate distortion cost values to zero. void vp9_rd_cost_init(RD_COST *rd_cost); +// It supports negative rate and dist, which is different from RDCOST(). +int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist); +// Update the cost value based on its rate and distortion. 
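
A self-contained sketch of the signed RD cost dispatch documented just above (vp9_calculate_rd_cost() in the vp9_rd.c hunk; the RDCOST_NEG_R / RDCOST_NEG_D macros it relies on appear in the vp9_rd.h hunk below). Assumptions: VP9_PROB_COST_SHIFT = 9 as in vp9_cost.h, the usual libvpx ROUND_POWER_OF_TWO() definition, and an extra pair of outer parentheses on the macros so the negated case groups correctly in this standalone form:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define VP9_PROB_COST_SHIFT 9 /* assumed value, from vp9_cost.h */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

/* Same macros as the vp9_rd.h hunk, with outer parentheses added so
 * that -RDCOST(...) negates the whole expression. */
#define RDCOST(RM, DM, R, D)                                        \
  (ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), VP9_PROB_COST_SHIFT) + \
   ((D) << (DM)))
#define RDCOST_NEG_R(RM, DM, R, D) \
  (((D) << (DM)) -                 \
   ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), VP9_PROB_COST_SHIFT))
#define RDCOST_NEG_D(RM, DM, R, D)                                  \
  (ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), VP9_PROB_COST_SHIFT) - \
   ((D) << (DM)))

/* Sign dispatch as in vp9_calculate_rd_cost(): each macro only ever
 * sees non-negative R and D, so both shifts stay well defined. */
static int64_t calc_rd_cost(int mult, int div, int rate, int64_t dist) {
  assert(mult >= 0);
  assert(div > 0);
  if (rate >= 0 && dist >= 0) return RDCOST(mult, div, rate, dist);
  if (rate >= 0 && dist < 0) return RDCOST_NEG_D(mult, div, rate, -dist);
  if (rate < 0 && dist >= 0) return RDCOST_NEG_R(mult, div, -rate, dist);
  return -RDCOST(mult, div, -rate, -dist);
}

int main(void) {
  /* Invented mult/div/rate/dist values: rate term 100, dist term 400. */
  printf("%lld\n", (long long)calc_rd_cost(512, 4, 100, 25));  /* 500 */
  printf("%lld\n", (long long)calc_rd_cost(512, 4, 100, -25)); /* -300 */
  return 0;
}
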
+void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost); struct TileInfo; struct TileDataEnc; struct VP9_COMP; struct macroblock; -int64_t vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi, - int qindex); +int vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi, int qindex); int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex); +int vp9_get_adaptive_rdmult(const struct VP9_COMP *cpi, double beta); + void vp9_initialize_rd_consts(struct VP9_COMP *cpi); void vp9_initialize_me_consts(struct VP9_COMP *cpi, MACROBLOCK *x, int qindex); -void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, +void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2, unsigned int qstep, int *rate, int64_t *dist); void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE], @@ -169,8 +185,8 @@ void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi); void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi); -void vp9_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh, int bsize, - int best_mode_index); +void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh, + int bsize, int best_mode_index); static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, const int *const thresh_fact) { @@ -208,8 +224,10 @@ unsigned int vp9_high_get_sby_perpixel_variance(struct VP9_COMP *cpi, BLOCK_SIZE bs, int bd); #endif +void vp9_build_inter_mode_cost(struct VP9_COMP *cpi); + #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_RD_H_ +#endif // VPX_VP9_ENCODER_VP9_RD_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_rdopt.c b/media/libvpx/libvpx/vp9/encoder/vp9_rdopt.c index 2ba6378c5e4e..fa7472ca612a 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_rdopt.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_rdopt.c @@ -31,6 +31,9 @@ #include "vp9/common/vp9_scan.h" #include "vp9/common/vp9_seg_common.h" +#if !CONFIG_REALTIME_ONLY +#include "vp9/encoder/vp9_aq_variance.h" +#endif #include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" @@ -40,7 +43,6 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rd.h" #include "vp9/encoder/vp9_rdopt.h" -#include "vp9/encoder/vp9_aq_variance.h" #define LAST_FRAME_MODE_MASK \ ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME)) @@ -59,7 +61,9 @@ typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } MODE_DEFINITION; -typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION; +typedef struct { + MV_REFERENCE_FRAME ref_frame[2]; +} REF_DEFINITION; struct rdcost_block_args { const VP9_COMP *cpi; @@ -75,9 +79,12 @@ struct rdcost_block_args { int use_fast_coef_costing; const scan_order *so; uint8_t skippable; + struct buf_2d *this_recon; }; #define LAST_NEW_MV_INDEX 6 + +#if !CONFIG_REALTIME_ONLY static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { { NEARESTMV, { LAST_FRAME, NONE } }, { NEARESTMV, { ALTREF_FRAME, NONE } }, @@ -125,6 +132,7 @@ static const REF_DEFINITION vp9_ref_order[MAX_REFS] = { { { ALTREF_FRAME, NONE } }, { { LAST_FRAME, ALTREF_FRAME } }, { { GOLDEN_FRAME, ALTREF_FRAME } }, { { INTRA_FRAME, NONE } }, }; +#endif // !CONFIG_REALTIME_ONLY static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int m, int n, int min_plane, int max_plane) { @@ -151,6 +159,7 @@ static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int m, int n, } } +#if !CONFIG_REALTIME_ONLY static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, 
int *out_rate_sum, int64_t *out_dist_sum, int *skip_txfm_sb, @@ -271,10 +280,11 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, } *skip_txfm_sb = skip_flag; - *skip_sse_sb = total_sse << 4; + *skip_sse_sb = total_sse << VP9_DIST_SCALE_LOG2; *out_rate_sum = (int)rate_sum; - *out_dist_sum = dist_sum << 4; + *out_dist_sum = dist_sum << VP9_DIST_SCALE_LOG2; } +#endif // !CONFIG_REALTIME_ONLY #if CONFIG_VP9_HIGHBITDEPTH int64_t vp9_highbd_block_error_c(const tran_low_t *coeff, @@ -457,6 +467,66 @@ static INLINE int num_4x4_to_edge(int plane_4x4_dim, int mb_to_edge_dim, return plane_4x4_dim + (mb_to_edge_dim >> (5 + subsampling_dim)) - blk_dim; } +// Copy all visible 4x4s in the transform block. +static void copy_block_visible(const MACROBLOCKD *xd, + const struct macroblockd_plane *const pd, + const uint8_t *src, const int src_stride, + uint8_t *dst, const int dst_stride, int blk_row, + int blk_col, const BLOCK_SIZE plane_bsize, + const BLOCK_SIZE tx_bsize) { + const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize]; + const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize]; + int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge, + pd->subsampling_x, blk_col); + int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge, + pd->subsampling_y, blk_row); + const int is_highbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH; + if (tx_bsize == BLOCK_4X4 || + (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) { + const int w = tx_4x4_w << 2; + const int h = tx_4x4_h << 2; +#if CONFIG_VP9_HIGHBITDEPTH + if (is_highbd) { + vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(src), src_stride, + CONVERT_TO_SHORTPTR(dst), dst_stride, NULL, 0, 0, + 0, 0, w, h, xd->bd); + } else { +#endif + vpx_convolve_copy(src, src_stride, dst, dst_stride, NULL, 0, 0, 0, 0, w, + h); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif + } else { + int r, c; + int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h); + int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w); + // if we are in the unrestricted motion border. + for (r = 0; r < max_r; ++r) { + // Skip visiting the sub blocks that are wholly within the UMV. + for (c = 0; c < max_c; ++c) { + const uint8_t *src_ptr = src + r * src_stride * 4 + c * 4; + uint8_t *dst_ptr = dst + r * dst_stride * 4 + c * 4; +#if CONFIG_VP9_HIGHBITDEPTH + if (is_highbd) { + vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(src_ptr), src_stride, + CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, + NULL, 0, 0, 0, 0, 4, 4, xd->bd); + } else { +#endif + vpx_convolve_copy(src_ptr, src_stride, dst_ptr, dst_stride, NULL, 0, + 0, 0, 0, 4, 4); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif + } + } + } + (void)is_highbd; +} + // Compute the pixel domain sum square error on all visible 4x4s in the // transform block. 
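
num_4x4_to_edge() and copy_block_visible() above only touch 4x4 sub-blocks that lie inside the visible frame. A small sketch of the edge arithmetic, assuming the xd->mb_to_*_edge fields carry the distance in 1/8-pel units (pixels * 8), so >> (5 + subsampling) converts them to 4x4-block units of the plane:

#include <stdio.h>

#define VPXMIN(a, b) ((a) < (b) ? (a) : (b))

/* Mirrors num_4x4_to_edge(): mb_to_edge_dim is negative when part of
 * the block hangs outside the frame. */
static int num_4x4_to_edge(int plane_4x4_dim, int mb_to_edge_dim,
                           int subsampling_dim, int blk_dim) {
  return plane_4x4_dim + (mb_to_edge_dim >> (5 + subsampling_dim)) - blk_dim;
}

int main(void) {
  /* A 32x32 luma transform block (8 4x4s wide) whose last 12 pixel
   * columns lie past the right frame edge: mb_to_right_edge = -12 * 8. */
  const int tx_4x4_w = 8;
  const int to_edge = num_4x4_to_edge(8, -12 * 8, 0, 0);
  printf("visible 4x4 columns: %d\n", VPXMIN(to_edge, tx_4x4_w)); /* 5 */
  return 0;
}
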
static unsigned pixel_sse(const VP9_COMP *const cpi, const MACROBLOCKD *xd, @@ -537,12 +607,13 @@ static int64_t sum_squares_visible(const MACROBLOCKD *xd, static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col, TX_SIZE tx_size, int64_t *out_dist, - int64_t *out_sse) { + int64_t *out_sse, struct buf_2d *out_recon) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; + const int eob = p->eobs[block]; - if (x->block_tx_domain) { + if (!out_recon && x->block_tx_domain && eob) { const int ss_txfrm_size = tx_size << 1; int64_t this_sse; const int shift = tx_size == TX_32X32 ? 0 : 2; @@ -581,15 +652,23 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane, const int dst_idx = 4 * (blk_row * dst_stride + blk_col); const uint8_t *src = &p->src.buf[src_idx]; const uint8_t *dst = &pd->dst.buf[dst_idx]; + uint8_t *out_recon_ptr = 0; + const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - const uint16_t *eob = &p->eobs[block]; unsigned int tmp; tmp = pixel_sse(cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row, blk_col, plane_bsize, tx_bsize); *out_sse = (int64_t)tmp * 16; + if (out_recon) { + const int out_recon_idx = 4 * (blk_row * out_recon->stride + blk_col); + out_recon_ptr = &out_recon->buf[out_recon_idx]; + copy_block_visible(xd, pd, dst, dst_stride, out_recon_ptr, + out_recon->stride, blk_row, blk_col, plane_bsize, + tx_bsize); + } - if (*eob) { + if (eob) { #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, recon16[1024]); uint8_t *recon = (uint8_t *)recon16; @@ -602,22 +681,22 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane, vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16, 32, NULL, 0, 0, 0, 0, bs, bs, xd->bd); if (xd->lossless) { - vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, *eob, xd->bd); + vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, eob, xd->bd); } else { switch (tx_size) { case TX_4X4: - vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, *eob, xd->bd); + vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, eob, xd->bd); break; case TX_8X8: - vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, *eob, xd->bd); + vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, eob, xd->bd); break; case TX_16X16: - vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, *eob, xd->bd); + vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, eob, xd->bd); break; - case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, *eob, xd->bd); + default: + assert(tx_size == TX_32X32); + vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, eob, xd->bd); break; - default: assert(0 && "Invalid transform size"); } } recon = CONVERT_TO_BYTEPTR(recon16); @@ -625,16 +704,16 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane, #endif // CONFIG_VP9_HIGHBITDEPTH vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, 0, 0, 0, bs, bs); switch (tx_size) { - case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, *eob); break; - case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, *eob); break; - case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, *eob); break; - case TX_4X4: + case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, eob); break; + case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, eob); break; + case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, eob); break; + default: + assert(tx_size == TX_4X4); // this is like vp9_short_idct4x4 but has a special case around // eob<=1, which is 
significant (not just an optimization) for // the lossless case. - x->inv_txfm_add(dqcoeff, recon, 32, *eob); + x->inv_txfm_add(dqcoeff, recon, 32, eob); break; - default: assert(0 && "Invalid transform size"); break; } #if CONFIG_VP9_HIGHBITDEPTH } @@ -642,6 +721,10 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane, tmp = pixel_sse(cpi, xd, pd, src, src_stride, recon, 32, blk_row, blk_col, plane_bsize, tx_bsize); + if (out_recon) { + copy_block_visible(xd, pd, recon, 32, out_recon_ptr, out_recon->stride, + blk_row, blk_col, plane_bsize, tx_bsize); + } } *out_dist = (int64_t)tmp * 16; @@ -666,26 +749,38 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, int64_t sse; const int coeff_ctx = combine_entropy_contexts(args->t_left[blk_row], args->t_above[blk_col]); + struct buf_2d *recon = args->this_recon; + const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const int dst_stride = pd->dst.stride; + const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)]; if (args->exit_early) return; if (!is_inter_block(mi)) { +#if CONFIG_MISMATCH_DEBUG + struct encode_b_args intra_arg = { + x, x->block_qcoeff_opt, args->t_above, args->t_left, &mi->skip, 0, 0, 0 + }; +#else struct encode_b_args intra_arg = { x, x->block_qcoeff_opt, args->t_above, args->t_left, &mi->skip }; +#endif vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size, &intra_arg); + if (recon) { + uint8_t *rec_ptr = &recon->buf[4 * (blk_row * recon->stride + blk_col)]; + copy_block_visible(xd, pd, dst, dst_stride, rec_ptr, recon->stride, + blk_row, blk_col, plane_bsize, tx_bsize); + } if (x->block_tx_domain) { dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col, - tx_size, &dist, &sse); + tx_size, &dist, &sse, /*recon =*/0); } else { - const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; const struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; const int src_stride = p->src.stride; - const int dst_stride = pd->dst.stride; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const uint8_t *src = &p->src.buf[4 * (blk_row * src_stride + blk_col)]; - const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)]; const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; unsigned int tmp; sse = sum_squares_visible(xd, pd, diff, diff_stride, blk_row, blk_col, @@ -699,17 +794,20 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, blk_row, blk_col, plane_bsize, tx_bsize); dist = (int64_t)tmp * 16; } - } else if (max_txsize_lookup[plane_bsize] == tx_size) { - if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == - SKIP_TXFM_NONE) { + } else { + int skip_txfm_flag = SKIP_TXFM_NONE; + if (max_txsize_lookup[plane_bsize] == tx_size) + skip_txfm_flag = x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))]; + + if (skip_txfm_flag == SKIP_TXFM_NONE || + (recon && skip_txfm_flag == SKIP_TXFM_AC_ONLY)) { // full forward transform and quantization vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size); if (x->block_qcoeff_opt) vp9_optimize_b(x, plane, block, tx_size, coeff_ctx); dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col, - tx_size, &dist, &sse); - } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == - SKIP_TXFM_AC_ONLY) { + tx_size, &dist, &sse, recon); + } else if (skip_txfm_flag == 
SKIP_TXFM_AC_ONLY) { // compute DC coefficient tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); @@ -735,14 +833,12 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, x->plane[plane].eobs[block] = 0; sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; dist = sse; + if (recon) { + uint8_t *rec_ptr = &recon->buf[4 * (blk_row * recon->stride + blk_col)]; + copy_block_visible(xd, pd, dst, dst_stride, rec_ptr, recon->stride, + blk_row, blk_col, plane_bsize, tx_bsize); + } } - } else { - // full forward transform and quantization - vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size); - if (x->block_qcoeff_opt) - vp9_optimize_b(x, plane, block, tx_size, coeff_ctx); - dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col, - tx_size, &dist, &sse); } rd = RDCOST(x->rdmult, x->rddiv, 0, dist); @@ -761,7 +857,8 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, rd = VPXMIN(rd1, rd2); if (plane == 0) { x->zcoeff_blk[tx_size][block] = - !x->plane[plane].eobs[block] || (rd1 > rd2 && !xd->lossless); + !x->plane[plane].eobs[block] || + (x->sharpness == 0 && rd1 > rd2 && !xd->lossless); x->sum_y_eobs[tx_size] += x->plane[plane].eobs[block]; } @@ -781,7 +878,8 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skippable, int64_t *sse, int64_t ref_best_rd, int plane, BLOCK_SIZE bsize, - TX_SIZE tx_size, int use_fast_coef_casting) { + TX_SIZE tx_size, int use_fast_coef_costing, + struct buf_2d *recon) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblockd_plane *const pd = &xd->plane[plane]; struct rdcost_block_args args; @@ -789,8 +887,9 @@ static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, args.cpi = cpi; args.x = x; args.best_rd = ref_best_rd; - args.use_fast_coef_costing = use_fast_coef_casting; + args.use_fast_coef_costing = use_fast_coef_costing; args.skippable = 1; + args.this_recon = recon; if (plane == 0) xd->mi[0]->tx_size = tx_size; @@ -815,7 +914,8 @@ static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, int64_t *sse, - int64_t ref_best_rd, BLOCK_SIZE bs) { + int64_t ref_best_rd, BLOCK_SIZE bs, + struct buf_2d *recon) { const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; @@ -825,13 +925,13 @@ static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate, mi->tx_size = VPXMIN(max_tx_size, largest_tx_size); txfm_rd_in_plane(cpi, x, rate, distortion, skip, sse, ref_best_rd, 0, bs, - mi->tx_size, cpi->sf.use_fast_coef_costing); + mi->tx_size, cpi->sf.use_fast_coef_costing, recon); } static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, int64_t *psse, int64_t ref_best_rd, - BLOCK_SIZE bs) { + BLOCK_SIZE bs, struct buf_2d *recon) { const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; @@ -843,20 +943,34 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, { INT64_MAX, INT64_MAX }, { INT64_MAX, INT64_MAX }, { INT64_MAX, INT64_MAX } }; - int n, m; + int n; int s0, 
s1; - int64_t best_rd = INT64_MAX; + int64_t best_rd = ref_best_rd; TX_SIZE best_tx = max_tx_size; int start_tx, end_tx; + const int tx_size_ctx = get_tx_size_context(xd); +#if CONFIG_VP9_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, recon_buf16[TX_SIZES][64 * 64]); + uint8_t *recon_buf[TX_SIZES]; + for (n = 0; n < TX_SIZES; ++n) { + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + recon_buf[n] = CONVERT_TO_BYTEPTR(recon_buf16[n]); + } else { + recon_buf[n] = (uint8_t *)recon_buf16[n]; + } + } +#else + DECLARE_ALIGNED(16, uint8_t, recon_buf[TX_SIZES][64 * 64]); +#endif // CONFIG_VP9_HIGHBITDEPTH - const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); assert(skip_prob > 0); s0 = vp9_cost_bit(skip_prob, 0); s1 = vp9_cost_bit(skip_prob, 1); if (cm->tx_mode == TX_MODE_SELECT) { start_tx = max_tx_size; - end_tx = 0; + end_tx = VPXMAX(start_tx - cpi->sf.tx_size_search_depth, 0); + if (bs > BLOCK_32X32) end_tx = VPXMIN(end_tx + 1, start_tx); } else { TX_SIZE chosen_tx_size = VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]); @@ -865,15 +979,17 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, } for (n = start_tx; n >= end_tx; n--) { - int r_tx_size = 0; - for (m = 0; m <= n - (n == (int)max_tx_size); m++) { - if (m == n) - r_tx_size += vp9_cost_zero(tx_probs[m]); - else - r_tx_size += vp9_cost_one(tx_probs[m]); + const int r_tx_size = cpi->tx_size_cost[max_tx_size - 1][tx_size_ctx][n]; + if (recon) { + struct buf_2d this_recon; + this_recon.buf = recon_buf[n]; + this_recon.stride = recon->stride; + txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, + n, cpi->sf.use_fast_coef_costing, &this_recon); + } else { + txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, + n, cpi->sf.use_fast_coef_costing, 0); } - txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0, - bs, n, cpi->sf.use_fast_coef_costing); r[n][1] = r[n][0]; if (r[n][0] < INT_MAX) { r[n][1] += r_tx_size; @@ -915,11 +1031,25 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, *rate = r[mi->tx_size][cm->tx_mode == TX_MODE_SELECT]; *skip = s[mi->tx_size]; *psse = sse[mi->tx_size]; + if (recon) { +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + memcpy(CONVERT_TO_SHORTPTR(recon->buf), + CONVERT_TO_SHORTPTR(recon_buf[mi->tx_size]), + 64 * 64 * sizeof(uint16_t)); + } else { +#endif + memcpy(recon->buf, recon_buf[mi->tx_size], 64 * 64); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif + } } static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, int64_t *psse, - BLOCK_SIZE bs, int64_t ref_best_rd) { + BLOCK_SIZE bs, int64_t ref_best_rd, + struct buf_2d *recon) { MACROBLOCKD *xd = &x->e_mbd; int64_t sse; int64_t *ret_sse = psse ? 
psse : &sse; @@ -928,10 +1058,10 @@ static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) { choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd, - bs); + bs, recon); } else { choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd, - bs); + bs, recon); } } @@ -1273,7 +1403,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate, mic->mode = mode; super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL, - bsize, best_rd); + bsize, best_rd, /*recon = */ 0); if (this_rate_tokenonly == INT_MAX) continue; @@ -1325,7 +1455,8 @@ static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, for (plane = 1; plane < MAX_MB_PLANE; ++plane) { txfm_rd_in_plane(cpi, x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd, - plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing); + plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing, + /*recon = */ 0); if (pnrate == INT_MAX) { is_cost_valid = 0; break; @@ -1393,6 +1524,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } +#if !CONFIG_REALTIME_ONLY static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize) { @@ -1466,11 +1598,11 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, if (is_compound) this_mv[1].as_int = frame_mv[mode][mi->ref_frame[1]].as_int; break; - case ZEROMV: + default: + assert(mode == ZEROMV); this_mv[0].as_int = 0; if (is_compound) this_mv[1].as_int = 0; break; - default: break; } mi->bmi[i].as_mv[0].as_int = this_mv[0].as_int; @@ -1604,6 +1736,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCK *x, return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); } +#endif // !CONFIG_REALTIME_ONLY typedef struct { int eobs; @@ -1631,6 +1764,7 @@ typedef struct { int mvthresh; } BEST_SEG_INFO; +#if !CONFIG_REALTIME_ONLY static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) { return (mv->row >> 3) < mv_limits->row_min || (mv->row >> 3) > mv_limits->row_max || @@ -1829,8 +1963,8 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, bestsme = cpi->find_fractional_mv_step( x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], 0, - cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, pw, ph); + cpi->sf.mv.subpel_search_level, NULL, x->nmvjointcost, x->mvcost, + &dis, &sse, second_pred, pw, ph, cpi->sf.use_accurate_subpel_search); } // Restore the pointer to the first (possibly scaled) prediction buffer. 
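
choose_tx_size_from_rd() above no longer always searches down to TX_4X4: the floor of its loop now follows the tx_size_search_depth speed feature. A toy sketch of the range computation, with TX_SIZE enum values assumed as TX_4X4 = 0 through TX_32X32 = 3 and made-up settings:

#include <stdio.h>

#define VPXMAX(a, b) ((a) > (b) ? (a) : (b))
#define VPXMIN(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
  const int max_tx_size = 3;          /* TX_32X32 */
  const int tx_size_search_depth = 2; /* example speed-feature setting */
  const int bs_above_32x32 = 1;       /* bs > BLOCK_32X32 */
  int start_tx = max_tx_size;
  int end_tx = VPXMAX(start_tx - tx_size_search_depth, 0);
  int n;
  if (bs_above_32x32) end_tx = VPXMIN(end_tx + 1, start_tx);
  /* The RD loop walks start_tx down to end_tx, so only TX_32X32 and
   * TX_16X16 are tried with these inputs. */
  for (n = start_tx; n >= end_tx; --n) printf("search TX_SIZE %d\n", n);
  return 0;
}
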
@@ -1884,6 +2018,8 @@ static int64_t rd_pick_best_sub8x8_mode( const BLOCK_SIZE bsize = mi->sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int pw = num_4x4_blocks_wide << 2; + const int ph = num_4x4_blocks_high << 2; ENTROPY_CONTEXT t_above[2], t_left[2]; int subpelmv = 1, have_ref = 0; SPEED_FEATURES *const sf = &cpi->sf; @@ -1992,8 +2128,11 @@ static int64_t rd_pick_best_sub8x8_mode( mvp_full.col = bsi->mvp.as_mv.col >> 3; if (sf->adaptive_motion_search) { - mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3; - mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3; + if (x->pred_mv[mi->ref_frame[0]].row != INT16_MAX && + x->pred_mv[mi->ref_frame[0]].col != INT16_MAX) { + mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3; + mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3; + } step_param = VPXMAX(step_param, 8); } @@ -2015,16 +2154,16 @@ static int64_t rd_pick_best_sub8x8_mode( cpi->find_fractional_mv_step( x, new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop, - sf->mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), + sf->mv.subpel_search_level, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &distortion, - &x->pred_sse[mi->ref_frame[0]], NULL, 0, 0); + &x->pred_sse[mi->ref_frame[0]], NULL, pw, ph, + cpi->sf.use_accurate_subpel_search); // save motion search result for use in compound prediction seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv; } - if (sf->adaptive_motion_search) - x->pred_mv[mi->ref_frame[0]] = *new_mv; + x->pred_mv[mi->ref_frame[0]] = *new_mv; // restore src pointers mi_buf_restore(x, orig_src, orig_pre); @@ -2319,6 +2458,22 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, block_size); } +#if CONFIG_NON_GREEDY_MV +static int ref_frame_to_gf_rf_idx(int ref_frame) { + if (ref_frame == GOLDEN_FRAME) { + return 0; + } + if (ref_frame == LAST_FRAME) { + return 1; + } + if (ref_frame == ALTREF_FRAME) { + return 2; + } + assert(0); + return -1; +} +#endif + static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv) { @@ -2326,19 +2481,35 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, const VP9_COMMON *cm = &cpi->common; MODE_INFO *mi = xd->mi[0]; struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } }; - int bestsme = INT_MAX; int step_param; - int sadpb = x->sadperbit16; MV mvp_full; int ref = mi->ref_frame[0]; MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv; const MvLimits tmp_mv_limits = x->mv_limits; int cost_list[5]; - + const int best_predmv_idx = x->mv_best_ref_index[ref]; const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); - + const int pw = num_4x4_blocks_wide_lookup[bsize] << 2; + const int ph = num_4x4_blocks_high_lookup[bsize] << 2; MV pred_mv[3]; + + int bestsme = INT_MAX; +#if CONFIG_NON_GREEDY_MV + int gf_group_idx = cpi->twopass.gf_group.index; + int gf_rf_idx = ref_frame_to_gf_rf_idx(ref); + BLOCK_SIZE square_bsize = get_square_block_size(bsize); + int_mv nb_full_mvs[NB_MVS_NUM] = { 0 }; + MotionField *motion_field = vp9_motion_field_info_get_motion_field( + &cpi->motion_field_info, gf_group_idx, gf_rf_idx, square_bsize); + const int nb_full_mv_num = + vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs); + const int lambda = (pw * ph) / 4; + assert(pw * ph == lambda << 2); +#else // 
CONFIG_NON_GREEDY_MV + int sadpb = x->sadperbit16; +#endif // CONFIG_NON_GREEDY_MV + pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv; pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv; pred_mv[2] = x->pred_mv[ref]; @@ -2367,7 +2538,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, } if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) { - int boffset = + const int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] - VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); step_param = VPXMAX(step_param, boffset); @@ -2385,8 +2556,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int i; for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) { if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { - x->pred_mv[ref].row = 0; - x->pred_mv[ref].col = 0; + x->pred_mv[ref].row = INT16_MAX; + x->pred_mv[ref].col = INT16_MAX; tmp_mv->as_int = INVALID_MV; if (scaled_ref_frame) { @@ -2404,14 +2575,65 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, // after full-pixel motion search. vp9_set_mv_search_range(&x->mv_limits, &ref_mv); - mvp_full = pred_mv[x->mv_best_ref_index[ref]]; - + mvp_full = pred_mv[best_predmv_idx]; mvp_full.col >>= 3; mvp_full.row >>= 3; +#if CONFIG_NON_GREEDY_MV + bestsme = vp9_full_pixel_diamond_new(cpi, x, bsize, &mvp_full, step_param, + lambda, 1, nb_full_mvs, nb_full_mv_num, + &tmp_mv->as_mv); +#else // CONFIG_NON_GREEDY_MV bestsme = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb, cond_cost_list(cpi, cost_list), &ref_mv, &tmp_mv->as_mv, INT_MAX, 1); +#endif // CONFIG_NON_GREEDY_MV + + if (cpi->sf.enhanced_full_pixel_motion_search) { + int i; + for (i = 0; i < 3; ++i) { + int this_me; + MV this_mv; + int diff_row; + int diff_col; + int step; + + if (pred_mv[i].row == INT16_MAX || pred_mv[i].col == INT16_MAX) continue; + if (i == best_predmv_idx) continue; + + diff_row = ((int)pred_mv[i].row - + pred_mv[i > 0 ? (i - 1) : best_predmv_idx].row) >> + 3; + diff_col = ((int)pred_mv[i].col - + pred_mv[i > 0 ? 
(i - 1) : best_predmv_idx].col) >> + 3; + if (diff_row == 0 && diff_col == 0) continue; + if (diff_row < 0) diff_row = -diff_row; + if (diff_col < 0) diff_col = -diff_col; + step = get_msb((diff_row + diff_col + 1) >> 1); + if (step <= 0) continue; + + mvp_full = pred_mv[i]; + mvp_full.col >>= 3; + mvp_full.row >>= 3; +#if CONFIG_NON_GREEDY_MV + this_me = vp9_full_pixel_diamond_new( + cpi, x, bsize, &mvp_full, + VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), lambda, 1, nb_full_mvs, + nb_full_mv_num, &this_mv); +#else // CONFIG_NON_GREEDY_MV + this_me = vp9_full_pixel_search( + cpi, x, bsize, &mvp_full, + VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), + cpi->sf.mv.search_method, sadpb, cond_cost_list(cpi, cost_list), + &ref_mv, &this_mv, INT_MAX, 1); +#endif // CONFIG_NON_GREEDY_MV + if (this_me < bestsme) { + tmp_mv->as_mv = this_mv; + bestsme = this_me; + } + } + } x->mv_limits = tmp_mv_limits; @@ -2420,13 +2642,14 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, cpi->find_fractional_mv_step( x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0); + cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), + x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph, + cpi->sf.use_accurate_subpel_search); } *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - if (cpi->sf.adaptive_motion_search) x->pred_mv[ref] = tmp_mv->as_mv; + x->pred_mv[ref] = tmp_mv->as_mv; if (scaled_ref_frame) { int i; @@ -2451,23 +2674,59 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd, // However, once established that vector may be usable through the nearest and // near mv modes to reduce distortion in subsequent blocks and also improve // visual quality. 
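
The enhanced_full_pixel_motion_search path shown above sizes each extra search from the spread between predictor MVs. A sketch with a local stand-in for get_msb() and MAX_MVSEARCH_STEPS assumed to be 11, its value in vp9_mcomp.h; the distances and base step are invented:

#include <stdio.h>

#define VPXMAX(a, b) ((a) > (b) ? (a) : (b))
#define MAX_MVSEARCH_STEPS 11 /* assumed value, from vp9_mcomp.h */

/* Local stand-in for get_msb(): index of the highest set bit. */
static int msb(unsigned int n) {
  int r = -1;
  while (n) {
    ++r;
    n >>= 1;
  }
  return r;
}

int main(void) {
  /* Invented full-pel distances between two predictor MVs. */
  const int diff_row = 8, diff_col = 40;
  const int base_step_param = 3; /* invented */
  const int step = msb((diff_row + diff_col + 1) >> 1); /* msb(24) = 4 */
  /* A larger spread between predictors gives a larger step and so a
   * smaller step_param, i.e. a wider search around the predictor. */
  printf("step_param = %d\n",
         VPXMAX(base_step_param, MAX_MVSEARCH_STEPS - step)); /* 7 */
  return 0;
}
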
-static int discount_newmv_test(const VP9_COMP *cpi, int this_mode, - int_mv this_mv, - int_mv (*mode_mv)[MAX_REF_FRAMES], - int ref_frame) { +static int discount_newmv_test(VP9_COMP *cpi, int this_mode, int_mv this_mv, + int_mv (*mode_mv)[MAX_REF_FRAMES], int ref_frame, + int mi_row, int mi_col, BLOCK_SIZE bsize) { +#if CONFIG_NON_GREEDY_MV + (void)mode_mv; + (void)this_mv; + if (this_mode == NEWMV && bsize >= BLOCK_8X8 && cpi->tpl_ready) { + const int gf_group_idx = cpi->twopass.gf_group.index; + const int gf_rf_idx = ref_frame_to_gf_rf_idx(ref_frame); + const TplDepFrame tpl_frame = cpi->tpl_stats[gf_group_idx]; + const MotionField *motion_field = vp9_motion_field_info_get_motion_field( + &cpi->motion_field_info, gf_group_idx, gf_rf_idx, cpi->tpl_bsize); + const int tpl_block_mi_h = num_8x8_blocks_high_lookup[cpi->tpl_bsize]; + const int tpl_block_mi_w = num_8x8_blocks_wide_lookup[cpi->tpl_bsize]; + const int tpl_mi_row = mi_row - (mi_row % tpl_block_mi_h); + const int tpl_mi_col = mi_col - (mi_col % tpl_block_mi_w); + const int mv_mode = + tpl_frame + .mv_mode_arr[gf_rf_idx][tpl_mi_row * tpl_frame.stride + tpl_mi_col]; + if (mv_mode == NEW_MV_MODE) { + int_mv tpl_new_mv = + vp9_motion_field_mi_get_mv(motion_field, tpl_mi_row, tpl_mi_col); + int row_diff = abs(tpl_new_mv.as_mv.row - this_mv.as_mv.row); + int col_diff = abs(tpl_new_mv.as_mv.col - this_mv.as_mv.col); + if (VPXMAX(row_diff, col_diff) <= 8) { + return 1; + } else { + return 0; + } + } else { + return 0; + } + } else { + return 0; + } +#else + (void)mi_row; + (void)mi_col; + (void)bsize; return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) && (this_mv.as_int != 0) && ((mode_mv[NEARESTMV][ref_frame].as_int == 0) || (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) && ((mode_mv[NEARMV][ref_frame].as_int == 0) || (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV))); +#endif } static int64_t handle_inter_mode( VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2, int64_t *distortion, int *skippable, int *rate_y, int *rate_uv, - int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES], int mi_row, - int mi_col, int_mv single_newmv[MAX_REF_FRAMES], + struct buf_2d *recon, int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES], + int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES], INTERP_FILTER (*single_filter)[MAX_REF_FRAMES], int (*single_skippable)[MAX_REF_FRAMES], int64_t *psse, const int64_t ref_best_rd, int64_t *mask_filter, int64_t filter_cache[]) { @@ -2573,7 +2832,8 @@ static int64_t handle_inter_mode( // under certain circumstances where we want to help initiate a weak // motion field, where the distortion gain for a single block may not // be enough to overcome the cost of a new mv. - if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) { + if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0], mi_row, + mi_col, bsize)) { *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); } else { *rate2 += rate_mv; @@ -2606,8 +2866,8 @@ static int64_t handle_inter_mode( // // Under some circumstances we discount the cost of new mv mode to encourage // initiation of a motion field. 
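
Where discount_newmv_test() returns true, handle_inter_mode() charges only a fraction of the motion vector rate; under CONFIG_NON_GREEDY_MV the test instead requires the candidate to land within 8 eighth-pel units of the tpl motion field vector, as in the hunk above. A sketch of the discount itself (NEW_MV_DISCOUNT_FACTOR is assumed to be 8, its value in vp9_rdopt.c; the rates are invented):

#include <stdio.h>

#define VPXMAX(a, b) ((a) > (b) ? (a) : (b))
#define NEW_MV_DISCOUNT_FACTOR 8 /* assumed value, from vp9_rdopt.c */

int main(void) {
  /* When the discount applies, only 1/8 of the mv rate is charged,
   * helping a NEWMV candidate seed a new motion field even when its
   * own RD gain is marginal. */
  const int rate_mv = 120;
  int rate2 = 1000;
  rate2 += VPXMAX(rate_mv / NEW_MV_DISCOUNT_FACTOR, 1); /* +15, not +120 */
  printf("rate2 = %d\n", rate2); /* 1015 */
  return 0;
}
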
- if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv, - refs[0])) { + if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv, refs[0], + mi_row, mi_col, bsize)) { *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]), cost_mv_ref(cpi, NEARESTMV, mbmi_ext->mode_context[refs[0]])); @@ -2771,7 +3031,7 @@ static int64_t handle_inter_mode( memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm)); memcpy(x->bsse, bsse, sizeof(bsse)); - if (!skip_txfm_sb) { + if (!skip_txfm_sb || xd->lossless) { int skippable_y, skippable_uv; int64_t sseuv = INT64_MAX; int64_t rdcosty = INT64_MAX; @@ -2779,7 +3039,7 @@ static int64_t handle_inter_mode( // Y cost and distortion vp9_subtract_plane(x, bsize, 0); super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize, - ref_best_rd); + ref_best_rd, recon); if (*rate_y == INT_MAX) { *rate2 = INT_MAX; @@ -2821,6 +3081,7 @@ static int64_t handle_inter_mode( restore_dst_buf(xd, orig_dst, orig_dst_stride); return 0; // The rate-distortion cost will be re-calculated by caller. } +#endif // !CONFIG_REALTIME_ONLY void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, @@ -2874,85 +3135,97 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); } +#if !CONFIG_REALTIME_ONLY // This function is designed to apply a bias or adjustment to an rd value based // on the relative variance of the source and reconstruction. -#define VERY_LOW_VAR_THRESH 2 -#define LOW_VAR_THRESH 5 -#define VAR_MULT 100 -static unsigned int max_var_adjust[VP9E_CONTENT_INVALID] = { 16, 16, 100 }; +#define LOW_VAR_THRESH 250 +#define VAR_MULT 250 +static unsigned int max_var_adjust[VP9E_CONTENT_INVALID] = { 16, 16, 250 }; static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int64_t *this_rd, + struct buf_2d *recon, MV_REFERENCE_FRAME ref_frame, - unsigned int source_variance) { + MV_REFERENCE_FRAME second_ref_frame, + PREDICTION_MODE this_mode) { MACROBLOCKD *const xd = &x->e_mbd; unsigned int rec_variance; unsigned int src_variance; unsigned int src_rec_min; - unsigned int absvar_diff = 0; + unsigned int var_diff = 0; unsigned int var_factor = 0; unsigned int adj_max; + unsigned int low_var_thresh = LOW_VAR_THRESH; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; vp9e_tune_content content_type = cpi->oxcf.content; if (*this_rd == INT64_MAX) return; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - if (source_variance > 0) { - rec_variance = vp9_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, - bsize, xd->bd); - src_variance = source_variance; - } else { - rec_variance = - vp9_high_get_sby_variance(cpi, &xd->plane[0].dst, bsize, xd->bd); - src_variance = - vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, xd->bd); - } + rec_variance = vp9_high_get_sby_variance(cpi, recon, bsize, xd->bd); + src_variance = + vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, xd->bd); } else { - if (source_variance > 0) { - rec_variance = - vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize); - src_variance = source_variance; - } else { - rec_variance = vp9_get_sby_variance(cpi, &xd->plane[0].dst, bsize); - src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize); - } - } -#else - if (source_variance > 0) { - rec_variance = 
vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize); - src_variance = source_variance; - } else { - rec_variance = vp9_get_sby_variance(cpi, &xd->plane[0].dst, bsize); + rec_variance = vp9_get_sby_variance(cpi, recon, bsize); src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize); } +#else + rec_variance = vp9_get_sby_variance(cpi, recon, bsize); + src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize); #endif // CONFIG_VP9_HIGHBITDEPTH + // Scale based on area in 8x8 blocks + rec_variance /= (bw * bh); + src_variance /= (bw * bh); + + if (content_type == VP9E_CONTENT_FILM) { + if (cpi->oxcf.pass == 2) { + // Adjust low variance threshold based on estimated group noise energy. + double noise_factor = + (double)cpi->twopass.gf_group.group_noise_energy / SECTION_NOISE_DEF; + low_var_thresh = (unsigned int)(low_var_thresh * noise_factor); + + if (ref_frame == INTRA_FRAME) { + low_var_thresh *= 2; + if (this_mode == DC_PRED) low_var_thresh *= 5; + } else if (second_ref_frame > INTRA_FRAME) { + low_var_thresh *= 2; + } + } + } else { + low_var_thresh = LOW_VAR_THRESH / 2; + } + // Lower of source (raw per pixel value) and recon variance. Note that // if the source per pixel is 0 then the recon value here will not be per // pixel (see above) so will likely be much larger. - src_rec_min = VPXMIN(source_variance, rec_variance); + src_rec_min = VPXMIN(src_variance, rec_variance); - if (src_rec_min > LOW_VAR_THRESH) return; + if (src_rec_min > low_var_thresh) return; - absvar_diff = (src_variance > rec_variance) ? (src_variance - rec_variance) - : (rec_variance - src_variance); + // We care more when the reconstruction has lower variance so give this case + // a stronger weighting. + var_diff = (src_variance > rec_variance) ? (src_variance - rec_variance) * 2 + : (rec_variance - src_variance) / 2; adj_max = max_var_adjust[content_type]; var_factor = - (unsigned int)((int64_t)VAR_MULT * absvar_diff) / VPXMAX(1, src_variance); + (unsigned int)((int64_t)VAR_MULT * var_diff) / VPXMAX(1, src_variance); var_factor = VPXMIN(adj_max, var_factor); + if ((content_type == VP9E_CONTENT_FILM) && + ((ref_frame == INTRA_FRAME) || (second_ref_frame > INTRA_FRAME))) { + var_factor *= 2; + } + *this_rd += (*this_rd * var_factor) / 100; - if (content_type == VP9E_CONTENT_FILM) { - if (src_rec_min <= VERY_LOW_VAR_THRESH) { - if (ref_frame == INTRA_FRAME) *this_rd *= 2; - if (bsize > 6) *this_rd *= 2; - } - } + (void)xd; } +#endif // !CONFIG_REALTIME_ONLY // Do we have an internal image edge (e.g. formatting bars). 
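
A worked example of the reworked rd_variance_adjustment() above, with invented per-8x8 variances: a reconstruction that is much flatter than the source has its variance difference doubled, the opposite direction is halved, and the resulting percentage bump is capped by the max_var_adjust[] entry:

#include <stdint.h>
#include <stdio.h>

#define VPXMAX(a, b) ((a) > (b) ? (a) : (b))
#define VPXMIN(a, b) ((a) < (b) ? (a) : (b))
#define VAR_MULT 250

int main(void) {
  /* Invented per-8x8 variances: reconstruction much flatter than the
   * source, the case the asymmetric weighting punishes hardest. */
  const unsigned int src_variance = 40, rec_variance = 10;
  const unsigned int adj_max = 250; /* max_var_adjust[] entry for FILM */
  int64_t this_rd = 100000;
  unsigned int var_diff = (src_variance > rec_variance)
                              ? (src_variance - rec_variance) * 2
                              : (rec_variance - src_variance) / 2;
  unsigned int var_factor =
      (unsigned int)((int64_t)VAR_MULT * var_diff) / VPXMAX(1, src_variance);
  var_factor = VPXMIN(adj_max, var_factor);
  this_rd += (this_rd * var_factor) / 100;
  /* var_diff = 60, raw factor = 375, capped at 250: rd scales by 3.5. */
  printf("adjusted rd = %lld\n", (long long)this_rd); /* 350000 */
  return 0;
}
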
int vp9_internal_image_edge(VP9_COMP *cpi) { @@ -3023,6 +3296,7 @@ int vp9_active_edge_sb(VP9_COMP *cpi, int mi_row, int mi_col) { vp9_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE); } +#if !CONFIG_REALTIME_ONLY void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, @@ -3066,20 +3340,36 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, const int intra_cost_penalty = vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q); int best_skip2 = 0; - uint8_t ref_frame_skip_mask[2] = { 0 }; + uint8_t ref_frame_skip_mask[2] = { 0, 1 }; uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 }; int mode_skip_start = sf->mode_skip_start + 1; const int *const rd_threshes = rd_opt->threshes[segment_id][bsize]; const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize]; int64_t mode_threshold[MAX_MODES]; - int *tile_mode_map = tile_data->mode_map[bsize]; - int mode_map[MAX_MODES]; // Maintain mode_map information locally to avoid - // lock mechanism involved with reads from - // tile_mode_map + int8_t *tile_mode_map = tile_data->mode_map[bsize]; + int8_t mode_map[MAX_MODES]; // Maintain mode_map information locally to avoid + // lock mechanism involved with reads from + // tile_mode_map const int mode_search_skip_flags = sf->mode_search_skip_flags; + const int is_rect_partition = + num_4x4_blocks_wide_lookup[bsize] != num_4x4_blocks_high_lookup[bsize]; int64_t mask_filter = 0; int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; + struct buf_2d *recon; + struct buf_2d recon_buf; +#if CONFIG_VP9_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, recon16[64 * 64]); + recon_buf.buf = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH + ? CONVERT_TO_BYTEPTR(recon16) + : (uint8_t *)recon16; +#else + DECLARE_ALIGNED(16, uint8_t, recon8[64 * 64]); + recon_buf.buf = recon8; +#endif // CONFIG_VP9_HIGHBITDEPTH + recon_buf.stride = 64; + recon = cpi->oxcf.content == VP9E_CONTENT_FILM ? &recon_buf : 0; + vp9_zero(best_mbmode); x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -3105,7 +3395,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; - if (cpi->ref_frame_flags & flag_list[ref_frame]) { + if ((cpi->ref_frame_flags & flag_list[ref_frame]) && + !(is_rect_partition && (ctx->skip_ref_frame_mask & (1 << ref_frame)))) { assert(get_ref_frame_buffer(cpi, ref_frame) != NULL); setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); @@ -3161,7 +3452,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, if (cpi->rc.is_src_frame_alt_ref) { if (sf->alt_ref_search_fp) { mode_skip_mask[ALTREF_FRAME] = 0; - ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME); + ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME) & 0xff; ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; } } @@ -3228,18 +3519,21 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, vp9_zero(x->sum_y_eobs); + if (is_rect_partition) { + if (ctx->skip_ref_frame_mask & (1 << ref_frame)) continue; + if (second_ref_frame > 0 && + (ctx->skip_ref_frame_mask & (1 << second_ref_frame))) + continue; + } + // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. 
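
The reference skip masks above are plain uint8_t bit sets indexed by the MV_REFERENCE_FRAME enum (assumed here: INTRA_FRAME = 0, LAST_FRAME = 1, GOLDEN_FRAME = 2, ALTREF_FRAME = 3). A sketch of why the alt_ref_search_fp case masks the complement with 0xff and how a skip bit is tested:

#include <stdio.h>

/* MV_REFERENCE_FRAME values assumed from the vp9 enums. */
enum { INTRA_FRAME, LAST_FRAME, GOLDEN_FRAME, ALTREF_FRAME };

int main(void) {
  /* alt_ref_search_fp: allow only ALTREF as the first reference. The
   * "& 0xff" keeps the complement inside the uint8_t mask's range. */
  const unsigned char skip_mask = ~(1 << ALTREF_FRAME) & 0xff; /* 0xf7 */
  int ref;
  for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
    printf("ref %d skipped: %d\n", ref, (skip_mask >> ref) & 1);
  return 0;
}
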
if (midx == mode_skip_start && best_mode_index >= 0) { switch (best_mbmode.ref_frame[0]) { case INTRA_FRAME: break; - case LAST_FRAME: - ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK; - ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; - break; + case LAST_FRAME: ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK; break; case GOLDEN_FRAME: ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK; - ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; break; case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK; break; case NONE: @@ -3313,6 +3607,10 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, if (comp_pred) { if (!cpi->allow_comp_inter_inter) continue; + if (cm->ref_frame_sign_bias[ref_frame] == + cm->ref_frame_sign_bias[second_ref_frame]) + continue; + // Skip compound inter modes if ARF is not available. if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue; @@ -3339,7 +3637,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, // Disable intra modes other than DC_PRED for blocks with low variance // Threshold for intra skipping based on source variance // TODO(debargha): Specialize the threshold for super block sizes - const unsigned int skip_intra_var_thresh = 64; + const unsigned int skip_intra_var_thresh = + (cpi->oxcf.content == VP9E_CONTENT_FILM) ? 0 : 64; if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) && x->source_variance < skip_intra_var_thresh) continue; @@ -3385,7 +3684,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, struct macroblockd_plane *const pd = &xd->plane[1]; memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize, - best_rd); + best_rd, recon); if (rate_y == INT_MAX) continue; uv_tx = uv_txsize_lookup[bsize][mi->tx_size][pd->subsampling_x] @@ -3408,7 +3707,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, } else { this_rd = handle_inter_mode( cpi, x, bsize, &rate2, &distortion2, &skippable, &rate_y, &rate_uv, - &disable_skip, frame_mv, mi_row, mi_col, single_newmv, + recon, &disable_skip, frame_mv, mi_row, mi_col, single_newmv, single_inter_filter, single_skippable, &total_sse, best_rd, &mask_filter, filter_cache); if (this_rd == INT64_MAX) continue; @@ -3437,7 +3736,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, // Cost the skip mb case rate2 += skip_cost1; - } else if (ref_frame != INTRA_FRAME && !xd->lossless) { + } else if (ref_frame != INTRA_FRAME && !xd->lossless && + !cpi->oxcf.sharpness) { if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0, distortion2) < RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) { @@ -3461,10 +3761,39 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); } - // Apply an adjustment to the rd value based on the similarity of the - // source variance and reconstructed variance. - rd_variance_adjustment(cpi, x, bsize, &this_rd, ref_frame, - x->source_variance); + if (recon) { + // In film mode bias against DC pred and other intra if there is a + // significant difference between the variance of the sub blocks in + // the source. Also apply some bias against compound modes which also + // tend to blur fine texture such as film grain over time. + // + // The sub block test here acts in the case where one or more sub + // blocks have relatively high variance but others relatively low + // variance. 
Here the high variance sub blocks may push the + // total variance for the current block size over the thresholds + // used in rd_variance_adjustment() below. + if (cpi->oxcf.content == VP9E_CONTENT_FILM) { + if (bsize >= BLOCK_16X16) { + int min_energy, max_energy; + vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, + &max_energy); + if (max_energy > min_energy) { + if (ref_frame == INTRA_FRAME) { + if (this_mode == DC_PRED) + this_rd += (this_rd * (max_energy - min_energy)); + else + this_rd += (this_rd * (max_energy - min_energy)) / 4; + } else if (second_ref_frame > INTRA_FRAME) { + this_rd += this_rd / 4; + } + } + } + } + // Apply an adjustment to the rd value based on the similarity of the + // source variance and reconstructed variance. + rd_variance_adjustment(cpi, x, bsize, &this_rd, recon, ref_frame, + second_ref_frame, this_mode); + } if (ref_frame == INTRA_FRAME) { // Keep record of best intra rd @@ -3616,9 +3945,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, } if (best_mode_index < 0 || best_rd >= best_rd_so_far) { - // If adaptive interp filter is enabled, then the current leaf node of 8x8 - // data is needed for sub8x8. Hence preserve the context. +// If adaptive interp filter is enabled, then the current leaf node of 8x8 +// data is needed for sub8x8. Hence preserve the context. +#if CONFIG_CONSISTENT_RECODE + if (bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; +#else if (cpi->row_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; +#endif rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; @@ -3894,7 +4227,8 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data, #if CONFIG_BETTER_HW_COMPATIBILITY // forbid 8X4 and 4X8 partitions if any reference frame is scaled. if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) { - int ref_scaled = vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf); + int ref_scaled = ref_frame > INTRA_FRAME && + vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf); if (second_ref_frame > INTRA_FRAME) ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf); if (ref_scaled) continue; @@ -3940,6 +4274,11 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data, comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { if (!cpi->allow_comp_inter_inter) continue; + + if (cm->ref_frame_sign_bias[ref_frame] == + cm->ref_frame_sign_bias[second_ref_frame]) + continue; + if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue; // Do not allow compound prediction if the segment level reference frame // feature is in use as in this case there can only be one reference. @@ -4418,3 +4757,4 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data, store_coding_context(x, ctx, best_ref_index, best_pred_diff, best_filter_diff, 0); } +#endif // !CONFIG_REALTIME_ONLY diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_rdopt.h b/media/libvpx/libvpx/vp9/encoder/vp9_rdopt.h index 795c91aef7aa..e1147ff94385 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_rdopt.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_rdopt.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_ENCODER_VP9_RDOPT_H_ -#define VP9_ENCODER_VP9_RDOPT_H_ +#ifndef VPX_VP9_ENCODER_VP9_RDOPT_H_ +#define VPX_VP9_ENCODER_VP9_RDOPT_H_ #include "vp9/common/vp9_blockd.h" @@ -29,6 +29,7 @@ void vp9_rd_pick_intra_mode_sb(struct VP9_COMP *cpi, struct macroblock *x, struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd); +#if !CONFIG_REALTIME_ONLY void vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x, int mi_row, int mi_col, @@ -39,21 +40,24 @@ void vp9_rd_pick_inter_mode_sb_seg_skip( struct VP9_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x, struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far); +#endif int vp9_internal_image_edge(struct VP9_COMP *cpi); int vp9_active_h_edge(struct VP9_COMP *cpi, int mi_row, int mi_step); int vp9_active_v_edge(struct VP9_COMP *cpi, int mi_col, int mi_step); int vp9_active_edge_sb(struct VP9_COMP *cpi, int mi_row, int mi_col); +#if !CONFIG_REALTIME_ONLY void vp9_rd_pick_inter_mode_sub8x8(struct VP9_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x, int mi_row, int mi_col, struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far); +#endif #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_RDOPT_H_ +#endif // VPX_VP9_ENCODER_VP9_RDOPT_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_resize.c b/media/libvpx/libvpx/vp9/encoder/vp9_resize.c index f6c4aad4d34f..7486dee25b82 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_resize.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_resize.c @@ -424,11 +424,11 @@ void vp9_resize_plane(const uint8_t *const input, int height, int width, int in_stride, uint8_t *output, int height2, int width2, int out_stride) { int i; - uint8_t *intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height); + uint8_t *intbuf = (uint8_t *)calloc(width2 * height, sizeof(*intbuf)); uint8_t *tmpbuf = - (uint8_t *)malloc(sizeof(uint8_t) * (width < height ? height : width)); - uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * height); - uint8_t *arrbuf2 = (uint8_t *)malloc(sizeof(uint8_t) * height2); + (uint8_t *)calloc(width < height ? height : width, sizeof(*tmpbuf)); + uint8_t *arrbuf = (uint8_t *)calloc(height, sizeof(*arrbuf)); + uint8_t *arrbuf2 = (uint8_t *)calloc(height2, sizeof(*arrbuf2)); if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error; assert(width > 0); @@ -506,10 +506,12 @@ static void highbd_interpolate(const uint16_t *const input, int inlength, sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; filter = interp_filters[sub_pel]; sum = 0; - for (k = 0; k < INTERP_TAPS; ++k) + for (k = 0; k < INTERP_TAPS; ++k) { + assert(int_pel - INTERP_TAPS / 2 + 1 + k < inlength); sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ? 0 : int_pel - INTERP_TAPS / 2 + 1 + k)]; + } *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); } // Middle part. 
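
The interpolation loops in vp9_resize.c clamp tap indices at the frame boundary instead of reading out of bounds; the hunk above also adds an assert that the index stays below inlength in the left-boundary section. A sketch of the pattern with a made-up 8-tap kernel (not one of the real interp_filters[] entries):

#include <stdio.h>

#define INTERP_TAPS 8

/* Tap-index clamping as in highbd_interpolate() above: in the
 * left-boundary section, taps that would read before the first sample
 * are pinned to input[0], and the index can never reach inlength. */
static int filter_left_edge(const int *input, int inlength, int int_pel,
                            const int *filter) {
  int k, sum = 0;
  for (k = 0; k < INTERP_TAPS; ++k) {
    const int idx = int_pel - INTERP_TAPS / 2 + 1 + k;
    sum += filter[k] * input[idx < 0 ? 0 : idx];
  }
  (void)inlength; /* assert(idx < inlength) would sit in the loop above */
  return sum;
}

int main(void) {
  const int input[8] = { 10, 10, 12, 16, 24, 40, 72, 136 };
  const int filter[INTERP_TAPS] = { -1, 3, -10, 70, 70, -10, 3, -1 };
  /* Output near the left edge: taps k = 0 and 1 clamp to input[0]. */
  printf("sum = %d\n", filter_left_edge(input, 8, 1, filter));
  return 0;
}
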
@@ -720,6 +722,10 @@ void vp9_highbd_resize_plane(const uint8_t *const input, int height, int width, uint16_t *arrbuf2 = (uint16_t *)malloc(sizeof(uint16_t) * height2); if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error; + assert(width > 0); + assert(height > 0); + assert(width2 > 0); + assert(height2 > 0); for (i = 0; i < height; ++i) { highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width, intbuf + width2 * i, width2, tmpbuf, bd); diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_resize.h b/media/libvpx/libvpx/vp9/encoder/vp9_resize.h index d3282ee1919b..5d4ce97eba9e 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_resize.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_resize.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_RESIZE_H_ -#define VP9_ENCODER_VP9_RESIZE_H_ +#ifndef VPX_VP9_ENCODER_VP9_RESIZE_H_ +#define VPX_VP9_ENCODER_VP9_RESIZE_H_ #include #include "vpx/vpx_integer.h" @@ -65,4 +65,4 @@ void vp9_highbd_resize_frame444(const uint8_t *const y, int y_stride, } // extern "C" #endif -#endif // VP9_ENCODER_VP9_RESIZE_H_ +#endif // VPX_VP9_ENCODER_VP9_RESIZE_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_segmentation.c b/media/libvpx/libvpx/vp9/encoder/vp9_segmentation.c index 4a5a68e07a12..a163297e6e14 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_segmentation.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_segmentation.c @@ -9,6 +9,7 @@ */ #include +#include #include "vpx_mem/vpx_mem.h" @@ -46,6 +47,59 @@ void vp9_clear_segdata(struct segmentation *seg, int segment_id, seg->feature_data[segment_id][feature_id] = 0; } +void vp9_psnr_aq_mode_setup(struct segmentation *seg) { + int i; + + vp9_enable_segmentation(seg); + vp9_clearall_segfeatures(seg); + seg->abs_delta = SEGMENT_DELTADATA; + + for (i = 0; i < MAX_SEGMENTS; ++i) { + vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, 2 * (i - (MAX_SEGMENTS / 2))); + vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); + } +} + +void vp9_perceptual_aq_mode_setup(struct VP9_COMP *cpi, + struct segmentation *seg) { + const VP9_COMMON *cm = &cpi->common; + const int seg_counts = cpi->kmeans_ctr_num; + const int base_qindex = cm->base_qindex; + const double base_qstep = vp9_convert_qindex_to_q(base_qindex, cm->bit_depth); + const double mid_ctr = cpi->kmeans_ctr_ls[seg_counts / 2]; + const double var_diff_scale = 4.0; + int i; + + assert(seg_counts <= MAX_SEGMENTS); + + vp9_enable_segmentation(seg); + vp9_clearall_segfeatures(seg); + seg->abs_delta = SEGMENT_DELTADATA; + + for (i = 0; i < seg_counts / 2; ++i) { + double wiener_var_diff = mid_ctr - cpi->kmeans_ctr_ls[i]; + double target_qstep = base_qstep / (1.0 + wiener_var_diff / var_diff_scale); + int target_qindex = vp9_convert_q_to_qindex(target_qstep, cm->bit_depth); + assert(wiener_var_diff >= 0.0); + + vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, target_qindex - base_qindex); + vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); + } + + vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, 0); + vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); + + for (; i < seg_counts; ++i) { + double wiener_var_diff = cpi->kmeans_ctr_ls[i] - mid_ctr; + double target_qstep = base_qstep * (1.0 + wiener_var_diff / var_diff_scale); + int target_qindex = vp9_convert_q_to_qindex(target_qstep, cm->bit_depth); + assert(wiener_var_diff >= 0.0); + + vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, target_qindex - base_qindex); + vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); + } +} + // Based on set of segment counts calculate a probability tree static 
void calc_segtree_probs(int *segcounts, vpx_prob *segment_tree_probs) { // Work out probabilities of each segment diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_segmentation.h b/media/libvpx/libvpx/vp9/encoder/vp9_segmentation.h index 562805543b81..9404c38bc891 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_segmentation.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_segmentation.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_SEGMENTATION_H_ -#define VP9_ENCODER_VP9_SEGMENTATION_H_ +#ifndef VPX_VP9_ENCODER_VP9_SEGMENTATION_H_ +#define VPX_VP9_ENCODER_VP9_SEGMENTATION_H_ #include "vp9/common/vp9_blockd.h" #include "vp9/encoder/vp9_encoder.h" @@ -26,6 +26,11 @@ void vp9_disable_segfeature(struct segmentation *seg, int segment_id, void vp9_clear_segdata(struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id); +void vp9_psnr_aq_mode_setup(struct segmentation *seg); + +void vp9_perceptual_aq_mode_setup(struct VP9_COMP *cpi, + struct segmentation *seg); + // The values given for each segment can be either deltas (from the default // value chosen for the frame) or absolute values. // @@ -47,4 +52,4 @@ void vp9_reset_segment_features(struct segmentation *seg); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_SEGMENTATION_H_ +#endif // VPX_VP9_ENCODER_VP9_SEGMENTATION_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_skin_detection.h b/media/libvpx/libvpx/vp9/encoder/vp9_skin_detection.h index 8880bff466af..46a722af9bfe 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_skin_detection.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_skin_detection.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_SKIN_MAP_H_ -#define VP9_ENCODER_VP9_SKIN_MAP_H_ +#ifndef VPX_VP9_ENCODER_VP9_SKIN_DETECTION_H_ +#define VPX_VP9_ENCODER_VP9_SKIN_DETECTION_H_ #include "vp9/common/vp9_blockd.h" #include "vpx_dsp/skin_detection.h" @@ -37,4 +37,4 @@ void vp9_output_skin_map(struct VP9_COMP *const cpi, FILE *yuv_skinmap_file); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_SKIN_MAP_H_ +#endif // VPX_VP9_ENCODER_VP9_SKIN_DETECTION_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_speed_features.c b/media/libvpx/libvpx/vp9/encoder/vp9_speed_features.c index a05db60c6521..0b24b5cb3158 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_speed_features.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_speed_features.c @@ -20,6 +20,7 @@ static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] = { { 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 } }; +#if !CONFIG_REALTIME_ONLY // Define 3 mesh density levels to control the number of searches. 
#define MESH_DENSITY_LEVELS 3 static MESH_PATTERN @@ -32,7 +33,7 @@ static MESH_PATTERN // Intra only frames, golden frames (except alt ref overlays) and // alt ref frames tend to be coded at a higher than ambient quality static int frame_is_boosted(const VP9_COMP *cpi) { - return frame_is_kf_gf_arf(cpi) || vp9_is_upper_layer_key_frame(cpi); + return frame_is_kf_gf_arf(cpi); } // Sets a partition size down to which the auto partition code will always @@ -61,46 +62,92 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed) { VP9_COMMON *const cm = &cpi->common; + const int min_frame_size = VPXMIN(cm->width, cm->height); + const int is_480p_or_larger = min_frame_size >= 480; + const int is_720p_or_larger = min_frame_size >= 720; + const int is_1080p_or_larger = min_frame_size >= 1080; + const int is_2160p_or_larger = min_frame_size >= 2160; // speed 0 features sf->partition_search_breakout_thr.dist = (1 << 20); sf->partition_search_breakout_thr.rate = 80; + sf->use_square_only_thresh_high = BLOCK_SIZES; + sf->use_square_only_thresh_low = BLOCK_4X4; - // Currently, the machine-learning based partition search early termination - // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0. - if (VPXMIN(cm->width, cm->height) >= 480) { - sf->ml_partition_search_early_termination = 1; + if (is_480p_or_larger) { + // Currently, the machine-learning based partition search early termination + // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0. + sf->rd_ml_partition.search_early_termination = 1; + } else { + sf->use_square_only_thresh_high = BLOCK_32X32; } - if (speed >= 1) { - sf->ml_partition_search_early_termination = 0; - - if (VPXMIN(cm->width, cm->height) >= 720) { - sf->disable_split_mask = - cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - sf->partition_search_breakout_thr.dist = (1 << 23); + if (!is_1080p_or_larger) { + sf->rd_ml_partition.search_breakout = 1; + if (is_720p_or_larger) { + sf->rd_ml_partition.search_breakout_thresh[0] = 0.0f; + sf->rd_ml_partition.search_breakout_thresh[1] = 0.0f; + sf->rd_ml_partition.search_breakout_thresh[2] = 0.0f; } else { - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - sf->partition_search_breakout_thr.dist = (1 << 21); + sf->rd_ml_partition.search_breakout_thresh[0] = 2.5f; + sf->rd_ml_partition.search_breakout_thresh[1] = 1.5f; + sf->rd_ml_partition.search_breakout_thresh[2] = 1.5f; } } + if (speed >= 1) { + sf->rd_ml_partition.search_early_termination = 0; + sf->rd_ml_partition.search_breakout = 1; + if (is_480p_or_larger) + sf->use_square_only_thresh_high = BLOCK_64X64; + else + sf->use_square_only_thresh_high = BLOCK_32X32; + sf->use_square_only_thresh_low = BLOCK_16X16; + if (is_720p_or_larger) { + sf->disable_split_mask = + cm->show_frame ? 
DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + sf->partition_search_breakout_thr.dist = (1 << 22); + sf->rd_ml_partition.search_breakout_thresh[0] = -5.0f; + sf->rd_ml_partition.search_breakout_thresh[1] = -5.0f; + sf->rd_ml_partition.search_breakout_thresh[2] = -9.0f; + } else { + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + sf->partition_search_breakout_thr.dist = (1 << 21); + sf->rd_ml_partition.search_breakout_thresh[0] = -1.0f; + sf->rd_ml_partition.search_breakout_thresh[1] = -1.0f; + sf->rd_ml_partition.search_breakout_thresh[2] = -1.0f; + } +#if CONFIG_VP9_HIGHBITDEPTH + if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) { + sf->rd_ml_partition.search_breakout_thresh[0] -= 1.0f; + sf->rd_ml_partition.search_breakout_thresh[1] -= 1.0f; + sf->rd_ml_partition.search_breakout_thresh[2] -= 1.0f; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + } + if (speed >= 2) { - if (VPXMIN(cm->width, cm->height) >= 720) { + sf->use_square_only_thresh_high = BLOCK_4X4; + sf->use_square_only_thresh_low = BLOCK_SIZES; + if (is_720p_or_larger) { sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; sf->adaptive_pred_interp_filter = 0; sf->partition_search_breakout_thr.dist = (1 << 24); sf->partition_search_breakout_thr.rate = 120; + sf->rd_ml_partition.search_breakout = 0; } else { sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; sf->partition_search_breakout_thr.dist = (1 << 22); sf->partition_search_breakout_thr.rate = 100; + sf->rd_ml_partition.search_breakout_thresh[0] = 0.0f; + sf->rd_ml_partition.search_breakout_thresh[1] = -1.0f; + sf->rd_ml_partition.search_breakout_thresh[2] = -4.0f; } sf->rd_auto_partition_min_limit = set_partition_min_limit(cm); // Use a set of speed features for 4k videos. - if (VPXMIN(cm->width, cm->height) >= 2160) { + if (is_2160p_or_larger) { sf->use_square_partition_only = 1; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC; @@ -112,7 +159,8 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, } if (speed >= 3) { - if (VPXMIN(cm->width, cm->height) >= 720) { + sf->rd_ml_partition.search_breakout = 0; + if (is_720p_or_larger) { sf->disable_split_mask = DISABLE_ALL_SPLIT; sf->schedule_mode_search = cm->base_qindex < 220 ? 
1 : 0; sf->partition_search_breakout_thr.dist = (1 << 25); @@ -137,7 +185,7 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, if (speed >= 4) { sf->partition_search_breakout_thr.rate = 300; - if (VPXMIN(cm->width, cm->height) >= 720) { + if (is_720p_or_larger) { sf->partition_search_breakout_thr.dist = (1 << 26); } else { sf->partition_search_breakout_thr.dist = (1 << 24); @@ -166,28 +214,41 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->adaptive_rd_thresh_row_mt = 0; sf->allow_skip_recode = 1; sf->less_rectangular_check = 1; - sf->use_square_partition_only = !frame_is_boosted(cpi); - sf->use_square_only_threshold = BLOCK_16X16; + sf->use_square_partition_only = !boosted; + sf->prune_ref_frame_for_rect_partitions = 1; + sf->rd_ml_partition.var_pruning = 1; + + sf->rd_ml_partition.prune_rect_thresh[0] = -1; + sf->rd_ml_partition.prune_rect_thresh[1] = 350; + sf->rd_ml_partition.prune_rect_thresh[2] = 325; + sf->rd_ml_partition.prune_rect_thresh[3] = 250; if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { sf->exhaustive_searches_thresh = (1 << 22); - for (i = 0; i < MAX_MESH_STEP; ++i) { - int mesh_density_level = 0; - sf->mesh_patterns[i].range = - good_quality_mesh_patterns[mesh_density_level][i].range; - sf->mesh_patterns[i].interval = - good_quality_mesh_patterns[mesh_density_level][i].interval; - } } else { sf->exhaustive_searches_thresh = INT_MAX; } + for (i = 0; i < MAX_MESH_STEP; ++i) { + const int mesh_density_level = 0; + sf->mesh_patterns[i].range = + good_quality_mesh_patterns[mesh_density_level][i].range; + sf->mesh_patterns[i].interval = + good_quality_mesh_patterns[mesh_density_level][i].interval; + } + if (speed >= 1) { + sf->temporal_filter_search_method = NSTEP; + sf->rd_ml_partition.var_pruning = !boosted; + sf->rd_ml_partition.prune_rect_thresh[1] = 225; + sf->rd_ml_partition.prune_rect_thresh[2] = 225; + sf->rd_ml_partition.prune_rect_thresh[3] = 225; + if (oxcf->pass == 2) { TWO_PASS *const twopass = &cpi->twopass; if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) || vp9_internal_image_edge(cpi)) { - sf->use_square_partition_only = !frame_is_boosted(cpi); + sf->use_square_partition_only = !boosted; } else { sf->use_square_partition_only = !frame_is_intra_only(cm); } @@ -199,23 +260,22 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->tx_domain_thresh = tx_dom_thresholds[(speed < 6) ? speed : 5]; sf->allow_quant_coeff_opt = sf->optimize_coefficients; sf->quant_opt_thresh = qopt_thresholds[(speed < 6) ? 
speed : 5]; - - sf->use_square_only_threshold = BLOCK_4X4; sf->less_rectangular_check = 1; - sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; sf->mv.auto_mv_step_size = 1; sf->adaptive_rd_thresh = 2; - sf->mv.subpel_iters_per_step = 1; - sf->mode_skip_start = 10; + sf->mv.subpel_search_level = 1; + if (cpi->oxcf.content != VP9E_CONTENT_FILM) sf->mode_skip_start = 10; sf->adaptive_pred_interp_filter = 1; sf->allow_acl = 0; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + if (cpi->oxcf.content != VP9E_CONTENT_FILM) { + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + } sf->recode_tolerance_low = 15; sf->recode_tolerance_high = 30; @@ -223,9 +283,11 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->exhaustive_searches_thresh = (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23) : INT_MAX; + sf->use_accurate_subpel_search = USE_4_TAPS; } if (speed >= 2) { + sf->rd_ml_partition.var_pruning = 0; if (oxcf->vbr_corpus_complexity) sf->recode_loop = ALLOW_RECODE_FIRST; else @@ -247,6 +309,12 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->recode_tolerance_low = 15; sf->recode_tolerance_high = 45; + sf->enhanced_full_pixel_motion_search = 0; + sf->prune_ref_frame_for_rect_partitions = 0; + sf->rd_ml_partition.prune_rect_thresh[1] = -1; + sf->rd_ml_partition.prune_rect_thresh[2] = -1; + sf->rd_ml_partition.prune_rect_thresh[3] = -1; + sf->mv.subpel_search_level = 0; if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { for (i = 0; i < MAX_MESH_STEP; ++i) { @@ -257,6 +325,8 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, good_quality_mesh_patterns[mesh_density_level][i].interval; } } + + sf->use_accurate_subpel_search = USE_2_TAPS; } if (speed >= 3) { @@ -316,6 +386,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->simple_model_rd_from_var = 1; } } +#endif // !CONFIG_REALTIME_ONLY static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi, SPEED_FEATURES *sf, @@ -358,6 +429,7 @@ static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi, static void set_rt_speed_feature_framesize_independent( VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, vp9e_tune_content content) { VP9_COMMON *const cm = &cpi->common; + SVC *const svc = &cpi->svc; const int is_keyframe = cm->frame_type == KEY_FRAME; const int frames_since_key = is_keyframe ? 
0 : cpi->rc.frames_since_key; sf->static_segmentation = 0; @@ -374,6 +446,17 @@ static void set_rt_speed_feature_framesize_independent( sf->use_compound_nonrd_pickmode = 0; sf->nonrd_keyframe = 0; sf->svc_use_lowres_part = 0; + sf->overshoot_detection_cbr_rt = NO_DETECTION; + sf->disable_16x16part_nonkey = 0; + sf->disable_golden_ref = 0; + sf->enable_tpl_model = 0; + sf->enhanced_full_pixel_motion_search = 0; + sf->use_accurate_subpel_search = USE_2_TAPS; + sf->nonrd_use_ml_partition = 0; + sf->variance_part_thresh_mult = 1; + sf->cb_pred_filter_search = 0; + sf->force_smooth_interpol = 0; + sf->rt_intra_dc_only_low_content = 0; if (speed >= 1) { sf->allow_txfm_domain_distortion = 1; @@ -407,7 +490,7 @@ static void set_rt_speed_feature_framesize_independent( // Reference masking only enabled for 1 spatial layer, and if none of the // references have been scaled. The latter condition needs to be checked // for external or internal dynamic resize. - sf->reference_masking = (cpi->svc.number_spatial_layers == 1); + sf->reference_masking = (svc->number_spatial_layers == 1); if (sf->reference_masking == 1 && (cpi->external_resize == 1 || cpi->oxcf.resize_mode == RESIZE_DYNAMIC)) { @@ -440,7 +523,7 @@ static void set_rt_speed_feature_framesize_independent( sf->disable_filter_search_var_thresh = 100; sf->use_uv_intra_rd_estimate = 1; sf->skip_encode_sb = 1; - sf->mv.subpel_iters_per_step = 1; + sf->mv.subpel_search_level = 0; sf->adaptive_rd_thresh = 4; sf->mode_skip_start = 6; sf->allow_skip_recode = 0; @@ -453,14 +536,7 @@ static void set_rt_speed_feature_framesize_independent( int i; if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0) sf->use_altref_onepass = 1; - sf->last_partitioning_redo_frequency = 4; - sf->adaptive_rd_thresh = 5; - sf->use_fast_coef_costing = 0; - sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX; - sf->adjust_partitioning_from_last_frame = - cm->last_frame_type != cm->frame_type || - (0 == (frames_since_key + 1) % sf->last_partitioning_redo_frequency); - sf->mv.subpel_force_stop = 1; + sf->mv.subpel_force_stop = QUARTER_PEL; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_DC_H_V; sf->intra_uv_mode_mask[i] = INTRA_DC; @@ -468,13 +544,19 @@ static void set_rt_speed_feature_framesize_independent( sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->frame_parameter_update = 0; sf->mv.search_method = FAST_HEX; - - sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW; - sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST; - sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST; - sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST; + sf->allow_skip_recode = 0; sf->max_intra_bsize = BLOCK_32X32; - sf->allow_skip_recode = 1; + sf->use_fast_coef_costing = 0; + sf->use_quant_fp = !is_keyframe; + sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO; + sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; + sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO; + sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO; + sf->adaptive_rd_thresh = 2; + sf->use_fast_coef_updates = is_keyframe ? TWO_LOOP : ONE_LOOP_REDUCED; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH; + sf->tx_size_search_method = is_keyframe ? 
USE_LARGESTALL : USE_TX_8X8; + sf->partition_search_type = VAR_BASED_PARTITION; } if (speed >= 5) { @@ -513,7 +595,10 @@ static void set_rt_speed_feature_framesize_independent( int i; if (content == VP9E_CONTENT_SCREEN) { for (i = 0; i < BLOCK_SIZES; ++i) - sf->intra_y_mode_bsize_mask[i] = INTRA_DC_TM_H_V; + if (i >= BLOCK_32X32) + sf->intra_y_mode_bsize_mask[i] = INTRA_DC_H_V; + else + sf->intra_y_mode_bsize_mask[i] = INTRA_DC_TM_H_V; } else { for (i = 0; i < BLOCK_SIZES; ++i) if (i > BLOCK_16X16) @@ -531,6 +616,23 @@ static void set_rt_speed_feature_framesize_independent( sf->limit_newmv_early_exit = 1; if (!cpi->use_svc) sf->bias_golden = 1; } + // Keep nonrd_keyframe = 1 for non-base spatial layers to prevent + // increase in encoding time. + if (cpi->use_svc && svc->spatial_layer_id > 0) sf->nonrd_keyframe = 1; + if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG && + cpi->oxcf.rc_mode == VPX_CBR) { + if (cm->width * cm->height <= 352 * 288 && !cpi->use_svc && + cpi->oxcf.content != VP9E_CONTENT_SCREEN) + sf->overshoot_detection_cbr_rt = RE_ENCODE_MAXQ; + else + sf->overshoot_detection_cbr_rt = FAST_DETECTION_MAXQ; + } + if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 && + cm->width <= 1280 && cm->height <= 720) { + sf->use_altref_onepass = 1; + sf->use_compound_nonrd_pickmode = 1; + } + if (cm->width * cm->height > 1280 * 720) sf->cb_pred_filter_search = 1; } if (speed >= 6) { @@ -539,8 +641,6 @@ static void set_rt_speed_feature_framesize_independent( sf->use_compound_nonrd_pickmode = 1; } sf->partition_search_type = VAR_BASED_PARTITION; - // Turn on this to use non-RD key frame coding mode. - sf->use_nonrd_pick_mode = 1; sf->mv.search_method = NSTEP; sf->mv.reduce_first_step_size = 1; sf->skip_encode_sb = 0; @@ -553,7 +653,7 @@ static void set_rt_speed_feature_framesize_independent( (cm->width * cm->height <= 640 * 360) ? 40000 : 60000; if (cpi->content_state_sb_fd == NULL && (!cpi->use_svc || - cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { + svc->spatial_layer_id == svc->number_spatial_layers - 1)) { cpi->content_state_sb_fd = (uint8_t *)vpx_calloc( (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t)); } @@ -562,11 +662,14 @@ static void set_rt_speed_feature_framesize_independent( // Enable short circuit for low temporal variance. sf->short_circuit_low_temp_var = 1; } - if (cpi->svc.temporal_layer_id > 0) { + if (svc->temporal_layer_id > 0) { sf->adaptive_rd_thresh = 4; sf->limit_newmv_early_exit = 0; sf->base_mv_aggressive = 1; } + if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG && + cpi->oxcf.rc_mode == VPX_CBR) + sf->overshoot_detection_cbr_rt = FAST_DETECTION_MAXQ; } if (speed >= 7) { @@ -576,16 +679,15 @@ static void set_rt_speed_feature_framesize_independent( sf->mv.fullpel_search_step_param = 10; // For SVC: use better mv search on base temporal layer, and only // on base spatial layer if highest resolution is above 640x360. 
- if (cpi->svc.number_temporal_layers > 2 && - cpi->svc.temporal_layer_id == 0 && - (cpi->svc.spatial_layer_id == 0 || + if (svc->number_temporal_layers > 2 && svc->temporal_layer_id == 0 && + (svc->spatial_layer_id == 0 || cpi->oxcf.width * cpi->oxcf.height <= 640 * 360)) { sf->mv.search_method = NSTEP; sf->mv.fullpel_search_step_param = 6; } - if (cpi->svc.temporal_layer_id > 0 || cpi->svc.spatial_layer_id > 1) { + if (svc->temporal_layer_id > 0 || svc->spatial_layer_id > 1) { sf->use_simple_block_yrd = 1; - if (cpi->svc.non_reference_frame) + if (svc->non_reference_frame) sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_EVENMORE; } if (cpi->use_svc && cpi->row_mt && cpi->oxcf.max_threads > 1) @@ -596,22 +698,30 @@ static void set_rt_speed_feature_framesize_independent( if (!cpi->last_frame_dropped && cpi->resize_state == ORIG && !cpi->external_resize && (!cpi->use_svc || - cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { + (svc->spatial_layer_id == svc->number_spatial_layers - 1 && + !svc->last_layer_dropped[svc->number_spatial_layers - 1]))) { sf->copy_partition_flag = 1; cpi->max_copied_frame = 2; // The top temporal enhancement layer (for number of temporal layers > 1) // are non-reference frames, so use large/max value for max_copied_frame. - if (cpi->svc.number_temporal_layers > 1 && - cpi->svc.temporal_layer_id == cpi->svc.number_temporal_layers - 1) + if (svc->number_temporal_layers > 1 && + svc->temporal_layer_id == svc->number_temporal_layers - 1) cpi->max_copied_frame = 255; } // For SVC: enable use of lower resolution partition for higher resolution, // only for 3 spatial layers and when config/top resolution is above VGA. // Enable only for non-base temporal layer frames. - if (cpi->use_svc && cpi->svc.number_spatial_layers == 3 && - cpi->svc.temporal_layer_id > 0 && + if (cpi->use_svc && svc->use_partition_reuse && + svc->number_spatial_layers == 3 && svc->temporal_layer_id > 0 && cpi->oxcf.width * cpi->oxcf.height > 640 * 480) sf->svc_use_lowres_part = 1; + // For SVC when golden is used as second temporal reference: to avoid + // encode time increase only use this feature on base temporal layer. + // (i.e remove golden flag from frame_flags for temporal_layer_id > 0). + if (cpi->use_svc && svc->use_gf_temporal_ref_current_layer && + svc->temporal_layer_id > 0) + cpi->ref_frame_flags &= (~VP9_GOLD_FLAG); + if (cm->width * cm->height > 640 * 480) sf->cb_pred_filter_search = 1; } if (speed >= 8) { @@ -621,15 +731,16 @@ static void set_rt_speed_feature_framesize_independent( if (!cpi->use_svc) cpi->max_copied_frame = 4; if (cpi->row_mt && cpi->oxcf.max_threads > 1) sf->adaptive_rd_thresh_row_mt = 1; - - if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = 3; - if (content == VP9E_CONTENT_SCREEN) sf->lpf_pick = LPF_PICK_MINIMAL_LPF; - // Only keep INTRA_DC mode for speed 8. - if (!is_keyframe) { - int i = 0; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->intra_y_mode_bsize_mask[i] = INTRA_DC; + // Enable ML based partition for low res. + if (!frame_is_intra_only(cm) && cm->width * cm->height <= 352 * 288) { + sf->nonrd_use_ml_partition = 1; } +#if CONFIG_VP9_HIGHBITDEPTH + if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) + sf->nonrd_use_ml_partition = 0; +#endif + if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = HALF_PEL; + sf->rt_intra_dc_only_low_content = 1; if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && content != VP9E_CONTENT_SCREEN) { // More aggressive short circuit for speed 8. 
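/* Editor's note: an illustrative sketch, not part of the patch, of the
 * resolution-gating pattern these speed-feature hunks rely on: a feature is
 * keyed off the frame's pixel count, and the ML-based partitioner is switched
 * back off for high-bit-depth input, mirroring the CONFIG_VP9_HIGHBITDEPTH
 * guard above. All names here (toy_*) are hypothetical, not libvpx API. */
typedef struct {
  int use_ml_partition; /* stands in for sf->nonrd_use_ml_partition */
} toy_speed_features;

void toy_set_rt_features(toy_speed_features *sf, int width, int height,
                         int is_high_bitdepth, int is_intra_only) {
  const int pixels = width * height;
  /* Enable the ML-based partitioner only for low resolutions on inter
   * frames, matching the 352x288 gate in the hunk above. */
  sf->use_ml_partition = (!is_intra_only && pixels <= 352 * 288);
  /* No trained model for high bit depth in this sketch, so fall back. */
  if (is_high_bitdepth) sf->use_ml_partition = 0;
}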
@@ -651,7 +762,33 @@ static void set_rt_speed_feature_framesize_independent( } sf->limit_newmv_early_exit = 0; sf->use_simple_block_yrd = 1; + if (cm->width * cm->height > 352 * 288) sf->cb_pred_filter_search = 1; } + + if (speed >= 9) { + // Only keep INTRA_DC mode for speed 9. + if (!is_keyframe) { + int i = 0; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->intra_y_mode_bsize_mask[i] = INTRA_DC; + } + sf->cb_pred_filter_search = 1; + sf->mv.enable_adaptive_subpel_force_stop = 1; + sf->mv.adapt_subpel_force_stop.mv_thresh = 1; + sf->mv.adapt_subpel_force_stop.force_stop_below = QUARTER_PEL; + sf->mv.adapt_subpel_force_stop.force_stop_above = HALF_PEL; + // Disable partition blocks below 16x16, except for low-resolutions. + if (cm->frame_type != KEY_FRAME && cm->width >= 320 && cm->height >= 240) + sf->disable_16x16part_nonkey = 1; + // Allow for disabling GOLDEN reference, for CBR mode. + if (cpi->oxcf.rc_mode == VPX_CBR) sf->disable_golden_ref = 1; + if (cpi->rc.avg_frame_low_motion < 70) sf->default_interp_filter = BILINEAR; + if (cm->width * cm->height >= 640 * 360) sf->variance_part_thresh_mult = 2; + } + + if (sf->nonrd_use_ml_partition) + sf->partition_search_type = ML_BASED_PARTITION; + if (sf->use_altref_onepass) { if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) { sf->partition_search_type = FIXED_PARTITION; @@ -666,9 +803,26 @@ static void set_rt_speed_feature_framesize_independent( (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(*cpi->count_lastgolden_frame_usage)); } + if (svc->previous_frame_is_intra_only) { + sf->partition_search_type = FIXED_PARTITION; + sf->always_this_block_size = BLOCK_64X64; + } + // Special case for screen content: increase motion search on base spatial + // layer when high motion is detected or previous SL0 frame was dropped. + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed >= 5 && + (svc->high_num_blocks_with_motion || svc->last_layer_dropped[0])) { + sf->mv.search_method = NSTEP; + // TODO(marpan/jianj): Tune this setting for screensharing. For now use + // small step_param for all spatial layers. + sf->mv.fullpel_search_step_param = 2; + } + // TODO(marpan): There is regression for aq-mode=3 speed <= 4, force it + // off for now. + if (speed <= 3 && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + cpi->oxcf.aq_mode = 0; } -void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { +void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi, int speed) { SPEED_FEATURES *const sf = &cpi->sf; const VP9EncoderConfig *const oxcf = &cpi->oxcf; RD_OPT *const rd = &cpi->rd; @@ -678,13 +832,15 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { // Some speed-up features even for best quality as minimal impact on quality. 
sf->partition_search_breakout_thr.dist = (1 << 19); sf->partition_search_breakout_thr.rate = 80; - sf->ml_partition_search_early_termination = 0; + sf->rd_ml_partition.search_early_termination = 0; + sf->rd_ml_partition.search_breakout = 0; - if (oxcf->mode == REALTIME) { - set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed); - } else if (oxcf->mode == GOOD) { - set_good_speed_feature_framesize_dependent(cpi, sf, oxcf->speed); - } + if (oxcf->mode == REALTIME) + set_rt_speed_feature_framesize_dependent(cpi, sf, speed); +#if !CONFIG_REALTIME_ONLY + else if (oxcf->mode == GOOD) + set_good_speed_feature_framesize_dependent(cpi, sf, speed); +#endif if (sf->disable_split_mask == DISABLE_ALL_SPLIT) { sf->adaptive_pred_interp_filter = 0; @@ -710,17 +866,13 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { if (!sf->adaptive_rd_thresh_row_mt && cpi->row_mt_bit_exact && oxcf->max_threads > 1) sf->adaptive_rd_thresh = 0; - - // This is only used in motion vector unit test. - if (cpi->oxcf.motion_vector_unit_test == 1) - cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv; - else if (cpi->oxcf.motion_vector_unit_test == 2) - cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv; } -void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { +void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi, int speed) { SPEED_FEATURES *const sf = &cpi->sf; +#if !CONFIG_REALTIME_ONLY VP9_COMMON *const cm = &cpi->common; +#endif MACROBLOCK *const x = &cpi->td.mb; const VP9EncoderConfig *const oxcf = &cpi->oxcf; int i; @@ -730,8 +882,8 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->mv.search_method = NSTEP; sf->recode_loop = ALLOW_RECODE_FIRST; sf->mv.subpel_search_method = SUBPEL_TREE; - sf->mv.subpel_iters_per_step = 2; - sf->mv.subpel_force_stop = 0; + sf->mv.subpel_search_level = 2; + sf->mv.subpel_force_stop = EIGHTH_PEL; sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf); sf->mv.reduce_first_step_size = 0; sf->coeff_prob_appx_step = 1; @@ -741,6 +893,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->tx_size_search_method = USE_FULL_RD; sf->use_lp32x32fdct = 0; sf->adaptive_motion_search = 0; + sf->enhanced_full_pixel_motion_search = 1; sf->adaptive_pred_interp_filter = 0; sf->adaptive_mode_search = 0; sf->cb_pred_filter_search = 0; @@ -752,7 +905,8 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->partition_search_type = SEARCH_PARTITION; sf->less_rectangular_check = 0; sf->use_square_partition_only = 0; - sf->use_square_only_threshold = BLOCK_SIZES; + sf->use_square_only_thresh_high = BLOCK_SIZES; + sf->use_square_only_thresh_low = BLOCK_4X4; sf->auto_min_max_partition_size = NOT_IN_USE; sf->rd_auto_partition_min_limit = BLOCK_4X4; sf->default_max_partition_size = BLOCK_64X64; @@ -771,6 +925,9 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->allow_quant_coeff_opt = sf->optimize_coefficients; sf->quant_opt_thresh = 99.0; sf->allow_acl = 1; + sf->enable_tpl_model = oxcf->enable_tpl_model; + sf->prune_ref_frame_for_rect_partitions = 0; + sf->temporal_filter_search_method = MESH; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_ALL; @@ -804,10 +961,17 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->limit_newmv_early_exit = 0; sf->bias_golden = 0; sf->base_mv_aggressive = 0; + sf->rd_ml_partition.prune_rect_thresh[0] = -1; + sf->rd_ml_partition.prune_rect_thresh[1] = -1; + 
sf->rd_ml_partition.prune_rect_thresh[2] = -1; + sf->rd_ml_partition.prune_rect_thresh[3] = -1; + sf->rd_ml_partition.var_pruning = 0; + sf->use_accurate_subpel_search = USE_8_TAPS; // Some speed-up features even for best quality as minimal impact on quality. sf->adaptive_rd_thresh = 1; sf->tx_size_search_breakout = 1; + sf->tx_size_search_depth = 2; sf->exhaustive_searches_thresh = (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20) @@ -820,10 +984,11 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { } if (oxcf->mode == REALTIME) - set_rt_speed_feature_framesize_independent(cpi, sf, oxcf->speed, - oxcf->content); + set_rt_speed_feature_framesize_independent(cpi, sf, speed, oxcf->content); +#if !CONFIG_REALTIME_ONLY else if (oxcf->mode == GOOD) - set_good_speed_feature_framesize_independent(cpi, cm, sf, oxcf->speed); + set_good_speed_feature_framesize_independent(cpi, cm, sf, speed); +#endif cpi->diamond_search_sad = vp9_diamond_search_sad; @@ -837,7 +1002,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->optimize_coefficients = 0; } - if (sf->mv.subpel_force_stop == 3) { + if (sf->mv.subpel_force_stop == FULL_PEL) { // Whole pel only cpi->find_fractional_mv_step = vp9_skip_sub_pixel_tree; } else if (sf->mv.subpel_search_method == SUBPEL_TREE) { @@ -850,6 +1015,12 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_evenmore; } + // This is only used in motion vector unit test. + if (cpi->oxcf.motion_vector_unit_test == 1) + cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv; + else if (cpi->oxcf.motion_vector_unit_test == 2) + cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv; + x->optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1; x->min_partition_size = sf->default_min_partition_size; @@ -867,10 +1038,4 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { if (!sf->adaptive_rd_thresh_row_mt && cpi->row_mt_bit_exact && oxcf->max_threads > 1) sf->adaptive_rd_thresh = 0; - - // This is only used in motion vector unit test. - if (cpi->oxcf.motion_vector_unit_test == 1) - cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv; - else if (cpi->oxcf.motion_vector_unit_test == 2) - cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv; } diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_speed_features.h b/media/libvpx/libvpx/vp9/encoder/vp9_speed_features.h index 50d52bc23a4b..ca284ded821c 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_speed_features.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_speed_features.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_SPEED_FEATURES_H_ -#define VP9_ENCODER_VP9_SPEED_FEATURES_H_ +#ifndef VPX_VP9_ENCODER_VP9_SPEED_FEATURES_H_ +#define VPX_VP9_ENCODER_VP9_SPEED_FEATURES_H_ #include "vp9/common/vp9_enums.h" @@ -57,7 +57,8 @@ typedef enum { BIGDIA = 3, SQUARE = 4, FAST_HEX = 5, - FAST_DIAMOND = 6 + FAST_DIAMOND = 6, + MESH = 7 } SEARCH_METHODS; typedef enum { @@ -135,20 +136,23 @@ typedef enum { } INTERP_FILTER_MASK; typedef enum { - // Search partitions using RD/NONRD criterion + // Search partitions using RD/NONRD criterion. SEARCH_PARTITION, - // Always use a fixed size partition + // Always use a fixed size partition. FIXED_PARTITION, REFERENCE_PARTITION, // Use an arbitrary partitioning scheme based on source variance within - // a 64X64 SB + // a 64X64 SB. 
VAR_BASED_PARTITION, - // Use non-fixed partitions based on source variance - SOURCE_VAR_BASED_PARTITION + // Use non-fixed partitions based on source variance. + SOURCE_VAR_BASED_PARTITION, + + // Make partition decisions with machine learning models. + ML_BASED_PARTITION } PARTITION_SEARCH_TYPE; typedef enum { @@ -161,6 +165,19 @@ typedef enum { ONE_LOOP_REDUCED = 1 } FAST_COEFF_UPDATE; +typedef enum { EIGHTH_PEL, QUARTER_PEL, HALF_PEL, FULL_PEL } SUBPEL_FORCE_STOP; + +typedef struct ADAPT_SUBPEL_FORCE_STOP { + // Threshold for full pixel motion vector; + int mv_thresh; + + // subpel_force_stop if full pixel MV is below the threshold. + SUBPEL_FORCE_STOP force_stop_below; + + // subpel_force_stop if full pixel MV is equal to or above the threshold. + SUBPEL_FORCE_STOP force_stop_above; +} ADAPT_SUBPEL_FORCE_STOP; + typedef struct MV_SPEED_FEATURES { // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). SEARCH_METHODS search_method; @@ -179,15 +196,17 @@ typedef struct MV_SPEED_FEATURES { // the same process. Along the way it skips many diagonals. SUBPEL_SEARCH_METHODS subpel_search_method; - // Maximum number of steps in logarithmic subpel search before giving up. - int subpel_iters_per_step; + // Subpel MV search level. Can take values 0 - 2. Higher values mean more + // extensive subpel search. + int subpel_search_level; - // Control when to stop subpel search: - // 0: Full subpel search. - // 1: Stop at quarter pixel. - // 2: Stop at half pixel. - // 3: Stop at full pixel. - int subpel_force_stop; + // When to stop subpel motion search. + SUBPEL_FORCE_STOP subpel_force_stop; + + // If it's enabled, different subpel_force_stop will be used for different MV. + int enable_adaptive_subpel_force_stop; + + ADAPT_SUBPEL_FORCE_STOP adapt_subpel_force_stop; // This variable sets the step_param used in full pel motion search. int fullpel_search_step_param; @@ -205,6 +224,28 @@ typedef struct MESH_PATTERN { int interval; } MESH_PATTERN; +typedef enum { + // No reaction to rate control on a detected slide/scene change. + NO_DETECTION = 0, + + // Set to larger Q (max_q set by user) based only on the + // detected slide/scene change and current/past Q. + FAST_DETECTION_MAXQ = 1, + + // Based on (first pass) encoded frame, if large frame size is detected + // then set to higher Q for the second re-encode. This involves 2 pass + // encoding on slide change, so slower than 1, but more accurate for + // detecting overshoot. + RE_ENCODE_MAXQ = 2 +} OVERSHOOT_DETECTION_CBR_RT; + +typedef enum { + USE_2_TAPS = 0, + USE_4_TAPS, + USE_8_TAPS, + USE_8_TAPS_SHARP, +} SUBPEL_SEARCH_TYPE; + typedef struct SPEED_FEATURES { MV_SPEED_FEATURES mv; @@ -258,6 +299,9 @@ typedef struct SPEED_FEATURES { // alternate reference frames. int allow_acl; + // Temporal dependency model based encoding mode optimization + int enable_tpl_model; + // Use transform domain distortion. Use pixel domain distortion in speed 0 // and certain situations in higher speed to improve the RD model precision. int allow_txfm_domain_distortion; @@ -272,6 +316,9 @@ typedef struct SPEED_FEATURES { // for intra and model coefs for the rest. TX_SIZE_SEARCH_METHOD tx_size_search_method; + // How many levels of tx size to search, starting from the largest. + int tx_size_search_depth; + // Low precision 32x32 fdct keeps everything in 16 bits and thus is less // precise but significantly faster than the non lp version. int use_lp32x32fdct; @@ -293,9 +340,14 @@ typedef struct SPEED_FEATURES { // rd than partition type split. 
   int less_rectangular_check;
 
-  // Disable testing non square partitions. (eg 16x32)
+  // Disable testing non square partitions (e.g., 16x32) for block sizes larger
+  // than use_square_only_thresh_high or smaller than use_square_only_thresh_low.
   int use_square_partition_only;
-  BLOCK_SIZE use_square_only_threshold;
+  BLOCK_SIZE use_square_only_thresh_high;
+  BLOCK_SIZE use_square_only_thresh_low;
+
+  // Prune reference frames for rectangular partitions.
+  int prune_ref_frame_for_rect_partitions;
 
   // Sets min and max partition sizes for this 64x64 region based on the
   // same 64x64 in last encoded frame, and the left and above neighbor.
@@ -327,6 +379,9 @@ typedef struct SPEED_FEATURES {
   // point for this motion search and limits the search range around it.
   int adaptive_motion_search;
 
+  // Do extra full pixel motion search to obtain a better motion vector.
+  int enhanced_full_pixel_motion_search;
+
   // Threshold for allowing exhaustive motion search.
   int exhaustive_searches_thresh;
 
@@ -448,8 +503,27 @@ typedef struct SPEED_FEATURES {
   // Partition search early breakout thresholds.
   PARTITION_SEARCH_BREAKOUT_THR partition_search_breakout_thr;
 
-  // Machine-learning based partition search early termination
-  int ml_partition_search_early_termination;
+  struct {
+    // Use ML-based partition search early breakout.
+    int search_breakout;
+    // Higher values mean more aggressive partition search breakout, which
+    // gives better encoding speed but worse compression performance.
+    float search_breakout_thresh[3];
+
+    // Machine-learning based partition search early termination.
+    int search_early_termination;
+
+    // Machine-learning based partition search pruning using prediction residue
+    // variance.
+    int var_pruning;
+
+    // Threshold values used for ML based rectangular partition search pruning.
+    // If < 0, the feature is turned off.
+    // Higher values mean more aggressive skipping of rectangular partition
+    // search, which gives better encoding speed but worse coding performance.
+    int prune_rect_thresh[4];
+  } rd_ml_partition;
 
   // Allow skipping partition search for still image frames
   int allow_partition_search_skip;
@@ -508,15 +582,47 @@ typedef struct SPEED_FEATURES {
   // For SVC: enables use of partition from lower spatial resolution.
   int svc_use_lowres_part;
+
+  // Flag to indicate the process for handling overshoot on slide/scene change,
+  // for real-time CBR mode.
+  OVERSHOOT_DETECTION_CBR_RT overshoot_detection_cbr_rt;
+
+  // Disable partitioning of 16x16 blocks.
+  int disable_16x16part_nonkey;
+
+  // Allow for disabling the golden reference.
+  int disable_golden_ref;
+
+  // Allow sub-pixel search to use interpolation filters with different taps in
+  // order to achieve a more accurate motion search result.
+  SUBPEL_SEARCH_TYPE use_accurate_subpel_search;
+
+  // Search method used by temporal filtering in full_pixel_motion_search.
+  SEARCH_METHODS temporal_filter_search_method;
+
+  // Use machine learning based partition search.
+  int nonrd_use_ml_partition;
+
+  // Multiplier for the base threshold for variance partitioning.
+  int variance_part_thresh_mult;
+
+  // Force the subpel motion filter to always use SMOOTH_FILTER.
+  int force_smooth_interpol;
+
+  // For real-time mode: force DC only under intra search when content
+  // does not have high source SAD.
+ int rt_intra_dc_only_low_content; } SPEED_FEATURES; struct VP9_COMP; -void vp9_set_speed_features_framesize_independent(struct VP9_COMP *cpi); -void vp9_set_speed_features_framesize_dependent(struct VP9_COMP *cpi); +void vp9_set_speed_features_framesize_independent(struct VP9_COMP *cpi, + int speed); +void vp9_set_speed_features_framesize_dependent(struct VP9_COMP *cpi, + int speed); #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_SPEED_FEATURES_H_ +#endif // VPX_VP9_ENCODER_VP9_SPEED_FEATURES_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_subexp.c b/media/libvpx/libvpx/vp9/encoder/vp9_subexp.c index e8212ce05e1e..19bbd5373fdf 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_subexp.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_subexp.c @@ -71,6 +71,7 @@ static int remap_prob(int v, int m) { else i = recenter_nonneg(MAX_PROB - 1 - v, MAX_PROB - 1 - m) - 1; + assert(i >= 0 && (size_t)i < sizeof(map_table)); i = map_table[i]; return i; } diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_subexp.h b/media/libvpx/libvpx/vp9/encoder/vp9_subexp.h index 26c89e2ea784..f0d544b5270d 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_subexp.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_subexp.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_SUBEXP_H_ -#define VP9_ENCODER_VP9_SUBEXP_H_ +#ifndef VPX_VP9_ENCODER_VP9_SUBEXP_H_ +#define VPX_VP9_ENCODER_VP9_SUBEXP_H_ #ifdef __cplusplus extern "C" { @@ -37,4 +37,4 @@ int vp9_prob_diff_update_savings_search_model(const unsigned int *ct, } // extern "C" #endif -#endif // VP9_ENCODER_VP9_SUBEXP_H_ +#endif // VPX_VP9_ENCODER_VP9_SUBEXP_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_svc_layercontext.c b/media/libvpx/libvpx/vp9/encoder/vp9_svc_layercontext.c index 2636bd9a5802..32ee6e064f29 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_svc_layercontext.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -19,6 +19,14 @@ #define SMALL_FRAME_WIDTH 32 #define SMALL_FRAME_HEIGHT 16 +static void swap_ptr(void *a, void *b) { + void **a_p = (void **)a; + void **b_p = (void **)b; + void *c = *a_p; + *a_p = *b_p; + *b_p = c; +} + void vp9_init_layer_context(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; @@ -29,24 +37,51 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->spatial_layer_id = 0; svc->temporal_layer_id = 0; - svc->first_spatial_layer_to_encode = 0; - svc->rc_drop_superframe = 0; svc->force_zero_mode_spatial_ref = 0; svc->use_base_mv = 0; + svc->use_partition_reuse = 0; + svc->use_gf_temporal_ref = 1; + svc->use_gf_temporal_ref_current_layer = 0; svc->scaled_temp_is_alloc = 0; svc->scaled_one_half = 0; svc->current_superframe = 0; svc->non_reference_frame = 0; + svc->skip_enhancement_layer = 0; + svc->disable_inter_layer_pred = INTER_LAYER_PRED_ON; + svc->framedrop_mode = CONSTRAINED_LAYER_DROP; + svc->set_intra_only_frame = 0; + svc->previous_frame_is_intra_only = 0; + svc->superframe_has_layer_sync = 0; + svc->use_set_ref_frame_config = 0; + svc->num_encoded_top_layer = 0; + svc->simulcast_mode = 0; - for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1; - for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { - svc->ext_frame_flags[sl] = 0; - svc->ext_lst_fb_idx[sl] = 0; - svc->ext_gld_fb_idx[sl] = 1; - svc->ext_alt_fb_idx[sl] = 2; - svc->downsample_filter_type[sl] = EIGHTTAP; - svc->downsample_filter_phase[sl] = 0; // Set to 8 for averaging filter. 
+ for (i = 0; i < REF_FRAMES; ++i) { + svc->fb_idx_spatial_layer_id[i] = 0xff; + svc->fb_idx_temporal_layer_id[i] = 0xff; + svc->fb_idx_base[i] = 0; } + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { + svc->last_layer_dropped[sl] = 0; + svc->drop_spatial_layer[sl] = 0; + svc->ext_frame_flags[sl] = 0; + svc->lst_fb_idx[sl] = 0; + svc->gld_fb_idx[sl] = 1; + svc->alt_fb_idx[sl] = 2; + svc->downsample_filter_type[sl] = BILINEAR; + svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter. + svc->framedrop_thresh[sl] = oxcf->drop_frames_water_mark; + svc->fb_idx_upd_tl0[sl] = -1; + svc->drop_count[sl] = 0; + svc->spatial_layer_sync[sl] = 0; + svc->force_drop_constrained_from_above[sl] = 0; + } + svc->max_consec_drop = INT_MAX; + + svc->buffer_gf_temporal_ref[1].idx = 7; + svc->buffer_gf_temporal_ref[0].idx = 6; + svc->buffer_gf_temporal_ref[1].is_used = 0; + svc->buffer_gf_temporal_ref[0].is_used = 0; if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img, SMALL_FRAME_WIDTH, @@ -84,6 +119,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { lrc->ni_frames = 0; lrc->decimation_count = 0; lrc->decimation_factor = 0; + lrc->worst_quality = oxcf->worst_allowed_q; + lrc->best_quality = oxcf->best_allowed_q; for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { lrc->rate_correction_factors[i] = 1.0; @@ -122,6 +159,9 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { size_t consec_zero_mv_size; VP9_COMMON *const cm = &cpi->common; lc->sb_index = 0; + lc->actual_num_seg1_blocks = 0; + lc->actual_num_seg2_blocks = 0; + lc->counter_encode_maxq_scene_change = 0; CHECK_MEM_ERROR(cm, lc->map, vpx_malloc(mi_rows * mi_cols * sizeof(*lc->map))); memset(lc->map, 0, mi_rows * mi_cols); @@ -154,6 +194,8 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, int sl, tl, layer = 0, spatial_layer_target; float bitrate_alloc = 1.0; + cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode; + if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { @@ -290,6 +332,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { LAYER_CONTEXT *const lc = get_layer_context(cpi); const int old_frame_since_key = cpi->rc.frames_since_key; const int old_frame_to_key = cpi->rc.frames_to_key; + const int old_ext_use_post_encode_drop = cpi->rc.ext_use_post_encode_drop; cpi->rc = lc->rc; cpi->twopass = lc->twopass; @@ -303,26 +346,23 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { // Reset the frames_since_key and frames_to_key counters to their values // before the layer restore. Keep these defined for the stream (not layer). if (cpi->svc.number_temporal_layers > 1 || - (cpi->svc.number_spatial_layers > 1 && !is_two_pass_svc(cpi))) { + cpi->svc.number_spatial_layers > 1) { cpi->rc.frames_since_key = old_frame_since_key; cpi->rc.frames_to_key = old_frame_to_key; } - + cpi->rc.ext_use_post_encode_drop = old_ext_use_post_encode_drop; // For spatial-svc, allow cyclic-refresh to be applied on the spatial layers, // for the base temporal layer. 
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->svc.number_spatial_layers > 1 && cpi->svc.temporal_layer_id == 0) { CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - signed char *temp = cr->map; - uint8_t *temp2 = cr->last_coded_q_map; - uint8_t *temp3 = cpi->consec_zero_mv; - cr->map = lc->map; - lc->map = temp; - cr->last_coded_q_map = lc->last_coded_q_map; - lc->last_coded_q_map = temp2; - cpi->consec_zero_mv = lc->consec_zero_mv; - lc->consec_zero_mv = temp3; + swap_ptr(&cr->map, &lc->map); + swap_ptr(&cr->last_coded_q_map, &lc->last_coded_q_map); + swap_ptr(&cpi->consec_zero_mv, &lc->consec_zero_mv); cr->sb_index = lc->sb_index; + cr->actual_num_seg1_blocks = lc->actual_num_seg1_blocks; + cr->actual_num_seg2_blocks = lc->actual_num_seg2_blocks; + cr->counter_encode_maxq_scene_change = lc->counter_encode_maxq_scene_change; } } @@ -350,6 +390,9 @@ void vp9_save_layer_context(VP9_COMP *const cpi) { lc->consec_zero_mv = cpi->consec_zero_mv; cpi->consec_zero_mv = temp3; lc->sb_index = cr->sb_index; + lc->actual_num_seg1_blocks = cr->actual_num_seg1_blocks; + lc->actual_num_seg2_blocks = cr->actual_num_seg2_blocks; + lc->counter_encode_maxq_scene_change = cr->counter_encode_maxq_scene_change; } } @@ -381,15 +424,6 @@ void vp9_inc_frame_in_layer(VP9_COMP *const cpi) { ++cpi->svc.current_superframe; } -int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) { - return is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0 && - cpi->svc - .layer_context[cpi->svc.spatial_layer_id * - cpi->svc.number_temporal_layers + - cpi->svc.temporal_layer_id] - .is_key_frame; -} - void get_layer_resolution(const int width_org, const int height_org, const int num, const int den, int *width_out, int *height_out) { @@ -408,6 +442,51 @@ void get_layer_resolution(const int width_org, const int height_org, *height_out = h; } +static void reset_fb_idx_unused(VP9_COMP *const cpi) { + // If a reference frame is not referenced or refreshed, then set the + // fb_idx for that reference to the first one used/referenced. + // This is to avoid setting fb_idx for a reference to a slot that is not + // used/needed (i.e., since that reference is not referenced or refreshed). + static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, + VP9_ALT_FLAG }; + MV_REFERENCE_FRAME ref_frame; + MV_REFERENCE_FRAME first_ref = 0; + int first_fb_idx = 0; + int fb_idx[3] = { cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->alt_fb_idx }; + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { + if (cpi->ref_frame_flags & flag_list[ref_frame]) { + first_ref = ref_frame; + first_fb_idx = fb_idx[ref_frame - 1]; + break; + } + } + if (first_ref > 0) { + if (first_ref != LAST_FRAME && + !(cpi->ref_frame_flags & flag_list[LAST_FRAME]) && + !cpi->ext_refresh_last_frame) + cpi->lst_fb_idx = first_fb_idx; + else if (first_ref != GOLDEN_FRAME && + !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && + !cpi->ext_refresh_golden_frame) + cpi->gld_fb_idx = first_fb_idx; + else if (first_ref != ALTREF_FRAME && + !(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]) && + !cpi->ext_refresh_alt_ref_frame) + cpi->alt_fb_idx = first_fb_idx; + } +} + +// Never refresh any reference frame buffers on top temporal layers in +// simulcast mode, which has interlayer prediction disabled. 
+static void non_reference_frame_simulcast(VP9_COMP *const cpi) { + if (cpi->svc.temporal_layer_id == cpi->svc.number_temporal_layers - 1 && + cpi->svc.temporal_layer_id > 0) { + cpi->ext_refresh_last_frame = 0; + cpi->ext_refresh_golden_frame = 0; + cpi->ext_refresh_alt_ref_frame = 0; + } +} + // The function sets proper ref_frame_flags, buffer indices, and buffer update // variables for temporal layering mode 3 - that does 0-2-1-2 temporal layering // scheme. @@ -511,6 +590,10 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) { cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; } + + if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi); + + reset_fb_idx_unused(cpi); } // The function sets proper ref_frame_flags, buffer indices, and buffer update @@ -546,6 +629,8 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) { if (!spatial_id) { cpi->ref_frame_flags = VP9_LAST_FLAG; } else { + if (spatial_id == cpi->svc.number_spatial_layers - 1) + cpi->ext_refresh_alt_ref_frame = 0; cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } } @@ -568,6 +653,10 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) { cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; } + + if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi); + + reset_fb_idx_unused(cpi); } // The function sets proper ref_frame_flags, buffer indices, and buffer update @@ -600,54 +689,200 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering( } else { cpi->gld_fb_idx = 0; } + + if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi); + + reset_fb_idx_unused(cpi); +} + +static void set_flags_and_fb_idx_bypass_via_set_ref_frame_config( + VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + int sl = svc->spatial_layer_id = svc->spatial_layer_to_encode; + cpi->svc.temporal_layer_id = cpi->svc.temporal_layer_id_per_spatial[sl]; + cpi->ext_refresh_frame_flags_pending = 1; + cpi->lst_fb_idx = svc->lst_fb_idx[sl]; + cpi->gld_fb_idx = svc->gld_fb_idx[sl]; + cpi->alt_fb_idx = svc->alt_fb_idx[sl]; + cpi->ext_refresh_last_frame = 0; + cpi->ext_refresh_golden_frame = 0; + cpi->ext_refresh_alt_ref_frame = 0; + cpi->ref_frame_flags = 0; + if (svc->reference_last[sl]) cpi->ref_frame_flags |= VP9_LAST_FLAG; + if (svc->reference_golden[sl]) cpi->ref_frame_flags |= VP9_GOLD_FLAG; + if (svc->reference_altref[sl]) cpi->ref_frame_flags |= VP9_ALT_FLAG; +} + +void vp9_copy_flags_ref_update_idx(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, + VP9_ALT_FLAG }; + int sl = svc->spatial_layer_id; + svc->lst_fb_idx[sl] = cpi->lst_fb_idx; + svc->gld_fb_idx[sl] = cpi->gld_fb_idx; + svc->alt_fb_idx[sl] = cpi->alt_fb_idx; + // For the fixed SVC mode: pass the refresh_lst/gld/alt_frame flags to the + // update_buffer_slot, this is needed for the GET_SVC_REF_FRAME_CONFIG api. 
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + int ref; + for (ref = 0; ref < REF_FRAMES; ++ref) { + svc->update_buffer_slot[sl] &= ~(1 << ref); + if ((ref == svc->lst_fb_idx[sl] && cpi->refresh_last_frame) || + (ref == svc->gld_fb_idx[sl] && cpi->refresh_golden_frame) || + (ref == svc->alt_fb_idx[sl] && cpi->refresh_alt_ref_frame)) + svc->update_buffer_slot[sl] |= (1 << ref); + } + } + + // TODO(jianj): Remove these 3, deprecated. + svc->update_last[sl] = (uint8_t)cpi->refresh_last_frame; + svc->update_golden[sl] = (uint8_t)cpi->refresh_golden_frame; + svc->update_altref[sl] = (uint8_t)cpi->refresh_alt_ref_frame; + + svc->reference_last[sl] = + (uint8_t)(cpi->ref_frame_flags & flag_list[LAST_FRAME]); + svc->reference_golden[sl] = + (uint8_t)(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]); + svc->reference_altref[sl] = + (uint8_t)(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]); } int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { int width = 0, height = 0; + SVC *const svc = &cpi->svc; LAYER_CONTEXT *lc = NULL; - if (cpi->svc.number_spatial_layers > 1) cpi->svc.use_base_mv = 1; - cpi->svc.force_zero_mode_spatial_ref = 1; - cpi->svc.mi_stride[cpi->svc.spatial_layer_id] = cpi->common.mi_stride; + svc->skip_enhancement_layer = 0; - if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { - set_flags_and_fb_idx_for_temporal_mode3(cpi); - } else if (cpi->svc.temporal_layering_mode == - VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { - set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi); - } else if (cpi->svc.temporal_layering_mode == - VP9E_TEMPORAL_LAYERING_MODE_0101) { - set_flags_and_fb_idx_for_temporal_mode2(cpi); - } else if (cpi->svc.temporal_layering_mode == - VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { - // In the BYPASS/flexible mode, the encoder is relying on the application - // to specify, for each spatial layer, the flags and buffer indices for the - // layering. - // Note that the check (cpi->ext_refresh_frame_flags_pending == 0) is - // needed to support the case where the frame flags may be passed in via - // vpx_codec_encode(), which can be used for the temporal-only svc case. - // TODO(marpan): Consider adding an enc_config parameter to better handle - // this case. - if (cpi->ext_refresh_frame_flags_pending == 0) { - int sl; - cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; - sl = cpi->svc.spatial_layer_id; - vp9_apply_encoding_flags(cpi, cpi->svc.ext_frame_flags[sl]); - cpi->lst_fb_idx = cpi->svc.ext_lst_fb_idx[sl]; - cpi->gld_fb_idx = cpi->svc.ext_gld_fb_idx[sl]; - cpi->alt_fb_idx = cpi->svc.ext_alt_fb_idx[sl]; + if (svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF && + svc->number_spatial_layers > 1 && svc->number_spatial_layers <= 3 && + svc->number_temporal_layers <= 3 && + !(svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + svc->use_set_ref_frame_config)) + svc->simulcast_mode = 1; + else + svc->simulcast_mode = 0; + + if (svc->number_spatial_layers > 1) { + svc->use_base_mv = 1; + svc->use_partition_reuse = 1; + } + svc->force_zero_mode_spatial_ref = 1; + svc->mi_stride[svc->spatial_layer_id] = cpi->common.mi_stride; + svc->mi_rows[svc->spatial_layer_id] = cpi->common.mi_rows; + svc->mi_cols[svc->spatial_layer_id] = cpi->common.mi_cols; + + // For constrained_from_above drop mode: before encoding superframe (i.e., + // at SL0 frame) check all spatial layers (starting from top) for possible + // drop, and if so, set a flag to force drop of that layer and all its lower + // layers. 
+ if (svc->spatial_layer_to_encode == svc->first_spatial_layer_to_encode) { + int sl; + for (sl = 0; sl < svc->number_spatial_layers; sl++) + svc->force_drop_constrained_from_above[sl] = 0; + if (svc->framedrop_mode == CONSTRAINED_FROM_ABOVE_DROP) { + for (sl = svc->number_spatial_layers - 1; + sl >= svc->first_spatial_layer_to_encode; sl--) { + int layer = sl * svc->number_temporal_layers + svc->temporal_layer_id; + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + cpi->rc = lc->rc; + cpi->oxcf.target_bandwidth = lc->target_bandwidth; + if (vp9_test_drop(cpi)) { + int sl2; + // Set flag to force drop in encoding for this mode. + for (sl2 = sl; sl2 >= svc->first_spatial_layer_to_encode; sl2--) + svc->force_drop_constrained_from_above[sl2] = 1; + break; + } + } } } - if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode) - cpi->svc.rc_drop_superframe = 0; + if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { + set_flags_and_fb_idx_for_temporal_mode3(cpi); + } else if (svc->temporal_layering_mode == + VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { + set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi); + } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0101) { + set_flags_and_fb_idx_for_temporal_mode2(cpi); + } else if (svc->temporal_layering_mode == + VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + svc->use_set_ref_frame_config) { + set_flags_and_fb_idx_bypass_via_set_ref_frame_config(cpi); + } - lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id * - cpi->svc.number_temporal_layers + - cpi->svc.temporal_layer_id]; + if (cpi->lst_fb_idx == svc->buffer_gf_temporal_ref[0].idx || + cpi->gld_fb_idx == svc->buffer_gf_temporal_ref[0].idx || + cpi->alt_fb_idx == svc->buffer_gf_temporal_ref[0].idx) + svc->buffer_gf_temporal_ref[0].is_used = 1; + if (cpi->lst_fb_idx == svc->buffer_gf_temporal_ref[1].idx || + cpi->gld_fb_idx == svc->buffer_gf_temporal_ref[1].idx || + cpi->alt_fb_idx == svc->buffer_gf_temporal_ref[1].idx) + svc->buffer_gf_temporal_ref[1].is_used = 1; + + // For the fixed (non-flexible/bypass) SVC mode: + // If long term temporal reference is enabled at the sequence level + // (use_gf_temporal_ref == 1), and inter_layer is disabled (on inter-frames), + // we can use golden as a second temporal reference + // (since the spatial/inter-layer reference is disabled). + // We check that the fb_idx for this reference (buffer_gf_temporal_ref.idx) is + // unused (slot 7 and 6 should be available for 3-3 layer system). + // For now usage of this second temporal reference will only be used for + // highest and next to highest spatial layer (i.e., top and middle layer for + // 3 spatial layers). + svc->use_gf_temporal_ref_current_layer = 0; + if (svc->use_gf_temporal_ref && !svc->buffer_gf_temporal_ref[0].is_used && + !svc->buffer_gf_temporal_ref[1].is_used && + svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + svc->disable_inter_layer_pred != INTER_LAYER_PRED_ON && + svc->number_spatial_layers <= 3 && svc->number_temporal_layers <= 3 && + svc->spatial_layer_id >= svc->number_spatial_layers - 2) { + // Enable the second (long-term) temporal reference at the frame-level. + svc->use_gf_temporal_ref_current_layer = 1; + } + + // Check if current superframe has any layer sync, only check once on + // base layer. + if (svc->spatial_layer_id == 0) { + int sl = 0; + // Default is no sync. 
+    svc->superframe_has_layer_sync = 0;
+    for (sl = 0; sl < svc->number_spatial_layers; ++sl) {
+      if (cpi->svc.spatial_layer_sync[sl]) svc->superframe_has_layer_sync = 1;
+    }
+  }
+
+  // Reset the drop flags for all spatial layers, on the base layer.
+  if (svc->spatial_layer_id == 0) {
+    vp9_zero(svc->drop_spatial_layer);
+    // TODO(jianj/marpan): Investigate why setting svc->lst/gld/alt_fb_idx
+    // causes an issue with frame dropping and temporal layers, when the frame
+    // flags are passed via the encode call (bypass mode). Issue is that we're
+    // resetting ext_refresh_frame_flags_pending to 0 on frame drops.
+    if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+      memset(&svc->lst_fb_idx, -1, sizeof(svc->lst_fb_idx));
+      memset(&svc->gld_fb_idx, -1, sizeof(svc->gld_fb_idx));
+      memset(&svc->alt_fb_idx, -1, sizeof(svc->alt_fb_idx));
+      // These are set by the API before the superframe is encoded and are
+      // passed to the encoder layer by layer, so don't reset them on layer 0
+      // in bypass mode.
+      vp9_zero(svc->update_buffer_slot);
+      vp9_zero(svc->reference_last);
+      vp9_zero(svc->reference_golden);
+      vp9_zero(svc->reference_altref);
+      // TODO(jianj): Remove these 3, deprecated.
+      vp9_zero(svc->update_last);
+      vp9_zero(svc->update_golden);
+      vp9_zero(svc->update_altref);
+    }
+  }
+
+  lc = &svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers +
+                           svc->temporal_layer_id];
   // Setting the worst/best_quality via the encoder control: SET_SVC_PARAMETERS,
   // only for non-BYPASS mode for now.
-  if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+  if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS ||
+      svc->use_set_ref_frame_config) {
     RATE_CONTROL *const lrc = &lc->rc;
     lrc->worst_quality = vp9_quantizer_to_qindex(lc->max_q);
     lrc->best_quality = vp9_quantizer_to_qindex(lc->min_q);
@@ -657,35 +892,68 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
                        lc->scaling_factor_num, lc->scaling_factor_den, &width,
                        &height);

-  // For resolutions <= VGA: set phase of the filter = 8 (for symmetric
-  // averaging filter), use bilinear for now.
-  if (width * height <= 640 * 480) {
-    cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] = BILINEAR;
-    cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id] = 8;
-  }
+  // Use EIGHTTAP_SMOOTH for low resolutions.
+  if (width * height <= 320 * 240)
+    svc->downsample_filter_type[svc->spatial_layer_id] = EIGHTTAP_SMOOTH;
+  // For scale factors > 0.75, set the phase to 0 (aligns decimated pixel
+  // to source pixel).
+  lc = &svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers +
+                           svc->temporal_layer_id];
+  if (lc->scaling_factor_num > (3 * lc->scaling_factor_den) >> 2)
+    svc->downsample_filter_phase[svc->spatial_layer_id] = 0;

-  // The usage of use_base_mv assumes down-scale of 2x2. For now, turn off use
-  // of base motion vectors if spatial scale factors for any layers are not 2,
+  // The usage of use_base_mv or partition_reuse assumes down-scale of 2x2.
+  // For now, turn off use of base motion vectors and partition reuse if the
+  // spatial scale factors for any layers are not 2,
   // keep the case of 3 spatial layers with scale factor of 4x4 for base layer.
   // TODO(marpan): Fix this to allow for use_base_mv for scale factors != 2.
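/* A minimal sketch of the scale-factor arithmetic above (hypothetical
   helper; get_layer_resolution() is assumed to scale each dimension by
   scaling_factor_num / scaling_factor_den): */
static void layer_resolution_sketch(int src_w, int src_h, int num, int den,
                                    int *w, int *h) {
  *w = src_w * num / den; /* e.g. 1280 * 1 / 2 = 640 for a 2x2 downscale */
  *h = src_h * num / den;
}
/* In the same terms, the phase-0 condition above,
   num > (3 * den) >> 2, is the integer form of num / den > 3 / 4. */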
-  if (cpi->svc.number_spatial_layers > 1) {
+  if (svc->number_spatial_layers > 1) {
     int sl;
-    for (sl = 0; sl < cpi->svc.number_spatial_layers - 1; ++sl) {
-      lc = &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers +
-                                   cpi->svc.temporal_layer_id];
+    for (sl = 0; sl < svc->number_spatial_layers - 1; ++sl) {
+      lc = &svc->layer_context[sl * svc->number_temporal_layers +
+                               svc->temporal_layer_id];
       if ((lc->scaling_factor_num != lc->scaling_factor_den >> 1) &&
           !(lc->scaling_factor_num == lc->scaling_factor_den >> 2 && sl == 0 &&
-            cpi->svc.number_spatial_layers == 3)) {
-        cpi->svc.use_base_mv = 0;
+            svc->number_spatial_layers == 3)) {
+        svc->use_base_mv = 0;
+        svc->use_partition_reuse = 0;
         break;
       }
     }
+    // For non-zero spatial layers: if the previous spatial layer was dropped,
+    // disable the base_mv and partition_reuse features.
+    if (svc->spatial_layer_id > 0 &&
+        svc->drop_spatial_layer[svc->spatial_layer_id - 1]) {
+      svc->use_base_mv = 0;
+      svc->use_partition_reuse = 0;
+    }
   }

-  cpi->svc.non_reference_frame = 0;
+  svc->non_reference_frame = 0;
   if (cpi->common.frame_type != KEY_FRAME && !cpi->ext_refresh_last_frame &&
-      !cpi->ext_refresh_golden_frame && !cpi->ext_refresh_alt_ref_frame) {
-    cpi->svc.non_reference_frame = 1;
+      !cpi->ext_refresh_golden_frame && !cpi->ext_refresh_alt_ref_frame)
+    svc->non_reference_frame = 1;
+  // For the flexible (bypass) mode, where update_buffer_slot is set by the
+  // application, we also need to check that no buffer slot is refreshed.
+  if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+    if (svc->update_buffer_slot[svc->spatial_layer_id] != 0)
+      svc->non_reference_frame = 0;
+  }
+
+  if (svc->spatial_layer_id == 0) {
+    svc->high_source_sad_superframe = 0;
+    svc->high_num_blocks_with_motion = 0;
+  }
+
+  if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+      svc->last_layer_dropped[svc->spatial_layer_id] &&
+      svc->fb_idx_upd_tl0[svc->spatial_layer_id] != -1 &&
+      !svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+    // For fixed/non-flexible mode: if the previous frame (same spatial layer
+    // from the previous superframe) was dropped, make sure the lst_fb_idx
+    // for this frame corresponds to the buffer index updated on the last
+    // encoded TL0 frame (with the same spatial layer).
+    cpi->lst_fb_idx = svc->fb_idx_upd_tl0[svc->spatial_layer_id];
   }

   if (vp9_set_size_literal(cpi, width, height) != 0)
@@ -694,120 +962,6 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
   return 0;
 }

-#if CONFIG_SPATIAL_SVC
-#define SMALL_FRAME_FB_IDX 7
-
-int vp9_svc_start_frame(VP9_COMP *const cpi) {
-  int width = 0, height = 0;
-  LAYER_CONTEXT *lc;
-  struct lookahead_entry *buf;
-  int count = 1 << (cpi->svc.number_temporal_layers - 1);
-
-  cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
-  lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
-
-  cpi->svc.temporal_layer_id = 0;
-  while ((lc->current_video_frame_in_layer % count) != 0) {
-    ++cpi->svc.temporal_layer_id;
-    count >>= 1;
-  }
-
-  cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
-
-  cpi->lst_fb_idx = cpi->svc.spatial_layer_id;
-
-  if (cpi->svc.spatial_layer_id == 0)
-    cpi->gld_fb_idx =
-        (lc->gold_ref_idx >= 0) ?
lc->gold_ref_idx : cpi->lst_fb_idx; - else - cpi->gld_fb_idx = cpi->svc.spatial_layer_id - 1; - - if (lc->current_video_frame_in_layer == 0) { - if (cpi->svc.spatial_layer_id >= 2) { - cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2; - } else { - cpi->alt_fb_idx = cpi->lst_fb_idx; - cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_ALT_FLAG); - } - } else { - if (cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id]) { - cpi->alt_fb_idx = lc->alt_ref_idx; - if (!lc->has_alt_frame) cpi->ref_frame_flags &= (~VP9_ALT_FLAG); - } else { - // Find a proper alt_fb_idx for layers that don't have alt ref frame - if (cpi->svc.spatial_layer_id == 0) { - cpi->alt_fb_idx = cpi->lst_fb_idx; - } else { - LAYER_CONTEXT *lc_lower = - &cpi->svc.layer_context[cpi->svc.spatial_layer_id - 1]; - - if (cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id - 1] && - lc_lower->alt_ref_source != NULL) - cpi->alt_fb_idx = lc_lower->alt_ref_idx; - else if (cpi->svc.spatial_layer_id >= 2) - cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2; - else - cpi->alt_fb_idx = cpi->lst_fb_idx; - } - } - } - - get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height, - lc->scaling_factor_num, lc->scaling_factor_den, &width, - &height); - - // Workaround for multiple frame contexts. In some frames we can't use prev_mi - // since its previous frame could be changed during decoding time. The idea is - // we put a empty invisible frame in front of them, then we will not use - // prev_mi when encoding these frames. - - buf = vp9_lookahead_peek(cpi->lookahead, 0); - if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2 && - cpi->svc.encode_empty_frame_state == NEED_TO_ENCODE && - lc->rc.frames_to_key != 0 && - !(buf != NULL && (buf->flags & VPX_EFLAG_FORCE_KF))) { - if ((cpi->svc.number_temporal_layers > 1 && - cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1) || - (cpi->svc.number_spatial_layers > 1 && - cpi->svc.spatial_layer_id == 0)) { - struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead, 0); - - if (buf != NULL) { - cpi->svc.empty_frame.ts_start = buf->ts_start; - cpi->svc.empty_frame.ts_end = buf->ts_end; - cpi->svc.encode_empty_frame_state = ENCODING; - cpi->common.show_frame = 0; - cpi->ref_frame_flags = 0; - cpi->common.frame_type = INTER_FRAME; - cpi->lst_fb_idx = cpi->gld_fb_idx = cpi->alt_fb_idx = - SMALL_FRAME_FB_IDX; - - if (cpi->svc.encode_intra_empty_frame != 0) cpi->common.intra_only = 1; - - width = SMALL_FRAME_WIDTH; - height = SMALL_FRAME_HEIGHT; - } - } - } - - cpi->oxcf.worst_allowed_q = vp9_quantizer_to_qindex(lc->max_q); - cpi->oxcf.best_allowed_q = vp9_quantizer_to_qindex(lc->min_q); - - vp9_change_config(cpi, &cpi->oxcf); - - if (vp9_set_size_literal(cpi, width, height) != 0) - return VPX_CODEC_INVALID_PARAM; - - vp9_set_high_precision_mv(cpi, 1); - - cpi->alt_ref_source = get_layer_context(cpi)->alt_ref_source; - - return 0; -} - -#undef SMALL_FRAME_FB_IDX -#endif // CONFIG_SPATIAL_SVC - struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi, struct lookahead_ctx *ctx, int drain) { @@ -840,7 +994,7 @@ void vp9_free_svc_cyclic_refresh(VP9_COMP *const cpi) { } // Reset on key frame: reset counters, references and buffer updates. 
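/* The flat layer_context[] indexing used throughout these functions is
   spatial-major, temporal-minor; a sketch equivalent to the explicit
   sl * number_temporal_layers + tl expressions and the LAYER_IDS_TO_IDX()
   uses below (hypothetical helper): */
static int layer_idx_sketch(int sl, int tl, int num_temporal_layers) {
  /* e.g. with 3 temporal layers, (sl = 2, tl = 1) maps to index 7 */
  return sl * num_temporal_layers + tl;
}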
-void vp9_svc_reset_key_frame(VP9_COMP *const cpi) { +void vp9_svc_reset_temporal_layers(VP9_COMP *const cpi, int is_key) { int sl, tl; SVC *const svc = &cpi->svc; LAYER_CONTEXT *lc = NULL; @@ -848,7 +1002,7 @@ void vp9_svc_reset_key_frame(VP9_COMP *const cpi) { for (tl = 0; tl < svc->number_temporal_layers; ++tl) { lc = &cpi->svc.layer_context[sl * svc->number_temporal_layers + tl]; lc->current_video_frame_in_layer = 0; - lc->frames_from_key_frame = 0; + if (is_key) lc->frames_from_key_frame = 0; } } if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { @@ -887,3 +1041,276 @@ void vp9_svc_check_reset_layer_rc_flag(VP9_COMP *const cpi) { } } } + +void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) { + VP9_COMMON *const cm = &cpi->common; + SVC *const svc = &cpi->svc; + const int sl = svc->spatial_layer_id; + // Check for disabling inter-layer (spatial) prediction, if + // svc.disable_inter_layer_pred is set. If the previous spatial layer was + // dropped then disable the prediction from this (scaled) reference. + // For INTER_LAYER_PRED_OFF_NONKEY: inter-layer prediction is disabled + // on key frames or if any spatial layer is a sync layer. + if ((svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF_NONKEY && + !svc->layer_context[svc->temporal_layer_id].is_key_frame && + !svc->superframe_has_layer_sync) || + svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF || + svc->drop_spatial_layer[sl - 1]) { + MV_REFERENCE_FRAME ref_frame; + static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, + VP9_ALT_FLAG }; + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); + if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) { + const struct scale_factors *const scale_fac = + &cm->frame_refs[ref_frame - 1].sf; + if (vp9_is_scaled(scale_fac)) { + cpi->ref_frame_flags &= (~flag_list[ref_frame]); + // Point golden/altref frame buffer index to last. + if (!svc->simulcast_mode) { + if (ref_frame == GOLDEN_FRAME) + cpi->gld_fb_idx = cpi->lst_fb_idx; + else if (ref_frame == ALTREF_FRAME) + cpi->alt_fb_idx = cpi->lst_fb_idx; + } + } + } + } + } + // For fixed/non-flexible SVC: check for disabling inter-layer prediction. + // If the reference for inter-layer prediction (the reference that is scaled) + // is not the previous spatial layer from the same superframe, then we disable + // inter-layer prediction. Only need to check when inter_layer prediction is + // not set to OFF mode. + if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + svc->disable_inter_layer_pred != INTER_LAYER_PRED_OFF) { + // We only use LAST and GOLDEN for prediction in real-time mode, so we + // check both here. + MV_REFERENCE_FRAME ref_frame; + for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ref_frame++) { + struct scale_factors *scale_fac = &cm->frame_refs[ref_frame - 1].sf; + if (vp9_is_scaled(scale_fac)) { + // If this reference was updated on the previous spatial layer of the + // current superframe, then we keep this reference (don't disable). + // Otherwise we disable the inter-layer prediction. + // This condition is verified by checking if the current frame buffer + // index is equal to any of the slots for the previous spatial layer, + // and if so, check if that slot was updated/refreshed. If that is the + // case, then this reference is valid for inter-layer prediction under + // the mode INTER_LAYER_PRED_ON_CONSTRAINED. 
+          int fb_idx =
+              ref_frame == LAST_FRAME ? cpi->lst_fb_idx : cpi->gld_fb_idx;
+          int ref_flag = ref_frame == LAST_FRAME ? VP9_LAST_FLAG : VP9_GOLD_FLAG;
+          int disable = 1;
+          if (fb_idx < 0) continue;
+          if ((fb_idx == svc->lst_fb_idx[sl - 1] &&
+               (svc->update_buffer_slot[sl - 1] & (1 << fb_idx))) ||
+              (fb_idx == svc->gld_fb_idx[sl - 1] &&
+               (svc->update_buffer_slot[sl - 1] & (1 << fb_idx))) ||
+              (fb_idx == svc->alt_fb_idx[sl - 1] &&
+               (svc->update_buffer_slot[sl - 1] & (1 << fb_idx))))
+            disable = 0;
+          if (disable) cpi->ref_frame_flags &= (~ref_flag);
+        }
+      }
+    }
+  }
+}
+
+void vp9_svc_assert_constraints_pattern(VP9_COMP *const cpi) {
+  SVC *const svc = &cpi->svc;
+  // For fixed/non-flexible mode, the following constraints are expected
+  // when inter-layer prediction is on (default).
+  if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+      svc->disable_inter_layer_pred == INTER_LAYER_PRED_ON &&
+      svc->framedrop_mode != LAYER_DROP) {
+    if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+      // On non-key frames: LAST is always temporal reference, GOLDEN is
+      // spatial reference.
+      if (svc->temporal_layer_id == 0)
+        // Base temporal only predicts from base temporal.
+        assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] == 0);
+      else
+        // Non-base temporal only predicts from lower temporal layer.
+        assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] <
+               svc->temporal_layer_id);
+      if (svc->spatial_layer_id > 0 && cpi->ref_frame_flags & VP9_GOLD_FLAG &&
+          svc->spatial_layer_id > svc->first_spatial_layer_to_encode) {
+        // Non-base spatial only predicts from lower spatial layer with same
+        // temporal_id.
+        assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+               svc->spatial_layer_id - 1);
+        assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] ==
+               svc->temporal_layer_id);
+      }
+    } else if (svc->spatial_layer_id > 0 &&
+               svc->spatial_layer_id > svc->first_spatial_layer_to_encode) {
+      // Only 1 reference for a frame whose base is key; the reference may be
+      // LAST or GOLDEN, so we check both.
+      if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
+        assert(svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] ==
+               svc->spatial_layer_id - 1);
+        assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] ==
+               svc->temporal_layer_id);
+      } else if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
+        assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+               svc->spatial_layer_id - 1);
+        assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] ==
+               svc->temporal_layer_id);
+      }
+    }
+  } else if (svc->use_gf_temporal_ref_current_layer &&
+             !svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+    // For the usage of golden as second long term reference: the
+    // temporal_layer_id of that reference must be base temporal layer 0, and
+    // the spatial_layer_id of that reference must be the same as the current
+    // spatial_layer_id. If not, disable the feature.
+    // TODO(marpan): Investigate when this can happen, and maybe put this check
+    // and reset in a different place.
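/* The rule described in the comment above, extracted as a predicate for
   clarity (hypothetical helper): golden can serve as the second (long-term)
   temporal reference only if its buffer was last written by this spatial
   layer at temporal layer 0. */
static int gf_temporal_ref_valid_sketch(const SVC *svc, int gld_fb_idx) {
  return svc->fb_idx_spatial_layer_id[gld_fb_idx] == svc->spatial_layer_id &&
         svc->fb_idx_temporal_layer_id[gld_fb_idx] == 0;
}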
+ if (svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] != + svc->spatial_layer_id || + svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] != 0) + svc->use_gf_temporal_ref_current_layer = 0; + } +} + +#if CONFIG_VP9_TEMPORAL_DENOISING +int vp9_denoise_svc_non_key(VP9_COMP *const cpi) { + int layer = + LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, cpi->svc.temporal_layer_id, + cpi->svc.number_temporal_layers); + LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; + return denoise_svc(cpi) && !lc->is_key_frame; +} +#endif + +void vp9_svc_check_spatial_layer_sync(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + // Only for superframes whose base is not key, as those are + // already sync frames. + if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) { + if (svc->spatial_layer_id == 0) { + // On base spatial layer: if the current superframe has a layer sync then + // reset the pattern counters and reset to base temporal layer. + if (svc->superframe_has_layer_sync) + vp9_svc_reset_temporal_layers(cpi, cpi->common.frame_type == KEY_FRAME); + } + // If the layer sync is set for this current spatial layer then + // disable the temporal reference. + if (svc->spatial_layer_id > 0 && + svc->spatial_layer_sync[svc->spatial_layer_id]) { + cpi->ref_frame_flags &= (~VP9_LAST_FLAG); + if (svc->use_gf_temporal_ref_current_layer) { + int index = svc->spatial_layer_id; + // If golden is used as second reference: need to remove it from + // prediction, reset refresh period to 0, and update the reference. + svc->use_gf_temporal_ref_current_layer = 0; + cpi->rc.baseline_gf_interval = 0; + cpi->rc.frames_till_gf_update_due = 0; + // On layer sync frame we must update the buffer index used for long + // term reference. Use the alt_ref since it is not used or updated on + // sync frames. + if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1; + assert(index >= 0); + cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx; + cpi->ext_refresh_alt_ref_frame = 1; + } + } + } +} + +void vp9_svc_update_ref_frame_buffer_idx(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + // Update the usage of frame buffer index for base spatial layers. + if (svc->spatial_layer_id == 0) { + if ((cpi->ref_frame_flags & VP9_LAST_FLAG) || cpi->refresh_last_frame) + svc->fb_idx_base[cpi->lst_fb_idx] = 1; + if ((cpi->ref_frame_flags & VP9_GOLD_FLAG) || cpi->refresh_golden_frame) + svc->fb_idx_base[cpi->gld_fb_idx] = 1; + if ((cpi->ref_frame_flags & VP9_ALT_FLAG) || cpi->refresh_alt_ref_frame) + svc->fb_idx_base[cpi->alt_fb_idx] = 1; + } +} + +static void vp9_svc_update_ref_frame_bypass_mode(VP9_COMP *const cpi) { + // For non-flexible/bypass SVC mode: check for refreshing other buffer + // slots. 
+  SVC *const svc = &cpi->svc;
+  VP9_COMMON *const cm = &cpi->common;
+  BufferPool *const pool = cm->buffer_pool;
+  int i;
+  for (i = 0; i < REF_FRAMES; i++) {
+    if (cm->frame_type == KEY_FRAME ||
+        svc->update_buffer_slot[svc->spatial_layer_id] & (1 << i)) {
+      ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx);
+      svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id;
+      svc->fb_idx_temporal_layer_id[i] = svc->temporal_layer_id;
+    }
+  }
+}
+
+void vp9_svc_update_ref_frame(VP9_COMP *const cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  SVC *const svc = &cpi->svc;
+  BufferPool *const pool = cm->buffer_pool;
+
+  if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+      svc->use_set_ref_frame_config) {
+    vp9_svc_update_ref_frame_bypass_mode(cpi);
+  } else if (cm->frame_type == KEY_FRAME && !svc->simulcast_mode) {
+    // Keep track of frame index for each reference frame.
+    int i;
+    // On key frame update all reference frame slots.
+    for (i = 0; i < REF_FRAMES; i++) {
+      svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id;
+      svc->fb_idx_temporal_layer_id[i] = svc->temporal_layer_id;
+      // LAST/GOLDEN/ALTREF is already updated above.
+      if (i != cpi->lst_fb_idx && i != cpi->gld_fb_idx && i != cpi->alt_fb_idx)
+        ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx);
+    }
+  } else {
+    if (cpi->refresh_last_frame) {
+      svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] = svc->spatial_layer_id;
+      svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] = svc->temporal_layer_id;
+    }
+    if (cpi->refresh_golden_frame) {
+      svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] = svc->spatial_layer_id;
+      svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] = svc->temporal_layer_id;
+    }
+    if (cpi->refresh_alt_ref_frame) {
+      svc->fb_idx_spatial_layer_id[cpi->alt_fb_idx] = svc->spatial_layer_id;
+      svc->fb_idx_temporal_layer_id[cpi->alt_fb_idx] = svc->temporal_layer_id;
+    }
+  }
+  // Copy flags from encoder to SVC struct.
+  vp9_copy_flags_ref_update_idx(cpi);
+  vp9_svc_update_ref_frame_buffer_idx(cpi);
+}
+
+void vp9_svc_adjust_frame_rate(VP9_COMP *const cpi) {
+  int64_t this_duration =
+      cpi->svc.timebase_fac * cpi->svc.duration[cpi->svc.spatial_layer_id];
+  vp9_new_framerate(cpi, 10000000.0 / this_duration);
+}
+
+void vp9_svc_adjust_avg_frame_qindex(VP9_COMP *const cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  SVC *const svc = &cpi->svc;
+  RATE_CONTROL *const rc = &cpi->rc;
+  // On key frames in CBR mode: reset the avg_frame_qindex for the base layer
+  // (to a level closer to worst_quality) if the overshoot is significant.
+  // Reset it for all temporal layers on the base spatial layer.
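/* A minimal sketch of the frame-rate adjustment in vp9_svc_adjust_frame_rate()
   above: per-layer durations are kept in 10 MHz ticks (timebase_fac is
   assumed to convert the user's timebase to those ticks), so the frame rate
   is simply 1e7 divided by the duration. Hypothetical helper: */
static double layer_framerate_sketch(int64_t timebase_fac,
                                     int64_t duration_in_timebase) {
  /* e.g. a per-frame duration of 333333 ticks gives ~30.0 fps */
  return 10000000.0 / (double)(timebase_fac * duration_in_timebase);
}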
+  if (cm->frame_type == KEY_FRAME && cpi->oxcf.rc_mode == VPX_CBR &&
+      !svc->simulcast_mode &&
+      rc->projected_frame_size > 3 * rc->avg_frame_bandwidth) {
+    int tl;
+    rc->avg_frame_qindex[INTER_FRAME] =
+        VPXMAX(rc->avg_frame_qindex[INTER_FRAME],
+               (cm->base_qindex + rc->worst_quality) >> 1);
+    for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
+      const int layer = LAYER_IDS_TO_IDX(0, tl, svc->number_temporal_layers);
+      LAYER_CONTEXT *lc = &svc->layer_context[layer];
+      RATE_CONTROL *lrc = &lc->rc;
+      lrc->avg_frame_qindex[INTER_FRAME] = rc->avg_frame_qindex[INTER_FRAME];
+    }
+  }
+}
diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_svc_layercontext.h b/media/libvpx/libvpx/vp9/encoder/vp9_svc_layercontext.h
index b7cdfd9623d1..f1ba779706aa 100644
--- a/media/libvpx/libvpx/vp9/encoder/vp9_svc_layercontext.h
+++ b/media/libvpx/libvpx/vp9/encoder/vp9_svc_layercontext.h
@@ -8,8 +8,8 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
-#ifndef VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_
-#define VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_
+#ifndef VPX_VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_
+#define VPX_VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_
 #include "vpx/vpx_encoder.h"
@@ -19,6 +19,24 @@ extern "C" {
 #endif
+typedef enum {
+  // Inter-layer prediction is on for all frames.
+  INTER_LAYER_PRED_ON,
+  // Inter-layer prediction is off for all frames.
+  INTER_LAYER_PRED_OFF,
+  // Inter-layer prediction is off on non-key frames and non-sync frames.
+  INTER_LAYER_PRED_OFF_NONKEY,
+  // Inter-layer prediction is on for all frames, but constrained such
+  // that any layer S (> 0) can only predict from the previous spatial
+  // layer S-1, from the same superframe.
+  INTER_LAYER_PRED_ON_CONSTRAINED
+} INTER_LAYER_PRED;
+
+typedef struct BUFFER_LONGTERM_REF {
+  int idx;
+  int is_used;
+} BUFFER_LONGTERM_REF;
+
 typedef struct {
   RATE_CONTROL rc;
   int target_bandwidth;
@@ -40,12 +58,15 @@ typedef struct {
   int gold_ref_idx;
   int has_alt_frame;
   size_t layer_size;
-  struct vpx_psnr_pkt psnr_pkt;
   // Cyclic refresh parameters (aq-mode=3), that need to be updated per-frame.
+  // TODO(jianj/marpan): Is it better to use the full cyclic refresh struct?
   int sb_index;
   signed char *map;
   uint8_t *last_coded_q_map;
   uint8_t *consec_zero_mv;
+  int actual_num_seg1_blocks;
+  int actual_num_seg2_blocks;
+  int counter_encode_maxq_scene_change;
   uint8_t speed;
 } LAYER_CONTEXT;
@@ -56,8 +77,6 @@ typedef struct SVC {
   int number_temporal_layers;
   int spatial_layer_to_encode;
-  int first_spatial_layer_to_encode;
-  int rc_drop_superframe;
   // Workaround for multiple frame contexts
   enum { ENCODED = 0, ENCODING, NEED_TO_ENCODE } encode_empty_frame_state;
@@ -81,14 +100,20 @@ typedef struct SVC {
   // Frame flags and buffer indexes for each spatial layer, set by the
   // application (external settings).
   int ext_frame_flags[VPX_MAX_LAYERS];
-  int ext_lst_fb_idx[VPX_MAX_LAYERS];
-  int ext_gld_fb_idx[VPX_MAX_LAYERS];
-  int ext_alt_fb_idx[VPX_MAX_LAYERS];
-  int ref_frame_index[REF_FRAMES];
+  int lst_fb_idx[VPX_MAX_LAYERS];
+  int gld_fb_idx[VPX_MAX_LAYERS];
+  int alt_fb_idx[VPX_MAX_LAYERS];
   int force_zero_mode_spatial_ref;
+  // Sequence level flag to enable second (long term) temporal reference.
+  int use_gf_temporal_ref;
+  // Frame level flag to enable second (long term) temporal reference.
+  int use_gf_temporal_ref_current_layer;
+  // Allow a second reference for at most the two highest resolution layers.
+ BUFFER_LONGTERM_REF buffer_gf_temporal_ref[2]; int current_superframe; int non_reference_frame; int use_base_mv; + int use_partition_reuse; // Used to control the downscaling filter for source scaling, for 1 pass CBR. // downsample_filter_phase: = 0 will do sub-sampling (no weighted average), // = 8 will center the target pixel and get a symmetric averaging filter. @@ -99,8 +124,74 @@ typedef struct SVC { BLOCK_SIZE *prev_partition_svc; int mi_stride[VPX_MAX_LAYERS]; + int mi_rows[VPX_MAX_LAYERS]; + int mi_cols[VPX_MAX_LAYERS]; int first_layer_denoise; + + int skip_enhancement_layer; + + int lower_layer_qindex; + + int last_layer_dropped[VPX_MAX_LAYERS]; + int drop_spatial_layer[VPX_MAX_LAYERS]; + int framedrop_thresh[VPX_MAX_LAYERS]; + int drop_count[VPX_MAX_LAYERS]; + int force_drop_constrained_from_above[VPX_MAX_LAYERS]; + int max_consec_drop; + SVC_LAYER_DROP_MODE framedrop_mode; + + INTER_LAYER_PRED disable_inter_layer_pred; + + // Flag to indicate scene change and high num of motion blocks at current + // superframe, scene detection is currently checked for each superframe prior + // to encoding, on the full resolution source. + int high_source_sad_superframe; + int high_num_blocks_with_motion; + + // Flags used to get SVC pattern info. + int update_buffer_slot[VPX_SS_MAX_LAYERS]; + uint8_t reference_last[VPX_SS_MAX_LAYERS]; + uint8_t reference_golden[VPX_SS_MAX_LAYERS]; + uint8_t reference_altref[VPX_SS_MAX_LAYERS]; + // TODO(jianj): Remove these last 3, deprecated. + uint8_t update_last[VPX_SS_MAX_LAYERS]; + uint8_t update_golden[VPX_SS_MAX_LAYERS]; + uint8_t update_altref[VPX_SS_MAX_LAYERS]; + + // Keep track of the frame buffer index updated/refreshed on the base + // temporal superframe. + int fb_idx_upd_tl0[VPX_SS_MAX_LAYERS]; + + // Keep track of the spatial and temporal layer id of the frame that last + // updated the frame buffer index. + uint8_t fb_idx_spatial_layer_id[REF_FRAMES]; + uint8_t fb_idx_temporal_layer_id[REF_FRAMES]; + + int spatial_layer_sync[VPX_SS_MAX_LAYERS]; + uint8_t set_intra_only_frame; + uint8_t previous_frame_is_intra_only; + uint8_t superframe_has_layer_sync; + + uint8_t fb_idx_base[REF_FRAMES]; + + int use_set_ref_frame_config; + + int temporal_layer_id_per_spatial[VPX_SS_MAX_LAYERS]; + + int first_spatial_layer_to_encode; + + // Parameters for allowing framerate per spatial layer, and buffer + // update based on timestamps. + int64_t duration[VPX_SS_MAX_LAYERS]; + int64_t timebase_fac; + int64_t time_stamp_superframe; + int64_t time_stamp_prev[VPX_SS_MAX_LAYERS]; + + int num_encoded_top_layer; + + // Every spatial layer on a superframe whose base is key is key too. 
+ int simulcast_mode; } SVC; struct VP9_COMP; @@ -148,16 +239,37 @@ struct lookahead_entry *vp9_svc_lookahead_pop(struct VP9_COMP *const cpi, // Start a frame and initialize svc parameters int vp9_svc_start_frame(struct VP9_COMP *const cpi); +#if CONFIG_VP9_TEMPORAL_DENOISING +int vp9_denoise_svc_non_key(struct VP9_COMP *const cpi); +#endif + +void vp9_copy_flags_ref_update_idx(struct VP9_COMP *const cpi); + int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi); void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi); -void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi); +void vp9_svc_reset_temporal_layers(struct VP9_COMP *const cpi, int is_key); void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi); +void vp9_svc_constrain_inter_layer_pred(struct VP9_COMP *const cpi); + +void vp9_svc_assert_constraints_pattern(struct VP9_COMP *const cpi); + +void vp9_svc_check_spatial_layer_sync(struct VP9_COMP *const cpi); + +void vp9_svc_update_ref_frame_buffer_idx(struct VP9_COMP *const cpi); + +void vp9_svc_update_ref_frame_key_simulcast(struct VP9_COMP *const cpi); + +void vp9_svc_update_ref_frame(struct VP9_COMP *const cpi); + +void vp9_svc_adjust_frame_rate(struct VP9_COMP *const cpi); + +void vp9_svc_adjust_avg_frame_qindex(struct VP9_COMP *const cpi); #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_ENCODER_VP9_SVC_LAYERCONTEXT_ +#endif // VPX_VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_temporal_filter.c b/media/libvpx/libvpx/vp9/encoder/vp9_temporal_filter.c index 2758c42aebfe..701bb8928714 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/media/libvpx/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -34,57 +34,155 @@ #include "vpx_scale/vpx_scale.h" static int fixed_divide[512]; +static unsigned int index_mult[14] = { 0, 0, 0, 0, 49152, + 39322, 32768, 28087, 24576, 21846, + 19661, 17874, 0, 15124 }; +#if CONFIG_VP9_HIGHBITDEPTH +static int64_t highbd_index_mult[14] = { 0U, 0U, 0U, + 0U, 3221225472U, 2576980378U, + 2147483648U, 1840700270U, 1610612736U, + 1431655766U, 1288490189U, 1171354718U, + 0U, 991146300U }; +#endif // CONFIG_VP9_HIGHBITDEPTH static void temporal_filter_predictors_mb_c( MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr, int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col, - uint8_t *pred, struct scale_factors *scale, int x, int y) { + uint8_t *pred, struct scale_factors *scale, int x, int y, MV *blk_mvs, + int use_32x32) { const int which_mv = 0; - const MV mv = { mv_row, mv_col }; const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP_SHARP]; + int i, j, k = 0, ys = (BH >> 1), xs = (BW >> 1); enum mv_precision mv_precision_uv; int uv_stride; - if (uv_block_width == 8) { + if (uv_block_width == (BW >> 1)) { uv_stride = (stride + 1) >> 1; mv_precision_uv = MV_PRECISION_Q4; } else { uv_stride = stride; mv_precision_uv = MV_PRECISION_Q3; } +#if !CONFIG_VP9_HIGHBITDEPTH + (void)xd; +#endif + if (use_32x32) { + const MV mv = { mv_row, mv_col }; #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(y_mb_ptr), stride, - CONVERT_TO_SHORTPTR(&pred[0]), 16, &mv, - scale, 16, 16, which_mv, kernel, - MV_PRECISION_Q3, x, y, xd->bd); + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(y_mb_ptr), stride, + CONVERT_TO_SHORTPTR(&pred[0]), BW, &mv, + scale, BW, BH, which_mv, kernel, + MV_PRECISION_Q3, 
x, y, xd->bd); - vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(u_mb_ptr), uv_stride, - CONVERT_TO_SHORTPTR(&pred[256]), - uv_block_width, &mv, scale, uv_block_width, - uv_block_height, which_mv, kernel, - mv_precision_uv, x, y, xd->bd); + vp9_highbd_build_inter_predictor( + CONVERT_TO_SHORTPTR(u_mb_ptr), uv_stride, + CONVERT_TO_SHORTPTR(&pred[BLK_PELS]), uv_block_width, &mv, scale, + uv_block_width, uv_block_height, which_mv, kernel, mv_precision_uv, x, + y, xd->bd); - vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(v_mb_ptr), uv_stride, - CONVERT_TO_SHORTPTR(&pred[512]), - uv_block_width, &mv, scale, uv_block_width, - uv_block_height, which_mv, kernel, - mv_precision_uv, x, y, xd->bd); + vp9_highbd_build_inter_predictor( + CONVERT_TO_SHORTPTR(v_mb_ptr), uv_stride, + CONVERT_TO_SHORTPTR(&pred[(BLK_PELS << 1)]), uv_block_width, &mv, + scale, uv_block_width, uv_block_height, which_mv, kernel, + mv_precision_uv, x, y, xd->bd); + return; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], BW, &mv, scale, BW, + BH, which_mv, kernel, MV_PRECISION_Q3, x, y); + + vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[BLK_PELS], + uv_block_width, &mv, scale, uv_block_width, + uv_block_height, which_mv, kernel, + mv_precision_uv, x, y); + + vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[(BLK_PELS << 1)], + uv_block_width, &mv, scale, uv_block_width, + uv_block_height, which_mv, kernel, + mv_precision_uv, x, y); return; } + + // While use_32x32 = 0, construct the 32x32 predictor using 4 16x16 + // predictors. + // Y predictor + for (i = 0; i < BH; i += ys) { + for (j = 0; j < BW; j += xs) { + const MV mv = blk_mvs[k]; + const int y_offset = i * stride + j; + const int p_offset = i * BW + j; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_highbd_build_inter_predictor( + CONVERT_TO_SHORTPTR(y_mb_ptr + y_offset), stride, + CONVERT_TO_SHORTPTR(&pred[p_offset]), BW, &mv, scale, xs, ys, + which_mv, kernel, MV_PRECISION_Q3, x, y, xd->bd); + } else { + vp9_build_inter_predictor(y_mb_ptr + y_offset, stride, &pred[p_offset], + BW, &mv, scale, xs, ys, which_mv, kernel, + MV_PRECISION_Q3, x, y); + } +#else + vp9_build_inter_predictor(y_mb_ptr + y_offset, stride, &pred[p_offset], + BW, &mv, scale, xs, ys, which_mv, kernel, + MV_PRECISION_Q3, x, y); #endif // CONFIG_VP9_HIGHBITDEPTH - (void)xd; - vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16, - which_mv, kernel, MV_PRECISION_Q3, x, y); + k++; + } + } - vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width, - &mv, scale, uv_block_width, uv_block_height, - which_mv, kernel, mv_precision_uv, x, y); + // U and V predictors + ys = (uv_block_height >> 1); + xs = (uv_block_width >> 1); + k = 0; - vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width, - &mv, scale, uv_block_width, uv_block_height, - which_mv, kernel, mv_precision_uv, x, y); + for (i = 0; i < uv_block_height; i += ys) { + for (j = 0; j < uv_block_width; j += xs) { + const MV mv = blk_mvs[k]; + const int uv_offset = i * uv_stride + j; + const int p_offset = i * uv_block_width + j; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_highbd_build_inter_predictor( + CONVERT_TO_SHORTPTR(u_mb_ptr + uv_offset), uv_stride, + CONVERT_TO_SHORTPTR(&pred[BLK_PELS + p_offset]), uv_block_width, + &mv, scale, xs, ys, which_mv, kernel, mv_precision_uv, x, y, + xd->bd); + + vp9_highbd_build_inter_predictor( + 
CONVERT_TO_SHORTPTR(v_mb_ptr + uv_offset), uv_stride, + CONVERT_TO_SHORTPTR(&pred[(BLK_PELS << 1) + p_offset]), + uv_block_width, &mv, scale, xs, ys, which_mv, kernel, + mv_precision_uv, x, y, xd->bd); + } else { + vp9_build_inter_predictor(u_mb_ptr + uv_offset, uv_stride, + &pred[BLK_PELS + p_offset], uv_block_width, + &mv, scale, xs, ys, which_mv, kernel, + mv_precision_uv, x, y); + + vp9_build_inter_predictor(v_mb_ptr + uv_offset, uv_stride, + &pred[(BLK_PELS << 1) + p_offset], + uv_block_width, &mv, scale, xs, ys, which_mv, + kernel, mv_precision_uv, x, y); + } +#else + vp9_build_inter_predictor(u_mb_ptr + uv_offset, uv_stride, + &pred[BLK_PELS + p_offset], uv_block_width, &mv, + scale, xs, ys, which_mv, kernel, + mv_precision_uv, x, y); + + vp9_build_inter_predictor(v_mb_ptr + uv_offset, uv_stride, + &pred[(BLK_PELS << 1) + p_offset], + uv_block_width, &mv, scale, xs, ys, which_mv, + kernel, mv_precision_uv, x, y); +#endif // CONFIG_VP9_HIGHBITDEPTH + k++; + } + } } void vp9_temporal_filter_init(void) { @@ -94,143 +192,372 @@ void vp9_temporal_filter_init(void) { for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i; } -void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride, - const uint8_t *frame2, - unsigned int block_width, - unsigned int block_height, int strength, - int filter_weight, uint32_t *accumulator, - uint16_t *count) { - unsigned int i, j, k; +static INLINE int mod_index(int sum_dist, int index, int rounding, int strength, + int filter_weight) { + int mod; + + assert(index >= 0 && index <= 13); + assert(index_mult[index] != 0); + + mod = + ((unsigned int)clamp(sum_dist, 0, UINT16_MAX) * index_mult[index]) >> 16; + mod += rounding; + mod >>= strength; + + mod = VPXMIN(16, mod); + + mod = 16 - mod; + mod *= filter_weight; + + return mod; +} + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE int highbd_mod_index(int sum_dist, int index, int rounding, + int strength, int filter_weight) { + int mod; + + assert(index >= 0 && index <= 13); + assert(highbd_index_mult[index] != 0); + + mod = (int)((clamp(sum_dist, 0, INT32_MAX) * highbd_index_mult[index]) >> 32); + mod += rounding; + mod >>= strength; + + mod = VPXMIN(16, mod); + + mod = 16 - mod; + mod *= filter_weight; + + return mod; +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static INLINE int get_filter_weight(unsigned int i, unsigned int j, + unsigned int block_height, + unsigned int block_width, + const int *const blk_fw, int use_32x32) { + // blk_fw[0] ~ blk_fw[3] are the same. + if (use_32x32) { + return blk_fw[0]; + } + + if (i < block_height / 2) { + if (j < block_width / 2) { + return blk_fw[0]; + } + + return blk_fw[1]; + } + + if (j < block_width / 2) { + return blk_fw[2]; + } + + return blk_fw[3]; +} + +void vp9_apply_temporal_filter_c( + const uint8_t *y_frame1, int y_stride, const uint8_t *y_pred, + int y_buf_stride, const uint8_t *u_frame1, const uint8_t *v_frame1, + int uv_stride, const uint8_t *u_pred, const uint8_t *v_pred, + int uv_buf_stride, unsigned int block_width, unsigned int block_height, + int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, + uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator, + uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count) { + unsigned int i, j, k, m; int modifier; - int byte = 0; - const int rounding = strength > 0 ? 
1 << (strength - 1) : 0; + const int rounding = (1 << strength) >> 1; + const unsigned int uv_block_width = block_width >> ss_x; + const unsigned int uv_block_height = block_height >> ss_y; + DECLARE_ALIGNED(16, uint16_t, y_diff_sse[BLK_PELS]); + DECLARE_ALIGNED(16, uint16_t, u_diff_sse[BLK_PELS]); + DECLARE_ALIGNED(16, uint16_t, v_diff_sse[BLK_PELS]); + + int idx = 0, idy; assert(strength >= 0); assert(strength <= 6); - assert(filter_weight >= 0); - assert(filter_weight <= 2); + memset(y_diff_sse, 0, BLK_PELS * sizeof(uint16_t)); + memset(u_diff_sse, 0, BLK_PELS * sizeof(uint16_t)); + memset(v_diff_sse, 0, BLK_PELS * sizeof(uint16_t)); - for (i = 0, k = 0; i < block_height; i++) { - for (j = 0; j < block_width; j++, k++) { - int pixel_value = *frame2; + // Calculate diff^2 for each pixel of the 16x16 block. + // TODO(yunqing): the following code needs to be optimized. + for (i = 0; i < block_height; i++) { + for (j = 0; j < block_width; j++) { + const int16_t diff = + y_frame1[i * (int)y_stride + j] - y_pred[i * (int)block_width + j]; + y_diff_sse[idx++] = diff * diff; + } + } + idx = 0; + for (i = 0; i < uv_block_height; i++) { + for (j = 0; j < uv_block_width; j++) { + const int16_t diffu = + u_frame1[i * uv_stride + j] - u_pred[i * uv_buf_stride + j]; + const int16_t diffv = + v_frame1[i * uv_stride + j] - v_pred[i * uv_buf_stride + j]; + u_diff_sse[idx] = diffu * diffu; + v_diff_sse[idx] = diffv * diffv; + idx++; + } + } + + for (i = 0, k = 0, m = 0; i < block_height; i++) { + for (j = 0; j < block_width; j++) { + const int pixel_value = y_pred[i * y_buf_stride + j]; + const int filter_weight = + get_filter_weight(i, j, block_height, block_width, blk_fw, use_32x32); // non-local mean approach - int diff_sse[9] = { 0 }; - int idx, idy, index = 0; + int y_index = 0; + + const int uv_r = i >> ss_y; + const int uv_c = j >> ss_x; + modifier = 0; for (idy = -1; idy <= 1; ++idy) { for (idx = -1; idx <= 1; ++idx) { - int row = (int)i + idy; - int col = (int)j + idx; + const int row = (int)i + idy; + const int col = (int)j + idx; if (row >= 0 && row < (int)block_height && col >= 0 && col < (int)block_width) { - int diff = frame1[byte + idy * (int)stride + idx] - - frame2[idy * (int)block_width + idx]; - diff_sse[index] = diff * diff; - ++index; + modifier += y_diff_sse[row * (int)block_width + col]; + ++y_index; } } } - assert(index > 0); + assert(y_index > 0); - modifier = 0; - for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx]; + modifier += u_diff_sse[uv_r * uv_block_width + uv_c]; + modifier += v_diff_sse[uv_r * uv_block_width + uv_c]; - modifier *= 3; - modifier /= index; + y_index += 2; - ++frame2; + modifier = + mod_index(modifier, y_index, rounding, strength, filter_weight); - modifier += rounding; - modifier >>= strength; + y_count[k] += modifier; + y_accumulator[k] += modifier * pixel_value; - if (modifier > 16) modifier = 16; + ++k; - modifier = 16 - modifier; - modifier *= filter_weight; + // Process chroma component + if (!(i & ss_y) && !(j & ss_x)) { + const int u_pixel_value = u_pred[uv_r * uv_buf_stride + uv_c]; + const int v_pixel_value = v_pred[uv_r * uv_buf_stride + uv_c]; - count[k] += modifier; - accumulator[k] += modifier * pixel_value; + // non-local mean approach + int cr_index = 0; + int u_mod = 0, v_mod = 0; + int y_diff = 0; - byte++; + for (idy = -1; idy <= 1; ++idy) { + for (idx = -1; idx <= 1; ++idx) { + const int row = uv_r + idy; + const int col = uv_c + idx; + + if (row >= 0 && row < (int)uv_block_height && col >= 0 && + col < (int)uv_block_width) { + 
u_mod += u_diff_sse[row * uv_block_width + col]; + v_mod += v_diff_sse[row * uv_block_width + col]; + ++cr_index; + } + } + } + + assert(cr_index > 0); + + for (idy = 0; idy < 1 + ss_y; ++idy) { + for (idx = 0; idx < 1 + ss_x; ++idx) { + const int row = (uv_r << ss_y) + idy; + const int col = (uv_c << ss_x) + idx; + y_diff += y_diff_sse[row * (int)block_width + col]; + ++cr_index; + } + } + + u_mod += y_diff; + v_mod += y_diff; + + u_mod = mod_index(u_mod, cr_index, rounding, strength, filter_weight); + v_mod = mod_index(v_mod, cr_index, rounding, strength, filter_weight); + + u_count[m] += u_mod; + u_accumulator[m] += u_mod * u_pixel_value; + v_count[m] += v_mod; + v_accumulator[m] += v_mod * v_pixel_value; + + ++m; + } // Complete YUV pixel } - - byte += stride - block_width; } } #if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_temporal_filter_apply_c( - const uint8_t *frame1_8, unsigned int stride, const uint8_t *frame2_8, - unsigned int block_width, unsigned int block_height, int strength, - int filter_weight, uint32_t *accumulator, uint16_t *count) { - const uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8); - const uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8); - unsigned int i, j, k; - int modifier; - int byte = 0; - const int rounding = strength > 0 ? 1 << (strength - 1) : 0; +void vp9_highbd_apply_temporal_filter_c( + const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, + int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, + int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, + int uv_pre_stride, unsigned int block_width, unsigned int block_height, + int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, + uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, uint16_t *u_count, + uint32_t *v_accum, uint16_t *v_count) { + const int uv_block_width = block_width >> ss_x; + const int uv_block_height = block_height >> ss_y; + const int y_diff_stride = BW; + const int uv_diff_stride = BW; - for (i = 0, k = 0; i < block_height; i++) { - for (j = 0; j < block_width; j++, k++) { - int pixel_value = *frame2; - int diff_sse[9] = { 0 }; - int idx, idy, index = 0; + DECLARE_ALIGNED(16, uint32_t, y_diff_sse[BLK_PELS]); + DECLARE_ALIGNED(16, uint32_t, u_diff_sse[BLK_PELS]); + DECLARE_ALIGNED(16, uint32_t, v_diff_sse[BLK_PELS]); - for (idy = -1; idy <= 1; ++idy) { - for (idx = -1; idx <= 1; ++idx) { - int row = (int)i + idy; - int col = (int)j + idx; + const int rounding = (1 << strength) >> 1; - if (row >= 0 && row < (int)block_height && col >= 0 && - col < (int)block_width) { - int diff = frame1[byte + idy * (int)stride + idx] - - frame2[idy * (int)block_width + idx]; - diff_sse[index] = diff * diff; - ++index; + // Loop variables + int row, col; + int uv_row, uv_col; + int row_step, col_step; + + memset(y_diff_sse, 0, BLK_PELS * sizeof(uint32_t)); + memset(u_diff_sse, 0, BLK_PELS * sizeof(uint32_t)); + memset(v_diff_sse, 0, BLK_PELS * sizeof(uint32_t)); + + // Get the square diffs + for (row = 0; row < (int)block_height; row++) { + for (col = 0; col < (int)block_width; col++) { + const int diff = + y_src[row * y_src_stride + col] - y_pre[row * y_pre_stride + col]; + y_diff_sse[row * y_diff_stride + col] = diff * diff; + } + } + + for (row = 0; row < uv_block_height; row++) { + for (col = 0; col < uv_block_width; col++) { + const int u_diff = + u_src[row * uv_src_stride + col] - u_pre[row * uv_pre_stride + col]; + const int v_diff = + v_src[row * uv_src_stride + col] - v_pre[row * uv_pre_stride + col]; + u_diff_sse[row * uv_diff_stride + 
col] = u_diff * u_diff;
+      v_diff_sse[row * uv_diff_stride + col] = v_diff * v_diff;
+    }
+  }
+
+  // Apply the filter to luma
+  for (row = 0; row < (int)block_height; row++) {
+    for (col = 0; col < (int)block_width; col++) {
+      const int uv_row = row >> ss_y;
+      const int uv_col = col >> ss_x;
+      const int filter_weight = get_filter_weight(
+          row, col, block_height, block_width, blk_fw, use_32x32);
+
+      // First we get the modifier for the current y pixel
+      const int y_pixel = y_pre[row * y_pre_stride + col];
+      int y_num_used = 0;
+      int y_mod = 0;
+
+      // Sum the neighboring 3x3 y pixels
+      for (row_step = -1; row_step <= 1; row_step++) {
+        for (col_step = -1; col_step <= 1; col_step++) {
+          const int sub_row = row + row_step;
+          const int sub_col = col + col_step;
+
+          if (sub_row >= 0 && sub_row < (int)block_height && sub_col >= 0 &&
+              sub_col < (int)block_width) {
+            y_mod += y_diff_sse[sub_row * y_diff_stride + sub_col];
+            y_num_used++;
+          }
+        }
+      }
+
+      // Sum the corresponding uv pixels to the current y modifier
+      // Note we are rounding down instead of rounding to the nearest pixel.
+      y_mod += u_diff_sse[uv_row * uv_diff_stride + uv_col];
+      y_mod += v_diff_sse[uv_row * uv_diff_stride + uv_col];
+
+      y_num_used += 2;
+
+      // Set the modifier
+      y_mod = highbd_mod_index(y_mod, y_num_used, rounding, strength,
+                               filter_weight);
+
+      // Accumulate the result
+      y_count[row * block_width + col] += y_mod;
+      y_accum[row * block_width + col] += y_mod * y_pixel;
+    }
+  }
+
+  // Apply the filter to chroma
+  for (uv_row = 0; uv_row < uv_block_height; uv_row++) {
+    for (uv_col = 0; uv_col < uv_block_width; uv_col++) {
+      const int y_row = uv_row << ss_y;
+      const int y_col = uv_col << ss_x;
+      const int filter_weight = get_filter_weight(
+          uv_row, uv_col, uv_block_height, uv_block_width, blk_fw, use_32x32);
+
+      const int u_pixel = u_pre[uv_row * uv_pre_stride + uv_col];
+      const int v_pixel = v_pre[uv_row * uv_pre_stride + uv_col];
+
+      int uv_num_used = 0;
+      int u_mod = 0, v_mod = 0;
+
+      // Sum the neighboring 3x3 chroma pixels into the chroma modifier
+      for (row_step = -1; row_step <= 1; row_step++) {
+        for (col_step = -1; col_step <= 1; col_step++) {
+          const int sub_row = uv_row + row_step;
+          const int sub_col = uv_col + col_step;
+
+          if (sub_row >= 0 && sub_row < uv_block_height && sub_col >= 0 &&
+              sub_col < uv_block_width) {
+            u_mod += u_diff_sse[sub_row * uv_diff_stride + sub_col];
+            v_mod += v_diff_sse[sub_row * uv_diff_stride + sub_col];
+            uv_num_used++;
+          }
+        }
+      }
+
+      // Sum all the luma pixels associated with the current chroma pixel
+      for (row_step = 0; row_step < 1 + ss_y; row_step++) {
+        for (col_step = 0; col_step < 1 + ss_x; col_step++) {
+          const int sub_row = y_row + row_step;
+          const int sub_col = y_col + col_step;
+          const int y_diff = y_diff_sse[sub_row * y_diff_stride + sub_col];
+
+          u_mod += y_diff;
+          v_mod += y_diff;
+          uv_num_used++;
+        }
+      }
+
+      // Set the modifier
+      u_mod = highbd_mod_index(u_mod, uv_num_used, rounding, strength,
+                               filter_weight);
+      v_mod = highbd_mod_index(v_mod, uv_num_used, rounding, strength,
+                               filter_weight);
+
+      // Accumulate the result
+      u_count[uv_row * uv_block_width + uv_col] += u_mod;
u_accum[uv_row * uv_block_width + uv_col] += u_mod * u_pixel; + v_count[uv_row * uv_block_width + uv_col] += v_mod; + v_accum[uv_row * uv_block_width + uv_col] += v_mod * v_pixel; + } } } #endif // CONFIG_VP9_HIGHBITDEPTH -static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi, - ThreadData *td, - uint8_t *arf_frame_buf, - uint8_t *frame_ptr_buf, - int stride, MV *ref_mv) { +static uint32_t temporal_filter_find_matching_mb_c( + VP9_COMP *cpi, ThreadData *td, uint8_t *arf_frame_buf, + uint8_t *frame_ptr_buf, int stride, MV *ref_mv, MV *blk_mvs, + int *blk_bestsme) { MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; - const SEARCH_METHODS search_method = HEX; + const SEARCH_METHODS search_method = MESH; + const SEARCH_METHODS search_method_16 = cpi->sf.temporal_filter_search_method; int step_param; int sadpb = x->sadperbit16; uint32_t bestsme = UINT_MAX; @@ -245,6 +572,7 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi, // Save input state struct buf_2d src = x->plane[0].src; struct buf_2d pre = xd->plane[0].pre[0]; + int i, j, k = 0; best_ref_mv1_full.col = best_ref_mv1.col >> 3; best_ref_mv1_full.row = best_ref_mv1.row >> 3; @@ -260,19 +588,52 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi, vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); - vp9_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param, + vp9_full_pixel_search(cpi, x, TF_BLOCK, &best_ref_mv1_full, step_param, search_method, sadpb, cond_cost_list(cpi, cost_list), &best_ref_mv1, ref_mv, 0, 0); /* restore UMV window */ x->mv_limits = tmp_mv_limits; - // Ignore mv costing by sending NULL pointer instead of cost array + // find_fractional_mv_step parameters: best_ref_mv1 is for mv rate cost + // calculation. The start full mv and the search result are stored in + // ref_mv. bestsme = cpi->find_fractional_mv_step( x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0, - mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL, NULL, - &distortion, &sse, NULL, 0, 0); + x->errorperbit, &cpi->fn_ptr[TF_BLOCK], 0, mv_sf->subpel_search_level, + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, BW, + BH, USE_8_TAPS_SHARP); + + // DO motion search on 4 16x16 sub_blocks. 
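/* The loop below revisits the 32x32 block as four 16x16 tiles in raster
   order (k = 0..3), assuming BW == BH == 32 and SUB_BW == SUB_BH == 16 as
   the surrounding comments imply; the tile origins are: */
static const struct {
  int row, col;
} kSubBlockOffsetsSketch[4] = { { 0, 0 }, { 0, 16 }, { 16, 0 }, { 16, 16 } };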
+ best_ref_mv1.row = ref_mv->row; + best_ref_mv1.col = ref_mv->col; + best_ref_mv1_full.col = best_ref_mv1.col >> 3; + best_ref_mv1_full.row = best_ref_mv1.row >> 3; + + for (i = 0; i < BH; i += SUB_BH) { + for (j = 0; j < BW; j += SUB_BW) { + // Setup frame pointers + x->plane[0].src.buf = arf_frame_buf + i * stride + j; + x->plane[0].src.stride = stride; + xd->plane[0].pre[0].buf = frame_ptr_buf + i * stride + j; + xd->plane[0].pre[0].stride = stride; + + vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); + vp9_full_pixel_search(cpi, x, TF_SUB_BLOCK, &best_ref_mv1_full, + step_param, search_method_16, sadpb, + cond_cost_list(cpi, cost_list), &best_ref_mv1, + &blk_mvs[k], 0, 0); + /* restore UMV window */ + x->mv_limits = tmp_mv_limits; + + blk_bestsme[k] = cpi->find_fractional_mv_step( + x, &blk_mvs[k], &best_ref_mv1, cpi->common.allow_high_precision_mv, + x->errorperbit, &cpi->fn_ptr[TF_SUB_BLOCK], 0, + mv_sf->subpel_search_level, cond_cost_list(cpi, cost_list), NULL, + NULL, &distortion, &sse, NULL, SUB_BW, SUB_BH, USE_8_TAPS_SHARP); + k++; + } + } // Restore input state x->plane[0].src = src; @@ -293,25 +654,24 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, int byte; int frame; int mb_col; - unsigned int filter_weight; - int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4; - int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4; - DECLARE_ALIGNED(16, uint32_t, accumulator[16 * 16 * 3]); - DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]); + int mb_cols = (frames[alt_ref_index]->y_crop_width + BW - 1) >> BW_LOG2; + int mb_rows = (frames[alt_ref_index]->y_crop_height + BH - 1) >> BH_LOG2; + DECLARE_ALIGNED(16, uint32_t, accumulator[BLK_PELS * 3]); + DECLARE_ALIGNED(16, uint16_t, count[BLK_PELS * 3]); MACROBLOCKD *mbd = &td->mb.e_mbd; YV12_BUFFER_CONFIG *f = frames[alt_ref_index]; uint8_t *dst1, *dst2; #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]); - DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]); + DECLARE_ALIGNED(16, uint16_t, predictor16[BLK_PELS * 3]); + DECLARE_ALIGNED(16, uint8_t, predictor8[BLK_PELS * 3]); uint8_t *predictor; #else - DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]); + DECLARE_ALIGNED(16, uint8_t, predictor[BLK_PELS * 3]); #endif - const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y; - const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x; + const int mb_uv_height = BH >> mbd->plane[1].subsampling_y; + const int mb_uv_width = BW >> mbd->plane[1].subsampling_x; // Addition of the tile col level offsets - int mb_y_offset = mb_row * 16 * (f->y_stride) + 16 * mb_col_start; + int mb_y_offset = mb_row * BH * (f->y_stride) + BW * mb_col_start; int mb_uv_offset = mb_row * mb_uv_height * f->uv_stride + mb_uv_width * mb_col_start; @@ -334,21 +694,21 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, // 8 - VP9_INTERP_EXTEND. // To keep the mv in play for both Y and UV planes the max that it // can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1). 
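/* The border clamp described above, as a standalone sketch (hypothetical
   helper; VP9_INTERP_EXTEND is assumed to be 4, so the margin works out to
   17 - 8 = 9 pixels): */
static void mv_row_limits_sketch(int mb_row, int mb_rows, int bh,
                                 int *row_min, int *row_max) {
  const int margin = 17 - 2 * 4; /* 17 - 2 * VP9_INTERP_EXTEND */
  *row_min = -((mb_row * bh) + margin);
  *row_max = ((mb_rows - 1 - mb_row) * bh) + margin;
}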
-  td->mb.mv_limits.row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND));
+  td->mb.mv_limits.row_min = -((mb_row * BH) + (17 - 2 * VP9_INTERP_EXTEND));
   td->mb.mv_limits.row_max =
-      ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * VP9_INTERP_EXTEND);
+      ((mb_rows - 1 - mb_row) * BH) + (17 - 2 * VP9_INTERP_EXTEND);
 
   for (mb_col = mb_col_start; mb_col < mb_col_end; mb_col++) {
     int i, j, k;
     int stride;
     MV ref_mv;
 
-    vp9_zero_array(accumulator, 16 * 16 * 3);
-    vp9_zero_array(count, 16 * 16 * 3);
+    vp9_zero_array(accumulator, BLK_PELS * 3);
+    vp9_zero_array(count, BLK_PELS * 3);
 
-    td->mb.mv_limits.col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
+    td->mb.mv_limits.col_min = -((mb_col * BW) + (17 - 2 * VP9_INTERP_EXTEND));
     td->mb.mv_limits.col_max =
-        ((mb_cols - 1 - mb_col) * 16) + (17 - 2 * VP9_INTERP_EXTEND);
+        ((mb_cols - 1 - mb_col) * BW) + (17 - 2 * VP9_INTERP_EXTEND);
 
     if (cpi->oxcf.content == VP9E_CONTENT_FILM) {
       unsigned int src_variance;
@@ -360,92 +720,130 @@
 #if CONFIG_VP9_HIGHBITDEPTH
       if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
         src_variance =
-            vp9_high_get_sby_perpixel_variance(cpi, &src, BLOCK_16X16, mbd->bd);
+            vp9_high_get_sby_perpixel_variance(cpi, &src, TF_BLOCK, mbd->bd);
       } else {
-        src_variance = vp9_get_sby_perpixel_variance(cpi, &src, BLOCK_16X16);
+        src_variance = vp9_get_sby_perpixel_variance(cpi, &src, TF_BLOCK);
       }
 #else
-      src_variance = vp9_get_sby_perpixel_variance(cpi, &src, BLOCK_16X16);
+      src_variance = vp9_get_sby_perpixel_variance(cpi, &src, TF_BLOCK);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-      if (src_variance <= 2) strength = VPXMAX(0, (int)strength - 2);
+      if (src_variance <= 2) {
+        strength = VPXMAX(0, arnr_filter_data->strength - 2);
+      }
     }
 
     for (frame = 0; frame < frame_count; frame++) {
-      const uint32_t thresh_low = 10000;
-      const uint32_t thresh_high = 20000;
+      // MVs for 4 16x16 sub blocks.
+      MV blk_mvs[4];
+      // Filter weights for 4 16x16 sub blocks.
+      int blk_fw[4] = { 0, 0, 0, 0 };
+      int use_32x32 = 0;
 
       if (frames[frame] == NULL) continue;
 
       ref_mv.row = 0;
       ref_mv.col = 0;
+      blk_mvs[0] = kZeroMv;
+      blk_mvs[1] = kZeroMv;
+      blk_mvs[2] = kZeroMv;
+      blk_mvs[3] = kZeroMv;
 
       if (frame == alt_ref_index) {
-        filter_weight = 2;
+        blk_fw[0] = blk_fw[1] = blk_fw[2] = blk_fw[3] = 2;
+        use_32x32 = 1;
       } else {
+        const int thresh_low = 10000;
+        const int thresh_high = 20000;
+        int blk_bestsme[4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
+
         // Find best match in this frame by MC
-        uint32_t err = temporal_filter_find_matching_mb_c(
+        int err = temporal_filter_find_matching_mb_c(
             cpi, td, frames[alt_ref_index]->y_buffer + mb_y_offset,
             frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride,
-            &ref_mv);
+            &ref_mv, blk_mvs, blk_bestsme);
 
-        // Assign higher weight to matching MB if its error
-        // score is lower. If not applying MC default behavior
-        // is to weight all MBs equal.
-        filter_weight = err < thresh_low ? 2 : err < thresh_high ? 1 : 0;
+        int err16 =
+            blk_bestsme[0] + blk_bestsme[1] + blk_bestsme[2] + blk_bestsme[3];
+        int max_err = INT_MIN, min_err = INT_MAX;
+        for (k = 0; k < 4; k++) {
+          if (min_err > blk_bestsme[k]) min_err = blk_bestsme[k];
+          if (max_err < blk_bestsme[k]) max_err = blk_bestsme[k];
+        }
+
+        if (((err * 15 < (err16 << 4)) && max_err - min_err < 10000) ||
+            ((err * 14 < (err16 << 4)) && max_err - min_err < 5000)) {
+          use_32x32 = 1;
+          // Assign higher weight to matching MB if its error
+          // score is lower. If not applying MC default behavior
+          // is to weight all MBs equal.
+ blk_fw[0] = err < (thresh_low << THR_SHIFT) + ? 2 + : err < (thresh_high << THR_SHIFT) ? 1 : 0; + blk_fw[1] = blk_fw[2] = blk_fw[3] = blk_fw[0]; + } else { + use_32x32 = 0; + for (k = 0; k < 4; k++) + blk_fw[k] = blk_bestsme[k] < thresh_low + ? 2 + : blk_bestsme[k] < thresh_high ? 1 : 0; + } + + for (k = 0; k < 4; k++) { + switch (abs(frame - alt_ref_index)) { + case 1: blk_fw[k] = VPXMIN(blk_fw[k], 2); break; + case 2: + case 3: blk_fw[k] = VPXMIN(blk_fw[k], 1); break; + default: break; + } + } } - if (filter_weight != 0) { + if (blk_fw[0] | blk_fw[1] | blk_fw[2] | blk_fw[3]) { // Construct the predictors temporal_filter_predictors_mb_c( mbd, frames[frame]->y_buffer + mb_y_offset, frames[frame]->u_buffer + mb_uv_offset, frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride, mb_uv_width, mb_uv_height, ref_mv.row, ref_mv.col, predictor, scale, - mb_col * 16, mb_row * 16); + mb_col * BW, mb_row * BH, blk_mvs, use_32x32); #if CONFIG_VP9_HIGHBITDEPTH if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int adj_strength = strength + 2 * (mbd->bd - 8); // Apply the filter (YUV) - vp9_highbd_temporal_filter_apply( - f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16, - adj_strength, filter_weight, accumulator, count); - vp9_highbd_temporal_filter_apply( - f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256, - mb_uv_width, mb_uv_height, adj_strength, filter_weight, - accumulator + 256, count + 256); - vp9_highbd_temporal_filter_apply( - f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512, - mb_uv_width, mb_uv_height, adj_strength, filter_weight, - accumulator + 512, count + 512); + vp9_highbd_apply_temporal_filter( + CONVERT_TO_SHORTPTR(f->y_buffer + mb_y_offset), f->y_stride, + CONVERT_TO_SHORTPTR(predictor), BW, + CONVERT_TO_SHORTPTR(f->u_buffer + mb_uv_offset), + CONVERT_TO_SHORTPTR(f->v_buffer + mb_uv_offset), f->uv_stride, + CONVERT_TO_SHORTPTR(predictor + BLK_PELS), + CONVERT_TO_SHORTPTR(predictor + (BLK_PELS << 1)), mb_uv_width, BW, + BH, mbd->plane[1].subsampling_x, mbd->plane[1].subsampling_y, + adj_strength, blk_fw, use_32x32, accumulator, count, + accumulator + BLK_PELS, count + BLK_PELS, + accumulator + (BLK_PELS << 1), count + (BLK_PELS << 1)); } else { // Apply the filter (YUV) - vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, - predictor, 16, 16, strength, filter_weight, - accumulator, count); - vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride, - predictor + 256, mb_uv_width, mb_uv_height, - strength, filter_weight, accumulator + 256, - count + 256); - vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride, - predictor + 512, mb_uv_width, mb_uv_height, - strength, filter_weight, accumulator + 512, - count + 512); + vp9_apply_temporal_filter( + f->y_buffer + mb_y_offset, f->y_stride, predictor, BW, + f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset, + f->uv_stride, predictor + BLK_PELS, predictor + (BLK_PELS << 1), + mb_uv_width, BW, BH, mbd->plane[1].subsampling_x, + mbd->plane[1].subsampling_y, strength, blk_fw, use_32x32, + accumulator, count, accumulator + BLK_PELS, count + BLK_PELS, + accumulator + (BLK_PELS << 1), count + (BLK_PELS << 1)); } #else // Apply the filter (YUV) - vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, - predictor, 16, 16, strength, filter_weight, - accumulator, count); - vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride, - predictor + 256, mb_uv_width, mb_uv_height, - strength, filter_weight, accumulator + 256, - count + 256); - 
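To recap the selection logic above in one place (a scalar restatement for clarity, not new behavior): the whole 32x32 result is preferred when its sub-pel error err is within 16/15 of the summed 16x16 errors and the sub-block errors are uniform, or within 16/14 under a tighter uniformity bound; otherwise each sub-block is weighted on its own error:

#include <limits.h>

/* Scalar restatement of the 32x32-vs-16x16 decision above. err is the
 * whole-block sub-pel error; blk_bestsme[] holds the four 16x16 errors. */
static int tf_use_32x32(int err, const int blk_bestsme[4]) {
  int k, err16 = 0, min_err = INT_MAX, max_err = INT_MIN;
  for (k = 0; k < 4; k++) {
    err16 += blk_bestsme[k];
    if (blk_bestsme[k] < min_err) min_err = blk_bestsme[k];
    if (blk_bestsme[k] > max_err) max_err = blk_bestsme[k];
  }
  /* err * 15 < err16 * 16  <=>  err < err16 * 16/15 (about 6.7% slack) */
  return (err * 15 < (err16 << 4) && max_err - min_err < 10000) ||
         (err * 14 < (err16 << 4) && max_err - min_err < 5000);
}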
vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride, - predictor + 512, mb_uv_width, mb_uv_height, - strength, filter_weight, accumulator + 512, - count + 512); + vp9_apply_temporal_filter( + f->y_buffer + mb_y_offset, f->y_stride, predictor, BW, + f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset, + f->uv_stride, predictor + BLK_PELS, predictor + (BLK_PELS << 1), + mb_uv_width, BW, BH, mbd->plane[1].subsampling_x, + mbd->plane[1].subsampling_y, strength, blk_fw, use_32x32, + accumulator, count, accumulator + BLK_PELS, count + BLK_PELS, + accumulator + (BLK_PELS << 1), count + (BLK_PELS << 1)); #endif // CONFIG_VP9_HIGHBITDEPTH } } @@ -459,8 +857,8 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, dst1_16 = CONVERT_TO_SHORTPTR(dst1); stride = cpi->alt_ref_buffer.y_stride; byte = mb_y_offset; - for (i = 0, k = 0; i < 16; i++) { - for (j = 0; j < 16; j++, k++) { + for (i = 0, k = 0; i < BH; i++) { + for (j = 0; j < BW; j++, k++) { unsigned int pval = accumulator[k] + (count[k] >> 1); pval *= fixed_divide[count[k]]; pval >>= 19; @@ -471,7 +869,7 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, byte++; } - byte += stride - 16; + byte += stride - BW; } dst1 = cpi->alt_ref_buffer.u_buffer; @@ -480,9 +878,9 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, dst2_16 = CONVERT_TO_SHORTPTR(dst2); stride = cpi->alt_ref_buffer.uv_stride; byte = mb_uv_offset; - for (i = 0, k = 256; i < mb_uv_height; i++) { + for (i = 0, k = BLK_PELS; i < mb_uv_height; i++) { for (j = 0; j < mb_uv_width; j++, k++) { - int m = k + 256; + int m = k + BLK_PELS; // U unsigned int pval = accumulator[k] + (count[k] >> 1); @@ -507,8 +905,8 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, dst1 = cpi->alt_ref_buffer.y_buffer; stride = cpi->alt_ref_buffer.y_stride; byte = mb_y_offset; - for (i = 0, k = 0; i < 16; i++) { - for (j = 0; j < 16; j++, k++) { + for (i = 0, k = 0; i < BH; i++) { + for (j = 0; j < BW; j++, k++) { unsigned int pval = accumulator[k] + (count[k] >> 1); pval *= fixed_divide[count[k]]; pval >>= 19; @@ -518,16 +916,16 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, // move to next pixel byte++; } - byte += stride - 16; + byte += stride - BW; } dst1 = cpi->alt_ref_buffer.u_buffer; dst2 = cpi->alt_ref_buffer.v_buffer; stride = cpi->alt_ref_buffer.uv_stride; byte = mb_uv_offset; - for (i = 0, k = 256; i < mb_uv_height; i++) { + for (i = 0, k = BLK_PELS; i < mb_uv_height; i++) { for (j = 0; j < mb_uv_width; j++, k++) { - int m = k + 256; + int m = k + BLK_PELS; // U unsigned int pval = accumulator[k] + (count[k] >> 1); @@ -552,8 +950,8 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, dst1 = cpi->alt_ref_buffer.y_buffer; stride = cpi->alt_ref_buffer.y_stride; byte = mb_y_offset; - for (i = 0, k = 0; i < 16; i++) { - for (j = 0; j < 16; j++, k++) { + for (i = 0, k = 0; i < BH; i++) { + for (j = 0; j < BW; j++, k++) { unsigned int pval = accumulator[k] + (count[k] >> 1); pval *= fixed_divide[count[k]]; pval >>= 19; @@ -563,16 +961,16 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, // move to next pixel byte++; } - byte += stride - 16; + byte += stride - BW; } dst1 = cpi->alt_ref_buffer.u_buffer; dst2 = cpi->alt_ref_buffer.v_buffer; stride = cpi->alt_ref_buffer.uv_stride; byte = mb_uv_offset; - for (i = 0, k = 256; i < mb_uv_height; i++) { + for (i = 0, k = BLK_PELS; i < mb_uv_height; i++) { for (j = 0; j < mb_uv_width; j++, k++) { - 
int m = k + 256;
+          int m = k + BLK_PELS;
 
           // U
           unsigned int pval = accumulator[k] + (count[k] >> 1);
@@ -592,7 +990,7 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
       byte += stride - mb_uv_width;
     }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-    mb_y_offset += 16;
+    mb_y_offset += BW;
     mb_uv_offset += mb_uv_width;
   }
 }
@@ -603,10 +1001,10 @@ static void temporal_filter_iterate_tile_c(VP9_COMP *cpi, int tile_row,
   const int tile_cols = 1 << cm->log2_tile_cols;
   TileInfo *tile_info =
       &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
-  const int mb_row_start = (tile_info->mi_row_start) >> 1;
-  const int mb_row_end = (tile_info->mi_row_end + 1) >> 1;
-  const int mb_col_start = (tile_info->mi_col_start) >> 1;
-  const int mb_col_end = (tile_info->mi_col_end + 1) >> 1;
+  const int mb_row_start = (tile_info->mi_row_start) >> TF_SHIFT;
+  const int mb_row_end = (tile_info->mi_row_end + TF_ROUND) >> TF_SHIFT;
+  const int mb_col_start = (tile_info->mi_col_start) >> TF_SHIFT;
+  const int mb_col_end = (tile_info->mi_col_end + TF_ROUND) >> TF_SHIFT;
   int mb_row;
 
   for (mb_row = mb_row_start; mb_row < mb_row_end; mb_row++) {
@@ -620,13 +1018,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi) {
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
   int tile_row, tile_col;
-  MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
-  // Save input state
-  uint8_t *input_buffer[MAX_MB_PLANE];
-  int i;
-
-  for (i = 0; i < MAX_MB_PLANE; i++) input_buffer[i] = mbd->plane[i].pre[0].buf;
-
   vp9_init_tile_data(cpi);
 
   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
@@ -634,15 +1025,13 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi) {
       temporal_filter_iterate_tile_c(cpi, tile_row, tile_col);
     }
   }
-
-  // Restore input state
-  for (i = 0; i < MAX_MB_PLANE; i++) mbd->plane[i].pre[0].buf = input_buffer[i];
 }
 
 // Apply buffer limits and context specific adjustments to arnr filter.
 static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost,
                                int *arnr_frames, int *arnr_strength) {
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
   const int frames_after_arf =
       vp9_lookahead_depth(cpi->lookahead) - distance - 1;
   int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;
@@ -696,12 +1085,17 @@ static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost,
   }
 
   // Adjustments for second level arf in multi arf case.
-  if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed) {
-    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-    if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) {
-      strength >>= 1;
-    }
-  }
+  // Leave commented out placeholder for possible filtering adjustment with
+  // new multi-layer arf code.
+  // if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed)
+  //   if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) strength >>= 1;
+
+  // TODO(jingning): Skip temporal filtering for intermediate frames that will
+  // be used as show_existing_frame. Need to further explore the possibility to
+  // apply certain filter.
+  if (gf_group->arf_src_offset[gf_group->index] <
+      cpi->rc.baseline_gf_interval - 1)
+    frames = 1;
 
   *arnr_frames = frames;
   *arnr_strength = strength;
@@ -800,8 +1194,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
   }
 
   // Initialize errorperbit and sadperbit.
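Two details of the code above, restated as sketches. First, the pval arithmetic in the write-back loops is a rounded fixed-point division, assuming fixed_divide[c] holds (1 << 19) / c as set up elsewhere in this file by vp9_temporal_filter_init(). Second, the tile bounds map mi units (8 pixels) onto 32x32 filtering blocks, which is why TF_SHIFT is 2 and TF_ROUND is 3:

#include <stdint.h>

/* Rounded average of a filtered pixel: approximately (accum + c/2) / c,
 * done with one multiply and shift instead of a per-pixel divide
 * (assumes fixed_divide[c] == (1 << 19) / c). */
static uint8_t tf_normalize(uint32_t accum, uint16_t cnt,
                            const uint32_t *fixed_divide) {
  uint32_t pval = accum + (cnt >> 1); /* + count/2 rounds the division */
  pval *= fixed_divide[cnt];
  return (uint8_t)(pval >> 19);
}

/* mi units are 8 pixels and a filtering block is 32x32, i.e. 4 mi:
 * TF_SHIFT (2) divides by 4 and TF_ROUND (3) turns the end bound into a
 * ceiling, e.g. mi_end == 17 -> (17 + 3) >> 2 == 5 blocks. */
static void tf_tile_block_range(int mi_start, int mi_end, int *mb_start,
                                int *mb_end) {
  *mb_start = mi_start >> 2;
  *mb_end = (mi_end + 3) >> 2;
}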
- rdmult = (int)vp9_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX); - if (rdmult < 1) rdmult = 1; + rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX); set_error_per_bit(&cpi->td.mb, rdmult); vp9_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX); diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_temporal_filter.h b/media/libvpx/libvpx/vp9/encoder/vp9_temporal_filter.h index 775e49cc537b..553a468280ff 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_temporal_filter.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_temporal_filter.h @@ -8,14 +8,29 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ -#define VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ +#ifndef VPX_VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ +#define VPX_VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ #ifdef __cplusplus extern "C" { #endif #define ARNR_FILT_QINDEX 128 +static const MV kZeroMv = { 0, 0 }; + +// Block size used in temporal filtering +#define TF_BLOCK BLOCK_32X32 +#define BH 32 +#define BH_LOG2 5 +#define BW 32 +#define BW_LOG2 5 +#define BLK_PELS ((BH) * (BW)) // Pixels in the block +#define TF_SHIFT 2 +#define TF_ROUND 3 +#define THR_SHIFT 2 +#define TF_SUB_BLOCK BLOCK_16X16 +#define SUB_BH 16 +#define SUB_BW 16 void vp9_temporal_filter_init(void); void vp9_temporal_filter(VP9_COMP *cpi, int distance); @@ -28,4 +43,4 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, } // extern "C" #endif -#endif // VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ +#endif // VPX_VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_tokenize.h b/media/libvpx/libvpx/vp9/encoder/vp9_tokenize.h index b2f63ffef5a8..6407ff92376c 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_tokenize.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_tokenize.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_TOKENIZE_H_ -#define VP9_ENCODER_VP9_TOKENIZE_H_ +#ifndef VPX_VP9_ENCODER_VP9_TOKENIZE_H_ +#define VPX_VP9_ENCODER_VP9_TOKENIZE_H_ #include "vp9/common/vp9_entropy.h" @@ -127,4 +127,4 @@ static INLINE int vp9_get_token_cost(int v, int16_t *token, } // extern "C" #endif -#endif // VP9_ENCODER_VP9_TOKENIZE_H_ +#endif // VPX_VP9_ENCODER_VP9_TOKENIZE_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/vp9_treewriter.h b/media/libvpx/libvpx/vp9/encoder/vp9_treewriter.h index a8b9c2cd3122..86c5fa22447c 100644 --- a/media/libvpx/libvpx/vp9/encoder/vp9_treewriter.h +++ b/media/libvpx/libvpx/vp9/encoder/vp9_treewriter.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_ENCODER_VP9_TREEWRITER_H_ -#define VP9_ENCODER_VP9_TREEWRITER_H_ +#ifndef VPX_VP9_ENCODER_VP9_TREEWRITER_H_ +#define VPX_VP9_ENCODER_VP9_TREEWRITER_H_ #include "vpx_dsp/bitwriter.h" @@ -48,4 +48,4 @@ static INLINE void vp9_write_token(vpx_writer *w, const vpx_tree_index *tree, } // extern "C" #endif -#endif // VP9_ENCODER_VP9_TREEWRITER_H_ +#endif // VPX_VP9_ENCODER_VP9_TREEWRITER_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/x86/highbd_temporal_filter_sse4.c b/media/libvpx/libvpx/vp9/encoder/x86/highbd_temporal_filter_sse4.c new file mode 100644 index 000000000000..4fa24512c59f --- /dev/null +++ b/media/libvpx/libvpx/vp9/encoder/x86/highbd_temporal_filter_sse4.c @@ -0,0 +1,943 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS.  All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <smmintrin.h>
+
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vp9/encoder/vp9_encoder.h"
+#include "vp9/encoder/vp9_temporal_filter.h"
+#include "vp9/encoder/x86/temporal_filter_constants.h"
+
+// Compute (a-b)**2 for 8 pixels with size 16-bit
+static INLINE void highbd_store_dist_8(const uint16_t *a, const uint16_t *b,
+                                       uint32_t *dst) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i a_reg = _mm_loadu_si128((const __m128i *)a);
+  const __m128i b_reg = _mm_loadu_si128((const __m128i *)b);
+
+  const __m128i a_first = _mm_cvtepu16_epi32(a_reg);
+  const __m128i a_second = _mm_unpackhi_epi16(a_reg, zero);
+  const __m128i b_first = _mm_cvtepu16_epi32(b_reg);
+  const __m128i b_second = _mm_unpackhi_epi16(b_reg, zero);
+
+  __m128i dist_first, dist_second;
+
+  dist_first = _mm_sub_epi32(a_first, b_first);
+  dist_second = _mm_sub_epi32(a_second, b_second);
+  dist_first = _mm_mullo_epi32(dist_first, dist_first);
+  dist_second = _mm_mullo_epi32(dist_second, dist_second);
+
+  _mm_storeu_si128((__m128i *)dst, dist_first);
+  _mm_storeu_si128((__m128i *)(dst + 4), dist_second);
+}
+
+// Sum up three neighboring distortions for the pixels
+static INLINE void highbd_get_sum_4(const uint32_t *dist, __m128i *sum) {
+  __m128i dist_reg, dist_left, dist_right;
+
+  dist_reg = _mm_loadu_si128((const __m128i *)dist);
+  dist_left = _mm_loadu_si128((const __m128i *)(dist - 1));
+  dist_right = _mm_loadu_si128((const __m128i *)(dist + 1));
+
+  *sum = _mm_add_epi32(dist_reg, dist_left);
+  *sum = _mm_add_epi32(*sum, dist_right);
+}
+
+static INLINE void highbd_get_sum_8(const uint32_t *dist, __m128i *sum_first,
+                                    __m128i *sum_second) {
+  highbd_get_sum_4(dist, sum_first);
+  highbd_get_sum_4(dist + 4, sum_second);
+}
+
+// Average the value based on the number of values summed (9 for pixels away
+// from the border, 4 for pixels in corners, and 6 for other edge values, plus
+// however many values from the co-located y/uv plane are added in).
+//
+// Add in the rounding factor and shift, clamp to 16, invert and shift.
+// Multiply by weight.
+static INLINE void highbd_average_4(__m128i *output, const __m128i *sum,
+                                    const __m128i *mul_constants,
+                                    const int strength, const int rounding,
+                                    const int weight) {
+  // _mm_srl_epi32 uses the lower 64 bit value for the shift.
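A scalar sketch of one 32-bit lane of the sequence below (illustrative, not part of the patch); the unpack and _mm_mul_epu32 steps exist only because SSE4.1 has no packed 32-bit multiply-high instruction:

#include <stdint.h>

/* Per-lane equivalent of highbd_average_4: multiply-high by the
 * reciprocal table entry, round and shift down by strength, clamp to
 * 16, invert, then scale by the sub-block weight. */
static uint32_t tf_highbd_modifier(uint32_t sum, uint32_t mul_constant,
                                   int strength, uint32_t rounding,
                                   uint32_t weight) {
  uint32_t m = (uint32_t)(((uint64_t)sum * mul_constant) >> 32);
  m = (m + rounding) >> strength; /* scale the summed distortion to 0..16 */
  if (m > 16) m = 16;             /* the _mm_min_epu32 clamp */
  return (16 - m) * weight;       /* small distortion -> large modifier */
}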
+ const __m128i strength_u128 = _mm_set_epi32(0, 0, 0, strength); + const __m128i rounding_u32 = _mm_set1_epi32(rounding); + const __m128i weight_u32 = _mm_set1_epi32(weight); + const __m128i sixteen = _mm_set1_epi32(16); + const __m128i zero = _mm_setzero_si128(); + + // modifier * 3 / index; + const __m128i sum_lo = _mm_unpacklo_epi32(*sum, zero); + const __m128i sum_hi = _mm_unpackhi_epi32(*sum, zero); + const __m128i const_lo = _mm_unpacklo_epi32(*mul_constants, zero); + const __m128i const_hi = _mm_unpackhi_epi32(*mul_constants, zero); + + const __m128i mul_lo = _mm_mul_epu32(sum_lo, const_lo); + const __m128i mul_lo_div = _mm_srli_epi64(mul_lo, 32); + const __m128i mul_hi = _mm_mul_epu32(sum_hi, const_hi); + const __m128i mul_hi_div = _mm_srli_epi64(mul_hi, 32); + + // Now we have + // mul_lo: 00 a1 00 a0 + // mul_hi: 00 a3 00 a2 + // Unpack as 64 bit words to get even and odd elements + // unpack_lo: 00 a2 00 a0 + // unpack_hi: 00 a3 00 a1 + // Then we can shift and OR the results to get everything in 32-bits + const __m128i mul_even = _mm_unpacklo_epi64(mul_lo_div, mul_hi_div); + const __m128i mul_odd = _mm_unpackhi_epi64(mul_lo_div, mul_hi_div); + const __m128i mul_odd_shift = _mm_slli_si128(mul_odd, 4); + const __m128i mul = _mm_or_si128(mul_even, mul_odd_shift); + + // Round + *output = _mm_add_epi32(mul, rounding_u32); + *output = _mm_srl_epi32(*output, strength_u128); + + // Multiply with the weight + *output = _mm_min_epu32(*output, sixteen); + *output = _mm_sub_epi32(sixteen, *output); + *output = _mm_mullo_epi32(*output, weight_u32); +} + +static INLINE void highbd_average_8(__m128i *output_0, __m128i *output_1, + const __m128i *sum_0_u32, + const __m128i *sum_1_u32, + const __m128i *mul_constants_0, + const __m128i *mul_constants_1, + const int strength, const int rounding, + const int weight) { + highbd_average_4(output_0, sum_0_u32, mul_constants_0, strength, rounding, + weight); + highbd_average_4(output_1, sum_1_u32, mul_constants_1, strength, rounding, + weight); +} + +// Add 'sum_u32' to 'count'. Multiply by 'pred' and add to 'accumulator.' 
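Before the SIMD version, a scalar picture of what one pixel contributes (illustrative): the modifier is added to the pixel's count and the modifier-weighted prediction is added to the accumulator. Note the SIMD count update uses _mm_adds_epu16 and therefore saturates rather than wraps, and _mm_mullo_epi16 keeps only the low 16 bits of each product:

#include <stdint.h>

/* Scalar equivalent of one lane of highbd_accumulate_and_store_8. */
static void tf_accumulate_one(uint32_t mod, uint16_t pred, uint16_t *count,
                              uint32_t *accum) {
  uint32_t c = (uint32_t)*count + mod;
  *count = (uint16_t)(c > UINT16_MAX ? UINT16_MAX : c); /* saturating add */
  /* low 16 bits of mod * pred, widened before the 32-bit accumulation */
  *accum += (uint16_t)(mod * pred);
}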
+static INLINE void highbd_accumulate_and_store_8(const __m128i sum_first_u32, + const __m128i sum_second_u32, + const uint16_t *pred, + uint16_t *count, + uint32_t *accumulator) { + // Cast down to 16-bit ints + const __m128i sum_u16 = _mm_packus_epi32(sum_first_u32, sum_second_u32); + const __m128i zero = _mm_setzero_si128(); + + __m128i pred_u16 = _mm_loadu_si128((const __m128i *)pred); + __m128i count_u16 = _mm_loadu_si128((const __m128i *)count); + + __m128i pred_0_u32, pred_1_u32; + __m128i accum_0_u32, accum_1_u32; + + count_u16 = _mm_adds_epu16(count_u16, sum_u16); + _mm_storeu_si128((__m128i *)count, count_u16); + + pred_u16 = _mm_mullo_epi16(sum_u16, pred_u16); + + pred_0_u32 = _mm_cvtepu16_epi32(pred_u16); + pred_1_u32 = _mm_unpackhi_epi16(pred_u16, zero); + + accum_0_u32 = _mm_loadu_si128((const __m128i *)accumulator); + accum_1_u32 = _mm_loadu_si128((const __m128i *)(accumulator + 4)); + + accum_0_u32 = _mm_add_epi32(pred_0_u32, accum_0_u32); + accum_1_u32 = _mm_add_epi32(pred_1_u32, accum_1_u32); + + _mm_storeu_si128((__m128i *)accumulator, accum_0_u32); + _mm_storeu_si128((__m128i *)(accumulator + 4), accum_1_u32); +} + +static INLINE void highbd_read_dist_4(const uint32_t *dist, __m128i *dist_reg) { + *dist_reg = _mm_loadu_si128((const __m128i *)dist); +} + +static INLINE void highbd_read_dist_8(const uint32_t *dist, __m128i *reg_first, + __m128i *reg_second) { + highbd_read_dist_4(dist, reg_first); + highbd_read_dist_4(dist + 4, reg_second); +} + +static INLINE void highbd_read_chroma_dist_row_8( + int ss_x, const uint32_t *u_dist, const uint32_t *v_dist, __m128i *u_first, + __m128i *u_second, __m128i *v_first, __m128i *v_second) { + if (!ss_x) { + // If there is no chroma subsampling in the horizontal direction, then we + // need to load 8 entries from chroma. 
+    highbd_read_dist_8(u_dist, u_first, u_second);
+    highbd_read_dist_8(v_dist, v_first, v_second);
+  } else {  // ss_x == 1
+    // Otherwise, we only need to load 4 entries
+    __m128i u_reg, v_reg;
+
+    highbd_read_dist_4(u_dist, &u_reg);
+
+    *u_first = _mm_unpacklo_epi32(u_reg, u_reg);
+    *u_second = _mm_unpackhi_epi32(u_reg, u_reg);
+
+    highbd_read_dist_4(v_dist, &v_reg);
+
+    *v_first = _mm_unpacklo_epi32(v_reg, v_reg);
+    *v_second = _mm_unpackhi_epi32(v_reg, v_reg);
+  }
+}
+
+static void vp9_highbd_apply_temporal_filter_luma_8(
+    const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre,
+    int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src,
+    int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre,
+    int uv_pre_stride, unsigned int block_width, unsigned int block_height,
+    int ss_x, int ss_y, int strength, int use_whole_blk, uint32_t *y_accum,
+    uint16_t *y_count, const uint32_t *y_dist, const uint32_t *u_dist,
+    const uint32_t *v_dist, const uint32_t *const *neighbors_first,
+    const uint32_t *const *neighbors_second, int top_weight,
+    int bottom_weight) {
+  const int rounding = (1 << strength) >> 1;
+  int weight = top_weight;
+
+  __m128i mul_first, mul_second;
+
+  __m128i sum_row_1_first, sum_row_1_second;
+  __m128i sum_row_2_first, sum_row_2_second;
+  __m128i sum_row_3_first, sum_row_3_second;
+
+  __m128i u_first, u_second;
+  __m128i v_first, v_second;
+
+  __m128i sum_row_first;
+  __m128i sum_row_second;
+
+  // Loop variables
+  unsigned int h;
+
+  assert(strength >= 4 && strength <= 14 &&
+         "invalid adjusted temporal filter strength");
+  assert(block_width == 8);
+
+  (void)block_width;
+
+  // First row
+  mul_first = _mm_load_si128((const __m128i *)neighbors_first[0]);
+  mul_second = _mm_load_si128((const __m128i *)neighbors_second[0]);
+
+  // Add luma values
+  highbd_get_sum_8(y_dist, &sum_row_2_first, &sum_row_2_second);
+  highbd_get_sum_8(y_dist + DIST_STRIDE, &sum_row_3_first, &sum_row_3_second);
+
+  // We don't need to saturate here because the maximum value is UINT12_MAX ** 2
+  // * 9 ~= 2**24 * 9 < 2 ** 28 < INT32_MAX
+  sum_row_first = _mm_add_epi32(sum_row_2_first, sum_row_3_first);
+  sum_row_second = _mm_add_epi32(sum_row_2_second, sum_row_3_second);
+
+  // Add chroma values
+  highbd_read_chroma_dist_row_8(ss_x, u_dist, v_dist, &u_first, &u_second,
+                                &v_first, &v_second);
+
+  // Max value here is 2 ** 24 * (9 + 2), so no saturation is needed
+  sum_row_first = _mm_add_epi32(sum_row_first, u_first);
+  sum_row_second = _mm_add_epi32(sum_row_second, u_second);
+
+  sum_row_first = _mm_add_epi32(sum_row_first, v_first);
+  sum_row_second = _mm_add_epi32(sum_row_second, v_second);
+
+  // Get modifier and store result
+  highbd_average_8(&sum_row_first, &sum_row_second, &sum_row_first,
+                   &sum_row_second, &mul_first, &mul_second, strength, rounding,
+                   weight);
+
+  highbd_accumulate_and_store_8(sum_row_first, sum_row_second, y_pre, y_count,
+                                y_accum);
+
+  y_src += y_src_stride;
+  y_pre += y_pre_stride;
+  y_count += y_pre_stride;
+  y_accum += y_pre_stride;
+  y_dist += DIST_STRIDE;
+
+  u_src += uv_src_stride;
+  u_pre += uv_pre_stride;
+  u_dist += DIST_STRIDE;
+  v_src += uv_src_stride;
+  v_pre += uv_pre_stride;
+  v_dist += DIST_STRIDE;
+
+  // Then all the rows except the last one
+  mul_first = _mm_load_si128((const __m128i *)neighbors_first[1]);
+  mul_second = _mm_load_si128((const __m128i *)neighbors_second[1]);
+
+  for (h = 1; h < block_height - 1; ++h) {
+    // Move the weight to bottom half
+    if (!use_whole_blk && h == block_height / 2) {
+      weight = bottom_weight;
+    }
+
+
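The loop that continues below maintains a three-row window of per-pixel distortion sums (the rows above, at and below the current row) and shifts it down one row per iteration, so each row of y_dist is summed only once. For an interior pixel the luma part of the modifier is simply a 3x3 neighborhood sum; edge pixels instead rely on the padded distortion rows and the smaller reciprocal constants. A scalar sketch of the interior case:

#include <stdint.h>

/* Illustrative 3x3 luma distortion sum for an interior pixel; 'stride'
 * corresponds to DIST_STRIDE in the callers above. */
static uint32_t tf_luma_modifier_3x3(const uint32_t *dist, int stride) {
  uint32_t sum = 0;
  int dy, dx;
  for (dy = -1; dy <= 1; dy++)
    for (dx = -1; dx <= 1; dx++) sum += dist[dy * stride + dx];
  return sum; /* chroma distortions are then added on top */
}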
// Shift the rows up + sum_row_1_first = sum_row_2_first; + sum_row_1_second = sum_row_2_second; + sum_row_2_first = sum_row_3_first; + sum_row_2_second = sum_row_3_second; + + // Add luma values to the modifier + sum_row_first = _mm_add_epi32(sum_row_1_first, sum_row_2_first); + sum_row_second = _mm_add_epi32(sum_row_1_second, sum_row_2_second); + + highbd_get_sum_8(y_dist + DIST_STRIDE, &sum_row_3_first, &sum_row_3_second); + + sum_row_first = _mm_add_epi32(sum_row_first, sum_row_3_first); + sum_row_second = _mm_add_epi32(sum_row_second, sum_row_3_second); + + // Add chroma values to the modifier + if (ss_y == 0 || h % 2 == 0) { + // Only calculate the new chroma distortion if we are at a pixel that + // corresponds to a new chroma row + highbd_read_chroma_dist_row_8(ss_x, u_dist, v_dist, &u_first, &u_second, + &v_first, &v_second); + + u_src += uv_src_stride; + u_pre += uv_pre_stride; + u_dist += DIST_STRIDE; + v_src += uv_src_stride; + v_pre += uv_pre_stride; + v_dist += DIST_STRIDE; + } + + sum_row_first = _mm_add_epi32(sum_row_first, u_first); + sum_row_second = _mm_add_epi32(sum_row_second, u_second); + sum_row_first = _mm_add_epi32(sum_row_first, v_first); + sum_row_second = _mm_add_epi32(sum_row_second, v_second); + + // Get modifier and store result + highbd_average_8(&sum_row_first, &sum_row_second, &sum_row_first, + &sum_row_second, &mul_first, &mul_second, strength, + rounding, weight); + highbd_accumulate_and_store_8(sum_row_first, sum_row_second, y_pre, y_count, + y_accum); + + y_src += y_src_stride; + y_pre += y_pre_stride; + y_count += y_pre_stride; + y_accum += y_pre_stride; + y_dist += DIST_STRIDE; + } + + // The last row + mul_first = _mm_load_si128((const __m128i *)neighbors_first[0]); + mul_second = _mm_load_si128((const __m128i *)neighbors_second[0]); + + // Shift the rows up + sum_row_1_first = sum_row_2_first; + sum_row_1_second = sum_row_2_second; + sum_row_2_first = sum_row_3_first; + sum_row_2_second = sum_row_3_second; + + // Add luma values to the modifier + sum_row_first = _mm_add_epi32(sum_row_1_first, sum_row_2_first); + sum_row_second = _mm_add_epi32(sum_row_1_second, sum_row_2_second); + + // Add chroma values to the modifier + if (ss_y == 0) { + // Only calculate the new chroma distortion if we are at a pixel that + // corresponds to a new chroma row + highbd_read_chroma_dist_row_8(ss_x, u_dist, v_dist, &u_first, &u_second, + &v_first, &v_second); + } + + sum_row_first = _mm_add_epi32(sum_row_first, u_first); + sum_row_second = _mm_add_epi32(sum_row_second, u_second); + sum_row_first = _mm_add_epi32(sum_row_first, v_first); + sum_row_second = _mm_add_epi32(sum_row_second, v_second); + + // Get modifier and store result + highbd_average_8(&sum_row_first, &sum_row_second, &sum_row_first, + &sum_row_second, &mul_first, &mul_second, strength, rounding, + weight); + highbd_accumulate_and_store_8(sum_row_first, sum_row_second, y_pre, y_count, + y_accum); +} + +// Perform temporal filter for the luma component. 
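A sketch of how the four blk_fw weights land on pixels in the function below (illustrative): the block is swept in 8-wide column strips with left/middle/right neighbor tables for edge normalization, and the weight flips from the top pair to the bottom pair at block_height / 2 and from the left pair to the right pair at the mid-width strip:

/* Which blk_fw entry weights pixel (row, col) when per-sub-block
 * weights are active (use_whole_blk == 0). blk_fw order is
 * { top-left, top-right, bottom-left, bottom-right }. */
static int tf_weight_for_pixel(const int blk_fw[4], unsigned int row,
                               unsigned int col, unsigned int block_width,
                               unsigned int block_height) {
  const int bottom = row >= block_height / 2;
  const int right = col >= block_width / 2;
  return blk_fw[2 * bottom + right];
}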
+static void vp9_highbd_apply_temporal_filter_luma( + const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, + int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, + int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, + int uv_pre_stride, unsigned int block_width, unsigned int block_height, + int ss_x, int ss_y, int strength, const int *blk_fw, int use_whole_blk, + uint32_t *y_accum, uint16_t *y_count, const uint32_t *y_dist, + const uint32_t *u_dist, const uint32_t *v_dist) { + unsigned int blk_col = 0, uv_blk_col = 0; + const unsigned int blk_col_step = 8, uv_blk_col_step = 8 >> ss_x; + const unsigned int mid_width = block_width >> 1, + last_width = block_width - blk_col_step; + int top_weight = blk_fw[0], + bottom_weight = use_whole_blk ? blk_fw[0] : blk_fw[2]; + const uint32_t *const *neighbors_first; + const uint32_t *const *neighbors_second; + + // Left + neighbors_first = HIGHBD_LUMA_LEFT_COLUMN_NEIGHBORS; + neighbors_second = HIGHBD_LUMA_MIDDLE_COLUMN_NEIGHBORS; + vp9_highbd_apply_temporal_filter_luma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, + v_pre + uv_blk_col, uv_pre_stride, blk_col_step, block_height, ss_x, ss_y, + strength, use_whole_blk, y_accum + blk_col, y_count + blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, + neighbors_first, neighbors_second, top_weight, bottom_weight); + + blk_col += blk_col_step; + uv_blk_col += uv_blk_col_step; + + // Middle First + neighbors_first = HIGHBD_LUMA_MIDDLE_COLUMN_NEIGHBORS; + for (; blk_col < mid_width; + blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { + vp9_highbd_apply_temporal_filter_luma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, blk_col_step, + block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, + y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, + v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, + bottom_weight); + } + + if (!use_whole_blk) { + top_weight = blk_fw[1]; + bottom_weight = blk_fw[3]; + } + + // Middle Second + for (; blk_col < last_width; + blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { + vp9_highbd_apply_temporal_filter_luma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, blk_col_step, + block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, + y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, + v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, + bottom_weight); + } + + // Right + neighbors_second = HIGHBD_LUMA_RIGHT_COLUMN_NEIGHBORS; + vp9_highbd_apply_temporal_filter_luma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, + v_pre + uv_blk_col, uv_pre_stride, blk_col_step, block_height, ss_x, ss_y, + strength, use_whole_blk, y_accum + blk_col, y_count + blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, + neighbors_first, neighbors_second, top_weight, bottom_weight); +} + +// Add a row of luma distortion that corresponds to 8 chroma mods. If we are +// subsampling in x direction, then we have 16 lumas, else we have 8. 
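Extra context for the helper that follows (a sketch under the naming used in this file): each chroma modifier also absorbs the distortion of its co-located luma pixels, so a chroma pixel adds (1 + ss_x) * (1 + ss_y) luma entries, which is exactly why the chroma reciprocal tables come in PLUS_1, PLUS_2 and PLUS_4 variants:

#include <stdint.h>

/* Illustrative scalar: total luma distortion folded into the modifier
 * of chroma pixel (cx, cy) for subsampling factors ss_x, ss_y. */
static uint32_t tf_luma_dist_for_chroma(const uint32_t *y_dist, int stride,
                                        int cx, int cy, int ss_x, int ss_y) {
  uint32_t sum = 0;
  int dy, dx;
  for (dy = 0; dy <= ss_y; dy++)
    for (dx = 0; dx <= ss_x; dx++)
      sum += y_dist[(cy * (1 + ss_y) + dy) * stride + cx * (1 + ss_x) + dx];
  return sum; /* 1, 2 or 4 entries -> the PLUS_1 / _2 / _4 tables */
}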
+static INLINE void highbd_add_luma_dist_to_8_chroma_mod( + const uint32_t *y_dist, int ss_x, int ss_y, __m128i *u_mod_fst, + __m128i *u_mod_snd, __m128i *v_mod_fst, __m128i *v_mod_snd) { + __m128i y_reg_fst, y_reg_snd; + if (!ss_x) { + highbd_read_dist_8(y_dist, &y_reg_fst, &y_reg_snd); + if (ss_y == 1) { + __m128i y_tmp_fst, y_tmp_snd; + highbd_read_dist_8(y_dist + DIST_STRIDE, &y_tmp_fst, &y_tmp_snd); + y_reg_fst = _mm_add_epi32(y_reg_fst, y_tmp_fst); + y_reg_snd = _mm_add_epi32(y_reg_snd, y_tmp_snd); + } + } else { + // Temporary + __m128i y_fst, y_snd; + + // First 8 + highbd_read_dist_8(y_dist, &y_fst, &y_snd); + if (ss_y == 1) { + __m128i y_tmp_fst, y_tmp_snd; + highbd_read_dist_8(y_dist + DIST_STRIDE, &y_tmp_fst, &y_tmp_snd); + + y_fst = _mm_add_epi32(y_fst, y_tmp_fst); + y_snd = _mm_add_epi32(y_snd, y_tmp_snd); + } + + y_reg_fst = _mm_hadd_epi32(y_fst, y_snd); + + // Second 8 + highbd_read_dist_8(y_dist + 8, &y_fst, &y_snd); + if (ss_y == 1) { + __m128i y_tmp_fst, y_tmp_snd; + highbd_read_dist_8(y_dist + 8 + DIST_STRIDE, &y_tmp_fst, &y_tmp_snd); + + y_fst = _mm_add_epi32(y_fst, y_tmp_fst); + y_snd = _mm_add_epi32(y_snd, y_tmp_snd); + } + + y_reg_snd = _mm_hadd_epi32(y_fst, y_snd); + } + + *u_mod_fst = _mm_add_epi32(*u_mod_fst, y_reg_fst); + *u_mod_snd = _mm_add_epi32(*u_mod_snd, y_reg_snd); + *v_mod_fst = _mm_add_epi32(*v_mod_fst, y_reg_fst); + *v_mod_snd = _mm_add_epi32(*v_mod_snd, y_reg_snd); +} + +// Apply temporal filter to the chroma components. This performs temporal +// filtering on a chroma block of 8 X uv_height. If blk_fw is not NULL, use +// blk_fw as an array of size 4 for the weights for each of the 4 subblocks, +// else use top_weight for top half, and bottom weight for bottom half. +static void vp9_highbd_apply_temporal_filter_chroma_8( + const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, + int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, + int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, + int uv_pre_stride, unsigned int uv_block_width, + unsigned int uv_block_height, int ss_x, int ss_y, int strength, + uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count, + const uint32_t *y_dist, const uint32_t *u_dist, const uint32_t *v_dist, + const uint32_t *const *neighbors_fst, const uint32_t *const *neighbors_snd, + int top_weight, int bottom_weight, const int *blk_fw) { + const int rounding = (1 << strength) >> 1; + int weight = top_weight; + + __m128i mul_fst, mul_snd; + + __m128i u_sum_row_1_fst, u_sum_row_2_fst, u_sum_row_3_fst; + __m128i v_sum_row_1_fst, v_sum_row_2_fst, v_sum_row_3_fst; + __m128i u_sum_row_1_snd, u_sum_row_2_snd, u_sum_row_3_snd; + __m128i v_sum_row_1_snd, v_sum_row_2_snd, v_sum_row_3_snd; + + __m128i u_sum_row_fst, v_sum_row_fst; + __m128i u_sum_row_snd, v_sum_row_snd; + + // Loop variable + unsigned int h; + + (void)uv_block_width; + + // First row + mul_fst = _mm_load_si128((const __m128i *)neighbors_fst[0]); + mul_snd = _mm_load_si128((const __m128i *)neighbors_snd[0]); + + // Add chroma values + highbd_get_sum_8(u_dist, &u_sum_row_2_fst, &u_sum_row_2_snd); + highbd_get_sum_8(u_dist + DIST_STRIDE, &u_sum_row_3_fst, &u_sum_row_3_snd); + + u_sum_row_fst = _mm_add_epi32(u_sum_row_2_fst, u_sum_row_3_fst); + u_sum_row_snd = _mm_add_epi32(u_sum_row_2_snd, u_sum_row_3_snd); + + highbd_get_sum_8(v_dist, &v_sum_row_2_fst, &v_sum_row_2_snd); + highbd_get_sum_8(v_dist + DIST_STRIDE, &v_sum_row_3_fst, &v_sum_row_3_snd); + + v_sum_row_fst = _mm_add_epi32(v_sum_row_2_fst, v_sum_row_3_fst); + 
v_sum_row_snd = _mm_add_epi32(v_sum_row_2_snd, v_sum_row_3_snd); + + // Add luma values + highbd_add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row_fst, + &u_sum_row_snd, &v_sum_row_fst, + &v_sum_row_snd); + + // Get modifier and store result + if (blk_fw) { + highbd_average_4(&u_sum_row_fst, &u_sum_row_fst, &mul_fst, strength, + rounding, blk_fw[0]); + highbd_average_4(&u_sum_row_snd, &u_sum_row_snd, &mul_snd, strength, + rounding, blk_fw[1]); + + highbd_average_4(&v_sum_row_fst, &v_sum_row_fst, &mul_fst, strength, + rounding, blk_fw[0]); + highbd_average_4(&v_sum_row_snd, &v_sum_row_snd, &mul_snd, strength, + rounding, blk_fw[1]); + + } else { + highbd_average_8(&u_sum_row_fst, &u_sum_row_snd, &u_sum_row_fst, + &u_sum_row_snd, &mul_fst, &mul_snd, strength, rounding, + weight); + highbd_average_8(&v_sum_row_fst, &v_sum_row_snd, &v_sum_row_fst, + &v_sum_row_snd, &mul_fst, &mul_snd, strength, rounding, + weight); + } + highbd_accumulate_and_store_8(u_sum_row_fst, u_sum_row_snd, u_pre, u_count, + u_accum); + highbd_accumulate_and_store_8(v_sum_row_fst, v_sum_row_snd, v_pre, v_count, + v_accum); + + u_src += uv_src_stride; + u_pre += uv_pre_stride; + u_dist += DIST_STRIDE; + v_src += uv_src_stride; + v_pre += uv_pre_stride; + v_dist += DIST_STRIDE; + u_count += uv_pre_stride; + u_accum += uv_pre_stride; + v_count += uv_pre_stride; + v_accum += uv_pre_stride; + + y_src += y_src_stride * (1 + ss_y); + y_pre += y_pre_stride * (1 + ss_y); + y_dist += DIST_STRIDE * (1 + ss_y); + + // Then all the rows except the last one + mul_fst = _mm_load_si128((const __m128i *)neighbors_fst[1]); + mul_snd = _mm_load_si128((const __m128i *)neighbors_snd[1]); + + for (h = 1; h < uv_block_height - 1; ++h) { + // Move the weight pointer to the bottom half of the blocks + if (h == uv_block_height / 2) { + if (blk_fw) { + blk_fw += 2; + } else { + weight = bottom_weight; + } + } + + // Shift the rows up + u_sum_row_1_fst = u_sum_row_2_fst; + u_sum_row_2_fst = u_sum_row_3_fst; + u_sum_row_1_snd = u_sum_row_2_snd; + u_sum_row_2_snd = u_sum_row_3_snd; + + v_sum_row_1_fst = v_sum_row_2_fst; + v_sum_row_2_fst = v_sum_row_3_fst; + v_sum_row_1_snd = v_sum_row_2_snd; + v_sum_row_2_snd = v_sum_row_3_snd; + + // Add chroma values + u_sum_row_fst = _mm_add_epi32(u_sum_row_1_fst, u_sum_row_2_fst); + u_sum_row_snd = _mm_add_epi32(u_sum_row_1_snd, u_sum_row_2_snd); + highbd_get_sum_8(u_dist + DIST_STRIDE, &u_sum_row_3_fst, &u_sum_row_3_snd); + u_sum_row_fst = _mm_add_epi32(u_sum_row_fst, u_sum_row_3_fst); + u_sum_row_snd = _mm_add_epi32(u_sum_row_snd, u_sum_row_3_snd); + + v_sum_row_fst = _mm_add_epi32(v_sum_row_1_fst, v_sum_row_2_fst); + v_sum_row_snd = _mm_add_epi32(v_sum_row_1_snd, v_sum_row_2_snd); + highbd_get_sum_8(v_dist + DIST_STRIDE, &v_sum_row_3_fst, &v_sum_row_3_snd); + v_sum_row_fst = _mm_add_epi32(v_sum_row_fst, v_sum_row_3_fst); + v_sum_row_snd = _mm_add_epi32(v_sum_row_snd, v_sum_row_3_snd); + + // Add luma values + highbd_add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row_fst, + &u_sum_row_snd, &v_sum_row_fst, + &v_sum_row_snd); + + // Get modifier and store result + if (blk_fw) { + highbd_average_4(&u_sum_row_fst, &u_sum_row_fst, &mul_fst, strength, + rounding, blk_fw[0]); + highbd_average_4(&u_sum_row_snd, &u_sum_row_snd, &mul_snd, strength, + rounding, blk_fw[1]); + + highbd_average_4(&v_sum_row_fst, &v_sum_row_fst, &mul_fst, strength, + rounding, blk_fw[0]); + highbd_average_4(&v_sum_row_snd, &v_sum_row_snd, &mul_snd, strength, + rounding, blk_fw[1]); + + } else { + 
highbd_average_8(&u_sum_row_fst, &u_sum_row_snd, &u_sum_row_fst, + &u_sum_row_snd, &mul_fst, &mul_snd, strength, rounding, + weight); + highbd_average_8(&v_sum_row_fst, &v_sum_row_snd, &v_sum_row_fst, + &v_sum_row_snd, &mul_fst, &mul_snd, strength, rounding, + weight); + } + + highbd_accumulate_and_store_8(u_sum_row_fst, u_sum_row_snd, u_pre, u_count, + u_accum); + highbd_accumulate_and_store_8(v_sum_row_fst, v_sum_row_snd, v_pre, v_count, + v_accum); + + u_src += uv_src_stride; + u_pre += uv_pre_stride; + u_dist += DIST_STRIDE; + v_src += uv_src_stride; + v_pre += uv_pre_stride; + v_dist += DIST_STRIDE; + u_count += uv_pre_stride; + u_accum += uv_pre_stride; + v_count += uv_pre_stride; + v_accum += uv_pre_stride; + + y_src += y_src_stride * (1 + ss_y); + y_pre += y_pre_stride * (1 + ss_y); + y_dist += DIST_STRIDE * (1 + ss_y); + } + + // The last row + mul_fst = _mm_load_si128((const __m128i *)neighbors_fst[0]); + mul_snd = _mm_load_si128((const __m128i *)neighbors_snd[0]); + + // Shift the rows up + u_sum_row_1_fst = u_sum_row_2_fst; + u_sum_row_2_fst = u_sum_row_3_fst; + u_sum_row_1_snd = u_sum_row_2_snd; + u_sum_row_2_snd = u_sum_row_3_snd; + + v_sum_row_1_fst = v_sum_row_2_fst; + v_sum_row_2_fst = v_sum_row_3_fst; + v_sum_row_1_snd = v_sum_row_2_snd; + v_sum_row_2_snd = v_sum_row_3_snd; + + // Add chroma values + u_sum_row_fst = _mm_add_epi32(u_sum_row_1_fst, u_sum_row_2_fst); + v_sum_row_fst = _mm_add_epi32(v_sum_row_1_fst, v_sum_row_2_fst); + u_sum_row_snd = _mm_add_epi32(u_sum_row_1_snd, u_sum_row_2_snd); + v_sum_row_snd = _mm_add_epi32(v_sum_row_1_snd, v_sum_row_2_snd); + + // Add luma values + highbd_add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row_fst, + &u_sum_row_snd, &v_sum_row_fst, + &v_sum_row_snd); + + // Get modifier and store result + if (blk_fw) { + highbd_average_4(&u_sum_row_fst, &u_sum_row_fst, &mul_fst, strength, + rounding, blk_fw[0]); + highbd_average_4(&u_sum_row_snd, &u_sum_row_snd, &mul_snd, strength, + rounding, blk_fw[1]); + + highbd_average_4(&v_sum_row_fst, &v_sum_row_fst, &mul_fst, strength, + rounding, blk_fw[0]); + highbd_average_4(&v_sum_row_snd, &v_sum_row_snd, &mul_snd, strength, + rounding, blk_fw[1]); + + } else { + highbd_average_8(&u_sum_row_fst, &u_sum_row_snd, &u_sum_row_fst, + &u_sum_row_snd, &mul_fst, &mul_snd, strength, rounding, + weight); + highbd_average_8(&v_sum_row_fst, &v_sum_row_snd, &v_sum_row_fst, + &v_sum_row_snd, &mul_fst, &mul_snd, strength, rounding, + weight); + } + + highbd_accumulate_and_store_8(u_sum_row_fst, u_sum_row_snd, u_pre, u_count, + u_accum); + highbd_accumulate_and_store_8(v_sum_row_fst, v_sum_row_snd, v_pre, v_count, + v_accum); +} + +// Perform temporal filter for the chroma components. 
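One structural note on the dispatcher below (an illustrative reading, not new behavior): a 32-pixel-wide block yields left, middle and right chroma strips of 8 pixels each, but a 16-wide block with horizontal subsampling leaves a single 8-pixel strip. That path pairs the left-edge table for the first four pixels with the right-edge table for the last four and, when per-sub-block weights are in use, passes blk_fw straight through:

/* Number of 8-pixel chroma strips the column sweep will visit. */
static unsigned int tf_chroma_strips(unsigned int block_width, int ss_x) {
  const unsigned int uv_width = block_width >> ss_x;
  return uv_width / 8; /* 1 -> single-strip special case; >= 2 -> sweep */
}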
+static void vp9_highbd_apply_temporal_filter_chroma( + const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, + int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, + int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, + int uv_pre_stride, unsigned int block_width, unsigned int block_height, + int ss_x, int ss_y, int strength, const int *blk_fw, int use_whole_blk, + uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count, + const uint32_t *y_dist, const uint32_t *u_dist, const uint32_t *v_dist) { + const unsigned int uv_width = block_width >> ss_x, + uv_height = block_height >> ss_y; + + unsigned int blk_col = 0, uv_blk_col = 0; + const unsigned int uv_blk_col_step = 8, blk_col_step = 8 << ss_x; + const unsigned int uv_mid_width = uv_width >> 1, + uv_last_width = uv_width - uv_blk_col_step; + int top_weight = blk_fw[0], + bottom_weight = use_whole_blk ? blk_fw[0] : blk_fw[2]; + const uint32_t *const *neighbors_fst; + const uint32_t *const *neighbors_snd; + + if (uv_width == 8) { + // Special Case: We are subsampling in x direction on a 16x16 block. Since + // we are operating on a row of 8 chroma pixels, we can't use the usual + // left-middle-right pattern. + assert(ss_x); + + if (ss_y) { + neighbors_fst = HIGHBD_CHROMA_DOUBLE_SS_LEFT_COLUMN_NEIGHBORS; + neighbors_snd = HIGHBD_CHROMA_DOUBLE_SS_RIGHT_COLUMN_NEIGHBORS; + } else { + neighbors_fst = HIGHBD_CHROMA_SINGLE_SS_LEFT_COLUMN_NEIGHBORS; + neighbors_snd = HIGHBD_CHROMA_SINGLE_SS_RIGHT_COLUMN_NEIGHBORS; + } + + if (use_whole_blk) { + vp9_highbd_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, + uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, + u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, + neighbors_fst, neighbors_snd, top_weight, bottom_weight, NULL); + } else { + vp9_highbd_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, + uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, + u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, + neighbors_fst, neighbors_snd, 0, 0, blk_fw); + } + + return; + } + + // Left + if (ss_x && ss_y) { + neighbors_fst = HIGHBD_CHROMA_DOUBLE_SS_LEFT_COLUMN_NEIGHBORS; + neighbors_snd = HIGHBD_CHROMA_DOUBLE_SS_MIDDLE_COLUMN_NEIGHBORS; + } else if (ss_x || ss_y) { + neighbors_fst = HIGHBD_CHROMA_SINGLE_SS_LEFT_COLUMN_NEIGHBORS; + neighbors_snd = HIGHBD_CHROMA_SINGLE_SS_MIDDLE_COLUMN_NEIGHBORS; + } else { + neighbors_fst = HIGHBD_CHROMA_NO_SS_LEFT_COLUMN_NEIGHBORS; + neighbors_snd = HIGHBD_CHROMA_NO_SS_MIDDLE_COLUMN_NEIGHBORS; + } + + vp9_highbd_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, + v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, + strength, u_accum + uv_blk_col, u_count + uv_blk_col, + v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, + u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_fst, neighbors_snd, + top_weight, bottom_weight, NULL); + + blk_col += blk_col_step; + uv_blk_col += 
uv_blk_col_step; + + // Middle First + if (ss_x && ss_y) { + neighbors_fst = HIGHBD_CHROMA_DOUBLE_SS_MIDDLE_COLUMN_NEIGHBORS; + } else if (ss_x || ss_y) { + neighbors_fst = HIGHBD_CHROMA_SINGLE_SS_MIDDLE_COLUMN_NEIGHBORS; + } else { + neighbors_fst = HIGHBD_CHROMA_NO_SS_MIDDLE_COLUMN_NEIGHBORS; + } + + for (; uv_blk_col < uv_mid_width; + blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { + vp9_highbd_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, + uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, + u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, + neighbors_fst, neighbors_snd, top_weight, bottom_weight, NULL); + } + + if (!use_whole_blk) { + top_weight = blk_fw[1]; + bottom_weight = blk_fw[3]; + } + + // Middle Second + for (; uv_blk_col < uv_last_width; + blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { + vp9_highbd_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, + uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, + u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, + neighbors_fst, neighbors_snd, top_weight, bottom_weight, NULL); + } + + // Right + if (ss_x && ss_y) { + neighbors_snd = HIGHBD_CHROMA_DOUBLE_SS_RIGHT_COLUMN_NEIGHBORS; + } else if (ss_x || ss_y) { + neighbors_snd = HIGHBD_CHROMA_SINGLE_SS_RIGHT_COLUMN_NEIGHBORS; + } else { + neighbors_snd = HIGHBD_CHROMA_NO_SS_RIGHT_COLUMN_NEIGHBORS; + } + + vp9_highbd_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, + v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, + strength, u_accum + uv_blk_col, u_count + uv_blk_col, + v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, + u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_fst, neighbors_snd, + top_weight, bottom_weight, NULL); +} + +void vp9_highbd_apply_temporal_filter_sse4_1( + const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, + int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, + int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, + int uv_pre_stride, unsigned int block_width, unsigned int block_height, + int ss_x, int ss_y, int strength, const int *const blk_fw, + int use_whole_blk, uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, + uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count) { + const unsigned int chroma_height = block_height >> ss_y, + chroma_width = block_width >> ss_x; + + DECLARE_ALIGNED(16, uint32_t, y_dist[BH * DIST_STRIDE]) = { 0 }; + DECLARE_ALIGNED(16, uint32_t, u_dist[BH * DIST_STRIDE]) = { 0 }; + DECLARE_ALIGNED(16, uint32_t, v_dist[BH * DIST_STRIDE]) = { 0 }; + + uint32_t *y_dist_ptr = y_dist + 1, *u_dist_ptr = u_dist + 1, + *v_dist_ptr = v_dist + 1; + const uint16_t *y_src_ptr = y_src, *u_src_ptr = u_src, *v_src_ptr = v_src; + const uint16_t *y_pre_ptr = y_pre, *u_pre_ptr = u_pre, *v_pre_ptr = v_pre; + + // Loop variables + unsigned int row, blk_col; + + assert(block_width <= BW && "block width too large"); + assert(block_height <= BH && 
"block height too large"); + assert(block_width % 16 == 0 && "block width must be multiple of 16"); + assert(block_height % 2 == 0 && "block height must be even"); + assert((ss_x == 0 || ss_x == 1) && (ss_y == 0 || ss_y == 1) && + "invalid chroma subsampling"); + assert(strength >= 4 && strength <= 14 && + "invalid adjusted temporal filter strength"); + assert(blk_fw[0] >= 0 && "filter weight must be positive"); + assert( + (use_whole_blk || (blk_fw[1] >= 0 && blk_fw[2] >= 0 && blk_fw[3] >= 0)) && + "subblock filter weight must be positive"); + assert(blk_fw[0] <= 2 && "sublock filter weight must be less than 2"); + assert( + (use_whole_blk || (blk_fw[1] <= 2 && blk_fw[2] <= 2 && blk_fw[3] <= 2)) && + "subblock filter weight must be less than 2"); + + // Precompute the difference squared + for (row = 0; row < block_height; row++) { + for (blk_col = 0; blk_col < block_width; blk_col += 8) { + highbd_store_dist_8(y_src_ptr + blk_col, y_pre_ptr + blk_col, + y_dist_ptr + blk_col); + } + y_src_ptr += y_src_stride; + y_pre_ptr += y_pre_stride; + y_dist_ptr += DIST_STRIDE; + } + + for (row = 0; row < chroma_height; row++) { + for (blk_col = 0; blk_col < chroma_width; blk_col += 8) { + highbd_store_dist_8(u_src_ptr + blk_col, u_pre_ptr + blk_col, + u_dist_ptr + blk_col); + highbd_store_dist_8(v_src_ptr + blk_col, v_pre_ptr + blk_col, + v_dist_ptr + blk_col); + } + + u_src_ptr += uv_src_stride; + u_pre_ptr += uv_pre_stride; + u_dist_ptr += DIST_STRIDE; + v_src_ptr += uv_src_stride; + v_pre_ptr += uv_pre_stride; + v_dist_ptr += DIST_STRIDE; + } + + y_dist_ptr = y_dist + 1; + u_dist_ptr = u_dist + 1; + v_dist_ptr = v_dist + 1; + + vp9_highbd_apply_temporal_filter_luma( + y_src, y_src_stride, y_pre, y_pre_stride, u_src, v_src, uv_src_stride, + u_pre, v_pre, uv_pre_stride, block_width, block_height, ss_x, ss_y, + strength, blk_fw, use_whole_blk, y_accum, y_count, y_dist_ptr, u_dist_ptr, + v_dist_ptr); + + vp9_highbd_apply_temporal_filter_chroma( + y_src, y_src_stride, y_pre, y_pre_stride, u_src, v_src, uv_src_stride, + u_pre, v_pre, uv_pre_stride, block_width, block_height, ss_x, ss_y, + strength, blk_fw, use_whole_blk, u_accum, u_count, v_accum, v_count, + y_dist_ptr, u_dist_ptr, v_dist_ptr); +} diff --git a/media/libvpx/libvpx/vp9/encoder/x86/temporal_filter_constants.h b/media/libvpx/libvpx/vp9/encoder/x86/temporal_filter_constants.h new file mode 100644 index 000000000000..7dcedda192a9 --- /dev/null +++ b/media/libvpx/libvpx/vp9/encoder/x86/temporal_filter_constants.h @@ -0,0 +1,410 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_ENCODER_X86_TEMPORAL_FILTER_CONSTANTS_H_ +#define VPX_VP9_ENCODER_X86_TEMPORAL_FILTER_CONSTANTS_H_ +#include "./vpx_config.h" + +// Division using multiplication and shifting. The C implementation does: +// modifier *= 3; +// modifier /= index; +// where 'modifier' is a set of summed values and 'index' is the number of +// summed values. +// +// This equation works out to (m * 3) / i which reduces to: +// m * 3/4 +// m * 1/2 +// m * 1/3 +// +// By pairing the multiply with a down shift by 16 (_mm_mulhi_epu16): +// m * C / 65536 +// we can create a C to replicate the division. 
+//
+// m * 49152 / 65536 = m * 3/4
+// m * 32768 / 65536 = m * 1/2
+// m * 21846 / 65536 = m * 0.3333
+//
+// These are loaded using an instruction expecting int16_t values but are used
+// with _mm_mulhi_epu16(), which treats them as unsigned.
+#define NEIGHBOR_CONSTANT_4 (int16_t)49152
+#define NEIGHBOR_CONSTANT_5 (int16_t)39322
+#define NEIGHBOR_CONSTANT_6 (int16_t)32768
+#define NEIGHBOR_CONSTANT_7 (int16_t)28087
+#define NEIGHBOR_CONSTANT_8 (int16_t)24576
+#define NEIGHBOR_CONSTANT_9 (int16_t)21846
+#define NEIGHBOR_CONSTANT_10 (int16_t)19661
+#define NEIGHBOR_CONSTANT_11 (int16_t)17874
+#define NEIGHBOR_CONSTANT_13 (int16_t)15124
+
+DECLARE_ALIGNED(16, static const int16_t, LEFT_CORNER_NEIGHBORS_PLUS_1[8]) = {
+  NEIGHBOR_CONSTANT_5, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7,
+  NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7,
+  NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7
+};
+
+DECLARE_ALIGNED(16, static const int16_t, RIGHT_CORNER_NEIGHBORS_PLUS_1[8]) = {
+  NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7,
+  NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7,
+  NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_5
+};
+
+DECLARE_ALIGNED(16, static const int16_t, LEFT_EDGE_NEIGHBORS_PLUS_1[8]) = {
+  NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10,
+  NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10,
+  NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10
+};
+
+DECLARE_ALIGNED(16, static const int16_t, RIGHT_EDGE_NEIGHBORS_PLUS_1[8]) = {
+  NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10,
+  NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10,
+  NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_7
+};
+
+DECLARE_ALIGNED(16, static const int16_t, MIDDLE_EDGE_NEIGHBORS_PLUS_1[8]) = {
+  NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7,
+  NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7,
+  NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7
+};
+
+DECLARE_ALIGNED(16, static const int16_t, MIDDLE_CENTER_NEIGHBORS_PLUS_1[8]) = {
+  NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10,
+  NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10,
+  NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10
+};
+
+DECLARE_ALIGNED(16, static const int16_t, LEFT_CORNER_NEIGHBORS_PLUS_2[8]) = {
+  NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8,
+  NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8,
+  NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8
+};
+
+DECLARE_ALIGNED(16, static const int16_t, RIGHT_CORNER_NEIGHBORS_PLUS_2[8]) = {
+  NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8,
+  NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8,
+  NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_6
+};
+
+DECLARE_ALIGNED(16, static const int16_t, LEFT_EDGE_NEIGHBORS_PLUS_2[8]) = {
+  NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11,
+  NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11,
+  NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11
+};
+
+DECLARE_ALIGNED(16, static const int16_t, RIGHT_EDGE_NEIGHBORS_PLUS_2[8]) = {
+  NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11,
+  NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11,
+  NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_8
+};
+
+DECLARE_ALIGNED(16, static const int16_t, MIDDLE_EDGE_NEIGHBORS_PLUS_2[8]) = {
+  NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8,
+  NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8,
+  NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8
+};
+
+DECLARE_ALIGNED(16,
static const int16_t, MIDDLE_CENTER_NEIGHBORS_PLUS_2[8]) = { + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11 +}; + +DECLARE_ALIGNED(16, static const int16_t, TWO_CORNER_NEIGHBORS_PLUS_2[8]) = { + NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_6 +}; + +DECLARE_ALIGNED(16, static const int16_t, TWO_EDGE_NEIGHBORS_PLUS_2[8]) = { + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const int16_t, LEFT_CORNER_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const int16_t, RIGHT_CORNER_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const int16_t, LEFT_EDGE_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13 +}; + +DECLARE_ALIGNED(16, static const int16_t, RIGHT_EDGE_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const int16_t, MIDDLE_EDGE_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const int16_t, MIDDLE_CENTER_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13 +}; + +DECLARE_ALIGNED(16, static const int16_t, TWO_CORNER_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const int16_t, TWO_EDGE_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_10 +}; + +static const int16_t *const LUMA_LEFT_COLUMN_NEIGHBORS[2] = { + LEFT_CORNER_NEIGHBORS_PLUS_2, LEFT_EDGE_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const LUMA_MIDDLE_COLUMN_NEIGHBORS[2] = { + MIDDLE_EDGE_NEIGHBORS_PLUS_2, MIDDLE_CENTER_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const LUMA_RIGHT_COLUMN_NEIGHBORS[2] = { + RIGHT_CORNER_NEIGHBORS_PLUS_2, RIGHT_EDGE_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const CHROMA_NO_SS_LEFT_COLUMN_NEIGHBORS[2] = { + LEFT_CORNER_NEIGHBORS_PLUS_1, LEFT_EDGE_NEIGHBORS_PLUS_1 +}; + +static const int16_t *const CHROMA_NO_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { + MIDDLE_EDGE_NEIGHBORS_PLUS_1, 
MIDDLE_CENTER_NEIGHBORS_PLUS_1 +}; + +static const int16_t *const CHROMA_NO_SS_RIGHT_COLUMN_NEIGHBORS[2] = { + RIGHT_CORNER_NEIGHBORS_PLUS_1, RIGHT_EDGE_NEIGHBORS_PLUS_1 +}; + +static const int16_t *const CHROMA_SINGLE_SS_LEFT_COLUMN_NEIGHBORS[2] = { + LEFT_CORNER_NEIGHBORS_PLUS_2, LEFT_EDGE_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const CHROMA_SINGLE_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { + MIDDLE_EDGE_NEIGHBORS_PLUS_2, MIDDLE_CENTER_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const CHROMA_SINGLE_SS_RIGHT_COLUMN_NEIGHBORS[2] = { + RIGHT_CORNER_NEIGHBORS_PLUS_2, RIGHT_EDGE_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const CHROMA_SINGLE_SS_SINGLE_COLUMN_NEIGHBORS[2] = { + TWO_CORNER_NEIGHBORS_PLUS_2, TWO_EDGE_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const CHROMA_DOUBLE_SS_LEFT_COLUMN_NEIGHBORS[2] = { + LEFT_CORNER_NEIGHBORS_PLUS_4, LEFT_EDGE_NEIGHBORS_PLUS_4 +}; + +static const int16_t *const CHROMA_DOUBLE_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { + MIDDLE_EDGE_NEIGHBORS_PLUS_4, MIDDLE_CENTER_NEIGHBORS_PLUS_4 +}; + +static const int16_t *const CHROMA_DOUBLE_SS_RIGHT_COLUMN_NEIGHBORS[2] = { + RIGHT_CORNER_NEIGHBORS_PLUS_4, RIGHT_EDGE_NEIGHBORS_PLUS_4 +}; + +static const int16_t *const CHROMA_DOUBLE_SS_SINGLE_COLUMN_NEIGHBORS[2] = { + TWO_CORNER_NEIGHBORS_PLUS_4, TWO_EDGE_NEIGHBORS_PLUS_4 +}; + +#if CONFIG_VP9_HIGHBITDEPTH +#define HIGHBD_NEIGHBOR_CONSTANT_4 (uint32_t)3221225472U +#define HIGHBD_NEIGHBOR_CONSTANT_5 (uint32_t)2576980378U +#define HIGHBD_NEIGHBOR_CONSTANT_6 (uint32_t)2147483648U +#define HIGHBD_NEIGHBOR_CONSTANT_7 (uint32_t)1840700270U +#define HIGHBD_NEIGHBOR_CONSTANT_8 (uint32_t)1610612736U +#define HIGHBD_NEIGHBOR_CONSTANT_9 (uint32_t)1431655766U +#define HIGHBD_NEIGHBOR_CONSTANT_10 (uint32_t)1288490189U +#define HIGHBD_NEIGHBOR_CONSTANT_11 (uint32_t)1171354718U +#define HIGHBD_NEIGHBOR_CONSTANT_13 (uint32_t)991146300U + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_1[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_5, HIGHBD_NEIGHBOR_CONSTANT_7, + HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_7 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_1[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_7, + HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_5 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_1[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_10, + HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_1[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, + HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_7 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_1[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_7, + HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_7 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_1[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, + HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_2[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_6, HIGHBD_NEIGHBOR_CONSTANT_8, + HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_2[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_8, + HIGHBD_NEIGHBOR_CONSTANT_8, 
HIGHBD_NEIGHBOR_CONSTANT_6 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_2[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_11, + HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_11 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_2[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_11, + HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_2[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_8, + HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_2[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_11, + HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_11 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_4[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_10, + HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_4[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, + HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_4[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_13, + HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_13 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_4[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_13, + HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_4[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, + HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const uint32_t, + HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_4[4]) = { + HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_13, + HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_13 +}; + +static const uint32_t *const HIGHBD_LUMA_LEFT_COLUMN_NEIGHBORS[2] = { + HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_2, HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_2 +}; + +static const uint32_t *const HIGHBD_LUMA_MIDDLE_COLUMN_NEIGHBORS[2] = { + HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_2, HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_2 +}; + +static const uint32_t *const HIGHBD_LUMA_RIGHT_COLUMN_NEIGHBORS[2] = { + HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_2, HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_2 +}; + +static const uint32_t *const HIGHBD_CHROMA_NO_SS_LEFT_COLUMN_NEIGHBORS[2] = { + HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_1, HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_1 +}; + +static const uint32_t *const HIGHBD_CHROMA_NO_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { + HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_1, HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_1 +}; + +static const uint32_t *const HIGHBD_CHROMA_NO_SS_RIGHT_COLUMN_NEIGHBORS[2] = { + HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_1, HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_1 +}; + +static const uint32_t + *const HIGHBD_CHROMA_SINGLE_SS_LEFT_COLUMN_NEIGHBORS[2] = { + HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_2, HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_2 + }; + +static const uint32_t + *const HIGHBD_CHROMA_SINGLE_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { + HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_2, HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_2 + }; + +static const uint32_t + *const HIGHBD_CHROMA_SINGLE_SS_RIGHT_COLUMN_NEIGHBORS[2] = { + 
HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_2, HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_2 + }; + +static const uint32_t + *const HIGHBD_CHROMA_DOUBLE_SS_LEFT_COLUMN_NEIGHBORS[2] = { + HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_4, HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_4 + }; + +static const uint32_t + *const HIGHBD_CHROMA_DOUBLE_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { + HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_4, HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_4 + }; + +static const uint32_t + *const HIGHBD_CHROMA_DOUBLE_SS_RIGHT_COLUMN_NEIGHBORS[2] = { + HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_4, HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_4 + }; +#endif // CONFIG_VP9_HIGHBITDEPTH + +#define DIST_STRIDE ((BW) + 2) + +#endif // VPX_VP9_ENCODER_X86_TEMPORAL_FILTER_CONSTANTS_H_ diff --git a/media/libvpx/libvpx/vp9/encoder/x86/temporal_filter_sse4.c b/media/libvpx/libvpx/vp9/encoder/x86/temporal_filter_sse4.c index 460dab659380..437f49f5a0db 100644 --- a/media/libvpx/libvpx/vp9/encoder/x86/temporal_filter_sse4.c +++ b/media/libvpx/libvpx/vp9/encoder/x86/temporal_filter_sse4.c @@ -14,96 +14,58 @@ #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vpx/vpx_integer.h" +#include "vp9/encoder/vp9_encoder.h" +#include "vp9/encoder/vp9_temporal_filter.h" +#include "vp9/encoder/x86/temporal_filter_constants.h" -// Division using multiplication and shifting. The C implementation does: -// modifier *= 3; -// modifier /= index; -// where 'modifier' is a set of summed values and 'index' is the number of -// summed values. 'index' may be 4, 6, or 9, representing a block of 9 values -// which may be bound by the edges of the block being filtered. -// -// This equation works out to (m * 3) / i which reduces to: -// m * 3/4 -// m * 1/2 -// m * 1/3 -// -// By pairing the multiply with a down shift by 16 (_mm_mulhi_epu16): -// m * C / 65536 -// we can create a C to replicate the division. -// -// m * 49152 / 65536 = m * 3/4 -// m * 32758 / 65536 = m * 1/2 -// m * 21846 / 65536 = m * 0.3333 -// -// These are loaded using an instruction expecting int16_t values but are used -// with _mm_mulhi_epu16(), which treats them as unsigned. -#define NEIGHBOR_CONSTANT_4 (int16_t)49152 -#define NEIGHBOR_CONSTANT_6 (int16_t)32768 -#define NEIGHBOR_CONSTANT_9 (int16_t)21846 +// Read in 8 pixels from a and b as 8-bit unsigned integers, compute the +// difference squared, and store as unsigned 16-bit integer to dst. +static INLINE void store_dist_8(const uint8_t *a, const uint8_t *b, + uint16_t *dst) { + const __m128i a_reg = _mm_loadl_epi64((const __m128i *)a); + const __m128i b_reg = _mm_loadl_epi64((const __m128i *)b); -// Load values from 'a' and 'b'. Compute the difference squared and sum -// neighboring values such that: -// sum[1] = (a[0]-b[0])^2 + (a[1]-b[1])^2 + (a[2]-b[2])^2 -// Values to the left and right of the row are set to 0. -// The values are returned in sum_0 and sum_1 as *unsigned* 16 bit values. 
-static void sum_8(const uint8_t *a, const uint8_t *b, __m128i *sum) { - const __m128i a_u8 = _mm_loadl_epi64((const __m128i *)a); - const __m128i b_u8 = _mm_loadl_epi64((const __m128i *)b); + const __m128i a_first = _mm_cvtepu8_epi16(a_reg); + const __m128i b_first = _mm_cvtepu8_epi16(b_reg); - const __m128i a_u16 = _mm_cvtepu8_epi16(a_u8); - const __m128i b_u16 = _mm_cvtepu8_epi16(b_u8); + __m128i dist_first; - const __m128i diff_s16 = _mm_sub_epi16(a_u16, b_u16); - const __m128i diff_sq_u16 = _mm_mullo_epi16(diff_s16, diff_s16); + dist_first = _mm_sub_epi16(a_first, b_first); + dist_first = _mm_mullo_epi16(dist_first, dist_first); - // Shift all the values one place to the left/right so we can efficiently sum - // diff_sq_u16[i - 1] + diff_sq_u16[i] + diff_sq_u16[i + 1]. - const __m128i shift_left = _mm_slli_si128(diff_sq_u16, 2); - const __m128i shift_right = _mm_srli_si128(diff_sq_u16, 2); - - // It becomes necessary to treat the values as unsigned at this point. The - // 255^2 fits in uint16_t but not int16_t. Use saturating adds from this point - // forward since the filter is only applied to smooth small pixel changes. - // Once the value has saturated to uint16_t it is well outside the useful - // range. - __m128i sum_u16 = _mm_adds_epu16(diff_sq_u16, shift_left); - sum_u16 = _mm_adds_epu16(sum_u16, shift_right); - - *sum = sum_u16; + _mm_storeu_si128((__m128i *)dst, dist_first); } -static void sum_16(const uint8_t *a, const uint8_t *b, __m128i *sum_0, - __m128i *sum_1) { +static INLINE void store_dist_16(const uint8_t *a, const uint8_t *b, + uint16_t *dst) { const __m128i zero = _mm_setzero_si128(); - const __m128i a_u8 = _mm_loadu_si128((const __m128i *)a); - const __m128i b_u8 = _mm_loadu_si128((const __m128i *)b); + const __m128i a_reg = _mm_loadu_si128((const __m128i *)a); + const __m128i b_reg = _mm_loadu_si128((const __m128i *)b); - const __m128i a_0_u16 = _mm_cvtepu8_epi16(a_u8); - const __m128i a_1_u16 = _mm_unpackhi_epi8(a_u8, zero); - const __m128i b_0_u16 = _mm_cvtepu8_epi16(b_u8); - const __m128i b_1_u16 = _mm_unpackhi_epi8(b_u8, zero); + const __m128i a_first = _mm_cvtepu8_epi16(a_reg); + const __m128i a_second = _mm_unpackhi_epi8(a_reg, zero); + const __m128i b_first = _mm_cvtepu8_epi16(b_reg); + const __m128i b_second = _mm_unpackhi_epi8(b_reg, zero); - const __m128i diff_0_s16 = _mm_sub_epi16(a_0_u16, b_0_u16); - const __m128i diff_1_s16 = _mm_sub_epi16(a_1_u16, b_1_u16); - const __m128i diff_sq_0_u16 = _mm_mullo_epi16(diff_0_s16, diff_0_s16); - const __m128i diff_sq_1_u16 = _mm_mullo_epi16(diff_1_s16, diff_1_s16); + __m128i dist_first, dist_second; - __m128i shift_left = _mm_slli_si128(diff_sq_0_u16, 2); - // Use _mm_alignr_epi8() to "shift in" diff_sq_u16[8]. 
- __m128i shift_right = _mm_alignr_epi8(diff_sq_1_u16, diff_sq_0_u16, 2); + dist_first = _mm_sub_epi16(a_first, b_first); + dist_second = _mm_sub_epi16(a_second, b_second); + dist_first = _mm_mullo_epi16(dist_first, dist_first); + dist_second = _mm_mullo_epi16(dist_second, dist_second); - __m128i sum_u16 = _mm_adds_epu16(diff_sq_0_u16, shift_left); - sum_u16 = _mm_adds_epu16(sum_u16, shift_right); + _mm_storeu_si128((__m128i *)dst, dist_first); + _mm_storeu_si128((__m128i *)(dst + 8), dist_second); +} - *sum_0 = sum_u16; +static INLINE void read_dist_8(const uint16_t *dist, __m128i *dist_reg) { + *dist_reg = _mm_loadu_si128((const __m128i *)dist); +} - shift_left = _mm_alignr_epi8(diff_sq_1_u16, diff_sq_0_u16, 14); - shift_right = _mm_srli_si128(diff_sq_1_u16, 2); - - sum_u16 = _mm_adds_epu16(diff_sq_1_u16, shift_left); - sum_u16 = _mm_adds_epu16(sum_u16, shift_right); - - *sum_1 = sum_u16; +static INLINE void read_dist_16(const uint16_t *dist, __m128i *reg_first, + __m128i *reg_second) { + read_dist_8(dist, reg_first); + read_dist_8(dist + 8, reg_second); } // Average the value based on the number of values summed (9 for pixels away @@ -111,17 +73,17 @@ static void sum_16(const uint8_t *a, const uint8_t *b, __m128i *sum_0, // // Add in the rounding factor and shift, clamp to 16, invert and shift. Multiply // by weight. -static __m128i average_8(__m128i sum, const __m128i mul_constants, - const int strength, const int rounding, - const int weight) { +static INLINE __m128i average_8(__m128i sum, const __m128i *mul_constants, + const int strength, const int rounding, + const __m128i *weight) { // _mm_srl_epi16 uses the lower 64 bit value for the shift. const __m128i strength_u128 = _mm_set_epi32(0, 0, 0, strength); const __m128i rounding_u16 = _mm_set1_epi16(rounding); - const __m128i weight_u16 = _mm_set1_epi16(weight); + const __m128i weight_u16 = *weight; const __m128i sixteen = _mm_set1_epi16(16); // modifier * 3 / index; - sum = _mm_mulhi_epu16(sum, mul_constants); + sum = _mm_mulhi_epu16(sum, *mul_constants); sum = _mm_adds_epu16(sum, rounding_u16); sum = _mm_srl_epi16(sum, strength_u128); @@ -136,34 +98,6 @@ static __m128i average_8(__m128i sum, const __m128i mul_constants, return _mm_mullo_epi16(sum, weight_u16); } -static void average_16(__m128i *sum_0_u16, __m128i *sum_1_u16, - const __m128i mul_constants_0, - const __m128i mul_constants_1, const int strength, - const int rounding, const int weight) { - const __m128i strength_u128 = _mm_set_epi32(0, 0, 0, strength); - const __m128i rounding_u16 = _mm_set1_epi16(rounding); - const __m128i weight_u16 = _mm_set1_epi16(weight); - const __m128i sixteen = _mm_set1_epi16(16); - __m128i input_0, input_1; - - input_0 = _mm_mulhi_epu16(*sum_0_u16, mul_constants_0); - input_0 = _mm_adds_epu16(input_0, rounding_u16); - - input_1 = _mm_mulhi_epu16(*sum_1_u16, mul_constants_1); - input_1 = _mm_adds_epu16(input_1, rounding_u16); - - input_0 = _mm_srl_epi16(input_0, strength_u128); - input_1 = _mm_srl_epi16(input_1, strength_u128); - - input_0 = _mm_min_epu16(input_0, sixteen); - input_1 = _mm_min_epu16(input_1, sixteen); - input_0 = _mm_sub_epi16(sixteen, input_0); - input_1 = _mm_sub_epi16(sixteen, input_1); - - *sum_0_u16 = _mm_mullo_epi16(input_0, weight_u16); - *sum_1_u16 = _mm_mullo_epi16(input_1, weight_u16); -} - // Add 'sum_u16' to 'count'. Multiply by 'pred' and add to 'accumulator.' 
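// As a rough scalar sketch of what the 8-lane vector code below computes
// (an illustration, not the upstream code), each lane i does:
//   count[i] += sum_u16[i];
//   accumulator[i] += (uint32_t)sum_u16[i] * pred[i];
// The C temporal filter later derives each filtered pixel from
// accumulator / count (with rounding), which is why both running totals are
// kept per pixel.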
 static void accumulate_and_store_8(const __m128i sum_u16, const uint8_t *pred,
                                    uint16_t *count, uint32_t *accumulator) {
@@ -192,10 +126,10 @@ static void accumulate_and_store_8(const __m128i sum_u16, const uint8_t *pred,
   _mm_storeu_si128((__m128i *)(accumulator + 4), accum_1_u32);
 }
 
-static void accumulate_and_store_16(const __m128i sum_0_u16,
-                                    const __m128i sum_1_u16,
-                                    const uint8_t *pred, uint16_t *count,
-                                    uint32_t *accumulator) {
+static INLINE void accumulate_and_store_16(const __m128i sum_0_u16,
+                                           const __m128i sum_1_u16,
+                                           const uint8_t *pred, uint16_t *count,
+                                           uint32_t *accumulator) {
   const __m128i pred_u8 = _mm_loadu_si128((const __m128i *)pred);
   const __m128i zero = _mm_setzero_si128();
   __m128i count_0_u16 = _mm_loadu_si128((const __m128i *)count),
@@ -235,142 +169,768 @@ static void accumulate_and_store_16(const __m128i sum_0_u16,
   _mm_storeu_si128((__m128i *)(accumulator + 12), accum_3_u32);
 }
 
-void vp9_temporal_filter_apply_sse4_1(const uint8_t *a, unsigned int stride,
-                                      const uint8_t *b, unsigned int width,
-                                      unsigned int height, int strength,
-                                      int weight, uint32_t *accumulator,
-                                      uint16_t *count) {
+// Read in 8 pixels from y_dist. For each index i, compute y_dist[i-1] +
+// y_dist[i] + y_dist[i+1] and store in sum as 16-bit unsigned ints.
+static INLINE void get_sum_8(const uint16_t *y_dist, __m128i *sum) {
+  __m128i dist_reg, dist_left, dist_right;
+
+  dist_reg = _mm_loadu_si128((const __m128i *)y_dist);
+  dist_left = _mm_loadu_si128((const __m128i *)(y_dist - 1));
+  dist_right = _mm_loadu_si128((const __m128i *)(y_dist + 1));
+
+  *sum = _mm_adds_epu16(dist_reg, dist_left);
+  *sum = _mm_adds_epu16(*sum, dist_right);
+}
+
+// Read in 16 pixels from y_dist. For each index i, compute y_dist[i-1] +
+// y_dist[i] + y_dist[i+1]. Store the result for the first 8 pixels in
+// sum_first and the rest in sum_second.
+static INLINE void get_sum_16(const uint16_t *y_dist, __m128i *sum_first,
+                              __m128i *sum_second) {
+  get_sum_8(y_dist, sum_first);
+  get_sum_8(y_dist + 8, sum_second);
+}
+
+// Read in a row of chroma values that corresponds to a row of 16 luma values.
+static INLINE void read_chroma_dist_row_16(int ss_x, const uint16_t *u_dist,
+                                           const uint16_t *v_dist,
+                                           __m128i *u_first, __m128i *u_second,
+                                           __m128i *v_first,
+                                           __m128i *v_second) {
+  if (!ss_x) {
+    // If there is no chroma subsampling in the horizontal direction, then we
+    // need to load 16 entries from chroma.
+    read_dist_16(u_dist, u_first, u_second);
+    read_dist_16(v_dist, v_first, v_second);
+  } else {  // ss_x == 1
+    // Otherwise, we only need to load 8 entries
+    __m128i u_reg, v_reg;
+
+    read_dist_8(u_dist, &u_reg);
+
+    *u_first = _mm_unpacklo_epi16(u_reg, u_reg);
+    *u_second = _mm_unpackhi_epi16(u_reg, u_reg);
+
+    read_dist_8(v_dist, &v_reg);
+
+    *v_first = _mm_unpacklo_epi16(v_reg, v_reg);
+    *v_second = _mm_unpackhi_epi16(v_reg, v_reg);
+  }
+}
+
+// Horizontal add unsigned 16-bit ints in src and store them as signed 32-bit
+// ints in dst.
+static INLINE void hadd_epu16(__m128i *src, __m128i *dst) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i shift_right = _mm_srli_si128(*src, 2);
+
+  const __m128i odd = _mm_blend_epi16(shift_right, zero, 170);
+  const __m128i even = _mm_blend_epi16(*src, zero, 170);
+
+  *dst = _mm_add_epi32(even, odd);
+}
+
+// Add a row of luma distortion to 8 corresponding chroma mods.
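+// As a scalar sketch of the intent (illustrative only): each chroma modifier
+// accumulates every luma distortion value covered by its chroma sample. With
+// ss_x == 1 and ss_y == 1 (4:2:0), chroma pixel i picks up a 2x2 luma patch:
+//   u_mod[i] += y_dist[2 * i] + y_dist[2 * i + 1] +
+//               y_dist[DIST_STRIDE + 2 * i] + y_dist[DIST_STRIDE + 2 * i + 1];
+// (and likewise for v_mod); with no subsampling it adds just y_dist[i].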
+static INLINE void add_luma_dist_to_8_chroma_mod(const uint16_t *y_dist, + int ss_x, int ss_y, + __m128i *u_mod, + __m128i *v_mod) { + __m128i y_reg; + if (!ss_x) { + read_dist_8(y_dist, &y_reg); + if (ss_y == 1) { + __m128i y_tmp; + read_dist_8(y_dist + DIST_STRIDE, &y_tmp); + + y_reg = _mm_adds_epu16(y_reg, y_tmp); + } + } else { + __m128i y_first, y_second; + read_dist_16(y_dist, &y_first, &y_second); + if (ss_y == 1) { + __m128i y_tmp_0, y_tmp_1; + read_dist_16(y_dist + DIST_STRIDE, &y_tmp_0, &y_tmp_1); + + y_first = _mm_adds_epu16(y_first, y_tmp_0); + y_second = _mm_adds_epu16(y_second, y_tmp_1); + } + + hadd_epu16(&y_first, &y_first); + hadd_epu16(&y_second, &y_second); + + y_reg = _mm_packus_epi32(y_first, y_second); + } + + *u_mod = _mm_adds_epu16(*u_mod, y_reg); + *v_mod = _mm_adds_epu16(*v_mod, y_reg); +} + +// Apply temporal filter to the luma components. This performs temporal +// filtering on a luma block of 16 X block_height. Use blk_fw as an array of +// size 4 for the weights for each of the 4 subblocks if blk_fw is not NULL, +// else use top_weight for top half, and bottom weight for bottom half. +static void vp9_apply_temporal_filter_luma_16( + const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, + int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, + int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, + int uv_pre_stride, unsigned int block_width, unsigned int block_height, + int ss_x, int ss_y, int strength, int use_whole_blk, uint32_t *y_accum, + uint16_t *y_count, const uint16_t *y_dist, const uint16_t *u_dist, + const uint16_t *v_dist, const int16_t *const *neighbors_first, + const int16_t *const *neighbors_second, int top_weight, int bottom_weight, + const int *blk_fw) { + const int rounding = (1 << strength) >> 1; + __m128i weight_first, weight_second; + + __m128i mul_first, mul_second; + + __m128i sum_row_1_first, sum_row_1_second; + __m128i sum_row_2_first, sum_row_2_second; + __m128i sum_row_3_first, sum_row_3_second; + + __m128i u_first, u_second; + __m128i v_first, v_second; + + __m128i sum_row_first; + __m128i sum_row_second; + + // Loop variables unsigned int h; - const int rounding = strength > 0 ? 
1 << (strength - 1) : 0; assert(strength >= 0); assert(strength <= 6); - assert(weight >= 0); - assert(weight <= 2); + assert(block_width == 16); - assert(width == 8 || width == 16); + (void)block_width; - if (width == 8) { - __m128i sum_row_a, sum_row_b, sum_row_c; - __m128i mul_constants = _mm_setr_epi16( - NEIGHBOR_CONSTANT_4, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_4); - - sum_8(a, b, &sum_row_a); - sum_8(a + stride, b + width, &sum_row_b); - sum_row_c = _mm_adds_epu16(sum_row_a, sum_row_b); - sum_row_c = average_8(sum_row_c, mul_constants, strength, rounding, weight); - accumulate_and_store_8(sum_row_c, b, count, accumulator); - - a += stride + stride; - b += width; - count += width; - accumulator += width; - - mul_constants = _mm_setr_epi16(NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_6); - - for (h = 0; h < height - 2; ++h) { - sum_8(a, b + width, &sum_row_c); - sum_row_a = _mm_adds_epu16(sum_row_a, sum_row_b); - sum_row_a = _mm_adds_epu16(sum_row_a, sum_row_c); - sum_row_a = - average_8(sum_row_a, mul_constants, strength, rounding, weight); - accumulate_and_store_8(sum_row_a, b, count, accumulator); - - a += stride; - b += width; - count += width; - accumulator += width; - - sum_row_a = sum_row_b; - sum_row_b = sum_row_c; - } - - mul_constants = _mm_setr_epi16(NEIGHBOR_CONSTANT_4, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_4); - sum_row_a = _mm_adds_epu16(sum_row_a, sum_row_b); - sum_row_a = average_8(sum_row_a, mul_constants, strength, rounding, weight); - accumulate_and_store_8(sum_row_a, b, count, accumulator); - - } else { // width == 16 - __m128i sum_row_a_0, sum_row_a_1; - __m128i sum_row_b_0, sum_row_b_1; - __m128i sum_row_c_0, sum_row_c_1; - __m128i mul_constants_0 = _mm_setr_epi16( - NEIGHBOR_CONSTANT_4, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6), - mul_constants_1 = _mm_setr_epi16( - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_4); - - sum_16(a, b, &sum_row_a_0, &sum_row_a_1); - sum_16(a + stride, b + width, &sum_row_b_0, &sum_row_b_1); - - sum_row_c_0 = _mm_adds_epu16(sum_row_a_0, sum_row_b_0); - sum_row_c_1 = _mm_adds_epu16(sum_row_a_1, sum_row_b_1); - - average_16(&sum_row_c_0, &sum_row_c_1, mul_constants_0, mul_constants_1, - strength, rounding, weight); - accumulate_and_store_16(sum_row_c_0, sum_row_c_1, b, count, accumulator); - - a += stride + stride; - b += width; - count += width; - accumulator += width; - - mul_constants_0 = _mm_setr_epi16(NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9); - mul_constants_1 = _mm_setr_epi16(NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_6); - for (h = 0; h < height - 2; ++h) { - sum_16(a, b + width, &sum_row_c_0, &sum_row_c_1); - - sum_row_a_0 = _mm_adds_epu16(sum_row_a_0, sum_row_b_0); - sum_row_a_0 = 
_mm_adds_epu16(sum_row_a_0, sum_row_c_0); - sum_row_a_1 = _mm_adds_epu16(sum_row_a_1, sum_row_b_1); - sum_row_a_1 = _mm_adds_epu16(sum_row_a_1, sum_row_c_1); - - average_16(&sum_row_a_0, &sum_row_a_1, mul_constants_0, mul_constants_1, - strength, rounding, weight); - accumulate_and_store_16(sum_row_a_0, sum_row_a_1, b, count, accumulator); - - a += stride; - b += width; - count += width; - accumulator += width; - - sum_row_a_0 = sum_row_b_0; - sum_row_a_1 = sum_row_b_1; - sum_row_b_0 = sum_row_c_0; - sum_row_b_1 = sum_row_c_1; - } - - mul_constants_0 = _mm_setr_epi16(NEIGHBOR_CONSTANT_4, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6); - mul_constants_1 = _mm_setr_epi16(NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_4); - sum_row_c_0 = _mm_adds_epu16(sum_row_a_0, sum_row_b_0); - sum_row_c_1 = _mm_adds_epu16(sum_row_a_1, sum_row_b_1); - - average_16(&sum_row_c_0, &sum_row_c_1, mul_constants_0, mul_constants_1, - strength, rounding, weight); - accumulate_and_store_16(sum_row_c_0, sum_row_c_1, b, count, accumulator); + // Initialize the weights + if (blk_fw) { + weight_first = _mm_set1_epi16(blk_fw[0]); + weight_second = _mm_set1_epi16(blk_fw[1]); + } else { + weight_first = _mm_set1_epi16(top_weight); + weight_second = weight_first; } + + // First row + mul_first = _mm_load_si128((const __m128i *)neighbors_first[0]); + mul_second = _mm_load_si128((const __m128i *)neighbors_second[0]); + + // Add luma values + get_sum_16(y_dist, &sum_row_2_first, &sum_row_2_second); + get_sum_16(y_dist + DIST_STRIDE, &sum_row_3_first, &sum_row_3_second); + + sum_row_first = _mm_adds_epu16(sum_row_2_first, sum_row_3_first); + sum_row_second = _mm_adds_epu16(sum_row_2_second, sum_row_3_second); + + // Add chroma values + read_chroma_dist_row_16(ss_x, u_dist, v_dist, &u_first, &u_second, &v_first, + &v_second); + + sum_row_first = _mm_adds_epu16(sum_row_first, u_first); + sum_row_second = _mm_adds_epu16(sum_row_second, u_second); + + sum_row_first = _mm_adds_epu16(sum_row_first, v_first); + sum_row_second = _mm_adds_epu16(sum_row_second, v_second); + + // Get modifier and store result + sum_row_first = + average_8(sum_row_first, &mul_first, strength, rounding, &weight_first); + sum_row_second = average_8(sum_row_second, &mul_second, strength, rounding, + &weight_second); + accumulate_and_store_16(sum_row_first, sum_row_second, y_pre, y_count, + y_accum); + + y_src += y_src_stride; + y_pre += y_pre_stride; + y_count += y_pre_stride; + y_accum += y_pre_stride; + y_dist += DIST_STRIDE; + + u_src += uv_src_stride; + u_pre += uv_pre_stride; + u_dist += DIST_STRIDE; + v_src += uv_src_stride; + v_pre += uv_pre_stride; + v_dist += DIST_STRIDE; + + // Then all the rows except the last one + mul_first = _mm_load_si128((const __m128i *)neighbors_first[1]); + mul_second = _mm_load_si128((const __m128i *)neighbors_second[1]); + + for (h = 1; h < block_height - 1; ++h) { + // Move the weight to bottom half + if (!use_whole_blk && h == block_height / 2) { + if (blk_fw) { + weight_first = _mm_set1_epi16(blk_fw[2]); + weight_second = _mm_set1_epi16(blk_fw[3]); + } else { + weight_first = _mm_set1_epi16(bottom_weight); + weight_second = weight_first; + } + } + // Shift the rows up + sum_row_1_first = sum_row_2_first; + sum_row_1_second = sum_row_2_second; + sum_row_2_first = sum_row_3_first; + 
sum_row_2_second = sum_row_3_second; + + // Add luma values to the modifier + sum_row_first = _mm_adds_epu16(sum_row_1_first, sum_row_2_first); + sum_row_second = _mm_adds_epu16(sum_row_1_second, sum_row_2_second); + + get_sum_16(y_dist + DIST_STRIDE, &sum_row_3_first, &sum_row_3_second); + + sum_row_first = _mm_adds_epu16(sum_row_first, sum_row_3_first); + sum_row_second = _mm_adds_epu16(sum_row_second, sum_row_3_second); + + // Add chroma values to the modifier + if (ss_y == 0 || h % 2 == 0) { + // Only calculate the new chroma distortion if we are at a pixel that + // corresponds to a new chroma row + read_chroma_dist_row_16(ss_x, u_dist, v_dist, &u_first, &u_second, + &v_first, &v_second); + + u_src += uv_src_stride; + u_pre += uv_pre_stride; + u_dist += DIST_STRIDE; + v_src += uv_src_stride; + v_pre += uv_pre_stride; + v_dist += DIST_STRIDE; + } + + sum_row_first = _mm_adds_epu16(sum_row_first, u_first); + sum_row_second = _mm_adds_epu16(sum_row_second, u_second); + sum_row_first = _mm_adds_epu16(sum_row_first, v_first); + sum_row_second = _mm_adds_epu16(sum_row_second, v_second); + + // Get modifier and store result + sum_row_first = + average_8(sum_row_first, &mul_first, strength, rounding, &weight_first); + sum_row_second = average_8(sum_row_second, &mul_second, strength, rounding, + &weight_second); + accumulate_and_store_16(sum_row_first, sum_row_second, y_pre, y_count, + y_accum); + + y_src += y_src_stride; + y_pre += y_pre_stride; + y_count += y_pre_stride; + y_accum += y_pre_stride; + y_dist += DIST_STRIDE; + } + + // The last row + mul_first = _mm_load_si128((const __m128i *)neighbors_first[0]); + mul_second = _mm_load_si128((const __m128i *)neighbors_second[0]); + + // Shift the rows up + sum_row_1_first = sum_row_2_first; + sum_row_1_second = sum_row_2_second; + sum_row_2_first = sum_row_3_first; + sum_row_2_second = sum_row_3_second; + + // Add luma values to the modifier + sum_row_first = _mm_adds_epu16(sum_row_1_first, sum_row_2_first); + sum_row_second = _mm_adds_epu16(sum_row_1_second, sum_row_2_second); + + // Add chroma values to the modifier + if (ss_y == 0) { + // Only calculate the new chroma distortion if we are at a pixel that + // corresponds to a new chroma row + read_chroma_dist_row_16(ss_x, u_dist, v_dist, &u_first, &u_second, &v_first, + &v_second); + } + + sum_row_first = _mm_adds_epu16(sum_row_first, u_first); + sum_row_second = _mm_adds_epu16(sum_row_second, u_second); + sum_row_first = _mm_adds_epu16(sum_row_first, v_first); + sum_row_second = _mm_adds_epu16(sum_row_second, v_second); + + // Get modifier and store result + sum_row_first = + average_8(sum_row_first, &mul_first, strength, rounding, &weight_first); + sum_row_second = average_8(sum_row_second, &mul_second, strength, rounding, + &weight_second); + accumulate_and_store_16(sum_row_first, sum_row_second, y_pre, y_count, + y_accum); +} + +// Perform temporal filter for the luma component. 
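+// The 16-wide kernel above is dispatched one column at a time. The neighbor
+// tables pick the divisor for each lane (a sketch of the selection, not the
+// exact code): the left column uses LUMA_LEFT_COLUMN_NEIGHBORS (lane 0 has no
+// left neighbor), interior columns use LUMA_MIDDLE_COLUMN_NEIGHBORS (full 3x3
+// neighborhoods), and the right column uses LUMA_RIGHT_COLUMN_NEIGHBORS (lane
+// 15 has no right neighbor). Each table holds an edge-row and a middle-row
+// variant, and the _PLUS_2 constants account for the two chroma distortion
+// values (one U, one V) folded into every luma modifier.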
+static void vp9_apply_temporal_filter_luma(
+    const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre,
+    int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src,
+    int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre,
+    int uv_pre_stride, unsigned int block_width, unsigned int block_height,
+    int ss_x, int ss_y, int strength, const int *blk_fw, int use_whole_blk,
+    uint32_t *y_accum, uint16_t *y_count, const uint16_t *y_dist,
+    const uint16_t *u_dist, const uint16_t *v_dist) {
+  unsigned int blk_col = 0, uv_blk_col = 0;
+  const unsigned int blk_col_step = 16, uv_blk_col_step = 16 >> ss_x;
+  const unsigned int mid_width = block_width >> 1,
+                     last_width = block_width - blk_col_step;
+  int top_weight = blk_fw[0],
+      bottom_weight = use_whole_blk ? blk_fw[0] : blk_fw[2];
+  const int16_t *const *neighbors_first;
+  const int16_t *const *neighbors_second;
+
+  if (block_width == 16) {
+    // Special Case: The block width is 16 and we are operating on a row of 16
+    // luma pixels. In this case, we can't use the usual left-middle-right
+    // pattern. We also don't support splitting now.
+    neighbors_first = LUMA_LEFT_COLUMN_NEIGHBORS;
+    neighbors_second = LUMA_RIGHT_COLUMN_NEIGHBORS;
+    if (use_whole_blk) {
+      vp9_apply_temporal_filter_luma_16(
+          y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
+          u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride,
+          u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16,
+          block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col,
+          y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col,
+          v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight,
+          bottom_weight, NULL);
+    } else {
+      vp9_apply_temporal_filter_luma_16(
+          y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
+          u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride,
+          u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16,
+          block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col,
+          y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col,
+          v_dist + uv_blk_col, neighbors_first, neighbors_second, 0, 0, blk_fw);
+    }
+
+    return;
+  }
+
+  // Left
+  neighbors_first = LUMA_LEFT_COLUMN_NEIGHBORS;
+  neighbors_second = LUMA_MIDDLE_COLUMN_NEIGHBORS;
+  vp9_apply_temporal_filter_luma_16(
+      y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
+      u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col,
+      v_pre + uv_blk_col, uv_pre_stride, 16, block_height, ss_x, ss_y, strength,
+      use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col,
+      u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first,
+      neighbors_second, top_weight, bottom_weight, NULL);
+
+  blk_col += blk_col_step;
+  uv_blk_col += uv_blk_col_step;
+
+  // Middle First
+  neighbors_first = LUMA_MIDDLE_COLUMN_NEIGHBORS;
+  for (; blk_col < mid_width;
+       blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) {
+    vp9_apply_temporal_filter_luma_16(
+        y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
+        u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride,
+        u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16, block_height,
+        ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col,
+        y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col,
+        v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight,
+        bottom_weight, NULL);
+  }
+
+  if (!use_whole_blk) {
+    top_weight = blk_fw[1];
+    bottom_weight = blk_fw[3];
+  }
+
+  // Middle Second
+  for (; blk_col < last_width;
+       blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) {
+    vp9_apply_temporal_filter_luma_16(
+        y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
+        u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride,
+        u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16, block_height,
+        ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col,
+        y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col,
+        v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight,
+        bottom_weight, NULL);
+  }
+
+  // Right
+  neighbors_second = LUMA_RIGHT_COLUMN_NEIGHBORS;
+  vp9_apply_temporal_filter_luma_16(
+      y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
+      u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col,
+      v_pre + uv_blk_col, uv_pre_stride, 16, block_height, ss_x, ss_y, strength,
+      use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col,
+      u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first,
+      neighbors_second, top_weight, bottom_weight, NULL);
+}
+
+// Apply temporal filter to the chroma components. This performs temporal
+// filtering on a chroma block of 8 X uv_height. If blk_fw is not NULL, use
+// blk_fw as an array of size 4 for the weights for each of the 4 subblocks,
+// else use top_weight for top half, and bottom weight for bottom half.
+static void vp9_apply_temporal_filter_chroma_8(
+    const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre,
+    int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src,
+    int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre,
+    int uv_pre_stride, unsigned int uv_block_width,
+    unsigned int uv_block_height, int ss_x, int ss_y, int strength,
+    uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count,
+    const uint16_t *y_dist, const uint16_t *u_dist, const uint16_t *v_dist,
+    const int16_t *const *neighbors, int top_weight, int bottom_weight,
+    const int *blk_fw) {
+  const int rounding = (1 << strength) >> 1;
+
+  __m128i weight;
+
+  __m128i mul;
+
+  __m128i u_sum_row_1, u_sum_row_2, u_sum_row_3;
+  __m128i v_sum_row_1, v_sum_row_2, v_sum_row_3;
+
+  __m128i u_sum_row, v_sum_row;
+
+  // Loop variable
+  unsigned int h;
+
+  (void)uv_block_width;
+
+  // Initialize weight
+  if (blk_fw) {
+    weight = _mm_setr_epi16(blk_fw[0], blk_fw[0], blk_fw[0], blk_fw[0],
+                            blk_fw[1], blk_fw[1], blk_fw[1], blk_fw[1]);
+  } else {
+    weight = _mm_set1_epi16(top_weight);
+  }
+
+  // First row
+  mul = _mm_load_si128((const __m128i *)neighbors[0]);
+
+  // Add chroma values
+  get_sum_8(u_dist, &u_sum_row_2);
+  get_sum_8(u_dist + DIST_STRIDE, &u_sum_row_3);
+
+  u_sum_row = _mm_adds_epu16(u_sum_row_2, u_sum_row_3);
+
+  get_sum_8(v_dist, &v_sum_row_2);
+  get_sum_8(v_dist + DIST_STRIDE, &v_sum_row_3);
+
+  v_sum_row = _mm_adds_epu16(v_sum_row_2, v_sum_row_3);
+
+  // Add luma values
+  add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row, &v_sum_row);
+
+  // Get modifier and store result
+  u_sum_row = average_8(u_sum_row, &mul, strength, rounding, &weight);
+  v_sum_row = average_8(v_sum_row, &mul, strength, rounding, &weight);
+
+  accumulate_and_store_8(u_sum_row, u_pre, u_count, u_accum);
+  accumulate_and_store_8(v_sum_row, v_pre, v_count, v_accum);
+
+  u_src += uv_src_stride;
+  u_pre += uv_pre_stride;
+  u_dist += DIST_STRIDE;
+  v_src += uv_src_stride;
+  v_pre += uv_pre_stride;
+  v_dist += DIST_STRIDE;
+  u_count += uv_pre_stride;
+  u_accum += uv_pre_stride;
+  v_count += uv_pre_stride;
+  v_accum += uv_pre_stride;
+
+  y_src += y_src_stride * (1 + ss_y);
+  y_pre += y_pre_stride * (1 + ss_y);
+  y_dist += DIST_STRIDE * (1 + ss_y);
+
+  // Then all the rows
except the last one + mul = _mm_load_si128((const __m128i *)neighbors[1]); + + for (h = 1; h < uv_block_height - 1; ++h) { + // Move the weight pointer to the bottom half of the blocks + if (h == uv_block_height / 2) { + if (blk_fw) { + weight = _mm_setr_epi16(blk_fw[2], blk_fw[2], blk_fw[2], blk_fw[2], + blk_fw[3], blk_fw[3], blk_fw[3], blk_fw[3]); + } else { + weight = _mm_set1_epi16(bottom_weight); + } + } + + // Shift the rows up + u_sum_row_1 = u_sum_row_2; + u_sum_row_2 = u_sum_row_3; + + v_sum_row_1 = v_sum_row_2; + v_sum_row_2 = v_sum_row_3; + + // Add chroma values + u_sum_row = _mm_adds_epu16(u_sum_row_1, u_sum_row_2); + get_sum_8(u_dist + DIST_STRIDE, &u_sum_row_3); + u_sum_row = _mm_adds_epu16(u_sum_row, u_sum_row_3); + + v_sum_row = _mm_adds_epu16(v_sum_row_1, v_sum_row_2); + get_sum_8(v_dist + DIST_STRIDE, &v_sum_row_3); + v_sum_row = _mm_adds_epu16(v_sum_row, v_sum_row_3); + + // Add luma values + add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row, &v_sum_row); + + // Get modifier and store result + u_sum_row = average_8(u_sum_row, &mul, strength, rounding, &weight); + v_sum_row = average_8(v_sum_row, &mul, strength, rounding, &weight); + + accumulate_and_store_8(u_sum_row, u_pre, u_count, u_accum); + accumulate_and_store_8(v_sum_row, v_pre, v_count, v_accum); + + u_src += uv_src_stride; + u_pre += uv_pre_stride; + u_dist += DIST_STRIDE; + v_src += uv_src_stride; + v_pre += uv_pre_stride; + v_dist += DIST_STRIDE; + u_count += uv_pre_stride; + u_accum += uv_pre_stride; + v_count += uv_pre_stride; + v_accum += uv_pre_stride; + + y_src += y_src_stride * (1 + ss_y); + y_pre += y_pre_stride * (1 + ss_y); + y_dist += DIST_STRIDE * (1 + ss_y); + } + + // The last row + mul = _mm_load_si128((const __m128i *)neighbors[0]); + + // Shift the rows up + u_sum_row_1 = u_sum_row_2; + u_sum_row_2 = u_sum_row_3; + + v_sum_row_1 = v_sum_row_2; + v_sum_row_2 = v_sum_row_3; + + // Add chroma values + u_sum_row = _mm_adds_epu16(u_sum_row_1, u_sum_row_2); + v_sum_row = _mm_adds_epu16(v_sum_row_1, v_sum_row_2); + + // Add luma values + add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row, &v_sum_row); + + // Get modifier and store result + u_sum_row = average_8(u_sum_row, &mul, strength, rounding, &weight); + v_sum_row = average_8(v_sum_row, &mul, strength, rounding, &weight); + + accumulate_and_store_8(u_sum_row, u_pre, u_count, u_accum); + accumulate_and_store_8(v_sum_row, v_pre, v_count, v_accum); +} + +// Perform temporal filter for the chroma components. +static void vp9_apply_temporal_filter_chroma( + const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, + int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, + int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, + int uv_pre_stride, unsigned int block_width, unsigned int block_height, + int ss_x, int ss_y, int strength, const int *blk_fw, int use_whole_blk, + uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count, + const uint16_t *y_dist, const uint16_t *u_dist, const uint16_t *v_dist) { + const unsigned int uv_width = block_width >> ss_x, + uv_height = block_height >> ss_y; + + unsigned int blk_col = 0, uv_blk_col = 0; + const unsigned int uv_blk_col_step = 8, blk_col_step = 8 << ss_x; + const unsigned int uv_mid_width = uv_width >> 1, + uv_last_width = uv_width - uv_blk_col_step; + int top_weight = blk_fw[0], + bottom_weight = use_whole_blk ? 
blk_fw[0] : blk_fw[2]; + const int16_t *const *neighbors; + + if (uv_width == 8) { + // Special Case: We are subsampling in x direction on a 16x16 block. Since + // we are operating on a row of 8 chroma pixels, we can't use the usual + // left-middle-right pattern. + assert(ss_x); + + if (ss_y) { + neighbors = CHROMA_DOUBLE_SS_SINGLE_COLUMN_NEIGHBORS; + } else { + neighbors = CHROMA_SINGLE_SS_SINGLE_COLUMN_NEIGHBORS; + } + + if (use_whole_blk) { + vp9_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, + uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, + u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, + top_weight, bottom_weight, NULL); + } else { + vp9_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, + uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, + u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, + 0, 0, blk_fw); + } + + return; + } + + // Left + if (ss_x && ss_y) { + neighbors = CHROMA_DOUBLE_SS_LEFT_COLUMN_NEIGHBORS; + } else if (ss_x || ss_y) { + neighbors = CHROMA_SINGLE_SS_LEFT_COLUMN_NEIGHBORS; + } else { + neighbors = CHROMA_NO_SS_LEFT_COLUMN_NEIGHBORS; + } + + vp9_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, + v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, + strength, u_accum + uv_blk_col, u_count + uv_blk_col, + v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, + u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, top_weight, + bottom_weight, NULL); + + blk_col += blk_col_step; + uv_blk_col += uv_blk_col_step; + + // Middle First + if (ss_x && ss_y) { + neighbors = CHROMA_DOUBLE_SS_MIDDLE_COLUMN_NEIGHBORS; + } else if (ss_x || ss_y) { + neighbors = CHROMA_SINGLE_SS_MIDDLE_COLUMN_NEIGHBORS; + } else { + neighbors = CHROMA_NO_SS_MIDDLE_COLUMN_NEIGHBORS; + } + + for (; uv_blk_col < uv_mid_width; + blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { + vp9_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, + uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, + u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, + top_weight, bottom_weight, NULL); + } + + if (!use_whole_blk) { + top_weight = blk_fw[1]; + bottom_weight = blk_fw[3]; + } + + // Middle Second + for (; uv_blk_col < uv_last_width; + blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { + vp9_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, + uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, + u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, + y_dist + blk_col, u_dist + uv_blk_col, 
v_dist + uv_blk_col, neighbors,
+        top_weight, bottom_weight, NULL);
+  }
+
+  // Right
+  if (ss_x && ss_y) {
+    neighbors = CHROMA_DOUBLE_SS_RIGHT_COLUMN_NEIGHBORS;
+  } else if (ss_x || ss_y) {
+    neighbors = CHROMA_SINGLE_SS_RIGHT_COLUMN_NEIGHBORS;
+  } else {
+    neighbors = CHROMA_NO_SS_RIGHT_COLUMN_NEIGHBORS;
+  }
+
+  vp9_apply_temporal_filter_chroma_8(
+      y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride,
+      u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col,
+      v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y,
+      strength, u_accum + uv_blk_col, u_count + uv_blk_col,
+      v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col,
+      u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, top_weight,
+      bottom_weight, NULL);
+}
+
+void vp9_apply_temporal_filter_sse4_1(
+    const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre,
+    int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src,
+    int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre,
+    int uv_pre_stride, unsigned int block_width, unsigned int block_height,
+    int ss_x, int ss_y, int strength, const int *const blk_fw,
+    int use_whole_blk, uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum,
+    uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count) {
+  const unsigned int chroma_height = block_height >> ss_y,
+                     chroma_width = block_width >> ss_x;
+
+  DECLARE_ALIGNED(16, uint16_t, y_dist[BH * DIST_STRIDE]) = { 0 };
+  DECLARE_ALIGNED(16, uint16_t, u_dist[BH * DIST_STRIDE]) = { 0 };
+  DECLARE_ALIGNED(16, uint16_t, v_dist[BH * DIST_STRIDE]) = { 0 };
+  const int *blk_fw_ptr = blk_fw;
+
+  uint16_t *y_dist_ptr = y_dist + 1, *u_dist_ptr = u_dist + 1,
+           *v_dist_ptr = v_dist + 1;
+  const uint8_t *y_src_ptr = y_src, *u_src_ptr = u_src, *v_src_ptr = v_src;
+  const uint8_t *y_pre_ptr = y_pre, *u_pre_ptr = u_pre, *v_pre_ptr = v_pre;
+
+  // Loop variables
+  unsigned int row, blk_col;
+
+  assert(block_width <= BW && "block width too large");
+  assert(block_height <= BH && "block height too large");
+  assert(block_width % 16 == 0 && "block width must be multiple of 16");
+  assert(block_height % 2 == 0 && "block height must be even");
+  assert((ss_x == 0 || ss_x == 1) && (ss_y == 0 || ss_y == 1) &&
+         "invalid chroma subsampling");
+  assert(strength >= 0 && strength <= 6 && "invalid temporal filter strength");
+  assert(blk_fw[0] >= 0 && "filter weight must be nonnegative");
+  assert(
+      (use_whole_blk || (blk_fw[1] >= 0 && blk_fw[2] >= 0 && blk_fw[3] >= 0)) &&
+      "subblock filter weight must be nonnegative");
+  assert(blk_fw[0] <= 2 && "subblock filter weight must not exceed 2");
+  assert(
+      (use_whole_blk || (blk_fw[1] <= 2 && blk_fw[2] <= 2 && blk_fw[3] <= 2)) &&
+      "subblock filter weight must not exceed 2");
+
+  // Precompute the difference squared
+  for (row = 0; row < block_height; row++) {
+    for (blk_col = 0; blk_col < block_width; blk_col += 16) {
+      store_dist_16(y_src_ptr + blk_col, y_pre_ptr + blk_col,
+                    y_dist_ptr + blk_col);
+    }
+    y_src_ptr += y_src_stride;
+    y_pre_ptr += y_pre_stride;
+    y_dist_ptr += DIST_STRIDE;
+  }
+
+  for (row = 0; row < chroma_height; row++) {
+    for (blk_col = 0; blk_col < chroma_width; blk_col += 8) {
+      store_dist_8(u_src_ptr + blk_col, u_pre_ptr + blk_col,
+                   u_dist_ptr + blk_col);
+      store_dist_8(v_src_ptr + blk_col, v_pre_ptr + blk_col,
+                   v_dist_ptr + blk_col);
+    }
+
+    u_src_ptr += uv_src_stride;
+    u_pre_ptr += uv_pre_stride;
+    u_dist_ptr += DIST_STRIDE;
+    v_src_ptr += uv_src_stride;
+    v_pre_ptr += uv_pre_stride;
+    v_dist_ptr += DIST_STRIDE;
+  }
+
+  y_dist_ptr =
y_dist + 1; + u_dist_ptr = u_dist + 1; + v_dist_ptr = v_dist + 1; + + vp9_apply_temporal_filter_luma( + y_src, y_src_stride, y_pre, y_pre_stride, u_src, v_src, uv_src_stride, + u_pre, v_pre, uv_pre_stride, block_width, block_height, ss_x, ss_y, + strength, blk_fw_ptr, use_whole_blk, y_accum, y_count, y_dist_ptr, + u_dist_ptr, v_dist_ptr); + + vp9_apply_temporal_filter_chroma( + y_src, y_src_stride, y_pre, y_pre_stride, u_src, v_src, uv_src_stride, + u_pre, v_pre, uv_pre_stride, block_width, block_height, ss_x, ss_y, + strength, blk_fw_ptr, use_whole_blk, u_accum, u_count, v_accum, v_count, + y_dist_ptr, u_dist_ptr, v_dist_ptr); } diff --git a/media/libvpx/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c b/media/libvpx/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c index dbd243ac1039..2188903b1731 100644 --- a/media/libvpx/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c +++ b/media/libvpx/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c @@ -14,6 +14,7 @@ #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/txfm_common.h" +#include "vpx_dsp/x86/bitdepth_conversion_sse2.h" #include "vpx_dsp/x86/fwd_txfm_sse2.h" #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" @@ -170,452 +171,13 @@ void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride, fadst4_sse2(in); write_buffer_4x4(output, in); break; - case ADST_ADST: + default: + assert(tx_type == ADST_ADST); load_buffer_4x4(input, in, stride); fadst4_sse2(in); fadst4_sse2(in); write_buffer_4x4(output, in); break; - default: assert(0); break; - } -} - -void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, - int16_t *coeff_ptr, intptr_t n_coeffs, - int skip_block, const int16_t *round_ptr, - const int16_t *quant_ptr, int16_t *qcoeff_ptr, - int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, - const int16_t *iscan_ptr) { - __m128i zero; - int pass; - - // Constants - // When we use them, in one case, they are all the same. In all others - // it's a pair of them that we need to repeat four times. This is done - // by constructing the 32 bit constant corresponding to that pair. 
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64); - const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); - const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64); - const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - // Load input - __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride)); - __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride)); - __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride)); - __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride)); - __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride)); - __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride)); - __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride)); - __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride)); - __m128i *in[8]; - int index = 0; - - (void)scan_ptr; - (void)coeff_ptr; - - // Pre-condition input (shift by two) - in0 = _mm_slli_epi16(in0, 2); - in1 = _mm_slli_epi16(in1, 2); - in2 = _mm_slli_epi16(in2, 2); - in3 = _mm_slli_epi16(in3, 2); - in4 = _mm_slli_epi16(in4, 2); - in5 = _mm_slli_epi16(in5, 2); - in6 = _mm_slli_epi16(in6, 2); - in7 = _mm_slli_epi16(in7, 2); - - in[0] = &in0; - in[1] = &in1; - in[2] = &in2; - in[3] = &in3; - in[4] = &in4; - in[5] = &in5; - in[6] = &in6; - in[7] = &in7; - - // We do two passes, first the columns, then the rows. The results of the - // first pass are transposed so that the same column code can be reused. The - // results of the second pass are also transposed so that the rows (processed - // as columns) are put back in row positions. - for (pass = 0; pass < 2; pass++) { - // To store results of each pass before the transpose. 
- __m128i res0, res1, res2, res3, res4, res5, res6, res7; - // Add/subtract - const __m128i q0 = _mm_add_epi16(in0, in7); - const __m128i q1 = _mm_add_epi16(in1, in6); - const __m128i q2 = _mm_add_epi16(in2, in5); - const __m128i q3 = _mm_add_epi16(in3, in4); - const __m128i q4 = _mm_sub_epi16(in3, in4); - const __m128i q5 = _mm_sub_epi16(in2, in5); - const __m128i q6 = _mm_sub_epi16(in1, in6); - const __m128i q7 = _mm_sub_epi16(in0, in7); - // Work on first four results - { - // Add/subtract - const __m128i r0 = _mm_add_epi16(q0, q3); - const __m128i r1 = _mm_add_epi16(q1, q2); - const __m128i r2 = _mm_sub_epi16(q1, q2); - const __m128i r3 = _mm_sub_epi16(q0, q3); - // Interleave to do the multiply by constants which gets us into 32bits - const __m128i t0 = _mm_unpacklo_epi16(r0, r1); - const __m128i t1 = _mm_unpackhi_epi16(r0, r1); - const __m128i t2 = _mm_unpacklo_epi16(r2, r3); - const __m128i t3 = _mm_unpackhi_epi16(r2, r3); - const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16); - const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16); - const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16); - const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16); - const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08); - const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08); - const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24); - const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24); - // dct_const_round_shift - const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); - const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); - const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); - const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING); - const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); - const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING); - const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); - const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING); - const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS); - const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS); - const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS); - const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS); - const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS); - // Combine - res0 = _mm_packs_epi32(w0, w1); - res4 = _mm_packs_epi32(w2, w3); - res2 = _mm_packs_epi32(w4, w5); - res6 = _mm_packs_epi32(w6, w7); - } - // Work on next four results - { - // Interleave to do the multiply by constants which gets us into 32bits - const __m128i d0 = _mm_unpacklo_epi16(q6, q5); - const __m128i d1 = _mm_unpackhi_epi16(q6, q5); - const __m128i e0 = _mm_madd_epi16(d0, k__cospi_p16_m16); - const __m128i e1 = _mm_madd_epi16(d1, k__cospi_p16_m16); - const __m128i e2 = _mm_madd_epi16(d0, k__cospi_p16_p16); - const __m128i e3 = _mm_madd_epi16(d1, k__cospi_p16_p16); - // dct_const_round_shift - const __m128i f0 = _mm_add_epi32(e0, k__DCT_CONST_ROUNDING); - const __m128i f1 = _mm_add_epi32(e1, k__DCT_CONST_ROUNDING); - const __m128i f2 = _mm_add_epi32(e2, k__DCT_CONST_ROUNDING); - const __m128i f3 = _mm_add_epi32(e3, k__DCT_CONST_ROUNDING); - const __m128i s0 = _mm_srai_epi32(f0, DCT_CONST_BITS); - const __m128i s1 = _mm_srai_epi32(f1, DCT_CONST_BITS); - const __m128i s2 = _mm_srai_epi32(f2, DCT_CONST_BITS); - const __m128i s3 = _mm_srai_epi32(f3, DCT_CONST_BITS); - // Combine - const __m128i r0 = 
_mm_packs_epi32(s0, s1); - const __m128i r1 = _mm_packs_epi32(s2, s3); - // Add/subtract - const __m128i x0 = _mm_add_epi16(q4, r0); - const __m128i x1 = _mm_sub_epi16(q4, r0); - const __m128i x2 = _mm_sub_epi16(q7, r1); - const __m128i x3 = _mm_add_epi16(q7, r1); - // Interleave to do the multiply by constants which gets us into 32bits - const __m128i t0 = _mm_unpacklo_epi16(x0, x3); - const __m128i t1 = _mm_unpackhi_epi16(x0, x3); - const __m128i t2 = _mm_unpacklo_epi16(x1, x2); - const __m128i t3 = _mm_unpackhi_epi16(x1, x2); - const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04); - const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04); - const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28); - const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28); - const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20); - const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20); - const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12); - const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12); - // dct_const_round_shift - const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); - const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); - const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); - const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING); - const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); - const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING); - const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); - const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING); - const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS); - const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS); - const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS); - const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS); - const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS); - // Combine - res1 = _mm_packs_epi32(w0, w1); - res7 = _mm_packs_epi32(w2, w3); - res5 = _mm_packs_epi32(w4, w5); - res3 = _mm_packs_epi32(w6, w7); - } - // Transpose the 8x8. 
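- // [editor's note, illustration only] The block below is the classic SSE2
- // 8x8 16-bit transpose: epi16 unpacks interleave adjacent rows, epi32
- // unpacks interleave row pairs, and epi64 unpacks emit the final columns.
- // Following the first element of each level:
- //   tr0_0 = _mm_unpacklo_epi16(res0, res1);   // 00 10 01 11 02 12 03 13
- //   tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); // 00 10 20 30 01 11 21 31
- //   in0   = _mm_unpacklo_epi64(tr1_0, tr1_4); // 00 10 20 30 40 50 60 70
- // The lane diagrams in the original comments below trace all eight rows.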
- { - // 00 01 02 03 04 05 06 07 - // 10 11 12 13 14 15 16 17 - // 20 21 22 23 24 25 26 27 - // 30 31 32 33 34 35 36 37 - // 40 41 42 43 44 45 46 47 - // 50 51 52 53 54 55 56 57 - // 60 61 62 63 64 65 66 67 - // 70 71 72 73 74 75 76 77 - const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1); - const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3); - const __m128i tr0_2 = _mm_unpackhi_epi16(res0, res1); - const __m128i tr0_3 = _mm_unpackhi_epi16(res2, res3); - const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5); - const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7); - const __m128i tr0_6 = _mm_unpackhi_epi16(res4, res5); - const __m128i tr0_7 = _mm_unpackhi_epi16(res6, res7); - // 00 10 01 11 02 12 03 13 - // 20 30 21 31 22 32 23 33 - // 04 14 05 15 06 16 07 17 - // 24 34 25 35 26 36 27 37 - // 40 50 41 51 42 52 43 53 - // 60 70 61 71 62 72 63 73 - // 54 54 55 55 56 56 57 57 - // 64 74 65 75 66 76 67 77 - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3); - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); - const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); - const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); - const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); - // 00 10 20 30 01 11 21 31 - // 40 50 60 70 41 51 61 71 - // 02 12 22 32 03 13 23 33 - // 42 52 62 72 43 53 63 73 - // 04 14 24 34 05 15 21 36 - // 44 54 64 74 45 55 61 76 - // 06 16 26 36 07 17 27 37 - // 46 56 66 76 47 57 67 77 - in0 = _mm_unpacklo_epi64(tr1_0, tr1_4); - in1 = _mm_unpackhi_epi64(tr1_0, tr1_4); - in2 = _mm_unpacklo_epi64(tr1_2, tr1_6); - in3 = _mm_unpackhi_epi64(tr1_2, tr1_6); - in4 = _mm_unpacklo_epi64(tr1_1, tr1_5); - in5 = _mm_unpackhi_epi64(tr1_1, tr1_5); - in6 = _mm_unpacklo_epi64(tr1_3, tr1_7); - in7 = _mm_unpackhi_epi64(tr1_3, tr1_7); - // 00 10 20 30 40 50 60 70 - // 01 11 21 31 41 51 61 71 - // 02 12 22 32 42 52 62 72 - // 03 13 23 33 43 53 63 73 - // 04 14 24 34 44 54 64 74 - // 05 15 25 35 45 55 65 75 - // 06 16 26 36 46 56 66 76 - // 07 17 27 37 47 57 67 77 - } - } - // Post-condition output and store it - { - // Post-condition (division by two) - // division of two 16 bits signed numbers using shifts - // n / 2 = (n - (n >> 15)) >> 1 - const __m128i sign_in0 = _mm_srai_epi16(in0, 15); - const __m128i sign_in1 = _mm_srai_epi16(in1, 15); - const __m128i sign_in2 = _mm_srai_epi16(in2, 15); - const __m128i sign_in3 = _mm_srai_epi16(in3, 15); - const __m128i sign_in4 = _mm_srai_epi16(in4, 15); - const __m128i sign_in5 = _mm_srai_epi16(in5, 15); - const __m128i sign_in6 = _mm_srai_epi16(in6, 15); - const __m128i sign_in7 = _mm_srai_epi16(in7, 15); - in0 = _mm_sub_epi16(in0, sign_in0); - in1 = _mm_sub_epi16(in1, sign_in1); - in2 = _mm_sub_epi16(in2, sign_in2); - in3 = _mm_sub_epi16(in3, sign_in3); - in4 = _mm_sub_epi16(in4, sign_in4); - in5 = _mm_sub_epi16(in5, sign_in5); - in6 = _mm_sub_epi16(in6, sign_in6); - in7 = _mm_sub_epi16(in7, sign_in7); - in0 = _mm_srai_epi16(in0, 1); - in1 = _mm_srai_epi16(in1, 1); - in2 = _mm_srai_epi16(in2, 1); - in3 = _mm_srai_epi16(in3, 1); - in4 = _mm_srai_epi16(in4, 1); - in5 = _mm_srai_epi16(in5, 1); - in6 = _mm_srai_epi16(in6, 1); - in7 = _mm_srai_epi16(in7, 1); - } - - iscan_ptr += n_coeffs; - qcoeff_ptr += n_coeffs; - dqcoeff_ptr += n_coeffs; - n_coeffs = -n_coeffs; - zero = _mm_setzero_si128(); - - if (!skip_block) { - __m128i eob; - __m128i round, quant, dequant; - { - 
__m128i coeff0, coeff1; - - // Setup global values - { - round = _mm_load_si128((const __m128i *)round_ptr); - quant = _mm_load_si128((const __m128i *)quant_ptr); - dequant = _mm_load_si128((const __m128i *)dequant_ptr); - } - - { - __m128i coeff0_sign, coeff1_sign; - __m128i qcoeff0, qcoeff1; - __m128i qtmp0, qtmp1; - // Do DC and first 15 AC - coeff0 = *in[0]; - coeff1 = *in[1]; - - // Poor man's sign extract - coeff0_sign = _mm_srai_epi16(coeff0, 15); - coeff1_sign = _mm_srai_epi16(coeff1, 15); - qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); - qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); - qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); - qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - - qcoeff0 = _mm_adds_epi16(qcoeff0, round); - round = _mm_unpackhi_epi64(round, round); - qcoeff1 = _mm_adds_epi16(qcoeff1, round); - qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); - quant = _mm_unpackhi_epi64(quant, quant); - qtmp1 = _mm_mulhi_epi16(qcoeff1, quant); - - // Reinsert signs - qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); - qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); - qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); - qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0); - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); - - coeff0 = _mm_mullo_epi16(qcoeff0, dequant); - dequant = _mm_unpackhi_epi64(dequant, dequant); - coeff1 = _mm_mullo_epi16(qcoeff1, dequant); - - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0); - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1); - } - - { - // Scan for eob - __m128i zero_coeff0, zero_coeff1; - __m128i nzero_coeff0, nzero_coeff1; - __m128i iscan0, iscan1; - __m128i eob1; - zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero); - zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); - nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); - nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); - // Add one to convert from indices to counts - iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); - iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); - eob = _mm_and_si128(iscan0, nzero_coeff0); - eob1 = _mm_and_si128(iscan1, nzero_coeff1); - eob = _mm_max_epi16(eob, eob1); - } - n_coeffs += 8 * 2; - } - - // AC only loop - index = 2; - while (n_coeffs < 0) { - __m128i coeff0, coeff1; - { - __m128i coeff0_sign, coeff1_sign; - __m128i qcoeff0, qcoeff1; - __m128i qtmp0, qtmp1; - - assert(index < (int)(sizeof(in) / sizeof(in[0])) - 1); - coeff0 = *in[index]; - coeff1 = *in[index + 1]; - - // Poor man's sign extract - coeff0_sign = _mm_srai_epi16(coeff0, 15); - coeff1_sign = _mm_srai_epi16(coeff1, 15); - qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); - qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); - qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); - qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - - qcoeff0 = _mm_adds_epi16(qcoeff0, round); - qcoeff1 = _mm_adds_epi16(qcoeff1, round); - qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); - qtmp1 = _mm_mulhi_epi16(qcoeff1, quant); - - // Reinsert signs - qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); - qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); - qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); - qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0); - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); - - coeff0 = _mm_mullo_epi16(qcoeff0, dequant); - coeff1 = 
_mm_mullo_epi16(qcoeff1, dequant); - - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0); - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1); - } - - { - // Scan for eob - __m128i zero_coeff0, zero_coeff1; - __m128i nzero_coeff0, nzero_coeff1; - __m128i iscan0, iscan1; - __m128i eob0, eob1; - zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero); - zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); - nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); - nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); - // Add one to convert from indices to counts - iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); - iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); - eob0 = _mm_and_si128(iscan0, nzero_coeff0); - eob1 = _mm_and_si128(iscan1, nzero_coeff1); - eob0 = _mm_max_epi16(eob0, eob1); - eob = _mm_max_epi16(eob, eob0); - } - n_coeffs += 8 * 2; - index += 2; - } - - // Accumulate EOB - { - __m128i eob_shuffled; - eob_shuffled = _mm_shuffle_epi32(eob, 0xe); - eob = _mm_max_epi16(eob, eob_shuffled); - eob_shuffled = _mm_shufflelo_epi16(eob, 0xe); - eob = _mm_max_epi16(eob, eob_shuffled); - eob_shuffled = _mm_shufflelo_epi16(eob, 0x1); - eob = _mm_max_epi16(eob, eob_shuffled); - *eob_ptr = _mm_extract_epi16(eob, 1); - } - } else { - do { - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero); - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero); - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero); - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero); - n_coeffs += 8 * 2; - } while (n_coeffs < 0); - *eob_ptr = 0; } } @@ -1097,14 +659,14 @@ void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, right_shift_8x8(in, 1); write_buffer_8x8(output, in, 8); break; - case ADST_ADST: + default: + assert(tx_type == ADST_ADST); load_buffer_8x8(input, in, stride); fadst8_sse2(in); fadst8_sse2(in); right_shift_8x8(in, 1); write_buffer_8x8(output, in, 8); break; - default: assert(0); break; } } @@ -1963,13 +1525,13 @@ void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, fadst16_sse2(in0, in1); write_buffer_16x16(output, in0, in1, 16); break; - case ADST_ADST: + default: + assert(tx_type == ADST_ADST); load_buffer_16x16(input, in0, in1, stride); fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); fadst16_sse2(in0, in1); write_buffer_16x16(output, in0, in1, 16); break; - default: assert(0); break; } } diff --git a/media/libvpx/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c b/media/libvpx/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c deleted file mode 100644 index bf874a09ec5b..000000000000 --- a/media/libvpx/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c +++ /dev/null @@ -1,465 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <assert.h> -#include <tmmintrin.h> // SSSE3 - -#include "./vp9_rtcd.h" -#include "./vpx_config.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_dsp/x86/bitdepth_conversion_sse2.h" -#include "vpx_dsp/x86/inv_txfm_sse2.h" -#include "vpx_dsp/x86/txfm_common_sse2.h" - -void vp9_fdct8x8_quant_ssse3( - const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, - int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) { - __m128i zero; - int pass; - - // Constants - // When we use them, in one case, they are all the same. In all others - // it's a pair of them that we need to repeat four times. This is done - // by constructing the 32 bit constant corresponding to that pair. - const __m128i k__dual_p16_p16 = dual_set_epi16(23170, 23170); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64); - const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); - const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64); - const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - // Load input - __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride)); - __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride)); - __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride)); - __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride)); - __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride)); - __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride)); - __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride)); - __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride)); - __m128i *in[8]; - int index = 0; - - (void)scan_ptr; - (void)coeff_ptr; - - // Pre-condition input (shift by two) - in0 = _mm_slli_epi16(in0, 2); - in1 = _mm_slli_epi16(in1, 2); - in2 = _mm_slli_epi16(in2, 2); - in3 = _mm_slli_epi16(in3, 2); - in4 = _mm_slli_epi16(in4, 2); - in5 = _mm_slli_epi16(in5, 2); - in6 = _mm_slli_epi16(in6, 2); - in7 = _mm_slli_epi16(in7, 2); - - in[0] = &in0; - in[1] = &in1; - in[2] = &in2; - in[3] = &in3; - in[4] = &in4; - in[5] = &in5; - in[6] = &in6; - in[7] = &in7; - - // We do two passes, first the columns, then the rows. The results of the - // first pass are transposed so that the same column code can be reused. The - // results of the second pass are also transposed so that the rows (processed - // as columns) are put back in row positions. - for (pass = 0; pass < 2; pass++) { - // To store results of each pass before the transpose.
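- // [editor's note, illustration only] The main difference from the SSE2
- // version is k__dual_p16_p16 above: 23170/32768 is approximately cos(pi/4),
- // and _mm_mulhrs_epi16(x, k) computes round((x * k) >> 15), so
- //   r0 = _mm_mulhrs_epi16(d0, k__dual_p16_p16);  // ~= d0 * cos(pi/4)
- // replaces the unpack/madd/round/shift/pack chain the SSE2 path needs for
- // the same butterfly stage (see the "Work on next four results" block
- // below).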
- __m128i res0, res1, res2, res3, res4, res5, res6, res7; - // Add/subtract - const __m128i q0 = _mm_add_epi16(in0, in7); - const __m128i q1 = _mm_add_epi16(in1, in6); - const __m128i q2 = _mm_add_epi16(in2, in5); - const __m128i q3 = _mm_add_epi16(in3, in4); - const __m128i q4 = _mm_sub_epi16(in3, in4); - const __m128i q5 = _mm_sub_epi16(in2, in5); - const __m128i q6 = _mm_sub_epi16(in1, in6); - const __m128i q7 = _mm_sub_epi16(in0, in7); - // Work on first four results - { - // Add/subtract - const __m128i r0 = _mm_add_epi16(q0, q3); - const __m128i r1 = _mm_add_epi16(q1, q2); - const __m128i r2 = _mm_sub_epi16(q1, q2); - const __m128i r3 = _mm_sub_epi16(q0, q3); - // Interleave to do the multiply by constants which gets us into 32bits - const __m128i t0 = _mm_unpacklo_epi16(r0, r1); - const __m128i t1 = _mm_unpackhi_epi16(r0, r1); - const __m128i t2 = _mm_unpacklo_epi16(r2, r3); - const __m128i t3 = _mm_unpackhi_epi16(r2, r3); - - const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16); - const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16); - const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16); - const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16); - - const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08); - const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08); - const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24); - const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24); - // dct_const_round_shift - - const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); - const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); - const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); - const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING); - - const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); - const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING); - const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); - const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING); - - const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS); - const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS); - const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS); - - const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS); - const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS); - // Combine - - res0 = _mm_packs_epi32(w0, w1); - res4 = _mm_packs_epi32(w2, w3); - res2 = _mm_packs_epi32(w4, w5); - res6 = _mm_packs_epi32(w6, w7); - } - // Work on next four results - { - // Interleave to do the multiply by constants which gets us into 32bits - const __m128i d0 = _mm_sub_epi16(q6, q5); - const __m128i d1 = _mm_add_epi16(q6, q5); - const __m128i r0 = _mm_mulhrs_epi16(d0, k__dual_p16_p16); - const __m128i r1 = _mm_mulhrs_epi16(d1, k__dual_p16_p16); - - // Add/subtract - const __m128i x0 = _mm_add_epi16(q4, r0); - const __m128i x1 = _mm_sub_epi16(q4, r0); - const __m128i x2 = _mm_sub_epi16(q7, r1); - const __m128i x3 = _mm_add_epi16(q7, r1); - // Interleave to do the multiply by constants which gets us into 32bits - const __m128i t0 = _mm_unpacklo_epi16(x0, x3); - const __m128i t1 = _mm_unpackhi_epi16(x0, x3); - const __m128i t2 = _mm_unpacklo_epi16(x1, x2); - const __m128i t3 = _mm_unpackhi_epi16(x1, x2); - const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04); - const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04); - const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28); - const __m128i u3 = 
_mm_madd_epi16(t1, k__cospi_m04_p28); - const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20); - const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20); - const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12); - const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12); - // dct_const_round_shift - const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); - const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); - const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); - const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING); - const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); - const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING); - const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); - const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING); - const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS); - const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS); - const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS); - const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS); - const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS); - // Combine - res1 = _mm_packs_epi32(w0, w1); - res7 = _mm_packs_epi32(w2, w3); - res5 = _mm_packs_epi32(w4, w5); - res3 = _mm_packs_epi32(w6, w7); - } - // Transpose the 8x8. - { - // 00 01 02 03 04 05 06 07 - // 10 11 12 13 14 15 16 17 - // 20 21 22 23 24 25 26 27 - // 30 31 32 33 34 35 36 37 - // 40 41 42 43 44 45 46 47 - // 50 51 52 53 54 55 56 57 - // 60 61 62 63 64 65 66 67 - // 70 71 72 73 74 75 76 77 - const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1); - const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3); - const __m128i tr0_2 = _mm_unpackhi_epi16(res0, res1); - const __m128i tr0_3 = _mm_unpackhi_epi16(res2, res3); - const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5); - const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7); - const __m128i tr0_6 = _mm_unpackhi_epi16(res4, res5); - const __m128i tr0_7 = _mm_unpackhi_epi16(res6, res7); - // 00 10 01 11 02 12 03 13 - // 20 30 21 31 22 32 23 33 - // 04 14 05 15 06 16 07 17 - // 24 34 25 35 26 36 27 37 - // 40 50 41 51 42 52 43 53 - // 60 70 61 71 62 72 63 73 - // 54 54 55 55 56 56 57 57 - // 64 74 65 75 66 76 67 77 - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3); - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); - const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); - const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); - const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); - // 00 10 20 30 01 11 21 31 - // 40 50 60 70 41 51 61 71 - // 02 12 22 32 03 13 23 33 - // 42 52 62 72 43 53 63 73 - // 04 14 24 34 05 15 21 36 - // 44 54 64 74 45 55 61 76 - // 06 16 26 36 07 17 27 37 - // 46 56 66 76 47 57 67 77 - in0 = _mm_unpacklo_epi64(tr1_0, tr1_4); - in1 = _mm_unpackhi_epi64(tr1_0, tr1_4); - in2 = _mm_unpacklo_epi64(tr1_2, tr1_6); - in3 = _mm_unpackhi_epi64(tr1_2, tr1_6); - in4 = _mm_unpacklo_epi64(tr1_1, tr1_5); - in5 = _mm_unpackhi_epi64(tr1_1, tr1_5); - in6 = _mm_unpacklo_epi64(tr1_3, tr1_7); - in7 = _mm_unpackhi_epi64(tr1_3, tr1_7); - // 00 10 20 30 40 50 60 70 - // 01 11 21 31 41 51 61 71 - // 02 12 22 32 42 52 62 72 - // 03 13 23 33 43 53 63 73 - // 04 14 24 34 44 54 64 74 - // 05 15 25 35 45 55 
65 75 - // 06 16 26 36 46 56 66 76 - // 07 17 27 37 47 57 67 77 - } - } - // Post-condition output and store it - { - // Post-condition (division by two) - // division of two 16 bits signed numbers using shifts - // n / 2 = (n - (n >> 15)) >> 1 - const __m128i sign_in0 = _mm_srai_epi16(in0, 15); - const __m128i sign_in1 = _mm_srai_epi16(in1, 15); - const __m128i sign_in2 = _mm_srai_epi16(in2, 15); - const __m128i sign_in3 = _mm_srai_epi16(in3, 15); - const __m128i sign_in4 = _mm_srai_epi16(in4, 15); - const __m128i sign_in5 = _mm_srai_epi16(in5, 15); - const __m128i sign_in6 = _mm_srai_epi16(in6, 15); - const __m128i sign_in7 = _mm_srai_epi16(in7, 15); - in0 = _mm_sub_epi16(in0, sign_in0); - in1 = _mm_sub_epi16(in1, sign_in1); - in2 = _mm_sub_epi16(in2, sign_in2); - in3 = _mm_sub_epi16(in3, sign_in3); - in4 = _mm_sub_epi16(in4, sign_in4); - in5 = _mm_sub_epi16(in5, sign_in5); - in6 = _mm_sub_epi16(in6, sign_in6); - in7 = _mm_sub_epi16(in7, sign_in7); - in0 = _mm_srai_epi16(in0, 1); - in1 = _mm_srai_epi16(in1, 1); - in2 = _mm_srai_epi16(in2, 1); - in3 = _mm_srai_epi16(in3, 1); - in4 = _mm_srai_epi16(in4, 1); - in5 = _mm_srai_epi16(in5, 1); - in6 = _mm_srai_epi16(in6, 1); - in7 = _mm_srai_epi16(in7, 1); - } - - iscan_ptr += n_coeffs; - qcoeff_ptr += n_coeffs; - dqcoeff_ptr += n_coeffs; - n_coeffs = -n_coeffs; - zero = _mm_setzero_si128(); - - if (!skip_block) { - __m128i eob; - __m128i round, quant, dequant, thr; - int16_t nzflag; - { - __m128i coeff0, coeff1; - - // Setup global values - { - round = _mm_load_si128((const __m128i *)round_ptr); - quant = _mm_load_si128((const __m128i *)quant_ptr); - dequant = _mm_load_si128((const __m128i *)dequant_ptr); - } - - { - __m128i coeff0_sign, coeff1_sign; - __m128i qcoeff0, qcoeff1; - __m128i qtmp0, qtmp1; - // Do DC and first 15 AC - coeff0 = *in[0]; - coeff1 = *in[1]; - - // Poor man's sign extract - coeff0_sign = _mm_srai_epi16(coeff0, 15); - coeff1_sign = _mm_srai_epi16(coeff1, 15); - qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); - qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); - qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); - qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - - qcoeff0 = _mm_adds_epi16(qcoeff0, round); - round = _mm_unpackhi_epi64(round, round); - qcoeff1 = _mm_adds_epi16(qcoeff1, round); - qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); - quant = _mm_unpackhi_epi64(quant, quant); - qtmp1 = _mm_mulhi_epi16(qcoeff1, quant); - - // Reinsert signs - qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); - qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); - qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); - qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - - store_tran_low(qcoeff0, qcoeff_ptr + n_coeffs); - store_tran_low(qcoeff1, qcoeff_ptr + n_coeffs + 8); - - coeff0 = _mm_mullo_epi16(qcoeff0, dequant); - dequant = _mm_unpackhi_epi64(dequant, dequant); - coeff1 = _mm_mullo_epi16(qcoeff1, dequant); - - store_tran_low(coeff0, dqcoeff_ptr + n_coeffs); - store_tran_low(coeff1, dqcoeff_ptr + n_coeffs + 8); - } - - { - // Scan for eob - __m128i zero_coeff0, zero_coeff1; - __m128i nzero_coeff0, nzero_coeff1; - __m128i iscan0, iscan1; - __m128i eob1; - zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero); - zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); - nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); - nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); - // Add one to convert from indices to counts - iscan0 = _mm_sub_epi16(iscan0, 
nzero_coeff0); - iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); - eob = _mm_and_si128(iscan0, nzero_coeff0); - eob1 = _mm_and_si128(iscan1, nzero_coeff1); - eob = _mm_max_epi16(eob, eob1); - } - n_coeffs += 8 * 2; - } - - // AC only loop - index = 2; - thr = _mm_srai_epi16(dequant, 1); - while (n_coeffs < 0) { - __m128i coeff0, coeff1; - { - __m128i coeff0_sign, coeff1_sign; - __m128i qcoeff0, qcoeff1; - __m128i qtmp0, qtmp1; - - assert(index < (int)(sizeof(in) / sizeof(in[0])) - 1); - coeff0 = *in[index]; - coeff1 = *in[index + 1]; - - // Poor man's sign extract - coeff0_sign = _mm_srai_epi16(coeff0, 15); - coeff1_sign = _mm_srai_epi16(coeff1, 15); - qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); - qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); - qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); - qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - - nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) | - _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr)); - - if (nzflag) { - qcoeff0 = _mm_adds_epi16(qcoeff0, round); - qcoeff1 = _mm_adds_epi16(qcoeff1, round); - qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); - qtmp1 = _mm_mulhi_epi16(qcoeff1, quant); - - // Reinsert signs - qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); - qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); - qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); - qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - - store_tran_low(qcoeff0, qcoeff_ptr + n_coeffs); - store_tran_low(qcoeff1, qcoeff_ptr + n_coeffs + 8); - - coeff0 = _mm_mullo_epi16(qcoeff0, dequant); - coeff1 = _mm_mullo_epi16(qcoeff1, dequant); - - store_tran_low(coeff0, dqcoeff_ptr + n_coeffs); - store_tran_low(coeff1, dqcoeff_ptr + n_coeffs + 8); - } else { - // Maybe a more efficient way to store 0? - store_zero_tran_low(qcoeff_ptr + n_coeffs); - store_zero_tran_low(qcoeff_ptr + n_coeffs + 8); - - store_zero_tran_low(dqcoeff_ptr + n_coeffs); - store_zero_tran_low(dqcoeff_ptr + n_coeffs + 8); - } - } - - if (nzflag) { - // Scan for eob - __m128i zero_coeff0, zero_coeff1; - __m128i nzero_coeff0, nzero_coeff1; - __m128i iscan0, iscan1; - __m128i eob0, eob1; - zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero); - zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); - nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); - nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); - // Add one to convert from indices to counts - iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); - iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); - eob0 = _mm_and_si128(iscan0, nzero_coeff0); - eob1 = _mm_and_si128(iscan1, nzero_coeff1); - eob0 = _mm_max_epi16(eob0, eob1); - eob = _mm_max_epi16(eob, eob0); - } - n_coeffs += 8 * 2; - index += 2; - } - - // Accumulate EOB - { - __m128i eob_shuffled; - eob_shuffled = _mm_shuffle_epi32(eob, 0xe); - eob = _mm_max_epi16(eob, eob_shuffled); - eob_shuffled = _mm_shufflelo_epi16(eob, 0xe); - eob = _mm_max_epi16(eob, eob_shuffled); - eob_shuffled = _mm_shufflelo_epi16(eob, 0x1); - eob = _mm_max_epi16(eob, eob_shuffled); - *eob_ptr = _mm_extract_epi16(eob, 1); - } - } else { - do { - store_zero_tran_low(dqcoeff_ptr + n_coeffs); - store_zero_tran_low(dqcoeff_ptr + n_coeffs + 8); - store_zero_tran_low(qcoeff_ptr + n_coeffs); - store_zero_tran_low(qcoeff_ptr + n_coeffs + 8); - n_coeffs += 8 * 2; - } while (n_coeffs < 0); - *eob_ptr = 0; - } -} diff --git a/media/libvpx/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c 
b/media/libvpx/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c index 2f3c66c0835c..4be6a5ea02e1 100644 --- a/media/libvpx/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c +++ b/media/libvpx/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c @@ -114,7 +114,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, // Work out the start point for the search const uint8_t *best_address = in_what; const uint8_t *new_best_address = best_address; -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 __m128i v_ba_q = _mm_set1_epi64x((intptr_t)best_address); #else __m128i v_ba_d = _mm_set1_epi32((intptr_t)best_address); @@ -138,7 +138,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, for (i = 0, step = 0; step < tot_steps; step++) { for (j = 0; j < cfg->searches_per_step; j += 4, i += 4) { __m128i v_sad_d, v_cost_d, v_outside_d, v_inside_d, v_diff_mv_w; -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 __m128i v_blocka[2]; #else __m128i v_blocka[1]; @@ -160,7 +160,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, } // The inverse mask indicates which of the MVs are outside - v_outside_d = _mm_xor_si128(v_inside_d, _mm_set1_epi8(0xff)); + v_outside_d = _mm_xor_si128(v_inside_d, _mm_set1_epi8((int8_t)0xff)); // Shift right to keep the sign bit clear, we will use this later // to set the cost to the maximum value. v_outside_d = _mm_srli_epi32(v_outside_d, 1); @@ -175,7 +175,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, // Compute the SIMD pointer offsets. { -#if ARCH_X86_64 // sizeof(intptr_t) == 8 +#if VPX_ARCH_X86_64 // sizeof(intptr_t) == 8 // Load the offsets __m128i v_bo10_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 0]); __m128i v_bo32_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 2]); @@ -186,7 +186,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, // Compute the candidate addresses v_blocka[0] = _mm_add_epi64(v_ba_q, v_bo10_q); v_blocka[1] = _mm_add_epi64(v_ba_q, v_bo32_q); -#else // ARCH_X86 // sizeof(intptr_t) == 4 +#else // VPX_ARCH_X86 // sizeof(intptr_t) == 4 __m128i v_bo_d = _mm_loadu_si128((const __m128i *)&ss_os[i]); v_bo_d = _mm_and_si128(v_bo_d, v_inside_d); v_blocka[0] = _mm_add_epi32(v_ba_d, v_bo_d); @@ -294,7 +294,7 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, best_address = new_best_address; v_bmv_w = _mm_set1_epi32(bmv.as_int); -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 v_ba_q = _mm_set1_epi64x((intptr_t)best_address); #else v_ba_d = _mm_set1_epi32((intptr_t)best_address); diff --git a/media/libvpx/libvpx/vp9/encoder/x86/vp9_error_sse2.asm b/media/libvpx/libvpx/vp9/encoder/x86/vp9_error_sse2.asm index 11d473b2dfa0..7beec130abe2 100644 --- a/media/libvpx/libvpx/vp9/encoder/x86/vp9_error_sse2.asm +++ b/media/libvpx/libvpx/vp9/encoder/x86/vp9_error_sse2.asm @@ -58,7 +58,7 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz movhlps m7, m6 paddq m4, m5 paddq m6, m7 -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 movq rax, m4 movq [sszq], m6 %else @@ -105,7 +105,7 @@ cglobal block_error_fp, 3, 3, 6, uqc, dqc, size ; accumulate horizontally and store in return value movhlps m5, m4 paddq m4, m5 -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 movq rax, m4 %else pshufd m5, m4, 0x1 diff --git a/media/libvpx/libvpx/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c b/media/libvpx/libvpx/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c index 91f627c343ae..d7aafe7b011e 100644 --- a/media/libvpx/libvpx/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c +++ b/media/libvpx/libvpx/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c @@ -11,27 +11,28 @@ #include <emmintrin.h> #include <stdio.h> +#include
"./vp9_rtcd.h" #include "vp9/common/vp9_common.h" -int64_t vp9_highbd_block_error_sse2(tran_low_t *coeff, tran_low_t *dqcoeff, - intptr_t block_size, int64_t *ssz, - int bps) { +int64_t vp9_highbd_block_error_sse2(const tran_low_t *coeff, + const tran_low_t *dqcoeff, + intptr_t block_size, int64_t *ssz, int bd) { int i, j, test; uint32_t temp[4]; __m128i max, min, cmp0, cmp1, cmp2, cmp3; int64_t error = 0, sqcoeff = 0; - const int shift = 2 * (bps - 8); + const int shift = 2 * (bd - 8); const int rounding = shift > 0 ? 1 << (shift - 1) : 0; for (i = 0; i < block_size; i += 8) { // Load the data into xmm registers - __m128i mm_coeff = _mm_load_si128((__m128i *)(coeff + i)); - __m128i mm_coeff2 = _mm_load_si128((__m128i *)(coeff + i + 4)); - __m128i mm_dqcoeff = _mm_load_si128((__m128i *)(dqcoeff + i)); - __m128i mm_dqcoeff2 = _mm_load_si128((__m128i *)(dqcoeff + i + 4)); + __m128i mm_coeff = _mm_load_si128((const __m128i *)(coeff + i)); + __m128i mm_coeff2 = _mm_load_si128((const __m128i *)(coeff + i + 4)); + __m128i mm_dqcoeff = _mm_load_si128((const __m128i *)(dqcoeff + i)); + __m128i mm_dqcoeff2 = _mm_load_si128((const __m128i *)(dqcoeff + i + 4)); // Check if any values require more than 15 bit max = _mm_set1_epi32(0x3fff); - min = _mm_set1_epi32(0xffffc000); + min = _mm_set1_epi32((int32_t)0xffffc000); cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max), _mm_cmplt_epi32(mm_coeff, min)); cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max), diff --git a/media/libvpx/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c b/media/libvpx/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c new file mode 100644 index 000000000000..8dfdbd50f6c2 --- /dev/null +++ b/media/libvpx/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2017 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include // AVX2 + +#include "./vp9_rtcd.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_dsp/x86/bitdepth_conversion_avx2.h" +#include "vpx_dsp/x86/quantize_sse2.h" + +// Zero fill 8 positions in the output buffer. +static INLINE void store_zero_tran_low(tran_low_t *a) { + const __m256i zero = _mm256_setzero_si256(); +#if CONFIG_VP9_HIGHBITDEPTH + _mm256_storeu_si256((__m256i *)(a), zero); + _mm256_storeu_si256((__m256i *)(a + 8), zero); +#else + _mm256_storeu_si256((__m256i *)(a), zero); +#endif +} + +static INLINE __m256i scan_eob_256(const __m256i *iscan_ptr, + __m256i *coeff256) { + const __m256i iscan = _mm256_loadu_si256(iscan_ptr); + const __m256i zero256 = _mm256_setzero_si256(); +#if CONFIG_VP9_HIGHBITDEPTH + // The _mm256_packs_epi32() in load_tran_low() packs the 64 bit coeff as + // B1 A1 B0 A0. Shuffle to B1 B0 A1 A0 in order to scan eob correctly. 
+ const __m256i _coeff256 = _mm256_permute4x64_epi64(*coeff256, 0xd8); + const __m256i zero_coeff0 = _mm256_cmpeq_epi16(_coeff256, zero256); +#else + const __m256i zero_coeff0 = _mm256_cmpeq_epi16(*coeff256, zero256); +#endif + const __m256i nzero_coeff0 = _mm256_cmpeq_epi16(zero_coeff0, zero256); + // Add one to convert from indices to counts + const __m256i iscan_plus_one = _mm256_sub_epi16(iscan, nzero_coeff0); + return _mm256_and_si256(iscan_plus_one, nzero_coeff0); +} + +void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { + __m128i eob; + __m256i round256, quant256, dequant256; + __m256i eob256, thr256; + + (void)scan; + (void)skip_block; + assert(!skip_block); + + coeff_ptr += n_coeffs; + iscan += n_coeffs; + qcoeff_ptr += n_coeffs; + dqcoeff_ptr += n_coeffs; + n_coeffs = -n_coeffs; + + { + __m256i coeff256; + + // Setup global values + { + const __m128i round = _mm_load_si128((const __m128i *)round_ptr); + const __m128i quant = _mm_load_si128((const __m128i *)quant_ptr); + const __m128i dequant = _mm_load_si128((const __m128i *)dequant_ptr); + round256 = _mm256_castsi128_si256(round); + round256 = _mm256_permute4x64_epi64(round256, 0x54); + + quant256 = _mm256_castsi128_si256(quant); + quant256 = _mm256_permute4x64_epi64(quant256, 0x54); + + dequant256 = _mm256_castsi128_si256(dequant); + dequant256 = _mm256_permute4x64_epi64(dequant256, 0x54); + } + + { + __m256i qcoeff256; + __m256i qtmp256; + coeff256 = load_tran_low(coeff_ptr + n_coeffs); + qcoeff256 = _mm256_abs_epi16(coeff256); + qcoeff256 = _mm256_adds_epi16(qcoeff256, round256); + qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256); + qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256); + store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs); + coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256); + store_tran_low(coeff256, dqcoeff_ptr + n_coeffs); + } + + eob256 = scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256); + n_coeffs += 8 * 2; + } + + // remove dc constants + dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31); + quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31); + round256 = _mm256_permute2x128_si256(round256, round256, 0x31); + + thr256 = _mm256_srai_epi16(dequant256, 1); + + // AC only loop + while (n_coeffs < 0) { + __m256i coeff256 = load_tran_low(coeff_ptr + n_coeffs); + __m256i qcoeff256 = _mm256_abs_epi16(coeff256); + int32_t nzflag = + _mm256_movemask_epi8(_mm256_cmpgt_epi16(qcoeff256, thr256)); + + if (nzflag) { + __m256i qtmp256; + qcoeff256 = _mm256_adds_epi16(qcoeff256, round256); + qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256); + qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256); + store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs); + coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256); + store_tran_low(coeff256, dqcoeff_ptr + n_coeffs); + eob256 = _mm256_max_epi16( + eob256, scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256)); + } else { + store_zero_tran_low(qcoeff_ptr + n_coeffs); + store_zero_tran_low(dqcoeff_ptr + n_coeffs); + } + n_coeffs += 8 * 2; + } + + eob = _mm_max_epi16(_mm256_castsi256_si128(eob256), + _mm256_extracti128_si256(eob256, 1)); + + *eob_ptr = accumulate_eob(eob); +} diff --git a/media/libvpx/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c b/media/libvpx/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c index 
ca0ad4407e56..e3d803b8f076 100644 --- a/media/libvpx/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c +++ b/media/libvpx/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c @@ -21,20 +21,20 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, - const int16_t *iscan_ptr) { + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { __m128i zero; __m128i thr; - int16_t nzflag; + int nzflag; __m128i eob; __m128i round, quant, dequant; - (void)scan_ptr; + (void)scan; (void)skip_block; assert(!skip_block); coeff_ptr += n_coeffs; - iscan_ptr += n_coeffs; + iscan += n_coeffs; qcoeff_ptr += n_coeffs; dqcoeff_ptr += n_coeffs; n_coeffs = -n_coeffs; @@ -100,8 +100,8 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); + iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs)); + iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1); // Add one to convert from indices to counts iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); @@ -175,8 +175,8 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); + iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs)); + iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1); // Add one to convert from indices to counts iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); diff --git a/media/libvpx/libvpx/vp9/simple_encode.cc b/media/libvpx/libvpx/vp9/simple_encode.cc new file mode 100644 index 000000000000..6a35eb6bcfa0 --- /dev/null +++ b/media/libvpx/libvpx/vp9/simple_encode.cc @@ -0,0 +1,313 @@ +#include <memory> +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/vp9_iface_common.h" +#include "vp9/encoder/vp9_encoder.h" +#include "vp9/encoder/vp9_firstpass.h" +#include "vp9/simple_encode.h" +#include "vp9/vp9_cx_iface.h" + +namespace vp9 { + +// TODO(angiebird): Merge this function with vpx_img_plane_width() +static int img_plane_width(const vpx_image_t *img, int plane) { + if (plane > 0 && img->x_chroma_shift > 0) + return (img->d_w + 1) >> img->x_chroma_shift; + else + return img->d_w; +} + +// TODO(angiebird): Merge this function with vpx_img_plane_height() +static int img_plane_height(const vpx_image_t *img, int plane) { + if (plane > 0 && img->y_chroma_shift > 0) + return (img->d_h + 1) >> img->y_chroma_shift; + else + return img->d_h; +} + +// TODO(angiebird): Merge this function with vpx_img_read() +static int img_read(vpx_image_t *img, FILE *file) { + int plane; + + for (plane = 0; plane < 3; ++plane) { + unsigned char *buf = img->planes[plane]; + const int stride = img->stride[plane]; + const int w = img_plane_width(img, plane) * + ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ?
2 : 1); + const int h = img_plane_height(img, plane); + int y; + + for (y = 0; y < h; ++y) { + if (fread(buf, 1, w, file) != (size_t)w) return 0; + buf += stride; + } + } + + return 1; +} + +class SimpleEncode::EncodeImpl { + public: + VP9_COMP *cpi; + vpx_img_fmt_t img_fmt; + vpx_image_t tmp_img; + std::vector<FIRSTPASS_STATS> first_pass_stats; +}; + +static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf, + vpx_img_fmt_t img_fmt) { + VP9_COMP *cpi; + BufferPool *buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(*buffer_pool)); + vp9_initialize_enc(); + cpi = vp9_create_compressor(oxcf, buffer_pool); + vp9_update_compressor_with_img_fmt(cpi, img_fmt); + return cpi; +} + +static void free_encoder(VP9_COMP *cpi) { + BufferPool *buffer_pool = cpi->common.buffer_pool; + vp9_remove_compressor(cpi); + // buffer_pool needs to be freed after cpi because buffer_pool contains + // allocated buffers that will be freed in vp9_remove_compressor() + vpx_free(buffer_pool); +} + +static INLINE vpx_rational_t make_vpx_rational(int num, int den) { + vpx_rational_t v; + v.num = num; + v.den = den; + return v; +} + +static INLINE FrameType +get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type) { + // TODO(angiebird): Figure out if we need frame types other than key frame, + // alternate reference and inter frame + switch (update_type) { + case KF_UPDATE: return kKeyFrame; break; + case ARF_UPDATE: return kAlternateReference; break; + default: return kInterFrame; break; + } +} + +static void update_encode_frame_result( + EncodeFrameResult *encode_frame_result, + const ENCODE_FRAME_RESULT *encode_frame_info) { + encode_frame_result->coding_data_bit_size = + encode_frame_result->coding_data_byte_size * 8; + encode_frame_result->show_idx = encode_frame_info->show_idx; + encode_frame_result->frame_type = + get_frame_type_from_update_type(encode_frame_info->update_type); + encode_frame_result->psnr = encode_frame_info->psnr; + encode_frame_result->sse = encode_frame_info->sse; + encode_frame_result->quantize_index = encode_frame_info->quantize_index; +} + +SimpleEncode::SimpleEncode(int frame_width, int frame_height, + int frame_rate_num, int frame_rate_den, + int target_bitrate, int num_frames, + const char *infile_path) { + impl_ptr_ = std::unique_ptr<EncodeImpl>(new EncodeImpl()); + frame_width_ = frame_width; + frame_height_ = frame_height; + frame_rate_num_ = frame_rate_num; + frame_rate_den_ = frame_rate_den; + target_bitrate_ = target_bitrate; + num_frames_ = num_frames; + // TODO(angiebird): Should we keep a file pointer here or keep the file_path?
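+  // [editor's note, not part of the upstream patch] img_read() pulls raw
+  // binary YUV planes with fread(), so opening the file in binary mode would
+  // be safer on platforms that distinguish text mode, e.g.
+  //   file_ = fopen(infile_path, "rb");
+  // The call below keeps the upstream "r" mode.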
+ file_ = fopen(infile_path, "r"); + impl_ptr_->cpi = NULL; + impl_ptr_->img_fmt = VPX_IMG_FMT_I420; +} + +void SimpleEncode::ComputeFirstPassStats() { + vpx_rational_t frame_rate = + make_vpx_rational(frame_rate_num_, frame_rate_den_); + const VP9EncoderConfig oxcf = + vp9_get_encoder_config(frame_width_, frame_height_, frame_rate, + target_bitrate_, VPX_RC_FIRST_PASS); + VP9_COMP *cpi = init_encoder(&oxcf, impl_ptr_->img_fmt); + struct lookahead_ctx *lookahead = cpi->lookahead; + int i; + int use_highbitdepth = 0; +#if CONFIG_VP9_HIGHBITDEPTH + use_highbitdepth = cpi->common.use_highbitdepth; +#endif + vpx_image_t img; + vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1); + rewind(file_); + impl_ptr_->first_pass_stats.clear(); + for (i = 0; i < num_frames_; ++i) { + assert(!vp9_lookahead_full(lookahead)); + if (img_read(&img, file_)) { + int next_show_idx = vp9_lookahead_next_show_idx(lookahead); + int64_t ts_start = + timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx); + int64_t ts_end = + timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx + 1); + YV12_BUFFER_CONFIG sd; + image2yuvconfig(&img, &sd); + vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0); + { + int64_t time_stamp; + int64_t time_end; + int flush = 1; // Makes vp9_get_compressed_data process a frame + size_t size; + unsigned int frame_flags = 0; + ENCODE_FRAME_RESULT encode_frame_info; + // TODO(angiebird): Call vp9_first_pass directly + vp9_get_compressed_data(cpi, &frame_flags, &size, NULL, &time_stamp, + &time_end, flush, &encode_frame_info); + // vp9_get_compressed_data only generates first pass stats here; it + // does not produce compressed data + assert(size == 0); + } + impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass)); + } + } + vp9_end_first_pass(cpi); + // TODO(angiebird): Store the total_stats apart from first_pass_stats + impl_ptr_->first_pass_stats.push_back(vp9_get_total_stats(&cpi->twopass)); + free_encoder(cpi); + rewind(file_); + vpx_img_free(&img); +} + +std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() { + std::vector<std::vector<double>> output_stats; + // TODO(angiebird): This function makes several assumptions about + // FIRSTPASS_STATS. 1) All elements in FIRSTPASS_STATS are double except the + // last one. 2) The last entry of first_pass_stats is the total_stats. + // Change the code structure so that we don't have to make these assumptions + + // Note the last entry of first_pass_stats is the total_stats, which we + // don't need.
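+  // [editor's note, illustration only] What the loop below relies on, in
+  // reduced form: if a stats struct is N doubles followed by one non-double
+  // tail member, the doubles can be copied out with pointer arithmetic:
+  //   struct Stats { double a, b, c; int64_t tail; };  // hypothetical
+  //   double *begin = reinterpret_cast<double *>(&s);
+  //   double *end = begin + sizeof(s) / sizeof(double) - 1;  // drops tail
+  //   std::vector<double> v(begin, end);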
+ for (size_t i = 0; i < impl_ptr_->first_pass_stats.size() - 1; ++i) { + double *buf_start = + reinterpret_cast<double *>(&impl_ptr_->first_pass_stats[i]); + // We use - 1 here because the last member in FIRSTPASS_STATS is not a double + double *buf_end = + buf_start + sizeof(impl_ptr_->first_pass_stats[i]) / sizeof(*buf_end) - + 1; + std::vector<double> this_stats(buf_start, buf_end); + output_stats.push_back(this_stats); + } + return output_stats; +} + +void SimpleEncode::StartEncode() { + assert(impl_ptr_->first_pass_stats.size() > 0); + vpx_rational_t frame_rate = + make_vpx_rational(frame_rate_num_, frame_rate_den_); + VP9EncoderConfig oxcf = + vp9_get_encoder_config(frame_width_, frame_height_, frame_rate, + target_bitrate_, VPX_RC_LAST_PASS); + vpx_fixed_buf_t stats; + stats.buf = impl_ptr_->first_pass_stats.data(); + stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) * + impl_ptr_->first_pass_stats.size(); + + vp9_set_first_pass_stats(&oxcf, &stats); + assert(impl_ptr_->cpi == NULL); + impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt); + vpx_img_alloc(&impl_ptr_->tmp_img, impl_ptr_->img_fmt, frame_width_, + frame_height_, 1); + rewind(file_); +} + +void SimpleEncode::EndEncode() { + free_encoder(impl_ptr_->cpi); + impl_ptr_->cpi = nullptr; + vpx_img_free(&impl_ptr_->tmp_img); + rewind(file_); +} + +int SimpleEncode::GetKeyFrameGroupSize(int key_frame_index) const { + const VP9_COMP *cpi = impl_ptr_->cpi; + return vp9_get_frames_to_next_key(&cpi->oxcf, &cpi->frame_info, + &cpi->twopass.first_pass_info, + key_frame_index, cpi->rc.min_gf_interval); +} + +void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) { + VP9_COMP *cpi = impl_ptr_->cpi; + struct lookahead_ctx *lookahead = cpi->lookahead; + int use_highbitdepth = 0; +#if CONFIG_VP9_HIGHBITDEPTH + use_highbitdepth = cpi->common.use_highbitdepth; +#endif + // The lookahead's size is set to oxcf->lag_in_frames. + // We want to fill the lookahead to its max capacity if possible so that the + // encoder can construct alt ref frames in time. + // In other words, we expect vp9_get_compressed_data to encode a frame + // every time this function is called. + while (!vp9_lookahead_full(lookahead)) { + // TODO(angiebird): Check whether we can move this file reading logic into + // the lookahead + if (img_read(&impl_ptr_->tmp_img, file_)) { + int next_show_idx = vp9_lookahead_next_show_idx(lookahead); + int64_t ts_start = + timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, next_show_idx); + int64_t ts_end = timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, + next_show_idx + 1); + YV12_BUFFER_CONFIG sd; + image2yuvconfig(&impl_ptr_->tmp_img, &sd); + vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0); + } else { + break; + } + } + assert(encode_frame_result->coding_data.get() == nullptr); + const size_t max_coding_data_byte_size = frame_width_ * frame_height_ * 3; + encode_frame_result->coding_data = std::move( + std::unique_ptr<uint8_t[]>(new uint8_t[max_coding_data_byte_size])); + int64_t time_stamp; + int64_t time_end; + int flush = 1; // Make vp9_get_compressed_data encode a frame + unsigned int frame_flags = 0; + ENCODE_FRAME_RESULT encode_frame_info; + vp9_get_compressed_data(cpi, &frame_flags, + &encode_frame_result->coding_data_byte_size, + encode_frame_result->coding_data.get(), &time_stamp, + &time_end, flush, &encode_frame_info); + // vp9_get_compressed_data is expected to encode a frame every time, so the + // data size should be greater than zero.
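+  // [editor's note, illustration only] max_coding_data_byte_size above is
+  // frame_width_ * frame_height_ * 3, i.e. twice the size of a raw 8-bit
+  // 4:2:0 frame (w * h + 2 * (w / 2) * (h / 2) = 1.5 * w * h bytes), a bound
+  // a compressed frame should never reach; the asserts below check that
+  // bound and that a nonzero payload was actually produced.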
+ assert(encode_frame_result->coding_data_byte_size > 0); + assert(encode_frame_result->coding_data_byte_size < + max_coding_data_byte_size); + + update_encode_frame_result(encode_frame_result, &encode_frame_info); +} + +void SimpleEncode::EncodeFrameWithQuantizeIndex( + EncodeFrameResult *encode_frame_result, int quantize_index) { + encode_command_set_external_quantize_index(&impl_ptr_->cpi->encode_command, + quantize_index); + EncodeFrame(encode_frame_result); + encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command); +} + +int SimpleEncode::GetCodingFrameNum() const { + assert(impl_ptr_->first_pass_stats.size() - 1 > 0); + // These are the default settings for now. + const int multi_layer_arf = 0; + const int allow_alt_ref = 1; + vpx_rational_t frame_rate = + make_vpx_rational(frame_rate_num_, frame_rate_den_); + const VP9EncoderConfig oxcf = + vp9_get_encoder_config(frame_width_, frame_height_, frame_rate, + target_bitrate_, VPX_RC_LAST_PASS); + FRAME_INFO frame_info = vp9_get_frame_info(&oxcf); + FIRST_PASS_INFO first_pass_info; + fps_init_first_pass_info(&first_pass_info, impl_ptr_->first_pass_stats.data(), + num_frames_); + return vp9_get_coding_frame_num(&oxcf, &frame_info, &first_pass_info, + multi_layer_arf, allow_alt_ref); +} + +SimpleEncode::~SimpleEncode() { + if (this->file_ != NULL) { + fclose(this->file_); + } +} + +} // namespace vp9 diff --git a/media/libvpx/libvpx/vp9/simple_encode.h b/media/libvpx/libvpx/vp9/simple_encode.h new file mode 100644 index 000000000000..471b4e7a84da --- /dev/null +++ b/media/libvpx/libvpx/vp9/simple_encode.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_SIMPLE_ENCODE_H_ +#define VPX_VP9_SIMPLE_ENCODE_H_ + +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <memory> +#include <vector> + +namespace vp9 { + +enum FrameType { + kKeyFrame = 0, + kInterFrame, + kAlternateReference, +}; + +struct EncodeFrameResult { + int show_idx; + FrameType frame_type; + size_t coding_data_bit_size; + size_t coding_data_byte_size; + // EncodeFrame() will allocate a buffer, write the coding data into the + // buffer and give ownership of the buffer to coding_data. + std::unique_ptr<uint8_t[]> coding_data; + double psnr; + uint64_t sse; + int quantize_index; +}; + +class SimpleEncode { + public: + SimpleEncode(int frame_width, int frame_height, int frame_rate_num, + int frame_rate_den, int target_bitrate, int num_frames, + const char *infile_path); + ~SimpleEncode(); + SimpleEncode(SimpleEncode &) = delete; + SimpleEncode &operator=(const SimpleEncode &) = delete; + + // Makes the encoder compute the first pass stats and store them internally + // for future encodes. + void ComputeFirstPassStats(); + + // Outputs the first pass stats represented by a 2-D vector. + // One can use the frame index in the first dimension to retrieve the stats + // for each video frame. The stats of each video frame are a vector of 25 + // double values. For details, please check FIRSTPASS_STATS in + // vp9_firstpass.h + std::vector<std::vector<double>> ObserveFirstPassStats(); + + // Initializes the encoder for actual encoding. + // This function should be called after ComputeFirstPassStats().
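+  // [editor's note, not part of the upstream patch] A hedged sketch of the
+  // full call sequence this class expects; all parameter values and the file
+  // path below are made up:
+  //   vp9::SimpleEncode enc(352, 288, 30, 1, 400, 150, "input_352x288.yuv");
+  //   enc.ComputeFirstPassStats();
+  //   enc.StartEncode();
+  //   const int n = enc.GetCodingFrameNum();
+  //   for (int i = 0; i < n; ++i) {
+  //     vp9::EncodeFrameResult result;  // fresh result per frame
+  //     enc.EncodeFrame(&result);
+  //   }
+  //   enc.EndEncode();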
+
+  // Initializes the encoder for actual encoding.
+  // This function should be called after ComputeFirstPassStats().
+  void StartEncode();
+
+  // Frees the encoder.
+  // This function should be called after StartEncode() or EncodeFrame().
+  void EndEncode();
+
+  // Given a key_frame_index, computes this key frame group's size.
+  // The key frame group size includes one key frame plus the number of
+  // following inter frames. Note that the key frame group size only counts
+  // show frames. No-show frames, such as alternate references, are not
+  // counted.
+  int GetKeyFrameGroupSize(int key_frame_index) const;
+
+  // Encodes a frame.
+  // This function should be called after StartEncode() and before EndEncode().
+  void EncodeFrame(EncodeFrameResult *encode_frame_result);
+
+  // Encodes a frame with a specific quantize index.
+  // This function should be called after StartEncode() and before EndEncode().
+  void EncodeFrameWithQuantizeIndex(EncodeFrameResult *encode_frame_result,
+                                    int quantize_index);
+
+  // Gets the number of coding frames for the video. The coding frames include
+  // both show frames and no-show frames.
+  // This function should be called after ComputeFirstPassStats().
+  int GetCodingFrameNum() const;
+
+ private:
+  class EncodeImpl;
+
+  int frame_width_;
+  int frame_height_;
+  int frame_rate_num_;
+  int frame_rate_den_;
+  int target_bitrate_;
+  int num_frames_;
+  std::FILE *file_;
+  std::unique_ptr<EncodeImpl> impl_ptr_;
+};
+
+}  // namespace vp9
+
+#endif  // VPX_VP9_SIMPLE_ENCODE_H_
diff --git a/media/libvpx/libvpx/vp9/vp9_common.mk b/media/libvpx/libvpx/vp9/vp9_common.mk
index 5bfc0d3599fd..5ef2f891a82b 100644
--- a/media/libvpx/libvpx/vp9/vp9_common.mk
+++ b/media/libvpx/libvpx/vp9/vp9_common.mk
@@ -10,6 +10,7 @@ VP9_COMMON_SRCS-yes += vp9_common.mk
 VP9_COMMON_SRCS-yes += vp9_iface_common.h
+VP9_COMMON_SRCS-yes += vp9_iface_common.c
 VP9_COMMON_SRCS-yes += common/vp9_ppflags.h
 VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c
 VP9_COMMON_SRCS-yes += common/vp9_blockd.c
@@ -63,30 +64,36 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h
 VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
 VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h
 VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c
+
+ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
+endif # !CONFIG_VP9_HIGHBITDEPTH
+
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
+VP9_COMMON_SRCS-$(HAVE_VSX) += common/ppc/vp9_idct_vsx.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht16x16_add_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht_neon.h
+
 ifeq ($(CONFIG_VP9_POSTPROC),yes)
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_mfqe_sse2.asm
 endif
 
 ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans4_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans8_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans16_dspr2.c
-endif
-
-# common (msa)
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
-
-ifeq ($(CONFIG_VP9_POSTPROC),yes)
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c -endif - -VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c - -ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c +VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans4_dspr2.c +VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans8_dspr2.c +VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans16_dspr2.c +else +VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_highbd_iht4x4_add_neon.c +VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_highbd_iht8x8_add_neon.c +VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_highbd_iht16x16_add_neon.c +VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht4x4_add_sse4.c +VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht8x8_add_sse4.c +VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht16x16_add_sse4.c endif $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl)) diff --git a/media/libvpx/libvpx/vp9/vp9_cx_iface.c b/media/libvpx/libvpx/vp9/vp9_cx_iface.c index c13d6e1ba768..62fe1f8a465e 100644 --- a/media/libvpx/libvpx/vp9/vp9_cx_iface.c +++ b/media/libvpx/libvpx/vp9/vp9_cx_iface.c @@ -13,16 +13,22 @@ #include "./vpx_config.h" #include "vpx/vpx_encoder.h" +#include "vpx_dsp/psnr.h" #include "vpx_ports/vpx_once.h" #include "vpx_ports/system_state.h" +#include "vpx_util/vpx_timestamp.h" #include "vpx/internal/vpx_codec_internal.h" #include "./vpx_version.h" #include "vp9/encoder/vp9_encoder.h" #include "vpx/vp8cx.h" +#include "vp9/common/vp9_alloccommon.h" +#include "vp9/vp9_cx_iface.h" #include "vp9/encoder/vp9_firstpass.h" +#include "vp9/encoder/vp9_lookahead.h" +#include "vp9/vp9_cx_iface.h" #include "vp9/vp9_iface_common.h" -struct vp9_extracfg { +typedef struct vp9_extracfg { int cpu_used; // available cpu percentage in 1/16 unsigned int enable_auto_alt_ref; unsigned int noise_sensitivity; @@ -30,6 +36,7 @@ struct vp9_extracfg { unsigned int static_thresh; unsigned int tile_columns; unsigned int tile_rows; + unsigned int enable_tpl_model; unsigned int arnr_max_frames; unsigned int arnr_strength; unsigned int min_gf_interval; @@ -53,7 +60,7 @@ struct vp9_extracfg { int render_height; unsigned int row_mt; unsigned int motion_vector_unit_test; -}; +} vp9_extracfg; static struct vp9_extracfg default_extra_cfg = { 0, // cpu_used @@ -63,6 +70,7 @@ static struct vp9_extracfg default_extra_cfg = { 0, // static_thresh 6, // tile_columns 0, // tile_rows + 1, // enable_tpl_model 7, // arnr_max_frames 5, // arnr_strength 0, // min_gf_interval; 0 -> default decision @@ -92,6 +100,9 @@ struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_enc_cfg_t cfg; struct vp9_extracfg extra_cfg; + vpx_rational64_t timestamp_ratio; + vpx_codec_pts_t pts_offset; + unsigned char pts_offset_initialized; VP9EncoderConfig oxcf; VP9_COMP *cpi; unsigned char *cx_data; @@ -128,10 +139,10 @@ static vpx_codec_err_t update_error_state( return VPX_CODEC_INVALID_PARAM; \ } while (0) -#define RANGE_CHECK(p, memb, lo, hi) \ - do { \ - if (!(((p)->memb == lo || (p)->memb > (lo)) && (p)->memb <= hi)) \ - ERROR(#memb " out of range [" #lo ".." #hi "]"); \ +#define RANGE_CHECK(p, memb, lo, hi) \ + do { \ + if (!(((p)->memb == (lo) || (p)->memb > (lo)) && (p)->memb <= (hi))) \ + ERROR(#memb " out of range [" #lo ".." 
#hi "]"); \ } while (0) #define RANGE_CHECK_HI(p, memb, hi) \ @@ -149,6 +160,22 @@ static vpx_codec_err_t update_error_state( if (!!((p)->memb) != (p)->memb) ERROR(#memb " expected boolean"); \ } while (0) +#if defined(_MSC_VER) +#define COMPILE_TIME_ASSERT(boolexp) \ + do { \ + char compile_time_assert[(boolexp) ? 1 : -1]; \ + (void)compile_time_assert; \ + } while (0) +#else // !_MSC_VER +#define COMPILE_TIME_ASSERT(boolexp) \ + do { \ + struct { \ + unsigned int compile_time_assert : (boolexp) ? 1 : -1; \ + } compile_time_assert; \ + (void)compile_time_assert; \ + } while (0) +#endif // _MSC_VER + static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) { @@ -237,22 +264,6 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, ERROR("ts_rate_decimator factors are not powers of 2"); } -#if CONFIG_SPATIAL_SVC - - if ((cfg->ss_number_layers > 1 || cfg->ts_number_layers > 1) && - cfg->g_pass == VPX_RC_LAST_PASS) { - unsigned int i, alt_ref_sum = 0; - for (i = 0; i < cfg->ss_number_layers; ++i) { - if (cfg->ss_enable_auto_alt_ref[i]) ++alt_ref_sum; - } - if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers) - ERROR("Not enough ref buffers for svc alt ref frames"); - if (cfg->ss_number_layers * cfg->ts_number_layers > 3 && - cfg->g_error_resilient == 0) - ERROR("Multiple frame context are not supported for more than 3 layers"); - } -#endif - // VP9 does not support a lower bound on the keyframe interval in // automatic keyframe placement mode. if (cfg->kf_mode != VPX_KF_DISABLED && cfg->kf_min_dist != cfg->kf_max_dist && @@ -263,8 +274,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(extra_cfg, row_mt, 0, 1); RANGE_CHECK(extra_cfg, motion_vector_unit_test, 0, 2); - RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2); - RANGE_CHECK(extra_cfg, cpu_used, -8, 8); + RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, MAX_ARF_LAYERS); + RANGE_CHECK(extra_cfg, cpu_used, -9, 9); RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6); RANGE_CHECK(extra_cfg, tile_columns, 0, 6); RANGE_CHECK(extra_cfg, tile_rows, 0, 2); @@ -277,10 +288,6 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(extra_cfg, content, VP9E_CONTENT_DEFAULT, VP9E_CONTENT_INVALID - 1); - // TODO(yaowu): remove this when ssim tuning is implemented for vp9 - if (extra_cfg->tuning == VP8_TUNE_SSIM) - ERROR("Option --tune=ssim is not currently supported in VP9."); - #if !CONFIG_REALTIME_ONLY if (cfg->g_pass == VPX_RC_LAST_PASS) { const size_t packet_sz = sizeof(FIRSTPASS_STATS); @@ -464,6 +471,15 @@ static void config_target_level(VP9EncoderConfig *oxcf) { } } +static vpx_rational64_t get_g_timebase_in_ts(vpx_rational_t g_timebase) { + vpx_rational64_t g_timebase_in_ts; + g_timebase_in_ts.den = g_timebase.den; + g_timebase_in_ts.num = g_timebase.num; + g_timebase_in_ts.num *= TICKS_PER_SEC; + reduce_ratio(&g_timebase_in_ts); + return g_timebase_in_ts; +} + static vpx_codec_err_t set_encoder_config( VP9EncoderConfig *oxcf, const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) { @@ -475,9 +491,13 @@ static vpx_codec_err_t set_encoder_config( oxcf->height = cfg->g_h; oxcf->bit_depth = cfg->g_bit_depth; oxcf->input_bit_depth = cfg->g_input_bit_depth; + // TODO(angiebird): Figure out if we can just use g_timebase to indicate the + // inverse of framerate // guess a frame rate if out of whack, use 30 oxcf->init_framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num; if 
(oxcf->init_framerate > 180) oxcf->init_framerate = 30; + oxcf->g_timebase = cfg->g_timebase; + oxcf->g_timebase_in_ts = get_g_timebase_in_ts(oxcf->g_timebase); oxcf->mode = GOOD; @@ -537,10 +557,16 @@ static vpx_codec_err_t set_encoder_config( oxcf->speed = abs(extra_cfg->cpu_used); oxcf->encode_breakout = extra_cfg->static_thresh; oxcf->enable_auto_arf = extra_cfg->enable_auto_alt_ref; - oxcf->noise_sensitivity = extra_cfg->noise_sensitivity; + if (oxcf->bit_depth == VPX_BITS_8) { + oxcf->noise_sensitivity = extra_cfg->noise_sensitivity; + } else { + // Disable denoiser for high bitdepth since vp9_denoiser_filter only works + // for 8 bits. + oxcf->noise_sensitivity = 0; + } oxcf->sharpness = extra_cfg->sharpness; - oxcf->two_pass_stats_in = cfg->rc_twopass_stats_in; + vp9_set_first_pass_stats(oxcf, &cfg->rc_twopass_stats_in); #if CONFIG_FP_MB_STATS oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in; @@ -560,6 +586,8 @@ static vpx_codec_err_t set_encoder_config( oxcf->tile_columns = extra_cfg->tile_columns; + oxcf->enable_tpl_model = extra_cfg->enable_tpl_model; + // TODO(yunqing): The dependencies between row tiles cause error in multi- // threaded encoding. For now, tile_rows is forced to be 0 in this case. // The further fix can be done by adding synchronizations after a tile row @@ -589,9 +617,6 @@ static vpx_codec_err_t set_encoder_config( oxcf->motion_vector_unit_test = extra_cfg->motion_vector_unit_test; for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { -#if CONFIG_SPATIAL_SVC - oxcf->ss_enable_auto_arf[sl] = cfg->ss_enable_auto_alt_ref[sl]; -#endif for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { oxcf->layer_target_bitrate[sl * oxcf->ts_number_layers + tl] = 1000 * cfg->layer_target_bitrate[sl * oxcf->ts_number_layers + tl]; @@ -599,9 +624,6 @@ static vpx_codec_err_t set_encoder_config( } if (oxcf->ss_number_layers == 1 && oxcf->pass != 0) { oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth; -#if CONFIG_SPATIAL_SVC - oxcf->ss_enable_auto_arf[0] = extra_cfg->enable_auto_alt_ref; -#endif } if (oxcf->ts_number_layers > 1) { for (tl = 0; tl < VPX_TS_MAX_LAYERS; ++tl) { @@ -613,40 +635,7 @@ static vpx_codec_err_t set_encoder_config( } if (get_level_index(oxcf->target_level) >= 0) config_target_level(oxcf); - /* - printf("Current VP9 Settings: \n"); - printf("target_bandwidth: %d\n", oxcf->target_bandwidth); - printf("target_level: %d\n", oxcf->target_level); - printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity); - printf("sharpness: %d\n", oxcf->sharpness); - printf("cpu_used: %d\n", oxcf->cpu_used); - printf("Mode: %d\n", oxcf->mode); - printf("auto_key: %d\n", oxcf->auto_key); - printf("key_freq: %d\n", oxcf->key_freq); - printf("end_usage: %d\n", oxcf->end_usage); - printf("under_shoot_pct: %d\n", oxcf->under_shoot_pct); - printf("over_shoot_pct: %d\n", oxcf->over_shoot_pct); - printf("starting_buffer_level: %d\n", oxcf->starting_buffer_level); - printf("optimal_buffer_level: %d\n", oxcf->optimal_buffer_level); - printf("maximum_buffer_size: %d\n", oxcf->maximum_buffer_size); - printf("fixed_q: %d\n", oxcf->fixed_q); - printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q); - printf("best_allowed_q: %d\n", oxcf->best_allowed_q); - printf("allow_spatial_resampling: %d\n", oxcf->allow_spatial_resampling); - printf("scaled_frame_width: %d\n", oxcf->scaled_frame_width); - printf("scaled_frame_height: %d\n", oxcf->scaled_frame_height); - printf("two_pass_vbrbias: %d\n", oxcf->two_pass_vbrbias); - printf("two_pass_vbrmin_section: %d\n", 
oxcf->two_pass_vbrmin_section);
-  printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
-  printf("vbr_corpus_complexity: %d\n", oxcf->vbr_corpus_complexity);
-  printf("lag_in_frames: %d\n", oxcf->lag_in_frames);
-  printf("enable_auto_arf: %d\n", oxcf->enable_auto_arf);
-  printf("Version: %d\n", oxcf->Version);
-  printf("encode_breakout: %d\n", oxcf->encode_breakout);
-  printf("error resilient: %d\n", oxcf->error_resilient_mode);
-  printf("frame parallel detokenization: %d\n",
-         oxcf->frame_parallel_decoding_mode);
-  */
+  // vp9_dump_encoder_config(oxcf);
   return VPX_CODEC_OK;
 }
 
@@ -716,7 +705,10 @@ static vpx_codec_err_t update_extra_cfg(vpx_codec_alg_priv_t *ctx,
 static vpx_codec_err_t ctrl_set_cpuused(vpx_codec_alg_priv_t *ctx,
                                         va_list args) {
   struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+  // Use fastest speed setting (speed 9 or -9) if it's set beyond the range.
   extra_cfg.cpu_used = CAST(VP8E_SET_CPUUSED, args);
+  extra_cfg.cpu_used = VPXMIN(9, extra_cfg.cpu_used);
+  extra_cfg.cpu_used = VPXMAX(-9, extra_cfg.cpu_used);
   return update_extra_cfg(ctx, &extra_cfg);
 }
 
@@ -762,6 +754,13 @@ static vpx_codec_err_t ctrl_set_tile_rows(vpx_codec_alg_priv_t *ctx,
   return update_extra_cfg(ctx, &extra_cfg);
 }
 
+static vpx_codec_err_t ctrl_set_tpl_model(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+  extra_cfg.enable_tpl_model = CAST(VP9E_SET_TPL, args);
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+
 static vpx_codec_err_t ctrl_set_arnr_max_frames(vpx_codec_alg_priv_t *ctx,
                                                 va_list args) {
   struct vp9_extracfg extra_cfg = ctx->extra_cfg;
@@ -809,7 +808,7 @@ static vpx_codec_err_t ctrl_set_rc_max_inter_bitrate_pct(
     vpx_codec_alg_priv_t *ctx, va_list args) {
   struct vp9_extracfg extra_cfg = ctx->extra_cfg;
   extra_cfg.rc_max_inter_bitrate_pct =
-      CAST(VP8E_SET_MAX_INTER_BITRATE_PCT, args);
+      CAST(VP9E_SET_MAX_INTER_BITRATE_PCT, args);
   return update_extra_cfg(ctx, &extra_cfg);
 }
 
@@ -926,16 +925,18 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
     res = validate_config(priv, &priv->cfg, &priv->extra_cfg);
 
     if (res == VPX_CODEC_OK) {
+      priv->pts_offset_initialized = 0;
+      // TODO(angiebird): Replace priv->timestamp_ratio by
+      // oxcf->g_timebase_in_ts
+      priv->timestamp_ratio = get_g_timebase_in_ts(priv->cfg.g_timebase);
+
       set_encoder_config(&priv->oxcf, &priv->cfg, &priv->extra_cfg);
 #if CONFIG_VP9_HIGHBITDEPTH
       priv->oxcf.use_highbitdepth =
           (ctx->init_flags & VPX_CODEC_USE_HIGHBITDEPTH) ? 1 : 0;
 #endif
       priv->cpi = vp9_create_compressor(&priv->oxcf, priv->buffer_pool);
-      if (priv->cpi == NULL)
-        res = VPX_CODEC_MEM_ERROR;
-      else
-        priv->cpi->output_pkt_list = &priv->pkt_list.head;
+      if (priv->cpi == NULL) res = VPX_CODEC_MEM_ERROR;
     }
   }
 
@@ -962,12 +963,14 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
   switch (ctx->cfg.g_pass) {
     case VPX_RC_ONE_PASS:
       if (deadline > 0) {
-        const vpx_codec_enc_cfg_t *const cfg = &ctx->cfg;
-
-        // Convert duration parameter from stream timebase to microseconds.
-        const uint64_t duration_us = (uint64_t)duration * 1000000 *
-                                     (uint64_t)cfg->g_timebase.num /
-                                     (uint64_t)cfg->g_timebase.den;
+        uint64_t duration_us;
+
+        COMPILE_TIME_ASSERT(TICKS_PER_SEC > 1000000 &&
+                            (TICKS_PER_SEC % 1000000) == 0);
+
+        duration_us = duration * (uint64_t)ctx->timestamp_ratio.num /
+                      (ctx->timestamp_ratio.den * (TICKS_PER_SEC / 1000000));
 
         // If the deadline is more than the duration this frame is to be shown,
         // use good quality mode. Otherwise use realtime mode.
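To make the new arithmetic concrete, here is a worked sketch. It assumes TICKS_PER_SEC is 10000000 (the 100 ns tick unit defined in vpx/internal/vpx_codec_internal.h, which the COMPILE_TIME_ASSERT above only partially constrains) and a 1/30 second stream timebase:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  // get_g_timebase_in_ts({1, 30}): num = 1 * 10000000, den = 30,
  // which reduce_ratio() brings down to 1000000 / 3.
  const int64_t num = 1000000;
  const int64_t den = 3;
  // One timebase unit (one 1/30 s frame) expressed in ticks.
  const int64_t ticks = 1 * num / den;  // 333333
  // duration_us from pick_quickcompress_mode(), for duration == 1:
  // duration * num / (den * (TICKS_PER_SEC / 1000000)).
  const int64_t duration_us = 1 * num / (den * 10);  // 33333, i.e. ~1/30 s
  std::printf("%" PRId64 " ticks, %" PRId64 " us\n", ticks, duration_us);
  return 0;
}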
@@ -1051,28 +1054,16 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) {
   return index_sz;
 }
 
-static int64_t timebase_units_to_ticks(const vpx_rational_t *timebase,
-                                       int64_t n) {
-  return n * TICKS_PER_SEC * timebase->num / timebase->den;
-}
-
-static int64_t ticks_to_timebase_units(const vpx_rational_t *timebase,
-                                       int64_t n) {
-  const int64_t round = (int64_t)TICKS_PER_SEC * timebase->num / 2 - 1;
-  return (n * timebase->den + round) / timebase->num / TICKS_PER_SEC;
-}
-
 static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi,
                                                    unsigned int lib_flags) {
   vpx_codec_frame_flags_t flags = lib_flags << 16;
 
   if (lib_flags & FRAMEFLAGS_KEY ||
-      (cpi->use_svc &&
-       cpi->svc
-           .layer_context[cpi->svc.spatial_layer_id *
-                              cpi->svc.number_temporal_layers +
-                          cpi->svc.temporal_layer_id]
-           .is_key_frame))
+      (cpi->use_svc && cpi->svc
+                           .layer_context[cpi->svc.spatial_layer_id *
+                                              cpi->svc.number_temporal_layers +
+                                          cpi->svc.temporal_layer_id]
+                           .is_key_frame))
     flags |= VPX_FRAME_IS_KEY;
 
   if (cpi->droppable) flags |= VPX_FRAME_IS_DROPPABLE;
 
@@ -1080,50 +1071,52 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi,
   return flags;
 }
 
+static INLINE vpx_codec_cx_pkt_t get_psnr_pkt(const PSNR_STATS *psnr) {
+  vpx_codec_cx_pkt_t pkt;
+  pkt.kind = VPX_CODEC_PSNR_PKT;
+  pkt.data.psnr = *psnr;
+  return pkt;
+}
+
+#if !CONFIG_REALTIME_ONLY
+static INLINE vpx_codec_cx_pkt_t
+get_first_pass_stats_pkt(FIRSTPASS_STATS *stats) {
+  // WARNING: This function assumes that stats will
+  // exist and not be changed until the packet is processed.
+  // TODO(angiebird): Refactor the code to avoid using the assumption.
+  vpx_codec_cx_pkt_t pkt;
+  pkt.kind = VPX_CODEC_STATS_PKT;
+  pkt.data.twopass_stats.buf = stats;
+  pkt.data.twopass_stats.sz = sizeof(*stats);
+  return pkt;
+}
+#endif
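These helpers feed the standard packet list, so an application sees them through the usual vpx_codec_get_cx_data() loop. A sketch of the consuming side, using only the public packet API rather than anything added by this patch:

#include <cstdio>
#include "vpx/vpx_encoder.h"

void DrainPackets(vpx_codec_ctx_t *codec) {
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt;
  while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) {
    if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
      std::printf("frame: %zu bytes\n", pkt->data.frame.sz);
    } else if (pkt->kind == VPX_CODEC_PSNR_PKT) {
      std::printf("psnr (overall): %f\n", pkt->data.psnr.psnr[0]);
    } else if (pkt->kind == VPX_CODEC_STATS_PKT) {
      std::printf("first pass stats: %zu bytes\n",
                  pkt->data.twopass_stats.sz);
    }
  }
}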
+
 const size_t kMinCompressedSize = 8192;
 static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
                                       const vpx_image_t *img,
-                                      vpx_codec_pts_t pts,
+                                      vpx_codec_pts_t pts_val,
                                       unsigned long duration,
                                       vpx_enc_frame_flags_t enc_flags,
                                       unsigned long deadline) {
   volatile vpx_codec_err_t res = VPX_CODEC_OK;
   volatile vpx_enc_frame_flags_t flags = enc_flags;
+  volatile vpx_codec_pts_t pts = pts_val;
   VP9_COMP *const cpi = ctx->cpi;
-  const vpx_rational_t *const timebase = &ctx->cfg.g_timebase;
+  const vpx_rational64_t *const timestamp_ratio = &ctx->timestamp_ratio;
   size_t data_sz;
+  vpx_codec_cx_pkt_t pkt;
+  memset(&pkt, 0, sizeof(pkt));
 
   if (cpi == NULL) return VPX_CODEC_INVALID_PARAM;
 
-  if (cpi->oxcf.pass == 2 && cpi->level_constraint.level_index >= 0 &&
-      !cpi->level_constraint.rc_config_updated) {
-    SVC *const svc = &cpi->svc;
-    const int is_two_pass_svc =
-        (svc->number_spatial_layers > 1) || (svc->number_temporal_layers > 1);
-    const VP9EncoderConfig *const oxcf = &cpi->oxcf;
-    TWO_PASS *const twopass = &cpi->twopass;
-    FIRSTPASS_STATS *stats = &twopass->total_stats;
-    if (is_two_pass_svc) {
-      const double frame_rate = 10000000.0 * stats->count / stats->duration;
-      vp9_update_spatial_layer_framerate(cpi, frame_rate);
-      twopass->bits_left =
-          (int64_t)(stats->duration *
-                    svc->layer_context[svc->spatial_layer_id].target_bandwidth /
-                    10000000.0);
-    } else {
-      twopass->bits_left =
-          (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);
-    }
-    cpi->level_constraint.rc_config_updated = 1;
-  }
-
   if (img != NULL) {
     res = validate_img(ctx, img);
 
     if (res == VPX_CODEC_OK) {
       // There's no codec control for multiple alt-refs so check the encoder
       // instance for its status to determine the compressed data size.
       data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 *
-                (cpi->multi_arf_allowed ? 8 : 2);
+                (cpi->multi_layer_arf ? 8 : 2);
       if (data_sz < kMinCompressedSize) data_sz = kMinCompressedSize;
       if (ctx->cx_data == NULL || ctx->cx_data_sz < data_sz) {
         ctx->cx_data_sz = data_sz;
@@ -1136,6 +1129,12 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
     }
   }
 
+  if (!ctx->pts_offset_initialized) {
+    ctx->pts_offset = pts;
+    ctx->pts_offset_initialized = 1;
+  }
+  pts -= ctx->pts_offset;
+
   pick_quickcompress_mode(ctx, duration, deadline);
   vpx_codec_pkt_list_init(&ctx->pkt_list);
 
@@ -1168,12 +1167,15 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
   if (res == VPX_CODEC_OK) {
     unsigned int lib_flags = 0;
     YV12_BUFFER_CONFIG sd;
-    int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts);
+    int64_t dst_time_stamp = timebase_units_to_ticks(timestamp_ratio, pts);
     int64_t dst_end_time_stamp =
-        timebase_units_to_ticks(timebase, pts + duration);
+        timebase_units_to_ticks(timestamp_ratio, pts + duration);
     size_t size, cx_data_sz;
     unsigned char *cx_data;
 
+    cpi->svc.timebase_fac = timebase_units_to_ticks(timestamp_ratio, 1);
+    cpi->svc.time_stamp_superframe = dst_time_stamp;
+
     // Set up internal flags
     if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) cpi->b_calculate_psnr = 1;
 
@@ -1218,110 +1220,135 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
       }
     }
 
-    while (cx_data_sz >= ctx->cx_data_sz / 2 &&
-           -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
-                                         &dst_time_stamp, &dst_end_time_stamp,
-                                         !img)) {
-      if (size) {
-        vpx_codec_cx_pkt_t pkt;
+    if (cpi->oxcf.pass == 1 && !cpi->use_svc) {
+#if !CONFIG_REALTIME_ONLY
+      // compute first pass stats
+      if (img) {
+        int ret;
+        ENCODE_FRAME_RESULT encode_frame_result;
+        vpx_codec_cx_pkt_t fps_pkt;
+        // TODO(angiebird): Call vp9_first_pass directly
+        ret = vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
+                                      &dst_time_stamp, &dst_end_time_stamp,
+                                      !img, &encode_frame_result);
+        assert(size == 0);  // There is no compressed data in the first pass
+        (void)ret;
+        assert(ret == 0);
+        fps_pkt = get_first_pass_stats_pkt(&cpi->twopass.this_frame_stats);
+        vpx_codec_pkt_list_add(&ctx->pkt_list.head, &fps_pkt);
+      } else {
+        if (!cpi->twopass.first_pass_done) {
+          vpx_codec_cx_pkt_t fps_pkt;
+          vp9_end_first_pass(cpi);
+          fps_pkt = get_first_pass_stats_pkt(&cpi->twopass.total_stats);
+          vpx_codec_pkt_list_add(&ctx->pkt_list.head, &fps_pkt);
+        }
+      }
+#else   // !CONFIG_REALTIME_ONLY
+      assert(0);
+#endif  // !CONFIG_REALTIME_ONLY
+    } else {
+      ENCODE_FRAME_RESULT encode_frame_result;
+      while (cx_data_sz >= ctx->cx_data_sz / 2 &&
+             -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
+                                           &dst_time_stamp, &dst_end_time_stamp,
+                                           !img, &encode_frame_result)) {
+        // Pack psnr pkt
+        if (size > 0 && !cpi->use_svc) {
+          // TODO(angiebird): Figure out why we don't need psnr pkt when
+          // use_svc is on
+          PSNR_STATS psnr;
+          if (vp9_get_psnr(cpi, &psnr)) {
+            vpx_codec_cx_pkt_t psnr_pkt = get_psnr_pkt(&psnr);
+            vpx_codec_pkt_list_add(&ctx->pkt_list.head, &psnr_pkt);
+          }
+        }
 
-#if CONFIG_SPATIAL_SVC
-        if (cpi->use_svc)
-          cpi->svc
-              .layer_context[cpi->svc.spatial_layer_id *
-                             cpi->svc.number_temporal_layers]
-              .layer_size += size;
-#endif
+        if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) {
+          // Pack invisible frames with the next visible frame
+          if (!cpi->common.show_frame ||
+              (cpi->use_svc && cpi->svc.spatial_layer_id <
+                                   cpi->svc.number_spatial_layers - 1)) {
+            if (ctx->pending_cx_data == 0)
ctx->pending_cx_data = cx_data; + ctx->pending_cx_data_sz += size; + if (size) + ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; + ctx->pending_frame_magnitude |= size; + cx_data += size; + cx_data_sz -= size; + pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width; + pkt.data.frame.height[cpi->svc.spatial_layer_id] = + cpi->common.height; + pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] = + 1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id]; - // Pack invisible frames with the next visible frame - if (!cpi->common.show_frame || - (cpi->use_svc && - cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)) { - if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data; - ctx->pending_cx_data_sz += size; - ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; - ctx->pending_frame_magnitude |= size; - cx_data += size; - cx_data_sz -= size; + if (ctx->output_cx_pkt_cb.output_cx_pkt) { + pkt.kind = VPX_CODEC_CX_FRAME_PKT; + pkt.data.frame.pts = + ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) + + ctx->pts_offset; + pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units( + timestamp_ratio, dst_end_time_stamp - dst_time_stamp); + pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags); + pkt.data.frame.buf = ctx->pending_cx_data; + pkt.data.frame.sz = size; + ctx->pending_cx_data = NULL; + ctx->pending_cx_data_sz = 0; + ctx->pending_frame_count = 0; + ctx->pending_frame_magnitude = 0; + ctx->output_cx_pkt_cb.output_cx_pkt( + &pkt, ctx->output_cx_pkt_cb.user_priv); + } + continue; + } - if (ctx->output_cx_pkt_cb.output_cx_pkt) { - pkt.kind = VPX_CODEC_CX_FRAME_PKT; - pkt.data.frame.pts = - ticks_to_timebase_units(timebase, dst_time_stamp); - pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units( - timebase, dst_end_time_stamp - dst_time_stamp); - pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags); + // Add the frame packet to the list of returned packets. 
+ pkt.kind = VPX_CODEC_CX_FRAME_PKT; + pkt.data.frame.pts = + ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) + + ctx->pts_offset; + pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units( + timestamp_ratio, dst_end_time_stamp - dst_time_stamp); + pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags); + pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width; + pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height; + pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] = + 1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id]; + + if (ctx->pending_cx_data) { + if (size) + ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; + ctx->pending_frame_magnitude |= size; + ctx->pending_cx_data_sz += size; + // write the superframe only for the case when + if (!ctx->output_cx_pkt_cb.output_cx_pkt) + size += write_superframe_index(ctx); pkt.data.frame.buf = ctx->pending_cx_data; - pkt.data.frame.sz = size; + pkt.data.frame.sz = ctx->pending_cx_data_sz; ctx->pending_cx_data = NULL; ctx->pending_cx_data_sz = 0; ctx->pending_frame_count = 0; ctx->pending_frame_magnitude = 0; + } else { + pkt.data.frame.buf = cx_data; + pkt.data.frame.sz = size; + } + pkt.data.frame.partition_id = -1; + + if (ctx->output_cx_pkt_cb.output_cx_pkt) ctx->output_cx_pkt_cb.output_cx_pkt( &pkt, ctx->output_cx_pkt_cb.user_priv); + else + vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); + + cx_data += size; + cx_data_sz -= size; + if (is_one_pass_cbr_svc(cpi) && + (cpi->svc.spatial_layer_id == + cpi->svc.number_spatial_layers - 1)) { + // Encoded all spatial layers; exit loop. + break; } - continue; - } - - // Add the frame packet to the list of returned packets. - pkt.kind = VPX_CODEC_CX_FRAME_PKT; - pkt.data.frame.pts = ticks_to_timebase_units(timebase, dst_time_stamp); - pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units( - timebase, dst_end_time_stamp - dst_time_stamp); - pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags); - - if (ctx->pending_cx_data) { - ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; - ctx->pending_frame_magnitude |= size; - ctx->pending_cx_data_sz += size; - // write the superframe only for the case when - if (!ctx->output_cx_pkt_cb.output_cx_pkt) - size += write_superframe_index(ctx); - pkt.data.frame.buf = ctx->pending_cx_data; - pkt.data.frame.sz = ctx->pending_cx_data_sz; - ctx->pending_cx_data = NULL; - ctx->pending_cx_data_sz = 0; - ctx->pending_frame_count = 0; - ctx->pending_frame_magnitude = 0; - } else { - pkt.data.frame.buf = cx_data; - pkt.data.frame.sz = size; - } - pkt.data.frame.partition_id = -1; - - if (ctx->output_cx_pkt_cb.output_cx_pkt) - ctx->output_cx_pkt_cb.output_cx_pkt(&pkt, - ctx->output_cx_pkt_cb.user_priv); - else - vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); - - cx_data += size; - cx_data_sz -= size; -#if CONFIG_SPATIAL_SVC && defined(VPX_TEST_SPATIAL_SVC) - if (cpi->use_svc && !ctx->output_cx_pkt_cb.output_cx_pkt) { - vpx_codec_cx_pkt_t pkt_sizes, pkt_psnr; - int sl; - vp9_zero(pkt_sizes); - vp9_zero(pkt_psnr); - pkt_sizes.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES; - pkt_psnr.kind = VPX_CODEC_SPATIAL_SVC_LAYER_PSNR; - for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { - LAYER_CONTEXT *lc = - &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers]; - pkt_sizes.data.layer_sizes[sl] = lc->layer_size; - pkt_psnr.data.layer_psnr[sl] = lc->psnr_pkt; - lc->layer_size = 0; - } - - vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_sizes); - - 
vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_psnr); - } -#endif - if (is_one_pass_cbr_svc(cpi) && - (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { - // Encoded all spatial layers; exit loop. - break; } } } @@ -1347,9 +1374,8 @@ static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, vp9_set_reference_enc(ctx->cpi, ref_frame_to_vp9_reframe(frame->frame_type), &sd); return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; } + return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, @@ -1363,9 +1389,8 @@ static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, vp9_copy_reference_enc(ctx->cpi, ref_frame_to_vp9_reframe(frame->frame_type), &sd); return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; } + return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, @@ -1373,14 +1398,13 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, vp9_ref_frame_t *const frame = va_arg(args, vp9_ref_frame_t *); if (frame != NULL) { - YV12_BUFFER_CONFIG *fb = get_ref_frame(&ctx->cpi->common, frame->idx); + const int fb_idx = ctx->cpi->common.cur_show_frame_fb_idx; + YV12_BUFFER_CONFIG *fb = get_buf_frame(&ctx->cpi->common, fb_idx); if (fb == NULL) return VPX_CODEC_ERROR; - yuvconfig2image(&frame->img, fb, NULL); return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; } + return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx, @@ -1390,9 +1414,8 @@ static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx, if (config != NULL) { ctx->preview_ppcfg = *config; return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; } + return VPX_CODEC_INVALID_PARAM; #else (void)ctx; (void)args; @@ -1414,17 +1437,24 @@ static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) { if (vp9_get_preview_raw_frame(ctx->cpi, &sd, &flags) == 0) { yuvconfig2image(&ctx->preview_img, &sd, NULL); return &ctx->preview_img; - } else { - return NULL; } + return NULL; } static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx, va_list args) { - (void)ctx; - (void)args; + vpx_roi_map_t *data = va_arg(args, vpx_roi_map_t *); - // TODO(yaowu): Need to re-implement and test for VP9. 
+ if (data) { + vpx_roi_map_t *roi = (vpx_roi_map_t *)data; + + if (!vp9_set_roi_map(ctx->cpi, roi->roi_map, roi->rows, roi->cols, + roi->delta_q, roi->delta_lf, roi->skip, + roi->ref_frame)) { + return VPX_CODEC_OK; + } + return VPX_CODEC_INVALID_PARAM; + } return VPX_CODEC_INVALID_PARAM; } @@ -1436,11 +1466,10 @@ static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx, if (!vp9_set_active_map(ctx->cpi, map->active_map, (int)map->rows, (int)map->cols)) return VPX_CODEC_OK; - else - return VPX_CODEC_INVALID_PARAM; - } else { + return VPX_CODEC_INVALID_PARAM; } + return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_get_active_map(vpx_codec_alg_priv_t *ctx, @@ -1451,11 +1480,10 @@ static vpx_codec_err_t ctrl_get_active_map(vpx_codec_alg_priv_t *ctx, if (!vp9_get_active_map(ctx->cpi, map->active_map, (int)map->rows, (int)map->cols)) return VPX_CODEC_OK; - else - return VPX_CODEC_INVALID_PARAM; - } else { + return VPX_CODEC_INVALID_PARAM; } + return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, @@ -1467,9 +1495,8 @@ static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, vp9_set_internal_size(ctx->cpi, (VPX_SCALING)mode->h_scaling_mode, (VPX_SCALING)mode->v_scaling_mode); return (res == 0) ? VPX_CODEC_OK : VPX_CODEC_INVALID_PARAM; - } else { - return VPX_CODEC_INVALID_PARAM; } + return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) { @@ -1500,22 +1527,23 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, vpx_svc_layer_id_t *const data = va_arg(args, vpx_svc_layer_id_t *); VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; SVC *const svc = &cpi->svc; + int sl; - svc->first_spatial_layer_to_encode = data->spatial_layer_id; svc->spatial_layer_to_encode = data->spatial_layer_id; + svc->first_spatial_layer_to_encode = data->spatial_layer_id; + // TODO(jianj): Deprecated to be removed. svc->temporal_layer_id = data->temporal_layer_id; + // Allow for setting temporal layer per spatial layer for superframe. + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + svc->temporal_layer_id_per_spatial[sl] = + data->temporal_layer_id_per_spatial[sl]; + } // Checks on valid layer_id input. if (svc->temporal_layer_id < 0 || svc->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) { return VPX_CODEC_INVALID_PARAM; } - if (svc->first_spatial_layer_to_encode < 0 || - svc->first_spatial_layer_to_encode >= (int)ctx->cfg.ss_number_layers) { - return VPX_CODEC_INVALID_PARAM; - } - // First spatial layer to encode not implemented for two-pass. 
- if (is_two_pass_svc(cpi) && svc->first_spatial_layer_to_encode > 0) - return VPX_CODEC_INVALID_PARAM; + return VPX_CODEC_OK; } @@ -1555,20 +1583,87 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } +static vpx_codec_err_t ctrl_get_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx, + va_list args) { + VP9_COMP *const cpi = ctx->cpi; + vpx_svc_ref_frame_config_t *data = va_arg(args, vpx_svc_ref_frame_config_t *); + int sl; + for (sl = 0; sl <= cpi->svc.spatial_layer_id; sl++) { + data->update_buffer_slot[sl] = cpi->svc.update_buffer_slot[sl]; + data->reference_last[sl] = cpi->svc.reference_last[sl]; + data->reference_golden[sl] = cpi->svc.reference_golden[sl]; + data->reference_alt_ref[sl] = cpi->svc.reference_altref[sl]; + data->lst_fb_idx[sl] = cpi->svc.lst_fb_idx[sl]; + data->gld_fb_idx[sl] = cpi->svc.gld_fb_idx[sl]; + data->alt_fb_idx[sl] = cpi->svc.alt_fb_idx[sl]; + // TODO(jianj): Remove these 3, deprecated. + data->update_last[sl] = cpi->svc.update_last[sl]; + data->update_golden[sl] = cpi->svc.update_golden[sl]; + data->update_alt_ref[sl] = cpi->svc.update_altref[sl]; + } + return VPX_CODEC_OK; +} + static vpx_codec_err_t ctrl_set_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx, va_list args) { VP9_COMP *const cpi = ctx->cpi; vpx_svc_ref_frame_config_t *data = va_arg(args, vpx_svc_ref_frame_config_t *); int sl; + cpi->svc.use_set_ref_frame_config = 1; for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { - cpi->svc.ext_frame_flags[sl] = data->frame_flags[sl]; - cpi->svc.ext_lst_fb_idx[sl] = data->lst_fb_idx[sl]; - cpi->svc.ext_gld_fb_idx[sl] = data->gld_fb_idx[sl]; - cpi->svc.ext_alt_fb_idx[sl] = data->alt_fb_idx[sl]; + cpi->svc.update_buffer_slot[sl] = data->update_buffer_slot[sl]; + cpi->svc.reference_last[sl] = data->reference_last[sl]; + cpi->svc.reference_golden[sl] = data->reference_golden[sl]; + cpi->svc.reference_altref[sl] = data->reference_alt_ref[sl]; + cpi->svc.lst_fb_idx[sl] = data->lst_fb_idx[sl]; + cpi->svc.gld_fb_idx[sl] = data->gld_fb_idx[sl]; + cpi->svc.alt_fb_idx[sl] = data->alt_fb_idx[sl]; + cpi->svc.duration[sl] = data->duration[sl]; } return VPX_CODEC_OK; } +static vpx_codec_err_t ctrl_set_svc_inter_layer_pred(vpx_codec_alg_priv_t *ctx, + va_list args) { + const int data = va_arg(args, int); + VP9_COMP *const cpi = ctx->cpi; + cpi->svc.disable_inter_layer_pred = data; + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_set_svc_frame_drop_layer(vpx_codec_alg_priv_t *ctx, + va_list args) { + VP9_COMP *const cpi = ctx->cpi; + vpx_svc_frame_drop_t *data = va_arg(args, vpx_svc_frame_drop_t *); + int sl; + cpi->svc.framedrop_mode = data->framedrop_mode; + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) + cpi->svc.framedrop_thresh[sl] = data->framedrop_thresh[sl]; + // Don't allow max_consec_drop values below 1. 
+ cpi->svc.max_consec_drop = VPXMAX(1, data->max_consec_drop); + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_set_svc_gf_temporal_ref(vpx_codec_alg_priv_t *ctx, + va_list args) { + VP9_COMP *const cpi = ctx->cpi; + const unsigned int data = va_arg(args, unsigned int); + cpi->svc.use_gf_temporal_ref = data; + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_set_svc_spatial_layer_sync( + vpx_codec_alg_priv_t *ctx, va_list args) { + VP9_COMP *const cpi = ctx->cpi; + vpx_svc_spatial_layer_sync_t *data = + va_arg(args, vpx_svc_spatial_layer_sync_t *); + int sl; + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) + cpi->svc.spatial_layer_sync[sl] = data->spatial_layer_sync[sl]; + cpi->svc.set_intra_only_frame = data->base_layer_intra_only; + return VPX_CODEC_OK; +} + static vpx_codec_err_t ctrl_register_cx_callback(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_codec_priv_output_cx_pkt_cb_pair_t *cbp = @@ -1609,13 +1704,21 @@ static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx, return update_extra_cfg(ctx, &extra_cfg); } +static vpx_codec_err_t ctrl_set_postencode_drop(vpx_codec_alg_priv_t *ctx, + va_list args) { + VP9_COMP *const cpi = ctx->cpi; + const unsigned int data = va_arg(args, unsigned int); + cpi->rc.ext_use_post_encode_drop = data; + return VPX_CODEC_OK; +} + static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { { VP8_COPY_REFERENCE, ctrl_copy_reference }, // Setters { VP8_SET_REFERENCE, ctrl_set_reference }, { VP8_SET_POSTPROC, ctrl_set_previewpp }, - { VP8E_SET_ROI_MAP, ctrl_set_roi_map }, + { VP9E_SET_ROI_MAP, ctrl_set_roi_map }, { VP8E_SET_ACTIVEMAP, ctrl_set_active_map }, { VP8E_SET_SCALEMODE, ctrl_set_scale_mode }, { VP8E_SET_CPUUSED, ctrl_set_cpuused }, @@ -1624,6 +1727,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { { VP8E_SET_STATIC_THRESHOLD, ctrl_set_static_thresh }, { VP9E_SET_TILE_COLUMNS, ctrl_set_tile_columns }, { VP9E_SET_TILE_ROWS, ctrl_set_tile_rows }, + { VP9E_SET_TPL, ctrl_set_tpl_model }, { VP8E_SET_ARNR_MAXFRAMES, ctrl_set_arnr_max_frames }, { VP8E_SET_ARNR_STRENGTH, ctrl_set_arnr_strength }, { VP8E_SET_ARNR_TYPE, ctrl_set_arnr_type }, @@ -1651,7 +1755,12 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { { VP9E_SET_RENDER_SIZE, ctrl_set_render_size }, { VP9E_SET_TARGET_LEVEL, ctrl_set_target_level }, { VP9E_SET_ROW_MT, ctrl_set_row_mt }, + { VP9E_SET_POSTENCODE_DROP, ctrl_set_postencode_drop }, { VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test }, + { VP9E_SET_SVC_INTER_LAYER_PRED, ctrl_set_svc_inter_layer_pred }, + { VP9E_SET_SVC_FRAME_DROP_LAYER, ctrl_set_svc_frame_drop_layer }, + { VP9E_SET_SVC_GF_TEMPORAL_REF, ctrl_set_svc_gf_temporal_ref }, + { VP9E_SET_SVC_SPATIAL_LAYER_SYNC, ctrl_set_svc_spatial_layer_sync }, // Getters { VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer }, @@ -1660,6 +1769,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { { VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id }, { VP9E_GET_ACTIVEMAP, ctrl_get_active_map }, { VP9E_GET_LEVEL, ctrl_get_level }, + { VP9E_GET_SVC_REF_FRAME_CONFIG, ctrl_get_svc_ref_frame_config }, { -1, NULL }, }; @@ -1668,7 +1778,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { { 0, { // NOLINT - 0, // g_usage + 0, // g_usage (unused) 8, // g_threads 0, // g_profile @@ -1695,7 +1805,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { VPX_VBR, // rc_end_usage { NULL, 0 }, // rc_twopass_stats_in { NULL, 0 }, // rc_firstpass_mb_stats_in - 256, // rc_target_bandwidth + 256, // rc_target_bitrate 0, // 
rc_min_quantizer
          63,   // rc_max_quantizer
          25,   // rc_undershoot_pct
@@ -1761,3 +1871,222 @@ CODEC_INTERFACE(vpx_codec_vp9_cx) = {
     NULL  // vpx_codec_enc_mr_get_mem_loc_fn_t
   }
 };
+
+static vpx_codec_enc_cfg_t get_enc_cfg(int frame_width, int frame_height,
+                                       vpx_rational_t frame_rate,
+                                       int target_bitrate,
+                                       vpx_enc_pass enc_pass) {
+  vpx_codec_enc_cfg_t enc_cfg = encoder_usage_cfg_map[0].cfg;
+  enc_cfg.g_w = frame_width;
+  enc_cfg.g_h = frame_height;
+  enc_cfg.rc_target_bitrate = target_bitrate;
+  enc_cfg.g_pass = enc_pass;
+  // g_timebase is the inverse of frame_rate
+  enc_cfg.g_timebase.num = frame_rate.den;
+  enc_cfg.g_timebase.den = frame_rate.num;
+  return enc_cfg;
+}
+
+static vp9_extracfg get_extra_cfg() {
+  vp9_extracfg extra_cfg = default_extra_cfg;
+  return extra_cfg;
+}
+
+VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height,
+                                        vpx_rational_t frame_rate,
+                                        int target_bitrate,
+                                        vpx_enc_pass enc_pass) {
+  /* This function will generate the same VP9EncoderConfig used by the
+   * vpxenc command given below.
+   * The configs in the vpxenc command correspond to parameters of
+   * vp9_get_encoder_config() as follows.
+   *
+   * WIDTH:   frame_width
+   * HEIGHT:  frame_height
+   * FPS:     frame_rate
+   * BITRATE: target_bitrate
+   *
+   * INPUT, OUTPUT, LIMIT will not affect VP9EncoderConfig
+   *
+   * vpxenc command:
+   * INPUT=bus_cif.y4m
+   * OUTPUT=output.webm
+   * WIDTH=352
+   * HEIGHT=288
+   * BITRATE=600
+   * FPS=30/1
+   * LIMIT=150
+   * ./vpxenc --limit=$LIMIT --width=$WIDTH --height=$HEIGHT --fps=$FPS
+   * --lag-in-frames=25 \
+   *   --codec=vp9 --good --cpu-used=0 --threads=0 --profile=0 \
+   *   --min-q=0 --max-q=63 --auto-alt-ref=1 --passes=2 --kf-max-dist=150 \
+   *   --kf-min-dist=0 --drop-frame=0 --static-thresh=0 --bias-pct=50 \
+   *   --minsection-pct=0 --maxsection-pct=150 --arnr-maxframes=7 --psnr \
+   *   --arnr-strength=5 --sharpness=0 --undershoot-pct=100 --overshoot-pct=100 \
+   *   --frame-parallel=0 --tile-columns=0 --cpu-used=0 --end-usage=vbr \
+   *   --target-bitrate=$BITRATE -o $OUTPUT $INPUT
+   */
+
+  VP9EncoderConfig oxcf;
+  vp9_extracfg extra_cfg = get_extra_cfg();
+  vpx_codec_enc_cfg_t enc_cfg = get_enc_cfg(
+      frame_width, frame_height, frame_rate, target_bitrate, enc_pass);
+  set_encoder_config(&oxcf, &enc_cfg, &extra_cfg);
+
+  // These settings are made to match the settings of the vpxenc command.
+  oxcf.key_freq = 150;
+  oxcf.under_shoot_pct = 100;
+  oxcf.over_shoot_pct = 100;
+  oxcf.max_threads = 0;
+  oxcf.tile_columns = 0;
+  oxcf.frame_parallel_decoding_mode = 0;
+  oxcf.two_pass_vbrmax_section = 150;
+  return oxcf;
+}
+
+#define DUMP_STRUCT_VALUE(struct, value) \
+  printf(#value " %" PRId64 "\n", (int64_t)(struct)->value)
+
+void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf) {
+  DUMP_STRUCT_VALUE(oxcf, profile);
+  DUMP_STRUCT_VALUE(oxcf, bit_depth);
+  DUMP_STRUCT_VALUE(oxcf, width);
+  DUMP_STRUCT_VALUE(oxcf, height);
+  DUMP_STRUCT_VALUE(oxcf, input_bit_depth);
+  DUMP_STRUCT_VALUE(oxcf, init_framerate);
+  // TODO(angiebird): dump g_timebase
+  // TODO(angiebird): dump g_timebase_in_ts
+
+  DUMP_STRUCT_VALUE(oxcf, target_bandwidth);
+
+  DUMP_STRUCT_VALUE(oxcf, noise_sensitivity);
+  DUMP_STRUCT_VALUE(oxcf, sharpness);
+  DUMP_STRUCT_VALUE(oxcf, speed);
+  DUMP_STRUCT_VALUE(oxcf, rc_max_intra_bitrate_pct);
+  DUMP_STRUCT_VALUE(oxcf, rc_max_inter_bitrate_pct);
+  DUMP_STRUCT_VALUE(oxcf, gf_cbr_boost_pct);
+
+  DUMP_STRUCT_VALUE(oxcf, mode);
+  DUMP_STRUCT_VALUE(oxcf, pass);
+
+  // Key Framing Operations
+  DUMP_STRUCT_VALUE(oxcf, auto_key);
+  DUMP_STRUCT_VALUE(oxcf, key_freq);
+
+  DUMP_STRUCT_VALUE(oxcf, lag_in_frames);
+
+  // ----------------------------------------------------------------
+  // DATARATE CONTROL OPTIONS
+
+  // vbr, cbr, constrained quality or constant quality
+  DUMP_STRUCT_VALUE(oxcf, rc_mode);
+
+  // buffer targeting aggressiveness
+  DUMP_STRUCT_VALUE(oxcf, under_shoot_pct);
+  DUMP_STRUCT_VALUE(oxcf, over_shoot_pct);
+
+  // buffering parameters
+  // TODO(angiebird): dump starting_buffer_level_ms
+  // TODO(angiebird): dump optimal_buffer_level_ms
+  // TODO(angiebird): dump maximum_buffer_size_ms
+
+  // Frame drop threshold.
+  DUMP_STRUCT_VALUE(oxcf, drop_frames_water_mark);
+
+  // controlling quality
+  DUMP_STRUCT_VALUE(oxcf, fixed_q);
+  DUMP_STRUCT_VALUE(oxcf, worst_allowed_q);
+  DUMP_STRUCT_VALUE(oxcf, best_allowed_q);
+  DUMP_STRUCT_VALUE(oxcf, cq_level);
+  DUMP_STRUCT_VALUE(oxcf, aq_mode);
+
+  // Special handling of Adaptive Quantization for AltRef frames
+  DUMP_STRUCT_VALUE(oxcf, alt_ref_aq);
+
+  // Internal frame size scaling.
+  DUMP_STRUCT_VALUE(oxcf, resize_mode);
+  DUMP_STRUCT_VALUE(oxcf, scaled_frame_width);
+  DUMP_STRUCT_VALUE(oxcf, scaled_frame_height);
+
+  // Enable feature to reduce the frame quantization every x frames.
+  DUMP_STRUCT_VALUE(oxcf, frame_periodic_boost);
+
+  // two pass datarate control
+  DUMP_STRUCT_VALUE(oxcf, two_pass_vbrbias);
+  DUMP_STRUCT_VALUE(oxcf, two_pass_vbrmin_section);
+  DUMP_STRUCT_VALUE(oxcf, two_pass_vbrmax_section);
+  DUMP_STRUCT_VALUE(oxcf, vbr_corpus_complexity);
+  // END DATARATE CONTROL OPTIONS
+  // ----------------------------------------------------------------
+
+  // Spatial and temporal scalability.
+  DUMP_STRUCT_VALUE(oxcf, ss_number_layers);
+  DUMP_STRUCT_VALUE(oxcf, ts_number_layers);
+
+  // Bitrate allocation for spatial layers.
+ // TODO(angiebird): dump layer_target_bitrate[VPX_MAX_LAYERS] + // TODO(angiebird): dump ss_target_bitrate[VPX_SS_MAX_LAYERS] + // TODO(angiebird): dump ss_enable_auto_arf[VPX_SS_MAX_LAYERS] + // TODO(angiebird): dump ts_rate_decimator[VPX_TS_MAX_LAYERS] + + DUMP_STRUCT_VALUE(oxcf, enable_auto_arf); + DUMP_STRUCT_VALUE(oxcf, encode_breakout); + DUMP_STRUCT_VALUE(oxcf, error_resilient_mode); + DUMP_STRUCT_VALUE(oxcf, frame_parallel_decoding_mode); + + DUMP_STRUCT_VALUE(oxcf, arnr_max_frames); + DUMP_STRUCT_VALUE(oxcf, arnr_strength); + + DUMP_STRUCT_VALUE(oxcf, min_gf_interval); + DUMP_STRUCT_VALUE(oxcf, max_gf_interval); + + DUMP_STRUCT_VALUE(oxcf, tile_columns); + DUMP_STRUCT_VALUE(oxcf, tile_rows); + + DUMP_STRUCT_VALUE(oxcf, enable_tpl_model); + + DUMP_STRUCT_VALUE(oxcf, max_threads); + + DUMP_STRUCT_VALUE(oxcf, target_level); + + // TODO(angiebird): dump two_pass_stats_in + +#if CONFIG_FP_MB_STATS + // TODO(angiebird): dump firstpass_mb_stats_in +#endif + + DUMP_STRUCT_VALUE(oxcf, tuning); + DUMP_STRUCT_VALUE(oxcf, content); +#if CONFIG_VP9_HIGHBITDEPTH + DUMP_STRUCT_VALUE(oxcf, use_highbitdepth); +#endif + DUMP_STRUCT_VALUE(oxcf, color_space); + DUMP_STRUCT_VALUE(oxcf, color_range); + DUMP_STRUCT_VALUE(oxcf, render_width); + DUMP_STRUCT_VALUE(oxcf, render_height); + DUMP_STRUCT_VALUE(oxcf, temporal_layering_mode); + + DUMP_STRUCT_VALUE(oxcf, row_mt); + DUMP_STRUCT_VALUE(oxcf, motion_vector_unit_test); +} + +FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf) { + FRAME_INFO frame_info; + int dummy; + frame_info.frame_width = oxcf->width; + frame_info.frame_height = oxcf->height; + frame_info.render_frame_width = oxcf->width; + frame_info.render_frame_height = oxcf->height; + frame_info.bit_depth = oxcf->bit_depth; + vp9_set_mi_size(&frame_info.mi_rows, &frame_info.mi_cols, &dummy, + frame_info.frame_width, frame_info.frame_height); + vp9_set_mb_size(&frame_info.mb_rows, &frame_info.mb_cols, &frame_info.num_mbs, + frame_info.mi_rows, frame_info.mi_cols); + // TODO(angiebird): Figure out how to get subsampling_x/y here + return frame_info; +} + +void vp9_set_first_pass_stats(VP9EncoderConfig *oxcf, + const vpx_fixed_buf_t *stats) { + oxcf->two_pass_stats_in = *stats; +} diff --git a/media/libvpx/libvpx/vp9/vp9_cx_iface.h b/media/libvpx/libvpx/vp9/vp9_cx_iface.h new file mode 100644 index 000000000000..08569fcc96ce --- /dev/null +++ b/media/libvpx/libvpx/vp9/vp9_cx_iface.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VP9_VP9_CX_IFACE_H_ +#define VPX_VP9_VP9_CX_IFACE_H_ +#include "vp9/encoder/vp9_encoder.h" +#include "vp9/common/vp9_onyxc_int.h" + +#ifdef __cplusplus +extern "C" { +#endif + +VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height, + vpx_rational_t frame_rate, + int target_bitrate, + vpx_enc_pass enc_pass); + +void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf); + +FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf); + +static INLINE int64_t +timebase_units_to_ticks(const vpx_rational64_t *timestamp_ratio, int64_t n) { + return n * timestamp_ratio->num / timestamp_ratio->den; +} + +static INLINE int64_t +ticks_to_timebase_units(const vpx_rational64_t *timestamp_ratio, int64_t n) { + int64_t round = timestamp_ratio->num / 2; + if (round > 0) --round; + return (n * timestamp_ratio->den + round) / timestamp_ratio->num; +} + +void vp9_set_first_pass_stats(VP9EncoderConfig *oxcf, + const vpx_fixed_buf_t *stats); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_VP9_CX_IFACE_H_ diff --git a/media/libvpx/libvpx/vp9/vp9_dx_iface.c b/media/libvpx/libvpx/vp9/vp9_dx_iface.c index 657490f4bd3c..fa79f7aedc24 100644 --- a/media/libvpx/libvpx/vp9/vp9_dx_iface.c +++ b/media/libvpx/libvpx/vp9/vp9_dx_iface.c @@ -97,7 +97,7 @@ static vpx_codec_err_t decoder_peek_si_internal( const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si, int *is_intra_only, vpx_decrypt_cb decrypt_cb, void *decrypt_state) { int intra_only_flag = 0; - uint8_t clear_buffer[10]; + uint8_t clear_buffer[11]; if (data + data_sz <= data) return VPX_CODEC_INVALID_PARAM; @@ -158,6 +158,9 @@ static vpx_codec_err_t decoder_peek_si_internal( if (profile > PROFILE_0) { if (!parse_bitdepth_colorspace_sampling(profile, &rb)) return VPX_CODEC_UNSUP_BITSTREAM; + // The colorspace info may cause vp9_read_frame_size() to need 11 + // bytes. + if (data_sz < 11) return VPX_CODEC_UNSUP_BITSTREAM; } rb.bit_offset += REF_FRAMES; // refresh_frame_flags vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h); @@ -235,6 +238,19 @@ static void set_ppflags(const vpx_codec_alg_priv_t *ctx, vp9_ppflags_t *flags) { flags->noise_level = ctx->postproc_cfg.noise_level; } +#undef ERROR +#define ERROR(str) \ + do { \ + ctx->base.err_detail = str; \ + return VPX_CODEC_INVALID_PARAM; \ + } while (0) + +#define RANGE_CHECK(p, memb, lo, hi) \ + do { \ + if (!(((p)->memb == (lo) || (p)->memb > (lo)) && (p)->memb <= (hi))) \ + ERROR(#memb " out of range [" #lo ".." #hi "]"); \ + } while (0) + static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { ctx->last_show_frame = -1; ctx->need_resync = 1; @@ -251,6 +267,12 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { ctx->pbi->max_threads = ctx->cfg.threads; ctx->pbi->inv_tile_order = ctx->invert_tile_order; + RANGE_CHECK(ctx, row_mt, 0, 1); + ctx->pbi->row_mt = ctx->row_mt; + + RANGE_CHECK(ctx, lpf_opt, 0, 1); + ctx->pbi->lpf_mt_opt = ctx->lpf_opt; + // If postprocessing was enabled by the application and a // configuration has not been provided, default it. 
if (!ctx->postproc_cfg_set && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) @@ -452,8 +474,8 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); if (data) { - YV12_BUFFER_CONFIG *fb; - fb = get_ref_frame(&ctx->pbi->common, data->idx); + const int fb_idx = ctx->pbi->common.cur_show_frame_fb_idx; + YV12_BUFFER_CONFIG *fb = get_buf_frame(&ctx->pbi->common, fb_idx); if (fb == NULL) return VPX_CODEC_ERROR; yuvconfig2image(&data->img, fb, NULL); return VPX_CODEC_OK; @@ -632,6 +654,20 @@ static vpx_codec_err_t ctrl_set_spatial_layer_svc(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } +static vpx_codec_err_t ctrl_set_row_mt(vpx_codec_alg_priv_t *ctx, + va_list args) { + ctx->row_mt = va_arg(args, int); + + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_enable_lpf_opt(vpx_codec_alg_priv_t *ctx, + va_list args) { + ctx->lpf_opt = va_arg(args, int); + + return VPX_CODEC_OK; +} + static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { { VP8_COPY_REFERENCE, ctrl_copy_reference }, @@ -643,6 +679,8 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { { VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment }, { VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter }, { VP9_DECODE_SVC_SPATIAL_LAYER, ctrl_set_spatial_layer_svc }, + { VP9D_SET_ROW_MT, ctrl_set_row_mt }, + { VP9D_SET_LOOP_FILTER_OPT, ctrl_enable_lpf_opt }, // Getters { VPXD_GET_LAST_QUANTIZER, ctrl_get_quantizer }, diff --git a/media/libvpx/libvpx/vp9/vp9_dx_iface.h b/media/libvpx/libvpx/vp9/vp9_dx_iface.h index 18bc7ab0d621..f60688c4db2e 100644 --- a/media/libvpx/libvpx/vp9/vp9_dx_iface.h +++ b/media/libvpx/libvpx/vp9/vp9_dx_iface.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_VP9_DX_IFACE_H_ -#define VP9_VP9_DX_IFACE_H_ +#ifndef VPX_VP9_VP9_DX_IFACE_H_ +#define VPX_VP9_VP9_DX_IFACE_H_ #include "vp9/decoder/vp9_decoder.h" @@ -45,6 +45,8 @@ struct vpx_codec_alg_priv { // Allow for decoding up to a given spatial layer for SVC stream. int svc_decoding; int svc_spatial_layer; + int row_mt; + int lpf_opt; }; -#endif // VP9_VP9_DX_IFACE_H_ +#endif // VPX_VP9_VP9_DX_IFACE_H_ diff --git a/media/libvpx/libvpx/vp9/vp9_iface_common.c b/media/libvpx/libvpx/vp9/vp9_iface_common.c new file mode 100644 index 000000000000..74d08a5873a1 --- /dev/null +++ b/media/libvpx/libvpx/vp9/vp9_iface_common.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file in the root of the source tree. An additional + * intellectual property rights grant can be found in the file PATENTS. + * All contributing project authors may be found in the AUTHORS file in + * the root of the source tree. 
+ */ + +#include "vp9/vp9_iface_common.h" +void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, + void *user_priv) { + /** vpx_img_wrap() doesn't allow specifying independent strides for + * the Y, U, and V planes, nor other alignment adjustments that + * might be representable by a YV12_BUFFER_CONFIG, so we just + * initialize all the fields.*/ + int bps; + if (!yv12->subsampling_y) { + if (!yv12->subsampling_x) { + img->fmt = VPX_IMG_FMT_I444; + bps = 24; + } else { + img->fmt = VPX_IMG_FMT_I422; + bps = 16; + } + } else { + if (!yv12->subsampling_x) { + img->fmt = VPX_IMG_FMT_I440; + bps = 16; + } else { + img->fmt = VPX_IMG_FMT_I420; + bps = 12; + } + } + img->cs = yv12->color_space; + img->range = yv12->color_range; + img->bit_depth = 8; + img->w = yv12->y_stride; + img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3); + img->d_w = yv12->y_crop_width; + img->d_h = yv12->y_crop_height; + img->r_w = yv12->render_width; + img->r_h = yv12->render_height; + img->x_chroma_shift = yv12->subsampling_x; + img->y_chroma_shift = yv12->subsampling_y; + img->planes[VPX_PLANE_Y] = yv12->y_buffer; + img->planes[VPX_PLANE_U] = yv12->u_buffer; + img->planes[VPX_PLANE_V] = yv12->v_buffer; + img->planes[VPX_PLANE_ALPHA] = NULL; + img->stride[VPX_PLANE_Y] = yv12->y_stride; + img->stride[VPX_PLANE_U] = yv12->uv_stride; + img->stride[VPX_PLANE_V] = yv12->uv_stride; + img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; +#if CONFIG_VP9_HIGHBITDEPTH + if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) { + // vpx_image_t uses byte strides and a pointer to the first byte + // of the image. + img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH); + img->bit_depth = yv12->bit_depth; + img->planes[VPX_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer); + img->planes[VPX_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer); + img->planes[VPX_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer); + img->planes[VPX_PLANE_ALPHA] = NULL; + img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride; + img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride; + img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride; + img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride; + } +#endif // CONFIG_VP9_HIGHBITDEPTH + img->bps = bps; + img->user_priv = user_priv; + img->img_data = yv12->buffer_alloc; + img->img_data_owner = 0; + img->self_allocd = 0; +} + +vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, + YV12_BUFFER_CONFIG *yv12) { + yv12->y_buffer = img->planes[VPX_PLANE_Y]; + yv12->u_buffer = img->planes[VPX_PLANE_U]; + yv12->v_buffer = img->planes[VPX_PLANE_V]; + + yv12->y_crop_width = img->d_w; + yv12->y_crop_height = img->d_h; + yv12->render_width = img->r_w; + yv12->render_height = img->r_h; + yv12->y_width = img->d_w; + yv12->y_height = img->d_h; + + yv12->uv_width = + img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width; + yv12->uv_height = + img->y_chroma_shift == 1 ? 
(1 + yv12->y_height) / 2 : yv12->y_height;
+  yv12->uv_crop_width = yv12->uv_width;
+  yv12->uv_crop_height = yv12->uv_height;
+
+  yv12->y_stride = img->stride[VPX_PLANE_Y];
+  yv12->uv_stride = img->stride[VPX_PLANE_U];
+  yv12->color_space = img->cs;
+  yv12->color_range = img->range;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
+    // In vpx_image_t
+    // planes point to uint8 address of start of data
+    // stride counts uint8s to reach next row
+    // In YV12_BUFFER_CONFIG
+    // y_buffer, u_buffer, v_buffer point to uint16 address of data
+    // stride and border counts in uint16s
+    // This means that all the address calculations in the main body of code
+    // should work correctly.
+    // However, before we do any pixel operations we need to cast the address
+    // to a uint16 pointer and double its value.
+    yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer);
+    yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer);
+    yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer);
+    yv12->y_stride >>= 1;
+    yv12->uv_stride >>= 1;
+    yv12->flags = YV12_FLAG_HIGHBITDEPTH;
+  } else {
+    yv12->flags = 0;
+  }
+  yv12->border = (yv12->y_stride - img->w) / 2;
+#else
+  yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  yv12->subsampling_x = img->x_chroma_shift;
+  yv12->subsampling_y = img->y_chroma_shift;
+  return VPX_CODEC_OK;
+}
diff --git a/media/libvpx/libvpx/vp9/vp9_iface_common.h b/media/libvpx/libvpx/vp9/vp9_iface_common.h
index d68872750b9a..e646917c69b5 100644
--- a/media/libvpx/libvpx/vp9/vp9_iface_common.h
+++ b/media/libvpx/libvpx/vp9/vp9_iface_common.h
@@ -7,133 +7,27 @@
  * in the file PATENTS. All contributing project authors may
  * be found in the AUTHORS file in the root of the source tree.
  */
-#ifndef VP9_VP9_IFACE_COMMON_H_
-#define VP9_VP9_IFACE_COMMON_H_
+#ifndef VPX_VP9_VP9_IFACE_COMMON_H_
+#define VPX_VP9_VP9_IFACE_COMMON_H_
 
+#include <assert.h>
+
 #include "vpx_ports/mem.h"
+#include "vpx/vp8.h"
+#include "vpx_scale/yv12config.h"
+#include "common/vp9_enums.h"
 
-static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
-                            void *user_priv) {
-  /** vpx_img_wrap() doesn't allow specifying independent strides for
-   * the Y, U, and V planes, nor other alignment adjustments that
-   * might be representable by a YV12_BUFFER_CONFIG, so we just
-   * initialize all the fields.*/
-  int bps;
-  if (!yv12->subsampling_y) {
-    if (!yv12->subsampling_x) {
-      img->fmt = VPX_IMG_FMT_I444;
-      bps = 24;
-    } else {
-      img->fmt = VPX_IMG_FMT_I422;
-      bps = 16;
-    }
-  } else {
-    if (!yv12->subsampling_x) {
-      img->fmt = VPX_IMG_FMT_I440;
-      bps = 16;
-    } else {
-      img->fmt = VPX_IMG_FMT_I420;
-      bps = 12;
-    }
-  }
-  img->cs = yv12->color_space;
-  img->range = yv12->color_range;
-  img->bit_depth = 8;
-  img->w = yv12->y_stride;
-  img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
-  img->d_w = yv12->y_crop_width;
-  img->d_h = yv12->y_crop_height;
-  img->r_w = yv12->render_width;
-  img->r_h = yv12->render_height;
-  img->x_chroma_shift = yv12->subsampling_x;
-  img->y_chroma_shift = yv12->subsampling_y;
-  img->planes[VPX_PLANE_Y] = yv12->y_buffer;
-  img->planes[VPX_PLANE_U] = yv12->u_buffer;
-  img->planes[VPX_PLANE_V] = yv12->v_buffer;
-  img->planes[VPX_PLANE_ALPHA] = NULL;
-  img->stride[VPX_PLANE_Y] = yv12->y_stride;
-  img->stride[VPX_PLANE_U] = yv12->uv_stride;
-  img->stride[VPX_PLANE_V] = yv12->uv_stride;
-  img->stride[VPX_PLANE_ALPHA] = yv12->y_stride;
-#if CONFIG_VP9_HIGHBITDEPTH
-  if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) {
-    // vpx_image_t
uses byte strides and a pointer to the first byte - // of the image. - img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH); - img->bit_depth = yv12->bit_depth; - img->planes[VPX_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer); - img->planes[VPX_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer); - img->planes[VPX_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer); - img->planes[VPX_PLANE_ALPHA] = NULL; - img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride; - img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride; - img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride; - img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride; - } -#endif // CONFIG_VP9_HIGHBITDEPTH - img->bps = bps; - img->user_priv = user_priv; - img->img_data = yv12->buffer_alloc; - img->img_data_owner = 0; - img->self_allocd = 0; -} +#ifdef __cplusplus +extern "C" { +#endif -static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, - YV12_BUFFER_CONFIG *yv12) { - yv12->y_buffer = img->planes[VPX_PLANE_Y]; - yv12->u_buffer = img->planes[VPX_PLANE_U]; - yv12->v_buffer = img->planes[VPX_PLANE_V]; +void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, + void *user_priv); - yv12->y_crop_width = img->d_w; - yv12->y_crop_height = img->d_h; - yv12->render_width = img->r_w; - yv12->render_height = img->r_h; - yv12->y_width = img->d_w; - yv12->y_height = img->d_h; +vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, + YV12_BUFFER_CONFIG *yv12); - yv12->uv_width = - img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width; - yv12->uv_height = - img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 : yv12->y_height; - yv12->uv_crop_width = yv12->uv_width; - yv12->uv_crop_height = yv12->uv_height; - - yv12->y_stride = img->stride[VPX_PLANE_Y]; - yv12->uv_stride = img->stride[VPX_PLANE_U]; - yv12->color_space = img->cs; - yv12->color_range = img->range; - -#if CONFIG_VP9_HIGHBITDEPTH - if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { - // In vpx_image_t - // planes point to uint8 address of start of data - // stride counts uint8s to reach next row - // In YV12_BUFFER_CONFIG - // y_buffer, u_buffer, v_buffer point to uint16 address of data - // stride and border counts in uint16s - // This means that all the address calculations in the main body of code - // should work correctly. - // However, before we do any pixel operations we need to cast the address - // to a uint16 ponter and double its value. 
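The pointer convention described in the comment above (and in the new vp9_iface_common.c earlier in this patch) is easy to misread, so here is a minimal sketch of the high-bitdepth address trick. It assumes the CONVERT_TO_* macros behave as in vpx_ports/mem.h; the read_hbd_pixel helper is illustrative and not part of libvpx:

#include <stdint.h>

/* Assumed to match vpx_ports/mem.h: a high-bitdepth plane really holds
 * uint16_t samples, but the stored pointer is the real address divided
 * by two. Stride arithmetic done in uint16 units therefore stays valid,
 * and the address is doubled back only at pixel-access time. */
#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))

/* Illustrative helper, not part of libvpx. */
static uint16_t read_hbd_pixel(const uint8_t *half_addr, int row, int col,
                               int stride /* in uint16 units */) {
  const uint16_t *plane = CONVERT_TO_SHORTPTR(half_addr);
  return plane[row * stride + col];
}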
- yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer); - yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer); - yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer); - yv12->y_stride >>= 1; - yv12->uv_stride >>= 1; - yv12->flags = YV12_FLAG_HIGHBITDEPTH; - } else { - yv12->flags = 0; - } - yv12->border = (yv12->y_stride - img->w) / 2; -#else - yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; -#endif // CONFIG_VP9_HIGHBITDEPTH - yv12->subsampling_x = img->x_chroma_shift; - yv12->subsampling_y = img->y_chroma_shift; - return VPX_CODEC_OK; -} - -static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { +static INLINE VP9_REFFRAME +ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { switch (frame) { case VP8_LAST_FRAME: return VP9_LAST_FLAG; case VP8_GOLD_FRAME: return VP9_GOLD_FLAG; @@ -142,4 +36,9 @@ static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { assert(0 && "Invalid Reference Frame"); return VP9_LAST_FLAG; } -#endif // VP9_VP9_IFACE_COMMON_H_ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VP9_VP9_IFACE_COMMON_H_ diff --git a/media/libvpx/libvpx/vp9/vp9cx.mk b/media/libvpx/libvpx/vp9/vp9cx.mk index d633ed1429d1..ad774505c607 100644 --- a/media/libvpx/libvpx/vp9/vp9cx.mk +++ b/media/libvpx/libvpx/vp9/vp9cx.mk @@ -16,6 +16,10 @@ VP9_CX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes) VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) VP9_CX_SRCS-yes += vp9_cx_iface.c +VP9_CX_SRCS-yes += vp9_cx_iface.h + +VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.cc +VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.h VP9_CX_SRCS-yes += encoder/vp9_bitstream.c VP9_CX_SRCS-yes += encoder/vp9_context_tree.c @@ -64,6 +68,7 @@ VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c VP9_CX_SRCS-yes += encoder/vp9_rd.c VP9_CX_SRCS-yes += encoder/vp9_rdopt.c VP9_CX_SRCS-yes += encoder/vp9_pickmode.c +VP9_CX_SRCS-yes += encoder/vp9_partition_models.h VP9_CX_SRCS-yes += encoder/vp9_segmentation.c VP9_CX_SRCS-yes += encoder/vp9_segmentation.h VP9_CX_SRCS-yes += encoder/vp9_speed_features.c @@ -74,6 +79,9 @@ VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.c VP9_CX_SRCS-yes += encoder/vp9_resize.c VP9_CX_SRCS-yes += encoder/vp9_resize.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.c +VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.h +VP9_CX_SRCS-$(CONFIG_NON_GREEDY_MV) += encoder/vp9_non_greedy_mv.c +VP9_CX_SRCS-$(CONFIG_NON_GREEDY_MV) += encoder/vp9_non_greedy_mv.h VP9_CX_SRCS-yes += encoder/vp9_tokenize.c VP9_CX_SRCS-yes += encoder/vp9_treewriter.c @@ -101,22 +109,24 @@ VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/temporal_filter_sse4.c +VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/temporal_filter_constants.h VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c +VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_quantize_avx2.c VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_diamond_search_sad_avx.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c +VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_temporal_filter_sse4.c endif VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm -ifeq ($(ARCH_X86_64),yes) +ifeq ($(VPX_ARCH_X86_64),yes) VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm endif VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_intrin_sse2.c -VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c 
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_frame_scale_ssse3.c ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes) @@ -129,20 +139,34 @@ VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_avx2.c ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_error_neon.c endif -VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_dct_neon.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_frame_scale_neon.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_quantize_neon.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_error_msa.c + +ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct4x4_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct16x16_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct_msa.h +endif # !CONFIG_VP9_HIGHBITDEPTH + +VP9_CX_SRCS-$(HAVE_VSX) += encoder/ppc/vp9_quantize_vsx.c # Strip unnecessary files with CONFIG_REALTIME_ONLY VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_firstpass.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_mbgraph.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_temporal_filter.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/x86/temporal_filter_sse4.c +VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/x86/temporal_filter_constants.h +VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/x86/highbd_temporal_filter_sse4.c +VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_alt_ref_aq.h +VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_alt_ref_aq.c +VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_variance.c +VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_variance.h +VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_360.c +VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_360.h +VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_complexity.c +VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_complexity.h VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes)) diff --git a/media/libvpx/libvpx/vp9/vp9dx.mk b/media/libvpx/libvpx/vp9/vp9dx.mk index 59f612b94c96..93a5f368bdfc 100644 --- a/media/libvpx/libvpx/vp9/vp9dx.mk +++ b/media/libvpx/libvpx/vp9/vp9dx.mk @@ -28,5 +28,7 @@ VP9_DX_SRCS-yes += decoder/vp9_decoder.c VP9_DX_SRCS-yes += decoder/vp9_decoder.h VP9_DX_SRCS-yes += decoder/vp9_dsubexp.c VP9_DX_SRCS-yes += decoder/vp9_dsubexp.h +VP9_DX_SRCS-yes += decoder/vp9_job_queue.c +VP9_DX_SRCS-yes += decoder/vp9_job_queue.h VP9_DX_SRCS-yes := $(filter-out $(VP9_DX_SRCS_REMOVE-yes),$(VP9_DX_SRCS-yes)) diff --git a/media/libvpx/libvpx/vpx/exports_spatial_svc b/media/libvpx/libvpx/vpx/exports_spatial_svc deleted file mode 100644 index d258a1d61813..000000000000 --- a/media/libvpx/libvpx/vpx/exports_spatial_svc +++ /dev/null @@ -1,6 +0,0 @@ -text vpx_svc_dump_statistics -text vpx_svc_encode -text vpx_svc_get_message -text vpx_svc_init -text vpx_svc_release -text vpx_svc_set_options diff --git a/media/libvpx/libvpx/vpx/internal/vpx_codec_internal.h b/media/libvpx/libvpx/vpx/internal/vpx_codec_internal.h index 522e5c1684aa..9eed85e5de7d 100644 --- a/media/libvpx/libvpx/vpx/internal/vpx_codec_internal.h +++ b/media/libvpx/libvpx/vpx/internal/vpx_codec_internal.h @@ -40,8 +40,8 @@ * Once initialized, the instance is manged using other functions from * the vpx_codec_* family. 
 */
-#ifndef VPX_INTERNAL_VPX_CODEC_INTERNAL_H_
-#define VPX_INTERNAL_VPX_CODEC_INTERNAL_H_
+#ifndef VPX_VPX_INTERNAL_VPX_CODEC_INTERNAL_H_
+#define VPX_VPX_INTERNAL_VPX_CODEC_INTERNAL_H_
 #include "../vpx_decoder.h"
 #include "../vpx_encoder.h"
 #include <stdarg.h>
@@ -442,4 +442,4 @@ void vpx_internal_error(struct vpx_internal_error_info *info,
 }  // extern "C"
 #endif
 
-#endif  // VPX_INTERNAL_VPX_CODEC_INTERNAL_H_
+#endif  // VPX_VPX_INTERNAL_VPX_CODEC_INTERNAL_H_
diff --git a/media/libvpx/libvpx/vpx/src/vpx_encoder.c b/media/libvpx/libvpx/vpx/src/vpx_encoder.c
index 1cf2dca695a9..6272502ced16 100644
--- a/media/libvpx/libvpx/vpx/src/vpx_encoder.c
+++ b/media/libvpx/libvpx/vpx/src/vpx_encoder.c
@@ -20,7 +20,7 @@
 #include "vpx_config.h"
 #include "vpx/internal/vpx_codec_internal.h"
 
-#define SAVE_STATUS(ctx, var) (ctx ? (ctx->err = var) : var)
+#define SAVE_STATUS(ctx, var) ((ctx) ? ((ctx)->err = (var)) : (var))
 
 static vpx_codec_alg_priv_t *get_alg_priv(vpx_codec_ctx_t *ctx) {
   return (vpx_codec_alg_priv_t *)ctx->priv;
@@ -82,6 +82,9 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
     res = VPX_CODEC_INCAPABLE;
   else {
     int i;
+#if CONFIG_MULTI_RES_ENCODING
+    int mem_loc_owned = 0;
+#endif
     void *mem_loc = NULL;
 
     if (iface->enc.mr_get_mem_loc == NULL) return VPX_CODEC_INCAPABLE;
@@ -101,12 +104,6 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
       mr_cfg.mr_down_sampling_factor.num = dsf->num;
       mr_cfg.mr_down_sampling_factor.den = dsf->den;
 
-      /* Force Key-frame synchronization. Namely, encoder at higher
-       * resolution always use the same frame_type chosen by the
-       * lowest-resolution encoder.
-       */
-      if (mr_cfg.mr_encoder_id) cfg->kf_mode = VPX_KF_DISABLED;
-
       ctx->iface = iface;
       ctx->name = iface->name;
       ctx->priv = NULL;
@@ -129,13 +126,17 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
           i--;
         }
 #if CONFIG_MULTI_RES_ENCODING
-        assert(mem_loc);
-        free(((LOWER_RES_FRAME_INFO *)mem_loc)->mb_info);
-        free(mem_loc);
+        if (!mem_loc_owned) {
+          assert(mem_loc);
+          free(((LOWER_RES_FRAME_INFO *)mem_loc)->mb_info);
+          free(mem_loc);
+        }
 #endif
         return SAVE_STATUS(ctx, res);
       }
-
+#if CONFIG_MULTI_RES_ENCODING
+      mem_loc_owned = 1;
+#endif
       ctx++;
       cfg++;
       dsf++;
@@ -154,7 +155,7 @@ vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface,
   vpx_codec_enc_cfg_map_t *map;
   int i;
 
-  if (!iface || !cfg || usage > INT_MAX)
+  if (!iface || !cfg || usage != 0)
     res = VPX_CODEC_INVALID_PARAM;
   else if (!(iface->caps & VPX_CODEC_CAP_ENCODER))
     res = VPX_CODEC_INCAPABLE;
@@ -163,19 +164,16 @@ vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface,
     for (i = 0; i < iface->enc.cfg_map_count; ++i) {
       map = iface->enc.cfg_maps + i;
 
-      if (map->usage == (int)usage) {
-        *cfg = map->cfg;
-        cfg->g_usage = usage;
-        res = VPX_CODEC_OK;
-        break;
-      }
+      *cfg = map->cfg;
+      res = VPX_CODEC_OK;
+      break;
     }
   }
 
   return res;
 }
 
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
 /* On X86, disable the x87 unit's internal 80 bit precision for better
  * consistency with the SSE unit's 64 bit precision.
*/ diff --git a/media/libvpx/libvpx/vpx/src/vpx_image.c b/media/libvpx/libvpx/vpx/src/vpx_image.c index af7c529a7ba4..a7c6ec0ceabf 100644 --- a/media/libvpx/libvpx/vpx/src/vpx_image.c +++ b/media/libvpx/libvpx/vpx/src/vpx_image.c @@ -38,23 +38,8 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt, /* Get sample size for this format */ switch (fmt) { - case VPX_IMG_FMT_RGB32: - case VPX_IMG_FMT_RGB32_LE: - case VPX_IMG_FMT_ARGB: - case VPX_IMG_FMT_ARGB_LE: bps = 32; break; - case VPX_IMG_FMT_RGB24: - case VPX_IMG_FMT_BGR24: bps = 24; break; - case VPX_IMG_FMT_RGB565: - case VPX_IMG_FMT_RGB565_LE: - case VPX_IMG_FMT_RGB555: - case VPX_IMG_FMT_RGB555_LE: - case VPX_IMG_FMT_UYVY: - case VPX_IMG_FMT_YUY2: - case VPX_IMG_FMT_YVYU: bps = 16; break; case VPX_IMG_FMT_I420: - case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: bps = 12; break; + case VPX_IMG_FMT_YV12: bps = 12; break; case VPX_IMG_FMT_I422: case VPX_IMG_FMT_I440: bps = 16; break; case VPX_IMG_FMT_I444: bps = 24; break; @@ -69,8 +54,6 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt, switch (fmt) { case VPX_IMG_FMT_I420: case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: case VPX_IMG_FMT_I422: case VPX_IMG_FMT_I42016: case VPX_IMG_FMT_I42216: xcs = 1; break; @@ -81,8 +64,6 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt, case VPX_IMG_FMT_I420: case VPX_IMG_FMT_I440: case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: case VPX_IMG_FMT_I42016: case VPX_IMG_FMT_I44016: ycs = 1; break; default: ycs = 0; break; diff --git a/media/libvpx/libvpx/vpx/vp8.h b/media/libvpx/libvpx/vpx/vp8.h index 059c9d0f656b..f30dafed5853 100644 --- a/media/libvpx/libvpx/vpx/vp8.h +++ b/media/libvpx/libvpx/vpx/vp8.h @@ -10,7 +10,7 @@ /*!\defgroup vp8 VP8 * \ingroup codecs - * VP8 is vpx's newest video compression algorithm that uses motion + * VP8 is a video compression algorithm that uses motion * compensated prediction, Discrete Cosine Transform (DCT) coding of the * prediction error signal and context dependent entropy coding techniques * based on arithmetic principles. It features: @@ -27,8 +27,8 @@ /*!\file * \brief Provides controls common to both the VP8 encoder and decoder. */ -#ifndef VPX_VP8_H_ -#define VPX_VP8_H_ +#ifndef VPX_VPX_VP8_H_ +#define VPX_VPX_VP8_H_ #include "./vpx_codec.h" #include "./vpx_image.h" @@ -47,10 +47,6 @@ enum vp8_com_control_id { VP8_SET_REFERENCE = 1, VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */ VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */ - VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< \deprecated */ - VP8_SET_DBG_COLOR_MB_MODES = 5, /**< \deprecated */ - VP8_SET_DBG_COLOR_B_MODES = 6, /**< \deprecated */ - VP8_SET_DBG_DISPLAY_MV = 7, /**< \deprecated */ /* TODO(jkoleszar): The encoder incorrectly reuses some of these values (5+) * for its control ids. 
These should be migrated to something like the @@ -70,12 +66,7 @@ enum vp8_postproc_level { VP8_DEBLOCK = 1 << 0, VP8_DEMACROBLOCK = 1 << 1, VP8_ADDNOISE = 1 << 2, - VP8_DEBUG_TXT_FRAME_INFO = 1 << 3, /**< print frame information */ - VP8_DEBUG_TXT_MBLK_MODES = - 1 << 4, /**< print macro block modes over each macro block */ - VP8_DEBUG_TXT_DC_DIFF = 1 << 5, /**< print dc diff for each macro block */ - VP8_DEBUG_TXT_RATE_INFO = 1 << 6, /**< print video rate info (encoder only) */ - VP8_MFQE = 1 << 10 + VP8_MFQE = 1 << 3 }; /*!\brief post process flags @@ -132,14 +123,6 @@ VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *) #define VPX_CTRL_VP8_COPY_REFERENCE VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *) #define VPX_CTRL_VP8_SET_POSTPROC -VPX_CTRL_USE_TYPE_DEPRECATED(VP8_SET_DBG_COLOR_REF_FRAME, int) -#define VPX_CTRL_VP8_SET_DBG_COLOR_REF_FRAME -VPX_CTRL_USE_TYPE_DEPRECATED(VP8_SET_DBG_COLOR_MB_MODES, int) -#define VPX_CTRL_VP8_SET_DBG_COLOR_MB_MODES -VPX_CTRL_USE_TYPE_DEPRECATED(VP8_SET_DBG_COLOR_B_MODES, int) -#define VPX_CTRL_VP8_SET_DBG_COLOR_B_MODES -VPX_CTRL_USE_TYPE_DEPRECATED(VP8_SET_DBG_DISPLAY_MV, int) -#define VPX_CTRL_VP8_SET_DBG_DISPLAY_MV VPX_CTRL_USE_TYPE(VP9_GET_REFERENCE, vp9_ref_frame_t *) #define VPX_CTRL_VP9_GET_REFERENCE @@ -150,4 +133,4 @@ VPX_CTRL_USE_TYPE(VP9_GET_REFERENCE, vp9_ref_frame_t *) } // extern "C" #endif -#endif // VPX_VP8_H_ +#endif // VPX_VPX_VP8_H_ diff --git a/media/libvpx/libvpx/vpx/vp8cx.h b/media/libvpx/libvpx/vpx/vp8cx.h index c21b8b60db9f..95e2493b1e1e 100644 --- a/media/libvpx/libvpx/vpx/vp8cx.h +++ b/media/libvpx/libvpx/vpx/vp8cx.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_VP8CX_H_ -#define VPX_VP8CX_H_ +#ifndef VPX_VPX_VP8CX_H_ +#define VPX_VPX_VP8CX_H_ /*!\defgroup vp8_encoder WebM VP8/VP9 Encoder * \ingroup vp8 @@ -125,7 +125,7 @@ extern vpx_codec_iface_t *vpx_codec_vp9_cx(void); enum vp8e_enc_control_id { /*!\brief Codec control function to pass an ROI map to encoder. * - * Supported in codecs: VP8, VP9 + * Supported in codecs: VP8 */ VP8E_SET_ROI_MAP = 8, @@ -148,13 +148,16 @@ enum vp8e_enc_control_id { * speed at the expense of quality. * * \note Valid range for VP8: -16..16 - * \note Valid range for VP9: -8..8 + * \note Valid range for VP9: -9..9 * * Supported in codecs: VP8, VP9 */ VP8E_SET_CPUUSED = 13, - /*!\brief Codec control function to enable automatic set and use alf frames. + /*!\brief Codec control function to enable automatic use of arf frames. + * + * \note Valid range for VP8: 0..1 + * \note Valid range for VP9: 0..6 * * Supported in codecs: VP8, VP9 */ @@ -169,7 +172,10 @@ enum vp8e_enc_control_id { */ VP8E_SET_NOISE_SENSITIVITY, - /*!\brief Codec control function to set sharpness. + /*!\brief Codec control function to set higher sharpness at the expense + * of a lower PSNR. + * + * \note Valid range: 0..7 * * Supported in codecs: VP8, VP9 */ @@ -225,10 +231,10 @@ enum vp8e_enc_control_id { */ VP8E_SET_TUNING, - /*!\brief Codec control function to set constrained quality level. + /*!\brief Codec control function to set constrained / constant quality level. * - * \attention For this value to be used vpx_codec_enc_cfg_t::g_usage must be - * set to #VPX_CQ. 
+ * \attention For this value to be used vpx_codec_enc_cfg_t::rc_end_usage must + * be set to #VPX_CQ or #VPX_Q * \note Valid range: 0..63 * * Supported in codecs: VP8, VP9 @@ -423,6 +429,12 @@ enum vp8e_enc_control_id { */ VP9E_SET_SVC, + /*!\brief Codec control function to pass an ROI map to encoder. + * + * Supported in codecs: VP9 + */ + VP9E_SET_ROI_MAP, + /*!\brief Codec control function to set parameters for SVC. * \note Parameters contain min_q, max_q, scaling factor for each of the * SVC layers. @@ -529,7 +541,7 @@ enum vp8e_enc_control_id { * struct #vpx_svc_ref_frame_config defined below. * * Supported in codecs: VP9 - */ + */ VP9E_SET_SVC_REF_FRAME_CONFIG, /*!\brief Codec control function to set intended rendering image size. @@ -550,11 +562,11 @@ enum vp8e_enc_control_id { VP9E_SET_TARGET_LEVEL, /*!\brief Codec control function to set row level multi-threading. - * - * 0 : off, 1 : on - * - * Supported in codecs: VP9 - */ + * + * 0 : off, 1 : on + * + * Supported in codecs: VP9 + */ VP9E_SET_ROW_MT, /*!\brief Codec control function to get bitstream level. @@ -574,18 +586,18 @@ enum vp8e_enc_control_id { VP9E_SET_ALT_REF_AQ, /*!\brief Boost percentage for Golden Frame in CBR mode. - * - * This value controls the amount of boost given to Golden Frame in - * CBR mode. It is expressed as a percentage of the average - * per-frame bitrate, with the special (and default) value 0 meaning - * the feature is off, i.e., no golden frame boost in CBR mode and - * average bitrate target is used. - * - * For example, to allow 100% more bits, i.e, 2X, in a golden frame - * than average frame, set this to 100. - * - * Supported in codecs: VP8 - */ + * + * This value controls the amount of boost given to Golden Frame in + * CBR mode. It is expressed as a percentage of the average + * per-frame bitrate, with the special (and default) value 0 meaning + * the feature is off, i.e., no golden frame boost in CBR mode and + * average bitrate target is used. + * + * For example, to allow 100% more bits, i.e, 2X, in a golden frame + * than average frame, set this to 100. + * + * Supported in codecs: VP8 + */ VP8E_SET_GF_CBR_BOOST_PCT, /*!\brief Codec control function to enable the extreme motion vector unit test @@ -596,6 +608,74 @@ enum vp8e_enc_control_id { * Supported in codecs: VP9 */ VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, + + /*!\brief Codec control function to constrain the inter-layer prediction + * (prediction of lower spatial resolution) in VP9 SVC. + * + * 0 : inter-layer prediction on, 1 : off, 2 : off only on non-key frames + * + * Supported in codecs: VP9 + */ + VP9E_SET_SVC_INTER_LAYER_PRED, + + /*!\brief Codec control function to set mode and thresholds for frame + * dropping in SVC. Drop frame thresholds are set per-layer. Mode is set as: + * 0 : layer-dependent dropping, 1 : constrained dropping, current layer drop + * forces drop on all upper layers. Default mode is 0. + * + * Supported in codecs: VP9 + */ + VP9E_SET_SVC_FRAME_DROP_LAYER, + + /*!\brief Codec control function to get the refresh and reference flags and + * the buffer indices, up to the last encoded spatial layer. + * + * Supported in codecs: VP9 + */ + VP9E_GET_SVC_REF_FRAME_CONFIG, + + /*!\brief Codec control function to enable/disable use of golden reference as + * a second temporal reference for SVC. Only used when inter-layer prediction + * is disabled on INTER frames. 
+ * + * 0: Off, 1: Enabled (default) + * + * Supported in codecs: VP9 + */ + VP9E_SET_SVC_GF_TEMPORAL_REF, + + /*!\brief Codec control function to enable spatial layer sync frame, for any + * spatial layer. Enabling it for layer k means spatial layer k will disable + * all temporal prediction, but keep the inter-layer prediction. It will + * refresh any temporal reference buffer for that layer, and reset the + * temporal layer for the superframe to 0. Setting the layer sync for base + * spatial layer forces a key frame. Default is off (0) for all spatial + * layers. Spatial layer sync flag is reset to 0 after each encoded layer, + * so when control is invoked it is only used for the current superframe. + * + * 0: Off (default), 1: Enabled + * + * Supported in codecs: VP9 + */ + VP9E_SET_SVC_SPATIAL_LAYER_SYNC, + + /*!\brief Codec control function to enable temporal dependency model. + * + * Vp9 allows the encoder to run temporal dependency model and use it to + * improve the compression performance. To enable, set this parameter to be + * 1. The default value is set to be 1. + */ + VP9E_SET_TPL, + + /*!\brief Codec control function to enable postencode frame drop. + * + * This will allow encoder to drop frame after it's encoded. + * + * 0: Off (default), 1: Enabled + * + * Supported in codecs: VP9 + */ + VP9E_SET_POSTENCODE_DROP, }; /*!\brief vpx 1-D scaling mode @@ -643,16 +723,20 @@ typedef enum vp9e_temporal_layering_mode { */ typedef struct vpx_roi_map { - /*! An id between 0 and 3 for each 16x16 region within a frame. */ + /*! If ROI is enabled. */ + uint8_t enabled; + /*! An id between 0-3 (0-7 for vp9) for each 16x16 (8x8 for VP9) + * region within a frame. */ unsigned char *roi_map; unsigned int rows; /**< Number of rows. */ unsigned int cols; /**< Number of columns. */ - // TODO(paulwilkins): broken for VP9 which has 8 segments - // q and loop filter deltas for each segment - // (see MAX_MB_SEGMENTS) - int delta_q[4]; /**< Quantizer deltas. */ - int delta_lf[4]; /**< Loop filter deltas. */ - /*! Static breakout threshold for each segment. */ + /*! VP8 only uses the first 4 segments. VP9 uses 8 segments. */ + int delta_q[8]; /**< Quantizer deltas. */ + int delta_lf[8]; /**< Loop filter deltas. */ + /*! skip and ref frame segment is only used in VP9. */ + int skip[8]; /**< Skip this block. */ + int ref_frame[8]; /**< Reference frame for this block. */ + /*! Static breakout threshold for each segment. Only used in VP8. */ unsigned int static_threshold[4]; } vpx_roi_map_t; @@ -716,11 +800,13 @@ typedef enum { VP8_TUNE_PSNR, VP8_TUNE_SSIM } vp8e_tuning; * */ typedef struct vpx_svc_layer_id { - int spatial_layer_id; /**< Spatial layer id number. */ + int spatial_layer_id; /**< First spatial layer to start encoding. */ + // TODO(jianj): Deprecated, to be removed. int temporal_layer_id; /**< Temporal layer id number. */ + int temporal_layer_id_per_spatial[VPX_SS_MAX_LAYERS]; /**< Temp layer id. */ } vpx_svc_layer_id_t; -/*!\brief vp9 svc frame flag parameters. +/*!\brief vp9 svc frame flag parameters. * * This defines the frame flags and buffer indices for each spatial layer for * svc encoding. @@ -729,12 +815,58 @@ typedef struct vpx_svc_layer_id { * */ typedef struct vpx_svc_ref_frame_config { - int frame_flags[VPX_TS_MAX_LAYERS]; /**< Frame flags. */ - int lst_fb_idx[VPX_TS_MAX_LAYERS]; /**< Last buffer index. */ - int gld_fb_idx[VPX_TS_MAX_LAYERS]; /**< Golden buffer index. */ - int alt_fb_idx[VPX_TS_MAX_LAYERS]; /**< Altref buffer index. 
*/ + int lst_fb_idx[VPX_SS_MAX_LAYERS]; /**< Last buffer index. */ + int gld_fb_idx[VPX_SS_MAX_LAYERS]; /**< Golden buffer index. */ + int alt_fb_idx[VPX_SS_MAX_LAYERS]; /**< Altref buffer index. */ + int update_buffer_slot[VPX_SS_MAX_LAYERS]; /**< Update reference frames. */ + // TODO(jianj): Remove update_last/golden/alt_ref, these are deprecated. + int update_last[VPX_SS_MAX_LAYERS]; /**< Update last. */ + int update_golden[VPX_SS_MAX_LAYERS]; /**< Update golden. */ + int update_alt_ref[VPX_SS_MAX_LAYERS]; /**< Update altref. */ + int reference_last[VPX_SS_MAX_LAYERS]; /**< Last as reference. */ + int reference_golden[VPX_SS_MAX_LAYERS]; /**< Golden as reference. */ + int reference_alt_ref[VPX_SS_MAX_LAYERS]; /**< Altref as reference. */ + int64_t duration[VPX_SS_MAX_LAYERS]; /**< Duration per spatial layer. */ } vpx_svc_ref_frame_config_t; +/*!\brief VP9 svc frame dropping mode. + * + * This defines the frame drop mode for SVC. + * + */ +typedef enum { + CONSTRAINED_LAYER_DROP, + /**< Upper layers are constrained to drop if current layer drops. */ + LAYER_DROP, /**< Any spatial layer can drop. */ + FULL_SUPERFRAME_DROP, /**< Only full superframe can drop. */ + CONSTRAINED_FROM_ABOVE_DROP, + /**< Lower layers are constrained to drop if current layer drops. */ +} SVC_LAYER_DROP_MODE; + +/*!\brief vp9 svc frame dropping parameters. + * + * This defines the frame drop thresholds for each spatial layer, and + * the frame dropping mode: 0 = layer based frame dropping (default), + * 1 = constrained dropping where current layer drop forces all upper + * spatial layers to drop. + */ +typedef struct vpx_svc_frame_drop { + int framedrop_thresh[VPX_SS_MAX_LAYERS]; /**< Frame drop thresholds */ + SVC_LAYER_DROP_MODE + framedrop_mode; /**< Layer-based or constrained dropping. */ + int max_consec_drop; /**< Maximum consecutive drops, for any layer. */ +} vpx_svc_frame_drop_t; + +/*!\brief vp9 svc spatial layer sync parameters. + * + * This defines the spatial layer sync flag, defined per spatial layer. 
+ * + */ +typedef struct vpx_svc_spatial_layer_sync { + int spatial_layer_sync[VPX_SS_MAX_LAYERS]; /**< Sync layer flags */ + int base_layer_intra_only; /**< Flag for setting Intra-only frame on base */ +} vpx_svc_spatial_layer_sync_t; + /*!\cond */ /*!\brief VP8 encoder control function parameter type * @@ -749,6 +881,8 @@ VPX_CTRL_USE_TYPE(VP8E_SET_TEMPORAL_LAYER_ID, int) #define VPX_CTRL_VP8E_SET_TEMPORAL_LAYER_ID VPX_CTRL_USE_TYPE(VP8E_SET_ROI_MAP, vpx_roi_map_t *) #define VPX_CTRL_VP8E_SET_ROI_MAP +VPX_CTRL_USE_TYPE(VP9E_SET_ROI_MAP, vpx_roi_map_t *) +#define VPX_CTRL_VP9E_SET_ROI_MAP VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP, vpx_active_map_t *) #define VPX_CTRL_VP8E_SET_ACTIVEMAP VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *) @@ -792,6 +926,9 @@ VPX_CTRL_USE_TYPE(VP9E_SET_TILE_COLUMNS, int) VPX_CTRL_USE_TYPE(VP9E_SET_TILE_ROWS, int) #define VPX_CTRL_VP9E_SET_TILE_ROWS +VPX_CTRL_USE_TYPE(VP9E_SET_TPL, int) +#define VPX_CTRL_VP9E_SET_TPL + VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *) #define VPX_CTRL_VP8E_GET_LAST_QUANTIZER VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *) @@ -801,8 +938,8 @@ VPX_CTRL_USE_TYPE(VP9E_GET_SVC_LAYER_ID, vpx_svc_layer_id_t *) VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int) #define VPX_CTRL_VP8E_SET_MAX_INTRA_BITRATE_PCT -VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTER_BITRATE_PCT, unsigned int) -#define VPX_CTRL_VP8E_SET_MAX_INTER_BITRATE_PCT +VPX_CTRL_USE_TYPE(VP9E_SET_MAX_INTER_BITRATE_PCT, unsigned int) +#define VPX_CTRL_VP9E_SET_MAX_INTER_BITRATE_PCT VPX_CTRL_USE_TYPE(VP8E_SET_GF_CBR_BOOST_PCT, unsigned int) #define VPX_CTRL_VP8E_SET_GF_CBR_BOOST_PCT @@ -867,10 +1004,29 @@ VPX_CTRL_USE_TYPE(VP9E_GET_LEVEL, int *) VPX_CTRL_USE_TYPE(VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, unsigned int) #define VPX_CTRL_VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST +VPX_CTRL_USE_TYPE(VP9E_SET_SVC_INTER_LAYER_PRED, unsigned int) +#define VPX_CTRL_VP9E_SET_SVC_INTER_LAYER_PRED + +VPX_CTRL_USE_TYPE(VP9E_SET_SVC_FRAME_DROP_LAYER, vpx_svc_frame_drop_t *) +#define VPX_CTRL_VP9E_SET_SVC_FRAME_DROP_LAYER + +VPX_CTRL_USE_TYPE(VP9E_GET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *) +#define VPX_CTRL_VP9E_GET_SVC_REF_FRAME_CONFIG + +VPX_CTRL_USE_TYPE(VP9E_SET_SVC_GF_TEMPORAL_REF, unsigned int) +#define VPX_CTRL_VP9E_SET_SVC_GF_TEMPORAL_REF + +VPX_CTRL_USE_TYPE(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, + vpx_svc_spatial_layer_sync_t *) +#define VPX_CTRL_VP9E_SET_SVC_SPATIAL_LAYER_SYNC + +VPX_CTRL_USE_TYPE(VP9E_SET_POSTENCODE_DROP, unsigned int) +#define VPX_CTRL_VP9E_SET_POSTENCODE_DROP + /*!\endcond */ /*! @} - end defgroup vp8_encoder */ #ifdef __cplusplus } // extern "C" #endif -#endif // VPX_VP8CX_H_ +#endif // VPX_VPX_VP8CX_H_ diff --git a/media/libvpx/libvpx/vpx/vp8dx.h b/media/libvpx/libvpx/vpx/vp8dx.h index 398c67022091..af92f21ae305 100644 --- a/media/libvpx/libvpx/vpx/vp8dx.h +++ b/media/libvpx/libvpx/vpx/vp8dx.h @@ -17,8 +17,8 @@ * \brief Provides definitions for using VP8 or VP9 within the vpx Decoder * interface. */ -#ifndef VPX_VP8DX_H_ -#define VPX_VP8DX_H_ +#ifndef VPX_VPX_VP8DX_H_ +#define VPX_VPX_VP8DX_H_ #ifdef __cplusplus extern "C" { @@ -124,6 +124,24 @@ enum vp8_dec_control_id { */ VPXD_GET_LAST_QUANTIZER, + /*!\brief Codec control function to set row level multi-threading. + * + * 0 : off, 1 : on + * + * Supported in codecs: VP9 + */ + VP9D_SET_ROW_MT, + + /*!\brief Codec control function to set loopfilter optimization. 
+ * + * 0 : off, Loop filter is done after all tiles have been decoded + * 1 : on, Loop filter is done immediately after decode without + * waiting for all threads to sync. + * + * Supported in codecs: VP9 + */ + VP9D_SET_LOOP_FILTER_OPT, + VP8_DECODER_CTRL_ID_MAX }; @@ -145,10 +163,6 @@ typedef struct vpx_decrypt_init { void *decrypt_state; } vpx_decrypt_init; -/*!\brief A deprecated alias for vpx_decrypt_init. - */ -typedef vpx_decrypt_init vp8_decrypt_init; - /*!\cond */ /*!\brief VP8 decoder control function parameter type * @@ -181,6 +195,10 @@ VPX_CTRL_USE_TYPE(VP9_INVERT_TILE_DECODE_ORDER, int) VPX_CTRL_USE_TYPE(VP9_DECODE_SVC_SPATIAL_LAYER, int) #define VPX_CTRL_VP9_SET_SKIP_LOOP_FILTER VPX_CTRL_USE_TYPE(VP9_SET_SKIP_LOOP_FILTER, int) +#define VPX_CTRL_VP9_DECODE_SET_ROW_MT +VPX_CTRL_USE_TYPE(VP9D_SET_ROW_MT, int) +#define VPX_CTRL_VP9_SET_LOOP_FILTER_OPT +VPX_CTRL_USE_TYPE(VP9D_SET_LOOP_FILTER_OPT, int) /*!\endcond */ /*! @} - end defgroup vp8_decoder */ @@ -189,4 +207,4 @@ VPX_CTRL_USE_TYPE(VP9_SET_SKIP_LOOP_FILTER, int) } // extern "C" #endif -#endif // VPX_VP8DX_H_ +#endif // VPX_VPX_VP8DX_H_ diff --git a/media/libvpx/libvpx/vpx/vpx_codec.h b/media/libvpx/libvpx/vpx/vpx_codec.h index ad05f4c74e9e..6371a6ca2812 100644 --- a/media/libvpx/libvpx/vpx/vpx_codec.h +++ b/media/libvpx/libvpx/vpx/vpx_codec.h @@ -35,8 +35,8 @@ * Once initialized, the instance is manged using other functions from * the vpx_codec_* family. */ -#ifndef VPX_VPX_CODEC_H_ -#define VPX_VPX_CODEC_H_ +#ifndef VPX_VPX_VPX_CODEC_H_ +#define VPX_VPX_VPX_CODEC_H_ #ifdef __cplusplus extern "C" { @@ -241,11 +241,11 @@ typedef enum vpx_bit_depth { */ int vpx_codec_version(void); #define VPX_VERSION_MAJOR(v) \ - ((v >> 16) & 0xff) /**< extract major from packed version */ + (((v) >> 16) & 0xff) /**< extract major from packed version */ #define VPX_VERSION_MINOR(v) \ - ((v >> 8) & 0xff) /**< extract minor from packed version */ + (((v) >> 8) & 0xff) /**< extract minor from packed version */ #define VPX_VERSION_PATCH(v) \ - ((v >> 0) & 0xff) /**< extract patch from packed version */ + (((v) >> 0) & 0xff) /**< extract patch from packed version */ /*!\brief Return the version major number */ #define vpx_codec_version_major() ((vpx_codec_version() >> 16) & 0xff) @@ -465,4 +465,4 @@ vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx, int ctrl_id, ...); #ifdef __cplusplus } #endif -#endif // VPX_VPX_CODEC_H_ +#endif // VPX_VPX_VPX_CODEC_H_ diff --git a/media/libvpx/libvpx/vpx/vpx_codec.mk b/media/libvpx/libvpx/vpx/vpx_codec.mk index b77f45817b00..4ed77ad6d9df 100644 --- a/media/libvpx/libvpx/vpx/vpx_codec.mk +++ b/media/libvpx/libvpx/vpx/vpx_codec.mk @@ -15,10 +15,6 @@ API_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h API_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h -ifeq ($(CONFIG_VP9_ENCODER),yes) - API_SRCS-$(CONFIG_SPATIAL_SVC) += src/svc_encodeframe.c - API_SRCS-$(CONFIG_SPATIAL_SVC) += svc_context.h -endif API_SRCS-$(CONFIG_VP8_DECODER) += vp8.h API_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h diff --git a/media/libvpx/libvpx/vpx/vpx_decoder.h b/media/libvpx/libvpx/vpx/vpx_decoder.h index 2ff12112bcf7..f113f7196b6a 100644 --- a/media/libvpx/libvpx/vpx/vpx_decoder.h +++ b/media/libvpx/libvpx/vpx/vpx_decoder.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_VPX_DECODER_H_ -#define VPX_VPX_DECODER_H_ +#ifndef VPX_VPX_VPX_DECODER_H_ +#define VPX_VPX_VPX_DECODER_H_ /*!\defgroup decoder Decoder Algorithm Interface * \ingroup codec @@ -362,4 +362,4 @@ vpx_codec_err_t vpx_codec_set_frame_buffer_functions( #ifdef __cplusplus } #endif -#endif // VPX_VPX_DECODER_H_ +#endif // VPX_VPX_VPX_DECODER_H_ diff --git a/media/libvpx/libvpx/vpx/vpx_encoder.h b/media/libvpx/libvpx/vpx/vpx_encoder.h index 464bc408c88b..0cd0776884d1 100644 --- a/media/libvpx/libvpx/vpx/vpx_encoder.h +++ b/media/libvpx/libvpx/vpx/vpx_encoder.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_VPX_ENCODER_H_ -#define VPX_VPX_ENCODER_H_ +#ifndef VPX_VPX_VPX_ENCODER_H_ +#define VPX_VPX_VPX_ENCODER_H_ /*!\defgroup encoder Encoder Algorithm Interface * \ingroup codec @@ -39,15 +39,9 @@ extern "C" { /*! Temporal Scalability: Maximum number of coding layers */ #define VPX_TS_MAX_LAYERS 5 -/*!\deprecated Use #VPX_TS_MAX_PERIODICITY instead. */ -#define MAX_PERIODICITY VPX_TS_MAX_PERIODICITY - /*! Temporal+Spatial Scalability: Maximum number of coding layers */ #define VPX_MAX_LAYERS 12 // 3 temporal + 4 spatial layers are allowed. -/*!\deprecated Use #VPX_MAX_LAYERS instead. */ -#define MAX_LAYERS VPX_MAX_LAYERS // 3 temporal + 4 spatial layers allowed. - /*! Spatial Scalability: Maximum number of coding layers */ #define VPX_SS_MAX_LAYERS 5 @@ -63,7 +57,7 @@ extern "C" { * fields to structures */ #define VPX_ENCODER_ABI_VERSION \ - (6 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ + (14 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ /*! \brief Encoder capabilities bitfield * @@ -150,15 +144,10 @@ typedef uint32_t vpx_codec_er_flags_t; * extend this list to provide additional functionality. */ enum vpx_codec_cx_pkt_kind { - VPX_CODEC_CX_FRAME_PKT, /**< Compressed video frame */ - VPX_CODEC_STATS_PKT, /**< Two-pass statistics for this frame */ - VPX_CODEC_FPMB_STATS_PKT, /**< first pass mb statistics for this frame */ - VPX_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */ -// Spatial SVC is still experimental and may be removed. -#if defined(VPX_TEST_SPATIAL_SVC) - VPX_CODEC_SPATIAL_SVC_LAYER_SIZES, /**< Sizes for each layer in this frame*/ - VPX_CODEC_SPATIAL_SVC_LAYER_PSNR, /**< PSNR for each layer in this frame*/ -#endif + VPX_CODEC_CX_FRAME_PKT, /**< Compressed video frame */ + VPX_CODEC_STATS_PKT, /**< Two-pass statistics for this frame */ + VPX_CODEC_FPMB_STATS_PKT, /**< first pass mb statistics for this frame */ + VPX_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */ VPX_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions */ }; @@ -182,6 +171,13 @@ typedef struct vpx_codec_cx_pkt { * Only applicable when "output partition" mode is enabled. First * partition has id 0.*/ int partition_id; + /*!\brief Width and height of frames in this packet. VP8 will only use the + * first one.*/ + unsigned int width[VPX_SS_MAX_LAYERS]; /**< frame width */ + unsigned int height[VPX_SS_MAX_LAYERS]; /**< frame height */ + /*!\brief Flag to indicate if spatial layer frame in this packet is + * encoded or dropped. 
VP8 will always be set to 1.*/ + uint8_t spatial_layer_encoded[VPX_SS_MAX_LAYERS]; } frame; /**< data for compressed frame packet */ vpx_fixed_buf_t twopass_stats; /**< data for two-pass packet */ vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */ @@ -191,11 +187,6 @@ typedef struct vpx_codec_cx_pkt { double psnr[4]; /**< PSNR, total/y/u/v */ } psnr; /**< data for PSNR packet */ vpx_fixed_buf_t raw; /**< data for arbitrary packets */ -// Spatial SVC is still experimental and may be removed. -#if defined(VPX_TEST_SPATIAL_SVC) - size_t layer_sizes[VPX_SS_MAX_LAYERS]; - struct vpx_psnr_pkt layer_psnr[VPX_SS_MAX_LAYERS]; -#endif /* This packet size is fixed to allow codecs to extend this * interface without having to manage storage for raw packets, @@ -211,8 +202,6 @@ typedef struct vpx_codec_cx_pkt { * This callback function, when registered, returns with packets when each * spatial layer is encoded. */ -// putting the definitions here for now. (agrange: find if there -// is a better place for this) typedef void (*vpx_codec_enc_output_cx_pkt_cb_fn_t)(vpx_codec_cx_pkt_t *pkt, void *user_data); @@ -232,11 +221,11 @@ typedef struct vpx_rational { } vpx_rational_t; /**< alias for struct vpx_rational */ /*!\brief Multi-pass Encoding Pass */ -enum vpx_enc_pass { +typedef enum vpx_enc_pass { VPX_RC_ONE_PASS, /**< Single pass mode */ VPX_RC_FIRST_PASS, /**< First pass of multi-pass mode */ VPX_RC_LAST_PASS /**< Final pass of multi-pass mode */ -}; +} vpx_enc_pass; /*!\brief Rate control mode */ enum vpx_rc_mode { @@ -281,12 +270,9 @@ typedef struct vpx_codec_enc_cfg { * generic settings (g) */ - /*!\brief Algorithm specific "usage" value + /*!\brief Deprecated: Algorithm specific "usage" value * - * Algorithms may define multiple values for usage, which may convey the - * intent of how the application intends to use the stream. If this value - * is non-zero, consult the documentation for the codec to determine its - * meaning. + * This value must be zero. */ unsigned int g_usage; @@ -397,9 +383,6 @@ typedef struct vpx_codec_enc_cfg { * trade-off is often acceptable, but for many applications is not. It can * be disabled in these cases. * - * Note that not all codecs support this feature. All vpx VPx codecs do. - * For other codecs, consult the documentation for that algorithm. - * * This threshold is described as a percentage of the target data buffer. * When the data buffer falls below this percentage of fullness, a * dropped frame is indicated. Set the threshold to zero (0) to disable @@ -485,8 +468,7 @@ typedef struct vpx_codec_enc_cfg { * The quantizer is the most direct control over the quality of the * encoded image. The range of valid values for the quantizer is codec * specific. Consult the documentation for the codec to determine the - * values to use. To determine the range programmatically, call - * vpx_codec_enc_config_default() with a usage value of 0. + * values to use. */ unsigned int rc_min_quantizer; @@ -495,8 +477,7 @@ typedef struct vpx_codec_enc_cfg { * The quantizer is the most direct control over the quality of the * encoded image. The range of valid values for the quantizer is codec * specific. Consult the documentation for the codec to determine the - * values to use. To determine the range programmatically, call - * vpx_codec_enc_config_default() with a usage value of 0. + * values to use. */ unsigned int rc_max_quantizer; @@ -512,7 +493,7 @@ typedef struct vpx_codec_enc_cfg { * be subtracted from the target bitrate in order to compensate * for prior overshoot. 
* VP9: Expressed as a percentage of the target bitrate, a threshold - * undershoot level (current rate vs target) beyond which more agressive + * undershoot level (current rate vs target) beyond which more aggressive * corrective measures are taken. * * * Valid values in the range VP8:0-1000 VP9: 0-100. @@ -527,7 +508,7 @@ typedef struct vpx_codec_enc_cfg { * be added to the target bitrate in order to compensate for * prior undershoot. * VP9: Expressed as a percentage of the target bitrate, a threshold - * overshoot level (current rate vs target) beyond which more agressive + * overshoot level (current rate vs target) beyond which more aggressive * corrective measures are taken. * * Valid values in the range VP8:0-1000 VP9: 0-100. @@ -596,10 +577,10 @@ typedef struct vpx_codec_enc_cfg { unsigned int rc_2pass_vbr_maxsection_pct; /*!\brief Two-pass corpus vbr mode complexity control - * Used only in VP9: A value representing the corpus midpoint complexity - * for corpus vbr mode. This value defaults to 0 which disables corpus vbr - * mode in favour of normal vbr mode. - */ + * Used only in VP9: A value representing the corpus midpoint complexity + * for corpus vbr mode. This value defaults to 0 which disables corpus vbr + * mode in favour of normal vbr mode. + */ unsigned int rc_2pass_vbr_corpus_complexity; /* @@ -682,7 +663,7 @@ typedef struct vpx_codec_enc_cfg { * membership of frames to temporal layers. For example, if the * ts_periodicity = 8, then the frames are assigned to coding layers with a * repeated sequence of length 8. - */ + */ unsigned int ts_periodicity; /*!\brief Template defining the membership of frames to temporal layers. @@ -691,7 +672,7 @@ typedef struct vpx_codec_enc_cfg { * For a 2-layer encoding that assigns even numbered frames to one temporal * layer (0) and odd numbered frames to a second temporal layer (1) with * ts_periodicity=8, then ts_layer_id = (0,1,0,1,0,1,0,1). - */ + */ unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY]; /*!\brief Target bitrate for each spatial/temporal layer. @@ -802,7 +783,7 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver( * * \param[in] iface Pointer to the algorithm interface to use. * \param[out] cfg Configuration buffer to populate. - * \param[in] reserved Must set to 0 for VP8 and VP9. + * \param[in] usage Must be set to 0. * * \retval #VPX_CODEC_OK * The configuration was populated. @@ -813,7 +794,7 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver( */ vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *cfg, - unsigned int reserved); + unsigned int usage); /*!\brief Set or change configuration * @@ -862,7 +843,7 @@ vpx_fixed_buf_t *vpx_codec_get_global_headers(vpx_codec_ctx_t *ctx); * implicit that limiting the available time to encode will degrade the * output quality. The encoder can be given an unlimited time to produce the * best possible frame by specifying a deadline of '0'. This deadline - * supercedes the VPx notion of "best quality, good quality, realtime". + * supersedes the VPx notion of "best quality, good quality, realtime". * Applications that wish to map these former settings to the new deadline * based system can use the symbols #VPX_DL_REALTIME, #VPX_DL_GOOD_QUALITY, * and #VPX_DL_BEST_QUALITY. 
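To make the two interface points above concrete: the usage argument of vpx_codec_enc_config_default() must now be 0, and the deadline argument of vpx_codec_encode() selects the realtime/good/best trade-off. A minimal sketch follows (error handling trimmed; encode_one_frame is illustrative and not part of the patch):

#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

/* Fetch the default VP9 config (usage must be 0), then encode a single
 * frame with the realtime deadline. */
static int encode_one_frame(vpx_image_t *img, vpx_codec_pts_t pts) {
  vpx_codec_ctx_t codec;
  vpx_codec_enc_cfg_t cfg;
  if (vpx_codec_enc_config_default(vpx_codec_vp9_cx(), &cfg, 0)) return -1;
  cfg.g_w = img->d_w;
  cfg.g_h = img->d_h;
  if (vpx_codec_enc_init(&codec, vpx_codec_vp9_cx(), &cfg, 0)) return -1;
  if (vpx_codec_encode(&codec, img, pts, 1, 0, VPX_DL_REALTIME)) {
    vpx_codec_destroy(&codec);
    return -1;
  }
  return vpx_codec_destroy(&codec) == VPX_CODEC_OK ? 0 : -1;
}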
@@ -984,4 +965,4 @@ const vpx_image_t *vpx_codec_get_preview_frame(vpx_codec_ctx_t *ctx); #ifdef __cplusplus } #endif -#endif // VPX_VPX_ENCODER_H_ +#endif // VPX_VPX_VPX_ENCODER_H_ diff --git a/media/libvpx/libvpx/vpx/vpx_frame_buffer.h b/media/libvpx/libvpx/vpx/vpx_frame_buffer.h index ad70cdd572ba..fc8320017be0 100644 --- a/media/libvpx/libvpx/vpx/vpx_frame_buffer.h +++ b/media/libvpx/libvpx/vpx/vpx_frame_buffer.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_VPX_FRAME_BUFFER_H_ -#define VPX_VPX_FRAME_BUFFER_H_ +#ifndef VPX_VPX_VPX_FRAME_BUFFER_H_ +#define VPX_VPX_VPX_FRAME_BUFFER_H_ /*!\file * \brief Describes the decoder external frame buffer interface. @@ -52,12 +52,12 @@ typedef struct vpx_codec_frame_buffer { * data. The callback is triggered when the decoder needs a frame buffer to * decode a compressed image into. This function may be called more than once * for every call to vpx_codec_decode. The application may set fb->priv to - * some data which will be passed back in the ximage and the release function - * call. |fb| is guaranteed to not be NULL. On success the callback must - * return 0. Any failure the callback must return a value less than 0. + * some data which will be passed back in the vpx_image_t and the release + * function call. |fb| is guaranteed to not be NULL. On success the callback + * must return 0. Any failure the callback must return a value less than 0. * * \param[in] priv Callback's private data - * \param[in] new_size Size in bytes needed by the buffer + * \param[in] min_size Size in bytes needed by the buffer * \param[in,out] fb Pointer to vpx_codec_frame_buffer_t */ typedef int (*vpx_get_frame_buffer_cb_fn_t)(void *priv, size_t min_size, @@ -80,4 +80,4 @@ typedef int (*vpx_release_frame_buffer_cb_fn_t)(void *priv, } // extern "C" #endif -#endif // VPX_VPX_FRAME_BUFFER_H_ +#endif // VPX_VPX_VPX_FRAME_BUFFER_H_ diff --git a/media/libvpx/libvpx/vpx/vpx_image.h b/media/libvpx/libvpx/vpx/vpx_image.h index d6d3166d2ffd..98be5966a246 100644 --- a/media/libvpx/libvpx/vpx/vpx_image.h +++ b/media/libvpx/libvpx/vpx/vpx_image.h @@ -12,8 +12,8 @@ * \brief Describes the vpx image descriptor and associated operations * */ -#ifndef VPX_VPX_IMAGE_H_ -#define VPX_VPX_IMAGE_H_ +#ifndef VPX_VPX_VPX_IMAGE_H_ +#define VPX_VPX_VPX_IMAGE_H_ #ifdef __cplusplus extern "C" { @@ -27,7 +27,7 @@ extern "C" { * types, removing or reassigning enums, adding/removing/rearranging * fields to structures */ -#define VPX_IMAGE_ABI_VERSION (4) /**<\hideinitializer*/ +#define VPX_IMAGE_ABI_VERSION (5) /**<\hideinitializer*/ #define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */ #define VPX_IMG_FMT_UV_FLIP 0x200 /**< V plane precedes U in memory. 
*/ @@ -37,29 +37,12 @@ extern "C" { /*!\brief List of supported image formats */ typedef enum vpx_img_fmt { VPX_IMG_FMT_NONE, - VPX_IMG_FMT_RGB24, /**< 24 bit per pixel packed RGB */ - VPX_IMG_FMT_RGB32, /**< 32 bit per pixel packed 0RGB */ - VPX_IMG_FMT_RGB565, /**< 16 bit per pixel, 565 */ - VPX_IMG_FMT_RGB555, /**< 16 bit per pixel, 555 */ - VPX_IMG_FMT_UYVY, /**< UYVY packed YUV */ - VPX_IMG_FMT_YUY2, /**< YUYV packed YUV */ - VPX_IMG_FMT_YVYU, /**< YVYU packed YUV */ - VPX_IMG_FMT_BGR24, /**< 24 bit per pixel packed BGR */ - VPX_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */ - VPX_IMG_FMT_ARGB, /**< 32 bit packed ARGB, alpha=255 */ - VPX_IMG_FMT_ARGB_LE, /**< 32 bit packed BGRA, alpha=255 */ - VPX_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */ - VPX_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */ VPX_IMG_FMT_YV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */ VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2, - VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | - 3, /** < planar 4:2:0 format with vpx color space */ - VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4, VPX_IMG_FMT_I422 = VPX_IMG_FMT_PLANAR | 5, VPX_IMG_FMT_I444 = VPX_IMG_FMT_PLANAR | 6, VPX_IMG_FMT_I440 = VPX_IMG_FMT_PLANAR | 7, - VPX_IMG_FMT_444A = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 6, VPX_IMG_FMT_I42016 = VPX_IMG_FMT_I420 | VPX_IMG_FMT_HIGHBITDEPTH, VPX_IMG_FMT_I42216 = VPX_IMG_FMT_I422 | VPX_IMG_FMT_HIGHBITDEPTH, VPX_IMG_FMT_I44416 = VPX_IMG_FMT_I444 | VPX_IMG_FMT_HIGHBITDEPTH, @@ -167,21 +150,21 @@ vpx_image_t *vpx_img_alloc(vpx_image_t *img, vpx_img_fmt_t fmt, * storage for descriptor has been allocated elsewhere, and a descriptor is * desired to "wrap" that storage. * - * \param[in] img Pointer to storage for descriptor. If this parameter - * is NULL, the storage for the descriptor will be - * allocated on the heap. - * \param[in] fmt Format for the image - * \param[in] d_w Width of the image - * \param[in] d_h Height of the image - * \param[in] align Alignment, in bytes, of each row in the image. - * \param[in] img_data Storage to use for the image + * \param[in] img Pointer to storage for descriptor. If this + * parameter is NULL, the storage for the descriptor + * will be allocated on the heap. + * \param[in] fmt Format for the image + * \param[in] d_w Width of the image + * \param[in] d_h Height of the image + * \param[in] stride_align Alignment, in bytes, of each row in the image. + * \param[in] img_data Storage to use for the image * * \return Returns a pointer to the initialized image descriptor. If the img * parameter is non-null, the value of the img parameter will be * returned. */ vpx_image_t *vpx_img_wrap(vpx_image_t *img, vpx_img_fmt_t fmt, unsigned int d_w, - unsigned int d_h, unsigned int align, + unsigned int d_h, unsigned int stride_align, unsigned char *img_data); /*!\brief Set the rectangle identifying the displayed portion of the image @@ -221,4 +204,4 @@ void vpx_img_free(vpx_image_t *img); } // extern "C" #endif -#endif // VPX_VPX_IMAGE_H_ +#endif // VPX_VPX_VPX_IMAGE_H_ diff --git a/media/libvpx/libvpx/vpx/vpx_integer.h b/media/libvpx/libvpx/vpx/vpx_integer.h index 0c27142ff95c..4129d156f8a8 100644 --- a/media/libvpx/libvpx/vpx/vpx_integer.h +++ b/media/libvpx/libvpx/vpx/vpx_integer.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
 */
-#ifndef VPX_VPX_INTEGER_H_
-#define VPX_VPX_INTEGER_H_
+#ifndef VPX_VPX_VPX_INTEGER_H_
+#define VPX_VPX_VPX_INTEGER_H_
 
 /* get ptrdiff_t, size_t, wchar_t, NULL */
 #include <stddef.h>
@@ -18,29 +18,12 @@
 #define VPX_FORCE_INLINE __forceinline
 #define VPX_INLINE __inline
 #else
-#define VPX_FORCE_INLINE __inline__ __attribute__(always_inline)
+#define VPX_FORCE_INLINE __inline__ __attribute__((always_inline))
 // TODO(jbb): Allow a way to force inline off for older compilers.
 #define VPX_INLINE inline
 #endif
 
-#if !defined(VPX_DONT_DEFINE_STDINT_TYPES)
-
-#if defined(VPX_EMULATE_INTTYPES)
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef signed int int32_t;
-
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-
-#ifndef _UINTPTR_T_DEFINED
-typedef size_t uintptr_t;
-#endif
-
-#else
-
-/* Most platforms have the C99 standard integer types. */
+/* Assume platforms have the C99 standard integer types. */
 
 #if defined(__cplusplus)
 #if !defined(__STDC_FORMAT_MACROS)
@@ -51,17 +34,7 @@ typedef size_t uintptr_t;
 #endif
 #endif  // __cplusplus
 
+#include <inttypes.h>
 #include <stdint.h>
 
-#endif
-
-#endif  // VPX_DONT_DEFINE_STDINT_TYPES
-
-/* VS2010 defines stdint.h, but not inttypes.h */
-#if defined(_MSC_VER) && _MSC_VER < 1800
-#define PRId64 "I64d"
-#else
-#include <inttypes.h>
-#endif
-
-#endif  // VPX_VPX_INTEGER_H_
+#endif  // VPX_VPX_VPX_INTEGER_H_
diff --git a/media/libvpx/libvpx/vpx_dsp/add_noise.c b/media/libvpx/libvpx/vpx_dsp/add_noise.c
index cda6ae8814a3..6839e9792841 100644
--- a/media/libvpx/libvpx/vpx_dsp/add_noise.c
+++ b/media/libvpx/libvpx/vpx_dsp/add_noise.c
@@ -52,6 +52,7 @@ int vpx_setup_noise(double sigma, int8_t *noise, int size) {
     const int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i));
     if (a_i) {
       for (j = 0; j < a_i; ++j) {
+        if (next + j >= 256) goto set_noise;
         char_dist[next + j] = (int8_t)i;
       }
       next = next + j;
@@ -63,6 +64,7 @@ int vpx_setup_noise(double sigma, int8_t *noise, int size) {
     char_dist[next] = 0;
   }
 
+set_noise:
  for (i = 0; i < size; ++i) {
    noise[i] = char_dist[rand() & 0xff];  // NOLINT
  }
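The vpx_setup_noise() change above stops the loop from writing past the 256-entry char_dist table when the accumulated Gaussian bucket widths exceed 256. A minimal sketch of the same clamping pattern, with an illustrative helper name and widths[] input that are not part of libvpx:

#include <stdint.h>

/* Build a 256-entry lookup table from per-value bucket widths, bailing
 * out once the table is full, matching the patch's new
 * "if (next + j >= 256) goto set_noise;" guard. */
static void fill_dist_table(int8_t table[256], const int *widths,
                            int num_values) {
  int next = 0;
  int v;
  for (v = 0; v < num_values && next < 256; ++v) {
    int j;
    for (j = 0; j < widths[v] && next < 256; ++j) {
      table[next++] = (int8_t)v; /* clamp instead of overflowing */
    }
  }
  while (next < 256) table[next++] = 0; /* zero-pad the remainder */
}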
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/avg_pred_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/avg_pred_neon.c index 1370ec2d2ea1..5afdece0aba0 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/avg_pred_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/avg_pred_neon.c @@ -17,8 +17,8 @@ void vpx_comp_avg_pred_neon(uint8_t *comp, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride) { if (width > 8) { - int x, y; - for (y = 0; y < height; ++y) { + int x, y = height; + do { for (x = 0; x < width; x += 16) { const uint8x16_t p = vld1q_u8(pred + x); const uint8x16_t r = vld1q_u8(ref + x); @@ -28,28 +28,38 @@ void vpx_comp_avg_pred_neon(uint8_t *comp, const uint8_t *pred, int width, comp += width; pred += width; ref += ref_stride; - } - } else { - int i; - for (i = 0; i < width * height; i += 16) { + } while (--y); + } else if (width == 8) { + int i = width * height; + do { const uint8x16_t p = vld1q_u8(pred); uint8x16_t r; - - if (width == 4) { - r = load_unaligned_u8q(ref, ref_stride); - ref += 4 * ref_stride; - } else { - const uint8x8_t r_0 = vld1_u8(ref); - const uint8x8_t r_1 = vld1_u8(ref + ref_stride); - assert(width == 8); - r = vcombine_u8(r_0, r_1); - ref += 2 * ref_stride; - } + const uint8x8_t r_0 = vld1_u8(ref); + const uint8x8_t r_1 = vld1_u8(ref + ref_stride); + r = vcombine_u8(r_0, r_1); + ref += 2 * ref_stride; r = vrhaddq_u8(r, p); vst1q_u8(comp, r); pred += 16; comp += 16; - } + i -= 16; + } while (i); + } else { + int i = width * height; + assert(width == 4); + do { + const uint8x16_t p = vld1q_u8(pred); + uint8x16_t r; + + r = load_unaligned_u8q(ref, ref_stride); + ref += 4 * ref_stride; + r = vrhaddq_u8(r, p); + vst1q_u8(comp, r); + + pred += 16; + comp += 16; + i -= 16; + } while (i); } } diff --git a/media/libvpx/libvpx/vpx_dsp/arm/deblock_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/deblock_neon.c index 1fb41d299208..7efce3273597 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/deblock_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/deblock_neon.c @@ -91,11 +91,6 @@ void vpx_post_proc_down_and_across_mb_row_neon(uint8_t *src_ptr, int row; int col; - // Process a stripe of macroblocks. The stripe will be a multiple of 16 (for - // Y) or 8 (for U/V) wide (cols) and the height (size) will be 16 (for Y) or 8 - // (for U/V). - assert((size == 8 || size == 16) && cols % 8 == 0); - // While columns of length 16 can be processed, load them. for (col = 0; col < cols - 8; col += 16) { uint8x16_t a0, a1, a2, a3, a4, a5, a6, a7; diff --git a/media/libvpx/libvpx/vpx_dsp/arm/fdct_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/fdct_neon.c index 04646ed2e0eb..3708cbb11fe9 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/fdct_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/fdct_neon.c @@ -11,6 +11,7 @@ #include <arm_neon.h> #include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/txfm_common.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/arm/idct_neon.h" diff --git a/media/libvpx/libvpx/vpx_dsp/arm/fwd_txfm_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/fwd_txfm_neon.c index 8049277b132b..374a262b93e7 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/fwd_txfm_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/fwd_txfm_neon.c @@ -11,6 +11,7 @@ #include <arm_neon.h> #include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/txfm_common.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/arm/idct_neon.h" diff --git a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct16x16_add_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct16x16_add_neon.c index 5358839b538c..654ab42ca402 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct16x16_add_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct16x16_add_neon.c @@ -11,61 +11,37 @@ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/arm/highbd_idct_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/inv_txfm.h" -static INLINE void highbd_idct16x16_add_wrap_low_8x2(const int64x2x2_t *const t, - int32x4x2_t *const d0, - int32x4x2_t *const d1) { - int32x2x2_t t32[4]; +static INLINE int32x4_t dct_const_round_shift_high_4(const int64x2x2_t in) { + int32x2x2_t t32; - t32[0].val[0] = vrshrn_n_s64(t[0].val[0], DCT_CONST_BITS); - t32[0].val[1] = vrshrn_n_s64(t[0].val[1], DCT_CONST_BITS); - t32[1].val[0] = vrshrn_n_s64(t[1].val[0], DCT_CONST_BITS); - t32[1].val[1] = vrshrn_n_s64(t[1].val[1], DCT_CONST_BITS); - t32[2].val[0] = vrshrn_n_s64(t[2].val[0], DCT_CONST_BITS); - t32[2].val[1] = vrshrn_n_s64(t[2].val[1], DCT_CONST_BITS); - t32[3].val[0] = vrshrn_n_s64(t[3].val[0], DCT_CONST_BITS); - t32[3].val[1] = vrshrn_n_s64(t[3].val[1], DCT_CONST_BITS); - d0->val[0] = vcombine_s32(t32[0].val[0], t32[0].val[1]); - d0->val[1] = vcombine_s32(t32[1].val[0], t32[1].val[1]); - d1->val[0] = vcombine_s32(t32[2].val[0], t32[2].val[1]); - d1->val[1] = vcombine_s32(t32[3].val[0], t32[3].val[1]); + t32.val[0] = vrshrn_n_s64(in.val[0], DCT_CONST_BITS); + t32.val[1] = vrshrn_n_s64(in.val[1], DCT_CONST_BITS); + return vcombine_s32(t32.val[0], t32.val[1]); } -static INLINE void highbd_idct16x16_add_wrap_low_4x2(const
int64x2x2_t *const t, - int32x4_t *const d0, - int32x4_t *const d1) { - int32x2x2_t t32[2]; - - t32[0].val[0] = vrshrn_n_s64(t[0].val[0], DCT_CONST_BITS); - t32[0].val[1] = vrshrn_n_s64(t[0].val[1], DCT_CONST_BITS); - t32[1].val[0] = vrshrn_n_s64(t[1].val[0], DCT_CONST_BITS); - t32[1].val[1] = vrshrn_n_s64(t[1].val[1], DCT_CONST_BITS); - *d0 = vcombine_s32(t32[0].val[0], t32[0].val[1]); - *d1 = vcombine_s32(t32[1].val[0], t32[1].val[1]); +static INLINE void dct_const_round_shift_high_4_dual( + const int64x2x2_t *const in, int32x4_t *const d0, int32x4_t *const d1) { + *d0 = dct_const_round_shift_high_4(in[0]); + *d1 = dct_const_round_shift_high_4(in[1]); } static INLINE int32x4x2_t -highbd_idct16x16_add_wrap_low_8x1(const int64x2x2_t *const t) { - int32x2x2_t t32[2]; - int32x4x2_t d; - - t32[0].val[0] = vrshrn_n_s64(t[0].val[0], DCT_CONST_BITS); - t32[0].val[1] = vrshrn_n_s64(t[0].val[1], DCT_CONST_BITS); - t32[1].val[0] = vrshrn_n_s64(t[1].val[0], DCT_CONST_BITS); - t32[1].val[1] = vrshrn_n_s64(t[1].val[1], DCT_CONST_BITS); - d.val[0] = vcombine_s32(t32[0].val[0], t32[0].val[1]); - d.val[1] = vcombine_s32(t32[1].val[0], t32[1].val[1]); - return d; +dct_const_round_shift_high_4x2_int64x2x2(const int64x2x2_t *const in) { + int32x4x2_t out; + out.val[0] = dct_const_round_shift_high_4(in[0]); + out.val[1] = dct_const_round_shift_high_4(in[1]); + return out; } -static INLINE int32x4_t highbd_idct16x16_add_wrap_low_4x1(const int64x2x2_t t) { - int32x2x2_t t32; - - t32.val[0] = vrshrn_n_s64(t.val[0], DCT_CONST_BITS); - t32.val[1] = vrshrn_n_s64(t.val[1], DCT_CONST_BITS); - return vcombine_s32(t32.val[0], t32.val[1]); +static INLINE void dct_const_round_shift_high_4x2x2(const int64x2x2_t *const in, + int32x4x2_t *const d0, + int32x4x2_t *const d1) { + *d0 = dct_const_round_shift_high_4x2_int64x2x2(in + 0); + *d1 = dct_const_round_shift_high_4x2_int64x2x2(in + 2); } static INLINE void highbd_idct_cospi_2_30(const int32x4x2_t s0, @@ -107,7 +83,7 @@ static INLINE void highbd_idct_cospi_2_30(const int32x4x2_t s0, vget_low_s32(cospi_2_30_10_22), 0); t[3].val[1] = vmlal_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_low_s32(cospi_2_30_10_22), 0); - highbd_idct16x16_add_wrap_low_8x2(t, d0, d1); + dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_4_28(const int32x4x2_t s0, @@ -149,7 +125,7 @@ static INLINE void highbd_idct_cospi_4_28(const int32x4x2_t s0, vget_low_s32(cospi_4_12_20N_28), 0); t[3].val[1] = vmlal_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_low_s32(cospi_4_12_20N_28), 0); - highbd_idct16x16_add_wrap_low_8x2(t, d0, d1); + dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_6_26(const int32x4x2_t s0, @@ -191,7 +167,7 @@ static INLINE void highbd_idct_cospi_6_26(const int32x4x2_t s0, vget_low_s32(cospi_6_26N_14_18N), 1); t[3].val[1] = vmlsl_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_low_s32(cospi_6_26N_14_18N), 1); - highbd_idct16x16_add_wrap_low_8x2(t, d0, d1); + dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_10_22(const int32x4x2_t s0, @@ -233,7 +209,7 @@ static INLINE void highbd_idct_cospi_10_22(const int32x4x2_t s0, vget_high_s32(cospi_2_30_10_22), 0); t[3].val[1] = vmlal_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_high_s32(cospi_2_30_10_22), 0); - highbd_idct16x16_add_wrap_low_8x2(t, d0, d1); + dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_12_20(const int32x4x2_t s0, @@ -275,7 +251,7 @@ static INLINE void 
highbd_idct_cospi_12_20(const int32x4x2_t s0, vget_high_s32(cospi_4_12_20N_28), 0); t[3].val[1] = vmlsl_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_high_s32(cospi_4_12_20N_28), 0); - highbd_idct16x16_add_wrap_low_8x2(t, d0, d1); + dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_14_18(const int32x4x2_t s0, @@ -317,7 +293,7 @@ static INLINE void highbd_idct_cospi_14_18(const int32x4x2_t s0, vget_high_s32(cospi_6_26N_14_18N), 1); t[3].val[1] = vmlsl_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_high_s32(cospi_6_26N_14_18N), 1); - highbd_idct16x16_add_wrap_low_8x2(t, d0, d1); + dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_8_24_q_kernel( @@ -386,7 +362,7 @@ static INLINE void highbd_idct_cospi_8_24_q(const int32x4x2_t s0, int64x2x2_t t[4]; highbd_idct_cospi_8_24_q_kernel(s0, s1, cospi_0_8_16_24, t); - highbd_idct16x16_add_wrap_low_8x2(t, d0, d1); + dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_8_24_d(const int32x4_t s0, @@ -397,7 +373,7 @@ static INLINE void highbd_idct_cospi_8_24_d(const int32x4_t s0, int64x2x2_t t[2]; highbd_idct_cospi_8_24_d_kernel(s0, s1, cospi_0_8_16_24, t); - highbd_idct16x16_add_wrap_low_4x2(t, d0, d1); + dct_const_round_shift_high_4_dual(t, d0, d1); } static INLINE void highbd_idct_cospi_8_24_neg_q(const int32x4x2_t s0, @@ -412,7 +388,7 @@ static INLINE void highbd_idct_cospi_8_24_neg_q(const int32x4x2_t s0, t[2].val[1] = vsubq_s64(vdupq_n_s64(0), t[2].val[1]); t[3].val[0] = vsubq_s64(vdupq_n_s64(0), t[3].val[0]); t[3].val[1] = vsubq_s64(vdupq_n_s64(0), t[3].val[1]); - highbd_idct16x16_add_wrap_low_8x2(t, d0, d1); + dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_8_24_neg_d(const int32x4_t s0, @@ -425,7 +401,7 @@ static INLINE void highbd_idct_cospi_8_24_neg_d(const int32x4_t s0, highbd_idct_cospi_8_24_d_kernel(s0, s1, cospi_0_8_16_24, t); t[1].val[0] = vsubq_s64(vdupq_n_s64(0), t[1].val[0]); t[1].val[1] = vsubq_s64(vdupq_n_s64(0), t[1].val[1]); - highbd_idct16x16_add_wrap_low_4x2(t, d0, d1); + dct_const_round_shift_high_4_dual(t, d0, d1); } static INLINE void highbd_idct_cospi_16_16_q(const int32x4x2_t s0, @@ -459,7 +435,7 @@ static INLINE void highbd_idct_cospi_16_16_q(const int32x4x2_t s0, vget_high_s32(cospi_0_8_16_24), 0); t[3].val[1] = vmlal_lane_s32(t[5].val[1], vget_high_s32(s0.val[1]), vget_high_s32(cospi_0_8_16_24), 0); - highbd_idct16x16_add_wrap_low_8x2(t, d0, d1); + dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_16_16_d(const int32x4_t s0, @@ -481,7 +457,7 @@ static INLINE void highbd_idct_cospi_16_16_d(const int32x4_t s0, vget_high_s32(cospi_0_8_16_24), 0); t[1].val[1] = vmlal_lane_s32(t[2].val[1], vget_high_s32(s0), vget_high_s32(cospi_0_8_16_24), 0); - highbd_idct16x16_add_wrap_low_4x2(t, d0, d1); + dct_const_round_shift_high_4_dual(t, d0, d1); } static INLINE void highbd_idct16x16_add_stage7_dual( @@ -540,62 +516,9 @@ static INLINE void highbd_idct16x16_add_stage7(const int32x4_t *const step2, out[15] = vsubq_s32(step2[0], step2[15]); } -static INLINE void highbd_idct16x16_store_pass1(const int32x4x2_t *const out, - int32_t *output) { - // Save the result into output - vst1q_s32(output + 0, out[0].val[0]); - vst1q_s32(output + 4, out[0].val[1]); - output += 16; - vst1q_s32(output + 0, out[1].val[0]); - vst1q_s32(output + 4, out[1].val[1]); - output += 16; - vst1q_s32(output + 0, out[2].val[0]); - vst1q_s32(output + 4, out[2].val[1]); - output += 16; - 
vst1q_s32(output + 0, out[3].val[0]); - vst1q_s32(output + 4, out[3].val[1]); - output += 16; - vst1q_s32(output + 0, out[4].val[0]); - vst1q_s32(output + 4, out[4].val[1]); - output += 16; - vst1q_s32(output + 0, out[5].val[0]); - vst1q_s32(output + 4, out[5].val[1]); - output += 16; - vst1q_s32(output + 0, out[6].val[0]); - vst1q_s32(output + 4, out[6].val[1]); - output += 16; - vst1q_s32(output + 0, out[7].val[0]); - vst1q_s32(output + 4, out[7].val[1]); - output += 16; - vst1q_s32(output + 0, out[8].val[0]); - vst1q_s32(output + 4, out[8].val[1]); - output += 16; - vst1q_s32(output + 0, out[9].val[0]); - vst1q_s32(output + 4, out[9].val[1]); - output += 16; - vst1q_s32(output + 0, out[10].val[0]); - vst1q_s32(output + 4, out[10].val[1]); - output += 16; - vst1q_s32(output + 0, out[11].val[0]); - vst1q_s32(output + 4, out[11].val[1]); - output += 16; - vst1q_s32(output + 0, out[12].val[0]); - vst1q_s32(output + 4, out[12].val[1]); - output += 16; - vst1q_s32(output + 0, out[13].val[0]); - vst1q_s32(output + 4, out[13].val[1]); - output += 16; - vst1q_s32(output + 0, out[14].val[0]); - vst1q_s32(output + 4, out[14].val[1]); - output += 16; - vst1q_s32(output + 0, out[15].val[0]); - vst1q_s32(output + 4, out[15].val[1]); -} - -static void vpx_highbd_idct16x16_256_add_half1d(const int32_t *input, - int32_t *output, uint16_t *dest, - const int stride, - const int bd) { +void vpx_highbd_idct16x16_256_add_half1d(const int32_t *input, int32_t *output, + uint16_t *dest, const int stride, + const int bd) { const int32x4_t cospi_0_8_16_24 = vld1q_s32(kCospi32 + 0); const int32x4_t cospi_4_12_20N_28 = vld1q_s32(kCospi32 + 4); const int32x4_t cospi_2_30_10_22 = vld1q_s32(kCospi32 + 8); @@ -815,7 +738,7 @@ static INLINE int32x4x2_t highbd_idct_cospi_lane0_dual(const int32x4x2_t s, t[0].val[1] = vmull_lane_s32(vget_high_s32(s.val[0]), coef, 0); t[1].val[0] = vmull_lane_s32(vget_low_s32(s.val[1]), coef, 0); t[1].val[1] = vmull_lane_s32(vget_high_s32(s.val[1]), coef, 0); - return highbd_idct16x16_add_wrap_low_8x1(t); + return dct_const_round_shift_high_4x2_int64x2x2(t); } static INLINE int32x4_t highbd_idct_cospi_lane0(const int32x4_t s, @@ -824,7 +747,7 @@ static INLINE int32x4_t highbd_idct_cospi_lane0(const int32x4_t s, t.val[0] = vmull_lane_s32(vget_low_s32(s), coef, 0); t.val[1] = vmull_lane_s32(vget_high_s32(s), coef, 0); - return highbd_idct16x16_add_wrap_low_4x1(t); + return dct_const_round_shift_high_4(t); } static INLINE int32x4x2_t highbd_idct_cospi_lane1_dual(const int32x4x2_t s, @@ -835,7 +758,7 @@ static INLINE int32x4x2_t highbd_idct_cospi_lane1_dual(const int32x4x2_t s, t[0].val[1] = vmull_lane_s32(vget_high_s32(s.val[0]), coef, 1); t[1].val[0] = vmull_lane_s32(vget_low_s32(s.val[1]), coef, 1); t[1].val[1] = vmull_lane_s32(vget_high_s32(s.val[1]), coef, 1); - return highbd_idct16x16_add_wrap_low_8x1(t); + return dct_const_round_shift_high_4x2_int64x2x2(t); } static INLINE int32x4_t highbd_idct_cospi_lane1(const int32x4_t s, @@ -844,7 +767,7 @@ static INLINE int32x4_t highbd_idct_cospi_lane1(const int32x4_t s, t.val[0] = vmull_lane_s32(vget_low_s32(s), coef, 1); t.val[1] = vmull_lane_s32(vget_high_s32(s), coef, 1); - return highbd_idct16x16_add_wrap_low_4x1(t); + return dct_const_round_shift_high_4(t); } static void vpx_highbd_idct16x16_38_add_half1d(const int32_t *input, @@ -1003,8 +926,8 @@ static void vpx_highbd_idct16x16_38_add_half1d(const int32_t *input, } } -void vpx_highbd_idct16x16_10_add_half1d_pass1(const tran_low_t *input, - int32_t *output) { +static void 
highbd_idct16x16_10_add_half1d_pass1(const tran_low_t *input, + int32_t *output) { const int32x4_t cospi_0_8_16_24 = vld1q_s32(kCospi32 + 0); const int32x4_t cospi_4_12_20N_28 = vld1q_s32(kCospi32 + 4); const int32x4_t cospi_2_30_10_22 = vld1q_s32(kCospi32 + 8); @@ -1142,10 +1065,11 @@ void vpx_highbd_idct16x16_10_add_half1d_pass1(const tran_low_t *input, vst1q_s32(output, out[15]); } -void vpx_highbd_idct16x16_10_add_half1d_pass2(const int32_t *input, - int32_t *const output, - uint16_t *const dest, - const int stride, const int bd) { +static void highbd_idct16x16_10_add_half1d_pass2(const int32_t *input, + int32_t *const output, + uint16_t *const dest, + const int stride, + const int bd) { const int32x4_t cospi_0_8_16_24 = vld1q_s32(kCospi32 + 0); const int32x4_t cospi_4_12_20N_28 = vld1q_s32(kCospi32 + 4); const int32x4_t cospi_2_30_10_22 = vld1q_s32(kCospi32 + 8); @@ -1366,16 +1290,16 @@ void vpx_highbd_idct16x16_10_add_neon(const tran_low_t *input, uint16_t *dest, // pass 1 // Parallel idct on the upper 8 rows - vpx_highbd_idct16x16_10_add_half1d_pass1(input, row_idct_output); + highbd_idct16x16_10_add_half1d_pass1(input, row_idct_output); // pass 2 // Parallel idct to get the left 8 columns - vpx_highbd_idct16x16_10_add_half1d_pass2(row_idct_output, NULL, dest, - stride, bd); + highbd_idct16x16_10_add_half1d_pass2(row_idct_output, NULL, dest, stride, + bd); // Parallel idct to get the right 8 columns - vpx_highbd_idct16x16_10_add_half1d_pass2(row_idct_output + 4 * 8, NULL, - dest + 8, stride, bd); + highbd_idct16x16_10_add_half1d_pass2(row_idct_output + 4 * 8, NULL, + dest + 8, stride, bd); } } diff --git a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c index 96a55c472f67..5b36f7336784 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c @@ -124,83 +124,77 @@ static INLINE void do_butterfly(const int32x4x2_t qIn0, const int32x4x2_t qIn1, vrshrn_n_s64(q[3].val[1], DCT_CONST_BITS)); } -static INLINE void load_s32x4q_dual( - const int32_t *in, int32x4x2_t *const s0, int32x4x2_t *const s1, - int32x4x2_t *const s2, int32x4x2_t *const s3, int32x4x2_t *const s4, - int32x4x2_t *const s5, int32x4x2_t *const s6, int32x4x2_t *const s7) { - s0->val[0] = vld1q_s32(in); - s0->val[1] = vld1q_s32(in + 4); +static INLINE void load_s32x4q_dual(const int32_t *in, int32x4x2_t *const s) { + s[0].val[0] = vld1q_s32(in); + s[0].val[1] = vld1q_s32(in + 4); in += 32; - s1->val[0] = vld1q_s32(in); - s1->val[1] = vld1q_s32(in + 4); + s[1].val[0] = vld1q_s32(in); + s[1].val[1] = vld1q_s32(in + 4); in += 32; - s2->val[0] = vld1q_s32(in); - s2->val[1] = vld1q_s32(in + 4); + s[2].val[0] = vld1q_s32(in); + s[2].val[1] = vld1q_s32(in + 4); in += 32; - s3->val[0] = vld1q_s32(in); - s3->val[1] = vld1q_s32(in + 4); + s[3].val[0] = vld1q_s32(in); + s[3].val[1] = vld1q_s32(in + 4); in += 32; - s4->val[0] = vld1q_s32(in); - s4->val[1] = vld1q_s32(in + 4); + s[4].val[0] = vld1q_s32(in); + s[4].val[1] = vld1q_s32(in + 4); in += 32; - s5->val[0] = vld1q_s32(in); - s5->val[1] = vld1q_s32(in + 4); + s[5].val[0] = vld1q_s32(in); + s[5].val[1] = vld1q_s32(in + 4); in += 32; - s6->val[0] = vld1q_s32(in); - s6->val[1] = vld1q_s32(in + 4); + s[6].val[0] = vld1q_s32(in); + s[6].val[1] = vld1q_s32(in + 4); in += 32; - s7->val[0] = vld1q_s32(in); - s7->val[1] = vld1q_s32(in + 4); + s[7].val[0] = vld1q_s32(in); + s[7].val[1] = vld1q_s32(in + 4); } -static 
INLINE void transpose_and_store_s32_8x8(int32x4x2_t a0, int32x4x2_t a1, - int32x4x2_t a2, int32x4x2_t a3, - int32x4x2_t a4, int32x4x2_t a5, - int32x4x2_t a6, int32x4x2_t a7, +static INLINE void transpose_and_store_s32_8x8(int32x4x2_t *const a, int32_t **out) { - transpose_s32_8x8(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); + transpose_s32_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); - vst1q_s32(*out, a0.val[0]); + vst1q_s32(*out, a[0].val[0]); *out += 4; - vst1q_s32(*out, a0.val[1]); + vst1q_s32(*out, a[0].val[1]); *out += 4; - vst1q_s32(*out, a1.val[0]); + vst1q_s32(*out, a[1].val[0]); *out += 4; - vst1q_s32(*out, a1.val[1]); + vst1q_s32(*out, a[1].val[1]); *out += 4; - vst1q_s32(*out, a2.val[0]); + vst1q_s32(*out, a[2].val[0]); *out += 4; - vst1q_s32(*out, a2.val[1]); + vst1q_s32(*out, a[2].val[1]); *out += 4; - vst1q_s32(*out, a3.val[0]); + vst1q_s32(*out, a[3].val[0]); *out += 4; - vst1q_s32(*out, a3.val[1]); + vst1q_s32(*out, a[3].val[1]); *out += 4; - vst1q_s32(*out, a4.val[0]); + vst1q_s32(*out, a[4].val[0]); *out += 4; - vst1q_s32(*out, a4.val[1]); + vst1q_s32(*out, a[4].val[1]); *out += 4; - vst1q_s32(*out, a5.val[0]); + vst1q_s32(*out, a[5].val[0]); *out += 4; - vst1q_s32(*out, a5.val[1]); + vst1q_s32(*out, a[5].val[1]); *out += 4; - vst1q_s32(*out, a6.val[0]); + vst1q_s32(*out, a[6].val[0]); *out += 4; - vst1q_s32(*out, a6.val[1]); + vst1q_s32(*out, a[6].val[1]); *out += 4; - vst1q_s32(*out, a7.val[0]); + vst1q_s32(*out, a[7].val[0]); *out += 4; - vst1q_s32(*out, a7.val[1]); + vst1q_s32(*out, a[7].val[1]); *out += 4; } static INLINE void idct32_transpose_pair(const int32_t *input, int32_t *t_buf) { int i; - int32x4x2_t s0, s1, s2, s3, s4, s5, s6, s7; + int32x4x2_t s[8]; for (i = 0; i < 4; i++, input += 8) { - load_s32x4q_dual(input, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); - transpose_and_store_s32_8x8(s0, s1, s2, s3, s4, s5, s6, s7, &t_buf); + load_s32x4q_dual(input, s); + transpose_and_store_s32_8x8(s, &t_buf); } } diff --git a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c index 3970a5a86131..6750c1a426d2 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c @@ -12,6 +12,7 @@ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/arm/highbd_idct_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/txfm_common.h" diff --git a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c index 5d9063b15dc7..f05932cec3ad 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c @@ -12,6 +12,7 @@ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/arm/highbd_idct_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/txfm_common.h"
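The high-bitdepth kernels in the remaining files all narrow 64-bit cosine products back to 32 bits with vrshrn_n_s64(x, DCT_CONST_BITS), a rounding right shift. A scalar sketch of that step (assuming DCT_CONST_BITS is 14, its value in vpx_dsp/txfm_common.h; dct_const_round_shift_sketch is a hypothetical name):

#include <stdint.h>

#define DCT_CONST_BITS 14

/* Round-to-nearest division by 2^DCT_CONST_BITS: add half the divisor,
 * then shift. vrshrn_n_s64 does this per lane and narrows to 32 bits. */
static int32_t dct_const_round_shift_sketch(int64_t input) {
  return (int32_t)((input + ((int64_t)1 << (DCT_CONST_BITS - 1))) >>
                   DCT_CONST_BITS);
}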
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct4x4_add_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct4x4_add_neon.c index 1418a75a15b6..7be1dad1d315 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct4x4_add_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct4x4_add_neon.c @@ -11,27 +11,10 @@ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/arm/highbd_idct_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/inv_txfm.h" -static INLINE void highbd_idct4x4_1_add_kernel1(uint16_t **dest, - const int stride, - const int16x8_t res, - const int16x8_t max) { - const uint16x4_t a0 = vld1_u16(*dest); - const uint16x4_t a1 = vld1_u16(*dest + stride); - const int16x8_t a = vreinterpretq_s16_u16(vcombine_u16(a0, a1)); - // Note: In some profile tests, res is quite close to +/-32767. - // We use saturating addition. - const int16x8_t b = vqaddq_s16(res, a); - const int16x8_t c = vminq_s16(b, max); - const uint16x8_t d = vqshluq_n_s16(c, 0); - vst1_u16(*dest, vget_low_u16(d)); - *dest += stride; - vst1_u16(*dest, vget_high_u16(d)); - *dest += stride; -} - // res is in reverse row order static INLINE void highbd_idct4x4_1_add_kernel2(uint16_t **dest, const int stride, @@ -65,109 +48,42 @@ void vpx_highbd_idct4x4_1_add_neon(const tran_low_t *input, uint16_t *dest, highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max); } -static INLINE void idct4x4_16_kernel_bd10(const int32x4_t cospis, - int32x4_t *const a0, - int32x4_t *const a1, - int32x4_t *const a2, - int32x4_t *const a3) { - int32x4_t b0, b1, b2, b3; - - transpose_s32_4x4(a0, a1, a2, a3); - b0 = vaddq_s32(*a0, *a2); - b1 = vsubq_s32(*a0, *a2); - b0 = vmulq_lane_s32(b0, vget_high_s32(cospis), 0); - b1 = vmulq_lane_s32(b1, vget_high_s32(cospis), 0); - b2 = vmulq_lane_s32(*a1, vget_high_s32(cospis), 1); - b3 = vmulq_lane_s32(*a1, vget_low_s32(cospis), 1); - b2 = vmlsq_lane_s32(b2, *a3, vget_low_s32(cospis), 1); - b3 = vmlaq_lane_s32(b3, *a3, vget_high_s32(cospis), 1); - b0 = vrshrq_n_s32(b0, DCT_CONST_BITS); - b1 = vrshrq_n_s32(b1, DCT_CONST_BITS); - b2 = vrshrq_n_s32(b2, DCT_CONST_BITS); - b3 = vrshrq_n_s32(b3, DCT_CONST_BITS); - *a0 = vaddq_s32(b0, b3); - *a1 = vaddq_s32(b1, b2); - *a2 = vsubq_s32(b1, b2); - *a3 = vsubq_s32(b0, b3); -} - -static INLINE void idct4x4_16_kernel_bd12(const int32x4_t cospis, - int32x4_t *const a0, - int32x4_t *const a1, - int32x4_t *const a2, - int32x4_t *const a3) { - int32x4_t b0, b1, b2, b3; - int64x2_t c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11; - - transpose_s32_4x4(a0, a1, a2, a3); - b0 = vaddq_s32(*a0, *a2); - b1 = vsubq_s32(*a0, *a2); - c0 = vmull_lane_s32(vget_low_s32(b0), vget_high_s32(cospis), 0); - c1 = vmull_lane_s32(vget_high_s32(b0), vget_high_s32(cospis), 0); - c2 = vmull_lane_s32(vget_low_s32(b1), vget_high_s32(cospis), 0); - c3 = vmull_lane_s32(vget_high_s32(b1), vget_high_s32(cospis), 0); - c4 = vmull_lane_s32(vget_low_s32(*a1), vget_high_s32(cospis), 1); - c5 = vmull_lane_s32(vget_high_s32(*a1), vget_high_s32(cospis), 1); - c6 = vmull_lane_s32(vget_low_s32(*a1), vget_low_s32(cospis), 1); - c7 = vmull_lane_s32(vget_high_s32(*a1), vget_low_s32(cospis), 1); - c8 = vmull_lane_s32(vget_low_s32(*a3), vget_low_s32(cospis), 1); - c9 = vmull_lane_s32(vget_high_s32(*a3), vget_low_s32(cospis), 1); - c10 = vmull_lane_s32(vget_low_s32(*a3), vget_high_s32(cospis), 1); - c11 = vmull_lane_s32(vget_high_s32(*a3), vget_high_s32(cospis), 1); - c4 = vsubq_s64(c4, c8); - c5 = vsubq_s64(c5, c9); - c6 = vaddq_s64(c6, c10); - c7 = vaddq_s64(c7, c11); - b0 = vcombine_s32(vrshrn_n_s64(c0, DCT_CONST_BITS), - vrshrn_n_s64(c1, DCT_CONST_BITS)); - b1 = vcombine_s32(vrshrn_n_s64(c2, DCT_CONST_BITS), - vrshrn_n_s64(c3, DCT_CONST_BITS)); - b2 = vcombine_s32(vrshrn_n_s64(c4, DCT_CONST_BITS), - vrshrn_n_s64(c5, DCT_CONST_BITS)); - b3 = vcombine_s32(vrshrn_n_s64(c6, DCT_CONST_BITS), - vrshrn_n_s64(c7, DCT_CONST_BITS)); - *a0 = vaddq_s32(b0, b3); - *a1 = vaddq_s32(b1, b2); - *a2 = vsubq_s32(b1, b2); - *a3 = vsubq_s32(b0, b3); -} -
void vpx_highbd_idct4x4_16_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { const int16x8_t max = vdupq_n_s16((1 << bd) - 1); - int32x4_t c0 = vld1q_s32(input); - int32x4_t c1 = vld1q_s32(input + 4); - int32x4_t c2 = vld1q_s32(input + 8); - int32x4_t c3 = vld1q_s32(input + 12); - int16x8_t a0, a1; + int16x8_t a[2]; + int32x4_t c[4]; + + c[0] = vld1q_s32(input); + c[1] = vld1q_s32(input + 4); + c[2] = vld1q_s32(input + 8); + c[3] = vld1q_s32(input + 12); if (bd == 8) { - const int16x4_t cospis = vld1_s16(kCospi); - // Rows - a0 = vcombine_s16(vmovn_s32(c0), vmovn_s32(c1)); - a1 = vcombine_s16(vmovn_s32(c2), vmovn_s32(c3)); - idct4x4_16_kernel_bd8(cospis, &a0, &a1); + a[0] = vcombine_s16(vmovn_s32(c[0]), vmovn_s32(c[1])); + a[1] = vcombine_s16(vmovn_s32(c[2]), vmovn_s32(c[3])); + transpose_idct4x4_16_bd8(a); // Columns - a1 = vcombine_s16(vget_high_s16(a1), vget_low_s16(a1)); - idct4x4_16_kernel_bd8(cospis, &a0, &a1); - a0 = vrshrq_n_s16(a0, 4); - a1 = vrshrq_n_s16(a1, 4); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + transpose_idct4x4_16_bd8(a); + a[0] = vrshrq_n_s16(a[0], 4); + a[1] = vrshrq_n_s16(a[1], 4); } else { const int32x4_t cospis = vld1q_s32(kCospi32); if (bd == 10) { - idct4x4_16_kernel_bd10(cospis, &c0, &c1, &c2, &c3); - idct4x4_16_kernel_bd10(cospis, &c0, &c1, &c2, &c3); + idct4x4_16_kernel_bd10(cospis, c); + idct4x4_16_kernel_bd10(cospis, c); } else { - idct4x4_16_kernel_bd12(cospis, &c0, &c1, &c2, &c3); - idct4x4_16_kernel_bd12(cospis, &c0, &c1, &c2, &c3); + idct4x4_16_kernel_bd12(cospis, c); + idct4x4_16_kernel_bd12(cospis, c); } - a0 = vcombine_s16(vqrshrn_n_s32(c0, 4), vqrshrn_n_s32(c1, 4)); - a1 = vcombine_s16(vqrshrn_n_s32(c3, 4), vqrshrn_n_s32(c2, 4)); + a[0] = vcombine_s16(vqrshrn_n_s32(c[0], 4), vqrshrn_n_s32(c[1], 4)); + a[1] = vcombine_s16(vqrshrn_n_s32(c[3], 4), vqrshrn_n_s32(c[2], 4)); } - highbd_idct4x4_1_add_kernel1(&dest, stride, a0, max); - highbd_idct4x4_1_add_kernel2(&dest, stride, a1, max); + highbd_idct4x4_1_add_kernel1(&dest, stride, a[0], max); + highbd_idct4x4_1_add_kernel2(&dest, stride, a[1], max); } diff --git a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct8x8_add_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct8x8_add_neon.c index dd90134a6e0e..bed3227ca7d5 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct8x8_add_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct8x8_add_neon.c @@ -11,6 +11,7 @@ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/arm/highbd_idct_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/inv_txfm.h" @@ -127,7 +128,7 @@ static INLINE void idct8x8_12_half1d_bd12( int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3, int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6, int32x4_t *const io7) { - int32x2_t input_1l, input_1h, input_3l, input_3h; + int32x2_t input1l, input1h, input3l, input3h; int32x2_t step1l[2], step1h[2]; int32x4_t step1[8], step2[8]; int64x2_t t64[8]; @@ -136,23 +137,23 @@ static INLINE void idct8x8_12_half1d_bd12( transpose_s32_4x4(io0, io1, io2, io3); // stage 1 - input_1l = vget_low_s32(*io1); - input_1h = vget_high_s32(*io1); - input_3l = vget_low_s32(*io3); - input_3h = vget_high_s32(*io3); + input1l = vget_low_s32(*io1); + input1h = vget_high_s32(*io1); + input3l = vget_low_s32(*io3); + input3h = vget_high_s32(*io3); step1l[0] = vget_low_s32(*io0); step1h[0] = vget_high_s32(*io0); step1l[1] = vget_low_s32(*io2); step1h[1] = vget_high_s32(*io2); - t64[0] =
vmull_lane_s32(input_1l, vget_high_s32(cospis1), 1); - t64[1] = vmull_lane_s32(input_1h, vget_high_s32(cospis1), 1); - t64[2] = vmull_lane_s32(input_3l, vget_high_s32(cospis1), 0); - t64[3] = vmull_lane_s32(input_3h, vget_high_s32(cospis1), 0); - t64[4] = vmull_lane_s32(input_3l, vget_low_s32(cospis1), 1); - t64[5] = vmull_lane_s32(input_3h, vget_low_s32(cospis1), 1); - t64[6] = vmull_lane_s32(input_1l, vget_low_s32(cospis1), 0); - t64[7] = vmull_lane_s32(input_1h, vget_low_s32(cospis1), 0); + t64[0] = vmull_lane_s32(input1l, vget_high_s32(cospis1), 1); + t64[1] = vmull_lane_s32(input1h, vget_high_s32(cospis1), 1); + t64[2] = vmull_lane_s32(input3l, vget_high_s32(cospis1), 0); + t64[3] = vmull_lane_s32(input3h, vget_high_s32(cospis1), 0); + t64[4] = vmull_lane_s32(input3l, vget_low_s32(cospis1), 1); + t64[5] = vmull_lane_s32(input3h, vget_low_s32(cospis1), 1); + t64[6] = vmull_lane_s32(input1l, vget_low_s32(cospis1), 0); + t64[7] = vmull_lane_s32(input1h, vget_low_s32(cospis1), 0); t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS); t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS); t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); @@ -222,82 +223,15 @@ static INLINE void idct8x8_12_half1d_bd12( *io7 = vsubq_s32(step1[0], step2[7]); } -static INLINE void highbd_add8x8(int16x8_t a0, int16x8_t a1, int16x8_t a2, - int16x8_t a3, int16x8_t a4, int16x8_t a5, - int16x8_t a6, int16x8_t a7, uint16_t *dest, - const int stride, const int bd) { - const int16x8_t max = vdupq_n_s16((1 << bd) - 1); - const uint16_t *dst = dest; - uint16x8_t d0, d1, d2, d3, d4, d5, d6, d7; - uint16x8_t d0_u16, d1_u16, d2_u16, d3_u16, d4_u16, d5_u16, d6_u16, d7_u16; - int16x8_t d0_s16, d1_s16, d2_s16, d3_s16, d4_s16, d5_s16, d6_s16, d7_s16; - - d0 = vld1q_u16(dst); - dst += stride; - d1 = vld1q_u16(dst); - dst += stride; - d2 = vld1q_u16(dst); - dst += stride; - d3 = vld1q_u16(dst); - dst += stride; - d4 = vld1q_u16(dst); - dst += stride; - d5 = vld1q_u16(dst); - dst += stride; - d6 = vld1q_u16(dst); - dst += stride; - d7 = vld1q_u16(dst); - - d0_s16 = vqaddq_s16(a0, vreinterpretq_s16_u16(d0)); - d1_s16 = vqaddq_s16(a1, vreinterpretq_s16_u16(d1)); - d2_s16 = vqaddq_s16(a2, vreinterpretq_s16_u16(d2)); - d3_s16 = vqaddq_s16(a3, vreinterpretq_s16_u16(d3)); - d4_s16 = vqaddq_s16(a4, vreinterpretq_s16_u16(d4)); - d5_s16 = vqaddq_s16(a5, vreinterpretq_s16_u16(d5)); - d6_s16 = vqaddq_s16(a6, vreinterpretq_s16_u16(d6)); - d7_s16 = vqaddq_s16(a7, vreinterpretq_s16_u16(d7)); - - d0_s16 = vminq_s16(d0_s16, max); - d1_s16 = vminq_s16(d1_s16, max); - d2_s16 = vminq_s16(d2_s16, max); - d3_s16 = vminq_s16(d3_s16, max); - d4_s16 = vminq_s16(d4_s16, max); - d5_s16 = vminq_s16(d5_s16, max); - d6_s16 = vminq_s16(d6_s16, max); - d7_s16 = vminq_s16(d7_s16, max); - d0_u16 = vqshluq_n_s16(d0_s16, 0); - d1_u16 = vqshluq_n_s16(d1_s16, 0); - d2_u16 = vqshluq_n_s16(d2_s16, 0); - d3_u16 = vqshluq_n_s16(d3_s16, 0); - d4_u16 = vqshluq_n_s16(d4_s16, 0); - d5_u16 = vqshluq_n_s16(d5_s16, 0); - d6_u16 = vqshluq_n_s16(d6_s16, 0); - d7_u16 = vqshluq_n_s16(d7_s16, 0); - - vst1q_u16(dest, d0_u16); - dest += stride; - vst1q_u16(dest, d1_u16); - dest += stride; - vst1q_u16(dest, d2_u16); - dest += stride; - vst1q_u16(dest, d3_u16); - dest += stride; - vst1q_u16(dest, d4_u16); - dest += stride; - vst1q_u16(dest, d5_u16); - dest += stride; - vst1q_u16(dest, d6_u16); - dest += stride; - vst1q_u16(dest, d7_u16); -} - void vpx_highbd_idct8x8_12_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { - int32x4_t a0 = vld1q_s32(input); - int32x4_t a1 = 
vld1q_s32(input + 8); - int32x4_t a2 = vld1q_s32(input + 16); - int32x4_t a3 = vld1q_s32(input + 24); - int16x8_t c0, c1, c2, c3, c4, c5, c6, c7; + int32x4_t a[16]; + int16x8_t c[8]; + + a[0] = vld1q_s32(input); + a[1] = vld1q_s32(input + 8); + a[2] = vld1q_s32(input + 16); + a[3] = vld1q_s32(input + 24); if (bd == 8) { const int16x8_t cospis = vld1q_s16(kCospi); @@ -305,327 +239,133 @@ void vpx_highbd_idct8x8_12_add_neon(const tran_low_t *input, uint16_t *dest, const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospisd0 = vget_low_s16(cospisd); // doubled 0, 8, 16, 24 const int16x4_t cospisd1 = vget_high_s16(cospisd); // doubled 4, 12, 20, 28 - int16x4_t b0 = vmovn_s32(a0); - int16x4_t b1 = vmovn_s32(a1); - int16x4_t b2 = vmovn_s32(a2); - int16x4_t b3 = vmovn_s32(a3); - int16x4_t b4, b5, b6, b7; + int16x4_t b[8]; - idct8x8_12_pass1_bd8(cospis0, cospisd0, cospisd1, &b0, &b1, &b2, &b3, &b4, - &b5, &b6, &b7); - idct8x8_12_pass2_bd8(cospis0, cospisd0, cospisd1, b0, b1, b2, b3, b4, b5, - b6, b7, &c0, &c1, &c2, &c3, &c4, &c5, &c6, &c7); - c0 = vrshrq_n_s16(c0, 5); - c1 = vrshrq_n_s16(c1, 5); - c2 = vrshrq_n_s16(c2, 5); - c3 = vrshrq_n_s16(c3, 5); - c4 = vrshrq_n_s16(c4, 5); - c5 = vrshrq_n_s16(c5, 5); - c6 = vrshrq_n_s16(c6, 5); - c7 = vrshrq_n_s16(c7, 5); + b[0] = vmovn_s32(a[0]); + b[1] = vmovn_s32(a[1]); + b[2] = vmovn_s32(a[2]); + b[3] = vmovn_s32(a[3]); + + idct8x8_12_pass1_bd8(cospis0, cospisd0, cospisd1, b); + idct8x8_12_pass2_bd8(cospis0, cospisd0, cospisd1, b, c); + c[0] = vrshrq_n_s16(c[0], 5); + c[1] = vrshrq_n_s16(c[1], 5); + c[2] = vrshrq_n_s16(c[2], 5); + c[3] = vrshrq_n_s16(c[3], 5); + c[4] = vrshrq_n_s16(c[4], 5); + c[5] = vrshrq_n_s16(c[5], 5); + c[6] = vrshrq_n_s16(c[6], 5); + c[7] = vrshrq_n_s16(c[7], 5); } else { const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24 const int32x4_t cospis1 = vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28 - int32x4_t a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15; if (bd == 10) { - idct8x8_12_half1d_bd10(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5, - &a6, &a7); - idct8x8_12_half1d_bd10(cospis0, cospis1, &a0, &a1, &a2, &a3, &a8, &a9, - &a10, &a11); - idct8x8_12_half1d_bd10(cospis0, cospis1, &a4, &a5, &a6, &a7, &a12, &a13, - &a14, &a15); + idct8x8_12_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], + &a[4], &a[5], &a[6], &a[7]); + idct8x8_12_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], + &a[8], &a[9], &a[10], &a[11]); + idct8x8_12_half1d_bd10(cospis0, cospis1, &a[4], &a[5], &a[6], &a[7], + &a[12], &a[13], &a[14], &a[15]); } else { - idct8x8_12_half1d_bd12(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5, - &a6, &a7); - idct8x8_12_half1d_bd12(cospis0, cospis1, &a0, &a1, &a2, &a3, &a8, &a9, - &a10, &a11); - idct8x8_12_half1d_bd12(cospis0, cospis1, &a4, &a5, &a6, &a7, &a12, &a13, - &a14, &a15); + idct8x8_12_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], + &a[4], &a[5], &a[6], &a[7]); + idct8x8_12_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], + &a[8], &a[9], &a[10], &a[11]); + idct8x8_12_half1d_bd12(cospis0, cospis1, &a[4], &a[5], &a[6], &a[7], + &a[12], &a[13], &a[14], &a[15]); } - c0 = vcombine_s16(vrshrn_n_s32(a0, 5), vrshrn_n_s32(a4, 5)); - c1 = vcombine_s16(vrshrn_n_s32(a1, 5), vrshrn_n_s32(a5, 5)); - c2 = vcombine_s16(vrshrn_n_s32(a2, 5), vrshrn_n_s32(a6, 5)); - c3 = vcombine_s16(vrshrn_n_s32(a3, 5), vrshrn_n_s32(a7, 5)); - c4 = vcombine_s16(vrshrn_n_s32(a8, 5), vrshrn_n_s32(a12, 5)); - c5 = vcombine_s16(vrshrn_n_s32(a9, 5), 
vrshrn_n_s32(a13, 5)); - c6 = vcombine_s16(vrshrn_n_s32(a10, 5), vrshrn_n_s32(a14, 5)); - c7 = vcombine_s16(vrshrn_n_s32(a11, 5), vrshrn_n_s32(a15, 5)); + c[0] = vcombine_s16(vrshrn_n_s32(a[0], 5), vrshrn_n_s32(a[4], 5)); + c[1] = vcombine_s16(vrshrn_n_s32(a[1], 5), vrshrn_n_s32(a[5], 5)); + c[2] = vcombine_s16(vrshrn_n_s32(a[2], 5), vrshrn_n_s32(a[6], 5)); + c[3] = vcombine_s16(vrshrn_n_s32(a[3], 5), vrshrn_n_s32(a[7], 5)); + c[4] = vcombine_s16(vrshrn_n_s32(a[8], 5), vrshrn_n_s32(a[12], 5)); + c[5] = vcombine_s16(vrshrn_n_s32(a[9], 5), vrshrn_n_s32(a[13], 5)); + c[6] = vcombine_s16(vrshrn_n_s32(a[10], 5), vrshrn_n_s32(a[14], 5)); + c[7] = vcombine_s16(vrshrn_n_s32(a[11], 5), vrshrn_n_s32(a[15], 5)); } - highbd_add8x8(c0, c1, c2, c3, c4, c5, c6, c7, dest, stride, bd); -} - -static INLINE void idct8x8_64_half1d_bd10( - const int32x4_t cospis0, const int32x4_t cospis1, int32x4_t *const io0, - int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3, - int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6, - int32x4_t *const io7) { - int32x4_t step1[8], step2[8]; - - transpose_s32_8x4(io0, io1, io2, io3, io4, io5, io6, io7); - - // stage 1 - step1[4] = vmulq_lane_s32(*io1, vget_high_s32(cospis1), 1); - step1[5] = vmulq_lane_s32(*io3, vget_high_s32(cospis1), 0); - step1[6] = vmulq_lane_s32(*io3, vget_low_s32(cospis1), 1); - step1[7] = vmulq_lane_s32(*io1, vget_low_s32(cospis1), 0); - - step1[4] = vmlsq_lane_s32(step1[4], *io7, vget_low_s32(cospis1), 0); - step1[5] = vmlaq_lane_s32(step1[5], *io5, vget_low_s32(cospis1), 1); - step1[6] = vmlsq_lane_s32(step1[6], *io5, vget_high_s32(cospis1), 0); - step1[7] = vmlaq_lane_s32(step1[7], *io7, vget_high_s32(cospis1), 1); - - step1[4] = vrshrq_n_s32(step1[4], DCT_CONST_BITS); - step1[5] = vrshrq_n_s32(step1[5], DCT_CONST_BITS); - step1[6] = vrshrq_n_s32(step1[6], DCT_CONST_BITS); - step1[7] = vrshrq_n_s32(step1[7], DCT_CONST_BITS); - - // stage 2 - step2[1] = vmulq_lane_s32(*io0, vget_high_s32(cospis0), 0); - step2[2] = vmulq_lane_s32(*io2, vget_high_s32(cospis0), 1); - step2[3] = vmulq_lane_s32(*io2, vget_low_s32(cospis0), 1); - - step2[0] = vmlaq_lane_s32(step2[1], *io4, vget_high_s32(cospis0), 0); - step2[1] = vmlsq_lane_s32(step2[1], *io4, vget_high_s32(cospis0), 0); - step2[2] = vmlsq_lane_s32(step2[2], *io6, vget_low_s32(cospis0), 1); - step2[3] = vmlaq_lane_s32(step2[3], *io6, vget_high_s32(cospis0), 1); - - step2[0] = vrshrq_n_s32(step2[0], DCT_CONST_BITS); - step2[1] = vrshrq_n_s32(step2[1], DCT_CONST_BITS); - step2[2] = vrshrq_n_s32(step2[2], DCT_CONST_BITS); - step2[3] = vrshrq_n_s32(step2[3], DCT_CONST_BITS); - - step2[4] = vaddq_s32(step1[4], step1[5]); - step2[5] = vsubq_s32(step1[4], step1[5]); - step2[6] = vsubq_s32(step1[7], step1[6]); - step2[7] = vaddq_s32(step1[7], step1[6]); - - // stage 3 - step1[0] = vaddq_s32(step2[0], step2[3]); - step1[1] = vaddq_s32(step2[1], step2[2]); - step1[2] = vsubq_s32(step2[1], step2[2]); - step1[3] = vsubq_s32(step2[0], step2[3]); - - step1[6] = vmulq_lane_s32(step2[6], vget_high_s32(cospis0), 0); - step1[5] = vmlsq_lane_s32(step1[6], step2[5], vget_high_s32(cospis0), 0); - step1[6] = vmlaq_lane_s32(step1[6], step2[5], vget_high_s32(cospis0), 0); - step1[5] = vrshrq_n_s32(step1[5], DCT_CONST_BITS); - step1[6] = vrshrq_n_s32(step1[6], DCT_CONST_BITS); - - // stage 4 - *io0 = vaddq_s32(step1[0], step2[7]); - *io1 = vaddq_s32(step1[1], step1[6]); - *io2 = vaddq_s32(step1[2], step1[5]); - *io3 = vaddq_s32(step1[3], step2[4]); - *io4 = vsubq_s32(step1[3], step2[4]); - *io5 = 
vsubq_s32(step1[2], step1[5]); - *io6 = vsubq_s32(step1[1], step1[6]); - *io7 = vsubq_s32(step1[0], step2[7]); -} - -static INLINE void idct8x8_64_half1d_bd12( - const int32x4_t cospis0, const int32x4_t cospis1, int32x4_t *const io0, - int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3, - int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6, - int32x4_t *const io7) { - int32x2_t input_1l, input_1h, input_3l, input_3h, input_5l, input_5h, - input_7l, input_7h; - int32x2_t step1l[4], step1h[4]; - int32x4_t step1[8], step2[8]; - int64x2_t t64[8]; - int32x2_t t32[8]; - - transpose_s32_8x4(io0, io1, io2, io3, io4, io5, io6, io7); - - // stage 1 - input_1l = vget_low_s32(*io1); - input_1h = vget_high_s32(*io1); - input_3l = vget_low_s32(*io3); - input_3h = vget_high_s32(*io3); - input_5l = vget_low_s32(*io5); - input_5h = vget_high_s32(*io5); - input_7l = vget_low_s32(*io7); - input_7h = vget_high_s32(*io7); - step1l[0] = vget_low_s32(*io0); - step1h[0] = vget_high_s32(*io0); - step1l[1] = vget_low_s32(*io2); - step1h[1] = vget_high_s32(*io2); - step1l[2] = vget_low_s32(*io4); - step1h[2] = vget_high_s32(*io4); - step1l[3] = vget_low_s32(*io6); - step1h[3] = vget_high_s32(*io6); - - t64[0] = vmull_lane_s32(input_1l, vget_high_s32(cospis1), 1); - t64[1] = vmull_lane_s32(input_1h, vget_high_s32(cospis1), 1); - t64[2] = vmull_lane_s32(input_3l, vget_high_s32(cospis1), 0); - t64[3] = vmull_lane_s32(input_3h, vget_high_s32(cospis1), 0); - t64[4] = vmull_lane_s32(input_3l, vget_low_s32(cospis1), 1); - t64[5] = vmull_lane_s32(input_3h, vget_low_s32(cospis1), 1); - t64[6] = vmull_lane_s32(input_1l, vget_low_s32(cospis1), 0); - t64[7] = vmull_lane_s32(input_1h, vget_low_s32(cospis1), 0); - t64[0] = vmlsl_lane_s32(t64[0], input_7l, vget_low_s32(cospis1), 0); - t64[1] = vmlsl_lane_s32(t64[1], input_7h, vget_low_s32(cospis1), 0); - t64[2] = vmlal_lane_s32(t64[2], input_5l, vget_low_s32(cospis1), 1); - t64[3] = vmlal_lane_s32(t64[3], input_5h, vget_low_s32(cospis1), 1); - t64[4] = vmlsl_lane_s32(t64[4], input_5l, vget_high_s32(cospis1), 0); - t64[5] = vmlsl_lane_s32(t64[5], input_5h, vget_high_s32(cospis1), 0); - t64[6] = vmlal_lane_s32(t64[6], input_7l, vget_high_s32(cospis1), 1); - t64[7] = vmlal_lane_s32(t64[7], input_7h, vget_high_s32(cospis1), 1); - t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS); - t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS); - t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); - t32[3] = vrshrn_n_s64(t64[3], DCT_CONST_BITS); - t32[4] = vrshrn_n_s64(t64[4], DCT_CONST_BITS); - t32[5] = vrshrn_n_s64(t64[5], DCT_CONST_BITS); - t32[6] = vrshrn_n_s64(t64[6], DCT_CONST_BITS); - t32[7] = vrshrn_n_s64(t64[7], DCT_CONST_BITS); - step1[4] = vcombine_s32(t32[0], t32[1]); - step1[5] = vcombine_s32(t32[2], t32[3]); - step1[6] = vcombine_s32(t32[4], t32[5]); - step1[7] = vcombine_s32(t32[6], t32[7]); - - // stage 2 - t64[2] = vmull_lane_s32(step1l[0], vget_high_s32(cospis0), 0); - t64[3] = vmull_lane_s32(step1h[0], vget_high_s32(cospis0), 0); - t64[4] = vmull_lane_s32(step1l[1], vget_high_s32(cospis0), 1); - t64[5] = vmull_lane_s32(step1h[1], vget_high_s32(cospis0), 1); - t64[6] = vmull_lane_s32(step1l[1], vget_low_s32(cospis0), 1); - t64[7] = vmull_lane_s32(step1h[1], vget_low_s32(cospis0), 1); - t64[0] = vmlal_lane_s32(t64[2], step1l[2], vget_high_s32(cospis0), 0); - t64[1] = vmlal_lane_s32(t64[3], step1h[2], vget_high_s32(cospis0), 0); - t64[2] = vmlsl_lane_s32(t64[2], step1l[2], vget_high_s32(cospis0), 0); - t64[3] = vmlsl_lane_s32(t64[3], step1h[2], vget_high_s32(cospis0), 
0); - t64[4] = vmlsl_lane_s32(t64[4], step1l[3], vget_low_s32(cospis0), 1); - t64[5] = vmlsl_lane_s32(t64[5], step1h[3], vget_low_s32(cospis0), 1); - t64[6] = vmlal_lane_s32(t64[6], step1l[3], vget_high_s32(cospis0), 1); - t64[7] = vmlal_lane_s32(t64[7], step1h[3], vget_high_s32(cospis0), 1); - t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS); - t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS); - t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); - t32[3] = vrshrn_n_s64(t64[3], DCT_CONST_BITS); - t32[4] = vrshrn_n_s64(t64[4], DCT_CONST_BITS); - t32[5] = vrshrn_n_s64(t64[5], DCT_CONST_BITS); - t32[6] = vrshrn_n_s64(t64[6], DCT_CONST_BITS); - t32[7] = vrshrn_n_s64(t64[7], DCT_CONST_BITS); - step2[0] = vcombine_s32(t32[0], t32[1]); - step2[1] = vcombine_s32(t32[2], t32[3]); - step2[2] = vcombine_s32(t32[4], t32[5]); - step2[3] = vcombine_s32(t32[6], t32[7]); - - step2[4] = vaddq_s32(step1[4], step1[5]); - step2[5] = vsubq_s32(step1[4], step1[5]); - step2[6] = vsubq_s32(step1[7], step1[6]); - step2[7] = vaddq_s32(step1[7], step1[6]); - - // stage 3 - step1[0] = vaddq_s32(step2[0], step2[3]); - step1[1] = vaddq_s32(step2[1], step2[2]); - step1[2] = vsubq_s32(step2[1], step2[2]); - step1[3] = vsubq_s32(step2[0], step2[3]); - - t64[2] = vmull_lane_s32(vget_low_s32(step2[6]), vget_high_s32(cospis0), 0); - t64[3] = vmull_lane_s32(vget_high_s32(step2[6]), vget_high_s32(cospis0), 0); - t64[0] = - vmlsl_lane_s32(t64[2], vget_low_s32(step2[5]), vget_high_s32(cospis0), 0); - t64[1] = vmlsl_lane_s32(t64[3], vget_high_s32(step2[5]), - vget_high_s32(cospis0), 0); - t64[2] = - vmlal_lane_s32(t64[2], vget_low_s32(step2[5]), vget_high_s32(cospis0), 0); - t64[3] = vmlal_lane_s32(t64[3], vget_high_s32(step2[5]), - vget_high_s32(cospis0), 0); - t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS); - t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS); - t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); - t32[3] = vrshrn_n_s64(t64[3], DCT_CONST_BITS); - step1[5] = vcombine_s32(t32[0], t32[1]); - step1[6] = vcombine_s32(t32[2], t32[3]); - - // stage 4 - *io0 = vaddq_s32(step1[0], step2[7]); - *io1 = vaddq_s32(step1[1], step1[6]); - *io2 = vaddq_s32(step1[2], step1[5]); - *io3 = vaddq_s32(step1[3], step2[4]); - *io4 = vsubq_s32(step1[3], step2[4]); - *io5 = vsubq_s32(step1[2], step1[5]); - *io6 = vsubq_s32(step1[1], step1[6]); - *io7 = vsubq_s32(step1[0], step2[7]); + highbd_add8x8(c, dest, stride, bd); } void vpx_highbd_idct8x8_64_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { - int32x4_t a0 = vld1q_s32(input); - int32x4_t a1 = vld1q_s32(input + 4); - int32x4_t a2 = vld1q_s32(input + 8); - int32x4_t a3 = vld1q_s32(input + 12); - int32x4_t a4 = vld1q_s32(input + 16); - int32x4_t a5 = vld1q_s32(input + 20); - int32x4_t a6 = vld1q_s32(input + 24); - int32x4_t a7 = vld1q_s32(input + 28); - int32x4_t a8 = vld1q_s32(input + 32); - int32x4_t a9 = vld1q_s32(input + 36); - int32x4_t a10 = vld1q_s32(input + 40); - int32x4_t a11 = vld1q_s32(input + 44); - int32x4_t a12 = vld1q_s32(input + 48); - int32x4_t a13 = vld1q_s32(input + 52); - int32x4_t a14 = vld1q_s32(input + 56); - int32x4_t a15 = vld1q_s32(input + 60); - int16x8_t c0, c1, c2, c3, c4, c5, c6, c7; + int32x4_t a[16]; + int16x8_t c[8]; + + a[0] = vld1q_s32(input); + a[1] = vld1q_s32(input + 4); + a[2] = vld1q_s32(input + 8); + a[3] = vld1q_s32(input + 12); + a[4] = vld1q_s32(input + 16); + a[5] = vld1q_s32(input + 20); + a[6] = vld1q_s32(input + 24); + a[7] = vld1q_s32(input + 28); + a[8] = vld1q_s32(input + 32); + a[9] = vld1q_s32(input + 36); + a[10] = 
vld1q_s32(input + 40); + a[11] = vld1q_s32(input + 44); + a[12] = vld1q_s32(input + 48); + a[13] = vld1q_s32(input + 52); + a[14] = vld1q_s32(input + 56); + a[15] = vld1q_s32(input + 60); if (bd == 8) { const int16x8_t cospis = vld1q_s16(kCospi); const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 - int16x8_t b0 = vcombine_s16(vmovn_s32(a0), vmovn_s32(a1)); - int16x8_t b1 = vcombine_s16(vmovn_s32(a2), vmovn_s32(a3)); - int16x8_t b2 = vcombine_s16(vmovn_s32(a4), vmovn_s32(a5)); - int16x8_t b3 = vcombine_s16(vmovn_s32(a6), vmovn_s32(a7)); - int16x8_t b4 = vcombine_s16(vmovn_s32(a8), vmovn_s32(a9)); - int16x8_t b5 = vcombine_s16(vmovn_s32(a10), vmovn_s32(a11)); - int16x8_t b6 = vcombine_s16(vmovn_s32(a12), vmovn_s32(a13)); - int16x8_t b7 = vcombine_s16(vmovn_s32(a14), vmovn_s32(a15)); + int16x8_t b[8]; - idct8x8_64_1d_bd8(cospis0, cospis1, &b0, &b1, &b2, &b3, &b4, &b5, &b6, &b7); - idct8x8_64_1d_bd8(cospis0, cospis1, &b0, &b1, &b2, &b3, &b4, &b5, &b6, &b7); + b[0] = vcombine_s16(vmovn_s32(a[0]), vmovn_s32(a[1])); + b[1] = vcombine_s16(vmovn_s32(a[2]), vmovn_s32(a[3])); + b[2] = vcombine_s16(vmovn_s32(a[4]), vmovn_s32(a[5])); + b[3] = vcombine_s16(vmovn_s32(a[6]), vmovn_s32(a[7])); + b[4] = vcombine_s16(vmovn_s32(a[8]), vmovn_s32(a[9])); + b[5] = vcombine_s16(vmovn_s32(a[10]), vmovn_s32(a[11])); + b[6] = vcombine_s16(vmovn_s32(a[12]), vmovn_s32(a[13])); + b[7] = vcombine_s16(vmovn_s32(a[14]), vmovn_s32(a[15])); - c0 = vrshrq_n_s16(b0, 5); - c1 = vrshrq_n_s16(b1, 5); - c2 = vrshrq_n_s16(b2, 5); - c3 = vrshrq_n_s16(b3, 5); - c4 = vrshrq_n_s16(b4, 5); - c5 = vrshrq_n_s16(b5, 5); - c6 = vrshrq_n_s16(b6, 5); - c7 = vrshrq_n_s16(b7, 5); + idct8x8_64_1d_bd8(cospis0, cospis1, b); + idct8x8_64_1d_bd8(cospis0, cospis1, b); + + c[0] = vrshrq_n_s16(b[0], 5); + c[1] = vrshrq_n_s16(b[1], 5); + c[2] = vrshrq_n_s16(b[2], 5); + c[3] = vrshrq_n_s16(b[3], 5); + c[4] = vrshrq_n_s16(b[4], 5); + c[5] = vrshrq_n_s16(b[5], 5); + c[6] = vrshrq_n_s16(b[6], 5); + c[7] = vrshrq_n_s16(b[7], 5); } else { const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24 const int32x4_t cospis1 = vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28 if (bd == 10) { - idct8x8_64_half1d_bd10(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5, - &a6, &a7); - idct8x8_64_half1d_bd10(cospis0, cospis1, &a8, &a9, &a10, &a11, &a12, &a13, - &a14, &a15); - idct8x8_64_half1d_bd10(cospis0, cospis1, &a0, &a8, &a1, &a9, &a2, &a10, - &a3, &a11); - idct8x8_64_half1d_bd10(cospis0, cospis1, &a4, &a12, &a5, &a13, &a6, &a14, - &a7, &a15); + idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], + &a[4], &a[5], &a[6], &a[7]); + idct8x8_64_half1d_bd10(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11], + &a[12], &a[13], &a[14], &a[15]); + idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9], + &a[2], &a[10], &a[3], &a[11]); + idct8x8_64_half1d_bd10(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13], + &a[6], &a[14], &a[7], &a[15]); } else { - idct8x8_64_half1d_bd12(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5, - &a6, &a7); - idct8x8_64_half1d_bd12(cospis0, cospis1, &a8, &a9, &a10, &a11, &a12, &a13, - &a14, &a15); - idct8x8_64_half1d_bd12(cospis0, cospis1, &a0, &a8, &a1, &a9, &a2, &a10, - &a3, &a11); - idct8x8_64_half1d_bd12(cospis0, cospis1, &a4, &a12, &a5, &a13, &a6, &a14, - &a7, &a15); + idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], + &a[4], &a[5], &a[6], &a[7]); + idct8x8_64_half1d_bd12(cospis0, cospis1, &a[8], &a[9], 
&a[10], &a[11], + &a[12], &a[13], &a[14], &a[15]); + idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9], + &a[2], &a[10], &a[3], &a[11]); + idct8x8_64_half1d_bd12(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13], + &a[6], &a[14], &a[7], &a[15]); } - c0 = vcombine_s16(vrshrn_n_s32(a0, 5), vrshrn_n_s32(a4, 5)); - c1 = vcombine_s16(vrshrn_n_s32(a8, 5), vrshrn_n_s32(a12, 5)); - c2 = vcombine_s16(vrshrn_n_s32(a1, 5), vrshrn_n_s32(a5, 5)); - c3 = vcombine_s16(vrshrn_n_s32(a9, 5), vrshrn_n_s32(a13, 5)); - c4 = vcombine_s16(vrshrn_n_s32(a2, 5), vrshrn_n_s32(a6, 5)); - c5 = vcombine_s16(vrshrn_n_s32(a10, 5), vrshrn_n_s32(a14, 5)); - c6 = vcombine_s16(vrshrn_n_s32(a3, 5), vrshrn_n_s32(a7, 5)); - c7 = vcombine_s16(vrshrn_n_s32(a11, 5), vrshrn_n_s32(a15, 5)); + c[0] = vcombine_s16(vrshrn_n_s32(a[0], 5), vrshrn_n_s32(a[4], 5)); + c[1] = vcombine_s16(vrshrn_n_s32(a[8], 5), vrshrn_n_s32(a[12], 5)); + c[2] = vcombine_s16(vrshrn_n_s32(a[1], 5), vrshrn_n_s32(a[5], 5)); + c[3] = vcombine_s16(vrshrn_n_s32(a[9], 5), vrshrn_n_s32(a[13], 5)); + c[4] = vcombine_s16(vrshrn_n_s32(a[2], 5), vrshrn_n_s32(a[6], 5)); + c[5] = vcombine_s16(vrshrn_n_s32(a[10], 5), vrshrn_n_s32(a[14], 5)); + c[6] = vcombine_s16(vrshrn_n_s32(a[3], 5), vrshrn_n_s32(a[7], 5)); + c[7] = vcombine_s16(vrshrn_n_s32(a[11], 5), vrshrn_n_s32(a[15], 5)); } - highbd_add8x8(c0, c1, c2, c3, c4, c5, c6, c7, dest, stride, bd); + highbd_add8x8(c, dest, stride, bd); } diff --git a/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct_neon.h b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct_neon.h new file mode 100644 index 000000000000..518ef4336e87 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/arm/highbd_idct_neon.h @@ -0,0 +1,474 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_DSP_ARM_HIGHBD_IDCT_NEON_H_ +#define VPX_VPX_DSP_ARM_HIGHBD_IDCT_NEON_H_ + +#include <arm_neon.h> + +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/inv_txfm.h" + +static INLINE void highbd_idct4x4_1_add_kernel1(uint16_t **dest, + const int stride, + const int16x8_t res, + const int16x8_t max) { + const uint16x4_t a0 = vld1_u16(*dest); + const uint16x4_t a1 = vld1_u16(*dest + stride); + const int16x8_t a = vreinterpretq_s16_u16(vcombine_u16(a0, a1)); + // Note: In some profile tests, res is quite close to +/-32767. + // We use saturating addition.
+ const int16x8_t b = vqaddq_s16(res, a); + const int16x8_t c = vminq_s16(b, max); + const uint16x8_t d = vqshluq_n_s16(c, 0); + vst1_u16(*dest, vget_low_u16(d)); + *dest += stride; + vst1_u16(*dest, vget_high_u16(d)); + *dest += stride; +} + +static INLINE void idct4x4_16_kernel_bd10(const int32x4_t cospis, + int32x4_t *const a) { + int32x4_t b0, b1, b2, b3; + + transpose_s32_4x4(&a[0], &a[1], &a[2], &a[3]); + b0 = vaddq_s32(a[0], a[2]); + b1 = vsubq_s32(a[0], a[2]); + b0 = vmulq_lane_s32(b0, vget_high_s32(cospis), 0); + b1 = vmulq_lane_s32(b1, vget_high_s32(cospis), 0); + b2 = vmulq_lane_s32(a[1], vget_high_s32(cospis), 1); + b3 = vmulq_lane_s32(a[1], vget_low_s32(cospis), 1); + b2 = vmlsq_lane_s32(b2, a[3], vget_low_s32(cospis), 1); + b3 = vmlaq_lane_s32(b3, a[3], vget_high_s32(cospis), 1); + b0 = vrshrq_n_s32(b0, DCT_CONST_BITS); + b1 = vrshrq_n_s32(b1, DCT_CONST_BITS); + b2 = vrshrq_n_s32(b2, DCT_CONST_BITS); + b3 = vrshrq_n_s32(b3, DCT_CONST_BITS); + a[0] = vaddq_s32(b0, b3); + a[1] = vaddq_s32(b1, b2); + a[2] = vsubq_s32(b1, b2); + a[3] = vsubq_s32(b0, b3); +} + +static INLINE void idct4x4_16_kernel_bd12(const int32x4_t cospis, + int32x4_t *const a) { + int32x4_t b0, b1, b2, b3; + int64x2_t c[12]; + + transpose_s32_4x4(&a[0], &a[1], &a[2], &a[3]); + b0 = vaddq_s32(a[0], a[2]); + b1 = vsubq_s32(a[0], a[2]); + c[0] = vmull_lane_s32(vget_low_s32(b0), vget_high_s32(cospis), 0); + c[1] = vmull_lane_s32(vget_high_s32(b0), vget_high_s32(cospis), 0); + c[2] = vmull_lane_s32(vget_low_s32(b1), vget_high_s32(cospis), 0); + c[3] = vmull_lane_s32(vget_high_s32(b1), vget_high_s32(cospis), 0); + c[4] = vmull_lane_s32(vget_low_s32(a[1]), vget_high_s32(cospis), 1); + c[5] = vmull_lane_s32(vget_high_s32(a[1]), vget_high_s32(cospis), 1); + c[6] = vmull_lane_s32(vget_low_s32(a[1]), vget_low_s32(cospis), 1); + c[7] = vmull_lane_s32(vget_high_s32(a[1]), vget_low_s32(cospis), 1); + c[8] = vmull_lane_s32(vget_low_s32(a[3]), vget_low_s32(cospis), 1); + c[9] = vmull_lane_s32(vget_high_s32(a[3]), vget_low_s32(cospis), 1); + c[10] = vmull_lane_s32(vget_low_s32(a[3]), vget_high_s32(cospis), 1); + c[11] = vmull_lane_s32(vget_high_s32(a[3]), vget_high_s32(cospis), 1); + c[4] = vsubq_s64(c[4], c[8]); + c[5] = vsubq_s64(c[5], c[9]); + c[6] = vaddq_s64(c[6], c[10]); + c[7] = vaddq_s64(c[7], c[11]); + b0 = vcombine_s32(vrshrn_n_s64(c[0], DCT_CONST_BITS), + vrshrn_n_s64(c[1], DCT_CONST_BITS)); + b1 = vcombine_s32(vrshrn_n_s64(c[2], DCT_CONST_BITS), + vrshrn_n_s64(c[3], DCT_CONST_BITS)); + b2 = vcombine_s32(vrshrn_n_s64(c[4], DCT_CONST_BITS), + vrshrn_n_s64(c[5], DCT_CONST_BITS)); + b3 = vcombine_s32(vrshrn_n_s64(c[6], DCT_CONST_BITS), + vrshrn_n_s64(c[7], DCT_CONST_BITS)); + a[0] = vaddq_s32(b0, b3); + a[1] = vaddq_s32(b1, b2); + a[2] = vsubq_s32(b1, b2); + a[3] = vsubq_s32(b0, b3); +} + +static INLINE void highbd_add8x8(int16x8_t *const a, uint16_t *dest, + const int stride, const int bd) { + const int16x8_t max = vdupq_n_s16((1 << bd) - 1); + const uint16_t *dst = dest; + uint16x8_t d0, d1, d2, d3, d4, d5, d6, d7; + uint16x8_t d0_u16, d1_u16, d2_u16, d3_u16, d4_u16, d5_u16, d6_u16, d7_u16; + int16x8_t d0_s16, d1_s16, d2_s16, d3_s16, d4_s16, d5_s16, d6_s16, d7_s16; + + d0 = vld1q_u16(dst); + dst += stride; + d1 = vld1q_u16(dst); + dst += stride; + d2 = vld1q_u16(dst); + dst += stride; + d3 = vld1q_u16(dst); + dst += stride; + d4 = vld1q_u16(dst); + dst += stride; + d5 = vld1q_u16(dst); + dst += stride; + d6 = vld1q_u16(dst); + dst += stride; + d7 = vld1q_u16(dst); + + d0_s16 = vqaddq_s16(a[0], 
vreinterpretq_s16_u16(d0)); + d1_s16 = vqaddq_s16(a[1], vreinterpretq_s16_u16(d1)); + d2_s16 = vqaddq_s16(a[2], vreinterpretq_s16_u16(d2)); + d3_s16 = vqaddq_s16(a[3], vreinterpretq_s16_u16(d3)); + d4_s16 = vqaddq_s16(a[4], vreinterpretq_s16_u16(d4)); + d5_s16 = vqaddq_s16(a[5], vreinterpretq_s16_u16(d5)); + d6_s16 = vqaddq_s16(a[6], vreinterpretq_s16_u16(d6)); + d7_s16 = vqaddq_s16(a[7], vreinterpretq_s16_u16(d7)); + + d0_s16 = vminq_s16(d0_s16, max); + d1_s16 = vminq_s16(d1_s16, max); + d2_s16 = vminq_s16(d2_s16, max); + d3_s16 = vminq_s16(d3_s16, max); + d4_s16 = vminq_s16(d4_s16, max); + d5_s16 = vminq_s16(d5_s16, max); + d6_s16 = vminq_s16(d6_s16, max); + d7_s16 = vminq_s16(d7_s16, max); + d0_u16 = vqshluq_n_s16(d0_s16, 0); + d1_u16 = vqshluq_n_s16(d1_s16, 0); + d2_u16 = vqshluq_n_s16(d2_s16, 0); + d3_u16 = vqshluq_n_s16(d3_s16, 0); + d4_u16 = vqshluq_n_s16(d4_s16, 0); + d5_u16 = vqshluq_n_s16(d5_s16, 0); + d6_u16 = vqshluq_n_s16(d6_s16, 0); + d7_u16 = vqshluq_n_s16(d7_s16, 0); + + vst1q_u16(dest, d0_u16); + dest += stride; + vst1q_u16(dest, d1_u16); + dest += stride; + vst1q_u16(dest, d2_u16); + dest += stride; + vst1q_u16(dest, d3_u16); + dest += stride; + vst1q_u16(dest, d4_u16); + dest += stride; + vst1q_u16(dest, d5_u16); + dest += stride; + vst1q_u16(dest, d6_u16); + dest += stride; + vst1q_u16(dest, d7_u16); +} + +static INLINE void idct8x8_64_half1d_bd10( + const int32x4_t cospis0, const int32x4_t cospis1, int32x4_t *const io0, + int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3, + int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6, + int32x4_t *const io7) { + int32x4_t step1[8], step2[8]; + + transpose_s32_8x4(io0, io1, io2, io3, io4, io5, io6, io7); + + // stage 1 + step1[4] = vmulq_lane_s32(*io1, vget_high_s32(cospis1), 1); + step1[5] = vmulq_lane_s32(*io3, vget_high_s32(cospis1), 0); + step1[6] = vmulq_lane_s32(*io3, vget_low_s32(cospis1), 1); + step1[7] = vmulq_lane_s32(*io1, vget_low_s32(cospis1), 0); + + step1[4] = vmlsq_lane_s32(step1[4], *io7, vget_low_s32(cospis1), 0); + step1[5] = vmlaq_lane_s32(step1[5], *io5, vget_low_s32(cospis1), 1); + step1[6] = vmlsq_lane_s32(step1[6], *io5, vget_high_s32(cospis1), 0); + step1[7] = vmlaq_lane_s32(step1[7], *io7, vget_high_s32(cospis1), 1); + + step1[4] = vrshrq_n_s32(step1[4], DCT_CONST_BITS); + step1[5] = vrshrq_n_s32(step1[5], DCT_CONST_BITS); + step1[6] = vrshrq_n_s32(step1[6], DCT_CONST_BITS); + step1[7] = vrshrq_n_s32(step1[7], DCT_CONST_BITS); + + // stage 2 + step2[1] = vmulq_lane_s32(*io0, vget_high_s32(cospis0), 0); + step2[2] = vmulq_lane_s32(*io2, vget_high_s32(cospis0), 1); + step2[3] = vmulq_lane_s32(*io2, vget_low_s32(cospis0), 1); + + step2[0] = vmlaq_lane_s32(step2[1], *io4, vget_high_s32(cospis0), 0); + step2[1] = vmlsq_lane_s32(step2[1], *io4, vget_high_s32(cospis0), 0); + step2[2] = vmlsq_lane_s32(step2[2], *io6, vget_low_s32(cospis0), 1); + step2[3] = vmlaq_lane_s32(step2[3], *io6, vget_high_s32(cospis0), 1); + + step2[0] = vrshrq_n_s32(step2[0], DCT_CONST_BITS); + step2[1] = vrshrq_n_s32(step2[1], DCT_CONST_BITS); + step2[2] = vrshrq_n_s32(step2[2], DCT_CONST_BITS); + step2[3] = vrshrq_n_s32(step2[3], DCT_CONST_BITS); + + step2[4] = vaddq_s32(step1[4], step1[5]); + step2[5] = vsubq_s32(step1[4], step1[5]); + step2[6] = vsubq_s32(step1[7], step1[6]); + step2[7] = vaddq_s32(step1[7], step1[6]); + + // stage 3 + step1[0] = vaddq_s32(step2[0], step2[3]); + step1[1] = vaddq_s32(step2[1], step2[2]); + step1[2] = vsubq_s32(step2[1], step2[2]); + step1[3] = vsubq_s32(step2[0], 
step2[3]); + + step1[6] = vmulq_lane_s32(step2[6], vget_high_s32(cospis0), 0); + step1[5] = vmlsq_lane_s32(step1[6], step2[5], vget_high_s32(cospis0), 0); + step1[6] = vmlaq_lane_s32(step1[6], step2[5], vget_high_s32(cospis0), 0); + step1[5] = vrshrq_n_s32(step1[5], DCT_CONST_BITS); + step1[6] = vrshrq_n_s32(step1[6], DCT_CONST_BITS); + + // stage 4 + *io0 = vaddq_s32(step1[0], step2[7]); + *io1 = vaddq_s32(step1[1], step1[6]); + *io2 = vaddq_s32(step1[2], step1[5]); + *io3 = vaddq_s32(step1[3], step2[4]); + *io4 = vsubq_s32(step1[3], step2[4]); + *io5 = vsubq_s32(step1[2], step1[5]); + *io6 = vsubq_s32(step1[1], step1[6]); + *io7 = vsubq_s32(step1[0], step2[7]); +} + +static INLINE void idct8x8_64_half1d_bd12( + const int32x4_t cospis0, const int32x4_t cospis1, int32x4_t *const io0, + int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3, + int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6, + int32x4_t *const io7) { + int32x2_t input1l, input1h, input3l, input3h, input5l, input5h, input7l, + input7h; + int32x2_t step1l[4], step1h[4]; + int32x4_t step1[8], step2[8]; + int64x2_t t64[8]; + int32x2_t t32[8]; + + transpose_s32_8x4(io0, io1, io2, io3, io4, io5, io6, io7); + + // stage 1 + input1l = vget_low_s32(*io1); + input1h = vget_high_s32(*io1); + input3l = vget_low_s32(*io3); + input3h = vget_high_s32(*io3); + input5l = vget_low_s32(*io5); + input5h = vget_high_s32(*io5); + input7l = vget_low_s32(*io7); + input7h = vget_high_s32(*io7); + step1l[0] = vget_low_s32(*io0); + step1h[0] = vget_high_s32(*io0); + step1l[1] = vget_low_s32(*io2); + step1h[1] = vget_high_s32(*io2); + step1l[2] = vget_low_s32(*io4); + step1h[2] = vget_high_s32(*io4); + step1l[3] = vget_low_s32(*io6); + step1h[3] = vget_high_s32(*io6); + + t64[0] = vmull_lane_s32(input1l, vget_high_s32(cospis1), 1); + t64[1] = vmull_lane_s32(input1h, vget_high_s32(cospis1), 1); + t64[2] = vmull_lane_s32(input3l, vget_high_s32(cospis1), 0); + t64[3] = vmull_lane_s32(input3h, vget_high_s32(cospis1), 0); + t64[4] = vmull_lane_s32(input3l, vget_low_s32(cospis1), 1); + t64[5] = vmull_lane_s32(input3h, vget_low_s32(cospis1), 1); + t64[6] = vmull_lane_s32(input1l, vget_low_s32(cospis1), 0); + t64[7] = vmull_lane_s32(input1h, vget_low_s32(cospis1), 0); + t64[0] = vmlsl_lane_s32(t64[0], input7l, vget_low_s32(cospis1), 0); + t64[1] = vmlsl_lane_s32(t64[1], input7h, vget_low_s32(cospis1), 0); + t64[2] = vmlal_lane_s32(t64[2], input5l, vget_low_s32(cospis1), 1); + t64[3] = vmlal_lane_s32(t64[3], input5h, vget_low_s32(cospis1), 1); + t64[4] = vmlsl_lane_s32(t64[4], input5l, vget_high_s32(cospis1), 0); + t64[5] = vmlsl_lane_s32(t64[5], input5h, vget_high_s32(cospis1), 0); + t64[6] = vmlal_lane_s32(t64[6], input7l, vget_high_s32(cospis1), 1); + t64[7] = vmlal_lane_s32(t64[7], input7h, vget_high_s32(cospis1), 1); + t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS); + t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS); + t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); + t32[3] = vrshrn_n_s64(t64[3], DCT_CONST_BITS); + t32[4] = vrshrn_n_s64(t64[4], DCT_CONST_BITS); + t32[5] = vrshrn_n_s64(t64[5], DCT_CONST_BITS); + t32[6] = vrshrn_n_s64(t64[6], DCT_CONST_BITS); + t32[7] = vrshrn_n_s64(t64[7], DCT_CONST_BITS); + step1[4] = vcombine_s32(t32[0], t32[1]); + step1[5] = vcombine_s32(t32[2], t32[3]); + step1[6] = vcombine_s32(t32[4], t32[5]); + step1[7] = vcombine_s32(t32[6], t32[7]); + + // stage 2 + t64[2] = vmull_lane_s32(step1l[0], vget_high_s32(cospis0), 0); + t64[3] = vmull_lane_s32(step1h[0], vget_high_s32(cospis0), 0); + t64[4] = 
vmull_lane_s32(step1l[1], vget_high_s32(cospis0), 1); + t64[5] = vmull_lane_s32(step1h[1], vget_high_s32(cospis0), 1); + t64[6] = vmull_lane_s32(step1l[1], vget_low_s32(cospis0), 1); + t64[7] = vmull_lane_s32(step1h[1], vget_low_s32(cospis0), 1); + t64[0] = vmlal_lane_s32(t64[2], step1l[2], vget_high_s32(cospis0), 0); + t64[1] = vmlal_lane_s32(t64[3], step1h[2], vget_high_s32(cospis0), 0); + t64[2] = vmlsl_lane_s32(t64[2], step1l[2], vget_high_s32(cospis0), 0); + t64[3] = vmlsl_lane_s32(t64[3], step1h[2], vget_high_s32(cospis0), 0); + t64[4] = vmlsl_lane_s32(t64[4], step1l[3], vget_low_s32(cospis0), 1); + t64[5] = vmlsl_lane_s32(t64[5], step1h[3], vget_low_s32(cospis0), 1); + t64[6] = vmlal_lane_s32(t64[6], step1l[3], vget_high_s32(cospis0), 1); + t64[7] = vmlal_lane_s32(t64[7], step1h[3], vget_high_s32(cospis0), 1); + t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS); + t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS); + t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); + t32[3] = vrshrn_n_s64(t64[3], DCT_CONST_BITS); + t32[4] = vrshrn_n_s64(t64[4], DCT_CONST_BITS); + t32[5] = vrshrn_n_s64(t64[5], DCT_CONST_BITS); + t32[6] = vrshrn_n_s64(t64[6], DCT_CONST_BITS); + t32[7] = vrshrn_n_s64(t64[7], DCT_CONST_BITS); + step2[0] = vcombine_s32(t32[0], t32[1]); + step2[1] = vcombine_s32(t32[2], t32[3]); + step2[2] = vcombine_s32(t32[4], t32[5]); + step2[3] = vcombine_s32(t32[6], t32[7]); + + step2[4] = vaddq_s32(step1[4], step1[5]); + step2[5] = vsubq_s32(step1[4], step1[5]); + step2[6] = vsubq_s32(step1[7], step1[6]); + step2[7] = vaddq_s32(step1[7], step1[6]); + + // stage 3 + step1[0] = vaddq_s32(step2[0], step2[3]); + step1[1] = vaddq_s32(step2[1], step2[2]); + step1[2] = vsubq_s32(step2[1], step2[2]); + step1[3] = vsubq_s32(step2[0], step2[3]); + + t64[2] = vmull_lane_s32(vget_low_s32(step2[6]), vget_high_s32(cospis0), 0); + t64[3] = vmull_lane_s32(vget_high_s32(step2[6]), vget_high_s32(cospis0), 0); + t64[0] = + vmlsl_lane_s32(t64[2], vget_low_s32(step2[5]), vget_high_s32(cospis0), 0); + t64[1] = vmlsl_lane_s32(t64[3], vget_high_s32(step2[5]), + vget_high_s32(cospis0), 0); + t64[2] = + vmlal_lane_s32(t64[2], vget_low_s32(step2[5]), vget_high_s32(cospis0), 0); + t64[3] = vmlal_lane_s32(t64[3], vget_high_s32(step2[5]), + vget_high_s32(cospis0), 0); + t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS); + t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS); + t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); + t32[3] = vrshrn_n_s64(t64[3], DCT_CONST_BITS); + step1[5] = vcombine_s32(t32[0], t32[1]); + step1[6] = vcombine_s32(t32[2], t32[3]); + + // stage 4 + *io0 = vaddq_s32(step1[0], step2[7]); + *io1 = vaddq_s32(step1[1], step1[6]); + *io2 = vaddq_s32(step1[2], step1[5]); + *io3 = vaddq_s32(step1[3], step2[4]); + *io4 = vsubq_s32(step1[3], step2[4]); + *io5 = vsubq_s32(step1[2], step1[5]); + *io6 = vsubq_s32(step1[1], step1[6]); + *io7 = vsubq_s32(step1[0], step2[7]); +} + +static INLINE void highbd_idct16x16_store_pass1(const int32x4x2_t *const out, + int32_t *output) { + // Save the result into output + vst1q_s32(output + 0, out[0].val[0]); + vst1q_s32(output + 4, out[0].val[1]); + output += 16; + vst1q_s32(output + 0, out[1].val[0]); + vst1q_s32(output + 4, out[1].val[1]); + output += 16; + vst1q_s32(output + 0, out[2].val[0]); + vst1q_s32(output + 4, out[2].val[1]); + output += 16; + vst1q_s32(output + 0, out[3].val[0]); + vst1q_s32(output + 4, out[3].val[1]); + output += 16; + vst1q_s32(output + 0, out[4].val[0]); + vst1q_s32(output + 4, out[4].val[1]); + output += 16; + vst1q_s32(output + 0, out[5].val[0]); + 
vst1q_s32(output + 4, out[5].val[1]); + output += 16; + vst1q_s32(output + 0, out[6].val[0]); + vst1q_s32(output + 4, out[6].val[1]); + output += 16; + vst1q_s32(output + 0, out[7].val[0]); + vst1q_s32(output + 4, out[7].val[1]); + output += 16; + vst1q_s32(output + 0, out[8].val[0]); + vst1q_s32(output + 4, out[8].val[1]); + output += 16; + vst1q_s32(output + 0, out[9].val[0]); + vst1q_s32(output + 4, out[9].val[1]); + output += 16; + vst1q_s32(output + 0, out[10].val[0]); + vst1q_s32(output + 4, out[10].val[1]); + output += 16; + vst1q_s32(output + 0, out[11].val[0]); + vst1q_s32(output + 4, out[11].val[1]); + output += 16; + vst1q_s32(output + 0, out[12].val[0]); + vst1q_s32(output + 4, out[12].val[1]); + output += 16; + vst1q_s32(output + 0, out[13].val[0]); + vst1q_s32(output + 4, out[13].val[1]); + output += 16; + vst1q_s32(output + 0, out[14].val[0]); + vst1q_s32(output + 4, out[14].val[1]); + output += 16; + vst1q_s32(output + 0, out[15].val[0]); + vst1q_s32(output + 4, out[15].val[1]); +} + +static INLINE void highbd_idct16x16_add_store(const int32x4x2_t *const out, + uint16_t *dest, const int stride, + const int bd) { + // Add the result to dest + const int16x8_t max = vdupq_n_s16((1 << bd) - 1); + int16x8_t o[16]; + o[0] = vcombine_s16(vrshrn_n_s32(out[0].val[0], 6), + vrshrn_n_s32(out[0].val[1], 6)); + o[1] = vcombine_s16(vrshrn_n_s32(out[1].val[0], 6), + vrshrn_n_s32(out[1].val[1], 6)); + o[2] = vcombine_s16(vrshrn_n_s32(out[2].val[0], 6), + vrshrn_n_s32(out[2].val[1], 6)); + o[3] = vcombine_s16(vrshrn_n_s32(out[3].val[0], 6), + vrshrn_n_s32(out[3].val[1], 6)); + o[4] = vcombine_s16(vrshrn_n_s32(out[4].val[0], 6), + vrshrn_n_s32(out[4].val[1], 6)); + o[5] = vcombine_s16(vrshrn_n_s32(out[5].val[0], 6), + vrshrn_n_s32(out[5].val[1], 6)); + o[6] = vcombine_s16(vrshrn_n_s32(out[6].val[0], 6), + vrshrn_n_s32(out[6].val[1], 6)); + o[7] = vcombine_s16(vrshrn_n_s32(out[7].val[0], 6), + vrshrn_n_s32(out[7].val[1], 6)); + o[8] = vcombine_s16(vrshrn_n_s32(out[8].val[0], 6), + vrshrn_n_s32(out[8].val[1], 6)); + o[9] = vcombine_s16(vrshrn_n_s32(out[9].val[0], 6), + vrshrn_n_s32(out[9].val[1], 6)); + o[10] = vcombine_s16(vrshrn_n_s32(out[10].val[0], 6), + vrshrn_n_s32(out[10].val[1], 6)); + o[11] = vcombine_s16(vrshrn_n_s32(out[11].val[0], 6), + vrshrn_n_s32(out[11].val[1], 6)); + o[12] = vcombine_s16(vrshrn_n_s32(out[12].val[0], 6), + vrshrn_n_s32(out[12].val[1], 6)); + o[13] = vcombine_s16(vrshrn_n_s32(out[13].val[0], 6), + vrshrn_n_s32(out[13].val[1], 6)); + o[14] = vcombine_s16(vrshrn_n_s32(out[14].val[0], 6), + vrshrn_n_s32(out[14].val[1], 6)); + o[15] = vcombine_s16(vrshrn_n_s32(out[15].val[0], 6), + vrshrn_n_s32(out[15].val[1], 6)); + highbd_idct16x16_add8x1(o[0], max, &dest, stride); + highbd_idct16x16_add8x1(o[1], max, &dest, stride); + highbd_idct16x16_add8x1(o[2], max, &dest, stride); + highbd_idct16x16_add8x1(o[3], max, &dest, stride); + highbd_idct16x16_add8x1(o[4], max, &dest, stride); + highbd_idct16x16_add8x1(o[5], max, &dest, stride); + highbd_idct16x16_add8x1(o[6], max, &dest, stride); + highbd_idct16x16_add8x1(o[7], max, &dest, stride); + highbd_idct16x16_add8x1(o[8], max, &dest, stride); + highbd_idct16x16_add8x1(o[9], max, &dest, stride); + highbd_idct16x16_add8x1(o[10], max, &dest, stride); + highbd_idct16x16_add8x1(o[11], max, &dest, stride); + highbd_idct16x16_add8x1(o[12], max, &dest, stride); + highbd_idct16x16_add8x1(o[13], max, &dest, stride); + highbd_idct16x16_add8x1(o[14], max, &dest, stride); + highbd_idct16x16_add8x1(o[15], max, &dest, stride); +} + +void 
vpx_highbd_idct16x16_256_add_half1d(const int32_t *input, int32_t *output, + uint16_t *dest, const int stride, + const int bd); + +#endif // VPX_VPX_DSP_ARM_HIGHBD_IDCT_NEON_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/arm/idct16x16_add_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/idct16x16_add_neon.c index 5c5963d277ef..fc7f4a7747cd 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/idct16x16_add_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/idct16x16_add_neon.c @@ -63,65 +63,6 @@ static INLINE void idct_cospi_16_16_d(const int16x4_t s0, const int16x4_t s1, wrap_low_4x2(t32, d0, d1); } -static INLINE void idct16x16_add_store(const int16x8_t *const out, - uint8_t *dest, const int stride) { - // Add the result to dest - idct16x16_add8x1(out[0], &dest, stride); - idct16x16_add8x1(out[1], &dest, stride); - idct16x16_add8x1(out[2], &dest, stride); - idct16x16_add8x1(out[3], &dest, stride); - idct16x16_add8x1(out[4], &dest, stride); - idct16x16_add8x1(out[5], &dest, stride); - idct16x16_add8x1(out[6], &dest, stride); - idct16x16_add8x1(out[7], &dest, stride); - idct16x16_add8x1(out[8], &dest, stride); - idct16x16_add8x1(out[9], &dest, stride); - idct16x16_add8x1(out[10], &dest, stride); - idct16x16_add8x1(out[11], &dest, stride); - idct16x16_add8x1(out[12], &dest, stride); - idct16x16_add8x1(out[13], &dest, stride); - idct16x16_add8x1(out[14], &dest, stride); - idct16x16_add8x1(out[15], &dest, stride); -} - -static INLINE void idct16x16_add_store_bd8(int16x8_t *const out, uint16_t *dest, - const int stride) { - // Add the result to dest - const int16x8_t max = vdupq_n_s16((1 << 8) - 1); - out[0] = vrshrq_n_s16(out[0], 6); - out[1] = vrshrq_n_s16(out[1], 6); - out[2] = vrshrq_n_s16(out[2], 6); - out[3] = vrshrq_n_s16(out[3], 6); - out[4] = vrshrq_n_s16(out[4], 6); - out[5] = vrshrq_n_s16(out[5], 6); - out[6] = vrshrq_n_s16(out[6], 6); - out[7] = vrshrq_n_s16(out[7], 6); - out[8] = vrshrq_n_s16(out[8], 6); - out[9] = vrshrq_n_s16(out[9], 6); - out[10] = vrshrq_n_s16(out[10], 6); - out[11] = vrshrq_n_s16(out[11], 6); - out[12] = vrshrq_n_s16(out[12], 6); - out[13] = vrshrq_n_s16(out[13], 6); - out[14] = vrshrq_n_s16(out[14], 6); - out[15] = vrshrq_n_s16(out[15], 6); - highbd_idct16x16_add8x1(out[0], max, &dest, stride); - highbd_idct16x16_add8x1(out[1], max, &dest, stride); - highbd_idct16x16_add8x1(out[2], max, &dest, stride); - highbd_idct16x16_add8x1(out[3], max, &dest, stride); - highbd_idct16x16_add8x1(out[4], max, &dest, stride); - highbd_idct16x16_add8x1(out[5], max, &dest, stride); - highbd_idct16x16_add8x1(out[6], max, &dest, stride); - highbd_idct16x16_add8x1(out[7], max, &dest, stride); - highbd_idct16x16_add8x1(out[8], max, &dest, stride); - highbd_idct16x16_add8x1(out[9], max, &dest, stride); - highbd_idct16x16_add8x1(out[10], max, &dest, stride); - highbd_idct16x16_add8x1(out[11], max, &dest, stride); - highbd_idct16x16_add8x1(out[12], max, &dest, stride); - highbd_idct16x16_add8x1(out[13], max, &dest, stride); - highbd_idct16x16_add8x1(out[14], max, &dest, stride); - highbd_idct16x16_add8x1(out[15], max, &dest, stride); -} - void vpx_idct16x16_256_add_half1d(const void *const input, int16_t *output, void *const dest, const int stride, const int highbd_flag) { diff --git a/media/libvpx/libvpx/vpx_dsp/arm/idct32x32_135_add_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/idct32x32_135_add_neon.c index 021211bc9907..057731ad9249 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/idct32x32_135_add_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/idct32x32_135_add_neon.c @@ -650,14 +650,10 @@ void 
vpx_idct32_16_neon(const int16_t *const input, void *const output, highbd_add_and_store_bd8(out, output, stride); } else { uint8_t *const outputT = (uint8_t *)output; - add_and_store_u8_s16(out[0], out[1], out[2], out[3], out[4], out[5], out[6], - out[7], outputT, stride); - add_and_store_u8_s16(out[8], out[9], out[10], out[11], out[12], out[13], - out[14], out[15], outputT + (8 * stride), stride); - add_and_store_u8_s16(out[16], out[17], out[18], out[19], out[20], out[21], - out[22], out[23], outputT + (16 * stride), stride); - add_and_store_u8_s16(out[24], out[25], out[26], out[27], out[28], out[29], - out[30], out[31], outputT + (24 * stride), stride); + add_and_store_u8_s16(out + 0, outputT, stride); + add_and_store_u8_s16(out + 8, outputT + (8 * stride), stride); + add_and_store_u8_s16(out + 16, outputT + (16 * stride), stride); + add_and_store_u8_s16(out + 24, outputT + (24 * stride), stride); } } diff --git a/media/libvpx/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c index f3c336fa31f1..f570547e4491 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c @@ -490,14 +490,10 @@ void vpx_idct32_8_neon(const int16_t *input, void *const output, int stride, highbd_add_and_store_bd8(out, output, stride); } else { uint8_t *const outputT = (uint8_t *)output; - add_and_store_u8_s16(out[0], out[1], out[2], out[3], out[4], out[5], out[6], - out[7], outputT, stride); - add_and_store_u8_s16(out[8], out[9], out[10], out[11], out[12], out[13], - out[14], out[15], outputT + (8 * stride), stride); - add_and_store_u8_s16(out[16], out[17], out[18], out[19], out[20], out[21], - out[22], out[23], outputT + (16 * stride), stride); - add_and_store_u8_s16(out[24], out[25], out[26], out[27], out[28], out[29], - out[30], out[31], outputT + (24 * stride), stride); + add_and_store_u8_s16(out + 0, outputT, stride); + add_and_store_u8_s16(out + 8, outputT + (8 * stride), stride); + add_and_store_u8_s16(out + 16, outputT + (16 * stride), stride); + add_and_store_u8_s16(out + 24, outputT + (24 * stride), stride); } } diff --git a/media/libvpx/libvpx/vpx_dsp/arm/idct4x4_add_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/idct4x4_add_neon.c index 673a36840e33..8192ee4cf87e 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/idct4x4_add_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/idct4x4_add_neon.c @@ -19,44 +19,41 @@ void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { const uint8_t *dst = dest; - const int16x4_t cospis = vld1_s16(kCospi); - uint8x8_t dest01_u8; - uint32x2_t dest32_u32 = vdup_n_u32(0); - int16x8_t a0, a1; - uint8x8_t d01, d32; - uint16x8_t d01_u16, d32_u16; + uint32x2_t s32 = vdup_n_u32(0); + int16x8_t a[2]; + uint8x8_t s, d[2]; + uint16x8_t sum[2]; assert(!((intptr_t)dest % sizeof(uint32_t))); assert(!(stride % sizeof(uint32_t))); // Rows - a0 = load_tran_low_to_s16q(input); - a1 = load_tran_low_to_s16q(input + 8); - idct4x4_16_kernel_bd8(cospis, &a0, &a1); + a[0] = load_tran_low_to_s16q(input); + a[1] = load_tran_low_to_s16q(input + 8); + transpose_idct4x4_16_bd8(a); // Columns - a1 = vcombine_s16(vget_high_s16(a1), vget_low_s16(a1)); - idct4x4_16_kernel_bd8(cospis, &a0, &a1); - a0 = vrshrq_n_s16(a0, 4); - a1 = vrshrq_n_s16(a1, 4); + a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); + transpose_idct4x4_16_bd8(a); + a[0] = vrshrq_n_s16(a[0], 4); + a[1] = vrshrq_n_s16(a[1], 4); - dest01_u8 = load_u8(dst, stride); + s = load_u8(dst, stride); dst 
+= 2 * stride; // The elements are loaded in reverse order. - dest32_u32 = vld1_lane_u32((const uint32_t *)dst, dest32_u32, 1); + s32 = vld1_lane_u32((const uint32_t *)dst, s32, 1); dst += stride; - dest32_u32 = vld1_lane_u32((const uint32_t *)dst, dest32_u32, 0); + s32 = vld1_lane_u32((const uint32_t *)dst, s32, 0); - d01_u16 = vaddw_u8(vreinterpretq_u16_s16(a0), dest01_u8); - d32_u16 = - vaddw_u8(vreinterpretq_u16_s16(a1), vreinterpret_u8_u32(dest32_u32)); - d01 = vqmovun_s16(vreinterpretq_s16_u16(d01_u16)); - d32 = vqmovun_s16(vreinterpretq_s16_u16(d32_u16)); + sum[0] = vaddw_u8(vreinterpretq_u16_s16(a[0]), s); + sum[1] = vaddw_u8(vreinterpretq_u16_s16(a[1]), vreinterpret_u8_u32(s32)); + d[0] = vqmovun_s16(vreinterpretq_s16_u16(sum[0])); + d[1] = vqmovun_s16(vreinterpretq_s16_u16(sum[1])); - store_u8(dest, stride, d01); + store_u8(dest, stride, d[0]); dest += 2 * stride; // The elements are stored in reverse order. - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d32), 1); + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d[1]), 1); dest += stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d32), 0); + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d[1]), 0); } diff --git a/media/libvpx/libvpx/vpx_dsp/arm/idct8x8_add_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/idct8x8_add_neon.c index 1121ade27961..7471387e4768 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/idct8x8_add_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/idct8x8_add_neon.c @@ -17,91 +17,25 @@ #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/txfm_common.h" -static INLINE void add8x8(int16x8_t a0, int16x8_t a1, int16x8_t a2, - int16x8_t a3, int16x8_t a4, int16x8_t a5, - int16x8_t a6, int16x8_t a7, uint8_t *dest, - const int stride) { - const uint8_t *dst = dest; - uint8x8_t d0, d1, d2, d3, d4, d5, d6, d7; - uint16x8_t d0_u16, d1_u16, d2_u16, d3_u16, d4_u16, d5_u16, d6_u16, d7_u16; - - a0 = vrshrq_n_s16(a0, 5); - a1 = vrshrq_n_s16(a1, 5); - a2 = vrshrq_n_s16(a2, 5); - a3 = vrshrq_n_s16(a3, 5); - a4 = vrshrq_n_s16(a4, 5); - a5 = vrshrq_n_s16(a5, 5); - a6 = vrshrq_n_s16(a6, 5); - a7 = vrshrq_n_s16(a7, 5); - - d0 = vld1_u8(dst); - dst += stride; - d1 = vld1_u8(dst); - dst += stride; - d2 = vld1_u8(dst); - dst += stride; - d3 = vld1_u8(dst); - dst += stride; - d4 = vld1_u8(dst); - dst += stride; - d5 = vld1_u8(dst); - dst += stride; - d6 = vld1_u8(dst); - dst += stride; - d7 = vld1_u8(dst); - - d0_u16 = vaddw_u8(vreinterpretq_u16_s16(a0), d0); - d1_u16 = vaddw_u8(vreinterpretq_u16_s16(a1), d1); - d2_u16 = vaddw_u8(vreinterpretq_u16_s16(a2), d2); - d3_u16 = vaddw_u8(vreinterpretq_u16_s16(a3), d3); - d4_u16 = vaddw_u8(vreinterpretq_u16_s16(a4), d4); - d5_u16 = vaddw_u8(vreinterpretq_u16_s16(a5), d5); - d6_u16 = vaddw_u8(vreinterpretq_u16_s16(a6), d6); - d7_u16 = vaddw_u8(vreinterpretq_u16_s16(a7), d7); - - d0 = vqmovun_s16(vreinterpretq_s16_u16(d0_u16)); - d1 = vqmovun_s16(vreinterpretq_s16_u16(d1_u16)); - d2 = vqmovun_s16(vreinterpretq_s16_u16(d2_u16)); - d3 = vqmovun_s16(vreinterpretq_s16_u16(d3_u16)); - d4 = vqmovun_s16(vreinterpretq_s16_u16(d4_u16)); - d5 = vqmovun_s16(vreinterpretq_s16_u16(d5_u16)); - d6 = vqmovun_s16(vreinterpretq_s16_u16(d6_u16)); - d7 = vqmovun_s16(vreinterpretq_s16_u16(d7_u16)); - - vst1_u8(dest, d0); - dest += stride; - vst1_u8(dest, d1); - dest += stride; - vst1_u8(dest, d2); - dest += stride; - vst1_u8(dest, d3); - dest += stride; - vst1_u8(dest, d4); - dest += stride; - vst1_u8(dest, d5); - dest += stride; - vst1_u8(dest, d6); - dest += stride; - vst1_u8(dest, d7); -} - void 
vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { const int16x8_t cospis = vld1q_s16(kCospi); const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 - int16x8_t a0 = load_tran_low_to_s16q(input); - int16x8_t a1 = load_tran_low_to_s16q(input + 8); - int16x8_t a2 = load_tran_low_to_s16q(input + 16); - int16x8_t a3 = load_tran_low_to_s16q(input + 24); - int16x8_t a4 = load_tran_low_to_s16q(input + 32); - int16x8_t a5 = load_tran_low_to_s16q(input + 40); - int16x8_t a6 = load_tran_low_to_s16q(input + 48); - int16x8_t a7 = load_tran_low_to_s16q(input + 56); + int16x8_t a[8]; - idct8x8_64_1d_bd8(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); - idct8x8_64_1d_bd8(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); - add8x8(a0, a1, a2, a3, a4, a5, a6, a7, dest, stride); + a[0] = load_tran_low_to_s16q(input); + a[1] = load_tran_low_to_s16q(input + 8); + a[2] = load_tran_low_to_s16q(input + 16); + a[3] = load_tran_low_to_s16q(input + 24); + a[4] = load_tran_low_to_s16q(input + 32); + a[5] = load_tran_low_to_s16q(input + 40); + a[6] = load_tran_low_to_s16q(input + 48); + a[7] = load_tran_low_to_s16q(input + 56); + + idct8x8_64_1d_bd8(cospis0, cospis1, a); + idct8x8_64_1d_bd8(cospis0, cospis1, a); + idct8x8_add8x8_neon(a, dest, stride); } void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest, @@ -111,17 +45,15 @@ void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest, const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospisd0 = vget_low_s16(cospisd); // doubled 0, 8, 16, 24 const int16x4_t cospisd1 = vget_high_s16(cospisd); // doubled 4, 12, 20, 28 - int16x4_t a0, a1, a2, a3, a4, a5, a6, a7; - int16x8_t b0, b1, b2, b3, b4, b5, b6, b7; + int16x4_t a[8]; + int16x8_t b[8]; - a0 = load_tran_low_to_s16d(input); - a1 = load_tran_low_to_s16d(input + 8); - a2 = load_tran_low_to_s16d(input + 16); - a3 = load_tran_low_to_s16d(input + 24); + a[0] = load_tran_low_to_s16d(input); + a[1] = load_tran_low_to_s16d(input + 8); + a[2] = load_tran_low_to_s16d(input + 16); + a[3] = load_tran_low_to_s16d(input + 24); - idct8x8_12_pass1_bd8(cospis0, cospisd0, cospisd1, &a0, &a1, &a2, &a3, &a4, - &a5, &a6, &a7); - idct8x8_12_pass2_bd8(cospis0, cospisd0, cospisd1, a0, a1, a2, a3, a4, a5, a6, - a7, &b0, &b1, &b2, &b3, &b4, &b5, &b6, &b7); - add8x8(b0, b1, b2, b3, b4, b5, b6, b7, dest, stride); + idct8x8_12_pass1_bd8(cospis0, cospisd0, cospisd1, a); + idct8x8_12_pass2_bd8(cospis0, cospisd0, cospisd1, a, b); + idct8x8_add8x8_neon(b, dest, stride); } diff --git a/media/libvpx/libvpx/vpx_dsp/arm/idct_neon.h b/media/libvpx/libvpx/vpx_dsp/arm/idct_neon.h index 6ed02af5acc1..c02311326bda 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/idct_neon.h +++ b/media/libvpx/libvpx/vpx_dsp/arm/idct_neon.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
 */
-#ifndef VPX_DSP_ARM_IDCT_NEON_H_
-#define VPX_DSP_ARM_IDCT_NEON_H_
+#ifndef VPX_VPX_DSP_ARM_IDCT_NEON_H_
+#define VPX_VPX_DSP_ARM_IDCT_NEON_H_
 
 #include <arm_neon.h>
 
@@ -78,6 +78,28 @@ static INLINE int32x4x2_t highbd_idct_sub_dual(const int32x4x2_t s0,
 
 //------------------------------------------------------------------------------
 
+static INLINE int16x8_t dct_const_round_shift_low_8(const int32x4_t *const in) {
+  return vcombine_s16(vrshrn_n_s32(in[0], DCT_CONST_BITS),
+                      vrshrn_n_s32(in[1], DCT_CONST_BITS));
+}
+
+static INLINE void dct_const_round_shift_low_8_dual(const int32x4_t *const t32,
+                                                    int16x8_t *const d0,
+                                                    int16x8_t *const d1) {
+  *d0 = dct_const_round_shift_low_8(t32 + 0);
+  *d1 = dct_const_round_shift_low_8(t32 + 2);
+}
+
+static INLINE int32x4x2_t
+dct_const_round_shift_high_4x2(const int64x2_t *const in) {
+  int32x4x2_t out;
+  out.val[0] = vcombine_s32(vrshrn_n_s64(in[0], DCT_CONST_BITS),
+                            vrshrn_n_s64(in[1], DCT_CONST_BITS));
+  out.val[1] = vcombine_s32(vrshrn_n_s64(in[2], DCT_CONST_BITS),
+                            vrshrn_n_s64(in[3], DCT_CONST_BITS));
+  return out;
+}
+
 // Multiply a by a_const. Saturate, shift and narrow by DCT_CONST_BITS.
 static INLINE int16x8_t multiply_shift_and_narrow_s16(const int16x8_t a,
                                                       const int16_t a_const) {
@@ -102,24 +124,24 @@ static INLINE int16x8_t add_multiply_shift_and_narrow_s16(
   // input) this function can not use vaddq_s16.
   // In order to match existing behavior and intentionally out of range tests,
   // expand the addition up to 32 bits to prevent truncation.
-  int32x4_t temp_low = vaddl_s16(vget_low_s16(a), vget_low_s16(b));
-  int32x4_t temp_high = vaddl_s16(vget_high_s16(a), vget_high_s16(b));
-  temp_low = vmulq_n_s32(temp_low, ab_const);
-  temp_high = vmulq_n_s32(temp_high, ab_const);
-  return vcombine_s16(vrshrn_n_s32(temp_low, DCT_CONST_BITS),
-                      vrshrn_n_s32(temp_high, DCT_CONST_BITS));
+  int32x4_t t[2];
+  t[0] = vaddl_s16(vget_low_s16(a), vget_low_s16(b));
+  t[1] = vaddl_s16(vget_high_s16(a), vget_high_s16(b));
+  t[0] = vmulq_n_s32(t[0], ab_const);
+  t[1] = vmulq_n_s32(t[1], ab_const);
+  return dct_const_round_shift_low_8(t);
 }
 
 // Subtract b from a, then multiply by ab_const. Shift and narrow by
 // DCT_CONST_BITS.
 static INLINE int16x8_t sub_multiply_shift_and_narrow_s16(
     const int16x8_t a, const int16x8_t b, const int16_t ab_const) {
-  int32x4_t temp_low = vsubl_s16(vget_low_s16(a), vget_low_s16(b));
-  int32x4_t temp_high = vsubl_s16(vget_high_s16(a), vget_high_s16(b));
-  temp_low = vmulq_n_s32(temp_low, ab_const);
-  temp_high = vmulq_n_s32(temp_high, ab_const);
-  return vcombine_s16(vrshrn_n_s32(temp_low, DCT_CONST_BITS),
-                      vrshrn_n_s32(temp_high, DCT_CONST_BITS));
+  int32x4_t t[2];
+  t[0] = vsubl_s16(vget_low_s16(a), vget_low_s16(b));
+  t[1] = vsubl_s16(vget_high_s16(a), vget_high_s16(b));
+  t[0] = vmulq_n_s32(t[0], ab_const);
+  t[1] = vmulq_n_s32(t[1], ab_const);
+  return dct_const_round_shift_low_8(t);
 }
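// For reference when reading the helpers above: DCT_CONST_BITS is 14
// (vpx_dsp/txfm_common.h), so each lane of add_multiply_shift_and_narrow_s16()
// computes round((a + b) * ab_const / 2^14) in a 32-bit intermediate. A
// minimal scalar sketch of that lane operation (mul_shift_narrow_ref is an
// illustrative name, not part of this patch; types come from <stdint.h>, and a
// right shift of a negative value is assumed to be arithmetic):
static int16_t mul_shift_narrow_ref(int16_t a, int16_t b, int16_t ab_const) {
  const int32_t sum = (int32_t)a + b;         /* widen: a + b can overflow s16 */
  const int32_t prod = sum * ab_const;        /* fits in 32 bits for cospi consts */
  return (int16_t)((prod + (1 << 13)) >> 14); /* vrshrn_n_s32(prod, 14) */
}
 // Multiply a by a_const and b by b_const, then accumulate. 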
Shift and narrow by @@ -127,12 +149,12 @@ static INLINE int16x8_t sub_multiply_shift_and_narrow_s16( static INLINE int16x8_t multiply_accumulate_shift_and_narrow_s16( const int16x8_t a, const int16_t a_const, const int16x8_t b, const int16_t b_const) { - int32x4_t temp_low = vmull_n_s16(vget_low_s16(a), a_const); - int32x4_t temp_high = vmull_n_s16(vget_high_s16(a), a_const); - temp_low = vmlal_n_s16(temp_low, vget_low_s16(b), b_const); - temp_high = vmlal_n_s16(temp_high, vget_high_s16(b), b_const); - return vcombine_s16(vrshrn_n_s32(temp_low, DCT_CONST_BITS), - vrshrn_n_s32(temp_high, DCT_CONST_BITS)); + int32x4_t t[2]; + t[0] = vmull_n_s16(vget_low_s16(a), a_const); + t[1] = vmull_n_s16(vget_high_s16(a), a_const); + t[0] = vmlal_n_s16(t[0], vget_low_s16(b), b_const); + t[1] = vmlal_n_s16(t[1], vget_high_s16(b), b_const); + return dct_const_round_shift_low_8(t); } //------------------------------------------------------------------------------ @@ -145,53 +167,43 @@ static INLINE int16x8_t multiply_accumulate_shift_and_narrow_s16( static INLINE int32x4x2_t multiply_shift_and_narrow_s32_dual(const int32x4x2_t a, const int32_t a_const) { int64x2_t b[4]; - int32x4x2_t c; + b[0] = vmull_n_s32(vget_low_s32(a.val[0]), a_const); b[1] = vmull_n_s32(vget_high_s32(a.val[0]), a_const); b[2] = vmull_n_s32(vget_low_s32(a.val[1]), a_const); b[3] = vmull_n_s32(vget_high_s32(a.val[1]), a_const); - c.val[0] = vcombine_s32(vrshrn_n_s64(b[0], DCT_CONST_BITS), - vrshrn_n_s64(b[1], DCT_CONST_BITS)); - c.val[1] = vcombine_s32(vrshrn_n_s64(b[2], DCT_CONST_BITS), - vrshrn_n_s64(b[3], DCT_CONST_BITS)); - return c; + return dct_const_round_shift_high_4x2(b); } // Add a and b, then multiply by ab_const. Shift and narrow by DCT_CONST_BITS. static INLINE int32x4x2_t add_multiply_shift_and_narrow_s32_dual( const int32x4x2_t a, const int32x4x2_t b, const int32_t ab_const) { - const int32x4_t temp_low = vaddq_s32(a.val[0], b.val[0]); - const int32x4_t temp_high = vaddq_s32(a.val[1], b.val[1]); + int32x4_t t[2]; int64x2_t c[4]; - int32x4x2_t d; - c[0] = vmull_n_s32(vget_low_s32(temp_low), ab_const); - c[1] = vmull_n_s32(vget_high_s32(temp_low), ab_const); - c[2] = vmull_n_s32(vget_low_s32(temp_high), ab_const); - c[3] = vmull_n_s32(vget_high_s32(temp_high), ab_const); - d.val[0] = vcombine_s32(vrshrn_n_s64(c[0], DCT_CONST_BITS), - vrshrn_n_s64(c[1], DCT_CONST_BITS)); - d.val[1] = vcombine_s32(vrshrn_n_s64(c[2], DCT_CONST_BITS), - vrshrn_n_s64(c[3], DCT_CONST_BITS)); - return d; + + t[0] = vaddq_s32(a.val[0], b.val[0]); + t[1] = vaddq_s32(a.val[1], b.val[1]); + c[0] = vmull_n_s32(vget_low_s32(t[0]), ab_const); + c[1] = vmull_n_s32(vget_high_s32(t[0]), ab_const); + c[2] = vmull_n_s32(vget_low_s32(t[1]), ab_const); + c[3] = vmull_n_s32(vget_high_s32(t[1]), ab_const); + return dct_const_round_shift_high_4x2(c); } // Subtract b from a, then multiply by ab_const. Shift and narrow by // DCT_CONST_BITS. 
static INLINE int32x4x2_t sub_multiply_shift_and_narrow_s32_dual( const int32x4x2_t a, const int32x4x2_t b, const int32_t ab_const) { - const int32x4_t temp_low = vsubq_s32(a.val[0], b.val[0]); - const int32x4_t temp_high = vsubq_s32(a.val[1], b.val[1]); + int32x4_t t[2]; int64x2_t c[4]; - int32x4x2_t d; - c[0] = vmull_n_s32(vget_low_s32(temp_low), ab_const); - c[1] = vmull_n_s32(vget_high_s32(temp_low), ab_const); - c[2] = vmull_n_s32(vget_low_s32(temp_high), ab_const); - c[3] = vmull_n_s32(vget_high_s32(temp_high), ab_const); - d.val[0] = vcombine_s32(vrshrn_n_s64(c[0], DCT_CONST_BITS), - vrshrn_n_s64(c[1], DCT_CONST_BITS)); - d.val[1] = vcombine_s32(vrshrn_n_s64(c[2], DCT_CONST_BITS), - vrshrn_n_s64(c[3], DCT_CONST_BITS)); - return d; + + t[0] = vsubq_s32(a.val[0], b.val[0]); + t[1] = vsubq_s32(a.val[1], b.val[1]); + c[0] = vmull_n_s32(vget_low_s32(t[0]), ab_const); + c[1] = vmull_n_s32(vget_high_s32(t[0]), ab_const); + c[2] = vmull_n_s32(vget_low_s32(t[1]), ab_const); + c[3] = vmull_n_s32(vget_high_s32(t[1]), ab_const); + return dct_const_round_shift_high_4x2(c); } // Multiply a by a_const and b by b_const, then accumulate. Shift and narrow by @@ -200,7 +212,6 @@ static INLINE int32x4x2_t multiply_accumulate_shift_and_narrow_s32_dual( const int32x4x2_t a, const int32_t a_const, const int32x4x2_t b, const int32_t b_const) { int64x2_t c[4]; - int32x4x2_t d; c[0] = vmull_n_s32(vget_low_s32(a.val[0]), a_const); c[1] = vmull_n_s32(vget_high_s32(a.val[0]), a_const); c[2] = vmull_n_s32(vget_low_s32(a.val[1]), a_const); @@ -209,72 +220,66 @@ static INLINE int32x4x2_t multiply_accumulate_shift_and_narrow_s32_dual( c[1] = vmlal_n_s32(c[1], vget_high_s32(b.val[0]), b_const); c[2] = vmlal_n_s32(c[2], vget_low_s32(b.val[1]), b_const); c[3] = vmlal_n_s32(c[3], vget_high_s32(b.val[1]), b_const); - d.val[0] = vcombine_s32(vrshrn_n_s64(c[0], DCT_CONST_BITS), - vrshrn_n_s64(c[1], DCT_CONST_BITS)); - d.val[1] = vcombine_s32(vrshrn_n_s64(c[2], DCT_CONST_BITS), - vrshrn_n_s64(c[3], DCT_CONST_BITS)); - return d; + return dct_const_round_shift_high_4x2(c); } // Shift the output down by 6 and add it to the destination buffer. 
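// As a reading aid for the hunk below: per pixel it computes
// dest = clamp(dest + round(res / 64), 0, 255); vrsraq_n_s16(d, a, 6) is the
// rounding shift-right-and-accumulate, and vqmovun_s16 the saturating narrow
// back to bytes. A scalar sketch (add_and_store_u8_s16_ref is an illustrative
// name, not part of this patch; types come from <stdint.h>):
static void add_and_store_u8_s16_ref(const int16_t *a /* 8x8 residuals */,
                                     uint8_t *d, int stride) {
  int r, c;
  for (r = 0; r < 8; ++r, d += stride) {
    for (c = 0; c < 8; ++c) {
      const int v = d[c] + ((a[r * 8 + c] + 32) >> 6);   /* d + ((a + 32) >> 6) */
      d[c] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v)); /* clamp to [0, 255] */
    }
  }
}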
-static INLINE void add_and_store_u8_s16(const int16x8_t a0, const int16x8_t a1, - const int16x8_t a2, const int16x8_t a3, - const int16x8_t a4, const int16x8_t a5, - const int16x8_t a6, const int16x8_t a7, - uint8_t *b, const int b_stride) { - uint8x8_t b0, b1, b2, b3, b4, b5, b6, b7; - int16x8_t c0, c1, c2, c3, c4, c5, c6, c7; - b0 = vld1_u8(b); - b += b_stride; - b1 = vld1_u8(b); - b += b_stride; - b2 = vld1_u8(b); - b += b_stride; - b3 = vld1_u8(b); - b += b_stride; - b4 = vld1_u8(b); - b += b_stride; - b5 = vld1_u8(b); - b += b_stride; - b6 = vld1_u8(b); - b += b_stride; - b7 = vld1_u8(b); - b -= (7 * b_stride); +static INLINE void add_and_store_u8_s16(const int16x8_t *const a, uint8_t *d, + const int stride) { + uint8x8_t b[8]; + int16x8_t c[8]; + + b[0] = vld1_u8(d); + d += stride; + b[1] = vld1_u8(d); + d += stride; + b[2] = vld1_u8(d); + d += stride; + b[3] = vld1_u8(d); + d += stride; + b[4] = vld1_u8(d); + d += stride; + b[5] = vld1_u8(d); + d += stride; + b[6] = vld1_u8(d); + d += stride; + b[7] = vld1_u8(d); + d -= (7 * stride); // c = b + (a >> 6) - c0 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b0)), a0, 6); - c1 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b1)), a1, 6); - c2 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b2)), a2, 6); - c3 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b3)), a3, 6); - c4 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b4)), a4, 6); - c5 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b5)), a5, 6); - c6 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b6)), a6, 6); - c7 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b7)), a7, 6); + c[0] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[0])), a[0], 6); + c[1] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[1])), a[1], 6); + c[2] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[2])), a[2], 6); + c[3] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[3])), a[3], 6); + c[4] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[4])), a[4], 6); + c[5] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[5])), a[5], 6); + c[6] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[6])), a[6], 6); + c[7] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[7])), a[7], 6); - b0 = vqmovun_s16(c0); - b1 = vqmovun_s16(c1); - b2 = vqmovun_s16(c2); - b3 = vqmovun_s16(c3); - b4 = vqmovun_s16(c4); - b5 = vqmovun_s16(c5); - b6 = vqmovun_s16(c6); - b7 = vqmovun_s16(c7); + b[0] = vqmovun_s16(c[0]); + b[1] = vqmovun_s16(c[1]); + b[2] = vqmovun_s16(c[2]); + b[3] = vqmovun_s16(c[3]); + b[4] = vqmovun_s16(c[4]); + b[5] = vqmovun_s16(c[5]); + b[6] = vqmovun_s16(c[6]); + b[7] = vqmovun_s16(c[7]); - vst1_u8(b, b0); - b += b_stride; - vst1_u8(b, b1); - b += b_stride; - vst1_u8(b, b2); - b += b_stride; - vst1_u8(b, b3); - b += b_stride; - vst1_u8(b, b4); - b += b_stride; - vst1_u8(b, b5); - b += b_stride; - vst1_u8(b, b6); - b += b_stride; - vst1_u8(b, b7); + vst1_u8(d, b[0]); + d += stride; + vst1_u8(d, b[1]); + d += stride; + vst1_u8(d, b[2]); + d += stride; + vst1_u8(d, b[3]); + d += stride; + vst1_u8(d, b[4]); + d += stride; + vst1_u8(d, b[5]); + d += stride; + vst1_u8(d, b[6]); + d += stride; + vst1_u8(d, b[7]); } static INLINE uint8x16_t create_dcq(const int16_t dc) { @@ -283,56 +288,53 @@ static INLINE uint8x16_t create_dcq(const int16_t dc) { return vdupq_n_u8((uint8_t)t); } -static INLINE void idct4x4_16_kernel_bd8(const int16x4_t cospis, - int16x8_t *const a0, - int16x8_t *const a1) { - int16x4_t b0, b1, b2, b3; - int32x4_t c0, c1, c2, c3; - int16x8_t d0, d1; +static INLINE void idct4x4_16_kernel_bd8(int16x8_t *const a) { + const int16x4_t 
cospis = vld1_s16(kCospi); + int16x4_t b[4]; + int32x4_t c[4]; + int16x8_t d[2]; - transpose_s16_4x4q(a0, a1); - b0 = vget_low_s16(*a0); - b1 = vget_high_s16(*a0); - b2 = vget_low_s16(*a1); - b3 = vget_high_s16(*a1); - c0 = vmull_lane_s16(b0, cospis, 2); - c2 = vmull_lane_s16(b1, cospis, 2); - c1 = vsubq_s32(c0, c2); - c0 = vaddq_s32(c0, c2); - c2 = vmull_lane_s16(b2, cospis, 3); - c3 = vmull_lane_s16(b2, cospis, 1); - c2 = vmlsl_lane_s16(c2, b3, cospis, 1); - c3 = vmlal_lane_s16(c3, b3, cospis, 3); - b0 = vrshrn_n_s32(c0, DCT_CONST_BITS); - b1 = vrshrn_n_s32(c1, DCT_CONST_BITS); - b2 = vrshrn_n_s32(c2, DCT_CONST_BITS); - b3 = vrshrn_n_s32(c3, DCT_CONST_BITS); - d0 = vcombine_s16(b0, b1); - d1 = vcombine_s16(b3, b2); - *a0 = vaddq_s16(d0, d1); - *a1 = vsubq_s16(d0, d1); + b[0] = vget_low_s16(a[0]); + b[1] = vget_high_s16(a[0]); + b[2] = vget_low_s16(a[1]); + b[3] = vget_high_s16(a[1]); + c[0] = vmull_lane_s16(b[0], cospis, 2); + c[2] = vmull_lane_s16(b[1], cospis, 2); + c[1] = vsubq_s32(c[0], c[2]); + c[0] = vaddq_s32(c[0], c[2]); + c[3] = vmull_lane_s16(b[2], cospis, 3); + c[2] = vmull_lane_s16(b[2], cospis, 1); + c[3] = vmlsl_lane_s16(c[3], b[3], cospis, 1); + c[2] = vmlal_lane_s16(c[2], b[3], cospis, 3); + dct_const_round_shift_low_8_dual(c, &d[0], &d[1]); + a[0] = vaddq_s16(d[0], d[1]); + a[1] = vsubq_s16(d[0], d[1]); } -static INLINE void idct8x8_12_pass1_bd8( - const int16x4_t cospis0, const int16x4_t cospisd0, const int16x4_t cospisd1, - int16x4_t *const io0, int16x4_t *const io1, int16x4_t *const io2, - int16x4_t *const io3, int16x4_t *const io4, int16x4_t *const io5, - int16x4_t *const io6, int16x4_t *const io7) { +static INLINE void transpose_idct4x4_16_bd8(int16x8_t *const a) { + transpose_s16_4x4q(&a[0], &a[1]); + idct4x4_16_kernel_bd8(a); +} + +static INLINE void idct8x8_12_pass1_bd8(const int16x4_t cospis0, + const int16x4_t cospisd0, + const int16x4_t cospisd1, + int16x4_t *const io) { int16x4_t step1[8], step2[8]; int32x4_t t32[2]; - transpose_s16_4x4d(io0, io1, io2, io3); + transpose_s16_4x4d(&io[0], &io[1], &io[2], &io[3]); // stage 1 - step1[4] = vqrdmulh_lane_s16(*io1, cospisd1, 3); - step1[5] = vqrdmulh_lane_s16(*io3, cospisd1, 2); - step1[6] = vqrdmulh_lane_s16(*io3, cospisd1, 1); - step1[7] = vqrdmulh_lane_s16(*io1, cospisd1, 0); + step1[4] = vqrdmulh_lane_s16(io[1], cospisd1, 3); + step1[5] = vqrdmulh_lane_s16(io[3], cospisd1, 2); + step1[6] = vqrdmulh_lane_s16(io[3], cospisd1, 1); + step1[7] = vqrdmulh_lane_s16(io[1], cospisd1, 0); // stage 2 - step2[1] = vqrdmulh_lane_s16(*io0, cospisd0, 2); - step2[2] = vqrdmulh_lane_s16(*io2, cospisd0, 3); - step2[3] = vqrdmulh_lane_s16(*io2, cospisd0, 1); + step2[1] = vqrdmulh_lane_s16(io[0], cospisd0, 2); + step2[2] = vqrdmulh_lane_s16(io[2], cospisd0, 3); + step2[3] = vqrdmulh_lane_s16(io[2], cospisd0, 1); step2[4] = vadd_s16(step1[4], step1[5]); step2[5] = vsub_s16(step1[4], step1[5]); @@ -352,32 +354,27 @@ static INLINE void idct8x8_12_pass1_bd8( step1[6] = vrshrn_n_s32(t32[1], DCT_CONST_BITS); // stage 4 - *io0 = vadd_s16(step1[0], step2[7]); - *io1 = vadd_s16(step1[1], step1[6]); - *io2 = vadd_s16(step1[2], step1[5]); - *io3 = vadd_s16(step1[3], step2[4]); - *io4 = vsub_s16(step1[3], step2[4]); - *io5 = vsub_s16(step1[2], step1[5]); - *io6 = vsub_s16(step1[1], step1[6]); - *io7 = vsub_s16(step1[0], step2[7]); + io[0] = vadd_s16(step1[0], step2[7]); + io[1] = vadd_s16(step1[1], step1[6]); + io[2] = vadd_s16(step1[2], step1[5]); + io[3] = vadd_s16(step1[3], step2[4]); + io[4] = vsub_s16(step1[3], step2[4]); + io[5] = 
vsub_s16(step1[2], step1[5]); + io[6] = vsub_s16(step1[1], step1[6]); + io[7] = vsub_s16(step1[0], step2[7]); } -static INLINE void idct8x8_12_pass2_bd8( - const int16x4_t cospis0, const int16x4_t cospisd0, const int16x4_t cospisd1, - const int16x4_t input0, const int16x4_t input1, const int16x4_t input2, - const int16x4_t input3, const int16x4_t input4, const int16x4_t input5, - const int16x4_t input6, const int16x4_t input7, int16x8_t *const output0, - int16x8_t *const output1, int16x8_t *const output2, - int16x8_t *const output3, int16x8_t *const output4, - int16x8_t *const output5, int16x8_t *const output6, - int16x8_t *const output7) { +static INLINE void idct8x8_12_pass2_bd8(const int16x4_t cospis0, + const int16x4_t cospisd0, + const int16x4_t cospisd1, + const int16x4_t *const input, + int16x8_t *const output) { int16x8_t in[4]; int16x8_t step1[8], step2[8]; int32x4_t t32[8]; - int16x4_t t16[8]; - transpose_s16_4x8(input0, input1, input2, input3, input4, input5, input6, - input7, &in[0], &in[1], &in[2], &in[3]); + transpose_s16_4x8(input[0], input[1], input[2], input[3], input[4], input[5], + input[6], input[7], &in[0], &in[1], &in[2], &in[3]); // stage 1 step1[4] = vqrdmulhq_lane_s16(in[1], cospisd1, 3); @@ -407,86 +404,64 @@ static INLINE void idct8x8_12_pass2_bd8( t32[1] = vmlsl_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2); t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2); - t16[0] = vrshrn_n_s32(t32[0], DCT_CONST_BITS); - t16[1] = vrshrn_n_s32(t32[1], DCT_CONST_BITS); - t16[2] = vrshrn_n_s32(t32[2], DCT_CONST_BITS); - t16[3] = vrshrn_n_s32(t32[3], DCT_CONST_BITS); - step1[5] = vcombine_s16(t16[0], t16[1]); - step1[6] = vcombine_s16(t16[2], t16[3]); + dct_const_round_shift_low_8_dual(t32, &step1[5], &step1[6]); // stage 4 - *output0 = vaddq_s16(step1[0], step2[7]); - *output1 = vaddq_s16(step1[1], step1[6]); - *output2 = vaddq_s16(step1[2], step1[5]); - *output3 = vaddq_s16(step1[3], step2[4]); - *output4 = vsubq_s16(step1[3], step2[4]); - *output5 = vsubq_s16(step1[2], step1[5]); - *output6 = vsubq_s16(step1[1], step1[6]); - *output7 = vsubq_s16(step1[0], step2[7]); + output[0] = vaddq_s16(step1[0], step2[7]); + output[1] = vaddq_s16(step1[1], step1[6]); + output[2] = vaddq_s16(step1[2], step1[5]); + output[3] = vaddq_s16(step1[3], step2[4]); + output[4] = vsubq_s16(step1[3], step2[4]); + output[5] = vsubq_s16(step1[2], step1[5]); + output[6] = vsubq_s16(step1[1], step1[6]); + output[7] = vsubq_s16(step1[0], step2[7]); } -static INLINE void idct8x8_64_1d_bd8(const int16x4_t cospis0, - const int16x4_t cospis1, - int16x8_t *const io0, int16x8_t *const io1, - int16x8_t *const io2, int16x8_t *const io3, - int16x8_t *const io4, int16x8_t *const io5, - int16x8_t *const io6, - int16x8_t *const io7) { - int16x4_t input_1l, input_1h, input_3l, input_3h, input_5l, input_5h, - input_7l, input_7h; +static INLINE void idct8x8_64_1d_bd8_kernel(const int16x4_t cospis0, + const int16x4_t cospis1, + int16x8_t *const io) { + int16x4_t input1l, input1h, input3l, input3h, input5l, input5h, input7l, + input7h; int16x4_t step1l[4], step1h[4]; int16x8_t step1[8], step2[8]; int32x4_t t32[8]; - int16x4_t t16[8]; - - transpose_s16_8x8(io0, io1, io2, io3, io4, io5, io6, io7); // stage 1 - input_1l = vget_low_s16(*io1); - input_1h = vget_high_s16(*io1); - input_3l = vget_low_s16(*io3); - input_3h = vget_high_s16(*io3); - input_5l = vget_low_s16(*io5); - input_5h = vget_high_s16(*io5); - input_7l = 
vget_low_s16(*io7); - input_7h = vget_high_s16(*io7); - step1l[0] = vget_low_s16(*io0); - step1h[0] = vget_high_s16(*io0); - step1l[1] = vget_low_s16(*io2); - step1h[1] = vget_high_s16(*io2); - step1l[2] = vget_low_s16(*io4); - step1h[2] = vget_high_s16(*io4); - step1l[3] = vget_low_s16(*io6); - step1h[3] = vget_high_s16(*io6); + input1l = vget_low_s16(io[1]); + input1h = vget_high_s16(io[1]); + input3l = vget_low_s16(io[3]); + input3h = vget_high_s16(io[3]); + input5l = vget_low_s16(io[5]); + input5h = vget_high_s16(io[5]); + input7l = vget_low_s16(io[7]); + input7h = vget_high_s16(io[7]); + step1l[0] = vget_low_s16(io[0]); + step1h[0] = vget_high_s16(io[0]); + step1l[1] = vget_low_s16(io[2]); + step1h[1] = vget_high_s16(io[2]); + step1l[2] = vget_low_s16(io[4]); + step1h[2] = vget_high_s16(io[4]); + step1l[3] = vget_low_s16(io[6]); + step1h[3] = vget_high_s16(io[6]); - t32[0] = vmull_lane_s16(input_1l, cospis1, 3); - t32[1] = vmull_lane_s16(input_1h, cospis1, 3); - t32[2] = vmull_lane_s16(input_3l, cospis1, 2); - t32[3] = vmull_lane_s16(input_3h, cospis1, 2); - t32[4] = vmull_lane_s16(input_3l, cospis1, 1); - t32[5] = vmull_lane_s16(input_3h, cospis1, 1); - t32[6] = vmull_lane_s16(input_1l, cospis1, 0); - t32[7] = vmull_lane_s16(input_1h, cospis1, 0); - t32[0] = vmlsl_lane_s16(t32[0], input_7l, cospis1, 0); - t32[1] = vmlsl_lane_s16(t32[1], input_7h, cospis1, 0); - t32[2] = vmlal_lane_s16(t32[2], input_5l, cospis1, 1); - t32[3] = vmlal_lane_s16(t32[3], input_5h, cospis1, 1); - t32[4] = vmlsl_lane_s16(t32[4], input_5l, cospis1, 2); - t32[5] = vmlsl_lane_s16(t32[5], input_5h, cospis1, 2); - t32[6] = vmlal_lane_s16(t32[6], input_7l, cospis1, 3); - t32[7] = vmlal_lane_s16(t32[7], input_7h, cospis1, 3); - t16[0] = vrshrn_n_s32(t32[0], DCT_CONST_BITS); - t16[1] = vrshrn_n_s32(t32[1], DCT_CONST_BITS); - t16[2] = vrshrn_n_s32(t32[2], DCT_CONST_BITS); - t16[3] = vrshrn_n_s32(t32[3], DCT_CONST_BITS); - t16[4] = vrshrn_n_s32(t32[4], DCT_CONST_BITS); - t16[5] = vrshrn_n_s32(t32[5], DCT_CONST_BITS); - t16[6] = vrshrn_n_s32(t32[6], DCT_CONST_BITS); - t16[7] = vrshrn_n_s32(t32[7], DCT_CONST_BITS); - step1[4] = vcombine_s16(t16[0], t16[1]); - step1[5] = vcombine_s16(t16[2], t16[3]); - step1[6] = vcombine_s16(t16[4], t16[5]); - step1[7] = vcombine_s16(t16[6], t16[7]); + t32[0] = vmull_lane_s16(input1l, cospis1, 3); + t32[1] = vmull_lane_s16(input1h, cospis1, 3); + t32[2] = vmull_lane_s16(input3l, cospis1, 2); + t32[3] = vmull_lane_s16(input3h, cospis1, 2); + t32[4] = vmull_lane_s16(input3l, cospis1, 1); + t32[5] = vmull_lane_s16(input3h, cospis1, 1); + t32[6] = vmull_lane_s16(input1l, cospis1, 0); + t32[7] = vmull_lane_s16(input1h, cospis1, 0); + t32[0] = vmlsl_lane_s16(t32[0], input7l, cospis1, 0); + t32[1] = vmlsl_lane_s16(t32[1], input7h, cospis1, 0); + t32[2] = vmlal_lane_s16(t32[2], input5l, cospis1, 1); + t32[3] = vmlal_lane_s16(t32[3], input5h, cospis1, 1); + t32[4] = vmlsl_lane_s16(t32[4], input5l, cospis1, 2); + t32[5] = vmlsl_lane_s16(t32[5], input5h, cospis1, 2); + t32[6] = vmlal_lane_s16(t32[6], input7l, cospis1, 3); + t32[7] = vmlal_lane_s16(t32[7], input7h, cospis1, 3); + dct_const_round_shift_low_8_dual(&t32[0], &step1[4], &step1[5]); + dct_const_round_shift_low_8_dual(&t32[4], &step1[6], &step1[7]); // stage 2 t32[2] = vmull_lane_s16(step1l[0], cospis0, 2); @@ -503,18 +478,8 @@ static INLINE void idct8x8_64_1d_bd8(const int16x4_t cospis0, t32[5] = vmlsl_lane_s16(t32[5], step1h[3], cospis0, 1); t32[6] = vmlal_lane_s16(t32[6], step1l[3], cospis0, 3); t32[7] = vmlal_lane_s16(t32[7], 
step1h[3], cospis0, 3); - t16[0] = vrshrn_n_s32(t32[0], DCT_CONST_BITS); - t16[1] = vrshrn_n_s32(t32[1], DCT_CONST_BITS); - t16[2] = vrshrn_n_s32(t32[2], DCT_CONST_BITS); - t16[3] = vrshrn_n_s32(t32[3], DCT_CONST_BITS); - t16[4] = vrshrn_n_s32(t32[4], DCT_CONST_BITS); - t16[5] = vrshrn_n_s32(t32[5], DCT_CONST_BITS); - t16[6] = vrshrn_n_s32(t32[6], DCT_CONST_BITS); - t16[7] = vrshrn_n_s32(t32[7], DCT_CONST_BITS); - step2[0] = vcombine_s16(t16[0], t16[1]); - step2[1] = vcombine_s16(t16[2], t16[3]); - step2[2] = vcombine_s16(t16[4], t16[5]); - step2[3] = vcombine_s16(t16[6], t16[7]); + dct_const_round_shift_low_8_dual(&t32[0], &step2[0], &step2[1]); + dct_const_round_shift_low_8_dual(&t32[4], &step2[2], &step2[3]); step2[4] = vaddq_s16(step1[4], step1[5]); step2[5] = vsubq_s16(step1[4], step1[5]); @@ -533,35 +498,25 @@ static INLINE void idct8x8_64_1d_bd8(const int16x4_t cospis0, t32[1] = vmlsl_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2); t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2); - t16[0] = vrshrn_n_s32(t32[0], DCT_CONST_BITS); - t16[1] = vrshrn_n_s32(t32[1], DCT_CONST_BITS); - t16[2] = vrshrn_n_s32(t32[2], DCT_CONST_BITS); - t16[3] = vrshrn_n_s32(t32[3], DCT_CONST_BITS); - step1[5] = vcombine_s16(t16[0], t16[1]); - step1[6] = vcombine_s16(t16[2], t16[3]); + dct_const_round_shift_low_8_dual(t32, &step1[5], &step1[6]); // stage 4 - *io0 = vaddq_s16(step1[0], step2[7]); - *io1 = vaddq_s16(step1[1], step1[6]); - *io2 = vaddq_s16(step1[2], step1[5]); - *io3 = vaddq_s16(step1[3], step2[4]); - *io4 = vsubq_s16(step1[3], step2[4]); - *io5 = vsubq_s16(step1[2], step1[5]); - *io6 = vsubq_s16(step1[1], step1[6]); - *io7 = vsubq_s16(step1[0], step2[7]); + io[0] = vaddq_s16(step1[0], step2[7]); + io[1] = vaddq_s16(step1[1], step1[6]); + io[2] = vaddq_s16(step1[2], step1[5]); + io[3] = vaddq_s16(step1[3], step2[4]); + io[4] = vsubq_s16(step1[3], step2[4]); + io[5] = vsubq_s16(step1[2], step1[5]); + io[6] = vsubq_s16(step1[1], step1[6]); + io[7] = vsubq_s16(step1[0], step2[7]); } -static INLINE void idct16x16_add_wrap_low_8x2(const int32x4_t *const t32, - int16x8_t *const d0, - int16x8_t *const d1) { - int16x4_t t16[4]; - - t16[0] = vrshrn_n_s32(t32[0], DCT_CONST_BITS); - t16[1] = vrshrn_n_s32(t32[1], DCT_CONST_BITS); - t16[2] = vrshrn_n_s32(t32[2], DCT_CONST_BITS); - t16[3] = vrshrn_n_s32(t32[3], DCT_CONST_BITS); - *d0 = vcombine_s16(t16[0], t16[1]); - *d1 = vcombine_s16(t16[2], t16[3]); +static INLINE void idct8x8_64_1d_bd8(const int16x4_t cospis0, + const int16x4_t cospis1, + int16x8_t *const io) { + transpose_s16_8x8(&io[0], &io[1], &io[2], &io[3], &io[4], &io[5], &io[6], + &io[7]); + idct8x8_64_1d_bd8_kernel(cospis0, cospis1, io); } static INLINE void idct_cospi_8_24_q_kernel(const int16x8_t s0, @@ -584,7 +539,7 @@ static INLINE void idct_cospi_8_24_q(const int16x8_t s0, const int16x8_t s1, int32x4_t t32[4]; idct_cospi_8_24_q_kernel(s0, s1, cospi_0_8_16_24, t32); - idct16x16_add_wrap_low_8x2(t32, d0, d1); + dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_8_24_neg_q(const int16x8_t s0, const int16x8_t s1, @@ -596,7 +551,7 @@ static INLINE void idct_cospi_8_24_neg_q(const int16x8_t s0, const int16x8_t s1, idct_cospi_8_24_q_kernel(s0, s1, cospi_0_8_16_24, t32); t32[2] = vnegq_s32(t32[2]); t32[3] = vnegq_s32(t32[3]); - idct16x16_add_wrap_low_8x2(t32, d0, d1); + dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_16_16_q(const int16x8_t s0, 
const int16x8_t s1, @@ -611,7 +566,7 @@ static INLINE void idct_cospi_16_16_q(const int16x8_t s0, const int16x8_t s1, t32[1] = vmlsl_lane_s16(t32[5], vget_high_s16(s0), cospi_0_8_16_24, 2); t32[2] = vmlal_lane_s16(t32[4], vget_low_s16(s0), cospi_0_8_16_24, 2); t32[3] = vmlal_lane_s16(t32[5], vget_high_s16(s0), cospi_0_8_16_24, 2); - idct16x16_add_wrap_low_8x2(t32, d0, d1); + dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_2_30(const int16x8_t s0, const int16x8_t s1, @@ -627,7 +582,7 @@ static INLINE void idct_cospi_2_30(const int16x8_t s0, const int16x8_t s1, t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_2_30_10_22, 0); t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_2_30_10_22, 0); t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_2_30_10_22, 0); - idct16x16_add_wrap_low_8x2(t32, d0, d1); + dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_4_28(const int16x8_t s0, const int16x8_t s1, @@ -643,7 +598,7 @@ static INLINE void idct_cospi_4_28(const int16x8_t s0, const int16x8_t s1, t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_4_12_20N_28, 0); t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_4_12_20N_28, 0); t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_4_12_20N_28, 0); - idct16x16_add_wrap_low_8x2(t32, d0, d1); + dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_6_26(const int16x8_t s0, const int16x8_t s1, @@ -659,7 +614,7 @@ static INLINE void idct_cospi_6_26(const int16x8_t s0, const int16x8_t s1, t32[1] = vmlal_lane_s16(t32[1], vget_high_s16(s1), cospi_6_26N_14_18N, 1); t32[2] = vmlsl_lane_s16(t32[2], vget_low_s16(s0), cospi_6_26N_14_18N, 1); t32[3] = vmlsl_lane_s16(t32[3], vget_high_s16(s0), cospi_6_26N_14_18N, 1); - idct16x16_add_wrap_low_8x2(t32, d0, d1); + dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_10_22(const int16x8_t s0, const int16x8_t s1, @@ -675,7 +630,7 @@ static INLINE void idct_cospi_10_22(const int16x8_t s0, const int16x8_t s1, t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_2_30_10_22, 2); t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_2_30_10_22, 2); t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_2_30_10_22, 2); - idct16x16_add_wrap_low_8x2(t32, d0, d1); + dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_12_20(const int16x8_t s0, const int16x8_t s1, @@ -691,7 +646,7 @@ static INLINE void idct_cospi_12_20(const int16x8_t s0, const int16x8_t s1, t32[1] = vmlal_lane_s16(t32[1], vget_high_s16(s1), cospi_4_12_20N_28, 2); t32[2] = vmlsl_lane_s16(t32[2], vget_low_s16(s0), cospi_4_12_20N_28, 2); t32[3] = vmlsl_lane_s16(t32[3], vget_high_s16(s0), cospi_4_12_20N_28, 2); - idct16x16_add_wrap_low_8x2(t32, d0, d1); + dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_14_18(const int16x8_t s0, const int16x8_t s1, @@ -707,7 +662,7 @@ static INLINE void idct_cospi_14_18(const int16x8_t s0, const int16x8_t s1, t32[1] = vmlal_lane_s16(t32[1], vget_high_s16(s1), cospi_6_26N_14_18N, 3); t32[2] = vmlsl_lane_s16(t32[2], vget_low_s16(s0), cospi_6_26N_14_18N, 3); t32[3] = vmlsl_lane_s16(t32[3], vget_high_s16(s0), cospi_6_26N_14_18N, 3); - idct16x16_add_wrap_low_8x2(t32, d0, d1); + dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct16x16_add_stage7(const int16x8_t *const step2, @@ -786,129 +741,153 @@ static INLINE void idct16x16_store_pass1(const int16x8_t *const out, vst1q_s16(output, out[15]); } -static INLINE 
void idct16x16_add8x1(int16x8_t res, uint8_t **dest, - const int stride) { - uint8x8_t d = vld1_u8(*dest); - uint16x8_t q; - - res = vrshrq_n_s16(res, 6); - q = vaddw_u8(vreinterpretq_u16_s16(res), d); - d = vqmovun_s16(vreinterpretq_s16_u16(q)); +static INLINE void idct8x8_add8x1(const int16x8_t a, uint8_t **const dest, + const int stride) { + const uint8x8_t s = vld1_u8(*dest); + const int16x8_t res = vrshrq_n_s16(a, 5); + const uint16x8_t q = vaddw_u8(vreinterpretq_u16_s16(res), s); + const uint8x8_t d = vqmovun_s16(vreinterpretq_s16_u16(q)); vst1_u8(*dest, d); *dest += stride; } -static INLINE void highbd_idct16x16_add8x1(int16x8_t res, const int16x8_t max, - uint16_t **dest, const int stride) { - uint16x8_t d = vld1q_u16(*dest); +static INLINE void idct8x8_add8x8_neon(int16x8_t *const out, uint8_t *dest, + const int stride) { + idct8x8_add8x1(out[0], &dest, stride); + idct8x8_add8x1(out[1], &dest, stride); + idct8x8_add8x1(out[2], &dest, stride); + idct8x8_add8x1(out[3], &dest, stride); + idct8x8_add8x1(out[4], &dest, stride); + idct8x8_add8x1(out[5], &dest, stride); + idct8x8_add8x1(out[6], &dest, stride); + idct8x8_add8x1(out[7], &dest, stride); +} - res = vqaddq_s16(res, vreinterpretq_s16_u16(d)); - res = vminq_s16(res, max); - d = vqshluq_n_s16(res, 0); +static INLINE void idct16x16_add8x1(const int16x8_t a, uint8_t **const dest, + const int stride) { + const uint8x8_t s = vld1_u8(*dest); + const int16x8_t res = vrshrq_n_s16(a, 6); + const uint16x8_t q = vaddw_u8(vreinterpretq_u16_s16(res), s); + const uint8x8_t d = vqmovun_s16(vreinterpretq_s16_u16(q)); + vst1_u8(*dest, d); + *dest += stride; +} + +static INLINE void idct16x16_add_store(const int16x8_t *const out, + uint8_t *dest, const int stride) { + // Add the result to dest + idct16x16_add8x1(out[0], &dest, stride); + idct16x16_add8x1(out[1], &dest, stride); + idct16x16_add8x1(out[2], &dest, stride); + idct16x16_add8x1(out[3], &dest, stride); + idct16x16_add8x1(out[4], &dest, stride); + idct16x16_add8x1(out[5], &dest, stride); + idct16x16_add8x1(out[6], &dest, stride); + idct16x16_add8x1(out[7], &dest, stride); + idct16x16_add8x1(out[8], &dest, stride); + idct16x16_add8x1(out[9], &dest, stride); + idct16x16_add8x1(out[10], &dest, stride); + idct16x16_add8x1(out[11], &dest, stride); + idct16x16_add8x1(out[12], &dest, stride); + idct16x16_add8x1(out[13], &dest, stride); + idct16x16_add8x1(out[14], &dest, stride); + idct16x16_add8x1(out[15], &dest, stride); +} + +static INLINE void highbd_idct16x16_add8x1(const int16x8_t a, + const int16x8_t max, + uint16_t **const dest, + const int stride) { + const uint16x8_t s = vld1q_u16(*dest); + const int16x8_t res0 = vqaddq_s16(a, vreinterpretq_s16_u16(s)); + const int16x8_t res1 = vminq_s16(res0, max); + const uint16x8_t d = vqshluq_n_s16(res1, 0); vst1q_u16(*dest, d); *dest += stride; } -static INLINE void highbd_idct16x16_add8x1_bd8(int16x8_t res, uint16_t **dest, - const int stride) { - uint16x8_t d = vld1q_u16(*dest); +static INLINE void idct16x16_add_store_bd8(int16x8_t *const out, uint16_t *dest, + const int stride) { + // Add the result to dest + const int16x8_t max = vdupq_n_s16((1 << 8) - 1); + out[0] = vrshrq_n_s16(out[0], 6); + out[1] = vrshrq_n_s16(out[1], 6); + out[2] = vrshrq_n_s16(out[2], 6); + out[3] = vrshrq_n_s16(out[3], 6); + out[4] = vrshrq_n_s16(out[4], 6); + out[5] = vrshrq_n_s16(out[5], 6); + out[6] = vrshrq_n_s16(out[6], 6); + out[7] = vrshrq_n_s16(out[7], 6); + out[8] = vrshrq_n_s16(out[8], 6); + out[9] = vrshrq_n_s16(out[9], 6); + out[10] = vrshrq_n_s16(out[10], 
6); + out[11] = vrshrq_n_s16(out[11], 6); + out[12] = vrshrq_n_s16(out[12], 6); + out[13] = vrshrq_n_s16(out[13], 6); + out[14] = vrshrq_n_s16(out[14], 6); + out[15] = vrshrq_n_s16(out[15], 6); + highbd_idct16x16_add8x1(out[0], max, &dest, stride); + highbd_idct16x16_add8x1(out[1], max, &dest, stride); + highbd_idct16x16_add8x1(out[2], max, &dest, stride); + highbd_idct16x16_add8x1(out[3], max, &dest, stride); + highbd_idct16x16_add8x1(out[4], max, &dest, stride); + highbd_idct16x16_add8x1(out[5], max, &dest, stride); + highbd_idct16x16_add8x1(out[6], max, &dest, stride); + highbd_idct16x16_add8x1(out[7], max, &dest, stride); + highbd_idct16x16_add8x1(out[8], max, &dest, stride); + highbd_idct16x16_add8x1(out[9], max, &dest, stride); + highbd_idct16x16_add8x1(out[10], max, &dest, stride); + highbd_idct16x16_add8x1(out[11], max, &dest, stride); + highbd_idct16x16_add8x1(out[12], max, &dest, stride); + highbd_idct16x16_add8x1(out[13], max, &dest, stride); + highbd_idct16x16_add8x1(out[14], max, &dest, stride); + highbd_idct16x16_add8x1(out[15], max, &dest, stride); +} - res = vrsraq_n_s16(vreinterpretq_s16_u16(d), res, 6); - d = vmovl_u8(vqmovun_s16(res)); +static INLINE void highbd_idct16x16_add8x1_bd8(const int16x8_t a, + uint16_t **const dest, + const int stride) { + const uint16x8_t s = vld1q_u16(*dest); + const int16x8_t res = vrsraq_n_s16(vreinterpretq_s16_u16(s), a, 6); + const uint16x8_t d = vmovl_u8(vqmovun_s16(res)); vst1q_u16(*dest, d); *dest += stride; } static INLINE void highbd_add_and_store_bd8(const int16x8_t *const a, - uint16_t *out, const int b_stride) { - highbd_idct16x16_add8x1_bd8(a[0], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[1], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[2], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[3], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[4], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[5], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[6], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[7], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[8], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[9], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[10], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[11], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[12], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[13], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[14], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[15], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[16], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[17], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[18], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[19], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[20], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[21], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[22], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[23], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[24], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[25], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[26], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[27], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[28], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[29], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[30], &out, b_stride); - highbd_idct16x16_add8x1_bd8(a[31], &out, b_stride); -} - -static INLINE void highbd_idct16x16_add_store(const int32x4x2_t *const out, - uint16_t *dest, const int stride, - const int bd) { - // Add the result to dest - const int16x8_t max = vdupq_n_s16((1 << bd) - 1); - int16x8_t o[16]; - 
o[0] = vcombine_s16(vrshrn_n_s32(out[0].val[0], 6), - vrshrn_n_s32(out[0].val[1], 6)); - o[1] = vcombine_s16(vrshrn_n_s32(out[1].val[0], 6), - vrshrn_n_s32(out[1].val[1], 6)); - o[2] = vcombine_s16(vrshrn_n_s32(out[2].val[0], 6), - vrshrn_n_s32(out[2].val[1], 6)); - o[3] = vcombine_s16(vrshrn_n_s32(out[3].val[0], 6), - vrshrn_n_s32(out[3].val[1], 6)); - o[4] = vcombine_s16(vrshrn_n_s32(out[4].val[0], 6), - vrshrn_n_s32(out[4].val[1], 6)); - o[5] = vcombine_s16(vrshrn_n_s32(out[5].val[0], 6), - vrshrn_n_s32(out[5].val[1], 6)); - o[6] = vcombine_s16(vrshrn_n_s32(out[6].val[0], 6), - vrshrn_n_s32(out[6].val[1], 6)); - o[7] = vcombine_s16(vrshrn_n_s32(out[7].val[0], 6), - vrshrn_n_s32(out[7].val[1], 6)); - o[8] = vcombine_s16(vrshrn_n_s32(out[8].val[0], 6), - vrshrn_n_s32(out[8].val[1], 6)); - o[9] = vcombine_s16(vrshrn_n_s32(out[9].val[0], 6), - vrshrn_n_s32(out[9].val[1], 6)); - o[10] = vcombine_s16(vrshrn_n_s32(out[10].val[0], 6), - vrshrn_n_s32(out[10].val[1], 6)); - o[11] = vcombine_s16(vrshrn_n_s32(out[11].val[0], 6), - vrshrn_n_s32(out[11].val[1], 6)); - o[12] = vcombine_s16(vrshrn_n_s32(out[12].val[0], 6), - vrshrn_n_s32(out[12].val[1], 6)); - o[13] = vcombine_s16(vrshrn_n_s32(out[13].val[0], 6), - vrshrn_n_s32(out[13].val[1], 6)); - o[14] = vcombine_s16(vrshrn_n_s32(out[14].val[0], 6), - vrshrn_n_s32(out[14].val[1], 6)); - o[15] = vcombine_s16(vrshrn_n_s32(out[15].val[0], 6), - vrshrn_n_s32(out[15].val[1], 6)); - highbd_idct16x16_add8x1(o[0], max, &dest, stride); - highbd_idct16x16_add8x1(o[1], max, &dest, stride); - highbd_idct16x16_add8x1(o[2], max, &dest, stride); - highbd_idct16x16_add8x1(o[3], max, &dest, stride); - highbd_idct16x16_add8x1(o[4], max, &dest, stride); - highbd_idct16x16_add8x1(o[5], max, &dest, stride); - highbd_idct16x16_add8x1(o[6], max, &dest, stride); - highbd_idct16x16_add8x1(o[7], max, &dest, stride); - highbd_idct16x16_add8x1(o[8], max, &dest, stride); - highbd_idct16x16_add8x1(o[9], max, &dest, stride); - highbd_idct16x16_add8x1(o[10], max, &dest, stride); - highbd_idct16x16_add8x1(o[11], max, &dest, stride); - highbd_idct16x16_add8x1(o[12], max, &dest, stride); - highbd_idct16x16_add8x1(o[13], max, &dest, stride); - highbd_idct16x16_add8x1(o[14], max, &dest, stride); - highbd_idct16x16_add8x1(o[15], max, &dest, stride); + uint16_t *out, const int stride) { + highbd_idct16x16_add8x1_bd8(a[0], &out, stride); + highbd_idct16x16_add8x1_bd8(a[1], &out, stride); + highbd_idct16x16_add8x1_bd8(a[2], &out, stride); + highbd_idct16x16_add8x1_bd8(a[3], &out, stride); + highbd_idct16x16_add8x1_bd8(a[4], &out, stride); + highbd_idct16x16_add8x1_bd8(a[5], &out, stride); + highbd_idct16x16_add8x1_bd8(a[6], &out, stride); + highbd_idct16x16_add8x1_bd8(a[7], &out, stride); + highbd_idct16x16_add8x1_bd8(a[8], &out, stride); + highbd_idct16x16_add8x1_bd8(a[9], &out, stride); + highbd_idct16x16_add8x1_bd8(a[10], &out, stride); + highbd_idct16x16_add8x1_bd8(a[11], &out, stride); + highbd_idct16x16_add8x1_bd8(a[12], &out, stride); + highbd_idct16x16_add8x1_bd8(a[13], &out, stride); + highbd_idct16x16_add8x1_bd8(a[14], &out, stride); + highbd_idct16x16_add8x1_bd8(a[15], &out, stride); + highbd_idct16x16_add8x1_bd8(a[16], &out, stride); + highbd_idct16x16_add8x1_bd8(a[17], &out, stride); + highbd_idct16x16_add8x1_bd8(a[18], &out, stride); + highbd_idct16x16_add8x1_bd8(a[19], &out, stride); + highbd_idct16x16_add8x1_bd8(a[20], &out, stride); + highbd_idct16x16_add8x1_bd8(a[21], &out, stride); + highbd_idct16x16_add8x1_bd8(a[22], &out, stride); + highbd_idct16x16_add8x1_bd8(a[23], 
&out, stride); + highbd_idct16x16_add8x1_bd8(a[24], &out, stride); + highbd_idct16x16_add8x1_bd8(a[25], &out, stride); + highbd_idct16x16_add8x1_bd8(a[26], &out, stride); + highbd_idct16x16_add8x1_bd8(a[27], &out, stride); + highbd_idct16x16_add8x1_bd8(a[28], &out, stride); + highbd_idct16x16_add8x1_bd8(a[29], &out, stride); + highbd_idct16x16_add8x1_bd8(a[30], &out, stride); + highbd_idct16x16_add8x1_bd8(a[31], &out, stride); } void vpx_idct16x16_256_add_half1d(const void *const input, int16_t *output, @@ -937,4 +916,4 @@ void vpx_idct32_6_neon(const tran_low_t *input, int16_t *output); void vpx_idct32_8_neon(const int16_t *input, void *const output, int stride, const int highbd_flag); -#endif // VPX_DSP_ARM_IDCT_NEON_H_ +#endif // VPX_VPX_DSP_ARM_IDCT_NEON_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/arm/intrapred_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/intrapred_neon.c index fb1fa6b681df..38e275834b04 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/intrapred_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/intrapred_neon.c @@ -667,8 +667,6 @@ void vpx_d135_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, d135_store_32x2(&dst, stride, row_0, row_1, row_2); } -// ----------------------------------------------------------------------------- - #if !HAVE_NEON_ASM void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, diff --git a/media/libvpx/libvpx/vpx_dsp/arm/loopfilter_8_neon.asm b/media/libvpx/libvpx/vpx_dsp/arm/loopfilter_8_neon.asm index a042d40acb2c..a81a9d10132f 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/loopfilter_8_neon.asm +++ b/media/libvpx/libvpx/vpx_dsp/arm/loopfilter_8_neon.asm @@ -201,7 +201,7 @@ str lr, [sp, #16] ; thresh1 add sp, #4 pop {r0-r1, lr} - add r0, r1, lsl #3 ; s + 8 * pitch + add r0, r0, r1, lsl #3 ; s + 8 * pitch b vpx_lpf_vertical_8_neon ENDP ; |vpx_lpf_vertical_8_dual_neon| diff --git a/media/libvpx/libvpx/vpx_dsp/arm/mem_neon.h b/media/libvpx/libvpx/vpx_dsp/arm/mem_neon.h index 4efad5333e30..943865b3c280 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/mem_neon.h +++ b/media/libvpx/libvpx/vpx_dsp/arm/mem_neon.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_ARM_MEM_NEON_H_ -#define VPX_DSP_ARM_MEM_NEON_H_ +#ifndef VPX_VPX_DSP_ARM_MEM_NEON_H_ +#define VPX_VPX_DSP_ARM_MEM_NEON_H_ #include <arm_neon.h> #include <assert.h> @@ -19,6 +19,21 @@ #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_dsp_common.h" +static INLINE int16x4_t create_s16x4_neon(const int16_t c0, const int16_t c1, + const int16_t c2, const int16_t c3) { + return vcreate_s16((uint16_t)c0 | ((uint32_t)c1 << 16) | + ((int64_t)(uint16_t)c2 << 32) | ((int64_t)c3 << 48)); +} + +static INLINE int32x2_t create_s32x2_neon(const int32_t c0, const int32_t c1) { + return vcreate_s32((uint32_t)c0 | ((int64_t)(uint32_t)c1 << 32)); +} + +static INLINE int32x4_t create_s32x4_neon(const int32_t c0, const int32_t c1, + const int32_t c2, const int32_t c3) { + return vcombine_s32(create_s32x2_neon(c0, c1), create_s32x2_neon(c2, c3)); +} + // Helper functions used to load tran_low_t into int16, narrowing if necessary.
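// A minimal sketch of what "narrowing" means here, assuming the
// CONFIG_VP9_HIGHBITDEPTH layout where tran_low_t is int32_t: eight
// coefficients then occupy two 128-bit registers and are packed down to one
// int16x8_t, which is safe because transform coefficients fit in 16 bits.
// (narrow_tran_low_sketch is a hypothetical name, not one of these helpers.)
static INLINE int16x8_t narrow_tran_low_sketch(const int32_t *buf) {
  const int32x4_t lo = vld1q_s32(buf);      // coefficients 0..3
  const int32x4_t hi = vld1q_s32(buf + 4);  // coefficients 4..7
  return vcombine_s16(vmovn_s32(lo), vmovn_s32(hi));  // keep low 16 bits
}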
static INLINE int16x8x2_t load_tran_low_to_s16x2q(const tran_low_t *buf) { #if CONFIG_VP9_HIGHBITDEPTH @@ -86,9 +101,9 @@ static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, int stride) { if (stride == 4) return vld1_u8(buf); memcpy(&a, buf, 4); buf += stride; - a_u32 = vld1_lane_u32(&a, a_u32, 0); + a_u32 = vset_lane_u32(a, a_u32, 0); memcpy(&a, buf, 4); - a_u32 = vld1_lane_u32(&a, a_u32, 1); + a_u32 = vset_lane_u32(a, a_u32, 1); return vreinterpret_u8_u32(a_u32); } @@ -112,16 +127,16 @@ static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, int stride) { if (stride == 4) return vld1q_u8(buf); memcpy(&a, buf, 4); buf += stride; - a_u32 = vld1q_lane_u32(&a, a_u32, 0); + a_u32 = vsetq_lane_u32(a, a_u32, 0); memcpy(&a, buf, 4); buf += stride; - a_u32 = vld1q_lane_u32(&a, a_u32, 1); + a_u32 = vsetq_lane_u32(a, a_u32, 1); memcpy(&a, buf, 4); buf += stride; - a_u32 = vld1q_lane_u32(&a, a_u32, 2); + a_u32 = vsetq_lane_u32(a, a_u32, 2); memcpy(&a, buf, 4); buf += stride; - a_u32 = vld1q_lane_u32(&a, a_u32, 3); + a_u32 = vsetq_lane_u32(a, a_u32, 3); return vreinterpretq_u8_u32(a_u32); } @@ -166,4 +181,4 @@ static INLINE void store_u8(uint8_t *buf, int stride, const uint8x8_t a) { buf += stride; vst1_lane_u32((uint32_t *)buf, a_u32, 1); } -#endif // VPX_DSP_ARM_MEM_NEON_H_ +#endif // VPX_VPX_DSP_ARM_MEM_NEON_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/arm/quantize_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/quantize_neon.c index a0a1e6dd5ad3..adef5f6e1516 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/quantize_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/quantize_neon.c @@ -15,17 +15,33 @@ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/mem_neon.h" +static INLINE void calculate_dqcoeff_and_store(const int16x8_t qcoeff, + const int16x8_t dequant, + tran_low_t *dqcoeff) { + const int32x4_t dqcoeff_0 = + vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant)); + const int32x4_t dqcoeff_1 = + vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant)); + +#if CONFIG_VP9_HIGHBITDEPTH + vst1q_s32(dqcoeff, dqcoeff_0); + vst1q_s32(dqcoeff + 4, dqcoeff_1); +#else + vst1q_s16(dqcoeff, vcombine_s16(vmovn_s32(dqcoeff_0), vmovn_s32(dqcoeff_1))); +#endif // CONFIG_VP9_HIGHBITDEPTH +} + void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, - const int16_t *iscan_ptr) { + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { const int16x8_t one = vdupq_n_s16(1); const int16x8_t neg_one = vdupq_n_s16(-1); uint16x8_t eob_max; - (void)scan_ptr; + (void)scan; (void)skip_block; assert(!skip_block); @@ -38,8 +54,8 @@ void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16x8_t quant_shift = vld1q_s16(quant_shift_ptr); const int16x8_t dequant = vld1q_s16(dequant_ptr); // Add one because the eob does not index from 0. - const uint16x8_t iscan = - vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one)); + const uint16x8_t v_iscan = + vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); @@ -65,17 +81,15 @@ void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, qcoeff = vandq_s16(qcoeff, zbin_mask); // Set non-zero elements to -1 and use that to extract values for eob. 
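// A scalar, per-lane sketch of the eob extraction below (eob_lane_sketch is a
// hypothetical helper, assuming the usual iscan convention of one scan
// position per coefficient): vtstq_s16 yields an all-ones (-1) lane wherever
// qcoeff is non-zero, so ANDing with iscan + 1 keeps the 1-based scan position
// of each surviving coefficient, and the running maximum over all lanes is the
// end-of-block value written through eob_ptr.
static INLINE uint16_t eob_lane_sketch(const int16_t qcoeff,
                                       const int16_t iscan) {
  return (qcoeff != 0) ? (uint16_t)(iscan + 1) : 0;  // max over lanes == eob
}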
- eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), iscan); + eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan); coeff_ptr += 8; - iscan_ptr += 8; + iscan += 8; store_s16q_to_tran_low(qcoeff_ptr, qcoeff); qcoeff_ptr += 8; - qcoeff = vmulq_s16(qcoeff, dequant); - - store_s16q_to_tran_low(dqcoeff_ptr, qcoeff); + calculate_dqcoeff_and_store(qcoeff, dequant, dqcoeff_ptr); dqcoeff_ptr += 8; } @@ -90,8 +104,8 @@ void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, do { // Add one because the eob is not its index. - const uint16x8_t iscan = - vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one)); + const uint16x8_t v_iscan = + vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); @@ -118,23 +132,24 @@ void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, // Set non-zero elements to -1 and use that to extract values for eob. eob_max = - vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), iscan)); + vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan)); coeff_ptr += 8; - iscan_ptr += 8; + iscan += 8; store_s16q_to_tran_low(qcoeff_ptr, qcoeff); qcoeff_ptr += 8; - qcoeff = vmulq_s16(qcoeff, dequant); - - store_s16q_to_tran_low(dqcoeff_ptr, qcoeff); + calculate_dqcoeff_and_store(qcoeff, dequant, dqcoeff_ptr); dqcoeff_ptr += 8; n_coeffs -= 8; } while (n_coeffs > 0); } +#ifdef __aarch64__ + *eob_ptr = vmaxvq_u16(eob_max); +#else { const uint16x4_t eob_max_0 = vmax_u16(vget_low_u16(eob_max), vget_high_u16(eob_max)); @@ -142,25 +157,50 @@ void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1); vst1_lane_u16(eob_ptr, eob_max_2, 0); } +#endif // __aarch64__ } static INLINE int32x4_t extract_sign_bit(int32x4_t a) { return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 31)); } +static INLINE void calculate_dqcoeff_and_store_32x32(const int16x8_t qcoeff, + const int16x8_t dequant, + tran_low_t *dqcoeff) { + int32x4_t dqcoeff_0 = vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant)); + int32x4_t dqcoeff_1 = + vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant)); + + // Add 1 if negative to round towards zero because the C uses division. + dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0)); + dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1)); + +#if CONFIG_VP9_HIGHBITDEPTH + dqcoeff_0 = vshrq_n_s32(dqcoeff_0, 1); + dqcoeff_1 = vshrq_n_s32(dqcoeff_1, 1); + vst1q_s32(dqcoeff, dqcoeff_0); + vst1q_s32(dqcoeff + 4, dqcoeff_1); +#else + vst1q_s16(dqcoeff, + vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1))); +#endif // CONFIG_VP9_HIGHBITDEPTH +} + // Main difference is that zbin values are halved before comparison and dqcoeff // values are divided by 2. zbin is rounded but dqcoeff is not. 
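// A scalar sketch of the halving done by calculate_dqcoeff_and_store_32x32
// (halve_toward_zero_sketch is a hypothetical name): the C reference computes
// dqcoeff = (qcoeff * dequant) / 2, and C division truncates toward zero, so
// negative products get the extracted sign bit added before the arithmetic
// shift right by one.
static INLINE int32_t halve_toward_zero_sketch(const int32_t product) {
  // e.g. product = -5: (-5 + 1) >> 1 == -2, matching -5 / 2 in C.
  return (product + (int32_t)((uint32_t)product >> 31)) >> 1;
}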
-void vpx_quantize_b_32x32_neon( - const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, - const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan_ptr, const int16_t *iscan_ptr) { +void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, + const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan) { const int16x8_t one = vdupq_n_s16(1); const int16x8_t neg_one = vdupq_n_s16(-1); uint16x8_t eob_max; int i; - (void)scan_ptr; + (void)scan; (void)n_coeffs; // Because we will always calculate 32*32. (void)skip_block; assert(!skip_block); @@ -174,8 +214,8 @@ void vpx_quantize_b_32x32_neon( const int16x8_t quant_shift = vld1q_s16(quant_shift_ptr); const int16x8_t dequant = vld1q_s16(dequant_ptr); // Add one because the eob does not index from 0. - const uint16x8_t iscan = - vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one)); + const uint16x8_t v_iscan = + vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); @@ -188,8 +228,6 @@ void vpx_quantize_b_32x32_neon( // (round * quant * 2) >> 16 >> 1 == (round * quant) >> 16 int16x8_t qcoeff = vshrq_n_s16(vqdmulhq_s16(rounded, quant), 1); - int16x8_t dqcoeff; - int32x4_t dqcoeff_0, dqcoeff_1; qcoeff = vaddq_s16(qcoeff, rounded); @@ -203,25 +241,15 @@ void vpx_quantize_b_32x32_neon( qcoeff = vandq_s16(qcoeff, zbin_mask); // Set non-zero elements to -1 and use that to extract values for eob. - eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), iscan); + eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan); coeff_ptr += 8; - iscan_ptr += 8; + iscan += 8; store_s16q_to_tran_low(qcoeff_ptr, qcoeff); qcoeff_ptr += 8; - dqcoeff_0 = vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant)); - dqcoeff_1 = vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant)); - - // Add 1 if negative to round towards zero because the C uses division. - dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0)); - dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1)); - - dqcoeff = - vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1)); - - store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff); + calculate_dqcoeff_and_store_32x32(qcoeff, dequant, dqcoeff_ptr); dqcoeff_ptr += 8; } @@ -234,8 +262,8 @@ void vpx_quantize_b_32x32_neon( for (i = 1; i < 32 * 32 / 8; ++i) { // Add one because the eob is not its index. - const uint16x8_t iscan = - vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one)); + const uint16x8_t v_iscan = + vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); @@ -248,8 +276,6 @@ void vpx_quantize_b_32x32_neon( // (round * quant * 2) >> 16 >> 1 == (round * quant) >> 16 int16x8_t qcoeff = vshrq_n_s16(vqdmulhq_s16(rounded, quant), 1); - int16x8_t dqcoeff; - int32x4_t dqcoeff_0, dqcoeff_1; qcoeff = vaddq_s16(qcoeff, rounded); @@ -264,28 +290,22 @@ void vpx_quantize_b_32x32_neon( // Set non-zero elements to -1 and use that to extract values for eob. 
eob_max = - vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), iscan)); + vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan)); coeff_ptr += 8; - iscan_ptr += 8; + iscan += 8; store_s16q_to_tran_low(qcoeff_ptr, qcoeff); qcoeff_ptr += 8; - dqcoeff_0 = vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant)); - dqcoeff_1 = vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant)); - - dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0)); - dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1)); - - dqcoeff = - vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1)); - - store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff); + calculate_dqcoeff_and_store_32x32(qcoeff, dequant, dqcoeff_ptr); dqcoeff_ptr += 8; } } +#ifdef __aarch64__ + *eob_ptr = vmaxvq_u16(eob_max); +#else { const uint16x4_t eob_max_0 = vmax_u16(vget_low_u16(eob_max), vget_high_u16(eob_max)); @@ -293,4 +313,5 @@ void vpx_quantize_b_32x32_neon( const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1); vst1_lane_u16(eob_ptr, eob_max_2, 0); } +#endif // __aarch64__ } diff --git a/media/libvpx/libvpx/vpx_dsp/arm/sad4d_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/sad4d_neon.c index b04de3aff26a..06443c69956f 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/sad4d_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/sad4d_neon.c @@ -10,233 +10,371 @@ #include <arm_neon.h> +#include <assert.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/sum_neon.h" -void vpx_sad4x4x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, - uint32_t *res) { - int i; - const uint8x16_t src_u8 = load_unaligned_u8q(src, src_stride); - for (i = 0; i < 4; ++i) { - const uint8x16_t ref_u8 = load_unaligned_u8q(ref[i], ref_stride); - uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(ref_u8)); - abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(ref_u8)); - res[i] = vget_lane_u32(horizontal_add_uint16x8(abs), 0); - } +static INLINE uint8x8_t load_unaligned_2_buffers(const void *const buf0, + const void *const buf1) { + uint32_t a; + uint32x2_t aa = vdup_n_u32(0); + memcpy(&a, buf0, 4); + aa = vset_lane_u32(a, aa, 0); + memcpy(&a, buf1, 4); + aa = vset_lane_u32(a, aa, 1); + return vreinterpret_u8_u32(aa); } -void vpx_sad4x8x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, - uint32_t *res) { +static INLINE void sad4x_4d(const uint8_t *const src_ptr, const int src_stride, + const uint8_t *const ref_array[4], + const int ref_stride, const int height, + uint32_t *const res) { int i; - const uint8x16_t src_0 = load_unaligned_u8q(src, src_stride); - const uint8x16_t src_1 = load_unaligned_u8q(src + 4 * src_stride, src_stride); - for (i = 0; i < 4; ++i) { - const uint8x16_t ref_0 = load_unaligned_u8q(ref[i], ref_stride); - const uint8x16_t ref_1 = - load_unaligned_u8q(ref[i] + 4 * ref_stride, ref_stride); - uint16x8_t abs = vabdl_u8(vget_low_u8(src_0), vget_low_u8(ref_0)); - abs = vabal_u8(abs, vget_high_u8(src_0), vget_high_u8(ref_0)); - abs = vabal_u8(abs, vget_low_u8(src_1), vget_low_u8(ref_1)); - abs = vabal_u8(abs, vget_high_u8(src_1), vget_high_u8(ref_1)); - res[i] = vget_lane_u32(horizontal_add_uint16x8(abs), 0); - } -} + uint16x8_t abs[2] = { vdupq_n_u16(0), vdupq_n_u16(0) }; + uint16x4_t a[2]; + uint32x4_t r; -static INLINE void sad8x_4d(const uint8_t *a, int a_stride, - const uint8_t *const b[4], int b_stride, - uint32_t *result, const int height) { - int i, j; - uint16x8_t
sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), - vdupq_n_u16(0) }; - const uint8_t *b_loop[4] = { b[0], b[1], b[2], b[3] }; + assert(!((intptr_t)src_ptr % sizeof(uint32_t))); + assert(!(src_stride % sizeof(uint32_t))); for (i = 0; i < height; ++i) { - const uint8x8_t a_u8 = vld1_u8(a); - a += a_stride; + const uint8x8_t s = vreinterpret_u8_u32( + vld1_dup_u32((const uint32_t *)(src_ptr + i * src_stride))); + const uint8x8_t ref01 = load_unaligned_2_buffers( + ref_array[0] + i * ref_stride, ref_array[1] + i * ref_stride); + const uint8x8_t ref23 = load_unaligned_2_buffers( + ref_array[2] + i * ref_stride, ref_array[3] + i * ref_stride); + abs[0] = vabal_u8(abs[0], s, ref01); + abs[1] = vabal_u8(abs[1], s, ref23); + } + + a[0] = vpadd_u16(vget_low_u16(abs[0]), vget_high_u16(abs[0])); + a[1] = vpadd_u16(vget_low_u16(abs[1]), vget_high_u16(abs[1])); + r = vpaddlq_u16(vcombine_u16(a[0], a[1])); + vst1q_u32(res, r); +} + +void vpx_sad4x4x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res) { + sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 4, res); +} + +void vpx_sad4x8x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res) { + sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 8, res); +} + +//////////////////////////////////////////////////////////////////////////////// + +// Can handle 512 pixels' sad sum (such as 16x32 or 32x16) +static INLINE void sad_512_pel_final_neon(const uint16x8_t *sum /*[4]*/, + uint32_t *const res) { + const uint16x4_t a0 = vadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0])); + const uint16x4_t a1 = vadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1])); + const uint16x4_t a2 = vadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2])); + const uint16x4_t a3 = vadd_u16(vget_low_u16(sum[3]), vget_high_u16(sum[3])); + const uint16x4_t b0 = vpadd_u16(a0, a1); + const uint16x4_t b1 = vpadd_u16(a2, a3); + const uint32x4_t r = vpaddlq_u16(vcombine_u16(b0, b1)); + vst1q_u32(res, r); +} + +// Can handle 1024 pixels' sad sum (such as 32x32) +static INLINE void sad_1024_pel_final_neon(const uint16x8_t *sum /*[4]*/, + uint32_t *const res) { + const uint16x4_t a0 = vpadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0])); + const uint16x4_t a1 = vpadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1])); + const uint16x4_t a2 = vpadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2])); + const uint16x4_t a3 = vpadd_u16(vget_low_u16(sum[3]), vget_high_u16(sum[3])); + const uint32x4_t b0 = vpaddlq_u16(vcombine_u16(a0, a1)); + const uint32x4_t b1 = vpaddlq_u16(vcombine_u16(a2, a3)); + const uint32x2_t c0 = vpadd_u32(vget_low_u32(b0), vget_high_u32(b0)); + const uint32x2_t c1 = vpadd_u32(vget_low_u32(b1), vget_high_u32(b1)); + vst1q_u32(res, vcombine_u32(c0, c1)); +} + +// Can handle 2048 pixels' sad sum (such as 32x64 or 64x32) +static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/, + uint32_t *const res) { + const uint32x4_t a0 = vpaddlq_u16(sum[0]); + const uint32x4_t a1 = vpaddlq_u16(sum[1]); + const uint32x4_t a2 = vpaddlq_u16(sum[2]); + const uint32x4_t a3 = vpaddlq_u16(sum[3]); + const uint32x2_t b0 = vadd_u32(vget_low_u32(a0), vget_high_u32(a0)); + const uint32x2_t b1 = vadd_u32(vget_low_u32(a1), vget_high_u32(a1)); + const uint32x2_t b2 = vadd_u32(vget_low_u32(a2), vget_high_u32(a2)); + const uint32x2_t b3 = vadd_u32(vget_low_u32(a3), vget_high_u32(a3)); + const uint32x2_t c0 = vpadd_u32(b0, b1); + const uint32x2_t c1 = 
vpadd_u32(b2, b3); + vst1q_u32(res, vcombine_u32(c0, c1)); +} + +// Can handle 4096 pixels' sad sum (such as 64x64) +static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/, + uint32_t *const res) { + const uint32x4_t a0 = vpaddlq_u16(sum[0]); + const uint32x4_t a1 = vpaddlq_u16(sum[1]); + const uint32x4_t a2 = vpaddlq_u16(sum[2]); + const uint32x4_t a3 = vpaddlq_u16(sum[3]); + const uint32x4_t a4 = vpaddlq_u16(sum[4]); + const uint32x4_t a5 = vpaddlq_u16(sum[5]); + const uint32x4_t a6 = vpaddlq_u16(sum[6]); + const uint32x4_t a7 = vpaddlq_u16(sum[7]); + const uint32x4_t b0 = vaddq_u32(a0, a1); + const uint32x4_t b1 = vaddq_u32(a2, a3); + const uint32x4_t b2 = vaddq_u32(a4, a5); + const uint32x4_t b3 = vaddq_u32(a6, a7); + const uint32x2_t c0 = vadd_u32(vget_low_u32(b0), vget_high_u32(b0)); + const uint32x2_t c1 = vadd_u32(vget_low_u32(b1), vget_high_u32(b1)); + const uint32x2_t c2 = vadd_u32(vget_low_u32(b2), vget_high_u32(b2)); + const uint32x2_t c3 = vadd_u32(vget_low_u32(b3), vget_high_u32(b3)); + const uint32x2_t d0 = vpadd_u32(c0, c1); + const uint32x2_t d1 = vpadd_u32(c2, c3); + vst1q_u32(res, vcombine_u32(d0, d1)); +} + +static INLINE void sad8x_4d(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res, const int height) { + int i, j; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; + uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), + vdupq_n_u16(0) }; + + for (i = 0; i < height; ++i) { + const uint8x8_t s = vld1_u8(src_ptr); + src_ptr += src_stride; for (j = 0; j < 4; ++j) { - const uint8x8_t b_u8 = vld1_u8(b_loop[j]); - b_loop[j] += b_stride; - sum[j] = vabal_u8(sum[j], a_u8, b_u8); + const uint8x8_t b_u8 = vld1_u8(ref_loop[j]); + ref_loop[j] += ref_stride; + sum[j] = vabal_u8(sum[j], s, b_u8); } } - for (j = 0; j < 4; ++j) { - result[j] = vget_lane_u32(horizontal_add_uint16x8(sum[j]), 0); - } + sad_512_pel_final_neon(sum, res); } -void vpx_sad8x4x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad8x4x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad8x_4d(src, src_stride, ref, ref_stride, res, 4); + sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 4); } -void vpx_sad8x8x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad8x8x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad8x_4d(src, src_stride, ref, ref_stride, res, 8); + sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 8); } -void vpx_sad8x16x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad8x16x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad8x_4d(src, src_stride, ref, ref_stride, res, 16); + sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16); } -static INLINE void sad16x_4d(const uint8_t *a, int a_stride, - const uint8_t *const b[4], int b_stride, - uint32_t *result, const int height) { +//////////////////////////////////////////////////////////////////////////////// + +static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr, + uint16x8_t *const sum) { + const uint8x16_t r = vld1q_u8(ref_ptr); + *sum = vabal_u8(*sum, vget_low_u8(src_ptr), vget_low_u8(r)); + *sum = 
vabal_u8(*sum, vget_high_u8(src_ptr), vget_high_u8(r)); +} + +static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res, const int height) { int i, j; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0) }; - const uint8_t *b_loop[4] = { b[0], b[1], b[2], b[3] }; for (i = 0; i < height; ++i) { - const uint8x16_t a_u8 = vld1q_u8(a); - a += a_stride; + const uint8x16_t s = vld1q_u8(src_ptr); + src_ptr += src_stride; for (j = 0; j < 4; ++j) { - const uint8x16_t b_u8 = vld1q_u8(b_loop[j]); - b_loop[j] += b_stride; - sum[j] = vabal_u8(sum[j], vget_low_u8(a_u8), vget_low_u8(b_u8)); - sum[j] = vabal_u8(sum[j], vget_high_u8(a_u8), vget_high_u8(b_u8)); + sad16_neon(ref_loop[j], s, &sum[j]); + ref_loop[j] += ref_stride; } } - for (j = 0; j < 4; ++j) { - result[j] = vget_lane_u32(horizontal_add_uint16x8(sum[j]), 0); - } + sad_512_pel_final_neon(sum, res); } -void vpx_sad16x8x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad16x8x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad16x_4d(src, src_stride, ref, ref_stride, res, 8); + sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 8); } -void vpx_sad16x16x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad16x16x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad16x_4d(src, src_stride, ref, ref_stride, res, 16); + sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16); } -void vpx_sad16x32x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad16x32x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad16x_4d(src, src_stride, ref, ref_stride, res, 32); + sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 32); } -static INLINE void sad32x_4d(const uint8_t *a, int a_stride, - const uint8_t *const b[4], int b_stride, - uint32_t *result, const int height) { - int i, j; +//////////////////////////////////////////////////////////////////////////////// + +static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + const int height, uint16x8_t *const sum) { + int i; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; + + sum[0] = sum[1] = sum[2] = sum[3] = vdupq_n_u16(0); + + for (i = 0; i < height; ++i) { + uint8x16_t s; + + s = vld1q_u8(src_ptr + 0 * 16); + sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]); + + s = vld1q_u8(src_ptr + 1 * 16); + sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]); + + src_ptr += src_stride; + ref_loop[0] += ref_stride; + ref_loop[1] += ref_stride; + ref_loop[2] += ref_stride; + ref_loop[3] += ref_stride; + } +} + +void vpx_sad32x16x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res) { + uint16x8_t 
sum[4]; + sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 16, sum); + sad_512_pel_final_neon(sum, res); +} + +void vpx_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res) { + uint16x8_t sum[4]; + sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 32, sum); + sad_1024_pel_final_neon(sum, res); +} + +void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res) { + uint16x8_t sum[4]; + sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 64, sum); + sad_2048_pel_final_neon(sum, res); +} + +//////////////////////////////////////////////////////////////////////////////// + +void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res) { + int i; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0) }; - const uint8_t *b_loop[4] = { b[0], b[1], b[2], b[3] }; - for (i = 0; i < height; ++i) { - const uint8x16_t a_0 = vld1q_u8(a); - const uint8x16_t a_1 = vld1q_u8(a + 16); - a += a_stride; - for (j = 0; j < 4; ++j) { - const uint8x16_t b_0 = vld1q_u8(b_loop[j]); - const uint8x16_t b_1 = vld1q_u8(b_loop[j] + 16); - b_loop[j] += b_stride; - sum[j] = vabal_u8(sum[j], vget_low_u8(a_0), vget_low_u8(b_0)); - sum[j] = vabal_u8(sum[j], vget_high_u8(a_0), vget_high_u8(b_0)); - sum[j] = vabal_u8(sum[j], vget_low_u8(a_1), vget_low_u8(b_1)); - sum[j] = vabal_u8(sum[j], vget_high_u8(a_1), vget_high_u8(b_1)); - } + for (i = 0; i < 32; ++i) { + uint8x16_t s; + + s = vld1q_u8(src_ptr + 0 * 16); + sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]); + + s = vld1q_u8(src_ptr + 1 * 16); + sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]); + + s = vld1q_u8(src_ptr + 2 * 16); + sad16_neon(ref_loop[0] + 2 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 2 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 2 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 2 * 16, s, &sum[3]); + + s = vld1q_u8(src_ptr + 3 * 16); + sad16_neon(ref_loop[0] + 3 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 3 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 3 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 3 * 16, s, &sum[3]); + + src_ptr += src_stride; + ref_loop[0] += ref_stride; + ref_loop[1] += ref_stride; + ref_loop[2] += ref_stride; + ref_loop[3] += ref_stride; } - for (j = 0; j < 4; ++j) { - result[j] = vget_lane_u32(horizontal_add_uint16x8(sum[j]), 0); - } + sad_2048_pel_final_neon(sum, res); } -void vpx_sad32x16x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { - sad32x_4d(src, src_stride, ref, ref_stride, res, 16); -} - -void vpx_sad32x32x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, - uint32_t *res) { - sad32x_4d(src, src_stride, ref, ref_stride, res, 32); -} - -void vpx_sad32x64x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, - uint32_t *res) { - sad32x_4d(src, 
src_stride, ref, ref_stride, res, 64); -} - -static INLINE void sum64x(const uint8x16_t a_0, const uint8x16_t a_1, - const uint8x16_t b_0, const uint8x16_t b_1, - uint16x8_t *sum) { - *sum = vabal_u8(*sum, vget_low_u8(a_0), vget_low_u8(b_0)); - *sum = vabal_u8(*sum, vget_high_u8(a_0), vget_high_u8(b_0)); - *sum = vabal_u8(*sum, vget_low_u8(a_1), vget_low_u8(b_1)); - *sum = vabal_u8(*sum, vget_high_u8(a_1), vget_high_u8(b_1)); -} - -static INLINE void sad64x_4d(const uint8_t *a, int a_stride, - const uint8_t *const b[4], int b_stride, - uint32_t *result, const int height) { int i; - uint16x8_t sum_0 = vdupq_n_u16(0); - uint16x8_t sum_1 = vdupq_n_u16(0); - uint16x8_t sum_2 = vdupq_n_u16(0); - uint16x8_t sum_3 = vdupq_n_u16(0); - uint16x8_t sum_4 = vdupq_n_u16(0); - uint16x8_t sum_5 = vdupq_n_u16(0); - uint16x8_t sum_6 = vdupq_n_u16(0); - uint16x8_t sum_7 = vdupq_n_u16(0); - const uint8_t *b_loop[4] = { b[0], b[1], b[2], b[3] }; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; + uint16x8_t sum[8] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), + vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), + vdupq_n_u16(0), vdupq_n_u16(0) }; - for (i = 0; i < height; ++i) { - const uint8x16_t a_0 = vld1q_u8(a); - const uint8x16_t a_1 = vld1q_u8(a + 16); - const uint8x16_t a_2 = vld1q_u8(a + 32); - const uint8x16_t a_3 = vld1q_u8(a + 48); - a += a_stride; - sum64x(a_0, a_1, vld1q_u8(b_loop[0]), vld1q_u8(b_loop[0] + 16), &sum_0); - sum64x(a_2, a_3, vld1q_u8(b_loop[0] + 32), vld1q_u8(b_loop[0] + 48), - &sum_1); - b_loop[0] += b_stride; - sum64x(a_0, a_1, vld1q_u8(b_loop[1]), vld1q_u8(b_loop[1] + 16), &sum_2); - sum64x(a_2, a_3, vld1q_u8(b_loop[1] + 32), vld1q_u8(b_loop[1] + 48), - &sum_3); - b_loop[1] += b_stride; - sum64x(a_0, a_1, vld1q_u8(b_loop[2]), vld1q_u8(b_loop[2] + 16), &sum_4); - sum64x(a_2, a_3, vld1q_u8(b_loop[2] + 32), vld1q_u8(b_loop[2] + 48), - &sum_5); - b_loop[2] += b_stride; - sum64x(a_0, a_1, vld1q_u8(b_loop[3]), vld1q_u8(b_loop[3] + 16), &sum_6); - sum64x(a_2, a_3, vld1q_u8(b_loop[3] + 32), vld1q_u8(b_loop[3] + 48), - &sum_7); - b_loop[3] += b_stride; + for (i = 0; i < 64; ++i) { + uint8x16_t s; + + s = vld1q_u8(src_ptr + 0 * 16); + sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 0 * 16, s, &sum[2]); + sad16_neon(ref_loop[2] + 0 * 16, s, &sum[4]); + sad16_neon(ref_loop[3] + 0 * 16, s, &sum[6]); + + s = vld1q_u8(src_ptr + 1 * 16); + sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 1 * 16, s, &sum[2]); + sad16_neon(ref_loop[2] + 1 * 16, s, &sum[4]); + sad16_neon(ref_loop[3] + 1 * 16, s, &sum[6]); + + s = vld1q_u8(src_ptr + 2 * 16); + sad16_neon(ref_loop[0] + 2 * 16, s, &sum[1]); + sad16_neon(ref_loop[1] + 2 * 16, s, &sum[3]); + sad16_neon(ref_loop[2] + 2 * 16, s, &sum[5]); + sad16_neon(ref_loop[3] + 2 * 16, s, &sum[7]); + + s = vld1q_u8(src_ptr + 3 * 16); + sad16_neon(ref_loop[0] + 3 * 16, s, &sum[1]); + sad16_neon(ref_loop[1] + 3 * 16, s, &sum[3]); + sad16_neon(ref_loop[2] + 3 * 16, s, &sum[5]); + sad16_neon(ref_loop[3] + 3 * 16, s, &sum[7]); + + src_ptr += src_stride; + ref_loop[0] += ref_stride; + ref_loop[1] += ref_stride; + ref_loop[2] += ref_stride; + ref_loop[3] += ref_stride; } - result[0] = vget_lane_u32(horizontal_add_long_uint16x8(sum_0, sum_1), 0); - result[1] = vget_lane_u32(horizontal_add_long_uint16x8(sum_2, sum_3), 0); - result[2] = vget_lane_u32(horizontal_add_long_uint16x8(sum_4, sum_5), 0); - result[3] = vget_lane_u32(horizontal_add_long_uint16x8(sum_6, sum_7), 0); -} - -void 
vpx_sad64x32x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, - uint32_t *res) { - sad64x_4d(src, src_stride, ref, ref_stride, res, 32); -} - -void vpx_sad64x64x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, - uint32_t *res) { - sad64x_4d(src, src_stride, ref, ref_stride, res, 64); + sad_4096_pel_final_neon(sum, res); } diff --git a/media/libvpx/libvpx/vpx_dsp/arm/sad_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/sad_neon.c index 9518a166bbfb..c4a49e366d53 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/sad_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/sad_neon.c @@ -11,6 +11,7 @@ #include <arm_neon.h> #include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/arm/mem_neon.h" @@ -73,128 +74,132 @@ uint32_t vpx_sad4x8_avg_neon(const uint8_t *src_ptr, int src_stride, return vget_lane_u32(horizontal_add_uint16x8(abs), 0); } -static INLINE uint16x8_t sad8x(const uint8_t *a, int a_stride, const uint8_t *b, - int b_stride, const int height) { +static INLINE uint16x8_t sad8x(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + const int height) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < height; ++i) { - const uint8x8_t a_u8 = vld1_u8(a); - const uint8x8_t b_u8 = vld1_u8(b); - a += a_stride; - b += b_stride; + const uint8x8_t a_u8 = vld1_u8(src_ptr); + const uint8x8_t b_u8 = vld1_u8(ref_ptr); + src_ptr += src_stride; + ref_ptr += ref_stride; abs = vabal_u8(abs, a_u8, b_u8); } return abs; } -static INLINE uint16x8_t sad8x_avg(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - const uint8_t *c, const int height) { +static INLINE uint16x8_t sad8x_avg(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + const uint8_t *second_pred, + const int height) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < height; ++i) { - const uint8x8_t a_u8 = vld1_u8(a); - const uint8x8_t b_u8 = vld1_u8(b); - const uint8x8_t c_u8 = vld1_u8(c); + const uint8x8_t a_u8 = vld1_u8(src_ptr); + const uint8x8_t b_u8 = vld1_u8(ref_ptr); + const uint8x8_t c_u8 = vld1_u8(second_pred); const uint8x8_t avg = vrhadd_u8(b_u8, c_u8); - a += a_stride; - b += b_stride; - c += 8; + src_ptr += src_stride; + ref_ptr += ref_stride; + second_pred += 8; abs = vabal_u8(abs, a_u8, avg); } return abs; } -#define sad8xN(n) \ - uint32_t vpx_sad8x##n##_neon(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride) { \ - const uint16x8_t abs = sad8x(src, src_stride, ref, ref_stride, n); \ - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ - } \ - \ - uint32_t vpx_sad8x##n##_avg_neon(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - const uint8_t *second_pred) { \ - const uint16x8_t abs = \ - sad8x_avg(src, src_stride, ref, ref_stride, second_pred, n); \ - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ +#define sad8xN(n) \ + uint32_t vpx_sad8x##n##_neon(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride) { \ + const uint16x8_t abs = sad8x(src_ptr, src_stride, ref_ptr, ref_stride, n); \ + return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ + } \ + \ + uint32_t vpx_sad8x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + const uint8_t *second_pred) { \ + const uint16x8_t abs = \ + sad8x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \ + return
vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ } sad8xN(4); sad8xN(8); sad8xN(16); -static INLINE uint16x8_t sad16x(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, +static INLINE uint16x8_t sad16x(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, const int height) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < height; ++i) { - const uint8x16_t a_u8 = vld1q_u8(a); - const uint8x16_t b_u8 = vld1q_u8(b); - a += a_stride; - b += b_stride; + const uint8x16_t a_u8 = vld1q_u8(src_ptr); + const uint8x16_t b_u8 = vld1q_u8(ref_ptr); + src_ptr += src_stride; + ref_ptr += ref_stride; abs = vabal_u8(abs, vget_low_u8(a_u8), vget_low_u8(b_u8)); abs = vabal_u8(abs, vget_high_u8(a_u8), vget_high_u8(b_u8)); } return abs; } -static INLINE uint16x8_t sad16x_avg(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - const uint8_t *c, const int height) { +static INLINE uint16x8_t sad16x_avg(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + const uint8_t *second_pred, + const int height) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < height; ++i) { - const uint8x16_t a_u8 = vld1q_u8(a); - const uint8x16_t b_u8 = vld1q_u8(b); - const uint8x16_t c_u8 = vld1q_u8(c); + const uint8x16_t a_u8 = vld1q_u8(src_ptr); + const uint8x16_t b_u8 = vld1q_u8(ref_ptr); + const uint8x16_t c_u8 = vld1q_u8(second_pred); const uint8x16_t avg = vrhaddq_u8(b_u8, c_u8); - a += a_stride; - b += b_stride; - c += 16; + src_ptr += src_stride; + ref_ptr += ref_stride; + second_pred += 16; abs = vabal_u8(abs, vget_low_u8(a_u8), vget_low_u8(avg)); abs = vabal_u8(abs, vget_high_u8(a_u8), vget_high_u8(avg)); } return abs; } -#define sad16xN(n) \ - uint32_t vpx_sad16x##n##_neon(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride) { \ - const uint16x8_t abs = sad16x(src, src_stride, ref, ref_stride, n); \ - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ - } \ - \ - uint32_t vpx_sad16x##n##_avg_neon(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - const uint8_t *second_pred) { \ - const uint16x8_t abs = \ - sad16x_avg(src, src_stride, ref, ref_stride, second_pred, n); \ - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ +#define sad16xN(n) \ + uint32_t vpx_sad16x##n##_neon(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride) { \ + const uint16x8_t abs = \ + sad16x(src_ptr, src_stride, ref_ptr, ref_stride, n); \ + return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ + } \ + \ + uint32_t vpx_sad16x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + const uint8_t *second_pred) { \ + const uint16x8_t abs = \ + sad16x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \ + return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ } sad16xN(8); sad16xN(16); sad16xN(32); -static INLINE uint16x8_t sad32x(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, +static INLINE uint16x8_t sad32x(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, const int height) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < height; ++i) { - const uint8x16_t a_lo = vld1q_u8(a); - const uint8x16_t a_hi = vld1q_u8(a + 16); - const uint8x16_t b_lo = vld1q_u8(b); - const uint8x16_t b_hi = vld1q_u8(b + 16); - a += a_stride; - b += b_stride; + const uint8x16_t a_lo = vld1q_u8(src_ptr); + const uint8x16_t a_hi = vld1q_u8(src_ptr + 16); + const 
uint8x16_t b_lo = vld1q_u8(ref_ptr); + const uint8x16_t b_hi = vld1q_u8(ref_ptr + 16); + src_ptr += src_stride; + ref_ptr += ref_stride; abs = vabal_u8(abs, vget_low_u8(a_lo), vget_low_u8(b_lo)); abs = vabal_u8(abs, vget_high_u8(a_lo), vget_high_u8(b_lo)); abs = vabal_u8(abs, vget_low_u8(a_hi), vget_low_u8(b_hi)); @@ -203,24 +208,25 @@ static INLINE uint16x8_t sad32x(const uint8_t *a, int a_stride, return abs; } -static INLINE uint16x8_t sad32x_avg(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - const uint8_t *c, const int height) { +static INLINE uint16x8_t sad32x_avg(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + const uint8_t *second_pred, + const int height) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < height; ++i) { - const uint8x16_t a_lo = vld1q_u8(a); - const uint8x16_t a_hi = vld1q_u8(a + 16); - const uint8x16_t b_lo = vld1q_u8(b); - const uint8x16_t b_hi = vld1q_u8(b + 16); - const uint8x16_t c_lo = vld1q_u8(c); - const uint8x16_t c_hi = vld1q_u8(c + 16); + const uint8x16_t a_lo = vld1q_u8(src_ptr); + const uint8x16_t a_hi = vld1q_u8(src_ptr + 16); + const uint8x16_t b_lo = vld1q_u8(ref_ptr); + const uint8x16_t b_hi = vld1q_u8(ref_ptr + 16); + const uint8x16_t c_lo = vld1q_u8(second_pred); + const uint8x16_t c_hi = vld1q_u8(second_pred + 16); const uint8x16_t avg_lo = vrhaddq_u8(b_lo, c_lo); const uint8x16_t avg_hi = vrhaddq_u8(b_hi, c_hi); - a += a_stride; - b += b_stride; - c += 32; + src_ptr += src_stride; + ref_ptr += ref_stride; + second_pred += 32; abs = vabal_u8(abs, vget_low_u8(a_lo), vget_low_u8(avg_lo)); abs = vabal_u8(abs, vget_high_u8(a_lo), vget_high_u8(avg_lo)); abs = vabal_u8(abs, vget_low_u8(a_hi), vget_low_u8(avg_hi)); @@ -229,43 +235,44 @@ static INLINE uint16x8_t sad32x_avg(const uint8_t *a, int a_stride, return abs; } -#define sad32xN(n) \ - uint32_t vpx_sad32x##n##_neon(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride) { \ - const uint16x8_t abs = sad32x(src, src_stride, ref, ref_stride, n); \ - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ - } \ - \ - uint32_t vpx_sad32x##n##_avg_neon(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - const uint8_t *second_pred) { \ - const uint16x8_t abs = \ - sad32x_avg(src, src_stride, ref, ref_stride, second_pred, n); \ - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ +#define sad32xN(n) \ + uint32_t vpx_sad32x##n##_neon(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride) { \ + const uint16x8_t abs = \ + sad32x(src_ptr, src_stride, ref_ptr, ref_stride, n); \ + return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ + } \ + \ + uint32_t vpx_sad32x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + const uint8_t *second_pred) { \ + const uint16x8_t abs = \ + sad32x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \ + return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ } sad32xN(16); sad32xN(32); sad32xN(64); -static INLINE uint32x4_t sad64x(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, +static INLINE uint32x4_t sad64x(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, const int height) { int i; uint16x8_t abs_0 = vdupq_n_u16(0); uint16x8_t abs_1 = vdupq_n_u16(0); for (i = 0; i < height; ++i) { - const uint8x16_t a_0 = vld1q_u8(a); - const uint8x16_t a_1 = vld1q_u8(a + 16); - const uint8x16_t a_2 = vld1q_u8(a + 32); - const 
uint8x16_t a_3 = vld1q_u8(a + 48); - const uint8x16_t b_0 = vld1q_u8(b); - const uint8x16_t b_1 = vld1q_u8(b + 16); - const uint8x16_t b_2 = vld1q_u8(b + 32); - const uint8x16_t b_3 = vld1q_u8(b + 48); - a += a_stride; - b += b_stride; + const uint8x16_t a_0 = vld1q_u8(src_ptr); + const uint8x16_t a_1 = vld1q_u8(src_ptr + 16); + const uint8x16_t a_2 = vld1q_u8(src_ptr + 32); + const uint8x16_t a_3 = vld1q_u8(src_ptr + 48); + const uint8x16_t b_0 = vld1q_u8(ref_ptr); + const uint8x16_t b_1 = vld1q_u8(ref_ptr + 16); + const uint8x16_t b_2 = vld1q_u8(ref_ptr + 32); + const uint8x16_t b_3 = vld1q_u8(ref_ptr + 48); + src_ptr += src_stride; + ref_ptr += ref_stride; abs_0 = vabal_u8(abs_0, vget_low_u8(a_0), vget_low_u8(b_0)); abs_0 = vabal_u8(abs_0, vget_high_u8(a_0), vget_high_u8(b_0)); abs_0 = vabal_u8(abs_0, vget_low_u8(a_1), vget_low_u8(b_1)); @@ -282,33 +289,34 @@ static INLINE uint32x4_t sad64x(const uint8_t *a, int a_stride, } } -static INLINE uint32x4_t sad64x_avg(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - const uint8_t *c, const int height) { +static INLINE uint32x4_t sad64x_avg(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + const uint8_t *second_pred, + const int height) { int i; uint16x8_t abs_0 = vdupq_n_u16(0); uint16x8_t abs_1 = vdupq_n_u16(0); for (i = 0; i < height; ++i) { - const uint8x16_t a_0 = vld1q_u8(a); - const uint8x16_t a_1 = vld1q_u8(a + 16); - const uint8x16_t a_2 = vld1q_u8(a + 32); - const uint8x16_t a_3 = vld1q_u8(a + 48); - const uint8x16_t b_0 = vld1q_u8(b); - const uint8x16_t b_1 = vld1q_u8(b + 16); - const uint8x16_t b_2 = vld1q_u8(b + 32); - const uint8x16_t b_3 = vld1q_u8(b + 48); - const uint8x16_t c_0 = vld1q_u8(c); - const uint8x16_t c_1 = vld1q_u8(c + 16); - const uint8x16_t c_2 = vld1q_u8(c + 32); - const uint8x16_t c_3 = vld1q_u8(c + 48); + const uint8x16_t a_0 = vld1q_u8(src_ptr); + const uint8x16_t a_1 = vld1q_u8(src_ptr + 16); + const uint8x16_t a_2 = vld1q_u8(src_ptr + 32); + const uint8x16_t a_3 = vld1q_u8(src_ptr + 48); + const uint8x16_t b_0 = vld1q_u8(ref_ptr); + const uint8x16_t b_1 = vld1q_u8(ref_ptr + 16); + const uint8x16_t b_2 = vld1q_u8(ref_ptr + 32); + const uint8x16_t b_3 = vld1q_u8(ref_ptr + 48); + const uint8x16_t c_0 = vld1q_u8(second_pred); + const uint8x16_t c_1 = vld1q_u8(second_pred + 16); + const uint8x16_t c_2 = vld1q_u8(second_pred + 32); + const uint8x16_t c_3 = vld1q_u8(second_pred + 48); const uint8x16_t avg_0 = vrhaddq_u8(b_0, c_0); const uint8x16_t avg_1 = vrhaddq_u8(b_1, c_1); const uint8x16_t avg_2 = vrhaddq_u8(b_2, c_2); const uint8x16_t avg_3 = vrhaddq_u8(b_3, c_3); - a += a_stride; - b += b_stride; - c += 64; + src_ptr += src_stride; + ref_ptr += ref_stride; + second_pred += 64; abs_0 = vabal_u8(abs_0, vget_low_u8(a_0), vget_low_u8(avg_0)); abs_0 = vabal_u8(abs_0, vget_high_u8(a_0), vget_high_u8(avg_0)); abs_0 = vabal_u8(abs_0, vget_low_u8(a_1), vget_low_u8(avg_1)); @@ -325,19 +333,20 @@ static INLINE uint32x4_t sad64x_avg(const uint8_t *a, int a_stride, } } -#define sad64xN(n) \ - uint32_t vpx_sad64x##n##_neon(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride) { \ - const uint32x4_t abs = sad64x(src, src_stride, ref, ref_stride, n); \ - return vget_lane_u32(horizontal_add_uint32x4(abs), 0); \ - } \ - \ - uint32_t vpx_sad64x##n##_avg_neon(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - const uint8_t *second_pred) { \ - const uint32x4_t abs = \ - sad64x_avg(src, src_stride, ref, ref_stride, second_pred, 
n); \ - return vget_lane_u32(horizontal_add_uint32x4(abs), 0); \ +#define sad64xN(n) \ + uint32_t vpx_sad64x##n##_neon(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride) { \ + const uint32x4_t abs = \ + sad64x(src_ptr, src_stride, ref_ptr, ref_stride, n); \ + return vget_lane_u32(horizontal_add_uint32x4(abs), 0); \ + } \ + \ + uint32_t vpx_sad64x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + const uint8_t *second_pred) { \ + const uint32x4_t abs = \ + sad64x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \ + return vget_lane_u32(horizontal_add_uint32x4(abs), 0); \ } sad64xN(32); diff --git a/media/libvpx/libvpx/vpx_dsp/arm/subpel_variance_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/subpel_variance_neon.c index 4f58a7832a56..37bfd1cd1fd5 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/subpel_variance_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/subpel_variance_neon.c @@ -97,30 +97,30 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr, // 4xM filter writes an extra row to fdata because it processes two rows at a // time. -#define sub_pixel_varianceNxM(n, m) \ - uint32_t vpx_sub_pixel_variance##n##x##m##_neon( \ - const uint8_t *a, int a_stride, int xoffset, int yoffset, \ - const uint8_t *b, int b_stride, uint32_t *sse) { \ - uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \ - uint8_t temp1[n * m]; \ - \ - if (n == 4) { \ - var_filter_block2d_bil_w4(a, temp0, a_stride, 1, (m + 2), \ - bilinear_filters[xoffset]); \ - var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \ - bilinear_filters[yoffset]); \ - } else if (n == 8) { \ - var_filter_block2d_bil_w8(a, temp0, a_stride, 1, (m + 1), \ - bilinear_filters[xoffset]); \ - var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \ - bilinear_filters[yoffset]); \ - } else { \ - var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (m + 1), n, \ - bilinear_filters[xoffset]); \ - var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \ - bilinear_filters[yoffset]); \ - } \ - return vpx_variance##n##x##m(temp1, n, b, b_stride, sse); \ +#define sub_pixel_varianceNxM(n, m) \ + uint32_t vpx_sub_pixel_variance##n##x##m##_neon( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ + uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \ + uint8_t temp1[n * m]; \ + \ + if (n == 4) { \ + var_filter_block2d_bil_w4(src_ptr, temp0, src_stride, 1, (m + 2), \ + bilinear_filters[x_offset]); \ + var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \ + bilinear_filters[y_offset]); \ + } else if (n == 8) { \ + var_filter_block2d_bil_w8(src_ptr, temp0, src_stride, 1, (m + 1), \ + bilinear_filters[x_offset]); \ + var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \ + bilinear_filters[y_offset]); \ + } else { \ + var_filter_block2d_bil_w16(src_ptr, temp0, src_stride, 1, (m + 1), n, \ + bilinear_filters[x_offset]); \ + var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \ + bilinear_filters[y_offset]); \ + } \ + return vpx_variance##n##x##m(temp1, n, ref_ptr, ref_stride, sse); \ } sub_pixel_varianceNxM(4, 4); @@ -139,34 +139,34 @@ sub_pixel_varianceNxM(64, 64); // 4xM filter writes an extra row to fdata because it processes two rows at a // time. 
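// Sketch of the intermediate-buffer sizing implied by the comment above
// (FIRST_PASS_ROWS is a hypothetical macro, not used by this file): the
// vertical pass consumes m + 1 rows of first-pass output, and the 4-wide
// horizontal kernel emits rows in pairs, so it can write one row beyond
// that; hence the temp0[n * (m + (n == 4 ? 2 : 1))] declarations in these
// macros reserve m + 2 rows when n == 4 and m + 1 rows otherwise.
#define FIRST_PASS_ROWS(n, m) ((m) + ((n) == 4 ? 2 : 1))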
-#define sub_pixel_avg_varianceNxM(n, m) \
-  uint32_t vpx_sub_pixel_avg_variance##n##x##m##_neon( \
-      const uint8_t *a, int a_stride, int xoffset, int yoffset, \
-      const uint8_t *b, int b_stride, uint32_t *sse, \
-      const uint8_t *second_pred) { \
-    uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \
-    uint8_t temp1[n * m]; \
- \
-    if (n == 4) { \
-      var_filter_block2d_bil_w4(a, temp0, a_stride, 1, (m + 2), \
-                                bilinear_filters[xoffset]); \
-      var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \
-                                bilinear_filters[yoffset]); \
-    } else if (n == 8) { \
-      var_filter_block2d_bil_w8(a, temp0, a_stride, 1, (m + 1), \
-                                bilinear_filters[xoffset]); \
-      var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \
-                                bilinear_filters[yoffset]); \
-    } else { \
-      var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (m + 1), n, \
-                                 bilinear_filters[xoffset]); \
-      var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \
-                                 bilinear_filters[yoffset]); \
-    } \
- \
-    vpx_comp_avg_pred(temp0, second_pred, n, m, temp1, n); \
- \
-    return vpx_variance##n##x##m(temp0, n, b, b_stride, sse); \
+#define sub_pixel_avg_varianceNxM(n, m) \
+  uint32_t vpx_sub_pixel_avg_variance##n##x##m##_neon( \
+      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \
+      const uint8_t *second_pred) { \
+    uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \
+    uint8_t temp1[n * m]; \
+ \
+    if (n == 4) { \
+      var_filter_block2d_bil_w4(src_ptr, temp0, src_stride, 1, (m + 2), \
+                                bilinear_filters[x_offset]); \
+      var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \
+                                bilinear_filters[y_offset]); \
+    } else if (n == 8) { \
+      var_filter_block2d_bil_w8(src_ptr, temp0, src_stride, 1, (m + 1), \
+                                bilinear_filters[x_offset]); \
+      var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \
+                                bilinear_filters[y_offset]); \
+    } else { \
+      var_filter_block2d_bil_w16(src_ptr, temp0, src_stride, 1, (m + 1), n, \
+                                 bilinear_filters[x_offset]); \
+      var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \
+                                 bilinear_filters[y_offset]); \
+    } \
+ \
+    vpx_comp_avg_pred(temp0, second_pred, n, m, temp1, n); \
+ \
+    return vpx_variance##n##x##m(temp0, n, ref_ptr, ref_stride, sse); \
   }

 sub_pixel_avg_varianceNxM(4, 4);
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/subtract_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/subtract_neon.c
index ce81fb630f24..612897e247c2 100644
--- a/media/libvpx/libvpx/vpx_dsp/arm/subtract_neon.c
+++ b/media/libvpx/libvpx/vpx_dsp/arm/subtract_neon.c
@@ -9,71 +9,73 @@
  */

 #include <arm_neon.h>
+#include <assert.h>

 #include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_dsp/arm/mem_neon.h"

 void vpx_subtract_block_neon(int rows, int cols, int16_t *diff,
                              ptrdiff_t diff_stride, const uint8_t *src,
                              ptrdiff_t src_stride, const uint8_t *pred,
                              ptrdiff_t pred_stride) {
-  int r, c;
+  int r = rows, c;

   if (cols > 16) {
-    for (r = 0; r < rows; ++r) {
+    do {
       for (c = 0; c < cols; c += 32) {
-        const uint8x16_t v_src_00 = vld1q_u8(&src[c + 0]);
-        const uint8x16_t v_src_16 = vld1q_u8(&src[c + 16]);
-        const uint8x16_t v_pred_00 = vld1q_u8(&pred[c + 0]);
-        const uint8x16_t v_pred_16 = vld1q_u8(&pred[c + 16]);
-        const uint16x8_t v_diff_lo_00 =
-            vsubl_u8(vget_low_u8(v_src_00), vget_low_u8(v_pred_00));
-        const uint16x8_t v_diff_hi_00 =
-            vsubl_u8(vget_high_u8(v_src_00), vget_high_u8(v_pred_00));
-        const uint16x8_t v_diff_lo_16 =
-            vsubl_u8(vget_low_u8(v_src_16), vget_low_u8(v_pred_16));
-        const uint16x8_t v_diff_hi_16 =
-            vsubl_u8(vget_high_u8(v_src_16), vget_high_u8(v_pred_16));
-        vst1q_s16(&diff[c + 0],
vreinterpretq_s16_u16(v_diff_lo_00)); - vst1q_s16(&diff[c + 8], vreinterpretq_s16_u16(v_diff_hi_00)); - vst1q_s16(&diff[c + 16], vreinterpretq_s16_u16(v_diff_lo_16)); - vst1q_s16(&diff[c + 24], vreinterpretq_s16_u16(v_diff_hi_16)); + const uint8x16_t s0 = vld1q_u8(&src[c + 0]); + const uint8x16_t s1 = vld1q_u8(&src[c + 16]); + const uint8x16_t p0 = vld1q_u8(&pred[c + 0]); + const uint8x16_t p1 = vld1q_u8(&pred[c + 16]); + const uint16x8_t d0 = vsubl_u8(vget_low_u8(s0), vget_low_u8(p0)); + const uint16x8_t d1 = vsubl_u8(vget_high_u8(s0), vget_high_u8(p0)); + const uint16x8_t d2 = vsubl_u8(vget_low_u8(s1), vget_low_u8(p1)); + const uint16x8_t d3 = vsubl_u8(vget_high_u8(s1), vget_high_u8(p1)); + vst1q_s16(&diff[c + 0], vreinterpretq_s16_u16(d0)); + vst1q_s16(&diff[c + 8], vreinterpretq_s16_u16(d1)); + vst1q_s16(&diff[c + 16], vreinterpretq_s16_u16(d2)); + vst1q_s16(&diff[c + 24], vreinterpretq_s16_u16(d3)); } diff += diff_stride; pred += pred_stride; src += src_stride; - } + } while (--r); } else if (cols > 8) { - for (r = 0; r < rows; ++r) { - const uint8x16_t v_src = vld1q_u8(&src[0]); - const uint8x16_t v_pred = vld1q_u8(&pred[0]); - const uint16x8_t v_diff_lo = - vsubl_u8(vget_low_u8(v_src), vget_low_u8(v_pred)); - const uint16x8_t v_diff_hi = - vsubl_u8(vget_high_u8(v_src), vget_high_u8(v_pred)); - vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff_lo)); - vst1q_s16(&diff[8], vreinterpretq_s16_u16(v_diff_hi)); + do { + const uint8x16_t s = vld1q_u8(&src[0]); + const uint8x16_t p = vld1q_u8(&pred[0]); + const uint16x8_t d0 = vsubl_u8(vget_low_u8(s), vget_low_u8(p)); + const uint16x8_t d1 = vsubl_u8(vget_high_u8(s), vget_high_u8(p)); + vst1q_s16(&diff[0], vreinterpretq_s16_u16(d0)); + vst1q_s16(&diff[8], vreinterpretq_s16_u16(d1)); diff += diff_stride; pred += pred_stride; src += src_stride; - } + } while (--r); } else if (cols > 4) { - for (r = 0; r < rows; ++r) { - const uint8x8_t v_src = vld1_u8(&src[0]); - const uint8x8_t v_pred = vld1_u8(&pred[0]); - const uint16x8_t v_diff = vsubl_u8(v_src, v_pred); + do { + const uint8x8_t s = vld1_u8(&src[0]); + const uint8x8_t p = vld1_u8(&pred[0]); + const uint16x8_t v_diff = vsubl_u8(s, p); vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff)); diff += diff_stride; pred += pred_stride; src += src_stride; - } + } while (--r); } else { - for (r = 0; r < rows; ++r) { - for (c = 0; c < cols; ++c) diff[c] = src[c] - pred[c]; - - diff += diff_stride; - pred += pred_stride; - src += src_stride; - } + assert(cols == 4); + do { + const uint8x8_t s = load_unaligned_u8(src, (int)src_stride); + const uint8x8_t p = load_unaligned_u8(pred, (int)pred_stride); + const uint16x8_t d = vsubl_u8(s, p); + vst1_s16(diff + 0 * diff_stride, vreinterpret_s16_u16(vget_low_u16(d))); + vst1_s16(diff + 1 * diff_stride, vreinterpret_s16_u16(vget_high_u16(d))); + diff += 2 * diff_stride; + pred += 2 * pred_stride; + src += 2 * src_stride; + r -= 2; + } while (r); } } diff --git a/media/libvpx/libvpx/vpx_dsp/arm/sum_neon.h b/media/libvpx/libvpx/vpx_dsp/arm/sum_neon.h index d74fe0cde423..9e6833aad3d8 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/sum_neon.h +++ b/media/libvpx/libvpx/vpx_dsp/arm/sum_neon.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
 */

-#ifndef VPX_DSP_ARM_SUM_NEON_H_
-#define VPX_DSP_ARM_SUM_NEON_H_
+#ifndef VPX_VPX_DSP_ARM_SUM_NEON_H_
+#define VPX_VPX_DSP_ARM_SUM_NEON_H_

 #include <arm_neon.h>

@@ -30,18 +30,9 @@ static INLINE uint32x2_t horizontal_add_uint16x8(const uint16x8_t a) {
                   vreinterpret_u32_u64(vget_high_u64(c)));
 }

-static INLINE uint32x2_t horizontal_add_long_uint16x8(const uint16x8_t a,
-                                                      const uint16x8_t b) {
-  const uint32x4_t c = vpaddlq_u16(a);
-  const uint32x4_t d = vpadalq_u16(c, b);
-  const uint64x2_t e = vpaddlq_u32(d);
-  return vadd_u32(vreinterpret_u32_u64(vget_low_u64(e)),
-                  vreinterpret_u32_u64(vget_high_u64(e)));
-}
-
 static INLINE uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) {
   const uint64x2_t b = vpaddlq_u32(a);
   return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
                   vreinterpret_u32_u64(vget_high_u64(b)));
 }

-#endif  // VPX_DSP_ARM_SUM_NEON_H_
+#endif  // VPX_VPX_DSP_ARM_SUM_NEON_H_
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/sum_squares_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/sum_squares_neon.c
new file mode 100644
index 000000000000..cfefad99388e
--- /dev/null
+++ b/media/libvpx/libvpx/vpx_dsp/arm/sum_squares_neon.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include <assert.h>
+#include "./vpx_dsp_rtcd.h"
+
+uint64_t vpx_sum_squares_2d_i16_neon(const int16_t *src, int stride, int size) {
+  uint64x1_t s2;
+
+  if (size == 4) {
+    int16x4_t s[4];
+    int32x4_t s0;
+    uint32x2_t s1;
+
+    s[0] = vld1_s16(src + 0 * stride);
+    s[1] = vld1_s16(src + 1 * stride);
+    s[2] = vld1_s16(src + 2 * stride);
+    s[3] = vld1_s16(src + 3 * stride);
+    s0 = vmull_s16(s[0], s[0]);
+    s0 = vmlal_s16(s0, s[1], s[1]);
+    s0 = vmlal_s16(s0, s[2], s[2]);
+    s0 = vmlal_s16(s0, s[3], s[3]);
+    s1 = vpadd_u32(vget_low_u32(vreinterpretq_u32_s32(s0)),
+                   vget_high_u32(vreinterpretq_u32_s32(s0)));
+    s2 = vpaddl_u32(s1);
+  } else {
+    int r = size;
+    uint64x2_t s1 = vdupq_n_u64(0);
+
+    do {
+      int c = size;
+      int32x4_t s0 = vdupq_n_s32(0);
+      const int16_t *src_t = src;
+
+      do {
+        int16x8_t s[8];
+
+        s[0] = vld1q_s16(src_t + 0 * stride);
+        s[1] = vld1q_s16(src_t + 1 * stride);
+        s[2] = vld1q_s16(src_t + 2 * stride);
+        s[3] = vld1q_s16(src_t + 3 * stride);
+        s[4] = vld1q_s16(src_t + 4 * stride);
+        s[5] = vld1q_s16(src_t + 5 * stride);
+        s[6] = vld1q_s16(src_t + 6 * stride);
+        s[7] = vld1q_s16(src_t + 7 * stride);
+        s0 = vmlal_s16(s0, vget_low_s16(s[0]), vget_low_s16(s[0]));
+        s0 = vmlal_s16(s0, vget_low_s16(s[1]), vget_low_s16(s[1]));
+        s0 = vmlal_s16(s0, vget_low_s16(s[2]), vget_low_s16(s[2]));
+        s0 = vmlal_s16(s0, vget_low_s16(s[3]), vget_low_s16(s[3]));
+        s0 = vmlal_s16(s0, vget_low_s16(s[4]), vget_low_s16(s[4]));
+        s0 = vmlal_s16(s0, vget_low_s16(s[5]), vget_low_s16(s[5]));
+        s0 = vmlal_s16(s0, vget_low_s16(s[6]), vget_low_s16(s[6]));
+        s0 = vmlal_s16(s0, vget_low_s16(s[7]), vget_low_s16(s[7]));
+        s0 = vmlal_s16(s0, vget_high_s16(s[0]), vget_high_s16(s[0]));
+        s0 = vmlal_s16(s0, vget_high_s16(s[1]), vget_high_s16(s[1]));
+        s0 = vmlal_s16(s0, vget_high_s16(s[2]), vget_high_s16(s[2]));
+        s0 = vmlal_s16(s0, vget_high_s16(s[3]), vget_high_s16(s[3]));
+        s0 = vmlal_s16(s0, vget_high_s16(s[4]), vget_high_s16(s[4]));
+        s0 = vmlal_s16(s0, vget_high_s16(s[5]), vget_high_s16(s[5]));
+        s0 = vmlal_s16(s0, vget_high_s16(s[6]), vget_high_s16(s[6]));
+        s0 = vmlal_s16(s0, vget_high_s16(s[7]), vget_high_s16(s[7]));
+        src_t += 8;
+        c -= 8;
+      } while (c);
+
+      s1 = vaddw_u32(s1, vget_low_u32(vreinterpretq_u32_s32(s0)));
+      s1 = vaddw_u32(s1, vget_high_u32(vreinterpretq_u32_s32(s0)));
+      src += 8 * stride;
+      r -= 8;
+    } while (r);
+
+    s2 = vadd_u64(vget_low_u64(s1), vget_high_u64(s1));
+  }
+
+  return vget_lane_u64(s2, 0);
+}
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/transpose_neon.h b/media/libvpx/libvpx/vpx_dsp/arm/transpose_neon.h
index d85cbcee4645..752308160dc5 100644
--- a/media/libvpx/libvpx/vpx_dsp/arm/transpose_neon.h
+++ b/media/libvpx/libvpx/vpx_dsp/arm/transpose_neon.h
@@ -8,8 +8,8 @@
 * be found in the AUTHORS file in the root of the source tree.
 */

-#ifndef VPX_DSP_ARM_TRANSPOSE_NEON_H_
-#define VPX_DSP_ARM_TRANSPOSE_NEON_H_
+#ifndef VPX_VPX_DSP_ARM_TRANSPOSE_NEON_H_
+#define VPX_VPX_DSP_ARM_TRANSPOSE_NEON_H_

 #include <arm_neon.h>

@@ -138,8 +138,8 @@ static INLINE void transpose_s16_4x4q(int16x8_t *a0, int16x8_t *a1) {
       vtrnq_s32(vreinterpretq_s32_s16(*a0), vreinterpretq_s32_s16(*a1));

   // Swap 64 bit elements resulting in:
-  // c0.val[0]: 00 01 20 21 02 03 22 23
-  // c0.val[1]: 10 11 30 31 12 13 32 33
+  // c0: 00 01 20 21 02 03 22 23
+  // c1: 10 11 30 31 12 13 32 33

   const int32x4_t c0 =
       vcombine_s32(vget_low_s32(b0.val[0]), vget_low_s32(b0.val[1]));
@@ -169,8 +169,8 @@ static INLINE void transpose_u16_4x4q(uint16x8_t *a0, uint16x8_t *a1) {
       vtrnq_u32(vreinterpretq_u32_u16(*a0), vreinterpretq_u32_u16(*a1));

   // Swap 64 bit elements resulting in:
-  // c0.val[0]: 00 01 20 21 02 03 22 23
-  // c0.val[1]: 10 11 30 31 12 13 32 33
+  // c0: 00 01 20 21 02 03 22 23
+  // c1: 10 11 30 31 12 13 32 33

   const uint32x4_t c0 =
       vcombine_u32(vget_low_u32(b0.val[0]), vget_low_u32(b0.val[1]));
@@ -1313,4 +1313,4 @@ static INLINE void load_and_transpose_s32_8x8(
   transpose_s32_8x8(a0, a1, a2, a3, a4, a5, a6, a7);
 }

-#endif  // VPX_DSP_ARM_TRANSPOSE_NEON_H_
+#endif  // VPX_VPX_DSP_ARM_TRANSPOSE_NEON_H_
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/variance_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/variance_neon.c
index 61c2c16a7246..77b1015b742a 100644
--- a/media/libvpx/libvpx/vpx_dsp/arm/variance_neon.c
+++ b/media/libvpx/libvpx/vpx_dsp/arm/variance_neon.c
@@ -27,8 +27,9 @@
 // this limit.

 // Process a block of width 4 four rows at a time.
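All three width-specialized kernels that follow (4-wide, 16-wide, 8-wide) accumulate the same pair of statistics: the sum and the sum of squares of src - ref. A scalar reference sketch of what they compute; the helper name is hypothetical and not part of this patch:

/* variance = SSE - sum^2 / (w * h); sketch of the NEON kernels' output. */
#include <stdint.h>

static uint32_t variance_ref_sketch(const uint8_t *src, int src_stride,
                                    const uint8_t *ref, int ref_stride,
                                    int w, int h, uint32_t *sse) {
  int64_t sum = 0;
  uint64_t sse64 = 0;
  int r, c;
  for (r = 0; r < h; ++r) {
    for (c = 0; c < w; ++c) {
      const int d = src[r * src_stride + c] - ref[r * ref_stride + c];
      sum += d;                  /* signed sum of differences */
      sse64 += (uint32_t)(d * d); /* sum of squared differences */
    }
  }
  *sse = (uint32_t)sse64;
  return (uint32_t)(sse64 - (uint64_t)(((int64_t)sum * sum) / (w * h)));
}

The varianceNxM macro further down realizes the division as sum * sum >> shift with shift == log2(w * h), and widens the product to 64 bits once w * h >= 16 * 16, since sum can then grow large enough that sum * sum no longer fits in 32 bits.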
-static void variance_neon_w4x4(const uint8_t *a, int a_stride, const uint8_t *b, - int b_stride, int h, uint32_t *sse, int *sum) { +static void variance_neon_w4x4(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, int h, + uint32_t *sse, int *sum) { int i; int16x8_t sum_s16 = vdupq_n_s16(0); int32x4_t sse_lo_s32 = vdupq_n_s32(0); @@ -38,8 +39,8 @@ static void variance_neon_w4x4(const uint8_t *a, int a_stride, const uint8_t *b, assert(h <= 256); for (i = 0; i < h; i += 4) { - const uint8x16_t a_u8 = load_unaligned_u8q(a, a_stride); - const uint8x16_t b_u8 = load_unaligned_u8q(b, b_stride); + const uint8x16_t a_u8 = load_unaligned_u8q(src_ptr, src_stride); + const uint8x16_t b_u8 = load_unaligned_u8q(ref_ptr, ref_stride); const uint16x8_t diff_lo_u16 = vsubl_u8(vget_low_u8(a_u8), vget_low_u8(b_u8)); const uint16x8_t diff_hi_u16 = @@ -61,8 +62,8 @@ static void variance_neon_w4x4(const uint8_t *a, int a_stride, const uint8_t *b, sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_hi_s16), vget_high_s16(diff_hi_s16)); - a += 4 * a_stride; - b += 4 * b_stride; + src_ptr += 4 * src_stride; + ref_ptr += 4 * ref_stride; } *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0); @@ -72,9 +73,9 @@ static void variance_neon_w4x4(const uint8_t *a, int a_stride, const uint8_t *b, } // Process a block of any size where the width is divisible by 16. -static void variance_neon_w16(const uint8_t *a, int a_stride, const uint8_t *b, - int b_stride, int w, int h, uint32_t *sse, - int *sum) { +static void variance_neon_w16(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, int w, + int h, uint32_t *sse, int *sum) { int i, j; int16x8_t sum_s16 = vdupq_n_s16(0); int32x4_t sse_lo_s32 = vdupq_n_s32(0); @@ -86,8 +87,8 @@ static void variance_neon_w16(const uint8_t *a, int a_stride, const uint8_t *b, for (i = 0; i < h; ++i) { for (j = 0; j < w; j += 16) { - const uint8x16_t a_u8 = vld1q_u8(a + j); - const uint8x16_t b_u8 = vld1q_u8(b + j); + const uint8x16_t a_u8 = vld1q_u8(src_ptr + j); + const uint8x16_t b_u8 = vld1q_u8(ref_ptr + j); const uint16x8_t diff_lo_u16 = vsubl_u8(vget_low_u8(a_u8), vget_low_u8(b_u8)); @@ -110,8 +111,8 @@ static void variance_neon_w16(const uint8_t *a, int a_stride, const uint8_t *b, sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_hi_s16), vget_high_s16(diff_hi_s16)); } - a += a_stride; - b += b_stride; + src_ptr += src_stride; + ref_ptr += ref_stride; } *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0); @@ -121,8 +122,9 @@ static void variance_neon_w16(const uint8_t *a, int a_stride, const uint8_t *b, } // Process a block of width 8 two rows at a time. 
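One more orientation point before the 8-wide kernel: the height bounds asserted in these functions (h <= 256 in the 4-wide kernel above, h <= 128 below) both cap the number of terms any 16-bit lane of sum_s16 must absorb at 128. A self-contained check of that headroom arithmetic; illustrative only, not part of the patch:

/* Each int16 lane sums at most 128 diffs of magnitude <= 255. */
#include <assert.h>
#include <stdint.h>

static void check_sum_lane_headroom(void) {
  const int32_t max_abs_diff = 255; /* 8-bit src minus 8-bit ref */
  /* 4-wide: 16 diffs per iteration across 8 lanes, h / 4 iterations,
   * so h <= 256 yields (256 / 4) * 2 = 128 terms per lane.
   * 8-wide: 16 diffs per iteration across 8 lanes, h / 2 iterations,
   * so h <= 128 yields (128 / 2) * 2 = 128 terms per lane. */
  const int32_t max_terms_per_lane = 128;
  assert(max_abs_diff * max_terms_per_lane == 32640);
  assert(32640 <= INT16_MAX); /* 32767, so no lane can wrap */
}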
-static void variance_neon_w8x2(const uint8_t *a, int a_stride, const uint8_t *b, - int b_stride, int h, uint32_t *sse, int *sum) { +static void variance_neon_w8x2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, int h, + uint32_t *sse, int *sum) { int i = 0; int16x8_t sum_s16 = vdupq_n_s16(0); int32x4_t sse_lo_s32 = vdupq_n_s32(0); @@ -132,10 +134,10 @@ static void variance_neon_w8x2(const uint8_t *a, int a_stride, const uint8_t *b, assert(h <= 128); do { - const uint8x8_t a_0_u8 = vld1_u8(a); - const uint8x8_t a_1_u8 = vld1_u8(a + a_stride); - const uint8x8_t b_0_u8 = vld1_u8(b); - const uint8x8_t b_1_u8 = vld1_u8(b + b_stride); + const uint8x8_t a_0_u8 = vld1_u8(src_ptr); + const uint8x8_t a_1_u8 = vld1_u8(src_ptr + src_stride); + const uint8x8_t b_0_u8 = vld1_u8(ref_ptr); + const uint8x8_t b_1_u8 = vld1_u8(ref_ptr + ref_stride); const uint16x8_t diff_0_u16 = vsubl_u8(a_0_u8, b_0_u8); const uint16x8_t diff_1_u16 = vsubl_u8(a_1_u8, b_1_u8); const int16x8_t diff_0_s16 = vreinterpretq_s16_u16(diff_0_u16); @@ -150,8 +152,8 @@ static void variance_neon_w8x2(const uint8_t *a, int a_stride, const uint8_t *b, vget_high_s16(diff_0_s16)); sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_1_s16), vget_high_s16(diff_1_s16)); - a += a_stride + a_stride; - b += b_stride + b_stride; + src_ptr += src_stride + src_stride; + ref_ptr += ref_stride + ref_stride; i += 2; } while (i < h); @@ -161,31 +163,36 @@ static void variance_neon_w8x2(const uint8_t *a, int a_stride, const uint8_t *b, 0); } -void vpx_get8x8var_neon(const uint8_t *a, int a_stride, const uint8_t *b, - int b_stride, unsigned int *sse, int *sum) { - variance_neon_w8x2(a, a_stride, b, b_stride, 8, sse, sum); +void vpx_get8x8var_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance_neon_w8x2(src_ptr, src_stride, ref_ptr, ref_stride, 8, sse, sum); } -void vpx_get16x16var_neon(const uint8_t *a, int a_stride, const uint8_t *b, - int b_stride, unsigned int *sse, int *sum) { - variance_neon_w16(a, a_stride, b, b_stride, 16, 16, sse, sum); +void vpx_get16x16var_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 16, 16, sse, sum); } -#define varianceNxM(n, m, shift) \ - unsigned int vpx_variance##n##x##m##_neon(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - unsigned int *sse) { \ - int sum; \ - if (n == 4) \ - variance_neon_w4x4(a, a_stride, b, b_stride, m, sse, &sum); \ - else if (n == 8) \ - variance_neon_w8x2(a, a_stride, b, b_stride, m, sse, &sum); \ - else \ - variance_neon_w16(a, a_stride, b, b_stride, n, m, sse, &sum); \ - if (n * m < 16 * 16) \ - return *sse - ((sum * sum) >> shift); \ - else \ - return *sse - (uint32_t)(((int64_t)sum * sum) >> shift); \ +#define varianceNxM(n, m, shift) \ + unsigned int vpx_variance##n##x##m##_neon( \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride, unsigned int *sse) { \ + int sum; \ + if (n == 4) \ + variance_neon_w4x4(src_ptr, src_stride, ref_ptr, ref_stride, m, sse, \ + &sum); \ + else if (n == 8) \ + variance_neon_w8x2(src_ptr, src_stride, ref_ptr, ref_stride, m, sse, \ + &sum); \ + else \ + variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, n, m, sse, \ + &sum); \ + if (n * m < 16 * 16) \ + return *sse - ((sum * sum) >> shift); \ + else \ + return *sse - (uint32_t)(((int64_t)sum * sum) >> 
shift); \ } varianceNxM(4, 4, 4); @@ -199,58 +206,66 @@ varianceNxM(16, 32, 9); varianceNxM(32, 16, 9); varianceNxM(32, 32, 10); -unsigned int vpx_variance32x64_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, +unsigned int vpx_variance32x64_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum1, sum2; uint32_t sse1, sse2; - variance_neon_w16(a, a_stride, b, b_stride, 32, 32, &sse1, &sum1); - variance_neon_w16(a + (32 * a_stride), a_stride, b + (32 * b_stride), - b_stride, 32, 32, &sse2, &sum2); + variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32, &sse1, + &sum1); + variance_neon_w16(src_ptr + (32 * src_stride), src_stride, + ref_ptr + (32 * ref_stride), ref_stride, 32, 32, &sse2, + &sum2); *sse = sse1 + sse2; sum1 += sum2; return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 11); } -unsigned int vpx_variance64x32_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, +unsigned int vpx_variance64x32_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum1, sum2; uint32_t sse1, sse2; - variance_neon_w16(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1); - variance_neon_w16(a + (16 * a_stride), a_stride, b + (16 * b_stride), - b_stride, 64, 16, &sse2, &sum2); + variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 64, 16, &sse1, + &sum1); + variance_neon_w16(src_ptr + (16 * src_stride), src_stride, + ref_ptr + (16 * ref_stride), ref_stride, 64, 16, &sse2, + &sum2); *sse = sse1 + sse2; sum1 += sum2; return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 11); } -unsigned int vpx_variance64x64_neon(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, +unsigned int vpx_variance64x64_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum1, sum2; uint32_t sse1, sse2; - variance_neon_w16(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1); - variance_neon_w16(a + (16 * a_stride), a_stride, b + (16 * b_stride), - b_stride, 64, 16, &sse2, &sum2); + variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 64, 16, &sse1, + &sum1); + variance_neon_w16(src_ptr + (16 * src_stride), src_stride, + ref_ptr + (16 * ref_stride), ref_stride, 64, 16, &sse2, + &sum2); sse1 += sse2; sum1 += sum2; - variance_neon_w16(a + (16 * 2 * a_stride), a_stride, b + (16 * 2 * b_stride), - b_stride, 64, 16, &sse2, &sum2); + variance_neon_w16(src_ptr + (16 * 2 * src_stride), src_stride, + ref_ptr + (16 * 2 * ref_stride), ref_stride, 64, 16, &sse2, + &sum2); sse1 += sse2; sum1 += sum2; - variance_neon_w16(a + (16 * 3 * a_stride), a_stride, b + (16 * 3 * b_stride), - b_stride, 64, 16, &sse2, &sum2); + variance_neon_w16(src_ptr + (16 * 3 * src_stride), src_stride, + ref_ptr + (16 * 3 * ref_stride), ref_stride, 64, 16, &sse2, + &sum2); *sse = sse1 + sse2; sum1 += sum2; return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 12); } -unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int source_stride, - const unsigned char *ref_ptr, int recon_stride, +unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, unsigned int *sse) { int i; int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; @@ -267,13 +282,13 @@ unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int source_stride, for (i = 0; i < 8; i++) { // mse16x16_neon_loop q0u8 = vld1q_u8(src_ptr); - src_ptr += 
source_stride; + src_ptr += src_stride; q1u8 = vld1q_u8(src_ptr); - src_ptr += source_stride; + src_ptr += src_stride; q2u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; + ref_ptr += ref_stride; q3u8 = vld1q_u8(ref_ptr); - ref_ptr += recon_stride; + ref_ptr += ref_stride; q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); @@ -312,10 +327,9 @@ unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int source_stride, return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); } -unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, - int source_stride, +unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, - int recon_stride) { + int ref_stride) { int16x4_t d22s16, d24s16, d26s16, d28s16; int64x1_t d0s64; uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; @@ -324,21 +338,21 @@ unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int64x2_t q1s64; d0u8 = vld1_u8(src_ptr); - src_ptr += source_stride; + src_ptr += src_stride; d4u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; + ref_ptr += ref_stride; d1u8 = vld1_u8(src_ptr); - src_ptr += source_stride; + src_ptr += src_stride; d5u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; + ref_ptr += ref_stride; d2u8 = vld1_u8(src_ptr); - src_ptr += source_stride; + src_ptr += src_stride; d6u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; + ref_ptr += ref_stride; d3u8 = vld1_u8(src_ptr); - src_ptr += source_stride; + src_ptr += src_stride; d7u8 = vld1_u8(ref_ptr); - ref_ptr += recon_stride; + ref_ptr += ref_stride; q11u16 = vsubl_u8(d0u8, d4u8); q12u16 = vsubl_u8(d1u8, d5u8); diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_horiz_filter_type1_neon.asm b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_horiz_filter_type1_neon.asm new file mode 100644 index 000000000000..d8e4bcc3a7fb --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_horiz_filter_type1_neon.asm @@ -0,0 +1,438 @@ +; +; Copyright (c) 2018 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
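For orientation before the hand-scheduled body: per output pixel, this kernel and its filter_type siblings evaluate an 8-tap FIR whose 7-bit fixed-point taps sum to 128. The type1/type2 split bakes the tap sign pattern into the choice of vmlal versus vmlsl after taking vabs of the coefficients, and the _avg variants fold the existing dst pixel in with a rounding halving add (vrhadd). A scalar model of one pixel follows; the names are illustrative, and the NEON code appears to reach the same value through the 0xc000 accumulator bias, the vhadd with 0x4000, and the final vqrshrun by 6:

/* Scalar model of one 8-tap convolve output pixel (illustrative). */
#include <stdint.h>

static uint8_t convolve8_px_sketch(const uint8_t *s /* 8 input taps */,
                                   const int16_t *filter /* sums to 128 */) {
  int k, sum = 0;
  for (k = 0; k < 8; ++k) sum += s[k] * filter[k];
  sum = (sum + 64) >> 7; /* round by FILTER_BITS == 7 */
  return (uint8_t)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
}

static uint8_t convolve8_avg_px_sketch(const uint8_t *s,
                                       const int16_t *filter, uint8_t dst) {
  /* vrhadd.u8: rounding halving add with the existing dst pixel. */
  return (uint8_t)((convolve8_px_sketch(s, filter) + dst + 1) >> 1);
}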
+; +;**************Variables Vs Registers***************************************** +; r0 => src +; r1 => dst +; r2 => src_stride +; r3 => dst_stride +; r4 => filter_x0 +; r8 => ht +; r10 => wd + + EXPORT |vpx_convolve8_avg_horiz_filter_type1_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +|vpx_convolve8_avg_horiz_filter_type1_neon| PROC + + stmfd sp!, {r4 - r12, r14} ;stack stores the values of + ; the arguments + vpush {d8 - d15} ; stack offset by 64 + mov r4, r1 + mov r1, r2 + mov r2, r4 + +start_loop_count + ldr r4, [sp, #104] ;loads pi1_coeff + ldr r8, [sp, #108] ;loads x0_q4 + add r4, r4, r8, lsl #4 ;r4 = filter[x0_q4] + ldr r8, [sp, #128] ;loads ht + ldr r10, [sp, #124] ;loads wd + vld2.8 {d0, d1}, [r4] ;coeff = vld1_s8(pi1_coeff) + mov r11, #1 + subs r14, r8, #0 ;checks for ht == 0 + vabs.s8 d2, d0 ;vabs_s8(coeff) + vdup.8 d24, d2[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, + ; 0) + sub r12, r0, #3 ;pu1_src - 3 + vdup.8 d25, d2[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, + ; 1) + add r4, r12, r2 ;pu1_src_tmp2_8 = pu1_src + src_strd + vdup.8 d26, d2[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, + ; 2) + rsb r9, r10, r2, lsl #1 ;2*src_strd - wd + vdup.8 d27, d2[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, + ; 3) + rsb r8, r10, r3, lsl #1 ;2*dst_strd - wd + vdup.8 d28, d2[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, + ; 4) + vdup.8 d29, d2[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, + ; 5) + vdup.8 d30, d2[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, + ; 6) + vdup.8 d31, d2[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, + ; 7) + mov r7, r1 + cmp r10, #4 + ble outer_loop_4 + + cmp r10, #24 + moveq r10, #16 + addeq r8, #8 + addeq r9, #8 + cmp r10, #16 + bge outer_loop_16 + + cmp r10, #12 + addeq r8, #4 + addeq r9, #4 + b outer_loop_8 + +outer_loop8_residual + sub r12, r0, #3 ;pu1_src - 3 + mov r1, r7 + mov r14, #32 + add r1, #16 + add r12, #16 + mov r10, #8 + add r8, #8 + add r9, #8 + +outer_loop_8 + add r6, r1, r3 ;pu1_dst + dst_strd + add r4, r12, r2 ;pu1_src + src_strd + subs r5, r10, #0 ;checks wd + ble end_inner_loop_8 + +inner_loop_8 + mov r7, #0xc000 + vld1.u32 {d0}, [r12], r11 ;vector load pu1_src + vdup.16 q4, r7 + vld1.u32 {d1}, [r12], r11 + vdup.16 q5, r7 + vld1.u32 {d2}, [r12], r11 + vld1.u32 {d3}, [r12], r11 + mov r7, #0x4000 + vld1.u32 {d4}, [r12], r11 + vmlsl.u8 q4, d1, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {d5}, [r12], r11 + vmlal.u8 q4, d3, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {d6}, [r12], r11 + vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vld1.u32 {d7}, [r12], r11 + vmlal.u8 q4, d2, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd + vmlal.u8 q4, d4, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vld1.u32 {d13}, [r4], r11 + vmlal.u8 q4, d5, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vld1.u32 {d14}, [r4], r11 + vmlsl.u8 q4, d6, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vld1.u32 {d15}, [r4], r11 + vmlsl.u8 q4, d7, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + vld1.u32 {d16}, [r4], r11 ;vector load pu1_src + src_strd + vdup.16 q11, r7 + vmlal.u8 q5, d15, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {d17}, [r4], r11 + vmlal.u8 q5, d14, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vhadd.s16 q4, q4, q11 + vld1.u32 {d18}, [r4], r11 + vmlal.u8 q5, d16, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vld1.u32 {d19}, [r4], r11 ;vector load pu1_src + src_strd + vmlal.u8 q5, d17, d29 ;mul_res 
= vmlsl_u8(src[0_5], + ; coeffabs_5); + vld1.u8 {d6}, [r1] + vqrshrun.s16 d20, q4, #6 ;right shift and saturating narrow + ; result 1 + vmlsl.u8 q5, d18, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q5, d19, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + vld1.u8 {d7}, [r6] + vrhadd.u8 d20, d20, d6 + vmlsl.u8 q5, d12, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vmlsl.u8 q5, d13, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vst1.8 {d20}, [r1]! ;store the result pu1_dst + vhadd.s16 q5, q5, q11 + subs r5, r5, #8 ;decrement the wd loop + vqrshrun.s16 d8, q5, #6 ;right shift and saturating narrow + ; result 2 + vrhadd.u8 d8, d8, d7 + vst1.8 {d8}, [r6]! ;store the result pu1_dst + cmp r5, #4 + bgt inner_loop_8 + +end_inner_loop_8 + subs r14, r14, #2 ;decrement the ht loop + add r12, r12, r9 ;increment the src pointer by + ; 2*src_strd-wd + add r1, r1, r8 ;increment the dst pointer by + ; 2*dst_strd-wd + bgt outer_loop_8 + + ldr r10, [sp, #120] ;loads wd + cmp r10, #12 + beq outer_loop4_residual + +end_loops + b end_func + +outer_loop_16 + str r0, [sp, #-4]! + str r7, [sp, #-4]! + add r6, r1, r3 ;pu1_dst + dst_strd + add r4, r12, r2 ;pu1_src + src_strd + and r0, r12, #31 + mov r7, #0xc000 + sub r5, r10, #0 ;checks wd + pld [r4, r2, lsl #1] + pld [r12, r2, lsl #1] + vld1.u32 {q0}, [r12], r11 ;vector load pu1_src + vdup.16 q4, r7 + vld1.u32 {q1}, [r12], r11 + vld1.u32 {q2}, [r12], r11 + vld1.u32 {q3}, [r12], r11 + vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vld1.u32 {q6}, [r12], r11 + vmlsl.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {q7}, [r12], r11 + vmlal.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u32 {q8}, [r12], r11 + vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {q9}, [r12], r11 + vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vmlal.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vdup.16 q10, r7 + vmlsl.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + +inner_loop_16 + vmlsl.u8 q10, d1, d24 + vdup.16 q5, r7 + vmlsl.u8 q10, d3, d25 + mov r7, #0x4000 + vdup.16 q11, r7 + vmlal.u8 q10, d5, d26 + vld1.u32 {q0}, [r4], r11 ;vector load pu1_src + vhadd.s16 q4, q4, q11 + vld1.u32 {q1}, [r4], r11 + vmlal.u8 q10, d7, d27 + add r12, #8 + subs r5, r5, #16 + vmlal.u8 q10, d13, d28 + vld1.u32 {q2}, [r4], r11 + vmlal.u8 q10, d15, d29 + vld1.u32 {q3}, [r4], r11 + vqrshrun.s16 d8, q4, #6 ;right shift and saturating narrow + ; result 1 + vmlsl.u8 q10, d17, d30 + vld1.u32 {q6}, [r4], r11 + vmlsl.u8 q10, d19, d31 + vld1.u32 {q7}, [r4], r11 + add r7, r1, #8 + vmlsl.u8 q5, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vmlsl.u8 q5, d2, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {q8}, [r4], r11 + vhadd.s16 q10, q10, q11 + vld1.u32 {q9}, [r4], r11 + vld1.u8 {d0}, [r1] + vmlal.u8 q5, d4, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u8 {d2}, [r7] + vmlal.u8 q5, d6, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + add r4, #8 + mov r7, #0xc000 + vmlal.u8 q5, d12, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vmlal.u8 q5, d14, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vqrshrun.s16 d9, q10, #6 + vdup.16 q11, r7 + vmlsl.u8 q5, d16, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q5, d18, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + mov r7, #0x4000 + vrhadd.u8 d8, 
d8, d0 + vrhadd.u8 d9, d9, d2 + vmlsl.u8 q11, d1, d24 + vmlsl.u8 q11, d3, d25 + vdup.16 q10, r7 + vmlal.u8 q11, d5, d26 + pld [r12, r2, lsl #2] + pld [r4, r2, lsl #2] + addeq r12, r12, r9 ;increment the src pointer by + ; 2*src_strd-wd + addeq r4, r12, r2 ;pu1_src + src_strd + vmlal.u8 q11, d7, d27 + vmlal.u8 q11, d13, d28 + vst1.8 {q4}, [r1]! ;store the result pu1_dst + subeq r14, r14, #2 + vhadd.s16 q5, q5, q10 + vmlal.u8 q11, d15, d29 + addeq r1, r1, r8 + vmlsl.u8 q11, d17, d30 + cmp r14, #0 + vmlsl.u8 q11, d19, d31 + vqrshrun.s16 d10, q5, #6 ;right shift and saturating narrow + ; result 2 + beq epilog_16 + + vld1.u32 {q0}, [r12], r11 ;vector load pu1_src + mov r7, #0xc000 + cmp r5, #0 + vld1.u32 {q1}, [r12], r11 + vhadd.s16 q11, q11, q10 + vld1.u32 {q2}, [r12], r11 + vdup.16 q4, r7 + vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vdup.16 q10, r7 + vld1.u32 {q3}, [r12], r11 + add r7, r6, #8 + moveq r5, r10 + vld1.u8 {d0}, [r6] + vmlsl.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u8 {d2}, [r7] + vqrshrun.s16 d11, q11, #6 + vmlal.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u32 {q6}, [r12], r11 + vrhadd.u8 d10, d10, d0 + vld1.u32 {q7}, [r12], r11 + vrhadd.u8 d11, d11, d2 + vld1.u32 {q8}, [r12], r11 + vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {q9}, [r12], r11 + vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vmlal.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + mov r7, #0xc000 + vmlsl.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vst1.8 {q5}, [r6]! ;store the result pu1_dst + vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + addeq r6, r1, r3 ;pu1_dst + dst_strd + b inner_loop_16 + +epilog_16 + mov r7, #0x4000 + ldr r0, [sp], #4 + ldr r10, [sp, #120] + vdup.16 q10, r7 + vhadd.s16 q11, q11, q10 + vqrshrun.s16 d11, q11, #6 + add r7, r6, #8 + vld1.u8 {d20}, [r6] + vld1.u8 {d21}, [r7] + vrhadd.u8 d10, d10, d20 + vrhadd.u8 d11, d11, d21 + vst1.8 {q5}, [r6]! 
;store the result pu1_dst + ldr r7, [sp], #4 + cmp r10, #24 + beq outer_loop8_residual + +end_loops1 + b end_func + +outer_loop4_residual + sub r12, r0, #3 ;pu1_src - 3 + mov r1, r7 + add r1, #8 + mov r10, #4 + add r12, #8 + mov r14, #16 + add r8, #4 + add r9, #4 + +outer_loop_4 + add r6, r1, r3 ;pu1_dst + dst_strd + add r4, r12, r2 ;pu1_src + src_strd + subs r5, r10, #0 ;checks wd + ble end_inner_loop_4 + +inner_loop_4 + vld1.u32 {d0}, [r12], r11 ;vector load pu1_src + vld1.u32 {d1}, [r12], r11 + vld1.u32 {d2}, [r12], r11 + vld1.u32 {d3}, [r12], r11 + vld1.u32 {d4}, [r12], r11 + vld1.u32 {d5}, [r12], r11 + vld1.u32 {d6}, [r12], r11 + vld1.u32 {d7}, [r12], r11 + sub r12, r12, #4 + vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd + vld1.u32 {d13}, [r4], r11 + vzip.32 d0, d12 ;vector zip the i iteration and ii + ; interation in single register + vld1.u32 {d14}, [r4], r11 + vzip.32 d1, d13 + vld1.u32 {d15}, [r4], r11 + vzip.32 d2, d14 + vld1.u32 {d16}, [r4], r11 + vzip.32 d3, d15 + vld1.u32 {d17}, [r4], r11 + vzip.32 d4, d16 + vld1.u32 {d18}, [r4], r11 + vzip.32 d5, d17 + vld1.u32 {d19}, [r4], r11 + mov r7, #0xc000 + vdup.16 q4, r7 + sub r4, r4, #4 + vzip.32 d6, d18 + vzip.32 d7, d19 + vmlsl.u8 q4, d1, d25 ;arithmetic operations for ii + ; iteration in the same time + vmlsl.u8 q4, d0, d24 + vmlal.u8 q4, d2, d26 + vmlal.u8 q4, d3, d27 + vmlal.u8 q4, d4, d28 + vmlal.u8 q4, d5, d29 + vmlsl.u8 q4, d6, d30 + vmlsl.u8 q4, d7, d31 + mov r7, #0x4000 + vdup.16 q10, r7 + vhadd.s16 q4, q4, q10 + vqrshrun.s16 d8, q4, #6 + vld1.u32 {d10[0]}, [r1] + vld1.u32 {d10[1]}, [r6] + vrhadd.u8 d8, d8, d10 + vst1.32 {d8[0]},[r1]! ;store the i iteration result which + ; is in upper part of the register + vst1.32 {d8[1]},[r6]! ;store the ii iteration result which + ; is in lower part of the register + subs r5, r5, #4 ;decrement the wd by 4 + bgt inner_loop_4 + +end_inner_loop_4 + subs r14, r14, #2 ;decrement the ht by 4 + add r12, r12, r9 ;increment the input pointer + ; 2*src_strd-wd + add r1, r1, r8 ;increment the output pointer + ; 2*dst_strd-wd + bgt outer_loop_4 + +end_func + vpop {d8 - d15} + ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp + + ENDP + + END diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_horiz_filter_type2_neon.asm b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_horiz_filter_type2_neon.asm new file mode 100644 index 000000000000..7a77747fec69 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_horiz_filter_type2_neon.asm @@ -0,0 +1,439 @@ +; +; Copyright (c) 2018 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; +;**************Variables Vs Registers*********************************** +; r0 => src +; r1 => dst +; r2 => src_stride +; r3 => dst_stride +; r4 => filter_x0 +; r8 => ht +; r10 => wd + + EXPORT |vpx_convolve8_avg_horiz_filter_type2_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +|vpx_convolve8_avg_horiz_filter_type2_neon| PROC + + stmfd sp!, {r4 - r12, r14} ;stack stores the values of + ; the arguments + vpush {d8 - d15} ; stack offset by 64 + mov r4, r1 + mov r1, r2 + mov r2, r4 + +start_loop_count + ldr r4, [sp, #104] ;loads pi1_coeff + ldr r8, [sp, #108] ;loads x0_q4 + add r4, r4, r8, lsl #4 ;r4 = filter[x0_q4] + ldr r8, [sp, #128] ;loads ht + ldr r10, [sp, #124] ;loads wd + vld2.8 {d0, d1}, [r4] ;coeff = vld1_s8(pi1_coeff) + mov r11, #1 + subs r14, r8, #0 ;checks for ht == 0 + vabs.s8 d2, d0 ;vabs_s8(coeff) + vdup.8 d24, d2[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, + ; 0) + sub r12, r0, #3 ;pu1_src - 3 + vdup.8 d25, d2[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, + ; 1) + add r4, r12, r2 ;pu1_src_tmp2_8 = pu1_src + src_strd + vdup.8 d26, d2[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, + ; 2) + rsb r9, r10, r2, lsl #1 ;2*src_strd - wd + vdup.8 d27, d2[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, + ; 3) + rsb r8, r10, r3, lsl #1 ;2*dst_strd - wd + vdup.8 d28, d2[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, + ; 4) + vdup.8 d29, d2[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, + ; 5) + vdup.8 d30, d2[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, + ; 6) + vdup.8 d31, d2[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, + ; 7) + mov r7, r1 + cmp r10, #4 + ble outer_loop_4 + + cmp r10, #24 + moveq r10, #16 + addeq r8, #8 + addeq r9, #8 + cmp r10, #16 + bge outer_loop_16 + + cmp r10, #12 + addeq r8, #4 + addeq r9, #4 + b outer_loop_8 + +outer_loop8_residual + sub r12, r0, #3 ;pu1_src - 3 + mov r1, r7 + mov r14, #32 + add r1, #16 + add r12, #16 + mov r10, #8 + add r8, #8 + add r9, #8 + +outer_loop_8 + + add r6, r1, r3 ;pu1_dst + dst_strd + add r4, r12, r2 ;pu1_src + src_strd + subs r5, r10, #0 ;checks wd + ble end_inner_loop_8 + +inner_loop_8 + mov r7, #0xc000 + vld1.u32 {d0}, [r12], r11 ;vector load pu1_src + vdup.16 q4, r7 + vld1.u32 {d1}, [r12], r11 + vdup.16 q5, r7 + vld1.u32 {d2}, [r12], r11 + vld1.u32 {d3}, [r12], r11 + mov r7, #0x4000 + vld1.u32 {d4}, [r12], r11 + vmlal.u8 q4, d1, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {d5}, [r12], r11 + vmlal.u8 q4, d3, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {d6}, [r12], r11 + vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vld1.u32 {d7}, [r12], r11 + vmlsl.u8 q4, d2, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd + vmlal.u8 q4, d4, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vld1.u32 {d13}, [r4], r11 + vmlsl.u8 q4, d5, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vld1.u32 {d14}, [r4], r11 + vmlal.u8 q4, d6, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vld1.u32 {d15}, [r4], r11 + vmlsl.u8 q4, d7, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + vld1.u32 {d16}, [r4], r11 ;vector load pu1_src + src_strd + vdup.16 q11, r7 + vmlal.u8 q5, d15, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {d17}, [r4], r11 + vmlsl.u8 q5, d14, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vhadd.s16 q4, q4, q11 + vld1.u32 {d18}, [r4], r11 + vmlal.u8 q5, d16, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vld1.u32 {d19}, [r4], r11 ;vector load pu1_src + src_strd + vmlsl.u8 q5, d17, d29 ;mul_res = 
vmlsl_u8(src[0_5], + ; coeffabs_5); + vld1.u8 {d6}, [r1] + vqrshrun.s16 d20, q4, #6 ;right shift and saturating narrow + ; result 1 + vmlal.u8 q5, d18, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q5, d19, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + vld1.u8 {d7}, [r6] + vrhadd.u8 d20, d20, d6 + vmlsl.u8 q5, d12, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vmlal.u8 q5, d13, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vst1.8 {d20}, [r1]! ;store the result pu1_dst + vhadd.s16 q5, q5, q11 + subs r5, r5, #8 ;decrement the wd loop + vqrshrun.s16 d8, q5, #6 ;right shift and saturating narrow + ; result 2 + vrhadd.u8 d8, d8, d7 + vst1.8 {d8}, [r6]! ;store the result pu1_dst + cmp r5, #4 + bgt inner_loop_8 + +end_inner_loop_8 + subs r14, r14, #2 ;decrement the ht loop + add r12, r12, r9 ;increment the src pointer by + ; 2*src_strd-wd + add r1, r1, r8 ;increment the dst pointer by + ; 2*dst_strd-wd + bgt outer_loop_8 + + ldr r10, [sp, #120] ;loads wd + cmp r10, #12 + beq outer_loop4_residual + +end_loops + b end_func + +outer_loop_16 + str r0, [sp, #-4]! + str r7, [sp, #-4]! + add r6, r1, r3 ;pu1_dst + dst_strd + add r4, r12, r2 ;pu1_src + src_strd + and r0, r12, #31 + mov r7, #0xc000 + sub r5, r10, #0 ;checks wd + pld [r4, r2, lsl #1] + pld [r12, r2, lsl #1] + vld1.u32 {q0}, [r12], r11 ;vector load pu1_src + vdup.16 q4, r7 + vld1.u32 {q1}, [r12], r11 + vld1.u32 {q2}, [r12], r11 + vld1.u32 {q3}, [r12], r11 + vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vld1.u32 {q6}, [r12], r11 + vmlal.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {q7}, [r12], r11 + vmlsl.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u32 {q8}, [r12], r11 + vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {q9}, [r12], r11 + vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vmlsl.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vdup.16 q10, r7 + vmlal.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + +inner_loop_16 + vmlsl.u8 q10, d1, d24 + vdup.16 q5, r7 + vmlal.u8 q10, d3, d25 + mov r7, #0x4000 + vdup.16 q11, r7 + vmlsl.u8 q10, d5, d26 + vld1.u32 {q0}, [r4], r11 ;vector load pu1_src + vhadd.s16 q4, q4, q11 + vld1.u32 {q1}, [r4], r11 + vmlal.u8 q10, d7, d27 + add r12, #8 + subs r5, r5, #16 + vmlal.u8 q10, d13, d28 + vld1.u32 {q2}, [r4], r11 + vmlsl.u8 q10, d15, d29 + vld1.u32 {q3}, [r4], r11 + vqrshrun.s16 d8, q4, #6 ;right shift and saturating narrow + ; result 1 + vmlal.u8 q10, d17, d30 + vld1.u32 {q6}, [r4], r11 + vmlsl.u8 q10, d19, d31 + vld1.u32 {q7}, [r4], r11 + add r7, r1, #8 + vmlsl.u8 q5, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vmlal.u8 q5, d2, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {q8}, [r4], r11 + vhadd.s16 q10, q10, q11 + vld1.u32 {q9}, [r4], r11 + vld1.u8 {d0}, [r1] + vmlsl.u8 q5, d4, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u8 {d2}, [r7] + vmlal.u8 q5, d6, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + add r4, #8 + mov r7, #0xc000 + vmlal.u8 q5, d12, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vmlsl.u8 q5, d14, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vqrshrun.s16 d9, q10, #6 + vdup.16 q11, r7 + vmlal.u8 q5, d16, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q5, d18, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + mov r7, #0x4000 + vrhadd.u8 d8, d8, 
d0 + vrhadd.u8 d9, d9, d2 + vmlsl.u8 q11, d1, d24 + vmlal.u8 q11, d3, d25 + vdup.16 q10, r7 + vmlsl.u8 q11, d5, d26 + pld [r12, r2, lsl #2] + pld [r4, r2, lsl #2] + addeq r12, r12, r9 ;increment the src pointer by + ; 2*src_strd-wd + addeq r4, r12, r2 ;pu1_src + src_strd + vmlal.u8 q11, d7, d27 + vmlal.u8 q11, d13, d28 + vst1.8 {q4}, [r1]! ;store the result pu1_dst + subeq r14, r14, #2 + vhadd.s16 q5, q5, q10 + vmlsl.u8 q11, d15, d29 + addeq r1, r1, r8 + vmlal.u8 q11, d17, d30 + cmp r14, #0 + vmlsl.u8 q11, d19, d31 + vqrshrun.s16 d10, q5, #6 ;right shift and saturating narrow + ; result 2 + beq epilog_16 + + vld1.u32 {q0}, [r12], r11 ;vector load pu1_src + mov r7, #0xc000 + cmp r5, #0 + vld1.u32 {q1}, [r12], r11 + vhadd.s16 q11, q11, q10 + vld1.u32 {q2}, [r12], r11 + vdup.16 q4, r7 + vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vdup.16 q10, r7 + vld1.u32 {q3}, [r12], r11 + add r7, r6, #8 + moveq r5, r10 + vld1.u8 {d0}, [r6] + vmlal.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u8 {d2}, [r7] + vqrshrun.s16 d11, q11, #6 + vmlsl.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u32 {q6}, [r12], r11 + vrhadd.u8 d10, d10, d0 + vld1.u32 {q7}, [r12], r11 + vrhadd.u8 d11, d11, d2 + vld1.u32 {q8}, [r12], r11 + vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {q9}, [r12], r11 + vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vmlsl.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + mov r7, #0xc000 + vmlal.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vst1.8 {q5}, [r6]! ;store the result pu1_dst + vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + addeq r6, r1, r3 ;pu1_dst + dst_strd + b inner_loop_16 + +epilog_16 + mov r7, #0x4000 + ldr r0, [sp], #4 + ldr r10, [sp, #120] + vdup.16 q10, r7 + vhadd.s16 q11, q11, q10 + vqrshrun.s16 d11, q11, #6 + add r7, r6, #8 + vld1.u8 {d20}, [r6] + vld1.u8 {d21}, [r7] + vrhadd.u8 d10, d10, d20 + vrhadd.u8 d11, d11, d21 + vst1.8 {q5}, [r6]! 
;store the result pu1_dst + ldr r7, [sp], #4 + cmp r10, #24 + beq outer_loop8_residual + +end_loops1 + b end_func + +outer_loop4_residual + sub r12, r0, #3 ;pu1_src - 3 + mov r1, r7 + add r1, #8 + mov r10, #4 + add r12, #8 + mov r14, #16 + add r8, #4 + add r9, #4 + +outer_loop_4 + add r6, r1, r3 ;pu1_dst + dst_strd + add r4, r12, r2 ;pu1_src + src_strd + subs r5, r10, #0 ;checks wd + ble end_inner_loop_4 + +inner_loop_4 + vld1.u32 {d0}, [r12], r11 ;vector load pu1_src + vld1.u32 {d1}, [r12], r11 + vld1.u32 {d2}, [r12], r11 + vld1.u32 {d3}, [r12], r11 + vld1.u32 {d4}, [r12], r11 + vld1.u32 {d5}, [r12], r11 + vld1.u32 {d6}, [r12], r11 + vld1.u32 {d7}, [r12], r11 + sub r12, r12, #4 + vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd + vld1.u32 {d13}, [r4], r11 + vzip.32 d0, d12 ;vector zip the i iteration and ii + ; interation in single register + vld1.u32 {d14}, [r4], r11 + vzip.32 d1, d13 + vld1.u32 {d15}, [r4], r11 + vzip.32 d2, d14 + vld1.u32 {d16}, [r4], r11 + vzip.32 d3, d15 + vld1.u32 {d17}, [r4], r11 + vzip.32 d4, d16 + vld1.u32 {d18}, [r4], r11 + vzip.32 d5, d17 + vld1.u32 {d19}, [r4], r11 + mov r7, #0xc000 + vdup.16 q4, r7 + sub r4, r4, #4 + vzip.32 d6, d18 + vzip.32 d7, d19 + vmlal.u8 q4, d1, d25 ;arithmetic operations for ii + ; iteration in the same time + vmlsl.u8 q4, d0, d24 + vmlsl.u8 q4, d2, d26 + vmlal.u8 q4, d3, d27 + vmlal.u8 q4, d4, d28 + vmlsl.u8 q4, d5, d29 + vmlal.u8 q4, d6, d30 + vmlsl.u8 q4, d7, d31 + mov r7, #0x4000 + vdup.16 q10, r7 + vhadd.s16 q4, q4, q10 + vqrshrun.s16 d8, q4, #6 + vld1.u32 {d10[0]}, [r1] + vld1.u32 {d10[1]}, [r6] + vrhadd.u8 d8, d8, d10 + vst1.32 {d8[0]},[r1]! ;store the i iteration result which + ; is in upper part of the register + vst1.32 {d8[1]},[r6]! ;store the ii iteration result which + ; is in lower part of the register + subs r5, r5, #4 ;decrement the wd by 4 + bgt inner_loop_4 + +end_inner_loop_4 + subs r14, r14, #2 ;decrement the ht by 4 + add r12, r12, r9 ;increment the input pointer + ; 2*src_strd-wd + add r1, r1, r8 ;increment the output pointer + ; 2*dst_strd-wd + bgt outer_loop_4 + +end_func + vpop {d8 - d15} + ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp + + ENDP + + END diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm deleted file mode 100644 index 1c2ee5063069..000000000000 --- a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm +++ /dev/null @@ -1,295 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - ; These functions are only valid when: - ; x_step_q4 == 16 - ; w%4 == 0 - ; h%4 == 0 - ; taps == 8 - ; VP9_FILTER_WEIGHT == 128 - ; VP9_FILTER_SHIFT == 7 - - EXPORT |vpx_convolve8_avg_horiz_neon| - EXPORT |vpx_convolve8_avg_vert_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - - ; Multiply and accumulate by q0 - MACRO - MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7 - vmull.s16 $dst, $src0, d0[0] - vmlal.s16 $dst, $src1, d0[1] - vmlal.s16 $dst, $src2, d0[2] - vmlal.s16 $dst, $src3, d0[3] - vmlal.s16 $dst, $src4, d1[0] - vmlal.s16 $dst, $src5, d1[1] - vmlal.s16 $dst, $src6, d1[2] - vmlal.s16 $dst, $src7, d1[3] - MEND - -; r0 const uint8_t *src -; r1 int src_stride -; r2 uint8_t *dst -; r3 int dst_stride -; sp[]const int16_t *filter -; sp[]int x0_q4 -; sp[]int x_step_q4 ; unused -; sp[]int y0_q4 -; sp[]int y_step_q4 ; unused -; sp[]int w -; sp[]int h - -|vpx_convolve8_avg_horiz_neon| PROC - push {r4-r10, lr} - - sub r0, r0, #3 ; adjust for taps - - ldrd r4, r5, [sp, #32] ; filter, x0_q4 - add r4, r5, lsl #4 - ldrd r6, r7, [sp, #52] ; w, h - - vld1.s16 {q0}, [r4] ; filter - - sub r8, r1, r1, lsl #2 ; -src_stride * 3 - add r8, r8, #4 ; -src_stride * 3 + 4 - - sub r4, r3, r3, lsl #2 ; -dst_stride * 3 - add r4, r4, #4 ; -dst_stride * 3 + 4 - - rsb r9, r6, r1, lsl #2 ; reset src for outer loop - sub r9, r9, #7 - rsb r12, r6, r3, lsl #2 ; reset dst for outer loop - - mov r10, r6 ; w loop counter - -vpx_convolve8_avg_loop_horiz_v - vld1.8 {d24}, [r0], r1 - vld1.8 {d25}, [r0], r1 - vld1.8 {d26}, [r0], r1 - vld1.8 {d27}, [r0], r8 - - vtrn.16 q12, q13 - vtrn.8 d24, d25 - vtrn.8 d26, d27 - - pld [r0, r1, lsl #2] - - vmovl.u8 q8, d24 - vmovl.u8 q9, d25 - vmovl.u8 q10, d26 - vmovl.u8 q11, d27 - - ; save a few instructions in the inner loop - vswp d17, d18 - vmov d23, d21 - - add r0, r0, #3 - -vpx_convolve8_avg_loop_horiz - add r5, r0, #64 - - vld1.32 {d28[]}, [r0], r1 - vld1.32 {d29[]}, [r0], r1 - vld1.32 {d31[]}, [r0], r1 - vld1.32 {d30[]}, [r0], r8 - - pld [r5] - - vtrn.16 d28, d31 - vtrn.16 d29, d30 - vtrn.8 d28, d29 - vtrn.8 d31, d30 - - pld [r5, r1] - - ; extract to s16 - vtrn.32 q14, q15 - vmovl.u8 q12, d28 - vmovl.u8 q13, d29 - - pld [r5, r1, lsl #1] - - ; slightly out of order load to match the existing data - vld1.u32 {d6[0]}, [r2], r3 - vld1.u32 {d7[0]}, [r2], r3 - vld1.u32 {d6[1]}, [r2], r3 - vld1.u32 {d7[1]}, [r2], r3 - - sub r2, r2, r3, lsl #2 ; reset for store - - ; src[] * filter - MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24 - MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26 - MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27 - MULTIPLY_BY_Q0 q15, d22, d18, d19, d23, d24, d26, d27, d25 - - pld [r5, -r8] - - ; += 64 >> 7 - vqrshrun.s32 d2, q1, #7 - vqrshrun.s32 d3, q2, #7 - vqrshrun.s32 d4, q14, #7 - vqrshrun.s32 d5, q15, #7 - - ; saturate - vqmovn.u16 d2, q1 - vqmovn.u16 d3, q2 - - ; transpose - vtrn.16 d2, d3 - vtrn.32 d2, d3 - vtrn.8 d2, d3 - - ; average the new value and the dst value - vrhadd.u8 q1, q1, q3 - - vst1.u32 {d2[0]}, [r2@32], r3 - vst1.u32 {d3[0]}, [r2@32], r3 - vst1.u32 {d2[1]}, [r2@32], r3 - vst1.u32 {d3[1]}, [r2@32], r4 - - vmov q8, q9 - vmov d20, d23 - vmov q11, q12 - vmov q9, q13 - - subs r6, r6, #4 ; w -= 4 - bgt vpx_convolve8_avg_loop_horiz - - ; outer loop - mov r6, r10 ; restore w counter - add r0, r0, r9 ; src += src_stride * 4 - w - add r2, r2, r12 ; dst += dst_stride * 4 - w - subs r7, r7, #4 ; h -= 4 - bgt vpx_convolve8_avg_loop_horiz_v - - pop {r4-r10, pc} - - 
ENDP - -|vpx_convolve8_avg_vert_neon| PROC - push {r4-r8, lr} - - ; adjust for taps - sub r0, r0, r1 - sub r0, r0, r1, lsl #1 - - ldr r4, [sp, #24] ; filter - ldr r5, [sp, #36] ; y0_q4 - add r4, r5, lsl #4 - ldr r6, [sp, #44] ; w - ldr lr, [sp, #48] ; h - - vld1.s16 {q0}, [r4] ; filter - - lsl r1, r1, #1 - lsl r3, r3, #1 - -vpx_convolve8_avg_loop_vert_h - mov r4, r0 - add r7, r0, r1, asr #1 - mov r5, r2 - add r8, r2, r3, asr #1 - mov r12, lr ; h loop counter - - vld1.u32 {d16[0]}, [r4], r1 - vld1.u32 {d16[1]}, [r7], r1 - vld1.u32 {d18[0]}, [r4], r1 - vld1.u32 {d18[1]}, [r7], r1 - vld1.u32 {d20[0]}, [r4], r1 - vld1.u32 {d20[1]}, [r7], r1 - vld1.u32 {d22[0]}, [r4], r1 - - vmovl.u8 q8, d16 - vmovl.u8 q9, d18 - vmovl.u8 q10, d20 - vmovl.u8 q11, d22 - -vpx_convolve8_avg_loop_vert - ; always process a 4x4 block at a time - vld1.u32 {d24[0]}, [r7], r1 - vld1.u32 {d26[0]}, [r4], r1 - vld1.u32 {d26[1]}, [r7], r1 - vld1.u32 {d24[1]}, [r4], r1 - - ; extract to s16 - vmovl.u8 q12, d24 - vmovl.u8 q13, d26 - - vld1.u32 {d6[0]}, [r5@32], r3 - vld1.u32 {d6[1]}, [r8@32], r3 - vld1.u32 {d7[0]}, [r5@32], r3 - vld1.u32 {d7[1]}, [r8@32], r3 - - pld [r7] - pld [r4] - - ; src[] * filter - MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24 - - pld [r7, r1] - pld [r4, r1] - - MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d24, d26 - - pld [r5] - pld [r8] - - MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d24, d26, d27 - - pld [r5, r3] - pld [r8, r3] - - MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d24, d26, d27, d25 - - ; += 64 >> 7 - vqrshrun.s32 d2, q1, #7 - vqrshrun.s32 d3, q2, #7 - vqrshrun.s32 d4, q14, #7 - vqrshrun.s32 d5, q15, #7 - - ; saturate - vqmovn.u16 d2, q1 - vqmovn.u16 d3, q2 - - ; average the new value and the dst value - vrhadd.u8 q1, q1, q3 - - sub r5, r5, r3, lsl #1 ; reset for store - sub r8, r8, r3, lsl #1 - - vst1.u32 {d2[0]}, [r5@32], r3 - vst1.u32 {d2[1]}, [r8@32], r3 - vst1.u32 {d3[0]}, [r5@32], r3 - vst1.u32 {d3[1]}, [r8@32], r3 - - vmov q8, q10 - vmov d18, d22 - vmov d19, d24 - vmov q10, q13 - vmov d22, d25 - - subs r12, r12, #4 ; h -= 4 - bgt vpx_convolve8_avg_loop_vert - - ; outer loop - add r0, r0, #4 - add r2, r2, #4 - subs r6, r6, #4 ; w -= 4 - bgt vpx_convolve8_avg_loop_vert_h - - pop {r4-r8, pc} - - ENDP - END diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_vert_filter_type1_neon.asm b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_vert_filter_type1_neon.asm new file mode 100644 index 000000000000..d310a83dad80 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_vert_filter_type1_neon.asm @@ -0,0 +1,486 @@ +; +; Copyright (c) 2018 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; +;**************Variables Vs Registers*********************************** +; r0 => src +; r1 => dst +; r2 => src_stride +; r6 => dst_stride +; r12 => filter_y0 +; r5 => ht +; r3 => wd + + EXPORT |vpx_convolve8_avg_vert_filter_type1_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +|vpx_convolve8_avg_vert_filter_type1_neon| PROC + + stmfd sp!, {r4 - r12, r14} ;stack stores the values of + ; the arguments + vpush {d8 - d15} ; stack offset by 64 + mov r4, r1 + mov r1, r2 + mov r2, r4 + vmov.i16 q15, #0x4000 + mov r11, #0xc000 + ldr r12, [sp, #104] ;load filter + ldr r6, [sp, #116] ;load y0_q4 + add r12, r12, r6, lsl #4 ;r12 = filter[y0_q4] + mov r6, r3 + ldr r5, [sp, #124] ;load wd + vld2.8 {d0, d1}, [r12] ;coeff = vld1_s8(pi1_coeff) + sub r12, r2, r2, lsl #2 ;src_ctrd & pi1_coeff + vabs.s8 d0, d0 ;vabs_s8(coeff) + add r0, r0, r12 ;r0->pu1_src r12->pi1_coeff + ldr r3, [sp, #128] ;load ht + subs r7, r3, #0 ;r3->ht + vdup.u8 d22, d0[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, + ; 0); + cmp r5, #8 + vdup.u8 d23, d0[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, + ; 1); + vdup.u8 d24, d0[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, + ; 2); + vdup.u8 d25, d0[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, + ; 3); + vdup.u8 d26, d0[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, + ; 4); + vdup.u8 d27, d0[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, + ; 5); + vdup.u8 d28, d0[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, + ; 6); + vdup.u8 d29, d0[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, + ; 7); + blt core_loop_wd_4 ;core loop wd 4 jump + str r0, [sp, #-4]! + str r1, [sp, #-4]! + bic r4, r5, #7 ;r5 ->wd + rsb r9, r4, r6, lsl #2 ;r6->dst_strd r5 ->wd + rsb r8, r4, r2, lsl #2 ;r2->src_strd + mov r3, r5, lsr #3 ;divide by 8 + mul r7, r3 ;multiply height by width + sub r7, #4 ;subtract by one for epilog + +prolog + and r10, r0, #31 + add r3, r0, r2 ;pu1_src_tmp += src_strd; + vdup.16 q4, r11 + vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vld1.u8 {d0}, [r0]! 
;src_tmp1 = vld1_u8(pu1_src_tmp); + subs r4, r4, #8 + vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, + ; coeffabs_1); + vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp1, coeffabs_0); + vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp3, coeffabs_2); + vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp4, coeffabs_3); + vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp1, coeffabs_4); + vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp2, coeffabs_5); + vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp3, coeffabs_6); + vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp4, coeffabs_7); + vdup.16 q5, r11 + vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, + ; coeffabs_1); + addle r0, r0, r8 + vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp2, coeffabs_0); + bicle r4, r5, #7 ;r5 ->wd + vmlal.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp4, coeffabs_2); + pld [r3] + vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp1, coeffabs_3); + vhadd.s16 q4, q4, q15 + vdup.16 q6, r11 + pld [r3, r2] + pld [r3, r2, lsl #1] + vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp2, coeffabs_4); + add r3, r3, r2 + vmlal.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp3, coeffabs_5); + pld [r3, r2, lsl #1] + vmlsl.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp4, coeffabs_6); + add r3, r0, r2 ;pu1_src_tmp += src_strd; + vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp1, coeffabs_7); + vld1.u8 {d20}, [r1] + vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u8 {d1}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q6, d3, d23 + vld1.u8 {d0}, [r0]! ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q6, d2, d22 + vrhadd.u8 d8, d8, d20 + vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q6, d4, d24 + vhadd.s16 q5, q5, q15 + vdup.16 q7, r11 + vmlal.u8 q6, d5, d25 + vmlal.u8 q6, d6, d26 + add r14, r1, r6 + vmlal.u8 q6, d7, d27 + vmlsl.u8 q6, d16, d28 + vst1.8 {d8}, [r1]! 
;vst1_u8(pu1_dst,sto_res); + vmlsl.u8 q6, d17, d29 + vld1.u8 {d20}, [r14] + vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + addle r1, r1, r9 + vmlsl.u8 q7, d4, d23 + subs r7, r7, #4 + vmlsl.u8 q7, d3, d22 + vmlal.u8 q7, d5, d24 + vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vmlal.u8 q7, d6, d25 + vrhadd.u8 d10, d10, d20 + vhadd.s16 q6, q6, q15 + vdup.16 q4, r11 + vmlal.u8 q7, d7, d26 + vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q7, d16, d27 + vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d17, d28 + vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d18, d29 + vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); + vqrshrun.s16 d12, q6, #6 + blt epilog_end ;jumps to epilog_end + + beq epilog ;jumps to epilog + +main_loop_8 + subs r4, r4, #8 + vmlsl.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, + ; coeffabs_1); + vld1.u8 {d20}, [r14] + vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp1, coeffabs_0); + addle r0, r0, r8 + bicle r4, r5, #7 ;r5 ->wd + vmlal.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp3, coeffabs_2); + vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp4, coeffabs_3); + vrhadd.u8 d12, d12, d20 + vhadd.s16 q7, q7, q15 + vdup.16 q5, r11 + vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp1, coeffabs_4); + vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp2, coeffabs_5); + vmlsl.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp3, coeffabs_6); + vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp4, coeffabs_7); + vst1.8 {d12}, [r14], r6 + vld1.u8 {d20}, [r14] + vqrshrun.s16 d14, q7, #6 + add r3, r0, r2 ;pu1_src_tmp += src_strd; + vmlsl.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, + ; coeffabs_1); + vld1.u8 {d0}, [r0]! 
;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp2, coeffabs_0); + vrhadd.u8 d14, d14, d20 + vmlal.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp4, coeffabs_2); + vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp1, coeffabs_3); + vhadd.s16 q4, q4, q15 + vdup.16 q6, r11 + vst1.8 {d14}, [r14], r6 + vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp2, coeffabs_4); + add r14, r1, #0 + vmlal.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp3, coeffabs_5); + add r1, r1, #8 + vmlsl.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp4, coeffabs_6); + addle r1, r1, r9 + vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp1, coeffabs_7); + vld1.u8 {d20}, [r14] + vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vmlsl.u8 q6, d3, d23 + add r10, r3, r2, lsl #3 ; 10*strd - 8+2 + vmlsl.u8 q6, d2, d22 + vrhadd.u8 d8, d8, d20 + add r10, r10, r2 ; 11*strd + vmlal.u8 q6, d4, d24 + vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q6, d5, d25 + vhadd.s16 q5, q5, q15 + vdup.16 q7, r11 + vmlal.u8 q6, d6, d26 + vst1.8 {d8}, [r14], r6 ;vst1_u8(pu1_dst,sto_res); + pld [r10] ;11+ 0 + vmlal.u8 q6, d7, d27 + pld [r10, r2] ;11+ 1*strd + pld [r10, r2, lsl #1] ;11+ 2*strd + vmlsl.u8 q6, d16, d28 + add r10, r10, r2 ;12*strd + vmlsl.u8 q6, d17, d29 + vld1.u8 {d20}, [r14] + vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + + pld [r10, r2, lsl #1] ;11+ 3*strd + vmlsl.u8 q7, d4, d23 + vmlsl.u8 q7, d3, d22 + vrhadd.u8 d10, d10, d20 + subs r7, r7, #4 + vmlal.u8 q7, d5, d24 + vmlal.u8 q7, d6, d25 + vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vhadd.s16 q6, q6, q15 + vdup.16 q4, r11 + vmlal.u8 q7, d7, d26 + vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q7, d16, d27 + vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d17, d28 + vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d18, d29 + vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vqrshrun.s16 d12, q6, #6 + vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); + bgt main_loop_8 ;jumps to main_loop_8 + +epilog + vld1.u8 {d20}, [r14] + vmlsl.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, + ; coeffabs_1); + vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp1, coeffabs_0); + vmlal.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp3, coeffabs_2); + vrhadd.u8 d12, d12, d20 + vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp4, coeffabs_3); + vhadd.s16 q7, q7, q15 + vdup.16 q5, r11 + vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp1, coeffabs_4); + vmlal.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp2, coeffabs_5); + vmlsl.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp3, coeffabs_6); + vst1.8 {d12}, [r14], r6 + vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp4, coeffabs_7); + vld1.u8 {d20}, [r14] + vqrshrun.s16 d14, q7, #6 + vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, + ; coeffabs_1); + vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp2, coeffabs_0); + vrhadd.u8 d14, d14, d20 + vmlal.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp4, coeffabs_2); + vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp1, coeffabs_3); + vhadd.s16 q4, q4, q15 + vdup.16 q6, r11 + vmlal.u8 q5, d5, d26 ;mul_res2 = 
vmlal_u8(mul_res2, + ; src_tmp2, coeffabs_4); + vmlal.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp3, coeffabs_5); + vmlsl.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp4, coeffabs_6); + vst1.8 {d14}, [r14], r6 + vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp1, coeffabs_7); + vld1.u8 {d20}, [r1] + vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q6, d3, d23 + vmlsl.u8 q6, d2, d22 + vrhadd.u8 d8, d8, d20 + vmlal.u8 q6, d4, d24 + vmlal.u8 q6, d5, d25 + vhadd.s16 q5, q5, q15 + vdup.16 q7, r11 + vmlal.u8 q6, d6, d26 + vmlal.u8 q6, d7, d27 + add r14, r1, r6 + vmlsl.u8 q6, d16, d28 + vst1.8 {d8}, [r1]! ;vst1_u8(pu1_dst,sto_res); + vmlsl.u8 q6, d17, d29 + vld1.u8 {d20}, [r14] + vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d4, d23 + vmlsl.u8 q7, d3, d22 + vrhadd.u8 d10, d10, d20 + vmlal.u8 q7, d5, d24 + vmlal.u8 q7, d6, d25 + vhadd.s16 q6, q6, q15 + vmlal.u8 q7, d7, d26 + vmlal.u8 q7, d16, d27 + vmlsl.u8 q7, d17, d28 + vmlsl.u8 q7, d18, d29 + vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); + vqrshrun.s16 d12, q6, #6 + +epilog_end + vld1.u8 {d20}, [r14] + vrhadd.u8 d12, d12, d20 + vst1.8 {d12}, [r14], r6 + vhadd.s16 q7, q7, q15 + vqrshrun.s16 d14, q7, #6 + vld1.u8 {d20}, [r14] + vrhadd.u8 d14, d14, d20 + vst1.8 {d14}, [r14], r6 + +end_loops + tst r5, #7 + ldr r1, [sp], #4 + ldr r0, [sp], #4 + vpopeq {d8 - d15} + ldmfdeq sp!, {r4 - r12, r15} ;reload the registers from sp + mov r5, #4 + add r0, r0, #8 + add r1, r1, #8 + mov r7, #16 + +core_loop_wd_4 + rsb r9, r5, r6, lsl #2 ;r6->dst_strd r5 ->wd + rsb r8, r5, r2, lsl #2 ;r2->src_strd + vmov.i8 d4, #0 + +outer_loop_wd_4 + subs r12, r5, #0 + ble end_inner_loop_wd_4 ;outer loop jump + +inner_loop_wd_4 + add r3, r0, r2 + vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp1, 1); + subs r12, r12, #4 + vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, + ; 1); + vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp2, 1); + vld1.u32 {d4[0]},[r0] ;src_tmp1 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp1, 0); + vdup.16 q0, r11 + vmlsl.u8 q0, d5, d23 ;mul_res1 = + ; vmull_u8(vreinterpret_u8_u32(src_tmp2), coeffabs_1); + vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, + ; 1); + add r0, r0, #4 + vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp3, 1); + vmlsl.u8 q0, d4, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp1), coeffabs_0); + vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, + ; 1); + vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp4, 1); + vmlal.u8 q0, d6, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp3), coeffabs_2); + vdup.16 q4, r11 + vmlsl.u8 q4, d7, d23 + vdup.u32 d4, d7[1] ;src_tmp1 = vdup_lane_u32(src_tmp4, + ; 1); + vmull.u8 q1, d7, d25 ;mul_res2 = + ; vmull_u8(vreinterpret_u8_u32(src_tmp4), coeffabs_3); + vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp1, 1); + vmlsl.u8 q4, d6, d22 + vmlal.u8 q0, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp1), coeffabs_4); + vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, + ; 1); + vmlal.u8 q4, d4, d24 + vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp2, 1); + vmlal.u8 q1, 
d5, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; vreinterpret_u8_u32(src_tmp2), coeffabs_5); + vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, + ; 1); + vmlal.u8 q4, d5, d25 + vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp3, 1); + vmlsl.u8 q0, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp3), coeffabs_6); + vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, + ; 1); + vmlal.u8 q4, d6, d26 + vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp4, 1); + vmlsl.u8 q1, d7, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; vreinterpret_u8_u32(src_tmp4), coeffabs_7); + vdup.u32 d4, d7[1] + vadd.i16 q0, q0, q1 ;mul_res1 = vaddq_u16(mul_res1, + ; mul_res2); + vmlal.u8 q4, d7, d27 + vld1.u32 {d4[1]},[r3], r2 + vmlsl.u8 q4, d4, d28 + vdup.u32 d5, d4[1] + vhadd.s16 q0, q0, q15 + vqrshrun.s16 d0, q0, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u32 {d5[1]},[r3] + add r3, r1, r6 + vld1.u32 {d20[0]}, [r1] + vld1.u32 {d20[1]}, [r3] + vrhadd.u8 d0, d0, d20 + vst1.32 {d0[0]},[r1] ;vst1_lane_u32((uint32_t *)pu1_dst, + ; vreinterpret_u32_u8(sto_res), 0); + vmlsl.u8 q4, d5, d29 + vst1.32 {d0[1]},[r3], r6 ;vst1_lane_u32((uint32_t + ; *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 1); + vhadd.s16 q4, q4, q15 + vqrshrun.s16 d8, q4, #6 + mov r4, r3 + vld1.u32 {d20[0]}, [r4], r6 + vld1.u32 {d20[1]}, [r4] + vrhadd.u8 d8, d8, d20 + vst1.32 {d8[0]},[r3], r6 + add r1, r1, #4 + vst1.32 {d8[1]},[r3] + bgt inner_loop_wd_4 + +end_inner_loop_wd_4 + subs r7, r7, #4 + add r1, r1, r9 + add r0, r0, r8 + bgt outer_loop_wd_4 + + vpop {d8 - d15} + ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp + + ENDP + + END diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_vert_filter_type2_neon.asm b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_vert_filter_type2_neon.asm new file mode 100644 index 000000000000..c5695fbda845 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_avg_vert_filter_type2_neon.asm @@ -0,0 +1,487 @@ +; +; Copyright (c) 2018 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; +;**************Variables Vs Registers*********************************** +; r0 => src +; r1 => dst +; r2 => src_stride +; r6 => dst_stride +; r12 => filter_y0 +; r5 => ht +; r3 => wd + + EXPORT |vpx_convolve8_avg_vert_filter_type2_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +|vpx_convolve8_avg_vert_filter_type2_neon| PROC + + stmfd sp!, {r4 - r12, r14} ;stack stores the values of + ; the arguments + vpush {d8 - d15} ; stack offset by 64 + mov r4, r1 + mov r1, r2 + mov r2, r4 + vmov.i16 q15, #0x4000 + mov r11, #0xc000 + ldr r12, [sp, #104] ;load filter + ldr r6, [sp, #116] ;load y0_q4 + add r12, r12, r6, lsl #4 ;r12 = filter[y0_q4] + mov r6, r3 + ldr r5, [sp, #124] ;load wd + vld2.8 {d0, d1}, [r12] ;coeff = vld1_s8(pi1_coeff) + sub r12, r2, r2, lsl #2 ;src_ctrd & pi1_coeff + vabs.s8 d0, d0 ;vabs_s8(coeff) + add r0, r0, r12 ;r0->pu1_src r12->pi1_coeff + ldr r3, [sp, #128] ;load ht + subs r7, r3, #0 ;r3->ht + vdup.u8 d22, d0[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, + ; 0); + cmp r5, #8 + vdup.u8 d23, d0[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, + ; 1); + vdup.u8 d24, d0[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, + ; 2); + vdup.u8 d25, d0[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, + ; 3); + vdup.u8 d26, d0[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, + ; 4); + vdup.u8 d27, d0[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, + ; 5); + vdup.u8 d28, d0[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, + ; 6); + vdup.u8 d29, d0[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, + ; 7); + blt core_loop_wd_4 ;core loop wd 4 jump + + str r0, [sp, #-4]! + str r1, [sp, #-4]! + bic r4, r5, #7 ;r5 ->wd + rsb r9, r4, r6, lsl #2 ;r6->dst_strd r5 ->wd + rsb r8, r4, r2, lsl #2 ;r2->src_strd + mov r3, r5, lsr #3 ;divide by 8 + mul r7, r3 ;multiply height by width + sub r7, #4 ;subtract by one for epilog + +prolog + and r10, r0, #31 + add r3, r0, r2 ;pu1_src_tmp += src_strd; + vdup.16 q4, r11 + vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vld1.u8 {d0}, [r0]! 
;src_tmp1 = vld1_u8(pu1_src_tmp); + subs r4, r4, #8 + vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, + ; coeffabs_1); + vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp1, coeffabs_0); + vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp3, coeffabs_2); + vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp4, coeffabs_3); + vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp1, coeffabs_4); + vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp2, coeffabs_5); + vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp3, coeffabs_6); + vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp4, coeffabs_7); + vdup.16 q5, r11 + vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, + ; coeffabs_1); + addle r0, r0, r8 + vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp2, coeffabs_0); + bicle r4, r5, #7 ;r5 ->wd + vmlsl.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp4, coeffabs_2); + pld [r3] + vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp1, coeffabs_3); + vhadd.s16 q4, q4, q15 + vdup.16 q6, r11 + pld [r3, r2] + pld [r3, r2, lsl #1] + vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp2, coeffabs_4); + add r3, r3, r2 + vmlsl.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp3, coeffabs_5); + pld [r3, r2, lsl #1] + vmlal.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp4, coeffabs_6); + add r3, r0, r2 ;pu1_src_tmp += src_strd; + vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp1, coeffabs_7); + vld1.u8 {d20}, [r1] + vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u8 {d1}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q6, d3, d23 + vld1.u8 {d0}, [r0]! ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q6, d2, d22 + vrhadd.u8 d8, d8, d20 + vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q6, d4, d24 + vhadd.s16 q5, q5, q15 + vdup.16 q7, r11 + vmlal.u8 q6, d5, d25 + vmlal.u8 q6, d6, d26 + add r14, r1, r6 + vmlsl.u8 q6, d7, d27 + vmlal.u8 q6, d16, d28 + vst1.8 {d8}, [r1]! 
;vst1_u8(pu1_dst,sto_res); + vmlsl.u8 q6, d17, d29 + vld1.u8 {d20}, [r14] + vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + addle r1, r1, r9 + vmlal.u8 q7, d4, d23 + subs r7, r7, #4 + vmlsl.u8 q7, d3, d22 + vmlsl.u8 q7, d5, d24 + vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vmlal.u8 q7, d6, d25 + vrhadd.u8 d10, d10, d20 + vhadd.s16 q6, q6, q15 + vdup.16 q4, r11 + vmlal.u8 q7, d7, d26 + vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d16, d27 + vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q7, d17, d28 + vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d18, d29 + vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); + vqrshrun.s16 d12, q6, #6 + blt epilog_end ;jumps to epilog_end + + beq epilog ;jumps to epilog + +main_loop_8 + subs r4, r4, #8 + vmlal.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, + ; coeffabs_1); + vld1.u8 {d20}, [r14] + vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp1, coeffabs_0); + addle r0, r0, r8 + bicle r4, r5, #7 ;r5 ->wd + vmlsl.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp3, coeffabs_2); + vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp4, coeffabs_3); + vrhadd.u8 d12, d12, d20 + vhadd.s16 q7, q7, q15 + vdup.16 q5, r11 + vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp1, coeffabs_4); + vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp2, coeffabs_5); + vmlal.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp3, coeffabs_6); + vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp4, coeffabs_7); + vst1.8 {d12}, [r14], r6 + vld1.u8 {d20}, [r14] + vqrshrun.s16 d14, q7, #6 + add r3, r0, r2 ;pu1_src_tmp += src_strd; + vmlal.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, + ; coeffabs_1); + vld1.u8 {d0}, [r0]! 
;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp2, coeffabs_0); + vrhadd.u8 d14, d14, d20 + vmlsl.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp4, coeffabs_2); + vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp1, coeffabs_3); + vhadd.s16 q4, q4, q15 + vdup.16 q6, r11 + vst1.8 {d14}, [r14], r6 + vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp2, coeffabs_4); + add r14, r1, #0 + vmlsl.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp3, coeffabs_5); + add r1, r1, #8 + vmlal.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp4, coeffabs_6); + addle r1, r1, r9 + vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp1, coeffabs_7); + vld1.u8 {d20}, [r14] + vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vmlal.u8 q6, d3, d23 + add r10, r3, r2, lsl #3 ; 10*strd - 8+2 + vmlsl.u8 q6, d2, d22 + vrhadd.u8 d8, d8, d20 + add r10, r10, r2 ; 11*strd + vmlsl.u8 q6, d4, d24 + vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q6, d5, d25 + vhadd.s16 q5, q5, q15 + vdup.16 q7, r11 + vmlal.u8 q6, d6, d26 + vst1.8 {d8}, [r14], r6 ;vst1_u8(pu1_dst,sto_res); + pld [r10] ;11+ 0 + vmlsl.u8 q6, d7, d27 + pld [r10, r2] ;11+ 1*strd + pld [r10, r2, lsl #1] ;11+ 2*strd + vmlal.u8 q6, d16, d28 + add r10, r10, r2 ;12*strd + vmlsl.u8 q6, d17, d29 + vld1.u8 {d20}, [r14] + vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + pld [r10, r2, lsl #1] ;11+ 3*strd + vmlal.u8 q7, d4, d23 + vmlsl.u8 q7, d3, d22 + vrhadd.u8 d10, d10, d20 + subs r7, r7, #4 + vmlsl.u8 q7, d5, d24 + vmlal.u8 q7, d6, d25 + vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vhadd.s16 q6, q6, q15 + vdup.16 q4, r11 + vmlal.u8 q7, d7, d26 + vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d16, d27 + vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q7, d17, d28 + vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d18, d29 + vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vqrshrun.s16 d12, q6, #6 + vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); + bgt main_loop_8 ;jumps to main_loop_8 + +epilog + vld1.u8 {d20}, [r14] + vmlal.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, + ; coeffabs_1); + vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp1, coeffabs_0); + vmlsl.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp3, coeffabs_2); + vrhadd.u8 d12, d12, d20 + vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp4, coeffabs_3); + vhadd.s16 q7, q7, q15 + vdup.16 q5, r11 + vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp1, coeffabs_4); + vmlsl.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp2, coeffabs_5); + vmlal.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp3, coeffabs_6); + vst1.8 {d12}, [r14], r6 + vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp4, coeffabs_7); + vld1.u8 {d20}, [r14] + vqrshrun.s16 d14, q7, #6 + vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, + ; coeffabs_1); + vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp2, coeffabs_0); + vrhadd.u8 d14, d14, d20 + vmlsl.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp4, coeffabs_2); + vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp1, coeffabs_3); + vhadd.s16 q4, q4, q15 + vdup.16 q6, r11 + vmlal.u8 q5, d5, d26 ;mul_res2 = 
vmlal_u8(mul_res2, + ; src_tmp2, coeffabs_4); + vmlsl.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp3, coeffabs_5); + vmlal.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp4, coeffabs_6); + vst1.8 {d14}, [r14], r6 + vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp1, coeffabs_7); + vld1.u8 {d20}, [r1] + vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q6, d3, d23 + vmlsl.u8 q6, d2, d22 + vrhadd.u8 d8, d8, d20 + vmlsl.u8 q6, d4, d24 + vmlal.u8 q6, d5, d25 + vhadd.s16 q5, q5, q15 + vdup.16 q7, r11 + vmlal.u8 q6, d6, d26 + vmlsl.u8 q6, d7, d27 + add r14, r1, r6 + vmlal.u8 q6, d16, d28 + vst1.8 {d8}, [r1]! ;vst1_u8(pu1_dst,sto_res); + vmlsl.u8 q6, d17, d29 + vld1.u8 {d20}, [r14] + vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q7, d4, d23 + vmlsl.u8 q7, d3, d22 + vrhadd.u8 d10, d10, d20 + vmlsl.u8 q7, d5, d24 + vmlal.u8 q7, d6, d25 + vhadd.s16 q6, q6, q15 + vmlal.u8 q7, d7, d26 + vmlsl.u8 q7, d16, d27 + vmlal.u8 q7, d17, d28 + vmlsl.u8 q7, d18, d29 + vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); + vqrshrun.s16 d12, q6, #6 + +epilog_end + vld1.u8 {d20}, [r14] + vrhadd.u8 d12, d12, d20 + vst1.8 {d12}, [r14], r6 + vhadd.s16 q7, q7, q15 + vqrshrun.s16 d14, q7, #6 + vld1.u8 {d20}, [r14] + vrhadd.u8 d14, d14, d20 + vst1.8 {d14}, [r14], r6 + +end_loops + tst r5, #7 + ldr r1, [sp], #4 + ldr r0, [sp], #4 + vpopeq {d8 - d15} + ldmfdeq sp!, {r4 - r12, r15} ;reload the registers from sp + + mov r5, #4 + add r0, r0, #8 + add r1, r1, #8 + mov r7, #16 + +core_loop_wd_4 + rsb r9, r5, r6, lsl #2 ;r6->dst_strd r5 ->wd + rsb r8, r5, r2, lsl #2 ;r2->src_strd + vmov.i8 d4, #0 + +outer_loop_wd_4 + subs r12, r5, #0 + ble end_inner_loop_wd_4 ;outer loop jump + +inner_loop_wd_4 + add r3, r0, r2 + vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp1, 1); + subs r12, r12, #4 + vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, + ; 1); + vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp2, 1); + vld1.u32 {d4[0]},[r0] ;src_tmp1 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp1, 0); + vdup.16 q0, r11 + vmlal.u8 q0, d5, d23 ;mul_res1 = + ; vmull_u8(vreinterpret_u8_u32(src_tmp2), coeffabs_1); + vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, + ; 1); + add r0, r0, #4 + vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp3, 1); + vmlsl.u8 q0, d4, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp1), coeffabs_0); + vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, + ; 1); + vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp4, 1); + vmlsl.u8 q0, d6, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp3), coeffabs_2); + vdup.16 q4, r11 + vmlal.u8 q4, d7, d23 + vdup.u32 d4, d7[1] ;src_tmp1 = vdup_lane_u32(src_tmp4, + ; 1); + vmull.u8 q1, d7, d25 ;mul_res2 = + ; vmull_u8(vreinterpret_u8_u32(src_tmp4), coeffabs_3); + vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp1, 1); + vmlsl.u8 q4, d6, d22 + vmlal.u8 q0, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp1), coeffabs_4); + vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, + ; 1); + vmlsl.u8 q4, d4, d24 + vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp2, 1); + vmlsl.u8 q1, 
d5, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; vreinterpret_u8_u32(src_tmp2), coeffabs_5); + vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, + ; 1); + vmlal.u8 q4, d5, d25 + vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp3, 1); + vmlal.u8 q0, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp3), coeffabs_6); + vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, + ; 1); + vmlal.u8 q4, d6, d26 + vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp4, 1); + vmlsl.u8 q1, d7, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; vreinterpret_u8_u32(src_tmp4), coeffabs_7); + vdup.u32 d4, d7[1] + vadd.i16 q0, q0, q1 ;mul_res1 = vaddq_u16(mul_res1, + ; mul_res2); + vmlsl.u8 q4, d7, d27 + vld1.u32 {d4[1]},[r3], r2 + vmlal.u8 q4, d4, d28 + vdup.u32 d5, d4[1] + vhadd.s16 q0, q0, q15 + vqrshrun.s16 d0, q0, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u32 {d5[1]},[r3] + add r3, r1, r6 + vld1.u32 {d20[0]}, [r1] + vld1.u32 {d20[1]}, [r3] + vrhadd.u8 d0, d0, d20 + vst1.32 {d0[0]},[r1] ;vst1_lane_u32((uint32_t *)pu1_dst, + ; vreinterpret_u32_u8(sto_res), 0); + vmlsl.u8 q4, d5, d29 + vst1.32 {d0[1]},[r3], r6 ;vst1_lane_u32((uint32_t + ; *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 1); + vhadd.s16 q4, q4, q15 + vqrshrun.s16 d8, q4, #6 + mov r4, r3 + vld1.u32 {d20[0]}, [r4], r6 + vld1.u32 {d20[1]}, [r4] + vrhadd.u8 d8, d8, d20 + vst1.32 {d8[0]},[r3], r6 + add r1, r1, #4 + vst1.32 {d8[1]},[r3] + bgt inner_loop_wd_4 + +end_inner_loop_wd_4 + subs r7, r7, #4 + add r1, r1, r9 + add r0, r0, r8 + bgt outer_loop_wd_4 + + vpop {d8 - d15} + ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp + + ENDP + + END diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_horiz_filter_type1_neon.asm b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_horiz_filter_type1_neon.asm new file mode 100644 index 000000000000..fa1b73246608 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_horiz_filter_type1_neon.asm @@ -0,0 +1,415 @@ +; +; Copyright (c) 2018 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; +;**************Variables Vs Registers*********************************** +; r0 => src +; r1 => dst +; r2 => src_stride +; r3 => dst_stride +; r4 => filter_x0 +; r8 => ht +; r10 => wd + + EXPORT |vpx_convolve8_horiz_filter_type1_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +|vpx_convolve8_horiz_filter_type1_neon| PROC + + stmfd sp!, {r4 - r12, r14} ;stack stores the values of + ; the arguments + vpush {d8 - d15} ; stack offset by 64 + mov r4, r1 + mov r1, r2 + mov r2, r4 +start_loop_count + ldr r4, [sp, #104] ;loads pi1_coeff + ldr r8, [sp, #108] ;loads x0_q4 + add r4, r4, r8, lsl #4 ;r4 = filter[x0_q4] + ldr r8, [sp, #128] ;loads ht + ldr r10, [sp, #124] ;loads wd + vld2.8 {d0, d1}, [r4] ;coeff = vld1_s8(pi1_coeff) + mov r11, #1 + subs r14, r8, #0 ;checks for ht == 0 + vabs.s8 d2, d0 ;vabs_s8(coeff) + vdup.8 d24, d2[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, + ; 0) + sub r12, r0, #3 ;pu1_src - 3 + vdup.8 d25, d2[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, + ; 1) + add r4, r12, r2 ;pu1_src_tmp2_8 = pu1_src + src_strd + vdup.8 d26, d2[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, + ; 2) + rsb r9, r10, r2, lsl #1 ;2*src_strd - wd + vdup.8 d27, d2[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, + ; 3) + rsb r8, r10, r3, lsl #1 ;2*dst_strd - wd + vdup.8 d28, d2[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, + ; 4) + vdup.8 d29, d2[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, + ; 5) + vdup.8 d30, d2[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, + ; 6) + vdup.8 d31, d2[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, + ; 7) + mov r7, r1 + cmp r10, #4 + ble outer_loop_4 + + cmp r10, #24 + moveq r10, #16 + addeq r8, #8 + addeq r9, #8 + cmp r10, #16 + bge outer_loop_16 + + cmp r10, #12 + addeq r8, #4 + addeq r9, #4 + b outer_loop_8 + +outer_loop8_residual + sub r12, r0, #3 ;pu1_src - 3 + mov r1, r7 + mov r14, #32 + add r1, #16 + add r12, #16 + mov r10, #8 + add r8, #8 + add r9, #8 + +outer_loop_8 + + add r6, r1, r3 ;pu1_dst + dst_strd + add r4, r12, r2 ;pu1_src + src_strd + subs r5, r10, #0 ;checks wd + ble end_inner_loop_8 + +inner_loop_8 + mov r7, #0xc000 + vld1.u32 {d0}, [r12], r11 ;vector load pu1_src + vdup.16 q4, r7 + vld1.u32 {d1}, [r12], r11 + vdup.16 q5, r7 + vld1.u32 {d2}, [r12], r11 + vld1.u32 {d3}, [r12], r11 + mov r7, #0x4000 + vld1.u32 {d4}, [r12], r11 + vmlsl.u8 q4, d1, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {d5}, [r12], r11 + vmlal.u8 q4, d3, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {d6}, [r12], r11 + vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vld1.u32 {d7}, [r12], r11 + vmlal.u8 q4, d2, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd + vmlal.u8 q4, d4, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vld1.u32 {d13}, [r4], r11 + vmlal.u8 q4, d5, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vld1.u32 {d14}, [r4], r11 + vmlsl.u8 q4, d6, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vld1.u32 {d15}, [r4], r11 + vmlsl.u8 q4, d7, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + vld1.u32 {d16}, [r4], r11 ;vector load pu1_src + src_strd + vdup.16 q11, r7 + vmlal.u8 q5, d15, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {d17}, [r4], r11 + vmlal.u8 q5, d14, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vhadd.s16 q4, q4, q11 + vld1.u32 {d18}, [r4], r11 + vmlal.u8 q5, d16, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vld1.u32 {d19}, [r4], r11 ;vector load pu1_src + src_strd + vmlal.u8 q5, d17, d29 ;mul_res = 
vmlsl_u8(src[0_5], + ; coeffabs_5); + vmlsl.u8 q5, d18, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q5, d19, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + vqrshrun.s16 d20, q4, #6 ;right shift and saturating narrow + ; result 1 + vmlsl.u8 q5, d12, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vmlsl.u8 q5, d13, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vst1.8 {d20}, [r1]! ;store the result pu1_dst + vhadd.s16 q5, q5, q11 + subs r5, r5, #8 ;decrement the wd loop + vqrshrun.s16 d8, q5, #6 ;right shift and saturating narrow + ; result 2 + vst1.8 {d8}, [r6]! ;store the result pu1_dst + cmp r5, #4 + bgt inner_loop_8 + +end_inner_loop_8 + subs r14, r14, #2 ;decrement the ht loop + add r12, r12, r9 ;increment the src pointer by + ; 2*src_strd-wd + add r1, r1, r8 ;increment the dst pointer by + ; 2*dst_strd-wd + bgt outer_loop_8 + + ldr r10, [sp, #120] ;loads wd + cmp r10, #12 + beq outer_loop4_residual + +end_loops + b end_func + +outer_loop_16 + str r0, [sp, #-4]! + str r7, [sp, #-4]! + add r6, r1, r3 ;pu1_dst + dst_strd + add r4, r12, r2 ;pu1_src + src_strd + and r0, r12, #31 + mov r7, #0xc000 + sub r5, r10, #0 ;checks wd + pld [r4, r2, lsl #1] + pld [r12, r2, lsl #1] + vld1.u32 {q0}, [r12], r11 ;vector load pu1_src + vdup.16 q4, r7 + vld1.u32 {q1}, [r12], r11 + vld1.u32 {q2}, [r12], r11 + vld1.u32 {q3}, [r12], r11 + vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vld1.u32 {q6}, [r12], r11 + vmlsl.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {q7}, [r12], r11 + vmlal.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u32 {q8}, [r12], r11 + vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {q9}, [r12], r11 + vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vmlal.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vdup.16 q10, r7 + vmlsl.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + +inner_loop_16 + vmlsl.u8 q10, d1, d24 + vdup.16 q5, r7 + vmlsl.u8 q10, d3, d25 + mov r7, #0x4000 + vdup.16 q11, r7 + vmlal.u8 q10, d5, d26 + vld1.u32 {q0}, [r4], r11 ;vector load pu1_src + vhadd.s16 q4, q4, q11 + vld1.u32 {q1}, [r4], r11 + vmlal.u8 q10, d7, d27 + add r12, #8 + subs r5, r5, #16 + vmlal.u8 q10, d13, d28 + vld1.u32 {q2}, [r4], r11 + vmlal.u8 q10, d15, d29 + vld1.u32 {q3}, [r4], r11 + vqrshrun.s16 d8, q4, #6 ;right shift and saturating narrow + ; result 1 + vmlsl.u8 q10, d17, d30 + vld1.u32 {q6}, [r4], r11 + vmlsl.u8 q10, d19, d31 + vld1.u32 {q7}, [r4], r11 + vmlsl.u8 q5, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vmlsl.u8 q5, d2, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {q8}, [r4], r11 + vhadd.s16 q10, q10, q11 + vld1.u32 {q9}, [r4], r11 + vmlal.u8 q5, d4, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vmlal.u8 q5, d6, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + add r4, #8 + mov r7, #0xc000 + vmlal.u8 q5, d12, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vmlal.u8 q5, d14, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vqrshrun.s16 d9, q10, #6 + vdup.16 q11, r7 + vmlsl.u8 q5, d16, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q5, d18, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + mov r7, #0x4000 + vmlsl.u8 q11, d1, d24 + vst1.8 {q4}, [r1]! 
;store the result pu1_dst + vmlsl.u8 q11, d3, d25 + vdup.16 q10, r7 + vmlal.u8 q11, d5, d26 + pld [r12, r2, lsl #2] + pld [r4, r2, lsl #2] + addeq r12, r12, r9 ;increment the src pointer by + ; 2*src_strd-wd + addeq r4, r12, r2 ;pu1_src + src_strd + vmlal.u8 q11, d7, d27 + addeq r1, r1, r8 + subeq r14, r14, #2 + vmlal.u8 q11, d13, d28 + vhadd.s16 q5, q5, q10 + vmlal.u8 q11, d15, d29 + vmlsl.u8 q11, d17, d30 + cmp r14, #0 + vmlsl.u8 q11, d19, d31 + vqrshrun.s16 d10, q5, #6 ;right shift and saturating narrow + ; result 2 + beq epilog_16 + + vld1.u32 {q0}, [r12], r11 ;vector load pu1_src + mov r7, #0xc000 + cmp r5, #0 + vld1.u32 {q1}, [r12], r11 + vhadd.s16 q11, q11, q10 + vld1.u32 {q2}, [r12], r11 + vdup.16 q4, r7 + vld1.u32 {q3}, [r12], r11 + vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vld1.u32 {q6}, [r12], r11 + vld1.u32 {q7}, [r12], r11 + vmlsl.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {q8}, [r12], r11 + vmlal.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u32 {q9}, [r12], r11 + vqrshrun.s16 d11, q11, #6 + vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + moveq r5, r10 + vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vdup.16 q10, r7 + vmlal.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vst1.8 {q5}, [r6]! ;store the result pu1_dst + vmlsl.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + addeq r6, r1, r3 ;pu1_dst + dst_strd + b inner_loop_16 + +epilog_16 + mov r7, #0x4000 + ldr r0, [sp], #4 + ldr r10, [sp, #120] + vdup.16 q10, r7 + vhadd.s16 q11, q11, q10 + vqrshrun.s16 d11, q11, #6 + vst1.8 {q5}, [r6]! ;store the result pu1_dst + ldr r7, [sp], #4 + cmp r10, #24 + beq outer_loop8_residual + +end_loops1 + b end_func + +outer_loop4_residual + sub r12, r0, #3 ;pu1_src - 3 + mov r1, r7 + add r1, #8 + mov r10, #4 + add r12, #8 + mov r14, #16 + add r8, #4 + add r9, #4 + +outer_loop_4 + add r6, r1, r3 ;pu1_dst + dst_strd + add r4, r12, r2 ;pu1_src + src_strd + subs r5, r10, #0 ;checks wd + ble end_inner_loop_4 + +inner_loop_4 + vld1.u32 {d0}, [r12], r11 ;vector load pu1_src + vld1.u32 {d1}, [r12], r11 + vld1.u32 {d2}, [r12], r11 + vld1.u32 {d3}, [r12], r11 + vld1.u32 {d4}, [r12], r11 + vld1.u32 {d5}, [r12], r11 + vld1.u32 {d6}, [r12], r11 + vld1.u32 {d7}, [r12], r11 + sub r12, r12, #4 + vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd + vld1.u32 {d13}, [r4], r11 + vzip.32 d0, d12 ;vector zip the i iteration and ii + ; interation in single register + vld1.u32 {d14}, [r4], r11 + vzip.32 d1, d13 + vld1.u32 {d15}, [r4], r11 + vzip.32 d2, d14 + vld1.u32 {d16}, [r4], r11 + vzip.32 d3, d15 + vld1.u32 {d17}, [r4], r11 + vzip.32 d4, d16 + vld1.u32 {d18}, [r4], r11 + vzip.32 d5, d17 + vld1.u32 {d19}, [r4], r11 + mov r7, #0xc000 + vdup.16 q4, r7 + sub r4, r4, #4 + vzip.32 d6, d18 + vzip.32 d7, d19 + vmlsl.u8 q4, d1, d25 ;arithmetic operations for ii + ; iteration in the same time + vmlsl.u8 q4, d0, d24 + vmlal.u8 q4, d2, d26 + vmlal.u8 q4, d3, d27 + vmlal.u8 q4, d4, d28 + vmlal.u8 q4, d5, d29 + vmlsl.u8 q4, d6, d30 + vmlsl.u8 q4, d7, d31 + mov r7, #0x4000 + vdup.16 q10, r7 + vhadd.s16 q4, q4, q10 + vqrshrun.s16 d8, q4, #6 + vst1.32 {d8[0]},[r1]! ;store the i iteration result which + ; is in upper part of the register + vst1.32 {d8[1]},[r6]! 
;store the ii iteration result which + ; is in lower part of the register + subs r5, r5, #4 ;decrement the wd by 4 + bgt inner_loop_4 + +end_inner_loop_4 + subs r14, r14, #2 ;decrement the ht by 4 + add r12, r12, r9 ;increment the input pointer + ; 2*src_strd-wd + add r1, r1, r8 ;increment the output pointer + ; 2*dst_strd-wd + bgt outer_loop_4 + +end_func + vpop {d8 - d15} + ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp + + ENDP + + END diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_horiz_filter_type2_neon.asm b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_horiz_filter_type2_neon.asm new file mode 100644 index 000000000000..90b2c8fef7ac --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_horiz_filter_type2_neon.asm @@ -0,0 +1,415 @@ +; +; Copyright (c) 2018 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; +;**************Variables Vs Registers*********************************** +; r0 => src +; r1 => dst +; r2 => src_stride +; r3 => dst_stride +; r4 => filter_x0 +; r8 => ht +; r10 => wd + + EXPORT |vpx_convolve8_horiz_filter_type2_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +|vpx_convolve8_horiz_filter_type2_neon| PROC + + stmfd sp!, {r4 - r12, r14} ;stack stores the values of + ; the arguments + vpush {d8 - d15} ; stack offset by 64 + mov r4, r1 + mov r1, r2 + mov r2, r4 + +start_loop_count + ldr r4, [sp, #104] ;loads pi1_coeff + ldr r8, [sp, #108] ;loads x0_q4 + add r4, r4, r8, lsl #4 ;r4 = filter[x0_q4] + ldr r8, [sp, #128] ;loads ht + ldr r10, [sp, #124] ;loads wd + vld2.8 {d0, d1}, [r4] ;coeff = vld1_s8(pi1_coeff) + mov r11, #1 + subs r14, r8, #0 ;checks for ht == 0 + vabs.s8 d2, d0 ;vabs_s8(coeff) + vdup.8 d24, d2[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, + ; 0) + sub r12, r0, #3 ;pu1_src - 3 + vdup.8 d25, d2[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, + ; 1) + add r4, r12, r2 ;pu1_src_tmp2_8 = pu1_src + src_strd + vdup.8 d26, d2[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, + ; 2) + rsb r9, r10, r2, lsl #1 ;2*src_strd - wd + vdup.8 d27, d2[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, + ; 3) + rsb r8, r10, r3, lsl #1 ;2*dst_strd - wd + vdup.8 d28, d2[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, + ; 4) + vdup.8 d29, d2[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, + ; 5) + vdup.8 d30, d2[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, + ; 6) + vdup.8 d31, d2[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, + ; 7) + mov r7, r1 + cmp r10, #4 + ble outer_loop_4 + + cmp r10, #24 + moveq r10, #16 + addeq r8, #8 + addeq r9, #8 + cmp r10, #16 + bge outer_loop_16 + + cmp r10, #12 + addeq r8, #4 + addeq r9, #4 + b outer_loop_8 + +outer_loop8_residual + sub r12, r0, #3 ;pu1_src - 3 + mov r1, r7 + mov r14, #32 + add r1, #16 + add r12, #16 + mov r10, #8 + add r8, #8 + add r9, #8 + +outer_loop_8 + add r6, r1, r3 ;pu1_dst + dst_strd + add r4, r12, r2 ;pu1_src + src_strd + subs r5, r10, #0 ;checks wd + ble end_inner_loop_8 + +inner_loop_8 + mov r7, #0xc000 + vld1.u32 {d0}, [r12], r11 ;vector load pu1_src + vdup.16 q4, r7 + vld1.u32 {d1}, [r12], r11 + vdup.16 q5, r7 + vld1.u32 {d2}, [r12], r11 + vld1.u32 {d3}, [r12], r11 + mov r7, #0x4000 + vld1.u32 {d4}, [r12], r11 + vmlal.u8 q4, d1, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 
{d5}, [r12], r11 + vmlal.u8 q4, d3, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {d6}, [r12], r11 + vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vld1.u32 {d7}, [r12], r11 + vmlsl.u8 q4, d2, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd + vmlal.u8 q4, d4, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vld1.u32 {d13}, [r4], r11 + vmlsl.u8 q4, d5, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vld1.u32 {d14}, [r4], r11 + vmlal.u8 q4, d6, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vld1.u32 {d15}, [r4], r11 + vmlsl.u8 q4, d7, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + vld1.u32 {d16}, [r4], r11 ;vector load pu1_src + src_strd + vdup.16 q11, r7 + vmlal.u8 q5, d15, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {d17}, [r4], r11 + vmlsl.u8 q5, d14, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vhadd.s16 q4, q4, q11 + vld1.u32 {d18}, [r4], r11 + vmlal.u8 q5, d16, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vld1.u32 {d19}, [r4], r11 ;vector load pu1_src + src_strd + vmlsl.u8 q5, d17, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vmlal.u8 q5, d18, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q5, d19, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + vqrshrun.s16 d20, q4, #6 ;right shift and saturating narrow + ; result 1 + vmlsl.u8 q5, d12, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vmlal.u8 q5, d13, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vst1.8 {d20}, [r1]! ;store the result pu1_dst + vhadd.s16 q5, q5, q11 + subs r5, r5, #8 ;decrement the wd loop + vqrshrun.s16 d8, q5, #6 ;right shift and saturating narrow + ; result 2 + vst1.8 {d8}, [r6]! ;store the result pu1_dst + cmp r5, #4 + bgt inner_loop_8 + +end_inner_loop_8 + subs r14, r14, #2 ;decrement the ht loop + add r12, r12, r9 ;increment the src pointer by + ; 2*src_strd-wd + add r1, r1, r8 ;increment the dst pointer by + ; 2*dst_strd-wd + bgt outer_loop_8 + + ldr r10, [sp, #120] ;loads wd + cmp r10, #12 + beq outer_loop4_residual + +end_loops + b end_func + +outer_loop_16 + str r0, [sp, #-4]! + str r7, [sp, #-4]! 
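+    ; The 16-wide path below produces two output rows per iteration:
+    ; r1/r6 walk the two destination rows and r12/r4 the two source rows.
+    ; q4/q10 accumulate the low and high 8 pixels of the first row and
+    ; q5/q11 those of the second. Each accumulator is seeded with 0xc000,
+    ; and the later vhadd.s16 against 0x4000 cancels that seed and halves
+    ; the sum, so the rounding narrow by #6 is effectively
+    ; (sum + 64) >> 7 while the intermediates stay within 16 bits.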
+ add r6, r1, r3 ;pu1_dst + dst_strd + add r4, r12, r2 ;pu1_src + src_strd + and r0, r12, #31 + mov r7, #0xc000 + sub r5, r10, #0 ;checks wd + pld [r4, r2, lsl #1] + pld [r12, r2, lsl #1] + vld1.u32 {q0}, [r12], r11 ;vector load pu1_src + vdup.16 q4, r7 + vld1.u32 {q1}, [r12], r11 + vld1.u32 {q2}, [r12], r11 + vld1.u32 {q3}, [r12], r11 + vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vld1.u32 {q6}, [r12], r11 + vmlal.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {q7}, [r12], r11 + vmlsl.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u32 {q8}, [r12], r11 + vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + vld1.u32 {q9}, [r12], r11 + vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vmlsl.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vdup.16 q10, r7 + vmlal.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + +inner_loop_16 + vmlsl.u8 q10, d1, d24 + vdup.16 q5, r7 + vmlal.u8 q10, d3, d25 + mov r7, #0x4000 + vdup.16 q11, r7 + vmlsl.u8 q10, d5, d26 + vld1.u32 {q0}, [r4], r11 ;vector load pu1_src + vhadd.s16 q4, q4, q11 + vld1.u32 {q1}, [r4], r11 + vmlal.u8 q10, d7, d27 + add r12, #8 + subs r5, r5, #16 + vmlal.u8 q10, d13, d28 + vld1.u32 {q2}, [r4], r11 + vmlsl.u8 q10, d15, d29 + vld1.u32 {q3}, [r4], r11 + vqrshrun.s16 d8, q4, #6 ;right shift and saturating narrow + ; result 1 + vmlal.u8 q10, d17, d30 + vld1.u32 {q6}, [r4], r11 + vmlsl.u8 q10, d19, d31 + vld1.u32 {q7}, [r4], r11 + vmlsl.u8 q5, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vmlal.u8 q5, d2, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {q8}, [r4], r11 + vhadd.s16 q10, q10, q11 + vld1.u32 {q9}, [r4], r11 + vmlsl.u8 q5, d4, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vmlal.u8 q5, d6, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + add r4, #8 + mov r7, #0xc000 + vmlal.u8 q5, d12, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vmlsl.u8 q5, d14, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vqrshrun.s16 d9, q10, #6 + vdup.16 q11, r7 + vmlal.u8 q5, d16, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q5, d18, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + mov r7, #0x4000 + vmlsl.u8 q11, d1, d24 + vst1.8 {q4}, [r1]! 
;store the result pu1_dst + vmlal.u8 q11, d3, d25 + vdup.16 q10, r7 + vmlsl.u8 q11, d5, d26 + pld [r12, r2, lsl #2] + pld [r4, r2, lsl #2] + addeq r12, r12, r9 ;increment the src pointer by + ; 2*src_strd-wd + addeq r4, r12, r2 ;pu1_src + src_strd + vmlal.u8 q11, d7, d27 + addeq r1, r1, r8 + subeq r14, r14, #2 + vmlal.u8 q11, d13, d28 + vhadd.s16 q5, q5, q10 + vmlsl.u8 q11, d15, d29 + vmlal.u8 q11, d17, d30 + cmp r14, #0 + vmlsl.u8 q11, d19, d31 + vqrshrun.s16 d10, q5, #6 ;right shift and saturating narrow + ; result 2 + beq epilog_16 + + vld1.u32 {q0}, [r12], r11 ;vector load pu1_src + mov r7, #0xc000 + cmp r5, #0 + vld1.u32 {q1}, [r12], r11 + vhadd.s16 q11, q11, q10 + vld1.u32 {q2}, [r12], r11 + vdup.16 q4, r7 + vld1.u32 {q3}, [r12], r11 + vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], + ; coeffabs_0); + vld1.u32 {q6}, [r12], r11 + vld1.u32 {q7}, [r12], r11 + vmlal.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], + ; coeffabs_1); + vld1.u32 {q8}, [r12], r11 + vmlsl.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], + ; coeffabs_2); + vld1.u32 {q9}, [r12], r11 + vqrshrun.s16 d11, q11, #6 + vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], + ; coeffabs_3); + moveq r5, r10 + vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], + ; coeffabs_4); + vdup.16 q10, r7 + vmlsl.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], + ; coeffabs_5); + vst1.8 {q5}, [r6]! ;store the result pu1_dst + vmlal.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], + ; coeffabs_6); + vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], + ; coeffabs_7); + addeq r6, r1, r3 ;pu1_dst + dst_strd + b inner_loop_16 + +epilog_16 + mov r7, #0x4000 + ldr r0, [sp], #4 + ldr r10, [sp, #120] + vdup.16 q10, r7 + vhadd.s16 q11, q11, q10 + vqrshrun.s16 d11, q11, #6 + vst1.8 {q5}, [r6]! ;store the result pu1_dst + ldr r7, [sp], #4 + cmp r10, #24 + beq outer_loop8_residual + +end_loops1 + b end_func + +outer_loop4_residual + sub r12, r0, #3 ;pu1_src - 3 + mov r1, r7 + add r1, #8 + mov r10, #4 + add r12, #8 + mov r14, #16 + add r8, #4 + add r9, #4 + +outer_loop_4 + add r6, r1, r3 ;pu1_dst + dst_strd + add r4, r12, r2 ;pu1_src + src_strd + subs r5, r10, #0 ;checks wd + ble end_inner_loop_4 + +inner_loop_4 + vld1.u32 {d0}, [r12], r11 ;vector load pu1_src + vld1.u32 {d1}, [r12], r11 + vld1.u32 {d2}, [r12], r11 + vld1.u32 {d3}, [r12], r11 + vld1.u32 {d4}, [r12], r11 + vld1.u32 {d5}, [r12], r11 + vld1.u32 {d6}, [r12], r11 + vld1.u32 {d7}, [r12], r11 + sub r12, r12, #4 + vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd + vld1.u32 {d13}, [r4], r11 + vzip.32 d0, d12 ;vector zip the i iteration and ii + ; interation in single register + vld1.u32 {d14}, [r4], r11 + vzip.32 d1, d13 + vld1.u32 {d15}, [r4], r11 + vzip.32 d2, d14 + vld1.u32 {d16}, [r4], r11 + vzip.32 d3, d15 + vld1.u32 {d17}, [r4], r11 + vzip.32 d4, d16 + vld1.u32 {d18}, [r4], r11 + vzip.32 d5, d17 + vld1.u32 {d19}, [r4], r11 + mov r7, #0xc000 + vdup.16 q4, r7 + sub r4, r4, #4 + vzip.32 d6, d18 + vzip.32 d7, d19 + vmlal.u8 q4, d1, d25 ;arithmetic operations for ii + ; iteration in the same time + vmlsl.u8 q4, d0, d24 + vmlsl.u8 q4, d2, d26 + vmlal.u8 q4, d3, d27 + vmlal.u8 q4, d4, d28 + vmlsl.u8 q4, d5, d29 + vmlal.u8 q4, d6, d30 + vmlsl.u8 q4, d7, d31 + mov r7, #0x4000 + vdup.16 q10, r7 + vhadd.s16 q4, q4, q10 + vqrshrun.s16 d8, q4, #6 + vst1.32 {d8[0]},[r1]! ;store the i iteration result which + ; is in upper part of the register + vst1.32 {d8[1]},[r6]! 
;store the ii iteration result which
+    ; is in lower part of the register
+    subs              r5, r5, #4            ;decrement the wd by 4
+    bgt               inner_loop_4
+
+end_inner_loop_4
+    subs              r14, r14, #2          ;decrement the ht by 4
+    add               r12, r12, r9          ;increment the input pointer
+                                            ; 2*src_strd-wd
+    add               r1, r1, r8            ;increment the output pointer
+                                            ; 2*dst_strd-wd
+    bgt               outer_loop_4
+
+end_func
+    vpop              {d8 - d15}
+    ldmfd             sp!, {r4 - r12, r15}  ;reload the registers from sp
+
+    ENDP
+
+    END
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon.h b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon.h
index c1634ed55f6b..4f27da9d2f4e 100644
--- a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon.h
+++ b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon.h
@@ -8,6 +8,9 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
+#ifndef VPX_VPX_DSP_ARM_VPX_CONVOLVE8_NEON_H_
+#define VPX_VPX_DSP_ARM_VPX_CONVOLVE8_NEON_H_
+
 #include <arm_neon.h>
 
 #include "./vpx_config.h"
@@ -131,3 +134,5 @@ static INLINE uint8x8_t scale_filter_8(const uint8x8_t *const s,
   return convolve8_8(ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7],
                      filters, filter3, filter4);
 }
+
+#endif  // VPX_VPX_DSP_ARM_VPX_CONVOLVE8_NEON_H_
diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.asm b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.asm
deleted file mode 100644
index 5eee15664de9..000000000000
--- a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.asm
+++ /dev/null
@@ -1,273 +0,0 @@
-;
-; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-; - - - ; These functions are only valid when: - ; x_step_q4 == 16 - ; w%4 == 0 - ; h%4 == 0 - ; taps == 8 - ; VP9_FILTER_WEIGHT == 128 - ; VP9_FILTER_SHIFT == 7 - - EXPORT |vpx_convolve8_horiz_neon| - EXPORT |vpx_convolve8_vert_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 - - ; Multiply and accumulate by q0 - MACRO - MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7 - vmull.s16 $dst, $src0, d0[0] - vmlal.s16 $dst, $src1, d0[1] - vmlal.s16 $dst, $src2, d0[2] - vmlal.s16 $dst, $src3, d0[3] - vmlal.s16 $dst, $src4, d1[0] - vmlal.s16 $dst, $src5, d1[1] - vmlal.s16 $dst, $src6, d1[2] - vmlal.s16 $dst, $src7, d1[3] - MEND - -; r0 const uint8_t *src -; r1 int src_stride -; r2 uint8_t *dst -; r3 int dst_stride -; sp[]const int16_t *filter -; sp[]int x0_q4 -; sp[]int x_step_q4 ; unused -; sp[]int y0_q4 -; sp[]int y_step_q4 ; unused -; sp[]int w -; sp[]int h - -|vpx_convolve8_horiz_neon| PROC - push {r4-r10, lr} - - sub r0, r0, #3 ; adjust for taps - - ldrd r4, r5, [sp, #32] ; filter, x0_q4 - add r4, r5, lsl #4 - ldrd r6, r7, [sp, #52] ; w, h - - vld1.s16 {q0}, [r4] ; filter - - sub r8, r1, r1, lsl #2 ; -src_stride * 3 - add r8, r8, #4 ; -src_stride * 3 + 4 - - sub r4, r3, r3, lsl #2 ; -dst_stride * 3 - add r4, r4, #4 ; -dst_stride * 3 + 4 - - rsb r9, r6, r1, lsl #2 ; reset src for outer loop - sub r9, r9, #7 - rsb r12, r6, r3, lsl #2 ; reset dst for outer loop - - mov r10, r6 ; w loop counter - -vpx_convolve8_loop_horiz_v - vld1.8 {d24}, [r0], r1 - vld1.8 {d25}, [r0], r1 - vld1.8 {d26}, [r0], r1 - vld1.8 {d27}, [r0], r8 - - vtrn.16 q12, q13 - vtrn.8 d24, d25 - vtrn.8 d26, d27 - - pld [r0, r1, lsl #2] - - vmovl.u8 q8, d24 - vmovl.u8 q9, d25 - vmovl.u8 q10, d26 - vmovl.u8 q11, d27 - - ; save a few instructions in the inner loop - vswp d17, d18 - vmov d23, d21 - - add r0, r0, #3 - -vpx_convolve8_loop_horiz - add r5, r0, #64 - - vld1.32 {d28[]}, [r0], r1 - vld1.32 {d29[]}, [r0], r1 - vld1.32 {d31[]}, [r0], r1 - vld1.32 {d30[]}, [r0], r8 - - pld [r5] - - vtrn.16 d28, d31 - vtrn.16 d29, d30 - vtrn.8 d28, d29 - vtrn.8 d31, d30 - - pld [r5, r1] - - ; extract to s16 - vtrn.32 q14, q15 - vmovl.u8 q12, d28 - vmovl.u8 q13, d29 - - pld [r5, r1, lsl #1] - - ; src[] * filter - MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24 - MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26 - MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27 - MULTIPLY_BY_Q0 q15, d22, d18, d19, d23, d24, d26, d27, d25 - - pld [r5, -r8] - - ; += 64 >> 7 - vqrshrun.s32 d2, q1, #7 - vqrshrun.s32 d3, q2, #7 - vqrshrun.s32 d4, q14, #7 - vqrshrun.s32 d5, q15, #7 - - ; saturate - vqmovn.u16 d2, q1 - vqmovn.u16 d3, q2 - - ; transpose - vtrn.16 d2, d3 - vtrn.32 d2, d3 - vtrn.8 d2, d3 - - vst1.u32 {d2[0]}, [r2@32], r3 - vst1.u32 {d3[0]}, [r2@32], r3 - vst1.u32 {d2[1]}, [r2@32], r3 - vst1.u32 {d3[1]}, [r2@32], r4 - - vmov q8, q9 - vmov d20, d23 - vmov q11, q12 - vmov q9, q13 - - subs r6, r6, #4 ; w -= 4 - bgt vpx_convolve8_loop_horiz - - ; outer loop - mov r6, r10 ; restore w counter - add r0, r0, r9 ; src += src_stride * 4 - w - add r2, r2, r12 ; dst += dst_stride * 4 - w - subs r7, r7, #4 ; h -= 4 - bgt vpx_convolve8_loop_horiz_v - - pop {r4-r10, pc} - - ENDP - -|vpx_convolve8_vert_neon| PROC - push {r4-r8, lr} - - ; adjust for taps - sub r0, r0, r1 - sub r0, r0, r1, lsl #1 - - ldr r4, [sp, #24] ; filter - ldr r5, [sp, #36] ; y0_q4 - add r4, r5, lsl #4 - ldr r6, [sp, #44] ; w - ldr lr, [sp, #48] ; h - - vld1.s16 {q0}, [r4] ; filter - - lsl r1, r1, #1 - lsl r3, r3, #1 
- -vpx_convolve8_loop_vert_h - mov r4, r0 - add r7, r0, r1, asr #1 - mov r5, r2 - add r8, r2, r3, asr #1 - mov r12, lr ; h loop counter - - vld1.u32 {d16[0]}, [r4], r1 - vld1.u32 {d16[1]}, [r7], r1 - vld1.u32 {d18[0]}, [r4], r1 - vld1.u32 {d18[1]}, [r7], r1 - vld1.u32 {d20[0]}, [r4], r1 - vld1.u32 {d20[1]}, [r7], r1 - vld1.u32 {d22[0]}, [r4], r1 - - vmovl.u8 q8, d16 - vmovl.u8 q9, d18 - vmovl.u8 q10, d20 - vmovl.u8 q11, d22 - -vpx_convolve8_loop_vert - ; always process a 4x4 block at a time - vld1.u32 {d24[0]}, [r7], r1 - vld1.u32 {d26[0]}, [r4], r1 - vld1.u32 {d26[1]}, [r7], r1 - vld1.u32 {d24[1]}, [r4], r1 - - ; extract to s16 - vmovl.u8 q12, d24 - vmovl.u8 q13, d26 - - pld [r5] - pld [r8] - - ; src[] * filter - MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24 - - pld [r5, r3] - pld [r8, r3] - - MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d24, d26 - - pld [r7] - pld [r4] - - MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d24, d26, d27 - - pld [r7, r1] - pld [r4, r1] - - MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d24, d26, d27, d25 - - ; += 64 >> 7 - vqrshrun.s32 d2, q1, #7 - vqrshrun.s32 d3, q2, #7 - vqrshrun.s32 d4, q14, #7 - vqrshrun.s32 d5, q15, #7 - - ; saturate - vqmovn.u16 d2, q1 - vqmovn.u16 d3, q2 - - vst1.u32 {d2[0]}, [r5@32], r3 - vst1.u32 {d2[1]}, [r8@32], r3 - vst1.u32 {d3[0]}, [r5@32], r3 - vst1.u32 {d3[1]}, [r8@32], r3 - - vmov q8, q10 - vmov d18, d22 - vmov d19, d24 - vmov q10, q13 - vmov d22, d25 - - subs r12, r12, #4 ; h -= 4 - bgt vpx_convolve8_loop_vert - - ; outer loop - add r0, r0, #4 - add r2, r2, #4 - subs r6, r6, #4 ; w -= 4 - bgt vpx_convolve8_loop_vert_h - - pop {r4-r8, pc} - - ENDP - END diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.c b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.c new file mode 100644 index 000000000000..4470b28b8845 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_filter.h" +#include "vpx_dsp/arm/vpx_convolve8_neon_asm.h" + +/* Type1 and Type2 functions are called depending on the position of the + * negative and positive coefficients in the filter. In type1, the filter kernel + * used is sub_pel_filters_8lp, in which only the first two and the last two + * coefficients are negative. In type2, the negative coefficients are 0, 2, 5 & + * 7. 
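+ * That is, for type2 kernels the coefficients at taps 0, 2, 5 and 7 are
+ * negative, as in the regular and sharp eight-tap kernels. The dispatch
+ * below keys off vp9_filter_kernels[1] (the smooth, low-pass kernel set)
+ * to select the type1 variants and falls back to type2 otherwise.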
+ */ + +#define DEFINE_FILTER(dir) \ + void vpx_convolve8_##dir##_neon( \ + const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ + ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ + int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \ + if (filter == vp9_filter_kernels[1]) { \ + vpx_convolve8_##dir##_filter_type1_neon( \ + src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, \ + y_step_q4, w, h); \ + } else { \ + vpx_convolve8_##dir##_filter_type2_neon( \ + src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, \ + y_step_q4, w, h); \ + } \ + } + +DEFINE_FILTER(horiz); +DEFINE_FILTER(avg_horiz); +DEFINE_FILTER(vert); +DEFINE_FILTER(avg_vert); diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.h b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.h new file mode 100644 index 000000000000..b123d1cb080c --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_DSP_ARM_VPX_CONVOLVE8_NEON_ASM_H_ +#define VPX_VPX_DSP_ARM_VPX_CONVOLVE8_NEON_ASM_H_ + +#define DECLARE_FILTER(dir, type) \ + void vpx_convolve8_##dir##_filter_##type##_neon( \ + const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ + ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ + int x_step_q4, int y0_q4, int y_step_q4, int w, int h); + +DECLARE_FILTER(horiz, type1); +DECLARE_FILTER(avg_horiz, type1); +DECLARE_FILTER(horiz, type2); +DECLARE_FILTER(avg_horiz, type2); +DECLARE_FILTER(vert, type1); +DECLARE_FILTER(avg_vert, type1); +DECLARE_FILTER(vert, type2); +DECLARE_FILTER(avg_vert, type2); + +#endif // VPX_VPX_DSP_ARM_VPX_CONVOLVE8_NEON_ASM_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_vert_filter_type1_neon.asm b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_vert_filter_type1_neon.asm new file mode 100644 index 000000000000..2666d4253e44 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_vert_filter_type1_neon.asm @@ -0,0 +1,457 @@ +; +; Copyright (c) 2018 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
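+;
+; Note: the arguments arrive in the standard EABI order (src, src_stride,
+; dst, dst_stride, with filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w and
+; h on the stack); the prologue below remaps them into the assignment
+; listed here. The stack offsets (#104 onwards) account for the ten saved
+; core registers (40 bytes) plus the 64-byte d8-d15 save area.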
+; +;**************Variables Vs Registers*********************************** +; r0 => src +; r1 => dst +; r2 => src_stride +; r6 => dst_stride +; r12 => filter_y0 +; r5 => ht +; r3 => wd + + EXPORT |vpx_convolve8_vert_filter_type1_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +|vpx_convolve8_vert_filter_type1_neon| PROC + + stmfd sp!, {r4 - r12, r14} ;stack stores the values of + ; the arguments + vpush {d8 - d15} ; stack offset by 64 + mov r4, r1 + mov r1, r2 + mov r2, r4 + vmov.i16 q15, #0x4000 + mov r11, #0xc000 + ldr r12, [sp, #104] ;load filter + ldr r6, [sp, #116] ;load y0_q4 + add r12, r12, r6, lsl #4 ;r12 = filter[y0_q4] + mov r6, r3 + ldr r5, [sp, #124] ;load wd + vld2.8 {d0, d1}, [r12] ;coeff = vld1_s8(pi1_coeff) + sub r12, r2, r2, lsl #2 ;src_ctrd & pi1_coeff + vabs.s8 d0, d0 ;vabs_s8(coeff) + add r0, r0, r12 ;r0->pu1_src r12->pi1_coeff + ldr r3, [sp, #128] ;load ht + subs r7, r3, #0 ;r3->ht + vdup.u8 d22, d0[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, + ; 0); + cmp r5, #8 + vdup.u8 d23, d0[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, + ; 1); + vdup.u8 d24, d0[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, + ; 2); + vdup.u8 d25, d0[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, + ; 3); + vdup.u8 d26, d0[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, + ; 4); + vdup.u8 d27, d0[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, + ; 5); + vdup.u8 d28, d0[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, + ; 6); + vdup.u8 d29, d0[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, + ; 7); + blt core_loop_wd_4 ;core loop wd 4 jump + + str r0, [sp, #-4]! + str r1, [sp, #-4]! + bic r4, r5, #7 ;r5 ->wd + rsb r9, r4, r6, lsl #2 ;r6->dst_strd r5 ->wd + rsb r8, r4, r2, lsl #2 ;r2->src_strd + mov r3, r5, lsr #3 ;divide by 8 + mul r7, r3 ;multiply height by width + sub r7, #4 ;subtract by one for epilog + +prolog + and r10, r0, #31 + add r3, r0, r2 ;pu1_src_tmp += src_strd; + vdup.16 q4, r11 + vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vld1.u8 {d0}, [r0]! 
;src_tmp1 = vld1_u8(pu1_src_tmp); + subs r4, r4, #8 + vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, + ; coeffabs_1); + vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp1, coeffabs_0); + vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp3, coeffabs_2); + vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp4, coeffabs_3); + vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp1, coeffabs_4); + vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp2, coeffabs_5); + vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp3, coeffabs_6); + vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp4, coeffabs_7); + vdup.16 q5, r11 + vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, + ; coeffabs_1); + addle r0, r0, r8 + vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp2, coeffabs_0); + bicle r4, r5, #7 ;r5 ->wd + vmlal.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp4, coeffabs_2); + pld [r3] + vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp1, coeffabs_3); + vhadd.s16 q4, q4, q15 + vdup.16 q6, r11 + pld [r3, r2] + vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp2, coeffabs_4); + pld [r3, r2, lsl #1] + vmlal.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp3, coeffabs_5); + add r3, r3, r2 + vmlsl.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp4, coeffabs_6); + pld [r3, r2, lsl #1] + vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp1, coeffabs_7); + add r3, r0, r2 ;pu1_src_tmp += src_strd; + vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u8 {d1}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q6, d3, d23 + vld1.u8 {d0}, [r0]! ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q6, d2, d22 + vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q6, d4, d24 + vhadd.s16 q5, q5, q15 + vdup.16 q7, r11 + vmlal.u8 q6, d5, d25 + vmlal.u8 q6, d6, d26 + vmlal.u8 q6, d7, d27 + vmlsl.u8 q6, d16, d28 + vmlsl.u8 q6, d17, d29 + add r14, r1, r6 + vst1.8 {d8}, [r1]! 
;vst1_u8(pu1_dst,sto_res); + vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + addle r1, r1, r9 + vmlsl.u8 q7, d4, d23 + subs r7, r7, #4 + vmlsl.u8 q7, d3, d22 + vmlal.u8 q7, d5, d24 + vmlal.u8 q7, d6, d25 + vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vhadd.s16 q6, q6, q15 + vdup.16 q4, r11 + vmlal.u8 q7, d7, d26 + vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q7, d16, d27 + vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d17, d28 + vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d18, d29 + vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); + vqrshrun.s16 d12, q6, #6 + blt epilog_end ;jumps to epilog_end + + beq epilog ;jumps to epilog + +main_loop_8 + subs r4, r4, #8 + vmlsl.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, + ; coeffabs_1); + addle r0, r0, r8 + vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp1, coeffabs_0); + bicle r4, r5, #7 ;r5 ->wd + vmlal.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp3, coeffabs_2); + vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp4, coeffabs_3); + vhadd.s16 q7, q7, q15 + vdup.16 q5, r11 + vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp1, coeffabs_4); + vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp2, coeffabs_5); + vmlsl.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp3, coeffabs_6); + vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp4, coeffabs_7); + vst1.8 {d12}, [r14], r6 + vqrshrun.s16 d14, q7, #6 + add r3, r0, r2 ;pu1_src_tmp += src_strd; + vmlsl.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, + ; coeffabs_1); + vld1.u8 {d0}, [r0]! 
;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp2, coeffabs_0); + vmlal.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp4, coeffabs_2); + vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp1, coeffabs_3); + vhadd.s16 q4, q4, q15 + vdup.16 q6, r11 + vst1.8 {d14}, [r14], r6 + vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp2, coeffabs_4); + add r14, r1, #0 + vmlal.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp3, coeffabs_5); + add r1, r1, #8 + vmlsl.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp4, coeffabs_6); + vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp1, coeffabs_7); + addle r1, r1, r9 + vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vmlsl.u8 q6, d3, d23 + add r10, r3, r2, lsl #3 ; 10*strd - 8+2 + vmlsl.u8 q6, d2, d22 + add r10, r10, r2 ; 11*strd + vmlal.u8 q6, d4, d24 + vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q6, d5, d25 + vhadd.s16 q5, q5, q15 + vdup.16 q7, r11 + vmlal.u8 q6, d6, d26 + vst1.8 {d8}, [r14], r6 ;vst1_u8(pu1_dst,sto_res); + pld [r10] ;11+ 0 + vmlal.u8 q6, d7, d27 + pld [r10, r2] ;11+ 1*strd + vmlsl.u8 q6, d16, d28 + pld [r10, r2, lsl #1] ;11+ 2*strd + vmlsl.u8 q6, d17, d29 + add r10, r10, r2 ;12*strd + vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + pld [r10, r2, lsl #1] ;11+ 3*strd + vmlsl.u8 q7, d4, d23 + vmlsl.u8 q7, d3, d22 + subs r7, r7, #4 + vmlal.u8 q7, d5, d24 + vmlal.u8 q7, d6, d25 + vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vhadd.s16 q6, q6, q15 + vdup.16 q4, r11 + vmlal.u8 q7, d7, d26 + vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q7, d16, d27 + vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d17, d28 + vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d18, d29 + vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vqrshrun.s16 d12, q6, #6 + vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); + bgt main_loop_8 ;jumps to main_loop_8 + +epilog + vmlsl.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, + ; coeffabs_1); + vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp1, coeffabs_0); + vmlal.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp3, coeffabs_2); + vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp4, coeffabs_3); + vhadd.s16 q7, q7, q15 + vdup.16 q5, r11 + vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp1, coeffabs_4); + vmlal.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp2, coeffabs_5); + vmlsl.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp3, coeffabs_6); + vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp4, coeffabs_7); + vst1.8 {d12}, [r14], r6 + vqrshrun.s16 d14, q7, #6 + vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, + ; coeffabs_1); + vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp2, coeffabs_0); + vmlal.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp4, coeffabs_2); + vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp1, coeffabs_3); + vhadd.s16 q4, q4, q15 + vdup.16 q6, r11 + vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp2, coeffabs_4); + vmlal.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp3, coeffabs_5); + vmlsl.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp4, coeffabs_6); + vmlsl.u8 q5, d16, 
d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp1, coeffabs_7); + vst1.8 {d14}, [r14], r6 + vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q6, d3, d23 + vmlsl.u8 q6, d2, d22 + vmlal.u8 q6, d4, d24 + vmlal.u8 q6, d5, d25 + vhadd.s16 q5, q5, q15 + vdup.16 q7, r11 + vmlal.u8 q6, d6, d26 + vmlal.u8 q6, d7, d27 + vmlsl.u8 q6, d16, d28 + vmlsl.u8 q6, d17, d29 + add r14, r1, r6 + vst1.8 {d8}, [r1]! ;vst1_u8(pu1_dst,sto_res); + vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d4, d23 + vmlsl.u8 q7, d3, d22 + vmlal.u8 q7, d5, d24 + vmlal.u8 q7, d6, d25 + vhadd.s16 q6, q6, q15 + vmlal.u8 q7, d7, d26 + vmlal.u8 q7, d16, d27 + vmlsl.u8 q7, d17, d28 + vmlsl.u8 q7, d18, d29 + vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); + vqrshrun.s16 d12, q6, #6 + +epilog_end + vst1.8 {d12}, [r14], r6 + vhadd.s16 q7, q7, q15 + vqrshrun.s16 d14, q7, #6 + vst1.8 {d14}, [r14], r6 + +end_loops + tst r5, #7 + ldr r1, [sp], #4 + ldr r0, [sp], #4 + vpopeq {d8 - d15} + ldmfdeq sp!, {r4 - r12, r15} ;reload the registers from + ; sp + mov r5, #4 + add r0, r0, #8 + add r1, r1, #8 + mov r7, #16 + +core_loop_wd_4 + rsb r9, r5, r6, lsl #2 ;r6->dst_strd r5 ->wd + rsb r8, r5, r2, lsl #2 ;r2->src_strd + vmov.i8 d4, #0 + +outer_loop_wd_4 + subs r12, r5, #0 + ble end_inner_loop_wd_4 ;outer loop jump + +inner_loop_wd_4 + add r3, r0, r2 + vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp1, 1); + subs r12, r12, #4 + vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, + ; 1); + vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp2, 1); + vld1.u32 {d4[0]},[r0] ;src_tmp1 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp1, 0); + vdup.16 q0, r11 + vmlsl.u8 q0, d5, d23 ;mul_res1 = + ; vmull_u8(vreinterpret_u8_u32(src_tmp2), coeffabs_1); + + vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, + ; 1); + add r0, r0, #4 + vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp3, 1); + vmlsl.u8 q0, d4, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp1), coeffabs_0); + vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, + ; 1); + vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp4, 1); + vmlal.u8 q0, d6, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp3), coeffabs_2); + vdup.16 q4, r11 + vmlsl.u8 q4, d7, d23 + vdup.u32 d4, d7[1] ;src_tmp1 = vdup_lane_u32(src_tmp4, + ; 1); + vmull.u8 q1, d7, d25 ;mul_res2 = + ; vmull_u8(vreinterpret_u8_u32(src_tmp4), coeffabs_3); + vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp1, 1); + vmlsl.u8 q4, d6, d22 + vmlal.u8 q0, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp1), coeffabs_4); + vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, + ; 1); + vmlal.u8 q4, d4, d24 + vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp2, 1); + vmlal.u8 q1, d5, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; vreinterpret_u8_u32(src_tmp2), coeffabs_5); + vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, + ; 1); + vmlal.u8 q4, d5, d25 + vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp3, 1); + vmlsl.u8 q0, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp3), coeffabs_6); + vdup.u32 d7, d6[1] ;src_tmp4 = 
vdup_lane_u32(src_tmp3, + ; 1); + vmlal.u8 q4, d6, d26 + vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp4, 1); + vmlsl.u8 q1, d7, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; vreinterpret_u8_u32(src_tmp4), coeffabs_7); + vdup.u32 d4, d7[1] + vadd.i16 q0, q0, q1 ;mul_res1 = vaddq_u16(mul_res1, + ; mul_res2); + vmlal.u8 q4, d7, d27 + vld1.u32 {d4[1]},[r3], r2 + vmlsl.u8 q4, d4, d28 + vdup.u32 d5, d4[1] + vhadd.s16 q0, q0, q15 + vqrshrun.s16 d0, q0, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u32 {d5[1]},[r3] + add r3, r1, r6 + vst1.32 {d0[0]},[r1] ;vst1_lane_u32((uint32_t *)pu1_dst, + ; vreinterpret_u32_u8(sto_res), 0); + vmlsl.u8 q4, d5, d29 + vst1.32 {d0[1]},[r3], r6 ;vst1_lane_u32((uint32_t + ; *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 1); + vhadd.s16 q4, q4, q15 + vqrshrun.s16 d8, q4, #6 + vst1.32 {d8[0]},[r3], r6 + add r1, r1, #4 + vst1.32 {d8[1]},[r3] + bgt inner_loop_wd_4 + +end_inner_loop_wd_4 + subs r7, r7, #4 + add r1, r1, r9 + add r0, r0, r8 + bgt outer_loop_wd_4 + + vpop {d8 - d15} + ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp + + ENDP + + END diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_vert_filter_type2_neon.asm b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_vert_filter_type2_neon.asm new file mode 100644 index 000000000000..cb5d6d3fe5cc --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve8_vert_filter_type2_neon.asm @@ -0,0 +1,455 @@ +; +; Copyright (c) 2018 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; +;**************Variables Vs Registers*********************************** +; r0 => src +; r1 => dst +; r2 => src_stride +; r6 => dst_stride +; r12 => filter_y0 +; r5 => ht +; r3 => wd + + EXPORT |vpx_convolve8_vert_filter_type2_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +|vpx_convolve8_vert_filter_type2_neon| PROC + + stmfd sp!, {r4 - r12, r14} ;stack stores the values of + ; the arguments + vpush {d8 - d15} ; stack offset by 64 + mov r4, r1 + mov r1, r2 + mov r2, r4 + vmov.i16 q15, #0x4000 + mov r11, #0xc000 + ldr r12, [sp, #104] ;load filter + ldr r6, [sp, #116] ;load y0_q4 + add r12, r12, r6, lsl #4 ;r12 = filter[y0_q4] + mov r6, r3 + ldr r5, [sp, #124] ;load wd + vld2.8 {d0, d1}, [r12] ;coeff = vld1_s8(pi1_coeff) + sub r12, r2, r2, lsl #2 ;src_ctrd & pi1_coeff + vabs.s8 d0, d0 ;vabs_s8(coeff) + add r0, r0, r12 ;r0->pu1_src r12->pi1_coeff + ldr r3, [sp, #128] ;load ht + subs r7, r3, #0 ;r3->ht + vdup.u8 d22, d0[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, + ; 0); + cmp r5, #8 + vdup.u8 d23, d0[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, + ; 1); + vdup.u8 d24, d0[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, + ; 2); + vdup.u8 d25, d0[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, + ; 3); + vdup.u8 d26, d0[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, + ; 4); + vdup.u8 d27, d0[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, + ; 5); + vdup.u8 d28, d0[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, + ; 6); + vdup.u8 d29, d0[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, + ; 7); + blt core_loop_wd_4 ;core loop wd 4 jump + + str r0, [sp, #-4]! + str r1, [sp, #-4]! 
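+ ; The wd >= 8 path is software pipelined: r4 counts the columns left in
+ ; the current group of four rows (wd rounded down to a multiple of 8),
+ ; r8/r9 are the src/dst adjustments back to the start of the next group,
+ ; and r7 is loaded with ht * (wd / 8) - 4 so that the first and last
+ ; groups are handled by the prolog and epilog around main_loop_8.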
+ bic r4, r5, #7 ;r5 ->wd + rsb r9, r4, r6, lsl #2 ;r6->dst_strd r5 ->wd + rsb r8, r4, r2, lsl #2 ;r2->src_strd + mov r3, r5, lsr #3 ;divide by 8 + mul r7, r3 ;multiply height by width + sub r7, #4 ;subtract by one for epilog + +prolog + and r10, r0, #31 + add r3, r0, r2 ;pu1_src_tmp += src_strd; + vdup.16 q4, r11 + vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vld1.u8 {d0}, [r0]! ;src_tmp1 = vld1_u8(pu1_src_tmp); + subs r4, r4, #8 + vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, + ; coeffabs_1); + vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp1, coeffabs_0); + vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp3, coeffabs_2); + vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp4, coeffabs_3); + vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp1, coeffabs_4); + vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp2, coeffabs_5); + vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp3, coeffabs_6); + vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp4, coeffabs_7); + vdup.16 q5, r11 + vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, + ; coeffabs_1); + addle r0, r0, r8 + vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp2, coeffabs_0); + bicle r4, r5, #7 ;r5 ->wd + vmlsl.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp4, coeffabs_2); + pld [r3] + vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp1, coeffabs_3); + vhadd.s16 q4, q4, q15 + vdup.16 q6, r11 + pld [r3, r2] + vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp2, coeffabs_4); + pld [r3, r2, lsl #1] + vmlsl.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp3, coeffabs_5); + add r3, r3, r2 + vmlal.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp4, coeffabs_6); + pld [r3, r2, lsl #1] + vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp1, coeffabs_7); + add r3, r0, r2 ;pu1_src_tmp += src_strd; + vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + + vld1.u8 {d1}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q6, d3, d23 + vld1.u8 {d0}, [r0]! ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q6, d2, d22 + vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q6, d4, d24 + vhadd.s16 q5, q5, q15 + vdup.16 q7, r11 + vmlal.u8 q6, d5, d25 + vmlal.u8 q6, d6, d26 + vmlsl.u8 q6, d7, d27 + vmlal.u8 q6, d16, d28 + vmlsl.u8 q6, d17, d29 + add r14, r1, r6 + vst1.8 {d8}, [r1]! 
;vst1_u8(pu1_dst,sto_res); + vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + addle r1, r1, r9 + vmlal.u8 q7, d4, d23 + subs r7, r7, #4 + vmlsl.u8 q7, d3, d22 + vmlsl.u8 q7, d5, d24 + vmlal.u8 q7, d6, d25 + vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vhadd.s16 q6, q6, q15 + vdup.16 q4, r11 + vmlal.u8 q7, d7, d26 + vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d16, d27 + vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q7, d17, d28 + vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d18, d29 + vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); + vqrshrun.s16 d12, q6, #6 + blt epilog_end ;jumps to epilog_end + + beq epilog ;jumps to epilog + +main_loop_8 + subs r4, r4, #8 + vmlal.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, + ; coeffabs_1); + addle r0, r0, r8 + vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp1, coeffabs_0); + bicle r4, r5, #7 ;r5 ->wd + vmlsl.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp3, coeffabs_2); + vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp4, coeffabs_3); + vhadd.s16 q7, q7, q15 + vdup.16 q5, r11 + vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp1, coeffabs_4); + vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp2, coeffabs_5); + vmlal.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp3, coeffabs_6); + vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp4, coeffabs_7); + vst1.8 {d12}, [r14], r6 + vqrshrun.s16 d14, q7, #6 + add r3, r0, r2 ;pu1_src_tmp += src_strd; + vmlal.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, + ; coeffabs_1); + vld1.u8 {d0}, [r0]! 
;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp2, coeffabs_0); + vmlsl.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp4, coeffabs_2); + vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp1, coeffabs_3); + vhadd.s16 q4, q4, q15 + vdup.16 q6, r11 + vst1.8 {d14}, [r14], r6 + vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp2, coeffabs_4); + add r14, r1, #0 + vmlsl.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp3, coeffabs_5); + add r1, r1, #8 + vmlal.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp4, coeffabs_6); + vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp1, coeffabs_7); + addle r1, r1, r9 + vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vmlal.u8 q6, d3, d23 + add r10, r3, r2, lsl #3 ; 10*strd - 8+2 + vmlsl.u8 q6, d2, d22 + add r10, r10, r2 ; 11*strd + vmlsl.u8 q6, d4, d24 + vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q6, d5, d25 + vhadd.s16 q5, q5, q15 + vdup.16 q7, r11 + vmlal.u8 q6, d6, d26 + vst1.8 {d8}, [r14], r6 ;vst1_u8(pu1_dst,sto_res); + pld [r10] ;11+ 0 + vmlsl.u8 q6, d7, d27 + pld [r10, r2] ;11+ 1*strd + vmlal.u8 q6, d16, d28 + pld [r10, r2, lsl #1] ;11+ 2*strd + vmlsl.u8 q6, d17, d29 + add r10, r10, r2 ;12*strd + vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + pld [r10, r2, lsl #1] ;11+ 3*strd + vmlal.u8 q7, d4, d23 + vmlsl.u8 q7, d3, d22 + subs r7, r7, #4 + vmlsl.u8 q7, d5, d24 + vmlal.u8 q7, d6, d25 + vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vhadd.s16 q6, q6, q15 + vdup.16 q4, r11 + vmlal.u8 q7, d7, d26 + vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d16, d27 + vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q7, d17, d28 + vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlsl.u8 q7, d18, d29 + vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); + vqrshrun.s16 d12, q6, #6 + vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); + bgt main_loop_8 ;jumps to main_loop_8 + +epilog + vmlal.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, + vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp1, coeffabs_0); + vmlsl.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp3, coeffabs_2); + vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp4, coeffabs_3); + vhadd.s16 q7, q7, q15 + vdup.16 q5, r11 + vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp1, coeffabs_4); + vmlsl.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp2, coeffabs_5); + vmlal.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; src_tmp3, coeffabs_6); + vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, + ; src_tmp4, coeffabs_7); + vst1.8 {d12}, [r14], r6 + vqrshrun.s16 d14, q7, #6 + vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); + vmlal.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, + ; coeffabs_1); + vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp2, coeffabs_0); + vmlsl.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp4, coeffabs_2); + vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp1, coeffabs_3); + vhadd.s16 q4, q4, q15 + vdup.16 q6, r11 + vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp2, coeffabs_4); + vmlsl.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; src_tmp3, coeffabs_5); + vmlal.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, + ; src_tmp4, coeffabs_6); + vmlsl.u8 q5, d16, d29 ;mul_res2 = 
vmlsl_u8(mul_res2, + ; src_tmp1, coeffabs_7); + vst1.8 {d14}, [r14], r6 + vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); + vmlal.u8 q6, d3, d23 + vmlsl.u8 q6, d2, d22 + vmlsl.u8 q6, d4, d24 + vmlal.u8 q6, d5, d25 + vhadd.s16 q5, q5, q15 + vdup.16 q7, r11 + vmlal.u8 q6, d6, d26 + vmlsl.u8 q6, d7, d27 + vmlal.u8 q6, d16, d28 + vmlsl.u8 q6, d17, d29 + add r14, r1, r6 + vst1.8 {d8}, [r1]! ;vst1_u8(pu1_dst,sto_res); + vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); + vmlal.u8 q7, d4, d23 + vmlsl.u8 q7, d3, d22 + vmlsl.u8 q7, d5, d24 + vmlal.u8 q7, d6, d25 + vhadd.s16 q6, q6, q15 + vmlal.u8 q7, d7, d26 + vmlsl.u8 q7, d16, d27 + vmlal.u8 q7, d17, d28 + vmlsl.u8 q7, d18, d29 + vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); + vqrshrun.s16 d12, q6, #6 + +epilog_end + vst1.8 {d12}, [r14], r6 + vhadd.s16 q7, q7, q15 + vqrshrun.s16 d14, q7, #6 + vst1.8 {d14}, [r14], r6 + +end_loops + tst r5, #7 + ldr r1, [sp], #4 + ldr r0, [sp], #4 + vpopeq {d8 - d15} + ldmfdeq sp!, {r4 - r12, r15} ;reload the registers from sp + mov r5, #4 + add r0, r0, #8 + add r1, r1, #8 + mov r7, #16 + +core_loop_wd_4 + rsb r9, r5, r6, lsl #2 ;r6->dst_strd r5 ->wd + rsb r8, r5, r2, lsl #2 ;r2->src_strd + vmov.i8 d4, #0 + +outer_loop_wd_4 + subs r12, r5, #0 + ble end_inner_loop_wd_4 ;outer loop jump + +inner_loop_wd_4 + add r3, r0, r2 + vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp1, 1); + subs r12, r12, #4 + vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, + ; 1); + vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp2, 1); + vld1.u32 {d4[0]},[r0] ;src_tmp1 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp1, 0); + vdup.16 q0, r11 + vmlal.u8 q0, d5, d23 ;mul_res1 = + ; vmull_u8(vreinterpret_u8_u32(src_tmp2), coeffabs_1); + vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, + ; 1); + add r0, r0, #4 + vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp3, 1); + vmlsl.u8 q0, d4, d22 ;mul_res1 = vmlsl_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp1), coeffabs_0); + vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, + ; 1); + vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp4, 1); + vmlsl.u8 q0, d6, d24 ;mul_res1 = vmlsl_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp3), coeffabs_2); + vdup.16 q4, r11 + vmlal.u8 q4, d7, d23 + vdup.u32 d4, d7[1] ;src_tmp1 = vdup_lane_u32(src_tmp4, + ; 1); + vmull.u8 q1, d7, d25 ;mul_res2 = + ; vmull_u8(vreinterpret_u8_u32(src_tmp4), coeffabs_3); + vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp1, 1); + vmlsl.u8 q4, d6, d22 + vmlal.u8 q0, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp1), coeffabs_4); + vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, + ; 1); + vmlsl.u8 q4, d4, d24 + vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp2, 1); + vmlsl.u8 q1, d5, d27 ;mul_res2 = vmlsl_u8(mul_res2, + ; vreinterpret_u8_u32(src_tmp2), coeffabs_5); + vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, + ; 1); + vmlal.u8 q4, d5, d25 + vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp3, 1); + vmlal.u8 q0, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, + ; vreinterpret_u8_u32(src_tmp3), coeffabs_6); + vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, + ; 1); 
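+ ; (each 32-bit load in this loop carries one 4-pixel row; lane 1 of the
+ ; previous d register is duplicated into both lanes of the next before
+ ; its lane 1 is refilled, so every row is fetched from memory only once
+ ; while q0/q1 accumulate the first pair of output rows and q4 the
+ ; second pair)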
+ vmlal.u8 q4, d6, d26 + vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t + ; *)pu1_src_tmp, src_tmp4, 1); + vmlsl.u8 q1, d7, d29 ;mul_res2 = vmlsl_u8(mul_res2, + ; vreinterpret_u8_u32(src_tmp4), coeffabs_7); + vdup.u32 d4, d7[1] + vadd.i16 q0, q0, q1 ;mul_res1 = vaddq_u16(mul_res1, + ; mul_res2); + vmlsl.u8 q4, d7, d27 + vld1.u32 {d4[1]},[r3], r2 + vmlal.u8 q4, d4, d28 + vdup.u32 d5, d4[1] + vhadd.s16 q0, q0, q15 + vqrshrun.s16 d0, q0, #6 ;sto_res = vqmovun_s16(sto_res_tmp); + vld1.u32 {d5[1]},[r3] + add r3, r1, r6 + vst1.32 {d0[0]},[r1] ;vst1_lane_u32((uint32_t *)pu1_dst, + ; vreinterpret_u32_u8(sto_res), 0); + vmlsl.u8 q4, d5, d29 + vst1.32 {d0[1]},[r3], r6 ;vst1_lane_u32((uint32_t + ; *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 1); + vhadd.s16 q4, q4, q15 + vqrshrun.s16 d8, q4, #6 + vst1.32 {d8[0]},[r3], r6 + add r1, r1, #4 + vst1.32 {d8[1]},[r3] + bgt inner_loop_wd_4 + +end_inner_loop_wd_4 + subs r7, r7, #4 + add r1, r1, r9 + add r0, r0, r8 + bgt outer_loop_wd_4 + + vpop {d8 - d15} + ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp + + ENDP + + END diff --git a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve_neon.c b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve_neon.c index 2bf2d890be09..830f3176d7ff 100644 --- a/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve_neon.c +++ b/media/libvpx/libvpx/vpx_dsp/arm/vpx_convolve_neon.c @@ -24,7 +24,8 @@ void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, uint8_t temp[64 * 72]; // Account for the vertical phase needing 3 lines prior and 4 lines post - const int intermediate_height = h + 7; + // (+ 1 to make it divisible by 4). + const int intermediate_height = h + 8; assert(y_step_q4 == 16); assert(x_step_q4 == 16); @@ -48,7 +49,7 @@ void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { uint8_t temp[64 * 72]; - const int intermediate_height = h + 7; + const int intermediate_height = h + 8; assert(y_step_q4 == 16); assert(x_step_q4 == 16); diff --git a/media/libvpx/libvpx/vpx_dsp/avg.c b/media/libvpx/libvpx/vpx_dsp/avg.c index a7ac6d953889..1c45e8a73ddc 100644 --- a/media/libvpx/libvpx/vpx_dsp/avg.c +++ b/media/libvpx/libvpx/vpx_dsp/avg.c @@ -32,6 +32,166 @@ unsigned int vpx_avg_4x4_c(const uint8_t *s, int p) { return (sum + 8) >> 4; } +#if CONFIG_VP9_HIGHBITDEPTH +// src_diff: 13 bit, dynamic range [-4095, 4095] +// coeff: 16 bit +static void hadamard_highbd_col8_first_pass(const int16_t *src_diff, + ptrdiff_t src_stride, + int16_t *coeff) { + int16_t b0 = src_diff[0 * src_stride] + src_diff[1 * src_stride]; + int16_t b1 = src_diff[0 * src_stride] - src_diff[1 * src_stride]; + int16_t b2 = src_diff[2 * src_stride] + src_diff[3 * src_stride]; + int16_t b3 = src_diff[2 * src_stride] - src_diff[3 * src_stride]; + int16_t b4 = src_diff[4 * src_stride] + src_diff[5 * src_stride]; + int16_t b5 = src_diff[4 * src_stride] - src_diff[5 * src_stride]; + int16_t b6 = src_diff[6 * src_stride] + src_diff[7 * src_stride]; + int16_t b7 = src_diff[6 * src_stride] - src_diff[7 * src_stride]; + + int16_t c0 = b0 + b2; + int16_t c1 = b1 + b3; + int16_t c2 = b0 - b2; + int16_t c3 = b1 - b3; + int16_t c4 = b4 + b6; + int16_t c5 = b5 + b7; + int16_t c6 = b4 - b6; + int16_t c7 = b5 - b7; + + coeff[0] = c0 + c4; + coeff[7] = c1 + c5; + coeff[3] = c2 + c6; + coeff[4] = c3 + c7; + coeff[2] = c0 - c4; + coeff[6] = c1 - c5; + coeff[1] = c2 - c6; + coeff[5] = c3 - c7; +} + +// src_diff: 16 bit, dynamic range [-32760, 32760] +// coeff: 19 bit +static void 
hadamard_highbd_col8_second_pass(const int16_t *src_diff, + ptrdiff_t src_stride, + int32_t *coeff) { + int32_t b0 = src_diff[0 * src_stride] + src_diff[1 * src_stride]; + int32_t b1 = src_diff[0 * src_stride] - src_diff[1 * src_stride]; + int32_t b2 = src_diff[2 * src_stride] + src_diff[3 * src_stride]; + int32_t b3 = src_diff[2 * src_stride] - src_diff[3 * src_stride]; + int32_t b4 = src_diff[4 * src_stride] + src_diff[5 * src_stride]; + int32_t b5 = src_diff[4 * src_stride] - src_diff[5 * src_stride]; + int32_t b6 = src_diff[6 * src_stride] + src_diff[7 * src_stride]; + int32_t b7 = src_diff[6 * src_stride] - src_diff[7 * src_stride]; + + int32_t c0 = b0 + b2; + int32_t c1 = b1 + b3; + int32_t c2 = b0 - b2; + int32_t c3 = b1 - b3; + int32_t c4 = b4 + b6; + int32_t c5 = b5 + b7; + int32_t c6 = b4 - b6; + int32_t c7 = b5 - b7; + + coeff[0] = c0 + c4; + coeff[7] = c1 + c5; + coeff[3] = c2 + c6; + coeff[4] = c3 + c7; + coeff[2] = c0 - c4; + coeff[6] = c1 - c5; + coeff[1] = c2 - c6; + coeff[5] = c3 - c7; +} + +// The order of the output coeff of the hadamard is not important. For +// optimization purposes the final transpose may be skipped. +void vpx_highbd_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride, + tran_low_t *coeff) { + int idx; + int16_t buffer[64]; + int32_t buffer2[64]; + int16_t *tmp_buf = &buffer[0]; + for (idx = 0; idx < 8; ++idx) { + // src_diff: 13 bit + // buffer: 16 bit, dynamic range [-32760, 32760] + hadamard_highbd_col8_first_pass(src_diff, src_stride, tmp_buf); + tmp_buf += 8; + ++src_diff; + } + + tmp_buf = &buffer[0]; + for (idx = 0; idx < 8; ++idx) { + // buffer: 16 bit + // buffer2: 19 bit, dynamic range [-262080, 262080] + hadamard_highbd_col8_second_pass(tmp_buf, 8, buffer2 + 8 * idx); + ++tmp_buf; + } + + for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx]; +} + +// In place 16x16 2D Hadamard transform +void vpx_highbd_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride, + tran_low_t *coeff) { + int idx; + for (idx = 0; idx < 4; ++idx) { + // src_diff: 13 bit, dynamic range [-4095, 4095] + const int16_t *src_ptr = + src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; + vpx_highbd_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64); + } + + // coeff: 19 bit, dynamic range [-262080, 262080] + for (idx = 0; idx < 64; ++idx) { + tran_low_t a0 = coeff[0]; + tran_low_t a1 = coeff[64]; + tran_low_t a2 = coeff[128]; + tran_low_t a3 = coeff[192]; + + tran_low_t b0 = (a0 + a1) >> 1; + tran_low_t b1 = (a0 - a1) >> 1; + tran_low_t b2 = (a2 + a3) >> 1; + tran_low_t b3 = (a2 - a3) >> 1; + + // new coeff dynamic range: 20 bit + coeff[0] = b0 + b2; + coeff[64] = b1 + b3; + coeff[128] = b0 - b2; + coeff[192] = b1 - b3; + + ++coeff; + } +} + +void vpx_highbd_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride, + tran_low_t *coeff) { + int idx; + for (idx = 0; idx < 4; ++idx) { + // src_diff: 13 bit, dynamic range [-4095, 4095] + const int16_t *src_ptr = + src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; + vpx_highbd_hadamard_16x16_c(src_ptr, src_stride, coeff + idx * 256); + } + + // coeff: 20 bit + for (idx = 0; idx < 256; ++idx) { + tran_low_t a0 = coeff[0]; + tran_low_t a1 = coeff[256]; + tran_low_t a2 = coeff[512]; + tran_low_t a3 = coeff[768]; + + tran_low_t b0 = (a0 + a1) >> 2; + tran_low_t b1 = (a0 - a1) >> 2; + tran_low_t b2 = (a2 + a3) >> 2; + tran_low_t b3 = (a2 - a3) >> 2; + + // new coeff dynamic range: 20 bit + coeff[0] = b0 + b2; + coeff[256] = b1 + b3; + coeff[512] = b0 - b2; + coeff[768] = 
b1 - b3; + + ++coeff; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + // src_diff: first pass, 9 bit, dynamic range [-255, 255] // second pass, 12 bit, dynamic range [-2040, 2040] static void hadamard_col8(const int16_t *src_diff, ptrdiff_t src_stride, @@ -123,6 +283,50 @@ void vpx_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride, } } +void vpx_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride, + tran_low_t *coeff) { + int idx; + for (idx = 0; idx < 4; ++idx) { + // src_diff: 9 bit, dynamic range [-255, 255] + const int16_t *src_ptr = + src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; + vpx_hadamard_16x16_c(src_ptr, src_stride, coeff + idx * 256); + } + + // coeff: 15 bit, dynamic range [-16320, 16320] + for (idx = 0; idx < 256; ++idx) { + tran_low_t a0 = coeff[0]; + tran_low_t a1 = coeff[256]; + tran_low_t a2 = coeff[512]; + tran_low_t a3 = coeff[768]; + + tran_low_t b0 = (a0 + a1) >> 2; // (a0 + a1): 16 bit, [-32640, 32640] + tran_low_t b1 = (a0 - a1) >> 2; // b0-b3: 15 bit, dynamic range + tran_low_t b2 = (a2 + a3) >> 2; // [-16320, 16320] + tran_low_t b3 = (a2 - a3) >> 2; + + coeff[0] = b0 + b2; // 16 bit, [-32640, 32640] + coeff[256] = b1 + b3; + coeff[512] = b0 - b2; + coeff[768] = b1 - b3; + + ++coeff; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +// coeff: dynamic range 20 bit. +// length: value range {16, 64, 256, 1024}. +int vpx_highbd_satd_c(const tran_low_t *coeff, int length) { + int i; + int satd = 0; + for (i = 0; i < length; ++i) satd += abs(coeff[i]); + + // satd: 30 bits + return satd; +} +#endif // CONFIG_VP9_HIGHBITDEPTH + // coeff: 16 bits, dynamic range [-32640, 32640]. // length: value range {16, 64, 256, 1024}. int vpx_satd_c(const tran_low_t *coeff, int length) { diff --git a/media/libvpx/libvpx/vpx_dsp/bitreader.h b/media/libvpx/libvpx/vpx_dsp/bitreader.h index 6ee2a58632c5..a5927ea2adad 100644 --- a/media/libvpx/libvpx/vpx_dsp/bitreader.h +++ b/media/libvpx/libvpx/vpx_dsp/bitreader.h @@ -8,10 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_BITREADER_H_ -#define VPX_DSP_BITREADER_H_ +#ifndef VPX_VPX_DSP_BITREADER_H_ +#define VPX_VPX_DSP_BITREADER_H_ #include +#include #include #include "./vpx_config.h" @@ -19,6 +20,9 @@ #include "vpx/vp8dx.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/prob.h" +#if CONFIG_BITSTREAM_DEBUG +#include "vpx_util/vpx_debug_util.h" +#endif // CONFIG_BITSTREAM_DEBUG #ifdef __cplusplus extern "C" { @@ -94,7 +98,7 @@ static INLINE int vpx_read(vpx_reader *r, int prob) { } { - register int shift = vpx_norm[range]; + const unsigned char shift = vpx_norm[(unsigned char)range]; range <<= shift; value <<= shift; count -= shift; @@ -103,6 +107,31 @@ static INLINE int vpx_read(vpx_reader *r, int prob) { r->count = count; r->range = range; +#if CONFIG_BITSTREAM_DEBUG + { + const int queue_r = bitstream_queue_get_read(); + const int frame_idx = bitstream_queue_get_frame_read(); + int ref_result, ref_prob; + bitstream_queue_pop(&ref_result, &ref_prob); + if ((int)bit != ref_result) { + fprintf(stderr, + "\n *** [bit] result error, frame_idx_r %d bit %d ref_result %d " + "queue_r %d\n", + frame_idx, bit, ref_result, queue_r); + + assert(0); + } + if (prob != ref_prob) { + fprintf(stderr, + "\n *** [bit] prob error, frame_idx_r %d prob %d ref_prob %d " + "queue_r %d\n", + frame_idx, prob, ref_prob, queue_r); + + assert(0); + } + } +#endif + return bit; } @@ -131,4 +160,4 @@ static INLINE int vpx_read_tree(vpx_reader *r, const vpx_tree_index *tree, } // extern "C" #endif -#endif // VPX_DSP_BITREADER_H_ +#endif // VPX_VPX_DSP_BITREADER_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/bitreader_buffer.c b/media/libvpx/libvpx/vpx_dsp/bitreader_buffer.c index 3e16bfa38c24..f59f1f7cb9da 100644 --- a/media/libvpx/libvpx/vpx_dsp/bitreader_buffer.c +++ b/media/libvpx/libvpx/vpx_dsp/bitreader_buffer.c @@ -23,7 +23,7 @@ int vpx_rb_read_bit(struct vpx_read_bit_buffer *rb) { rb->bit_offset = off + 1; return bit; } else { - rb->error_handler(rb->error_handler_data); + if (rb->error_handler != NULL) rb->error_handler(rb->error_handler_data); return 0; } } diff --git a/media/libvpx/libvpx/vpx_dsp/bitreader_buffer.h b/media/libvpx/libvpx/vpx_dsp/bitreader_buffer.h index 8a48a95ed192..b27703a4dbcb 100644 --- a/media/libvpx/libvpx/vpx_dsp/bitreader_buffer.h +++ b/media/libvpx/libvpx/vpx_dsp/bitreader_buffer.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_BITREADER_BUFFER_H_ -#define VPX_DSP_BITREADER_BUFFER_H_ +#ifndef VPX_VPX_DSP_BITREADER_BUFFER_H_ +#define VPX_VPX_DSP_BITREADER_BUFFER_H_ #include @@ -44,4 +44,4 @@ int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, int bits); } // extern "C" #endif -#endif // VPX_DSP_BITREADER_BUFFER_H_ +#endif // VPX_VPX_DSP_BITREADER_BUFFER_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/bitwriter.c b/media/libvpx/libvpx/vpx_dsp/bitwriter.c index 81e28b309f57..5b41aa54ddce 100644 --- a/media/libvpx/libvpx/vpx_dsp/bitwriter.c +++ b/media/libvpx/libvpx/vpx_dsp/bitwriter.c @@ -12,6 +12,10 @@ #include "./bitwriter.h" +#if CONFIG_BITSTREAM_DEBUG +#include "vpx_util/vpx_debug_util.h" +#endif + void vpx_start_encode(vpx_writer *br, uint8_t *source) { br->lowvalue = 0; br->range = 255; @@ -24,8 +28,15 @@ void vpx_start_encode(vpx_writer *br, uint8_t *source) { void vpx_stop_encode(vpx_writer *br) { int i; +#if CONFIG_BITSTREAM_DEBUG + bitstream_queue_set_skip_write(1); +#endif for (i = 0; i < 32; i++) vpx_write_bit(br, 0); // Ensure there's no ambigous collision with any index marker bytes if ((br->buffer[br->pos - 1] & 0xe0) == 0xc0) br->buffer[br->pos++] = 0; + +#if CONFIG_BITSTREAM_DEBUG + bitstream_queue_set_skip_write(0); +#endif } diff --git a/media/libvpx/libvpx/vpx_dsp/bitwriter.h b/media/libvpx/libvpx/vpx_dsp/bitwriter.h index 41040cf93549..04084af8f25a 100644 --- a/media/libvpx/libvpx/vpx_dsp/bitwriter.h +++ b/media/libvpx/libvpx/vpx_dsp/bitwriter.h @@ -8,12 +8,17 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_BITWRITER_H_ -#define VPX_DSP_BITWRITER_H_ +#ifndef VPX_VPX_DSP_BITWRITER_H_ +#define VPX_VPX_DSP_BITWRITER_H_ + +#include #include "vpx_ports/mem.h" #include "vpx_dsp/prob.h" +#if CONFIG_BITSTREAM_DEBUG +#include "vpx_util/vpx_debug_util.h" +#endif // CONFIG_BITSTREAM_DEBUG #ifdef __cplusplus extern "C" { @@ -27,15 +32,30 @@ typedef struct vpx_writer { uint8_t *buffer; } vpx_writer; -void vpx_start_encode(vpx_writer *bc, uint8_t *buffer); -void vpx_stop_encode(vpx_writer *bc); +void vpx_start_encode(vpx_writer *br, uint8_t *source); +void vpx_stop_encode(vpx_writer *br); static INLINE void vpx_write(vpx_writer *br, int bit, int probability) { unsigned int split; int count = br->count; unsigned int range = br->range; unsigned int lowvalue = br->lowvalue; - register int shift; + int shift; + +#if CONFIG_BITSTREAM_DEBUG + /* + int queue_r = 0; + int frame_idx_r = 0; + int queue_w = bitstream_queue_get_write(); + int frame_idx_w = bitstream_queue_get_frame_write(); + if (frame_idx_w == frame_idx_r && queue_w == queue_r) { + fprintf(stderr, "\n *** bitstream queue at frame_idx_w %d queue_w %d\n", + frame_idx_w, queue_w); + assert(0); + } + */ + bitstream_queue_push(bit, probability); +#endif split = 1 + (((range - 1) * probability) >> 8); @@ -65,7 +85,7 @@ static INLINE void vpx_write(vpx_writer *br, int bit, int probability) { br->buffer[x] += 1; } - br->buffer[br->pos++] = (lowvalue >> (24 - offset)); + br->buffer[br->pos++] = (lowvalue >> (24 - offset)) & 0xff; lowvalue <<= offset; shift = count; lowvalue &= 0xffffff; @@ -94,4 +114,4 @@ static INLINE void vpx_write_literal(vpx_writer *w, int data, int bits) { } // extern "C" #endif -#endif // VPX_DSP_BITWRITER_H_ +#endif // VPX_VPX_DSP_BITWRITER_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/bitwriter_buffer.h b/media/libvpx/libvpx/vpx_dsp/bitwriter_buffer.h index a123a2fe8c94..3662cb64df62 100644 --- a/media/libvpx/libvpx/vpx_dsp/bitwriter_buffer.h +++ 
b/media/libvpx/libvpx/vpx_dsp/bitwriter_buffer.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_BITWRITER_BUFFER_H_ -#define VPX_DSP_BITWRITER_BUFFER_H_ +#ifndef VPX_VPX_DSP_BITWRITER_BUFFER_H_ +#define VPX_VPX_DSP_BITWRITER_BUFFER_H_ #include "vpx/vpx_integer.h" @@ -35,4 +35,4 @@ void vpx_wb_write_inv_signed_literal(struct vpx_write_bit_buffer *wb, int data, } // extern "C" #endif -#endif // VPX_DSP_BITWRITER_BUFFER_H_ +#endif // VPX_VPX_DSP_BITWRITER_BUFFER_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/deblock.c b/media/libvpx/libvpx/vpx_dsp/deblock.c index 94acbb391958..455b73bbcead 100644 --- a/media/libvpx/libvpx/vpx_dsp/deblock.c +++ b/media/libvpx/libvpx/vpx_dsp/deblock.c @@ -39,11 +39,10 @@ const int16_t vpx_rv[] = { 9, 10, 13, }; -void vpx_post_proc_down_and_across_mb_row_c(unsigned char *src_ptr, - unsigned char *dst_ptr, - int src_pixels_per_line, - int dst_pixels_per_line, int cols, - unsigned char *f, int size) { +void vpx_post_proc_down_and_across_mb_row_c(unsigned char *src, + unsigned char *dst, int src_pitch, + int dst_pitch, int cols, + unsigned char *flimits, int size) { unsigned char *p_src, *p_dst; int row; int col; @@ -55,19 +54,21 @@ void vpx_post_proc_down_and_across_mb_row_c(unsigned char *src_ptr, for (row = 0; row < size; row++) { /* post_proc_down for one row */ - p_src = src_ptr; - p_dst = dst_ptr; + p_src = src; + p_dst = dst; for (col = 0; col < cols; col++) { - unsigned char p_above2 = p_src[col - 2 * src_pixels_per_line]; - unsigned char p_above1 = p_src[col - src_pixels_per_line]; - unsigned char p_below1 = p_src[col + src_pixels_per_line]; - unsigned char p_below2 = p_src[col + 2 * src_pixels_per_line]; + unsigned char p_above2 = p_src[col - 2 * src_pitch]; + unsigned char p_above1 = p_src[col - src_pitch]; + unsigned char p_below1 = p_src[col + src_pitch]; + unsigned char p_below2 = p_src[col + 2 * src_pitch]; v = p_src[col]; - if ((abs(v - p_above2) < f[col]) && (abs(v - p_above1) < f[col]) && - (abs(v - p_below1) < f[col]) && (abs(v - p_below2) < f[col])) { + if ((abs(v - p_above2) < flimits[col]) && + (abs(v - p_above1) < flimits[col]) && + (abs(v - p_below1) < flimits[col]) && + (abs(v - p_below2) < flimits[col])) { unsigned char k1, k2, k3; k1 = (p_above2 + p_above1 + 1) >> 1; k2 = (p_below2 + p_below1 + 1) >> 1; @@ -79,8 +80,8 @@ void vpx_post_proc_down_and_across_mb_row_c(unsigned char *src_ptr, } /* now post_proc_across */ - p_src = dst_ptr; - p_dst = dst_ptr; + p_src = dst; + p_dst = dst; p_src[-2] = p_src[-1] = p_src[0]; p_src[cols] = p_src[cols + 1] = p_src[cols - 1]; @@ -88,10 +89,10 @@ void vpx_post_proc_down_and_across_mb_row_c(unsigned char *src_ptr, for (col = 0; col < cols; col++) { v = p_src[col]; - if ((abs(v - p_src[col - 2]) < f[col]) && - (abs(v - p_src[col - 1]) < f[col]) && - (abs(v - p_src[col + 1]) < f[col]) && - (abs(v - p_src[col + 2]) < f[col])) { + if ((abs(v - p_src[col - 2]) < flimits[col]) && + (abs(v - p_src[col - 1]) < flimits[col]) && + (abs(v - p_src[col + 1]) < flimits[col]) && + (abs(v - p_src[col + 2]) < flimits[col])) { unsigned char k1, k2, k3; k1 = (p_src[col - 2] + p_src[col - 1] + 1) >> 1; k2 = (p_src[col + 2] + p_src[col + 1] + 1) >> 1; @@ -109,8 +110,8 @@ void vpx_post_proc_down_and_across_mb_row_c(unsigned char *src_ptr, p_dst[col - 1] = d[(col - 1) & 3]; /* next row */ - src_ptr += src_pixels_per_line; - dst_ptr += dst_pixels_per_line; + src += src_pitch; + dst += dst_pitch; } } diff --git a/media/libvpx/libvpx/vpx_dsp/fastssim.c 
b/media/libvpx/libvpx/vpx_dsp/fastssim.c index 0469071a176b..6ab6f557e250 100644 --- a/media/libvpx/libvpx/vpx_dsp/fastssim.c +++ b/media/libvpx/libvpx/vpx_dsp/fastssim.c @@ -128,10 +128,12 @@ static void fs_downsample_level(fs_ctx *_ctx, int _l) { int i1; i0 = 2 * i; i1 = FS_MINI(i0 + 1, w2); - dst1[j * w + i] = src1[j0offs + i0] + src1[j0offs + i1] + - src1[j1offs + i0] + src1[j1offs + i1]; - dst2[j * w + i] = src2[j0offs + i0] + src2[j0offs + i1] + - src2[j1offs + i0] + src2[j1offs + i1]; + dst1[j * w + i] = + (uint32_t)((int64_t)src1[j0offs + i0] + src1[j0offs + i1] + + src1[j1offs + i0] + src1[j1offs + i1]); + dst2[j * w + i] = + (uint32_t)((int64_t)src2[j0offs + i0] + src2[j0offs + i1] + + src2[j1offs + i0] + src2[j1offs + i1]); } } } @@ -220,12 +222,12 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) { ssim = _ctx->level[_l].ssim; c1 = (double)(ssim_c1 * 4096 * (1 << 4 * _l)); for (j = 0; j < h; j++) { - unsigned mux; - unsigned muy; + int64_t mux; + int64_t muy; int i0; int i1; - mux = 5 * col_sums_x[0]; - muy = 5 * col_sums_y[0]; + mux = (int64_t)5 * col_sums_x[0]; + muy = (int64_t)5 * col_sums_y[0]; for (i = 1; i < 4; i++) { i1 = FS_MINI(i, w - 1); mux += col_sums_x[i1]; @@ -237,8 +239,8 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) { if (i + 1 < w) { i0 = FS_MAXI(0, i - 4); i1 = FS_MINI(i + 4, w - 1); - mux += col_sums_x[i1] - col_sums_x[i0]; - muy += col_sums_x[i1] - col_sums_x[i0]; + mux += (int)col_sums_x[i1] - (int)col_sums_x[i0]; + muy += (int)col_sums_x[i1] - (int)col_sums_x[i0]; } } if (j + 1 < h) { @@ -246,8 +248,10 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) { for (i = 0; i < w; i++) col_sums_x[i] -= im1[j0offs + i]; for (i = 0; i < w; i++) col_sums_y[i] -= im2[j0offs + i]; j1offs = FS_MINI(j + 4, h - 1) * w; - for (i = 0; i < w; i++) col_sums_x[i] += im1[j1offs + i]; - for (i = 0; i < w; i++) col_sums_y[i] += im2[j1offs + i]; + for (i = 0; i < w; i++) + col_sums_x[i] = (uint32_t)((int64_t)col_sums_x[i] + im1[j1offs + i]); + for (i = 0; i < w; i++) + col_sums_y[i] = (uint32_t)((int64_t)col_sums_y[i] + im2[j1offs + i]); } } } @@ -343,18 +347,18 @@ static void fs_calc_structure(fs_ctx *_ctx, int _l, int bit_depth) { for (j = 0; j < h + 4; j++) { if (j < h - 1) { for (i = 0; i < w - 1; i++) { - unsigned g1; - unsigned g2; - unsigned gx; - unsigned gy; - g1 = abs((int)im1[(j + 1) * w + i + 1] - (int)im1[j * w + i]); - g2 = abs((int)im1[(j + 1) * w + i] - (int)im1[j * w + i + 1]); + int64_t g1; + int64_t g2; + int64_t gx; + int64_t gy; + g1 = labs((int64_t)im1[(j + 1) * w + i + 1] - (int64_t)im1[j * w + i]); + g2 = labs((int64_t)im1[(j + 1) * w + i] - (int64_t)im1[j * w + i + 1]); gx = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2); - g1 = abs((int)im2[(j + 1) * w + i + 1] - (int)im2[j * w + i]); - g2 = abs((int)im2[(j + 1) * w + i] - (int)im2[j * w + i + 1]); - gy = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2); - gx_buf[(j & 7) * stride + i + 4] = gx; - gy_buf[(j & 7) * stride + i + 4] = gy; + g1 = labs((int64_t)im2[(j + 1) * w + i + 1] - (int64_t)im2[j * w + i]); + g2 = labs((int64_t)im2[(j + 1) * w + i] - (int64_t)im2[j * w + i + 1]); + gy = ((int64_t)4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2)); + gx_buf[(j & 7) * stride + i + 4] = (uint32_t)gx; + gy_buf[(j & 7) * stride + i + 4] = (uint32_t)gy; } } else { memset(gx_buf + (j & 7) * stride, 0, stride * sizeof(*gx_buf)); diff --git a/media/libvpx/libvpx/vpx_dsp/fwd_txfm.c b/media/libvpx/libvpx/vpx_dsp/fwd_txfm.c index 6dcb3ba66814..ef66de0247a4 100644 --- 
a/media/libvpx/libvpx/vpx_dsp/fwd_txfm.c +++ b/media/libvpx/libvpx/vpx_dsp/fwd_txfm.c @@ -87,11 +87,11 @@ void vpx_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) { output[0] = sum * 2; } -void vpx_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) { +void vpx_fdct8x8_c(const int16_t *input, tran_low_t *output, int stride) { int i, j; tran_low_t intermediate[64]; int pass; - tran_low_t *output = intermediate; + tran_low_t *out = intermediate; const tran_low_t *in = NULL; // Transform columns @@ -133,10 +133,10 @@ void vpx_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) { t1 = (x0 - x1) * cospi_16_64; t2 = x2 * cospi_24_64 + x3 * cospi_8_64; t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; - output[0] = (tran_low_t)fdct_round_shift(t0); - output[2] = (tran_low_t)fdct_round_shift(t2); - output[4] = (tran_low_t)fdct_round_shift(t1); - output[6] = (tran_low_t)fdct_round_shift(t3); + out[0] = (tran_low_t)fdct_round_shift(t0); + out[2] = (tran_low_t)fdct_round_shift(t2); + out[4] = (tran_low_t)fdct_round_shift(t1); + out[6] = (tran_low_t)fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; @@ -155,19 +155,19 @@ void vpx_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) { t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; - output[1] = (tran_low_t)fdct_round_shift(t0); - output[3] = (tran_low_t)fdct_round_shift(t2); - output[5] = (tran_low_t)fdct_round_shift(t1); - output[7] = (tran_low_t)fdct_round_shift(t3); - output += 8; + out[1] = (tran_low_t)fdct_round_shift(t0); + out[3] = (tran_low_t)fdct_round_shift(t2); + out[5] = (tran_low_t)fdct_round_shift(t1); + out[7] = (tran_low_t)fdct_round_shift(t3); + out += 8; } in = intermediate; - output = final_output; + out = output; } // Rows for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) final_output[j + i * 8] /= 2; + for (j = 0; j < 8; ++j) output[j + i * 8] /= 2; } } @@ -705,9 +705,9 @@ void vpx_fdct32(const tran_high_t *input, tran_high_t *output, int round) { output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64); } -void vpx_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { +void vpx_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride) { int i, j; - tran_high_t output[32 * 32]; + tran_high_t out[32 * 32]; // Columns for (i = 0; i < 32; ++i) { @@ -715,16 +715,16 @@ void vpx_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; vpx_fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) - output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; + out[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; } // Rows for (i = 0; i < 32; ++i) { tran_high_t temp_in[32], temp_out[32]; - for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; + for (j = 0; j < 32; ++j) temp_in[j] = out[j + i * 32]; vpx_fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) - out[j + i * 32] = + output[j + i * 32] = (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2); } } @@ -732,9 +732,9 @@ void vpx_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { // Note that although we use dct_32_round in dct32 computation flow, // this 2d fdct32x32 for rate-distortion optimization loop is operating // within 16 bits precision. 
-void vpx_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { +void vpx_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride) { int i, j; - tran_high_t output[32 * 32]; + tran_high_t out[32 * 32]; // Columns for (i = 0; i < 32; ++i) { @@ -745,15 +745,15 @@ void vpx_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { // TODO(cd): see quality impact of only doing // output[j * 32 + i] = (temp_out[j] + 1) >> 2; // PS: also change code in vpx_dsp/x86/vpx_dct_sse2.c - output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; + out[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; } // Rows for (i = 0; i < 32; ++i) { tran_high_t temp_in[32], temp_out[32]; - for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; + for (j = 0; j < 32; ++j) temp_in[j] = out[j + i * 32]; vpx_fdct32(temp_in, temp_out, 1); - for (j = 0; j < 32; ++j) out[j + i * 32] = (tran_low_t)temp_out[j]; + for (j = 0; j < 32; ++j) output[j + i * 32] = (tran_low_t)temp_out[j]; } } @@ -772,14 +772,14 @@ void vpx_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output, vpx_fdct4x4_c(input, output, stride); } -void vpx_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output, +void vpx_highbd_fdct8x8_c(const int16_t *input, tran_low_t *output, int stride) { - vpx_fdct8x8_c(input, final_output, stride); + vpx_fdct8x8_c(input, output, stride); } -void vpx_highbd_fdct8x8_1_c(const int16_t *input, tran_low_t *final_output, +void vpx_highbd_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) { - vpx_fdct8x8_1_c(input, final_output, stride); + vpx_fdct8x8_1_c(input, output, stride); } void vpx_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output, @@ -792,17 +792,18 @@ void vpx_highbd_fdct16x16_1_c(const int16_t *input, tran_low_t *output, vpx_fdct16x16_1_c(input, output, stride); } -void vpx_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { - vpx_fdct32x32_c(input, out, stride); +void vpx_highbd_fdct32x32_c(const int16_t *input, tran_low_t *output, + int stride) { + vpx_fdct32x32_c(input, output, stride); } -void vpx_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, +void vpx_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride) { - vpx_fdct32x32_rd_c(input, out, stride); + vpx_fdct32x32_rd_c(input, output, stride); } -void vpx_highbd_fdct32x32_1_c(const int16_t *input, tran_low_t *out, +void vpx_highbd_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) { - vpx_fdct32x32_1_c(input, out, stride); + vpx_fdct32x32_1_c(input, output, stride); } #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/libvpx/vpx_dsp/fwd_txfm.h b/media/libvpx/libvpx/vpx_dsp/fwd_txfm.h index 29e139c73ba9..a43c8ea7f7f2 100644 --- a/media/libvpx/libvpx/vpx_dsp/fwd_txfm.h +++ b/media/libvpx/libvpx/vpx_dsp/fwd_txfm.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_FWD_TXFM_H_ -#define VPX_DSP_FWD_TXFM_H_ +#ifndef VPX_VPX_DSP_FWD_TXFM_H_ +#define VPX_VPX_DSP_FWD_TXFM_H_ #include "vpx_dsp/txfm_common.h" @@ -22,4 +22,4 @@ static INLINE tran_high_t fdct_round_shift(tran_high_t input) { } void vpx_fdct32(const tran_high_t *input, tran_high_t *output, int round); -#endif // VPX_DSP_FWD_TXFM_H_ +#endif // VPX_VPX_DSP_FWD_TXFM_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/inv_txfm.c b/media/libvpx/libvpx/vpx_dsp/inv_txfm.c index 0194aa1e1864..97655b3a9ea5 100644 --- a/media/libvpx/libvpx/vpx_dsp/inv_txfm.c +++ b/media/libvpx/libvpx/vpx_dsp/inv_txfm.c @@ -67,11 +67,11 @@ void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { } } -void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int stride) { +void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i; tran_high_t a1, e1; tran_low_t tmp[4]; - const tran_low_t *ip = in; + const tran_low_t *ip = input; tran_low_t *op = tmp; a1 = ip[0] >> UNIT_QUANT_SHIFT; @@ -701,22 +701,22 @@ void idct16_c(const tran_low_t *input, tran_low_t *output) { step2[15] = step1[15]; // stage 7 - output[0] = WRAPLOW(step2[0] + step2[15]); - output[1] = WRAPLOW(step2[1] + step2[14]); - output[2] = WRAPLOW(step2[2] + step2[13]); - output[3] = WRAPLOW(step2[3] + step2[12]); - output[4] = WRAPLOW(step2[4] + step2[11]); - output[5] = WRAPLOW(step2[5] + step2[10]); - output[6] = WRAPLOW(step2[6] + step2[9]); - output[7] = WRAPLOW(step2[7] + step2[8]); - output[8] = WRAPLOW(step2[7] - step2[8]); - output[9] = WRAPLOW(step2[6] - step2[9]); - output[10] = WRAPLOW(step2[5] - step2[10]); - output[11] = WRAPLOW(step2[4] - step2[11]); - output[12] = WRAPLOW(step2[3] - step2[12]); - output[13] = WRAPLOW(step2[2] - step2[13]); - output[14] = WRAPLOW(step2[1] - step2[14]); - output[15] = WRAPLOW(step2[0] - step2[15]); + output[0] = (tran_low_t)WRAPLOW(step2[0] + step2[15]); + output[1] = (tran_low_t)WRAPLOW(step2[1] + step2[14]); + output[2] = (tran_low_t)WRAPLOW(step2[2] + step2[13]); + output[3] = (tran_low_t)WRAPLOW(step2[3] + step2[12]); + output[4] = (tran_low_t)WRAPLOW(step2[4] + step2[11]); + output[5] = (tran_low_t)WRAPLOW(step2[5] + step2[10]); + output[6] = (tran_low_t)WRAPLOW(step2[6] + step2[9]); + output[7] = (tran_low_t)WRAPLOW(step2[7] + step2[8]); + output[8] = (tran_low_t)WRAPLOW(step2[7] - step2[8]); + output[9] = (tran_low_t)WRAPLOW(step2[6] - step2[9]); + output[10] = (tran_low_t)WRAPLOW(step2[5] - step2[10]); + output[11] = (tran_low_t)WRAPLOW(step2[4] - step2[11]); + output[12] = (tran_low_t)WRAPLOW(step2[3] - step2[12]); + output[13] = (tran_low_t)WRAPLOW(step2[2] - step2[13]); + output[14] = (tran_low_t)WRAPLOW(step2[1] - step2[14]); + output[15] = (tran_low_t)WRAPLOW(step2[0] - step2[15]); } void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, @@ -1346,12 +1346,12 @@ void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint16_t *dest, } } -void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint16_t *dest, +void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i; tran_high_t a1, e1; tran_low_t tmp[4]; - const tran_low_t *ip = in; + const tran_low_t *ip = input; tran_low_t *op = tmp; (void)bd; diff --git a/media/libvpx/libvpx/vpx_dsp/inv_txfm.h b/media/libvpx/libvpx/vpx_dsp/inv_txfm.h index 13137659faed..6eedbeac357e 100644 --- a/media/libvpx/libvpx/vpx_dsp/inv_txfm.h +++ b/media/libvpx/libvpx/vpx_dsp/inv_txfm.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the 
source tree. */ -#ifndef VPX_DSP_INV_TXFM_H_ -#define VPX_DSP_INV_TXFM_H_ +#ifndef VPX_VPX_DSP_INV_TXFM_H_ +#define VPX_VPX_DSP_INV_TXFM_H_ #include @@ -76,7 +76,6 @@ static INLINE tran_high_t highbd_check_range(tran_high_t input, int bd) { // bd of 10 uses trans_low with 18bits, need to remove 14bits // bd of 12 uses trans_low with 20bits, need to remove 12bits // bd of x uses trans_low with 8+x bits, need to remove 24-x bits - #define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16) #if CONFIG_VP9_HIGHBITDEPTH #define HIGHBD_WRAPLOW(x, bd) \ @@ -123,4 +122,4 @@ static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) { } // extern "C" #endif -#endif // VPX_DSP_INV_TXFM_H_ +#endif // VPX_VPX_DSP_INV_TXFM_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/loopfilter.c b/media/libvpx/libvpx/vpx_dsp/loopfilter.c index 9866ea37d6d0..995602831722 100644 --- a/media/libvpx/libvpx/vpx_dsp/loopfilter.c +++ b/media/libvpx/libvpx/vpx_dsp/loopfilter.c @@ -81,11 +81,11 @@ static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1, uint8_t *op0, uint8_t *oq0, uint8_t *oq1) { int8_t filter1, filter2; - const int8_t ps1 = (int8_t)*op1 ^ 0x80; - const int8_t ps0 = (int8_t)*op0 ^ 0x80; - const int8_t qs0 = (int8_t)*oq0 ^ 0x80; - const int8_t qs1 = (int8_t)*oq1 ^ 0x80; - const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); + const int8_t ps1 = (int8_t)(*op1 ^ 0x80); + const int8_t ps0 = (int8_t)(*op0 ^ 0x80); + const int8_t qs0 = (int8_t)(*oq0 ^ 0x80); + const int8_t qs1 = (int8_t)(*oq1 ^ 0x80); + const int8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); // add outer taps if we have high edge variance int8_t filter = signed_char_clamp(ps1 - qs1) & hev; @@ -99,39 +99,40 @@ static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1, filter1 = signed_char_clamp(filter + 4) >> 3; filter2 = signed_char_clamp(filter + 3) >> 3; - *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80; - *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80; + *oq0 = (uint8_t)(signed_char_clamp(qs0 - filter1) ^ 0x80); + *op0 = (uint8_t)(signed_char_clamp(ps0 + filter2) ^ 0x80); // outer tap adjustments filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; - *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80; - *op1 = signed_char_clamp(ps1 + filter) ^ 0x80; + *oq1 = (uint8_t)(signed_char_clamp(qs1 - filter) ^ 0x80); + *op1 = (uint8_t)(signed_char_clamp(ps1 + filter) ^ 0x80); } -void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */, - const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh) { +void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
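The "work using chars" trick the comment describes rests on the 0x80 XOR in filter4() above: it re-biases unsigned pixels into signed 8-bit space, filters there, and the same XOR flips the result back. A minimal round-trip sketch (function names are illustrative):

    #include <stdint.h>

    /* uint8_t 0..255 and int8_t -128..127 share a bit pattern under
     * the ^0x80 re-bias, so signed 8-bit SIMD can do the filtering. */
    static int8_t px_to_signed(uint8_t px) { return (int8_t)(px ^ 0x80); }
    static uint8_t px_to_unsigned(int8_t sp) { return (uint8_t)(sp ^ 0x80); }

The repositioned casts, e.g. (int8_t)(*op1 ^ 0x80) instead of (int8_t)*op1 ^ 0x80, do the XOR while the value is still unsigned, so the stored bits are unchanged and the narrowing happens in one explicit place.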
for (i = 0; i < 8; ++i) { - const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; - const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; + const uint8_t p3 = s[-4 * pitch], p2 = s[-3 * pitch], p1 = s[-2 * pitch], + p0 = s[-pitch]; + const uint8_t q0 = s[0 * pitch], q1 = s[1 * pitch], q2 = s[2 * pitch], + q3 = s[3 * pitch]; const int8_t mask = filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3); - filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p); + filter4(mask, *thresh, s - 2 * pitch, s - 1 * pitch, s, s + 1 * pitch); ++s; } } -void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, +void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0); - vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1); + vpx_lpf_horizontal_4_c(s, pitch, blimit0, limit0, thresh0); + vpx_lpf_horizontal_4_c(s + 8, pitch, blimit1, limit1, thresh1); } void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, @@ -178,31 +179,33 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat, } } -void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit, +void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. for (i = 0; i < 8; ++i) { - const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; - const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; + const uint8_t p3 = s[-4 * pitch], p2 = s[-3 * pitch], p1 = s[-2 * pitch], + p0 = s[-pitch]; + const uint8_t q0 = s[0 * pitch], q1 = s[1 * pitch], q2 = s[2 * pitch], + q3 = s[3 * pitch]; const int8_t mask = filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3); const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); - filter8(mask, *thresh, flat, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s, - s + 1 * p, s + 2 * p, s + 3 * p); + filter8(mask, *thresh, flat, s - 4 * pitch, s - 3 * pitch, s - 2 * pitch, + s - 1 * pitch, s, s + 1 * pitch, s + 2 * pitch, s + 3 * pitch); ++s; } } -void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0, +void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { - vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0); - vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1); + vpx_lpf_horizontal_8_c(s, pitch, blimit0, limit0, thresh0); + vpx_lpf_horizontal_8_c(s + 8, pitch, blimit1, limit1, thresh1); } void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, @@ -283,7 +286,8 @@ static INLINE void filter16(int8_t mask, uint8_t thresh, uint8_t flat, } } -static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit, +static void mb_lpf_horizontal_edge_w(uint8_t *s, int pitch, + const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { int i; @@ -291,34 +295,37 @@ static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit, // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
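All of the filter taps above saturate through signed_char_clamp() before the final >> 3, which is what keeps the char-based arithmetic from wrapping. A sketch matching the behaviour of that helper as defined in vpx_dsp/vpx_dsp_common.h (the _sketch suffix is illustrative):

    #include <stdint.h>

    /* Saturate an int to the int8_t range [-128, 127]. */
    static int8_t signed_char_clamp_sketch(int t) {
      return (int8_t)(t < -128 ? -128 : (t > 127 ? 127 : t));
    }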
for (i = 0; i < 8 * count; ++i) { - const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; - const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; + const uint8_t p3 = s[-4 * pitch], p2 = s[-3 * pitch], p1 = s[-2 * pitch], + p0 = s[-pitch]; + const uint8_t q0 = s[0 * pitch], q1 = s[1 * pitch], q2 = s[2 * pitch], + q3 = s[3 * pitch]; const int8_t mask = filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3); const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat2 = - flat_mask5(1, s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], p0, q0, - s[4 * p], s[5 * p], s[6 * p], s[7 * p]); + const int8_t flat2 = flat_mask5( + 1, s[-8 * pitch], s[-7 * pitch], s[-6 * pitch], s[-5 * pitch], p0, q0, + s[4 * pitch], s[5 * pitch], s[6 * pitch], s[7 * pitch]); - filter16(mask, *thresh, flat, flat2, s - 8 * p, s - 7 * p, s - 6 * p, - s - 5 * p, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s, - s + 1 * p, s + 2 * p, s + 3 * p, s + 4 * p, s + 5 * p, s + 6 * p, - s + 7 * p); + filter16(mask, *thresh, flat, flat2, s - 8 * pitch, s - 7 * pitch, + s - 6 * pitch, s - 5 * pitch, s - 4 * pitch, s - 3 * pitch, + s - 2 * pitch, s - 1 * pitch, s, s + 1 * pitch, s + 2 * pitch, + s + 3 * pitch, s + 4 * pitch, s + 5 * pitch, s + 6 * pitch, + s + 7 * pitch); ++s; } } -void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit, +void vpx_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { - mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1); + mb_lpf_horizontal_edge_w(s, pitch, blimit, limit, thresh, 1); } -void vpx_lpf_horizontal_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, +void vpx_lpf_horizontal_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { - mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2); + mb_lpf_horizontal_edge_w(s, pitch, blimit, limit, thresh, 2); } -static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit, +static void mb_lpf_vertical_edge_w(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { int i; @@ -335,18 +342,18 @@ static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit, filter16(mask, *thresh, flat, flat2, s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1, s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7); - s += p; + s += pitch; } } -void vpx_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, +void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { - mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8); + mb_lpf_vertical_edge_w(s, pitch, blimit, limit, thresh, 8); } -void vpx_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, +void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { - mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16); + mb_lpf_vertical_edge_w(s, pitch, blimit, limit, thresh, 16); } #if CONFIG_VP9_HIGHBITDEPTH @@ -416,7 +423,7 @@ static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1, const int16_t ps0 = (int16_t)*op0 - (0x80 << shift); const int16_t qs0 = (int16_t)*oq0 - (0x80 << shift); const int16_t qs1 = (int16_t)*oq1 - (0x80 << shift); - const uint16_t hev = highbd_hev_mask(thresh, *op1, *op0, *oq0, *oq1, bd); + const int16_t hev = highbd_hev_mask(thresh, *op1, *op0, *oq0, *oq1, bd); // Add 
outer taps if we have high edge variance. int16_t filter = signed_char_clamp_high(ps1 - qs1, bd) & hev; @@ -440,7 +447,7 @@ static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1, *op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift); } -void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */, +void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { int i; @@ -448,27 +455,28 @@ void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */, // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. for (i = 0; i < 8; ++i) { - const uint16_t p3 = s[-4 * p]; - const uint16_t p2 = s[-3 * p]; - const uint16_t p1 = s[-2 * p]; - const uint16_t p0 = s[-p]; - const uint16_t q0 = s[0 * p]; - const uint16_t q1 = s[1 * p]; - const uint16_t q2 = s[2 * p]; - const uint16_t q3 = s[3 * p]; + const uint16_t p3 = s[-4 * pitch]; + const uint16_t p2 = s[-3 * pitch]; + const uint16_t p1 = s[-2 * pitch]; + const uint16_t p0 = s[-pitch]; + const uint16_t q0 = s[0 * pitch]; + const uint16_t q1 = s[1 * pitch]; + const uint16_t q2 = s[2 * pitch]; + const uint16_t q3 = s[3 * pitch]; const int8_t mask = highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd); - highbd_filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p, bd); + highbd_filter4(mask, *thresh, s - 2 * pitch, s - 1 * pitch, s, + s + 1 * pitch, bd); ++s; } } void vpx_highbd_lpf_horizontal_4_dual_c( - uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, + uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd); - vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, bd); + vpx_highbd_lpf_horizontal_4_c(s, pitch, blimit0, limit0, thresh0, bd); + vpx_highbd_lpf_horizontal_4_c(s + 8, pitch, blimit1, limit1, thresh1, bd); } void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, @@ -517,33 +525,36 @@ static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat, } } -void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd) { +void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int pitch, + const uint8_t *blimit, const uint8_t *limit, + const uint8_t *thresh, int bd) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
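highbd_filter4() above applies the same re-bias as the 8-bit path, scaled by bit depth: with shift = bd - 8, the midpoint moves from 128 to 512 at 10 bits and 2048 at 12 bits. A hedged sketch (names illustrative):

    #include <stdint.h>

    /* Re-bias a bd-bit pixel (bd = 8, 10 or 12) around its midpoint,
     * mirroring the (0x80 << shift) subtraction in highbd_filter4(). */
    static int16_t hb_px_to_signed(uint16_t px, int bd) {
      const int shift = bd - 8;
      return (int16_t)(px - (0x80 << shift));
    }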
for (i = 0; i < 8; ++i) { - const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; - const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; + const uint16_t p3 = s[-4 * pitch], p2 = s[-3 * pitch], p1 = s[-2 * pitch], + p0 = s[-pitch]; + const uint16_t q0 = s[0 * pitch], q1 = s[1 * pitch], q2 = s[2 * pitch], + q3 = s[3 * pitch]; const int8_t mask = highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd); const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd); - highbd_filter8(mask, *thresh, flat, s - 4 * p, s - 3 * p, s - 2 * p, - s - 1 * p, s, s + 1 * p, s + 2 * p, s + 3 * p, bd); + highbd_filter8(mask, *thresh, flat, s - 4 * pitch, s - 3 * pitch, + s - 2 * pitch, s - 1 * pitch, s, s + 1 * pitch, + s + 2 * pitch, s + 3 * pitch, bd); ++s; } } void vpx_highbd_lpf_horizontal_8_dual_c( - uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, + uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { - vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd); - vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, bd); + vpx_highbd_lpf_horizontal_8_c(s, pitch, blimit0, limit0, thresh0, bd); + vpx_highbd_lpf_horizontal_8_c(s + 8, pitch, blimit1, limit1, thresh1, bd); } void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, @@ -639,7 +650,7 @@ static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, uint8_t flat, } } -static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p, +static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, @@ -649,44 +660,45 @@ static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p, // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
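The loop that follows computes three nested masks per column and hands them to highbd_filter16(), which, like its 8-bit counterpart, picks the filter strength from them. A hedged sketch of that selection logic, reconstructed from the call structure; the enum and function names are illustrative:

    /* Per-column strength choice inside filter16()/highbd_filter16(). */
    typedef enum { LF_NONE, LF_4TAP, LF_8TAP, LF_16WIDE } lf_choice;

    static lf_choice pick_filter(int mask, int flat, int flat2) {
      if (!mask) return LF_NONE;           /* real edge: leave untouched */
      if (flat2 && flat) return LF_16WIDE; /* smooth region: p7..q7 taps */
      if (flat) return LF_8TAP;            /* moderately flat: p3..q3 taps */
      return LF_4TAP;                      /* default nonlinear 4-tap */
    }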
for (i = 0; i < 8 * count; ++i) { - const uint16_t p3 = s[-4 * p]; - const uint16_t p2 = s[-3 * p]; - const uint16_t p1 = s[-2 * p]; - const uint16_t p0 = s[-p]; - const uint16_t q0 = s[0 * p]; - const uint16_t q1 = s[1 * p]; - const uint16_t q2 = s[2 * p]; - const uint16_t q3 = s[3 * p]; + const uint16_t p3 = s[-4 * pitch]; + const uint16_t p2 = s[-3 * pitch]; + const uint16_t p1 = s[-2 * pitch]; + const uint16_t p0 = s[-pitch]; + const uint16_t q0 = s[0 * pitch]; + const uint16_t q1 = s[1 * pitch]; + const uint16_t q2 = s[2 * pitch]; + const uint16_t q3 = s[3 * pitch]; const int8_t mask = highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd); const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd); - const int8_t flat2 = - highbd_flat_mask5(1, s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], p0, q0, - s[4 * p], s[5 * p], s[6 * p], s[7 * p], bd); + const int8_t flat2 = highbd_flat_mask5( + 1, s[-8 * pitch], s[-7 * pitch], s[-6 * pitch], s[-5 * pitch], p0, q0, + s[4 * pitch], s[5 * pitch], s[6 * pitch], s[7 * pitch], bd); - highbd_filter16(mask, *thresh, flat, flat2, s - 8 * p, s - 7 * p, s - 6 * p, - s - 5 * p, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s, - s + 1 * p, s + 2 * p, s + 3 * p, s + 4 * p, s + 5 * p, - s + 6 * p, s + 7 * p, bd); + highbd_filter16(mask, *thresh, flat, flat2, s - 8 * pitch, s - 7 * pitch, + s - 6 * pitch, s - 5 * pitch, s - 4 * pitch, s - 3 * pitch, + s - 2 * pitch, s - 1 * pitch, s, s + 1 * pitch, + s + 2 * pitch, s + 3 * pitch, s + 4 * pitch, s + 5 * pitch, + s + 6 * pitch, s + 7 * pitch, bd); ++s; } } -void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd) { - highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd); +void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int pitch, + const uint8_t *blimit, const uint8_t *limit, + const uint8_t *thresh, int bd) { + highbd_mb_lpf_horizontal_edge_w(s, pitch, blimit, limit, thresh, 1, bd); } -void vpx_highbd_lpf_horizontal_16_dual_c(uint16_t *s, int p, +void vpx_highbd_lpf_horizontal_16_dual_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { - highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd); + highbd_mb_lpf_horizontal_edge_w(s, pitch, blimit, limit, thresh, 2, bd); } -static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p, +static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, @@ -712,20 +724,20 @@ static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p, highbd_filter16(mask, *thresh, flat, flat2, s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1, s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7, bd); - s += p; + s += pitch; } } -void vpx_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit, +void vpx_highbd_lpf_vertical_16_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { - highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd); + highbd_mb_lpf_vertical_edge_w(s, pitch, blimit, limit, thresh, 8, bd); } -void vpx_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p, +void vpx_highbd_lpf_vertical_16_dual_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { - highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16, bd); + highbd_mb_lpf_vertical_edge_w(s, pitch, blimit, limit, thresh, 16, 
bd); } #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/libvpx/vpx_dsp/mips/add_noise_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/add_noise_msa.c index 43d2c1146ecf..97541411e413 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/add_noise_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/add_noise_msa.c @@ -9,7 +9,9 @@ */ #include -#include "./macros_msa.h" + +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/mips/macros_msa.h" void vpx_plane_add_noise_msa(uint8_t *start_ptr, const int8_t *noise, int blackclamp, int whiteclamp, int width, diff --git a/media/libvpx/libvpx/vpx_dsp/mips/avg_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/avg_msa.c index d0ac7b8e2968..3fd18dec568c 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/avg_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/avg_msa.c @@ -9,6 +9,7 @@ */ #include +#include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" @@ -56,6 +57,7 @@ uint32_t vpx_avg_4x4_msa(const uint8_t *src, int32_t src_stride) { return sum_out; } +#if !CONFIG_VP9_HIGHBITDEPTH void vpx_hadamard_8x8_msa(const int16_t *src, ptrdiff_t src_stride, int16_t *dst) { v8i16 src0, src1, src2, src3, src4, src5, src6, src7; @@ -391,6 +393,7 @@ int vpx_satd_msa(const int16_t *data, int length) { return satd; } +#endif // !CONFIG_VP9_HIGHBITDEPTH void vpx_int_pro_row_msa(int16_t hbuf[16], const uint8_t *ref, const int ref_stride, const int height) { diff --git a/media/libvpx/libvpx/vpx_dsp/mips/common_dspr2.h b/media/libvpx/libvpx/vpx_dsp/mips/common_dspr2.h index 0a42f5cec21f..87a5bbab5616 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/common_dspr2.h +++ b/media/libvpx/libvpx/vpx_dsp/mips/common_dspr2.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_COMMON_MIPS_DSPR2_H_ -#define VPX_COMMON_MIPS_DSPR2_H_ +#ifndef VPX_VPX_DSP_MIPS_COMMON_DSPR2_H_ +#define VPX_VPX_DSP_MIPS_COMMON_DSPR2_H_ #include #include "./vpx_config.h" @@ -45,4 +45,4 @@ static INLINE void prefetch_store_streamed(unsigned char *dst) { } // extern "C" #endif -#endif // VPX_COMMON_MIPS_DSPR2_H_ +#endif // VPX_VPX_DSP_MIPS_COMMON_DSPR2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/mips/convolve8_avg_dspr2.c b/media/libvpx/libvpx/vpx_dsp/mips/convolve8_avg_dspr2.c index d9c2bef69ed0..cc458c86182d 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/convolve8_avg_dspr2.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/convolve8_avg_dspr2.c @@ -15,6 +15,7 @@ #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_convolve.h" #include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 @@ -341,7 +342,7 @@ void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, assert(y_step_q4 == 16); assert(((const int32_t *)filter_y)[1] != 0x800000); - if (((const int32_t *)filter_y)[0] == 0) { + if (vpx_get_filter_taps(filter_y) == 2) { vpx_convolve2_avg_vert_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { diff --git a/media/libvpx/libvpx/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c b/media/libvpx/libvpx/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c index fb68ad881398..7a9aa49d8a15 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c @@ -15,6 +15,7 @@ #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_convolve.h" #include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 @@ -945,7 +946,7 @@ void 
vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, assert(x_step_q4 == 16); assert(((const int32_t *)filter_x)[1] != 0x800000); - if (((const int32_t *)filter_x)[0] == 0) { + if (vpx_get_filter_taps(filter_x) == 2) { vpx_convolve2_avg_horiz_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { diff --git a/media/libvpx/libvpx/vpx_dsp/mips/convolve8_dspr2.c b/media/libvpx/libvpx/vpx_dsp/mips/convolve8_dspr2.c index 89f0f41962af..1e7052f6c58c 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/convolve8_dspr2.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/convolve8_dspr2.c @@ -1322,7 +1322,7 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, if (filter_x[3] == 0x80) { copy_horiz_transposed(src - src_stride * 3, src_stride, temp, intermediate_height, w, intermediate_height); - } else if (((const int32_t *)filter_x)[0] == 0) { + } else if (vpx_get_filter_taps(filter_x) == 2) { vpx_convolve2_dspr2(src - src_stride * 3, src_stride, temp, intermediate_height, filter_x, w, intermediate_height); } else { @@ -1365,7 +1365,7 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, /* copy the src to dst */ if (filter_y[3] == 0x80) { copy_horiz_transposed(temp + 3, intermediate_height, dst, dst_stride, h, w); - } else if (((const int32_t *)filter_y)[0] == 0) { + } else if (vpx_get_filter_taps(filter_y) == 2) { vpx_convolve2_dspr2(temp + 3, intermediate_height, dst, dst_stride, filter_y, h, w); } else { diff --git a/media/libvpx/libvpx/vpx_dsp/mips/convolve8_horiz_dspr2.c b/media/libvpx/libvpx/vpx_dsp/mips/convolve8_horiz_dspr2.c index 77e95c844491..09d6f36e5615 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/convolve8_horiz_dspr2.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/convolve8_horiz_dspr2.c @@ -825,7 +825,7 @@ void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, assert(x_step_q4 == 16); assert(((const int32_t *)filter_x)[1] != 0x800000); - if (((const int32_t *)filter_x)[0] == 0) { + if (vpx_get_filter_taps(filter_x) == 2) { vpx_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { diff --git a/media/libvpx/libvpx/vpx_dsp/mips/convolve8_vert_dspr2.c b/media/libvpx/libvpx/vpx_dsp/mips/convolve8_vert_dspr2.c index c329f71ccf6d..fd977b53360f 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/convolve8_vert_dspr2.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/convolve8_vert_dspr2.c @@ -325,7 +325,7 @@ void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, assert(y_step_q4 == 16); assert(((const int32_t *)filter_y)[1] != 0x800000); - if (((const int32_t *)filter_y)[0] == 0) { + if (vpx_get_filter_taps(filter_y) == 2) { vpx_convolve2_vert_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { diff --git a/media/libvpx/libvpx/vpx_dsp/mips/convolve_common_dspr2.h b/media/libvpx/libvpx/vpx_dsp/mips/convolve_common_dspr2.h index 48e440d73c5d..14b65bc650c8 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/convolve_common_dspr2.h +++ b/media/libvpx/libvpx/vpx_dsp/mips/convolve_common_dspr2.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_MIPS_VPX_COMMON_DSPR2_H_ -#define VPX_DSP_MIPS_VPX_COMMON_DSPR2_H_ +#ifndef VPX_VPX_DSP_MIPS_CONVOLVE_COMMON_DSPR2_H_ +#define VPX_VPX_DSP_MIPS_CONVOLVE_COMMON_DSPR2_H_ #include @@ -55,4 +55,4 @@ void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, } // extern "C" #endif -#endif // VPX_DSP_MIPS_VPX_COMMON_DSPR2_H_ +#endif // VPX_VPX_DSP_MIPS_CONVOLVE_COMMON_DSPR2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/mips/deblock_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/deblock_msa.c index aafa272fbdf3..4e93ff594d05 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/deblock_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/deblock_msa.c @@ -10,42 +10,42 @@ #include -#include "./macros_msa.h" +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/mips/macros_msa.h" extern const int16_t vpx_rv[]; -#define VPX_TRANSPOSE8x16_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, out0, \ - out1, out2, out3, out4, out5, out6, out7, \ - out8, out9, out10, out11, out12, out13, out14, \ - out15) \ - { \ - v8i16 temp0, temp1, temp2, temp3, temp4; \ - v8i16 temp5, temp6, temp7, temp8, temp9; \ - \ - ILVR_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2, \ - temp3); \ - ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \ - ILVRL_W2_SH(temp5, temp4, temp6, temp7); \ - ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \ - ILVRL_W2_SH(temp5, temp4, temp8, temp9); \ - ILVL_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2, \ - temp3); \ - ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \ - ILVRL_W2_UB(temp5, temp4, out8, out10); \ - ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \ - ILVRL_W2_UB(temp5, temp4, out12, out14); \ - out0 = (v16u8)temp6; \ - out2 = (v16u8)temp7; \ - out4 = (v16u8)temp8; \ - out6 = (v16u8)temp9; \ - out9 = (v16u8)__msa_ilvl_d((v2i64)out8, (v2i64)out8); \ - out11 = (v16u8)__msa_ilvl_d((v2i64)out10, (v2i64)out10); \ - out13 = (v16u8)__msa_ilvl_d((v2i64)out12, (v2i64)out12); \ - out15 = (v16u8)__msa_ilvl_d((v2i64)out14, (v2i64)out14); \ - out1 = (v16u8)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \ - out3 = (v16u8)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \ - out5 = (v16u8)__msa_ilvl_d((v2i64)out4, (v2i64)out4); \ - out7 = (v16u8)__msa_ilvl_d((v2i64)out6, (v2i64)out6); \ +#define VPX_TRANSPOSE8x16_UB_UB( \ + in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, out4, \ + out5, out6, out7, out8, out9, out10, out11, out12, out13, out14, out15) \ + { \ + v8i16 temp0, temp1, temp2, temp3, temp4; \ + v8i16 temp5, temp6, temp7, temp8, temp9; \ + \ + ILVR_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2, \ + temp3); \ + ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \ + ILVRL_W2_SH(temp5, temp4, temp6, temp7); \ + ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \ + ILVRL_W2_SH(temp5, temp4, temp8, temp9); \ + ILVL_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2, \ + temp3); \ + ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \ + ILVRL_W2_UB(temp5, temp4, out8, out10); \ + ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \ + ILVRL_W2_UB(temp5, temp4, out12, out14); \ + out0 = (v16u8)temp6; \ + out2 = (v16u8)temp7; \ + out4 = (v16u8)temp8; \ + out6 = (v16u8)temp9; \ + out9 = (v16u8)__msa_ilvl_d((v2i64)out8, (v2i64)out8); \ + out11 = (v16u8)__msa_ilvl_d((v2i64)out10, (v2i64)out10); \ + out13 = (v16u8)__msa_ilvl_d((v2i64)out12, (v2i64)out12); \ + out15 = (v16u8)__msa_ilvl_d((v2i64)out14, (v2i64)out14); \ + out1 = (v16u8)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \ + out3 = 
(v16u8)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \ + out5 = (v16u8)__msa_ilvl_d((v2i64)out4, (v2i64)out4); \ + out7 = (v16u8)__msa_ilvl_d((v2i64)out6, (v2i64)out6); \ } #define VPX_AVER_IF_RETAIN(above2_in, above1_in, src_in, below1_in, below2_in, \ @@ -509,11 +509,11 @@ void vpx_post_proc_down_and_across_mb_row_msa(uint8_t *src, uint8_t *dst, } } -void vpx_mbpost_proc_across_ip_msa(uint8_t *src_ptr, int32_t pitch, - int32_t rows, int32_t cols, int32_t flimit) { +void vpx_mbpost_proc_across_ip_msa(uint8_t *src, int32_t pitch, int32_t rows, + int32_t cols, int32_t flimit) { int32_t row, col, cnt; - uint8_t *src_dup = src_ptr; - v16u8 src0, src, tmp_orig; + uint8_t *src_dup = src; + v16u8 src0, src1, tmp_orig; v16u8 tmp = { 0 }; v16i8 zero = { 0 }; v8u16 sum_h, src_r_h, src_l_h; @@ -532,13 +532,13 @@ void vpx_mbpost_proc_across_ip_msa(uint8_t *src_ptr, int32_t pitch, src_dup[cols + 16] = src_dup[cols - 1]; tmp_orig = (v16u8)__msa_ldi_b(0); tmp_orig[15] = tmp[15]; - src = LD_UB(src_dup - 8); - src[15] = 0; - ILVRL_B2_UH(zero, src, src_r_h, src_l_h); + src1 = LD_UB(src_dup - 8); + src1[15] = 0; + ILVRL_B2_UH(zero, src1, src_r_h, src_l_h); src_r_w = __msa_dotp_u_w(src_r_h, src_r_h); src_r_w += __msa_dotp_u_w(src_l_h, src_l_h); sum_sq = HADD_SW_S32(src_r_w) + 16; - sum_h = __msa_hadd_u_h(src, src); + sum_h = __msa_hadd_u_h(src1, src1); sum = HADD_UH_U32(sum_h); { v16u8 src7, src8, src_r, src_l; @@ -567,8 +567,8 @@ void vpx_mbpost_proc_across_ip_msa(uint8_t *src_ptr, int32_t pitch, sum_l[cnt + 1] = sum_l[cnt] + sub_l[cnt + 1]; } sum = sum_l[7]; - src = LD_UB(src_dup + 16 * col); - ILVRL_B2_UH(zero, src, src_r_h, src_l_h); + src1 = LD_UB(src_dup + 16 * col); + ILVRL_B2_UH(zero, src1, src_r_h, src_l_h); src7 = (v16u8)((const8 + sum_r + (v8i16)src_r_h) >> 4); src8 = (v16u8)((const8 + sum_l + (v8i16)src_l_h) >> 4); tmp = (v16u8)__msa_pckev_b((v16i8)src8, (v16i8)src7); @@ -614,7 +614,7 @@ void vpx_mbpost_proc_across_ip_msa(uint8_t *src_ptr, int32_t pitch, total3 = (total3 < flimit_vec); PCKEV_H2_SH(total1, total0, total3, total2, mask0, mask1); mask = __msa_pckev_b((v16i8)mask1, (v16i8)mask0); - tmp = __msa_bmz_v(tmp, src, (v16u8)mask); + tmp = __msa_bmz_v(tmp, src1, (v16u8)mask); if (col == 0) { uint64_t src_d; diff --git a/media/libvpx/libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c index 06fdc951e793..36583e2d240d 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/fwd_txfm_msa.h" static void fdct8x32_1d_column_load_butterfly(const int16_t *input, diff --git a/media/libvpx/libvpx/vpx_dsp/mips/fwd_txfm_msa.h b/media/libvpx/libvpx/vpx_dsp/mips/fwd_txfm_msa.h index fd589224d326..c0be56b81976 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/fwd_txfm_msa.h +++ b/media/libvpx/libvpx/vpx_dsp/mips/fwd_txfm_msa.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_MIPS_FWD_TXFM_MSA_H_ -#define VPX_DSP_MIPS_FWD_TXFM_MSA_H_ +#ifndef VPX_VPX_DSP_MIPS_FWD_TXFM_MSA_H_ +#define VPX_VPX_DSP_MIPS_FWD_TXFM_MSA_H_ #include "vpx_dsp/mips/txfm_macros_msa.h" #include "vpx_dsp/txfm_common.h" @@ -361,4 +361,4 @@ void fdct8x16_1d_column(const int16_t *input, int16_t *tmp_ptr, int32_t src_stride); void fdct16x8_1d_row(int16_t *input, int16_t *output); -#endif // VPX_DSP_MIPS_FWD_TXFM_MSA_H_ +#endif // VPX_VPX_DSP_MIPS_FWD_TXFM_MSA_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/mips/idct16x16_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/idct16x16_msa.c index 2a211c5677e3..7ca61a28ec8b 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/idct16x16_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/idct16x16_msa.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/inv_txfm_msa.h" void vpx_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { diff --git a/media/libvpx/libvpx/vpx_dsp/mips/idct32x32_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/idct32x32_msa.c index 2ea6136f9bcf..053948183a0c 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/idct32x32_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/idct32x32_msa.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/inv_txfm_msa.h" static void idct32x8_row_transpose_store(const int16_t *input, diff --git a/media/libvpx/libvpx/vpx_dsp/mips/idct4x4_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/idct4x4_msa.c index 0a85742f1065..56ffec3cba9f 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/idct4x4_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/idct4x4_msa.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/inv_txfm_msa.h" void vpx_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst, diff --git a/media/libvpx/libvpx/vpx_dsp/mips/idct8x8_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/idct8x8_msa.c index 7f77d2019187..a383ff206674 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/idct8x8_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/idct8x8_msa.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/inv_txfm_msa.h" void vpx_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst, diff --git a/media/libvpx/libvpx/vpx_dsp/mips/inv_txfm_dspr2.h b/media/libvpx/libvpx/vpx_dsp/mips/inv_txfm_dspr2.h index 27881f0db6cc..cbea22f20f72 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/inv_txfm_dspr2.h +++ b/media/libvpx/libvpx/vpx_dsp/mips/inv_txfm_dspr2.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_MIPS_INV_TXFM_DSPR2_H_ -#define VPX_DSP_MIPS_INV_TXFM_DSPR2_H_ +#ifndef VPX_VPX_DSP_MIPS_INV_TXFM_DSPR2_H_ +#define VPX_VPX_DSP_MIPS_INV_TXFM_DSPR2_H_ #include @@ -25,7 +25,6 @@ extern "C" { #if HAVE_DSPR2 #define DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input) \ ({ \ - \ int32_t tmp, out; \ int dct_cost_rounding = DCT_CONST_ROUNDING; \ int in = input; \ @@ -73,4 +72,4 @@ void iadst16_dspr2(const int16_t *input, int16_t *output); } // extern "C" #endif -#endif // VPX_DSP_MIPS_INV_TXFM_DSPR2_H_ +#endif // VPX_VPX_DSP_MIPS_INV_TXFM_DSPR2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/mips/inv_txfm_msa.h b/media/libvpx/libvpx/vpx_dsp/mips/inv_txfm_msa.h index 1fe9b28e8ad4..3b66249ef2e6 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/inv_txfm_msa.h +++ b/media/libvpx/libvpx/vpx_dsp/mips/inv_txfm_msa.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_MIPS_INV_TXFM_MSA_H_ -#define VPX_DSP_MIPS_INV_TXFM_MSA_H_ +#ifndef VPX_VPX_DSP_MIPS_INV_TXFM_MSA_H_ +#define VPX_VPX_DSP_MIPS_INV_TXFM_MSA_H_ #include "vpx_dsp/mips/macros_msa.h" #include "vpx_dsp/mips/txfm_macros_msa.h" @@ -408,4 +408,4 @@ void vpx_idct16_1d_rows_msa(const int16_t *input, int16_t *output); void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, int32_t dst_stride); void vpx_iadst16_1d_rows_msa(const int16_t *input, int16_t *output); -#endif // VPX_DSP_MIPS_INV_TXFM_MSA_H_ +#endif // VPX_VPX_DSP_MIPS_INV_TXFM_MSA_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.h b/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.h index 5b0c73345b72..ec339be8685b 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.h +++ b/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_ -#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_ +#ifndef VPX_VPX_DSP_MIPS_LOOPFILTER_FILTERS_DSPR2_H_ +#define VPX_VPX_DSP_MIPS_LOOPFILTER_FILTERS_DSPR2_H_ #include @@ -731,4 +731,4 @@ static INLINE void wide_mbfilter_dspr2( } // extern "C" #endif -#endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_ +#endif // VPX_VPX_DSP_MIPS_LOOPFILTER_FILTERS_DSPR2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_macros_dspr2.h b/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_macros_dspr2.h index 38ed0b2a631b..9af0b42360f0 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_macros_dspr2.h +++ b/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_macros_dspr2.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MACROS_DSPR2_H_ -#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MACROS_DSPR2_H_ +#ifndef VPX_VPX_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_ +#define VPX_VPX_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_ #include @@ -432,4 +432,4 @@ extern "C" { } // extern "C" #endif -#endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MACROS_DSPR2_H_ +#endif // VPX_VPX_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_masks_dspr2.h b/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_masks_dspr2.h index ee1114226684..24c492bea0d8 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_masks_dspr2.h +++ b/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_masks_dspr2.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ -#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ +#ifndef VPX_VPX_DSP_MIPS_LOOPFILTER_MASKS_DSPR2_H_ +#define VPX_VPX_DSP_MIPS_LOOPFILTER_MASKS_DSPR2_H_ #include @@ -352,4 +352,4 @@ static INLINE void flatmask5(uint32_t p4, uint32_t p3, uint32_t p2, uint32_t p1, } // extern "C" #endif -#endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ +#endif // VPX_VPX_DSP_MIPS_LOOPFILTER_MASKS_DSPR2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_msa.h b/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_msa.h index 49fd74c25a45..1ea05e0b0b3b 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_msa.h +++ b/media/libvpx/libvpx/vpx_dsp/mips/loopfilter_msa.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_LOOPFILTER_MSA_H_ -#define VPX_DSP_LOOPFILTER_MSA_H_ +#ifndef VPX_VPX_DSP_MIPS_LOOPFILTER_MSA_H_ +#define VPX_VPX_DSP_MIPS_LOOPFILTER_MSA_H_ #include "vpx_dsp/mips/macros_msa.h" @@ -174,4 +174,4 @@ mask_out = limit_in < (v16u8)mask_out; \ mask_out = __msa_xori_b(mask_out, 0xff); \ } -#endif /* VPX_DSP_LOOPFILTER_MSA_H_ */ +#endif // VPX_VPX_DSP_MIPS_LOOPFILTER_MSA_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/mips/macros_msa.h b/media/libvpx/libvpx/vpx_dsp/mips/macros_msa.h index f9a446e7bc10..a3a5a4dfeeb2 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/macros_msa.h +++ b/media/libvpx/libvpx/vpx_dsp/mips/macros_msa.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_MIPS_MACROS_MSA_H_ -#define VPX_DSP_MIPS_MACROS_MSA_H_ +#ifndef VPX_VPX_DSP_MIPS_MACROS_MSA_H_ +#define VPX_VPX_DSP_MIPS_MACROS_MSA_H_ #include @@ -1966,4 +1966,4 @@ \ tmp1_m; \ }) -#endif /* VPX_DSP_MIPS_MACROS_MSA_H_ */ +#endif // VPX_VPX_DSP_MIPS_MACROS_MSA_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/mips/sad_mmi.c b/media/libvpx/libvpx/vpx_dsp/mips/sad_mmi.c index 33bd3fe7f926..4368db5fdb77 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/sad_mmi.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/sad_mmi.c @@ -341,7 +341,7 @@ const uint8_t *ref_array, int ref_stride, \ uint32_t *sad_array) { \ int i; \ - for (i = 0; i < k; ++i) \ + for (i = 0; i < (k); ++i) \ sad_array[i] = \ vpx_sad##m##x##n##_mmi(src, src_stride, &ref_array[i], ref_stride); \ } diff --git a/media/libvpx/libvpx/vpx_dsp/mips/sub_pixel_variance_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/sub_pixel_variance_msa.c index 313e06f92dda..572fcabfc048 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/sub_pixel_variance_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/sub_pixel_variance_msa.c @@ -27,13 +27,14 @@ static const uint8_t bilinear_filters_msa[8][2] = { HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \ DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \ \ - sub += res_l0_m + res_l1_m; \ + (sub) += res_l0_m + res_l1_m; \ } -#define VARIANCE_WxH(sse, diff, shift) sse - (((uint32_t)diff * diff) >> shift) +#define VARIANCE_WxH(sse, diff, shift) \ + (sse) - (((uint32_t)(diff) * (diff)) >> (shift)) #define VARIANCE_LARGE_WxH(sse, diff, shift) \ - sse - (((int64_t)diff * diff) >> shift) + (sse) - (((int64_t)(diff) * (diff)) >> (shift)) static uint32_t avg_sse_diff_4width_msa(const uint8_t *src_ptr, int32_t src_stride, @@ -1619,16 +1620,16 @@ static uint32_t sub_pixel_avg_sse_diff_64width_hv_msa( #define VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(wd, ht) \ uint32_t vpx_sub_pixel_variance##wd##x##ht##_msa( \ - const uint8_t *src, int32_t src_stride, int32_t xoffset, \ - int32_t yoffset, const uint8_t *ref, 
int32_t ref_stride, \ + const uint8_t *src, int32_t src_stride, int32_t x_offset, \ + int32_t y_offset, const uint8_t *ref, int32_t ref_stride, \ uint32_t *sse) { \ int32_t diff; \ uint32_t var; \ - const uint8_t *h_filter = bilinear_filters_msa[xoffset]; \ - const uint8_t *v_filter = bilinear_filters_msa[yoffset]; \ + const uint8_t *h_filter = bilinear_filters_msa[x_offset]; \ + const uint8_t *v_filter = bilinear_filters_msa[y_offset]; \ \ - if (yoffset) { \ - if (xoffset) { \ + if (y_offset) { \ + if (x_offset) { \ *sse = sub_pixel_sse_diff_##wd##width_hv_msa( \ src, src_stride, ref, ref_stride, h_filter, v_filter, ht, &diff); \ } else { \ @@ -1638,7 +1639,7 @@ static uint32_t sub_pixel_avg_sse_diff_64width_hv_msa( \ var = VARIANCE_##wd##Wx##ht##H(*sse, diff); \ } else { \ - if (xoffset) { \ + if (x_offset) { \ *sse = sub_pixel_sse_diff_##wd##width_h_msa( \ src, src_stride, ref, ref_stride, h_filter, ht, &diff); \ \ @@ -1672,15 +1673,15 @@ VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(64, 64); #define VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(wd, ht) \ uint32_t vpx_sub_pixel_avg_variance##wd##x##ht##_msa( \ - const uint8_t *src_ptr, int32_t src_stride, int32_t xoffset, \ - int32_t yoffset, const uint8_t *ref_ptr, int32_t ref_stride, \ + const uint8_t *src_ptr, int32_t src_stride, int32_t x_offset, \ + int32_t y_offset, const uint8_t *ref_ptr, int32_t ref_stride, \ uint32_t *sse, const uint8_t *sec_pred) { \ int32_t diff; \ - const uint8_t *h_filter = bilinear_filters_msa[xoffset]; \ - const uint8_t *v_filter = bilinear_filters_msa[yoffset]; \ + const uint8_t *h_filter = bilinear_filters_msa[x_offset]; \ + const uint8_t *v_filter = bilinear_filters_msa[y_offset]; \ \ - if (yoffset) { \ - if (xoffset) { \ + if (y_offset) { \ + if (x_offset) { \ *sse = sub_pixel_avg_sse_diff_##wd##width_hv_msa( \ src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, \ v_filter, ht, &diff); \ @@ -1690,7 +1691,7 @@ VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(64, 64); &diff); \ } \ } else { \ - if (xoffset) { \ + if (x_offset) { \ *sse = sub_pixel_avg_sse_diff_##wd##width_h_msa( \ src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, ht, \ &diff); \ @@ -1719,16 +1720,16 @@ VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(32, 32); uint32_t vpx_sub_pixel_avg_variance32x64_msa(const uint8_t *src_ptr, int32_t src_stride, - int32_t xoffset, int32_t yoffset, + int32_t x_offset, int32_t y_offset, const uint8_t *ref_ptr, int32_t ref_stride, uint32_t *sse, const uint8_t *sec_pred) { int32_t diff; - const uint8_t *h_filter = bilinear_filters_msa[xoffset]; - const uint8_t *v_filter = bilinear_filters_msa[yoffset]; + const uint8_t *h_filter = bilinear_filters_msa[x_offset]; + const uint8_t *v_filter = bilinear_filters_msa[y_offset]; - if (yoffset) { - if (xoffset) { + if (y_offset) { + if (x_offset) { *sse = sub_pixel_avg_sse_diff_32width_hv_msa( src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, v_filter, 64, &diff); @@ -1738,7 +1739,7 @@ uint32_t vpx_sub_pixel_avg_variance32x64_msa(const uint8_t *src_ptr, v_filter, 64, &diff); } } else { - if (xoffset) { + if (x_offset) { *sse = sub_pixel_avg_sse_diff_32width_h_msa(src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, 64, &diff); @@ -1753,15 +1754,15 @@ uint32_t vpx_sub_pixel_avg_variance32x64_msa(const uint8_t *src_ptr, #define VPX_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(ht) \ uint32_t vpx_sub_pixel_avg_variance64x##ht##_msa( \ - const uint8_t *src_ptr, int32_t src_stride, int32_t xoffset, \ - int32_t yoffset, const uint8_t *ref_ptr, int32_t ref_stride, \ + const uint8_t 
*src_ptr, int32_t src_stride, int32_t x_offset, \ + int32_t y_offset, const uint8_t *ref_ptr, int32_t ref_stride, \ uint32_t *sse, const uint8_t *sec_pred) { \ int32_t diff; \ - const uint8_t *h_filter = bilinear_filters_msa[xoffset]; \ - const uint8_t *v_filter = bilinear_filters_msa[yoffset]; \ + const uint8_t *h_filter = bilinear_filters_msa[x_offset]; \ + const uint8_t *v_filter = bilinear_filters_msa[y_offset]; \ \ - if (yoffset) { \ - if (xoffset) { \ + if (y_offset) { \ + if (x_offset) { \ *sse = sub_pixel_avg_sse_diff_64width_hv_msa( \ src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, \ v_filter, ht, &diff); \ @@ -1771,7 +1772,7 @@ uint32_t vpx_sub_pixel_avg_variance32x64_msa(const uint8_t *src_ptr, &diff); \ } \ } else { \ - if (xoffset) { \ + if (x_offset) { \ *sse = sub_pixel_avg_sse_diff_64width_h_msa( \ src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, ht, \ &diff); \ diff --git a/media/libvpx/libvpx/vpx_dsp/mips/txfm_macros_msa.h b/media/libvpx/libvpx/vpx_dsp/mips/txfm_macros_msa.h index f077fa4814a8..f27504a20783 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/txfm_macros_msa.h +++ b/media/libvpx/libvpx/vpx_dsp/mips/txfm_macros_msa.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_MIPS_TXFM_MACROS_MIPS_MSA_H_ -#define VPX_DSP_MIPS_TXFM_MACROS_MIPS_MSA_H_ +#ifndef VPX_VPX_DSP_MIPS_TXFM_MACROS_MSA_H_ +#define VPX_VPX_DSP_MIPS_TXFM_MACROS_MSA_H_ #include "vpx_dsp/mips/macros_msa.h" @@ -98,4 +98,4 @@ SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3); \ } -#endif // VPX_DSP_MIPS_TXFM_MACROS_MIPS_MSA_H_ +#endif // VPX_VPX_DSP_MIPS_TXFM_MACROS_MSA_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/mips/variance_mmi.c b/media/libvpx/libvpx/vpx_dsp/mips/variance_mmi.c index 4af60d3634ba..c1780c33afa9 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/variance_mmi.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/variance_mmi.c @@ -87,10 +87,10 @@ static const uint8_t bilinear_filters[8][2] = { "paddh %[ftmp12], %[ftmp12], %[ftmp6] \n\t" #define VARIANCE_SSE_8 \ - "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" \ - "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t" \ - "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t" \ + "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \ + "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" \ + "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" \ "pasubub %[ftmp3], %[ftmp1], %[ftmp2] \n\t" \ "punpcklbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" \ "punpckhbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" \ @@ -101,10 +101,10 @@ static const uint8_t bilinear_filters[8][2] = { #define VARIANCE_SSE_16 \ VARIANCE_SSE_8 \ - "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t" \ - "gsldlc1 %[ftmp2], 0x0f(%[b]) \n\t" \ - "gsldrc1 %[ftmp2], 0x08(%[b]) \n\t" \ + "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ + "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t" \ + "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t" \ "pasubub %[ftmp3], %[ftmp1], %[ftmp2] \n\t" \ "punpcklbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" \ "punpckhbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" \ @@ -115,11 +115,11 @@ static const uint8_t bilinear_filters[8][2] = { #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A \ /* calculate fdata3[0]~fdata3[3], store at ftmp2*/ \ - "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" \ + "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \ "punpcklbh 
%[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ - "gsldlc1 %[ftmp1], 0x08(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x01(%[a]) \n\t" \ + "gsldlc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x01(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ "pmullh %[ftmp2], %[ftmp2], %[filter_x0] \n\t" \ "paddh %[ftmp2], %[ftmp2], %[ff_ph_40] \n\t" \ @@ -129,11 +129,11 @@ static const uint8_t bilinear_filters[8][2] = { #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_B \ /* calculate fdata3[0]~fdata3[3], store at ftmp4*/ \ - "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" \ + "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" \ - "gsldlc1 %[ftmp1], 0x08(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x01(%[a]) \n\t" \ + "gsldlc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x01(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ "pmullh %[ftmp4], %[ftmp4], %[filter_x0] \n\t" \ "paddh %[ftmp4], %[ftmp4], %[ff_ph_40] \n\t" \ @@ -169,12 +169,12 @@ static const uint8_t bilinear_filters[8][2] = { #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A \ /* calculate fdata3[0]~fdata3[7], store at ftmp2 and ftmp3*/ \ - "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" \ + "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ - "gsldlc1 %[ftmp1], 0x08(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x01(%[a]) \n\t" \ + "gsldlc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x01(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ "pmullh %[ftmp2], %[ftmp2], %[filter_x0] \n\t" \ @@ -190,12 +190,12 @@ static const uint8_t bilinear_filters[8][2] = { #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B \ /* calculate fdata3[0]~fdata3[7], store at ftmp8 and ftmp9*/ \ - "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" \ + "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t" \ - "gsldlc1 %[ftmp1], 0x08(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x01(%[a]) \n\t" \ + "gsldlc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x01(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp11], %[ftmp1], %[ftmp0] \n\t" \ "pmullh %[ftmp8], %[ftmp8], %[filter_x0] \n\t" \ @@ -258,12 +258,12 @@ static const uint8_t bilinear_filters[8][2] = { VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A \ \ /* calculate fdata3[8]~fdata3[15], store at ftmp4 and ftmp5*/ \ - "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t" \ + "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ - "gsldlc1 %[ftmp1], 0x10(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x09(%[a]) \n\t" \ + "gsldlc1 %[ftmp1], 0x10(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x09(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t" \ "pmullh %[ftmp4], %[ftmp4], %[filter_x0] \n\t" \ @@ -282,12 +282,12 @@ static const uint8_t bilinear_filters[8][2] = { VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B \ \ /* calculate fdata3[8]~fdata3[15], store at ftmp10 and ftmp11*/ \ - "gsldlc1 %[ftmp1], 
0x0f(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t" \ + "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp11], %[ftmp1], %[ftmp0] \n\t" \ - "gsldlc1 %[ftmp1], 0x10(%[a]) \n\t" \ - "gsldrc1 %[ftmp1], 0x09(%[a]) \n\t" \ + "gsldlc1 %[ftmp1], 0x10(%[src_ptr]) \n\t" \ + "gsldrc1 %[ftmp1], 0x09(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp12], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp13], %[ftmp1], %[ftmp0] \n\t" \ "pmullh %[ftmp10], %[ftmp10], %[filter_x0] \n\t" \ @@ -357,24 +357,23 @@ static const uint8_t bilinear_filters[8][2] = { // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride). // It defines the offset required to move from one input to the next. -static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b, - unsigned int src_pixels_per_line, - int pixel_step, - unsigned int output_height, - unsigned int output_width, - const uint8_t *filter) { +static void var_filter_block2d_bil_first_pass( + const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line, + int pixel_step, unsigned int output_height, unsigned int output_width, + const uint8_t *filter) { unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { - b[j] = ROUND_POWER_OF_TWO( - (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS); + ref_ptr[j] = ROUND_POWER_OF_TWO( + (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], + FILTER_BITS); - ++a; + ++src_ptr; } - a += src_pixels_per_line - output_width; - b += output_width; + src_ptr += src_pixels_per_line - output_width; + ref_ptr += output_width; } } @@ -387,28 +386,27 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b, // filter is applied horizontally (pixel_step = 1) or vertically // (pixel_step = stride). It defines the offset required to move from one input // to the next. Output is 8-bit. 
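/*
 * A minimal scalar sketch of how the two bilinear passes compose for one
 * 8x8 sub-pixel block, assuming FILTER_BITS and ROUND_POWER_OF_TWO from
 * vpx_dsp/vpx_filter.h and vpx_dsp/vpx_dsp_common.h, plus the
 * bilinear_filters table declared earlier in this file; the helper name
 * bilinear_8x8_sketch is hypothetical and is not part of the patch. The
 * first pass filters horizontally (pixel_step = 1) into a 16-bit
 * intermediate with one extra row; the second pass filters that
 * intermediate vertically (pixel_step = W) back down to 8-bit pixels.
 */
static void bilinear_8x8_sketch(const uint8_t *src, int src_stride,
                                int x_offset, int y_offset, uint8_t *out) {
  uint16_t fdata3[(8 + 1) * 8]; /* first pass keeps H + 1 rows of W outputs */
  const uint8_t *fx = bilinear_filters[x_offset];
  const uint8_t *fy = bilinear_filters[y_offset];
  unsigned int i, j;

  /* First pass: horizontal taps, widened to 16 bits. */
  for (i = 0; i < 8 + 1; ++i)
    for (j = 0; j < 8; ++j)
      fdata3[i * 8 + j] = ROUND_POWER_OF_TWO(
          (int)src[i * src_stride + j] * fx[0] +
              (int)src[i * src_stride + j + 1] * fx[1],
          FILTER_BITS);

  /* Second pass: vertical taps (pixel_step = W = 8), back to 8 bits; the
   * taps sum to 1 << FILTER_BITS, so the rounded result stays within 255. */
  for (i = 0; i < 8; ++i)
    for (j = 0; j < 8; ++j)
      out[i * 8 + j] = ROUND_POWER_OF_TWO(
          (int)fdata3[i * 8 + j] * fy[0] +
              (int)fdata3[(i + 1) * 8 + j] * fy[1],
          FILTER_BITS);
}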
-static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const uint8_t *filter) { +static void var_filter_block2d_bil_second_pass( + const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line, + unsigned int pixel_step, unsigned int output_height, + unsigned int output_width, const uint8_t *filter) { unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { - b[j] = ROUND_POWER_OF_TWO( - (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS); - ++a; + ref_ptr[j] = ROUND_POWER_OF_TWO( + (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], + FILTER_BITS); + ++src_ptr; } - a += src_pixels_per_line - output_width; - b += output_width; + src_ptr += src_pixels_per_line - output_width; + ref_ptr += output_width; } } -static inline uint32_t vpx_variance64x(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, +static inline uint32_t vpx_variance64x(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, int high) { int sum; double ftmp[12]; @@ -424,57 +422,57 @@ static inline uint32_t vpx_variance64x(const uint8_t *a, int a_stride, "xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t" "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "1: \n\t" - "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 - "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x0f(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x08(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 - "gsldlc1 %[ftmp1], 0x17(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x10(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x17(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x10(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x17(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x10(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x17(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x10(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 - "gsldlc1 %[ftmp1], 0x1f(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x18(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x1f(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x18(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x1f(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x18(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x1f(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x18(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 - "gsldlc1 %[ftmp1], 0x27(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x20(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x27(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x20(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x27(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x20(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x27(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x20(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 - "gsldlc1 %[ftmp1], 0x2f(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x28(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x2f(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x28(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x2f(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x28(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x2f(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x28(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 - "gsldlc1 
%[ftmp1], 0x37(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x30(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x37(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x30(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x37(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x30(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x37(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x30(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 - "gsldlc1 %[ftmp1], 0x3f(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x38(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x3f(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x38(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x3f(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x38(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x3f(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x38(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "addiu %[tmp0], %[tmp0], -0x01 \n\t" - MMI_ADDU(%[a], %[a], %[a_stride]) - MMI_ADDU(%[b], %[b], %[b_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) + MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "mfc1 %[tmp1], %[ftmp9] \n\t" @@ -491,9 +489,10 @@ static inline uint32_t vpx_variance64x(const uint8_t *a, int a_stride, [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), [tmp2]"=&r"(tmp[2]), - [a]"+&r"(a), [b]"+&r"(b), + [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr), [sum]"=&r"(sum) - : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride), + : [src_stride]"r"((mips_reg)src_stride), + [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse) : "memory" ); @@ -501,18 +500,19 @@ static inline uint32_t vpx_variance64x(const uint8_t *a, int a_stride, return *sse - (((int64_t)sum * sum) / (64 * high)); } -#define VPX_VARIANCE64XN(n) \ - uint32_t vpx_variance64x##n##_mmi(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - uint32_t *sse) { \ - return vpx_variance64x(a, a_stride, b, b_stride, sse, n); \ +#define VPX_VARIANCE64XN(n) \ + uint32_t vpx_variance64x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + return vpx_variance64x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } VPX_VARIANCE64XN(64) VPX_VARIANCE64XN(32) -uint32_t vpx_variance32x64_mmi(const uint8_t *a, int a_stride, const uint8_t *b, - int b_stride, uint32_t *sse) { +uint32_t vpx_variance32x64_mmi(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + uint32_t *sse) { int sum; double ftmp[12]; uint32_t tmp[3]; @@ -527,33 +527,33 @@ uint32_t vpx_variance32x64_mmi(const uint8_t *a, int a_stride, const uint8_t *b, "xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t" "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "1: \n\t" - "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 - "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x0f(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x08(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 - "gsldlc1 %[ftmp1], 0x17(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x10(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x17(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x10(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x17(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 
0x10(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x17(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x10(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 - "gsldlc1 %[ftmp1], 0x1f(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x18(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x1f(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x18(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x1f(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x18(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x1f(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x18(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "addiu %[tmp0], %[tmp0], -0x01 \n\t" - MMI_ADDU(%[a], %[a], %[a_stride]) - MMI_ADDU(%[b], %[b], %[b_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) + MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "mfc1 %[tmp1], %[ftmp9] \n\t" @@ -570,9 +570,10 @@ uint32_t vpx_variance32x64_mmi(const uint8_t *a, int a_stride, const uint8_t *b, [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), [tmp2]"=&r"(tmp[2]), - [a]"+&r"(a), [b]"+&r"(b), + [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr), [sum]"=&r"(sum) - : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride), + : [src_stride]"r"((mips_reg)src_stride), + [ref_stride]"r"((mips_reg)ref_stride), [sse]"r"(sse) : "memory" ); @@ -580,8 +581,8 @@ uint32_t vpx_variance32x64_mmi(const uint8_t *a, int a_stride, const uint8_t *b, return *sse - (((int64_t)sum * sum) / 2048); } -static inline uint32_t vpx_variance32x(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, +static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, int high) { int sum; double ftmp[13]; @@ -598,30 +599,30 @@ static inline uint32_t vpx_variance32x(const uint8_t *a, int a_stride, "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t" "1: \n\t" - "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 - "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x0f(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x08(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 - "gsldlc1 %[ftmp1], 0x17(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x10(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x17(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x10(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x17(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x10(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x17(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x10(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 - "gsldlc1 %[ftmp1], 0x1f(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x18(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x1f(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x18(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x1f(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x18(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x1f(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x18(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 "addiu %[tmp0], %[tmp0], -0x01 \n\t" - MMI_ADDU(%[a], %[a], %[a_stride]) - MMI_ADDU(%[b], %[b], %[b_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) + MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" @@ -646,8 +647,9 @@ 
static inline uint32_t vpx_variance32x(const uint8_t *a, int a_stride, [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]), - [a]"+&r"(a), [b]"+&r"(b) - : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride), + [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr) + : [src_stride]"r"((mips_reg)src_stride), + [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum) : "memory" ); @@ -655,18 +657,18 @@ static inline uint32_t vpx_variance32x(const uint8_t *a, int a_stride, return *sse - (((int64_t)sum * sum) / (32 * high)); } -#define VPX_VARIANCE32XN(n) \ - uint32_t vpx_variance32x##n##_mmi(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - uint32_t *sse) { \ - return vpx_variance32x(a, a_stride, b, b_stride, sse, n); \ +#define VPX_VARIANCE32XN(n) \ + uint32_t vpx_variance32x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + return vpx_variance32x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } VPX_VARIANCE32XN(32) VPX_VARIANCE32XN(16) -static inline uint32_t vpx_variance16x(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, +static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, int high) { int sum; double ftmp[13]; @@ -683,20 +685,20 @@ static inline uint32_t vpx_variance16x(const uint8_t *a, int a_stride, "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t" "1: \n\t" - "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 - "gsldlc1 %[ftmp1], 0x0f(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x08(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x0f(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x08(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 "addiu %[tmp0], %[tmp0], -0x01 \n\t" - MMI_ADDU(%[a], %[a], %[a_stride]) - MMI_ADDU(%[b], %[b], %[b_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) + MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" @@ -721,8 +723,9 @@ static inline uint32_t vpx_variance16x(const uint8_t *a, int a_stride, [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]), - [a]"+&r"(a), [b]"+&r"(b) - : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride), + [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr) + : [src_stride]"r"((mips_reg)src_stride), + [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum) : "memory" ); @@ -730,19 +733,19 @@ static inline uint32_t vpx_variance16x(const uint8_t *a, int a_stride, return *sse - (((int64_t)sum * sum) / (16 * high)); } -#define VPX_VARIANCE16XN(n) \ - uint32_t vpx_variance16x##n##_mmi(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - uint32_t *sse) { \ - return vpx_variance16x(a, a_stride, b, b_stride, sse, n); \ +#define VPX_VARIANCE16XN(n) \ + uint32_t 
vpx_variance16x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + return vpx_variance16x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } VPX_VARIANCE16XN(32) VPX_VARIANCE16XN(16) VPX_VARIANCE16XN(8) -static inline uint32_t vpx_variance8x(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, +static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, int high) { int sum; double ftmp[13]; @@ -759,15 +762,15 @@ static inline uint32_t vpx_variance8x(const uint8_t *a, int a_stride, "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t" "1: \n\t" - "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 "addiu %[tmp0], %[tmp0], -0x01 \n\t" - MMI_ADDU(%[a], %[a], %[a_stride]) - MMI_ADDU(%[b], %[b], %[b_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) + MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" @@ -792,8 +795,9 @@ static inline uint32_t vpx_variance8x(const uint8_t *a, int a_stride, [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]), - [a]"+&r"(a), [b]"+&r"(b) - : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride), + [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr) + : [src_stride]"r"((mips_reg)src_stride), + [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum) : "memory" ); @@ -801,19 +805,19 @@ static inline uint32_t vpx_variance8x(const uint8_t *a, int a_stride, return *sse - (((int64_t)sum * sum) / (8 * high)); } -#define VPX_VARIANCE8XN(n) \ - uint32_t vpx_variance8x##n##_mmi(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - uint32_t *sse) { \ - return vpx_variance8x(a, a_stride, b, b_stride, sse, n); \ +#define VPX_VARIANCE8XN(n) \ + uint32_t vpx_variance8x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + return vpx_variance8x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } VPX_VARIANCE8XN(16) VPX_VARIANCE8XN(8) VPX_VARIANCE8XN(4) -static inline uint32_t vpx_variance4x(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, +static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, int high) { int sum; double ftmp[12]; @@ -830,15 +834,15 @@ static inline uint32_t vpx_variance4x(const uint8_t *a, int a_stride, "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "1: \n\t" - "gsldlc1 %[ftmp1], 0x07(%[a]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[a]) \n\t" - "gsldlc1 %[ftmp2], 0x07(%[b]) \n\t" - "gsldrc1 %[ftmp2], 0x00(%[b]) \n\t" + "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" + "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" + "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" + "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_4 "addiu %[tmp0], %[tmp0], -0x01 \n\t" - MMI_ADDU(%[a], %[a], %[a_stride]) - MMI_ADDU(%[b], %[b], %[b_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) + MMI_ADDU(%[ref_ptr], 
%[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "dsrl %[ftmp9], %[ftmp6], %[ftmp10] \n\t" @@ -862,8 +866,9 @@ static inline uint32_t vpx_variance4x(const uint8_t *a, int a_stride, [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [tmp0]"=&r"(tmp[0]), - [a]"+&r"(a), [b]"+&r"(b) - : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride), + [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr) + : [src_stride]"r"((mips_reg)src_stride), + [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum) : "memory" ); @@ -871,19 +876,19 @@ static inline uint32_t vpx_variance4x(const uint8_t *a, int a_stride, return *sse - (((int64_t)sum * sum) / (4 * high)); } -#define VPX_VARIANCE4XN(n) \ - uint32_t vpx_variance4x##n##_mmi(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - uint32_t *sse) { \ - return vpx_variance4x(a, a_stride, b, b_stride, sse, n); \ +#define VPX_VARIANCE4XN(n) \ + uint32_t vpx_variance4x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + return vpx_variance4x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } VPX_VARIANCE4XN(8) VPX_VARIANCE4XN(4) -static inline uint32_t vpx_mse16x(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, uint32_t *sse, - uint64_t high) { +static inline uint32_t vpx_mse16x(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + uint32_t *sse, uint64_t high) { double ftmp[12]; uint32_t tmp[1]; @@ -900,8 +905,8 @@ static inline uint32_t vpx_mse16x(const uint8_t *a, int a_stride, VARIANCE_SSE_16 "addiu %[tmp0], %[tmp0], -0x01 \n\t" - MMI_ADDU(%[a], %[a], %[a_stride]) - MMI_ADDU(%[b], %[b], %[b_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) + MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" @@ -914,8 +919,9 @@ static inline uint32_t vpx_mse16x(const uint8_t *a, int a_stride, [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), - [a]"+&r"(a), [b]"+&r"(b) - : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride), + [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr) + : [src_stride]"r"((mips_reg)src_stride), + [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse) : "memory" ); @@ -923,19 +929,19 @@ static inline uint32_t vpx_mse16x(const uint8_t *a, int a_stride, return *sse; } -#define vpx_mse16xN(n) \ - uint32_t vpx_mse16x##n##_mmi(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - uint32_t *sse) { \ - return vpx_mse16x(a, a_stride, b, b_stride, sse, n); \ +#define vpx_mse16xN(n) \ + uint32_t vpx_mse16x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + return vpx_mse16x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } vpx_mse16xN(16); vpx_mse16xN(8); -static inline uint32_t vpx_mse8x(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, uint32_t *sse, - uint64_t high) { +static inline uint32_t vpx_mse8x(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + uint32_t *sse, uint64_t high) { double ftmp[12]; uint32_t tmp[1]; @@ -952,8 +958,8 @@ static inline uint32_t vpx_mse8x(const uint8_t *a, int a_stride, VARIANCE_SSE_8 "addiu %[tmp0], %[tmp0], -0x01 \n\t" - MMI_ADDU(%[a], %[a], %[a_stride]) - MMI_ADDU(%[b], %[b], %[b_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], 
%[src_stride]) + MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" @@ -966,8 +972,9 @@ static inline uint32_t vpx_mse8x(const uint8_t *a, int a_stride, [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), - [a]"+&r"(a), [b]"+&r"(b) - : [a_stride]"r"((mips_reg)a_stride),[b_stride]"r"((mips_reg)b_stride), + [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr) + : [src_stride]"r"((mips_reg)src_stride), + [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse) : "memory" ); @@ -975,28 +982,29 @@ static inline uint32_t vpx_mse8x(const uint8_t *a, int a_stride, return *sse; } -#define vpx_mse8xN(n) \ - uint32_t vpx_mse8x##n##_mmi(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, uint32_t *sse) { \ - return vpx_mse8x(a, a_stride, b, b_stride, sse, n); \ +#define vpx_mse8xN(n) \ + uint32_t vpx_mse8x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + return vpx_mse8x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } vpx_mse8xN(16); vpx_mse8xN(8); -#define SUBPIX_VAR(W, H) \ - uint32_t vpx_sub_pixel_variance##W##x##H##_mmi( \ - const uint8_t *a, int a_stride, int xoffset, int yoffset, \ - const uint8_t *b, int b_stride, uint32_t *sse) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint8_t temp2[H * W]; \ - \ - var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \ - bilinear_filters[xoffset]); \ - var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters[yoffset]); \ - \ - return vpx_variance##W##x##H##_mmi(temp2, W, b, b_stride, sse); \ +#define SUBPIX_VAR(W, H) \ + uint32_t vpx_sub_pixel_variance##W##x##H##_mmi( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ + uint16_t fdata3[((H) + 1) * (W)]; \ + uint8_t temp2[(H) * (W)]; \ + \ + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, (H) + 1, \ + W, bilinear_filters[x_offset]); \ + var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + return vpx_variance##W##x##H##_mmi(temp2, W, ref_ptr, ref_stride, sse); \ } SUBPIX_VAR(64, 64) @@ -1006,9 +1014,10 @@ SUBPIX_VAR(32, 32) SUBPIX_VAR(32, 16) SUBPIX_VAR(16, 32) -static inline void var_filter_block2d_bil_16x(const uint8_t *a, int a_stride, - int xoffset, int yoffset, - uint8_t *temp2, int counter) { +static inline void var_filter_block2d_bil_16x(const uint8_t *src_ptr, + int src_stride, int x_offset, + int y_offset, uint8_t *temp2, + int counter) { uint8_t *temp2_ptr = temp2; mips_reg l_counter = counter; double ftmp[15]; @@ -1016,8 +1025,8 @@ static inline void var_filter_block2d_bil_16x(const uint8_t *a, int a_stride, DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL }; - const uint8_t *filter_x = bilinear_filters[xoffset]; - const uint8_t *filter_y = bilinear_filters[yoffset]; + const uint8_t *filter_x = bilinear_filters[x_offset]; + const uint8_t *filter_y = bilinear_filters[y_offset]; __asm__ volatile ( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" @@ -1031,26 +1040,26 @@ static inline void var_filter_block2d_bil_16x(const uint8_t *a, int a_stride, // fdata3: fdata3[0] ~ fdata3[15] VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A - // fdata3 +a_stride*1: fdata3[0] ~ fdata3[15] - MMI_ADDU(%[a], %[a], %[a_stride]) + // 
fdata3 +src_stride*1: fdata3[0] ~ fdata3[15] + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_B // temp2: temp2[0] ~ temp2[15] VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_A - // fdata3 +a_stride*2: fdata3[0] ~ fdata3[15] - MMI_ADDU(%[a], %[a], %[a_stride]) + // fdata3 +src_stride*2: fdata3[0] ~ fdata3[15] + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A // temp2+16*1: temp2[0] ~ temp2[15] MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x10) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_B "1: \n\t" - MMI_ADDU(%[a], %[a], %[a_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_B MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x10) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_A - MMI_ADDU(%[a], %[a], %[a_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x10) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_B @@ -1062,43 +1071,44 @@ static inline void var_filter_block2d_bil_16x(const uint8_t *a, int a_stride, [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]), - [tmp0] "=&r"(tmp[0]), [a] "+&r"(a), [temp2_ptr] "+&r"(temp2_ptr), + [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr), [counter]"+&r"(l_counter) : [filter_x0] "f"((uint64_t)filter_x[0]), [filter_x1] "f"((uint64_t)filter_x[1]), [filter_y0] "f"((uint64_t)filter_y[0]), [filter_y1] "f"((uint64_t)filter_y[1]), - [a_stride] "r"((mips_reg)a_stride), [ff_ph_40] "f"(ff_ph_40), + [src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40), [mask] "f"(mask) : "memory" ); } -#define SUBPIX_VAR16XN(H) \ - uint32_t vpx_sub_pixel_variance16x##H##_mmi( \ - const uint8_t *a, int a_stride, int xoffset, int yoffset, \ - const uint8_t *b, int b_stride, uint32_t *sse) { \ - uint8_t temp2[16 * H]; \ - var_filter_block2d_bil_16x(a, a_stride, xoffset, yoffset, temp2, \ - (H - 2) / 2); \ - \ - return vpx_variance16x##H##_mmi(temp2, 16, b, b_stride, sse); \ +#define SUBPIX_VAR16XN(H) \ + uint32_t vpx_sub_pixel_variance16x##H##_mmi( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ + uint8_t temp2[16 * (H)]; \ + var_filter_block2d_bil_16x(src_ptr, src_stride, x_offset, y_offset, temp2, \ + ((H)-2) / 2); \ + \ + return vpx_variance16x##H##_mmi(temp2, 16, ref_ptr, ref_stride, sse); \ } SUBPIX_VAR16XN(16) SUBPIX_VAR16XN(8) -static inline void var_filter_block2d_bil_8x(const uint8_t *a, int a_stride, - int xoffset, int yoffset, - uint8_t *temp2, int counter) { +static inline void var_filter_block2d_bil_8x(const uint8_t *src_ptr, + int src_stride, int x_offset, + int y_offset, uint8_t *temp2, + int counter) { uint8_t *temp2_ptr = temp2; mips_reg l_counter = counter; double ftmp[15]; mips_reg tmp[2]; DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL }; - const uint8_t *filter_x = bilinear_filters[xoffset]; - const uint8_t *filter_y = bilinear_filters[yoffset]; + const uint8_t *filter_x = bilinear_filters[x_offset]; + const uint8_t *filter_y = bilinear_filters[y_offset]; __asm__ volatile ( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" @@ -1112,26 +1122,26 @@ static inline void var_filter_block2d_bil_8x(const uint8_t *a, int a_stride, // fdata3: fdata3[0] ~ fdata3[7] VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A - // fdata3 
+a_stride*1: fdata3[0] ~ fdata3[7] - MMI_ADDU(%[a], %[a], %[a_stride]) + // fdata3 +src_stride*1: fdata3[0] ~ fdata3[7] + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B // temp2: temp2[0] ~ temp2[7] VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A - // fdata3 +a_stride*2: fdata3[0] ~ fdata3[7] - MMI_ADDU(%[a], %[a], %[a_stride]) + // fdata3 +src_stride*2: fdata3[0] ~ fdata3[7] + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A // temp2+8*1: temp2[0] ~ temp2[7] MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x08) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B "1: \n\t" - MMI_ADDU(%[a], %[a], %[a_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x08) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A - MMI_ADDU(%[a], %[a], %[a_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x08) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B @@ -1143,44 +1153,45 @@ static inline void var_filter_block2d_bil_8x(const uint8_t *a, int a_stride, [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]), - [tmp0] "=&r"(tmp[0]), [a] "+&r"(a), [temp2_ptr] "+&r"(temp2_ptr), + [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr), [counter]"+&r"(l_counter) : [filter_x0] "f"((uint64_t)filter_x[0]), [filter_x1] "f"((uint64_t)filter_x[1]), [filter_y0] "f"((uint64_t)filter_y[0]), [filter_y1] "f"((uint64_t)filter_y[1]), - [a_stride] "r"((mips_reg)a_stride), [ff_ph_40] "f"(ff_ph_40), + [src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40), [mask] "f"(mask) : "memory" ); } -#define SUBPIX_VAR8XN(H) \ - uint32_t vpx_sub_pixel_variance8x##H##_mmi( \ - const uint8_t *a, int a_stride, int xoffset, int yoffset, \ - const uint8_t *b, int b_stride, uint32_t *sse) { \ - uint8_t temp2[8 * H]; \ - var_filter_block2d_bil_8x(a, a_stride, xoffset, yoffset, temp2, \ - (H - 2) / 2); \ - \ - return vpx_variance8x##H##_mmi(temp2, 8, b, b_stride, sse); \ +#define SUBPIX_VAR8XN(H) \ + uint32_t vpx_sub_pixel_variance8x##H##_mmi( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ + uint8_t temp2[8 * (H)]; \ + var_filter_block2d_bil_8x(src_ptr, src_stride, x_offset, y_offset, temp2, \ + ((H)-2) / 2); \ + \ + return vpx_variance8x##H##_mmi(temp2, 8, ref_ptr, ref_stride, sse); \ } SUBPIX_VAR8XN(16) SUBPIX_VAR8XN(8) SUBPIX_VAR8XN(4) -static inline void var_filter_block2d_bil_4x(const uint8_t *a, int a_stride, - int xoffset, int yoffset, - uint8_t *temp2, int counter) { +static inline void var_filter_block2d_bil_4x(const uint8_t *src_ptr, + int src_stride, int x_offset, + int y_offset, uint8_t *temp2, + int counter) { uint8_t *temp2_ptr = temp2; mips_reg l_counter = counter; double ftmp[7]; mips_reg tmp[2]; DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL }; - const uint8_t *filter_x = bilinear_filters[xoffset]; - const uint8_t *filter_y = bilinear_filters[yoffset]; + const uint8_t *filter_x = bilinear_filters[x_offset]; + const uint8_t *filter_y = bilinear_filters[y_offset]; __asm__ volatile ( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" @@ -1193,26 +1204,26 @@ static inline void var_filter_block2d_bil_4x(const uint8_t *a, int a_stride, // fdata3: 
fdata3[0] ~ fdata3[3] VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A - // fdata3 +a_stride*1: fdata3[0] ~ fdata3[3] - MMI_ADDU(%[a], %[a], %[a_stride]) + // fdata3 +src_stride*1: fdata3[0] ~ fdata3[3] + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_B // temp2: temp2[0] ~ temp2[7] VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_A - // fdata3 +a_stride*2: fdata3[0] ~ fdata3[3] - MMI_ADDU(%[a], %[a], %[a_stride]) + // fdata3 +src_stride*2: fdata3[0] ~ fdata3[3] + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A // temp2+4*1: temp2[0] ~ temp2[7] MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x04) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_B "1: \n\t" - MMI_ADDU(%[a], %[a], %[a_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_B MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x04) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_A - MMI_ADDU(%[a], %[a], %[a_stride]) + MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x04) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_B @@ -1220,49 +1231,49 @@ static inline void var_filter_block2d_bil_4x(const uint8_t *a, int a_stride, "bnez %[counter], 1b \n\t" : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), - [ftmp6] "=&f"(ftmp[6]), [tmp0] "=&r"(tmp[0]), [a] "+&r"(a), + [ftmp6] "=&f"(ftmp[6]), [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr), [counter]"+&r"(l_counter) : [filter_x0] "f"((uint64_t)filter_x[0]), [filter_x1] "f"((uint64_t)filter_x[1]), [filter_y0] "f"((uint64_t)filter_y[0]), [filter_y1] "f"((uint64_t)filter_y[1]), - [a_stride] "r"((mips_reg)a_stride), [ff_ph_40] "f"(ff_ph_40), + [src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40), [mask] "f"(mask) : "memory" ); } -#define SUBPIX_VAR4XN(H) \ - uint32_t vpx_sub_pixel_variance4x##H##_mmi( \ - const uint8_t *a, int a_stride, int xoffset, int yoffset, \ - const uint8_t *b, int b_stride, uint32_t *sse) { \ - uint8_t temp2[4 * H]; \ - var_filter_block2d_bil_4x(a, a_stride, xoffset, yoffset, temp2, \ - (H - 2) / 2); \ - \ - return vpx_variance4x##H##_mmi(temp2, 4, b, b_stride, sse); \ +#define SUBPIX_VAR4XN(H) \ + uint32_t vpx_sub_pixel_variance4x##H##_mmi( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ + uint8_t temp2[4 * (H)]; \ + var_filter_block2d_bil_4x(src_ptr, src_stride, x_offset, y_offset, temp2, \ + ((H)-2) / 2); \ + \ + return vpx_variance4x##H##_mmi(temp2, 4, ref_ptr, ref_stride, sse); \ } SUBPIX_VAR4XN(8) SUBPIX_VAR4XN(4) -#define SUBPIX_AVG_VAR(W, H) \ - uint32_t vpx_sub_pixel_avg_variance##W##x##H##_mmi( \ - const uint8_t *a, int a_stride, int xoffset, int yoffset, \ - const uint8_t *b, int b_stride, uint32_t *sse, \ - const uint8_t *second_pred) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint8_t temp2[H * W]; \ - DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ - \ - var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \ - bilinear_filters[xoffset]); \ - var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters[yoffset]); \ - \ - vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W); \ - \ - return vpx_variance##W##x##H##_mmi(temp3, W, b, b_stride, sse); \ +#define SUBPIX_AVG_VAR(W, H) \ + uint32_t vpx_sub_pixel_avg_variance##W##x##H##_mmi( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int 
y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[((H) + 1) * (W)]; \ + uint8_t temp2[(H) * (W)]; \ + DECLARE_ALIGNED(16, uint8_t, temp3[(H) * (W)]); \ + \ + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, (H) + 1, \ + W, bilinear_filters[x_offset]); \ + var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W); \ + \ + return vpx_variance##W##x##H##_mmi(temp3, W, ref_ptr, ref_stride, sse); \ } SUBPIX_AVG_VAR(64, 64) diff --git a/media/libvpx/libvpx/vpx_dsp/mips/variance_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/variance_msa.c index 49b2f99230fa..444b086a6e8a 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/variance_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/variance_msa.c @@ -33,10 +33,11 @@ sub += res_l0_m + res_l1_m; \ } -#define VARIANCE_WxH(sse, diff, shift) sse - (((uint32_t)diff * diff) >> shift) +#define VARIANCE_WxH(sse, diff, shift) \ + (sse) - (((uint32_t)(diff) * (diff)) >> (shift)) #define VARIANCE_LARGE_WxH(sse, diff, shift) \ - sse - (((int64_t)diff * diff) >> shift) + (sse) - (((int64_t)(diff) * (diff)) >> (shift)) static uint32_t sse_diff_4width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, diff --git a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c index 187a013421a6..5b5a1cbc3a51 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c @@ -658,7 +658,7 @@ void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride, filt_hor[cnt] = filter_x[cnt]; } - if (((const int32_t *)filter_x)[0] == 0) { + if (vpx_get_filter_taps(filter_x) == 2) { switch (w) { case 4: common_hz_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, diff --git a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_avg_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_avg_msa.c index 5187cea21c98..ba816192a1fa 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_avg_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_avg_msa.c @@ -538,8 +538,8 @@ void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride, filt_ver[cnt] = filter_y[cnt]; } - if (((const int32_t *)filter_x)[0] == 0 && - ((const int32_t *)filter_y)[0] == 0) { + if (vpx_get_filter_taps(filter_x) == 2 && + vpx_get_filter_taps(filter_y) == 2) { switch (w) { case 4: common_hv_2ht_2vt_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, @@ -571,8 +571,8 @@ void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride, x_step_q4, y0_q4, y_step_q4, w, h); break; } - } else if (((const int32_t *)filter_x)[0] == 0 || - ((const int32_t *)filter_y)[0] == 0) { + } else if (vpx_get_filter_taps(filter_x) == 2 || + vpx_get_filter_taps(filter_y) == 2) { vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { diff --git a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c index ef8c90114060..e6a790dfc6d1 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c @@ -625,7 +625,7 @@ void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride, filt_ver[cnt] = filter_y[cnt]; } - if (((const int32_t 
*)filter_y)[0] == 0) { + if (vpx_get_filter_taps(filter_y) == 2) { switch (w) { case 4: common_vt_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, diff --git a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_horiz_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_horiz_msa.c index 152dc26104e9..792c0f709c4c 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_horiz_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_horiz_msa.c @@ -634,7 +634,7 @@ void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride, filt_hor[cnt] = filter_x[cnt]; } - if (((const int32_t *)filter_x)[0] == 0) { + if (vpx_get_filter_taps(filter_x) == 2) { switch (w) { case 4: common_hz_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, diff --git a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c new file mode 100644 index 000000000000..ba9ceb86658d --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c @@ -0,0 +1,716 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_dsp/vpx_filter.h" +#include "vpx_ports/asmdefs_mmi.h" +#include "vpx_ports/mem.h" + +#define GET_DATA_H_MMI \ + "pmaddhw %[ftmp4], %[ftmp4], %[filter1] \n\t" \ + "pmaddhw %[ftmp5], %[ftmp5], %[filter2] \n\t" \ + "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ + "punpckhwd %[ftmp5], %[ftmp4], %[ftmp0] \n\t" \ + "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ + "pmaddhw %[ftmp6], %[ftmp6], %[filter1] \n\t" \ + "pmaddhw %[ftmp7], %[ftmp7], %[filter2] \n\t" \ + "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t" \ + "punpckhwd %[ftmp7], %[ftmp6], %[ftmp0] \n\t" \ + "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t" \ + "punpcklwd %[srcl], %[ftmp4], %[ftmp6] \n\t" \ + "pmaddhw %[ftmp8], %[ftmp8], %[filter1] \n\t" \ + "pmaddhw %[ftmp9], %[ftmp9], %[filter2] \n\t" \ + "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ + "punpckhwd %[ftmp9], %[ftmp8], %[ftmp0] \n\t" \ + "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ + "pmaddhw %[ftmp10], %[ftmp10], %[filter1] \n\t" \ + "pmaddhw %[ftmp11], %[ftmp11], %[filter2] \n\t" \ + "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \ + "punpckhwd %[ftmp11], %[ftmp10], %[ftmp0] \n\t" \ + "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \ + "punpcklwd %[srch], %[ftmp8], %[ftmp10] \n\t" + +#define GET_DATA_V_MMI \ + "punpcklhw %[srcl], %[ftmp4], %[ftmp5] \n\t" \ + "pmaddhw %[srcl], %[srcl], %[filter10] \n\t" \ + "punpcklhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t" \ + "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t" \ + "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \ + "punpcklhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t" \ + "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t" \ + "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \ + "punpcklhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t" \ + "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t" \ + "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \ + "punpckhhw %[srch], %[ftmp4], %[ftmp5] \n\t" \ + "pmaddhw %[srch], %[srch], %[filter10] \n\t" \ + "punpckhhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t" \ + "pmaddhw %[ftmp12], %[ftmp12], 
%[filter32] \n\t" \ + "paddw %[srch], %[srch], %[ftmp12] \n\t" \ + "punpckhhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t" \ + "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t" \ + "paddw %[srch], %[srch], %[ftmp12] \n\t" \ + "punpckhhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t" \ + "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t" \ + "paddw %[srch], %[srch], %[ftmp12] \n\t" + +/* clang-format off */ +#define ROUND_POWER_OF_TWO_MMI \ + /* Add para[0] */ \ + "lw %[tmp0], 0x00(%[para]) \n\t" \ + MMI_MTC1(%[tmp0], %[ftmp6]) \ + "punpcklwd %[ftmp6], %[ftmp6], %[ftmp6] \n\t" \ + "paddw %[srcl], %[srcl], %[ftmp6] \n\t" \ + "paddw %[srch], %[srch], %[ftmp6] \n\t" \ + /* Arithmetic right shift para[1] bits */ \ + "lw %[tmp0], 0x04(%[para]) \n\t" \ + MMI_MTC1(%[tmp0], %[ftmp5]) \ + "psraw %[srcl], %[srcl], %[ftmp5] \n\t" \ + "psraw %[srch], %[srch], %[ftmp5] \n\t" +/* clang-format on */ + +#define CLIP_PIXEL_MMI \ + /* Staturated operation */ \ + "packsswh %[srcl], %[srcl], %[srch] \n\t" \ + "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t" + +static void convolve_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int x_step_q4, int32_t w, int32_t h) { + const int16_t *filter_x = filter[x0_q4]; + double ftmp[14]; + uint32_t tmp[2]; + uint32_t para[5]; + para[0] = (1 << ((FILTER_BITS)-1)); + para[1] = FILTER_BITS; + src -= SUBPEL_TAPS / 2 - 1; + src_stride -= w; + dst_stride -= w; + (void)x_step_q4; + + /* clang-format off */ + __asm__ volatile( + "move %[tmp1], %[width] \n\t" + "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" + "gsldlc1 %[filter1], 0x03(%[filter]) \n\t" + "gsldrc1 %[filter1], 0x00(%[filter]) \n\t" + "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t" + "gsldrc1 %[filter2], 0x08(%[filter]) \n\t" + "1: \n\t" + /* Get 8 data per row */ + "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t" + "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t" + "gsldlc1 %[ftmp7], 0x08(%[src]) \n\t" + "gsldrc1 %[ftmp7], 0x01(%[src]) \n\t" + "gsldlc1 %[ftmp9], 0x09(%[src]) \n\t" + "gsldrc1 %[ftmp9], 0x02(%[src]) \n\t" + "gsldlc1 %[ftmp11], 0x0A(%[src]) \n\t" + "gsldrc1 %[ftmp11], 0x03(%[src]) \n\t" + "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t" + "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" + "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t" + "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" + "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t" + "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" + "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t" + "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" + MMI_ADDIU(%[width], %[width], -0x04) + /* Get raw data */ + GET_DATA_H_MMI + ROUND_POWER_OF_TWO_MMI + CLIP_PIXEL_MMI + "swc1 %[ftmp12], 0x00(%[dst]) \n\t" + MMI_ADDIU(%[dst], %[dst], 0x04) + MMI_ADDIU(%[src], %[src], 0x04) + /* Loop count */ + "bnez %[width], 1b \n\t" + "move %[width], %[tmp1] \n\t" + MMI_ADDU(%[src], %[src], %[src_stride]) + MMI_ADDU(%[dst], %[dst], %[dst_stride]) + MMI_ADDIU(%[height], %[height], -0x01) + "bnez %[height], 1b \n\t" + : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), + [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]), + [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]), + [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]), + [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]), + [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]), + [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]), + [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), + [src]"+&r"(src), [width]"+&r"(w), + [dst]"+&r"(dst), [height]"+&r"(h) + : [filter]"r"(filter_x), [para]"r"(para), + [src_stride]"r"((mips_reg)src_stride), + 
[dst_stride]"r"((mips_reg)dst_stride) + : "memory" + ); + /* clang-format on */ +} + +static void convolve_vert_mmi(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int y0_q4, + int y_step_q4, int32_t w, int32_t h) { + const int16_t *filter_y = filter[y0_q4]; + double ftmp[16]; + uint32_t tmp[1]; + uint32_t para[2]; + ptrdiff_t addr = src_stride; + para[0] = (1 << ((FILTER_BITS)-1)); + para[1] = FILTER_BITS; + src -= src_stride * (SUBPEL_TAPS / 2 - 1); + src_stride -= w; + dst_stride -= w; + (void)y_step_q4; + + __asm__ volatile( + "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" + "gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t" + "gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t" + "gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t" + "gsldrc1 %[ftmp5], 0x08(%[filter]) \n\t" + "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t" + "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t" + "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t" + "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t" + "1: \n\t" + /* Get 8 data per column */ + "gsldlc1 %[ftmp4], 0x07(%[src]) \n\t" + "gsldrc1 %[ftmp4], 0x00(%[src]) \n\t" + MMI_ADDU(%[tmp0], %[src], %[addr]) + "gsldlc1 %[ftmp5], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp5], 0x00(%[tmp0]) \n\t" + MMI_ADDU(%[tmp0], %[tmp0], %[addr]) + "gsldlc1 %[ftmp6], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp6], 0x00(%[tmp0]) \n\t" + MMI_ADDU(%[tmp0], %[tmp0], %[addr]) + "gsldlc1 %[ftmp7], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp7], 0x00(%[tmp0]) \n\t" + MMI_ADDU(%[tmp0], %[tmp0], %[addr]) + "gsldlc1 %[ftmp8], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp8], 0x00(%[tmp0]) \n\t" + MMI_ADDU(%[tmp0], %[tmp0], %[addr]) + "gsldlc1 %[ftmp9], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp9], 0x00(%[tmp0]) \n\t" + MMI_ADDU(%[tmp0], %[tmp0], %[addr]) + "gsldlc1 %[ftmp10], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp10], 0x00(%[tmp0]) \n\t" + MMI_ADDU(%[tmp0], %[tmp0], %[addr]) + "gsldlc1 %[ftmp11], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp11], 0x00(%[tmp0]) \n\t" + "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" + "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" + "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" + "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" + "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" + "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" + "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" + "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" + MMI_ADDIU(%[width], %[width], -0x04) + /* Get raw data */ + GET_DATA_V_MMI + ROUND_POWER_OF_TWO_MMI + CLIP_PIXEL_MMI + "swc1 %[ftmp12], 0x00(%[dst]) \n\t" + MMI_ADDIU(%[dst], %[dst], 0x04) + MMI_ADDIU(%[src], %[src], 0x04) + /* Loop count */ + "bnez %[width], 1b \n\t" + MMI_SUBU(%[width], %[addr], %[src_stride]) + MMI_ADDU(%[src], %[src], %[src_stride]) + MMI_ADDU(%[dst], %[dst], %[dst_stride]) + MMI_ADDIU(%[height], %[height], -0x01) + "bnez %[height], 1b \n\t" + : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), + [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]), + [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]), + [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]), + [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]), + [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]), + [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]), + [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]), + [src]"+&r"(src), [dst]"+&r"(dst), + [width]"+&r"(w), [height]"+&r"(h), + [tmp0]"=&r"(tmp[0]) + : [filter]"r"(filter_y), [para]"r"(para), + [src_stride]"r"((mips_reg)src_stride), + [dst_stride]"r"((mips_reg)dst_stride), + [addr]"r"((mips_reg)addr) + : "memory" + ); +} + +static void 
convolve_avg_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int x_step_q4, int32_t w, int32_t h) { + const int16_t *filter_x = filter[x0_q4]; + double ftmp[14]; + uint32_t tmp[2]; + uint32_t para[2]; + para[0] = (1 << ((FILTER_BITS)-1)); + para[1] = FILTER_BITS; + src -= SUBPEL_TAPS / 2 - 1; + src_stride -= w; + dst_stride -= w; + (void)x_step_q4; + + __asm__ volatile( + "move %[tmp1], %[width] \n\t" + "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" + "gsldlc1 %[filter1], 0x03(%[filter]) \n\t" + "gsldrc1 %[filter1], 0x00(%[filter]) \n\t" + "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t" + "gsldrc1 %[filter2], 0x08(%[filter]) \n\t" + "1: \n\t" + /* Get 8 data per row */ + "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t" + "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t" + "gsldlc1 %[ftmp7], 0x08(%[src]) \n\t" + "gsldrc1 %[ftmp7], 0x01(%[src]) \n\t" + "gsldlc1 %[ftmp9], 0x09(%[src]) \n\t" + "gsldrc1 %[ftmp9], 0x02(%[src]) \n\t" + "gsldlc1 %[ftmp11], 0x0A(%[src]) \n\t" + "gsldrc1 %[ftmp11], 0x03(%[src]) \n\t" + "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t" + "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" + "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t" + "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" + "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t" + "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" + "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t" + "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" + MMI_ADDIU(%[width], %[width], -0x04) + /* Get raw data */ + GET_DATA_H_MMI + ROUND_POWER_OF_TWO_MMI + CLIP_PIXEL_MMI + "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t" + "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t" + "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t" + "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" + "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t" + "li %[tmp0], 0x10001 \n\t" + MMI_MTC1(%[tmp0], %[ftmp5]) + "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t" + "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t" + "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t" + "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t" + "swc1 %[ftmp12], 0x00(%[dst]) \n\t" + MMI_ADDIU(%[dst], %[dst], 0x04) + MMI_ADDIU(%[src], %[src], 0x04) + /* Loop count */ + "bnez %[width], 1b \n\t" + "move %[width], %[tmp1] \n\t" + MMI_ADDU(%[src], %[src], %[src_stride]) + MMI_ADDU(%[dst], %[dst], %[dst_stride]) + MMI_ADDIU(%[height], %[height], -0x01) + "bnez %[height], 1b \n\t" + : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), + [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]), + [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]), + [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]), + [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]), + [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]), + [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]), + [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), + [src]"+&r"(src), [width]"+&r"(w), + [dst]"+&r"(dst), [height]"+&r"(h) + : [filter]"r"(filter_x), [para]"r"(para), + [src_stride]"r"((mips_reg)src_stride), + [dst_stride]"r"((mips_reg)dst_stride) + : "memory" + ); +} + +static void convolve_avg_vert_mmi(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int y0_q4, + int y_step_q4, int32_t w, int32_t h) { + const int16_t *filter_y = filter[y0_q4]; + double ftmp[16]; + uint32_t tmp[1]; + uint32_t para[2]; + ptrdiff_t addr = src_stride; + para[0] = (1 << ((FILTER_BITS)-1)); + para[1] = FILTER_BITS; + src -= src_stride * (SUBPEL_TAPS / 2 - 1); + src_stride -= w; + dst_stride -= w; + (void)y_step_q4; + + __asm__ volatile( + "xor %[ftmp0], %[ftmp0], %[ftmp0] 
\n\t" + "gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t" + "gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t" + "gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t" + "gsldrc1 %[ftmp5], 0x08(%[filter]) \n\t" + "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t" + "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t" + "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t" + "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t" + "1: \n\t" + /* Get 8 data per column */ + "gsldlc1 %[ftmp4], 0x07(%[src]) \n\t" + "gsldrc1 %[ftmp4], 0x00(%[src]) \n\t" + MMI_ADDU(%[tmp0], %[src], %[addr]) + "gsldlc1 %[ftmp5], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp5], 0x00(%[tmp0]) \n\t" + MMI_ADDU(%[tmp0], %[tmp0], %[addr]) + "gsldlc1 %[ftmp6], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp6], 0x00(%[tmp0]) \n\t" + MMI_ADDU(%[tmp0], %[tmp0], %[addr]) + "gsldlc1 %[ftmp7], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp7], 0x00(%[tmp0]) \n\t" + MMI_ADDU(%[tmp0], %[tmp0], %[addr]) + "gsldlc1 %[ftmp8], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp8], 0x00(%[tmp0]) \n\t" + MMI_ADDU(%[tmp0], %[tmp0], %[addr]) + "gsldlc1 %[ftmp9], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp9], 0x00(%[tmp0]) \n\t" + MMI_ADDU(%[tmp0], %[tmp0], %[addr]) + "gsldlc1 %[ftmp10], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp10], 0x00(%[tmp0]) \n\t" + MMI_ADDU(%[tmp0], %[tmp0], %[addr]) + "gsldlc1 %[ftmp11], 0x07(%[tmp0]) \n\t" + "gsldrc1 %[ftmp11], 0x00(%[tmp0]) \n\t" + "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" + "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" + "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" + "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" + "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" + "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" + "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" + "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" + MMI_ADDIU(%[width], %[width], -0x04) + /* Get raw data */ + GET_DATA_V_MMI + ROUND_POWER_OF_TWO_MMI + CLIP_PIXEL_MMI + "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t" + "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t" + "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t" + "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" + "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t" + "li %[tmp0], 0x10001 \n\t" + MMI_MTC1(%[tmp0], %[ftmp5]) + "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t" + "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t" + "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t" + "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t" + "swc1 %[ftmp12], 0x00(%[dst]) \n\t" + MMI_ADDIU(%[dst], %[dst], 0x04) + MMI_ADDIU(%[src], %[src], 0x04) + /* Loop count */ + "bnez %[width], 1b \n\t" + MMI_SUBU(%[width], %[addr], %[src_stride]) + MMI_ADDU(%[src], %[src], %[src_stride]) + MMI_ADDU(%[dst], %[dst], %[dst_stride]) + MMI_ADDIU(%[height], %[height], -0x01) + "bnez %[height], 1b \n\t" + : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), + [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]), + [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]), + [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]), + [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]), + [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]), + [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]), + [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]), + [src]"+&r"(src), [dst]"+&r"(dst), + [width]"+&r"(w), [height]"+&r"(h), + [tmp0]"=&r"(tmp[0]) + : [filter]"r"(filter_y), [para]"r"(para), + [src_stride]"r"((mips_reg)src_stride), + [dst_stride]"r"((mips_reg)dst_stride), + [addr]"r"((mips_reg)addr) + : "memory" + ); +} + +void vpx_convolve_avg_mmi(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, int x_step_q4, + int y0_q4, int y_step_q4, int w, int 
h) { + int x, y; + + (void)filter; + (void)x0_q4; + (void)x_step_q4; + (void)y0_q4; + (void)y_step_q4; + + if (w & 0x03) { + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); + src += src_stride; + dst += dst_stride; + } + } else { + double ftmp[4]; + uint32_t tmp[2]; + src_stride -= w; + dst_stride -= w; + + __asm__ volatile( + "move %[tmp1], %[width] \n\t" + "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" + "li %[tmp0], 0x10001 \n\t" + MMI_MTC1(%[tmp0], %[ftmp3]) + "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" + "1: \n\t" + "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" + "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" + "gsldlc1 %[ftmp2], 0x07(%[dst]) \n\t" + "gsldrc1 %[ftmp2], 0x00(%[dst]) \n\t" + "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" + "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" + "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" + "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" + "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t" + "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" + "swc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_ADDIU(%[width], %[width], -0x04) + MMI_ADDIU(%[dst], %[dst], 0x04) + MMI_ADDIU(%[src], %[src], 0x04) + "bnez %[width], 1b \n\t" + "move %[width], %[tmp1] \n\t" + MMI_ADDU(%[dst], %[dst], %[dst_stride]) + MMI_ADDU(%[src], %[src], %[src_stride]) + MMI_ADDIU(%[height], %[height], -0x01) + "bnez %[height], 1b \n\t" + : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), + [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), + [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), + [src]"+&r"(src), [dst]"+&r"(dst), + [width]"+&r"(w), [height]"+&r"(h) + : [src_stride]"r"((mips_reg)src_stride), + [dst_stride]"r"((mips_reg)dst_stride) + : "memory" + ); + } +} + +static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *x_filters, int x0_q4, + int x_step_q4, int w, int h) { + int x, y; + src -= SUBPEL_TAPS / 2 - 1; + + for (y = 0; y < h; ++y) { + int x_q4 = x0_q4; + for (x = 0; x < w; ++x) { + const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; + const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; + int k, sum = 0; + for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k]; + dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); + x_q4 += x_step_q4; + } + src += src_stride; + dst += dst_stride; + } +} + +static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *y_filters, int y0_q4, + int y_step_q4, int w, int h) { + int x, y; + src -= src_stride * (SUBPEL_TAPS / 2 - 1); + + for (x = 0; x < w; ++x) { + int y_q4 = y0_q4; + for (y = 0; y < h; ++y) { + const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; + const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; + int k, sum = 0; + for (k = 0; k < SUBPEL_TAPS; ++k) + sum += src_y[k * src_stride] * y_filter[k]; + dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); + y_q4 += y_step_q4; + } + ++src; + ++dst; + } +} + +static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *y_filters, int y0_q4, + int y_step_q4, int w, int h) { + int x, y; + src -= src_stride * (SUBPEL_TAPS / 2 - 1); + + for (x = 0; x < w; ++x) { + int y_q4 = y0_q4; + for (y = 0; y < h; ++y) { + const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; + const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; + int k, sum = 0; + for (k = 0; k < SUBPEL_TAPS; ++k) + sum += src_y[k * src_stride] 
* y_filter[k];
+      dst[y * dst_stride] = ROUND_POWER_OF_TWO(
+          dst[y * dst_stride] +
+              clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
+          1);
+      y_q4 += y_step_q4;
+    }
+    ++src;
+    ++dst;
+  }
+}
+
+static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
+                               uint8_t *dst, ptrdiff_t dst_stride,
+                               const InterpKernel *x_filters, int x0_q4,
+                               int x_step_q4, int w, int h) {
+  int x, y;
+  src -= SUBPEL_TAPS / 2 - 1;
+
+  for (y = 0; y < h; ++y) {
+    int x_q4 = x0_q4;
+    for (x = 0; x < w; ++x) {
+      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
+      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
+      int k, sum = 0;
+      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
+      dst[x] = ROUND_POWER_OF_TWO(
+          dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
+      x_q4 += x_step_q4;
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+void vpx_convolve8_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
+                       ptrdiff_t dst_stride, const InterpKernel *filter,
+                       int x0_q4, int32_t x_step_q4, int y0_q4,
+                       int32_t y_step_q4, int32_t w, int32_t h) {
+  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
+  // 2d filtering proceeds in 2 steps:
+  //   (1) Interpolate horizontally into an intermediate buffer, temp.
+  //   (2) Interpolate temp vertically to derive the sub-pixel result.
+  // Deriving the maximum number of rows in the temp buffer (135):
+  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
+  // --Largest block size is 64x64 pixels.
+  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
+  //   original frame (in 1/16th pixel units).
+  // --Must round-up because block may be located at sub-pixel position.
+  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
+  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
+  // When calling into the frame scaling function, the smallest scaling factor
+  // is x1/4 ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer
+  // is still big enough.
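As a quick cross-check of the buffer-size arithmetic in the comment above, here is a standalone sketch (not part of the patch; SUBPEL_BITS = 4 and SUBPEL_TAPS = 8 as defined in vpx_dsp/vpx_filter.h):

#include <assert.h>

/* Worst case for the two-pass filter: h = 64 rows, x1/2 scale (y_step_q4 =
   32) and the largest sub-pixel phase y0_q4 = 15. */
int main(void) {
  const int h = 64, y0_q4 = 15, y_step_q4 = 32;
  const int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> 4) + 8;
  assert(intermediate_height <= 135); /* fits the uint8_t temp[64 * 135] */
  return 0;
}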
+ uint8_t temp[64 * 135]; + const int intermediate_height = + (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; + + assert(w <= 64); + assert(h <= 64); + assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32)); + assert(x_step_q4 <= 64); + + if (w & 0x03) { + convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, + 64, filter, x0_q4, x_step_q4, w, intermediate_height); + convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, + filter, y0_q4, y_step_q4, w, h); + } else { + convolve_horiz_mmi(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, + temp, 64, filter, x0_q4, x_step_q4, w, + intermediate_height); + convolve_vert_mmi(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, + filter, y0_q4, y_step_q4, w, h); + } +} + +void vpx_convolve8_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int32_t x_step_q4, int y0_q4, int32_t y_step_q4, + int32_t w, int32_t h) { + (void)y0_q4; + (void)y_step_q4; + if (w & 0x03) + convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, + w, h); + else + convolve_horiz_mmi(src, src_stride, dst, dst_stride, filter, x0_q4, + x_step_q4, w, h); +} + +void vpx_convolve8_vert_mmi(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int32_t x_step_q4, int y0_q4, int y_step_q4, int w, + int h) { + (void)x0_q4; + (void)x_step_q4; + if (w & 0x03) + convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w, + h); + else + convolve_vert_mmi(src, src_stride, dst, dst_stride, filter, y0_q4, + y_step_q4, w, h); +} + +void vpx_convolve8_avg_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int32_t x_step_q4, int y0_q4, int y_step_q4, + int w, int h) { + (void)y0_q4; + (void)y_step_q4; + if (w & 0x03) + convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, + x_step_q4, w, h); + else + convolve_avg_horiz_mmi(src, src_stride, dst, dst_stride, filter, x0_q4, + x_step_q4, w, h); +} + +void vpx_convolve8_avg_vert_mmi(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int32_t x_step_q4, int y0_q4, int y_step_q4, + int w, int h) { + (void)x0_q4; + (void)x_step_q4; + if (w & 0x03) + convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4, + y_step_q4, w, h); + else + convolve_avg_vert_mmi(src, src_stride, dst, dst_stride, filter, y0_q4, + y_step_q4, w, h); +} + +void vpx_convolve8_avg_mmi(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int32_t x_step_q4, int y0_q4, int32_t y_step_q4, + int32_t w, int32_t h) { + // Fixed size intermediate buffer places limits on parameters. 
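In scalar terms, every avg path here filters the pixel first and then takes a rounded average with the value already in dst; vpx_convolve8_avg_mmi below does the same at block level by filtering into temp and then blending with vpx_convolve_avg_mmi. A minimal sketch of that final blend step, using the same ROUND_POWER_OF_TWO semantics as the C fallbacks above:

/* Rounded average used by the convolve8_avg paths: filter, then blend with
   the existing destination pixel. ROUND_POWER_OF_TWO(x, 1) == (x + 1) >> 1. */
static unsigned char avg_with_dst(unsigned char dst, unsigned char filtered) {
  return (unsigned char)((dst + filtered + 1) >> 1);
}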
+ DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]); + assert(w <= 64); + assert(h <= 64); + + vpx_convolve8_mmi(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4, + y_step_q4, w, h); + vpx_convolve_avg_mmi(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h); +} diff --git a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_msa.c index d35a5a7a639a..c942167587bf 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_msa.c @@ -558,8 +558,8 @@ void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, filt_ver[cnt] = filter_y[cnt]; } - if (((const int32_t *)filter_x)[0] == 0 && - ((const int32_t *)filter_y)[0] == 0) { + if (vpx_get_filter_taps(filter_x) == 2 && + vpx_get_filter_taps(filter_y) == 2) { switch (w) { case 4: common_hv_2ht_2vt_4w_msa(src, (int32_t)src_stride, dst, @@ -591,8 +591,8 @@ void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, x_step_q4, y0_q4, y_step_q4, w, h); break; } - } else if (((const int32_t *)filter_x)[0] == 0 || - ((const int32_t *)filter_y)[0] == 0) { + } else if (vpx_get_filter_taps(filter_x) == 2 || + vpx_get_filter_taps(filter_y) == 2) { vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { diff --git a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_vert_msa.c b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_vert_msa.c index 13fce0077c9b..195228689e0b 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_vert_msa.c +++ b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve8_vert_msa.c @@ -641,7 +641,7 @@ void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride, filt_ver[cnt] = filter_y[cnt]; } - if (((const int32_t *)filter_y)[0] == 0) { + if (vpx_get_filter_taps(filter_y) == 2) { switch (w) { case 4: common_vt_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, diff --git a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve_msa.h b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve_msa.h index d53244596be3..a0280c5434b3 100644 --- a/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve_msa.h +++ b/media/libvpx/libvpx/vpx_dsp/mips/vpx_convolve_msa.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_ -#define VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_ +#ifndef VPX_VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_ +#define VPX_VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_ #include "vpx_dsp/mips/macros_msa.h" #include "vpx_dsp/vpx_filter.h" @@ -119,4 +119,4 @@ extern const uint8_t mc_filt_mask_arr[16 * 3]; AVER_UB2_UB(tmp0_m, dst0, tmp1_m, dst1, tmp0_m, tmp1_m); \ ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \ } -#endif /* VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_ */ +#endif // VPX_VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/postproc.h b/media/libvpx/libvpx/vpx_dsp/postproc.h index 43cb5c8e8dee..37f993f81471 100644 --- a/media/libvpx/libvpx/vpx_dsp/postproc.h +++ b/media/libvpx/libvpx/vpx_dsp/postproc.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_POSTPROC_H_ -#define VPX_DSP_POSTPROC_H_ +#ifndef VPX_VPX_DSP_POSTPROC_H_ +#define VPX_VPX_DSP_POSTPROC_H_ #ifdef __cplusplus extern "C" { @@ -22,4 +22,4 @@ int vpx_setup_noise(double sigma, int8_t *noise, int size); } #endif -#endif // VPX_DSP_POSTPROC_H_ +#endif // VPX_VPX_DSP_POSTPROC_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/bitdepth_conversion_vsx.h b/media/libvpx/libvpx/vpx_dsp/ppc/bitdepth_conversion_vsx.h index 2c5d9a4f6a57..7ac873f9fc0d 100644 --- a/media/libvpx/libvpx/vpx_dsp/ppc/bitdepth_conversion_vsx.h +++ b/media/libvpx/libvpx/vpx_dsp/ppc/bitdepth_conversion_vsx.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_PPC_BITDEPTH_CONVERSION_VSX_H_ -#define VPX_DSP_PPC_BITDEPTH_CONVERSION_VSX_H_ +#ifndef VPX_VPX_DSP_PPC_BITDEPTH_CONVERSION_VSX_H_ +#define VPX_VPX_DSP_PPC_BITDEPTH_CONVERSION_VSX_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" @@ -44,4 +44,4 @@ static INLINE void store_tran_low(int16x8_t v, int32_t c, tran_low_t *s) { #endif } -#endif // VPX_DSP_PPC_BITDEPTH_CONVERSION_VSX_H_ +#endif // VPX_VPX_DSP_PPC_BITDEPTH_CONVERSION_VSX_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/deblock_vsx.c b/media/libvpx/libvpx/vpx_dsp/ppc/deblock_vsx.c new file mode 100644 index 000000000000..212991169660 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/ppc/deblock_vsx.c @@ -0,0 +1,374 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include <assert.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/ppc/types_vsx.h"
+
+extern const int16_t vpx_rv[];
+
+static const uint8x16_t load_merge = { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A,
+                                       0x0C, 0x0E, 0x18, 0x19, 0x1A, 0x1B,
+                                       0x1C, 0x1D, 0x1E, 0x1F };
+
+static const uint8x16_t st8_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+                                     0x06, 0x07, 0x18, 0x19, 0x1A, 0x1B,
+                                     0x1C, 0x1D, 0x1E, 0x1F };
+
+static INLINE uint8x16_t apply_filter(uint8x16_t ctx[4], uint8x16_t v,
+                                      uint8x16_t filter) {
+  const uint8x16_t k1 = vec_avg(ctx[0], ctx[1]);
+  const uint8x16_t k2 = vec_avg(ctx[3], ctx[2]);
+  const uint8x16_t k3 = vec_avg(k1, k2);
+  const uint8x16_t f_a = vec_max(vec_absd(v, ctx[0]), vec_absd(v, ctx[1]));
+  const uint8x16_t f_b = vec_max(vec_absd(v, ctx[2]), vec_absd(v, ctx[3]));
+  const bool8x16_t mask = vec_cmplt(vec_max(f_a, f_b), filter);
+  return vec_sel(v, vec_avg(k3, v), mask);
+}
+
+static INLINE void vert_ctx(uint8x16_t ctx[4], int col, uint8_t *src,
+                            int stride) {
+  ctx[0] = vec_vsx_ld(col - 2 * stride, src);
+  ctx[1] = vec_vsx_ld(col - stride, src);
+  ctx[2] = vec_vsx_ld(col + stride, src);
+  ctx[3] = vec_vsx_ld(col + 2 * stride, src);
+}
+
+static INLINE void horz_ctx(uint8x16_t ctx[4], uint8x16_t left_ctx,
+                            uint8x16_t v, uint8x16_t right_ctx) {
+  static const uint8x16_t l2_perm = { 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13,
+                                      0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+                                      0x1A, 0x1B, 0x1C, 0x1D };
+
+  static const uint8x16_t l1_perm = { 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14,
+                                      0x15, 0x16, 0x17, 0x18, 0x19, 0x1A,
+                                      0x1B, 0x1C, 0x1D, 0x1E };
+
+  static const uint8x16_t r1_perm = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+                                      0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C,
+                                      0x0D, 0x0E, 0x0F, 0x10 };
+
+  static const uint8x16_t r2_perm = { 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+                                      0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                      0x0E, 0x0F, 0x10, 0x11 };
+  ctx[0] = vec_perm(left_ctx, v, l2_perm);
+  ctx[1] = vec_perm(left_ctx, v, l1_perm);
+  ctx[2] = vec_perm(v, right_ctx, r1_perm);
+  ctx[3] = vec_perm(v, right_ctx, r2_perm);
+}
+
+void vpx_post_proc_down_and_across_mb_row_vsx(unsigned char *src_ptr,
+                                              unsigned char *dst_ptr,
+                                              int src_pixels_per_line,
+                                              int dst_pixels_per_line,
+                                              int cols, unsigned char *f,
+                                              int size) {
+  int row, col;
+  uint8x16_t ctx[4], out, v, left_ctx;
+
+  for (row = 0; row < size; row++) {
+    for (col = 0; col < cols - 8; col += 16) {
+      const uint8x16_t filter = vec_vsx_ld(col, f);
+      v = vec_vsx_ld(col, src_ptr);
+      vert_ctx(ctx, col, src_ptr, src_pixels_per_line);
+      vec_vsx_st(apply_filter(ctx, v, filter), col, dst_ptr);
+    }
+
+    if (col != cols) {
+      const uint8x16_t filter = vec_vsx_ld(col, f);
+      v = vec_vsx_ld(col, src_ptr);
+      vert_ctx(ctx, col, src_ptr, src_pixels_per_line);
+      out = apply_filter(ctx, v, filter);
+      vec_vsx_st(vec_perm(out, v, st8_perm), col, dst_ptr);
+    }
+
+    /* now post_proc_across */
+    left_ctx = vec_splats(dst_ptr[0]);
+    v = vec_vsx_ld(0, dst_ptr);
+    for (col = 0; col < cols - 8; col += 16) {
+      const uint8x16_t filter = vec_vsx_ld(col, f);
+      const uint8x16_t right_ctx = (col + 16 == cols)
+                                       ?
vec_splats(dst_ptr[cols - 1]) + : vec_vsx_ld(col, dst_ptr + 16); + horz_ctx(ctx, left_ctx, v, right_ctx); + vec_vsx_st(apply_filter(ctx, v, filter), col, dst_ptr); + left_ctx = v; + v = right_ctx; + } + + if (col != cols) { + const uint8x16_t filter = vec_vsx_ld(col, f); + const uint8x16_t right_ctx = vec_splats(dst_ptr[cols - 1]); + horz_ctx(ctx, left_ctx, v, right_ctx); + out = apply_filter(ctx, v, filter); + vec_vsx_st(vec_perm(out, v, st8_perm), col, dst_ptr); + } + + src_ptr += src_pixels_per_line; + dst_ptr += dst_pixels_per_line; + } +} + +// C: s[c + 7] +static INLINE int16x8_t next7l_s16(uint8x16_t c) { + static const uint8x16_t next7_perm = { + 0x07, 0x10, 0x08, 0x11, 0x09, 0x12, 0x0A, 0x13, + 0x0B, 0x14, 0x0C, 0x15, 0x0D, 0x16, 0x0E, 0x17, + }; + return (int16x8_t)vec_perm(c, vec_zeros_u8, next7_perm); +} + +// Slide across window and add. +static INLINE int16x8_t slide_sum_s16(int16x8_t x) { + // x = A B C D E F G H + // + // 0 A B C D E F G + const int16x8_t sum1 = vec_add(x, vec_slo(x, vec_splats((int8_t)(2 << 3)))); + // 0 0 A B C D E F + const int16x8_t sum2 = vec_add(vec_slo(x, vec_splats((int8_t)(4 << 3))), + // 0 0 0 A B C D E + vec_slo(x, vec_splats((int8_t)(6 << 3)))); + // 0 0 0 0 A B C D + const int16x8_t sum3 = vec_add(vec_slo(x, vec_splats((int8_t)(8 << 3))), + // 0 0 0 0 0 A B C + vec_slo(x, vec_splats((int8_t)(10 << 3)))); + // 0 0 0 0 0 0 A B + const int16x8_t sum4 = vec_add(vec_slo(x, vec_splats((int8_t)(12 << 3))), + // 0 0 0 0 0 0 0 A + vec_slo(x, vec_splats((int8_t)(14 << 3)))); + return vec_add(vec_add(sum1, sum2), vec_add(sum3, sum4)); +} + +// Slide across window and add. +static INLINE int32x4_t slide_sumsq_s32(int32x4_t xsq_even, int32x4_t xsq_odd) { + // 0 A C E + // + 0 B D F + int32x4_t sumsq_1 = vec_add(vec_slo(xsq_even, vec_splats((int8_t)(4 << 3))), + vec_slo(xsq_odd, vec_splats((int8_t)(4 << 3)))); + // 0 0 A C + // + 0 0 B D + int32x4_t sumsq_2 = vec_add(vec_slo(xsq_even, vec_splats((int8_t)(8 << 3))), + vec_slo(xsq_odd, vec_splats((int8_t)(8 << 3)))); + // 0 0 0 A + // + 0 0 0 B + int32x4_t sumsq_3 = vec_add(vec_slo(xsq_even, vec_splats((int8_t)(12 << 3))), + vec_slo(xsq_odd, vec_splats((int8_t)(12 << 3)))); + sumsq_1 = vec_add(sumsq_1, xsq_even); + sumsq_2 = vec_add(sumsq_2, sumsq_3); + return vec_add(sumsq_1, sumsq_2); +} + +// C: (b + sum + val) >> 4 +static INLINE int16x8_t filter_s16(int16x8_t b, int16x8_t sum, int16x8_t val) { + return vec_sra(vec_add(vec_add(b, sum), val), vec_splats((uint16_t)4)); +} + +// C: sumsq * 15 - sum * sum +static INLINE bool16x8_t mask_s16(int32x4_t sumsq_even, int32x4_t sumsq_odd, + int16x8_t sum, int32x4_t lim) { + static const uint8x16_t mask_merge = { 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, + 0x14, 0x15, 0x08, 0x09, 0x18, 0x19, + 0x0C, 0x0D, 0x1C, 0x1D }; + const int32x4_t sumsq_odd_scaled = + vec_mul(sumsq_odd, vec_splats((int32_t)15)); + const int32x4_t sumsq_even_scaled = + vec_mul(sumsq_even, vec_splats((int32_t)15)); + const int32x4_t thres_odd = vec_sub(sumsq_odd_scaled, vec_mulo(sum, sum)); + const int32x4_t thres_even = vec_sub(sumsq_even_scaled, vec_mule(sum, sum)); + + const bool32x4_t mask_odd = vec_cmplt(thres_odd, lim); + const bool32x4_t mask_even = vec_cmplt(thres_even, lim); + return vec_perm((bool16x8_t)mask_even, (bool16x8_t)mask_odd, mask_merge); +} + +void vpx_mbpost_proc_across_ip_vsx(unsigned char *src, int pitch, int rows, + int cols, int flimit) { + int row, col; + const int32x4_t lim = vec_splats(flimit); + + // 8 columns are processed at a time. 
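Before the vector body, it may help to see the scalar recurrence being implemented. The following sketch is built from the "// C:" comments in this function rather than from the exact vpx_mbpost_proc_across_ip_c sources: a 15-tap sliding window keeps a running sum and sum of squares, and a pixel is smoothed only when the local variance proxy (sumsq * 15 - sum * sum) falls under flimit. Border pixels are replicated and the window sums are always taken over the unfiltered input:

/* Scalar sketch of one row of the across filter. */
static void across_row_sketch(const unsigned char *in, unsigned char *out,
                              int cols, int flimit) {
  int c, sum = 0, sumsq = 16; /* same +16 bias as the vector code */
  for (c = -8; c <= 6; c++) { /* seed the window, clamping at the left edge */
    const int v = in[c < 0 ? 0 : c];
    sum += v;
    sumsq += v * v;
  }
  for (c = 0; c < cols; c++) {
    const int left = in[c - 8 < 0 ? 0 : c - 8];
    const int right = in[c + 7 > cols - 1 ? cols - 1 : c + 7];
    sum += right - left; /* slide the 15-tap window one pixel right */
    sumsq += right * right - left * left;
    out[c] = (sumsq * 15 - sum * sum < flimit)
                 ? (unsigned char)((8 + sum + in[c]) >> 4)
                 : in[c];
  }
}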
+ assert(cols % 8 == 0); + + for (row = 0; row < rows; row++) { + // The sum is signed and requires at most 13 bits. + // (8 bits + sign) * 15 (4 bits) + int16x8_t sum; + // The sum of squares requires at most 20 bits. + // (16 bits + sign) * 15 (4 bits) + int32x4_t sumsq_even, sumsq_odd; + + // Fill left context with first col. + int16x8_t left_ctx = vec_splats((int16_t)src[0]); + int16_t s = src[0] * 9; + int32_t ssq = src[0] * src[0] * 9 + 16; + + // Fill the next 6 columns of the sliding window with cols 2 to 7. + for (col = 1; col <= 6; ++col) { + s += src[col]; + ssq += src[col] * src[col]; + } + // Set this sum to every element in the window. + sum = vec_splats(s); + sumsq_even = vec_splats(ssq); + sumsq_odd = vec_splats(ssq); + + for (col = 0; col < cols; col += 8) { + bool16x8_t mask; + int16x8_t filtered, masked; + uint8x16_t out; + + const uint8x16_t val = vec_vsx_ld(0, src + col); + const int16x8_t val_high = unpack_to_s16_h(val); + + // C: s[c + 7] + const int16x8_t right_ctx = (col + 8 == cols) + ? vec_splats((int16_t)src[col + 7]) + : next7l_s16(val); + + // C: x = s[c + 7] - s[c - 8]; + const int16x8_t x = vec_sub(right_ctx, left_ctx); + const int32x4_t xsq_even = + vec_sub(vec_mule(right_ctx, right_ctx), vec_mule(left_ctx, left_ctx)); + const int32x4_t xsq_odd = + vec_sub(vec_mulo(right_ctx, right_ctx), vec_mulo(left_ctx, left_ctx)); + + const int32x4_t sumsq_tmp = slide_sumsq_s32(xsq_even, xsq_odd); + // A C E G + // 0 B D F + // 0 A C E + // 0 0 B D + // 0 0 A C + // 0 0 0 B + // 0 0 0 A + sumsq_even = vec_add(sumsq_even, sumsq_tmp); + // B D F G + // A C E G + // 0 B D F + // 0 A C E + // 0 0 B D + // 0 0 A C + // 0 0 0 B + // 0 0 0 A + sumsq_odd = vec_add(sumsq_odd, vec_add(sumsq_tmp, xsq_odd)); + + sum = vec_add(sum, slide_sum_s16(x)); + + // C: (8 + sum + s[c]) >> 4 + filtered = filter_s16(vec_splats((int16_t)8), sum, val_high); + // C: sumsq * 15 - sum * sum + mask = mask_s16(sumsq_even, sumsq_odd, sum, lim); + masked = vec_sel(val_high, filtered, mask); + + out = vec_perm((uint8x16_t)masked, vec_vsx_ld(0, src + col), load_merge); + vec_vsx_st(out, 0, src + col); + + // Update window sum and square sum + sum = vec_splat(sum, 7); + sumsq_even = vec_splat(sumsq_odd, 3); + sumsq_odd = vec_splat(sumsq_odd, 3); + + // C: s[c - 8] (for next iteration) + left_ctx = val_high; + } + src += pitch; + } +} + +void vpx_mbpost_proc_down_vsx(uint8_t *dst, int pitch, int rows, int cols, + int flimit) { + int col, row, i; + int16x8_t window[16]; + const int32x4_t lim = vec_splats(flimit); + + // 8 columns are processed at a time. + assert(cols % 8 == 0); + // If rows is less than 8 the bottom border extension fails. + assert(rows >= 8); + + for (col = 0; col < cols; col += 8) { + // The sum is signed and requires at most 13 bits. + // (8 bits + sign) * 15 (4 bits) + int16x8_t r1, sum; + // The sum of squares requires at most 20 bits. + // (16 bits + sign) * 15 (4 bits) + int32x4_t sumsq_even, sumsq_odd; + + r1 = unpack_to_s16_h(vec_vsx_ld(0, dst)); + // Fill sliding window with first row. + for (i = 0; i <= 8; i++) { + window[i] = r1; + } + // First 9 rows of the sliding window are the same. + // sum = r1 * 9 + sum = vec_mladd(r1, vec_splats((int16_t)9), vec_zeros_s16); + + // sumsq = r1 * r1 * 9 + sumsq_even = vec_mule(sum, r1); + sumsq_odd = vec_mulo(sum, r1); + + // Fill the next 6 rows of the sliding window with rows 2 to 7. 
+ for (i = 1; i <= 6; ++i) { + const int16x8_t next_row = unpack_to_s16_h(vec_vsx_ld(i * pitch, dst)); + window[i + 8] = next_row; + sum = vec_add(sum, next_row); + sumsq_odd = vec_add(sumsq_odd, vec_mulo(next_row, next_row)); + sumsq_even = vec_add(sumsq_even, vec_mule(next_row, next_row)); + } + + for (row = 0; row < rows; row++) { + int32x4_t d15_even, d15_odd, d0_even, d0_odd; + bool16x8_t mask; + int16x8_t filtered, masked; + uint8x16_t out; + + const int16x8_t rv = vec_vsx_ld(0, vpx_rv + (row & 127)); + + // Move the sliding window + if (row + 7 < rows) { + window[15] = unpack_to_s16_h(vec_vsx_ld((row + 7) * pitch, dst)); + } else { + window[15] = window[14]; + } + + // C: sum += s[7 * pitch] - s[-8 * pitch]; + sum = vec_add(sum, vec_sub(window[15], window[0])); + + // C: sumsq += s[7 * pitch] * s[7 * pitch] - s[-8 * pitch] * s[-8 * + // pitch]; + // Optimization Note: Caching a squared-window for odd and even is + // slower than just repeating the multiplies. + d15_odd = vec_mulo(window[15], window[15]); + d15_even = vec_mule(window[15], window[15]); + d0_odd = vec_mulo(window[0], window[0]); + d0_even = vec_mule(window[0], window[0]); + sumsq_odd = vec_add(sumsq_odd, vec_sub(d15_odd, d0_odd)); + sumsq_even = vec_add(sumsq_even, vec_sub(d15_even, d0_even)); + + // C: (vpx_rv[(r & 127) + (c & 7)] + sum + s[0]) >> 4 + filtered = filter_s16(rv, sum, window[8]); + + // C: sumsq * 15 - sum * sum + mask = mask_s16(sumsq_even, sumsq_odd, sum, lim); + masked = vec_sel(window[8], filtered, mask); + + // TODO(ltrudeau) If cols % 16 == 0, we could just process 16 per + // iteration + out = vec_perm((uint8x16_t)masked, vec_vsx_ld(0, dst + row * pitch), + load_merge); + vec_vsx_st(out, 0, dst + row * pitch); + + // Optimization Note: Turns out that the following loop is faster than + // using pointers to manage the sliding window. + for (i = 1; i < 16; i++) { + window[i - 1] = window[i]; + } + } + dst += 8; + } +} diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/fdct32x32_vsx.c b/media/libvpx/libvpx/vpx_dsp/ppc/fdct32x32_vsx.c new file mode 100644 index 000000000000..328b0e313012 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/ppc/fdct32x32_vsx.c @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +#include "vpx_dsp/ppc/transpose_vsx.h" +#include "vpx_dsp/ppc/txfm_common_vsx.h" +#include "vpx_dsp/ppc/types_vsx.h" + +// Returns ((a +/- b) * cospi16 + (2 << 13)) >> 14. +static INLINE void single_butterfly(int16x8_t a, int16x8_t b, int16x8_t *add, + int16x8_t *sub) { + // Since a + b can overflow 16 bits, the multiplication is distributed + // (a * c +/- b * c). + const int32x4_t ac_e = vec_mule(a, cospi16_v); + const int32x4_t ac_o = vec_mulo(a, cospi16_v); + const int32x4_t bc_e = vec_mule(b, cospi16_v); + const int32x4_t bc_o = vec_mulo(b, cospi16_v); + + // Reuse the same multiplies for sum and difference. 
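Per lane, single_butterfly computes dct_const_round_shift((a +/- b) * cospi_16_64); a scalar sketch with the constants from vpx_dsp/txfm_common.h (cospi_16_64 = 11585, DCT_CONST_BITS = 14, DCT_CONST_ROUNDING = 1 << 13):

#include <stdint.h>

/* One-lane equivalent of single_butterfly. 11585 ~= 16384 * cos(pi / 4). */
static void butterfly_lane(int16_t a, int16_t b, int16_t *add, int16_t *sub) {
  const int32_t c = 11585; /* cospi_16_64 */
  *add = (int16_t)((a * c + b * c + (1 << 13)) >> 14);
  *sub = (int16_t)((a * c - b * c + (1 << 13)) >> 14);
}

The vector code continues below by forming those sums and differences once and reusing the four multiplies.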
+  const int32x4_t sum_e = vec_add(ac_e, bc_e);
+  const int32x4_t sum_o = vec_add(ac_o, bc_o);
+  const int32x4_t diff_e = vec_sub(ac_e, bc_e);
+  const int32x4_t diff_o = vec_sub(ac_o, bc_o);
+
+  // Add rounding offset
+  const int32x4_t rsum_o = vec_add(sum_o, vec_dct_const_rounding);
+  const int32x4_t rsum_e = vec_add(sum_e, vec_dct_const_rounding);
+  const int32x4_t rdiff_o = vec_add(diff_o, vec_dct_const_rounding);
+  const int32x4_t rdiff_e = vec_add(diff_e, vec_dct_const_rounding);
+
+  const int32x4_t ssum_o = vec_sra(rsum_o, vec_dct_const_bits);
+  const int32x4_t ssum_e = vec_sra(rsum_e, vec_dct_const_bits);
+  const int32x4_t sdiff_o = vec_sra(rdiff_o, vec_dct_const_bits);
+  const int32x4_t sdiff_e = vec_sra(rdiff_e, vec_dct_const_bits);
+
+  // There's no pack operation for even and odd, so we need to permute.
+  *add = (int16x8_t)vec_perm(ssum_e, ssum_o, vec_perm_odd_even_pack);
+  *sub = (int16x8_t)vec_perm(sdiff_e, sdiff_o, vec_perm_odd_even_pack);
+}
+
+// Returns (a * c1 +/- b * c2 + (2 << 13)) >> 14
+static INLINE void double_butterfly(int16x8_t a, int16x8_t c1, int16x8_t b,
+                                    int16x8_t c2, int16x8_t *add,
+                                    int16x8_t *sub) {
+  const int32x4_t ac1_o = vec_mulo(a, c1);
+  const int32x4_t ac1_e = vec_mule(a, c1);
+  const int32x4_t ac2_o = vec_mulo(a, c2);
+  const int32x4_t ac2_e = vec_mule(a, c2);
+
+  const int32x4_t bc1_o = vec_mulo(b, c1);
+  const int32x4_t bc1_e = vec_mule(b, c1);
+  const int32x4_t bc2_o = vec_mulo(b, c2);
+  const int32x4_t bc2_e = vec_mule(b, c2);
+
+  const int32x4_t sum_o = vec_add(ac1_o, bc2_o);
+  const int32x4_t sum_e = vec_add(ac1_e, bc2_e);
+  const int32x4_t diff_o = vec_sub(ac2_o, bc1_o);
+  const int32x4_t diff_e = vec_sub(ac2_e, bc1_e);
+
+  // Add rounding offset
+  const int32x4_t rsum_o = vec_add(sum_o, vec_dct_const_rounding);
+  const int32x4_t rsum_e = vec_add(sum_e, vec_dct_const_rounding);
+  const int32x4_t rdiff_o = vec_add(diff_o, vec_dct_const_rounding);
+  const int32x4_t rdiff_e = vec_add(diff_e, vec_dct_const_rounding);
+
+  const int32x4_t ssum_o = vec_sra(rsum_o, vec_dct_const_bits);
+  const int32x4_t ssum_e = vec_sra(rsum_e, vec_dct_const_bits);
+  const int32x4_t sdiff_o = vec_sra(rdiff_o, vec_dct_const_bits);
+  const int32x4_t sdiff_e = vec_sra(rdiff_e, vec_dct_const_bits);
+
+  // There's no pack operation for even and odd, so we need to permute.
+  *add = (int16x8_t)vec_perm(ssum_e, ssum_o, vec_perm_odd_even_pack);
+  *sub = (int16x8_t)vec_perm(sdiff_e, sdiff_o, vec_perm_odd_even_pack);
+}
+
+// While other architectures combine the load and the stage 1 operations,
+// Power9 benchmarking shows no benefit in such an approach.
+static INLINE void load(const int16_t *a, int stride, int16x8_t *b) {
+  // Tried out different combinations of load and shift instructions; this is
+  // the fastest one.
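The shifts in the body below scale every input sample up by 4 before the transform, matching the input[j] * 4 pre-scaling of the generic C fdct32 (assuming vec_dct_scale_log2 splats the value 2); per lane this is simply:

/* Per-lane effect of vec_sl(l, vec_dct_scale_log2) in load(), under the
   assumption that the scale constant is 2. */
static int16_t prescale_lane(int16_t x) { return (int16_t)(x * 4); }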
+ { + const int16x8_t l0 = vec_vsx_ld(0, a); + const int16x8_t l1 = vec_vsx_ld(0, a + stride); + const int16x8_t l2 = vec_vsx_ld(0, a + 2 * stride); + const int16x8_t l3 = vec_vsx_ld(0, a + 3 * stride); + const int16x8_t l4 = vec_vsx_ld(0, a + 4 * stride); + const int16x8_t l5 = vec_vsx_ld(0, a + 5 * stride); + const int16x8_t l6 = vec_vsx_ld(0, a + 6 * stride); + const int16x8_t l7 = vec_vsx_ld(0, a + 7 * stride); + + const int16x8_t l8 = vec_vsx_ld(0, a + 8 * stride); + const int16x8_t l9 = vec_vsx_ld(0, a + 9 * stride); + const int16x8_t l10 = vec_vsx_ld(0, a + 10 * stride); + const int16x8_t l11 = vec_vsx_ld(0, a + 11 * stride); + const int16x8_t l12 = vec_vsx_ld(0, a + 12 * stride); + const int16x8_t l13 = vec_vsx_ld(0, a + 13 * stride); + const int16x8_t l14 = vec_vsx_ld(0, a + 14 * stride); + const int16x8_t l15 = vec_vsx_ld(0, a + 15 * stride); + + b[0] = vec_sl(l0, vec_dct_scale_log2); + b[1] = vec_sl(l1, vec_dct_scale_log2); + b[2] = vec_sl(l2, vec_dct_scale_log2); + b[3] = vec_sl(l3, vec_dct_scale_log2); + b[4] = vec_sl(l4, vec_dct_scale_log2); + b[5] = vec_sl(l5, vec_dct_scale_log2); + b[6] = vec_sl(l6, vec_dct_scale_log2); + b[7] = vec_sl(l7, vec_dct_scale_log2); + + b[8] = vec_sl(l8, vec_dct_scale_log2); + b[9] = vec_sl(l9, vec_dct_scale_log2); + b[10] = vec_sl(l10, vec_dct_scale_log2); + b[11] = vec_sl(l11, vec_dct_scale_log2); + b[12] = vec_sl(l12, vec_dct_scale_log2); + b[13] = vec_sl(l13, vec_dct_scale_log2); + b[14] = vec_sl(l14, vec_dct_scale_log2); + b[15] = vec_sl(l15, vec_dct_scale_log2); + } + { + const int16x8_t l16 = vec_vsx_ld(0, a + 16 * stride); + const int16x8_t l17 = vec_vsx_ld(0, a + 17 * stride); + const int16x8_t l18 = vec_vsx_ld(0, a + 18 * stride); + const int16x8_t l19 = vec_vsx_ld(0, a + 19 * stride); + const int16x8_t l20 = vec_vsx_ld(0, a + 20 * stride); + const int16x8_t l21 = vec_vsx_ld(0, a + 21 * stride); + const int16x8_t l22 = vec_vsx_ld(0, a + 22 * stride); + const int16x8_t l23 = vec_vsx_ld(0, a + 23 * stride); + + const int16x8_t l24 = vec_vsx_ld(0, a + 24 * stride); + const int16x8_t l25 = vec_vsx_ld(0, a + 25 * stride); + const int16x8_t l26 = vec_vsx_ld(0, a + 26 * stride); + const int16x8_t l27 = vec_vsx_ld(0, a + 27 * stride); + const int16x8_t l28 = vec_vsx_ld(0, a + 28 * stride); + const int16x8_t l29 = vec_vsx_ld(0, a + 29 * stride); + const int16x8_t l30 = vec_vsx_ld(0, a + 30 * stride); + const int16x8_t l31 = vec_vsx_ld(0, a + 31 * stride); + + b[16] = vec_sl(l16, vec_dct_scale_log2); + b[17] = vec_sl(l17, vec_dct_scale_log2); + b[18] = vec_sl(l18, vec_dct_scale_log2); + b[19] = vec_sl(l19, vec_dct_scale_log2); + b[20] = vec_sl(l20, vec_dct_scale_log2); + b[21] = vec_sl(l21, vec_dct_scale_log2); + b[22] = vec_sl(l22, vec_dct_scale_log2); + b[23] = vec_sl(l23, vec_dct_scale_log2); + + b[24] = vec_sl(l24, vec_dct_scale_log2); + b[25] = vec_sl(l25, vec_dct_scale_log2); + b[26] = vec_sl(l26, vec_dct_scale_log2); + b[27] = vec_sl(l27, vec_dct_scale_log2); + b[28] = vec_sl(l28, vec_dct_scale_log2); + b[29] = vec_sl(l29, vec_dct_scale_log2); + b[30] = vec_sl(l30, vec_dct_scale_log2); + b[31] = vec_sl(l31, vec_dct_scale_log2); + } +} + +static INLINE void store(tran_low_t *a, const int16x8_t *b) { + vec_vsx_st(b[0], 0, a); + vec_vsx_st(b[8], 0, a + 8); + vec_vsx_st(b[16], 0, a + 16); + vec_vsx_st(b[24], 0, a + 24); + + vec_vsx_st(b[1], 0, a + 32); + vec_vsx_st(b[9], 0, a + 40); + vec_vsx_st(b[17], 0, a + 48); + vec_vsx_st(b[25], 0, a + 56); + + vec_vsx_st(b[2], 0, a + 64); + vec_vsx_st(b[10], 0, a + 72); + vec_vsx_st(b[18], 0, a + 80); + 
vec_vsx_st(b[26], 0, a + 88);
+
+  vec_vsx_st(b[3], 0, a + 96);
+  vec_vsx_st(b[11], 0, a + 104);
+  vec_vsx_st(b[19], 0, a + 112);
+  vec_vsx_st(b[27], 0, a + 120);
+
+  vec_vsx_st(b[4], 0, a + 128);
+  vec_vsx_st(b[12], 0, a + 136);
+  vec_vsx_st(b[20], 0, a + 144);
+  vec_vsx_st(b[28], 0, a + 152);
+
+  vec_vsx_st(b[5], 0, a + 160);
+  vec_vsx_st(b[13], 0, a + 168);
+  vec_vsx_st(b[21], 0, a + 176);
+  vec_vsx_st(b[29], 0, a + 184);
+
+  vec_vsx_st(b[6], 0, a + 192);
+  vec_vsx_st(b[14], 0, a + 200);
+  vec_vsx_st(b[22], 0, a + 208);
+  vec_vsx_st(b[30], 0, a + 216);
+
+  vec_vsx_st(b[7], 0, a + 224);
+  vec_vsx_st(b[15], 0, a + 232);
+  vec_vsx_st(b[23], 0, a + 240);
+  vec_vsx_st(b[31], 0, a + 248);
+}
+
+// Returns 1 if negative, 0 if positive.
+static INLINE int16x8_t vec_sign_s16(int16x8_t a) {
+  return vec_sr(a, vec_shift_sign_s16);
+}
+
+// Add 2 if positive, 1 if negative, and shift by 2.
+static INLINE int16x8_t sub_round_shift(const int16x8_t a) {
+  const int16x8_t sign = vec_sign_s16(a);
+  return vec_sra(vec_sub(vec_add(a, vec_twos_s16), sign), vec_dct_scale_log2);
+}
+
+// Add 1 if positive, 2 if negative, and shift by 2.
+// In practice, add 1, then add the sign bit, then shift without rounding.
+static INLINE int16x8_t add_round_shift_s16(const int16x8_t a) {
+  const int16x8_t sign = vec_sign_s16(a);
+  return vec_sra(vec_add(vec_add(a, vec_ones_s16), sign), vec_dct_scale_log2);
+}
+
+static void fdct32_vsx(const int16x8_t *in, int16x8_t *out, int pass) {
+  int16x8_t temp0[32];  // Hold stages: 1, 4, 7
+  int16x8_t temp1[32];  // Hold stages: 2, 5
+  int16x8_t temp2[32];  // Hold stages: 3, 6
+  int i;
+
+  // Stage 1
+  // Unrolling this loop actually slows down Power9 benchmarks
+  for (i = 0; i < 16; i++) {
+    temp0[i] = vec_add(in[i], in[31 - i]);
+    // pass through to stage 3.
+    temp1[i + 16] = vec_sub(in[15 - i], in[i + 16]);
+  }
+
+  // Stage 2
+  // Unrolling this loop actually slows down Power9 benchmarks
+  for (i = 0; i < 8; i++) {
+    temp1[i] = vec_add(temp0[i], temp0[15 - i]);
+    temp1[i + 8] = vec_sub(temp0[7 - i], temp0[i + 8]);
+  }
+
+  // Apply butterflies (in place) on pass through to stage 3.
+  single_butterfly(temp1[27], temp1[20], &temp1[27], &temp1[20]);
+  single_butterfly(temp1[26], temp1[21], &temp1[26], &temp1[21]);
+  single_butterfly(temp1[25], temp1[22], &temp1[25], &temp1[22]);
+  single_butterfly(temp1[24], temp1[23], &temp1[24], &temp1[23]);
+
+  // Scale the magnitude down by 4, so the intermediate values stay within
+  // the range of 16 bits.
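In scalar form, the two rounding shifts defined above (sub_round_shift at the end of pass 0, add_round_shift_s16 in the pass-1 block that follows) reduce to the following sketch, which assumes an arithmetic right shift for negative values, matching vec_sra:

#include <stdint.h>

/* +1 if a >= 0, +2 if a < 0, then divide by 4. */
static int16_t add_round_shift_lane(int16_t a) {
  return (int16_t)((a + 1 + (a < 0 ? 1 : 0)) >> 2);
}
/* +2 if a >= 0, +1 if a < 0, then divide by 4. */
static int16_t sub_round_shift_lane(int16_t a) {
  return (int16_t)((a + 2 - (a < 0 ? 1 : 0)) >> 2);
}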
+ if (pass) { + temp1[0] = add_round_shift_s16(temp1[0]); + temp1[1] = add_round_shift_s16(temp1[1]); + temp1[2] = add_round_shift_s16(temp1[2]); + temp1[3] = add_round_shift_s16(temp1[3]); + temp1[4] = add_round_shift_s16(temp1[4]); + temp1[5] = add_round_shift_s16(temp1[5]); + temp1[6] = add_round_shift_s16(temp1[6]); + temp1[7] = add_round_shift_s16(temp1[7]); + temp1[8] = add_round_shift_s16(temp1[8]); + temp1[9] = add_round_shift_s16(temp1[9]); + temp1[10] = add_round_shift_s16(temp1[10]); + temp1[11] = add_round_shift_s16(temp1[11]); + temp1[12] = add_round_shift_s16(temp1[12]); + temp1[13] = add_round_shift_s16(temp1[13]); + temp1[14] = add_round_shift_s16(temp1[14]); + temp1[15] = add_round_shift_s16(temp1[15]); + + temp1[16] = add_round_shift_s16(temp1[16]); + temp1[17] = add_round_shift_s16(temp1[17]); + temp1[18] = add_round_shift_s16(temp1[18]); + temp1[19] = add_round_shift_s16(temp1[19]); + temp1[20] = add_round_shift_s16(temp1[20]); + temp1[21] = add_round_shift_s16(temp1[21]); + temp1[22] = add_round_shift_s16(temp1[22]); + temp1[23] = add_round_shift_s16(temp1[23]); + temp1[24] = add_round_shift_s16(temp1[24]); + temp1[25] = add_round_shift_s16(temp1[25]); + temp1[26] = add_round_shift_s16(temp1[26]); + temp1[27] = add_round_shift_s16(temp1[27]); + temp1[28] = add_round_shift_s16(temp1[28]); + temp1[29] = add_round_shift_s16(temp1[29]); + temp1[30] = add_round_shift_s16(temp1[30]); + temp1[31] = add_round_shift_s16(temp1[31]); + } + + // Stage 3 + temp2[0] = vec_add(temp1[0], temp1[7]); + temp2[1] = vec_add(temp1[1], temp1[6]); + temp2[2] = vec_add(temp1[2], temp1[5]); + temp2[3] = vec_add(temp1[3], temp1[4]); + temp2[5] = vec_sub(temp1[2], temp1[5]); + temp2[6] = vec_sub(temp1[1], temp1[6]); + temp2[8] = temp1[8]; + temp2[9] = temp1[9]; + + single_butterfly(temp1[13], temp1[10], &temp2[13], &temp2[10]); + single_butterfly(temp1[12], temp1[11], &temp2[12], &temp2[11]); + temp2[14] = temp1[14]; + temp2[15] = temp1[15]; + + temp2[18] = vec_add(temp1[18], temp1[21]); + temp2[19] = vec_add(temp1[19], temp1[20]); + + temp2[20] = vec_sub(temp1[19], temp1[20]); + temp2[21] = vec_sub(temp1[18], temp1[21]); + + temp2[26] = vec_sub(temp1[29], temp1[26]); + temp2[27] = vec_sub(temp1[28], temp1[27]); + + temp2[28] = vec_add(temp1[28], temp1[27]); + temp2[29] = vec_add(temp1[29], temp1[26]); + + // Pass through Stage 4 + temp0[7] = vec_sub(temp1[0], temp1[7]); + temp0[4] = vec_sub(temp1[3], temp1[4]); + temp0[16] = vec_add(temp1[16], temp1[23]); + temp0[17] = vec_add(temp1[17], temp1[22]); + temp0[22] = vec_sub(temp1[17], temp1[22]); + temp0[23] = vec_sub(temp1[16], temp1[23]); + temp0[24] = vec_sub(temp1[31], temp1[24]); + temp0[25] = vec_sub(temp1[30], temp1[25]); + temp0[30] = vec_add(temp1[30], temp1[25]); + temp0[31] = vec_add(temp1[31], temp1[24]); + + // Stage 4 + temp0[0] = vec_add(temp2[0], temp2[3]); + temp0[1] = vec_add(temp2[1], temp2[2]); + temp0[2] = vec_sub(temp2[1], temp2[2]); + temp0[3] = vec_sub(temp2[0], temp2[3]); + single_butterfly(temp2[6], temp2[5], &temp0[6], &temp0[5]); + + temp0[9] = vec_add(temp2[9], temp2[10]); + temp0[10] = vec_sub(temp2[9], temp2[10]); + temp0[13] = vec_sub(temp2[14], temp2[13]); + temp0[14] = vec_add(temp2[14], temp2[13]); + + double_butterfly(temp2[29], cospi8_v, temp2[18], cospi24_v, &temp0[29], + &temp0[18]); + double_butterfly(temp2[28], cospi8_v, temp2[19], cospi24_v, &temp0[28], + &temp0[19]); + double_butterfly(temp2[27], cospi24_v, temp2[20], cospi8m_v, &temp0[27], + &temp0[20]); + double_butterfly(temp2[26], cospi24_v, temp2[21], 
cospi8m_v, &temp0[26], + &temp0[21]); + + // Pass through Stage 5 + temp1[8] = vec_add(temp2[8], temp2[11]); + temp1[11] = vec_sub(temp2[8], temp2[11]); + temp1[12] = vec_sub(temp2[15], temp2[12]); + temp1[15] = vec_add(temp2[15], temp2[12]); + + // Stage 5 + // 0 and 1 pass through to 0 and 16 at the end + single_butterfly(temp0[0], temp0[1], &out[0], &out[16]); + + // 2 and 3 pass through to 8 and 24 at the end + double_butterfly(temp0[3], cospi8_v, temp0[2], cospi24_v, &out[8], &out[24]); + + temp1[4] = vec_add(temp0[4], temp0[5]); + temp1[5] = vec_sub(temp0[4], temp0[5]); + temp1[6] = vec_sub(temp0[7], temp0[6]); + temp1[7] = vec_add(temp0[7], temp0[6]); + + double_butterfly(temp0[14], cospi8_v, temp0[9], cospi24_v, &temp1[14], + &temp1[9]); + double_butterfly(temp0[13], cospi24_v, temp0[10], cospi8m_v, &temp1[13], + &temp1[10]); + + temp1[17] = vec_add(temp0[17], temp0[18]); + temp1[18] = vec_sub(temp0[17], temp0[18]); + + temp1[21] = vec_sub(temp0[22], temp0[21]); + temp1[22] = vec_add(temp0[22], temp0[21]); + + temp1[25] = vec_add(temp0[25], temp0[26]); + temp1[26] = vec_sub(temp0[25], temp0[26]); + + temp1[29] = vec_sub(temp0[30], temp0[29]); + temp1[30] = vec_add(temp0[30], temp0[29]); + + // Pass through Stage 6 + temp2[16] = vec_add(temp0[16], temp0[19]); + temp2[19] = vec_sub(temp0[16], temp0[19]); + temp2[20] = vec_sub(temp0[23], temp0[20]); + temp2[23] = vec_add(temp0[23], temp0[20]); + temp2[24] = vec_add(temp0[24], temp0[27]); + temp2[27] = vec_sub(temp0[24], temp0[27]); + temp2[28] = vec_sub(temp0[31], temp0[28]); + temp2[31] = vec_add(temp0[31], temp0[28]); + + // Stage 6 + // 4 and 7 pass through to 4 and 28 at the end + double_butterfly(temp1[7], cospi4_v, temp1[4], cospi28_v, &out[4], &out[28]); + // 5 and 6 pass through to 20 and 12 at the end + double_butterfly(temp1[6], cospi20_v, temp1[5], cospi12_v, &out[20], + &out[12]); + temp2[8] = vec_add(temp1[8], temp1[9]); + temp2[9] = vec_sub(temp1[8], temp1[9]); + temp2[10] = vec_sub(temp1[11], temp1[10]); + temp2[11] = vec_add(temp1[11], temp1[10]); + temp2[12] = vec_add(temp1[12], temp1[13]); + temp2[13] = vec_sub(temp1[12], temp1[13]); + temp2[14] = vec_sub(temp1[15], temp1[14]); + temp2[15] = vec_add(temp1[15], temp1[14]); + + double_butterfly(temp1[30], cospi4_v, temp1[17], cospi28_v, &temp2[30], + &temp2[17]); + double_butterfly(temp1[29], cospi28_v, temp1[18], cospi4m_v, &temp2[29], + &temp2[18]); + double_butterfly(temp1[26], cospi20_v, temp1[21], cospi12_v, &temp2[26], + &temp2[21]); + double_butterfly(temp1[25], cospi12_v, temp1[22], cospi20m_v, &temp2[25], + &temp2[22]); + + // Stage 7 + double_butterfly(temp2[15], cospi2_v, temp2[8], cospi30_v, &out[2], &out[30]); + double_butterfly(temp2[14], cospi18_v, temp2[9], cospi14_v, &out[18], + &out[14]); + double_butterfly(temp2[13], cospi10_v, temp2[10], cospi22_v, &out[10], + &out[22]); + double_butterfly(temp2[12], cospi26_v, temp2[11], cospi6_v, &out[26], + &out[6]); + + temp0[16] = vec_add(temp2[16], temp2[17]); + temp0[17] = vec_sub(temp2[16], temp2[17]); + temp0[18] = vec_sub(temp2[19], temp2[18]); + temp0[19] = vec_add(temp2[19], temp2[18]); + temp0[20] = vec_add(temp2[20], temp2[21]); + temp0[21] = vec_sub(temp2[20], temp2[21]); + temp0[22] = vec_sub(temp2[23], temp2[22]); + temp0[23] = vec_add(temp2[23], temp2[22]); + temp0[24] = vec_add(temp2[24], temp2[25]); + temp0[25] = vec_sub(temp2[24], temp2[25]); + temp0[26] = vec_sub(temp2[27], temp2[26]); + temp0[27] = vec_add(temp2[27], temp2[26]); + temp0[28] = vec_add(temp2[28], temp2[29]); + temp0[29] = 
vec_sub(temp2[28], temp2[29]);
+  temp0[30] = vec_sub(temp2[31], temp2[30]);
+  temp0[31] = vec_add(temp2[31], temp2[30]);
+
+  // Final stage: output indices are bit-reversed.
+  double_butterfly(temp0[31], cospi1_v, temp0[16], cospi31_v, &out[1],
+                   &out[31]);
+  double_butterfly(temp0[30], cospi17_v, temp0[17], cospi15_v, &out[17],
+                   &out[15]);
+  double_butterfly(temp0[29], cospi9_v, temp0[18], cospi23_v, &out[9],
+                   &out[23]);
+  double_butterfly(temp0[28], cospi25_v, temp0[19], cospi7_v, &out[25],
+                   &out[7]);
+  double_butterfly(temp0[27], cospi5_v, temp0[20], cospi27_v, &out[5],
+                   &out[27]);
+  double_butterfly(temp0[26], cospi21_v, temp0[21], cospi11_v, &out[21],
+                   &out[11]);
+  double_butterfly(temp0[25], cospi13_v, temp0[22], cospi19_v, &out[13],
+                   &out[19]);
+  double_butterfly(temp0[24], cospi29_v, temp0[23], cospi3_v, &out[29],
+                   &out[3]);
+
+  if (pass == 0) {
+    for (i = 0; i < 32; i++) {
+      out[i] = sub_round_shift(out[i]);
+    }
+  }
+}
+
+void vpx_fdct32x32_rd_vsx(const int16_t *input, tran_low_t *out, int stride) {
+  int16x8_t temp0[32];
+  int16x8_t temp1[32];
+  int16x8_t temp2[32];
+  int16x8_t temp3[32];
+  int16x8_t temp4[32];
+  int16x8_t temp5[32];
+  int16x8_t temp6[32];
+
+  // Process in 8x32 columns.
+  load(input, stride, temp0);
+  fdct32_vsx(temp0, temp1, 0);
+
+  load(input + 8, stride, temp0);
+  fdct32_vsx(temp0, temp2, 0);
+
+  load(input + 16, stride, temp0);
+  fdct32_vsx(temp0, temp3, 0);
+
+  load(input + 24, stride, temp0);
+  fdct32_vsx(temp0, temp4, 0);
+
+  // Generate the top row by munging the first set of 8 from each one
+  // together.
+  transpose_8x8(&temp1[0], &temp0[0]);
+  transpose_8x8(&temp2[0], &temp0[8]);
+  transpose_8x8(&temp3[0], &temp0[16]);
+  transpose_8x8(&temp4[0], &temp0[24]);
+
+  fdct32_vsx(temp0, temp5, 1);
+
+  transpose_8x8(&temp5[0], &temp6[0]);
+  transpose_8x8(&temp5[8], &temp6[8]);
+  transpose_8x8(&temp5[16], &temp6[16]);
+  transpose_8x8(&temp5[24], &temp6[24]);
+
+  store(out, temp6);
+
+  // Second row of 8x32.
+  transpose_8x8(&temp1[8], &temp0[0]);
+  transpose_8x8(&temp2[8], &temp0[8]);
+  transpose_8x8(&temp3[8], &temp0[16]);
+  transpose_8x8(&temp4[8], &temp0[24]);
+
+  fdct32_vsx(temp0, temp5, 1);
+
+  transpose_8x8(&temp5[0], &temp6[0]);
+  transpose_8x8(&temp5[8], &temp6[8]);
+  transpose_8x8(&temp5[16], &temp6[16]);
+  transpose_8x8(&temp5[24], &temp6[24]);
+
+  store(out + 8 * 32, temp6);
+
+  // Third row of 8x32.
+  transpose_8x8(&temp1[16], &temp0[0]);
+  transpose_8x8(&temp2[16], &temp0[8]);
+  transpose_8x8(&temp3[16], &temp0[16]);
+  transpose_8x8(&temp4[16], &temp0[24]);
+
+  fdct32_vsx(temp0, temp5, 1);
+
+  transpose_8x8(&temp5[0], &temp6[0]);
+  transpose_8x8(&temp5[8], &temp6[8]);
+  transpose_8x8(&temp5[16], &temp6[16]);
+  transpose_8x8(&temp5[24], &temp6[24]);
+
+  store(out + 16 * 32, temp6);
+
+  // Final row of 8x32.
+ transpose_8x8(&temp1[24], &temp0[0]); + transpose_8x8(&temp2[24], &temp0[8]); + transpose_8x8(&temp3[24], &temp0[16]); + transpose_8x8(&temp4[24], &temp0[24]); + + fdct32_vsx(temp0, temp5, 1); + + transpose_8x8(&temp5[0], &temp6[0]); + transpose_8x8(&temp5[8], &temp6[8]); + transpose_8x8(&temp5[16], &temp6[16]); + transpose_8x8(&temp5[24], &temp6[24]); + + store(out + 24 * 32, temp6); +} diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/intrapred_vsx.c b/media/libvpx/libvpx/vpx_dsp/ppc/intrapred_vsx.c index 6273460f1901..a4c8322ff2e7 100644 --- a/media/libvpx/libvpx/vpx_dsp/ppc/intrapred_vsx.c +++ b/media/libvpx/libvpx/vpx_dsp/ppc/intrapred_vsx.c @@ -35,6 +35,8 @@ void vpx_v_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, } } +// TODO(crbug.com/webm/1522): Fix test failures. +#if 0 static const uint32x4_t mask4 = { 0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; void vpx_h_predictor_4x4_vsx(uint8_t *dst, ptrdiff_t stride, @@ -87,6 +89,7 @@ void vpx_h_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, dst += stride; vec_vsx_st(xxpermdi(v7, vec_vsx_ld(0, dst), 1), 0, dst); } +#endif void vpx_h_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { @@ -233,6 +236,8 @@ void vpx_h_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, H_PREDICTOR_32(v15_1); } +// TODO(crbug.com/webm/1522): Fix test failures. +#if 0 void vpx_tm_predictor_4x4_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0)); @@ -311,6 +316,7 @@ void vpx_tm_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, val = vec_sub(vec_add(vec_splat(l, 7), a), tl); vec_vsx_st(vec_packsu(val, tmp), 0, dst); } +#endif static void tm_predictor_16x8(uint8_t *dst, const ptrdiff_t stride, int16x8_t l, int16x8_t ah, int16x8_t al, int16x8_t tl) { @@ -547,6 +553,8 @@ void vpx_dc_top_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, dc_fill_predictor_32x32(dst, stride, avg32(above)); } +// TODO(crbug.com/webm/1522): Fix test failures. +#if 0 static uint8x16_t dc_avg8(const uint8_t *above, const uint8_t *left) { const uint8x16_t a0 = vec_vsx_ld(0, above); const uint8x16_t l0 = vec_vsx_ld(0, left); @@ -559,6 +567,7 @@ static uint8x16_t dc_avg8(const uint8_t *above, const uint8_t *left) { return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)), 3); } +#endif static uint8x16_t dc_avg16(const uint8_t *above, const uint8_t *left) { const uint8x16_t a0 = vec_vsx_ld(0, above); @@ -573,10 +582,13 @@ static uint8x16_t dc_avg16(const uint8_t *above, const uint8_t *left) { 3); } +// TODO(crbug.com/webm/1522): Fix test failures. +#if 0 void vpx_dc_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { dc_fill_predictor_8x8(dst, stride, dc_avg8(above, left)); } +#endif void vpx_dc_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { @@ -615,6 +627,8 @@ static uint8x16_t avg3(const uint8x16_t a, const uint8x16_t b, static const uint8x16_t sl1 = { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10 }; +// TODO(crbug.com/webm/1522): Fix test failures. 
+#if 0 void vpx_d45_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t af = vec_vsx_ld(0, above); @@ -633,6 +647,7 @@ void vpx_d45_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, row = vec_perm(row, above_right, sl1); } } +#endif void vpx_d45_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { @@ -674,6 +689,8 @@ void vpx_d45_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, } } +// TODO(crbug.com/webm/1522): Fix test failures. +#if 0 void vpx_d63_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t af = vec_vsx_ld(0, above); @@ -696,6 +713,7 @@ void vpx_d63_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, row1 = vec_perm(row1, above_right, sl1); } } +#endif void vpx_d63_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c b/media/libvpx/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c index d43a9fd18418..e99412ecab93 100644 --- a/media/libvpx/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c +++ b/media/libvpx/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c @@ -14,67 +14,129 @@ #include "vpx_dsp/ppc/bitdepth_conversion_vsx.h" #include "vpx_dsp/ppc/types_vsx.h" +#include "vpx_dsp/ppc/inv_txfm_vsx.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/inv_txfm.h" -static int16x8_t cospi1_v = { 16364, 16364, 16364, 16364, - 16364, 16364, 16364, 16364 }; -static int16x8_t cospi2_v = { 16305, 16305, 16305, 16305, - 16305, 16305, 16305, 16305 }; -static int16x8_t cospi3_v = { 16207, 16207, 16207, 16207, - 16207, 16207, 16207, 16207 }; -static int16x8_t cospi4_v = { 16069, 16069, 16069, 16069, - 16069, 16069, 16069, 16069 }; -static int16x8_t cospi4m_v = { -16069, -16069, -16069, -16069, - -16069, -16069, -16069, -16069 }; -static int16x8_t cospi5_v = { 15893, 15893, 15893, 15893, - 15893, 15893, 15893, 15893 }; -static int16x8_t cospi6_v = { 15679, 15679, 15679, 15679, - 15679, 15679, 15679, 15679 }; -static int16x8_t cospi7_v = { 15426, 15426, 15426, 15426, - 15426, 15426, 15426, 15426 }; -static int16x8_t cospi8_v = { 15137, 15137, 15137, 15137, - 15137, 15137, 15137, 15137 }; -static int16x8_t cospi8m_v = { -15137, -15137, -15137, -15137, - -15137, -15137, -15137, -15137 }; -static int16x8_t cospi9_v = { 14811, 14811, 14811, 14811, - 14811, 14811, 14811, 14811 }; -static int16x8_t cospi10_v = { 14449, 14449, 14449, 14449, - 14449, 14449, 14449, 14449 }; -static int16x8_t cospi11_v = { 14053, 14053, 14053, 14053, - 14053, 14053, 14053, 14053 }; -static int16x8_t cospi12_v = { 13623, 13623, 13623, 13623, - 13623, 13623, 13623, 13623 }; -static int16x8_t cospi13_v = { 13160, 13160, 13160, 13160, - 13160, 13160, 13160, 13160 }; -static int16x8_t cospi14_v = { 12665, 12665, 12665, 12665, - 12665, 12665, 12665, 12665 }; -static int16x8_t cospi15_v = { 12140, 12140, 12140, 12140, - 12140, 12140, 12140, 12140 }; -static int16x8_t cospi16_v = { 11585, 11585, 11585, 11585, - 11585, 11585, 11585, 11585 }; -static int16x8_t cospi17_v = { 11003, 11003, 11003, 11003, - 11003, 11003, 11003, 11003 }; -static int16x8_t cospi18_v = { 10394, 10394, 10394, 10394, - 10394, 10394, 10394, 10394 }; -static int16x8_t cospi19_v = { 9760, 9760, 9760, 9760, 9760, 9760, 9760, 9760 }; -static int16x8_t cospi20_v = { 9102, 9102, 9102, 9102, 9102, 9102, 9102, 9102 }; -static int16x8_t cospi20m_v = { -9102, -9102, -9102, -9102, - -9102, -9102, -9102, -9102 }; -static int16x8_t cospi21_v = { 8423, 8423, 
8423, 8423, 8423, 8423, 8423, 8423 }; -static int16x8_t cospi22_v = { 7723, 7723, 7723, 7723, 7723, 7723, 7723, 7723 }; -static int16x8_t cospi23_v = { 7005, 7005, 7005, 7005, 7005, 7005, 7005, 7005 }; -static int16x8_t cospi24_v = { 6270, 6270, 6270, 6270, 6270, 6270, 6270, 6270 }; -static int16x8_t cospi24_mv = { -6270, -6270, -6270, -6270, - -6270, -6270, -6270, -6270 }; -static int16x8_t cospi25_v = { 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520 }; -static int16x8_t cospi26_v = { 4756, 4756, 4756, 4756, 4756, 4756, 4756, 4756 }; -static int16x8_t cospi27_v = { 3981, 3981, 3981, 3981, 3981, 3981, 3981, 3981 }; -static int16x8_t cospi28_v = { 3196, 3196, 3196, 3196, 3196, 3196, 3196, 3196 }; -static int16x8_t cospi29_v = { 2404, 2404, 2404, 2404, 2404, 2404, 2404, 2404 }; -static int16x8_t cospi30_v = { 1606, 1606, 1606, 1606, 1606, 1606, 1606, 1606 }; -static int16x8_t cospi31_v = { 804, 804, 804, 804, 804, 804, 804, 804 }; +static const int16x8_t cospi1_v = { 16364, 16364, 16364, 16364, + 16364, 16364, 16364, 16364 }; +static const int16x8_t cospi1m_v = { -16364, -16364, -16364, -16364, + -16364, -16364, -16364, -16364 }; +static const int16x8_t cospi2_v = { 16305, 16305, 16305, 16305, + 16305, 16305, 16305, 16305 }; +static const int16x8_t cospi2m_v = { -16305, -16305, -16305, -16305, + -16305, -16305, -16305, -16305 }; +static const int16x8_t cospi3_v = { 16207, 16207, 16207, 16207, + 16207, 16207, 16207, 16207 }; +static const int16x8_t cospi4_v = { 16069, 16069, 16069, 16069, + 16069, 16069, 16069, 16069 }; +static const int16x8_t cospi4m_v = { -16069, -16069, -16069, -16069, + -16069, -16069, -16069, -16069 }; +static const int16x8_t cospi5_v = { 15893, 15893, 15893, 15893, + 15893, 15893, 15893, 15893 }; +static const int16x8_t cospi5m_v = { -15893, -15893, -15893, -15893, + -15893, -15893, -15893, -15893 }; +static const int16x8_t cospi6_v = { 15679, 15679, 15679, 15679, + 15679, 15679, 15679, 15679 }; +static const int16x8_t cospi7_v = { 15426, 15426, 15426, 15426, + 15426, 15426, 15426, 15426 }; +static const int16x8_t cospi8_v = { 15137, 15137, 15137, 15137, + 15137, 15137, 15137, 15137 }; +static const int16x8_t cospi8m_v = { -15137, -15137, -15137, -15137, + -15137, -15137, -15137, -15137 }; +static const int16x8_t cospi9_v = { 14811, 14811, 14811, 14811, + 14811, 14811, 14811, 14811 }; +static const int16x8_t cospi9m_v = { -14811, -14811, -14811, -14811, + -14811, -14811, -14811, -14811 }; +static const int16x8_t cospi10_v = { 14449, 14449, 14449, 14449, + 14449, 14449, 14449, 14449 }; +static const int16x8_t cospi10m_v = { -14449, -14449, -14449, -14449, + -14449, -14449, -14449, -14449 }; +static const int16x8_t cospi11_v = { 14053, 14053, 14053, 14053, + 14053, 14053, 14053, 14053 }; +static const int16x8_t cospi12_v = { 13623, 13623, 13623, 13623, + 13623, 13623, 13623, 13623 }; +static const int16x8_t cospi12m_v = { -13623, -13623, -13623, -13623, + -13623, -13623, -13623, -13623 }; +static const int16x8_t cospi13_v = { 13160, 13160, 13160, 13160, + 13160, 13160, 13160, 13160 }; +static const int16x8_t cospi13m_v = { -13160, -13160, -13160, -13160, + -13160, -13160, -13160, -13160 }; +static const int16x8_t cospi14_v = { 12665, 12665, 12665, 12665, + 12665, 12665, 12665, 12665 }; +static const int16x8_t cospi15_v = { 12140, 12140, 12140, 12140, + 12140, 12140, 12140, 12140 }; +static const int16x8_t cospi16_v = { 11585, 11585, 11585, 11585, + 11585, 11585, 11585, 11585 }; +static const int16x8_t cospi16m_v = { -11585, -11585, -11585, -11585, + -11585, -11585, 
-11585, -11585 }; +static const int16x8_t cospi17_v = { 11003, 11003, 11003, 11003, + 11003, 11003, 11003, 11003 }; +static const int16x8_t cospi17m_v = { -11003, -11003, -11003, -11003, + -11003, -11003, -11003, -11003 }; +static const int16x8_t cospi18_v = { 10394, 10394, 10394, 10394, + 10394, 10394, 10394, 10394 }; +static const int16x8_t cospi18m_v = { -10394, -10394, -10394, -10394, + -10394, -10394, -10394, -10394 }; +static const int16x8_t cospi19_v = { 9760, 9760, 9760, 9760, + 9760, 9760, 9760, 9760 }; +static const int16x8_t cospi20_v = { 9102, 9102, 9102, 9102, + 9102, 9102, 9102, 9102 }; +static const int16x8_t cospi20m_v = { -9102, -9102, -9102, -9102, + -9102, -9102, -9102, -9102 }; +static const int16x8_t cospi21_v = { 8423, 8423, 8423, 8423, + 8423, 8423, 8423, 8423 }; +static const int16x8_t cospi21m_v = { -8423, -8423, -8423, -8423, + -8423, -8423, -8423, -8423 }; +static const int16x8_t cospi22_v = { 7723, 7723, 7723, 7723, + 7723, 7723, 7723, 7723 }; +static const int16x8_t cospi23_v = { 7005, 7005, 7005, 7005, + 7005, 7005, 7005, 7005 }; +static const int16x8_t cospi24_v = { 6270, 6270, 6270, 6270, + 6270, 6270, 6270, 6270 }; +static const int16x8_t cospi24m_v = { -6270, -6270, -6270, -6270, + -6270, -6270, -6270, -6270 }; +static const int16x8_t cospi25_v = { 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520 }; +static const int16x8_t cospi25m_v = { -5520, -5520, -5520, -5520, + -5520, -5520, -5520, -5520 }; +static const int16x8_t cospi26_v = { 4756, 4756, 4756, 4756, + 4756, 4756, 4756, 4756 }; +static const int16x8_t cospi26m_v = { -4756, -4756, -4756, -4756, + -4756, -4756, -4756, -4756 }; +static const int16x8_t cospi27_v = { 3981, 3981, 3981, 3981, + 3981, 3981, 3981, 3981 }; +static const int16x8_t cospi28_v = { 3196, 3196, 3196, 3196, + 3196, 3196, 3196, 3196 }; +static const int16x8_t cospi28m_v = { -3196, -3196, -3196, -3196, + -3196, -3196, -3196, -3196 }; +static const int16x8_t cospi29_v = { 2404, 2404, 2404, 2404, + 2404, 2404, 2404, 2404 }; +static const int16x8_t cospi29m_v = { -2404, -2404, -2404, -2404, + -2404, -2404, -2404, -2404 }; +static const int16x8_t cospi30_v = { 1606, 1606, 1606, 1606, + 1606, 1606, 1606, 1606 }; +static const int16x8_t cospi31_v = { 804, 804, 804, 804, 804, 804, 804, 804 }; + +static const int16x8_t sinpi_1_9_v = { 5283, 5283, 5283, 5283, + 5283, 5283, 5283, 5283 }; +static const int16x8_t sinpi_2_9_v = { 9929, 9929, 9929, 9929, + 9929, 9929, 9929, 9929 }; +static const int16x8_t sinpi_3_9_v = { 13377, 13377, 13377, 13377, + 13377, 13377, 13377, 13377 }; +static const int16x8_t sinpi_4_9_v = { 15212, 15212, 15212, 15212, + 15212, 15212, 15212, 15212 }; + +static uint8x16_t tr8_mask0 = { + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 +}; + +static uint8x16_t tr8_mask1 = { + 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F +}; #define ROUND_SHIFT_INIT \ const int32x4_t shift = vec_sl(vec_splat_s32(1), vec_splat_u32(13)); \ @@ -107,19 +169,18 @@ static int16x8_t cospi31_v = { 804, 804, 804, 804, 804, 804, 804, 804 }; out1 = vec_sub(step0, step1); \ out1 = vec_perm(out1, out1, mask0); -void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, - int stride) { - int32x4_t temp1, temp2, temp3, temp4; - int16x8_t step0, step1, tmp16_0, tmp16_1, t_out0, t_out1; - uint8x16_t mask0 = { 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; - uint8x16_t mask1 = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x10, 
0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 }; - int16x8_t v0 = load_tran_low(0, input); - int16x8_t v1 = load_tran_low(8 * sizeof(*input), input); - int16x8_t t0 = vec_mergeh(v0, v1); - int16x8_t t1 = vec_mergel(v0, v1); +#define PACK_STORE(v0, v1) \ + tmp16_0 = vec_add(vec_perm(d_u0, d_u1, tr8_mask0), v0); \ + tmp16_1 = vec_add(vec_perm(d_u2, d_u3, tr8_mask0), v1); \ + output_v = vec_packsu(tmp16_0, tmp16_1); \ + \ + vec_vsx_st(output_v, 0, tmp_dest); \ + for (i = 0; i < 4; i++) \ + for (j = 0; j < 4; j++) dest[j * stride + i] = tmp_dest[j * 4 + i]; +void vpx_round_store4x4_vsx(int16x8_t *in, int16x8_t *out, uint8_t *dest, + int stride) { + int i, j; uint8x16_t dest0 = vec_vsx_ld(0, dest); uint8x16_t dest1 = vec_vsx_ld(stride, dest); uint8x16_t dest2 = vec_vsx_ld(2 * stride, dest); @@ -129,31 +190,45 @@ void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int16x8_t d_u1 = (int16x8_t)vec_mergeh(dest1, zerov); int16x8_t d_u2 = (int16x8_t)vec_mergeh(dest2, zerov); int16x8_t d_u3 = (int16x8_t)vec_mergeh(dest3, zerov); + int16x8_t tmp16_0, tmp16_1; uint8x16_t output_v; uint8_t tmp_dest[16]; - ROUND_SHIFT_INIT PIXEL_ADD_INIT; - v0 = vec_mergeh(t0, t1); - v1 = vec_mergel(t0, t1); + PIXEL_ADD4(out[0], in[0]); + PIXEL_ADD4(out[1], in[1]); - IDCT4(v0, v1, t_out0, t_out1); - // transpose - t0 = vec_mergeh(t_out0, t_out1); - t1 = vec_mergel(t_out0, t_out1); - v0 = vec_mergeh(t0, t1); - v1 = vec_mergel(t0, t1); - IDCT4(v0, v1, t_out0, t_out1); + PACK_STORE(out[0], out[1]); +} - PIXEL_ADD4(v0, t_out0); - PIXEL_ADD4(v1, t_out1); - tmp16_0 = vec_add(vec_perm(d_u0, d_u1, mask1), v0); - tmp16_1 = vec_add(vec_perm(d_u2, d_u3, mask1), v1); - output_v = vec_packsu(tmp16_0, tmp16_1); +void vpx_idct4_vsx(int16x8_t *in, int16x8_t *out) { + int32x4_t temp1, temp2, temp3, temp4; + int16x8_t step0, step1, tmp16_0; + uint8x16_t mask0 = { 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; + int16x8_t t0 = vec_mergeh(in[0], in[1]); + int16x8_t t1 = vec_mergel(in[0], in[1]); + ROUND_SHIFT_INIT - vec_vsx_st(output_v, 0, tmp_dest); - for (int i = 0; i < 4; i++) - for (int j = 0; j < 4; j++) dest[j * stride + i] = tmp_dest[j * 4 + i]; + in[0] = vec_mergeh(t0, t1); + in[1] = vec_mergel(t0, t1); + + IDCT4(in[0], in[1], out[0], out[1]); +} + +void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, + int stride) { + int16x8_t in[2], out[2]; + + in[0] = load_tran_low(0, input); + in[1] = load_tran_low(8 * sizeof(*input), input); + // Rows + vpx_idct4_vsx(in, out); + + // Columns + vpx_idct4_vsx(out, in); + + vpx_round_store4x4_vsx(in, out, dest, stride); } #define TRANSPOSE8x8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \ @@ -255,28 +330,20 @@ void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, #define PIXEL_ADD(in, out, add, shiftx) \ out = vec_add(vec_sra(vec_add(in, add), shiftx), out); -static uint8x16_t tr8_mask0 = { - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 -}; -static uint8x16_t tr8_mask1 = { - 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, - 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F -}; -void vpx_idct8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, - int stride) { - int32x4_t temp10, temp11; +void vpx_idct8_vsx(int16x8_t *in, int16x8_t *out) { int16x8_t step0, step1, step2, step3, step4, step5, step6, step7; - int16x8_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp16_0, tmp16_1, - tmp16_2, tmp16_3; - int16x8_t src0 = load_tran_low(0, input); - int16x8_t src1 = 
load_tran_low(8 * sizeof(*input), input); - int16x8_t src2 = load_tran_low(16 * sizeof(*input), input); - int16x8_t src3 = load_tran_low(24 * sizeof(*input), input); - int16x8_t src4 = load_tran_low(32 * sizeof(*input), input); - int16x8_t src5 = load_tran_low(40 * sizeof(*input), input); - int16x8_t src6 = load_tran_low(48 * sizeof(*input), input); - int16x8_t src7 = load_tran_low(56 * sizeof(*input), input); + int16x8_t tmp16_0, tmp16_1, tmp16_2, tmp16_3; + int32x4_t temp10, temp11; + ROUND_SHIFT_INIT; + + TRANSPOSE8x8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], out[0], + out[1], out[2], out[3], out[4], out[5], out[6], out[7]); + + IDCT8(out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]); +} + +void vpx_round_store8x8_vsx(int16x8_t *in, uint8_t *dest, int stride) { + uint8x16_t zerov = vec_splat_u8(0); uint8x16_t dest0 = vec_vsx_ld(0, dest); uint8x16_t dest1 = vec_vsx_ld(stride, dest); uint8x16_t dest2 = vec_vsx_ld(2 * stride, dest); @@ -285,7 +352,6 @@ void vpx_idct8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, uint8x16_t dest5 = vec_vsx_ld(5 * stride, dest); uint8x16_t dest6 = vec_vsx_ld(6 * stride, dest); uint8x16_t dest7 = vec_vsx_ld(7 * stride, dest); - uint8x16_t zerov = vec_splat_u8(0); int16x8_t d_u0 = (int16x8_t)vec_mergeh(dest0, zerov); int16x8_t d_u1 = (int16x8_t)vec_mergeh(dest1, zerov); int16x8_t d_u2 = (int16x8_t)vec_mergeh(dest2, zerov); @@ -297,23 +363,15 @@ void vpx_idct8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, int16x8_t add = vec_sl(vec_splat_s16(8), vec_splat_u16(1)); uint16x8_t shift5 = vec_splat_u16(5); uint8x16_t output0, output1, output2, output3; - ROUND_SHIFT_INIT; - TRANSPOSE8x8(src0, src1, src2, src3, src4, src5, src6, src7, tmp0, tmp1, tmp2, - tmp3, tmp4, tmp5, tmp6, tmp7); - - IDCT8(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); - TRANSPOSE8x8(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1, src2, - src3, src4, src5, src6, src7); - IDCT8(src0, src1, src2, src3, src4, src5, src6, src7); - PIXEL_ADD(src0, d_u0, add, shift5); - PIXEL_ADD(src1, d_u1, add, shift5); - PIXEL_ADD(src2, d_u2, add, shift5); - PIXEL_ADD(src3, d_u3, add, shift5); - PIXEL_ADD(src4, d_u4, add, shift5); - PIXEL_ADD(src5, d_u5, add, shift5); - PIXEL_ADD(src6, d_u6, add, shift5); - PIXEL_ADD(src7, d_u7, add, shift5); + PIXEL_ADD(in[0], d_u0, add, shift5); + PIXEL_ADD(in[1], d_u1, add, shift5); + PIXEL_ADD(in[2], d_u2, add, shift5); + PIXEL_ADD(in[3], d_u3, add, shift5); + PIXEL_ADD(in[4], d_u4, add, shift5); + PIXEL_ADD(in[5], d_u5, add, shift5); + PIXEL_ADD(in[6], d_u6, add, shift5); + PIXEL_ADD(in[7], d_u7, add, shift5); output0 = vec_packsu(d_u0, d_u1); output1 = vec_packsu(d_u2, d_u3); output2 = vec_packsu(d_u4, d_u5); @@ -329,24 +387,24 @@ void vpx_idct8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, vec_vsx_st(xxpermdi(output3, dest7, 3), 7 * stride, dest); } -#define LOAD_INPUT16(load, source, offset, step, in0, in1, in2, in3, in4, in5, \ - in6, in7, in8, in9, inA, inB, inC, inD, inE, inF) \ - in0 = load(offset, source); \ - in1 = load((step) + (offset), source); \ - in2 = load(2 * (step) + (offset), source); \ - in3 = load(3 * (step) + (offset), source); \ - in4 = load(4 * (step) + (offset), source); \ - in5 = load(5 * (step) + (offset), source); \ - in6 = load(6 * (step) + (offset), source); \ - in7 = load(7 * (step) + (offset), source); \ - in8 = load(8 * (step) + (offset), source); \ - in9 = load(9 * (step) + (offset), source); \ - inA = load(10 * (step) + (offset), source); \ - inB = load(11 * (step) + (offset), 
source); \ - inC = load(12 * (step) + (offset), source); \ - inD = load(13 * (step) + (offset), source); \ - inE = load(14 * (step) + (offset), source); \ - inF = load(15 * (step) + (offset), source); +void vpx_idct8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, + int stride) { + int16x8_t src[8], tmp[8]; + + src[0] = load_tran_low(0, input); + src[1] = load_tran_low(8 * sizeof(*input), input); + src[2] = load_tran_low(16 * sizeof(*input), input); + src[3] = load_tran_low(24 * sizeof(*input), input); + src[4] = load_tran_low(32 * sizeof(*input), input); + src[5] = load_tran_low(40 * sizeof(*input), input); + src[6] = load_tran_low(48 * sizeof(*input), input); + src[7] = load_tran_low(56 * sizeof(*input), input); + + vpx_idct8_vsx(src, tmp); + vpx_idct8_vsx(tmp, src); + + vpx_round_store8x8_vsx(src, dest, stride); +} #define STEP16_1(inpt0, inpt1, outpt0, outpt1, cospi) \ tmp16_0 = vec_mergeh(inpt0, inpt1); \ @@ -446,9 +504,9 @@ void vpx_idct8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, tmp16_0 = vec_mergeh(outA, outD); \ tmp16_1 = vec_mergel(outA, outD); \ temp10 = \ - vec_sub(vec_mule(tmp16_0, cospi24_mv), vec_mulo(tmp16_0, cospi8_v)); \ + vec_sub(vec_mule(tmp16_0, cospi24m_v), vec_mulo(tmp16_0, cospi8_v)); \ temp11 = \ - vec_sub(vec_mule(tmp16_1, cospi24_mv), vec_mulo(tmp16_1, cospi8_v)); \ + vec_sub(vec_mule(tmp16_1, cospi24m_v), vec_mulo(tmp16_1, cospi8_v)); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ inA = vec_packs(temp10, temp11); \ @@ -520,95 +578,131 @@ void vpx_idct8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, PIXEL_ADD(in1, d_ul, add, shift6); \ vec_vsx_st(vec_packsu(d_uh, d_ul), offset, dest); -void vpx_idct16x16_256_add_vsx(const tran_low_t *input, uint8_t *dest, - int stride) { +static void half_idct16x8_vsx(int16x8_t *src) { + int16x8_t tmp0[8], tmp1[8]; int32x4_t temp10, temp11, temp20, temp21, temp30; - int16x8_t src00, src01, src02, src03, src04, src05, src06, src07, src10, - src11, src12, src13, src14, src15, src16, src17; - int16x8_t src20, src21, src22, src23, src24, src25, src26, src27, src30, - src31, src32, src33, src34, src35, src36, src37; - int16x8_t tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp10, - tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17, tmp16_0, tmp16_1; - int16x8_t tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27, tmp30, - tmp31, tmp32, tmp33, tmp34, tmp35, tmp36, tmp37; - uint8x16_t dest0, dest1, dest2, dest3, dest4, dest5, dest6, dest7, dest8, - dest9, destA, destB, destC, destD, destE, destF; - int16x8_t d_uh, d_ul; - int16x8_t add = vec_sl(vec_splat_s16(8), vec_splat_u16(2)); - uint16x8_t shift6 = vec_splat_u16(6); - uint8x16_t zerov = vec_splat_u8(0); + int16x8_t tmp16_0, tmp16_1; ROUND_SHIFT_INIT; - // transform rows - // load and transform the upper half of 16x16 matrix - LOAD_INPUT16(load_tran_low, input, 0, 8 * sizeof(*input), src00, src10, src01, - src11, src02, src12, src03, src13, src04, src14, src05, src15, - src06, src16, src07, src17); - TRANSPOSE8x8(src00, src01, src02, src03, src04, src05, src06, src07, tmp00, - tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07); - TRANSPOSE8x8(src10, src11, src12, src13, src14, src15, src16, src17, tmp10, - tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17); - IDCT16(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp10, tmp11, - tmp12, tmp13, tmp14, tmp15, tmp16, tmp17, src00, src01, src02, src03, - src04, src05, src06, src07, src10, src11, src12, src13, src14, src15, - src16, src17); - TRANSPOSE8x8(src00, src01, src02, src03, 
src04, src05, src06, src07, tmp00, - tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07); - TRANSPOSE8x8(src10, src11, src12, src13, src14, src15, src16, src17, tmp10, - tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17); + TRANSPOSE8x8(src[0], src[2], src[4], src[6], src[8], src[10], src[12], + src[14], tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], + tmp0[6], tmp0[7]); + TRANSPOSE8x8(src[1], src[3], src[5], src[7], src[9], src[11], src[13], + src[15], tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], + tmp1[6], tmp1[7]); + IDCT16(tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], tmp0[6], tmp0[7], + tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], tmp1[6], tmp1[7], + src[0], src[2], src[4], src[6], src[8], src[10], src[12], src[14], + src[1], src[3], src[5], src[7], src[9], src[11], src[13], src[15]); +} - // load and transform the lower half of 16x16 matrix +void vpx_idct16_vsx(int16x8_t *src0, int16x8_t *src1) { + int16x8_t tmp0[8], tmp1[8], tmp2[8], tmp3[8]; + int32x4_t temp10, temp11, temp20, temp21, temp30; + int16x8_t tmp16_0, tmp16_1; + ROUND_SHIFT_INIT; + + TRANSPOSE8x8(src0[0], src0[2], src0[4], src0[6], src0[8], src0[10], src0[12], + src0[14], tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], + tmp0[6], tmp0[7]); + TRANSPOSE8x8(src0[1], src0[3], src0[5], src0[7], src0[9], src0[11], src0[13], + src0[15], tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], + tmp1[6], tmp1[7]); + TRANSPOSE8x8(src1[0], src1[2], src1[4], src1[6], src1[8], src1[10], src1[12], + src1[14], tmp2[0], tmp2[1], tmp2[2], tmp2[3], tmp2[4], tmp2[5], + tmp2[6], tmp2[7]); + TRANSPOSE8x8(src1[1], src1[3], src1[5], src1[7], src1[9], src1[11], src1[13], + src1[15], tmp3[0], tmp3[1], tmp3[2], tmp3[3], tmp3[4], tmp3[5], + tmp3[6], tmp3[7]); + + IDCT16(tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], tmp0[6], tmp0[7], + tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], tmp1[6], tmp1[7], + src0[0], src0[2], src0[4], src0[6], src0[8], src0[10], src0[12], + src0[14], src1[0], src1[2], src1[4], src1[6], src1[8], src1[10], + src1[12], src1[14]); + + IDCT16(tmp2[0], tmp2[1], tmp2[2], tmp2[3], tmp2[4], tmp2[5], tmp2[6], tmp2[7], + tmp3[0], tmp3[1], tmp3[2], tmp3[3], tmp3[4], tmp3[5], tmp3[6], tmp3[7], + src0[1], src0[3], src0[5], src0[7], src0[9], src0[11], src0[13], + src0[15], src1[1], src1[3], src1[5], src1[7], src1[9], src1[11], + src1[13], src1[15]); +} + +void vpx_round_store16x16_vsx(int16x8_t *src0, int16x8_t *src1, uint8_t *dest, + int stride) { + uint8x16_t destv[16]; + int16x8_t d_uh, d_ul; + uint8x16_t zerov = vec_splat_u8(0); + uint16x8_t shift6 = vec_splat_u16(6); + int16x8_t add = vec_sl(vec_splat_s16(8), vec_splat_u16(2)); + + // load dest + LOAD_INPUT16(vec_vsx_ld, dest, 0, stride, destv); + + PIXEL_ADD_STORE16(src0[0], src0[1], destv[0], 0); + PIXEL_ADD_STORE16(src0[2], src0[3], destv[1], stride); + PIXEL_ADD_STORE16(src0[4], src0[5], destv[2], 2 * stride); + PIXEL_ADD_STORE16(src0[6], src0[7], destv[3], 3 * stride); + PIXEL_ADD_STORE16(src0[8], src0[9], destv[4], 4 * stride); + PIXEL_ADD_STORE16(src0[10], src0[11], destv[5], 5 * stride); + PIXEL_ADD_STORE16(src0[12], src0[13], destv[6], 6 * stride); + PIXEL_ADD_STORE16(src0[14], src0[15], destv[7], 7 * stride); + + PIXEL_ADD_STORE16(src1[0], src1[1], destv[8], 8 * stride); + PIXEL_ADD_STORE16(src1[2], src1[3], destv[9], 9 * stride); + PIXEL_ADD_STORE16(src1[4], src1[5], destv[10], 10 * stride); + PIXEL_ADD_STORE16(src1[6], src1[7], destv[11], 11 * stride); + PIXEL_ADD_STORE16(src1[8], src1[9], destv[12], 12 * stride); + 
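// Each PIXEL_ADD_STORE16 rounds a pair of residual vectors with
+  // (x + 32) >> 6, adds them to one unpacked 16-pixel destination row,
+  // then saturates and stores the row.
+  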
PIXEL_ADD_STORE16(src1[10], src1[11], destv[13], 13 * stride); + PIXEL_ADD_STORE16(src1[12], src1[13], destv[14], 14 * stride); + PIXEL_ADD_STORE16(src1[14], src1[15], destv[15], 15 * stride); +} +void vpx_idct16x16_256_add_vsx(const tran_low_t *input, uint8_t *dest, + int stride) { + int16x8_t src0[16], src1[16]; + int16x8_t tmp0[8], tmp1[8], tmp2[8], tmp3[8]; + int32x4_t temp10, temp11, temp20, temp21, temp30; + int16x8_t tmp16_0, tmp16_1; + ROUND_SHIFT_INIT; + + LOAD_INPUT16(load_tran_low, input, 0, 8 * sizeof(*input), src0); LOAD_INPUT16(load_tran_low, input, 8 * 8 * 2 * sizeof(*input), - 8 * sizeof(*input), src20, src30, src21, src31, src22, src32, - src23, src33, src24, src34, src25, src35, src26, src36, src27, - src37); - TRANSPOSE8x8(src20, src21, src22, src23, src24, src25, src26, src27, tmp20, - tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27); - TRANSPOSE8x8(src30, src31, src32, src33, src34, src35, src36, src37, tmp30, - tmp31, tmp32, tmp33, tmp34, tmp35, tmp36, tmp37); - IDCT16(tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27, tmp30, tmp31, - tmp32, tmp33, tmp34, tmp35, tmp36, tmp37, src20, src21, src22, src23, - src24, src25, src26, src27, src30, src31, src32, src33, src34, src35, - src36, src37); - TRANSPOSE8x8(src20, src21, src22, src23, src24, src25, src26, src27, tmp20, - tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27); - TRANSPOSE8x8(src30, src31, src32, src33, src34, src35, src36, src37, tmp30, - tmp31, tmp32, tmp33, tmp34, tmp35, tmp36, tmp37); + 8 * sizeof(*input), src1); + + // transform rows + // transform the upper half of 16x16 matrix + half_idct16x8_vsx(src0); + TRANSPOSE8x8(src0[0], src0[2], src0[4], src0[6], src0[8], src0[10], src0[12], + src0[14], tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], + tmp0[6], tmp0[7]); + TRANSPOSE8x8(src0[1], src0[3], src0[5], src0[7], src0[9], src0[11], src0[13], + src0[15], tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], + tmp1[6], tmp1[7]); + + // transform the lower half of 16x16 matrix + half_idct16x8_vsx(src1); + TRANSPOSE8x8(src1[0], src1[2], src1[4], src1[6], src1[8], src1[10], src1[12], + src1[14], tmp2[0], tmp2[1], tmp2[2], tmp2[3], tmp2[4], tmp2[5], + tmp2[6], tmp2[7]); + TRANSPOSE8x8(src1[1], src1[3], src1[5], src1[7], src1[9], src1[11], src1[13], + src1[15], tmp3[0], tmp3[1], tmp3[2], tmp3[3], tmp3[4], tmp3[5], + tmp3[6], tmp3[7]); // transform columns // left half first - IDCT16(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp20, tmp21, - tmp22, tmp23, tmp24, tmp25, tmp26, tmp27, src00, src01, src02, src03, - src04, src05, src06, src07, src20, src21, src22, src23, src24, src25, - src26, src27); + IDCT16(tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], tmp0[6], tmp0[7], + tmp2[0], tmp2[1], tmp2[2], tmp2[3], tmp2[4], tmp2[5], tmp2[6], tmp2[7], + src0[0], src0[2], src0[4], src0[6], src0[8], src0[10], src0[12], + src0[14], src1[0], src1[2], src1[4], src1[6], src1[8], src1[10], + src1[12], src1[14]); // right half - IDCT16(tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17, tmp30, tmp31, - tmp32, tmp33, tmp34, tmp35, tmp36, tmp37, src10, src11, src12, src13, - src14, src15, src16, src17, src30, src31, src32, src33, src34, src35, - src36, src37); + IDCT16(tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], tmp1[6], tmp1[7], + tmp3[0], tmp3[1], tmp3[2], tmp3[3], tmp3[4], tmp3[5], tmp3[6], tmp3[7], + src0[1], src0[3], src0[5], src0[7], src0[9], src0[11], src0[13], + src0[15], src1[1], src1[3], src1[5], src1[7], src1[9], src1[11], + src1[13], src1[15]); - // load dest - 
LOAD_INPUT16(vec_vsx_ld, dest, 0, stride, dest0, dest1, dest2, dest3, dest4, - dest5, dest6, dest7, dest8, dest9, destA, destB, destC, destD, - destE, destF); - - PIXEL_ADD_STORE16(src00, src10, dest0, 0); - PIXEL_ADD_STORE16(src01, src11, dest1, stride); - PIXEL_ADD_STORE16(src02, src12, dest2, 2 * stride); - PIXEL_ADD_STORE16(src03, src13, dest3, 3 * stride); - PIXEL_ADD_STORE16(src04, src14, dest4, 4 * stride); - PIXEL_ADD_STORE16(src05, src15, dest5, 5 * stride); - PIXEL_ADD_STORE16(src06, src16, dest6, 6 * stride); - PIXEL_ADD_STORE16(src07, src17, dest7, 7 * stride); - - PIXEL_ADD_STORE16(src20, src30, dest8, 8 * stride); - PIXEL_ADD_STORE16(src21, src31, dest9, 9 * stride); - PIXEL_ADD_STORE16(src22, src32, destA, 10 * stride); - PIXEL_ADD_STORE16(src23, src33, destB, 11 * stride); - PIXEL_ADD_STORE16(src24, src34, destC, 12 * stride); - PIXEL_ADD_STORE16(src25, src35, destD, 13 * stride); - PIXEL_ADD_STORE16(src26, src36, destE, 14 * stride); - PIXEL_ADD_STORE16(src27, src37, destF, 15 * stride); + vpx_round_store16x16_vsx(src0, src1, dest, stride); } #define LOAD_8x32(load, in00, in01, in02, in03, in10, in11, in12, in13, in20, \ @@ -980,15 +1074,15 @@ void vpx_idct16x16_256_add_vsx(const tran_low_t *input, uint8_t *dest, PIXEL_ADD(in3, d_ul, add, shift6); \ vec_vsx_st(vec_packsu(d_uh, d_ul), (step)*stride + 16, dest); -#define ADD_STORE_BLOCK(in, offset) \ - PIXEL_ADD_STORE32(in[0][0], in[1][0], in[2][0], in[3][0], offset + 0); \ - PIXEL_ADD_STORE32(in[0][1], in[1][1], in[2][1], in[3][1], offset + 1); \ - PIXEL_ADD_STORE32(in[0][2], in[1][2], in[2][2], in[3][2], offset + 2); \ - PIXEL_ADD_STORE32(in[0][3], in[1][3], in[2][3], in[3][3], offset + 3); \ - PIXEL_ADD_STORE32(in[0][4], in[1][4], in[2][4], in[3][4], offset + 4); \ - PIXEL_ADD_STORE32(in[0][5], in[1][5], in[2][5], in[3][5], offset + 5); \ - PIXEL_ADD_STORE32(in[0][6], in[1][6], in[2][6], in[3][6], offset + 6); \ - PIXEL_ADD_STORE32(in[0][7], in[1][7], in[2][7], in[3][7], offset + 7); +#define ADD_STORE_BLOCK(in, offset) \ + PIXEL_ADD_STORE32(in[0][0], in[1][0], in[2][0], in[3][0], (offset) + 0); \ + PIXEL_ADD_STORE32(in[0][1], in[1][1], in[2][1], in[3][1], (offset) + 1); \ + PIXEL_ADD_STORE32(in[0][2], in[1][2], in[2][2], in[3][2], (offset) + 2); \ + PIXEL_ADD_STORE32(in[0][3], in[1][3], in[2][3], in[3][3], (offset) + 3); \ + PIXEL_ADD_STORE32(in[0][4], in[1][4], in[2][4], in[3][4], (offset) + 4); \ + PIXEL_ADD_STORE32(in[0][5], in[1][5], in[2][5], in[3][5], (offset) + 5); \ + PIXEL_ADD_STORE32(in[0][6], in[1][6], in[2][6], in[3][6], (offset) + 6); \ + PIXEL_ADD_STORE32(in[0][7], in[1][7], in[2][7], in[3][7], (offset) + 7); void vpx_idct32x32_1024_add_vsx(const tran_low_t *input, uint8_t *dest, int stride) { @@ -1061,3 +1155,674 @@ void vpx_idct32x32_1024_add_vsx(const tran_low_t *input, uint8_t *dest, ADD_STORE_BLOCK(src2, 16); ADD_STORE_BLOCK(src3, 24); } + +#define TRANSFORM_COLS \ + v32_a = vec_add(v32_a, v32_c); \ + v32_d = vec_sub(v32_d, v32_b); \ + v32_e = vec_sub(v32_a, v32_d); \ + v32_e = vec_sra(v32_e, one); \ + v32_b = vec_sub(v32_e, v32_b); \ + v32_c = vec_sub(v32_e, v32_c); \ + v32_a = vec_sub(v32_a, v32_b); \ + v32_d = vec_add(v32_d, v32_c); \ + v_a = vec_packs(v32_a, v32_b); \ + v_c = vec_packs(v32_c, v32_d); + +#define TRANSPOSE_WHT \ + tmp_a = vec_mergeh(v_a, v_c); \ + tmp_c = vec_mergel(v_a, v_c); \ + v_a = vec_mergeh(tmp_a, tmp_c); \ + v_c = vec_mergel(tmp_a, tmp_c); + +void vpx_iwht4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, + int stride) { + int16x8_t v_a = load_tran_low(0, input); + 
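// The scalar reference shifts the iwht input right by UNIT_QUANT_SHIFT
+  // (2) before transforming, hence the vec_sra by two below; the transform
+  // itself needs only adds, subtracts and one halving shift per pass.
+  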
int16x8_t v_c = load_tran_low(8 * sizeof(*input), input); + int16x8_t tmp_a, tmp_c; + uint16x8_t two = vec_splat_u16(2); + uint32x4_t one = vec_splat_u32(1); + int16x8_t tmp16_0, tmp16_1; + int32x4_t v32_a, v32_c, v32_d, v32_b, v32_e; + uint8x16_t dest0 = vec_vsx_ld(0, dest); + uint8x16_t dest1 = vec_vsx_ld(stride, dest); + uint8x16_t dest2 = vec_vsx_ld(2 * stride, dest); + uint8x16_t dest3 = vec_vsx_ld(3 * stride, dest); + int16x8_t d_u0 = (int16x8_t)unpack_to_u16_h(dest0); + int16x8_t d_u1 = (int16x8_t)unpack_to_u16_h(dest1); + int16x8_t d_u2 = (int16x8_t)unpack_to_u16_h(dest2); + int16x8_t d_u3 = (int16x8_t)unpack_to_u16_h(dest3); + uint8x16_t output_v; + uint8_t tmp_dest[16]; + int i, j; + + v_a = vec_sra(v_a, two); + v_c = vec_sra(v_c, two); + + TRANSPOSE_WHT; + + v32_a = vec_unpackh(v_a); + v32_c = vec_unpackl(v_a); + + v32_d = vec_unpackh(v_c); + v32_b = vec_unpackl(v_c); + + TRANSFORM_COLS; + + TRANSPOSE_WHT; + + v32_a = vec_unpackh(v_a); + v32_c = vec_unpackl(v_a); + v32_d = vec_unpackh(v_c); + v32_b = vec_unpackl(v_c); + + TRANSFORM_COLS; + + PACK_STORE(v_a, v_c); +} + +void vp9_iadst4_vsx(int16x8_t *in, int16x8_t *out) { + int16x8_t sinpi_1_3_v, sinpi_4_2_v, sinpi_2_3_v, sinpi_1_4_v, sinpi_12_n3_v; + int32x4_t v_v[5], u_v[4]; + int32x4_t zerov = vec_splat_s32(0); + int16x8_t tmp0, tmp1; + int16x8_t zero16v = vec_splat_s16(0); + uint32x4_t shift16 = vec_sl(vec_splat_u32(8), vec_splat_u32(1)); + ROUND_SHIFT_INIT; + + sinpi_1_3_v = vec_mergel(sinpi_1_9_v, sinpi_3_9_v); + sinpi_4_2_v = vec_mergel(sinpi_4_9_v, sinpi_2_9_v); + sinpi_2_3_v = vec_mergel(sinpi_2_9_v, sinpi_3_9_v); + sinpi_1_4_v = vec_mergel(sinpi_1_9_v, sinpi_4_9_v); + sinpi_12_n3_v = vec_mergel(vec_add(sinpi_1_9_v, sinpi_2_9_v), + vec_sub(zero16v, sinpi_3_9_v)); + + tmp0 = (int16x8_t)vec_mergeh((int32x4_t)in[0], (int32x4_t)in[1]); + tmp1 = (int16x8_t)vec_mergel((int32x4_t)in[0], (int32x4_t)in[1]); + in[0] = (int16x8_t)vec_mergeh((int32x4_t)tmp0, (int32x4_t)tmp1); + in[1] = (int16x8_t)vec_mergel((int32x4_t)tmp0, (int32x4_t)tmp1); + + v_v[0] = vec_msum(in[0], sinpi_1_3_v, zerov); + v_v[1] = vec_msum(in[1], sinpi_4_2_v, zerov); + v_v[2] = vec_msum(in[0], sinpi_2_3_v, zerov); + v_v[3] = vec_msum(in[1], sinpi_1_4_v, zerov); + v_v[4] = vec_msum(in[0], sinpi_12_n3_v, zerov); + + in[0] = vec_sub(in[0], in[1]); + in[1] = (int16x8_t)vec_sra((int32x4_t)in[1], shift16); + in[0] = vec_add(in[0], in[1]); + in[0] = (int16x8_t)vec_sl((int32x4_t)in[0], shift16); + + u_v[0] = vec_add(v_v[0], v_v[1]); + u_v[1] = vec_sub(v_v[2], v_v[3]); + u_v[2] = vec_msum(in[0], sinpi_1_3_v, zerov); + u_v[3] = vec_sub(v_v[1], v_v[3]); + u_v[3] = vec_add(u_v[3], v_v[4]); + + DCT_CONST_ROUND_SHIFT(u_v[0]); + DCT_CONST_ROUND_SHIFT(u_v[1]); + DCT_CONST_ROUND_SHIFT(u_v[2]); + DCT_CONST_ROUND_SHIFT(u_v[3]); + + out[0] = vec_packs(u_v[0], u_v[1]); + out[1] = vec_packs(u_v[2], u_v[3]); +} + +#define MSUM_ROUND_SHIFT(a, b, cospi) \ + b = vec_msums(a, cospi, zerov); \ + DCT_CONST_ROUND_SHIFT(b); + +#define IADST_WRAPLOW(in0, in1, tmp0, tmp1, out, cospi) \ + MSUM_ROUND_SHIFT(in0, tmp0, cospi); \ + MSUM_ROUND_SHIFT(in1, tmp1, cospi); \ + out = vec_packs(tmp0, tmp1); + +void vp9_iadst8_vsx(int16x8_t *in, int16x8_t *out) { + int32x4_t tmp0[16], tmp1[16]; + + int32x4_t zerov = vec_splat_s32(0); + int16x8_t zero16v = vec_splat_s16(0); + int16x8_t cospi_p02_p30_v = vec_mergel(cospi2_v, cospi30_v); + int16x8_t cospi_p30_m02_v = vec_mergel(cospi30_v, cospi2m_v); + int16x8_t cospi_p10_p22_v = vec_mergel(cospi10_v, cospi22_v); + int16x8_t cospi_p22_m10_v = 
vec_mergel(cospi22_v, cospi10m_v); + int16x8_t cospi_p18_p14_v = vec_mergel(cospi18_v, cospi14_v); + int16x8_t cospi_p14_m18_v = vec_mergel(cospi14_v, cospi18m_v); + int16x8_t cospi_p26_p06_v = vec_mergel(cospi26_v, cospi6_v); + int16x8_t cospi_p06_m26_v = vec_mergel(cospi6_v, cospi26m_v); + int16x8_t cospi_p08_p24_v = vec_mergel(cospi8_v, cospi24_v); + int16x8_t cospi_p24_m08_v = vec_mergel(cospi24_v, cospi8m_v); + int16x8_t cospi_m24_p08_v = vec_mergel(cospi24m_v, cospi8_v); + int16x8_t cospi_p16_m16_v = vec_mergel(cospi16_v, cospi16m_v); + ROUND_SHIFT_INIT; + + TRANSPOSE8x8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], out[0], + out[1], out[2], out[3], out[4], out[5], out[6], out[7]); + + // stage 1 + // interleave and multiply/add into 32-bit integer + in[0] = vec_mergeh(out[7], out[0]); + in[1] = vec_mergel(out[7], out[0]); + in[2] = vec_mergeh(out[5], out[2]); + in[3] = vec_mergel(out[5], out[2]); + in[4] = vec_mergeh(out[3], out[4]); + in[5] = vec_mergel(out[3], out[4]); + in[6] = vec_mergeh(out[1], out[6]); + in[7] = vec_mergel(out[1], out[6]); + + tmp1[0] = vec_msum(in[0], cospi_p02_p30_v, zerov); + tmp1[1] = vec_msum(in[1], cospi_p02_p30_v, zerov); + tmp1[2] = vec_msum(in[0], cospi_p30_m02_v, zerov); + tmp1[3] = vec_msum(in[1], cospi_p30_m02_v, zerov); + tmp1[4] = vec_msum(in[2], cospi_p10_p22_v, zerov); + tmp1[5] = vec_msum(in[3], cospi_p10_p22_v, zerov); + tmp1[6] = vec_msum(in[2], cospi_p22_m10_v, zerov); + tmp1[7] = vec_msum(in[3], cospi_p22_m10_v, zerov); + tmp1[8] = vec_msum(in[4], cospi_p18_p14_v, zerov); + tmp1[9] = vec_msum(in[5], cospi_p18_p14_v, zerov); + tmp1[10] = vec_msum(in[4], cospi_p14_m18_v, zerov); + tmp1[11] = vec_msum(in[5], cospi_p14_m18_v, zerov); + tmp1[12] = vec_msum(in[6], cospi_p26_p06_v, zerov); + tmp1[13] = vec_msum(in[7], cospi_p26_p06_v, zerov); + tmp1[14] = vec_msum(in[6], cospi_p06_m26_v, zerov); + tmp1[15] = vec_msum(in[7], cospi_p06_m26_v, zerov); + + tmp0[0] = vec_add(tmp1[0], tmp1[8]); + tmp0[1] = vec_add(tmp1[1], tmp1[9]); + tmp0[2] = vec_add(tmp1[2], tmp1[10]); + tmp0[3] = vec_add(tmp1[3], tmp1[11]); + tmp0[4] = vec_add(tmp1[4], tmp1[12]); + tmp0[5] = vec_add(tmp1[5], tmp1[13]); + tmp0[6] = vec_add(tmp1[6], tmp1[14]); + tmp0[7] = vec_add(tmp1[7], tmp1[15]); + tmp0[8] = vec_sub(tmp1[0], tmp1[8]); + tmp0[9] = vec_sub(tmp1[1], tmp1[9]); + tmp0[10] = vec_sub(tmp1[2], tmp1[10]); + tmp0[11] = vec_sub(tmp1[3], tmp1[11]); + tmp0[12] = vec_sub(tmp1[4], tmp1[12]); + tmp0[13] = vec_sub(tmp1[5], tmp1[13]); + tmp0[14] = vec_sub(tmp1[6], tmp1[14]); + tmp0[15] = vec_sub(tmp1[7], tmp1[15]); + + // shift and rounding + DCT_CONST_ROUND_SHIFT(tmp0[0]); + DCT_CONST_ROUND_SHIFT(tmp0[1]); + DCT_CONST_ROUND_SHIFT(tmp0[2]); + DCT_CONST_ROUND_SHIFT(tmp0[3]); + DCT_CONST_ROUND_SHIFT(tmp0[4]); + DCT_CONST_ROUND_SHIFT(tmp0[5]); + DCT_CONST_ROUND_SHIFT(tmp0[6]); + DCT_CONST_ROUND_SHIFT(tmp0[7]); + DCT_CONST_ROUND_SHIFT(tmp0[8]); + DCT_CONST_ROUND_SHIFT(tmp0[9]); + DCT_CONST_ROUND_SHIFT(tmp0[10]); + DCT_CONST_ROUND_SHIFT(tmp0[11]); + DCT_CONST_ROUND_SHIFT(tmp0[12]); + DCT_CONST_ROUND_SHIFT(tmp0[13]); + DCT_CONST_ROUND_SHIFT(tmp0[14]); + DCT_CONST_ROUND_SHIFT(tmp0[15]); + + // back to 16-bit + out[0] = vec_packs(tmp0[0], tmp0[1]); + out[1] = vec_packs(tmp0[2], tmp0[3]); + out[2] = vec_packs(tmp0[4], tmp0[5]); + out[3] = vec_packs(tmp0[6], tmp0[7]); + out[4] = vec_packs(tmp0[8], tmp0[9]); + out[5] = vec_packs(tmp0[10], tmp0[11]); + out[6] = vec_packs(tmp0[12], tmp0[13]); + out[7] = vec_packs(tmp0[14], tmp0[15]); + + // stage 2 + in[0] = vec_add(out[0], out[2]); + 
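// In stage 2 the first four terms need only add/sub butterflies, while
+  // the last four are interleaved and rotated by (cospi8, cospi24) pairs.
+  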
in[1] = vec_add(out[1], out[3]); + in[2] = vec_sub(out[0], out[2]); + in[3] = vec_sub(out[1], out[3]); + in[4] = vec_mergeh(out[4], out[5]); + in[5] = vec_mergel(out[4], out[5]); + in[6] = vec_mergeh(out[6], out[7]); + in[7] = vec_mergel(out[6], out[7]); + + tmp1[0] = vec_msum(in[4], cospi_p08_p24_v, zerov); + tmp1[1] = vec_msum(in[5], cospi_p08_p24_v, zerov); + tmp1[2] = vec_msum(in[4], cospi_p24_m08_v, zerov); + tmp1[3] = vec_msum(in[5], cospi_p24_m08_v, zerov); + tmp1[4] = vec_msum(in[6], cospi_m24_p08_v, zerov); + tmp1[5] = vec_msum(in[7], cospi_m24_p08_v, zerov); + tmp1[6] = vec_msum(in[6], cospi_p08_p24_v, zerov); + tmp1[7] = vec_msum(in[7], cospi_p08_p24_v, zerov); + + tmp0[0] = vec_add(tmp1[0], tmp1[4]); + tmp0[1] = vec_add(tmp1[1], tmp1[5]); + tmp0[2] = vec_add(tmp1[2], tmp1[6]); + tmp0[3] = vec_add(tmp1[3], tmp1[7]); + tmp0[4] = vec_sub(tmp1[0], tmp1[4]); + tmp0[5] = vec_sub(tmp1[1], tmp1[5]); + tmp0[6] = vec_sub(tmp1[2], tmp1[6]); + tmp0[7] = vec_sub(tmp1[3], tmp1[7]); + + DCT_CONST_ROUND_SHIFT(tmp0[0]); + DCT_CONST_ROUND_SHIFT(tmp0[1]); + DCT_CONST_ROUND_SHIFT(tmp0[2]); + DCT_CONST_ROUND_SHIFT(tmp0[3]); + DCT_CONST_ROUND_SHIFT(tmp0[4]); + DCT_CONST_ROUND_SHIFT(tmp0[5]); + DCT_CONST_ROUND_SHIFT(tmp0[6]); + DCT_CONST_ROUND_SHIFT(tmp0[7]); + + in[4] = vec_packs(tmp0[0], tmp0[1]); + in[5] = vec_packs(tmp0[2], tmp0[3]); + in[6] = vec_packs(tmp0[4], tmp0[5]); + in[7] = vec_packs(tmp0[6], tmp0[7]); + + // stage 3 + out[0] = vec_mergeh(in[2], in[3]); + out[1] = vec_mergel(in[2], in[3]); + out[2] = vec_mergeh(in[6], in[7]); + out[3] = vec_mergel(in[6], in[7]); + + IADST_WRAPLOW(out[0], out[1], tmp0[0], tmp0[1], in[2], cospi16_v); + IADST_WRAPLOW(out[0], out[1], tmp0[0], tmp0[1], in[3], cospi_p16_m16_v); + IADST_WRAPLOW(out[2], out[3], tmp0[0], tmp0[1], in[6], cospi16_v); + IADST_WRAPLOW(out[2], out[3], tmp0[0], tmp0[1], in[7], cospi_p16_m16_v); + + out[0] = in[0]; + out[2] = in[6]; + out[4] = in[3]; + out[6] = in[5]; + + out[1] = vec_sub(zero16v, in[4]); + out[3] = vec_sub(zero16v, in[2]); + out[5] = vec_sub(zero16v, in[7]); + out[7] = vec_sub(zero16v, in[1]); +} + +static void iadst16x8_vsx(int16x8_t *in, int16x8_t *out) { + int32x4_t tmp0[32], tmp1[32]; + int16x8_t tmp16_0[8]; + int16x8_t cospi_p01_p31 = vec_mergel(cospi1_v, cospi31_v); + int16x8_t cospi_p31_m01 = vec_mergel(cospi31_v, cospi1m_v); + int16x8_t cospi_p05_p27 = vec_mergel(cospi5_v, cospi27_v); + int16x8_t cospi_p27_m05 = vec_mergel(cospi27_v, cospi5m_v); + int16x8_t cospi_p09_p23 = vec_mergel(cospi9_v, cospi23_v); + int16x8_t cospi_p23_m09 = vec_mergel(cospi23_v, cospi9m_v); + int16x8_t cospi_p13_p19 = vec_mergel(cospi13_v, cospi19_v); + int16x8_t cospi_p19_m13 = vec_mergel(cospi19_v, cospi13m_v); + int16x8_t cospi_p17_p15 = vec_mergel(cospi17_v, cospi15_v); + int16x8_t cospi_p15_m17 = vec_mergel(cospi15_v, cospi17m_v); + int16x8_t cospi_p21_p11 = vec_mergel(cospi21_v, cospi11_v); + int16x8_t cospi_p11_m21 = vec_mergel(cospi11_v, cospi21m_v); + int16x8_t cospi_p25_p07 = vec_mergel(cospi25_v, cospi7_v); + int16x8_t cospi_p07_m25 = vec_mergel(cospi7_v, cospi25m_v); + int16x8_t cospi_p29_p03 = vec_mergel(cospi29_v, cospi3_v); + int16x8_t cospi_p03_m29 = vec_mergel(cospi3_v, cospi29m_v); + int16x8_t cospi_p04_p28 = vec_mergel(cospi4_v, cospi28_v); + int16x8_t cospi_p28_m04 = vec_mergel(cospi28_v, cospi4m_v); + int16x8_t cospi_p20_p12 = vec_mergel(cospi20_v, cospi12_v); + int16x8_t cospi_p12_m20 = vec_mergel(cospi12_v, cospi20m_v); + int16x8_t cospi_m28_p04 = vec_mergel(cospi28m_v, cospi4_v); + int16x8_t cospi_m12_p20 = 
vec_mergel(cospi12m_v, cospi20_v); + int16x8_t cospi_p08_p24 = vec_mergel(cospi8_v, cospi24_v); + int16x8_t cospi_p24_m08 = vec_mergel(cospi24_v, cospi8m_v); + int16x8_t cospi_m24_p08 = vec_mergel(cospi24m_v, cospi8_v); + int32x4_t zerov = vec_splat_s32(0); + ROUND_SHIFT_INIT; + + tmp16_0[0] = vec_mergeh(in[15], in[0]); + tmp16_0[1] = vec_mergel(in[15], in[0]); + tmp16_0[2] = vec_mergeh(in[13], in[2]); + tmp16_0[3] = vec_mergel(in[13], in[2]); + tmp16_0[4] = vec_mergeh(in[11], in[4]); + tmp16_0[5] = vec_mergel(in[11], in[4]); + tmp16_0[6] = vec_mergeh(in[9], in[6]); + tmp16_0[7] = vec_mergel(in[9], in[6]); + tmp16_0[8] = vec_mergeh(in[7], in[8]); + tmp16_0[9] = vec_mergel(in[7], in[8]); + tmp16_0[10] = vec_mergeh(in[5], in[10]); + tmp16_0[11] = vec_mergel(in[5], in[10]); + tmp16_0[12] = vec_mergeh(in[3], in[12]); + tmp16_0[13] = vec_mergel(in[3], in[12]); + tmp16_0[14] = vec_mergeh(in[1], in[14]); + tmp16_0[15] = vec_mergel(in[1], in[14]); + + tmp0[0] = vec_msum(tmp16_0[0], cospi_p01_p31, zerov); + tmp0[1] = vec_msum(tmp16_0[1], cospi_p01_p31, zerov); + tmp0[2] = vec_msum(tmp16_0[0], cospi_p31_m01, zerov); + tmp0[3] = vec_msum(tmp16_0[1], cospi_p31_m01, zerov); + tmp0[4] = vec_msum(tmp16_0[2], cospi_p05_p27, zerov); + tmp0[5] = vec_msum(tmp16_0[3], cospi_p05_p27, zerov); + tmp0[6] = vec_msum(tmp16_0[2], cospi_p27_m05, zerov); + tmp0[7] = vec_msum(tmp16_0[3], cospi_p27_m05, zerov); + tmp0[8] = vec_msum(tmp16_0[4], cospi_p09_p23, zerov); + tmp0[9] = vec_msum(tmp16_0[5], cospi_p09_p23, zerov); + tmp0[10] = vec_msum(tmp16_0[4], cospi_p23_m09, zerov); + tmp0[11] = vec_msum(tmp16_0[5], cospi_p23_m09, zerov); + tmp0[12] = vec_msum(tmp16_0[6], cospi_p13_p19, zerov); + tmp0[13] = vec_msum(tmp16_0[7], cospi_p13_p19, zerov); + tmp0[14] = vec_msum(tmp16_0[6], cospi_p19_m13, zerov); + tmp0[15] = vec_msum(tmp16_0[7], cospi_p19_m13, zerov); + tmp0[16] = vec_msum(tmp16_0[8], cospi_p17_p15, zerov); + tmp0[17] = vec_msum(tmp16_0[9], cospi_p17_p15, zerov); + tmp0[18] = vec_msum(tmp16_0[8], cospi_p15_m17, zerov); + tmp0[19] = vec_msum(tmp16_0[9], cospi_p15_m17, zerov); + tmp0[20] = vec_msum(tmp16_0[10], cospi_p21_p11, zerov); + tmp0[21] = vec_msum(tmp16_0[11], cospi_p21_p11, zerov); + tmp0[22] = vec_msum(tmp16_0[10], cospi_p11_m21, zerov); + tmp0[23] = vec_msum(tmp16_0[11], cospi_p11_m21, zerov); + tmp0[24] = vec_msum(tmp16_0[12], cospi_p25_p07, zerov); + tmp0[25] = vec_msum(tmp16_0[13], cospi_p25_p07, zerov); + tmp0[26] = vec_msum(tmp16_0[12], cospi_p07_m25, zerov); + tmp0[27] = vec_msum(tmp16_0[13], cospi_p07_m25, zerov); + tmp0[28] = vec_msum(tmp16_0[14], cospi_p29_p03, zerov); + tmp0[29] = vec_msum(tmp16_0[15], cospi_p29_p03, zerov); + tmp0[30] = vec_msum(tmp16_0[14], cospi_p03_m29, zerov); + tmp0[31] = vec_msum(tmp16_0[15], cospi_p03_m29, zerov); + + tmp1[0] = vec_add(tmp0[0], tmp0[16]); + tmp1[1] = vec_add(tmp0[1], tmp0[17]); + tmp1[2] = vec_add(tmp0[2], tmp0[18]); + tmp1[3] = vec_add(tmp0[3], tmp0[19]); + tmp1[4] = vec_add(tmp0[4], tmp0[20]); + tmp1[5] = vec_add(tmp0[5], tmp0[21]); + tmp1[6] = vec_add(tmp0[6], tmp0[22]); + tmp1[7] = vec_add(tmp0[7], tmp0[23]); + tmp1[8] = vec_add(tmp0[8], tmp0[24]); + tmp1[9] = vec_add(tmp0[9], tmp0[25]); + tmp1[10] = vec_add(tmp0[10], tmp0[26]); + tmp1[11] = vec_add(tmp0[11], tmp0[27]); + tmp1[12] = vec_add(tmp0[12], tmp0[28]); + tmp1[13] = vec_add(tmp0[13], tmp0[29]); + tmp1[14] = vec_add(tmp0[14], tmp0[30]); + tmp1[15] = vec_add(tmp0[15], tmp0[31]); + tmp1[16] = vec_sub(tmp0[0], tmp0[16]); + tmp1[17] = vec_sub(tmp0[1], tmp0[17]); + tmp1[18] = vec_sub(tmp0[2], 
tmp0[18]); + tmp1[19] = vec_sub(tmp0[3], tmp0[19]); + tmp1[20] = vec_sub(tmp0[4], tmp0[20]); + tmp1[21] = vec_sub(tmp0[5], tmp0[21]); + tmp1[22] = vec_sub(tmp0[6], tmp0[22]); + tmp1[23] = vec_sub(tmp0[7], tmp0[23]); + tmp1[24] = vec_sub(tmp0[8], tmp0[24]); + tmp1[25] = vec_sub(tmp0[9], tmp0[25]); + tmp1[26] = vec_sub(tmp0[10], tmp0[26]); + tmp1[27] = vec_sub(tmp0[11], tmp0[27]); + tmp1[28] = vec_sub(tmp0[12], tmp0[28]); + tmp1[29] = vec_sub(tmp0[13], tmp0[29]); + tmp1[30] = vec_sub(tmp0[14], tmp0[30]); + tmp1[31] = vec_sub(tmp0[15], tmp0[31]); + + DCT_CONST_ROUND_SHIFT(tmp1[0]); + DCT_CONST_ROUND_SHIFT(tmp1[1]); + DCT_CONST_ROUND_SHIFT(tmp1[2]); + DCT_CONST_ROUND_SHIFT(tmp1[3]); + DCT_CONST_ROUND_SHIFT(tmp1[4]); + DCT_CONST_ROUND_SHIFT(tmp1[5]); + DCT_CONST_ROUND_SHIFT(tmp1[6]); + DCT_CONST_ROUND_SHIFT(tmp1[7]); + DCT_CONST_ROUND_SHIFT(tmp1[8]); + DCT_CONST_ROUND_SHIFT(tmp1[9]); + DCT_CONST_ROUND_SHIFT(tmp1[10]); + DCT_CONST_ROUND_SHIFT(tmp1[11]); + DCT_CONST_ROUND_SHIFT(tmp1[12]); + DCT_CONST_ROUND_SHIFT(tmp1[13]); + DCT_CONST_ROUND_SHIFT(tmp1[14]); + DCT_CONST_ROUND_SHIFT(tmp1[15]); + DCT_CONST_ROUND_SHIFT(tmp1[16]); + DCT_CONST_ROUND_SHIFT(tmp1[17]); + DCT_CONST_ROUND_SHIFT(tmp1[18]); + DCT_CONST_ROUND_SHIFT(tmp1[19]); + DCT_CONST_ROUND_SHIFT(tmp1[20]); + DCT_CONST_ROUND_SHIFT(tmp1[21]); + DCT_CONST_ROUND_SHIFT(tmp1[22]); + DCT_CONST_ROUND_SHIFT(tmp1[23]); + DCT_CONST_ROUND_SHIFT(tmp1[24]); + DCT_CONST_ROUND_SHIFT(tmp1[25]); + DCT_CONST_ROUND_SHIFT(tmp1[26]); + DCT_CONST_ROUND_SHIFT(tmp1[27]); + DCT_CONST_ROUND_SHIFT(tmp1[28]); + DCT_CONST_ROUND_SHIFT(tmp1[29]); + DCT_CONST_ROUND_SHIFT(tmp1[30]); + DCT_CONST_ROUND_SHIFT(tmp1[31]); + + in[0] = vec_packs(tmp1[0], tmp1[1]); + in[1] = vec_packs(tmp1[2], tmp1[3]); + in[2] = vec_packs(tmp1[4], tmp1[5]); + in[3] = vec_packs(tmp1[6], tmp1[7]); + in[4] = vec_packs(tmp1[8], tmp1[9]); + in[5] = vec_packs(tmp1[10], tmp1[11]); + in[6] = vec_packs(tmp1[12], tmp1[13]); + in[7] = vec_packs(tmp1[14], tmp1[15]); + in[8] = vec_packs(tmp1[16], tmp1[17]); + in[9] = vec_packs(tmp1[18], tmp1[19]); + in[10] = vec_packs(tmp1[20], tmp1[21]); + in[11] = vec_packs(tmp1[22], tmp1[23]); + in[12] = vec_packs(tmp1[24], tmp1[25]); + in[13] = vec_packs(tmp1[26], tmp1[27]); + in[14] = vec_packs(tmp1[28], tmp1[29]); + in[15] = vec_packs(tmp1[30], tmp1[31]); + + // stage 2 + tmp16_0[0] = vec_mergeh(in[8], in[9]); + tmp16_0[1] = vec_mergel(in[8], in[9]); + tmp16_0[2] = vec_mergeh(in[10], in[11]); + tmp16_0[3] = vec_mergel(in[10], in[11]); + tmp16_0[4] = vec_mergeh(in[12], in[13]); + tmp16_0[5] = vec_mergel(in[12], in[13]); + tmp16_0[6] = vec_mergeh(in[14], in[15]); + tmp16_0[7] = vec_mergel(in[14], in[15]); + + tmp0[0] = vec_msum(tmp16_0[0], cospi_p04_p28, zerov); + tmp0[1] = vec_msum(tmp16_0[1], cospi_p04_p28, zerov); + tmp0[2] = vec_msum(tmp16_0[0], cospi_p28_m04, zerov); + tmp0[3] = vec_msum(tmp16_0[1], cospi_p28_m04, zerov); + tmp0[4] = vec_msum(tmp16_0[2], cospi_p20_p12, zerov); + tmp0[5] = vec_msum(tmp16_0[3], cospi_p20_p12, zerov); + tmp0[6] = vec_msum(tmp16_0[2], cospi_p12_m20, zerov); + tmp0[7] = vec_msum(tmp16_0[3], cospi_p12_m20, zerov); + tmp0[8] = vec_msum(tmp16_0[4], cospi_m28_p04, zerov); + tmp0[9] = vec_msum(tmp16_0[5], cospi_m28_p04, zerov); + tmp0[10] = vec_msum(tmp16_0[4], cospi_p04_p28, zerov); + tmp0[11] = vec_msum(tmp16_0[5], cospi_p04_p28, zerov); + tmp0[12] = vec_msum(tmp16_0[6], cospi_m12_p20, zerov); + tmp0[13] = vec_msum(tmp16_0[7], cospi_m12_p20, zerov); + tmp0[14] = vec_msum(tmp16_0[6], cospi_p20_p12, zerov); + tmp0[15] = vec_msum(tmp16_0[7], 
cospi_p20_p12, zerov); + + tmp1[0] = vec_add(tmp0[0], tmp0[8]); + tmp1[1] = vec_add(tmp0[1], tmp0[9]); + tmp1[2] = vec_add(tmp0[2], tmp0[10]); + tmp1[3] = vec_add(tmp0[3], tmp0[11]); + tmp1[4] = vec_add(tmp0[4], tmp0[12]); + tmp1[5] = vec_add(tmp0[5], tmp0[13]); + tmp1[6] = vec_add(tmp0[6], tmp0[14]); + tmp1[7] = vec_add(tmp0[7], tmp0[15]); + tmp1[8] = vec_sub(tmp0[0], tmp0[8]); + tmp1[9] = vec_sub(tmp0[1], tmp0[9]); + tmp1[10] = vec_sub(tmp0[2], tmp0[10]); + tmp1[11] = vec_sub(tmp0[3], tmp0[11]); + tmp1[12] = vec_sub(tmp0[4], tmp0[12]); + tmp1[13] = vec_sub(tmp0[5], tmp0[13]); + tmp1[14] = vec_sub(tmp0[6], tmp0[14]); + tmp1[15] = vec_sub(tmp0[7], tmp0[15]); + + DCT_CONST_ROUND_SHIFT(tmp1[0]); + DCT_CONST_ROUND_SHIFT(tmp1[1]); + DCT_CONST_ROUND_SHIFT(tmp1[2]); + DCT_CONST_ROUND_SHIFT(tmp1[3]); + DCT_CONST_ROUND_SHIFT(tmp1[4]); + DCT_CONST_ROUND_SHIFT(tmp1[5]); + DCT_CONST_ROUND_SHIFT(tmp1[6]); + DCT_CONST_ROUND_SHIFT(tmp1[7]); + DCT_CONST_ROUND_SHIFT(tmp1[8]); + DCT_CONST_ROUND_SHIFT(tmp1[9]); + DCT_CONST_ROUND_SHIFT(tmp1[10]); + DCT_CONST_ROUND_SHIFT(tmp1[11]); + DCT_CONST_ROUND_SHIFT(tmp1[12]); + DCT_CONST_ROUND_SHIFT(tmp1[13]); + DCT_CONST_ROUND_SHIFT(tmp1[14]); + DCT_CONST_ROUND_SHIFT(tmp1[15]); + + tmp16_0[0] = vec_add(in[0], in[4]); + tmp16_0[1] = vec_add(in[1], in[5]); + tmp16_0[2] = vec_add(in[2], in[6]); + tmp16_0[3] = vec_add(in[3], in[7]); + tmp16_0[4] = vec_sub(in[0], in[4]); + tmp16_0[5] = vec_sub(in[1], in[5]); + tmp16_0[6] = vec_sub(in[2], in[6]); + tmp16_0[7] = vec_sub(in[3], in[7]); + tmp16_0[8] = vec_packs(tmp1[0], tmp1[1]); + tmp16_0[9] = vec_packs(tmp1[2], tmp1[3]); + tmp16_0[10] = vec_packs(tmp1[4], tmp1[5]); + tmp16_0[11] = vec_packs(tmp1[6], tmp1[7]); + tmp16_0[12] = vec_packs(tmp1[8], tmp1[9]); + tmp16_0[13] = vec_packs(tmp1[10], tmp1[11]); + tmp16_0[14] = vec_packs(tmp1[12], tmp1[13]); + tmp16_0[15] = vec_packs(tmp1[14], tmp1[15]); + + // stage 3 + in[0] = vec_mergeh(tmp16_0[4], tmp16_0[5]); + in[1] = vec_mergel(tmp16_0[4], tmp16_0[5]); + in[2] = vec_mergeh(tmp16_0[6], tmp16_0[7]); + in[3] = vec_mergel(tmp16_0[6], tmp16_0[7]); + in[4] = vec_mergeh(tmp16_0[12], tmp16_0[13]); + in[5] = vec_mergel(tmp16_0[12], tmp16_0[13]); + in[6] = vec_mergeh(tmp16_0[14], tmp16_0[15]); + in[7] = vec_mergel(tmp16_0[14], tmp16_0[15]); + + tmp0[0] = vec_msum(in[0], cospi_p08_p24, zerov); + tmp0[1] = vec_msum(in[1], cospi_p08_p24, zerov); + tmp0[2] = vec_msum(in[0], cospi_p24_m08, zerov); + tmp0[3] = vec_msum(in[1], cospi_p24_m08, zerov); + tmp0[4] = vec_msum(in[2], cospi_m24_p08, zerov); + tmp0[5] = vec_msum(in[3], cospi_m24_p08, zerov); + tmp0[6] = vec_msum(in[2], cospi_p08_p24, zerov); + tmp0[7] = vec_msum(in[3], cospi_p08_p24, zerov); + tmp0[8] = vec_msum(in[4], cospi_p08_p24, zerov); + tmp0[9] = vec_msum(in[5], cospi_p08_p24, zerov); + tmp0[10] = vec_msum(in[4], cospi_p24_m08, zerov); + tmp0[11] = vec_msum(in[5], cospi_p24_m08, zerov); + tmp0[12] = vec_msum(in[6], cospi_m24_p08, zerov); + tmp0[13] = vec_msum(in[7], cospi_m24_p08, zerov); + tmp0[14] = vec_msum(in[6], cospi_p08_p24, zerov); + tmp0[15] = vec_msum(in[7], cospi_p08_p24, zerov); + + tmp1[0] = vec_add(tmp0[0], tmp0[4]); + tmp1[1] = vec_add(tmp0[1], tmp0[5]); + tmp1[2] = vec_add(tmp0[2], tmp0[6]); + tmp1[3] = vec_add(tmp0[3], tmp0[7]); + tmp1[4] = vec_sub(tmp0[0], tmp0[4]); + tmp1[5] = vec_sub(tmp0[1], tmp0[5]); + tmp1[6] = vec_sub(tmp0[2], tmp0[6]); + tmp1[7] = vec_sub(tmp0[3], tmp0[7]); + tmp1[8] = vec_add(tmp0[8], tmp0[12]); + tmp1[9] = vec_add(tmp0[9], tmp0[13]); + tmp1[10] = vec_add(tmp0[10], tmp0[14]); + tmp1[11] = 
vec_add(tmp0[11], tmp0[15]); + tmp1[12] = vec_sub(tmp0[8], tmp0[12]); + tmp1[13] = vec_sub(tmp0[9], tmp0[13]); + tmp1[14] = vec_sub(tmp0[10], tmp0[14]); + tmp1[15] = vec_sub(tmp0[11], tmp0[15]); + + DCT_CONST_ROUND_SHIFT(tmp1[0]); + DCT_CONST_ROUND_SHIFT(tmp1[1]); + DCT_CONST_ROUND_SHIFT(tmp1[2]); + DCT_CONST_ROUND_SHIFT(tmp1[3]); + DCT_CONST_ROUND_SHIFT(tmp1[4]); + DCT_CONST_ROUND_SHIFT(tmp1[5]); + DCT_CONST_ROUND_SHIFT(tmp1[6]); + DCT_CONST_ROUND_SHIFT(tmp1[7]); + DCT_CONST_ROUND_SHIFT(tmp1[8]); + DCT_CONST_ROUND_SHIFT(tmp1[9]); + DCT_CONST_ROUND_SHIFT(tmp1[10]); + DCT_CONST_ROUND_SHIFT(tmp1[11]); + DCT_CONST_ROUND_SHIFT(tmp1[12]); + DCT_CONST_ROUND_SHIFT(tmp1[13]); + DCT_CONST_ROUND_SHIFT(tmp1[14]); + DCT_CONST_ROUND_SHIFT(tmp1[15]); + + in[0] = vec_add(tmp16_0[0], tmp16_0[2]); + in[1] = vec_add(tmp16_0[1], tmp16_0[3]); + in[2] = vec_sub(tmp16_0[0], tmp16_0[2]); + in[3] = vec_sub(tmp16_0[1], tmp16_0[3]); + in[4] = vec_packs(tmp1[0], tmp1[1]); + in[5] = vec_packs(tmp1[2], tmp1[3]); + in[6] = vec_packs(tmp1[4], tmp1[5]); + in[7] = vec_packs(tmp1[6], tmp1[7]); + in[8] = vec_add(tmp16_0[8], tmp16_0[10]); + in[9] = vec_add(tmp16_0[9], tmp16_0[11]); + in[10] = vec_sub(tmp16_0[8], tmp16_0[10]); + in[11] = vec_sub(tmp16_0[9], tmp16_0[11]); + in[12] = vec_packs(tmp1[8], tmp1[9]); + in[13] = vec_packs(tmp1[10], tmp1[11]); + in[14] = vec_packs(tmp1[12], tmp1[13]); + in[15] = vec_packs(tmp1[14], tmp1[15]); + + // stage 4 + out[0] = vec_mergeh(in[2], in[3]); + out[1] = vec_mergel(in[2], in[3]); + out[2] = vec_mergeh(in[6], in[7]); + out[3] = vec_mergel(in[6], in[7]); + out[4] = vec_mergeh(in[10], in[11]); + out[5] = vec_mergel(in[10], in[11]); + out[6] = vec_mergeh(in[14], in[15]); + out[7] = vec_mergel(in[14], in[15]); +} + +void vpx_iadst16_vsx(int16x8_t *src0, int16x8_t *src1) { + int16x8_t tmp0[16], tmp1[16], tmp2[8]; + int32x4_t tmp3, tmp4; + int16x8_t zero16v = vec_splat_s16(0); + int32x4_t zerov = vec_splat_s32(0); + int16x8_t cospi_p16_m16 = vec_mergel(cospi16_v, cospi16m_v); + int16x8_t cospi_m16_p16 = vec_mergel(cospi16m_v, cospi16_v); + ROUND_SHIFT_INIT; + + TRANSPOSE8x8(src0[0], src0[2], src0[4], src0[6], src0[8], src0[10], src0[12], + src0[14], tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], + tmp0[6], tmp0[7]); + TRANSPOSE8x8(src1[0], src1[2], src1[4], src1[6], src1[8], src1[10], src1[12], + src1[14], tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], + tmp1[6], tmp1[7]); + TRANSPOSE8x8(src0[1], src0[3], src0[5], src0[7], src0[9], src0[11], src0[13], + src0[15], tmp0[8], tmp0[9], tmp0[10], tmp0[11], tmp0[12], + tmp0[13], tmp0[14], tmp0[15]); + TRANSPOSE8x8(src1[1], src1[3], src1[5], src1[7], src1[9], src1[11], src1[13], + src1[15], tmp1[8], tmp1[9], tmp1[10], tmp1[11], tmp1[12], + tmp1[13], tmp1[14], tmp1[15]); + + iadst16x8_vsx(tmp0, tmp2); + IADST_WRAPLOW(tmp2[0], tmp2[1], tmp3, tmp4, src0[14], cospi16m_v); + IADST_WRAPLOW(tmp2[0], tmp2[1], tmp3, tmp4, src1[0], cospi_p16_m16); + IADST_WRAPLOW(tmp2[2], tmp2[3], tmp3, tmp4, src0[8], cospi16_v); + IADST_WRAPLOW(tmp2[2], tmp2[3], tmp3, tmp4, src1[6], cospi_m16_p16); + IADST_WRAPLOW(tmp2[4], tmp2[5], tmp3, tmp4, src0[12], cospi16_v); + IADST_WRAPLOW(tmp2[4], tmp2[5], tmp3, tmp4, src1[2], cospi_m16_p16); + IADST_WRAPLOW(tmp2[6], tmp2[7], tmp3, tmp4, src0[10], cospi16m_v); + IADST_WRAPLOW(tmp2[6], tmp2[7], tmp3, tmp4, src1[4], cospi_p16_m16); + + src0[0] = tmp0[0]; + src0[2] = vec_sub(zero16v, tmp0[8]); + src0[4] = tmp0[12]; + src0[6] = vec_sub(zero16v, tmp0[4]); + src1[8] = tmp0[5]; + src1[10] = vec_sub(zero16v, tmp0[13]); + 
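// The iadst16 output stage is pure bookkeeping: alternate outputs are
+  // copied through or negated (vec_sub from the zero vector); no further
+  // multiplies are needed.
+  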
src1[12] = tmp0[9]; + src1[14] = vec_sub(zero16v, tmp0[1]); + + iadst16x8_vsx(tmp1, tmp2); + IADST_WRAPLOW(tmp2[0], tmp2[1], tmp3, tmp4, src0[15], cospi16m_v); + IADST_WRAPLOW(tmp2[0], tmp2[1], tmp3, tmp4, src1[1], cospi_p16_m16); + IADST_WRAPLOW(tmp2[2], tmp2[3], tmp3, tmp4, src0[9], cospi16_v); + IADST_WRAPLOW(tmp2[2], tmp2[3], tmp3, tmp4, src1[7], cospi_m16_p16); + IADST_WRAPLOW(tmp2[4], tmp2[5], tmp3, tmp4, src0[13], cospi16_v); + IADST_WRAPLOW(tmp2[4], tmp2[5], tmp3, tmp4, src1[3], cospi_m16_p16); + IADST_WRAPLOW(tmp2[6], tmp2[7], tmp3, tmp4, src0[11], cospi16m_v); + IADST_WRAPLOW(tmp2[6], tmp2[7], tmp3, tmp4, src1[5], cospi_p16_m16); + + src0[1] = tmp1[0]; + src0[3] = vec_sub(zero16v, tmp1[8]); + src0[5] = tmp1[12]; + src0[7] = vec_sub(zero16v, tmp1[4]); + src1[9] = tmp1[5]; + src1[11] = vec_sub(zero16v, tmp1[13]); + src1[13] = tmp1[9]; + src1[15] = vec_sub(zero16v, tmp1[1]); +} diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/inv_txfm_vsx.h b/media/libvpx/libvpx/vpx_dsp/ppc/inv_txfm_vsx.h new file mode 100644 index 000000000000..7031742c1c91 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/ppc/inv_txfm_vsx.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_DSP_PPC_INV_TXFM_VSX_H_ +#define VPX_VPX_DSP_PPC_INV_TXFM_VSX_H_ + +#include "vpx_dsp/ppc/types_vsx.h" + +void vpx_round_store4x4_vsx(int16x8_t *in, int16x8_t *out, uint8_t *dest, + int stride); +void vpx_idct4_vsx(int16x8_t *in, int16x8_t *out); +void vp9_iadst4_vsx(int16x8_t *in, int16x8_t *out); + +void vpx_round_store8x8_vsx(int16x8_t *in, uint8_t *dest, int stride); +void vpx_idct8_vsx(int16x8_t *in, int16x8_t *out); +void vp9_iadst8_vsx(int16x8_t *in, int16x8_t *out); + +#define LOAD_INPUT16(load, source, offset, step, in) \ + in[0] = load(offset, source); \ + in[1] = load((step) + (offset), source); \ + in[2] = load(2 * (step) + (offset), source); \ + in[3] = load(3 * (step) + (offset), source); \ + in[4] = load(4 * (step) + (offset), source); \ + in[5] = load(5 * (step) + (offset), source); \ + in[6] = load(6 * (step) + (offset), source); \ + in[7] = load(7 * (step) + (offset), source); \ + in[8] = load(8 * (step) + (offset), source); \ + in[9] = load(9 * (step) + (offset), source); \ + in[10] = load(10 * (step) + (offset), source); \ + in[11] = load(11 * (step) + (offset), source); \ + in[12] = load(12 * (step) + (offset), source); \ + in[13] = load(13 * (step) + (offset), source); \ + in[14] = load(14 * (step) + (offset), source); \ + in[15] = load(15 * (step) + (offset), source); + +void vpx_round_store16x16_vsx(int16x8_t *src0, int16x8_t *src1, uint8_t *dest, + int stride); +void vpx_idct16_vsx(int16x8_t *src0, int16x8_t *src1); +void vpx_iadst16_vsx(int16x8_t *src0, int16x8_t *src1); + +#endif // VPX_VPX_DSP_PPC_INV_TXFM_VSX_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/quantize_vsx.c b/media/libvpx/libvpx/vpx_dsp/ppc/quantize_vsx.c new file mode 100644 index 000000000000..d85e63bd1486 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/ppc/quantize_vsx.c @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/ppc/types_vsx.h"
+
+// Negate 16-bit integers in a when the corresponding signed 16-bit
+// integer in b is negative.
+static INLINE int16x8_t vec_sign(int16x8_t a, int16x8_t b) {
+  const int16x8_t mask = vec_sra(b, vec_shift_sign_s16);
+  return vec_xor(vec_add(a, mask), mask);
+}
+
+// Set each 32-bit integer to 1 when the corresponding value in a is
+// negative, and to 0 otherwise.
+static INLINE int32x4_t vec_is_neg(int32x4_t a) {
+  return vec_sr(a, vec_shift_sign_s32);
+}
+
+// Multiply the packed 16-bit integers in a and b, producing intermediate
+// 32-bit integers, and return the high 16 bits of the intermediate integers:
+// (a * b) >> 16
+static INLINE int16x8_t vec_mulhi(int16x8_t a, int16x8_t b) {
+  // madds does ((A * B) >> 15) + C; we need >> 16, so we perform an extra
+  // right shift.
+  return vec_sra(vec_madds(a, b, vec_zeros_s16), vec_ones_u16);
+}
+
+// Quantization function used for 4x4, 8x8 and 16x16 blocks.
+static INLINE int16x8_t quantize_coeff(int16x8_t coeff, int16x8_t coeff_abs,
+                                       int16x8_t round, int16x8_t quant,
+                                       int16x8_t quant_shift, bool16x8_t mask) {
+  const int16x8_t rounded = vec_vaddshs(coeff_abs, round);
+  int16x8_t qcoeff = vec_mulhi(rounded, quant);
+  qcoeff = vec_add(qcoeff, rounded);
+  qcoeff = vec_mulhi(qcoeff, quant_shift);
+  qcoeff = vec_sign(qcoeff, coeff);
+  return vec_and(qcoeff, mask);
+}
+
+// Quantization function used for 32x32 blocks.
+static INLINE int16x8_t quantize_coeff_32(int16x8_t coeff, int16x8_t coeff_abs,
+                                          int16x8_t round, int16x8_t quant,
+                                          int16x8_t quant_shift,
+                                          bool16x8_t mask) {
+  const int16x8_t rounded = vec_vaddshs(coeff_abs, round);
+  int16x8_t qcoeff = vec_mulhi(rounded, quant);
+  qcoeff = vec_add(qcoeff, rounded);
+  // 32x32 blocks require an extra multiplication by 2. This compensates for
+  // the extra right shift added in vec_mulhi, so vec_madds can be used
+  // directly instead of vec_mulhi:
+  // (((a * b) >> 15) >> 1) << 1 == (a * b) >> 15
+  qcoeff = vec_madds(qcoeff, quant_shift, vec_zeros_s16);
+  qcoeff = vec_sign(qcoeff, coeff);
+  return vec_and(qcoeff, mask);
+}
+
+// Dequantization function used for 32x32 blocks. Quantized coeffs of 32x32
+// blocks are twice as big as for other block sizes; as such, using
+// vec_mladd results in overflow.
+static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
+                                            int16x8_t dequant) {
+  int32x4_t dqcoeffe = vec_mule(qcoeff, dequant);
+  int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant);
+  // Add 1 if negative to round towards zero, because the C reference uses
+  // division (which truncates towards zero).
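+  // e.g. -9 / 2 == -4 in C, while (-9) >> 1 == -5; adding 1 to negative
+  // values first gives ((-9 + 1) >> 1) == -4, matching the division.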
+  dqcoeffe = vec_add(dqcoeffe, vec_is_neg(dqcoeffe));
+  dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo));
+  dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32);
+  dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32);
+  return (int16x8_t)vec_perm(dqcoeffe, dqcoeffo, vec_perm_odd_even_pack);
+}
+
+static INLINE int16x8_t nonzero_scanindex(int16x8_t qcoeff, bool16x8_t mask,
+                                          const int16_t *iscan_ptr, int index) {
+  int16x8_t scan = vec_vsx_ld(index, iscan_ptr);
+  bool16x8_t zero_coeff = vec_cmpeq(qcoeff, vec_zeros_s16);
+  scan = vec_sub(scan, mask);
+  return vec_andc(scan, zero_coeff);
+}
+
+// Compute the horizontal maximum of the packed 16-bit integers in a: every
+// element of the result holds the largest value found anywhere in a.
+static INLINE int16x8_t vec_max_across(int16x8_t a) {
+  a = vec_max(a, vec_perm(a, a, vec_perm64));
+  a = vec_max(a, vec_perm(a, a, vec_perm32));
+  return vec_max(a, vec_perm(a, a, vec_perm16));
+}
+
+void vpx_quantize_b_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                        int skip_block, const int16_t *zbin_ptr,
+                        const int16_t *round_ptr, const int16_t *quant_ptr,
+                        const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+                        tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+                        uint16_t *eob_ptr, const int16_t *scan_ptr,
+                        const int16_t *iscan_ptr) {
+  int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob;
+  bool16x8_t zero_mask0, zero_mask1;
+
+  // The first set of 8 coeffs starts with the DC coeff plus 7 AC coeffs.
+  int16x8_t zbin = vec_vsx_ld(0, zbin_ptr);
+  int16x8_t round = vec_vsx_ld(0, round_ptr);
+  int16x8_t quant = vec_vsx_ld(0, quant_ptr);
+  int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
+  int16x8_t quant_shift = vec_vsx_ld(0, quant_shift_ptr);
+
+  int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
+  int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
+
+  int16x8_t coeff0_abs = vec_abs(coeff0);
+  int16x8_t coeff1_abs = vec_abs(coeff1);
+
+  zero_mask0 = vec_cmpge(coeff0_abs, zbin);
+  zbin = vec_splat(zbin, 1);
+  zero_mask1 = vec_cmpge(coeff1_abs, zbin);
+
+  (void)scan_ptr;
+  (void)skip_block;
+  assert(!skip_block);
+
+  qcoeff0 =
+      quantize_coeff(coeff0, coeff0_abs, round, quant, quant_shift, zero_mask0);
+  vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
+  round = vec_splat(round, 1);
+  quant = vec_splat(quant, 1);
+  quant_shift = vec_splat(quant_shift, 1);
+  qcoeff1 =
+      quantize_coeff(coeff1, coeff1_abs, round, quant, quant_shift, zero_mask1);
+  vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+
+  dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
+  vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr);
+  dequant = vec_splat(dequant, 1);
+  dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
+  vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr);
+
+  eob = vec_max(nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, 0),
+                nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, 16));
+
+  if (n_coeffs > 16) {
+    int index = 16;
+    int off0 = 32;
+    int off1 = 48;
+    int off2 = 64;
+    do {
+      int16x8_t coeff2, coeff2_abs, qcoeff2, dqcoeff2, eob2;
+      bool16x8_t zero_mask2;
+      coeff0 = vec_vsx_ld(off0, coeff_ptr);
+      coeff1 = vec_vsx_ld(off1, coeff_ptr);
+      coeff2 = vec_vsx_ld(off2, coeff_ptr);
+      coeff0_abs = vec_abs(coeff0);
+      coeff1_abs = vec_abs(coeff1);
+      coeff2_abs = vec_abs(coeff2);
+      zero_mask0 = vec_cmpge(coeff0_abs, zbin);
+      zero_mask1 = vec_cmpge(coeff1_abs, zbin);
+      zero_mask2 = vec_cmpge(coeff2_abs, zbin);
+      qcoeff0 = quantize_coeff(coeff0, coeff0_abs, round, quant, quant_shift,
+                               zero_mask0);
+      qcoeff1 = quantize_coeff(coeff1, coeff1_abs, round, quant, quant_shift,
+                               zero_mask1);
+      qcoeff2 = quantize_coeff(coeff2, coeff2_abs, round, quant, 
quant_shift, + zero_mask2); + vec_vsx_st(qcoeff0, off0, qcoeff_ptr); + vec_vsx_st(qcoeff1, off1, qcoeff_ptr); + vec_vsx_st(qcoeff2, off2, qcoeff_ptr); + + dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16); + dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16); + dqcoeff2 = vec_mladd(qcoeff2, dequant, vec_zeros_s16); + + vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr); + vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr); + vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr); + + eob = + vec_max(eob, nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, off0)); + eob2 = vec_max(nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, off1), + nonzero_scanindex(qcoeff2, zero_mask2, iscan_ptr, off2)); + eob = vec_max(eob, eob2); + + index += 24; + off0 += 48; + off1 += 48; + off2 += 48; + } while (index < n_coeffs); + } + + eob = vec_max_across(eob); + *eob_ptr = eob[0]; +} + +void vpx_quantize_b_32x32_vsx( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan_ptr, const int16_t *iscan_ptr) { + // In stage 1, we quantize 16 coeffs (DC + 15 AC) + // In stage 2, we loop 42 times and quantize 24 coeffs per iteration + // (32 * 32 - 16) / 24 = 42 + int num_itr = 42; + // Offsets are in bytes, 16 coeffs = 32 bytes + int off0 = 32; + int off1 = 48; + int off2 = 64; + + int16x8_t qcoeff0, qcoeff1, eob; + bool16x8_t zero_mask0, zero_mask1; + + int16x8_t zbin = vec_vsx_ld(0, zbin_ptr); + int16x8_t round = vec_vsx_ld(0, round_ptr); + int16x8_t quant = vec_vsx_ld(0, quant_ptr); + int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); + int16x8_t quant_shift = vec_vsx_ld(0, quant_shift_ptr); + + int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr); + int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); + + int16x8_t coeff0_abs = vec_abs(coeff0); + int16x8_t coeff1_abs = vec_abs(coeff1); + + (void)scan_ptr; + (void)skip_block; + (void)n_coeffs; + assert(!skip_block); + + // 32x32 quantization requires that zbin and round be divided by 2 + zbin = vec_sra(vec_add(zbin, vec_ones_s16), vec_ones_u16); + round = vec_sra(vec_add(round, vec_ones_s16), vec_ones_u16); + + zero_mask0 = vec_cmpge(coeff0_abs, zbin); + zbin = vec_splat(zbin, 1); // remove DC from zbin + zero_mask1 = vec_cmpge(coeff1_abs, zbin); + + qcoeff0 = quantize_coeff_32(coeff0, coeff0_abs, round, quant, quant_shift, + zero_mask0); + round = vec_splat(round, 1); // remove DC from round + quant = vec_splat(quant, 1); // remove DC from quant + quant_shift = vec_splat(quant_shift, 1); // remove DC from quant_shift + qcoeff1 = quantize_coeff_32(coeff1, coeff1_abs, round, quant, quant_shift, + zero_mask1); + + vec_vsx_st(qcoeff0, 0, qcoeff_ptr); + vec_vsx_st(qcoeff1, 16, qcoeff_ptr); + + vec_vsx_st(dequantize_coeff_32(qcoeff0, dequant), 0, dqcoeff_ptr); + dequant = vec_splat(dequant, 1); // remove DC from dequant + vec_vsx_st(dequantize_coeff_32(qcoeff1, dequant), 16, dqcoeff_ptr); + + eob = vec_max(nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, 0), + nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, 16)); + + do { + int16x8_t coeff2, coeff2_abs, qcoeff2, eob2; + bool16x8_t zero_mask2; + + coeff0 = vec_vsx_ld(off0, coeff_ptr); + coeff1 = vec_vsx_ld(off1, coeff_ptr); + coeff2 = vec_vsx_ld(off2, coeff_ptr); + + coeff0_abs = vec_abs(coeff0); + coeff1_abs = vec_abs(coeff1); + coeff2_abs = vec_abs(coeff2); + + zero_mask0 = vec_cmpge(coeff0_abs, zbin); + zero_mask1 = 
vec_cmpge(coeff1_abs, zbin); + zero_mask2 = vec_cmpge(coeff2_abs, zbin); + + qcoeff0 = quantize_coeff_32(coeff0, coeff0_abs, round, quant, quant_shift, + zero_mask0); + qcoeff1 = quantize_coeff_32(coeff1, coeff1_abs, round, quant, quant_shift, + zero_mask1); + qcoeff2 = quantize_coeff_32(coeff2, coeff2_abs, round, quant, quant_shift, + zero_mask2); + + vec_vsx_st(qcoeff0, off0, qcoeff_ptr); + vec_vsx_st(qcoeff1, off1, qcoeff_ptr); + vec_vsx_st(qcoeff2, off2, qcoeff_ptr); + + vec_vsx_st(dequantize_coeff_32(qcoeff0, dequant), off0, dqcoeff_ptr); + vec_vsx_st(dequantize_coeff_32(qcoeff1, dequant), off1, dqcoeff_ptr); + vec_vsx_st(dequantize_coeff_32(qcoeff2, dequant), off2, dqcoeff_ptr); + + eob = vec_max(eob, nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, off0)); + eob2 = vec_max(nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, off1), + nonzero_scanindex(qcoeff2, zero_mask2, iscan_ptr, off2)); + eob = vec_max(eob, eob2); + + // 24 int16_t is 48 bytes + off0 += 48; + off1 += 48; + off2 += 48; + num_itr--; + } while (num_itr != 0); + + eob = vec_max_across(eob); + *eob_ptr = eob[0]; +} diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/sad_vsx.c b/media/libvpx/libvpx/vpx_dsp/ppc/sad_vsx.c index bb49addae179..a08ae12413b6 100644 --- a/media/libvpx/libvpx/vpx_dsp/ppc/sad_vsx.c +++ b/media/libvpx/libvpx/vpx_dsp/ppc/sad_vsx.c @@ -17,71 +17,75 @@ #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" -#define PROCESS16(offset) \ - v_a = vec_vsx_ld(offset, a); \ - v_b = vec_vsx_ld(offset, b); \ - v_ah = unpack_to_s16_h(v_a); \ - v_al = unpack_to_s16_l(v_a); \ - v_bh = unpack_to_s16_h(v_b); \ - v_bl = unpack_to_s16_l(v_b); \ - v_subh = vec_sub(v_ah, v_bh); \ - v_subl = vec_sub(v_al, v_bl); \ - v_absh = vec_abs(v_subh); \ - v_absl = vec_abs(v_subl); \ - v_sad = vec_sum4s(v_absh, v_sad); \ - v_sad = vec_sum4s(v_absl, v_sad); +#define PROCESS16(offset) \ + v_a = vec_vsx_ld(offset, a); \ + v_b = vec_vsx_ld(offset, b); \ + v_abs = vec_absd(v_a, v_b); \ + v_sad = vec_sum4s(v_abs, v_sad); + +#define SAD8(height) \ + unsigned int vpx_sad8x##height##_vsx(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride) { \ + int y = 0; \ + uint8x16_t v_a, v_b, v_abs; \ + uint32x4_t v_sad = vec_zeros_u32; \ + \ + do { \ + PROCESS16(0) \ + \ + a += a_stride; \ + b += b_stride; \ + y++; \ + } while (y < height); \ + \ + return v_sad[1] + v_sad[0]; \ + } #define SAD16(height) \ unsigned int vpx_sad16x##height##_vsx(const uint8_t *a, int a_stride, \ const uint8_t *b, int b_stride) { \ - int y; \ - unsigned int sad[4]; \ - uint8x16_t v_a, v_b; \ - int16x8_t v_ah, v_al, v_bh, v_bl, v_absh, v_absl, v_subh, v_subl; \ - int32x4_t v_sad = vec_splat_s32(0); \ + int y = 0; \ + uint8x16_t v_a, v_b, v_abs; \ + uint32x4_t v_sad = vec_zeros_u32; \ \ - for (y = 0; y < height; y++) { \ + do { \ PROCESS16(0); \ \ a += a_stride; \ b += b_stride; \ - } \ - vec_vsx_st((uint32x4_t)v_sad, 0, sad); \ + y++; \ + } while (y < height); \ \ - return sad[3] + sad[2] + sad[1] + sad[0]; \ + return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0]; \ } #define SAD32(height) \ unsigned int vpx_sad32x##height##_vsx(const uint8_t *a, int a_stride, \ const uint8_t *b, int b_stride) { \ - int y; \ - unsigned int sad[4]; \ - uint8x16_t v_a, v_b; \ - int16x8_t v_ah, v_al, v_bh, v_bl, v_absh, v_absl, v_subh, v_subl; \ - int32x4_t v_sad = vec_splat_s32(0); \ + int y = 0; \ + uint8x16_t v_a, v_b, v_abs; \ + uint32x4_t v_sad = vec_zeros_u32; \ \ - for (y = 0; y < height; y++) { \ + do { \ PROCESS16(0); \ PROCESS16(16); \ \ a += a_stride; \ b += b_stride; \ - } 
\ - vec_vsx_st((uint32x4_t)v_sad, 0, sad); \ + y++; \ + } while (y < height); \ \ - return sad[3] + sad[2] + sad[1] + sad[0]; \ + return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0]; \ } #define SAD64(height) \ unsigned int vpx_sad64x##height##_vsx(const uint8_t *a, int a_stride, \ const uint8_t *b, int b_stride) { \ - int y; \ - unsigned int sad[4]; \ - uint8x16_t v_a, v_b; \ - int16x8_t v_ah, v_al, v_bh, v_bl, v_absh, v_absl, v_subh, v_subl; \ - int32x4_t v_sad = vec_splat_s32(0); \ + int y = 0; \ + uint8x16_t v_a, v_b, v_abs; \ + uint32x4_t v_sad = vec_zeros_u32; \ \ - for (y = 0; y < height; y++) { \ + do { \ PROCESS16(0); \ PROCESS16(16); \ PROCESS16(32); \ @@ -89,12 +93,15 @@ \ a += a_stride; \ b += b_stride; \ - } \ - vec_vsx_st((uint32x4_t)v_sad, 0, sad); \ + y++; \ + } while (y < height); \ \ - return sad[3] + sad[2] + sad[1] + sad[0]; \ + return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0]; \ } +SAD8(4); +SAD8(8); +SAD8(16); SAD16(8); SAD16(16); SAD16(32); @@ -108,7 +115,7 @@ SAD64(64); unsigned int vpx_sad16x##height##_avg_vsx( \ const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \ const uint8_t *second_pred) { \ - DECLARE_ALIGNED(16, uint8_t, comp_pred[16 * height]); \ + DECLARE_ALIGNED(16, uint8_t, comp_pred[16 * (height)]); \ vpx_comp_avg_pred_vsx(comp_pred, second_pred, 16, height, ref, \ ref_stride); \ \ @@ -119,7 +126,7 @@ SAD64(64); unsigned int vpx_sad32x##height##_avg_vsx( \ const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \ const uint8_t *second_pred) { \ - DECLARE_ALIGNED(32, uint8_t, comp_pred[32 * height]); \ + DECLARE_ALIGNED(32, uint8_t, comp_pred[32 * (height)]); \ vpx_comp_avg_pred_vsx(comp_pred, second_pred, 32, height, ref, \ ref_stride); \ \ @@ -130,7 +137,7 @@ SAD64(64); unsigned int vpx_sad64x##height##_avg_vsx( \ const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \ const uint8_t *second_pred) { \ - DECLARE_ALIGNED(64, uint8_t, comp_pred[64 * height]); \ + DECLARE_ALIGNED(64, uint8_t, comp_pred[64 * (height)]); \ vpx_comp_avg_pred_vsx(comp_pred, second_pred, 64, height, ref, \ ref_stride); \ return vpx_sad64x##height##_vsx(src, src_stride, comp_pred, 64); \ diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/subtract_vsx.c b/media/libvpx/libvpx/vpx_dsp/ppc/subtract_vsx.c new file mode 100644 index 000000000000..76ad302da606 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/ppc/subtract_vsx.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/ppc/types_vsx.h"
+
+static VPX_FORCE_INLINE void subtract_block4x4(
+    int16_t *diff, ptrdiff_t diff_stride, const uint8_t *src,
+    ptrdiff_t src_stride, const uint8_t *pred, ptrdiff_t pred_stride) {
+  int16_t *diff1 = diff + 2 * diff_stride;
+  const uint8_t *src1 = src + 2 * src_stride;
+  const uint8_t *pred1 = pred + 2 * pred_stride;
+
+  const int16x8_t d0 = vec_vsx_ld(0, diff);
+  const int16x8_t d1 = vec_vsx_ld(0, diff + diff_stride);
+  const int16x8_t d2 = vec_vsx_ld(0, diff1);
+  const int16x8_t d3 = vec_vsx_ld(0, diff1 + diff_stride);
+
+  const uint8x16_t s0 = read4x2(src, (int)src_stride);
+  const uint8x16_t p0 = read4x2(pred, (int)pred_stride);
+  const uint8x16_t s1 = read4x2(src1, (int)src_stride);
+  const uint8x16_t p1 = read4x2(pred1, (int)pred_stride);
+
+  const int16x8_t da = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
+  const int16x8_t db = vec_sub(unpack_to_s16_h(s1), unpack_to_s16_h(p1));
+
+  vec_vsx_st(xxpermdi(da, d0, 1), 0, diff);
+  vec_vsx_st(xxpermdi(da, d1, 3), 0, diff + diff_stride);
+  vec_vsx_st(xxpermdi(db, d2, 1), 0, diff1);
+  vec_vsx_st(xxpermdi(db, d3, 3), 0, diff1 + diff_stride);
+}
+
+void vpx_subtract_block_vsx(int rows, int cols, int16_t *diff,
+                            ptrdiff_t diff_stride, const uint8_t *src,
+                            ptrdiff_t src_stride, const uint8_t *pred,
+                            ptrdiff_t pred_stride) {
+  int r = rows, c;
+
+  switch (cols) {
+    case 64:
+    case 32:
+      do {
+        for (c = 0; c < cols; c += 32) {
+          const uint8x16_t s0 = vec_vsx_ld(0, src + c);
+          const uint8x16_t s1 = vec_vsx_ld(16, src + c);
+          const uint8x16_t p0 = vec_vsx_ld(0, pred + c);
+          const uint8x16_t p1 = vec_vsx_ld(16, pred + c);
+          const int16x8_t d0l =
+              vec_sub(unpack_to_s16_l(s0), unpack_to_s16_l(p0));
+          const int16x8_t d0h =
+              vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
+          const int16x8_t d1l =
+              vec_sub(unpack_to_s16_l(s1), unpack_to_s16_l(p1));
+          const int16x8_t d1h =
+              vec_sub(unpack_to_s16_h(s1), unpack_to_s16_h(p1));
+          vec_vsx_st(d0h, 0, diff + c);
+          vec_vsx_st(d0l, 16, diff + c);
+          vec_vsx_st(d1h, 0, diff + c + 16);
+          vec_vsx_st(d1l, 16, diff + c + 16);
+        }
+        diff += diff_stride;
+        pred += pred_stride;
+        src += src_stride;
+      } while (--r);
+      break;
+    case 16:
+      do {
+        const uint8x16_t s0 = vec_vsx_ld(0, src);
+        const uint8x16_t p0 = vec_vsx_ld(0, pred);
+        const int16x8_t d0l = vec_sub(unpack_to_s16_l(s0), unpack_to_s16_l(p0));
+        const int16x8_t d0h = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
+        vec_vsx_st(d0h, 0, diff);
+        vec_vsx_st(d0l, 16, diff);
+        diff += diff_stride;
+        pred += pred_stride;
+        src += src_stride;
+      } while (--r);
+      break;
+    case 8:
+      do {
+        const uint8x16_t s0 = vec_vsx_ld(0, src);
+        const uint8x16_t p0 = vec_vsx_ld(0, pred);
+        const int16x8_t d0h = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
+        vec_vsx_st(d0h, 0, diff);
+        diff += diff_stride;
+        pred += pred_stride;
+        src += src_stride;
+      } while (--r);
+      break;
+    case 4:
+      subtract_block4x4(diff, diff_stride, src, src_stride, pred, pred_stride);
+      if (r > 4) {
+        diff += 4 * diff_stride;
+        pred += 4 * pred_stride;
+        src += 4 * src_stride;
+
+        subtract_block4x4(diff, diff_stride, src, src_stride, pred,
+                          pred_stride);
+      }
+      break;
+    default: assert(0);  // unreachable
+  }
+}
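The 4x4 case above is the subtle one: read4x2() packs two 4-pixel rows into a single vector, so one 8-lane subtract covers two rows, and xxpermdi() folds each 8-byte result back into the existing 16-byte row of diff without clobbering its other half. A scalar sketch of the row-pairing idea (the helper name is illustrative, not part of libvpx):

#include <stddef.h>
#include <stdint.h>

/* Scalar model of the paired-row trick in subtract_block4x4(): treat two
 * 4-pixel rows as one 8-element run so the vector unit stays full. Lanes
 * 0-3 map to row 0, lanes 4-7 to row 1. */
static void subtract_rows_paired(int16_t *diff, ptrdiff_t diff_stride,
                                 const uint8_t *src, ptrdiff_t src_stride,
                                 const uint8_t *pred, ptrdiff_t pred_stride) {
  int i;
  for (i = 0; i < 8; i++) {
    const int row = i >> 2, col = i & 3;
    diff[row * diff_stride + col] =
        (int16_t)(src[row * src_stride + col] - pred[row * pred_stride + col]);
  }
}

diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/transpose_vsx.h b/media/libvpx/libvpx/vpx_dsp/ppc/transpose_vsx.h
index f02556d52239..4883b734ad25 100644
--- a/media/libvpx/libvpx/vpx_dsp/ppc/transpose_vsx.h
+++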
b/media/libvpx/libvpx/vpx_dsp/ppc/transpose_vsx.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_PPC_TRANSPOSE_VSX_H_ -#define VPX_DSP_PPC_TRANSPOSE_VSX_H_ +#ifndef VPX_VPX_DSP_PPC_TRANSPOSE_VSX_H_ +#define VPX_VPX_DSP_PPC_TRANSPOSE_VSX_H_ #include "./vpx_config.h" #include "vpx_dsp/ppc/types_vsx.h" @@ -98,4 +98,36 @@ static INLINE void vpx_transpose_s16_8x8(int16x8_t v[8]) { // v[7]: 07 17 27 37 47 57 67 77 } -#endif // VPX_DSP_PPC_TRANSPOSE_VSX_H_ +static INLINE void transpose_8x8(const int16x8_t *a, int16x8_t *b) { + // Stage 1 + const int16x8_t s1_0 = vec_mergeh(a[0], a[4]); + const int16x8_t s1_1 = vec_mergel(a[0], a[4]); + const int16x8_t s1_2 = vec_mergeh(a[1], a[5]); + const int16x8_t s1_3 = vec_mergel(a[1], a[5]); + const int16x8_t s1_4 = vec_mergeh(a[2], a[6]); + const int16x8_t s1_5 = vec_mergel(a[2], a[6]); + const int16x8_t s1_6 = vec_mergeh(a[3], a[7]); + const int16x8_t s1_7 = vec_mergel(a[3], a[7]); + + // Stage 2 + const int16x8_t s2_0 = vec_mergeh(s1_0, s1_4); + const int16x8_t s2_1 = vec_mergel(s1_0, s1_4); + const int16x8_t s2_2 = vec_mergeh(s1_1, s1_5); + const int16x8_t s2_3 = vec_mergel(s1_1, s1_5); + const int16x8_t s2_4 = vec_mergeh(s1_2, s1_6); + const int16x8_t s2_5 = vec_mergel(s1_2, s1_6); + const int16x8_t s2_6 = vec_mergeh(s1_3, s1_7); + const int16x8_t s2_7 = vec_mergel(s1_3, s1_7); + + // Stage 2 + b[0] = vec_mergeh(s2_0, s2_4); + b[1] = vec_mergel(s2_0, s2_4); + b[2] = vec_mergeh(s2_1, s2_5); + b[3] = vec_mergel(s2_1, s2_5); + b[4] = vec_mergeh(s2_2, s2_6); + b[5] = vec_mergel(s2_2, s2_6); + b[6] = vec_mergeh(s2_3, s2_7); + b[7] = vec_mergel(s2_3, s2_7); +} + +#endif // VPX_VPX_DSP_PPC_TRANSPOSE_VSX_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/txfm_common_vsx.h b/media/libvpx/libvpx/vpx_dsp/ppc/txfm_common_vsx.h new file mode 100644 index 000000000000..2907a1fe40f4 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/ppc/txfm_common_vsx.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VPX_DSP_PPC_TXFM_COMMON_VSX_H_ +#define VPX_VPX_DSP_PPC_TXFM_COMMON_VSX_H_ + +#include "vpx_dsp/ppc/types_vsx.h" + +static const int32x4_t vec_dct_const_rounding = { 8192, 8192, 8192, 8192 }; + +static const uint32x4_t vec_dct_const_bits = { 14, 14, 14, 14 }; + +static const uint16x8_t vec_dct_scale_log2 = { 2, 2, 2, 2, 2, 2, 2, 2 }; + +static const int16x8_t cospi1_v = { 16364, 16364, 16364, 16364, + 16364, 16364, 16364, 16364 }; +static const int16x8_t cospi2_v = { 16305, 16305, 16305, 16305, + 16305, 16305, 16305, 16305 }; +static const int16x8_t cospi3_v = { 16207, 16207, 16207, 16207, + 16207, 16207, 16207, 16207 }; +static const int16x8_t cospi4_v = { 16069, 16069, 16069, 16069, + 16069, 16069, 16069, 16069 }; +static const int16x8_t cospi4m_v = { -16069, -16069, -16069, -16069, + -16069, -16069, -16069, -16069 }; +static const int16x8_t cospi5_v = { 15893, 15893, 15893, 15893, + 15893, 15893, 15893, 15893 }; +static const int16x8_t cospi6_v = { 15679, 15679, 15679, 15679, + 15679, 15679, 15679, 15679 }; +static const int16x8_t cospi7_v = { 15426, 15426, 15426, 15426, + 15426, 15426, 15426, 15426 }; +static const int16x8_t cospi8_v = { 15137, 15137, 15137, 15137, + 15137, 15137, 15137, 15137 }; +static const int16x8_t cospi8m_v = { -15137, -15137, -15137, -15137, + -15137, -15137, -15137, -15137 }; +static const int16x8_t cospi9_v = { 14811, 14811, 14811, 14811, + 14811, 14811, 14811, 14811 }; +static const int16x8_t cospi10_v = { 14449, 14449, 14449, 14449, + 14449, 14449, 14449, 14449 }; +static const int16x8_t cospi11_v = { 14053, 14053, 14053, 14053, + 14053, 14053, 14053, 14053 }; +static const int16x8_t cospi12_v = { 13623, 13623, 13623, 13623, + 13623, 13623, 13623, 13623 }; +static const int16x8_t cospi13_v = { 13160, 13160, 13160, 13160, + 13160, 13160, 13160, 13160 }; +static const int16x8_t cospi14_v = { 12665, 12665, 12665, 12665, + 12665, 12665, 12665, 12665 }; +static const int16x8_t cospi15_v = { 12140, 12140, 12140, 12140, + 12140, 12140, 12140, 12140 }; +static const int16x8_t cospi16_v = { 11585, 11585, 11585, 11585, + 11585, 11585, 11585, 11585 }; +static const int16x8_t cospi17_v = { 11003, 11003, 11003, 11003, + 11003, 11003, 11003, 11003 }; +static const int16x8_t cospi18_v = { 10394, 10394, 10394, 10394, + 10394, 10394, 10394, 10394 }; +static const int16x8_t cospi19_v = { 9760, 9760, 9760, 9760, + 9760, 9760, 9760, 9760 }; +static const int16x8_t cospi20_v = { 9102, 9102, 9102, 9102, + 9102, 9102, 9102, 9102 }; +static const int16x8_t cospi20m_v = { -9102, -9102, -9102, -9102, + -9102, -9102, -9102, -9102 }; +static const int16x8_t cospi21_v = { 8423, 8423, 8423, 8423, + 8423, 8423, 8423, 8423 }; +static const int16x8_t cospi22_v = { 7723, 7723, 7723, 7723, + 7723, 7723, 7723, 7723 }; +static const int16x8_t cospi23_v = { 7005, 7005, 7005, 7005, + 7005, 7005, 7005, 7005 }; +static const int16x8_t cospi24_v = { 6270, 6270, 6270, 6270, + 6270, 6270, 6270, 6270 }; +static const int16x8_t cospi25_v = { 5520, 5520, 5520, 5520, + 5520, 5520, 5520, 5520 }; +static const int16x8_t cospi26_v = { 4756, 4756, 4756, 4756, + 4756, 4756, 4756, 4756 }; +static const int16x8_t cospi27_v = { 3981, 3981, 3981, 3981, + 3981, 3981, 3981, 3981 }; +static const int16x8_t cospi28_v = { 3196, 3196, 3196, 3196, + 3196, 3196, 3196, 3196 }; +static const int16x8_t cospi29_v = { 2404, 2404, 2404, 2404, + 2404, 2404, 2404, 2404 }; +static const int16x8_t cospi30_v = { 1606, 1606, 1606, 1606, + 1606, 1606, 1606, 1606 }; +static const int16x8_t cospi31_v = { 804, 804, 
                                     804, 804, 804, 804, 804, 804 };
+
+#endif  // VPX_VPX_DSP_PPC_TXFM_COMMON_VSX_H_
diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/types_vsx.h b/media/libvpx/libvpx/vpx_dsp/ppc/types_vsx.h
index f611d02d2d58..b891169245c7 100644
--- a/media/libvpx/libvpx/vpx_dsp/ppc/types_vsx.h
+++ b/media/libvpx/libvpx/vpx_dsp/ppc/types_vsx.h
@@ -8,8 +8,8 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
-#ifndef VPX_DSP_PPC_TYPES_VSX_H_
-#define VPX_DSP_PPC_TYPES_VSX_H_
+#ifndef VPX_VPX_DSP_PPC_TYPES_VSX_H_
+#define VPX_VPX_DSP_PPC_TYPES_VSX_H_
 
 #include <altivec.h>
 
@@ -19,8 +19,11 @@ typedef vector signed short int16x8_t;
 typedef vector unsigned short uint16x8_t;
 typedef vector signed int int32x4_t;
 typedef vector unsigned int uint32x4_t;
+typedef vector bool char bool8x16_t;
+typedef vector bool short bool16x8_t;
+typedef vector bool int bool32x4_t;
 
-#ifdef __clang__
+#if defined(__clang__) && __clang_major__ < 6
 static const uint8x16_t xxpermdi0_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
                                            0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
                                            0x14, 0x15, 0x16, 0x17 };
@@ -61,8 +64,45 @@ static const uint8x16_t xxpermdi3_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
 #define unpack_to_s16_l(v) \
   (int16x8_t) vec_mergel((uint8x16_t)v, vec_splat_u8(0))
 #ifndef xxpermdi
-#define xxpermdi(a, b, c) vec_xxpermdi(b, a, ((c >> 1) | (c & 1) << 1) ^ 3)
+#define xxpermdi(a, b, c) vec_xxpermdi(b, a, (((c) >> 1) | ((c)&1) << 1) ^ 3)
 #endif
 #endif
 
-#endif  // VPX_DSP_PPC_TYPES_VSX_H_
+static INLINE uint8x16_t read4x2(const uint8_t *a, int stride) {
+  const uint32x4_t a0 = (uint32x4_t)vec_vsx_ld(0, a);
+  const uint32x4_t a1 = (uint32x4_t)vec_vsx_ld(0, a + stride);
+
+  return (uint8x16_t)vec_mergeh(a0, a1);
+}
+
+#ifndef __POWER9_VECTOR__
+#define vec_absd(a, b) vec_sub(vec_max(a, b), vec_min(a, b))
+#endif
+
+static const uint8x16_t vec_zeros_u8 = { 0, 0, 0, 0, 0, 0, 0, 0,
+                                         0, 0, 0, 0, 0, 0, 0, 0 };
+static const int16x8_t vec_zeros_s16 = { 0, 0, 0, 0, 0, 0, 0, 0 };
+static const int16x8_t vec_ones_s16 = { 1, 1, 1, 1, 1, 1, 1, 1 };
+static const int16x8_t vec_twos_s16 = { 2, 2, 2, 2, 2, 2, 2, 2 };
+static const uint16x8_t vec_ones_u16 = { 1, 1, 1, 1, 1, 1, 1, 1 };
+static const uint32x4_t vec_ones_u32 = { 1, 1, 1, 1 };
+static const int32x4_t vec_zeros_s32 = { 0, 0, 0, 0 };
+static const uint32x4_t vec_zeros_u32 = { 0, 0, 0, 0 };
+static const uint16x8_t vec_shift_sign_s16 = { 15, 15, 15, 15, 15, 15, 15, 15 };
+static const uint32x4_t vec_shift_sign_s32 = { 31, 31, 31, 31 };
+static const uint8x16_t vec_perm64 = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                       0x0E, 0x0F, 0x00, 0x01, 0x02, 0x03,
+                                       0x04, 0x05, 0x06, 0x07 };
+static const uint8x16_t vec_perm32 = { 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
+                                       0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+                                       0x00, 0x01, 0x02, 0x03 };
+static const uint8x16_t vec_perm16 = { 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+                                       0x08, 0x09, 0x0A, 0x0B, 0x0E, 0x0D,
+                                       0x0E, 0x0F, 0x00, 0x01 };
+
+static const uint8x16_t vec_perm_odd_even_pack = { 0x00, 0x01, 0x10, 0x11,
+                                                   0x04, 0x05, 0x14, 0x15,
+                                                   0x08, 0x09, 0x18, 0x19,
+                                                   0x0C, 0x0D, 0x1C, 0x1D };
+
+#endif  // VPX_VPX_DSP_PPC_TYPES_VSX_H_
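The vec_absd() fallback above relies on the identity |a - b| == max(a, b) - min(a, b), which is exact for unsigned lanes and cannot wrap the way a plain subtraction can. A scalar sketch of the same identity (the helper name is illustrative):

/* Scalar model of the pre-Power9 vec_absd() emulation: for unsigned
 * values, max(a, b) - min(a, b) is the absolute difference and never
 * wraps, unlike (uint8_t)(a - b) when b > a. */
static unsigned char absd_u8(unsigned char a, unsigned char b) {
  const unsigned char mx = a > b ? a : b;
  const unsigned char mn = a > b ? b : a;
  return (unsigned char)(mx - mn); /* absd_u8(3, 250) == 247, not 9 */
}

diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/variance_vsx.c b/media/libvpx/libvpx/vpx_dsp/ppc/variance_vsx.c
index 1efe2f00569f..be9614a35845 100644
--- a/media/libvpx/libvpx/vpx_dsp/ppc/variance_vsx.c
+++ b/media/libvpx/libvpx/vpx_dsp/ppc/variance_vsx.c
@@ -10,24 +10,20 @@
 
 #include <assert.h>
 
+#include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/ppc/types_vsx.h"
 
-static inline uint8x16_t read4x2(const uint8_t *a, int stride) {
-  const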
uint32x4_t a0 = (uint32x4_t)vec_vsx_ld(0, a); - const uint32x4_t a1 = (uint32x4_t)vec_vsx_ld(0, a + stride); - - return (uint8x16_t)vec_mergeh(a0, a1); -} - -uint32_t vpx_get4x4sse_cs_vsx(const uint8_t *a, int a_stride, const uint8_t *b, - int b_stride) { +uint32_t vpx_get4x4sse_cs_vsx(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride) { int distortion; - const int16x8_t a0 = unpack_to_s16_h(read4x2(a, a_stride)); - const int16x8_t a1 = unpack_to_s16_h(read4x2(a + a_stride * 2, a_stride)); - const int16x8_t b0 = unpack_to_s16_h(read4x2(b, b_stride)); - const int16x8_t b1 = unpack_to_s16_h(read4x2(b + b_stride * 2, b_stride)); + const int16x8_t a0 = unpack_to_s16_h(read4x2(src_ptr, src_stride)); + const int16x8_t a1 = + unpack_to_s16_h(read4x2(src_ptr + src_stride * 2, src_stride)); + const int16x8_t b0 = unpack_to_s16_h(read4x2(ref_ptr, ref_stride)); + const int16x8_t b1 = + unpack_to_s16_h(read4x2(ref_ptr + ref_stride * 2, ref_stride)); const int16x8_t d0 = vec_sub(a0, b0); const int16x8_t d1 = vec_sub(a1, b1); const int32x4_t ds = vec_msum(d1, d1, vec_msum(d0, d0, vec_splat_s32(0))); @@ -39,12 +35,12 @@ uint32_t vpx_get4x4sse_cs_vsx(const uint8_t *a, int a_stride, const uint8_t *b, } // TODO(lu_zero): Unroll -uint32_t vpx_get_mb_ss_vsx(const int16_t *a) { +uint32_t vpx_get_mb_ss_vsx(const int16_t *src_ptr) { unsigned int i, sum = 0; int32x4_t s = vec_splat_s32(0); for (i = 0; i < 256; i += 8) { - const int16x8_t v = vec_vsx_ld(0, a + i); + const int16x8_t v = vec_vsx_ld(0, src_ptr + i); s = vec_msum(v, v, s); } @@ -101,3 +97,175 @@ void vpx_comp_avg_pred_vsx(uint8_t *comp_pred, const uint8_t *pred, int width, } } } + +static INLINE void variance_inner_32(const uint8_t *src_ptr, + const uint8_t *ref_ptr, + int32x4_t *sum_squared, int32x4_t *sum) { + int32x4_t s = *sum; + int32x4_t ss = *sum_squared; + + const uint8x16_t va0 = vec_vsx_ld(0, src_ptr); + const uint8x16_t vb0 = vec_vsx_ld(0, ref_ptr); + const uint8x16_t va1 = vec_vsx_ld(16, src_ptr); + const uint8x16_t vb1 = vec_vsx_ld(16, ref_ptr); + + const int16x8_t a0 = unpack_to_s16_h(va0); + const int16x8_t b0 = unpack_to_s16_h(vb0); + const int16x8_t a1 = unpack_to_s16_l(va0); + const int16x8_t b1 = unpack_to_s16_l(vb0); + const int16x8_t a2 = unpack_to_s16_h(va1); + const int16x8_t b2 = unpack_to_s16_h(vb1); + const int16x8_t a3 = unpack_to_s16_l(va1); + const int16x8_t b3 = unpack_to_s16_l(vb1); + const int16x8_t d0 = vec_sub(a0, b0); + const int16x8_t d1 = vec_sub(a1, b1); + const int16x8_t d2 = vec_sub(a2, b2); + const int16x8_t d3 = vec_sub(a3, b3); + + s = vec_sum4s(d0, s); + ss = vec_msum(d0, d0, ss); + s = vec_sum4s(d1, s); + ss = vec_msum(d1, d1, ss); + s = vec_sum4s(d2, s); + ss = vec_msum(d2, d2, ss); + s = vec_sum4s(d3, s); + ss = vec_msum(d3, d3, ss); + *sum = s; + *sum_squared = ss; +} + +static INLINE void variance(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, int w, + int h, uint32_t *sse, int *sum) { + int i; + + int32x4_t s = vec_splat_s32(0); + int32x4_t ss = vec_splat_s32(0); + + switch (w) { + case 4: + for (i = 0; i < h / 2; ++i) { + const int16x8_t a0 = unpack_to_s16_h(read4x2(src_ptr, src_stride)); + const int16x8_t b0 = unpack_to_s16_h(read4x2(ref_ptr, ref_stride)); + const int16x8_t d = vec_sub(a0, b0); + s = vec_sum4s(d, s); + ss = vec_msum(d, d, ss); + src_ptr += src_stride * 2; + ref_ptr += ref_stride * 2; + } + break; + case 8: + for (i = 0; i < h; ++i) { + const int16x8_t a0 = unpack_to_s16_h(vec_vsx_ld(0, src_ptr)); + const int16x8_t b0 = 
unpack_to_s16_h(vec_vsx_ld(0, ref_ptr)); + const int16x8_t d = vec_sub(a0, b0); + + s = vec_sum4s(d, s); + ss = vec_msum(d, d, ss); + src_ptr += src_stride; + ref_ptr += ref_stride; + } + break; + case 16: + for (i = 0; i < h; ++i) { + const uint8x16_t va = vec_vsx_ld(0, src_ptr); + const uint8x16_t vb = vec_vsx_ld(0, ref_ptr); + const int16x8_t a0 = unpack_to_s16_h(va); + const int16x8_t b0 = unpack_to_s16_h(vb); + const int16x8_t a1 = unpack_to_s16_l(va); + const int16x8_t b1 = unpack_to_s16_l(vb); + const int16x8_t d0 = vec_sub(a0, b0); + const int16x8_t d1 = vec_sub(a1, b1); + + s = vec_sum4s(d0, s); + ss = vec_msum(d0, d0, ss); + s = vec_sum4s(d1, s); + ss = vec_msum(d1, d1, ss); + + src_ptr += src_stride; + ref_ptr += ref_stride; + } + break; + case 32: + for (i = 0; i < h; ++i) { + variance_inner_32(src_ptr, ref_ptr, &ss, &s); + src_ptr += src_stride; + ref_ptr += ref_stride; + } + break; + case 64: + for (i = 0; i < h; ++i) { + variance_inner_32(src_ptr, ref_ptr, &ss, &s); + variance_inner_32(src_ptr + 32, ref_ptr + 32, &ss, &s); + + src_ptr += src_stride; + ref_ptr += ref_stride; + } + break; + } + + s = vec_splat(vec_sums(s, vec_splat_s32(0)), 3); + + vec_ste(s, 0, sum); + + ss = vec_splat(vec_sums(ss, vec_splat_s32(0)), 3); + + vec_ste((uint32x4_t)ss, 0, sse); +} + +/* Identical to the variance call except it takes an additional parameter, sum, + * and returns that value using pass-by-reference instead of returning + * sse - sum^2 / w*h + */ +#define GET_VAR(W, H) \ + void vpx_get##W##x##H##var_vsx(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse, int *sum) { \ + variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \ + } + +/* Identical to the variance call except it does not calculate the + * sse - sum^2 / w*h and returns sse in addtion to modifying the passed in + * variable. 
+ */
+#define MSE(W, H)                                                         \
+  uint32_t vpx_mse##W##x##H##_vsx(const uint8_t *src_ptr, int src_stride, \
+                                  const uint8_t *ref_ptr, int ref_stride, \
+                                  uint32_t *sse) {                        \
+    int sum;                                                              \
+    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum);  \
+    return *sse;                                                          \
+  }
+
+#define VAR(W, H)                                                             \
+  uint32_t vpx_variance##W##x##H##_vsx(const uint8_t *src_ptr, int src_stride, \
+                                       const uint8_t *ref_ptr, int ref_stride, \
+                                       uint32_t *sse) {                        \
+    int sum;                                                                   \
+    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum);       \
+    return *sse - (uint32_t)(((int64_t)sum * sum) / ((W) * (H)));              \
+  }
+
+#define VARIANCES(W, H) VAR(W, H)
+
+VARIANCES(64, 64)
+VARIANCES(64, 32)
+VARIANCES(32, 64)
+VARIANCES(32, 32)
+VARIANCES(32, 16)
+VARIANCES(16, 32)
+VARIANCES(16, 16)
+VARIANCES(16, 8)
+VARIANCES(8, 16)
+VARIANCES(8, 8)
+VARIANCES(8, 4)
+VARIANCES(4, 8)
+VARIANCES(4, 4)
+
+GET_VAR(16, 16)
+GET_VAR(8, 8)
+
+MSE(16, 16)
+MSE(16, 8)
+MSE(8, 16)
+MSE(8, 8)
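The VAR() macros above use the usual integer rearrangement of block variance, sse - sum * sum / (W * H); the (int64_t) cast is what keeps sum * sum from overflowing, since for a 64x64 block sum can reach 64 * 64 * 255. A scalar sketch of the same computation (illustrative helper, 8-bit samples assumed):

#include <stdint.h>

/* Scalar model of the VAR() macros: variance is E[d^2] - E[d]^2, computed
 * in integers as sse - sum*sum / (w*h) over the w x h block. */
static uint32_t block_variance(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride, int w,
                               int h) {
  int64_t sum = 0;
  uint32_t sse = 0;
  int i, j;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int d = src[j] - ref[j]; /* per-pixel difference */
      sum += d;
      sse += (uint32_t)(d * d);
    }
    src += src_stride;
    ref += ref_stride;
  }
  return sse - (uint32_t)((sum * sum) / (w * h));
}

diff --git a/media/libvpx/libvpx/vpx_dsp/ppc/vpx_convolve_vsx.c b/media/libvpx/libvpx/vpx_dsp/ppc/vpx_convolve_vsx.c
index 5c3ba4576fa6..2dc66055cc94 100644
--- a/media/libvpx/libvpx/vpx_dsp/ppc/vpx_convolve_vsx.c
+++ b/media/libvpx/libvpx/vpx_dsp/ppc/vpx_convolve_vsx.c
@@ -9,13 +9,16 @@
  */
 #include <assert.h>
 #include <string.h>
+
 #include "./vpx_dsp_rtcd.h"
-#include "vpx_dsp/vpx_filter.h"
+#include "vpx/vpx_integer.h"
 #include "vpx_dsp/ppc/types_vsx.h"
+#include "vpx_dsp/vpx_filter.h"
 
 // TODO(lu_zero): unroll
-static inline void copy_w16(const uint8_t *src, ptrdiff_t src_stride,
-                            uint8_t *dst, ptrdiff_t dst_stride, int32_t h) {
+static VPX_FORCE_INLINE void copy_w16(const uint8_t *src, ptrdiff_t src_stride,
+                                      uint8_t *dst, ptrdiff_t dst_stride,
+                                      int32_t h) {
   int i;
 
   for (i = h; i--;) {
@@ -25,8 +28,9 @@ static inline void copy_w16(const uint8_t *src, ptrdiff_t src_stride,
   }
 }
 
-static inline void copy_w32(const uint8_t *src, ptrdiff_t src_stride,
-                            uint8_t *dst, ptrdiff_t dst_stride, int32_t h) {
+static VPX_FORCE_INLINE void copy_w32(const uint8_t *src, ptrdiff_t src_stride,
+                                      uint8_t *dst, ptrdiff_t dst_stride,
+                                      int32_t h) {
   int i;
 
   for (i = h; i--;) {
@@ -37,8 +41,9 @@ static inline void copy_w32(const uint8_t *src, ptrdiff_t src_stride,
   }
 }
 
-static inline void copy_w64(const uint8_t *src, ptrdiff_t src_stride,
-                            uint8_t *dst, ptrdiff_t dst_stride, int32_t h) {
+static VPX_FORCE_INLINE void copy_w64(const uint8_t *src, ptrdiff_t src_stride,
+                                      uint8_t *dst, ptrdiff_t dst_stride,
+                                      int32_t h) {
   int i;
 
   for (i = h; i--;) {
@@ -86,8 +91,9 @@ void vpx_convolve_copy_vsx(const uint8_t *src, ptrdiff_t src_stride,
   }
 }
 
-static inline void avg_w16(const uint8_t *src, ptrdiff_t src_stride,
-                           uint8_t *dst, ptrdiff_t dst_stride, int32_t h) {
+static VPX_FORCE_INLINE void avg_w16(const uint8_t *src, ptrdiff_t src_stride,
+                                     uint8_t *dst, ptrdiff_t dst_stride,
+                                     int32_t h) {
   int i;
 
   for (i = h; i--;) {
@@ -98,8 +104,9 @@ static inline void avg_w16(const uint8_t *src, ptrdiff_t src_stride,
   }
 }
 
-static inline void avg_w32(const uint8_t *src, ptrdiff_t src_stride,
-                           uint8_t *dst, ptrdiff_t dst_stride, int32_t h) {
+static VPX_FORCE_INLINE void avg_w32(const uint8_t *src, ptrdiff_t src_stride,
+                                     uint8_t *dst, ptrdiff_t dst_stride,
+                                     int32_t h) {
   int i;
 
   for (i = h; i--;) {
@@ -112,8 +119,9 @@ static inline void avg_w32(const uint8_t *src, ptrdiff_t src_stride,
   }
 }
 
-static inline void avg_w64(const uint8_t *src, ptrdiff_t src_stride,
-                           uint8_t *dst, ptrdiff_t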
dst_stride, + int32_t h) { int i; for (i = h; i--;) { @@ -155,8 +163,8 @@ void vpx_convolve_avg_vsx(const uint8_t *src, ptrdiff_t src_stride, } } -static inline void convolve_line(uint8_t *dst, const int16x8_t s, - const int16x8_t f) { +static VPX_FORCE_INLINE void convolve_line(uint8_t *dst, const int16x8_t s, + const int16x8_t f) { const int32x4_t sum = vec_msum(s, f, vec_splat_s32(0)); const int32x4_t bias = vec_sl(vec_splat_s32(1), vec_splat_u32(FILTER_BITS - 1)); @@ -166,8 +174,9 @@ static inline void convolve_line(uint8_t *dst, const int16x8_t s, vec_ste(v, 0, dst); } -static inline void convolve_line_h(uint8_t *dst, const uint8_t *const src_x, - const int16_t *const x_filter) { +static VPX_FORCE_INLINE void convolve_line_h(uint8_t *dst, + const uint8_t *const src_x, + const int16_t *const x_filter) { const int16x8_t s = unpack_to_s16_h(vec_vsx_ld(0, src_x)); const int16x8_t f = vec_vsx_ld(0, x_filter); @@ -175,10 +184,12 @@ static inline void convolve_line_h(uint8_t *dst, const uint8_t *const src_x, } // TODO(lu_zero): Implement 8x8 and bigger block special cases -static inline void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *x_filters, int x0_q4, - int x_step_q4, int w, int h) { +static VPX_FORCE_INLINE void convolve_horiz(const uint8_t *src, + ptrdiff_t src_stride, uint8_t *dst, + ptrdiff_t dst_stride, + const InterpKernel *x_filters, + int x0_q4, int x_step_q4, int w, + int h) { int x, y; src -= SUBPEL_TAPS / 2 - 1; @@ -194,10 +205,10 @@ static inline void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, } } -static inline void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *x_filters, int x0_q4, - int x_step_q4, int w, int h) { +static VPX_FORCE_INLINE void convolve_avg_horiz( + const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, + ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4, + int x_step_q4, int w, int h) { int x, y; src -= SUBPEL_TAPS / 2 - 1; @@ -230,9 +241,10 @@ static uint8x16_t transpose_line_u8_8x8(uint8x16_t a, uint8x16_t b, return (uint8x16_t)vec_mergeh(abcd, efgh); } -static inline void convolve_line_v(uint8_t *dst, const uint8_t *const src_y, - ptrdiff_t src_stride, - const int16_t *const y_filter) { +static VPX_FORCE_INLINE void convolve_line_v(uint8_t *dst, + const uint8_t *const src_y, + ptrdiff_t src_stride, + const int16_t *const y_filter) { uint8x16_t s0 = vec_vsx_ld(0, src_y + 0 * src_stride); uint8x16_t s1 = vec_vsx_ld(0, src_y + 1 * src_stride); uint8x16_t s2 = vec_vsx_ld(0, src_y + 2 * src_stride); @@ -250,10 +262,12 @@ static inline void convolve_line_v(uint8_t *dst, const uint8_t *const src_y, convolve_line(dst, unpack_to_s16_h(s), f); } -static inline void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *y_filters, int y0_q4, - int y_step_q4, int w, int h) { +static VPX_FORCE_INLINE void convolve_vert(const uint8_t *src, + ptrdiff_t src_stride, uint8_t *dst, + ptrdiff_t dst_stride, + const InterpKernel *y_filters, + int y0_q4, int y_step_q4, int w, + int h) { int x, y; src -= src_stride * (SUBPEL_TAPS / 2 - 1); @@ -270,10 +284,10 @@ static inline void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, } } -static inline void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *y_filters, int y0_q4, - int y_step_q4, int w, int h) { +static 
VPX_FORCE_INLINE void convolve_avg_vert(
+    const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
+    ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4,
+    int y_step_q4, int w, int h) {
   int x, y;
 
   src -= src_stride * (SUBPEL_TAPS / 2 - 1);
@@ -291,11 +305,11 @@ static inline void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
   }
 }
 
-static inline void convolve(const uint8_t *src, ptrdiff_t src_stride,
-                            uint8_t *dst, ptrdiff_t dst_stride,
-                            const InterpKernel *const filter, int x0_q4,
-                            int x_step_q4, int y0_q4, int y_step_q4, int w,
-                            int h) {
+static VPX_FORCE_INLINE void convolve(const uint8_t *src, ptrdiff_t src_stride,
+                                      uint8_t *dst, ptrdiff_t dst_stride,
+                                      const InterpKernel *const filter,
+                                      int x0_q4, int x_step_q4, int y0_q4,
+                                      int y_step_q4, int w, int h) {
   // Note: Fixed size intermediate buffer, temp, places limits on parameters.
   // 2d filtering proceeds in 2 steps:
   //   (1) Interpolate horizontally into an intermediate buffer, temp.
diff --git a/media/libvpx/libvpx/vpx_dsp/prob.h b/media/libvpx/libvpx/vpx_dsp/prob.h
index f1cc0eaa105b..7a71c0041f2d 100644
--- a/media/libvpx/libvpx/vpx_dsp/prob.h
+++ b/media/libvpx/libvpx/vpx_dsp/prob.h
@@ -8,8 +8,8 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
-#ifndef VPX_DSP_PROB_H_
-#define VPX_DSP_PROB_H_
+#ifndef VPX_VPX_DSP_PROB_H_
+#define VPX_VPX_DSP_PROB_H_
 
 #include <assert.h>
 
@@ -32,7 +32,7 @@ typedef int8_t vpx_tree_index;
 
 #define TREE_SIZE(leaf_count) (2 * (leaf_count)-2)
 
-#define vpx_complement(x) (255 - x)
+#define vpx_complement(x) (255 - (x))
 
 #define MODE_MV_COUNT_SAT 20
 
@@ -103,4 +103,4 @@ DECLARE_ALIGNED(16, extern const uint8_t, vpx_norm[256]);
 }  // extern "C"
 #endif
 
-#endif  // VPX_DSP_PROB_H_
+#endif  // VPX_VPX_DSP_PROB_H_
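The vpx_complement() change in the prob.h hunk above is the standard macro-hygiene fix: without parentheses around the parameter, an expression argument is pasted textually and regroups. A two-macro illustration (hypothetical names, not libvpx API):

#define COMPLEMENT_BAD(x) (255 - x)
#define COMPLEMENT_GOOD(x) (255 - (x))
/* With p == 10:
 *   COMPLEMENT_BAD(p - 1)  expands to (255 - p - 1)   == 244 (wrong)
 *   COMPLEMENT_GOOD(p - 1) expands to (255 - (p - 1)) == 246 (right) */

diff --git a/media/libvpx/libvpx/vpx_dsp/psnr.c b/media/libvpx/libvpx/vpx_dsp/psnr.c
index 47afd4388abf..48bac04508f5 100644
--- a/media/libvpx/libvpx/vpx_dsp/psnr.c
+++ b/media/libvpx/libvpx/vpx_dsp/psnr.c
@@ -1,12 +1,12 @@
 /*
-* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
-*
-* Use of this source code is governed by a BSD-style license
-* that can be found in the LICENSE file in the root of the source
-* tree. An additional intellectual property rights grant can be found
-* in the file PATENTS. All contributing project authors may
-* be found in the AUTHORS file in the root of the source tree.
-*/
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
 
 #include <assert.h>
 #include <math.h>
 
@@ -24,8 +24,8 @@ double vpx_sse_to_psnr(double samples, double peak, double sse) {
 }
 
 /* TODO(yaowu): The block_variance calls the unoptimized versions of variance()
-* and highbd_8_variance(). It should not.
-*/
+ * and highbd_8_variance(). It should not.
+ */
 static void encoder_variance(const uint8_t *a, int a_stride, const uint8_t *b,
                              int b_stride, int w, int h, unsigned int *sse,
                              int *sum) {
diff --git a/media/libvpx/libvpx/vpx_dsp/psnr.h b/media/libvpx/libvpx/vpx_dsp/psnr.h
index f321131d0b9f..9ebb64dd526b 100644
--- a/media/libvpx/libvpx/vpx_dsp/psnr.h
+++ b/media/libvpx/libvpx/vpx_dsp/psnr.h
@@ -1,17 +1,18 @@
 /*
-* Copyright (c) 2016 The WebM project authors. All Rights Reserved.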
-*
-* Use of this source code is governed by a BSD-style license
-* that can be found in the LICENSE file in the root of the source
-* tree. An additional intellectual property rights grant can be found
-* in the file PATENTS. All contributing project authors may
-* be found in the AUTHORS file in the root of the source tree.
-*/
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
 
-#ifndef VPX_DSP_PSNR_H_
-#define VPX_DSP_PSNR_H_
+#ifndef VPX_VPX_DSP_PSNR_H_
+#define VPX_VPX_DSP_PSNR_H_
 
 #include "vpx_scale/yv12config.h"
+#include "vpx/vpx_encoder.h"
 
 #define MAX_PSNR 100.0
 
@@ -19,22 +20,18 @@
 extern "C" {
 #endif
 
-typedef struct {
-  double psnr[4];       // total/y/u/v
-  uint64_t sse[4];      // total/y/u/v
-  uint32_t samples[4];  // total/y/u/v
-} PSNR_STATS;
+typedef struct vpx_psnr_pkt PSNR_STATS;
 
 // TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t
 
 /*!\brief Converts SSE to PSNR
-*
-* Converts sum of squared errros (SSE) to peak signal-to-noise ratio (PNSR).
-*
-* \param[in] samples Number of samples
-* \param[in] peak Max sample value
-* \param[in] sse Sum of squared errors
-*/
+ *
+ * Converts sum of squared errors (SSE) to peak signal-to-noise ratio (PSNR).
+ *
+ * \param[in] samples Number of samples
+ * \param[in] peak Max sample value
+ * \param[in] sse Sum of squared errors
+ */
 double vpx_sse_to_psnr(double samples, double peak, double sse);
 int64_t vpx_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -54,4 +51,4 @@ double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,
 #ifdef __cplusplus
 }  // extern "C"
 #endif
-#endif  // VPX_DSP_PSNR_H_
+#endif  // VPX_VPX_DSP_PSNR_H_
diff --git a/media/libvpx/libvpx/vpx_dsp/psnrhvs.c b/media/libvpx/libvpx/vpx_dsp/psnrhvs.c
index b3910152c472..d7ec1a429ac5 100644
--- a/media/libvpx/libvpx/vpx_dsp/psnrhvs.c
+++ b/media/libvpx/libvpx/vpx_dsp/psnrhvs.c
@@ -126,8 +126,10 @@ static double calc_psnrhvs(const unsigned char *src, int _systride,
   const uint8_t *_dst8 = dst;
   const uint16_t *_src16 = CONVERT_TO_SHORTPTR(src);
   const uint16_t *_dst16 = CONVERT_TO_SHORTPTR(dst);
-  int16_t dct_s[8 * 8], dct_d[8 * 8];
-  tran_low_t dct_s_coef[8 * 8], dct_d_coef[8 * 8];
+  DECLARE_ALIGNED(16, int16_t, dct_s[8 * 8]);
+  DECLARE_ALIGNED(16, int16_t, dct_d[8 * 8]);
+  DECLARE_ALIGNED(16, tran_low_t, dct_s_coef[8 * 8]);
+  DECLARE_ALIGNED(16, tran_low_t, dct_d_coef[8 * 8]);
   double mask[8][8];
   int pixels;
   int x;
@@ -142,7 +144,7 @@ static double calc_psnrhvs(const unsigned char *src, int _systride,
      been normalized and then squared." Their CSF matrix (from PSNR-HVS)
      was also constructed from the JPEG matrices. I can not find any obvious
      scheme of normalizing to produce their table, but if I multiply their
-     CSF by 0.38857 and square the result I get their masking table.
+     CSF by 0.3885746225901003 and square the result I get their masking table.
      I have no idea where this constant comes from, but deviating from it too
      greatly hurts MOS agreement.
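The masking change in the last hunk above swaps the magic factor for a normalization by the largest CSF coefficient; per the quoted comment, 0.3885746225901003 is the reciprocal of that maximum (about 2.573509), so both forms yield the same table for the old matrix. A small numeric check under that assumption (the constant names are illustrative):

#include <stdio.h>

int main(void) {
  const double csf_max = 2.573509; /* largest coefficient of the old table */
  const double c = 1.52;           /* any CSF coefficient */
  const double old_mask = (c * 0.3885746225901003) * (c * 0.3885746225901003);
  const double new_mask = (c / csf_max) * (c / csf_max);
  printf("%.12f %.12f\n", old_mask, new_mask); /* agree to rounding error */
  return 0;
}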
@@ -150,11 +152,15 @@ static double calc_psnrhvs(const unsigned char *src, int _systride, Jaakko Astola, Vladimir Lukin, "On between-coefficient contrast masking of DCT basis functions", CD-ROM Proceedings of the Third International Workshop on Video Processing and Quality Metrics for Consumer - Electronics VPQM-07, Scottsdale, Arizona, USA, 25-26 January, 2007, 4 p.*/ + Electronics VPQM-07, Scottsdale, Arizona, USA, 25-26 January, 2007, 4 p. + + Suggested in aomedia issue #2363: + 0.3885746225901003 is a reciprocal of the maximum coefficient (2.573509) + of the old JPEG based matrix from the paper. Since you are not using that, + divide by actual maximum coefficient. */ for (x = 0; x < 8; x++) for (y = 0; y < 8; y++) - mask[x][y] = - (_csf[x][y] * 0.3885746225901003) * (_csf[x][y] * 0.3885746225901003); + mask[x][y] = (_csf[x][y] / _csf[1][0]) * (_csf[x][y] / _csf[1][0]); for (y = 0; y < _h - 7; y += _step) { for (x = 0; x < _w - 7; x += _step) { int i; diff --git a/media/libvpx/libvpx/vpx_dsp/quantize.c b/media/libvpx/libvpx/vpx_dsp/quantize.c index e37ca92ad458..61818f692e61 100644 --- a/media/libvpx/libvpx/vpx_dsp/quantize.c +++ b/media/libvpx/libvpx/vpx_dsp/quantize.c @@ -12,12 +12,13 @@ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/quantize.h" +#include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr) { + const int16_t dequant, uint16_t *eob_ptr) { const int rc = 0; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); @@ -31,7 +32,7 @@ void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); tmp = (tmp * quant) >> 16; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr; + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant; if (tmp) eob = 0; } *eob_ptr = eob + 1; @@ -41,7 +42,7 @@ void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, + tran_low_t *dqcoeff_ptr, const int16_t dequant, uint16_t *eob_ptr) { int eob = -1; @@ -55,7 +56,7 @@ void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, const int64_t tmp = abs_coeff + round_ptr[0]; const int abs_qcoeff = (int)((tmp * quant) >> 16); qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr; + dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant; if (abs_qcoeff) eob = 0; } *eob_ptr = eob + 1; @@ -65,7 +66,7 @@ void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, void vpx_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, uint16_t *eob_ptr) { + const int16_t dequant, uint16_t *eob_ptr) { const int n_coeffs = 1024; const int rc = 0; const int coeff = coeff_ptr[rc]; @@ -81,7 +82,7 @@ void vpx_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, INT16_MIN, INT16_MAX); tmp = (tmp * quant) >> 15; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2; + 
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / 2; if (tmp) eob = 0; } *eob_ptr = eob + 1; @@ -92,8 +93,7 @@ void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t dequant_ptr, - uint16_t *eob_ptr) { + const int16_t dequant, uint16_t *eob_ptr) { const int n_coeffs = 1024; int eob = -1; @@ -107,7 +107,7 @@ void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1); const int abs_qcoeff = (int)((tmp * quant) >> 15); qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2; + dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant / 2; if (abs_qcoeff) eob = 0; } *eob_ptr = eob + 1; @@ -156,7 +156,7 @@ void vpx_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, quant_shift_ptr[rc != 0]) >> 16; // quantization qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; + dqcoeff_ptr[rc] = (tran_low_t)(qcoeff_ptr[rc] * dequant_ptr[rc != 0]); if (tmp) eob = i; } @@ -260,7 +260,15 @@ void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, 15; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; +#if (VPX_ARCH_X86 || VPX_ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH + // When tran_low_t is only 16 bits dqcoeff can outrange it. Rather than + // truncating with a cast, saturate the value. This is easier to implement + // on x86 and preserves the sign of the value. + dqcoeff_ptr[rc] = + clamp(qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2, INT16_MIN, INT16_MAX); +#else dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; +#endif // VPX_ARCH_X86 && CONFIG_VP9_HIGHBITDEPTH if (tmp) eob = idx_arr[i]; } diff --git a/media/libvpx/libvpx/vpx_dsp/quantize.h b/media/libvpx/libvpx/vpx_dsp/quantize.h index e13284546333..7cac140e9de2 100644 --- a/media/libvpx/libvpx/vpx_dsp/quantize.h +++ b/media/libvpx/libvpx/vpx_dsp/quantize.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
 */
 
-#ifndef VPX_DSP_QUANTIZE_H_
-#define VPX_DSP_QUANTIZE_H_
+#ifndef VPX_VPX_DSP_QUANTIZE_H_
+#define VPX_VPX_DSP_QUANTIZE_H_
 
 #include "./vpx_config.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 
@@ -19,30 +19,29 @@ extern "C" {
 #endif
 
 void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
-                     const int16_t *round_ptr, const int16_t quant_ptr,
+                     const int16_t *round_ptr, const int16_t quant,
                      tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                     const int16_t dequant_ptr, uint16_t *eob_ptr);
+                     const int16_t dequant, uint16_t *eob_ptr);
 void vpx_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
-                           const int16_t *round_ptr, const int16_t quant_ptr,
+                           const int16_t *round_ptr, const int16_t quant,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                           const int16_t dequant_ptr, uint16_t *eob_ptr);
+                           const int16_t dequant, uint16_t *eob_ptr);
 
 #if CONFIG_VP9_HIGHBITDEPTH
 void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
                             int skip_block, const int16_t *round_ptr,
-                            const int16_t quant_ptr, tran_low_t *qcoeff_ptr,
-                            tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
+                            const int16_t quant, tran_low_t *qcoeff_ptr,
+                            tran_low_t *dqcoeff_ptr, const int16_t dequant,
                             uint16_t *eob_ptr);
 void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
-                                  const int16_t *round_ptr,
-                                  const int16_t quant_ptr,
+                                  const int16_t *round_ptr, const int16_t quant,
                                   tran_low_t *qcoeff_ptr,
                                   tran_low_t *dqcoeff_ptr,
-                                  const int16_t dequant_ptr, uint16_t *eob_ptr);
+                                  const int16_t dequant, uint16_t *eob_ptr);
 #endif
 
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 
-#endif  // VPX_DSP_QUANTIZE_H_
+#endif  // VPX_VPX_DSP_QUANTIZE_H_
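For context on the renamed scalar arguments above, the DC quantizers take a single quant/dequant value rather than a per-band pointer. A simplified sketch of the vpx_quantize_dc() arithmetic that appears later in this patch (the helper name is illustrative; clamping and end-of-block bookkeeping are omitted):

#include <stdint.h>

/* Sketch of the DC quantizer: round, scale by the Q16 quantizer, restore
 * the sign, then dequantize. The real routine also clamps the rounded
 * value to INT16_MIN/INT16_MAX and updates *eob_ptr. */
static int16_t quantize_dc_sketch(int coeff, int16_t round, int16_t quant,
                                  int16_t dequant, int32_t *dqcoeff) {
  const int coeff_sign = coeff >> 31; /* 0 or -1 */
  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
  const int tmp = ((abs_coeff + round) * quant) >> 16;
  const int16_t qcoeff = (int16_t)((tmp ^ coeff_sign) - coeff_sign);
  *dqcoeff = qcoeff * dequant;
  return qcoeff;
}

diff --git a/media/libvpx/libvpx/vpx_dsp/sad.c b/media/libvpx/libvpx/vpx_dsp/sad.c
index 18b6dc6e09f7..769322019e93 100644
--- a/media/libvpx/libvpx/vpx_dsp/sad.c
+++ b/media/libvpx/libvpx/vpx_dsp/sad.c
@@ -17,54 +17,55 @@
 #include "vpx_ports/mem.h"
 
 /* Sum the difference between every corresponding element of the buffers.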
*/ -static INLINE unsigned int sad(const uint8_t *a, int a_stride, const uint8_t *b, - int b_stride, int width, int height) { +static INLINE unsigned int sad(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + int width, int height) { int y, x; unsigned int sad = 0; for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) sad += abs(a[x] - b[x]); + for (x = 0; x < width; x++) sad += abs(src_ptr[x] - ref_ptr[x]); - a += a_stride; - b += b_stride; + src_ptr += src_stride; + ref_ptr += ref_stride; } return sad; } -#define sadMxN(m, n) \ - unsigned int vpx_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride) { \ - return sad(src, src_stride, ref, ref_stride, m, n); \ - } \ - unsigned int vpx_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - const uint8_t *second_pred) { \ - DECLARE_ALIGNED(16, uint8_t, comp_pred[m * n]); \ - vpx_comp_avg_pred_c(comp_pred, second_pred, m, n, ref, ref_stride); \ - return sad(src, src_stride, comp_pred, m, m, n); \ +#define sadMxN(m, n) \ + unsigned int vpx_sad##m##x##n##_c(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride) { \ + return sad(src_ptr, src_stride, ref_ptr, ref_stride, m, n); \ + } \ + unsigned int vpx_sad##m##x##n##_avg_c( \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride, const uint8_t *second_pred) { \ + DECLARE_ALIGNED(16, uint8_t, comp_pred[m * n]); \ + vpx_comp_avg_pred_c(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \ + return sad(src_ptr, src_stride, comp_pred, m, m, n); \ } // depending on call sites, pass **ref_array to avoid & in subsequent call and // de-dup with 4D below. -#define sadMxNxK(m, n, k) \ - void vpx_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref_array, int ref_stride, \ - uint32_t *sad_array) { \ - int i; \ - for (i = 0; i < k; ++i) \ - sad_array[i] = \ - vpx_sad##m##x##n##_c(src, src_stride, &ref_array[i], ref_stride); \ +#define sadMxNxK(m, n, k) \ + void vpx_sad##m##x##n##x##k##_c(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sad_array) { \ + int i; \ + for (i = 0; i < k; ++i) \ + sad_array[i] = \ + vpx_sad##m##x##n##_c(src_ptr, src_stride, &ref_ptr[i], ref_stride); \ } // This appears to be equivalent to the above when k == 4 and refs is const -#define sadMxNx4D(m, n) \ - void vpx_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ - const uint8_t *const ref_array[], \ - int ref_stride, uint32_t *sad_array) { \ - int i; \ - for (i = 0; i < 4; ++i) \ - sad_array[i] = \ - vpx_sad##m##x##n##_c(src, src_stride, ref_array[i], ref_stride); \ +#define sadMxNx4D(m, n) \ + void vpx_sad##m##x##n##x4d_c(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *const ref_array[], \ + int ref_stride, uint32_t *sad_array) { \ + int i; \ + for (i = 0; i < 4; ++i) \ + sad_array[i] = \ + vpx_sad##m##x##n##_c(src_ptr, src_stride, ref_array[i], ref_stride); \ } /* clang-format off */ @@ -82,6 +83,7 @@ sadMxNx4D(32, 64) // 32x32 sadMxN(32, 32) +sadMxNxK(32, 32, 8) sadMxNx4D(32, 32) // 32x16 @@ -133,59 +135,61 @@ sadMxNx4D(4, 4) #if CONFIG_VP9_HIGHBITDEPTH static INLINE - unsigned int highbd_sad(const uint8_t *a8, int a_stride, const uint8_t *b8, - int b_stride, int width, int height) { + unsigned int highbd_sad(const uint8_t *src8_ptr, int src_stride, + const uint8_t *ref8_ptr, int ref_stride, int width, + int height) { int y, x; unsigned int sad = 
0; - const uint16_t *a = CONVERT_TO_SHORTPTR(a8); - const uint16_t *b = CONVERT_TO_SHORTPTR(b8); + const uint16_t *src = CONVERT_TO_SHORTPTR(src8_ptr); + const uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(ref8_ptr); for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) sad += abs(a[x] - b[x]); + for (x = 0; x < width; x++) sad += abs(src[x] - ref_ptr[x]); - a += a_stride; - b += b_stride; + src += src_stride; + ref_ptr += ref_stride; } return sad; } -static INLINE unsigned int highbd_sadb(const uint8_t *a8, int a_stride, - const uint16_t *b, int b_stride, +static INLINE unsigned int highbd_sadb(const uint8_t *src8_ptr, int src_stride, + const uint16_t *ref_ptr, int ref_stride, int width, int height) { int y, x; unsigned int sad = 0; - const uint16_t *a = CONVERT_TO_SHORTPTR(a8); + const uint16_t *src = CONVERT_TO_SHORTPTR(src8_ptr); for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) sad += abs(a[x] - b[x]); + for (x = 0; x < width; x++) sad += abs(src[x] - ref_ptr[x]); - a += a_stride; - b += b_stride; + src += src_stride; + ref_ptr += ref_stride; } return sad; } #define highbd_sadMxN(m, n) \ - unsigned int vpx_highbd_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, \ - int ref_stride) { \ - return highbd_sad(src, src_stride, ref, ref_stride, m, n); \ + unsigned int vpx_highbd_sad##m##x##n##_c( \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride) { \ + return highbd_sad(src_ptr, src_stride, ref_ptr, ref_stride, m, n); \ } \ unsigned int vpx_highbd_sad##m##x##n##_avg_c( \ - const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \ - const uint8_t *second_pred) { \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride, const uint8_t *second_pred) { \ DECLARE_ALIGNED(16, uint16_t, comp_pred[m * n]); \ - vpx_highbd_comp_avg_pred_c(comp_pred, second_pred, m, n, ref, ref_stride); \ - return highbd_sadb(src, src_stride, comp_pred, m, m, n); \ + vpx_highbd_comp_avg_pred_c(comp_pred, CONVERT_TO_SHORTPTR(second_pred), m, \ + n, CONVERT_TO_SHORTPTR(ref_ptr), ref_stride); \ + return highbd_sadb(src_ptr, src_stride, comp_pred, m, m, n); \ } -#define highbd_sadMxNx4D(m, n) \ - void vpx_highbd_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ - const uint8_t *const ref_array[], \ - int ref_stride, uint32_t *sad_array) { \ - int i; \ - for (i = 0; i < 4; ++i) { \ - sad_array[i] = vpx_highbd_sad##m##x##n##_c(src, src_stride, \ - ref_array[i], ref_stride); \ - } \ +#define highbd_sadMxNx4D(m, n) \ + void vpx_highbd_sad##m##x##n##x4d_c(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *const ref_array[], \ + int ref_stride, uint32_t *sad_array) { \ + int i; \ + for (i = 0; i < 4; ++i) { \ + sad_array[i] = vpx_highbd_sad##m##x##n##_c(src_ptr, src_stride, \ + ref_array[i], ref_stride); \ + } \ } /* clang-format off */ diff --git a/media/libvpx/libvpx/vpx_dsp/skin_detection.h b/media/libvpx/libvpx/vpx_dsp/skin_detection.h index a2e99baf7e7c..91640c33d5a7 100644 --- a/media/libvpx/libvpx/vpx_dsp/skin_detection.h +++ b/media/libvpx/libvpx/vpx_dsp/skin_detection.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_SKIN_DETECTION_H_ -#define VPX_DSP_SKIN_DETECTION_H_ +#ifndef VPX_VPX_DSP_SKIN_DETECTION_H_ +#define VPX_VPX_DSP_SKIN_DETECTION_H_ #ifdef __cplusplus extern "C" { @@ -21,4 +21,4 @@ int vpx_skin_pixel(const int y, const int cb, const int cr, int motion); } // extern "C" #endif -#endif // VPX_DSP_SKIN_DETECTION_H_ +#endif // VPX_VPX_DSP_SKIN_DETECTION_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/ssim.c b/media/libvpx/libvpx/vpx_dsp/ssim.c index 7a29bd29f9f1..7c3c31bad869 100644 --- a/media/libvpx/libvpx/vpx_dsp/ssim.c +++ b/media/libvpx/libvpx/vpx_dsp/ssim.c @@ -73,7 +73,7 @@ static const int64_t cc2_12 = 61817334; // (64^2*(.03*4095)^2 static double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s, uint32_t sum_sq_r, uint32_t sum_sxr, int count, uint32_t bd) { - int64_t ssim_n, ssim_d; + double ssim_n, ssim_d; int64_t c1, c2; if (bd == 8) { // scale the constants by number of pixels @@ -90,14 +90,14 @@ static double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s, assert(0); } - ssim_n = (2 * sum_s * sum_r + c1) * - ((int64_t)2 * count * sum_sxr - (int64_t)2 * sum_s * sum_r + c2); + ssim_n = (2.0 * sum_s * sum_r + c1) * + (2.0 * count * sum_sxr - 2.0 * sum_s * sum_r + c2); - ssim_d = (sum_s * sum_s + sum_r * sum_r + c1) * - ((int64_t)count * sum_sq_s - (int64_t)sum_s * sum_s + - (int64_t)count * sum_sq_r - (int64_t)sum_r * sum_r + c2); + ssim_d = ((double)sum_s * sum_s + (double)sum_r * sum_r + c1) * + ((double)count * sum_sq_s - (double)sum_s * sum_s + + (double)count * sum_sq_r - (double)sum_r * sum_r + c2); - return ssim_n * 1.0 / ssim_d; + return ssim_n / ssim_d; } static double ssim_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp) { @@ -284,7 +284,7 @@ double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2, for (i = 0; i < height; i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) { for (j = 0; j < width; j += 4, ++c) { - Ssimv sv = { 0 }; + Ssimv sv = { 0, 0, 0, 0, 0, 0 }; double ssim; double ssim2; double dssim; diff --git a/media/libvpx/libvpx/vpx_dsp/ssim.h b/media/libvpx/libvpx/vpx_dsp/ssim.h index 4f2bb1d556c8..c382237fc6dc 100644 --- a/media/libvpx/libvpx/vpx_dsp/ssim.h +++ b/media/libvpx/libvpx/vpx_dsp/ssim.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_SSIM_H_ -#define VPX_DSP_SSIM_H_ +#ifndef VPX_VPX_DSP_SSIM_H_ +#define VPX_VPX_DSP_SSIM_H_ #define MAX_SSIM_DB 100.0; @@ -84,4 +84,4 @@ double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source, } // extern "C" #endif -#endif // VPX_DSP_SSIM_H_ +#endif // VPX_VPX_DSP_SSIM_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/subtract.c b/media/libvpx/libvpx/vpx_dsp/subtract.c index 95e7071b27e9..45c819e67a70 100644 --- a/media/libvpx/libvpx/vpx_dsp/subtract.c +++ b/media/libvpx/libvpx/vpx_dsp/subtract.c @@ -16,37 +16,37 @@ #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" -void vpx_subtract_block_c(int rows, int cols, int16_t *diff, - ptrdiff_t diff_stride, const uint8_t *src, - ptrdiff_t src_stride, const uint8_t *pred, +void vpx_subtract_block_c(int rows, int cols, int16_t *diff_ptr, + ptrdiff_t diff_stride, const uint8_t *src_ptr, + ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) { int r, c; for (r = 0; r < rows; r++) { - for (c = 0; c < cols; c++) diff[c] = src[c] - pred[c]; + for (c = 0; c < cols; c++) diff_ptr[c] = src_ptr[c] - pred_ptr[c]; - diff += diff_stride; - pred += pred_stride; - src += src_stride; + diff_ptr += diff_stride; + pred_ptr += pred_stride; + src_ptr += src_stride; } } #if CONFIG_VP9_HIGHBITDEPTH -void vpx_highbd_subtract_block_c(int rows, int cols, int16_t *diff, - ptrdiff_t diff_stride, const uint8_t *src8, - ptrdiff_t src_stride, const uint8_t *pred8, +void vpx_highbd_subtract_block_c(int rows, int cols, int16_t *diff_ptr, + ptrdiff_t diff_stride, const uint8_t *src8_ptr, + ptrdiff_t src_stride, const uint8_t *pred8_ptr, ptrdiff_t pred_stride, int bd) { int r, c; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); + uint16_t *src = CONVERT_TO_SHORTPTR(src8_ptr); + uint16_t *pred = CONVERT_TO_SHORTPTR(pred8_ptr); (void)bd; for (r = 0; r < rows; r++) { for (c = 0; c < cols; c++) { - diff[c] = src[c] - pred[c]; + diff_ptr[c] = src[c] - pred[c]; } - diff += diff_stride; + diff_ptr += diff_stride; pred += pred_stride; src += src_stride; } diff --git a/media/libvpx/libvpx/vpx_dsp/sum_squares.c b/media/libvpx/libvpx/vpx_dsp/sum_squares.c index 7c535ac2db60..b80cd588e42d 100644 --- a/media/libvpx/libvpx/vpx_dsp/sum_squares.c +++ b/media/libvpx/libvpx/vpx_dsp/sum_squares.c @@ -10,8 +10,7 @@ #include "./vpx_dsp_rtcd.h" -uint64_t vpx_sum_squares_2d_i16_c(const int16_t *src, int src_stride, - int size) { +uint64_t vpx_sum_squares_2d_i16_c(const int16_t *src, int stride, int size) { int r, c; uint64_t ss = 0; @@ -20,7 +19,7 @@ uint64_t vpx_sum_squares_2d_i16_c(const int16_t *src, int src_stride, const int16_t v = src[c]; ss += v * v; } - src += src_stride; + src += stride; } return ss; diff --git a/media/libvpx/libvpx/vpx_dsp/txfm_common.h b/media/libvpx/libvpx/vpx_dsp/txfm_common.h index d01d7085a274..25f4fdb32764 100644 --- a/media/libvpx/libvpx/vpx_dsp/txfm_common.h +++ b/media/libvpx/libvpx/vpx_dsp/txfm_common.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_TXFM_COMMON_H_ -#define VPX_DSP_TXFM_COMMON_H_ +#ifndef VPX_VPX_DSP_TXFM_COMMON_H_ +#define VPX_VPX_DSP_TXFM_COMMON_H_ #include "vpx_dsp/vpx_dsp_common.h" @@ -63,4 +63,4 @@ static const tran_coef_t sinpi_2_9 = 9929; static const tran_coef_t sinpi_3_9 = 13377; static const tran_coef_t sinpi_4_9 = 15212; -#endif // VPX_DSP_TXFM_COMMON_H_ +#endif // VPX_VPX_DSP_TXFM_COMMON_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/variance.c b/media/libvpx/libvpx/vpx_dsp/variance.c index 93bd8f30de02..30b55dcb4073 100644 --- a/media/libvpx/libvpx/vpx_dsp/variance.c +++ b/media/libvpx/libvpx/vpx_dsp/variance.c @@ -21,36 +21,37 @@ static const uint8_t bilinear_filters[8][2] = { { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 }, }; -uint32_t vpx_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b, - int b_stride) { +uint32_t vpx_get4x4sse_cs_c(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride) { int distortion = 0; int r, c; for (r = 0; r < 4; ++r) { for (c = 0; c < 4; ++c) { - int diff = a[c] - b[c]; + int diff = src_ptr[c] - ref_ptr[c]; distortion += diff * diff; } - a += a_stride; - b += b_stride; + src_ptr += src_stride; + ref_ptr += ref_stride; } return distortion; } -uint32_t vpx_get_mb_ss_c(const int16_t *a) { +uint32_t vpx_get_mb_ss_c(const int16_t *src_ptr) { unsigned int i, sum = 0; for (i = 0; i < 256; ++i) { - sum += a[i] * a[i]; + sum += src_ptr[i] * src_ptr[i]; } return sum; } -static void variance(const uint8_t *a, int a_stride, const uint8_t *b, - int b_stride, int w, int h, uint32_t *sse, int *sum) { +static void variance(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, int w, int h, + uint32_t *sse, int *sum) { int i, j; *sum = 0; @@ -58,13 +59,13 @@ static void variance(const uint8_t *a, int a_stride, const uint8_t *b, for (i = 0; i < h; ++i) { for (j = 0; j < w; ++j) { - const int diff = a[j] - b[j]; + const int diff = src_ptr[j] - ref_ptr[j]; *sum += diff; *sse += diff * diff; } - a += a_stride; - b += b_stride; + src_ptr += src_stride; + ref_ptr += ref_stride; } } @@ -76,24 +77,23 @@ static void variance(const uint8_t *a, int a_stride, const uint8_t *b, // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride). // It defines the offset required to move from one input to the next. -static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b, - unsigned int src_pixels_per_line, - int pixel_step, - unsigned int output_height, - unsigned int output_width, - const uint8_t *filter) { +static void var_filter_block2d_bil_first_pass( + const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line, + int pixel_step, unsigned int output_height, unsigned int output_width, + const uint8_t *filter) { unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { - b[j] = ROUND_POWER_OF_TWO( - (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS); + ref_ptr[j] = ROUND_POWER_OF_TWO( + (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], + FILTER_BITS); - ++a; + ++src_ptr; } - a += src_pixels_per_line - output_width; - b += output_width; + src_ptr += src_pixels_per_line - output_width; + ref_ptr += output_width; } } @@ -106,91 +106,90 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b, // filter is applied horizontally (pixel_step = 1) or vertically // (pixel_step = stride). 
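Both bilinear passes in the variance.c hunks here reduce to one two-tap step: blend a pixel with its neighbour at pixel_step (1 when filtering horizontally, the row stride when filtering vertically) using taps that sum to 128, then round. A sketch of that single step; the ROUND_POWER_OF_TWO body is an assumption taken from vpx_ports, not part of this hunk:

    #include <stdint.h>

    #define FILTER_BITS 7 /* taps sum to 1 << FILTER_BITS == 128 */
    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

    /* One first-pass output pixel, as in var_filter_block2d_bil_first_pass. */
    static uint16_t bilinear_tap(const uint8_t *src_ptr, int pixel_step,
                                 const uint8_t filter[2]) {
      return ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
    }

    int main(void) {
      const uint8_t row[2] = { 0, 128 };
      const uint8_t taps[2] = { 48, 80 }; /* bilinear_filters[5] in the hunk */
      return (int)bilinear_tap(row, 1, taps); /* (0*48 + 128*80 + 64) >> 7 */
    }

With taps { 48, 80 } the sample is interpolated 5/8 of the way toward the neighbouring pixel, which is how the x_offset/y_offset indices into bilinear_filters are meant to be read.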
It defines the offset required to move from one input // to the next. Output is 8-bit. -static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const uint8_t *filter) { +static void var_filter_block2d_bil_second_pass( + const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line, + unsigned int pixel_step, unsigned int output_height, + unsigned int output_width, const uint8_t *filter) { unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { - b[j] = ROUND_POWER_OF_TWO( - (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS); - ++a; + ref_ptr[j] = ROUND_POWER_OF_TWO( + (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], + FILTER_BITS); + ++src_ptr; } - a += src_pixels_per_line - output_width; - b += output_width; + src_ptr += src_pixels_per_line - output_width; + ref_ptr += output_width; } } -#define VAR(W, H) \ - uint32_t vpx_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - uint32_t *sse) { \ - int sum; \ - variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \ +#define VAR(W, H) \ + uint32_t vpx_variance##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + int sum; \ + variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \ + return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \ } -#define SUBPIX_VAR(W, H) \ - uint32_t vpx_sub_pixel_variance##W##x##H##_c( \ - const uint8_t *a, int a_stride, int xoffset, int yoffset, \ - const uint8_t *b, int b_stride, uint32_t *sse) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint8_t temp2[H * W]; \ - \ - var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \ - bilinear_filters[xoffset]); \ - var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters[yoffset]); \ - \ - return vpx_variance##W##x##H##_c(temp2, W, b, b_stride, sse); \ +#define SUBPIX_VAR(W, H) \ + uint32_t vpx_sub_pixel_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint8_t temp2[H * W]; \ + \ + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters[x_offset]); \ + var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + return vpx_variance##W##x##H##_c(temp2, W, ref_ptr, ref_stride, sse); \ } -#define SUBPIX_AVG_VAR(W, H) \ - uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *a, int a_stride, int xoffset, int yoffset, \ - const uint8_t *b, int b_stride, uint32_t *sse, \ - const uint8_t *second_pred) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint8_t temp2[H * W]; \ - DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ - \ - var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \ - bilinear_filters[xoffset]); \ - var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters[yoffset]); \ - \ - vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W); \ - \ - return vpx_variance##W##x##H##_c(temp3, W, b, b_stride, sse); \ +#define SUBPIX_AVG_VAR(W, H) \ + uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int 
y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint8_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ + \ + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters[x_offset]); \ + var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W); \ + \ + return vpx_variance##W##x##H##_c(temp3, W, ref_ptr, ref_stride, sse); \ } /* Identical to the variance call except it takes an additional parameter, sum, * and returns that value using pass-by-reference instead of returning * sse - sum^2 / w*h */ -#define GET_VAR(W, H) \ - void vpx_get##W##x##H##var_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, uint32_t *sse, \ - int *sum) { \ - variance(a, a_stride, b, b_stride, W, H, sse, sum); \ +#define GET_VAR(W, H) \ + void vpx_get##W##x##H##var_c(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse, int *sum) { \ + variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \ } /* Identical to the variance call except it does not calculate the * sse - sum^2 / w*h and returns sse in addtion to modifying the passed in * variable. */ -#define MSE(W, H) \ - uint32_t vpx_mse##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - uint32_t *sse) { \ - int sum; \ - variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse; \ +#define MSE(W, H) \ + uint32_t vpx_mse##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + int sum; \ + variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \ + return *sse; \ } /* All three forms of the variance are available in the same sizes. 
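Each VAR(W, H) expansion above computes block variance through the identity var = sse - sum^2 / N, widening sum * sum to int64_t so even a 64x64 block cannot overflow. A compact restatement of variance() plus the macro's return expression:

    #include <stdint.h>

    /* var = sse - sum^2 / N: the expression each VAR(W, H) expansion returns. */
    static uint32_t block_variance(const uint8_t *src_ptr, int src_stride,
                                   const uint8_t *ref_ptr, int ref_stride,
                                   int w, int h) {
      uint32_t sse = 0;
      int sum = 0;
      int i, j;
      for (i = 0; i < h; ++i) {
        for (j = 0; j < w; ++j) {
          const int diff = src_ptr[j] - ref_ptr[j];
          sum += diff;
          sse += diff * diff;
        }
        src_ptr += src_stride;
        ref_ptr += ref_stride;
      }
      return sse - (uint32_t)(((int64_t)sum * sum) / (w * h));
    }

    int main(void) {
      const uint8_t a[4] = { 10, 12, 10, 12 };
      const uint8_t b[4] = { 11, 11, 11, 11 };
      /* diffs -1, +1, -1, +1: sum == 0, sse == 4, variance == 4 */
      return block_variance(a, 2, b, 2, 2, 2) == 4 ? 0 : 1;
    }

The 10- and 12-bit HIGHBD_VAR versions a little further down additionally clamp a negative result to 0, because their sse and sum are rounded back toward the 8-bit scale independently before the subtraction.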
*/ @@ -237,128 +236,140 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, } #if CONFIG_VP9_HIGHBITDEPTH -static void highbd_variance64(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, int w, int h, - uint64_t *sse, int64_t *sum) { +static void highbd_variance64(const uint8_t *src8_ptr, int src_stride, + const uint8_t *ref8_ptr, int ref_stride, int w, + int h, uint64_t *sse, int64_t *sum) { int i, j; - uint16_t *a = CONVERT_TO_SHORTPTR(a8); - uint16_t *b = CONVERT_TO_SHORTPTR(b8); + uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8_ptr); + uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(ref8_ptr); *sum = 0; *sse = 0; for (i = 0; i < h; ++i) { for (j = 0; j < w; ++j) { - const int diff = a[j] - b[j]; + const int diff = src_ptr[j] - ref_ptr[j]; *sum += diff; *sse += diff * diff; } - a += a_stride; - b += b_stride; + src_ptr += src_stride; + ref_ptr += ref_stride; } } -static void highbd_8_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, int w, int h, - uint32_t *sse, int *sum) { +static void highbd_8_variance(const uint8_t *src8_ptr, int src_stride, + const uint8_t *ref8_ptr, int ref_stride, int w, + int h, uint32_t *sse, int *sum) { uint64_t sse_long = 0; int64_t sum_long = 0; - highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); + highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long, + &sum_long); *sse = (uint32_t)sse_long; *sum = (int)sum_long; } -static void highbd_10_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, int w, int h, - uint32_t *sse, int *sum) { +static void highbd_10_variance(const uint8_t *src8_ptr, int src_stride, + const uint8_t *ref8_ptr, int ref_stride, int w, + int h, uint32_t *sse, int *sum) { uint64_t sse_long = 0; int64_t sum_long = 0; - highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); + highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long, + &sum_long); *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4); *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2); } -static void highbd_12_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, int w, int h, - uint32_t *sse, int *sum) { +static void highbd_12_variance(const uint8_t *src8_ptr, int src_stride, + const uint8_t *ref8_ptr, int ref_stride, int w, + int h, uint32_t *sse, int *sum) { uint64_t sse_long = 0; int64_t sum_long = 0; - highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); + highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long, + &sum_long); *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8); *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4); } -#define HIGHBD_VAR(W, H) \ - uint32_t vpx_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - uint32_t *sse) { \ - int sum; \ - highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \ - } \ - \ - uint32_t vpx_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - uint32_t *sse) { \ - int sum; \ - int64_t var; \ - highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \ - return (var >= 0) ? 
(uint32_t)var : 0; \ - } \ - \ - uint32_t vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const uint8_t *b, int b_stride, \ - uint32_t *sse) { \ - int sum; \ - int64_t var; \ - highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ - var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \ - return (var >= 0) ? (uint32_t)var : 0; \ +#define HIGHBD_VAR(W, H) \ + uint32_t vpx_highbd_8_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride, uint32_t *sse) { \ + int sum; \ + highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \ + &sum); \ + return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \ + } \ + \ + uint32_t vpx_highbd_10_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride, uint32_t *sse) { \ + int sum; \ + int64_t var; \ + highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \ + &sum); \ + var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \ + return (var >= 0) ? (uint32_t)var : 0; \ + } \ + \ + uint32_t vpx_highbd_12_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride, uint32_t *sse) { \ + int sum; \ + int64_t var; \ + highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \ + &sum); \ + var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \ + return (var >= 0) ? (uint32_t)var : 0; \ } -#define HIGHBD_GET_VAR(S) \ - void vpx_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - uint32_t *sse, int *sum) { \ - highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ - } \ - \ - void vpx_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - uint32_t *sse, int *sum) { \ - highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ - } \ - \ - void vpx_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - uint32_t *sse, int *sum) { \ - highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ +#define HIGHBD_GET_VAR(S) \ + void vpx_highbd_8_get##S##x##S##var_c( \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride, uint32_t *sse, int *sum) { \ + highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \ + sum); \ + } \ + \ + void vpx_highbd_10_get##S##x##S##var_c( \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride, uint32_t *sse, int *sum) { \ + highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \ + sum); \ + } \ + \ + void vpx_highbd_12_get##S##x##S##var_c( \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride, uint32_t *sse, int *sum) { \ + highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \ + sum); \ } -#define HIGHBD_MSE(W, H) \ - uint32_t vpx_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - uint32_t *sse) { \ - int sum; \ - highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ - } \ - \ - uint32_t vpx_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - uint32_t *sse) { \ - int sum; \ - highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ - } \ - \ - uint32_t 
vpx_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - uint32_t *sse) { \ - int sum; \ - highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ - return *sse; \ +#define HIGHBD_MSE(W, H) \ + uint32_t vpx_highbd_8_mse##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride, uint32_t *sse) { \ + int sum; \ + highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \ + &sum); \ + return *sse; \ + } \ + \ + uint32_t vpx_highbd_10_mse##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride, uint32_t *sse) { \ + int sum; \ + highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \ + &sum); \ + return *sse; \ + } \ + \ + uint32_t vpx_highbd_12_mse##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ + int ref_stride, uint32_t *sse) { \ + int sum; \ + highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \ + &sum); \ + return *sse; \ } static void highbd_var_filter_block2d_bil_first_pass( @@ -403,111 +414,111 @@ static void highbd_var_filter_block2d_bil_second_pass( } } -#define HIGHBD_SUBPIX_VAR(W, H) \ - uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, uint32_t *sse) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - \ - highbd_var_filter_block2d_bil_first_pass( \ - src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters[yoffset]); \ - \ - return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \ - dst, dst_stride, sse); \ - } \ - \ - uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, uint32_t *sse) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - \ - highbd_var_filter_block2d_bil_first_pass( \ - src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters[yoffset]); \ - \ - return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \ - dst, dst_stride, sse); \ - } \ - \ - uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, uint32_t *sse) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - \ - highbd_var_filter_block2d_bil_first_pass( \ - src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters[yoffset]); \ - \ - return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \ - dst, dst_stride, sse); \ +#define HIGHBD_SUBPIX_VAR(W, H) \ + uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + \ + highbd_var_filter_block2d_bil_first_pass( \ + src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \ + highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + return 
vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \ + ref_ptr, ref_stride, sse); \ + } \ + \ + uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + \ + highbd_var_filter_block2d_bil_first_pass( \ + src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \ + highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \ + ref_ptr, ref_stride, sse); \ + } \ + \ + uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + \ + highbd_var_filter_block2d_bil_first_pass( \ + src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \ + highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \ + ref_ptr, ref_stride, sse); \ } -#define HIGHBD_SUBPIX_AVG_VAR(W, H) \ - uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, uint32_t *sse, \ - const uint8_t *second_pred) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ - \ - highbd_var_filter_block2d_bil_first_pass( \ - src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters[yoffset]); \ - \ - vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \ - CONVERT_TO_BYTEPTR(temp2), W); \ - \ - return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ - dst, dst_stride, sse); \ - } \ - \ - uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, uint32_t *sse, \ - const uint8_t *second_pred) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ - \ - highbd_var_filter_block2d_bil_first_pass( \ - src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters[yoffset]); \ - \ - vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \ - CONVERT_TO_BYTEPTR(temp2), W); \ - \ - return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ - dst, dst_stride, sse); \ - } \ - \ - uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ - const uint8_t *src, int src_stride, int xoffset, int yoffset, \ - const uint8_t *dst, int dst_stride, uint32_t *sse, \ - const uint8_t *second_pred) { \ - uint16_t fdata3[(H + 1) * W]; \ - uint16_t temp2[H * W]; \ - DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ - \ - highbd_var_filter_block2d_bil_first_pass( \ - src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \ - highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ - bilinear_filters[yoffset]); \ - \ - vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \ - CONVERT_TO_BYTEPTR(temp2), W); \ - \ - return 
vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ - dst, dst_stride, sse); \ +#define HIGHBD_SUBPIX_AVG_VAR(W, H) \ + uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ + \ + highbd_var_filter_block2d_bil_first_pass( \ + src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \ + highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \ + temp2, W); \ + \ + return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ + ref_ptr, ref_stride, sse); \ + } \ + \ + uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ + \ + highbd_var_filter_block2d_bil_first_pass( \ + src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \ + highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \ + temp2, W); \ + \ + return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ + ref_ptr, ref_stride, sse); \ + } \ + \ + uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ + \ + highbd_var_filter_block2d_bil_first_pass( \ + src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \ + highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \ + temp2, W); \ + \ + return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ + ref_ptr, ref_stride, sse); \ } /* All three forms of the variance are available in the same sizes. */ @@ -538,12 +549,10 @@ HIGHBD_MSE(16, 8) HIGHBD_MSE(8, 16) HIGHBD_MSE(8, 8) -void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, - int width, int height, const uint8_t *ref8, +void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint16_t *pred, + int width, int height, const uint16_t *ref, int ref_stride) { int i, j; - uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { const int tmp = pred[j] + ref[j]; diff --git a/media/libvpx/libvpx/vpx_dsp/variance.h b/media/libvpx/libvpx/vpx_dsp/variance.h index 100573299b9f..f8b44f03d1fb 100644 --- a/media/libvpx/libvpx/vpx_dsp/variance.h +++ b/media/libvpx/libvpx/vpx_dsp/variance.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
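The HIGHBD_SUBPIX_AVG_VAR hunk above tracks a signature change: vpx_highbd_comp_avg_pred now takes uint16_t pointers directly, so the CONVERT_TO_SHORTPTR unwrapping moves to the call sites. The kernel itself is just a rounded average of the two predictors; a sketch of one row, again assuming the vpx_ports ROUND_POWER_OF_TWO definition:

    #include <stdint.h>

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

    /* One row of compound prediction: comp[j] = (pred[j] + ref[j] + 1) >> 1. */
    static void comp_avg_row(uint16_t *comp_pred, const uint16_t *pred,
                             const uint16_t *ref, int width) {
      int j;
      for (j = 0; j < width; ++j) {
        const int tmp = pred[j] + ref[j];
        comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
      }
    }

    int main(void) {
      const uint16_t p[4] = { 100, 200, 300, 400 };
      const uint16_t r[4] = { 101, 201, 301, 401 };
      uint16_t out[4];
      comp_avg_row(out, p, r, 4); /* rounds up: out[0] == 101 */
      return out[0] == 101 ? 0 : 1;
    }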
*/ -#ifndef VPX_DSP_VARIANCE_H_ -#define VPX_DSP_VARIANCE_H_ +#ifndef VPX_VPX_DSP_VARIANCE_H_ +#define VPX_VPX_DSP_VARIANCE_H_ #include "./vpx_config.h" @@ -22,37 +22,38 @@ extern "C" { #define FILTER_BITS 7 #define FILTER_WEIGHT 128 -typedef unsigned int (*vpx_sad_fn_t)(const uint8_t *a, int a_stride, - const uint8_t *b_ptr, int b_stride); +typedef unsigned int (*vpx_sad_fn_t)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride); -typedef unsigned int (*vpx_sad_avg_fn_t)(const uint8_t *a_ptr, int a_stride, - const uint8_t *b_ptr, int b_stride, +typedef unsigned int (*vpx_sad_avg_fn_t)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); -typedef void (*vp8_copy32xn_fn_t)(const uint8_t *a, int a_stride, uint8_t *b, - int b_stride, int n); +typedef void (*vp8_copy32xn_fn_t)(const uint8_t *src_ptr, int src_stride, + uint8_t *ref_ptr, int ref_stride, int n); -typedef void (*vpx_sad_multi_fn_t)(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, +typedef void (*vpx_sad_multi_fn_t)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); -typedef void (*vpx_sad_multi_d_fn_t)(const uint8_t *a, int a_stride, +typedef void (*vpx_sad_multi_d_fn_t)(const uint8_t *src_ptr, int src_stride, const uint8_t *const b_array[], - int b_stride, unsigned int *sad_array); + int ref_stride, unsigned int *sad_array); -typedef unsigned int (*vpx_variance_fn_t)(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse); +typedef unsigned int (*vpx_variance_fn_t)(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, unsigned int *sse); -typedef unsigned int (*vpx_subpixvariance_fn_t)(const uint8_t *a, int a_stride, - int xoffset, int yoffset, - const uint8_t *b, int b_stride, - unsigned int *sse); +typedef unsigned int (*vpx_subpixvariance_fn_t)( + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); typedef unsigned int (*vpx_subp_avg_variance_fn_t)( - const uint8_t *a_ptr, int a_stride, int xoffset, int yoffset, - const uint8_t *b_ptr, int b_stride, unsigned int *sse, + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); + #if CONFIG_VP8 typedef struct variance_vtable { vpx_sad_fn_t sdf; @@ -61,7 +62,7 @@ typedef struct variance_vtable { vpx_sad_multi_fn_t sdx3f; vpx_sad_multi_fn_t sdx8f; vpx_sad_multi_d_fn_t sdx4df; -#if ARCH_X86 || ARCH_X86_64 +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 vp8_copy32xn_fn_t copymem; #endif } vp8_variance_fn_ptr_t; @@ -75,6 +76,7 @@ typedef struct vp9_variance_vtable { vpx_subpixvariance_fn_t svf; vpx_subp_avg_variance_fn_t svaf; vpx_sad_multi_d_fn_t sdx4df; + vpx_sad_multi_fn_t sdx8f; } vp9_variance_fn_ptr_t; #endif // CONFIG_VP9 @@ -82,4 +84,4 @@ typedef struct vp9_variance_vtable { } // extern "C" #endif -#endif // VPX_DSP_VARIANCE_H_ +#endif // VPX_VPX_DSP_VARIANCE_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/vpx_convolve.h b/media/libvpx/libvpx/vpx_dsp/vpx_convolve.h index 7979268a9548..d5793e17ad5f 100644 --- a/media/libvpx/libvpx/vpx_dsp/vpx_convolve.h +++ b/media/libvpx/libvpx/vpx_dsp/vpx_convolve.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_VPX_CONVOLVE_H_ -#define VPX_DSP_VPX_CONVOLVE_H_ +#ifndef VPX_VPX_DSP_VPX_CONVOLVE_H_ +#define VPX_VPX_DSP_VPX_CONVOLVE_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" @@ -35,4 +35,4 @@ typedef void (*highbd_convolve_fn_t)(const uint16_t *src, ptrdiff_t src_stride, } // extern "C" #endif -#endif // VPX_DSP_VPX_CONVOLVE_H_ +#endif // VPX_VPX_DSP_VPX_CONVOLVE_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/vpx_dsp.mk b/media/libvpx/libvpx/vpx_dsp/vpx_dsp.mk index 126a637fac12..028354a98d8c 100644 --- a/media/libvpx/libvpx/vpx_dsp/vpx_dsp.mk +++ b/media/libvpx/libvpx/vpx_dsp/vpx_dsp.mk @@ -47,13 +47,11 @@ endif # intra predictions DSP_SRCS-yes += intrapred.c -DSP_SRCS-$(HAVE_SSE) += x86/intrapred_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/intrapred_sse2.asm DSP_SRCS-$(HAVE_SSSE3) += x86/intrapred_ssse3.asm DSP_SRCS-$(HAVE_VSX) += ppc/intrapred_vsx.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) -DSP_SRCS-$(HAVE_SSE) += x86/highbd_intrapred_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_intrin_sse2.c DSP_SRCS-$(HAVE_SSSE3) += x86/highbd_intrapred_intrin_ssse3.c @@ -69,6 +67,8 @@ DSP_SRCS-$(HAVE_MSA) += mips/deblock_msa.c DSP_SRCS-$(HAVE_NEON) += arm/deblock_neon.c DSP_SRCS-$(HAVE_SSE2) += x86/add_noise_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/deblock_sse2.asm +DSP_SRCS-$(HAVE_SSE2) += x86/post_proc_sse2.c +DSP_SRCS-$(HAVE_VSX) += ppc/deblock_vsx.c endif # CONFIG_POSTPROC DSP_SRCS-$(HAVE_NEON_ASM) += arm/intrapred_neon_asm$(ASM) @@ -81,16 +81,19 @@ DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred16_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.h DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.c +DSP_SRCS-yes += vpx_filter.h +ifeq ($(CONFIG_VP9),yes) # interpolation filters DSP_SRCS-yes += vpx_convolve.c DSP_SRCS-yes += vpx_convolve.h -DSP_SRCS-yes += vpx_filter.h -DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/convolve.h -DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/vpx_asm_stubs.c +DSP_SRCS-$(VPX_ARCH_X86)$(VPX_ARCH_X86_64) += x86/convolve.h + +DSP_SRCS-$(HAVE_SSE2) += x86/convolve_sse2.h DSP_SRCS-$(HAVE_SSSE3) += x86/convolve_ssse3.h DSP_SRCS-$(HAVE_AVX2) += x86/convolve_avx2.h DSP_SRCS-$(HAVE_SSE2) += x86/vpx_subpixel_8t_sse2.asm +DSP_SRCS-$(HAVE_SSE2) += x86/vpx_subpixel_4t_intrin_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/vpx_subpixel_bilinear_sse2.asm DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_8t_ssse3.asm DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_bilinear_ssse3.asm @@ -111,9 +114,17 @@ DSP_SRCS-$(HAVE_NEON) += arm/vpx_scaled_convolve8_neon.c ifeq ($(HAVE_NEON_ASM),yes) DSP_SRCS-yes += arm/vpx_convolve_copy_neon_asm$(ASM) -DSP_SRCS-yes += arm/vpx_convolve8_avg_neon_asm$(ASM) -DSP_SRCS-yes += arm/vpx_convolve8_neon_asm$(ASM) +DSP_SRCS-yes += arm/vpx_convolve8_horiz_filter_type2_neon$(ASM) +DSP_SRCS-yes += arm/vpx_convolve8_vert_filter_type2_neon$(ASM) +DSP_SRCS-yes += arm/vpx_convolve8_horiz_filter_type1_neon$(ASM) +DSP_SRCS-yes += arm/vpx_convolve8_vert_filter_type1_neon$(ASM) +DSP_SRCS-yes += arm/vpx_convolve8_avg_horiz_filter_type2_neon$(ASM) +DSP_SRCS-yes += arm/vpx_convolve8_avg_vert_filter_type2_neon$(ASM) +DSP_SRCS-yes += arm/vpx_convolve8_avg_horiz_filter_type1_neon$(ASM) +DSP_SRCS-yes += arm/vpx_convolve8_avg_vert_filter_type1_neon$(ASM) DSP_SRCS-yes += arm/vpx_convolve_avg_neon_asm$(ASM) +DSP_SRCS-yes += arm/vpx_convolve8_neon_asm.c +DSP_SRCS-yes += arm/vpx_convolve8_neon_asm.h DSP_SRCS-yes += arm/vpx_convolve_neon.c else ifeq ($(HAVE_NEON),yes) @@ -134,6 +145,7 @@ DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_vert_msa.c 
DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve_avg_msa.c DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve_copy_msa.c DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve_msa.h +DSP_SRCS-$(HAVE_MMI) += mips/vpx_convolve8_mmi.c # common (dspr2) DSP_SRCS-$(HAVE_DSPR2) += mips/convolve_common_dspr2.h @@ -153,8 +165,8 @@ DSP_SRCS-$(HAVE_VSX) += ppc/vpx_convolve_vsx.c # loop filters DSP_SRCS-yes += loopfilter.c -DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/loopfilter_intrin_sse2.c -DSP_SRCS-$(HAVE_AVX2) += x86/loopfilter_avx2.c +DSP_SRCS-$(HAVE_SSE2) += x86/loopfilter_intrin_sse2.c +DSP_SRCS-$(HAVE_AVX2) += x86/loopfilter_avx2.c ifeq ($(HAVE_NEON_ASM),yes) DSP_SRCS-yes += arm/loopfilter_16_neon$(ASM) @@ -180,6 +192,7 @@ ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_NEON) += arm/highbd_loopfilter_neon.c DSP_SRCS-$(HAVE_SSE2) += x86/highbd_loopfilter_sse2.c endif # CONFIG_VP9_HIGHBITDEPTH +endif # CONFIG_VP9 DSP_SRCS-yes += txfm_common.h DSP_SRCS-$(HAVE_SSE2) += x86/txfm_common_sse2.h @@ -192,7 +205,7 @@ DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.h DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_impl_sse2.h DSP_SRCS-$(HAVE_SSE2) += x86/fwd_dct32x32_impl_sse2.h -ifeq ($(ARCH_X86_64),yes) +ifeq ($(VPX_ARCH_X86_64),yes) DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3_x86_64.asm endif DSP_SRCS-$(HAVE_AVX2) += x86/fwd_txfm_avx2.c @@ -204,7 +217,12 @@ DSP_SRCS-$(HAVE_NEON) += arm/fdct_partial_neon.c DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.h DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.c + +ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_MSA) += mips/fwd_dct32x32_msa.c +endif # !CONFIG_VP9_HIGHBITDEPTH + +DSP_SRCS-$(HAVE_VSX) += ppc/fdct32x32_vsx.c endif # CONFIG_VP9_ENCODER # inverse transform @@ -242,6 +260,7 @@ DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct32x32_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct32x32_34_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct32x32_135_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct32x32_1024_add_neon.c +DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct_neon.h DSP_SRCS-$(HAVE_SSE2) += x86/highbd_inv_txfm_sse2.h DSP_SRCS-$(HAVE_SSE2) += x86/highbd_idct4x4_add_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/highbd_idct8x8_add_sse2.c @@ -279,11 +298,13 @@ ifeq ($(CONFIG_VP9_ENCODER),yes) DSP_SRCS-yes += quantize.c DSP_SRCS-yes += quantize.h -DSP_SRCS-$(HAVE_SSE2) += x86/quantize_x86.h DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.c +DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.h DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3.c +DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3.h DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx.c DSP_SRCS-$(HAVE_NEON) += arm/quantize_neon.c +DSP_SRCS-$(HAVE_VSX) += ppc/quantize_vsx.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c endif @@ -295,7 +316,7 @@ DSP_SRCS-$(HAVE_AVX2) += x86/avg_intrin_avx2.c DSP_SRCS-$(HAVE_NEON) += arm/avg_neon.c DSP_SRCS-$(HAVE_NEON) += arm/hadamard_neon.c DSP_SRCS-$(HAVE_MSA) += mips/avg_msa.c -ifeq ($(ARCH_X86_64),yes) +ifeq ($(VPX_ARCH_X86_64),yes) DSP_SRCS-$(HAVE_SSSE3) += x86/avg_ssse3_x86_64.asm endif DSP_SRCS-$(HAVE_VSX) += ppc/hadamard_vsx.c @@ -310,6 +331,7 @@ ifeq ($(CONFIG_ENCODERS),yes) DSP_SRCS-yes += sad.c DSP_SRCS-yes += subtract.c DSP_SRCS-yes += sum_squares.c +DSP_SRCS-$(HAVE_NEON) += arm/sum_squares_neon.c DSP_SRCS-$(HAVE_SSE2) += x86/sum_squares_sse2.c DSP_SRCS-$(HAVE_MSA) += mips/sum_squares_msa.c @@ -330,13 +352,12 @@ DSP_SRCS-$(HAVE_AVX2) += x86/sad4d_avx2.c DSP_SRCS-$(HAVE_AVX2) += x86/sad_avx2.c 
DSP_SRCS-$(HAVE_AVX512) += x86/sad4d_avx512.c -DSP_SRCS-$(HAVE_SSE) += x86/sad4d_sse2.asm -DSP_SRCS-$(HAVE_SSE) += x86/sad_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/sad_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/subtract_sse2.asm DSP_SRCS-$(HAVE_VSX) += ppc/sad_vsx.c +DSP_SRCS-$(HAVE_VSX) += ppc/subtract_vsx.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm @@ -358,17 +379,15 @@ DSP_SRCS-$(HAVE_MSA) += mips/sub_pixel_variance_msa.c DSP_SRCS-$(HAVE_MMI) += mips/variance_mmi.c -DSP_SRCS-$(HAVE_SSE) += x86/variance_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/avg_pred_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/variance_sse2.c # Contains SSE2 and SSSE3 DSP_SRCS-$(HAVE_AVX2) += x86/variance_avx2.c DSP_SRCS-$(HAVE_VSX) += ppc/variance_vsx.c -ifeq ($(ARCH_X86_64),yes) +ifeq ($(VPX_ARCH_X86_64),yes) DSP_SRCS-$(HAVE_SSE2) += x86/ssim_opt_x86_64.asm -endif # ARCH_X86_64 +endif # VPX_ARCH_X86_64 -DSP_SRCS-$(HAVE_SSE) += x86/subpel_variance_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/subpel_variance_sse2.asm # Contains SSE2 and SSSE3 ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) @@ -386,6 +405,7 @@ DSP_SRCS-$(HAVE_NEON) += arm/vpx_convolve8_neon.h # PPC VSX utilities DSP_SRCS-$(HAVE_VSX) += ppc/types_vsx.h +DSP_SRCS-$(HAVE_VSX) += ppc/txfm_common_vsx.h DSP_SRCS-$(HAVE_VSX) += ppc/transpose_vsx.h DSP_SRCS-$(HAVE_VSX) += ppc/bitdepth_conversion_vsx.h diff --git a/media/libvpx/libvpx/vpx_dsp/vpx_dsp_common.h b/media/libvpx/libvpx/vpx_dsp/vpx_dsp_common.h index c8c852374f5a..2de4495465e3 100644 --- a/media/libvpx/libvpx/vpx_dsp/vpx_dsp_common.h +++ b/media/libvpx/libvpx/vpx_dsp/vpx_dsp_common.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_VPX_DSP_COMMON_H_ -#define VPX_DSP_VPX_DSP_COMMON_H_ +#ifndef VPX_VPX_DSP_VPX_DSP_COMMON_H_ +#define VPX_VPX_DSP_VPX_DSP_COMMON_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" @@ -25,8 +25,8 @@ extern "C" { #define VPX_SWAP(type, a, b) \ do { \ type c = (b); \ - b = a; \ - a = c; \ + (b) = a; \ + (a) = c; \ } while (0) #if CONFIG_VP9_HIGHBITDEPTH @@ -57,6 +57,10 @@ static INLINE double fclamp(double value, double low, double high) { return value < low ? low : (value > high ? high : value); } +static INLINE int64_t lclamp(int64_t value, int64_t low, int64_t high) { + return value < low ? low : (value > high ? 
high : value); +} + static INLINE uint16_t clip_pixel_highbd(int val, int bd) { switch (bd) { case 8: @@ -70,4 +74,4 @@ static INLINE uint16_t clip_pixel_highbd(int val, int bd) { } // extern "C" #endif -#endif // VPX_DSP_VPX_DSP_COMMON_H_ +#endif // VPX_VPX_DSP_VPX_DSP_COMMON_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl b/media/libvpx/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl index 36848f8b0ba1..fd7eefdad053 100644 --- a/media/libvpx/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/media/libvpx/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -37,325 +37,333 @@ if ($opts{arch} eq "x86_64") { # Intra prediction # -add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d207_predictor_4x4 sse2/; -add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d45_predictor_4x4 neon sse2/; -add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d63_predictor_4x4 ssse3/; -add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_h_predictor_4x4 neon dspr2 msa sse2 vsx/; +add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; +# TODO(crbug.com/webm/1522): Re-enable vsx implementation. 
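In the vpx_dsp_common.h hunk just above, parenthesizing the VPX_SWAP parameters lets the macro take lvalue expressions such as dereferenced pointers without precedence surprises, and the new lclamp() completes the clamp/fclamp family for int64_t. A compilable restatement; the main() driver is illustrative only:

    #include <stdint.h>

    #define VPX_SWAP(type, a, b) \
      do {                       \
        type c = (b);            \
        (b) = a;                 \
        (a) = c;                 \
      } while (0)

    static int64_t lclamp(int64_t value, int64_t low, int64_t high) {
      return value < low ? low : (value > high ? high : value);
    }

    int main(void) {
      int x = 1, y = 2;
      VPX_SWAP(int, x, y); /* now x == 2, y == 1 */
      return lclamp(x - y, 0, 10) == 1 ? 0 : 1;
    }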
+specialize qw/vpx_h_predictor_4x4 neon dspr2 msa sse2/; -add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d135_predictor_4x4 neon/; -add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d153_predictor_4x4 ssse3/; -add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_v_predictor_4x4 neon msa sse2/; -add_proto qw/void vpx_ve_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_ve_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa sse2 vsx/; +add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; +# TODO(crbug.com/webm/1522): Re-enable vsx implementation. 
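Every prototype being renamed in this file keeps the same contract: fill a block at dst, whose rows are stride elements apart, from the reconstructed row above and column left. As a from-scratch illustration of that contract (not libvpx's implementation), a 4x4 DC predictor averages the eight neighbours and floods the block:

    #include <stddef.h>
    #include <stdint.h>

    static void dc_predictor_4x4(uint8_t *dst, ptrdiff_t stride,
                                 const uint8_t *above, const uint8_t *left) {
      int i, r, c, sum = 0;
      for (i = 0; i < 4; ++i) sum += above[i] + left[i];
      for (r = 0; r < 4; ++r) {
        for (c = 0; c < 4; ++c) dst[c] = (uint8_t)((sum + 4) >> 3);
        dst += stride;
      }
    }

    int main(void) {
      uint8_t dst[16];
      const uint8_t above[4] = { 10, 10, 10, 10 };
      const uint8_t left[4] = { 30, 30, 30, 30 };
      dc_predictor_4x4(dst, 4, above, left);
      return dst[0] == 20 ? 0 : 1; /* (40 + 120 + 4) >> 3 == 20 */
    }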
+specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa sse2/; -add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon sse2/; -add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_dc_top_predictor_4x4 msa neon sse2/; -add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_dc_left_predictor_4x4 msa neon sse2/; -add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_dc_128_predictor_4x4 msa neon sse2/; -add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d207_predictor_8x8 ssse3/; -add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d45_predictor_8x8 neon sse2 vsx/; +add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; +# TODO(crbug.com/webm/1522): Re-enable vsx implementation. +specialize qw/vpx_d45_predictor_8x8 neon sse2/; -add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d63_predictor_8x8 ssse3 vsx/; +add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; +# TODO(crbug.com/webm/1522): Re-enable vsx implementation. +specialize qw/vpx_d63_predictor_8x8 ssse3/; -add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_h_predictor_8x8 neon dspr2 msa sse2 vsx/; +add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; +# TODO(crbug.com/webm/1522): Re-enable vsx implementation. 
+specialize qw/vpx_h_predictor_8x8 neon dspr2 msa sse2/; -add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d135_predictor_8x8 neon/; -add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d153_predictor_8x8 ssse3/; -add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_v_predictor_8x8 neon msa sse2/; -add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa sse2 vsx/; +add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; +# TODO(crbug.com/webm/1522): Re-enable vsx implementation. +specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa sse2/; -add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa sse2 vsx/; +add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; +# TODO(crbug.com/webm/1522): Re-enable vsx implementation. 
+specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa sse2/; -add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_dc_top_predictor_8x8 neon msa sse2/; -add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_dc_left_predictor_8x8 neon msa sse2/; -add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_dc_128_predictor_8x8 neon msa sse2/; -add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d207_predictor_16x16 ssse3/; -add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d45_predictor_16x16 neon ssse3 vsx/; -add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d63_predictor_16x16 ssse3 vsx/; -add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_h_predictor_16x16 neon dspr2 msa sse2 vsx/; -add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d135_predictor_16x16 neon/; -add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d153_predictor_16x16 ssse3/; -add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_v_predictor_16x16 neon msa sse2 vsx/; -add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; 
 specialize qw/vpx_tm_predictor_16x16 neon msa sse2 vsx/;
-add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa sse2 vsx/;
-add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_dc_top_predictor_16x16 neon msa sse2 vsx/;
-add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_dc_left_predictor_16x16 neon msa sse2 vsx/;
-add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_dc_128_predictor_16x16 neon msa sse2 vsx/;
-add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d207_predictor_32x32 ssse3/;
-add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d45_predictor_32x32 neon ssse3 vsx/;
-add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d63_predictor_32x32 ssse3 vsx/;
-add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_h_predictor_32x32 neon msa sse2 vsx/;
-add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
-add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d135_predictor_32x32 neon/;
-add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d153_predictor_32x32 ssse3/;
-add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_v_predictor_32x32 neon msa sse2 vsx/;
-add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_tm_predictor_32x32 neon msa sse2 vsx/;
-add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_dc_predictor_32x32 msa neon sse2 vsx/;
-add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_dc_top_predictor_32x32 msa neon sse2 vsx/;
-add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_dc_left_predictor_32x32 msa neon sse2 vsx/;
-add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_dc_128_predictor_32x32 msa neon sse2 vsx/;
 # High bitdepth functions
 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
-  add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d207_predictor_4x4 sse2/;
-  add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d45_predictor_4x4 neon ssse3/;
-  add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d63_predictor_4x4 sse2/;
-  add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_h_predictor_4x4 neon sse2/;
-  add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d117_predictor_4x4 sse2/;
-  add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d135_predictor_4x4 neon sse2/;
-  add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d153_predictor_4x4 sse2/;
-  add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_v_predictor_4x4 neon sse2/;
-  add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_tm_predictor_4x4 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_predictor_4x4 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_top_predictor_4x4 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_left_predictor_4x4 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_128_predictor_4x4 neon sse2/;
-  add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d207_predictor_8x8 ssse3/;
-  add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d45_predictor_8x8 neon ssse3/;
-  add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d63_predictor_8x8 ssse3/;
-  add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_h_predictor_8x8 neon sse2/;
-  add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d117_predictor_8x8 ssse3/;
-  add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d135_predictor_8x8 neon ssse3/;
-  add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d153_predictor_8x8 ssse3/;
-  add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_v_predictor_8x8 neon sse2/;
-  add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_tm_predictor_8x8 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_predictor_8x8 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_top_predictor_8x8 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_left_predictor_8x8 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_128_predictor_8x8 neon sse2/;
-  add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d207_predictor_16x16 ssse3/;
-  add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d45_predictor_16x16 neon ssse3/;
-  add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d63_predictor_16x16 ssse3/;
-  add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_h_predictor_16x16 neon sse2/;
-  add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d117_predictor_16x16 ssse3/;
-  add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d135_predictor_16x16 neon ssse3/;
-  add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d153_predictor_16x16 ssse3/;
-  add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_v_predictor_16x16 neon sse2/;
-  add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_tm_predictor_16x16 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_predictor_16x16 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_top_predictor_16x16 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_left_predictor_16x16 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_128_predictor_16x16 neon sse2/;
-  add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d207_predictor_32x32 ssse3/;
-  add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d45_predictor_32x32 neon ssse3/;
-  add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d63_predictor_32x32 ssse3/;
-  add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_h_predictor_32x32 neon sse2/;
-  add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d117_predictor_32x32 ssse3/;
-  add_proto qw/void vpx_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d135_predictor_32x32 neon ssse3/;
-  add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d153_predictor_32x32 ssse3/;
-  add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_v_predictor_32x32 neon sse2/;
-  add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_tm_predictor_32x32 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_predictor_32x32 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_top_predictor_32x32 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_left_predictor_32x32 neon sse2/;
-  add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_dc_128_predictor_32x32 neon sse2/;
 } # CONFIG_VP9_HIGHBITDEPTH
+if (vpx_config("CONFIG_VP9") eq "yes") {
 #
 # Sub Pixel Filters
 #
@@ -363,25 +371,25 @@ add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride,
 specialize qw/vpx_convolve_copy neon dspr2 msa sse2 vsx/;
 add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx/;
+specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx mmi/;
 add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa vsx/;
+specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa vsx mmi/;
 add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa vsx/;
+specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa vsx mmi/;
 add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa vsx/;
+specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa vsx mmi/;
 add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_avg sse2 ssse3 avx2 neon dspr2 msa vsx/;
+specialize qw/vpx_convolve8_avg sse2 ssse3 avx2 neon dspr2 msa vsx mmi/;
 add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon dspr2 msa vsx/;
+specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon dspr2 msa vsx mmi/;
 add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_avg_vert sse2 ssse3 avx2 neon dspr2 msa vsx/;
+specialize qw/vpx_convolve8_avg_vert sse2 ssse3 avx2 neon dspr2 msa vsx mmi/;
 add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
 specialize qw/vpx_scaled_2d ssse3 neon msa/;
@@ -395,36 +403,38 @@ add_proto qw/void vpx_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride,
 add_proto qw/void vpx_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
 add_proto qw/void vpx_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
+} #CONFIG_VP9
 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   #
   # Sub Pixel Filters
   #
-  add_proto qw/void vpx_highbd_convolve_copy/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
+  add_proto qw/void vpx_highbd_convolve_copy/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
   specialize qw/vpx_highbd_convolve_copy sse2 avx2 neon/;
-  add_proto qw/void vpx_highbd_convolve_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
+  add_proto qw/void vpx_highbd_convolve_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
   specialize qw/vpx_highbd_convolve_avg sse2 avx2 neon/;
-  add_proto qw/void vpx_highbd_convolve8/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
+  add_proto qw/void vpx_highbd_convolve8/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
   specialize qw/vpx_highbd_convolve8 avx2 neon/, "$sse2_x86_64";
-  add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
+  add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
   specialize qw/vpx_highbd_convolve8_horiz avx2 neon/, "$sse2_x86_64";
-  add_proto qw/void vpx_highbd_convolve8_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
+  add_proto qw/void vpx_highbd_convolve8_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
   specialize qw/vpx_highbd_convolve8_vert avx2 neon/, "$sse2_x86_64";
-  add_proto qw/void vpx_highbd_convolve8_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
+  add_proto qw/void vpx_highbd_convolve8_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
   specialize qw/vpx_highbd_convolve8_avg avx2 neon/, "$sse2_x86_64";
-  add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
+  add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
   specialize qw/vpx_highbd_convolve8_avg_horiz avx2 neon/, "$sse2_x86_64";
-  add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
+  add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";
   specialize qw/vpx_highbd_convolve8_avg_vert avx2 neon/, "$sse2_x86_64";
 } # CONFIG_VP9_HIGHBITDEPTH
+if (vpx_config("CONFIG_VP9") eq "yes") {
 #
 # Loopfilter
 #
@@ -463,6 +473,7 @@ specialize qw/vpx_lpf_horizontal_4 sse2 neon dspr2 msa/;
 add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 specialize qw/vpx_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
+} #CONFIG_VP9
 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/void vpx_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
@@ -583,7 +594,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   specialize qw/vpx_fdct32x32 neon sse2 avx2 msa/;
   add_proto qw/void vpx_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/vpx_fdct32x32_rd sse2 avx2 neon msa/;
+  specialize qw/vpx_fdct32x32_rd sse2 avx2 neon msa vsx/;
   add_proto qw/void vpx_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
   specialize qw/vpx_fdct32x32_1 sse2 neon msa/;
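The hunks above and below all follow one pattern from libvpx's run-time CPU detection (rtcd) build step: each add_proto line registers a DSP function's return type, name, and C argument string, and the specialize line that follows lists the instruction-set flavours (sse2, ssse3, avx2, neon, dspr2, msa, vsx, mmi, ...) for which an optimized implementation exists. A minimal sketch of that registry idea follows; it is illustrative only, with hypothetical helper bodies, and is not libvpx's actual rtcd.pl:

    #!/usr/bin/perl
    # Illustrative sketch (hypothetical, not libvpx's rtcd.pl): reduce
    # add_proto/specialize pairs to a registry and emit one C prototype
    # per variant of each function.
    use strict;
    use warnings;

    my %rtcd;  # registry, keyed by base function name

    sub add_proto {
      my ($proto, $args) = @_;                      # "void vpx_dc_predictor_16x16", "uint8_t *dst, ..."
      my ($rtype, $name) = $proto =~ /^(.+)\s+(\w+)$/;
      $rtcd{$name} = { rtype => $rtype, args => $args, opts => [] };
    }

    sub specialize {
      my ($name, @archs) = @_;                      # arch suffixes with optimized versions
      push @{ $rtcd{$name}{opts} }, @archs;
    }

    # Same shape as the entries in this diff:
    add_proto("void vpx_dc_predictor_16x16",
      "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left");
    specialize("vpx_dc_predictor_16x16", qw/dspr2 neon msa sse2 vsx/);

    # Emit <name>_c plus one symbol per specialization; the generated rtcd
    # header additionally declares a function pointer that CPU detection
    # later aims at the best available variant.
    for my $name (sort keys %rtcd) {
      my $f = $rtcd{$name};
      print "$f->{rtype} ${name}_$_($f->{args});\n" for ('c', @{ $f->{opts} });
    }

Against that backdrop, the mechanical change in these hunks (y_stride renamed to stride, bps to bd, and new arch suffixes appended to specialize lists) only alters the generated prototypes and the set of variants registered, not the dispatch machinery itself.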
@@ -626,6 +637,7 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
   specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/;
   specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/;
   specialize qw/vpx_idct32x32_1_add neon sse2/;
+  specialize qw/vpx_iwht4x4_16_add sse2 vsx/;
   if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
     # Note that these specializations are appended to the above ones.
@@ -646,7 +658,7 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
     $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa;
     specialize qw/vpx_idct32x32_34_add dspr2 msa/;
     specialize qw/vpx_idct32x32_1_add dspr2 msa/;
-    specialize qw/vpx_iwht4x4_16_add msa sse2/;
+    specialize qw/vpx_iwht4x4_16_add msa/;
     specialize qw/vpx_iwht4x4_1_add msa/;
   } # !CONFIG_VP9_HIGHBITDEPTH
 } # !CONFIG_EMULATE_HARDWARE
@@ -654,7 +666,6 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   # Note as optimized versions of these functions are added we need to add a check to ensure
   # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
-  specialize qw/vpx_iwht4x4_16_add sse2/;
   add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
   add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";
@@ -699,10 +710,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 #
 if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
   add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vpx_quantize_b neon sse2 ssse3 avx/;
+  specialize qw/vpx_quantize_b neon sse2 ssse3 avx vsx/;
   add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-  specialize qw/vpx_quantize_b_32x32 neon ssse3 avx/;
+  specialize qw/vpx_quantize_b_32x32 neon ssse3 avx vsx/;
   if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
     add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
@@ -718,7 +729,7 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes") {
 #
 # Block subtraction
 #
 add_proto qw/void vpx_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
-specialize qw/vpx_subtract_block neon msa mmi sse2/;
+specialize qw/vpx_subtract_block neon msa mmi sse2 vsx/;
 #
 # Single block SAD
 #
@@ -748,13 +759,13 @@ add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride,
 specialize qw/vpx_sad16x8 neon msa sse2 vsx mmi/;
 add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x16 neon msa sse2 mmi/;
+specialize qw/vpx_sad8x16 neon msa sse2 vsx mmi/;
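For the specialize lists themselves (for instance vpx_sad8x16 gaining vsx just above), ordering matters only in that the generated setup code probes CPU flags variant by variant and keeps overwriting the function pointer, so the last supported variant listed effectively wins. A hedged sketch of that selection step, again hypothetical rather than libvpx's generated code:

    #!/usr/bin/perl
    # Hypothetical sketch of the dispatch side: pick the last variant whose
    # CPU flag is present, falling back to the plain C implementation.
    use strict;
    use warnings;

    my %have = (c => 1, sse2 => 1, ssse3 => 1, avx2 => 0);  # pretend detection
    my @variants = qw/c sse2 ssse3 avx2/;                   # declaration order

    my ($best) = grep { $have{$_} } reverse @variants;
    print "vpx_sad8x8 resolves to vpx_sad8x8_$best\n";      # -> vpx_sad8x8_ssse3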
 add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x8 neon msa sse2 mmi/;
+specialize qw/vpx_sad8x8 neon msa sse2 vsx mmi/;
 add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x4 neon msa sse2 mmi/;
+specialize qw/vpx_sad8x4 neon msa sse2 vsx mmi/;
 add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
 specialize qw/vpx_sad4x8 neon msa sse2 mmi/;
@@ -782,8 +793,23 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
   add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
   specialize qw/vpx_hadamard_16x16 avx2 sse2 neon vsx/;
+  add_proto qw/void vpx_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
+  specialize qw/vpx_hadamard_32x32 sse2 avx2/;
+
+  add_proto qw/void vpx_highbd_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
+  specialize qw/vpx_highbd_hadamard_8x8 avx2/;
+
+  add_proto qw/void vpx_highbd_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
+  specialize qw/vpx_highbd_hadamard_16x16 avx2/;
+
+  add_proto qw/void vpx_highbd_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
+  specialize qw/vpx_highbd_hadamard_32x32 avx2/;
+
   add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length";
   specialize qw/vpx_satd avx2 sse2 neon/;
+
+  add_proto qw/int vpx_highbd_satd/, "const tran_low_t *coeff, int length";
+  specialize qw/vpx_highbd_satd avx2/;
 } else {
   add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
   specialize qw/vpx_hadamard_8x8 sse2 neon msa vsx/, "$ssse3_x86_64";
@@ -791,6 +817,9 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
   add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
   specialize qw/vpx_hadamard_16x16 avx2 sse2 neon msa vsx/;
+  add_proto qw/void vpx_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
+  specialize qw/vpx_hadamard_32x32 sse2 avx2/;
+
   add_proto qw/int vpx_satd/, "const int16_t *coeff, int length";
   specialize qw/vpx_satd avx2 sse2 neon msa/;
 }
@@ -864,6 +893,9 @@ add_proto qw/void vpx_sad4x4x3/, "const uint8_t *src_ptr, int src_stride, const
 specialize qw/vpx_sad4x4x3 sse3 msa mmi/;
 # Blocks of 8
+add_proto qw/void vpx_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad32x32x8 avx2/;
+
 add_proto qw/void vpx_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad16x16x8 sse4_1 msa mmi/;
@@ -882,47 +914,47 @@ specialize qw/vpx_sad4x4x8 sse4_1 msa mmi/;
 #
 # Multi-block SAD, comparing a reference to N independent blocks
 #
-add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad64x64x4d avx2 neon msa sse2 vsx mmi/;
+add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
+specialize qw/vpx_sad64x64x4d avx512 avx2 neon msa sse2 vsx mmi/;
-add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad64x32x4d neon msa sse2 vsx mmi/;
-add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad32x64x4d neon msa sse2 vsx mmi/;
-add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad32x32x4d avx2 neon msa sse2 vsx mmi/;
-add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad32x16x4d neon msa sse2 vsx mmi/;
-add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad16x32x4d neon msa sse2 vsx mmi/;
-add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad16x16x4d neon msa sse2 vsx mmi/;
-add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad16x8x4d neon msa sse2 vsx mmi/;
-add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad8x16x4d neon msa sse2 mmi/;
-add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad8x8x4d neon msa sse2 mmi/;
-add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad8x4x4d neon msa sse2 mmi/;
-add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad4x8x4d neon msa sse2 mmi/;
-add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[], int ref_stride, uint32_t *sad_array";
 specialize qw/vpx_sad4x4x4d neon msa sse2 mmi/;
 add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size";
-specialize qw/vpx_sum_squares_2d_i16 sse2 msa/;
+specialize qw/vpx_sum_squares_2d_i16 neon sse2 msa/;
 #
 # Structured Similarity (SSIM)
@@ -939,7 +971,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   #
   # Block subtraction
   #
-  add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
+  add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src8_ptr, ptrdiff_t src_stride, const uint8_t *pred8_ptr, ptrdiff_t pred_stride, int bd";
   #
   # Single block SAD
   #
@@ -984,9 +1016,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   #
   # Avg
   #
-  add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *, int p";
-  add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *, int p";
-  add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
+  add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *s8, int p";
+  specialize qw/vpx_highbd_avg_8x8 sse2/;
+
+  add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *s8, int p";
+  specialize qw/vpx_highbd_avg_4x4 sse2/;
+
+  add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s8, int p, const uint8_t *d8, int dp, int *min, int *max";
   add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
   specialize qw/vpx_highbd_sad64x64_avg sse2/;
@@ -1028,43 +1064,43 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   #
   # Multi-block SAD, comparing a reference to N independent blocks
   #
-  add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad64x64x4d sse2/;
-  add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad64x32x4d sse2/;
-  add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad32x64x4d sse2/;
-  add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad32x32x4d sse2/;
-  add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad32x16x4d sse2/;
-  add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad16x32x4d sse2/;
-  add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad16x16x4d sse2/;
-  add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad16x8x4d sse2/;
-  add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad8x16x4d sse2/;
-  add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad8x8x4d sse2/;
-  add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad8x4x4d sse2/;
-  add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad4x8x4d sse2/;
-  add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
+  add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[], int ref_stride, uint32_t *sad_array";
   specialize qw/vpx_highbd_sad4x4x4d sse2/;
   #
@@ -1081,70 +1117,70 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq "
 #
 # Variance
 #
-add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance32x64 sse2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  specialize qw/vpx_variance32x64 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance16x32 sse2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  specialize qw/vpx_variance16x32 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance16x8 sse2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  specialize qw/vpx_variance16x8 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance8x16 sse2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  specialize qw/vpx_variance8x16 sse2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance8x8 sse2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  specialize qw/vpx_variance8x8 sse2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance8x4 sse2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  specialize qw/vpx_variance8x4 sse2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance4x8 sse2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  specialize qw/vpx_variance4x8 sse2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  specialize qw/vpx_variance4x4 sse2 neon msa mmi/;
+add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  specialize qw/vpx_variance4x4 sse2 neon msa mmi vsx/;
 #
 # Specialty Variance
 #
-add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-  specialize qw/vpx_get16x16var sse2 avx2 neon msa/;
+add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
  specialize qw/vpx_get16x16var sse2 avx2 neon msa vsx/;
-add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-  specialize qw/vpx_get8x8var sse2 neon msa/;
+add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
  specialize qw/vpx_get8x8var sse2 neon msa vsx/;
-add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-  specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi/;
+add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi vsx/;
-add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-  specialize qw/vpx_mse16x8 sse2 msa mmi/;
+add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  specialize qw/vpx_mse16x8 sse2 avx2 msa mmi vsx/;
-add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-  specialize qw/vpx_mse8x16 sse2 msa mmi/;
+add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  specialize qw/vpx_mse8x16 sse2 msa mmi vsx/;
-add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-  specialize qw/vpx_mse8x8 sse2 msa mmi/;
+add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  specialize qw/vpx_mse8x8 sse2 msa mmi vsx/;
 add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
 specialize qw/vpx_get_mb_ss sse2 msa vsx/;
-add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
+add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride";
 specialize qw/vpx_get4x4sse_cs neon msa vsx/;
 add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
@@ -1153,440 +1189,449 @@ add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred,
 #
 # Subpixel Variance
 #
-add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance64x32 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance32x64 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance32x16 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance16x32 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance16x16 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance16x8 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance8x16 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance8x8 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance8x4 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance4x8 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
 specialize qw/vpx_sub_pixel_variance4x4 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance64x64 neon avx2 msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance64x32 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance32x64 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance32x32 neon avx2 msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance32x16 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance16x32 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance16x16 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance16x8 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance8x16 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance8x8 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance8x4 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance4x8 neon msa mmi sse2 ssse3/;
-add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
 specialize qw/vpx_sub_pixel_avg_variance4x4 neon msa mmi sse2 ssse3/;
 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
-  add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   specialize qw/vpx_highbd_12_variance64x64 sse2/;
-  add_proto qw/unsigned int vpx_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  add_proto qw/unsigned int vpx_highbd_12_variance64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   specialize qw/vpx_highbd_12_variance64x32 sse2/;
-  add_proto qw/unsigned int vpx_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  add_proto qw/unsigned int vpx_highbd_12_variance32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   specialize qw/vpx_highbd_12_variance32x64 sse2/;
-  add_proto qw/unsigned int vpx_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  add_proto qw/unsigned int vpx_highbd_12_variance32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   specialize qw/vpx_highbd_12_variance32x32 sse2/;
-  add_proto qw/unsigned int vpx_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  add_proto qw/unsigned int vpx_highbd_12_variance32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   specialize qw/vpx_highbd_12_variance32x16 sse2/;
-  add_proto qw/unsigned int vpx_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  add_proto qw/unsigned int vpx_highbd_12_variance16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   specialize qw/vpx_highbd_12_variance16x32 sse2/;
-  add_proto qw/unsigned int vpx_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  add_proto qw/unsigned int vpx_highbd_12_variance16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   specialize qw/vpx_highbd_12_variance16x16 sse2/;
-  add_proto qw/unsigned int vpx_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  add_proto qw/unsigned int vpx_highbd_12_variance16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   specialize qw/vpx_highbd_12_variance16x8 sse2/;
-  add_proto qw/unsigned int vpx_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  add_proto qw/unsigned int vpx_highbd_12_variance8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   specialize qw/vpx_highbd_12_variance8x16 sse2/;
-  add_proto qw/unsigned int vpx_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  add_proto qw/unsigned int vpx_highbd_12_variance8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
   specialize qw/vpx_highbd_12_variance8x8 sse2/;
-  add_proto qw/unsigned int vpx_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  add_proto qw/unsigned int vpx_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-  add_proto qw/unsigned int vpx_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  add_proto qw/unsigned int vpx_highbd_12_variance8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+  add_proto qw/unsigned int vpx_highbd_12_variance4x8/, "const uint8_t
*src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_12_variance4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - add_proto qw/unsigned int vpx_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_10_variance64x64 sse2/; - add_proto qw/unsigned int vpx_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_10_variance64x32 sse2/; - add_proto qw/unsigned int vpx_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_10_variance32x64 sse2/; - add_proto qw/unsigned int vpx_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_10_variance32x32 sse2/; - add_proto qw/unsigned int vpx_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_10_variance32x16 sse2/; - add_proto qw/unsigned int vpx_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_10_variance16x32 sse2/; - add_proto qw/unsigned int vpx_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_10_variance16x16 sse2/; - add_proto qw/unsigned int vpx_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_10_variance16x8 sse2/; - add_proto qw/unsigned int vpx_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize 
qw/vpx_highbd_10_variance8x16 sse2/; - add_proto qw/unsigned int vpx_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_10_variance8x8 sse2/; - add_proto qw/unsigned int vpx_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - add_proto qw/unsigned int vpx_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - add_proto qw/unsigned int vpx_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_variance4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - add_proto qw/unsigned int vpx_highbd_8_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_8_variance64x64 sse2/; - add_proto qw/unsigned int vpx_highbd_8_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_8_variance64x32 sse2/; - add_proto qw/unsigned int vpx_highbd_8_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_8_variance32x64 sse2/; - add_proto qw/unsigned int vpx_highbd_8_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_8_variance32x32 sse2/; - add_proto qw/unsigned int vpx_highbd_8_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_8_variance32x16 sse2/; - add_proto qw/unsigned int vpx_highbd_8_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_8_variance16x32 sse2/; 
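(A note on the renames in this hunk: the quoted strings are only the parameter lists that rtcd.pl pastes into the generated vpx_dsp_rtcd.h, so switching source_stride/xoffset/yoffset to src_stride/x_offset/y_offset changes the generated declarations but not the ABI or the run-time dispatch. As a rough sketch of the machinery, assuming the usual shape of a generated RTCD header rather than quoting this patch, one add_proto/specialize pair expands to approximately:

  unsigned int vpx_highbd_8_variance16x32_c(const uint8_t *src_ptr, int src_stride,
                                            const uint8_t *ref_ptr, int ref_stride,
                                            unsigned int *sse);
  unsigned int vpx_highbd_8_variance16x32_sse2(const uint8_t *src_ptr, int src_stride,
                                               const uint8_t *ref_ptr, int ref_stride,
                                               unsigned int *sse);
  RTCD_EXTERN unsigned int (*vpx_highbd_8_variance16x32)(const uint8_t *src_ptr,
                                                         int src_stride,
                                                         const uint8_t *ref_ptr,
                                                         int ref_stride,
                                                         unsigned int *sse);

  /* and, inside setup_rtcd_internal(), roughly: */
  vpx_highbd_8_variance16x32 = vpx_highbd_8_variance16x32_c;
  if (flags & HAS_SSE2) vpx_highbd_8_variance16x32 = vpx_highbd_8_variance16x32_sse2;

Prototypes declared with add_proto but never specialized skip the function pointer and are #define'd directly to the _c implementation.)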
- add_proto qw/unsigned int vpx_highbd_8_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_8_variance16x16 sse2/; - add_proto qw/unsigned int vpx_highbd_8_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_8_variance16x8 sse2/; - add_proto qw/unsigned int vpx_highbd_8_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_8_variance8x16 sse2/; - add_proto qw/unsigned int vpx_highbd_8_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_8_variance8x8 sse2/; - add_proto qw/unsigned int vpx_highbd_8_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - add_proto qw/unsigned int vpx_highbd_8_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - add_proto qw/unsigned int vpx_highbd_8_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_variance4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - add_proto qw/void vpx_highbd_8_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - add_proto qw/void vpx_highbd_8_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + add_proto qw/void vpx_highbd_8_get16x16var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + specialize qw/vpx_highbd_8_get16x16var sse2/; - add_proto qw/void vpx_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - add_proto qw/void vpx_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + add_proto qw/void vpx_highbd_8_get8x8var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + specialize qw/vpx_highbd_8_get8x8var sse2/; - add_proto qw/void vpx_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - add_proto qw/void vpx_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + add_proto qw/void vpx_highbd_10_get16x16var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + specialize qw/vpx_highbd_10_get16x16var sse2/; - add_proto qw/unsigned int vpx_highbd_8_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/void vpx_highbd_10_get8x8var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + specialize qw/vpx_highbd_10_get8x8var sse2/; + + add_proto qw/void vpx_highbd_12_get16x16var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + specialize qw/vpx_highbd_12_get16x16var sse2/; + + add_proto qw/void vpx_highbd_12_get8x8var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; + specialize qw/vpx_highbd_12_get8x8var sse2/; + + add_proto qw/unsigned int vpx_highbd_8_mse16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_8_mse16x16 sse2/; - add_proto qw/unsigned int vpx_highbd_8_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - add_proto qw/unsigned int vpx_highbd_8_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - add_proto qw/unsigned int vpx_highbd_8_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_mse16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_mse8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_8_mse8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_8_mse8x8 sse2/; - add_proto qw/unsigned int vpx_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_mse16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_10_mse16x16 sse2/; - add_proto qw/unsigned int vpx_highbd_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - add_proto qw/unsigned int vpx_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - add_proto qw/unsigned int vpx_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_mse16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_10_mse8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int 
vpx_highbd_10_mse8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_10_mse8x8 sse2/; - add_proto qw/unsigned int vpx_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_12_mse16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_12_mse16x16 sse2/; - add_proto qw/unsigned int vpx_highbd_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - add_proto qw/unsigned int vpx_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - add_proto qw/unsigned int vpx_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_12_mse16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_12_mse8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; + add_proto qw/unsigned int vpx_highbd_12_mse8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_highbd_12_mse8x8 sse2/; - add_proto qw/void vpx_highbd_comp_avg_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride"; + add_proto qw/void vpx_highbd_comp_avg_pred/, "uint16_t *comp_pred, const uint16_t *pred, int width, int height, const uint16_t *ref, int ref_stride"; # # Subpixel Variance # - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_12_sub_pixel_variance64x64 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_12_sub_pixel_variance64x32 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_12_sub_pixel_variance32x64 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize 
qw/vpx_highbd_12_sub_pixel_variance32x32 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_12_sub_pixel_variance32x16 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_12_sub_pixel_variance16x32 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_12_sub_pixel_variance16x16 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_12_sub_pixel_variance16x8 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_12_sub_pixel_variance8x16 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_12_sub_pixel_variance8x8 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_12_sub_pixel_variance8x4 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x8/, "const uint8_t 
*src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_10_sub_pixel_variance64x64 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_10_sub_pixel_variance64x32 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_10_sub_pixel_variance32x64 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_10_sub_pixel_variance32x32 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_10_sub_pixel_variance32x16 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_10_sub_pixel_variance16x32 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_10_sub_pixel_variance16x16 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + 
add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_10_sub_pixel_variance16x8 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_10_sub_pixel_variance8x16 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_10_sub_pixel_variance8x8 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_10_sub_pixel_variance8x4 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_8_sub_pixel_variance64x64 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_8_sub_pixel_variance64x32 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize 
qw/vpx_highbd_8_sub_pixel_variance32x64 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_8_sub_pixel_variance32x32 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_8_sub_pixel_variance32x16 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_8_sub_pixel_variance16x32 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_8_sub_pixel_variance16x16 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_8_sub_pixel_variance16x8 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_8_sub_pixel_variance8x16 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_8_sub_pixel_variance8x8 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/vpx_highbd_8_sub_pixel_variance8x4 sse2/; - add_proto qw/uint32_t 
vpx_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x64 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_12_sub_pixel_avg_variance64x32 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x64 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x32 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_12_sub_pixel_avg_variance32x16 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x32/, "const 
uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x32 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x16 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_12_sub_pixel_avg_variance16x8 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x16 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x8 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_12_sub_pixel_avg_variance8x4 sse2/; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, 
uint32_t *sse, const uint8_t *second_pred"; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x64 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_10_sub_pixel_avg_variance64x32 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x64 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x32 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_10_sub_pixel_avg_variance32x16 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x32 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize 
qw/vpx_highbd_10_sub_pixel_avg_variance16x16 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_10_sub_pixel_avg_variance16x8 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x16 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x8 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_10_sub_pixel_avg_variance8x4 sse2/; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x64 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int 
xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_8_sub_pixel_avg_variance64x32 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x64 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x32 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_8_sub_pixel_avg_variance32x16 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x32 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x16 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_8_sub_pixel_avg_variance16x8 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t 
*ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x16 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x8 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; specialize qw/vpx_highbd_8_sub_pixel_avg_variance8x4 sse2/; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; - add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; + add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; } # CONFIG_VP9_HIGHBITDEPTH @@ -1598,13 +1643,13 @@ if (vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") specialize qw/vpx_plane_add_noise sse2 msa/; add_proto qw/void vpx_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit"; - specialize qw/vpx_mbpost_proc_down sse2 neon msa/; + specialize qw/vpx_mbpost_proc_down sse2 neon msa vsx/; - add_proto qw/void vpx_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit"; - specialize qw/vpx_mbpost_proc_across_ip sse2 neon msa/; + add_proto qw/void vpx_mbpost_proc_across_ip/, "unsigned char *src, int pitch, int rows, int cols,int flimit"; + specialize qw/vpx_mbpost_proc_across_ip sse2 neon msa vsx/; add_proto qw/void vpx_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size"; - specialize qw/vpx_post_proc_down_and_across_mb_row sse2 neon msa/; + specialize qw/vpx_post_proc_down_and_across_mb_row sse2 neon msa vsx/; } diff --git a/media/libvpx/libvpx/vpx_dsp/vpx_filter.h b/media/libvpx/libvpx/vpx_dsp/vpx_filter.h index 6cea251bccaa..54357ee6caec 100644 --- a/media/libvpx/libvpx/vpx_dsp/vpx_filter.h +++ b/media/libvpx/libvpx/vpx_dsp/vpx_filter.h @@ -8,9 +8,10 @@ * be found in the AUTHORS file in 
the root of the source tree.
 */
-#ifndef VPX_DSP_VPX_FILTER_H_
-#define VPX_DSP_VPX_FILTER_H_
+#ifndef VPX_VPX_DSP_VPX_FILTER_H_
+#define VPX_VPX_DSP_VPX_FILTER_H_
+#include <assert.h>
 #include "vpx/vpx_integer.h"
 #ifdef __cplusplus
@@ -26,8 +27,16 @@ extern "C" {
 typedef int16_t InterpKernel[SUBPEL_TAPS];
+static INLINE int vpx_get_filter_taps(const int16_t *const filter) {
+  assert(filter[3] != 128);
+  if (!filter[0] && !filter[1] && !filter[2])
+    return 2;
+  else
+    return 8;
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
-#endif  // VPX_DSP_VPX_FILTER_H_
+#endif  // VPX_VPX_DSP_VPX_FILTER_H_
diff --git a/media/libvpx/libvpx/vpx_dsp/x86/avg_intrin_avx2.c b/media/libvpx/libvpx/vpx_dsp/x86/avg_intrin_avx2.c
index ff19ea6470d1..3f4f577a21dc 100644
--- a/media/libvpx/libvpx/vpx_dsp/x86/avg_intrin_avx2.c
+++ b/media/libvpx/libvpx/vpx_dsp/x86/avg_intrin_avx2.c
@@ -15,6 +15,209 @@
 #include "vpx_dsp/x86/bitdepth_conversion_avx2.h"
 #include "vpx_ports/mem.h"
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_hadamard_col8_avx2(__m256i *in, int iter) {
+  __m256i a0 = in[0];
+  __m256i a1 = in[1];
+  __m256i a2 = in[2];
+  __m256i a3 = in[3];
+  __m256i a4 = in[4];
+  __m256i a5 = in[5];
+  __m256i a6 = in[6];
+  __m256i a7 = in[7];
+
+  __m256i b0 = _mm256_add_epi32(a0, a1);
+  __m256i b1 = _mm256_sub_epi32(a0, a1);
+  __m256i b2 = _mm256_add_epi32(a2, a3);
+  __m256i b3 = _mm256_sub_epi32(a2, a3);
+  __m256i b4 = _mm256_add_epi32(a4, a5);
+  __m256i b5 = _mm256_sub_epi32(a4, a5);
+  __m256i b6 = _mm256_add_epi32(a6, a7);
+  __m256i b7 = _mm256_sub_epi32(a6, a7);
+
+  a0 = _mm256_add_epi32(b0, b2);
+  a1 = _mm256_add_epi32(b1, b3);
+  a2 = _mm256_sub_epi32(b0, b2);
+  a3 = _mm256_sub_epi32(b1, b3);
+  a4 = _mm256_add_epi32(b4, b6);
+  a5 = _mm256_add_epi32(b5, b7);
+  a6 = _mm256_sub_epi32(b4, b6);
+  a7 = _mm256_sub_epi32(b5, b7);
+
+  if (iter == 0) {
+    b0 = _mm256_add_epi32(a0, a4);
+    b7 = _mm256_add_epi32(a1, a5);
+    b3 = _mm256_add_epi32(a2, a6);
+    b4 = _mm256_add_epi32(a3, a7);
+    b2 = _mm256_sub_epi32(a0, a4);
+    b6 = _mm256_sub_epi32(a1, a5);
+    b1 = _mm256_sub_epi32(a2, a6);
+    b5 = _mm256_sub_epi32(a3, a7);
+
+    a0 = _mm256_unpacklo_epi32(b0, b1);
+    a1 = _mm256_unpacklo_epi32(b2, b3);
+    a2 = _mm256_unpackhi_epi32(b0, b1);
+    a3 = _mm256_unpackhi_epi32(b2, b3);
+    a4 = _mm256_unpacklo_epi32(b4, b5);
+    a5 = _mm256_unpacklo_epi32(b6, b7);
+    a6 = _mm256_unpackhi_epi32(b4, b5);
+    a7 = _mm256_unpackhi_epi32(b6, b7);
+
+    b0 = _mm256_unpacklo_epi64(a0, a1);
+    b1 = _mm256_unpacklo_epi64(a4, a5);
+    b2 = _mm256_unpackhi_epi64(a0, a1);
+    b3 = _mm256_unpackhi_epi64(a4, a5);
+    b4 = _mm256_unpacklo_epi64(a2, a3);
+    b5 = _mm256_unpacklo_epi64(a6, a7);
+    b6 = _mm256_unpackhi_epi64(a2, a3);
+    b7 = _mm256_unpackhi_epi64(a6, a7);
+
+    in[0] = _mm256_permute2x128_si256(b0, b1, 0x20);
+    in[1] = _mm256_permute2x128_si256(b0, b1, 0x31);
+    in[2] = _mm256_permute2x128_si256(b2, b3, 0x20);
+    in[3] = _mm256_permute2x128_si256(b2, b3, 0x31);
+    in[4] = _mm256_permute2x128_si256(b4, b5, 0x20);
+    in[5] = _mm256_permute2x128_si256(b4, b5, 0x31);
+    in[6] = _mm256_permute2x128_si256(b6, b7, 0x20);
+    in[7] = _mm256_permute2x128_si256(b6, b7, 0x31);
+  } else {
+    in[0] = _mm256_add_epi32(a0, a4);
+    in[7] = _mm256_add_epi32(a1, a5);
+    in[3] = _mm256_add_epi32(a2, a6);
+    in[4] = _mm256_add_epi32(a3, a7);
+    in[2] = _mm256_sub_epi32(a0, a4);
+    in[6] = _mm256_sub_epi32(a1, a5);
+    in[1] = _mm256_sub_epi32(a2, a6);
+    in[5] = _mm256_sub_epi32(a3, a7);
+  }
+}
+
+void vpx_highbd_hadamard_8x8_avx2(const int16_t *src_diff, ptrdiff_t src_stride,
+                                  tran_low_t *coeff) {
+  __m128i
src16[8]; + __m256i src32[8]; + + src16[0] = _mm_loadu_si128((const __m128i *)src_diff); + src16[1] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); + src16[2] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); + src16[3] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); + src16[4] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); + src16[5] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); + src16[6] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); + src16[7] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); + + src32[0] = _mm256_cvtepi16_epi32(src16[0]); + src32[1] = _mm256_cvtepi16_epi32(src16[1]); + src32[2] = _mm256_cvtepi16_epi32(src16[2]); + src32[3] = _mm256_cvtepi16_epi32(src16[3]); + src32[4] = _mm256_cvtepi16_epi32(src16[4]); + src32[5] = _mm256_cvtepi16_epi32(src16[5]); + src32[6] = _mm256_cvtepi16_epi32(src16[6]); + src32[7] = _mm256_cvtepi16_epi32(src16[7]); + + highbd_hadamard_col8_avx2(src32, 0); + highbd_hadamard_col8_avx2(src32, 1); + + _mm256_storeu_si256((__m256i *)coeff, src32[0]); + coeff += 8; + _mm256_storeu_si256((__m256i *)coeff, src32[1]); + coeff += 8; + _mm256_storeu_si256((__m256i *)coeff, src32[2]); + coeff += 8; + _mm256_storeu_si256((__m256i *)coeff, src32[3]); + coeff += 8; + _mm256_storeu_si256((__m256i *)coeff, src32[4]); + coeff += 8; + _mm256_storeu_si256((__m256i *)coeff, src32[5]); + coeff += 8; + _mm256_storeu_si256((__m256i *)coeff, src32[6]); + coeff += 8; + _mm256_storeu_si256((__m256i *)coeff, src32[7]); +} + +void vpx_highbd_hadamard_16x16_avx2(const int16_t *src_diff, + ptrdiff_t src_stride, tran_low_t *coeff) { + int idx; + tran_low_t *t_coeff = coeff; + for (idx = 0; idx < 4; ++idx) { + const int16_t *src_ptr = + src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; + vpx_highbd_hadamard_8x8_avx2(src_ptr, src_stride, t_coeff + idx * 64); + } + + for (idx = 0; idx < 64; idx += 8) { + __m256i coeff0 = _mm256_loadu_si256((const __m256i *)t_coeff); + __m256i coeff1 = _mm256_loadu_si256((const __m256i *)(t_coeff + 64)); + __m256i coeff2 = _mm256_loadu_si256((const __m256i *)(t_coeff + 128)); + __m256i coeff3 = _mm256_loadu_si256((const __m256i *)(t_coeff + 192)); + + __m256i b0 = _mm256_add_epi32(coeff0, coeff1); + __m256i b1 = _mm256_sub_epi32(coeff0, coeff1); + __m256i b2 = _mm256_add_epi32(coeff2, coeff3); + __m256i b3 = _mm256_sub_epi32(coeff2, coeff3); + + b0 = _mm256_srai_epi32(b0, 1); + b1 = _mm256_srai_epi32(b1, 1); + b2 = _mm256_srai_epi32(b2, 1); + b3 = _mm256_srai_epi32(b3, 1); + + coeff0 = _mm256_add_epi32(b0, b2); + coeff1 = _mm256_add_epi32(b1, b3); + coeff2 = _mm256_sub_epi32(b0, b2); + coeff3 = _mm256_sub_epi32(b1, b3); + + _mm256_storeu_si256((__m256i *)coeff, coeff0); + _mm256_storeu_si256((__m256i *)(coeff + 64), coeff1); + _mm256_storeu_si256((__m256i *)(coeff + 128), coeff2); + _mm256_storeu_si256((__m256i *)(coeff + 192), coeff3); + + coeff += 8; + t_coeff += 8; + } +} + +void vpx_highbd_hadamard_32x32_avx2(const int16_t *src_diff, + ptrdiff_t src_stride, tran_low_t *coeff) { + int idx; + tran_low_t *t_coeff = coeff; + for (idx = 0; idx < 4; ++idx) { + const int16_t *src_ptr = + src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; + vpx_highbd_hadamard_16x16_avx2(src_ptr, src_stride, t_coeff + idx * 256); + } + + for (idx = 0; idx < 256; idx += 8) { + __m256i coeff0 = _mm256_loadu_si256((const __m256i *)t_coeff); + __m256i coeff1 = _mm256_loadu_si256((const __m256i *)(t_coeff + 256)); + __m256i coeff2 = _mm256_loadu_si256((const 
__m256i *)(t_coeff + 512)); + __m256i coeff3 = _mm256_loadu_si256((const __m256i *)(t_coeff + 768)); + + __m256i b0 = _mm256_add_epi32(coeff0, coeff1); + __m256i b1 = _mm256_sub_epi32(coeff0, coeff1); + __m256i b2 = _mm256_add_epi32(coeff2, coeff3); + __m256i b3 = _mm256_sub_epi32(coeff2, coeff3); + + b0 = _mm256_srai_epi32(b0, 2); + b1 = _mm256_srai_epi32(b1, 2); + b2 = _mm256_srai_epi32(b2, 2); + b3 = _mm256_srai_epi32(b3, 2); + + coeff0 = _mm256_add_epi32(b0, b2); + coeff1 = _mm256_add_epi32(b1, b3); + coeff2 = _mm256_sub_epi32(b0, b2); + coeff3 = _mm256_sub_epi32(b1, b3); + + _mm256_storeu_si256((__m256i *)coeff, coeff0); + _mm256_storeu_si256((__m256i *)(coeff + 256), coeff1); + _mm256_storeu_si256((__m256i *)(coeff + 512), coeff2); + _mm256_storeu_si256((__m256i *)(coeff + 768), coeff3); + + coeff += 8; + t_coeff += 8; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + static void hadamard_col8x2_avx2(__m256i *in, int iter) { __m256i a0 = in[0]; __m256i a1 = in[1]; @@ -91,7 +294,7 @@ static void hadamard_col8x2_avx2(__m256i *in, int iter) { } } -static void hadamard_8x8x2_avx2(int16_t const *src_diff, ptrdiff_t src_stride, +static void hadamard_8x8x2_avx2(const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff) { __m256i src[8]; src[0] = _mm256_loadu_si256((const __m256i *)src_diff); @@ -131,18 +334,19 @@ static void hadamard_8x8x2_avx2(int16_t const *src_diff, ptrdiff_t src_stride, _mm256_permute2x128_si256(src[6], src[7], 0x31)); } -void vpx_hadamard_16x16_avx2(int16_t const *src_diff, ptrdiff_t src_stride, - tran_low_t *coeff) { - int idx; +static INLINE void hadamard_16x16_avx2(const int16_t *src_diff, + ptrdiff_t src_stride, tran_low_t *coeff, + int is_final) { #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(32, int16_t, temp_coeff[16 * 16]); int16_t *t_coeff = temp_coeff; #else int16_t *t_coeff = coeff; #endif - + int16_t *coeff16 = (int16_t *)coeff; + int idx; for (idx = 0; idx < 2; ++idx) { - int16_t const *src_ptr = src_diff + idx * 8 * src_stride; + const int16_t *src_ptr = src_diff + idx * 8 * src_stride; hadamard_8x8x2_avx2(src_ptr, src_stride, t_coeff + (idx * 64 * 2)); } @@ -161,11 +365,69 @@ void vpx_hadamard_16x16_avx2(int16_t const *src_diff, ptrdiff_t src_stride, b1 = _mm256_srai_epi16(b1, 1); b2 = _mm256_srai_epi16(b2, 1); b3 = _mm256_srai_epi16(b3, 1); + if (is_final) { + store_tran_low(_mm256_add_epi16(b0, b2), coeff); + store_tran_low(_mm256_add_epi16(b1, b3), coeff + 64); + store_tran_low(_mm256_sub_epi16(b0, b2), coeff + 128); + store_tran_low(_mm256_sub_epi16(b1, b3), coeff + 192); + coeff += 16; + } else { + _mm256_storeu_si256((__m256i *)coeff16, _mm256_add_epi16(b0, b2)); + _mm256_storeu_si256((__m256i *)(coeff16 + 64), _mm256_add_epi16(b1, b3)); + _mm256_storeu_si256((__m256i *)(coeff16 + 128), _mm256_sub_epi16(b0, b2)); + _mm256_storeu_si256((__m256i *)(coeff16 + 192), _mm256_sub_epi16(b1, b3)); + coeff16 += 16; + } + t_coeff += 16; + } +} + +void vpx_hadamard_16x16_avx2(const int16_t *src_diff, ptrdiff_t src_stride, + tran_low_t *coeff) { + hadamard_16x16_avx2(src_diff, src_stride, coeff, 1); +} + +void vpx_hadamard_32x32_avx2(const int16_t *src_diff, ptrdiff_t src_stride, + tran_low_t *coeff) { +#if CONFIG_VP9_HIGHBITDEPTH + // For high bitdepths, it is unnecessary to store_tran_low + // (mult/unpack/store), then load_tran_low (load/pack) the same memory in the + // next stage. Output to an intermediate buffer first, then store_tran_low() + // in the final stage. 
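+  // Editorial sketch (ours, not part of the upstream patch): for each
+  // coefficient position i in [0, 256), the combine stage below computes
+  //   b0 = (t[i] + t[i+256]) >> 2;      b1 = (t[i] - t[i+256]) >> 2;
+  //   b2 = (t[i+512] + t[i+768]) >> 2;  b3 = (t[i+512] - t[i+768]) >> 2;
+  //   out[i] = b0 + b2;      out[i+256] = b1 + b3;
+  //   out[i+512] = b0 - b2;  out[i+768] = b1 - b3;
+  // where t[] holds the four 16x16 Hadamard sub-blocks. The arithmetic shift
+  // by 2 normalizes the two extra butterfly stages so the 32x32 transform
+  // still fits in 16-bit intermediates.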
+ DECLARE_ALIGNED(32, int16_t, temp_coeff[32 * 32]); + int16_t *t_coeff = temp_coeff; +#else + int16_t *t_coeff = coeff; +#endif + int idx; + for (idx = 0; idx < 4; ++idx) { + // src_diff: 9 bit, dynamic range [-255, 255] + const int16_t *src_ptr = + src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; + hadamard_16x16_avx2(src_ptr, src_stride, + (tran_low_t *)(t_coeff + idx * 256), 0); + } + + for (idx = 0; idx < 256; idx += 16) { + const __m256i coeff0 = _mm256_loadu_si256((const __m256i *)t_coeff); + const __m256i coeff1 = _mm256_loadu_si256((const __m256i *)(t_coeff + 256)); + const __m256i coeff2 = _mm256_loadu_si256((const __m256i *)(t_coeff + 512)); + const __m256i coeff3 = _mm256_loadu_si256((const __m256i *)(t_coeff + 768)); + + __m256i b0 = _mm256_add_epi16(coeff0, coeff1); + __m256i b1 = _mm256_sub_epi16(coeff0, coeff1); + __m256i b2 = _mm256_add_epi16(coeff2, coeff3); + __m256i b3 = _mm256_sub_epi16(coeff2, coeff3); + + b0 = _mm256_srai_epi16(b0, 2); + b1 = _mm256_srai_epi16(b1, 2); + b2 = _mm256_srai_epi16(b2, 2); + b3 = _mm256_srai_epi16(b3, 2); store_tran_low(_mm256_add_epi16(b0, b2), coeff); - store_tran_low(_mm256_add_epi16(b1, b3), coeff + 64); - store_tran_low(_mm256_sub_epi16(b0, b2), coeff + 128); - store_tran_low(_mm256_sub_epi16(b1, b3), coeff + 192); + store_tran_low(_mm256_add_epi16(b1, b3), coeff + 256); + store_tran_low(_mm256_sub_epi16(b0, b2), coeff + 512); + store_tran_low(_mm256_sub_epi16(b1, b3), coeff + 768); coeff += 16; t_coeff += 16; @@ -195,3 +457,26 @@ int vpx_satd_avx2(const tran_low_t *coeff, int length) { return _mm_cvtsi128_si32(accum_128); } } + +#if CONFIG_VP9_HIGHBITDEPTH +int vpx_highbd_satd_avx2(const tran_low_t *coeff, int length) { + __m256i accum = _mm256_setzero_si256(); + int i; + + for (i = 0; i < length; i += 8, coeff += 8) { + const __m256i src_line = _mm256_loadu_si256((const __m256i *)coeff); + const __m256i abs = _mm256_abs_epi32(src_line); + accum = _mm256_add_epi32(accum, abs); + } + + { // 32 bit horizontal add + const __m256i a = _mm256_srli_si256(accum, 8); + const __m256i b = _mm256_add_epi32(accum, a); + const __m256i c = _mm256_srli_epi64(b, 32); + const __m256i d = _mm256_add_epi32(b, c); + const __m128i accum_128 = _mm_add_epi32(_mm256_castsi256_si128(d), + _mm256_extractf128_si256(d, 1)); + return _mm_cvtsi128_si32(accum_128); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/media/libvpx/libvpx/vpx_dsp/x86/avg_intrin_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/avg_intrin_sse2.c index a235ba41df8a..3cba258f619e 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/avg_intrin_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/avg_intrin_sse2.c @@ -138,6 +138,56 @@ unsigned int vpx_avg_4x4_sse2(const uint8_t *s, int p) { return (avg + 8) >> 4; } +#if CONFIG_VP9_HIGHBITDEPTH +unsigned int vpx_highbd_avg_8x8_sse2(const uint8_t *s8, int p) { + __m128i s0, s1; + unsigned int avg; + const uint16_t *s = CONVERT_TO_SHORTPTR(s8); + const __m128i zero = _mm_setzero_si128(); + s0 = _mm_loadu_si128((const __m128i *)(s)); + s1 = _mm_loadu_si128((const __m128i *)(s + p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadu_si128((const __m128i *)(s + 2 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadu_si128((const __m128i *)(s + 3 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadu_si128((const __m128i *)(s + 4 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadu_si128((const __m128i *)(s + 5 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadu_si128((const __m128i *)(s + 6 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = 
_mm_loadu_si128((const __m128i *)(s + 7 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_unpackhi_epi16(s0, zero); + s0 = _mm_unpacklo_epi16(s0, zero); + s0 = _mm_add_epi32(s0, s1); + s0 = _mm_add_epi32(s0, _mm_srli_si128(s0, 8)); + s0 = _mm_add_epi32(s0, _mm_srli_si128(s0, 4)); + avg = _mm_cvtsi128_si32(s0); + + return (avg + 32) >> 6; +} + +unsigned int vpx_highbd_avg_4x4_sse2(const uint8_t *s8, int p) { + __m128i s0, s1; + unsigned int avg; + const uint16_t *s = CONVERT_TO_SHORTPTR(s8); + s0 = _mm_loadl_epi64((const __m128i *)(s)); + s1 = _mm_loadl_epi64((const __m128i *)(s + p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadl_epi64((const __m128i *)(s + 2 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadl_epi64((const __m128i *)(s + 3 * p)); + s0 = _mm_adds_epu16(s0, s1); + s0 = _mm_add_epi16(s0, _mm_srli_si128(s0, 4)); + s0 = _mm_add_epi16(s0, _mm_srli_si128(s0, 2)); + avg = _mm_extract_epi16(s0, 0); + + return (avg + 8) >> 4; +} +#endif // CONFIG_VP9_HIGHBITDEPTH + static void hadamard_col8_sse2(__m128i *in, int iter) { __m128i a0 = in[0]; __m128i a1 = in[1]; @@ -214,8 +264,9 @@ static void hadamard_col8_sse2(__m128i *in, int iter) { } } -void vpx_hadamard_8x8_sse2(int16_t const *src_diff, ptrdiff_t src_stride, - tran_low_t *coeff) { +static INLINE void hadamard_8x8_sse2(const int16_t *src_diff, + ptrdiff_t src_stride, tran_low_t *coeff, + int is_final) { __m128i src[8]; src[0] = _mm_load_si128((const __m128i *)src_diff); src[1] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); @@ -229,37 +280,74 @@ void vpx_hadamard_8x8_sse2(int16_t const *src_diff, ptrdiff_t src_stride, hadamard_col8_sse2(src, 0); hadamard_col8_sse2(src, 1); - store_tran_low(src[0], coeff); - coeff += 8; - store_tran_low(src[1], coeff); - coeff += 8; - store_tran_low(src[2], coeff); - coeff += 8; - store_tran_low(src[3], coeff); - coeff += 8; - store_tran_low(src[4], coeff); - coeff += 8; - store_tran_low(src[5], coeff); - coeff += 8; - store_tran_low(src[6], coeff); - coeff += 8; - store_tran_low(src[7], coeff); + if (is_final) { + store_tran_low(src[0], coeff); + coeff += 8; + store_tran_low(src[1], coeff); + coeff += 8; + store_tran_low(src[2], coeff); + coeff += 8; + store_tran_low(src[3], coeff); + coeff += 8; + store_tran_low(src[4], coeff); + coeff += 8; + store_tran_low(src[5], coeff); + coeff += 8; + store_tran_low(src[6], coeff); + coeff += 8; + store_tran_low(src[7], coeff); + } else { + int16_t *coeff16 = (int16_t *)coeff; + _mm_store_si128((__m128i *)coeff16, src[0]); + coeff16 += 8; + _mm_store_si128((__m128i *)coeff16, src[1]); + coeff16 += 8; + _mm_store_si128((__m128i *)coeff16, src[2]); + coeff16 += 8; + _mm_store_si128((__m128i *)coeff16, src[3]); + coeff16 += 8; + _mm_store_si128((__m128i *)coeff16, src[4]); + coeff16 += 8; + _mm_store_si128((__m128i *)coeff16, src[5]); + coeff16 += 8; + _mm_store_si128((__m128i *)coeff16, src[6]); + coeff16 += 8; + _mm_store_si128((__m128i *)coeff16, src[7]); + } } -void vpx_hadamard_16x16_sse2(int16_t const *src_diff, ptrdiff_t src_stride, - tran_low_t *coeff) { +void vpx_hadamard_8x8_sse2(const int16_t *src_diff, ptrdiff_t src_stride, + tran_low_t *coeff) { + hadamard_8x8_sse2(src_diff, src_stride, coeff, 1); +} + +static INLINE void hadamard_16x16_sse2(const int16_t *src_diff, + ptrdiff_t src_stride, tran_low_t *coeff, + int is_final) { +#if CONFIG_VP9_HIGHBITDEPTH + // For high bitdepths, it is unnecessary to store_tran_low + // (mult/unpack/store), then load_tran_low (load/pack) the same memory in the + // next stage. 
Output to an intermediate buffer first, then store_tran_low() + // in the final stage. + DECLARE_ALIGNED(32, int16_t, temp_coeff[16 * 16]); + int16_t *t_coeff = temp_coeff; +#else + int16_t *t_coeff = coeff; +#endif + int16_t *coeff16 = (int16_t *)coeff; int idx; for (idx = 0; idx < 4; ++idx) { - int16_t const *src_ptr = + const int16_t *src_ptr = src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; - vpx_hadamard_8x8_sse2(src_ptr, src_stride, coeff + idx * 64); + hadamard_8x8_sse2(src_ptr, src_stride, (tran_low_t *)(t_coeff + idx * 64), + 0); } for (idx = 0; idx < 64; idx += 8) { - __m128i coeff0 = load_tran_low(coeff); - __m128i coeff1 = load_tran_low(coeff + 64); - __m128i coeff2 = load_tran_low(coeff + 128); - __m128i coeff3 = load_tran_low(coeff + 192); + __m128i coeff0 = _mm_load_si128((const __m128i *)t_coeff); + __m128i coeff1 = _mm_load_si128((const __m128i *)(t_coeff + 64)); + __m128i coeff2 = _mm_load_si128((const __m128i *)(t_coeff + 128)); + __m128i coeff3 = _mm_load_si128((const __m128i *)(t_coeff + 192)); __m128i b0 = _mm_add_epi16(coeff0, coeff1); __m128i b1 = _mm_sub_epi16(coeff0, coeff1); @@ -271,17 +359,82 @@ void vpx_hadamard_16x16_sse2(int16_t const *src_diff, ptrdiff_t src_stride, b2 = _mm_srai_epi16(b2, 1); b3 = _mm_srai_epi16(b3, 1); + coeff0 = _mm_add_epi16(b0, b2); + coeff1 = _mm_add_epi16(b1, b3); + coeff2 = _mm_sub_epi16(b0, b2); + coeff3 = _mm_sub_epi16(b1, b3); + + if (is_final) { + store_tran_low(coeff0, coeff); + store_tran_low(coeff1, coeff + 64); + store_tran_low(coeff2, coeff + 128); + store_tran_low(coeff3, coeff + 192); + coeff += 8; + } else { + _mm_store_si128((__m128i *)coeff16, coeff0); + _mm_store_si128((__m128i *)(coeff16 + 64), coeff1); + _mm_store_si128((__m128i *)(coeff16 + 128), coeff2); + _mm_store_si128((__m128i *)(coeff16 + 192), coeff3); + coeff16 += 8; + } + + t_coeff += 8; + } +} + +void vpx_hadamard_16x16_sse2(const int16_t *src_diff, ptrdiff_t src_stride, + tran_low_t *coeff) { + hadamard_16x16_sse2(src_diff, src_stride, coeff, 1); +} + +void vpx_hadamard_32x32_sse2(const int16_t *src_diff, ptrdiff_t src_stride, + tran_low_t *coeff) { +#if CONFIG_VP9_HIGHBITDEPTH + // For high bitdepths, it is unnecessary to store_tran_low + // (mult/unpack/store), then load_tran_low (load/pack) the same memory in the + // next stage. Output to an intermediate buffer first, then store_tran_low() + // in the final stage. 
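+  // Editorial note (ours, not part of the upstream patch): with
+  // CONFIG_VP9_HIGHBITDEPTH, tran_low_t is 32 bits wide, so store_tran_low()
+  // must sign-extend every 16-bit lane and load_tran_low() must pack it back
+  // down. Staging the first pass in a raw int16_t buffer, conceptually
+  //   int16_t stage[32 * 32];                    /* plain 16-bit stores */
+  //   /* ... butterflies on stage[] ... */
+  //   for (i = 0; i < 32 * 32; ++i) coeff[i] = (tran_low_t)stage[i];
+  // performs that widening exactly once, in the final store.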
+ DECLARE_ALIGNED(32, int16_t, temp_coeff[32 * 32]); + int16_t *t_coeff = temp_coeff; +#else + int16_t *t_coeff = coeff; +#endif + int idx; + for (idx = 0; idx < 4; ++idx) { + const int16_t *src_ptr = + src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; + hadamard_16x16_sse2(src_ptr, src_stride, + (tran_low_t *)(t_coeff + idx * 256), 0); + } + + for (idx = 0; idx < 256; idx += 8) { + __m128i coeff0 = _mm_load_si128((const __m128i *)t_coeff); + __m128i coeff1 = _mm_load_si128((const __m128i *)(t_coeff + 256)); + __m128i coeff2 = _mm_load_si128((const __m128i *)(t_coeff + 512)); + __m128i coeff3 = _mm_load_si128((const __m128i *)(t_coeff + 768)); + + __m128i b0 = _mm_add_epi16(coeff0, coeff1); + __m128i b1 = _mm_sub_epi16(coeff0, coeff1); + __m128i b2 = _mm_add_epi16(coeff2, coeff3); + __m128i b3 = _mm_sub_epi16(coeff2, coeff3); + + b0 = _mm_srai_epi16(b0, 2); + b1 = _mm_srai_epi16(b1, 2); + b2 = _mm_srai_epi16(b2, 2); + b3 = _mm_srai_epi16(b3, 2); + coeff0 = _mm_add_epi16(b0, b2); coeff1 = _mm_add_epi16(b1, b3); store_tran_low(coeff0, coeff); - store_tran_low(coeff1, coeff + 64); + store_tran_low(coeff1, coeff + 256); coeff2 = _mm_sub_epi16(b0, b2); coeff3 = _mm_sub_epi16(b1, b3); - store_tran_low(coeff2, coeff + 128); - store_tran_low(coeff3, coeff + 192); + store_tran_low(coeff2, coeff + 512); + store_tran_low(coeff3, coeff + 768); coeff += 8; + t_coeff += 8; } } @@ -311,7 +464,7 @@ int vpx_satd_sse2(const tran_low_t *coeff, int length) { return _mm_cvtsi128_si32(accum); } -void vpx_int_pro_row_sse2(int16_t *hbuf, uint8_t const *ref, +void vpx_int_pro_row_sse2(int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int height) { int idx; __m128i zero = _mm_setzero_si128(); @@ -360,16 +513,16 @@ void vpx_int_pro_row_sse2(int16_t *hbuf, uint8_t const *ref, _mm_storeu_si128((__m128i *)hbuf, s1); } -int16_t vpx_int_pro_col_sse2(uint8_t const *ref, const int width) { +int16_t vpx_int_pro_col_sse2(const uint8_t *ref, const int width) { __m128i zero = _mm_setzero_si128(); - __m128i src_line = _mm_load_si128((const __m128i *)ref); + __m128i src_line = _mm_loadu_si128((const __m128i *)ref); __m128i s0 = _mm_sad_epu8(src_line, zero); __m128i s1; int i; for (i = 16; i < width; i += 16) { ref += 16; - src_line = _mm_load_si128((const __m128i *)ref); + src_line = _mm_loadu_si128((const __m128i *)ref); s1 = _mm_sad_epu8(src_line, zero); s0 = _mm_adds_epu16(s0, s1); } @@ -380,7 +533,7 @@ int16_t vpx_int_pro_col_sse2(uint8_t const *ref, const int width) { return _mm_extract_epi16(s0, 0); } -int vpx_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl) { +int vpx_vector_var_sse2(const int16_t *ref, const int16_t *src, const int bwl) { int idx; int width = 4 << bwl; int16_t mean; @@ -418,7 +571,7 @@ int vpx_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl) { v1 = _mm_srli_epi64(sse, 32); sse = _mm_add_epi32(sse, v1); - mean = _mm_extract_epi16(sum, 0); + mean = (int16_t)_mm_extract_epi16(sum, 0); return _mm_cvtsi128_si32(sse) - ((mean * mean) >> (bwl + 2)); } diff --git a/media/libvpx/libvpx/vpx_dsp/x86/avg_pred_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/avg_pred_sse2.c index f83b26490e7b..e4e1e0e7a2c0 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/avg_pred_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/avg_pred_sse2.c @@ -13,11 +13,12 @@ #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" +#include "vpx_dsp/x86/mem_sse2.h" -void vpx_comp_avg_pred_sse2(uint8_t *comp, const uint8_t *pred, int width, +void vpx_comp_avg_pred_sse2(uint8_t *comp_pred, const 
uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride) { - /* comp and pred must be 16 byte aligned. */ - assert(((intptr_t)comp & 0xf) == 0); + /* comp_pred and pred must be 16 byte aligned. */ + assert(((intptr_t)comp_pred & 0xf) == 0); assert(((intptr_t)pred & 0xf) == 0); if (width > 8) { int x, y; @@ -26,17 +27,17 @@ void vpx_comp_avg_pred_sse2(uint8_t *comp, const uint8_t *pred, int width, const __m128i p = _mm_load_si128((const __m128i *)(pred + x)); const __m128i r = _mm_loadu_si128((const __m128i *)(ref + x)); const __m128i avg = _mm_avg_epu8(p, r); - _mm_store_si128((__m128i *)(comp + x), avg); + _mm_store_si128((__m128i *)(comp_pred + x), avg); } - comp += width; + comp_pred += width; pred += width; ref += ref_stride; } } else { // width must be 4 or 8. int i; - // Process 16 elements at a time. comp and pred have width == stride and - // therefore live in contigious memory. 4*4, 4*8, 8*4, 8*8, and 8*16 are all - // divisible by 16 so just ref needs to be massaged when loading. + // Process 16 elements at a time. comp_pred and pred have width == stride + // and therefore live in contigious memory. 4*4, 4*8, 8*4, 8*8, and 8*16 are + // all divisible by 16 so just ref needs to be massaged when loading. for (i = 0; i < width * height; i += 16) { const __m128i p = _mm_load_si128((const __m128i *)pred); __m128i r; @@ -45,10 +46,9 @@ void vpx_comp_avg_pred_sse2(uint8_t *comp, const uint8_t *pred, int width, r = _mm_loadu_si128((const __m128i *)ref); ref += 16; } else if (width == 4) { - r = _mm_set_epi32(*(const uint32_t *)(ref + 3 * ref_stride), - *(const uint32_t *)(ref + 2 * ref_stride), - *(const uint32_t *)(ref + ref_stride), - *(const uint32_t *)(ref)); + r = _mm_set_epi32(loadu_uint32(ref + 3 * ref_stride), + loadu_uint32(ref + 2 * ref_stride), + loadu_uint32(ref + ref_stride), loadu_uint32(ref)); ref += 4 * ref_stride; } else { @@ -60,10 +60,10 @@ void vpx_comp_avg_pred_sse2(uint8_t *comp, const uint8_t *pred, int width, ref += 2 * ref_stride; } avg = _mm_avg_epu8(p, r); - _mm_store_si128((__m128i *)comp, avg); + _mm_store_si128((__m128i *)comp_pred, avg); pred += 16; - comp += 16; + comp_pred += 16; } } } diff --git a/media/libvpx/libvpx/vpx_dsp/x86/avg_ssse3_x86_64.asm b/media/libvpx/libvpx/vpx_dsp/x86/avg_ssse3_x86_64.asm index 22e0a086cc20..9122b5a401ae 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/avg_ssse3_x86_64.asm +++ b/media/libvpx/libvpx/vpx_dsp/x86/avg_ssse3_x86_64.asm @@ -13,7 +13,7 @@ SECTION .text -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 ; matrix transpose %macro TRANSPOSE8X8 10 ; stage 1 diff --git a/media/libvpx/libvpx/vpx_dsp/x86/bitdepth_conversion_avx2.h b/media/libvpx/libvpx/vpx_dsp/x86/bitdepth_conversion_avx2.h index 3552c07cd36f..c02b47a3ebf2 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/bitdepth_conversion_avx2.h +++ b/media/libvpx/libvpx/vpx_dsp/x86/bitdepth_conversion_avx2.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */
-#ifndef VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_
-#define VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_
+#ifndef VPX_VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_
+#define VPX_VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_
 #include <immintrin.h>
@@ -41,4 +41,4 @@ static INLINE void store_tran_low(__m256i a, tran_low_t *b) {
   _mm256_storeu_si256((__m256i *)b, a);
 #endif
 }
-#endif  // VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_
+#endif  // VPX_VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_
diff --git a/media/libvpx/libvpx/vpx_dsp/x86/bitdepth_conversion_sse2.h b/media/libvpx/libvpx/vpx_dsp/x86/bitdepth_conversion_sse2.h
index 5d1d7795723c..74dde656b1d5 100644
--- a/media/libvpx/libvpx/vpx_dsp/x86/bitdepth_conversion_sse2.h
+++ b/media/libvpx/libvpx/vpx_dsp/x86/bitdepth_conversion_sse2.h
@@ -7,8 +7,8 @@
  * in the file PATENTS. All contributing project authors may
  * be found in the AUTHORS file in the root of the source tree.
  */
-#ifndef VPX_DSP_X86_BITDEPTH_CONVERSION_SSE2_H_
-#define VPX_DSP_X86_BITDEPTH_CONVERSION_SSE2_H_
+#ifndef VPX_VPX_DSP_X86_BITDEPTH_CONVERSION_SSE2_H_
+#define VPX_VPX_DSP_X86_BITDEPTH_CONVERSION_SSE2_H_
 #include <emmintrin.h>
@@ -53,4 +53,4 @@ static INLINE void store_zero_tran_low(tran_low_t *a) {
   _mm_store_si128((__m128i *)(a), zero);
 #endif
 }
-#endif  // VPX_DSP_X86_BITDEPTH_CONVERSION_SSE2_H_
+#endif  // VPX_VPX_DSP_X86_BITDEPTH_CONVERSION_SSE2_H_
diff --git a/media/libvpx/libvpx/vpx_dsp/x86/convolve.h b/media/libvpx/libvpx/vpx_dsp/x86/convolve.h
index 68d7589d45d4..6fd40fef9333 100644
--- a/media/libvpx/libvpx/vpx_dsp/x86/convolve.h
+++ b/media/libvpx/libvpx/vpx_dsp/x86/convolve.h
@@ -7,8 +7,8 @@
  * in the file PATENTS. All contributing project authors may
  * be found in the AUTHORS file in the root of the source tree.
  */
-#ifndef VPX_DSP_X86_CONVOLVE_H_
-#define VPX_DSP_X86_CONVOLVE_H_
+#ifndef VPX_VPX_DSP_X86_CONVOLVE_H_
+#define VPX_VPX_DSP_X86_CONVOLVE_H_
 #include <assert.h>
@@ -16,56 +16,83 @@
 #include "vpx/vpx_integer.h"
 #include "vpx_ports/mem.h"
+// TODO(chiyotsai@google.com): Refactor the code here. Currently this is pretty
+// hacky and awful to read. Note that there is a filter_x[3] == 128 check in
+// HIGHBD_FUN_CONV_2D to avoid seg fault due to the fact that the c function
+// assumes the filter is always 8 tap.
 typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch,
                                 uint8_t *output_ptr, ptrdiff_t out_pitch,
                                 uint32_t output_height, const int16_t *filter);
-#define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt) \
+// TODO(chiyotsai@google.com): Remove the is_avg argument to the MACROS once we
+// have 4-tap vert avg filter.
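+// Editorial sketch (ours, not part of the upstream patch): the tap counts
+// that FUN_CONV_1D dispatches on below can be read straight off a kernel
+// row, in the same spirit as vpx_get_filter_taps() in vpx_dsp/vpx_filter.h
+// but extended with the 4-tap case these macros add. The helper name is
+// illustrative only.
+static INLINE int classify_taps_sketch(const int16_t *k) {
+  if (k[0] | k[1] | k[6] | k[7]) return 8;  // any outer tap set: 8-tap path
+  if (k[2] | k[5]) return 4;                // middle taps only: 4-tap path
+  return 2;                                 // bilinear: k[3] and k[4] only
+}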
+#define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt, is_avg) \ void vpx_convolve8_##name##_##opt( \ const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ - ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \ + ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \ - const int16_t *filter = filter_kernel[offset]; \ + const int16_t *filter_row = filter[offset]; \ (void)x0_q4; \ (void)x_step_q4; \ (void)y0_q4; \ (void)y_step_q4; \ - assert(filter[3] != 128); \ + assert(filter_row[3] != 128); \ assert(step_q4 == 16); \ - if (filter[0] | filter[1] | filter[2]) { \ + if (filter_row[0] | filter_row[1] | filter_row[6] | filter_row[7]) { \ + const int num_taps = 8; \ while (w >= 16) { \ vpx_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \ - dst_stride, h, filter); \ + dst_stride, h, filter_row); \ src += 16; \ dst += 16; \ w -= 16; \ } \ if (w == 8) { \ vpx_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst, \ - dst_stride, h, filter); \ + dst_stride, h, filter_row); \ } else if (w == 4) { \ vpx_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst, \ - dst_stride, h, filter); \ + dst_stride, h, filter_row); \ } \ - } else { \ + (void)num_taps; \ + } else if (filter_row[2] | filter_row[5]) { \ + const int num_taps = is_avg ? 8 : 4; \ while (w >= 16) { \ - vpx_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst, \ - dst_stride, h, filter); \ + vpx_filter_block1d16_##dir##4_##avg##opt(src_start, src_stride, dst, \ + dst_stride, h, filter_row); \ src += 16; \ dst += 16; \ w -= 16; \ } \ if (w == 8) { \ - vpx_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst, \ - dst_stride, h, filter); \ + vpx_filter_block1d8_##dir##4_##avg##opt(src_start, src_stride, dst, \ + dst_stride, h, filter_row); \ } else if (w == 4) { \ - vpx_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst, \ - dst_stride, h, filter); \ + vpx_filter_block1d4_##dir##4_##avg##opt(src_start, src_stride, dst, \ + dst_stride, h, filter_row); \ } \ + (void)num_taps; \ + } else { \ + const int num_taps = 2; \ + while (w >= 16) { \ + vpx_filter_block1d16_##dir##2_##avg##opt(src_start, src_stride, dst, \ + dst_stride, h, filter_row); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + if (w == 8) { \ + vpx_filter_block1d8_##dir##2_##avg##opt(src_start, src_stride, dst, \ + dst_stride, h, filter_row); \ + } else if (w == 4) { \ + vpx_filter_block1d4_##dir##2_##avg##opt(src_start, src_stride, dst, \ + dst_stride, h, filter_row); \ + } \ + (void)num_taps; \ } \ } -#define FUN_CONV_2D(avg, opt) \ +#define FUN_CONV_2D(avg, opt, is_avg) \ void vpx_convolve8_##avg##opt( \ const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ @@ -79,16 +106,25 @@ typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, assert(h <= 64); \ assert(x_step_q4 == 16); \ assert(y_step_q4 == 16); \ - if (filter_x[0] | filter_x[1] | filter_x[2]) { \ - DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ + if (filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]) { \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \ h + 7); \ vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ filter, x0_q4, x_step_q4, y0_q4, \ y_step_q4, w, h); \ + } else if (filter_x[2] | filter_x[5]) { \ + 
const int num_taps = is_avg ? 8 : 4; \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ + vpx_convolve8_horiz_##opt( \ + src - (num_taps / 2 - 1) * src_stride, src_stride, fdata2, 64, \ + filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h + num_taps - 1); \ + vpx_convolve8_##avg##vert_##opt(fdata2 + 64 * (num_taps / 2 - 1), 64, \ + dst, dst_stride, filter, x0_q4, \ + x_step_q4, y0_q4, y_step_q4, w, h); \ } else { \ - DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ + DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65] VPX_UNINITIALIZED); \ vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, x0_q4, \ x_step_q4, y0_q4, y_step_q4, w, h + 1); \ vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter, \ @@ -106,57 +142,86 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, unsigned int output_height, const int16_t *filter, int bd); -#define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt) \ +#define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt, \ + is_avg) \ void vpx_highbd_convolve8_##name##_##opt( \ const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \ ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \ int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \ - const int16_t *filter = filter_kernel[offset]; \ - if (step_q4 == 16 && filter[3] != 128) { \ - if (filter[0] | filter[1] | filter[2]) { \ + const int16_t *filter_row = filter_kernel[offset]; \ + if (step_q4 == 16 && filter_row[3] != 128) { \ + if (filter_row[0] | filter_row[1] | filter_row[6] | filter_row[7]) { \ + const int num_taps = 8; \ while (w >= 16) { \ vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \ - src_start, src_stride, dst, dst_stride, h, filter, bd); \ + src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 16; \ dst += 16; \ w -= 16; \ } \ while (w >= 8) { \ vpx_highbd_filter_block1d8_##dir##8_##avg##opt( \ - src_start, src_stride, dst, dst_stride, h, filter, bd); \ + src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 8; \ dst += 8; \ w -= 8; \ } \ while (w >= 4) { \ vpx_highbd_filter_block1d4_##dir##8_##avg##opt( \ - src_start, src_stride, dst, dst_stride, h, filter, bd); \ + src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 4; \ dst += 4; \ w -= 4; \ } \ + (void)num_taps; \ + } else if (filter_row[2] | filter_row[5]) { \ + const int num_taps = is_avg ? 
8 : 4; \ + while (w >= 16) { \ + vpx_highbd_filter_block1d16_##dir##4_##avg##opt( \ + src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ + src += 16; \ + dst += 16; \ + w -= 16; \ + } \ + while (w >= 8) { \ + vpx_highbd_filter_block1d8_##dir##4_##avg##opt( \ + src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ + src += 8; \ + dst += 8; \ + w -= 8; \ + } \ + while (w >= 4) { \ + vpx_highbd_filter_block1d4_##dir##4_##avg##opt( \ + src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ + src += 4; \ + dst += 4; \ + w -= 4; \ + } \ + (void)num_taps; \ } else { \ + const int num_taps = 2; \ while (w >= 16) { \ vpx_highbd_filter_block1d16_##dir##2_##avg##opt( \ - src, src_stride, dst, dst_stride, h, filter, bd); \ + src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 16; \ dst += 16; \ w -= 16; \ } \ while (w >= 8) { \ vpx_highbd_filter_block1d8_##dir##2_##avg##opt( \ - src, src_stride, dst, dst_stride, h, filter, bd); \ + src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 8; \ dst += 8; \ w -= 8; \ } \ while (w >= 4) { \ vpx_highbd_filter_block1d4_##dir##2_##avg##opt( \ - src, src_stride, dst, dst_stride, h, filter, bd); \ + src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 4; \ dst += 4; \ w -= 4; \ } \ + (void)num_taps; \ } \ } \ if (w) { \ @@ -166,7 +231,7 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, } \ } -#define HIGH_FUN_CONV_2D(avg, opt) \ +#define HIGH_FUN_CONV_2D(avg, opt, is_avg) \ void vpx_highbd_convolve8_##avg##opt( \ const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \ ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ @@ -175,16 +240,27 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, assert(w <= 64); \ assert(h <= 64); \ if (x_step_q4 == 16 && y_step_q4 == 16) { \ - if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ + if ((filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]) || \ + filter_x[3] == 128) { \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ fdata2, 64, filter, x0_q4, x_step_q4, \ y0_q4, y_step_q4, w, h + 7, bd); \ vpx_highbd_convolve8_##avg##vert_##opt( \ fdata2 + 192, 64, dst, dst_stride, filter, x0_q4, x_step_q4, \ y0_q4, y_step_q4, w, h, bd); \ + } else if (filter_x[2] | filter_x[5]) { \ + const int num_taps = is_avg ? 
8 : 4; \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ + vpx_highbd_convolve8_horiz_##opt( \ + src - (num_taps / 2 - 1) * src_stride, src_stride, fdata2, 64, \ + filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h + num_taps - 1, \ + bd); \ + vpx_highbd_convolve8_##avg##vert_##opt( \ + fdata2 + 64 * (num_taps / 2 - 1), 64, dst, dst_stride, filter, \ + x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); \ } else { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ + DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65] VPX_UNINITIALIZED); \ vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, \ x0_q4, x_step_q4, y0_q4, y_step_q4, \ w, h + 1, bd); \ @@ -198,6 +274,6 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, bd); \ } \ } -#endif // CONFIG_VP9_HIGHBITDEPTH -#endif // VPX_DSP_X86_CONVOLVE_H_ +#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // VPX_VPX_DSP_X86_CONVOLVE_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/convolve_avx2.h b/media/libvpx/libvpx/vpx_dsp/x86/convolve_avx2.h index bc96b738f465..99bc9637fcb2 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/convolve_avx2.h +++ b/media/libvpx/libvpx/vpx_dsp/x86/convolve_avx2.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_X86_CONVOLVE_AVX2_H_ -#define VPX_DSP_X86_CONVOLVE_AVX2_H_ +#ifndef VPX_VPX_DSP_X86_CONVOLVE_AVX2_H_ +#define VPX_VPX_DSP_X86_CONVOLVE_AVX2_H_ #include // AVX2 @@ -100,6 +100,63 @@ static INLINE __m128i convolve8_8_avx2(const __m256i *const s, return sum1; } +static INLINE __m256i mm256_loadu2_si128(const void *lo, const void *hi) { + const __m256i tmp = + _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)lo)); + return _mm256_inserti128_si256(tmp, _mm_loadu_si128((const __m128i *)hi), 1); +} + +static INLINE __m256i mm256_loadu2_epi64(const void *lo, const void *hi) { + const __m256i tmp = + _mm256_castsi128_si256(_mm_loadl_epi64((const __m128i *)lo)); + return _mm256_inserti128_si256(tmp, _mm_loadl_epi64((const __m128i *)hi), 1); +} + +static INLINE void mm256_store2_si128(__m128i *const dst_ptr_1, + __m128i *const dst_ptr_2, + const __m256i *const src) { + _mm_store_si128(dst_ptr_1, _mm256_castsi256_si128(*src)); + _mm_store_si128(dst_ptr_2, _mm256_extractf128_si256(*src, 1)); +} + +static INLINE void mm256_storeu2_epi64(__m128i *const dst_ptr_1, + __m128i *const dst_ptr_2, + const __m256i *const src) { + _mm_storel_epi64(dst_ptr_1, _mm256_castsi256_si128(*src)); + _mm_storel_epi64(dst_ptr_2, _mm256_extractf128_si256(*src, 1)); +} + +static INLINE void mm256_storeu2_epi32(__m128i *const dst_ptr_1, + __m128i *const dst_ptr_2, + const __m256i *const src) { + *((uint32_t *)(dst_ptr_1)) = _mm_cvtsi128_si32(_mm256_castsi256_si128(*src)); + *((uint32_t *)(dst_ptr_2)) = + _mm_cvtsi128_si32(_mm256_extractf128_si256(*src, 1)); +} + +static INLINE __m256i mm256_round_epi32(const __m256i *const src, + const __m256i *const half_depth, + const int depth) { + const __m256i nearest_src = _mm256_add_epi32(*src, *half_depth); + return _mm256_srai_epi32(nearest_src, depth); +} + +static INLINE __m256i mm256_round_epi16(const __m256i *const src, + const __m256i *const half_depth, + const int depth) { + const __m256i nearest_src = _mm256_adds_epi16(*src, *half_depth); + return _mm256_srai_epi16(nearest_src, depth); +} + +static INLINE __m256i mm256_madd_add_epi32(const __m256i *const src_0, + const __m256i *const src_1, + const __m256i *const ker_0, + const __m256i *const ker_1) { + const __m256i tmp_0 = _mm256_madd_epi16(*src_0, *ker_0); + 
const __m256i tmp_1 = _mm256_madd_epi16(*src_1, *ker_1);
+  return _mm256_add_epi32(tmp_0, tmp_1);
+}
+
 #undef MM256_BROADCASTSI128_SI256
-#endif  // VPX_DSP_X86_CONVOLVE_AVX2_H_
+#endif  // VPX_VPX_DSP_X86_CONVOLVE_AVX2_H_
diff --git a/media/libvpx/libvpx/vpx_dsp/x86/convolve_sse2.h b/media/libvpx/libvpx/vpx_dsp/x86/convolve_sse2.h
new file mode 100644
index 000000000000..84435463949d
--- /dev/null
+++ b/media/libvpx/libvpx/vpx_dsp/x86/convolve_sse2.h
@@ -0,0 +1,88 @@
+/*
+ *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_DSP_X86_CONVOLVE_SSE2_H_
+#define VPX_VPX_DSP_X86_CONVOLVE_SSE2_H_
+
+#include <emmintrin.h>  // SSE2
+
+#include "./vpx_config.h"
+
+// Interprets the input register as 16-bit words 7 6 5 4 3 2 1 0, then returns
+// values at index 2 and 3 to return 3 2 3 2 3 2 3 2 as 16-bit words.
+static INLINE __m128i extract_quarter_2_epi16_sse2(const __m128i *const reg) {
+  __m128i tmp = _mm_unpacklo_epi32(*reg, *reg);
+  return _mm_unpackhi_epi64(tmp, tmp);
+}
+
+// Interprets the input register as 16-bit words 7 6 5 4 3 2 1 0, then returns
+// values at index 4 and 5 to return 5 4 5 4 5 4 5 4 as 16-bit words.
+static INLINE __m128i extract_quarter_3_epi16_sse2(const __m128i *const reg) {
+  __m128i tmp = _mm_unpackhi_epi32(*reg, *reg);
+  return _mm_unpacklo_epi64(tmp, tmp);
+}
+
+// Interprets src as 8-bit words, zero-extends to form 16-bit words, then
+// multiplies with ker and adds the adjacent results to form 32-bit words.
+// Finally adds the result from 1 and 2 together.
+static INLINE __m128i mm_madd_add_epi8_sse2(const __m128i *const src_1,
+                                            const __m128i *const src_2,
+                                            const __m128i *const ker_1,
+                                            const __m128i *const ker_2) {
+  const __m128i src_1_half = _mm_unpacklo_epi8(*src_1, _mm_setzero_si128());
+  const __m128i src_2_half = _mm_unpacklo_epi8(*src_2, _mm_setzero_si128());
+  const __m128i madd_1 = _mm_madd_epi16(src_1_half, *ker_1);
+  const __m128i madd_2 = _mm_madd_epi16(src_2_half, *ker_2);
+  return _mm_add_epi32(madd_1, madd_2);
+}
+
+// Interprets src as 16-bit words, then multiplies with ker and adds the
+// adjacent results to form 32-bit words. Finally adds the result from 1 and 2
+// together.
+static INLINE __m128i mm_madd_add_epi16_sse2(const __m128i *const src_1, + const __m128i *const src_2, + const __m128i *const ker_1, + const __m128i *const ker_2) { + const __m128i madd_1 = _mm_madd_epi16(*src_1, *ker_1); + const __m128i madd_2 = _mm_madd_epi16(*src_2, *ker_2); + return _mm_add_epi32(madd_1, madd_2); +} + +static INLINE __m128i mm_madd_packs_epi16_sse2(const __m128i *const src_0, + const __m128i *const src_1, + const __m128i *const ker) { + const __m128i madd_1 = _mm_madd_epi16(*src_0, *ker); + const __m128i madd_2 = _mm_madd_epi16(*src_1, *ker); + return _mm_packs_epi32(madd_1, madd_2); +} + +// Interleaves src_1 and src_2 +static INLINE __m128i mm_zip_epi32_sse2(const __m128i *const src_1, + const __m128i *const src_2) { + const __m128i tmp_1 = _mm_unpacklo_epi32(*src_1, *src_2); + const __m128i tmp_2 = _mm_unpackhi_epi32(*src_1, *src_2); + return _mm_packs_epi32(tmp_1, tmp_2); +} + +static INLINE __m128i mm_round_epi32_sse2(const __m128i *const src, + const __m128i *const half_depth, + const int depth) { + const __m128i nearest_src = _mm_add_epi32(*src, *half_depth); + return _mm_srai_epi32(nearest_src, depth); +} + +static INLINE __m128i mm_round_epi16_sse2(const __m128i *const src, + const __m128i *const half_depth, + const int depth) { + const __m128i nearest_src = _mm_adds_epi16(*src, *half_depth); + return _mm_srai_epi16(nearest_src, depth); +} + +#endif // VPX_VPX_DSP_X86_CONVOLVE_SSE2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/convolve_ssse3.h b/media/libvpx/libvpx/vpx_dsp/x86/convolve_ssse3.h index e5d452f99ea4..8a4b1651336c 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/convolve_ssse3.h +++ b/media/libvpx/libvpx/vpx_dsp/x86/convolve_ssse3.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_X86_CONVOLVE_SSSE3_H_ -#define VPX_DSP_X86_CONVOLVE_SSSE3_H_ +#ifndef VPX_VPX_DSP_X86_CONVOLVE_SSSE3_H_ +#define VPX_VPX_DSP_X86_CONVOLVE_SSSE3_H_ #include #include // SSSE3 @@ -109,4 +109,4 @@ static INLINE __m128i convolve8_8_odd_offset_ssse3(const __m128i *const s, return temp; } -#endif // VPX_DSP_X86_CONVOLVE_SSSE3_H_ +#endif // VPX_VPX_DSP_X86_CONVOLVE_SSSE3_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/deblock_sse2.asm b/media/libvpx/libvpx/vpx_dsp/x86/deblock_sse2.asm index 97cb43b67113..9d8e5e3e0907 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/deblock_sse2.asm +++ b/media/libvpx/libvpx/vpx_dsp/x86/deblock_sse2.asm @@ -232,237 +232,6 @@ sym(vpx_post_proc_down_and_across_mb_row_sse2): ret %undef flimit -;void vpx_mbpost_proc_down_sse2(unsigned char *dst, -; int pitch, int rows, int cols,int flimit) -extern sym(vpx_rv) -global sym(vpx_mbpost_proc_down_sse2) PRIVATE -sym(vpx_mbpost_proc_down_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 128+16 - - ; unsigned char d[16][8] at [rsp] - ; create flimit2 at [rsp+128] - mov eax, dword ptr arg(4) ;flimit - mov [rsp+128], eax - mov [rsp+128+4], eax - mov [rsp+128+8], eax - mov [rsp+128+12], eax -%define flimit4 [rsp+128] - -%if ABI_IS_32BIT=0 - lea r8, [GLOBAL(sym(vpx_rv))] -%endif - - ;rows +=8; - add dword arg(2), 8 - - ;for(c=0; c - #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/x86/convolve.h" +#include "vpx_dsp/x86/convolve_avx2.h" // ----------------------------------------------------------------------------- // Copy and average @@ -20,7 +20,7 @@ void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, - int width, int h, int bd) { + int w, int h, int bd) { (void)filter; (void)x0_q4; (void)x_step_q4; @@ -28,8 +28,8 @@ void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride, (void)y_step_q4; (void)bd; - assert(width % 4 == 0); - if (width > 32) { // width = 64 + assert(w % 4 == 0); + if (w > 32) { // w = 64 do { const __m256i p0 = _mm256_loadu_si256((const __m256i *)src); const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16)); @@ -43,7 +43,7 @@ void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride, dst += dst_stride; h--; } while (h > 0); - } else if (width > 16) { // width = 32 + } else if (w > 16) { // w = 32 do { const __m256i p0 = _mm256_loadu_si256((const __m256i *)src); const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16)); @@ -53,7 +53,7 @@ void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride, dst += dst_stride; h--; } while (h > 0); - } else if (width > 8) { // width = 16 + } else if (w > 8) { // w = 16 __m256i p0, p1; do { p0 = _mm256_loadu_si256((const __m256i *)src); @@ -67,7 +67,7 @@ void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride, dst += dst_stride; h -= 2; } while (h > 0); - } else if (width > 4) { // width = 8 + } else if (w > 4) { // w = 8 __m128i p0, p1; do { p0 = _mm_loadu_si128((const __m128i *)src); @@ -81,7 +81,7 @@ void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride, dst += dst_stride; h -= 2; } while (h > 0); - } else { // width = 4 + } else { // w = 4 __m128i p0, p1; do { p0 = _mm_loadl_epi64((const __m128i *)src); @@ -102,7 +102,7 @@ void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t 
src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, - int width, int h, int bd) { + int w, int h, int bd) { (void)filter; (void)x0_q4; (void)x_step_q4; @@ -110,8 +110,8 @@ void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride, (void)y_step_q4; (void)bd; - assert(width % 4 == 0); - if (width > 32) { // width = 64 + assert(w % 4 == 0); + if (w > 32) { // w = 64 __m256i p0, p1, p2, p3, u0, u1, u2, u3; do { p0 = _mm256_loadu_si256((const __m256i *)src); @@ -130,7 +130,7 @@ void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride, dst += dst_stride; h--; } while (h > 0); - } else if (width > 16) { // width = 32 + } else if (w > 16) { // w = 32 __m256i p0, p1, u0, u1; do { p0 = _mm256_loadu_si256((const __m256i *)src); @@ -143,7 +143,7 @@ void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride, dst += dst_stride; h--; } while (h > 0); - } else if (width > 8) { // width = 16 + } else if (w > 8) { // w = 16 __m256i p0, p1, u0, u1; do { p0 = _mm256_loadu_si256((const __m256i *)src); @@ -158,7 +158,7 @@ void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride, dst += dst_stride << 1; h -= 2; } while (h > 0); - } else if (width > 4) { // width = 8 + } else if (w > 4) { // w = 8 __m128i p0, p1, u0, u1; do { p0 = _mm_loadu_si128((const __m128i *)src); @@ -172,7 +172,7 @@ void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride, dst += dst_stride << 1; h -= 2; } while (h > 0); - } else { // width = 4 + } else { // w = 4 __m128i p0, p1, u0, u1; do { p0 = _mm_loadl_epi64((const __m128i *)src); @@ -192,8 +192,6 @@ void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride, // ----------------------------------------------------------------------------- // Horizontal and vertical filtering -#define CONV8_ROUNDING_BITS (7) - static const uint8_t signal_pattern_0[32] = { 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9 }; @@ -210,6 +208,9 @@ static const uint8_t signal_pattern_2[32] = { 6, 7, 8, 9, 8, 9, 10, 11, static const uint32_t signal_index[8] = { 2, 3, 4, 5, 2, 3, 4, 5 }; +#define CONV8_ROUNDING_BITS (7) +#define CONV8_ROUNDING_NUM (1 << (CONV8_ROUNDING_BITS - 1)) + // ----------------------------------------------------------------------------- // Horizontal Filtering @@ -923,6 +924,196 @@ static void vpx_highbd_filter_block1d16_h8_avg_avx2( } while (height > 0); } +static void vpx_highbd_filter_block1d4_h4_avx2( + const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { + // We extract the middle four elements of the kernel into two registers in + // the form + // ... k[3] k[2] k[3] k[2] + // ... k[5] k[4] k[5] k[4] + // Then we shuffle the source into + // ... s[1] s[0] s[0] s[-1] + // ... s[3] s[2] s[2] s[1] + // Calling multiply and add gives us half of the sum. Calling add on the two + // halves gives us the output. Since avx2 allows us to use 256-bit buffer, we + // can do this two rows at a time. 
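+  // Editorial sketch (ours, not part of the upstream patch): per output
+  // pixel x, this pass reduces to the scalar form
+  //   sum    = k[2] * s[x - 1] + k[3] * s[x] + k[4] * s[x + 1]
+  //          + k[5] * s[x + 2];
+  //   out[x] = clamp((sum + 64) >> 7, 0, (1 << bd) - 1);
+  // with 64 == CONV8_ROUNDING_NUM and 7 == CONV8_ROUNDING_BITS; the clamp is
+  // what _mm256_packus_epi32 followed by _mm256_min_epi16 implements below.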
+ + __m256i src_reg, src_reg_shift_0, src_reg_shift_2; + __m256i res_reg; + __m256i idx_shift_0 = + _mm256_setr_epi8(0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9, 0, 1, 2, + 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9); + __m256i idx_shift_2 = + _mm256_setr_epi8(4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 4, + 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13); + + __m128i kernel_reg_128; // Kernel + __m256i kernel_reg, kernel_reg_23, + kernel_reg_45; // Segments of the kernel used + const __m256i reg_round = + _mm256_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding + const __m256i reg_max = _mm256_set1_epi16((1 << bd) - 1); + const ptrdiff_t unrolled_src_stride = src_stride << 1; + const ptrdiff_t unrolled_dst_stride = dst_stride << 1; + int h; + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); + kernel_reg_23 = _mm256_shuffle_epi32(kernel_reg, 0x55); + kernel_reg_45 = _mm256_shuffle_epi32(kernel_reg, 0xaa); + + for (h = height; h >= 2; h -= 2) { + // Load the source + src_reg = mm256_loadu2_si128(src_ptr, src_ptr + src_stride); + src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); + src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); + + // Get the output + res_reg = mm256_madd_add_epi32(&src_reg_shift_0, &src_reg_shift_2, + &kernel_reg_23, &kernel_reg_45); + + // Round the result + res_reg = mm256_round_epi32(&res_reg, ®_round, CONV8_ROUNDING_BITS); + + // Finally combine to get the final dst + res_reg = _mm256_packus_epi32(res_reg, res_reg); + res_reg = _mm256_min_epi16(res_reg, reg_max); + mm256_storeu2_epi64((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), + &res_reg); + + src_ptr += unrolled_src_stride; + dst_ptr += unrolled_dst_stride; + } + + // Repeat for the last row if needed + if (h > 0) { + // Load the source + src_reg = mm256_loadu2_si128(src_ptr, src_ptr + 4); + src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); + src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); + + // Get the output + res_reg = mm256_madd_add_epi32(&src_reg_shift_0, &src_reg_shift_2, + &kernel_reg_23, &kernel_reg_45); + + // Round the result + res_reg = mm256_round_epi32(&res_reg, ®_round, CONV8_ROUNDING_BITS); + + // Finally combine to get the final dst + res_reg = _mm256_packus_epi32(res_reg, res_reg); + res_reg = _mm256_min_epi16(res_reg, reg_max); + _mm_storel_epi64((__m128i *)dst_ptr, _mm256_castsi256_si128(res_reg)); + } +} + +static void vpx_highbd_filter_block1d8_h4_avx2( + const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { + // We will extract the middle four elements of the kernel into two registers + // in the form + // ... k[3] k[2] k[3] k[2] + // ... k[5] k[4] k[5] k[4] + // Then we shuffle the source into + // ... s[1] s[0] s[0] s[-1] + // ... s[3] s[2] s[2] s[1] + // Calling multiply and add gives us half of the sum of the first half. + // Calling add gives us first half of the output. Repat again to get the whole + // output. Since avx2 allows us to use 256-bit buffer, we can do this two rows + // at a time. 
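+  // Editorial note (ours, not part of the upstream patch): each 8-wide row
+  // is computed as two 4-wide halves, columns 0..3 and 4..7, two rows at a
+  // time. _mm256_packus_epi32(res_first, res_last) then merges the 32-bit
+  // sums of the two halves back into eight 16-bit pixels per row before the
+  // (1 << bd) - 1 clamp.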
+ + __m256i src_reg, src_reg_shift_0, src_reg_shift_2; + __m256i res_reg, res_first, res_last; + __m256i idx_shift_0 = + _mm256_setr_epi8(0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9, 0, 1, 2, + 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9); + __m256i idx_shift_2 = + _mm256_setr_epi8(4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 4, + 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13); + + __m128i kernel_reg_128; // Kernel + __m256i kernel_reg, kernel_reg_23, + kernel_reg_45; // Segments of the kernel used + const __m256i reg_round = + _mm256_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding + const __m256i reg_max = _mm256_set1_epi16((1 << bd) - 1); + const ptrdiff_t unrolled_src_stride = src_stride << 1; + const ptrdiff_t unrolled_dst_stride = dst_stride << 1; + int h; + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); + kernel_reg_23 = _mm256_shuffle_epi32(kernel_reg, 0x55); + kernel_reg_45 = _mm256_shuffle_epi32(kernel_reg, 0xaa); + + for (h = height; h >= 2; h -= 2) { + // Load the source + src_reg = mm256_loadu2_si128(src_ptr, src_ptr + src_stride); + src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); + src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); + + // Result for first half + res_first = mm256_madd_add_epi32(&src_reg_shift_0, &src_reg_shift_2, + &kernel_reg_23, &kernel_reg_45); + + // Do again to get the second half of dst + // Load the source + src_reg = mm256_loadu2_si128(src_ptr + 4, src_ptr + src_stride + 4); + src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); + src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); + + // Result for second half + res_last = mm256_madd_add_epi32(&src_reg_shift_0, &src_reg_shift_2, + &kernel_reg_23, &kernel_reg_45); + + // Round each result + res_first = mm256_round_epi32(&res_first, ®_round, CONV8_ROUNDING_BITS); + res_last = mm256_round_epi32(&res_last, ®_round, CONV8_ROUNDING_BITS); + + // Finally combine to get the final dst + res_reg = _mm256_packus_epi32(res_first, res_last); + res_reg = _mm256_min_epi16(res_reg, reg_max); + mm256_store2_si128((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), + &res_reg); + + src_ptr += unrolled_src_stride; + dst_ptr += unrolled_dst_stride; + } + + // Repeat for the last row if needed + if (h > 0) { + src_reg = mm256_loadu2_si128(src_ptr, src_ptr + 4); + src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); + src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); + + res_reg = mm256_madd_add_epi32(&src_reg_shift_0, &src_reg_shift_2, + &kernel_reg_23, &kernel_reg_45); + + res_reg = mm256_round_epi32(&res_reg, ®_round, CONV8_ROUNDING_BITS); + + res_reg = _mm256_packus_epi32(res_reg, res_reg); + res_reg = _mm256_min_epi16(res_reg, reg_max); + + mm256_storeu2_epi64((__m128i *)dst_ptr, (__m128i *)(dst_ptr + 4), &res_reg); + } +} + +static void vpx_highbd_filter_block1d16_h4_avx2( + const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { + vpx_highbd_filter_block1d8_h4_avx2(src_ptr, src_stride, dst_ptr, dst_stride, + height, kernel, bd); + vpx_highbd_filter_block1d8_h4_avx2(src_ptr + 8, src_stride, dst_ptr + 8, + dst_stride, height, kernel, bd); +} + static void vpx_highbd_filter_block1d8_v8_avg_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, 
uint32_t height, const int16_t *filter, int bd) { @@ -1058,39 +1249,235 @@ static void vpx_highbd_filter_block1d8_v2_avg_avx2( } while (height > 0); } -void vpx_highbd_filter_block1d4_h8_sse2(const uint16_t *, ptrdiff_t, uint16_t *, - ptrdiff_t, uint32_t, const int16_t *, - int); -void vpx_highbd_filter_block1d4_h2_sse2(const uint16_t *, ptrdiff_t, uint16_t *, - ptrdiff_t, uint32_t, const int16_t *, - int); -void vpx_highbd_filter_block1d4_v8_sse2(const uint16_t *, ptrdiff_t, uint16_t *, - ptrdiff_t, uint32_t, const int16_t *, - int); -void vpx_highbd_filter_block1d4_v2_sse2(const uint16_t *, ptrdiff_t, uint16_t *, - ptrdiff_t, uint32_t, const int16_t *, - int); +static void vpx_highbd_filter_block1d4_v4_avx2( + const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { + // We will load two rows of pixels and rearrange them into the form + // ... s[1,0] s[0,0] s[0,0] s[-1,0] + // so that we can call multiply and add with the kernel to get a partial + // output. Then we can call add with another row to get the output. + + // Register for source s[-1:3, :] + __m256i src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source. lo is first half, hi second + __m256i src_reg_m10, src_reg_01, src_reg_12, src_reg_23; + __m256i src_reg_m1001, src_reg_1223; + + // Result after multiply and add + __m256i res_reg; + + __m128i kernel_reg_128; // Kernel + __m256i kernel_reg, kernel_reg_23, kernel_reg_45; // Segments of kernel used + + const __m256i reg_round = + _mm256_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding + const __m256i reg_max = _mm256_set1_epi16((1 << bd) - 1); + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); + kernel_reg_23 = _mm256_shuffle_epi32(kernel_reg, 0x55); + kernel_reg_45 = _mm256_shuffle_epi32(kernel_reg, 0xaa); + + // Row -1 to row 0 + src_reg_m10 = mm256_loadu2_epi64((const __m128i *)src_ptr, + (const __m128i *)(src_ptr + src_stride)); + + // Row 0 to row 1 + src_reg_1 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2))); + src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); + + // First three rows + src_reg_m1001 = _mm256_unpacklo_epi16(src_reg_m10, src_reg_01); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm256_castsi128_si256( + _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 3))); + + src_reg_12 = _mm256_inserti128_si256(src_reg_1, + _mm256_castsi256_si128(src_reg_2), 1); + + src_reg_3 = _mm256_castsi128_si256( + _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 4))); + + src_reg_23 = _mm256_inserti128_si256(src_reg_2, + _mm256_castsi256_si128(src_reg_3), 1); + + // Last three rows + src_reg_1223 = _mm256_unpacklo_epi16(src_reg_12, src_reg_23); + + // Output + res_reg = mm256_madd_add_epi32(&src_reg_m1001, &src_reg_1223, + &kernel_reg_23, &kernel_reg_45); + + // Round the words + res_reg = mm256_round_epi32(&res_reg, &reg_round, CONV8_ROUNDING_BITS); + + // Combine to get the result + res_reg = _mm256_packus_epi32(res_reg, res_reg); + res_reg = _mm256_min_epi16(res_reg, reg_max); + + // Save the result + mm256_storeu2_epi64((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), + &res_reg); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + +
src_reg_m1001 = src_reg_1223; + src_reg_1 = src_reg_3; + } +} + +static void vpx_highbd_filter_block1d8_v4_avx2( + const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { + // We will load two rows of pixels and rearrange them into the form + // ... s[1,0] s[0,0] s[0,0] s[-1,0] + // so that we can call multiply and add with the kernel to get a partial + // output. Then we can call add with another row to get the output. + + // Register for source s[-1:3, :] + __m256i src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source. lo is first half, hi second + __m256i src_reg_m10, src_reg_01, src_reg_12, src_reg_23; + __m256i src_reg_m1001_lo, src_reg_m1001_hi, src_reg_1223_lo, src_reg_1223_hi; + + __m128i kernel_reg_128; // Kernel + __m256i kernel_reg, kernel_reg_23, kernel_reg_45; // Segments of kernel + + // Result after multiply and add + __m256i res_reg, res_reg_lo, res_reg_hi; + + const __m256i reg_round = + _mm256_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding + const __m256i reg_max = _mm256_set1_epi16((1 << bd) - 1); + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); + kernel_reg_23 = _mm256_shuffle_epi32(kernel_reg, 0x55); + kernel_reg_45 = _mm256_shuffle_epi32(kernel_reg, 0xaa); + + // Row -1 to row 0 + src_reg_m10 = mm256_loadu2_si128((const __m128i *)src_ptr, + (const __m128i *)(src_ptr + src_stride)); + + // Row 0 to row 1 + src_reg_1 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2))); + src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); + + // First three rows + src_reg_m1001_lo = _mm256_unpacklo_epi16(src_reg_m10, src_reg_01); + src_reg_m1001_hi = _mm256_unpackhi_epi16(src_reg_m10, src_reg_01); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3))); + + src_reg_12 = _mm256_inserti128_si256(src_reg_1, + _mm256_castsi256_si128(src_reg_2), 1); + + src_reg_3 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4))); + + src_reg_23 = _mm256_inserti128_si256(src_reg_2, + _mm256_castsi256_si128(src_reg_3), 1); + + // Last three rows + src_reg_1223_lo = _mm256_unpacklo_epi16(src_reg_12, src_reg_23); + src_reg_1223_hi = _mm256_unpackhi_epi16(src_reg_12, src_reg_23); + + // Output from first half + res_reg_lo = mm256_madd_add_epi32(&src_reg_m1001_lo, &src_reg_1223_lo, + &kernel_reg_23, &kernel_reg_45); + + // Output from second half + res_reg_hi = mm256_madd_add_epi32(&src_reg_m1001_hi, &src_reg_1223_hi, + &kernel_reg_23, &kernel_reg_45); + + // Round the words + res_reg_lo = + mm256_round_epi32(&res_reg_lo, &reg_round, CONV8_ROUNDING_BITS); + res_reg_hi = + mm256_round_epi32(&res_reg_hi, &reg_round, CONV8_ROUNDING_BITS); + + // Combine to get the result + res_reg = _mm256_packus_epi32(res_reg_lo, res_reg_hi); + res_reg = _mm256_min_epi16(res_reg, reg_max); + + // Save the result + mm256_store2_si128((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), + &res_reg); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + + src_reg_m1001_lo = src_reg_1223_lo; + src_reg_m1001_hi = src_reg_1223_hi; + src_reg_1 = src_reg_3; + } +} + +static void
vpx_highbd_filter_block1d16_v4_avx2( + const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { + vpx_highbd_filter_block1d8_v4_avx2(src_ptr, src_stride, dst_ptr, dst_stride, + height, kernel, bd); + vpx_highbd_filter_block1d8_v4_avx2(src_ptr + 8, src_stride, dst_ptr + 8, + dst_stride, height, kernel, bd); +} + +// From vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm. +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2; + +// From vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm. +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2; + #define vpx_highbd_filter_block1d4_h8_avx2 vpx_highbd_filter_block1d4_h8_sse2 #define vpx_highbd_filter_block1d4_h2_avx2 vpx_highbd_filter_block1d4_h2_sse2 #define vpx_highbd_filter_block1d4_v8_avx2 vpx_highbd_filter_block1d4_v8_sse2 #define vpx_highbd_filter_block1d4_v2_avx2 vpx_highbd_filter_block1d4_v2_sse2 -HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , avx2); -HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , avx2); -HIGH_FUN_CONV_2D(, avx2); +// Use the [vh]8 version because there is no [vh]4 implementation. +#define vpx_highbd_filter_block1d16_v4_avg_avx2 \ + vpx_highbd_filter_block1d16_v8_avg_avx2 +#define vpx_highbd_filter_block1d16_h4_avg_avx2 \ + vpx_highbd_filter_block1d16_h8_avg_avx2 +#define vpx_highbd_filter_block1d8_v4_avg_avx2 \ + vpx_highbd_filter_block1d8_v8_avg_avx2 +#define vpx_highbd_filter_block1d8_h4_avg_avx2 \ + vpx_highbd_filter_block1d8_h8_avg_avx2 +#define vpx_highbd_filter_block1d4_v4_avg_avx2 \ + vpx_highbd_filter_block1d4_v8_avg_avx2 +#define vpx_highbd_filter_block1d4_h4_avg_avx2 \ + vpx_highbd_filter_block1d4_h8_avg_avx2 + +HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , avx2, 0); +HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v, + src - src_stride * (num_taps / 2 - 1), , avx2, 0); +HIGH_FUN_CONV_2D(, avx2, 0); + +// From vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm. +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2; + +// From vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm. 
+highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2; -void vpx_highbd_filter_block1d4_h8_avg_sse2(const uint16_t *, ptrdiff_t, - uint16_t *, ptrdiff_t, uint32_t, - const int16_t *, int); -void vpx_highbd_filter_block1d4_h2_avg_sse2(const uint16_t *, ptrdiff_t, - uint16_t *, ptrdiff_t, uint32_t, - const int16_t *, int); -void vpx_highbd_filter_block1d4_v8_avg_sse2(const uint16_t *, ptrdiff_t, - uint16_t *, ptrdiff_t, uint32_t, - const int16_t *, int); -void vpx_highbd_filter_block1d4_v2_avg_sse2(const uint16_t *, ptrdiff_t, - uint16_t *, ptrdiff_t, uint32_t, - const int16_t *, int); #define vpx_highbd_filter_block1d4_h8_avg_avx2 \ vpx_highbd_filter_block1d4_h8_avg_sse2 #define vpx_highbd_filter_block1d4_h2_avg_avx2 \ @@ -1100,9 +1487,9 @@ void vpx_highbd_filter_block1d4_v2_avg_sse2(const uint16_t *, ptrdiff_t, #define vpx_highbd_filter_block1d4_v2_avg_avx2 \ vpx_highbd_filter_block1d4_v2_avg_sse2 -HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, avx2); -HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_, - avx2); -HIGH_FUN_CONV_2D(avg_, avx2); +HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, avx2, 1); +HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, + src - src_stride * (num_taps / 2 - 1), avg_, avx2, 1); +HIGH_FUN_CONV_2D(avg_, avx2, 1); #undef HIGHBD_FUNC diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct16x16_add_sse4.c b/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct16x16_add_sse4.c index de097c66a698..7898ee12c849 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct16x16_add_sse4.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct16x16_add_sse4.c @@ -53,7 +53,7 @@ static INLINE void highbd_idct16_4col_stage6(const __m128i *const in, out[15] = in[15]; } -static INLINE void highbd_idct16_4col(__m128i *const io /*io[16]*/) { +void vpx_highbd_idct16_4col_sse4_1(__m128i *const io /*io[16]*/) { __m128i step1[16], step2[16]; // stage 2 @@ -233,7 +233,7 @@ void vpx_highbd_idct16x16_256_add_sse4_1(const tran_low_t *input, in = all[i]; highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]); highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]); - highbd_idct16_4col(in); + vpx_highbd_idct16_4col_sse4_1(in); input += 4 * 16; } @@ -243,7 +243,7 @@ void vpx_highbd_idct16x16_256_add_sse4_1(const tran_low_t *input, transpose_32bit_4x4(all[1] + i, out + 4); transpose_32bit_4x4(all[2] + i, out + 8); transpose_32bit_4x4(all[3] + i, out + 12); - highbd_idct16_4col(out); + vpx_highbd_idct16_4col_sse4_1(out); for (j = 0; j < 16; ++j) { highbd_write_buffer_4(dest + j * stride, out[j], bd); diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct4x4_add_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct4x4_add_sse2.c index 2e54d24736e6..b9c8884f992e 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct4x4_add_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct4x4_add_sse2.c @@ -112,8 +112,8 @@ void vpx_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint16_t *dest, min_input = _mm_min_epi16(min_input, _mm_srli_si128(min_input, 4)); max_input = _mm_max_epi16(max_input, _mm_srli_si128(max_input, 2)); min_input = _mm_min_epi16(min_input, _mm_srli_si128(min_input, 2)); - max = _mm_extract_epi16(max_input, 0); - min = _mm_extract_epi16(min_input, 0); + max = (int16_t)_mm_extract_epi16(max_input, 0); + min = (int16_t)_mm_extract_epi16(min_input, 0); } if (bd == 8 || (max < 4096 && min >= -4096)) { diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct4x4_add_sse4.c 
b/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct4x4_add_sse4.c index 38e64f3bc946..fe74d272add4 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct4x4_add_sse4.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct4x4_add_sse4.c @@ -16,28 +16,6 @@ #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/transpose_sse2.h" -static INLINE void highbd_idct4(__m128i *const io) { - __m128i temp[2], step[4]; - - transpose_32bit_4x4(io, io); - - // stage 1 - temp[0] = _mm_add_epi32(io[0], io[2]); // input[0] + input[2] - extend_64bit(temp[0], temp); - step[0] = multiplication_round_shift_sse4_1(temp, cospi_16_64); - temp[0] = _mm_sub_epi32(io[0], io[2]); // input[0] - input[2] - extend_64bit(temp[0], temp); - step[1] = multiplication_round_shift_sse4_1(temp, cospi_16_64); - highbd_butterfly_sse4_1(io[1], io[3], cospi_24_64, cospi_8_64, &step[2], - &step[3]); - - // stage 2 - io[0] = _mm_add_epi32(step[0], step[3]); // step[0] + step[3] - io[1] = _mm_add_epi32(step[1], step[2]); // step[1] + step[2] - io[2] = _mm_sub_epi32(step[1], step[2]); // step[1] - step[2] - io[3] = _mm_sub_epi32(step[0], step[3]); // step[0] - step[3] -} - void vpx_highbd_idct4x4_16_add_sse4_1(const tran_low_t *input, uint16_t *dest, int stride, int bd) { __m128i io[4]; @@ -59,8 +37,8 @@ void vpx_highbd_idct4x4_16_add_sse4_1(const tran_low_t *input, uint16_t *dest, io[0] = _mm_srai_epi16(io_short[0], 4); io[1] = _mm_srai_epi16(io_short[1], 4); } else { - highbd_idct4(io); - highbd_idct4(io); + highbd_idct4_sse4_1(io); + highbd_idct4_sse4_1(io); io[0] = wraplow_16bit_shift4(io[0], io[1], _mm_set1_epi32(8)); io[1] = wraplow_16bit_shift4(io[2], io[3], _mm_set1_epi32(8)); } diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse2.c index 909a6b794850..bb7a510e15bf 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse2.c @@ -124,8 +124,8 @@ void vpx_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint16_t *dest, io_short[6] = _mm_packs_epi32(io[10], io[14]); io_short[7] = _mm_packs_epi32(io[11], io[15]); - idct8_sse2(io_short); - idct8_sse2(io_short); + vpx_idct8_sse2(io_short); + vpx_idct8_sse2(io_short); round_shift_8x8(io_short, io); } else { __m128i temp[4]; diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse4.c b/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse4.c index ae391b2c02c9..8b2e3d2415eb 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse4.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse4.c @@ -17,7 +17,7 @@ #include "vpx_dsp/x86/inv_txfm_ssse3.h" #include "vpx_dsp/x86/transpose_sse2.h" -static void highbd_idct8x8_half1d(__m128i *const io) { +void vpx_highbd_idct8x8_half1d_sse4_1(__m128i *const io) { __m128i step1[8], step2[8]; transpose_32bit_4x4x2(io, io); @@ -126,13 +126,13 @@ void vpx_highbd_idct8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest, io_short[6] = _mm_packs_epi32(io[10], io[14]); io_short[7] = _mm_packs_epi32(io[11], io[15]); - idct8_sse2(io_short); - idct8_sse2(io_short); + vpx_idct8_sse2(io_short); + vpx_idct8_sse2(io_short); round_shift_8x8(io_short, io); } else { __m128i temp[4]; - highbd_idct8x8_half1d(io); + vpx_highbd_idct8x8_half1d_sse4_1(io); io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0)); io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4)); @@ -142,7 +142,7 @@ void vpx_highbd_idct8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest, io[14] = 
_mm_load_si128((const __m128i *)(input + 6 * 8 + 4)); io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0)); io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4)); - highbd_idct8x8_half1d(&io[8]); + vpx_highbd_idct8x8_half1d_sse4_1(&io[8]); temp[0] = io[4]; temp[1] = io[5]; @@ -152,13 +152,13 @@ void vpx_highbd_idct8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest, io[5] = io[9]; io[6] = io[10]; io[7] = io[11]; - highbd_idct8x8_half1d(io); + vpx_highbd_idct8x8_half1d_sse4_1(io); io[8] = temp[0]; io[9] = temp[1]; io[10] = temp[2]; io[11] = temp[3]; - highbd_idct8x8_half1d(&io[8]); + vpx_highbd_idct8x8_half1d_sse4_1(&io[8]); highbd_idct8x8_final_round(io); } diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_intrapred_intrin_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/highbd_intrapred_intrin_sse2.c index 2051381aa880..43634aea3af0 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_intrapred_intrin_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_intrapred_intrin_sse2.c @@ -460,7 +460,8 @@ void vpx_highbd_d153_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride, const int J = left[1]; const int K = left[2]; const int L = left[3]; - const __m128i XXXXXABC = _mm_loadu_si128((const __m128i *)(above - 5)); + const __m128i XXXXXABC = _mm_castps_si128( + _mm_loadh_pi(_mm_setzero_ps(), (const __m64 *)(above - 1))); const __m128i LXXXXABC = _mm_insert_epi16(XXXXXABC, L, 0); const __m128i LKXXXABC = _mm_insert_epi16(LXXXXABC, K, 1); const __m128i LKJXXABC = _mm_insert_epi16(LKXXXABC, J, 2); diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_intrapred_intrin_ssse3.c b/media/libvpx/libvpx/vpx_dsp/x86/highbd_intrapred_intrin_ssse3.c index b9dcef205bf8..d673fac493d3 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_intrapred_intrin_ssse3.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_intrapred_intrin_ssse3.c @@ -170,9 +170,9 @@ void vpx_highbd_d45_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t stride, } } -DECLARE_ALIGNED(16, static const uint8_t, rotate_right_epu16[16]) = { - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1 -}; +DECLARE_ALIGNED(16, static const uint8_t, + rotate_right_epu16[16]) = { 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 0, 1 }; static INLINE __m128i rotr_epu16(__m128i *a, const __m128i *rotrw) { *a = _mm_shuffle_epi8(*a, *rotrw); diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm b/media/libvpx/libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm index c61b62104f83..caf506ac07e8 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm @@ -256,7 +256,7 @@ cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above REP_RET INIT_XMM sse2 -cglobal highbd_tm_predictor_4x4, 5, 5, 6, dst, stride, above, left, bps +cglobal highbd_tm_predictor_4x4, 5, 5, 6, dst, stride, above, left, bd movd m1, [aboveq-2] movq m0, [aboveq] pshuflw m1, m1, 0x0 @@ -264,7 +264,7 @@ cglobal highbd_tm_predictor_4x4, 5, 5, 6, dst, stride, above, left, bps movlhps m1, m1 ; tl tl tl tl tl tl tl tl ; Get the values to compute the maximum value at this bit depth pcmpeqw m3, m3 - movd m4, bpsd + movd m4, bdd psubw m0, m1 ; t1-tl t2-tl t3-tl t4-tl psllw m3, m4 pcmpeqw m2, m2 @@ -295,7 +295,7 @@ cglobal highbd_tm_predictor_4x4, 5, 5, 6, dst, stride, above, left, bps RET INIT_XMM sse2 -cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one +cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bd, one movd m1, [aboveq-2] mova m0, [aboveq] pshuflw m1, m1, 0x0 
@@ -304,7 +304,7 @@ cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one pxor m3, m3 pxor m4, m4 pinsrw m3, oned, 0 - pinsrw m4, bpsd, 0 + pinsrw m4, bdd, 0 pshuflw m3, m3, 0x0 DEFINE_ARGS dst, stride, line, left punpcklqdq m3, m3 @@ -339,14 +339,14 @@ cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one REP_RET INIT_XMM sse2 -cglobal highbd_tm_predictor_16x16, 5, 5, 8, dst, stride, above, left, bps +cglobal highbd_tm_predictor_16x16, 5, 5, 8, dst, stride, above, left, bd movd m2, [aboveq-2] mova m0, [aboveq] mova m1, [aboveq+16] pshuflw m2, m2, 0x0 ; Get the values to compute the maximum value at this bit depth pcmpeqw m3, m3 - movd m4, bpsd + movd m4, bdd punpcklqdq m2, m2 psllw m3, m4 pcmpeqw m5, m5 @@ -386,7 +386,7 @@ cglobal highbd_tm_predictor_16x16, 5, 5, 8, dst, stride, above, left, bps REP_RET INIT_XMM sse2 -cglobal highbd_tm_predictor_32x32, 5, 5, 8, dst, stride, above, left, bps +cglobal highbd_tm_predictor_32x32, 5, 5, 8, dst, stride, above, left, bd movd m0, [aboveq-2] mova m1, [aboveq] mova m2, [aboveq+16] @@ -395,7 +395,7 @@ cglobal highbd_tm_predictor_32x32, 5, 5, 8, dst, stride, above, left, bps pshuflw m0, m0, 0x0 ; Get the values to compute the maximum value at this bit depth pcmpeqw m5, m5 - movd m6, bpsd + movd m6, bdd psllw m5, m6 pcmpeqw m7, m7 pxor m6, m6 ; min possible value diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse2.h b/media/libvpx/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse2.h index e0f7495521a4..78cf9111d939 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse2.h +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse2.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_X86_HIGHBD_INV_TXFM_SSE2_H_ -#define VPX_DSP_X86_HIGHBD_INV_TXFM_SSE2_H_ +#ifndef VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE2_H_ +#define VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE2_H_ #include <emmintrin.h> // SSE2 @@ -19,6 +19,10 @@ #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" +// Note: There is no 64-bit bit-level shifting SIMD instruction. All +// coefficients are left shifted by 2, so that dct_const_round_shift() can be +// done by right shifting 2 bytes. + static INLINE void extend_64bit(const __m128i in, __m128i *const out /*out[2]*/) { out[0] = _mm_unpacklo_epi32(in, in); // 0, 0, 1, 1 @@ -397,4 +401,4 @@ static INLINE void highbd_write_buffer_4(uint16_t *const dest, const __m128i in, recon_and_store_4(out, dest, bd); } -#endif // VPX_DSP_X86_HIGHBD_INV_TXFM_SSE2_H_ +#endif // VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse4.h b/media/libvpx/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse4.h index 9c8eef40f706..f446bb13f37f 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse4.h +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse4.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree.
*/ -#ifndef VPX_DSP_X86_HIGHBD_INV_TXFM_SSE4_H_ -#define VPX_DSP_X86_HIGHBD_INV_TXFM_SSE4_H_ +#ifndef VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE4_H_ +#define VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE4_H_ #include <smmintrin.h> // SSE4.1 @@ -84,4 +84,29 @@ static INLINE void highbd_partial_butterfly_sse4_1(const __m128i in, *out1 = multiplication_round_shift_sse4_1(temp, c1); } -#endif // VPX_DSP_X86_HIGHBD_INV_TXFM_SSE4_H_ +static INLINE void highbd_idct4_sse4_1(__m128i *const io) { + __m128i temp[2], step[4]; + + transpose_32bit_4x4(io, io); + + // stage 1 + temp[0] = _mm_add_epi32(io[0], io[2]); // input[0] + input[2] + extend_64bit(temp[0], temp); + step[0] = multiplication_round_shift_sse4_1(temp, cospi_16_64); + temp[0] = _mm_sub_epi32(io[0], io[2]); // input[0] - input[2] + extend_64bit(temp[0], temp); + step[1] = multiplication_round_shift_sse4_1(temp, cospi_16_64); + highbd_butterfly_sse4_1(io[1], io[3], cospi_24_64, cospi_8_64, &step[2], + &step[3]); + + // stage 2 + io[0] = _mm_add_epi32(step[0], step[3]); // step[0] + step[3] + io[1] = _mm_add_epi32(step[1], step[2]); // step[1] + step[2] + io[2] = _mm_sub_epi32(step[1], step[2]); // step[1] - step[2] + io[3] = _mm_sub_epi32(step[0], step[3]); // step[0] - step[3] +} + +void vpx_highbd_idct8x8_half1d_sse4_1(__m128i *const io); +void vpx_highbd_idct16_4col_sse4_1(__m128i *const io /*io[16]*/); + +#endif // VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE4_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c index ec22db9f4cda..d265fc1a9217 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c @@ -47,13 +47,13 @@ static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) { // TODO(debargha, peter): Break up large functions into smaller ones // in this file.
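The loop-filter hunks that follow rename p to pitch and the threshold locals to blimit_v/limit_v/thresh_v, but the bit-depth handling is unchanged: the 8-bit blimit/limit/thresh tables are widened and shifted up to match bd. A minimal scalar sketch of that scaling (scale_lf_threshold is a hypothetical helper, not in the patch):

#include <stdint.h>

// Hypothetical scalar equivalent (not in the patch) of the threshold setup
// done with _mm_slli_epi16() in the loop filters below: an 8-bit threshold
// is reused at bd == 10 or bd == 12 by shifting it left by (bd - 8) bits,
// i.e. by 0, 2, or 4.
static int16_t scale_lf_threshold(uint8_t t8, int bd) {
  return (int16_t)((int16_t)t8 << (bd - 8));
}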
-void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, int bd) { +void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int bd) { const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi16(1); - __m128i blimit, limit, thresh; + __m128i blimit_v, limit_v, thresh_v; __m128i q7, p7, q6, p6, q5, p5, q4, p4, q3, p3, q2, p2, q1, p1, q0, p0; __m128i mask, hev, flat, flat2, abs_p1p0, abs_q1q0; __m128i ps1, qs1, ps0, qs0; @@ -70,35 +70,35 @@ void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, __m128i eight, four; if (bd == 8) { - blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero); - limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero); - thresh = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero); + blimit_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero); + limit_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero); + thresh_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero); } else if (bd == 10) { - blimit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero), 2); - limit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero), 2); - thresh = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero), 2); + blimit_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero), 2); + limit_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero), 2); + thresh_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero), 2); } else { // bd == 12 - blimit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero), 4); - limit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero), 4); - thresh = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero), 4); + blimit_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero), 4); + limit_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero), 4); + thresh_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero), 4); } - q4 = _mm_load_si128((__m128i *)(s + 4 * p)); - p4 = _mm_load_si128((__m128i *)(s - 5 * p)); - q3 = _mm_load_si128((__m128i *)(s + 3 * p)); - p3 = _mm_load_si128((__m128i *)(s - 4 * p)); - q2 = _mm_load_si128((__m128i *)(s + 2 * p)); - p2 = _mm_load_si128((__m128i *)(s - 3 * p)); - q1 = _mm_load_si128((__m128i *)(s + 1 * p)); - p1 = _mm_load_si128((__m128i *)(s - 2 * p)); - q0 = _mm_load_si128((__m128i *)(s + 0 * p)); - p0 = _mm_load_si128((__m128i *)(s - 1 * p)); + q4 = _mm_load_si128((__m128i *)(s + 4 * pitch)); + p4 = _mm_load_si128((__m128i *)(s - 5 * pitch)); + q3 = _mm_load_si128((__m128i *)(s + 3 * pitch)); + p3 = _mm_load_si128((__m128i *)(s - 4 * pitch)); + q2 = _mm_load_si128((__m128i *)(s + 2 * pitch)); + p2 = _mm_load_si128((__m128i *)(s - 3 * pitch)); + q1 = _mm_load_si128((__m128i *)(s + 1 * pitch)); + p1 = _mm_load_si128((__m128i *)(s - 2 * pitch)); + q0 = _mm_load_si128((__m128i *)(s + 0 * pitch)); + p0 = _mm_load_si128((__m128i *)(s - 1 * pitch)); // highbd_filter_mask abs_p1p0 = _mm_or_si128(_mm_subs_epu16(p1, p0), _mm_subs_epu16(p0, p1)); @@ -111,14 +111,14 @@ void 
vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, // highbd_hev_mask (in C code this is actually called from highbd_filter4) flat = _mm_max_epi16(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu16(flat, thresh); + hev = _mm_subs_epu16(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi16(hev, zero), ffff); abs_p0q0 = _mm_adds_epu16(abs_p0q0, abs_p0q0); // abs(p0 - q0) * 2 abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1); // abs(p1 - q1) / 2 - mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit); + mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff); - mask = _mm_and_si128(mask, _mm_adds_epu16(limit, one)); + mask = _mm_and_si128(mask, _mm_adds_epu16(limit_v, one)); work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p1, p0), _mm_subs_epu16(p0, p1)), _mm_or_si128(_mm_subs_epu16(q1, q0), _mm_subs_epu16(q0, q1))); @@ -132,7 +132,7 @@ void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, _mm_or_si128(_mm_subs_epu16(q3, q2), _mm_subs_epu16(q2, q3))); mask = _mm_max_epi16(work, mask); - mask = _mm_subs_epu16(mask, limit); + mask = _mm_subs_epu16(mask, limit_v); mask = _mm_cmpeq_epi16(mask, zero); // return ~mask // lp filter @@ -207,12 +207,12 @@ void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, // (because, in both vars, each block of 16 either all 1s or all 0s) flat = _mm_and_si128(flat, mask); - p5 = _mm_load_si128((__m128i *)(s - 6 * p)); - q5 = _mm_load_si128((__m128i *)(s + 5 * p)); - p6 = _mm_load_si128((__m128i *)(s - 7 * p)); - q6 = _mm_load_si128((__m128i *)(s + 6 * p)); - p7 = _mm_load_si128((__m128i *)(s - 8 * p)); - q7 = _mm_load_si128((__m128i *)(s + 7 * p)); + p5 = _mm_load_si128((__m128i *)(s - 6 * pitch)); + q5 = _mm_load_si128((__m128i *)(s + 5 * pitch)); + p6 = _mm_load_si128((__m128i *)(s - 7 * pitch)); + q6 = _mm_load_si128((__m128i *)(s + 6 * pitch)); + p7 = _mm_load_si128((__m128i *)(s - 8 * pitch)); + q7 = _mm_load_si128((__m128i *)(s + 7 * pitch)); // highbd_flat_mask5 (arguments passed in are p0, q0, p4-p7, q4-q7 // but referred to as p0-p4 & q0-q4 in fn) @@ -389,8 +389,8 @@ void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, flat2_q6 = _mm_and_si128(flat2, flat2_q6); // get values for when (flat2 && flat && mask) q6 = _mm_or_si128(q6, flat2_q6); // full list of q6 values - _mm_store_si128((__m128i *)(s - 7 * p), p6); - _mm_store_si128((__m128i *)(s + 6 * p), q6); + _mm_store_si128((__m128i *)(s - 7 * pitch), p6); + _mm_store_si128((__m128i *)(s + 6 * pitch), q6); p5 = _mm_andnot_si128(flat2, p5); // p5 remains unchanged if !(flat2 && flat && mask) @@ -404,8 +404,8 @@ void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, // get values for when (flat2 && flat && mask) q5 = _mm_or_si128(q5, flat2_q5); // full list of q5 values - _mm_store_si128((__m128i *)(s - 6 * p), p5); - _mm_store_si128((__m128i *)(s + 5 * p), q5); + _mm_store_si128((__m128i *)(s - 6 * pitch), p5); + _mm_store_si128((__m128i *)(s + 5 * pitch), q5); p4 = _mm_andnot_si128(flat2, p4); // p4 remains unchanged if !(flat2 && flat && mask) @@ -417,8 +417,8 @@ void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, flat2_q4 = _mm_and_si128(flat2, flat2_q4); // get values for when (flat2 && flat && mask) q4 = _mm_or_si128(q4, flat2_q4); // full list of q4 values - _mm_store_si128((__m128i *)(s - 5 * p), p4); - _mm_store_si128((__m128i *)(s + 4 * p), q4); + _mm_store_si128((__m128i *)(s - 5 * pitch), p4); + _mm_store_si128((__m128i *)(s + 4 * pitch), q4); p3 = _mm_andnot_si128(flat2, p3); // p3 takes value from 
highbd_filter8 if !(flat2 && flat && mask) @@ -430,8 +430,8 @@ void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, flat2_q3 = _mm_and_si128(flat2, flat2_q3); // get values for when (flat2 && flat && mask) q3 = _mm_or_si128(q3, flat2_q3); // full list of q3 values - _mm_store_si128((__m128i *)(s - 4 * p), p3); - _mm_store_si128((__m128i *)(s + 3 * p), q3); + _mm_store_si128((__m128i *)(s - 4 * pitch), p3); + _mm_store_si128((__m128i *)(s + 3 * pitch), q3); p2 = _mm_andnot_si128(flat2, p2); // p2 takes value from highbd_filter8 if !(flat2 && flat && mask) @@ -444,8 +444,8 @@ void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, flat2_q2 = _mm_and_si128(flat2, flat2_q2); // get values for when (flat2 && flat && mask) q2 = _mm_or_si128(q2, flat2_q2); // full list of q2 values - _mm_store_si128((__m128i *)(s - 3 * p), p2); - _mm_store_si128((__m128i *)(s + 2 * p), q2); + _mm_store_si128((__m128i *)(s - 3 * pitch), p2); + _mm_store_si128((__m128i *)(s + 2 * pitch), q2); p1 = _mm_andnot_si128(flat2, p1); // p1 takes value from highbd_filter8 if !(flat2 && flat && mask) @@ -457,8 +457,8 @@ void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, flat2_q1 = _mm_and_si128(flat2, flat2_q1); // get values for when (flat2 && flat && mask) q1 = _mm_or_si128(q1, flat2_q1); // full list of q1 values - _mm_store_si128((__m128i *)(s - 2 * p), p1); - _mm_store_si128((__m128i *)(s + 1 * p), q1); + _mm_store_si128((__m128i *)(s - 2 * pitch), p1); + _mm_store_si128((__m128i *)(s + 1 * pitch), q1); p0 = _mm_andnot_si128(flat2, p0); // p0 takes value from highbd_filter8 if !(flat2 && flat && mask) @@ -470,22 +470,22 @@ void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p, flat2_q0 = _mm_and_si128(flat2, flat2_q0); // get values for when (flat2 && flat && mask) q0 = _mm_or_si128(q0, flat2_q0); // full list of q0 values - _mm_store_si128((__m128i *)(s - 1 * p), p0); - _mm_store_si128((__m128i *)(s - 0 * p), q0); + _mm_store_si128((__m128i *)(s - 1 * pitch), p0); + _mm_store_si128((__m128i *)(s - 0 * pitch), q0); } -void vpx_highbd_lpf_horizontal_16_dual_sse2(uint16_t *s, int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, int bd) { - vpx_highbd_lpf_horizontal_16_sse2(s, p, _blimit, _limit, _thresh, bd); - vpx_highbd_lpf_horizontal_16_sse2(s + 8, p, _blimit, _limit, _thresh, bd); +void vpx_highbd_lpf_horizontal_16_dual_sse2(uint16_t *s, int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int bd) { + vpx_highbd_lpf_horizontal_16_sse2(s, pitch, blimit, limit, thresh, bd); + vpx_highbd_lpf_horizontal_16_sse2(s + 8, pitch, blimit, limit, thresh, bd); } -void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, int bd) { +void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int bd) { DECLARE_ALIGNED(16, uint16_t, flat_op2[16]); DECLARE_ALIGNED(16, uint16_t, flat_op1[16]); DECLARE_ALIGNED(16, uint16_t, flat_op0[16]); @@ -493,16 +493,16 @@ void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, DECLARE_ALIGNED(16, uint16_t, flat_oq1[16]); DECLARE_ALIGNED(16, uint16_t, flat_oq0[16]); const __m128i zero = _mm_set1_epi16(0); - __m128i blimit, limit, thresh; + __m128i blimit_v, limit_v, thresh_v; __m128i mask, hev, flat; - __m128i p3 = _mm_load_si128((__m128i *)(s - 4 * p)); - __m128i q3 = _mm_load_si128((__m128i *)(s + 3 * p)); - __m128i p2 = _mm_load_si128((__m128i *)(s - 3 * p)); - 
__m128i q2 = _mm_load_si128((__m128i *)(s + 2 * p)); - __m128i p1 = _mm_load_si128((__m128i *)(s - 2 * p)); - __m128i q1 = _mm_load_si128((__m128i *)(s + 1 * p)); - __m128i p0 = _mm_load_si128((__m128i *)(s - 1 * p)); - __m128i q0 = _mm_load_si128((__m128i *)(s + 0 * p)); + __m128i p3 = _mm_load_si128((__m128i *)(s - 4 * pitch)); + __m128i q3 = _mm_load_si128((__m128i *)(s + 3 * pitch)); + __m128i p2 = _mm_load_si128((__m128i *)(s - 3 * pitch)); + __m128i q2 = _mm_load_si128((__m128i *)(s + 2 * pitch)); + __m128i p1 = _mm_load_si128((__m128i *)(s - 2 * pitch)); + __m128i q1 = _mm_load_si128((__m128i *)(s + 1 * pitch)); + __m128i p0 = _mm_load_si128((__m128i *)(s - 1 * pitch)); + __m128i q0 = _mm_load_si128((__m128i *)(s + 0 * pitch)); const __m128i one = _mm_set1_epi16(1); const __m128i ffff = _mm_cmpeq_epi16(one, one); __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work; @@ -519,25 +519,25 @@ void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, __m128i filter1, filter2; if (bd == 8) { - blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero); - limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero); - thresh = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero); + blimit_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero); + limit_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero); + thresh_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero); t80 = _mm_set1_epi16(0x80); } else if (bd == 10) { - blimit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero), 2); - limit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero), 2); - thresh = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero), 2); + blimit_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero), 2); + limit_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero), 2); + thresh_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero), 2); t80 = _mm_set1_epi16(0x200); } else { // bd == 12 - blimit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero), 4); - limit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero), 4); - thresh = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero), 4); + blimit_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero), 4); + limit_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero), 4); + thresh_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero), 4); t80 = _mm_set1_epi16(0x800); } @@ -553,16 +553,16 @@ void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, abs_p0q0 = _mm_or_si128(_mm_subs_epu16(p0, q0), _mm_subs_epu16(q0, p0)); abs_p1q1 = _mm_or_si128(_mm_subs_epu16(p1, q1), _mm_subs_epu16(q1, p1)); flat = _mm_max_epi16(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu16(flat, thresh); + hev = _mm_subs_epu16(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi16(hev, zero), ffff); abs_p0q0 = _mm_adds_epu16(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1); - mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit); + mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; 
// So taking maximums continues to work: - mask = _mm_and_si128(mask, _mm_adds_epu16(limit, one)); + mask = _mm_and_si128(mask, _mm_adds_epu16(limit_v, one)); mask = _mm_max_epi16(abs_p1p0, mask); // mask |= (abs(p1 - p0) > limit) * -1; mask = _mm_max_epi16(abs_q1q0, mask); @@ -576,7 +576,7 @@ void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, _mm_or_si128(_mm_subs_epu16(p3, p2), _mm_subs_epu16(p2, p3)), _mm_or_si128(_mm_subs_epu16(q3, q2), _mm_subs_epu16(q2, q3))); mask = _mm_max_epi16(work, mask); - mask = _mm_subs_epu16(mask, limit); + mask = _mm_subs_epu16(mask, limit_v); mask = _mm_cmpeq_epi16(mask, zero); // flat_mask4 @@ -674,7 +674,7 @@ void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, q1 = _mm_and_si128(flat, q1); q1 = _mm_or_si128(work_a, q1); - work_a = _mm_loadu_si128((__m128i *)(s + 2 * p)); + work_a = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); q2 = _mm_load_si128((__m128i *)flat_oq2); work_a = _mm_andnot_si128(flat, work_a); q2 = _mm_and_si128(flat, q2); @@ -694,43 +694,43 @@ void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, p1 = _mm_and_si128(flat, p1); p1 = _mm_or_si128(work_a, p1); - work_a = _mm_loadu_si128((__m128i *)(s - 3 * p)); + work_a = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); p2 = _mm_load_si128((__m128i *)flat_op2); work_a = _mm_andnot_si128(flat, work_a); p2 = _mm_and_si128(flat, p2); p2 = _mm_or_si128(work_a, p2); - _mm_store_si128((__m128i *)(s - 3 * p), p2); - _mm_store_si128((__m128i *)(s - 2 * p), p1); - _mm_store_si128((__m128i *)(s - 1 * p), p0); - _mm_store_si128((__m128i *)(s + 0 * p), q0); - _mm_store_si128((__m128i *)(s + 1 * p), q1); - _mm_store_si128((__m128i *)(s + 2 * p), q2); + _mm_store_si128((__m128i *)(s - 3 * pitch), p2); + _mm_store_si128((__m128i *)(s - 2 * pitch), p1); + _mm_store_si128((__m128i *)(s - 1 * pitch), p0); + _mm_store_si128((__m128i *)(s + 0 * pitch), q0); + _mm_store_si128((__m128i *)(s + 1 * pitch), q1); + _mm_store_si128((__m128i *)(s + 2 * pitch), q2); } void vpx_highbd_lpf_horizontal_8_dual_sse2( - uint16_t *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0, - const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1, - const uint8_t *_thresh1, int bd) { - vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, bd); - vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd); + uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, + const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1, int bd) { + vpx_highbd_lpf_horizontal_8_sse2(s, pitch, blimit0, limit0, thresh0, bd); + vpx_highbd_lpf_horizontal_8_sse2(s + 8, pitch, blimit1, limit1, thresh1, bd); } -void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, - const uint8_t *_blimit, - const uint8_t *_limit, - const uint8_t *_thresh, int bd) { +void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, int bd) { const __m128i zero = _mm_set1_epi16(0); - __m128i blimit, limit, thresh; + __m128i blimit_v, limit_v, thresh_v; __m128i mask, hev, flat; - __m128i p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); - __m128i p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); - __m128i p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); - __m128i p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); - __m128i q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); - __m128i q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); - __m128i q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); - __m128i q3 = 
_mm_loadu_si128((__m128i *)(s + 3 * p)); + __m128i p3 = _mm_loadu_si128((__m128i *)(s - 4 * pitch)); + __m128i p2 = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); + __m128i p1 = _mm_loadu_si128((__m128i *)(s - 2 * pitch)); + __m128i p0 = _mm_loadu_si128((__m128i *)(s - 1 * pitch)); + __m128i q0 = _mm_loadu_si128((__m128i *)(s - 0 * pitch)); + __m128i q1 = _mm_loadu_si128((__m128i *)(s + 1 * pitch)); + __m128i q2 = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); + __m128i q3 = _mm_loadu_si128((__m128i *)(s + 3 * pitch)); const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu16(p1, p0), _mm_subs_epu16(p0, p1)); const __m128i abs_q1q0 = @@ -760,57 +760,57 @@ void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, __m128i filter1, filter2; if (bd == 8) { - blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero); - limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero); - thresh = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero); + blimit_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero); + limit_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero); + thresh_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero); t80 = _mm_set1_epi16(0x80); - tff80 = _mm_set1_epi16(0xff80); - tffe0 = _mm_set1_epi16(0xffe0); + tff80 = _mm_set1_epi16((int16_t)0xff80); + tffe0 = _mm_set1_epi16((int16_t)0xffe0); t1f = _mm_srli_epi16(_mm_set1_epi16(0x1fff), 8); t7f = _mm_srli_epi16(_mm_set1_epi16(0x7fff), 8); } else if (bd == 10) { - blimit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero), 2); - limit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero), 2); - thresh = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero), 2); + blimit_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero), 2); + limit_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero), 2); + thresh_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero), 2); t80 = _mm_slli_epi16(_mm_set1_epi16(0x80), 2); - tff80 = _mm_slli_epi16(_mm_set1_epi16(0xff80), 2); - tffe0 = _mm_slli_epi16(_mm_set1_epi16(0xffe0), 2); + tff80 = _mm_slli_epi16(_mm_set1_epi16((int16_t)0xff80), 2); + tffe0 = _mm_slli_epi16(_mm_set1_epi16((int16_t)0xffe0), 2); t1f = _mm_srli_epi16(_mm_set1_epi16(0x1fff), 6); t7f = _mm_srli_epi16(_mm_set1_epi16(0x7fff), 6); } else { // bd == 12 - blimit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero), 4); - limit = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero), 4); - thresh = _mm_slli_epi16( - _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero), 4); + blimit_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero), 4); + limit_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero), 4); + thresh_v = _mm_slli_epi16( + _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero), 4); t80 = _mm_slli_epi16(_mm_set1_epi16(0x80), 4); - tff80 = _mm_slli_epi16(_mm_set1_epi16(0xff80), 4); - tffe0 = _mm_slli_epi16(_mm_set1_epi16(0xffe0), 4); + tff80 = _mm_slli_epi16(_mm_set1_epi16((int16_t)0xff80), 4); + tffe0 = _mm_slli_epi16(_mm_set1_epi16((int16_t)0xffe0), 4); t1f = _mm_srli_epi16(_mm_set1_epi16(0x1fff), 4); t7f = _mm_srli_epi16(_mm_set1_epi16(0x7fff), 4); } - ps1 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s - 2 * 
p)), t80); - ps0 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s - 1 * p)), t80); - qs0 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s + 0 * p)), t80); - qs1 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s + 1 * p)), t80); + ps1 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s - 2 * pitch)), t80); + ps0 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s - 1 * pitch)), t80); + qs0 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s + 0 * pitch)), t80); + qs1 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s + 1 * pitch)), t80); // filter_mask and hev_mask flat = _mm_max_epi16(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu16(flat, thresh); + hev = _mm_subs_epu16(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi16(hev, zero), ffff); abs_p0q0 = _mm_adds_epu16(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1); - mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit); + mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; // So taking maximums continues to work: - mask = _mm_and_si128(mask, _mm_adds_epu16(limit, one)); + mask = _mm_and_si128(mask, _mm_adds_epu16(limit_v, one)); mask = _mm_max_epi16(flat, mask); // mask |= (abs(p1 - p0) > limit) * -1; // mask |= (abs(q1 - q0) > limit) * -1; @@ -822,7 +822,7 @@ void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, _mm_or_si128(_mm_subs_epu16(q2, q1), _mm_subs_epu16(q1, q2)), _mm_or_si128(_mm_subs_epu16(q3, q2), _mm_subs_epu16(q2, q3))); mask = _mm_max_epi16(work, mask); - mask = _mm_subs_epu16(mask, limit); + mask = _mm_subs_epu16(mask, limit_v); mask = _mm_cmpeq_epi16(mask, zero); // filter4 @@ -872,18 +872,18 @@ void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, p1 = _mm_adds_epi16(signed_char_clamp_bd_sse2(_mm_adds_epi16(ps1, filt), bd), t80); - _mm_storeu_si128((__m128i *)(s - 2 * p), p1); - _mm_storeu_si128((__m128i *)(s - 1 * p), p0); - _mm_storeu_si128((__m128i *)(s + 0 * p), q0); - _mm_storeu_si128((__m128i *)(s + 1 * p), q1); + _mm_storeu_si128((__m128i *)(s - 2 * pitch), p1); + _mm_storeu_si128((__m128i *)(s - 1 * pitch), p0); + _mm_storeu_si128((__m128i *)(s + 0 * pitch), q0); + _mm_storeu_si128((__m128i *)(s + 1 * pitch), q1); } void vpx_highbd_lpf_horizontal_4_dual_sse2( - uint16_t *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0, - const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1, - const uint8_t *_thresh1, int bd) { - vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, bd); - vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd); + uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, + const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1, int bd) { + vpx_highbd_lpf_horizontal_4_sse2(s, pitch, blimit0, limit0, thresh0, bd); + vpx_highbd_lpf_horizontal_4_sse2(s + 8, pitch, blimit1, limit1, thresh1, bd); } static INLINE void highbd_transpose(uint16_t *src[], int in_p, uint16_t *dst[], @@ -998,9 +998,9 @@ static INLINE void highbd_transpose8x16(uint16_t *in0, uint16_t *in1, int in_p, highbd_transpose(src1, in_p, dest1, out_p, 1); } -void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd) { +void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, + const uint8_t *blimit, const uint8_t *limit, + const uint8_t *thresh, int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]); 
uint16_t *src[1]; uint16_t *dst[1]; @@ -1009,7 +1009,7 @@ void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, const uint8_t *blimit, src[0] = s - 4; dst[0] = t_dst; - highbd_transpose(src, p, dst, 8, 1); + highbd_transpose(src, pitch, dst, 8, 1); // Loop filtering vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd); @@ -1018,11 +1018,11 @@ void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, const uint8_t *blimit, dst[0] = s - 4; // Transpose back - highbd_transpose(src, 8, dst, p, 1); + highbd_transpose(src, 8, dst, pitch, 1); } void vpx_highbd_lpf_vertical_4_dual_sse2( - uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, + uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[16 * 8]); @@ -1030,7 +1030,7 @@ void vpx_highbd_lpf_vertical_4_dual_sse2( uint16_t *dst[2]; // Transpose 8x16 - highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); + highbd_transpose8x16(s - 4, s - 4 + pitch * 8, pitch, t_dst, 16); // Loop filtering vpx_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, @@ -1038,15 +1038,15 @@ void vpx_highbd_lpf_vertical_4_dual_sse2( src[0] = t_dst; src[1] = t_dst + 8; dst[0] = s - 4; - dst[1] = s - 4 + p * 8; + dst[1] = s - 4 + pitch * 8; // Transpose back - highbd_transpose(src, 16, dst, p, 2); + highbd_transpose(src, 16, dst, pitch, 2); } -void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd) { +void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, + const uint8_t *blimit, const uint8_t *limit, + const uint8_t *thresh, int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]); uint16_t *src[1]; uint16_t *dst[1]; @@ -1055,7 +1055,7 @@ void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, const uint8_t *blimit, src[0] = s - 4; dst[0] = t_dst; - highbd_transpose(src, p, dst, 8, 1); + highbd_transpose(src, pitch, dst, 8, 1); // Loop filtering vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd); @@ -1064,11 +1064,11 @@ void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p, const uint8_t *blimit, dst[0] = s - 4; // Transpose back - highbd_transpose(src, 8, dst, p, 1); + highbd_transpose(src, 8, dst, pitch, 1); } void vpx_highbd_lpf_vertical_8_dual_sse2( - uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, + uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[16 * 8]); @@ -1076,7 +1076,7 @@ void vpx_highbd_lpf_vertical_8_dual_sse2( uint16_t *dst[2]; // Transpose 8x16 - highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); + highbd_transpose8x16(s - 4, s - 4 + pitch * 8, pitch, t_dst, 16); // Loop filtering vpx_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, @@ -1085,13 +1085,14 @@ void vpx_highbd_lpf_vertical_8_dual_sse2( src[1] = t_dst + 8; dst[0] = s - 4; - dst[1] = s - 4 + p * 8; + dst[1] = s - 4 + pitch * 8; // Transpose back - highbd_transpose(src, 16, dst, p, 2); + highbd_transpose(src, 16, dst, pitch, 2); } -void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit, +void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int pitch, + const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { 
DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 16]); @@ -1104,7 +1105,7 @@ void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit, dst[1] = t_dst + 8 * 8; // Transpose 16x8 - highbd_transpose(src, p, dst, 8, 2); + highbd_transpose(src, pitch, dst, 8, 2); // Loop filtering vpx_highbd_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh, @@ -1115,24 +1116,25 @@ void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit, dst[1] = s; // Transpose back - highbd_transpose(src, 8, dst, p, 2); + highbd_transpose(src, 8, dst, pitch, 2); } -void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int p, +void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[256]); // Transpose 16x16 - highbd_transpose8x16(s - 8, s - 8 + 8 * p, p, t_dst, 16); - highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); + highbd_transpose8x16(s - 8, s - 8 + 8 * pitch, pitch, t_dst, 16); + highbd_transpose8x16(s, s + 8 * pitch, pitch, t_dst + 8 * 16, 16); // Loop filtering vpx_highbd_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh, bd); // Transpose back - highbd_transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); - highbd_transpose8x16(t_dst + 8, t_dst + 8 + 8 * 16, 16, s - 8 + 8 * p, p); + highbd_transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, pitch); + highbd_transpose8x16(t_dst + 8, t_dst + 8 + 8 * 16, 16, s - 8 + 8 * pitch, + pitch); } diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_quantize_intrin_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/highbd_quantize_intrin_sse2.c index cedf98aff4bb..7149e4fb7408 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_quantize_intrin_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_quantize_intrin_sse2.c @@ -11,6 +11,7 @@ #include #include +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_sad_sse2.asm b/media/libvpx/libvpx/vpx_dsp/x86/highbd_sad_sse2.asm index bc4b28db24fb..6a1a6f3d624f 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_sad_sse2.asm +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_sad_sse2.asm @@ -25,11 +25,11 @@ cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, \ cglobal highbd_sad%1x%2_avg, 5, 1 + %3, 7, src, src_stride, ref, ref_stride, \ second_pred, n_rows %else ; %3 == 7 -cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, 7, src, src_stride, \ +cglobal highbd_sad%1x%2_avg, 5, VPX_ARCH_X86_64 + %3, 7, src, src_stride, \ ref, ref_stride, \ second_pred, \ src_stride3, ref_stride3 -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 %define n_rowsd r7d %else ; x86-32 %define n_rowsd dword r0m diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/media/libvpx/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm index d9a6932e0b87..5a3a2818defd 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm @@ -32,12 +32,12 @@ SECTION .text ; int vpx_sub_pixel_varianceNxh(const uint8_t *src, ptrdiff_t src_stride, ; int x_offset, int y_offset, -; const uint8_t *dst, ptrdiff_t dst_stride, +; const uint8_t *ref, ptrdiff_t ref_stride, ; int height, unsigned int *sse); ; ; This function returns the SE and stores SSE in the given pointer. 
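For readers following the register renames in the subpel variance assembly below (dst becomes ref, sec becomes second_pred) rather than the arithmetic: SUM_SSE only accumulates the sum and the sum of squares of src - ref. A scalar sketch of one such step (sum_sse_step is a hypothetical name, for illustration only):

#include <stdint.h>

// Hypothetical scalar equivalent (not in the patch) of one SUM_SSE step:
// accumulate the signed sum of differences (the returned SE) and the sum of
// squared differences (stored through the sse out-parameter) over n pixels.
static void sum_sse_step(const uint16_t *src, const uint16_t *ref, int n,
                         int64_t *sum, uint64_t *sse) {
  int i;
  for (i = 0; i < n; ++i) {
    const int32_t diff = (int32_t)src[i] - (int32_t)ref[i];
    *sum += diff;
    *sse += (uint64_t)(diff * diff);
  }
}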
-%macro SUM_SSE 6 ; src1, dst1, src2, dst2, sum, sse +%macro SUM_SSE 6 ; src1, ref1, src2, ref2, sum, sse psubw %3, %4 psubw %1, %2 mova %4, %3 ; make copies to manipulate to calc sum @@ -78,7 +78,7 @@ SECTION .text %endmacro %macro INC_SRC_BY_SRC_STRIDE 0 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if VPX_ARCH_X86=1 && CONFIG_PIC=1 add srcq, src_stridemp add srcq, src_stridemp %else @@ -91,81 +91,65 @@ SECTION .text %define filter_idx_shift 5 -%ifdef PIC ; 64bit PIC +%if VPX_ARCH_X86_64 %if %2 == 1 ; avg cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \ x_offset, y_offset, \ - dst, dst_stride, \ - sec, sec_stride, height, sse - %define sec_str sec_strideq + ref, ref_stride, \ + second_pred, second_stride, height, sse + %define second_str second_strideq %else - cglobal highbd_sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, \ - y_offset, dst, dst_stride, height, sse + cglobal highbd_sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \ + x_offset, y_offset, \ + ref, ref_stride, height, sse %endif %define block_height heightd %define bilin_filter sseq %else - %if ARCH_X86=1 && CONFIG_PIC=1 + %if CONFIG_PIC=1 %if %2 == 1 ; avg cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \ - x_offset, y_offset, \ - dst, dst_stride, \ - sec, sec_stride, \ - height, sse, g_bilin_filter, g_pw_8 + x_offset, y_offset, \ + ref, ref_stride, \ + second_pred, second_stride, height, sse %define block_height dword heightm - %define sec_str sec_stridemp - - ; Store bilin_filter and pw_8 location in stack - %if GET_GOT_DEFINED == 1 - GET_GOT eax - add esp, 4 ; restore esp - %endif - - lea ecx, [GLOBAL(bilin_filter_m)] - mov g_bilin_filterm, ecx - - lea ecx, [GLOBAL(pw_8)] - mov g_pw_8m, ecx - - LOAD_IF_USED 0, 1 ; load eax, ecx back + %define second_str second_stridemp %else cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \ - x_offset, y_offset, dst, dst_stride, height, \ - sse, g_bilin_filter, g_pw_8 + x_offset, y_offset, \ + ref, ref_stride, height, sse %define block_height heightd - - ; Store bilin_filter and pw_8 location in stack - %if GET_GOT_DEFINED == 1 - GET_GOT eax - add esp, 4 ; restore esp - %endif - - lea ecx, [GLOBAL(bilin_filter_m)] - mov g_bilin_filterm, ecx - - lea ecx, [GLOBAL(pw_8)] - mov g_pw_8m, ecx - - LOAD_IF_USED 0, 1 ; load eax, ecx back %endif + + ; reuse argument stack space + %define g_bilin_filterm x_offsetm + %define g_pw_8m y_offsetm + + ; Store bilin_filter and pw_8 location in stack + %if GET_GOT_DEFINED == 1 + GET_GOT eax + add esp, 4 ; restore esp + %endif + + lea ecx, [GLOBAL(bilin_filter_m)] + mov g_bilin_filterm, ecx + + lea ecx, [GLOBAL(pw_8)] + mov g_pw_8m, ecx + + LOAD_IF_USED 0, 1 ; load eax, ecx back %else %if %2 == 1 ; avg - cglobal highbd_sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \ - 7 + 2 * ARCH_X86_64, 13, src, src_stride, \ - x_offset, y_offset, \ - dst, dst_stride, \ - sec, sec_stride, \ - height, sse - %if ARCH_X86_64 - %define block_height heightd - %define sec_str sec_strideq - %else + cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \ + x_offset, y_offset, \ + ref, ref_stride, \ + second_pred, second_stride, height, sse %define block_height dword heightm - %define sec_str sec_stridemp - %endif + %define second_str second_stridemp %else cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \ - x_offset, y_offset, dst, dst_stride, height, sse + x_offset, y_offset, \ + ref, ref_stride, height, sse %define block_height heightd %endif @@ -181,7 +165,7 @@ SECTION .text 
sar block_height, 1 %endif %if %2 == 1 ; avg - shl sec_str, 1 + shl second_str, 1 %endif ; FIXME(rbultje) replace by jumptable? @@ -196,35 +180,35 @@ SECTION .text %if %1 == 16 movu m0, [srcq] movu m2, [srcq + 16] - mova m1, [dstq] - mova m3, [dstq + 16] + mova m1, [refq] + mova m3, [refq + 16] %if %2 == 1 ; avg - pavgw m0, [secq] - pavgw m2, [secq+16] + pavgw m0, [second_predq] + pavgw m2, [second_predq+16] %endif SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq + src_strideq*2] - lea dstq, [dstq + dst_strideq*2] + lea refq, [refq + ref_strideq*2] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m2, [srcq + src_strideq*2] - mova m1, [dstq] - mova m3, [dstq + dst_strideq*2] + mova m1, [refq] + mova m3, [refq + ref_strideq*2] %if %2 == 1 ; avg - pavgw m0, [secq] - add secq, sec_str - pavgw m2, [secq] + pavgw m0, [second_predq] + add second_predq, second_str + pavgw m2, [second_predq] %endif SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq + src_strideq*4] - lea dstq, [dstq + dst_strideq*4] + lea refq, [refq + ref_strideq*4] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %endif dec block_height @@ -242,40 +226,40 @@ SECTION .text movu m1, [srcq+16] movu m4, [srcq+src_strideq*2] movu m5, [srcq+src_strideq*2+16] - mova m2, [dstq] - mova m3, [dstq+16] + mova m2, [refq] + mova m3, [refq+16] pavgw m0, m4 pavgw m1, m5 %if %2 == 1 ; avg - pavgw m0, [secq] - pavgw m1, [secq+16] + pavgw m0, [second_predq] + pavgw m1, [second_predq+16] %endif SUM_SSE m0, m2, m1, m3, m6, m7 lea srcq, [srcq + src_strideq*2] - lea dstq, [dstq + dst_strideq*2] + lea refq, [refq + ref_strideq*2] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m1, [srcq+src_strideq*2] movu m5, [srcq+src_strideq*4] - mova m2, [dstq] - mova m3, [dstq+dst_strideq*2] + mova m2, [refq] + mova m3, [refq+ref_strideq*2] pavgw m0, m1 pavgw m1, m5 %if %2 == 1 ; avg - pavgw m0, [secq] - add secq, sec_str - pavgw m1, [secq] + pavgw m0, [second_predq] + add second_predq, second_str + pavgw m1, [second_predq] %endif SUM_SSE m0, m2, m1, m3, m6, m7 lea srcq, [srcq + src_strideq*4] - lea dstq, [dstq + dst_strideq*4] + lea refq, [refq + ref_strideq*4] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %endif dec block_height @@ -284,19 +268,19 @@ SECTION .text .x_zero_y_nonhalf: ; x_offset == 0 && y_offset == bilin interpolation -%ifdef PIC - lea bilin_filter, [bilin_filter_m] +%if VPX_ARCH_X86_64 + lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if VPX_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+y_offsetq] mova m9, [bilin_filter+y_offsetq+16] - mova m10, [pw_8] + mova m10, [GLOBAL(pw_8)] %define filter_y_a m8 %define filter_y_b m9 %define filter_rnd m10 %else ; x86-32 or mmx -%if ARCH_X86=1 && CONFIG_PIC=1 +%if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; x_offset == 0, reuse x_offset reg %define tempq x_offsetq add y_offsetq, g_bilin_filterm @@ -308,7 +292,7 @@ SECTION .text add y_offsetq, bilin_filter %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] -%define filter_rnd [pw_8] +%define filter_rnd [GLOBAL(pw_8)] %endif %endif @@ -318,8 +302,8 @@ SECTION .text movu m1, [srcq + 16] movu m4, [srcq+src_strideq*2] movu m5, [srcq+src_strideq*2+16] - mova m2, [dstq] - mova m3, [dstq+16] + mova m2, [refq] + mova m3, [refq+16] ; FIXME(rbultje) instead of out=((num-x)*in1+x*in2+rnd)>>log2(num), we can ; also do 
out=in1+(((num-x)*(in2-in1)+rnd)>>log2(num)). Total number of ; instructions is the same (5), but it is 1 mul instead of 2, so might be @@ -336,23 +320,23 @@ SECTION .text psrlw m1, 4 psrlw m0, 4 %if %2 == 1 ; avg - pavgw m0, [secq] - pavgw m1, [secq+16] + pavgw m0, [second_predq] + pavgw m1, [second_predq+16] %endif SUM_SSE m0, m2, m1, m3, m6, m7 lea srcq, [srcq + src_strideq*2] - lea dstq, [dstq + dst_strideq*2] + lea refq, [refq + ref_strideq*2] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m1, [srcq+src_strideq*2] movu m5, [srcq+src_strideq*4] mova m4, m1 - mova m2, [dstq] - mova m3, [dstq+dst_strideq*2] + mova m2, [refq] + mova m3, [refq+ref_strideq*2] pmullw m1, filter_y_a pmullw m5, filter_y_b paddw m1, filter_rnd @@ -364,16 +348,16 @@ SECTION .text psrlw m1, 4 psrlw m0, 4 %if %2 == 1 ; avg - pavgw m0, [secq] - add secq, sec_str - pavgw m1, [secq] + pavgw m0, [second_predq] + add second_predq, second_str + pavgw m1, [second_predq] %endif SUM_SSE m0, m2, m1, m3, m6, m7 lea srcq, [srcq + src_strideq*4] - lea dstq, [dstq + dst_strideq*4] + lea refq, [refq + ref_strideq*4] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %endif dec block_height @@ -397,41 +381,41 @@ SECTION .text movu m1, [srcq + 16] movu m4, [srcq + 2] movu m5, [srcq + 18] - mova m2, [dstq] - mova m3, [dstq + 16] + mova m2, [refq] + mova m3, [refq + 16] pavgw m0, m4 pavgw m1, m5 %if %2 == 1 ; avg - pavgw m0, [secq] - pavgw m1, [secq+16] + pavgw m0, [second_predq] + pavgw m1, [second_predq+16] %endif SUM_SSE m0, m2, m1, m3, m6, m7 lea srcq, [srcq + src_strideq*2] - lea dstq, [dstq + dst_strideq*2] + lea refq, [refq + ref_strideq*2] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m1, [srcq + src_strideq*2] movu m4, [srcq + 2] movu m5, [srcq + src_strideq*2 + 2] - mova m2, [dstq] - mova m3, [dstq + dst_strideq*2] + mova m2, [refq] + mova m3, [refq + ref_strideq*2] pavgw m0, m4 pavgw m1, m5 %if %2 == 1 ; avg - pavgw m0, [secq] - add secq, sec_str - pavgw m1, [secq] + pavgw m0, [second_predq] + add second_predq, second_str + pavgw m1, [second_predq] %endif SUM_SSE m0, m2, m1, m3, m6, m7 lea srcq, [srcq + src_strideq*4] - lea dstq, [dstq + dst_strideq*4] + lea refq, [refq + ref_strideq*4] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %endif dec block_height @@ -460,20 +444,20 @@ SECTION .text pavgw m3, m5 pavgw m0, m2 pavgw m1, m3 - mova m4, [dstq] - mova m5, [dstq + 16] + mova m4, [refq] + mova m5, [refq + 16] %if %2 == 1 ; avg - pavgw m0, [secq] - pavgw m1, [secq+16] + pavgw m0, [second_predq] + pavgw m1, [second_predq+16] %endif SUM_SSE m0, m4, m1, m5, m6, m7 mova m0, m2 mova m1, m3 lea srcq, [srcq + src_strideq*2] - lea dstq, [dstq + dst_strideq*2] + lea refq, [refq + ref_strideq*2] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] @@ -489,20 +473,20 @@ SECTION .text pavgw m3, m5 pavgw m0, m2 pavgw m2, m3 - mova m4, [dstq] - mova m5, [dstq + dst_strideq*2] + mova m4, [refq] + mova m5, [refq + ref_strideq*2] %if %2 == 1 ; avg - pavgw m0, [secq] - add secq, sec_str - pavgw m2, [secq] + pavgw m0, [second_predq] + add second_predq, second_str + pavgw m2, [second_predq] %endif SUM_SSE m0, m4, m2, m5, m6, m7 mova m0, m3 lea srcq, [srcq + src_strideq*4] - lea dstq, [dstq + dst_strideq*4] + lea refq, [refq + ref_strideq*4] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, 
second_str %endif %endif dec block_height @@ -511,19 +495,19 @@ SECTION .text .x_half_y_nonhalf: ; x_offset == 0.5 && y_offset == bilin interpolation -%ifdef PIC - lea bilin_filter, [bilin_filter_m] +%if VPX_ARCH_X86_64 + lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if VPX_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+y_offsetq] mova m9, [bilin_filter+y_offsetq+16] - mova m10, [pw_8] + mova m10, [GLOBAL(pw_8)] %define filter_y_a m8 %define filter_y_b m9 %define filter_rnd m10 %else ; x86_32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; x_offset == 0.5. We can reuse x_offset reg %define tempq x_offsetq add y_offsetq, g_bilin_filterm @@ -535,7 +519,7 @@ SECTION .text add y_offsetq, bilin_filter %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] -%define filter_rnd [pw_8] +%define filter_rnd [GLOBAL(pw_8)] %endif %endif @@ -565,21 +549,21 @@ SECTION .text paddw m0, filter_rnd psrlw m1, 4 paddw m0, m2 - mova m2, [dstq] + mova m2, [refq] psrlw m0, 4 - mova m3, [dstq+16] + mova m3, [refq+16] %if %2 == 1 ; avg - pavgw m0, [secq] - pavgw m1, [secq+16] + pavgw m0, [second_predq] + pavgw m1, [second_predq+16] %endif SUM_SSE m0, m2, m1, m3, m6, m7 mova m0, m4 mova m1, m5 lea srcq, [srcq + src_strideq*2] - lea dstq, [dstq + dst_strideq*2] + lea refq, [refq + ref_strideq*2] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] @@ -604,21 +588,21 @@ SECTION .text paddw m0, filter_rnd psrlw m4, 4 paddw m0, m2 - mova m2, [dstq] + mova m2, [refq] psrlw m0, 4 - mova m3, [dstq+dst_strideq*2] + mova m3, [refq+ref_strideq*2] %if %2 == 1 ; avg - pavgw m0, [secq] - add secq, sec_str - pavgw m4, [secq] + pavgw m0, [second_predq] + add second_predq, second_str + pavgw m4, [second_predq] %endif SUM_SSE m0, m2, m4, m3, m6, m7 mova m0, m5 lea srcq, [srcq + src_strideq*4] - lea dstq, [dstq + dst_strideq*4] + lea refq, [refq + ref_strideq*4] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %endif dec block_height @@ -633,19 +617,19 @@ SECTION .text jnz .x_nonhalf_y_nonzero ; x_offset == bilin interpolation && y_offset == 0 -%ifdef PIC - lea bilin_filter, [bilin_filter_m] +%if VPX_ARCH_X86_64 + lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if VPX_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+x_offsetq] mova m9, [bilin_filter+x_offsetq+16] - mova m10, [pw_8] + mova m10, [GLOBAL(pw_8)] %define filter_x_a m8 %define filter_x_b m9 %define filter_rnd m10 %else ; x86-32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; y_offset == 0. We can reuse y_offset reg. 
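; Illustrative scalar sketch of what the tap registers below implement
; (a sketch, not part of the patch): the per-offset weights loaded from
; bilin_filter_m sum to 16, filter_rnd is pw_8 and the shift is 4, so
;   out = (filter_x_a * in1 + filter_x_b * in2 + 8) >> 4
; which is the FIXME formula out=((num-x)*in1+x*in2+rnd)>>log2(num)
; above, instantiated with num=16 and rnd=8.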
%define tempq y_offsetq add x_offsetq, g_bilin_filterm @@ -657,7 +641,7 @@ SECTION .text add x_offsetq, bilin_filter %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] -%define filter_rnd [pw_8] +%define filter_rnd [GLOBAL(pw_8)] %endif %endif @@ -667,8 +651,8 @@ SECTION .text movu m1, [srcq+16] movu m2, [srcq+2] movu m3, [srcq+18] - mova m4, [dstq] - mova m5, [dstq+16] + mova m4, [refq] + mova m5, [refq+16] pmullw m1, filter_x_a pmullw m3, filter_x_b paddw m1, filter_rnd @@ -680,23 +664,23 @@ SECTION .text psrlw m1, 4 psrlw m0, 4 %if %2 == 1 ; avg - pavgw m0, [secq] - pavgw m1, [secq+16] + pavgw m0, [second_predq] + pavgw m1, [second_predq+16] %endif SUM_SSE m0, m4, m1, m5, m6, m7 lea srcq, [srcq+src_strideq*2] - lea dstq, [dstq+dst_strideq*2] + lea refq, [refq+ref_strideq*2] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m1, [srcq+src_strideq*2] movu m2, [srcq+2] movu m3, [srcq+src_strideq*2+2] - mova m4, [dstq] - mova m5, [dstq+dst_strideq*2] + mova m4, [refq] + mova m5, [refq+ref_strideq*2] pmullw m1, filter_x_a pmullw m3, filter_x_b paddw m1, filter_rnd @@ -708,16 +692,16 @@ SECTION .text psrlw m1, 4 psrlw m0, 4 %if %2 == 1 ; avg - pavgw m0, [secq] - add secq, sec_str - pavgw m1, [secq] + pavgw m0, [second_predq] + add second_predq, second_str + pavgw m1, [second_predq] %endif SUM_SSE m0, m4, m1, m5, m6, m7 lea srcq, [srcq+src_strideq*4] - lea dstq, [dstq+dst_strideq*4] + lea refq, [refq+ref_strideq*4] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %endif dec block_height @@ -732,19 +716,19 @@ SECTION .text jne .x_nonhalf_y_nonhalf ; x_offset == bilin interpolation && y_offset == 0.5 -%ifdef PIC - lea bilin_filter, [bilin_filter_m] +%if VPX_ARCH_X86_64 + lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if VPX_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+x_offsetq] mova m9, [bilin_filter+x_offsetq+16] - mova m10, [pw_8] + mova m10, [GLOBAL(pw_8)] %define filter_x_a m8 %define filter_x_b m9 %define filter_rnd m10 %else ; x86-32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; y_offset == 0.5. We can reuse y_offset reg. 
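; For a half-pel offset the tap pair would be (8, 8), and
; ((8*a + 8*b + 8) >> 4) == ((a + b + 1) >> 1), so the vertical half step
; in this case is done with pavgw (a rounded average) and only the
; horizontal taps are loaded here.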
%define tempq y_offsetq add x_offsetq, g_bilin_filterm @@ -756,7 +740,7 @@ SECTION .text add x_offsetq, bilin_filter %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] -%define filter_rnd [pw_8] +%define filter_rnd [GLOBAL(pw_8)] %endif %endif @@ -789,24 +773,24 @@ SECTION .text paddw m3, filter_rnd paddw m2, m4 paddw m3, m5 - mova m4, [dstq] - mova m5, [dstq+16] + mova m4, [refq] + mova m5, [refq+16] psrlw m2, 4 psrlw m3, 4 pavgw m0, m2 pavgw m1, m3 %if %2 == 1 ; avg - pavgw m0, [secq] - pavgw m1, [secq+16] + pavgw m0, [second_predq] + pavgw m1, [second_predq+16] %endif SUM_SSE m0, m4, m1, m5, m6, m7 mova m0, m2 mova m1, m3 lea srcq, [srcq+src_strideq*2] - lea dstq, [dstq+dst_strideq*2] + lea refq, [refq+ref_strideq*2] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] @@ -830,24 +814,24 @@ SECTION .text paddw m3, filter_rnd paddw m2, m4 paddw m3, m5 - mova m4, [dstq] - mova m5, [dstq+dst_strideq*2] + mova m4, [refq] + mova m5, [refq+ref_strideq*2] psrlw m2, 4 psrlw m3, 4 pavgw m0, m2 pavgw m2, m3 %if %2 == 1 ; avg - pavgw m0, [secq] - add secq, sec_str - pavgw m2, [secq] + pavgw m0, [second_predq] + add second_predq, second_str + pavgw m2, [second_predq] %endif SUM_SSE m0, m4, m2, m5, m6, m7 mova m0, m3 lea srcq, [srcq+src_strideq*4] - lea dstq, [dstq+dst_strideq*4] + lea refq, [refq+ref_strideq*4] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %endif dec block_height @@ -859,24 +843,24 @@ SECTION .text .x_nonhalf_y_nonhalf: ; loading filter - this is same as in 8-bit depth -%ifdef PIC - lea bilin_filter, [bilin_filter_m] +%if VPX_ARCH_X86_64 + lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift ; filter_idx_shift = 5 shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if VPX_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+x_offsetq] mova m9, [bilin_filter+x_offsetq+16] mova m10, [bilin_filter+y_offsetq] mova m11, [bilin_filter+y_offsetq+16] - mova m12, [pw_8] + mova m12, [GLOBAL(pw_8)] %define filter_x_a m8 %define filter_x_b m9 %define filter_y_a m10 %define filter_y_b m11 %define filter_rnd m12 %else ; x86-32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; In this case, there is NO unused register. Used src_stride register. Later, ; src_stride has to be loaded from stack when it is needed. 
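; Since src_strideq doubles as tempq while both sets of taps are loaded,
; this path advances rows with INC_SRC_BY_SRC_STRIDE, which on x86-32 PIC
; adds src_stridemp twice (pixels are 16-bit, so one row is
; 2 * src_stride bytes) instead of using the repurposed register.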
%define tempq src_strideq @@ -897,7 +881,7 @@ SECTION .text %define filter_x_b [x_offsetq+16] %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] -%define filter_rnd [pw_8] +%define filter_rnd [GLOBAL(pw_8)] %endif %endif ; end of load filter @@ -945,23 +929,23 @@ SECTION .text pmullw m3, filter_y_b paddw m0, m2 paddw m1, filter_rnd - mova m2, [dstq] + mova m2, [refq] paddw m1, m3 psrlw m0, 4 psrlw m1, 4 - mova m3, [dstq+16] + mova m3, [refq+16] %if %2 == 1 ; avg - pavgw m0, [secq] - pavgw m1, [secq+16] + pavgw m0, [second_predq] + pavgw m1, [second_predq+16] %endif SUM_SSE m0, m2, m1, m3, m6, m7 mova m0, m4 mova m1, m5 INC_SRC_BY_SRC_STRIDE - lea dstq, [dstq + dst_strideq * 2] + lea refq, [refq + ref_strideq * 2] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] @@ -999,23 +983,23 @@ SECTION .text pmullw m3, filter_y_b paddw m0, m2 paddw m4, filter_rnd - mova m2, [dstq] + mova m2, [refq] paddw m4, m3 psrlw m0, 4 psrlw m4, 4 - mova m3, [dstq+dst_strideq*2] + mova m3, [refq+ref_strideq*2] %if %2 == 1 ; avg - pavgw m0, [secq] - add secq, sec_str - pavgw m4, [secq] + pavgw m0, [second_predq] + add second_predq, second_str + pavgw m4, [second_predq] %endif SUM_SSE m0, m2, m4, m3, m6, m7 mova m0, m5 INC_SRC_BY_SRC_STRIDE - lea dstq, [dstq + dst_strideq * 4] + lea refq, [refq + ref_strideq * 4] %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif %endif dec block_height diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_variance_impl_sse2.asm b/media/libvpx/libvpx/vpx_dsp/x86/highbd_variance_impl_sse2.asm index e646767e1908..a256a59ec0e2 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_variance_impl_sse2.asm +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_variance_impl_sse2.asm @@ -16,9 +16,9 @@ SECTION .text ;unsigned int vpx_highbd_calc16x16var_sse2 ;( ; unsigned char * src_ptr, -; int source_stride, +; int src_stride, ; unsigned char * ref_ptr, -; int recon_stride, +; int ref_stride, ; unsigned int * SSE, ; int * Sum ;) @@ -36,8 +36,8 @@ sym(vpx_highbd_calc16x16var_sse2): mov rsi, arg(0) ;[src_ptr] mov rdi, arg(2) ;[ref_ptr] - movsxd rax, DWORD PTR arg(1) ;[source_stride] - movsxd rdx, DWORD PTR arg(3) ;[recon_stride] + movsxd rax, DWORD PTR arg(1) ;[src_stride] + movsxd rdx, DWORD PTR arg(3) ;[ref_stride] add rax, rax ; source stride in bytes add rdx, rdx ; recon stride in bytes @@ -169,9 +169,9 @@ sym(vpx_highbd_calc16x16var_sse2): ;unsigned int vpx_highbd_calc8x8var_sse2 ;( ; unsigned char * src_ptr, -; int source_stride, +; int src_stride, ; unsigned char * ref_ptr, -; int recon_stride, +; int ref_stride, ; unsigned int * SSE, ; int * Sum ;) @@ -189,8 +189,8 @@ sym(vpx_highbd_calc8x8var_sse2): mov rsi, arg(0) ;[src_ptr] mov rdi, arg(2) ;[ref_ptr] - movsxd rax, DWORD PTR arg(1) ;[source_stride] - movsxd rdx, DWORD PTR arg(3) ;[recon_stride] + movsxd rax, DWORD PTR arg(1) ;[src_stride] + movsxd rdx, DWORD PTR arg(3) ;[ref_stride] add rax, rax ; source stride in bytes add rdx, rdx ; recon stride in bytes diff --git a/media/libvpx/libvpx/vpx_dsp/x86/highbd_variance_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/highbd_variance_sse2.c index a6f7c3d25dfd..dd6cfbb2c4c1 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/highbd_variance_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/highbd_variance_sse2.c @@ -7,8 +7,9 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "./vpx_config.h" +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" typedef uint32_t (*high_variance_fn_t)(const uint16_t *src, int src_stride, @@ -89,9 +90,9 @@ static void highbd_12_variance_sse2(const uint16_t *src, int src_stride, } #define HIGH_GET_VAR(S) \ - void vpx_highbd_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \ - const uint8_t *ref8, int ref_stride, \ - uint32_t *sse, int *sum) { \ + void vpx_highbd_8_get##S##x##S##var_sse2( \ + const uint8_t *src8, int src_stride, const uint8_t *ref8, \ + int ref_stride, uint32_t *sse, int *sum) { \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, sse, \ @@ -135,7 +136,7 @@ HIGH_GET_VAR(8); highbd_8_variance_sse2( \ src, src_stride, ref, ref_stride, w, h, sse, &sum, \ vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ - return *sse - (uint32_t)(((int64_t)sum * sum) >> shift); \ + return *sse - (uint32_t)(((int64_t)sum * sum) >> (shift)); \ } \ \ uint32_t vpx_highbd_10_variance##w##x##h##_sse2( \ @@ -148,7 +149,7 @@ HIGH_GET_VAR(8); highbd_10_variance_sse2( \ src, src_stride, ref, ref_stride, w, h, sse, &sum, \ vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ - var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift); \ + var = (int64_t)(*sse) - (((int64_t)sum * sum) >> (shift)); \ return (var >= 0) ? (uint32_t)var : 0; \ } \ \ @@ -162,7 +163,7 @@ HIGH_GET_VAR(8); highbd_12_variance_sse2( \ src, src_stride, ref, ref_stride, w, h, sse, &sum, \ vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ - var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift); \ + var = (int64_t)(*sse) - (((int64_t)sum * sum) >> (shift)); \ return (var >= 0) ? 
(uint32_t)var : 0; \ } @@ -251,7 +252,7 @@ unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride, #define DECL(w, opt) \ int vpx_highbd_sub_pixel_variance##w##xh_##opt( \ const uint16_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \ - const uint16_t *dst, ptrdiff_t dst_stride, int height, \ + const uint16_t *ref, ptrdiff_t ref_stride, int height, \ unsigned int *sse, void *unused0, void *unused); #define DECLS(opt) \ DECL(8, opt); \ @@ -265,28 +266,28 @@ DECLS(sse2); #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt( \ const uint8_t *src8, int src_stride, int x_offset, int y_offset, \ - const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \ + const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr) { \ uint32_t sse; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ - src, src_stride, x_offset, y_offset, dst, dst_stride, h, &sse, NULL, \ + src, src_stride, x_offset, y_offset, ref, ref_stride, h, &sse, NULL, \ NULL); \ if (w > wf) { \ unsigned int sse2; \ int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ - src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, h, \ + src + 16, src_stride, x_offset, y_offset, ref + 16, ref_stride, h, \ &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ - src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, h, \ + src + 32, src_stride, x_offset, y_offset, ref + 32, ref_stride, h, \ &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ - src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, h, \ + src + 48, src_stride, x_offset, y_offset, ref + 48, ref_stride, h, \ &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ @@ -298,29 +299,29 @@ DECLS(sse2); \ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \ const uint8_t *src8, int src_stride, int x_offset, int y_offset, \ - const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \ + const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr) { \ int64_t var; \ uint32_t sse; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ - src, src_stride, x_offset, y_offset, dst, dst_stride, h, &sse, NULL, \ + src, src_stride, x_offset, y_offset, ref, ref_stride, h, &sse, NULL, \ NULL); \ if (w > wf) { \ uint32_t sse2; \ int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ - src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, h, \ + src + 16, src_stride, x_offset, y_offset, ref + 16, ref_stride, h, \ &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ - src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, h, \ + src + 32, src_stride, x_offset, y_offset, ref + 32, ref_stride, h, \ &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ - src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, h, \ + src + 48, src_stride, x_offset, y_offset, ref + 48, ref_stride, h, \ &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ @@ -335,40 +336,40 @@ DECLS(sse2); \ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \ const uint8_t *src8, int 
src_stride, int x_offset, int y_offset, \ - const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \ + const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr) { \ int start_row; \ uint32_t sse; \ int se = 0; \ int64_t var; \ uint64_t long_sse = 0; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ for (start_row = 0; start_row < h; start_row += 16) { \ uint32_t sse2; \ int height = h - start_row < 16 ? h - start_row : 16; \ int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + (start_row * src_stride), src_stride, x_offset, y_offset, \ - dst + (start_row * dst_stride), dst_stride, height, &sse2, NULL, \ + ref + (start_row * ref_stride), ref_stride, height, &sse2, NULL, \ NULL); \ se += se2; \ long_sse += sse2; \ if (w > wf) { \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 16 + (start_row * src_stride), src_stride, x_offset, \ - y_offset, dst + 16 + (start_row * dst_stride), dst_stride, height, \ + y_offset, ref + 16 + (start_row * ref_stride), ref_stride, height, \ &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 32 + (start_row * src_stride), src_stride, x_offset, \ - y_offset, dst + 32 + (start_row * dst_stride), dst_stride, \ + y_offset, ref + 32 + (start_row * ref_stride), ref_stride, \ height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 48 + (start_row * src_stride), src_stride, x_offset, \ - y_offset, dst + 48 + (start_row * dst_stride), dst_stride, \ + y_offset, ref + 48 + (start_row * ref_stride), ref_stride, \ height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ @@ -404,8 +405,8 @@ FNS(sse2); #define DECL(w, opt) \ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt( \ const uint16_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \ - const uint16_t *dst, ptrdiff_t dst_stride, const uint16_t *sec, \ - ptrdiff_t sec_stride, int height, unsigned int *sse, void *unused0, \ + const uint16_t *ref, ptrdiff_t ref_stride, const uint16_t *second, \ + ptrdiff_t second_stride, int height, unsigned int *sse, void *unused0, \ void *unused); #define DECLS(opt1) \ DECL(16, opt1) \ @@ -418,30 +419,30 @@ DECLS(sse2); #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ uint32_t vpx_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \ const uint8_t *src8, int src_stride, int x_offset, int y_offset, \ - const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \ + const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr, \ const uint8_t *sec8) { \ uint32_t sse; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \ int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ - src, src_stride, x_offset, y_offset, dst, dst_stride, sec, w, h, &sse, \ + src, src_stride, x_offset, y_offset, ref, ref_stride, sec, w, h, &sse, \ NULL, NULL); \ if (w > wf) { \ uint32_t sse2; \ int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ - src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, \ + src + 16, src_stride, x_offset, y_offset, ref + 16, ref_stride, \ sec + 16, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ - src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, \ + src + 32, src_stride, 
x_offset, y_offset, ref + 32, ref_stride, \ sec + 32, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ - src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, \ + src + 48, src_stride, x_offset, y_offset, ref + 48, ref_stride, \ sec + 48, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ @@ -453,31 +454,31 @@ DECLS(sse2); \ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \ const uint8_t *src8, int src_stride, int x_offset, int y_offset, \ - const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \ + const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr, \ const uint8_t *sec8) { \ int64_t var; \ uint32_t sse; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \ int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ - src, src_stride, x_offset, y_offset, dst, dst_stride, sec, w, h, &sse, \ + src, src_stride, x_offset, y_offset, ref, ref_stride, sec, w, h, &sse, \ NULL, NULL); \ if (w > wf) { \ uint32_t sse2; \ int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ - src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, \ + src + 16, src_stride, x_offset, y_offset, ref + 16, ref_stride, \ sec + 16, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ - src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, \ + src + 32, src_stride, x_offset, y_offset, ref + 32, ref_stride, \ sec + 32, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ - src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, \ + src + 48, src_stride, x_offset, y_offset, ref + 48, ref_stride, \ sec + 48, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ @@ -492,7 +493,7 @@ DECLS(sse2); \ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \ const uint8_t *src8, int src_stride, int x_offset, int y_offset, \ - const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \ + const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr, \ const uint8_t *sec8) { \ int start_row; \ int64_t var; \ @@ -500,34 +501,34 @@ DECLS(sse2); int se = 0; \ uint64_t long_sse = 0; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ + uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \ for (start_row = 0; start_row < h; start_row += 16) { \ uint32_t sse2; \ int height = h - start_row < 16 ? 
h - start_row : 16; \ int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + (start_row * src_stride), src_stride, x_offset, y_offset, \ - dst + (start_row * dst_stride), dst_stride, sec + (start_row * w), \ + ref + (start_row * ref_stride), ref_stride, sec + (start_row * w), \ w, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ if (w > wf) { \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 16 + (start_row * src_stride), src_stride, x_offset, \ - y_offset, dst + 16 + (start_row * dst_stride), dst_stride, \ + y_offset, ref + 16 + (start_row * ref_stride), ref_stride, \ sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 32 + (start_row * src_stride), src_stride, x_offset, \ - y_offset, dst + 32 + (start_row * dst_stride), dst_stride, \ + y_offset, ref + 32 + (start_row * ref_stride), ref_stride, \ sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 48 + (start_row * src_stride), src_stride, x_offset, \ - y_offset, dst + 48 + (start_row * dst_stride), dst_stride, \ + y_offset, ref + 48 + (start_row * ref_stride), ref_stride, \ sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/inv_txfm_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/inv_txfm_sse2.c index f6e56b6f9ece..4b02da966649 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/inv_txfm_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/inv_txfm_sse2.c @@ -100,49 +100,44 @@ void idct4_sse2(__m128i *const in) { } void iadst4_sse2(__m128i *const in) { - const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9); - const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9); - const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9); - const __m128i k__sinpi_p03_m04 = pair_set_epi16(sinpi_3_9, -sinpi_4_9); - const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9); - const __m128i kZero = _mm_set1_epi16(0); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - __m128i u[8], v[8], in7; + const __m128i k__sinpi_1_3 = pair_set_epi16(sinpi_1_9, sinpi_3_9); + const __m128i k__sinpi_4_2 = pair_set_epi16(sinpi_4_9, sinpi_2_9); + const __m128i k__sinpi_2_3 = pair_set_epi16(sinpi_2_9, sinpi_3_9); + const __m128i k__sinpi_1_4 = pair_set_epi16(sinpi_1_9, sinpi_4_9); + const __m128i k__sinpi_12_n3 = + pair_set_epi16(sinpi_1_9 + sinpi_2_9, -sinpi_3_9); + __m128i u[4], v[5]; - transpose_16bit_4(in); - in7 = _mm_srli_si128(in[1], 8); - in7 = _mm_add_epi16(in7, in[0]); - in7 = _mm_sub_epi16(in7, in[1]); + // 00 01 20 21 02 03 22 23 + // 10 11 30 31 12 13 32 33 + const __m128i tr0_0 = _mm_unpacklo_epi32(in[0], in[1]); + const __m128i tr0_1 = _mm_unpackhi_epi32(in[0], in[1]); - u[0] = _mm_unpacklo_epi16(in[0], in[1]); - u[1] = _mm_unpackhi_epi16(in[0], in[1]); - u[2] = _mm_unpacklo_epi16(in7, kZero); - u[3] = _mm_unpackhi_epi16(in[0], kZero); + // 00 01 10 11 20 21 30 31 + // 02 03 12 13 22 23 32 33 + in[0] = _mm_unpacklo_epi32(tr0_0, tr0_1); + in[1] = _mm_unpackhi_epi32(tr0_0, tr0_1); - v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p04); // s0 + s3 - v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p02); // s2 + s5 - v[2] = _mm_madd_epi16(u[2], k__sinpi_p03_p03); // x2 - v[3] = _mm_madd_epi16(u[0], k__sinpi_p02_m01); // s1 - s4 - v[4] = _mm_madd_epi16(u[1], 
k__sinpi_p03_m04); // s2 - s6 - v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03); // s2 + v[0] = _mm_madd_epi16(in[0], k__sinpi_1_3); // s_1 * x0 + s_3 * x1 + v[1] = _mm_madd_epi16(in[1], k__sinpi_4_2); // s_4 * x2 + s_2 * x3 + v[2] = _mm_madd_epi16(in[0], k__sinpi_2_3); // s_2 * x0 + s_3 * x1 + v[3] = _mm_madd_epi16(in[1], k__sinpi_1_4); // s_1 * x2 + s_4 * x3 + v[4] = _mm_madd_epi16(in[0], k__sinpi_12_n3); // (s_1 + s_2) * x0 - s_3 * x1 + in[0] = _mm_sub_epi16(in[0], in[1]); // x0 - x2 + in[1] = _mm_srli_epi32(in[1], 16); + in[0] = _mm_add_epi16(in[0], in[1]); + in[0] = _mm_slli_epi32(in[0], 16); // x0 - x2 + x3 u[0] = _mm_add_epi32(v[0], v[1]); - u[1] = _mm_add_epi32(v[3], v[4]); - u[2] = v[2]; - u[3] = _mm_add_epi32(u[0], u[1]); - u[4] = _mm_slli_epi32(v[5], 2); - u[5] = _mm_add_epi32(u[3], v[5]); - u[6] = _mm_sub_epi32(u[5], u[4]); + u[1] = _mm_sub_epi32(v[2], v[3]); + u[2] = _mm_madd_epi16(in[0], k__sinpi_1_3); + u[3] = _mm_sub_epi32(v[1], v[3]); + u[3] = _mm_add_epi32(u[3], v[4]); - v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); + u[0] = dct_const_round_shift_sse2(u[0]); + u[1] = dct_const_round_shift_sse2(u[1]); + u[2] = dct_const_round_shift_sse2(u[2]); + u[3] = dct_const_round_shift_sse2(u[3]); in[0] = _mm_packs_epi32(u[0], u[1]); in[1] = _mm_packs_epi32(u[2], u[3]); @@ -170,7 +165,7 @@ void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, // 2-D for (i = 0; i < 2; i++) { - idct8_sse2(in); + vpx_idct8_sse2(in); } write_buffer_8x8(in, dest, stride); @@ -226,7 +221,7 @@ void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, recon_and_store_8_dual(dest, dc_value, stride); } -void idct8_sse2(__m128i *const in) { +void vpx_idct8_sse2(__m128i *const in) { // 8x8 Transpose is copied from vpx_fdct8x8_sse2() transpose_16bit_8x8(in, in); @@ -248,191 +243,149 @@ void iadst8_sse2(__m128i *const in) { const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); - const __m128i k__const_0 = _mm_set1_epi16(0); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - - __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15; - __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; - __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; - __m128i s0, s1, s2, s3, s4, s5, s6, s7; - __m128i in0, in1, in2, in3, in4, in5, in6, in7; + const __m128i kZero = _mm_set1_epi16(0); + __m128i s[8], u[16], v[8], w[16]; // transpose transpose_16bit_8x8(in, in); - // properly aligned for butterfly input - in0 = in[7]; - in1 = in[0]; - in2 = in[5]; - in3 = in[2]; - in4 = in[3]; - in5 = in[4]; - in6 = in[1]; - in7 = in[6]; - // column transformation // stage 1 // interleave and multiply/add into 32-bit integer - s0 = _mm_unpacklo_epi16(in0, in1); - s1 = _mm_unpackhi_epi16(in0, in1); - s2 = _mm_unpacklo_epi16(in2, in3); - s3 = _mm_unpackhi_epi16(in2, in3); - s4 = _mm_unpacklo_epi16(in4, in5); - s5 = _mm_unpackhi_epi16(in4, in5); - s6 = _mm_unpacklo_epi16(in6, in7); - s7 = _mm_unpackhi_epi16(in6, in7); + s[0] 
= _mm_unpacklo_epi16(in[7], in[0]); + s[1] = _mm_unpackhi_epi16(in[7], in[0]); + s[2] = _mm_unpacklo_epi16(in[5], in[2]); + s[3] = _mm_unpackhi_epi16(in[5], in[2]); + s[4] = _mm_unpacklo_epi16(in[3], in[4]); + s[5] = _mm_unpackhi_epi16(in[3], in[4]); + s[6] = _mm_unpacklo_epi16(in[1], in[6]); + s[7] = _mm_unpackhi_epi16(in[1], in[6]); - u0 = _mm_madd_epi16(s0, k__cospi_p02_p30); - u1 = _mm_madd_epi16(s1, k__cospi_p02_p30); - u2 = _mm_madd_epi16(s0, k__cospi_p30_m02); - u3 = _mm_madd_epi16(s1, k__cospi_p30_m02); - u4 = _mm_madd_epi16(s2, k__cospi_p10_p22); - u5 = _mm_madd_epi16(s3, k__cospi_p10_p22); - u6 = _mm_madd_epi16(s2, k__cospi_p22_m10); - u7 = _mm_madd_epi16(s3, k__cospi_p22_m10); - u8 = _mm_madd_epi16(s4, k__cospi_p18_p14); - u9 = _mm_madd_epi16(s5, k__cospi_p18_p14); - u10 = _mm_madd_epi16(s4, k__cospi_p14_m18); - u11 = _mm_madd_epi16(s5, k__cospi_p14_m18); - u12 = _mm_madd_epi16(s6, k__cospi_p26_p06); - u13 = _mm_madd_epi16(s7, k__cospi_p26_p06); - u14 = _mm_madd_epi16(s6, k__cospi_p06_m26); - u15 = _mm_madd_epi16(s7, k__cospi_p06_m26); + u[0] = _mm_madd_epi16(s[0], k__cospi_p02_p30); + u[1] = _mm_madd_epi16(s[1], k__cospi_p02_p30); + u[2] = _mm_madd_epi16(s[0], k__cospi_p30_m02); + u[3] = _mm_madd_epi16(s[1], k__cospi_p30_m02); + u[4] = _mm_madd_epi16(s[2], k__cospi_p10_p22); + u[5] = _mm_madd_epi16(s[3], k__cospi_p10_p22); + u[6] = _mm_madd_epi16(s[2], k__cospi_p22_m10); + u[7] = _mm_madd_epi16(s[3], k__cospi_p22_m10); + u[8] = _mm_madd_epi16(s[4], k__cospi_p18_p14); + u[9] = _mm_madd_epi16(s[5], k__cospi_p18_p14); + u[10] = _mm_madd_epi16(s[4], k__cospi_p14_m18); + u[11] = _mm_madd_epi16(s[5], k__cospi_p14_m18); + u[12] = _mm_madd_epi16(s[6], k__cospi_p26_p06); + u[13] = _mm_madd_epi16(s[7], k__cospi_p26_p06); + u[14] = _mm_madd_epi16(s[6], k__cospi_p06_m26); + u[15] = _mm_madd_epi16(s[7], k__cospi_p06_m26); // addition - w0 = _mm_add_epi32(u0, u8); - w1 = _mm_add_epi32(u1, u9); - w2 = _mm_add_epi32(u2, u10); - w3 = _mm_add_epi32(u3, u11); - w4 = _mm_add_epi32(u4, u12); - w5 = _mm_add_epi32(u5, u13); - w6 = _mm_add_epi32(u6, u14); - w7 = _mm_add_epi32(u7, u15); - w8 = _mm_sub_epi32(u0, u8); - w9 = _mm_sub_epi32(u1, u9); - w10 = _mm_sub_epi32(u2, u10); - w11 = _mm_sub_epi32(u3, u11); - w12 = _mm_sub_epi32(u4, u12); - w13 = _mm_sub_epi32(u5, u13); - w14 = _mm_sub_epi32(u6, u14); - w15 = _mm_sub_epi32(u7, u15); + w[0] = _mm_add_epi32(u[0], u[8]); + w[1] = _mm_add_epi32(u[1], u[9]); + w[2] = _mm_add_epi32(u[2], u[10]); + w[3] = _mm_add_epi32(u[3], u[11]); + w[4] = _mm_add_epi32(u[4], u[12]); + w[5] = _mm_add_epi32(u[5], u[13]); + w[6] = _mm_add_epi32(u[6], u[14]); + w[7] = _mm_add_epi32(u[7], u[15]); + w[8] = _mm_sub_epi32(u[0], u[8]); + w[9] = _mm_sub_epi32(u[1], u[9]); + w[10] = _mm_sub_epi32(u[2], u[10]); + w[11] = _mm_sub_epi32(u[3], u[11]); + w[12] = _mm_sub_epi32(u[4], u[12]); + w[13] = _mm_sub_epi32(u[5], u[13]); + w[14] = _mm_sub_epi32(u[6], u[14]); + w[15] = _mm_sub_epi32(u[7], u[15]); // shift and rounding - v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING); - v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING); - v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING); - v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING); - v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING); - v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING); - v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING); - v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING); - v8 = _mm_add_epi32(w8, k__DCT_CONST_ROUNDING); - v9 = _mm_add_epi32(w9, k__DCT_CONST_ROUNDING); - v10 = _mm_add_epi32(w10, k__DCT_CONST_ROUNDING); - v11 = _mm_add_epi32(w11, 
k__DCT_CONST_ROUNDING); - v12 = _mm_add_epi32(w12, k__DCT_CONST_ROUNDING); - v13 = _mm_add_epi32(w13, k__DCT_CONST_ROUNDING); - v14 = _mm_add_epi32(w14, k__DCT_CONST_ROUNDING); - v15 = _mm_add_epi32(w15, k__DCT_CONST_ROUNDING); - - u0 = _mm_srai_epi32(v0, DCT_CONST_BITS); - u1 = _mm_srai_epi32(v1, DCT_CONST_BITS); - u2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - u3 = _mm_srai_epi32(v3, DCT_CONST_BITS); - u4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - u5 = _mm_srai_epi32(v5, DCT_CONST_BITS); - u6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - u7 = _mm_srai_epi32(v7, DCT_CONST_BITS); - u8 = _mm_srai_epi32(v8, DCT_CONST_BITS); - u9 = _mm_srai_epi32(v9, DCT_CONST_BITS); - u10 = _mm_srai_epi32(v10, DCT_CONST_BITS); - u11 = _mm_srai_epi32(v11, DCT_CONST_BITS); - u12 = _mm_srai_epi32(v12, DCT_CONST_BITS); - u13 = _mm_srai_epi32(v13, DCT_CONST_BITS); - u14 = _mm_srai_epi32(v14, DCT_CONST_BITS); - u15 = _mm_srai_epi32(v15, DCT_CONST_BITS); + u[0] = dct_const_round_shift_sse2(w[0]); + u[1] = dct_const_round_shift_sse2(w[1]); + u[2] = dct_const_round_shift_sse2(w[2]); + u[3] = dct_const_round_shift_sse2(w[3]); + u[4] = dct_const_round_shift_sse2(w[4]); + u[5] = dct_const_round_shift_sse2(w[5]); + u[6] = dct_const_round_shift_sse2(w[6]); + u[7] = dct_const_round_shift_sse2(w[7]); + u[8] = dct_const_round_shift_sse2(w[8]); + u[9] = dct_const_round_shift_sse2(w[9]); + u[10] = dct_const_round_shift_sse2(w[10]); + u[11] = dct_const_round_shift_sse2(w[11]); + u[12] = dct_const_round_shift_sse2(w[12]); + u[13] = dct_const_round_shift_sse2(w[13]); + u[14] = dct_const_round_shift_sse2(w[14]); + u[15] = dct_const_round_shift_sse2(w[15]); // back to 16-bit and pack 8 integers into __m128i - in[0] = _mm_packs_epi32(u0, u1); - in[1] = _mm_packs_epi32(u2, u3); - in[2] = _mm_packs_epi32(u4, u5); - in[3] = _mm_packs_epi32(u6, u7); - in[4] = _mm_packs_epi32(u8, u9); - in[5] = _mm_packs_epi32(u10, u11); - in[6] = _mm_packs_epi32(u12, u13); - in[7] = _mm_packs_epi32(u14, u15); + in[0] = _mm_packs_epi32(u[0], u[1]); + in[1] = _mm_packs_epi32(u[2], u[3]); + in[2] = _mm_packs_epi32(u[4], u[5]); + in[3] = _mm_packs_epi32(u[6], u[7]); + in[4] = _mm_packs_epi32(u[8], u[9]); + in[5] = _mm_packs_epi32(u[10], u[11]); + in[6] = _mm_packs_epi32(u[12], u[13]); + in[7] = _mm_packs_epi32(u[14], u[15]); // stage 2 - s0 = _mm_add_epi16(in[0], in[2]); - s1 = _mm_add_epi16(in[1], in[3]); - s2 = _mm_sub_epi16(in[0], in[2]); - s3 = _mm_sub_epi16(in[1], in[3]); - u0 = _mm_unpacklo_epi16(in[4], in[5]); - u1 = _mm_unpackhi_epi16(in[4], in[5]); - u2 = _mm_unpacklo_epi16(in[6], in[7]); - u3 = _mm_unpackhi_epi16(in[6], in[7]); + s[0] = _mm_add_epi16(in[0], in[2]); + s[1] = _mm_add_epi16(in[1], in[3]); + s[2] = _mm_sub_epi16(in[0], in[2]); + s[3] = _mm_sub_epi16(in[1], in[3]); + u[0] = _mm_unpacklo_epi16(in[4], in[5]); + u[1] = _mm_unpackhi_epi16(in[4], in[5]); + u[2] = _mm_unpacklo_epi16(in[6], in[7]); + u[3] = _mm_unpackhi_epi16(in[6], in[7]); - v0 = _mm_madd_epi16(u0, k__cospi_p08_p24); - v1 = _mm_madd_epi16(u1, k__cospi_p08_p24); - v2 = _mm_madd_epi16(u0, k__cospi_p24_m08); - v3 = _mm_madd_epi16(u1, k__cospi_p24_m08); - v4 = _mm_madd_epi16(u2, k__cospi_m24_p08); - v5 = _mm_madd_epi16(u3, k__cospi_m24_p08); - v6 = _mm_madd_epi16(u2, k__cospi_p08_p24); - v7 = _mm_madd_epi16(u3, k__cospi_p08_p24); + v[0] = _mm_madd_epi16(u[0], k__cospi_p08_p24); + v[1] = _mm_madd_epi16(u[1], k__cospi_p08_p24); + v[2] = _mm_madd_epi16(u[0], k__cospi_p24_m08); + v[3] = _mm_madd_epi16(u[1], k__cospi_p24_m08); + v[4] = _mm_madd_epi16(u[2], k__cospi_m24_p08); + v[5] = 
_mm_madd_epi16(u[3], k__cospi_m24_p08); + v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24); + v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24); - w0 = _mm_add_epi32(v0, v4); - w1 = _mm_add_epi32(v1, v5); - w2 = _mm_add_epi32(v2, v6); - w3 = _mm_add_epi32(v3, v7); - w4 = _mm_sub_epi32(v0, v4); - w5 = _mm_sub_epi32(v1, v5); - w6 = _mm_sub_epi32(v2, v6); - w7 = _mm_sub_epi32(v3, v7); + w[0] = _mm_add_epi32(v[0], v[4]); + w[1] = _mm_add_epi32(v[1], v[5]); + w[2] = _mm_add_epi32(v[2], v[6]); + w[3] = _mm_add_epi32(v[3], v[7]); + w[4] = _mm_sub_epi32(v[0], v[4]); + w[5] = _mm_sub_epi32(v[1], v[5]); + w[6] = _mm_sub_epi32(v[2], v[6]); + w[7] = _mm_sub_epi32(v[3], v[7]); - v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING); - v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING); - v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING); - v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING); - v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING); - v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING); - v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING); - v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING); - - u0 = _mm_srai_epi32(v0, DCT_CONST_BITS); - u1 = _mm_srai_epi32(v1, DCT_CONST_BITS); - u2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - u3 = _mm_srai_epi32(v3, DCT_CONST_BITS); - u4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - u5 = _mm_srai_epi32(v5, DCT_CONST_BITS); - u6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - u7 = _mm_srai_epi32(v7, DCT_CONST_BITS); + u[0] = dct_const_round_shift_sse2(w[0]); + u[1] = dct_const_round_shift_sse2(w[1]); + u[2] = dct_const_round_shift_sse2(w[2]); + u[3] = dct_const_round_shift_sse2(w[3]); + u[4] = dct_const_round_shift_sse2(w[4]); + u[5] = dct_const_round_shift_sse2(w[5]); + u[6] = dct_const_round_shift_sse2(w[6]); + u[7] = dct_const_round_shift_sse2(w[7]); // back to 16-bit intergers - s4 = _mm_packs_epi32(u0, u1); - s5 = _mm_packs_epi32(u2, u3); - s6 = _mm_packs_epi32(u4, u5); - s7 = _mm_packs_epi32(u6, u7); + s[4] = _mm_packs_epi32(u[0], u[1]); + s[5] = _mm_packs_epi32(u[2], u[3]); + s[6] = _mm_packs_epi32(u[4], u[5]); + s[7] = _mm_packs_epi32(u[6], u[7]); // stage 3 - u0 = _mm_unpacklo_epi16(s2, s3); - u1 = _mm_unpackhi_epi16(s2, s3); - u2 = _mm_unpacklo_epi16(s6, s7); - u3 = _mm_unpackhi_epi16(s6, s7); + u[0] = _mm_unpacklo_epi16(s[2], s[3]); + u[1] = _mm_unpackhi_epi16(s[2], s[3]); + u[2] = _mm_unpacklo_epi16(s[6], s[7]); + u[3] = _mm_unpackhi_epi16(s[6], s[7]); - s2 = idct_calc_wraplow_sse2(u0, u1, k__cospi_p16_p16); - s3 = idct_calc_wraplow_sse2(u0, u1, k__cospi_p16_m16); - s6 = idct_calc_wraplow_sse2(u2, u3, k__cospi_p16_p16); - s7 = idct_calc_wraplow_sse2(u2, u3, k__cospi_p16_m16); + s[2] = idct_calc_wraplow_sse2(u[0], u[1], k__cospi_p16_p16); + s[3] = idct_calc_wraplow_sse2(u[0], u[1], k__cospi_p16_m16); + s[6] = idct_calc_wraplow_sse2(u[2], u[3], k__cospi_p16_p16); + s[7] = idct_calc_wraplow_sse2(u[2], u[3], k__cospi_p16_m16); - in[0] = s0; - in[1] = _mm_sub_epi16(k__const_0, s4); - in[2] = s6; - in[3] = _mm_sub_epi16(k__const_0, s2); - in[4] = s3; - in[5] = _mm_sub_epi16(k__const_0, s7); - in[6] = s5; - in[7] = _mm_sub_epi16(k__const_0, s1); + in[0] = s[0]; + in[1] = _mm_sub_epi16(kZero, s[4]); + in[2] = s[6]; + in[3] = _mm_sub_epi16(kZero, s[2]); + in[4] = s[3]; + in[5] = _mm_sub_epi16(kZero, s[7]); + in[6] = s[5]; + in[7] = _mm_sub_epi16(kZero, s[1]); } static INLINE void idct16_load8x8(const tran_low_t *const input, @@ -561,7 +514,7 @@ void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, } } -static void iadst16_8col(__m128i *const in) { +void vpx_iadst16_8col_sse2(__m128i *const in) { // perform 
16x16 1-D ADST for 8 columns __m128i s[16], x[16], u[32], v[32]; const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); @@ -593,7 +546,6 @@ static void iadst16_8col(__m128i *const in) { const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i kZero = _mm_set1_epi16(0); u[0] = _mm_unpacklo_epi16(in[15], in[0]); @@ -679,71 +631,38 @@ static void iadst16_8col(__m128i *const in) { u[30] = _mm_sub_epi32(v[14], v[30]); u[31] = _mm_sub_epi32(v[15], v[31]); - v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); - v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); - v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); - v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); - v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); - v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); - v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); - v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); - v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); - v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); - v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); - v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); - v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - v[16] = _mm_add_epi32(u[16], k__DCT_CONST_ROUNDING); - v[17] = _mm_add_epi32(u[17], k__DCT_CONST_ROUNDING); - v[18] = _mm_add_epi32(u[18], k__DCT_CONST_ROUNDING); - v[19] = _mm_add_epi32(u[19], k__DCT_CONST_ROUNDING); - v[20] = _mm_add_epi32(u[20], k__DCT_CONST_ROUNDING); - v[21] = _mm_add_epi32(u[21], k__DCT_CONST_ROUNDING); - v[22] = _mm_add_epi32(u[22], k__DCT_CONST_ROUNDING); - v[23] = _mm_add_epi32(u[23], k__DCT_CONST_ROUNDING); - v[24] = _mm_add_epi32(u[24], k__DCT_CONST_ROUNDING); - v[25] = _mm_add_epi32(u[25], k__DCT_CONST_ROUNDING); - v[26] = _mm_add_epi32(u[26], k__DCT_CONST_ROUNDING); - v[27] = _mm_add_epi32(u[27], k__DCT_CONST_ROUNDING); - v[28] = _mm_add_epi32(u[28], k__DCT_CONST_ROUNDING); - v[29] = _mm_add_epi32(u[29], k__DCT_CONST_ROUNDING); - v[30] = _mm_add_epi32(u[30], k__DCT_CONST_ROUNDING); - v[31] = _mm_add_epi32(u[31], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); - u[16] = _mm_srai_epi32(v[16], DCT_CONST_BITS); - u[17] = _mm_srai_epi32(v[17], DCT_CONST_BITS); - u[18] = _mm_srai_epi32(v[18], DCT_CONST_BITS); - u[19] = _mm_srai_epi32(v[19], DCT_CONST_BITS); - u[20] = _mm_srai_epi32(v[20], DCT_CONST_BITS); - u[21] = _mm_srai_epi32(v[21], DCT_CONST_BITS); - u[22] = _mm_srai_epi32(v[22], 
DCT_CONST_BITS); - u[23] = _mm_srai_epi32(v[23], DCT_CONST_BITS); - u[24] = _mm_srai_epi32(v[24], DCT_CONST_BITS); - u[25] = _mm_srai_epi32(v[25], DCT_CONST_BITS); - u[26] = _mm_srai_epi32(v[26], DCT_CONST_BITS); - u[27] = _mm_srai_epi32(v[27], DCT_CONST_BITS); - u[28] = _mm_srai_epi32(v[28], DCT_CONST_BITS); - u[29] = _mm_srai_epi32(v[29], DCT_CONST_BITS); - u[30] = _mm_srai_epi32(v[30], DCT_CONST_BITS); - u[31] = _mm_srai_epi32(v[31], DCT_CONST_BITS); + u[0] = dct_const_round_shift_sse2(u[0]); + u[1] = dct_const_round_shift_sse2(u[1]); + u[2] = dct_const_round_shift_sse2(u[2]); + u[3] = dct_const_round_shift_sse2(u[3]); + u[4] = dct_const_round_shift_sse2(u[4]); + u[5] = dct_const_round_shift_sse2(u[5]); + u[6] = dct_const_round_shift_sse2(u[6]); + u[7] = dct_const_round_shift_sse2(u[7]); + u[8] = dct_const_round_shift_sse2(u[8]); + u[9] = dct_const_round_shift_sse2(u[9]); + u[10] = dct_const_round_shift_sse2(u[10]); + u[11] = dct_const_round_shift_sse2(u[11]); + u[12] = dct_const_round_shift_sse2(u[12]); + u[13] = dct_const_round_shift_sse2(u[13]); + u[14] = dct_const_round_shift_sse2(u[14]); + u[15] = dct_const_round_shift_sse2(u[15]); + u[16] = dct_const_round_shift_sse2(u[16]); + u[17] = dct_const_round_shift_sse2(u[17]); + u[18] = dct_const_round_shift_sse2(u[18]); + u[19] = dct_const_round_shift_sse2(u[19]); + u[20] = dct_const_round_shift_sse2(u[20]); + u[21] = dct_const_round_shift_sse2(u[21]); + u[22] = dct_const_round_shift_sse2(u[22]); + u[23] = dct_const_round_shift_sse2(u[23]); + u[24] = dct_const_round_shift_sse2(u[24]); + u[25] = dct_const_round_shift_sse2(u[25]); + u[26] = dct_const_round_shift_sse2(u[26]); + u[27] = dct_const_round_shift_sse2(u[27]); + u[28] = dct_const_round_shift_sse2(u[28]); + u[29] = dct_const_round_shift_sse2(u[29]); + u[30] = dct_const_round_shift_sse2(u[30]); + u[31] = dct_const_round_shift_sse2(u[31]); s[0] = _mm_packs_epi32(u[0], u[1]); s[1] = _mm_packs_epi32(u[2], u[3]); @@ -806,39 +725,22 @@ static void iadst16_8col(__m128i *const in) { u[14] = _mm_sub_epi32(v[6], v[14]); u[15] = _mm_sub_epi32(v[7], v[15]); - v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); - v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); - v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); - v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); - v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); - v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); - v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); - v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); - v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); - v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); - v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); - v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); - v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(v[12], 
DCT_CONST_BITS); - u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); + u[0] = dct_const_round_shift_sse2(u[0]); + u[1] = dct_const_round_shift_sse2(u[1]); + u[2] = dct_const_round_shift_sse2(u[2]); + u[3] = dct_const_round_shift_sse2(u[3]); + u[4] = dct_const_round_shift_sse2(u[4]); + u[5] = dct_const_round_shift_sse2(u[5]); + u[6] = dct_const_round_shift_sse2(u[6]); + u[7] = dct_const_round_shift_sse2(u[7]); + u[8] = dct_const_round_shift_sse2(u[8]); + u[9] = dct_const_round_shift_sse2(u[9]); + u[10] = dct_const_round_shift_sse2(u[10]); + u[11] = dct_const_round_shift_sse2(u[11]); + u[12] = dct_const_round_shift_sse2(u[12]); + u[13] = dct_const_round_shift_sse2(u[13]); + u[14] = dct_const_round_shift_sse2(u[14]); + u[15] = dct_const_round_shift_sse2(u[15]); x[0] = _mm_add_epi16(s[0], s[4]); x[1] = _mm_add_epi16(s[1], s[5]); @@ -901,39 +803,22 @@ static void iadst16_8col(__m128i *const in) { u[14] = _mm_sub_epi32(v[10], v[14]); u[15] = _mm_sub_epi32(v[11], v[15]); - u[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - - v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); + v[0] = dct_const_round_shift_sse2(u[0]); + v[1] = dct_const_round_shift_sse2(u[1]); + v[2] = dct_const_round_shift_sse2(u[2]); + v[3] = dct_const_round_shift_sse2(u[3]); + v[4] = dct_const_round_shift_sse2(u[4]); + v[5] = dct_const_round_shift_sse2(u[5]); + v[6] = dct_const_round_shift_sse2(u[6]); + v[7] = dct_const_round_shift_sse2(u[7]); + v[8] = dct_const_round_shift_sse2(u[8]); + v[9] = dct_const_round_shift_sse2(u[9]); + v[10] = dct_const_round_shift_sse2(u[10]); + v[11] = dct_const_round_shift_sse2(u[11]); + v[12] = dct_const_round_shift_sse2(u[12]); + v[13] = dct_const_round_shift_sse2(u[13]); + v[14] = dct_const_round_shift_sse2(u[14]); + v[15] = dct_const_round_shift_sse2(u[15]); s[0] = _mm_add_epi16(x[0], x[2]); s[1] = _mm_add_epi16(x[1], x[3]); @@ -989,8 +874,8 @@ void idct16_sse2(__m128i *const in0, __m128i *const in1) { void 
iadst16_sse2(__m128i *const in0, __m128i *const in1) { transpose_16bit_16x16(in0, in1); - iadst16_8col(in0); - iadst16_8col(in1); + vpx_iadst16_8col_sse2(in0); + vpx_iadst16_8col_sse2(in1); } // Group the coefficient calculation into smaller functions to prevent stack diff --git a/media/libvpx/libvpx/vpx_dsp/x86/inv_txfm_sse2.h b/media/libvpx/libvpx/vpx_dsp/x86/inv_txfm_sse2.h index 5cd5098f1499..b4bbd186d2b0 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/inv_txfm_sse2.h +++ b/media/libvpx/libvpx/vpx_dsp/x86/inv_txfm_sse2.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_X86_INV_TXFM_SSE2_H_ -#define VPX_DSP_X86_INV_TXFM_SSE2_H_ +#ifndef VPX_VPX_DSP_X86_INV_TXFM_SSE2_H_ +#define VPX_VPX_DSP_X86_INV_TXFM_SSE2_H_ #include <emmintrin.h> // SSE2 @@ -697,13 +697,14 @@ static INLINE void idct32_8x32_quarter_3_4_stage_4_to_7( } void idct4_sse2(__m128i *const in); -void idct8_sse2(__m128i *const in); +void vpx_idct8_sse2(__m128i *const in); void idct16_sse2(__m128i *const in0, __m128i *const in1); void iadst4_sse2(__m128i *const in); void iadst8_sse2(__m128i *const in); +void vpx_iadst16_8col_sse2(__m128i *const in); void iadst16_sse2(__m128i *const in0, __m128i *const in1); void idct32_1024_8x32(const __m128i *const in, __m128i *const out); void idct32_34_8x32_sse2(const __m128i *const in, __m128i *const out); void idct32_34_8x32_ssse3(const __m128i *const in, __m128i *const out); -#endif // VPX_DSP_X86_INV_TXFM_SSE2_H_ +#endif // VPX_VPX_DSP_X86_INV_TXFM_SSE2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/inv_txfm_ssse3.h b/media/libvpx/libvpx/vpx_dsp/x86/inv_txfm_ssse3.h index e785c8eda132..e9f0f6903347 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/inv_txfm_ssse3.h +++ b/media/libvpx/libvpx/vpx_dsp/x86/inv_txfm_ssse3.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree.
*/ -#ifndef VPX_DSP_X86_INV_TXFM_SSSE3_H_ -#define VPX_DSP_X86_INV_TXFM_SSSE3_H_ +#ifndef VPX_VPX_DSP_X86_INV_TXFM_SSSE3_H_ +#define VPX_VPX_DSP_X86_INV_TXFM_SSSE3_H_ #include <tmmintrin.h> @@ -107,4 +107,4 @@ static INLINE void idct8x8_12_add_kernel_ssse3(__m128i *const io /* io[8] */) { void idct32_135_8x32_ssse3(const __m128i *const in, __m128i *const out); -#endif // VPX_DSP_X86_INV_TXFM_SSSE3_H_ +#endif // VPX_VPX_DSP_X86_INV_TXFM_SSSE3_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/loopfilter_avx2.c b/media/libvpx/libvpx/vpx_dsp/x86/loopfilter_avx2.c index 6652a62dcfc3..be391992af88 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/loopfilter_avx2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/loopfilter_avx2.c @@ -13,38 +13,38 @@ #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" -void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { +void vpx_lpf_horizontal_16_avx2(unsigned char *s, int pitch, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh) { __m128i mask, hev, flat, flat2; const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; __m128i abs_p1p0; - const __m128i thresh = - _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)_thresh[0])); - const __m128i limit = _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)_limit[0])); - const __m128i blimit = - _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)_blimit[0])); + const __m128i thresh_v = + _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)thresh[0])); + const __m128i limit_v = _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)limit[0])); + const __m128i blimit_v = + _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)blimit[0])); - q4p4 = _mm_loadl_epi64((__m128i *)(s - 5 * p)); + q4p4 = _mm_loadl_epi64((__m128i *)(s - 5 * pitch)); q4p4 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q4p4), (__m64 *)(s + 4 * p))); - q3p3 = _mm_loadl_epi64((__m128i *)(s - 4 * p)); + _mm_loadh_pi(_mm_castsi128_ps(q4p4), (__m64 *)(s + 4 * pitch))); + q3p3 = _mm_loadl_epi64((__m128i *)(s - 4 * pitch)); q3p3 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q3p3), (__m64 *)(s + 3 * p))); - q2p2 = _mm_loadl_epi64((__m128i *)(s - 3 * p)); + _mm_loadh_pi(_mm_castsi128_ps(q3p3), (__m64 *)(s + 3 * pitch))); + q2p2 = _mm_loadl_epi64((__m128i *)(s - 3 * pitch)); q2p2 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q2p2), (__m64 *)(s + 2 * p))); - q1p1 = _mm_loadl_epi64((__m128i *)(s - 2 * p)); + _mm_loadh_pi(_mm_castsi128_ps(q2p2), (__m64 *)(s + 2 * pitch))); + q1p1 = _mm_loadl_epi64((__m128i *)(s - 2 * pitch)); q1p1 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q1p1), (__m64 *)(s + 1 * p))); + _mm_loadh_pi(_mm_castsi128_ps(q1p1), (__m64 *)(s + 1 * pitch))); p1q1 = _mm_shuffle_epi32(q1p1, 78); - q0p0 = _mm_loadl_epi64((__m128i *)(s - 1 * p)); + q0p0 = _mm_loadl_epi64((__m128i *)(s - 1 * pitch)); q0p0 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q0p0), (__m64 *)(s - 0 * p))); + _mm_loadh_pi(_mm_castsi128_ps(q0p0), (__m64 *)(s - 0 * pitch))); p0q0 = _mm_shuffle_epi32(q0p0, 78); { @@ -52,19 +52,19 @@ void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p, abs_p1p0 = _mm_or_si128(_mm_subs_epu8(q1p1, q0p0), _mm_subs_epu8(q0p0, q1p1)); abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); - fe = _mm_set1_epi8(0xfe); + fe = _mm_set1_epi8((int8_t)0xfe); ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); abs_p0q0 = _mm_or_si128(_mm_subs_epu8(q0p0, p0q0), _mm_subs_epu8(p0q0, q0p0)); abs_p1q1 =
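/* In the loop-filter hunks that follow, the p -> pitch and _blimit ->
 * blimit renames are purely cosmetic; the arithmetic is unchanged, and the
 * broadcast vectors move the leading underscore to a _v suffix (blimit_v,
 * limit_v, thresh_v) so the locals no longer collide with the parameter
 * names. The qNpN registers pack both sides of the horizontal edge: row pN,
 * the (N+1)-th row above the edge, sits in the low 8 bytes and row qN,
 * below the edge, in the high 8 bytes, so one 128-bit operation filters
 * both sides at once. Written out, the load pattern above is:
 *
 *   __m128i v = _mm_loadl_epi64((__m128i *)(s - (n + 1) * pitch));   // pN
 *   v = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(v),
 *                                     (__m64 *)(s + n * pitch)));    // qN
 */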
_mm_or_si128(_mm_subs_epu8(q1p1, p1q1), _mm_subs_epu8(p1q1, q1p1)); flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); + hev = _mm_subs_epu8(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; mask = _mm_max_epu8(abs_p1p0, mask); @@ -76,7 +76,7 @@ void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p, _mm_or_si128(_mm_subs_epu8(q3p3, q2p2), _mm_subs_epu8(q2p2, q3p3))); mask = _mm_max_epu8(work, mask); mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); - mask = _mm_subs_epu8(mask, limit); + mask = _mm_subs_epu8(mask, limit_v); mask = _mm_cmpeq_epi8(mask, zero); } @@ -84,7 +84,7 @@ void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p, { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); + const __m128i t80 = _mm_set1_epi8((int8_t)0x80); const __m128i t1 = _mm_set1_epi16(0x1); __m128i qs1ps1 = _mm_xor_si128(q1p1, t80); __m128i qs0ps0 = _mm_xor_si128(q0p0, t80); @@ -136,21 +136,21 @@ void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p, flat = _mm_cmpeq_epi8(flat, zero); flat = _mm_and_si128(flat, mask); - q5p5 = _mm_loadl_epi64((__m128i *)(s - 6 * p)); + q5p5 = _mm_loadl_epi64((__m128i *)(s - 6 * pitch)); q5p5 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q5p5), (__m64 *)(s + 5 * p))); + _mm_loadh_pi(_mm_castsi128_ps(q5p5), (__m64 *)(s + 5 * pitch))); - q6p6 = _mm_loadl_epi64((__m128i *)(s - 7 * p)); + q6p6 = _mm_loadl_epi64((__m128i *)(s - 7 * pitch)); q6p6 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q6p6), (__m64 *)(s + 6 * p))); + _mm_loadh_pi(_mm_castsi128_ps(q6p6), (__m64 *)(s + 6 * pitch))); flat2 = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(q4p4, q0p0), _mm_subs_epu8(q0p0, q4p4)), _mm_or_si128(_mm_subs_epu8(q5p5, q0p0), _mm_subs_epu8(q0p0, q5p5))); - q7p7 = _mm_loadl_epi64((__m128i *)(s - 8 * p)); + q7p7 = _mm_loadl_epi64((__m128i *)(s - 8 * pitch)); q7p7 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q7p7), (__m64 *)(s + 7 * p))); + _mm_loadh_pi(_mm_castsi128_ps(q7p7), (__m64 *)(s + 7 * pitch))); work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(q6p6, q0p0), _mm_subs_epu8(q0p0, q6p6)), @@ -321,44 +321,44 @@ void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p, q6p6 = _mm_andnot_si128(flat2, q6p6); flat2_q6p6 = _mm_and_si128(flat2, flat2_q6p6); q6p6 = _mm_or_si128(q6p6, flat2_q6p6); - _mm_storel_epi64((__m128i *)(s - 7 * p), q6p6); - _mm_storeh_pi((__m64 *)(s + 6 * p), _mm_castsi128_ps(q6p6)); + _mm_storel_epi64((__m128i *)(s - 7 * pitch), q6p6); + _mm_storeh_pi((__m64 *)(s + 6 * pitch), _mm_castsi128_ps(q6p6)); q5p5 = _mm_andnot_si128(flat2, q5p5); flat2_q5p5 = _mm_and_si128(flat2, flat2_q5p5); q5p5 = _mm_or_si128(q5p5, flat2_q5p5); - _mm_storel_epi64((__m128i *)(s - 6 * p), q5p5); - _mm_storeh_pi((__m64 *)(s + 5 * p), _mm_castsi128_ps(q5p5)); + _mm_storel_epi64((__m128i *)(s - 6 * pitch), q5p5); + _mm_storeh_pi((__m64 *)(s + 5 * pitch), _mm_castsi128_ps(q5p5)); q4p4 = _mm_andnot_si128(flat2, q4p4); flat2_q4p4 = _mm_and_si128(flat2, flat2_q4p4); q4p4 = _mm_or_si128(q4p4, flat2_q4p4); - _mm_storel_epi64((__m128i *)(s - 5 * p), q4p4); - _mm_storeh_pi((__m64 *)(s + 4 * p), _mm_castsi128_ps(q4p4)); + 
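/* The new (int8_t) casts on constants such as 0x80, 0xe0 and 0xfe are
 * needed because _mm_set1_epi8() takes a char, while a literal like 0x80 is
 * an int with value 128; on targets where char is signed, the implicit
 * narrowing is implementation-defined and draws conversion warnings. The
 * cast pins down the intended bit pattern:
 *
 *   const __m128i t80 = _mm_set1_epi8((int8_t)0x80);  // every byte = 0x80
 */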
_mm_storel_epi64((__m128i *)(s - 5 * pitch), q4p4); + _mm_storeh_pi((__m64 *)(s + 4 * pitch), _mm_castsi128_ps(q4p4)); q3p3 = _mm_andnot_si128(flat2, q3p3); flat2_q3p3 = _mm_and_si128(flat2, flat2_q3p3); q3p3 = _mm_or_si128(q3p3, flat2_q3p3); - _mm_storel_epi64((__m128i *)(s - 4 * p), q3p3); - _mm_storeh_pi((__m64 *)(s + 3 * p), _mm_castsi128_ps(q3p3)); + _mm_storel_epi64((__m128i *)(s - 4 * pitch), q3p3); + _mm_storeh_pi((__m64 *)(s + 3 * pitch), _mm_castsi128_ps(q3p3)); q2p2 = _mm_andnot_si128(flat2, q2p2); flat2_q2p2 = _mm_and_si128(flat2, flat2_q2p2); q2p2 = _mm_or_si128(q2p2, flat2_q2p2); - _mm_storel_epi64((__m128i *)(s - 3 * p), q2p2); - _mm_storeh_pi((__m64 *)(s + 2 * p), _mm_castsi128_ps(q2p2)); + _mm_storel_epi64((__m128i *)(s - 3 * pitch), q2p2); + _mm_storeh_pi((__m64 *)(s + 2 * pitch), _mm_castsi128_ps(q2p2)); q1p1 = _mm_andnot_si128(flat2, q1p1); flat2_q1p1 = _mm_and_si128(flat2, flat2_q1p1); q1p1 = _mm_or_si128(q1p1, flat2_q1p1); - _mm_storel_epi64((__m128i *)(s - 2 * p), q1p1); - _mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(q1p1)); + _mm_storel_epi64((__m128i *)(s - 2 * pitch), q1p1); + _mm_storeh_pi((__m64 *)(s + 1 * pitch), _mm_castsi128_ps(q1p1)); q0p0 = _mm_andnot_si128(flat2, q0p0); flat2_q0p0 = _mm_and_si128(flat2, flat2_q0p0); q0p0 = _mm_or_si128(q0p0, flat2_q0p0); - _mm_storel_epi64((__m128i *)(s - 1 * p), q0p0); - _mm_storeh_pi((__m64 *)(s - 0 * p), _mm_castsi128_ps(q0p0)); + _mm_storel_epi64((__m128i *)(s - 1 * pitch), q0p0); + _mm_storeh_pi((__m64 *)(s - 0 * pitch), _mm_castsi128_ps(q0p0)); } } @@ -367,10 +367,10 @@ DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = { 8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128 }; -void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { +void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int pitch, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh) { __m128i mask, hev, flat, flat2; const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); @@ -380,32 +380,32 @@ void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p, __m256i p256_7, q256_7, p256_6, q256_6, p256_5, q256_5, p256_4, q256_4, p256_3, q256_3, p256_2, q256_2, p256_1, q256_1, p256_0, q256_0; - const __m128i thresh = - _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)_thresh[0])); - const __m128i limit = _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)_limit[0])); - const __m128i blimit = - _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)_blimit[0])); + const __m128i thresh_v = + _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)thresh[0])); + const __m128i limit_v = _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)limit[0])); + const __m128i blimit_v = + _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)blimit[0])); - p256_4 = - _mm256_castpd_si256(_mm256_broadcast_pd((__m128d const *)(s - 5 * p))); - p256_3 = - _mm256_castpd_si256(_mm256_broadcast_pd((__m128d const *)(s - 4 * p))); - p256_2 = - _mm256_castpd_si256(_mm256_broadcast_pd((__m128d const *)(s - 3 * p))); - p256_1 = - _mm256_castpd_si256(_mm256_broadcast_pd((__m128d const *)(s - 2 * p))); - p256_0 = - _mm256_castpd_si256(_mm256_broadcast_pd((__m128d const *)(s - 1 * p))); - q256_0 = - _mm256_castpd_si256(_mm256_broadcast_pd((__m128d const *)(s - 0 * p))); - q256_1 = - _mm256_castpd_si256(_mm256_broadcast_pd((__m128d const *)(s + 1 * p))); - q256_2 = - _mm256_castpd_si256(_mm256_broadcast_pd((__m128d const *)(s + 2 * p))); - 
q256_3 = - _mm256_castpd_si256(_mm256_broadcast_pd((__m128d const *)(s + 3 * p))); - q256_4 = - _mm256_castpd_si256(_mm256_broadcast_pd((__m128d const *)(s + 4 * p))); + p256_4 = _mm256_castpd_si256( + _mm256_broadcast_pd((__m128d const *)(s - 5 * pitch))); + p256_3 = _mm256_castpd_si256( + _mm256_broadcast_pd((__m128d const *)(s - 4 * pitch))); + p256_2 = _mm256_castpd_si256( + _mm256_broadcast_pd((__m128d const *)(s - 3 * pitch))); + p256_1 = _mm256_castpd_si256( + _mm256_broadcast_pd((__m128d const *)(s - 2 * pitch))); + p256_0 = _mm256_castpd_si256( + _mm256_broadcast_pd((__m128d const *)(s - 1 * pitch))); + q256_0 = _mm256_castpd_si256( + _mm256_broadcast_pd((__m128d const *)(s - 0 * pitch))); + q256_1 = _mm256_castpd_si256( + _mm256_broadcast_pd((__m128d const *)(s + 1 * pitch))); + q256_2 = _mm256_castpd_si256( + _mm256_broadcast_pd((__m128d const *)(s + 2 * pitch))); + q256_3 = _mm256_castpd_si256( + _mm256_broadcast_pd((__m128d const *)(s + 3 * pitch))); + q256_4 = _mm256_castpd_si256( + _mm256_broadcast_pd((__m128d const *)(s + 4 * pitch))); p4 = _mm256_castsi256_si128(p256_4); p3 = _mm256_castsi256_si128(p256_3); @@ -423,7 +423,7 @@ void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p, _mm_or_si128(_mm_subs_epu8(p1, p0), _mm_subs_epu8(p0, p1)); const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), _mm_subs_epu8(q0, q1)); - const __m128i fe = _mm_set1_epi8(0xfe); + const __m128i fe = _mm_set1_epi8((int8_t)0xfe); const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), _mm_subs_epu8(q0, p0)); @@ -431,12 +431,12 @@ void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p, _mm_or_si128(_mm_subs_epu8(p1, q1), _mm_subs_epu8(q1, p1)); __m128i work; flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); + hev = _mm_subs_epu8(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; mask = _mm_max_epu8(flat, mask); @@ -450,7 +450,7 @@ void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p, _mm_or_si128(_mm_subs_epu8(q2, q1), _mm_subs_epu8(q1, q2)), _mm_or_si128(_mm_subs_epu8(q3, q2), _mm_subs_epu8(q2, q3))); mask = _mm_max_epu8(work, mask); - mask = _mm_subs_epu8(mask, limit); + mask = _mm_subs_epu8(mask, limit_v); mask = _mm_cmpeq_epi8(mask, zero); } @@ -458,8 +458,8 @@ void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p, { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i te0 = _mm_set1_epi8(0xe0); + const __m128i t80 = _mm_set1_epi8((int8_t)0x80); + const __m128i te0 = _mm_set1_epi8((int8_t)0xe0); const __m128i t1f = _mm_set1_epi8(0x1f); const __m128i t1 = _mm_set1_epi8(0x1); const __m128i t7f = _mm_set1_epi8(0x7f); @@ -532,9 +532,9 @@ void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p, flat = _mm_and_si128(flat, mask); p256_5 = _mm256_castpd_si256( - _mm256_broadcast_pd((__m128d const *)(s - 6 * p))); + _mm256_broadcast_pd((__m128d const *)(s - 6 * pitch))); q256_5 = _mm256_castpd_si256( - _mm256_broadcast_pd((__m128d const *)(s + 5 * p))); + _mm256_broadcast_pd((__m128d const *)(s + 5 * pitch))); p5 = 
_mm256_castsi256_si128(p256_5); q5 = _mm256_castsi256_si128(q256_5); flat2 = _mm_max_epu8( @@ -543,9 +543,9 @@ void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p, flat2 = _mm_max_epu8(work, flat2); p256_6 = _mm256_castpd_si256( - _mm256_broadcast_pd((__m128d const *)(s - 7 * p))); + _mm256_broadcast_pd((__m128d const *)(s - 7 * pitch))); q256_6 = _mm256_castpd_si256( - _mm256_broadcast_pd((__m128d const *)(s + 6 * p))); + _mm256_broadcast_pd((__m128d const *)(s + 6 * pitch))); p6 = _mm256_castsi256_si128(p256_6); q6 = _mm256_castsi256_si128(q256_6); work = _mm_max_epu8( @@ -555,9 +555,9 @@ void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p, flat2 = _mm_max_epu8(work, flat2); p256_7 = _mm256_castpd_si256( - _mm256_broadcast_pd((__m128d const *)(s - 8 * p))); + _mm256_broadcast_pd((__m128d const *)(s - 8 * pitch))); q256_7 = _mm256_castpd_si256( - _mm256_broadcast_pd((__m128d const *)(s + 7 * p))); + _mm256_broadcast_pd((__m128d const *)(s + 7 * pitch))); p7 = _mm256_castsi256_si128(p256_7); q7 = _mm256_castsi256_si128(q256_7); work = _mm_max_epu8( @@ -843,71 +843,71 @@ void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int p, p6 = _mm_andnot_si128(flat2, p6); flat2_p6 = _mm_and_si128(flat2, flat2_p6); p6 = _mm_or_si128(flat2_p6, p6); - _mm_storeu_si128((__m128i *)(s - 7 * p), p6); + _mm_storeu_si128((__m128i *)(s - 7 * pitch), p6); p5 = _mm_andnot_si128(flat2, p5); flat2_p5 = _mm_and_si128(flat2, flat2_p5); p5 = _mm_or_si128(flat2_p5, p5); - _mm_storeu_si128((__m128i *)(s - 6 * p), p5); + _mm_storeu_si128((__m128i *)(s - 6 * pitch), p5); p4 = _mm_andnot_si128(flat2, p4); flat2_p4 = _mm_and_si128(flat2, flat2_p4); p4 = _mm_or_si128(flat2_p4, p4); - _mm_storeu_si128((__m128i *)(s - 5 * p), p4); + _mm_storeu_si128((__m128i *)(s - 5 * pitch), p4); p3 = _mm_andnot_si128(flat2, p3); flat2_p3 = _mm_and_si128(flat2, flat2_p3); p3 = _mm_or_si128(flat2_p3, p3); - _mm_storeu_si128((__m128i *)(s - 4 * p), p3); + _mm_storeu_si128((__m128i *)(s - 4 * pitch), p3); p2 = _mm_andnot_si128(flat2, p2); flat2_p2 = _mm_and_si128(flat2, flat2_p2); p2 = _mm_or_si128(flat2_p2, p2); - _mm_storeu_si128((__m128i *)(s - 3 * p), p2); + _mm_storeu_si128((__m128i *)(s - 3 * pitch), p2); p1 = _mm_andnot_si128(flat2, p1); flat2_p1 = _mm_and_si128(flat2, flat2_p1); p1 = _mm_or_si128(flat2_p1, p1); - _mm_storeu_si128((__m128i *)(s - 2 * p), p1); + _mm_storeu_si128((__m128i *)(s - 2 * pitch), p1); p0 = _mm_andnot_si128(flat2, p0); flat2_p0 = _mm_and_si128(flat2, flat2_p0); p0 = _mm_or_si128(flat2_p0, p0); - _mm_storeu_si128((__m128i *)(s - 1 * p), p0); + _mm_storeu_si128((__m128i *)(s - 1 * pitch), p0); q0 = _mm_andnot_si128(flat2, q0); flat2_q0 = _mm_and_si128(flat2, flat2_q0); q0 = _mm_or_si128(flat2_q0, q0); - _mm_storeu_si128((__m128i *)(s - 0 * p), q0); + _mm_storeu_si128((__m128i *)(s - 0 * pitch), q0); q1 = _mm_andnot_si128(flat2, q1); flat2_q1 = _mm_and_si128(flat2, flat2_q1); q1 = _mm_or_si128(flat2_q1, q1); - _mm_storeu_si128((__m128i *)(s + 1 * p), q1); + _mm_storeu_si128((__m128i *)(s + 1 * pitch), q1); q2 = _mm_andnot_si128(flat2, q2); flat2_q2 = _mm_and_si128(flat2, flat2_q2); q2 = _mm_or_si128(flat2_q2, q2); - _mm_storeu_si128((__m128i *)(s + 2 * p), q2); + _mm_storeu_si128((__m128i *)(s + 2 * pitch), q2); q3 = _mm_andnot_si128(flat2, q3); flat2_q3 = _mm_and_si128(flat2, flat2_q3); q3 = _mm_or_si128(flat2_q3, q3); - _mm_storeu_si128((__m128i *)(s + 3 * p), q3); + _mm_storeu_si128((__m128i *)(s + 3 * pitch), q3); q4 = _mm_andnot_si128(flat2, q4); flat2_q4 = _mm_and_si128(flat2, flat2_q4); 
q4 = _mm_or_si128(flat2_q4, q4); - _mm_storeu_si128((__m128i *)(s + 4 * p), q4); + _mm_storeu_si128((__m128i *)(s + 4 * pitch), q4); q5 = _mm_andnot_si128(flat2, q5); flat2_q5 = _mm_and_si128(flat2, flat2_q5); q5 = _mm_or_si128(flat2_q5, q5); - _mm_storeu_si128((__m128i *)(s + 5 * p), q5); + _mm_storeu_si128((__m128i *)(s + 5 * pitch), q5); q6 = _mm_andnot_si128(flat2, q6); flat2_q6 = _mm_and_si128(flat2, flat2_q6); q6 = _mm_or_si128(flat2_q6, q6); - _mm_storeu_si128((__m128i *)(s + 6 * p), q6); + _mm_storeu_si128((__m128i *)(s + 6 * pitch), q6); } } diff --git a/media/libvpx/libvpx/vpx_dsp/x86/loopfilter_intrin_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/loopfilter_intrin_sse2.c index 28e6fd65f927..f90522cd7d08 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/loopfilter_intrin_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/loopfilter_intrin_sse2.c @@ -13,6 +13,7 @@ #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" #include "vpx_ports/emmintrin_compat.h" +#include "vpx_dsp/x86/mem_sse2.h" static INLINE __m128i abs_diff(__m128i a, __m128i b) { return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a)); @@ -30,7 +31,7 @@ static INLINE __m128i abs_diff(__m128i a, __m128i b) { /* const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); */ \ hev = \ _mm_unpacklo_epi8(_mm_max_epu8(flat, _mm_srli_si128(flat, 8)), zero); \ - hev = _mm_cmpgt_epi16(hev, thresh); \ + hev = _mm_cmpgt_epi16(hev, thresh_v); \ hev = _mm_packs_epi16(hev, hev); \ \ /* const int8_t mask = filter_mask(*limit, *blimit, */ \ @@ -51,7 +52,7 @@ static INLINE __m128i abs_diff(__m128i a, __m128i b) { flat = _mm_max_epu8(work, flat); \ flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); \ mask = _mm_unpacklo_epi64(mask, flat); \ - mask = _mm_subs_epu8(mask, limit); \ + mask = _mm_subs_epu8(mask, limit_v); \ mask = _mm_cmpeq_epi8(mask, zero); \ mask = _mm_and_si128(mask, _mm_srli_si128(mask, 8)); \ } while (0) @@ -60,7 +61,7 @@ static INLINE __m128i abs_diff(__m128i a, __m128i b) { do { \ const __m128i t3t4 = \ _mm_set_epi8(3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4); \ - const __m128i t80 = _mm_set1_epi8(0x80); \ + const __m128i t80 = _mm_set1_epi8((int8_t)0x80); \ __m128i filter, filter2filter1, work; \ \ ps1ps0 = _mm_xor_si128(p1p0, t80); /* ^ 0x80 */ \ @@ -103,27 +104,26 @@ static INLINE __m128i abs_diff(__m128i a, __m128i b) { ps1ps0 = _mm_xor_si128(ps1ps0, t80); /* ^ 0x80 */ \ } while (0) -void vpx_lpf_horizontal_4_sse2(uint8_t *s, int p /* pitch */, - const uint8_t *_blimit, const uint8_t *_limit, - const uint8_t *_thresh) { +void vpx_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { const __m128i zero = _mm_set1_epi16(0); - const __m128i limit = - _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)_blimit), - _mm_loadl_epi64((const __m128i *)_limit)); - const __m128i thresh = - _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh), zero); + const __m128i limit_v = + _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)blimit), + _mm_loadl_epi64((const __m128i *)limit)); + const __m128i thresh_v = + _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)thresh), zero); const __m128i ff = _mm_cmpeq_epi8(zero, zero); __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; __m128i mask, hev; - p3p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)), - _mm_loadl_epi64((__m128i *)(s - 4 * p))); - q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * p)), - _mm_loadl_epi64((__m128i *)(s + 1 * p))); - q0p0 = 
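/* The FILTER4 macro above is the SIMD form of the standard VP9 four-tap
 * deblocking filter; in scalar terms it is roughly the following, where
 * signed_char_clamp() saturates to [-128, 127] and the pixels were XORed
 * with 0x80 beforehand so unsigned bytes behave as signed values:
 *
 *   filter = signed_char_clamp(ps1 - qs1) & hev;
 *   filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask;
 *   filter1 = signed_char_clamp(filter + 4) >> 3;
 *   filter2 = signed_char_clamp(filter + 3) >> 3;
 *   oq0 = signed_char_clamp(qs0 - filter1);
 *   op0 = signed_char_clamp(ps0 + filter2);
 *   filter = (filter1 + 1) >> 1;
 *   filter &= ~hev;
 *   oq1 = signed_char_clamp(qs1 - filter);
 *   op1 = signed_char_clamp(ps1 + filter);
 */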
_mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * p)), - _mm_loadl_epi64((__m128i *)(s + 0 * p))); - q3q2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s + 2 * p)), - _mm_loadl_epi64((__m128i *)(s + 3 * p))); + p3p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * pitch)), + _mm_loadl_epi64((__m128i *)(s - 4 * pitch))); + q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * pitch)), + _mm_loadl_epi64((__m128i *)(s + 1 * pitch))); + q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * pitch)), + _mm_loadl_epi64((__m128i *)(s + 0 * pitch))); + q3q2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s + 2 * pitch)), + _mm_loadl_epi64((__m128i *)(s + 3 * pitch))); p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); p2p1 = _mm_unpacklo_epi64(q1p1, p3p2); q1q0 = _mm_unpackhi_epi64(q0p0, q1p1); @@ -132,41 +132,40 @@ void vpx_lpf_horizontal_4_sse2(uint8_t *s, int p /* pitch */, FILTER_HEV_MASK; FILTER4; - _mm_storeh_pi((__m64 *)(s - 2 * p), _mm_castsi128_ps(ps1ps0)); // *op1 - _mm_storel_epi64((__m128i *)(s - 1 * p), ps1ps0); // *op0 - _mm_storel_epi64((__m128i *)(s + 0 * p), qs1qs0); // *oq0 - _mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(qs1qs0)); // *oq1 + _mm_storeh_pi((__m64 *)(s - 2 * pitch), _mm_castsi128_ps(ps1ps0)); // *op1 + _mm_storel_epi64((__m128i *)(s - 1 * pitch), ps1ps0); // *op0 + _mm_storel_epi64((__m128i *)(s + 0 * pitch), qs1qs0); // *oq0 + _mm_storeh_pi((__m64 *)(s + 1 * pitch), _mm_castsi128_ps(qs1qs0)); // *oq1 } -void vpx_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */, - const uint8_t *_blimit, const uint8_t *_limit, - const uint8_t *_thresh) { +void vpx_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { const __m128i zero = _mm_set1_epi16(0); - const __m128i limit = - _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)_blimit), - _mm_loadl_epi64((const __m128i *)_limit)); - const __m128i thresh = - _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh), zero); + const __m128i limit_v = + _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)blimit), + _mm_loadl_epi64((const __m128i *)limit)); + const __m128i thresh_v = + _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)thresh), zero); const __m128i ff = _mm_cmpeq_epi8(zero, zero); __m128i x0, x1, x2, x3; __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; __m128i mask, hev; // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 - q1q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 0 * p - 4)), - _mm_loadl_epi64((__m128i *)(s + 1 * p - 4))); + q1q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 0 * pitch - 4)), + _mm_loadl_epi64((__m128i *)(s + 1 * pitch - 4))); // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 - x1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 2 * p - 4)), - _mm_loadl_epi64((__m128i *)(s + 3 * p - 4))); + x1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 2 * pitch - 4)), + _mm_loadl_epi64((__m128i *)(s + 3 * pitch - 4))); // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 - x2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 4 * p - 4)), - _mm_loadl_epi64((__m128i *)(s + 5 * p - 4))); + x2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 4 * pitch - 4)), + _mm_loadl_epi64((__m128i *)(s + 5 * pitch - 4))); // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 - x3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 6 * p - 4)), - _mm_loadl_epi64((__m128i *)(s + 7 * p - 4))); + x3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 6 * pitch - 4)), + 
_mm_loadl_epi64((__m128i *)(s + 7 * pitch - 4))); // Transpose 8x8 // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 @@ -212,69 +211,69 @@ void vpx_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */, // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 ps1ps0 = _mm_unpacklo_epi8(ps1ps0, x0); - *(int *)(s + 0 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + storeu_uint32(s + 0 * pitch - 2, _mm_cvtsi128_si32(ps1ps0)); ps1ps0 = _mm_srli_si128(ps1ps0, 4); - *(int *)(s + 1 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + storeu_uint32(s + 1 * pitch - 2, _mm_cvtsi128_si32(ps1ps0)); ps1ps0 = _mm_srli_si128(ps1ps0, 4); - *(int *)(s + 2 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + storeu_uint32(s + 2 * pitch - 2, _mm_cvtsi128_si32(ps1ps0)); ps1ps0 = _mm_srli_si128(ps1ps0, 4); - *(int *)(s + 3 * p - 2) = _mm_cvtsi128_si32(ps1ps0); + storeu_uint32(s + 3 * pitch - 2, _mm_cvtsi128_si32(ps1ps0)); - *(int *)(s + 4 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + storeu_uint32(s + 4 * pitch - 2, _mm_cvtsi128_si32(qs1qs0)); qs1qs0 = _mm_srli_si128(qs1qs0, 4); - *(int *)(s + 5 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + storeu_uint32(s + 5 * pitch - 2, _mm_cvtsi128_si32(qs1qs0)); qs1qs0 = _mm_srli_si128(qs1qs0, 4); - *(int *)(s + 6 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + storeu_uint32(s + 6 * pitch - 2, _mm_cvtsi128_si32(qs1qs0)); qs1qs0 = _mm_srli_si128(qs1qs0, 4); - *(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0); + storeu_uint32(s + 7 * pitch - 2, _mm_cvtsi128_si32(qs1qs0)); } -void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { +void vpx_lpf_horizontal_16_sse2(unsigned char *s, int pitch, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh) { const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); - const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); - const __m128i limit = _mm_load_si128((const __m128i *)_limit); - const __m128i thresh = _mm_load_si128((const __m128i *)_thresh); + const __m128i blimit_v = _mm_load_si128((const __m128i *)blimit); + const __m128i limit_v = _mm_load_si128((const __m128i *)limit); + const __m128i thresh_v = _mm_load_si128((const __m128i *)thresh); __m128i mask, hev, flat, flat2; __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; __m128i abs_p1p0; - q4p4 = _mm_loadl_epi64((__m128i *)(s - 5 * p)); + q4p4 = _mm_loadl_epi64((__m128i *)(s - 5 * pitch)); q4p4 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q4p4), (__m64 *)(s + 4 * p))); - q3p3 = _mm_loadl_epi64((__m128i *)(s - 4 * p)); + _mm_loadh_pi(_mm_castsi128_ps(q4p4), (__m64 *)(s + 4 * pitch))); + q3p3 = _mm_loadl_epi64((__m128i *)(s - 4 * pitch)); q3p3 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q3p3), (__m64 *)(s + 3 * p))); - q2p2 = _mm_loadl_epi64((__m128i *)(s - 3 * p)); + _mm_loadh_pi(_mm_castsi128_ps(q3p3), (__m64 *)(s + 3 * pitch))); + q2p2 = _mm_loadl_epi64((__m128i *)(s - 3 * pitch)); q2p2 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q2p2), (__m64 *)(s + 2 * p))); - q1p1 = _mm_loadl_epi64((__m128i *)(s - 2 * p)); + _mm_loadh_pi(_mm_castsi128_ps(q2p2), (__m64 *)(s + 2 * pitch))); + q1p1 = _mm_loadl_epi64((__m128i *)(s - 2 * pitch)); q1p1 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q1p1), (__m64 *)(s + 1 * p))); + _mm_loadh_pi(_mm_castsi128_ps(q1p1), (__m64 *)(s + 1 * pitch))); p1q1 = _mm_shuffle_epi32(q1p1, 78); - q0p0 = _mm_loadl_epi64((__m128i *)(s - 1 * p)); + q0p0 = _mm_loadl_epi64((__m128i *)(s - 1 * pitch)); q0p0 = 
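/* storeu_uint32() above replaces type-punned stores of the form
 * *(int *)(s + k * pitch - 2) = _mm_cvtsi128_si32(x), using the helper from
 * the newly included vpx_dsp/x86/mem_sse2.h. A sketch, assuming the usual
 * memcpy-based definition, which is unaligned-safe and avoids strict-
 * aliasing violations while still compiling to a single 32-bit store:
 *
 *   static INLINE void storeu_uint32(void *dst, uint32_t v) {
 *     memcpy(dst, &v, sizeof(v));
 *   }
 */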
_mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q0p0), (__m64 *)(s - 0 * p))); + _mm_loadh_pi(_mm_castsi128_ps(q0p0), (__m64 *)(s - 0 * pitch))); p0q0 = _mm_shuffle_epi32(q0p0, 78); { __m128i abs_p1q1, abs_p0q0, abs_q1q0, fe, ff, work; abs_p1p0 = abs_diff(q1p1, q0p0); abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); - fe = _mm_set1_epi8(0xfe); + fe = _mm_set1_epi8((int8_t)0xfe); ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); abs_p0q0 = abs_diff(q0p0, p0q0); abs_p1q1 = abs_diff(q1p1, p1q1); flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); + hev = _mm_subs_epu8(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; mask = _mm_max_epu8(abs_p1p0, mask); @@ -284,7 +283,7 @@ void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p, work = _mm_max_epu8(abs_diff(q2p2, q1p1), abs_diff(q3p3, q2p2)); mask = _mm_max_epu8(work, mask); mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); - mask = _mm_subs_epu8(mask, limit); + mask = _mm_subs_epu8(mask, limit_v); mask = _mm_cmpeq_epi8(mask, zero); } @@ -292,7 +291,7 @@ void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p, { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); + const __m128i t80 = _mm_set1_epi8((int8_t)0x80); const __m128i t1 = _mm_set1_epi16(0x1); __m128i qs1ps1 = _mm_xor_si128(q1p1, t80); __m128i qs0ps0 = _mm_xor_si128(q0p0, t80); @@ -342,18 +341,18 @@ void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p, flat = _mm_cmpeq_epi8(flat, zero); flat = _mm_and_si128(flat, mask); - q5p5 = _mm_loadl_epi64((__m128i *)(s - 6 * p)); + q5p5 = _mm_loadl_epi64((__m128i *)(s - 6 * pitch)); q5p5 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q5p5), (__m64 *)(s + 5 * p))); + _mm_loadh_pi(_mm_castsi128_ps(q5p5), (__m64 *)(s + 5 * pitch))); - q6p6 = _mm_loadl_epi64((__m128i *)(s - 7 * p)); + q6p6 = _mm_loadl_epi64((__m128i *)(s - 7 * pitch)); q6p6 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q6p6), (__m64 *)(s + 6 * p))); + _mm_loadh_pi(_mm_castsi128_ps(q6p6), (__m64 *)(s + 6 * pitch))); flat2 = _mm_max_epu8(abs_diff(q4p4, q0p0), abs_diff(q5p5, q0p0)); - q7p7 = _mm_loadl_epi64((__m128i *)(s - 8 * p)); + q7p7 = _mm_loadl_epi64((__m128i *)(s - 8 * pitch)); q7p7 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q7p7), (__m64 *)(s + 7 * p))); + _mm_loadh_pi(_mm_castsi128_ps(q7p7), (__m64 *)(s + 7 * pitch))); work = _mm_max_epu8(abs_diff(q6p6, q0p0), abs_diff(q7p7, q0p0)); flat2 = _mm_max_epu8(work, flat2); flat2 = _mm_max_epu8(flat2, _mm_srli_si128(flat2, 8)); @@ -520,44 +519,44 @@ void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p, q6p6 = _mm_andnot_si128(flat2, q6p6); flat2_q6p6 = _mm_and_si128(flat2, flat2_q6p6); q6p6 = _mm_or_si128(q6p6, flat2_q6p6); - _mm_storel_epi64((__m128i *)(s - 7 * p), q6p6); - _mm_storeh_pi((__m64 *)(s + 6 * p), _mm_castsi128_ps(q6p6)); + _mm_storel_epi64((__m128i *)(s - 7 * pitch), q6p6); + _mm_storeh_pi((__m64 *)(s + 6 * pitch), _mm_castsi128_ps(q6p6)); q5p5 = _mm_andnot_si128(flat2, q5p5); flat2_q5p5 = _mm_and_si128(flat2, flat2_q5p5); q5p5 = _mm_or_si128(q5p5, flat2_q5p5); - _mm_storel_epi64((__m128i *)(s - 6 * p), q5p5); - _mm_storeh_pi((__m64 *)(s + 5 * p), 
_mm_castsi128_ps(q5p5)); + _mm_storel_epi64((__m128i *)(s - 6 * pitch), q5p5); + _mm_storeh_pi((__m64 *)(s + 5 * pitch), _mm_castsi128_ps(q5p5)); q4p4 = _mm_andnot_si128(flat2, q4p4); flat2_q4p4 = _mm_and_si128(flat2, flat2_q4p4); q4p4 = _mm_or_si128(q4p4, flat2_q4p4); - _mm_storel_epi64((__m128i *)(s - 5 * p), q4p4); - _mm_storeh_pi((__m64 *)(s + 4 * p), _mm_castsi128_ps(q4p4)); + _mm_storel_epi64((__m128i *)(s - 5 * pitch), q4p4); + _mm_storeh_pi((__m64 *)(s + 4 * pitch), _mm_castsi128_ps(q4p4)); q3p3 = _mm_andnot_si128(flat2, q3p3); flat2_q3p3 = _mm_and_si128(flat2, flat2_q3p3); q3p3 = _mm_or_si128(q3p3, flat2_q3p3); - _mm_storel_epi64((__m128i *)(s - 4 * p), q3p3); - _mm_storeh_pi((__m64 *)(s + 3 * p), _mm_castsi128_ps(q3p3)); + _mm_storel_epi64((__m128i *)(s - 4 * pitch), q3p3); + _mm_storeh_pi((__m64 *)(s + 3 * pitch), _mm_castsi128_ps(q3p3)); q2p2 = _mm_andnot_si128(flat2, q2p2); flat2_q2p2 = _mm_and_si128(flat2, flat2_q2p2); q2p2 = _mm_or_si128(q2p2, flat2_q2p2); - _mm_storel_epi64((__m128i *)(s - 3 * p), q2p2); - _mm_storeh_pi((__m64 *)(s + 2 * p), _mm_castsi128_ps(q2p2)); + _mm_storel_epi64((__m128i *)(s - 3 * pitch), q2p2); + _mm_storeh_pi((__m64 *)(s + 2 * pitch), _mm_castsi128_ps(q2p2)); q1p1 = _mm_andnot_si128(flat2, q1p1); flat2_q1p1 = _mm_and_si128(flat2, flat2_q1p1); q1p1 = _mm_or_si128(q1p1, flat2_q1p1); - _mm_storel_epi64((__m128i *)(s - 2 * p), q1p1); - _mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(q1p1)); + _mm_storel_epi64((__m128i *)(s - 2 * pitch), q1p1); + _mm_storeh_pi((__m64 *)(s + 1 * pitch), _mm_castsi128_ps(q1p1)); q0p0 = _mm_andnot_si128(flat2, q0p0); flat2_q0p0 = _mm_and_si128(flat2, flat2_q0p0); q0p0 = _mm_or_si128(q0p0, flat2_q0p0); - _mm_storel_epi64((__m128i *)(s - 1 * p), q0p0); - _mm_storeh_pi((__m64 *)(s - 0 * p), _mm_castsi128_ps(q0p0)); + _mm_storel_epi64((__m128i *)(s - 1 * pitch), q0p0); + _mm_storeh_pi((__m64 *)(s - 0 * pitch), _mm_castsi128_ps(q0p0)); } } @@ -591,15 +590,15 @@ static INLINE __m128i filter16_mask(const __m128i *const flat, return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); } -void vpx_lpf_horizontal_16_dual_sse2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { +void vpx_lpf_horizontal_16_dual_sse2(unsigned char *s, int pitch, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh) { const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); - const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); - const __m128i limit = _mm_load_si128((const __m128i *)_limit); - const __m128i thresh = _mm_load_si128((const __m128i *)_thresh); + const __m128i blimit_v = _mm_load_si128((const __m128i *)blimit); + const __m128i limit_v = _mm_load_si128((const __m128i *)limit); + const __m128i thresh_v = _mm_load_si128((const __m128i *)thresh); __m128i mask, hev, flat, flat2; __m128i p7, p6, p5; __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; @@ -609,27 +608,27 @@ void vpx_lpf_horizontal_16_dual_sse2(unsigned char *s, int p, __m128i max_abs_p1p0q1q0; - p7 = _mm_loadu_si128((__m128i *)(s - 8 * p)); - p6 = _mm_loadu_si128((__m128i *)(s - 7 * p)); - p5 = _mm_loadu_si128((__m128i *)(s - 6 * p)); - p4 = _mm_loadu_si128((__m128i *)(s - 5 * p)); - p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); - p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); - p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); - q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); - q1 = 
_mm_loadu_si128((__m128i *)(s + 1 * p)); - q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); - q4 = _mm_loadu_si128((__m128i *)(s + 4 * p)); - q5 = _mm_loadu_si128((__m128i *)(s + 5 * p)); - q6 = _mm_loadu_si128((__m128i *)(s + 6 * p)); - q7 = _mm_loadu_si128((__m128i *)(s + 7 * p)); + p7 = _mm_loadu_si128((__m128i *)(s - 8 * pitch)); + p6 = _mm_loadu_si128((__m128i *)(s - 7 * pitch)); + p5 = _mm_loadu_si128((__m128i *)(s - 6 * pitch)); + p4 = _mm_loadu_si128((__m128i *)(s - 5 * pitch)); + p3 = _mm_loadu_si128((__m128i *)(s - 4 * pitch)); + p2 = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); + p1 = _mm_loadu_si128((__m128i *)(s - 2 * pitch)); + p0 = _mm_loadu_si128((__m128i *)(s - 1 * pitch)); + q0 = _mm_loadu_si128((__m128i *)(s - 0 * pitch)); + q1 = _mm_loadu_si128((__m128i *)(s + 1 * pitch)); + q2 = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); + q3 = _mm_loadu_si128((__m128i *)(s + 3 * pitch)); + q4 = _mm_loadu_si128((__m128i *)(s + 4 * pitch)); + q5 = _mm_loadu_si128((__m128i *)(s + 5 * pitch)); + q6 = _mm_loadu_si128((__m128i *)(s + 6 * pitch)); + q7 = _mm_loadu_si128((__m128i *)(s + 7 * pitch)); { const __m128i abs_p1p0 = abs_diff(p1, p0); const __m128i abs_q1q0 = abs_diff(q1, q0); - const __m128i fe = _mm_set1_epi8(0xfe); + const __m128i fe = _mm_set1_epi8((int8_t)0xfe); const __m128i ff = _mm_cmpeq_epi8(zero, zero); __m128i abs_p0q0 = abs_diff(p0, q0); __m128i abs_p1q1 = abs_diff(p1, q1); @@ -638,7 +637,7 @@ void vpx_lpf_horizontal_16_dual_sse2(unsigned char *s, int p, abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; mask = _mm_max_epu8(max_abs_p1p0q1q0, mask); @@ -648,7 +647,7 @@ void vpx_lpf_horizontal_16_dual_sse2(unsigned char *s, int p, mask = _mm_max_epu8(work, mask); work = _mm_max_epu8(abs_diff(q2, q1), abs_diff(q3, q2)); mask = _mm_max_epu8(work, mask); - mask = _mm_subs_epu8(mask, limit); + mask = _mm_subs_epu8(mask, limit_v); mask = _mm_cmpeq_epi8(mask, zero); } @@ -678,8 +677,8 @@ void vpx_lpf_horizontal_16_dual_sse2(unsigned char *s, int p, { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i te0 = _mm_set1_epi8(0xe0); + const __m128i t80 = _mm_set1_epi8((int8_t)0x80); + const __m128i te0 = _mm_set1_epi8((int8_t)0xe0); const __m128i t1f = _mm_set1_epi8(0x1f); const __m128i t1 = _mm_set1_epi8(0x1); const __m128i t7f = _mm_set1_epi8(0x7f); @@ -694,7 +693,7 @@ void vpx_lpf_horizontal_16_dual_sse2(unsigned char *s, int p, oq0 = _mm_xor_si128(q0, t80); oq1 = _mm_xor_si128(q1, t80); - hev = _mm_subs_epu8(max_abs_p1p0q1q0, thresh); + hev = _mm_subs_epu8(max_abs_p1p0q1q0, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); filt = _mm_and_si128(_mm_subs_epi8(op1, oq1), hev); @@ -851,82 +850,82 @@ void vpx_lpf_horizontal_16_dual_sse2(unsigned char *s, int p, f_hi = _mm_add_epi16(_mm_add_epi16(p5_hi, eight), f_hi); p6 = filter16_mask(&flat2, &p6, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 7 * p), p6); + _mm_storeu_si128((__m128i *)(s - 7 * pitch), p6); f_lo = filter_add2_sub2(&f_lo, &q1_lo, &p5_lo, &p6_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q1_hi, &p5_hi, &p6_hi, &p7_hi); p5 = filter16_mask(&flat2, &p5, &f_lo, &f_hi); - 
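/* The f_lo/f_hi chain here computes the 15-tap "flat2" smoothing as a
 * running sum: each output row adds the two taps entering the window and
 * subtracts the two leaving it, rather than recomputing the whole sum. A
 * sketch of the helper's likely shape (an assumption; it is defined earlier
 * in this file):
 *
 *   static INLINE __m128i filter_add2_sub2(const __m128i *const total,
 *                                          const __m128i *const a1,
 *                                          const __m128i *const a2,
 *                                          const __m128i *const s1,
 *                                          const __m128i *const s2) {
 *     __m128i x = _mm_add_epi16(*a1, *total);
 *     x = _mm_add_epi16(_mm_sub_epi16(x, _mm_add_epi16(*s1, *s2)), *a2);
 *     return x;
 *   }
 */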
_mm_storeu_si128((__m128i *)(s - 6 * p), p5); + _mm_storeu_si128((__m128i *)(s - 6 * pitch), p5); f_lo = filter_add2_sub2(&f_lo, &q2_lo, &p4_lo, &p5_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q2_hi, &p4_hi, &p5_hi, &p7_hi); p4 = filter16_mask(&flat2, &p4, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 5 * p), p4); + _mm_storeu_si128((__m128i *)(s - 5 * pitch), p4); f_lo = filter_add2_sub2(&f_lo, &q3_lo, &p3_lo, &p4_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q3_hi, &p3_hi, &p4_hi, &p7_hi); p3 = filter16_mask(&flat2, &p3, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 4 * p), p3); + _mm_storeu_si128((__m128i *)(s - 4 * pitch), p3); f_lo = filter_add2_sub2(&f_lo, &q4_lo, &p2_lo, &p3_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q4_hi, &p2_hi, &p3_hi, &p7_hi); op2 = filter16_mask(&flat2, &op2, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 3 * p), op2); + _mm_storeu_si128((__m128i *)(s - 3 * pitch), op2); f_lo = filter_add2_sub2(&f_lo, &q5_lo, &p1_lo, &p2_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q5_hi, &p1_hi, &p2_hi, &p7_hi); op1 = filter16_mask(&flat2, &op1, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 2 * p), op1); + _mm_storeu_si128((__m128i *)(s - 2 * pitch), op1); f_lo = filter_add2_sub2(&f_lo, &q6_lo, &p0_lo, &p1_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q6_hi, &p0_hi, &p1_hi, &p7_hi); op0 = filter16_mask(&flat2, &op0, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 1 * p), op0); + _mm_storeu_si128((__m128i *)(s - 1 * pitch), op0); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q0_lo, &p0_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q0_hi, &p0_hi, &p7_hi); oq0 = filter16_mask(&flat2, &oq0, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 0 * p), oq0); + _mm_storeu_si128((__m128i *)(s - 0 * pitch), oq0); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q1_lo, &p6_lo, &q0_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q1_hi, &p6_hi, &q0_hi); oq1 = filter16_mask(&flat2, &oq1, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 1 * p), oq1); + _mm_storeu_si128((__m128i *)(s + 1 * pitch), oq1); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q2_lo, &p5_lo, &q1_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q2_hi, &p5_hi, &q1_hi); oq2 = filter16_mask(&flat2, &oq2, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 2 * p), oq2); + _mm_storeu_si128((__m128i *)(s + 2 * pitch), oq2); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q3_lo, &p4_lo, &q2_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q3_hi, &p4_hi, &q2_hi); q3 = filter16_mask(&flat2, &q3, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 3 * p), q3); + _mm_storeu_si128((__m128i *)(s + 3 * pitch), q3); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q4_lo, &p3_lo, &q3_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q4_hi, &p3_hi, &q3_hi); q4 = filter16_mask(&flat2, &q4, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 4 * p), q4); + _mm_storeu_si128((__m128i *)(s + 4 * pitch), q4); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q5_lo, &p2_lo, &q4_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q5_hi, &p2_hi, &q4_hi); q5 = filter16_mask(&flat2, &q5, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 5 * p), q5); + _mm_storeu_si128((__m128i *)(s + 5 * pitch), q5); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q6_lo, &p1_lo, &q5_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q6_hi, &p1_hi, &q5_hi); q6 = filter16_mask(&flat2, &q6, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 6 * p), q6); + _mm_storeu_si128((__m128i *)(s + 6 * pitch), q6); } // wide flat // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } } -void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, - 
const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { +void vpx_lpf_horizontal_8_sse2(unsigned char *s, int pitch, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh) { DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); @@ -934,28 +933,28 @@ void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); const __m128i zero = _mm_set1_epi16(0); - const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); - const __m128i limit = _mm_load_si128((const __m128i *)_limit); - const __m128i thresh = _mm_load_si128((const __m128i *)_thresh); + const __m128i blimit_v = _mm_load_si128((const __m128i *)blimit); + const __m128i limit_v = _mm_load_si128((const __m128i *)limit); + const __m128i thresh_v = _mm_load_si128((const __m128i *)thresh); __m128i mask, hev, flat; __m128i p3, p2, p1, p0, q0, q1, q2, q3; __m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0; - q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)), - _mm_loadl_epi64((__m128i *)(s + 3 * p))); - q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)), - _mm_loadl_epi64((__m128i *)(s + 2 * p))); - q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * p)), - _mm_loadl_epi64((__m128i *)(s + 1 * p))); - q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * p)), - _mm_loadl_epi64((__m128i *)(s - 0 * p))); + q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * pitch)), + _mm_loadl_epi64((__m128i *)(s + 3 * pitch))); + q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * pitch)), + _mm_loadl_epi64((__m128i *)(s + 2 * pitch))); + q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * pitch)), + _mm_loadl_epi64((__m128i *)(s + 1 * pitch))); + q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * pitch)), + _mm_loadl_epi64((__m128i *)(s - 0 * pitch))); p1q1 = _mm_shuffle_epi32(q1p1, 78); p0q0 = _mm_shuffle_epi32(q0p0, 78); { // filter_mask and hev_mask const __m128i one = _mm_set1_epi8(1); - const __m128i fe = _mm_set1_epi8(0xfe); + const __m128i fe = _mm_set1_epi8((int8_t)0xfe); const __m128i ff = _mm_cmpeq_epi8(fe, fe); __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work; abs_p1p0 = abs_diff(q1p1, q0p0); @@ -964,12 +963,12 @@ void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, abs_p0q0 = abs_diff(q0p0, p0q0); abs_p1q1 = abs_diff(q1p1, p1q1); flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); + hev = _mm_subs_epu8(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); + mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; mask = _mm_max_epu8(abs_p1p0, mask); @@ -979,7 +978,7 @@ void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, work = _mm_max_epu8(abs_diff(q2p2, q1p1), abs_diff(q3p3, q2p2)); mask = _mm_max_epu8(work, mask); mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); - mask = _mm_subs_epu8(mask, limit); + mask = _mm_subs_epu8(mask, limit_v); mask = _mm_cmpeq_epi8(mask, zero); // flat_mask4 @@ -997,14 +996,22 @@ void vpx_lpf_horizontal_8_sse2(unsigned char 
*s, int p, unsigned char *src = s; { __m128i workp_a, workp_b, workp_shft; - p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero); - p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero); - p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero); - p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero); - q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero); - q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero); - q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero); - q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero); + p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * pitch)), + zero); + p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * pitch)), + zero); + p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * pitch)), + zero); + p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * pitch)), + zero); + q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * pitch)), + zero); + q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * pitch)), + zero); + q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * pitch)), + zero); + q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * pitch)), + zero); workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1)); workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); @@ -1047,16 +1054,16 @@ void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); + const __m128i t80 = _mm_set1_epi8((int8_t)0x80); const __m128i t1 = _mm_set1_epi8(0x1); const __m128i ps1 = - _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 2 * p)), t80); + _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 2 * pitch)), t80); const __m128i ps0 = - _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 1 * p)), t80); + _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 1 * pitch)), t80); const __m128i qs0 = - _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s + 0 * p)), t80); + _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s + 0 * pitch)), t80); const __m128i qs1 = - _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s + 1 * p)), t80); + _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s + 1 * pitch)), t80); __m128i filt; __m128i work_a; __m128i filter1, filter2; @@ -1102,7 +1109,7 @@ void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, q1 = _mm_and_si128(flat, q1); q1 = _mm_or_si128(work_a, q1); - work_a = _mm_loadu_si128((__m128i *)(s + 2 * p)); + work_a = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); q2 = _mm_loadl_epi64((__m128i *)flat_oq2); work_a = _mm_andnot_si128(flat, work_a); q2 = _mm_and_si128(flat, q2); @@ -1120,27 +1127,25 @@ void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, p1 = _mm_and_si128(flat, p1); p1 = _mm_or_si128(work_a, p1); - work_a = _mm_loadu_si128((__m128i *)(s - 3 * p)); + work_a = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); p2 = _mm_loadl_epi64((__m128i *)flat_op2); work_a = _mm_andnot_si128(flat, work_a); p2 = _mm_and_si128(flat, p2); p2 = _mm_or_si128(work_a, p2); - _mm_storel_epi64((__m128i *)(s - 3 * p), p2); - _mm_storel_epi64((__m128i *)(s - 2 * p), p1); - _mm_storel_epi64((__m128i *)(s - 1 * p), p0); - _mm_storel_epi64((__m128i *)(s + 0 * p), q0); - _mm_storel_epi64((__m128i *)(s + 1 * p), q1); - _mm_storel_epi64((__m128i *)(s + 2 * p), q2); + _mm_storel_epi64((__m128i *)(s - 3 * pitch), p2); + _mm_storel_epi64((__m128i *)(s - 2 * pitch), p1); + 
_mm_storel_epi64((__m128i *)(s - 1 * pitch), p0); + _mm_storel_epi64((__m128i *)(s + 0 * pitch), q0); + _mm_storel_epi64((__m128i *)(s + 1 * pitch), q1); + _mm_storel_epi64((__m128i *)(s + 2 * pitch), q2); } } -void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit0, - const uint8_t *_limit0, - const uint8_t *_thresh0, - const uint8_t *_blimit1, - const uint8_t *_limit1, - const uint8_t *_thresh1) { +void vpx_lpf_horizontal_8_dual_sse2( + uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, + const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1) { DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); @@ -1149,33 +1154,33 @@ void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit0, DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); const __m128i zero = _mm_set1_epi16(0); const __m128i blimit = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0), - _mm_load_si128((const __m128i *)_blimit1)); + _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)blimit0), + _mm_load_si128((const __m128i *)blimit1)); const __m128i limit = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_limit0), - _mm_load_si128((const __m128i *)_limit1)); + _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)limit0), + _mm_load_si128((const __m128i *)limit1)); const __m128i thresh = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_thresh0), - _mm_load_si128((const __m128i *)_thresh1)); + _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)thresh0), + _mm_load_si128((const __m128i *)thresh1)); __m128i mask, hev, flat; __m128i p3, p2, p1, p0, q0, q1, q2, q3; - p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); - p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); - p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); - q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); - q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); - q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); + p3 = _mm_loadu_si128((__m128i *)(s - 4 * pitch)); + p2 = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); + p1 = _mm_loadu_si128((__m128i *)(s - 2 * pitch)); + p0 = _mm_loadu_si128((__m128i *)(s - 1 * pitch)); + q0 = _mm_loadu_si128((__m128i *)(s - 0 * pitch)); + q1 = _mm_loadu_si128((__m128i *)(s + 1 * pitch)); + q2 = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); + q3 = _mm_loadu_si128((__m128i *)(s + 3 * pitch)); { const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), _mm_subs_epu8(p0, p1)); const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), _mm_subs_epu8(q0, q1)); const __m128i one = _mm_set1_epi8(1); - const __m128i fe = _mm_set1_epi8(0xfe); + const __m128i fe = _mm_set1_epi8((int8_t)0xfe); const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), _mm_subs_epu8(q0, p0)); @@ -1227,14 +1232,22 @@ void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit0, do { __m128i workp_a, workp_b, workp_shft; - p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero); - p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero); - p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero); - p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero); - q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero); - q1 = 
_mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero); - q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero); - q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero); + p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * pitch)), + zero); + p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * pitch)), + zero); + p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * pitch)), + zero); + p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * pitch)), + zero); + q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * pitch)), + zero); + q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * pitch)), + zero); + q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * pitch)), + zero); + q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * pitch)), + zero); workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1)); workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); @@ -1279,20 +1292,20 @@ void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit0, { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i te0 = _mm_set1_epi8(0xe0); + const __m128i t80 = _mm_set1_epi8((int8_t)0x80); + const __m128i te0 = _mm_set1_epi8((int8_t)0xe0); const __m128i t1f = _mm_set1_epi8(0x1f); const __m128i t1 = _mm_set1_epi8(0x1); const __m128i t7f = _mm_set1_epi8(0x7f); const __m128i ps1 = - _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)), t80); + _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * pitch)), t80); const __m128i ps0 = - _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)), t80); + _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * pitch)), t80); const __m128i qs0 = - _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)), t80); + _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * pitch)), t80); const __m128i qs1 = - _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)), t80); + _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * pitch)), t80); __m128i filt; __m128i work_a; __m128i filter1, filter2; @@ -1344,7 +1357,7 @@ void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit0, q1 = _mm_and_si128(flat, q1); q1 = _mm_or_si128(work_a, q1); - work_a = _mm_loadu_si128((__m128i *)(s + 2 * p)); + work_a = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); q2 = _mm_load_si128((__m128i *)flat_oq2); work_a = _mm_andnot_si128(flat, work_a); q2 = _mm_and_si128(flat, q2); @@ -1362,49 +1375,49 @@ void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit0, p1 = _mm_and_si128(flat, p1); p1 = _mm_or_si128(work_a, p1); - work_a = _mm_loadu_si128((__m128i *)(s - 3 * p)); + work_a = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); p2 = _mm_load_si128((__m128i *)flat_op2); work_a = _mm_andnot_si128(flat, work_a); p2 = _mm_and_si128(flat, p2); p2 = _mm_or_si128(work_a, p2); - _mm_storeu_si128((__m128i *)(s - 3 * p), p2); - _mm_storeu_si128((__m128i *)(s - 2 * p), p1); - _mm_storeu_si128((__m128i *)(s - 1 * p), p0); - _mm_storeu_si128((__m128i *)(s + 0 * p), q0); - _mm_storeu_si128((__m128i *)(s + 1 * p), q1); - _mm_storeu_si128((__m128i *)(s + 2 * p), q2); + _mm_storeu_si128((__m128i *)(s - 3 * pitch), p2); + _mm_storeu_si128((__m128i *)(s - 2 * pitch), p1); + _mm_storeu_si128((__m128i *)(s - 1 * pitch), p0); + _mm_storeu_si128((__m128i *)(s + 0 * pitch), q0); + _mm_storeu_si128((__m128i *)(s + 1 * pitch), q1); + _mm_storeu_si128((__m128i *)(s + 2 * pitch), q2); } } -void 
vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int p, - const unsigned char *_blimit0, - const unsigned char *_limit0, - const unsigned char *_thresh0, - const unsigned char *_blimit1, - const unsigned char *_limit1, - const unsigned char *_thresh1) { +void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int pitch, + const unsigned char *blimit0, + const unsigned char *limit0, + const unsigned char *thresh0, + const unsigned char *blimit1, + const unsigned char *limit1, + const unsigned char *thresh1) { const __m128i blimit = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0), - _mm_load_si128((const __m128i *)_blimit1)); + _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)blimit0), + _mm_load_si128((const __m128i *)blimit1)); const __m128i limit = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_limit0), - _mm_load_si128((const __m128i *)_limit1)); + _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)limit0), + _mm_load_si128((const __m128i *)limit1)); const __m128i thresh = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_thresh0), - _mm_load_si128((const __m128i *)_thresh1)); + _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)thresh0), + _mm_load_si128((const __m128i *)thresh1)); const __m128i zero = _mm_set1_epi16(0); __m128i p3, p2, p1, p0, q0, q1, q2, q3; __m128i mask, hev, flat; - p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); - p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); - p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); - q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); - q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); - q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); + p3 = _mm_loadu_si128((__m128i *)(s - 4 * pitch)); + p2 = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); + p1 = _mm_loadu_si128((__m128i *)(s - 2 * pitch)); + p0 = _mm_loadu_si128((__m128i *)(s - 1 * pitch)); + q0 = _mm_loadu_si128((__m128i *)(s - 0 * pitch)); + q1 = _mm_loadu_si128((__m128i *)(s + 1 * pitch)); + q2 = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); + q3 = _mm_loadu_si128((__m128i *)(s + 3 * pitch)); // filter_mask and hev_mask { @@ -1412,7 +1425,7 @@ void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int p, _mm_or_si128(_mm_subs_epu8(p1, p0), _mm_subs_epu8(p0, p1)); const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), _mm_subs_epu8(q0, q1)); - const __m128i fe = _mm_set1_epi8(0xfe); + const __m128i fe = _mm_set1_epi8((int8_t)0xfe); const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), _mm_subs_epu8(q0, p0)); @@ -1448,20 +1461,20 @@ void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int p, { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i te0 = _mm_set1_epi8(0xe0); + const __m128i t80 = _mm_set1_epi8((int8_t)0x80); + const __m128i te0 = _mm_set1_epi8((int8_t)0xe0); const __m128i t1f = _mm_set1_epi8(0x1f); const __m128i t1 = _mm_set1_epi8(0x1); const __m128i t7f = _mm_set1_epi8(0x7f); const __m128i ps1 = - _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)), t80); + _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * pitch)), t80); const __m128i ps0 = - _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)), t80); + _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * pitch)), t80); const __m128i qs0 = - _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)), t80); + _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * pitch)), t80); const __m128i 
qs1 = - _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)), t80); + _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * pitch)), t80); __m128i filt; __m128i work_a; __m128i filter1, filter2; @@ -1506,10 +1519,10 @@ void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int p, p0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); p1 = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); - _mm_storeu_si128((__m128i *)(s - 2 * p), p1); - _mm_storeu_si128((__m128i *)(s - 1 * p), p0); - _mm_storeu_si128((__m128i *)(s + 0 * p), q0); - _mm_storeu_si128((__m128i *)(s + 1 * p), q1); + _mm_storeu_si128((__m128i *)(s - 2 * pitch), p1); + _mm_storeu_si128((__m128i *)(s - 1 * pitch), p0); + _mm_storeu_si128((__m128i *)(s + 0 * pitch), q0); + _mm_storeu_si128((__m128i *)(s + 1 * pitch), q1); } } @@ -1626,16 +1639,12 @@ static INLINE void transpose(unsigned char *src[], int in_p, x5 = _mm_unpacklo_epi16(x2, x3); // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 x6 = _mm_unpacklo_epi32(x4, x5); - _mm_storel_pd((double *)(out + 0 * out_p), - _mm_castsi128_pd(x6)); // 00 10 20 30 40 50 60 70 - _mm_storeh_pd((double *)(out + 1 * out_p), - _mm_castsi128_pd(x6)); // 01 11 21 31 41 51 61 71 + mm_storelu(out + 0 * out_p, x6); // 00 10 20 30 40 50 60 70 + mm_storehu(out + 1 * out_p, x6); // 01 11 21 31 41 51 61 71 // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 x7 = _mm_unpackhi_epi32(x4, x5); - _mm_storel_pd((double *)(out + 2 * out_p), - _mm_castsi128_pd(x7)); // 02 12 22 32 42 52 62 72 - _mm_storeh_pd((double *)(out + 3 * out_p), - _mm_castsi128_pd(x7)); // 03 13 23 33 43 53 63 73 + mm_storelu(out + 2 * out_p, x7); // 02 12 22 32 42 52 62 72 + mm_storehu(out + 3 * out_p, x7); // 03 13 23 33 43 53 63 73 // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 x4 = _mm_unpackhi_epi16(x0, x1); @@ -1643,21 +1652,17 @@ static INLINE void transpose(unsigned char *src[], int in_p, x5 = _mm_unpackhi_epi16(x2, x3); // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 x6 = _mm_unpacklo_epi32(x4, x5); - _mm_storel_pd((double *)(out + 4 * out_p), - _mm_castsi128_pd(x6)); // 04 14 24 34 44 54 64 74 - _mm_storeh_pd((double *)(out + 5 * out_p), - _mm_castsi128_pd(x6)); // 05 15 25 35 45 55 65 75 + mm_storelu(out + 4 * out_p, x6); // 04 14 24 34 44 54 64 74 + mm_storehu(out + 5 * out_p, x6); // 05 15 25 35 45 55 65 75 // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 x7 = _mm_unpackhi_epi32(x4, x5); - _mm_storel_pd((double *)(out + 6 * out_p), - _mm_castsi128_pd(x7)); // 06 16 26 36 46 56 66 76 - _mm_storeh_pd((double *)(out + 7 * out_p), - _mm_castsi128_pd(x7)); // 07 17 27 37 47 57 67 77 + mm_storelu(out + 6 * out_p, x7); // 06 16 26 36 46 56 66 76 + mm_storehu(out + 7 * out_p, x7); // 07 17 27 37 47 57 67 77 } while (++idx8x8 < num_8x8_to_transpose); } -void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, +void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { @@ -1666,7 +1671,7 @@ void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, unsigned char *dst[2]; // Transpose 8x16 - transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); + transpose8x16(s - 4, s - 4 + pitch * 8, pitch, t_dst, 16); // Loop filtering vpx_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, @@ -1674,13 +1679,13 @@ void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, src[0] = t_dst; src[1] = t_dst + 8; dst[0] = s - 4; - dst[1] = s 
- 4 + p * 8; + dst[1] = s - 4 + pitch * 8; // Transpose back - transpose(src, 16, dst, p, 2); + transpose(src, 16, dst, pitch, 2); } -void vpx_lpf_vertical_8_sse2(unsigned char *s, int p, +void vpx_lpf_vertical_8_sse2(unsigned char *s, int pitch, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh) { @@ -1692,7 +1697,7 @@ void vpx_lpf_vertical_8_sse2(unsigned char *s, int p, src[0] = s - 4; dst[0] = t_dst; - transpose(src, p, dst, 8, 1); + transpose(src, pitch, dst, 8, 1); // Loop filtering vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh); @@ -1701,10 +1706,10 @@ void vpx_lpf_vertical_8_sse2(unsigned char *s, int p, dst[0] = s - 4; // Transpose back - transpose(src, 8, dst, p, 1); + transpose(src, 8, dst, pitch, 1); } -void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, +void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { @@ -1713,7 +1718,7 @@ void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, unsigned char *dst[2]; // Transpose 8x16 - transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); + transpose8x16(s - 4, s - 4 + pitch * 8, pitch, t_dst, 16); // Loop filtering vpx_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, @@ -1722,13 +1727,13 @@ void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, src[1] = t_dst + 8; dst[0] = s - 4; - dst[1] = s - 4 + p * 8; + dst[1] = s - 4 + pitch * 8; // Transpose back - transpose(src, 16, dst, p, 2); + transpose(src, 16, dst, pitch, 2); } -void vpx_lpf_vertical_16_sse2(unsigned char *s, int p, +void vpx_lpf_vertical_16_sse2(unsigned char *s, int pitch, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh) { @@ -1742,7 +1747,7 @@ void vpx_lpf_vertical_16_sse2(unsigned char *s, int p, dst[1] = t_dst + 8 * 8; // Transpose 16x8 - transpose(src, p, dst, 8, 2); + transpose(src, pitch, dst, 8, 2); // Loop filtering vpx_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh); @@ -1753,22 +1758,22 @@ void vpx_lpf_vertical_16_sse2(unsigned char *s, int p, dst[1] = s; // Transpose back - transpose(src, 8, dst, p, 2); + transpose(src, 8, dst, pitch, 2); } -void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p, +void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { DECLARE_ALIGNED(16, unsigned char, t_dst[256]); // Transpose 16x16 - transpose8x16(s - 8, s - 8 + 8 * p, p, t_dst, 16); - transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); + transpose8x16(s - 8, s - 8 + 8 * pitch, pitch, t_dst, 16); + transpose8x16(s, s + 8 * pitch, pitch, t_dst + 8 * 16, 16); // Loop filtering vpx_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh); // Transpose back - transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); - transpose8x16(t_dst + 8, t_dst + 8 + 8 * 16, 16, s - 8 + 8 * p, p); + transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, pitch); + transpose8x16(t_dst + 8, t_dst + 8 + 8 * 16, 16, s - 8 + 8 * pitch, pitch); } diff --git a/media/libvpx/libvpx/vpx_dsp/x86/mem_sse2.h b/media/libvpx/libvpx/vpx_dsp/x86/mem_sse2.h index 2ce738fb770f..258ab38e606e 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/mem_sse2.h +++ b/media/libvpx/libvpx/vpx_dsp/x86/mem_sse2.h @@ -8,13 +8,43 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_X86_MEM_SSE2_H_ -#define VPX_DSP_X86_MEM_SSE2_H_ +#ifndef VPX_VPX_DSP_X86_MEM_SSE2_H_ +#define VPX_VPX_DSP_X86_MEM_SSE2_H_ #include <emmintrin.h> // SSE2 +#include <string.h> #include "./vpx_config.h" +static INLINE void storeu_uint32(void *dst, uint32_t v) { + memcpy(dst, &v, sizeof(v)); +} + +static INLINE uint32_t loadu_uint32(const void *src) { + uint32_t v; + memcpy(&v, src, sizeof(v)); + return v; +} + +static INLINE __m128i load_unaligned_u32(const void *a) { + uint32_t val; + memcpy(&val, a, sizeof(val)); + return _mm_cvtsi32_si128(val); +} + +static INLINE void store_unaligned_u32(void *const a, const __m128i v) { + const uint32_t val = _mm_cvtsi128_si32(v); + memcpy(a, &val, sizeof(val)); +} + +#define mm_storelu(dst, v) memcpy((dst), (const char *)&(v), 8) +#define mm_storehu(dst, v) memcpy((dst), (const char *)&(v) + 8, 8) + +static INLINE __m128i loadh_epi64(const __m128i s, const void *const src) { + return _mm_castps_si128( + _mm_loadh_pi(_mm_castsi128_ps(s), (const __m64 *)src)); +} + static INLINE void load_8bit_4x4(const uint8_t *const s, const ptrdiff_t stride, __m128i *const d) { d[0] = _mm_cvtsi32_si128(*(const int *)(s + 0 * stride)); @@ -121,4 +151,4 @@ static INLINE void storeu_8bit_16x4(const __m128i *const s, uint8_t *const d, _mm_storeu_si128((__m128i *)(d + 3 * stride), s[3]); } -#endif // VPX_DSP_X86_MEM_SSE2_H_ +#endif // VPX_VPX_DSP_X86_MEM_SSE2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/post_proc_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/post_proc_sse2.c new file mode 100644 index 000000000000..d1029afc4fe1 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/x86/post_proc_sse2.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include <stdint.h> + +#include <emmintrin.h> + +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/x86/mem_sse2.h" + +extern const int16_t vpx_rv[]; + +void vpx_mbpost_proc_down_sse2(unsigned char *dst, int pitch, int rows, + int cols, int flimit) { + int col; + const __m128i zero = _mm_setzero_si128(); + const __m128i f = _mm_set1_epi32(flimit); + DECLARE_ALIGNED(16, int16_t, above_context[8 * 8]); + + // 8 columns are processed at a time. + // If rows is less than 8 the bottom border extension fails.
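+  // (The asserts below encode these preconditions; the top border is
+  //  primed by replicating the first row, which is why sum starts at 9 * s.)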
+ assert(cols % 8 == 0); + assert(rows >= 8); + + for (col = 0; col < cols; col += 8) { + int row, i; + __m128i s = _mm_loadl_epi64((__m128i *)dst); + __m128i sum, sumsq_0, sumsq_1; + __m128i tmp_0, tmp_1; + __m128i below_context; + + s = _mm_unpacklo_epi8(s, zero); + + for (i = 0; i < 8; ++i) { + _mm_store_si128((__m128i *)above_context + i, s); + } + + // sum *= 9 + sum = _mm_slli_epi16(s, 3); + sum = _mm_add_epi16(s, sum); + + // sum^2 * 9 == (sum * 9) * sum + tmp_0 = _mm_mullo_epi16(sum, s); + tmp_1 = _mm_mulhi_epi16(sum, s); + + sumsq_0 = _mm_unpacklo_epi16(tmp_0, tmp_1); + sumsq_1 = _mm_unpackhi_epi16(tmp_0, tmp_1); + + // Prime sum/sumsq + for (i = 1; i <= 6; ++i) { + __m128i a = _mm_loadl_epi64((__m128i *)(dst + i * pitch)); + a = _mm_unpacklo_epi8(a, zero); + sum = _mm_add_epi16(sum, a); + a = _mm_mullo_epi16(a, a); + sumsq_0 = _mm_add_epi32(sumsq_0, _mm_unpacklo_epi16(a, zero)); + sumsq_1 = _mm_add_epi32(sumsq_1, _mm_unpackhi_epi16(a, zero)); + } + + for (row = 0; row < rows + 8; row++) { + const __m128i above = + _mm_load_si128((__m128i *)above_context + (row & 7)); + __m128i this_row = _mm_loadl_epi64((__m128i *)(dst + row * pitch)); + __m128i above_sq, below_sq; + __m128i mask_0, mask_1; + __m128i multmp_0, multmp_1; + __m128i rv; + __m128i out; + + this_row = _mm_unpacklo_epi8(this_row, zero); + + if (row + 7 < rows) { + // Instead of copying the end context we just stop loading when we get + // to the last one. + below_context = _mm_loadl_epi64((__m128i *)(dst + (row + 7) * pitch)); + below_context = _mm_unpacklo_epi8(below_context, zero); + } + + sum = _mm_sub_epi16(sum, above); + sum = _mm_add_epi16(sum, below_context); + + // context^2 fits in 16 bits. Don't need to mulhi and combine. Just zero + // extend. Unfortunately we can't do below_sq - above_sq in 16 bits + // because x86 does not have unpack with sign extension. 
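+      // For reference, this vectorizes the scalar window update used by
+      // vpx_mbpost_proc_down_c (a sketch):
+      //   sum   += below - above;
+      //   sumsq += below * below - above * above;
+      //   if (sumsq * 15 - sum * sum < flimit)
+      //     pixel = (rv + sum + pixel) >> 4;  // rv drawn from vpx_rv[]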
+ above_sq = _mm_mullo_epi16(above, above); + sumsq_0 = _mm_sub_epi32(sumsq_0, _mm_unpacklo_epi16(above_sq, zero)); + sumsq_1 = _mm_sub_epi32(sumsq_1, _mm_unpackhi_epi16(above_sq, zero)); + + below_sq = _mm_mullo_epi16(below_context, below_context); + sumsq_0 = _mm_add_epi32(sumsq_0, _mm_unpacklo_epi16(below_sq, zero)); + sumsq_1 = _mm_add_epi32(sumsq_1, _mm_unpackhi_epi16(below_sq, zero)); + + // sumsq * 16 - sumsq == sumsq * 15 + mask_0 = _mm_slli_epi32(sumsq_0, 4); + mask_0 = _mm_sub_epi32(mask_0, sumsq_0); + mask_1 = _mm_slli_epi32(sumsq_1, 4); + mask_1 = _mm_sub_epi32(mask_1, sumsq_1); + + multmp_0 = _mm_mullo_epi16(sum, sum); + multmp_1 = _mm_mulhi_epi16(sum, sum); + + mask_0 = _mm_sub_epi32(mask_0, _mm_unpacklo_epi16(multmp_0, multmp_1)); + mask_1 = _mm_sub_epi32(mask_1, _mm_unpackhi_epi16(multmp_0, multmp_1)); + + // mask - f gives a negative value when mask < f + mask_0 = _mm_sub_epi32(mask_0, f); + mask_1 = _mm_sub_epi32(mask_1, f); + + // Shift the sign bit down to create a mask + mask_0 = _mm_srai_epi32(mask_0, 31); + mask_1 = _mm_srai_epi32(mask_1, 31); + + mask_0 = _mm_packs_epi32(mask_0, mask_1); + + rv = _mm_loadu_si128((__m128i const *)(vpx_rv + (row & 127))); + + mask_1 = _mm_add_epi16(rv, sum); + mask_1 = _mm_add_epi16(mask_1, this_row); + mask_1 = _mm_srai_epi16(mask_1, 4); + + mask_1 = _mm_and_si128(mask_0, mask_1); + mask_0 = _mm_andnot_si128(mask_0, this_row); + out = _mm_or_si128(mask_1, mask_0); + + _mm_storel_epi64((__m128i *)(dst + row * pitch), + _mm_packus_epi16(out, zero)); + + _mm_store_si128((__m128i *)above_context + ((row + 8) & 7), this_row); + } + + dst += 8; + } +} diff --git a/media/libvpx/libvpx/vpx_dsp/x86/quantize_avx.c b/media/libvpx/libvpx/vpx_dsp/x86/quantize_avx.c index 6f4489004dc1..0a91d36eaf9c 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/quantize_avx.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/quantize_avx.c @@ -17,15 +17,16 @@ #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/x86/bitdepth_conversion_sse2.h" -#include "vpx_dsp/x86/quantize_x86.h" +#include "vpx_dsp/x86/quantize_sse2.h" +#include "vpx_dsp/x86/quantize_ssse3.h" void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, - const int16_t *iscan_ptr) { + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { const __m128i zero = _mm_setzero_si128(); const __m256i big_zero = _mm256_setzero_si256(); int index; @@ -37,7 +38,7 @@ void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, __m128i all_zero; __m128i eob = zero, eob0; - (void)scan_ptr; + (void)scan; (void)skip_block; assert(!skip_block); @@ -90,15 +91,12 @@ void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, store_tran_low(qcoeff0, qcoeff_ptr); store_tran_low(qcoeff1, qcoeff_ptr + 8); - coeff0 = calculate_dqcoeff(qcoeff0, dequant); + calculate_dqcoeff_and_store(qcoeff0, dequant, dqcoeff_ptr); dequant = _mm_unpackhi_epi64(dequant, dequant); - coeff1 = calculate_dqcoeff(qcoeff1, dequant); + calculate_dqcoeff_and_store(qcoeff1, dequant, dqcoeff_ptr + 8); - store_tran_low(coeff0, dqcoeff_ptr); - store_tran_low(coeff1, dqcoeff_ptr + 8); - - eob = scan_for_eob(&coeff0, &coeff1, cmp_mask0, cmp_mask1, iscan_ptr, 0, - zero); + eob = + scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, 0, zero); } // 
AC only loop. @@ -135,26 +133,25 @@ void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, store_tran_low(qcoeff0, qcoeff_ptr + index); store_tran_low(qcoeff1, qcoeff_ptr + index + 8); - coeff0 = calculate_dqcoeff(qcoeff0, dequant); - coeff1 = calculate_dqcoeff(qcoeff1, dequant); + calculate_dqcoeff_and_store(qcoeff0, dequant, dqcoeff_ptr + index); + calculate_dqcoeff_and_store(qcoeff1, dequant, dqcoeff_ptr + index + 8); - store_tran_low(coeff0, dqcoeff_ptr + index); - store_tran_low(coeff1, dqcoeff_ptr + index + 8); - - eob0 = scan_for_eob(&coeff0, &coeff1, cmp_mask0, cmp_mask1, iscan_ptr, - index, zero); + eob0 = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, index, + zero); eob = _mm_max_epi16(eob, eob0); } *eob_ptr = accumulate_eob(eob); } -void vpx_quantize_b_32x32_avx( - const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, - const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan_ptr, const int16_t *iscan_ptr) { +void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, + const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan) { const __m128i zero = _mm_setzero_si128(); const __m128i one = _mm_set1_epi16(1); const __m256i big_zero = _mm256_setzero_si256(); @@ -167,7 +164,7 @@ void vpx_quantize_b_32x32_avx( __m128i all_zero; __m128i eob = zero, eob0; - (void)scan_ptr; + (void)scan; (void)n_coeffs; (void)skip_block; assert(!skip_block); @@ -233,28 +230,12 @@ void vpx_quantize_b_32x32_avx( store_tran_low(qcoeff0, qcoeff_ptr); store_tran_low(qcoeff1, qcoeff_ptr + 8); - // Un-sign to bias rounding like C. - // dequant is almost always negative, so this is probably the backwards way - // to handle the sign. However, it matches the previous assembly. - coeff0 = _mm_abs_epi16(qcoeff0); - coeff1 = _mm_abs_epi16(qcoeff1); - - coeff0 = calculate_dqcoeff(coeff0, dequant); + calculate_dqcoeff_and_store_32x32(qcoeff0, dequant, zero, dqcoeff_ptr); dequant = _mm_unpackhi_epi64(dequant, dequant); - coeff1 = calculate_dqcoeff(coeff1, dequant); + calculate_dqcoeff_and_store_32x32(qcoeff1, dequant, zero, dqcoeff_ptr + 8); - // "Divide" by 2. - coeff0 = _mm_srli_epi16(coeff0, 1); - coeff1 = _mm_srli_epi16(coeff1, 1); - - coeff0 = _mm_sign_epi16(coeff0, qcoeff0); - coeff1 = _mm_sign_epi16(coeff1, qcoeff1); - - store_tran_low(coeff0, dqcoeff_ptr); - store_tran_low(coeff1, dqcoeff_ptr + 8); - - eob = scan_for_eob(&coeff0, &coeff1, cmp_mask0, cmp_mask1, iscan_ptr, 0, - zero); + eob = + scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, 0, zero); } // AC only loop. 
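For reference, the fused helper used throughout these quantizer hunks stores the dequantized value directly instead of returning it for a separate store_tran_low(). Per lane, calculate_dqcoeff_and_store() (defined in the renamed quantize_sse2.h later in this patch) reduces to the scalar sketch below. The helper name here is illustrative; tran_low_t is int32_t when CONFIG_VP9_HIGHBITDEPTH is enabled and int16_t otherwise, which is why the high-bitdepth path widens the 16-bit product with mullo/mulhi before storing.

static INLINE void dqcoeff_and_store_sketch(int16_t qcoeff, int16_t dequant,
                                            tran_low_t *dqcoeff) {
#if CONFIG_VP9_HIGHBITDEPTH
  *dqcoeff = (int32_t)qcoeff * dequant;    // full 32-bit product per lane
#else
  *dqcoeff = (int16_t)(qcoeff * dequant);  // low 16 bits, matching mullo
#endif
}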
@@ -291,23 +272,13 @@ void vpx_quantize_b_32x32_avx( store_tran_low(qcoeff0, qcoeff_ptr + index); store_tran_low(qcoeff1, qcoeff_ptr + index + 8); - coeff0 = _mm_abs_epi16(qcoeff0); - coeff1 = _mm_abs_epi16(qcoeff1); + calculate_dqcoeff_and_store_32x32(qcoeff0, dequant, zero, + dqcoeff_ptr + index); + calculate_dqcoeff_and_store_32x32(qcoeff1, dequant, zero, + dqcoeff_ptr + index + 8); - coeff0 = calculate_dqcoeff(coeff0, dequant); - coeff1 = calculate_dqcoeff(coeff1, dequant); - - coeff0 = _mm_srli_epi16(coeff0, 1); - coeff1 = _mm_srli_epi16(coeff1, 1); - - coeff0 = _mm_sign_epi16(coeff0, qcoeff0); - coeff1 = _mm_sign_epi16(coeff1, qcoeff1); - - store_tran_low(coeff0, dqcoeff_ptr + index); - store_tran_low(coeff1, dqcoeff_ptr + index + 8); - - eob0 = scan_for_eob(&coeff0, &coeff1, cmp_mask0, cmp_mask1, iscan_ptr, - index, zero); + eob0 = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, index, + zero); eob = _mm_max_epi16(eob, eob0); } diff --git a/media/libvpx/libvpx/vpx_dsp/x86/quantize_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/quantize_sse2.c index c020b398c3ba..e38a4059ab7d 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/quantize_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/quantize_sse2.c @@ -15,15 +15,15 @@ #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/x86/bitdepth_conversion_sse2.h" -#include "vpx_dsp/x86/quantize_x86.h" +#include "vpx_dsp/x86/quantize_sse2.h" void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, - const int16_t *iscan_ptr) { + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { const __m128i zero = _mm_setzero_si128(); int index = 16; @@ -33,7 +33,7 @@ void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, __m128i cmp_mask0, cmp_mask1; __m128i eob, eob0; - (void)scan_ptr; + (void)scan; (void)skip_block; assert(!skip_block); @@ -74,15 +74,11 @@ void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, store_tran_low(qcoeff0, qcoeff_ptr); store_tran_low(qcoeff1, qcoeff_ptr + 8); - coeff0 = calculate_dqcoeff(qcoeff0, dequant); + calculate_dqcoeff_and_store(qcoeff0, dequant, dqcoeff_ptr); dequant = _mm_unpackhi_epi64(dequant, dequant); - coeff1 = calculate_dqcoeff(qcoeff1, dequant); + calculate_dqcoeff_and_store(qcoeff1, dequant, dqcoeff_ptr + 8); - store_tran_low(coeff0, dqcoeff_ptr); - store_tran_low(coeff1, dqcoeff_ptr + 8); - - eob = - scan_for_eob(&coeff0, &coeff1, cmp_mask0, cmp_mask1, iscan_ptr, 0, zero); + eob = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, 0, zero); // AC only loop. 
while (index < n_coeffs) { @@ -109,14 +105,11 @@ store_tran_low(qcoeff0, qcoeff_ptr + index); store_tran_low(qcoeff1, qcoeff_ptr + index + 8); - coeff0 = calculate_dqcoeff(qcoeff0, dequant); - coeff1 = calculate_dqcoeff(qcoeff1, dequant); + calculate_dqcoeff_and_store(qcoeff0, dequant, dqcoeff_ptr + index); + calculate_dqcoeff_and_store(qcoeff1, dequant, dqcoeff_ptr + index + 8); - store_tran_low(coeff0, dqcoeff_ptr + index); - store_tran_low(coeff1, dqcoeff_ptr + index + 8); - - eob0 = scan_for_eob(&coeff0, &coeff1, cmp_mask0, cmp_mask1, iscan_ptr, - index, zero); + eob0 = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, index, + zero); eob = _mm_max_epi16(eob, eob0); index += 16; diff --git a/media/libvpx/libvpx/vpx_dsp/x86/quantize_x86.h b/media/libvpx/libvpx/vpx_dsp/x86/quantize_sse2.h similarity index 70% rename from media/libvpx/libvpx/vpx_dsp/x86/quantize_x86.h rename to media/libvpx/libvpx/vpx_dsp/x86/quantize_sse2.h index 34928fbb56d6..afe2f924b341 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/quantize_x86.h +++ b/media/libvpx/libvpx/vpx_dsp/x86/quantize_sse2.h @@ -8,11 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. */ +#ifndef VPX_VPX_DSP_X86_QUANTIZE_SSE2_H_ +#define VPX_VPX_DSP_X86_QUANTIZE_SSE2_H_ + #include <emmintrin.h> #include "./vpx_config.h" #include "vpx/vpx_integer.h" -#include "vpx_dsp/x86/bitdepth_conversion_sse2.h" static INLINE void load_b_values(const int16_t *zbin_ptr, __m128i *zbin, const int16_t *round_ptr, __m128i *round, @@ -42,21 +44,35 @@ static INLINE void calculate_qcoeff(__m128i *coeff, const __m128i round, *coeff = _mm_mulhi_epi16(qcoeff, shift); } -static INLINE __m128i calculate_dqcoeff(__m128i qcoeff, __m128i dequant) { - return _mm_mullo_epi16(qcoeff, dequant); +static INLINE void calculate_dqcoeff_and_store(__m128i qcoeff, __m128i dequant, + tran_low_t *dqcoeff) { +#if CONFIG_VP9_HIGHBITDEPTH + const __m128i low = _mm_mullo_epi16(qcoeff, dequant); + const __m128i high = _mm_mulhi_epi16(qcoeff, dequant); + + const __m128i dqcoeff32_0 = _mm_unpacklo_epi16(low, high); + const __m128i dqcoeff32_1 = _mm_unpackhi_epi16(low, high); + + _mm_store_si128((__m128i *)(dqcoeff), dqcoeff32_0); + _mm_store_si128((__m128i *)(dqcoeff + 4), dqcoeff32_1); +#else + const __m128i dqcoeff16 = _mm_mullo_epi16(qcoeff, dequant); + + _mm_store_si128((__m128i *)(dqcoeff), dqcoeff16); +#endif // CONFIG_VP9_HIGHBITDEPTH } -// Scan 16 values for eob reference in scan_ptr. Use masks (-1) from comparing -// to zbin to add 1 to the index in 'scan'. +// Scan 16 values for eob reference in scan. Use masks (-1) from comparing to +// zbin to add 1 to the index in 'scan'.
static INLINE __m128i scan_for_eob(__m128i *coeff0, __m128i *coeff1, const __m128i zbin_mask0, const __m128i zbin_mask1, - const int16_t *scan_ptr, const int index, + const int16_t *scan, const int index, const __m128i zero) { const __m128i zero_coeff0 = _mm_cmpeq_epi16(*coeff0, zero); const __m128i zero_coeff1 = _mm_cmpeq_epi16(*coeff1, zero); - __m128i scan0 = _mm_load_si128((const __m128i *)(scan_ptr + index)); - __m128i scan1 = _mm_load_si128((const __m128i *)(scan_ptr + index + 8)); + __m128i scan0 = _mm_load_si128((const __m128i *)(scan + index)); + __m128i scan1 = _mm_load_si128((const __m128i *)(scan + index + 8)); __m128i eob0, eob1; // Add one to convert from indices to counts scan0 = _mm_sub_epi16(scan0, zbin_mask0); @@ -76,3 +92,5 @@ static INLINE int16_t accumulate_eob(__m128i eob) { eob = _mm_max_epi16(eob, eob_shuffled); return _mm_extract_epi16(eob, 1); } + +#endif // VPX_VPX_DSP_X86_QUANTIZE_SSE2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/quantize_ssse3.c b/media/libvpx/libvpx/vpx_dsp/x86/quantize_ssse3.c index 3f528e1a9783..fc1d91959f13 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/quantize_ssse3.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/quantize_ssse3.c @@ -14,7 +14,8 @@ #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/x86/bitdepth_conversion_sse2.h" -#include "vpx_dsp/x86/quantize_x86.h" +#include "vpx_dsp/x86/quantize_sse2.h" +#include "vpx_dsp/x86/quantize_ssse3.h" void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, @@ -22,7 +23,7 @@ void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan_ptr, const int16_t *iscan_ptr) { + const int16_t *scan, const int16_t *iscan) { const __m128i zero = _mm_setzero_si128(); int index = 16; @@ -32,7 +33,7 @@ void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, __m128i cmp_mask0, cmp_mask1; __m128i eob, eob0; - (void)scan_ptr; + (void)scan; (void)skip_block; assert(!skip_block); @@ -67,15 +68,11 @@ void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, store_tran_low(qcoeff0, qcoeff_ptr); store_tran_low(qcoeff1, qcoeff_ptr + 8); - coeff0 = calculate_dqcoeff(qcoeff0, dequant); + calculate_dqcoeff_and_store(qcoeff0, dequant, dqcoeff_ptr); dequant = _mm_unpackhi_epi64(dequant, dequant); - coeff1 = calculate_dqcoeff(qcoeff1, dequant); + calculate_dqcoeff_and_store(qcoeff1, dequant, dqcoeff_ptr + 8); - store_tran_low(coeff0, dqcoeff_ptr); - store_tran_low(coeff1, dqcoeff_ptr + 8); - - eob = - scan_for_eob(&coeff0, &coeff1, cmp_mask0, cmp_mask1, iscan_ptr, 0, zero); + eob = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, 0, zero); // AC only loop. 
while (index < n_coeffs) { @@ -100,14 +97,11 @@ void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, store_tran_low(qcoeff0, qcoeff_ptr + index); store_tran_low(qcoeff1, qcoeff_ptr + index + 8); - coeff0 = calculate_dqcoeff(qcoeff0, dequant); - coeff1 = calculate_dqcoeff(qcoeff1, dequant); + calculate_dqcoeff_and_store(qcoeff0, dequant, dqcoeff_ptr + index); + calculate_dqcoeff_and_store(qcoeff1, dequant, dqcoeff_ptr + index + 8); - store_tran_low(coeff0, dqcoeff_ptr + index); - store_tran_low(coeff1, dqcoeff_ptr + index + 8); - - eob0 = scan_for_eob(&coeff0, &coeff1, cmp_mask0, cmp_mask1, iscan_ptr, - index, zero); + eob0 = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, index, + zero); eob = _mm_max_epi16(eob, eob0); index += 16; @@ -116,12 +110,14 @@ void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, *eob_ptr = accumulate_eob(eob); } -void vpx_quantize_b_32x32_ssse3( - const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, - const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan_ptr, const int16_t *iscan_ptr) { +void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, + const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan) { const __m128i zero = _mm_setzero_si128(); const __m128i one = _mm_set1_epi16(1); int index; @@ -133,7 +129,7 @@ void vpx_quantize_b_32x32_ssse3( __m128i all_zero; __m128i eob = zero, eob0; - (void)scan_ptr; + (void)scan; (void)n_coeffs; (void)skip_block; assert(!skip_block); @@ -206,28 +202,12 @@ void vpx_quantize_b_32x32_ssse3( store_tran_low(qcoeff0, qcoeff_ptr); store_tran_low(qcoeff1, qcoeff_ptr + 8); - // Un-sign to bias rounding like C. - // dequant is almost always negative, so this is probably the backwards way - // to handle the sign. However, it matches the previous assembly. - coeff0 = _mm_abs_epi16(qcoeff0); - coeff1 = _mm_abs_epi16(qcoeff1); - - coeff0 = calculate_dqcoeff(coeff0, dequant); + calculate_dqcoeff_and_store_32x32(qcoeff0, dequant, zero, dqcoeff_ptr); dequant = _mm_unpackhi_epi64(dequant, dequant); - coeff1 = calculate_dqcoeff(coeff1, dequant); + calculate_dqcoeff_and_store_32x32(qcoeff1, dequant, zero, dqcoeff_ptr + 8); - // "Divide" by 2. - coeff0 = _mm_srli_epi16(coeff0, 1); - coeff1 = _mm_srli_epi16(coeff1, 1); - - coeff0 = _mm_sign_epi16(coeff0, qcoeff0); - coeff1 = _mm_sign_epi16(coeff1, qcoeff1); - - store_tran_low(coeff0, dqcoeff_ptr); - store_tran_low(coeff1, dqcoeff_ptr + 8); - - eob = scan_for_eob(&coeff0, &coeff1, cmp_mask0, cmp_mask1, iscan_ptr, 0, - zero); + eob = + scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, 0, zero); } // AC only loop. 
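The 32x32 variants fold the old abs/multiply/shift/re-sign sequence into calculate_dqcoeff_and_store_32x32(), defined in the new quantize_ssse3.h below. In scalar terms each stored value is qcoeff * dequant / 2 with C-style truncation toward zero; working on the magnitude and restoring the sign afterwards is what preserves that rounding. A sketch, with an illustrative helper name and abs() from <stdlib.h>:

static INLINE tran_low_t dqcoeff_32x32_sketch(int16_t qcoeff, int16_t dequant) {
  // Halve the magnitude, then restore the sign, so the result truncates
  // toward zero exactly like (qcoeff * dequant) / 2 in the C code.
  const int32_t half_abs = ((int32_t)abs(qcoeff) * dequant) >> 1;
  return (tran_low_t)(qcoeff < 0 ? -half_abs : half_abs);
}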
@@ -268,23 +248,13 @@ void vpx_quantize_b_32x32_ssse3( store_tran_low(qcoeff0, qcoeff_ptr + index); store_tran_low(qcoeff1, qcoeff_ptr + index + 8); - coeff0 = _mm_abs_epi16(qcoeff0); - coeff1 = _mm_abs_epi16(qcoeff1); + calculate_dqcoeff_and_store_32x32(qcoeff0, dequant, zero, + dqcoeff_ptr + index); + calculate_dqcoeff_and_store_32x32(qcoeff1, dequant, zero, + dqcoeff_ptr + 8 + index); - coeff0 = calculate_dqcoeff(coeff0, dequant); - coeff1 = calculate_dqcoeff(coeff1, dequant); - - coeff0 = _mm_srli_epi16(coeff0, 1); - coeff1 = _mm_srli_epi16(coeff1, 1); - - coeff0 = _mm_sign_epi16(coeff0, qcoeff0); - coeff1 = _mm_sign_epi16(coeff1, qcoeff1); - - store_tran_low(coeff0, dqcoeff_ptr + index); - store_tran_low(coeff1, dqcoeff_ptr + index + 8); - - eob0 = scan_for_eob(&coeff0, &coeff1, cmp_mask0, cmp_mask1, iscan_ptr, - index, zero); + eob0 = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, index, + zero); eob = _mm_max_epi16(eob, eob0); } diff --git a/media/libvpx/libvpx/vpx_dsp/x86/quantize_ssse3.h b/media/libvpx/libvpx/vpx_dsp/x86/quantize_ssse3.h new file mode 100644 index 000000000000..e8d2a057710b --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/x86/quantize_ssse3.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_DSP_X86_QUANTIZE_SSSE3_H_ +#define VPX_VPX_DSP_X86_QUANTIZE_SSSE3_H_ + +#include <tmmintrin.h> + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/x86/quantize_sse2.h" + +static INLINE void calculate_dqcoeff_and_store_32x32(const __m128i qcoeff, + const __m128i dequant, + const __m128i zero, + tran_low_t *dqcoeff) { + // Un-sign to bias rounding like C. + const __m128i coeff = _mm_abs_epi16(qcoeff); + + const __m128i sign_0 = _mm_unpacklo_epi16(zero, qcoeff); + const __m128i sign_1 = _mm_unpackhi_epi16(zero, qcoeff); + + const __m128i low = _mm_mullo_epi16(coeff, dequant); + const __m128i high = _mm_mulhi_epi16(coeff, dequant); + __m128i dqcoeff32_0 = _mm_unpacklo_epi16(low, high); + __m128i dqcoeff32_1 = _mm_unpackhi_epi16(low, high); + + // "Divide" by 2.
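+  // The logical (not arithmetic) shift below is safe: after _mm_abs_epi16
+  // the 32-bit products are non-negative, and _mm_sign_epi32 restores the
+  // sign afterwards.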
+ dqcoeff32_0 = _mm_srli_epi32(dqcoeff32_0, 1); + dqcoeff32_1 = _mm_srli_epi32(dqcoeff32_1, 1); + + dqcoeff32_0 = _mm_sign_epi32(dqcoeff32_0, sign_0); + dqcoeff32_1 = _mm_sign_epi32(dqcoeff32_1, sign_1); + +#if CONFIG_VP9_HIGHBITDEPTH + _mm_store_si128((__m128i *)(dqcoeff), dqcoeff32_0); + _mm_store_si128((__m128i *)(dqcoeff + 4), dqcoeff32_1); +#else + _mm_store_si128((__m128i *)(dqcoeff), + _mm_packs_epi32(dqcoeff32_0, dqcoeff32_1)); +#endif // CONFIG_VP9_HIGHBITDEPTH +} + +#endif // VPX_VPX_DSP_X86_QUANTIZE_SSSE3_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/sad4d_avx2.c b/media/libvpx/libvpx/vpx_dsp/x86/sad4d_avx2.c index 962b8fb11a42..a5c4f8c5371f 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/sad4d_avx2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/sad4d_avx2.c @@ -11,154 +11,177 @@ #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" -void vpx_sad32x32x4d_avx2(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, - uint32_t res[4]) { - __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg; - __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; - __m256i sum_mlow, sum_mhigh; - int i; - const uint8_t *ref0, *ref1, *ref2, *ref3; +static INLINE void calc_final_4(const __m256i *const sums /*[4]*/, + uint32_t *sad_array) { + const __m256i t0 = _mm256_hadd_epi32(sums[0], sums[1]); + const __m256i t1 = _mm256_hadd_epi32(sums[2], sums[3]); + const __m256i t2 = _mm256_hadd_epi32(t0, t1); + const __m128i sum = _mm_add_epi32(_mm256_castsi256_si128(t2), + _mm256_extractf128_si256(t2, 1)); + _mm_storeu_si128((__m128i *)sad_array, sum); +} + +void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t sad_array[4]) { + int i; + const uint8_t *refs[4]; + __m256i sums[4]; + + refs[0] = ref_array[0]; + refs[1] = ref_array[1]; + refs[2] = ref_array[2]; + refs[3] = ref_array[3]; + sums[0] = _mm256_setzero_si256(); + sums[1] = _mm256_setzero_si256(); + sums[2] = _mm256_setzero_si256(); + sums[3] = _mm256_setzero_si256(); - ref0 = ref[0]; - ref1 = ref[1]; - ref2 = ref[2]; - ref3 = ref[3]; - sum_ref0 = _mm256_set1_epi16(0); - sum_ref1 = _mm256_set1_epi16(0); - sum_ref2 = _mm256_set1_epi16(0); - sum_ref3 = _mm256_set1_epi16(0); for (i = 0; i < 32; i++) { - // load src and all refs - src_reg = _mm256_loadu_si256((const __m256i *)src); - ref0_reg = _mm256_loadu_si256((const __m256i *)ref0); - ref1_reg = _mm256_loadu_si256((const __m256i *)ref1); - ref2_reg = _mm256_loadu_si256((const __m256i *)ref2); - ref3_reg = _mm256_loadu_si256((const __m256i *)ref3); - // sum of the absolute differences between every ref-i to src - ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); - ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); - ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); - ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); - // sum every ref-i - sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); - sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); - sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); - sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg); + __m256i r[4]; - src += src_stride; - ref0 += ref_stride; - ref1 += ref_stride; - ref2 += ref_stride; - ref3 += ref_stride; + // load src and all ref[] + const __m256i s = _mm256_load_si256((const __m256i *)src_ptr); + r[0] = _mm256_loadu_si256((const __m256i *)refs[0]); + r[1] = _mm256_loadu_si256((const __m256i *)refs[1]); + r[2] = _mm256_loadu_si256((const __m256i *)refs[2]); + r[3] = _mm256_loadu_si256((const __m256i *)refs[3]); + + // sum of the absolute differences between every 
ref[] to src + r[0] = _mm256_sad_epu8(r[0], s); + r[1] = _mm256_sad_epu8(r[1], s); + r[2] = _mm256_sad_epu8(r[2], s); + r[3] = _mm256_sad_epu8(r[3], s); + + // sum every ref[] + sums[0] = _mm256_add_epi32(sums[0], r[0]); + sums[1] = _mm256_add_epi32(sums[1], r[1]); + sums[2] = _mm256_add_epi32(sums[2], r[2]); + sums[3] = _mm256_add_epi32(sums[3], r[3]); + + src_ptr += src_stride; + refs[0] += ref_stride; + refs[1] += ref_stride; + refs[2] += ref_stride; + refs[3] += ref_stride; } - { - __m128i sum; - // in sum_ref-i the result is saved in the first 4 bytes - // the other 4 bytes are zeroed. - // sum_ref1 and sum_ref3 are shifted left by 4 bytes - sum_ref1 = _mm256_slli_si256(sum_ref1, 4); - sum_ref3 = _mm256_slli_si256(sum_ref3, 4); - // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 - sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); - sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); - - // merge every 64 bit from each sum_ref-i - sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); - sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); - - // add the low 64 bit to the high 64 bit - sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); - - // add the low 128 bit to the high 128 bit - sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), - _mm256_extractf128_si256(sum_mlow, 1)); - - _mm_storeu_si128((__m128i *)(res), sum); - } + calc_final_4(sums, sad_array); } -void vpx_sad64x64x4d_avx2(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, - uint32_t res[4]) { - __m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg; - __m256i ref1_reg, ref1next_reg, ref2_reg, ref2next_reg; - __m256i ref3_reg, ref3next_reg; - __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; - __m256i sum_mlow, sum_mhigh; +void vpx_sad32x32x8_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + uint32_t *sad_array) { int i; - const uint8_t *ref0, *ref1, *ref2, *ref3; + __m256i sums[8]; - ref0 = ref[0]; - ref1 = ref[1]; - ref2 = ref[2]; - ref3 = ref[3]; - sum_ref0 = _mm256_set1_epi16(0); - sum_ref1 = _mm256_set1_epi16(0); - sum_ref2 = _mm256_set1_epi16(0); - sum_ref3 = _mm256_set1_epi16(0); - for (i = 0; i < 64; i++) { - // load 64 bytes from src and all refs - src_reg = _mm256_loadu_si256((const __m256i *)src); - srcnext_reg = _mm256_loadu_si256((const __m256i *)(src + 32)); - ref0_reg = _mm256_loadu_si256((const __m256i *)ref0); - ref0next_reg = _mm256_loadu_si256((const __m256i *)(ref0 + 32)); - ref1_reg = _mm256_loadu_si256((const __m256i *)ref1); - ref1next_reg = _mm256_loadu_si256((const __m256i *)(ref1 + 32)); - ref2_reg = _mm256_loadu_si256((const __m256i *)ref2); - ref2next_reg = _mm256_loadu_si256((const __m256i *)(ref2 + 32)); - ref3_reg = _mm256_loadu_si256((const __m256i *)ref3); - ref3next_reg = _mm256_loadu_si256((const __m256i *)(ref3 + 32)); - // sum of the absolute differences between every ref-i to src - ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); - ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); - ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); - ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); - ref0next_reg = _mm256_sad_epu8(ref0next_reg, srcnext_reg); - ref1next_reg = _mm256_sad_epu8(ref1next_reg, srcnext_reg); - ref2next_reg = _mm256_sad_epu8(ref2next_reg, srcnext_reg); - ref3next_reg = _mm256_sad_epu8(ref3next_reg, srcnext_reg); + sums[0] = _mm256_setzero_si256(); + sums[1] = _mm256_setzero_si256(); + sums[2] = _mm256_setzero_si256(); + sums[3] = _mm256_setzero_si256(); + sums[4] = _mm256_setzero_si256(); + sums[5] = 
_mm256_setzero_si256(); + sums[6] = _mm256_setzero_si256(); + sums[7] = _mm256_setzero_si256(); - // sum every ref-i - sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); - sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); - sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); - sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg); - sum_ref0 = _mm256_add_epi32(sum_ref0, ref0next_reg); - sum_ref1 = _mm256_add_epi32(sum_ref1, ref1next_reg); - sum_ref2 = _mm256_add_epi32(sum_ref2, ref2next_reg); - sum_ref3 = _mm256_add_epi32(sum_ref3, ref3next_reg); - src += src_stride; - ref0 += ref_stride; - ref1 += ref_stride; - ref2 += ref_stride; - ref3 += ref_stride; + for (i = 0; i < 32; i++) { + __m256i r[8]; + + // load src and all ref[] + const __m256i s = _mm256_load_si256((const __m256i *)src_ptr); + r[0] = _mm256_loadu_si256((const __m256i *)&ref_ptr[0]); + r[1] = _mm256_loadu_si256((const __m256i *)&ref_ptr[1]); + r[2] = _mm256_loadu_si256((const __m256i *)&ref_ptr[2]); + r[3] = _mm256_loadu_si256((const __m256i *)&ref_ptr[3]); + r[4] = _mm256_loadu_si256((const __m256i *)&ref_ptr[4]); + r[5] = _mm256_loadu_si256((const __m256i *)&ref_ptr[5]); + r[6] = _mm256_loadu_si256((const __m256i *)&ref_ptr[6]); + r[7] = _mm256_loadu_si256((const __m256i *)&ref_ptr[7]); + + // sum of the absolute differences between every ref[] to src + r[0] = _mm256_sad_epu8(r[0], s); + r[1] = _mm256_sad_epu8(r[1], s); + r[2] = _mm256_sad_epu8(r[2], s); + r[3] = _mm256_sad_epu8(r[3], s); + r[4] = _mm256_sad_epu8(r[4], s); + r[5] = _mm256_sad_epu8(r[5], s); + r[6] = _mm256_sad_epu8(r[6], s); + r[7] = _mm256_sad_epu8(r[7], s); + + // sum every ref[] + sums[0] = _mm256_add_epi32(sums[0], r[0]); + sums[1] = _mm256_add_epi32(sums[1], r[1]); + sums[2] = _mm256_add_epi32(sums[2], r[2]); + sums[3] = _mm256_add_epi32(sums[3], r[3]); + sums[4] = _mm256_add_epi32(sums[4], r[4]); + sums[5] = _mm256_add_epi32(sums[5], r[5]); + sums[6] = _mm256_add_epi32(sums[6], r[6]); + sums[7] = _mm256_add_epi32(sums[7], r[7]); + + src_ptr += src_stride; + ref_ptr += ref_stride; } - { - __m128i sum; - // in sum_ref-i the result is saved in the first 4 bytes - // the other 4 bytes are zeroed. 
- // sum_ref1 and sum_ref3 are shifted left by 4 bytes - sum_ref1 = _mm256_slli_si256(sum_ref1, 4); - sum_ref3 = _mm256_slli_si256(sum_ref3, 4); - - // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 - sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); - sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); - - // merge every 64 bit from each sum_ref-i - sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); - sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); - - // add the low 64 bit to the high 64 bit - sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); - - // add the low 128 bit to the high 128 bit - sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), - _mm256_extractf128_si256(sum_mlow, 1)); - - _mm_storeu_si128((__m128i *)(res), sum); - } + calc_final_4(sums, sad_array); + calc_final_4(sums + 4, sad_array + 4); +} + +void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t sad_array[4]) { + __m256i sums[4]; + int i; + const uint8_t *refs[4]; + + refs[0] = ref_array[0]; + refs[1] = ref_array[1]; + refs[2] = ref_array[2]; + refs[3] = ref_array[3]; + sums[0] = _mm256_setzero_si256(); + sums[1] = _mm256_setzero_si256(); + sums[2] = _mm256_setzero_si256(); + sums[3] = _mm256_setzero_si256(); + + for (i = 0; i < 64; i++) { + __m256i r_lo[4], r_hi[4]; + // load 64 bytes from src and all ref[] + const __m256i s_lo = _mm256_load_si256((const __m256i *)src_ptr); + const __m256i s_hi = _mm256_load_si256((const __m256i *)(src_ptr + 32)); + r_lo[0] = _mm256_loadu_si256((const __m256i *)refs[0]); + r_hi[0] = _mm256_loadu_si256((const __m256i *)(refs[0] + 32)); + r_lo[1] = _mm256_loadu_si256((const __m256i *)refs[1]); + r_hi[1] = _mm256_loadu_si256((const __m256i *)(refs[1] + 32)); + r_lo[2] = _mm256_loadu_si256((const __m256i *)refs[2]); + r_hi[2] = _mm256_loadu_si256((const __m256i *)(refs[2] + 32)); + r_lo[3] = _mm256_loadu_si256((const __m256i *)refs[3]); + r_hi[3] = _mm256_loadu_si256((const __m256i *)(refs[3] + 32)); + + // sum of the absolute differences between every ref[] to src + r_lo[0] = _mm256_sad_epu8(r_lo[0], s_lo); + r_lo[1] = _mm256_sad_epu8(r_lo[1], s_lo); + r_lo[2] = _mm256_sad_epu8(r_lo[2], s_lo); + r_lo[3] = _mm256_sad_epu8(r_lo[3], s_lo); + r_hi[0] = _mm256_sad_epu8(r_hi[0], s_hi); + r_hi[1] = _mm256_sad_epu8(r_hi[1], s_hi); + r_hi[2] = _mm256_sad_epu8(r_hi[2], s_hi); + r_hi[3] = _mm256_sad_epu8(r_hi[3], s_hi); + + // sum every ref[] + sums[0] = _mm256_add_epi32(sums[0], r_lo[0]); + sums[1] = _mm256_add_epi32(sums[1], r_lo[1]); + sums[2] = _mm256_add_epi32(sums[2], r_lo[2]); + sums[3] = _mm256_add_epi32(sums[3], r_lo[3]); + sums[0] = _mm256_add_epi32(sums[0], r_hi[0]); + sums[1] = _mm256_add_epi32(sums[1], r_hi[1]); + sums[2] = _mm256_add_epi32(sums[2], r_hi[2]); + sums[3] = _mm256_add_epi32(sums[3], r_hi[3]); + + src_ptr += src_stride; + refs[0] += ref_stride; + refs[1] += ref_stride; + refs[2] += ref_stride; + refs[3] += ref_stride; + } + + calc_final_4(sums, sad_array); } diff --git a/media/libvpx/libvpx/vpx_dsp/x86/sad4d_avx512.c b/media/libvpx/libvpx/vpx_dsp/x86/sad4d_avx512.c index 5f2ab6ea71bd..4c5d70464de7 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/sad4d_avx512.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/sad4d_avx512.c @@ -11,8 +11,8 @@ #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" -void vpx_sad64x64x4d_avx512(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, +void vpx_sad64x64x4d_avx512(const uint8_t *src_ptr, int src_stride, + const uint8_t *const 
ref_array[4], int ref_stride, uint32_t res[4]) { __m512i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg; __m512i sum_ref0, sum_ref1, sum_ref2, sum_ref3; @@ -20,33 +20,33 @@ void vpx_sad64x64x4d_avx512(const uint8_t *src, int src_stride, int i; const uint8_t *ref0, *ref1, *ref2, *ref3; - ref0 = ref[0]; - ref1 = ref[1]; - ref2 = ref[2]; - ref3 = ref[3]; + ref0 = ref_array[0]; + ref1 = ref_array[1]; + ref2 = ref_array[2]; + ref3 = ref_array[3]; sum_ref0 = _mm512_set1_epi16(0); sum_ref1 = _mm512_set1_epi16(0); sum_ref2 = _mm512_set1_epi16(0); sum_ref3 = _mm512_set1_epi16(0); for (i = 0; i < 64; i++) { - // load src and all refs - src_reg = _mm512_loadu_si512((const __m512i *)src); + // load src and all ref[] + src_reg = _mm512_loadu_si512((const __m512i *)src_ptr); ref0_reg = _mm512_loadu_si512((const __m512i *)ref0); ref1_reg = _mm512_loadu_si512((const __m512i *)ref1); ref2_reg = _mm512_loadu_si512((const __m512i *)ref2); ref3_reg = _mm512_loadu_si512((const __m512i *)ref3); - // sum of the absolute differences between every ref-i to src + // sum of the absolute differences between every ref[] to src ref0_reg = _mm512_sad_epu8(ref0_reg, src_reg); ref1_reg = _mm512_sad_epu8(ref1_reg, src_reg); ref2_reg = _mm512_sad_epu8(ref2_reg, src_reg); ref3_reg = _mm512_sad_epu8(ref3_reg, src_reg); - // sum every ref-i + // sum every ref[] sum_ref0 = _mm512_add_epi32(sum_ref0, ref0_reg); sum_ref1 = _mm512_add_epi32(sum_ref1, ref1_reg); sum_ref2 = _mm512_add_epi32(sum_ref2, ref2_reg); sum_ref3 = _mm512_add_epi32(sum_ref3, ref3_reg); - src += src_stride; + src_ptr += src_stride; ref0 += ref_stride; ref1 += ref_stride; ref2 += ref_stride; @@ -55,7 +55,7 @@ void vpx_sad64x64x4d_avx512(const uint8_t *src, int src_stride, { __m256i sum256; __m128i sum128; - // in sum_ref-i the result is saved in the first 4 bytes + // in sum_ref[] the result is saved in the first 4 bytes // the other 4 bytes are zeroed. 
// sum_ref1 and sum_ref3 are shifted left by 4 bytes sum_ref1 = _mm512_bslli_epi128(sum_ref1, 4); @@ -65,7 +65,7 @@ void vpx_sad64x64x4d_avx512(const uint8_t *src, int src_stride, sum_ref0 = _mm512_or_si512(sum_ref0, sum_ref1); sum_ref2 = _mm512_or_si512(sum_ref2, sum_ref3); - // merge every 64 bit from each sum_ref-i + // merge every 64 bit from each sum_ref[] sum_mlow = _mm512_unpacklo_epi64(sum_ref0, sum_ref2); sum_mhigh = _mm512_unpackhi_epi64(sum_ref0, sum_ref2); diff --git a/media/libvpx/libvpx/vpx_dsp/x86/sad_sse2.asm b/media/libvpx/libvpx/vpx_dsp/x86/sad_sse2.asm index 1ec906c23604..e4e1bc3e98ab 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/sad_sse2.asm +++ b/media/libvpx/libvpx/vpx_dsp/x86/sad_sse2.asm @@ -25,11 +25,11 @@ cglobal sad%1x%2, 4, %3, 6, src, src_stride, ref, ref_stride, \ cglobal sad%1x%2_avg, 5, 1 + %3, 5, src, src_stride, ref, ref_stride, \ second_pred, n_rows %else ; %3 == 7 -cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 6, src, src_stride, \ +cglobal sad%1x%2_avg, 5, VPX_ARCH_X86_64 + %3, 6, src, src_stride, \ ref, ref_stride, \ second_pred, \ src_stride3, ref_stride3 -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 %define n_rowsd r7d %else ; x86-32 %define n_rowsd dword r0m diff --git a/media/libvpx/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm b/media/libvpx/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm index cee4468c1f08..d1d8d3460e39 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm +++ b/media/libvpx/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm @@ -41,12 +41,12 @@ SECTION .text ; int vpx_sub_pixel_varianceNxh(const uint8_t *src, ptrdiff_t src_stride, ; int x_offset, int y_offset, -; const uint8_t *dst, ptrdiff_t dst_stride, +; const uint8_t *ref, ptrdiff_t ref_stride, ; int height, unsigned int *sse); ; ; This function returns the SE and stores SSE in the given pointer. -%macro SUM_SSE 6 ; src1, dst1, src2, dst2, sum, sse +%macro SUM_SSE 6 ; src1, ref1, src2, ref2, sum, sse psubw %3, %4 psubw %1, %2 paddw %5, %3 @@ -95,7 +95,7 @@ SECTION .text %endmacro %macro INC_SRC_BY_SRC_STRIDE 0 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if VPX_ARCH_X86=1 && CONFIG_PIC=1 add srcq, src_stridemp %else add srcq, src_strideq @@ -114,84 +114,65 @@ SECTION .text ; 11, not 13, if the registers are ordered correctly. 
May make a minor speed ; difference on Win64 -%ifdef PIC ; 64bit PIC +%if VPX_ARCH_X86_64 %if %2 == 1 ; avg cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \ - x_offset, y_offset, \ - dst, dst_stride, \ - sec, sec_stride, height, sse - %define sec_str sec_strideq + x_offset, y_offset, ref, ref_stride, \ + second_pred, second_stride, height, sse + %define second_str second_strideq %else - cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, \ - y_offset, dst, dst_stride, height, sse + cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \ + x_offset, y_offset, ref, ref_stride, \ + height, sse %endif %define block_height heightd %define bilin_filter sseq %else - %if ARCH_X86=1 && CONFIG_PIC=1 + %if CONFIG_PIC=1 %if %2 == 1 ; avg cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \ - x_offset, y_offset, \ - dst, dst_stride, \ - sec, sec_stride, \ - height, sse, g_bilin_filter, g_pw_8 + x_offset, y_offset, ref, ref_stride, \ + second_pred, second_stride, height, sse %define block_height dword heightm - %define sec_str sec_stridemp - - ;Store bilin_filter and pw_8 location in stack - %if GET_GOT_DEFINED == 1 - GET_GOT eax - add esp, 4 ; restore esp - %endif - - lea ecx, [GLOBAL(bilin_filter_m)] - mov g_bilin_filterm, ecx - - lea ecx, [GLOBAL(pw_8)] - mov g_pw_8m, ecx - - LOAD_IF_USED 0, 1 ; load eax, ecx back + %define second_str second_stridemp %else - cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \ - y_offset, dst, dst_stride, height, sse, \ - g_bilin_filter, g_pw_8 + cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \ + x_offset, y_offset, ref, ref_stride, \ + height, sse %define block_height heightd - - ;Store bilin_filter and pw_8 location in stack - %if GET_GOT_DEFINED == 1 - GET_GOT eax - add esp, 4 ; restore esp - %endif - - lea ecx, [GLOBAL(bilin_filter_m)] - mov g_bilin_filterm, ecx - - lea ecx, [GLOBAL(pw_8)] - mov g_pw_8m, ecx - - LOAD_IF_USED 0, 1 ; load eax, ecx back %endif + + ; reuse argument stack space + %define g_bilin_filterm x_offsetm + %define g_pw_8m y_offsetm + + ;Store bilin_filter and pw_8 location in stack + %if GET_GOT_DEFINED == 1 + GET_GOT eax + add esp, 4 ; restore esp + %endif + + lea ecx, [GLOBAL(bilin_filter_m)] + mov g_bilin_filterm, ecx + + lea ecx, [GLOBAL(pw_8)] + mov g_pw_8m, ecx + + LOAD_IF_USED 0, 1 ; load eax, ecx back %else %if %2 == 1 ; avg - cglobal sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \ - 7 + 2 * ARCH_X86_64, 13, src, src_stride, \ - x_offset, y_offset, \ - dst, dst_stride, \ - sec, sec_stride, \ - height, sse - %if ARCH_X86_64 - %define block_height heightd - %define sec_str sec_strideq - %else + cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \ + x_offset, y_offset, \ + ref, ref_stride, second_pred, second_stride, \ + height, sse %define block_height dword heightm - %define sec_str sec_stridemp - %endif + %define second_str second_stridemp %else - cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \ - y_offset, dst, dst_stride, height, sse + cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \ + x_offset, y_offset, ref, ref_stride, \ + height, sse %define block_height heightd %endif - %define bilin_filter bilin_filter_m %endif %endif @@ -211,7 +192,7 @@ SECTION .text %if %1 < 16 sar block_height, 1 %if %2 == 1 ; avg - shl sec_str, 1 + shl second_str, 1 %endif %endif @@ -226,9 +207,9 @@ SECTION .text .x_zero_y_zero_loop: %if %1 == 16 movu m0, [srcq] - mova m1, [dstq] + mova m1, [refq] %if %2 == 1 ; avg - pavgb m0, 
[secq] + pavgb m0, [second_predq] punpckhbw m3, m1, m5 punpcklbw m1, m5 %endif @@ -242,7 +223,7 @@ SECTION .text SUM_SSE m0, m1, m2, m3, m6, m7 add srcq, src_strideq - add dstq, dst_strideq + add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] %if %2 == 1 ; avg @@ -256,14 +237,14 @@ SECTION .text movx m2, [srcq+src_strideq] %endif - movx m1, [dstq] - movx m3, [dstq+dst_strideq] + movx m1, [refq] + movx m3, [refq+ref_strideq] %if %2 == 1 ; avg %if %1 > 4 - pavgb m0, [secq] + pavgb m0, [second_predq] %else - movh m2, [secq] + movh m2, [second_predq] pavgb m0, m2 %endif punpcklbw m3, m5 @@ -284,10 +265,10 @@ SECTION .text SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq+src_strideq*2] - lea dstq, [dstq+dst_strideq*2] + lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif dec block_height jg .x_zero_y_zero_loop @@ -302,11 +283,11 @@ SECTION .text %if %1 == 16 movu m0, [srcq] movu m4, [srcq+src_strideq] - mova m1, [dstq] + mova m1, [refq] pavgb m0, m4 punpckhbw m3, m1, m5 %if %2 == 1 ; avg - pavgb m0, [secq] + pavgb m0, [second_predq] %endif punpcklbw m1, m5 punpckhbw m2, m0, m5 @@ -314,7 +295,7 @@ SECTION .text SUM_SSE m0, m1, m2, m3, m6, m7 add srcq, src_strideq - add dstq, dst_strideq + add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m2, [srcq+src_strideq] @@ -325,22 +306,22 @@ SECTION .text movx m1, [srcq+src_strideq*2] punpckldq m2, m1 %endif - movx m1, [dstq] + movx m1, [refq] %if %1 > 4 movlhps m0, m2 %else ; 4xh punpckldq m0, m2 %endif - movx m3, [dstq+dst_strideq] + movx m3, [refq+ref_strideq] pavgb m0, m2 punpcklbw m1, m5 %if %1 > 4 - pavgb m0, [secq] + pavgb m0, [second_predq] punpcklbw m3, m5 punpckhbw m2, m0, m5 punpcklbw m0, m5 %else ; 4xh - movh m4, [secq] + movh m4, [second_predq] pavgb m0, m4 punpcklbw m3, m5 punpcklbw m0, m5 @@ -348,9 +329,9 @@ SECTION .text %endif %else ; !avg movx m4, [srcq+src_strideq*2] - movx m1, [dstq] + movx m1, [refq] pavgb m0, m2 - movx m3, [dstq+dst_strideq] + movx m3, [refq+ref_strideq] pavgb m2, m4 punpcklbw m0, m5 punpcklbw m2, m5 @@ -360,10 +341,10 @@ SECTION .text SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq+src_strideq*2] - lea dstq, [dstq+dst_strideq*2] + lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif dec block_height jg .x_zero_y_half_loop @@ -371,21 +352,21 @@ SECTION .text .x_zero_y_nonhalf: ; x_offset == 0 && y_offset == bilin interpolation -%ifdef PIC - lea bilin_filter, [bilin_filter_m] +%if VPX_ARCH_X86_64 + lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && %1 > 4 +%if VPX_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+y_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+y_offsetq+16] %endif - mova m10, [pw_8] + mova m10, [GLOBAL(pw_8)] %define filter_y_a m8 %define filter_y_b m9 %define filter_rnd m10 %else ; x86-32 or mmx -%if ARCH_X86=1 && CONFIG_PIC=1 +%if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; x_offset == 0, reuse x_offset reg %define tempq x_offsetq add y_offsetq, g_bilin_filterm @@ -397,7 +378,7 @@ SECTION .text add y_offsetq, bilin_filter %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] -%define filter_rnd [pw_8] +%define filter_rnd [GLOBAL(pw_8)] %endif %endif @@ -405,7 +386,7 @@ SECTION .text %if %1 == 16 movu m0, [srcq] movu m4, [srcq+src_strideq] - mova m1, [dstq] + mova m1, [refq] %if cpuflag(ssse3) punpckhbw m2, m0, m4 punpcklbw m0, m4 @@ -437,7 +418,7 @@ SECTION .text %if 
%2 == 1 ; avg ; FIXME(rbultje) pipeline packuswb m0, m2 - pavgb m0, [secq] + pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %endif @@ -446,14 +427,14 @@ SECTION .text SUM_SSE m0, m1, m2, m3, m6, m7 add srcq, src_strideq - add dstq, dst_strideq + add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m2, [srcq+src_strideq] movx m4, [srcq+src_strideq*2] - movx m3, [dstq+dst_strideq] + movx m3, [refq+ref_strideq] %if cpuflag(ssse3) - movx m1, [dstq] + movx m1, [refq] punpcklbw m0, m2 punpcklbw m2, m4 pmaddubsw m0, filter_y_a @@ -473,7 +454,7 @@ SECTION .text pmullw m4, filter_y_b paddw m0, m1 paddw m2, filter_rnd - movx m1, [dstq] + movx m1, [refq] paddw m2, m4 %endif psraw m0, 4 @@ -485,11 +466,11 @@ SECTION .text %endif packuswb m0, m2 %if %1 > 4 - pavgb m0, [secq] + pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %else ; 4xh - movh m2, [secq] + movh m2, [second_predq] pavgb m0, m2 punpcklbw m0, m5 movhlps m2, m0 @@ -499,10 +480,10 @@ SECTION .text SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq+src_strideq*2] - lea dstq, [dstq+dst_strideq*2] + lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif dec block_height jg .x_zero_y_other_loop @@ -523,11 +504,11 @@ SECTION .text %if %1 == 16 movu m0, [srcq] movu m4, [srcq+1] - mova m1, [dstq] + mova m1, [refq] pavgb m0, m4 punpckhbw m3, m1, m5 %if %2 == 1 ; avg - pavgb m0, [secq] + pavgb m0, [second_predq] %endif punpcklbw m1, m5 punpckhbw m2, m0, m5 @@ -535,7 +516,7 @@ SECTION .text SUM_SSE m0, m1, m2, m3, m6, m7 add srcq, src_strideq - add dstq, dst_strideq + add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m4, [srcq+1] @@ -549,17 +530,17 @@ SECTION .text movx m2, [srcq+src_strideq+1] punpckldq m4, m2 %endif - movx m1, [dstq] - movx m3, [dstq+dst_strideq] + movx m1, [refq] + movx m3, [refq+ref_strideq] pavgb m0, m4 punpcklbw m3, m5 %if %1 > 4 - pavgb m0, [secq] + pavgb m0, [second_predq] punpcklbw m1, m5 punpckhbw m2, m0, m5 punpcklbw m0, m5 %else ; 4xh - movh m2, [secq] + movh m2, [second_predq] pavgb m0, m2 punpcklbw m1, m5 punpcklbw m0, m5 @@ -567,10 +548,10 @@ SECTION .text %endif %else ; !avg movx m2, [srcq+src_strideq] - movx m1, [dstq] + movx m1, [refq] pavgb m0, m4 movx m4, [srcq+src_strideq+1] - movx m3, [dstq+dst_strideq] + movx m3, [refq+ref_strideq] pavgb m2, m4 punpcklbw m0, m5 punpcklbw m2, m5 @@ -580,10 +561,10 @@ SECTION .text SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq+src_strideq*2] - lea dstq, [dstq+dst_strideq*2] + lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif dec block_height jg .x_half_y_zero_loop @@ -602,13 +583,13 @@ SECTION .text .x_half_y_half_loop: movu m4, [srcq] movu m3, [srcq+1] - mova m1, [dstq] + mova m1, [refq] pavgb m4, m3 punpckhbw m3, m1, m5 pavgb m0, m4 %if %2 == 1 ; avg punpcklbw m1, m5 - pavgb m0, [secq] + pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %else @@ -620,7 +601,7 @@ SECTION .text mova m0, m4 add srcq, src_strideq - add dstq, dst_strideq + add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m3, [srcq+1] @@ -647,13 +628,13 @@ SECTION .text punpckldq m0, m2 pshuflw m4, m2, 0xe %endif - movx m1, [dstq] + movx m1, [refq] pavgb m0, m2 - movx m3, [dstq+dst_strideq] + movx m3, [refq+ref_strideq] %if %1 > 4 - pavgb m0, [secq] + pavgb m0, [second_predq] %else - movh m2, [secq] + movh m2, [second_predq] pavgb m0, m2 %endif punpcklbw m3, m5 @@ -672,8 +653,8 @@ SECTION .text pavgb m4, m1 pavgb m0, m2 pavgb m2, m4 
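; Note: pavgb computes the rounded byte average (a + b + 1) >> 1, so the
; three averages above model the 2x2 bilinear kernel for the half/half
; offset. In scalar terms:
;   top = (s[x] + s[x+1] + 1) >> 1
;   bot = (s[x+stride] + s[x+stride+1] + 1) >> 1
;   out = (top + bot + 1) >> 1
; which matches the exact (s00 + s01 + s10 + s11 + 2) >> 2 average to
; within one unit of rounding.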
- movx m1, [dstq] - movx m3, [dstq+dst_strideq] + movx m1, [refq] + movx m3, [refq+ref_strideq] punpcklbw m0, m5 punpcklbw m2, m5 punpcklbw m3, m5 @@ -683,10 +664,10 @@ SECTION .text mova m0, m4 lea srcq, [srcq+src_strideq*2] - lea dstq, [dstq+dst_strideq*2] + lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif dec block_height jg .x_half_y_half_loop @@ -694,21 +675,21 @@ SECTION .text .x_half_y_nonhalf: ; x_offset == 0.5 && y_offset == bilin interpolation -%ifdef PIC - lea bilin_filter, [bilin_filter_m] +%if VPX_ARCH_X86_64 + lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && %1 > 4 +%if VPX_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+y_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+y_offsetq+16] %endif - mova m10, [pw_8] + mova m10, [GLOBAL(pw_8)] %define filter_y_a m8 %define filter_y_b m9 %define filter_rnd m10 %else ;x86_32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; x_offset == 0.5. We can reuse x_offset reg %define tempq x_offsetq add y_offsetq, g_bilin_filterm @@ -720,7 +701,7 @@ SECTION .text add y_offsetq, bilin_filter %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] -%define filter_rnd [pw_8] +%define filter_rnd [GLOBAL(pw_8)] %endif %endif @@ -732,7 +713,7 @@ SECTION .text .x_half_y_other_loop: movu m4, [srcq] movu m2, [srcq+1] - mova m1, [dstq] + mova m1, [refq] pavgb m4, m2 %if cpuflag(ssse3) punpckhbw m2, m0, m4 @@ -762,7 +743,7 @@ SECTION .text %if %2 == 1 ; avg ; FIXME(rbultje) pipeline packuswb m0, m2 - pavgb m0, [secq] + pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %endif @@ -771,7 +752,7 @@ SECTION .text mova m0, m4 add srcq, src_strideq - add dstq, dst_strideq + add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m3, [srcq+1] @@ -787,9 +768,9 @@ SECTION .text movx m3, [srcq+src_strideq+1] pavgb m2, m1 pavgb m4, m3 - movx m3, [dstq+dst_strideq] + movx m3, [refq+ref_strideq] %if cpuflag(ssse3) - movx m1, [dstq] + movx m1, [refq] punpcklbw m0, m2 punpcklbw m2, m4 pmaddubsw m0, filter_y_a @@ -809,7 +790,7 @@ SECTION .text pmullw m1, m4, filter_y_b paddw m2, filter_rnd paddw m2, m1 - movx m1, [dstq] + movx m1, [refq] %endif psraw m0, 4 psraw m2, 4 @@ -820,11 +801,11 @@ SECTION .text %endif packuswb m0, m2 %if %1 > 4 - pavgb m0, [secq] + pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %else - movh m2, [secq] + movh m2, [second_predq] pavgb m0, m2 punpcklbw m0, m5 movhlps m2, m0 @@ -835,10 +816,10 @@ SECTION .text mova m0, m4 lea srcq, [srcq+src_strideq*2] - lea dstq, [dstq+dst_strideq*2] + lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif dec block_height jg .x_half_y_other_loop @@ -852,21 +833,21 @@ SECTION .text jnz .x_nonhalf_y_nonzero ; x_offset == bilin interpolation && y_offset == 0 -%ifdef PIC - lea bilin_filter, [bilin_filter_m] +%if VPX_ARCH_X86_64 + lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift -%if ARCH_X86_64 && %1 > 4 +%if VPX_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+x_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+x_offsetq+16] %endif - mova m10, [pw_8] + mova m10, [GLOBAL(pw_8)] %define filter_x_a m8 %define filter_x_b m9 %define filter_rnd m10 %else ; x86-32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if VPX_ARCH_X86=1 && CONFIG_PIC=1 ;y_offset == 0. 
We can reuse y_offset reg. %define tempq y_offsetq add x_offsetq, g_bilin_filterm @@ -878,7 +859,7 @@ SECTION .text add x_offsetq, bilin_filter %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] -%define filter_rnd [pw_8] +%define filter_rnd [GLOBAL(pw_8)] %endif %endif @@ -886,7 +867,7 @@ SECTION .text %if %1 == 16 movu m0, [srcq] movu m4, [srcq+1] - mova m1, [dstq] + mova m1, [refq] %if cpuflag(ssse3) punpckhbw m2, m0, m4 punpcklbw m0, m4 @@ -913,7 +894,7 @@ SECTION .text %if %2 == 1 ; avg ; FIXME(rbultje) pipeline packuswb m0, m2 - pavgb m0, [secq] + pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %endif @@ -922,16 +903,16 @@ SECTION .text SUM_SSE m0, m1, m2, m3, m6, m7 add srcq, src_strideq - add dstq, dst_strideq + add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m1, [srcq+1] movx m2, [srcq+src_strideq] movx m4, [srcq+src_strideq+1] - movx m3, [dstq+dst_strideq] + movx m3, [refq+ref_strideq] %if cpuflag(ssse3) punpcklbw m0, m1 - movx m1, [dstq] + movx m1, [refq] punpcklbw m2, m4 pmaddubsw m0, filter_x_a pmaddubsw m2, filter_x_a @@ -951,7 +932,7 @@ SECTION .text pmullw m4, filter_x_b paddw m0, m1 paddw m2, filter_rnd - movx m1, [dstq] + movx m1, [refq] paddw m2, m4 %endif psraw m0, 4 @@ -963,11 +944,11 @@ SECTION .text %endif packuswb m0, m2 %if %1 > 4 - pavgb m0, [secq] + pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %else - movh m2, [secq] + movh m2, [second_predq] pavgb m0, m2 punpcklbw m0, m5 movhlps m2, m0 @@ -977,10 +958,10 @@ SECTION .text SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq+src_strideq*2] - lea dstq, [dstq+dst_strideq*2] + lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif dec block_height jg .x_other_y_zero_loop @@ -994,21 +975,21 @@ SECTION .text jne .x_nonhalf_y_nonhalf ; x_offset == bilin interpolation && y_offset == 0.5 -%ifdef PIC - lea bilin_filter, [bilin_filter_m] +%if VPX_ARCH_X86_64 + lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift -%if ARCH_X86_64 && %1 > 4 +%if VPX_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+x_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+x_offsetq+16] %endif - mova m10, [pw_8] + mova m10, [GLOBAL(pw_8)] %define filter_x_a m8 %define filter_x_b m9 %define filter_rnd m10 %else ; x86-32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; y_offset == 0.5. We can reuse y_offset reg. 
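; Note: each table entry selected via filter_idx_shift appears to hold the
; two bilinear taps as (16 - offset) words at [q] and (offset) words at
; [q+16]; combined with the pw_8 rounding constant and the psraw 4 in the
; loops below, the filtered sample is, per pixel:
;   out = (p0 * (16 - offset) + p1 * offset + 8) >> 4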
%define tempq y_offsetq add x_offsetq, g_bilin_filterm @@ -1020,7 +1001,7 @@ SECTION .text add x_offsetq, bilin_filter %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] -%define filter_rnd [pw_8] +%define filter_rnd [GLOBAL(pw_8)] %endif %endif @@ -1056,7 +1037,7 @@ SECTION .text movu m4, [srcq] movu m3, [srcq+1] %if cpuflag(ssse3) - mova m1, [dstq] + mova m1, [refq] punpckhbw m2, m4, m3 punpcklbw m4, m3 pmaddubsw m2, filter_x_a @@ -1082,7 +1063,7 @@ SECTION .text paddw m2, filter_rnd paddw m4, m3 paddw m2, m1 - mova m1, [dstq] + mova m1, [refq] psraw m4, 4 psraw m2, 4 punpckhbw m3, m1, m5 @@ -1096,7 +1077,7 @@ SECTION .text %endif %if %2 == 1 ; avg ; FIXME(rbultje) pipeline - pavgb m0, [secq] + pavgb m0, [second_predq] %endif punpckhbw m2, m0, m5 punpcklbw m0, m5 @@ -1104,7 +1085,7 @@ SECTION .text mova m0, m4 add srcq, src_strideq - add dstq, dst_strideq + add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m1, [srcq+1] @@ -1132,8 +1113,8 @@ SECTION .text punpcklbw m4, m3 pmaddubsw m2, filter_x_a pmaddubsw m4, filter_x_a - movx m1, [dstq] - movx m3, [dstq+dst_strideq] + movx m1, [refq] + movx m3, [refq+ref_strideq] paddw m2, filter_rnd paddw m4, filter_rnd %else @@ -1148,9 +1129,9 @@ SECTION .text pmullw m3, filter_x_b paddw m4, filter_rnd paddw m2, m1 - movx m1, [dstq] + movx m1, [refq] paddw m4, m3 - movx m3, [dstq+dst_strideq] + movx m3, [refq+ref_strideq] %endif psraw m2, 4 psraw m4, 4 @@ -1163,11 +1144,11 @@ SECTION .text %endif packuswb m0, m2 %if %1 > 4 - pavgb m0, [secq] + pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %else - movh m2, [secq] + movh m2, [second_predq] pavgb m0, m2 punpcklbw m0, m5 movhlps m2, m0 @@ -1179,10 +1160,10 @@ SECTION .text mova m0, m4 lea srcq, [srcq+src_strideq*2] - lea dstq, [dstq+dst_strideq*2] + lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif dec block_height jg .x_other_y_half_loop @@ -1192,12 +1173,12 @@ SECTION .text STORE_AND_RET %1 .x_nonhalf_y_nonhalf: -%ifdef PIC - lea bilin_filter, [bilin_filter_m] +%if VPX_ARCH_X86_64 + lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && %1 > 4 +%if VPX_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+x_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+x_offsetq+16] @@ -1206,14 +1187,14 @@ SECTION .text %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m11, [bilin_filter+y_offsetq+16] %endif - mova m12, [pw_8] + mova m12, [GLOBAL(pw_8)] %define filter_x_a m8 %define filter_x_b m9 %define filter_y_a m10 %define filter_y_b m11 %define filter_rnd m12 %else ; x86-32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; In this case, there is NO unused register. Used src_stride register. Later, ; src_stride has to be loaded from stack when it is needed. 
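; Note: with src, ref, both strides, both bilinear offsets and (for the
; avg variants) the second-pred pointer all live, every general-purpose
; register is occupied, so the loops below advance src through the
; INC_SRC_BY_SRC_STRIDE macro, which can reload the spilled stride from
; its stack slot rather than relying on the repurposed register.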
%define tempq src_strideq @@ -1234,7 +1215,7 @@ SECTION .text %define filter_x_b [x_offsetq+16] %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] -%define filter_rnd [pw_8] +%define filter_rnd [GLOBAL(pw_8)] %endif %endif @@ -1273,7 +1254,7 @@ SECTION .text %if cpuflag(ssse3) movu m4, [srcq] movu m3, [srcq+1] - mova m1, [dstq] + mova m1, [refq] punpckhbw m2, m4, m3 punpcklbw m4, m3 pmaddubsw m2, filter_x_a @@ -1319,7 +1300,7 @@ SECTION .text pmullw m0, filter_y_a pmullw m3, filter_y_b paddw m2, m1 - mova m1, [dstq] + mova m1, [refq] paddw m0, filter_rnd psraw m2, 4 paddw m0, m3 @@ -1330,7 +1311,7 @@ SECTION .text %if %2 == 1 ; avg ; FIXME(rbultje) pipeline packuswb m0, m2 - pavgb m0, [secq] + pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %endif @@ -1338,7 +1319,7 @@ SECTION .text mova m0, m4 INC_SRC_BY_SRC_STRIDE - add dstq, dst_strideq + add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m1, [srcq+1] @@ -1374,8 +1355,8 @@ SECTION .text punpcklbw m4, m3 pmaddubsw m2, filter_x_a pmaddubsw m4, filter_x_a - movx m3, [dstq+dst_strideq] - movx m1, [dstq] + movx m3, [refq+ref_strideq] + movx m1, [refq] paddw m2, filter_rnd paddw m4, filter_rnd psraw m2, 4 @@ -1414,9 +1395,9 @@ SECTION .text pmullw m1, m4, filter_y_b paddw m2, filter_rnd paddw m0, m3 - movx m3, [dstq+dst_strideq] + movx m3, [refq+ref_strideq] paddw m2, m1 - movx m1, [dstq] + movx m1, [refq] psraw m0, 4 psraw m2, 4 punpcklbw m3, m5 @@ -1429,11 +1410,11 @@ SECTION .text %endif packuswb m0, m2 %if %1 > 4 - pavgb m0, [secq] + pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %else - movh m2, [secq] + movh m2, [second_predq] pavgb m0, m2 punpcklbw m0, m5 movhlps m2, m0 @@ -1443,10 +1424,10 @@ SECTION .text mova m0, m4 INC_SRC_BY_SRC_STRIDE - lea dstq, [dstq+dst_strideq*2] + lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg - add secq, sec_str + add second_predq, second_str %endif dec block_height jg .x_other_y_other_loop diff --git a/media/libvpx/libvpx/vpx_dsp/x86/sum_squares_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/sum_squares_sse2.c index 026d0ca2f272..14f3b35c01a8 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/sum_squares_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/sum_squares_sse2.c @@ -10,120 +10,96 @@ #include <assert.h> #include <emmintrin.h> -#include <stdio.h> #include "./vpx_dsp_rtcd.h" - -static uint64_t vpx_sum_squares_2d_i16_4x4_sse2(const int16_t *src, - int stride) { - const __m128i v_val_0_w = - _mm_loadl_epi64((const __m128i *)(src + 0 * stride)); - const __m128i v_val_1_w = - _mm_loadl_epi64((const __m128i *)(src + 1 * stride)); - const __m128i v_val_2_w = - _mm_loadl_epi64((const __m128i *)(src + 2 * stride)); - const __m128i v_val_3_w = - _mm_loadl_epi64((const __m128i *)(src + 3 * stride)); - - const __m128i v_sq_0_d = _mm_madd_epi16(v_val_0_w, v_val_0_w); - const __m128i v_sq_1_d = _mm_madd_epi16(v_val_1_w, v_val_1_w); - const __m128i v_sq_2_d = _mm_madd_epi16(v_val_2_w, v_val_2_w); - const __m128i v_sq_3_d = _mm_madd_epi16(v_val_3_w, v_val_3_w); - - const __m128i v_sum_01_d = _mm_add_epi32(v_sq_0_d, v_sq_1_d); - const __m128i v_sum_23_d = _mm_add_epi32(v_sq_2_d, v_sq_3_d); - const __m128i v_sum_0123_d = _mm_add_epi32(v_sum_01_d, v_sum_23_d); - - const __m128i v_sum_d = - _mm_add_epi32(v_sum_0123_d, _mm_srli_epi64(v_sum_0123_d, 32)); - - return (uint64_t)_mm_cvtsi128_si32(v_sum_d); -} - -// TODO(jingning): Evaluate the performance impact here. 
-#ifdef __GNUC__ -// This prevents GCC/Clang from inlining this function into -// vpx_sum_squares_2d_i16_sse2, which in turn saves some stack -// maintenance instructions in the common case of 4x4. -__attribute__((noinline)) -#endif -static uint64_t -vpx_sum_squares_2d_i16_nxn_sse2(const int16_t *src, int stride, int size) { - int r, c; - const __m128i v_zext_mask_q = _mm_set_epi32(0, 0xffffffff, 0, 0xffffffff); - __m128i v_acc_q = _mm_setzero_si128(); - - for (r = 0; r < size; r += 8) { - __m128i v_acc_d = _mm_setzero_si128(); - - for (c = 0; c < size; c += 8) { - const int16_t *b = src + c; - const __m128i v_val_0_w = - _mm_load_si128((const __m128i *)(b + 0 * stride)); - const __m128i v_val_1_w = - _mm_load_si128((const __m128i *)(b + 1 * stride)); - const __m128i v_val_2_w = - _mm_load_si128((const __m128i *)(b + 2 * stride)); - const __m128i v_val_3_w = - _mm_load_si128((const __m128i *)(b + 3 * stride)); - const __m128i v_val_4_w = - _mm_load_si128((const __m128i *)(b + 4 * stride)); - const __m128i v_val_5_w = - _mm_load_si128((const __m128i *)(b + 5 * stride)); - const __m128i v_val_6_w = - _mm_load_si128((const __m128i *)(b + 6 * stride)); - const __m128i v_val_7_w = - _mm_load_si128((const __m128i *)(b + 7 * stride)); - - const __m128i v_sq_0_d = _mm_madd_epi16(v_val_0_w, v_val_0_w); - const __m128i v_sq_1_d = _mm_madd_epi16(v_val_1_w, v_val_1_w); - const __m128i v_sq_2_d = _mm_madd_epi16(v_val_2_w, v_val_2_w); - const __m128i v_sq_3_d = _mm_madd_epi16(v_val_3_w, v_val_3_w); - const __m128i v_sq_4_d = _mm_madd_epi16(v_val_4_w, v_val_4_w); - const __m128i v_sq_5_d = _mm_madd_epi16(v_val_5_w, v_val_5_w); - const __m128i v_sq_6_d = _mm_madd_epi16(v_val_6_w, v_val_6_w); - const __m128i v_sq_7_d = _mm_madd_epi16(v_val_7_w, v_val_7_w); - - const __m128i v_sum_01_d = _mm_add_epi32(v_sq_0_d, v_sq_1_d); - const __m128i v_sum_23_d = _mm_add_epi32(v_sq_2_d, v_sq_3_d); - const __m128i v_sum_45_d = _mm_add_epi32(v_sq_4_d, v_sq_5_d); - const __m128i v_sum_67_d = _mm_add_epi32(v_sq_6_d, v_sq_7_d); - - const __m128i v_sum_0123_d = _mm_add_epi32(v_sum_01_d, v_sum_23_d); - const __m128i v_sum_4567_d = _mm_add_epi32(v_sum_45_d, v_sum_67_d); - - v_acc_d = _mm_add_epi32(v_acc_d, v_sum_0123_d); - v_acc_d = _mm_add_epi32(v_acc_d, v_sum_4567_d); - } - - v_acc_q = _mm_add_epi64(v_acc_q, _mm_and_si128(v_acc_d, v_zext_mask_q)); - v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_epi64(v_acc_d, 32)); - - src += 8 * stride; - } - - v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8)); - -#if ARCH_X86_64 - return (uint64_t)_mm_cvtsi128_si64(v_acc_q); -#else - { - uint64_t tmp; - _mm_storel_epi64((__m128i *)&tmp, v_acc_q); - return tmp; - } -#endif -} +#include "vpx_dsp/x86/mem_sse2.h" uint64_t vpx_sum_squares_2d_i16_sse2(const int16_t *src, int stride, int size) { - // 4 elements per row only requires half an XMM register, so this - // must be a special case, but also note that over 75% of all calls - // are with size == 4, so it is also the common case. + // Over 75% of all calls are with size == 4. 
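  /* Note: _mm_madd_epi16(s, s) squares eight int16 lanes and pairwise-adds
   * them into four int32 lanes, so the two madds plus the horizontal adds
   * below reduce all 16 samples to one 32-bit total. A scalar model of
   * this fast path:
   *
   *   uint64_t ss = 0;
   *   for (int r = 0; r < 4; ++r)
   *     for (int c = 0; c < 4; ++c)
   *       ss += (int32_t)src[r * stride + c] * src[r * stride + c];
   *   return ss;
   */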
if (size == 4) { - return vpx_sum_squares_2d_i16_4x4_sse2(src, stride); + __m128i s[2], sq[2], ss; + + s[0] = _mm_loadl_epi64((const __m128i *)(src + 0 * stride)); + s[0] = loadh_epi64(s[0], src + 1 * stride); + s[1] = _mm_loadl_epi64((const __m128i *)(src + 2 * stride)); + s[1] = loadh_epi64(s[1], src + 3 * stride); + sq[0] = _mm_madd_epi16(s[0], s[0]); + sq[1] = _mm_madd_epi16(s[1], s[1]); + sq[0] = _mm_add_epi32(sq[0], sq[1]); + ss = _mm_add_epi32(sq[0], _mm_srli_si128(sq[0], 8)); + ss = _mm_add_epi32(ss, _mm_srli_epi64(ss, 32)); + + return (uint64_t)_mm_cvtsi128_si32(ss); } else { // Generic case + int r = size; + const __m128i v_zext_mask_q = _mm_set_epi32(0, 0xffffffff, 0, 0xffffffff); + __m128i v_acc_q = _mm_setzero_si128(); + assert(size % 8 == 0); - return vpx_sum_squares_2d_i16_nxn_sse2(src, stride, size); + + do { + int c = 0; + __m128i v_acc_d = _mm_setzero_si128(); + + do { + const int16_t *const b = src + c; + const __m128i v_val_0_w = + _mm_load_si128((const __m128i *)(b + 0 * stride)); + const __m128i v_val_1_w = + _mm_load_si128((const __m128i *)(b + 1 * stride)); + const __m128i v_val_2_w = + _mm_load_si128((const __m128i *)(b + 2 * stride)); + const __m128i v_val_3_w = + _mm_load_si128((const __m128i *)(b + 3 * stride)); + const __m128i v_val_4_w = + _mm_load_si128((const __m128i *)(b + 4 * stride)); + const __m128i v_val_5_w = + _mm_load_si128((const __m128i *)(b + 5 * stride)); + const __m128i v_val_6_w = + _mm_load_si128((const __m128i *)(b + 6 * stride)); + const __m128i v_val_7_w = + _mm_load_si128((const __m128i *)(b + 7 * stride)); + + const __m128i v_sq_0_d = _mm_madd_epi16(v_val_0_w, v_val_0_w); + const __m128i v_sq_1_d = _mm_madd_epi16(v_val_1_w, v_val_1_w); + const __m128i v_sq_2_d = _mm_madd_epi16(v_val_2_w, v_val_2_w); + const __m128i v_sq_3_d = _mm_madd_epi16(v_val_3_w, v_val_3_w); + const __m128i v_sq_4_d = _mm_madd_epi16(v_val_4_w, v_val_4_w); + const __m128i v_sq_5_d = _mm_madd_epi16(v_val_5_w, v_val_5_w); + const __m128i v_sq_6_d = _mm_madd_epi16(v_val_6_w, v_val_6_w); + const __m128i v_sq_7_d = _mm_madd_epi16(v_val_7_w, v_val_7_w); + + const __m128i v_sum_01_d = _mm_add_epi32(v_sq_0_d, v_sq_1_d); + const __m128i v_sum_23_d = _mm_add_epi32(v_sq_2_d, v_sq_3_d); + const __m128i v_sum_45_d = _mm_add_epi32(v_sq_4_d, v_sq_5_d); + const __m128i v_sum_67_d = _mm_add_epi32(v_sq_6_d, v_sq_7_d); + + const __m128i v_sum_0123_d = _mm_add_epi32(v_sum_01_d, v_sum_23_d); + const __m128i v_sum_4567_d = _mm_add_epi32(v_sum_45_d, v_sum_67_d); + + v_acc_d = _mm_add_epi32(v_acc_d, v_sum_0123_d); + v_acc_d = _mm_add_epi32(v_acc_d, v_sum_4567_d); + c += 8; + } while (c < size); + + v_acc_q = _mm_add_epi64(v_acc_q, _mm_and_si128(v_acc_d, v_zext_mask_q)); + v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_epi64(v_acc_d, 32)); + + src += 8 * stride; + r -= 8; + } while (r); + + v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8)); + +#if VPX_ARCH_X86_64 + return (uint64_t)_mm_cvtsi128_si64(v_acc_q); +#else + { + uint64_t tmp; + _mm_storel_epi64((__m128i *)&tmp, v_acc_q); + return tmp; + } +#endif } } diff --git a/media/libvpx/libvpx/vpx_dsp/x86/transpose_sse2.h b/media/libvpx/libvpx/vpx_dsp/x86/transpose_sse2.h index 8a0119ca7e54..6e07871b1816 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/transpose_sse2.h +++ b/media/libvpx/libvpx/vpx_dsp/x86/transpose_sse2.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_DSP_X86_TRANSPOSE_SSE2_H_ -#define VPX_DSP_X86_TRANSPOSE_SSE2_H_ +#ifndef VPX_VPX_DSP_X86_TRANSPOSE_SSE2_H_ +#define VPX_VPX_DSP_X86_TRANSPOSE_SSE2_H_ #include <emmintrin.h> // SSE2 @@ -364,4 +364,4 @@ static INLINE void transpose_32bit_8x4(const __m128i *const in, out[7] = _mm_unpackhi_epi64(a6, a7); } -#endif // VPX_DSP_X86_TRANSPOSE_SSE2_H_ +#endif // VPX_VPX_DSP_X86_TRANSPOSE_SSE2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/txfm_common_sse2.h b/media/libvpx/libvpx/vpx_dsp/x86/txfm_common_sse2.h index 0a9542c85b40..de5ce43b00a1 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/txfm_common_sse2.h +++ b/media/libvpx/libvpx/vpx_dsp/x86/txfm_common_sse2.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_DSP_X86_TXFM_COMMON_SSE2_H_ -#define VPX_DSP_X86_TXFM_COMMON_SSE2_H_ +#ifndef VPX_VPX_DSP_X86_TXFM_COMMON_SSE2_H_ +#define VPX_VPX_DSP_X86_TXFM_COMMON_SSE2_H_ #include <emmintrin.h> #include "vpx/vpx_integer.h" @@ -29,4 +29,4 @@ _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \ (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h)) -#endif // VPX_DSP_X86_TXFM_COMMON_SSE2_H_ +#endif // VPX_VPX_DSP_X86_TXFM_COMMON_SSE2_H_ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/variance_avx2.c b/media/libvpx/libvpx/vpx_dsp/x86/variance_avx2.c index d15a89c746be..9232acbfbb30 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/variance_avx2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/variance_avx2.c @@ -38,130 +38,140 @@ DECLARE_ALIGNED(32, static const int8_t, adjacent_sub_avx2[32]) = { }; /* clang-format on */ -void vpx_get16x16var_avx2(const unsigned char *src_ptr, int source_stride, - const unsigned char *ref_ptr, int recon_stride, - unsigned int *sse, int *sum) { - unsigned int i, src_2strides, ref_2strides; - __m256i sum_reg = _mm256_setzero_si256(); - __m256i sse_reg = _mm256_setzero_si256(); - // process two 16 byte locations in a 256 bit register - src_2strides = source_stride << 1; - ref_2strides = recon_stride << 1; - for (i = 0; i < 8; ++i) { - // convert up values in 128 bit registers across lanes - const __m256i src0 = - _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const *)(src_ptr))); - const __m256i src1 = _mm256_cvtepu8_epi16( - _mm_loadu_si128((__m128i const *)(src_ptr + source_stride))); - const __m256i ref0 = - _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const *)(ref_ptr))); - const __m256i ref1 = _mm256_cvtepu8_epi16( - _mm_loadu_si128((__m128i const *)(ref_ptr + recon_stride))); - const __m256i diff0 = _mm256_sub_epi16(src0, ref0); - const __m256i diff1 = _mm256_sub_epi16(src1, ref1); - const __m256i madd0 = _mm256_madd_epi16(diff0, diff0); - const __m256i madd1 = _mm256_madd_epi16(diff1, diff1); +static INLINE void variance_kernel_avx2(const __m256i src, const __m256i ref, + __m256i *const sse, + __m256i *const sum) { + const __m256i adj_sub = _mm256_load_si256((__m256i const *)adjacent_sub_avx2); - // add to the running totals - sum_reg = _mm256_add_epi16(sum_reg, _mm256_add_epi16(diff0, diff1)); - sse_reg = _mm256_add_epi32(sse_reg, _mm256_add_epi32(madd0, madd1)); + // unpack into pairs of source and reference values + const __m256i src_ref0 = _mm256_unpacklo_epi8(src, ref); + const __m256i src_ref1 = _mm256_unpackhi_epi8(src, ref); - src_ptr += src_2strides; - ref_ptr += ref_2strides; - } - { - // extract the low lane and add it to the high lane - const __m128i sum_reg_128 = _mm_add_epi16( - _mm256_castsi256_si128(sum_reg), _mm256_extractf128_si256(sum_reg, 1)); - const __m128i sse_reg_128 = _mm_add_epi32( - 
_mm256_castsi256_si128(sse_reg), _mm256_extractf128_si256(sse_reg, 1)); + // subtract adjacent elements using src*1 + ref*-1 + const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub); + const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub); + const __m256i madd0 = _mm256_madd_epi16(diff0, diff0); + const __m256i madd1 = _mm256_madd_epi16(diff1, diff1); - // sum upper and lower 64 bits together and convert up to 32 bit values - const __m128i sum_reg_64 = - _mm_add_epi16(sum_reg_128, _mm_srli_si128(sum_reg_128, 8)); - const __m128i sum_int32 = _mm_cvtepi16_epi32(sum_reg_64); + // add to the running totals + *sum = _mm256_add_epi16(*sum, _mm256_add_epi16(diff0, diff1)); + *sse = _mm256_add_epi32(*sse, _mm256_add_epi32(madd0, madd1)); +} - // unpack sse and sum registers and add - const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, sum_int32); - const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, sum_int32); - const __m128i sse_sum = _mm_add_epi32(sse_sum_lo, sse_sum_hi); +static INLINE void variance_final_from_32bit_sum_avx2(__m256i vsse, + __m128i vsum, + unsigned int *const sse, + int *const sum) { + // extract the low lane and add it to the high lane + const __m128i sse_reg_128 = _mm_add_epi32(_mm256_castsi256_si128(vsse), + _mm256_extractf128_si256(vsse, 1)); - // perform the final summation and extract the results - const __m128i res = _mm_add_epi32(sse_sum, _mm_srli_si128(sse_sum, 8)); - *((int *)sse) = _mm_cvtsi128_si32(res); - *((int *)sum) = _mm_extract_epi32(res, 1); + // unpack sse and sum registers and add + const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, vsum); + const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, vsum); + const __m128i sse_sum = _mm_add_epi32(sse_sum_lo, sse_sum_hi); + + // perform the final summation and extract the results + const __m128i res = _mm_add_epi32(sse_sum, _mm_srli_si128(sse_sum, 8)); + *((int *)sse) = _mm_cvtsi128_si32(res); + *((int *)sum) = _mm_extract_epi32(res, 1); +} + +static INLINE void variance_final_from_16bit_sum_avx2(__m256i vsse, + __m256i vsum, + unsigned int *const sse, + int *const sum) { + // extract the low lane and add it to the high lane + const __m128i sum_reg_128 = _mm_add_epi16(_mm256_castsi256_si128(vsum), + _mm256_extractf128_si256(vsum, 1)); + const __m128i sum_reg_64 = + _mm_add_epi16(sum_reg_128, _mm_srli_si128(sum_reg_128, 8)); + const __m128i sum_int32 = _mm_cvtepi16_epi32(sum_reg_64); + + variance_final_from_32bit_sum_avx2(vsse, sum_int32, sse, sum); +} + +static INLINE __m256i sum_to_32bit_avx2(const __m256i sum) { + const __m256i sum_lo = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(sum)); + const __m256i sum_hi = + _mm256_cvtepi16_epi32(_mm256_extractf128_si256(sum, 1)); + return _mm256_add_epi32(sum_lo, sum_hi); +} + +static INLINE void variance16_kernel_avx2( + const uint8_t *const src, const int src_stride, const uint8_t *const ref, + const int ref_stride, __m256i *const sse, __m256i *const sum) { + const __m128i s0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride)); + const __m128i s1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride)); + const __m128i r0 = _mm_loadu_si128((__m128i const *)(ref + 0 * ref_stride)); + const __m128i r1 = _mm_loadu_si128((__m128i const *)(ref + 1 * ref_stride)); + const __m256i s = _mm256_inserti128_si256(_mm256_castsi128_si256(s0), s1, 1); + const __m256i r = _mm256_inserti128_si256(_mm256_castsi128_si256(r0), r1, 1); + variance_kernel_avx2(s, r, sse, sum); +} + +static INLINE void variance32_kernel_avx2(const uint8_t 
*const src, + const uint8_t *const ref, + __m256i *const sse, + __m256i *const sum) { + const __m256i s = _mm256_loadu_si256((__m256i const *)(src)); + const __m256i r = _mm256_loadu_si256((__m256i const *)(ref)); + variance_kernel_avx2(s, r, sse, sum); +} + +static INLINE void variance16_avx2(const uint8_t *src, const int src_stride, + const uint8_t *ref, const int ref_stride, + const int h, __m256i *const vsse, + __m256i *const vsum) { + int i; + *vsum = _mm256_setzero_si256(); + *vsse = _mm256_setzero_si256(); + + for (i = 0; i < h; i += 2) { + variance16_kernel_avx2(src, src_stride, ref, ref_stride, vsse, vsum); + src += 2 * src_stride; + ref += 2 * ref_stride; } } -static void get32x16var_avx2(const unsigned char *src_ptr, int source_stride, - const unsigned char *ref_ptr, int recon_stride, - unsigned int *sse, int *sum) { - unsigned int i, src_2strides, ref_2strides; - const __m256i adj_sub = _mm256_load_si256((__m256i const *)adjacent_sub_avx2); - __m256i sum_reg = _mm256_setzero_si256(); - __m256i sse_reg = _mm256_setzero_si256(); +static INLINE void variance32_avx2(const uint8_t *src, const int src_stride, + const uint8_t *ref, const int ref_stride, + const int h, __m256i *const vsse, + __m256i *const vsum) { + int i; + *vsum = _mm256_setzero_si256(); + *vsse = _mm256_setzero_si256(); - // process 64 elements in an iteration - src_2strides = source_stride << 1; - ref_2strides = recon_stride << 1; - for (i = 0; i < 8; i++) { - const __m256i src0 = _mm256_loadu_si256((__m256i const *)(src_ptr)); - const __m256i src1 = - _mm256_loadu_si256((__m256i const *)(src_ptr + source_stride)); - const __m256i ref0 = _mm256_loadu_si256((__m256i const *)(ref_ptr)); - const __m256i ref1 = - _mm256_loadu_si256((__m256i const *)(ref_ptr + recon_stride)); - - // unpack into pairs of source and reference values - const __m256i src_ref0 = _mm256_unpacklo_epi8(src0, ref0); - const __m256i src_ref1 = _mm256_unpackhi_epi8(src0, ref0); - const __m256i src_ref2 = _mm256_unpacklo_epi8(src1, ref1); - const __m256i src_ref3 = _mm256_unpackhi_epi8(src1, ref1); - - // subtract adjacent elements using src*1 + ref*-1 - const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub); - const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub); - const __m256i diff2 = _mm256_maddubs_epi16(src_ref2, adj_sub); - const __m256i diff3 = _mm256_maddubs_epi16(src_ref3, adj_sub); - const __m256i madd0 = _mm256_madd_epi16(diff0, diff0); - const __m256i madd1 = _mm256_madd_epi16(diff1, diff1); - const __m256i madd2 = _mm256_madd_epi16(diff2, diff2); - const __m256i madd3 = _mm256_madd_epi16(diff3, diff3); - - // add to the running totals - sum_reg = _mm256_add_epi16(sum_reg, _mm256_add_epi16(diff0, diff1)); - sum_reg = _mm256_add_epi16(sum_reg, _mm256_add_epi16(diff2, diff3)); - sse_reg = _mm256_add_epi32(sse_reg, _mm256_add_epi32(madd0, madd1)); - sse_reg = _mm256_add_epi32(sse_reg, _mm256_add_epi32(madd2, madd3)); - - src_ptr += src_2strides; - ref_ptr += ref_2strides; + for (i = 0; i < h; i++) { + variance32_kernel_avx2(src, ref, vsse, vsum); + src += src_stride; + ref += ref_stride; } +} - { - // extract the low lane and add it to the high lane - const __m128i sum_reg_128 = _mm_add_epi16( - _mm256_castsi256_si128(sum_reg), _mm256_extractf128_si256(sum_reg, 1)); - const __m128i sse_reg_128 = _mm_add_epi32( - _mm256_castsi256_si128(sse_reg), _mm256_extractf128_si256(sse_reg, 1)); +static INLINE void variance64_avx2(const uint8_t *src, const int src_stride, + const uint8_t *ref, const int ref_stride, + const int h, 
__m256i *const vsse, + __m256i *const vsum) { + int i; + *vsum = _mm256_setzero_si256(); - // sum upper and lower 64 bits together and convert up to 32 bit values - const __m128i sum_reg_64 = - _mm_add_epi16(sum_reg_128, _mm_srli_si128(sum_reg_128, 8)); - const __m128i sum_int32 = _mm_cvtepi16_epi32(sum_reg_64); - - // unpack sse and sum registers and add - const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, sum_int32); - const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, sum_int32); - const __m128i sse_sum = _mm_add_epi32(sse_sum_lo, sse_sum_hi); - - // perform the final summation and extract the results - const __m128i res = _mm_add_epi32(sse_sum, _mm_srli_si128(sse_sum, 8)); - *((int *)sse) = _mm_cvtsi128_si32(res); - *((int *)sum) = _mm_extract_epi32(res, 1); + for (i = 0; i < h; i++) { + variance32_kernel_avx2(src + 0, ref + 0, vsse, vsum); + variance32_kernel_avx2(src + 32, ref + 32, vsse, vsum); + src += src_stride; + ref += ref_stride; } } +void vpx_get16x16var_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + __m256i vsse, vsum; + variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, sum); +} + #define FILTER_SRC(filter) \ /* filter the source */ \ exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter); \ @@ -214,8 +224,9 @@ static void get32x16var_avx2(const unsigned char *src_ptr, int source_stride, static INLINE void spv32_x0_y0(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, - const uint8_t *sec, int sec_stride, int do_sec, - int height, __m256i *sum_reg, __m256i *sse_reg) { + const uint8_t *second_pred, int second_stride, + int do_sec, int height, __m256i *sum_reg, + __m256i *sse_reg) { const __m256i zero_reg = _mm256_setzero_si256(); __m256i exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi; int i; @@ -223,11 +234,11 @@ static INLINE void spv32_x0_y0(const uint8_t *src, int src_stride, const __m256i dst_reg = _mm256_loadu_si256((__m256i const *)dst); const __m256i src_reg = _mm256_loadu_si256((__m256i const *)src); if (do_sec) { - const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec); + const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i avg_reg = _mm256_avg_epu8(src_reg, sec_reg); exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg); - sec += sec_stride; + second_pred += second_stride; } else { exp_src_lo = _mm256_unpacklo_epi8(src_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(src_reg, zero_reg); @@ -241,9 +252,10 @@ static INLINE void spv32_x0_y0(const uint8_t *src, int src_stride, // (x == 0, y == 4) or (x == 4, y == 0). sstep determines the direction. 
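// In these half-pel cases no multiply is needed: _mm256_avg_epu8 already
// applies the rounded two-tap filter, per byte:
//   out[i] = (src[i] + src[i + sstep] + 1) >> 1;
// with sstep == 1 averaging against the next column (x == 4) and
// sstep == src_stride averaging against the next row (y == 4).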
static INLINE void spv32_half_zero(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, - const uint8_t *sec, int sec_stride, - int do_sec, int height, __m256i *sum_reg, - __m256i *sse_reg, int sstep) { + const uint8_t *second_pred, + int second_stride, int do_sec, int height, + __m256i *sum_reg, __m256i *sse_reg, + int sstep) { const __m256i zero_reg = _mm256_setzero_si256(); __m256i exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi; int i; @@ -253,11 +265,11 @@ static INLINE void spv32_half_zero(const uint8_t *src, int src_stride, const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + sstep)); const __m256i src_avg = _mm256_avg_epu8(src_0, src_1); if (do_sec) { - const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec); + const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i avg_reg = _mm256_avg_epu8(src_avg, sec_reg); exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg); - sec += sec_stride; + second_pred += second_stride; } else { exp_src_lo = _mm256_unpacklo_epi8(src_avg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(src_avg, zero_reg); @@ -270,24 +282,27 @@ static INLINE void spv32_half_zero(const uint8_t *src, int src_stride, static INLINE void spv32_x0_y4(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, - const uint8_t *sec, int sec_stride, int do_sec, - int height, __m256i *sum_reg, __m256i *sse_reg) { - spv32_half_zero(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, sum_reg, sse_reg, src_stride); + const uint8_t *second_pred, int second_stride, + int do_sec, int height, __m256i *sum_reg, + __m256i *sse_reg) { + spv32_half_zero(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, sum_reg, sse_reg, src_stride); } static INLINE void spv32_x4_y0(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, - const uint8_t *sec, int sec_stride, int do_sec, - int height, __m256i *sum_reg, __m256i *sse_reg) { - spv32_half_zero(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, sum_reg, sse_reg, 1); + const uint8_t *second_pred, int second_stride, + int do_sec, int height, __m256i *sum_reg, + __m256i *sse_reg) { + spv32_half_zero(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, sum_reg, sse_reg, 1); } static INLINE void spv32_x4_y4(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, - const uint8_t *sec, int sec_stride, int do_sec, - int height, __m256i *sum_reg, __m256i *sse_reg) { + const uint8_t *second_pred, int second_stride, + int do_sec, int height, __m256i *sum_reg, + __m256i *sse_reg) { const __m256i zero_reg = _mm256_setzero_si256(); const __m256i src_a = _mm256_loadu_si256((__m256i const *)src); const __m256i src_b = _mm256_loadu_si256((__m256i const *)(src + 1)); @@ -304,11 +319,11 @@ static INLINE void spv32_x4_y4(const uint8_t *src, int src_stride, prev_src_avg = src_avg; if (do_sec) { - const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec); + const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i avg_reg = _mm256_avg_epu8(current_avg, sec_reg); exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg); - sec += sec_stride; + second_pred += second_stride; } else { exp_src_lo = _mm256_unpacklo_epi8(current_avg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(current_avg, zero_reg); @@ -323,9 +338,10 @@ static INLINE void 
spv32_x4_y4(const uint8_t *src, int src_stride, // (x == 0, y == bil) or (x == 4, y == bil). sstep determines the direction. static INLINE void spv32_bilin_zero(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, - const uint8_t *sec, int sec_stride, - int do_sec, int height, __m256i *sum_reg, - __m256i *sse_reg, int offset, int sstep) { + const uint8_t *second_pred, + int second_stride, int do_sec, int height, + __m256i *sum_reg, __m256i *sse_reg, + int offset, int sstep) { const __m256i zero_reg = _mm256_setzero_si256(); const __m256i pw8 = _mm256_set1_epi16(8); const __m256i filter = _mm256_load_si256( @@ -341,10 +357,10 @@ static INLINE void spv32_bilin_zero(const uint8_t *src, int src_stride, FILTER_SRC(filter) if (do_sec) { - const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec); + const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i exp_src = _mm256_packus_epi16(exp_src_lo, exp_src_hi); const __m256i avg_reg = _mm256_avg_epu8(exp_src, sec_reg); - sec += sec_stride; + second_pred += second_stride; exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg); } @@ -356,27 +372,27 @@ static INLINE void spv32_bilin_zero(const uint8_t *src, int src_stride, static INLINE void spv32_x0_yb(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, - const uint8_t *sec, int sec_stride, int do_sec, - int height, __m256i *sum_reg, __m256i *sse_reg, - int y_offset) { - spv32_bilin_zero(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, sum_reg, sse_reg, y_offset, src_stride); + const uint8_t *second_pred, int second_stride, + int do_sec, int height, __m256i *sum_reg, + __m256i *sse_reg, int y_offset) { + spv32_bilin_zero(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, sum_reg, sse_reg, y_offset, src_stride); } static INLINE void spv32_xb_y0(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, - const uint8_t *sec, int sec_stride, int do_sec, - int height, __m256i *sum_reg, __m256i *sse_reg, - int x_offset) { - spv32_bilin_zero(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, sum_reg, sse_reg, x_offset, 1); + const uint8_t *second_pred, int second_stride, + int do_sec, int height, __m256i *sum_reg, + __m256i *sse_reg, int x_offset) { + spv32_bilin_zero(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, sum_reg, sse_reg, x_offset, 1); } static INLINE void spv32_x4_yb(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, - const uint8_t *sec, int sec_stride, int do_sec, - int height, __m256i *sum_reg, __m256i *sse_reg, - int y_offset) { + const uint8_t *second_pred, int second_stride, + int do_sec, int height, __m256i *sum_reg, + __m256i *sse_reg, int y_offset) { const __m256i zero_reg = _mm256_setzero_si256(); const __m256i pw8 = _mm256_set1_epi16(8); const __m256i filter = _mm256_load_si256( @@ -398,12 +414,12 @@ static INLINE void spv32_x4_yb(const uint8_t *src, int src_stride, FILTER_SRC(filter) if (do_sec) { - const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec); + const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i exp_src_avg = _mm256_packus_epi16(exp_src_lo, exp_src_hi); const __m256i avg_reg = _mm256_avg_epu8(exp_src_avg, sec_reg); exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg); - sec += sec_stride; + second_pred += 
second_stride; } CALC_SUM_SSE_INSIDE_LOOP dst += dst_stride; @@ -413,9 +429,9 @@ static INLINE void spv32_x4_yb(const uint8_t *src, int src_stride, static INLINE void spv32_xb_y4(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, - const uint8_t *sec, int sec_stride, int do_sec, - int height, __m256i *sum_reg, __m256i *sse_reg, - int x_offset) { + const uint8_t *second_pred, int second_stride, + int do_sec, int height, __m256i *sum_reg, + __m256i *sse_reg, int x_offset) { const __m256i zero_reg = _mm256_setzero_si256(); const __m256i pw8 = _mm256_set1_epi16(8); const __m256i filter = _mm256_load_si256( @@ -446,11 +462,11 @@ static INLINE void spv32_xb_y4(const uint8_t *src, int src_stride, src_pack = _mm256_avg_epu8(src_pack, src_reg); if (do_sec) { - const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec); + const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i avg_pack = _mm256_avg_epu8(src_pack, sec_reg); exp_src_lo = _mm256_unpacklo_epi8(avg_pack, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_pack, zero_reg); - sec += sec_stride; + second_pred += second_stride; } else { exp_src_lo = _mm256_unpacklo_epi8(src_pack, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(src_pack, zero_reg); @@ -464,9 +480,9 @@ static INLINE void spv32_xb_y4(const uint8_t *src, int src_stride, static INLINE void spv32_xb_yb(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, - const uint8_t *sec, int sec_stride, int do_sec, - int height, __m256i *sum_reg, __m256i *sse_reg, - int x_offset, int y_offset) { + const uint8_t *second_pred, int second_stride, + int do_sec, int height, __m256i *sum_reg, + __m256i *sse_reg, int x_offset, int y_offset) { const __m256i zero_reg = _mm256_setzero_si256(); const __m256i pw8 = _mm256_set1_epi16(8); const __m256i xfilter = _mm256_load_si256( @@ -501,12 +517,12 @@ static INLINE void spv32_xb_yb(const uint8_t *src, int src_stride, FILTER_SRC(yfilter) if (do_sec) { - const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)sec); + const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i exp_src = _mm256_packus_epi16(exp_src_lo, exp_src_hi); const __m256i avg_reg = _mm256_avg_epu8(exp_src, sec_reg); exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg); - sec += sec_stride; + second_pred += second_stride; } prev_src_pack = src_pack; @@ -520,7 +536,7 @@ static INLINE void spv32_xb_yb(const uint8_t *src, int src_stride, static INLINE int sub_pix_var32xh(const uint8_t *src, int src_stride, int x_offset, int y_offset, const uint8_t *dst, int dst_stride, - const uint8_t *sec, int sec_stride, + const uint8_t *second_pred, int second_stride, int do_sec, int height, unsigned int *sse) { const __m256i zero_reg = _mm256_setzero_si256(); __m256i sum_reg = _mm256_setzero_si256(); @@ -530,44 +546,44 @@ static INLINE int sub_pix_var32xh(const uint8_t *src, int src_stride, // x_offset = 0 and y_offset = 0 if (x_offset == 0) { if (y_offset == 0) { - spv32_x0_y0(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, &sum_reg, &sse_reg); + spv32_x0_y0(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, &sum_reg, &sse_reg); // x_offset = 0 and y_offset = 4 } else if (y_offset == 4) { - spv32_x0_y4(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, &sum_reg, &sse_reg); + spv32_x0_y4(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, &sum_reg, 
&sse_reg); // x_offset = 0 and y_offset = bilin interpolation } else { - spv32_x0_yb(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, &sum_reg, &sse_reg, y_offset); + spv32_x0_yb(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, &sum_reg, &sse_reg, y_offset); } // x_offset = 4 and y_offset = 0 } else if (x_offset == 4) { if (y_offset == 0) { - spv32_x4_y0(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, &sum_reg, &sse_reg); + spv32_x4_y0(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, &sum_reg, &sse_reg); // x_offset = 4 and y_offset = 4 } else if (y_offset == 4) { - spv32_x4_y4(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, &sum_reg, &sse_reg); + spv32_x4_y4(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, &sum_reg, &sse_reg); // x_offset = 4 and y_offset = bilin interpolation } else { - spv32_x4_yb(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, &sum_reg, &sse_reg, y_offset); + spv32_x4_yb(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, &sum_reg, &sse_reg, y_offset); } // x_offset = bilin interpolation and y_offset = 0 } else { if (y_offset == 0) { - spv32_xb_y0(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, &sum_reg, &sse_reg, x_offset); + spv32_xb_y0(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, &sum_reg, &sse_reg, x_offset); // x_offset = bilin interpolation and y_offset = 4 } else if (y_offset == 4) { - spv32_xb_y4(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, &sum_reg, &sse_reg, x_offset); + spv32_xb_y4(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, &sum_reg, &sse_reg, x_offset); // x_offset = bilin interpolation and y_offset = bilin interpolation } else { - spv32_xb_yb(src, src_stride, dst, dst_stride, sec, sec_stride, do_sec, - height, &sum_reg, &sse_reg, x_offset, y_offset); + spv32_xb_yb(src, src_stride, dst, dst_stride, second_pred, second_stride, + do_sec, height, &sum_reg, &sse_reg, x_offset, y_offset); } } CALC_SUM_AND_SSE @@ -583,127 +599,177 @@ static unsigned int sub_pixel_variance32xh_avx2( static unsigned int sub_pixel_avg_variance32xh_avx2( const uint8_t *src, int src_stride, int x_offset, int y_offset, - const uint8_t *dst, int dst_stride, const uint8_t *sec, int sec_stride, - int height, unsigned int *sse) { + const uint8_t *dst, int dst_stride, const uint8_t *second_pred, + int second_stride, int height, unsigned int *sse) { return sub_pix_var32xh(src, src_stride, x_offset, y_offset, dst, dst_stride, - sec, sec_stride, 1, height, sse); + second_pred, second_stride, 1, height, sse); } -typedef void (*get_var_avx2)(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +typedef void (*get_var_avx2)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); -static void variance_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, int w, int h, - unsigned int *sse, int *sum, get_var_avx2 var_fn, - int block_size) { - int i, j; - - *sse = 0; - *sum = 0; - - for (i = 0; i < h; i += 16) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(&src[src_stride * i + j], src_stride, &ref[ref_stride * i + j], - ref_stride, &sse0, &sum0); - *sse += sse0; - *sum += sum0; - } - } +unsigned int vpx_variance16x8_avx2(const uint8_t *src_ptr, int 
src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse) { + int sum; + __m256i vsse, vsum; + variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); + return *sse - (uint32_t)(((int64_t)sum * sum) >> 7); } -unsigned int vpx_variance16x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum; - variance_avx2(src, src_stride, ref, ref_stride, 16, 16, sse, &sum, - vpx_get16x16var_avx2, 16); + __m256i vsse, vsum; + variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 8); } -unsigned int vpx_mse16x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - vpx_get16x16var_avx2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse; -} - -unsigned int vpx_variance32x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_variance16x32_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum; - variance_avx2(src, src_stride, ref, ref_stride, 32, 16, sse, &sum, - get32x16var_avx2, 32); + __m256i vsse, vsum; + variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 9); } -unsigned int vpx_variance32x32_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum; - variance_avx2(src, src_stride, ref, ref_stride, 32, 32, sse, &sum, - get32x16var_avx2, 32); + __m256i vsse, vsum; + variance32_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); + return *sse - (uint32_t)(((int64_t)sum * sum) >> 9); +} + +unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse) { + int sum; + __m256i vsse, vsum; + __m128i vsum_128; + variance32_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum); + vsum_128 = _mm_add_epi16(_mm256_castsi256_si128(vsum), + _mm256_extractf128_si256(vsum, 1)); + vsum_128 = _mm_add_epi32(_mm_cvtepi16_epi32(vsum_128), + _mm_cvtepi16_epi32(_mm_srli_si128(vsum_128, 8))); + variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 10); } -unsigned int vpx_variance64x64_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_variance32x64_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum; - variance_avx2(src, src_stride, ref, ref_stride, 64, 64, sse, &sum, - get32x16var_avx2, 32); - return *sse - (uint32_t)(((int64_t)sum * sum) >> 12); -} - -unsigned int vpx_variance64x32_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_avx2(src, src_stride, ref, ref_stride, 64, 32, sse, &sum, - get32x16var_avx2, 32); + __m256i vsse, vsum; + __m128i vsum_128; + 
variance32_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 64, &vsse, &vsum); + vsum = sum_to_32bit_avx2(vsum); + vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum), + _mm256_extractf128_si256(vsum, 1)); + variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 11); } -unsigned int vpx_sub_pixel_variance64x64_avx2(const uint8_t *src, - int src_stride, int x_offset, - int y_offset, const uint8_t *dst, - int dst_stride, - unsigned int *sse) { +unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse) { + __m256i vsse = _mm256_setzero_si256(); + __m256i vsum = _mm256_setzero_si256(); + __m128i vsum_128; + int sum; + variance64_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum); + vsum = sum_to_32bit_avx2(vsum); + vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum), + _mm256_extractf128_si256(vsum, 1)); + variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum); + return *sse - (uint32_t)(((int64_t)sum * sum) >> 11); +} + +unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse) { + __m256i vsse = _mm256_setzero_si256(); + __m256i vsum = _mm256_setzero_si256(); + __m128i vsum_128; + int sum; + int i = 0; + + for (i = 0; i < 2; i++) { + __m256i vsum16; + variance64_avx2(src_ptr + 32 * i * src_stride, src_stride, + ref_ptr + 32 * i * ref_stride, ref_stride, 32, &vsse, + &vsum16); + vsum = _mm256_add_epi32(vsum, sum_to_32bit_avx2(vsum16)); + } + vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum), + _mm256_extractf128_si256(vsum, 1)); + variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum); + return *sse - (unsigned int)(((int64_t)sum * sum) >> 12); +} + +unsigned int vpx_mse16x8_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse) { + int sum; + __m256i vsse, vsum; + variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); + return *sse; +} + +unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse) { + int sum; + __m256i vsse, vsum; + variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); + return *sse; +} + +unsigned int vpx_sub_pixel_variance64x64_avx2( + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { unsigned int sse1; const int se1 = sub_pixel_variance32xh_avx2( - src, src_stride, x_offset, y_offset, dst, dst_stride, 64, &sse1); + src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, 64, &sse1); unsigned int sse2; const int se2 = - sub_pixel_variance32xh_avx2(src + 32, src_stride, x_offset, y_offset, - dst + 32, dst_stride, 64, &sse2); + sub_pixel_variance32xh_avx2(src_ptr + 32, src_stride, x_offset, y_offset, + ref_ptr + 32, ref_stride, 64, &sse2); const int se = se1 + se2; *sse = sse1 + sse2; return *sse - (uint32_t)(((int64_t)se * se) >> 12); } -unsigned int vpx_sub_pixel_variance32x32_avx2(const uint8_t *src, - int src_stride, int x_offset, - int y_offset, const uint8_t *dst, - int dst_stride, - unsigned int *sse) { +unsigned int vpx_sub_pixel_variance32x32_avx2( + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, + const uint8_t *ref_ptr, 
int ref_stride, unsigned int *sse) { const int se = sub_pixel_variance32xh_avx2( - src, src_stride, x_offset, y_offset, dst, dst_stride, 32, sse); + src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, 32, sse); return *sse - (uint32_t)(((int64_t)se * se) >> 10); } unsigned int vpx_sub_pixel_avg_variance64x64_avx2( - const uint8_t *src, int src_stride, int x_offset, int y_offset, - const uint8_t *dst, int dst_stride, unsigned int *sse, const uint8_t *sec) { + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, + const uint8_t *second_pred) { unsigned int sse1; - const int se1 = sub_pixel_avg_variance32xh_avx2( - src, src_stride, x_offset, y_offset, dst, dst_stride, sec, 64, 64, &sse1); + const int se1 = sub_pixel_avg_variance32xh_avx2(src_ptr, src_stride, x_offset, + y_offset, ref_ptr, ref_stride, + second_pred, 64, 64, &sse1); unsigned int sse2; const int se2 = sub_pixel_avg_variance32xh_avx2( - src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, sec + 32, - 64, 64, &sse2); + src_ptr + 32, src_stride, x_offset, y_offset, ref_ptr + 32, ref_stride, + second_pred + 32, 64, 64, &sse2); const int se = se1 + se2; *sse = sse1 + sse2; @@ -712,10 +778,12 @@ unsigned int vpx_sub_pixel_avg_variance64x64_avx2( } unsigned int vpx_sub_pixel_avg_variance32x32_avx2( - const uint8_t *src, int src_stride, int x_offset, int y_offset, - const uint8_t *dst, int dst_stride, unsigned int *sse, const uint8_t *sec) { + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, + const uint8_t *second_pred) { // Process 32 elements in parallel. - const int se = sub_pixel_avg_variance32xh_avx2( - src, src_stride, x_offset, y_offset, dst, dst_stride, sec, 32, 32, sse); + const int se = sub_pixel_avg_variance32xh_avx2(src_ptr, src_stride, x_offset, + y_offset, ref_ptr, ref_stride, + second_pred, 32, 32, sse); return *sse - (uint32_t)(((int64_t)se * se) >> 10); } diff --git a/media/libvpx/libvpx/vpx_dsp/x86/variance_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/variance_sse2.c index 8d8bf183b28f..37ef64ecaa09 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/variance_sse2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/variance_sse2.c @@ -8,312 +8,426 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include <assert.h> #include <emmintrin.h> // SSE2 #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" - #include "vpx_ports/mem.h" +#include "vpx_dsp/x86/mem_sse2.h" -typedef void (*getNxMvar_fn_t)(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse, int *sum); +static INLINE unsigned int add32x4_sse2(__m128i val) { + val = _mm_add_epi32(val, _mm_srli_si128(val, 8)); + val = _mm_add_epi32(val, _mm_srli_si128(val, 4)); + return _mm_cvtsi128_si32(val); +} -unsigned int vpx_get_mb_ss_sse2(const int16_t *src) { +unsigned int vpx_get_mb_ss_sse2(const int16_t *src_ptr) { __m128i vsum = _mm_setzero_si128(); int i; for (i = 0; i < 32; ++i) { - const __m128i v = _mm_loadu_si128((const __m128i *)src); + const __m128i v = _mm_loadu_si128((const __m128i *)src_ptr); vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v)); - src += 8; + src_ptr += 8; } - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); - return _mm_cvtsi128_si32(vsum); + return add32x4_sse2(vsum); } -#define READ64(p, stride, i) \ - _mm_unpacklo_epi8( \ - _mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \ - _mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride))) +static INLINE __m128i load4x2_sse2(const uint8_t *const p, const int stride) { + const __m128i p0 = _mm_cvtsi32_si128(loadu_uint32(p + 0 * stride)); + const __m128i p1 = _mm_cvtsi32_si128(loadu_uint32(p + 1 * stride)); + const __m128i p01 = _mm_unpacklo_epi32(p0, p1); + return _mm_unpacklo_epi8(p01, _mm_setzero_si128()); +} -static void get4x4var_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - const __m128i zero = _mm_setzero_si128(); - const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero); - const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero); - const __m128i ref0 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 0), zero); - const __m128i ref1 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 2), zero); - const __m128i diff0 = _mm_sub_epi16(src0, ref0); - const __m128i diff1 = _mm_sub_epi16(src1, ref1); +static INLINE void variance_kernel_sse2(const __m128i src_ptr, + const __m128i ref_ptr, + __m128i *const sse, + __m128i *const sum) { + const __m128i diff = _mm_sub_epi16(src_ptr, ref_ptr); + *sse = _mm_add_epi32(*sse, _mm_madd_epi16(diff, diff)); + *sum = _mm_add_epi16(*sum, diff); +} + +// Can handle 128 pixels' diff sum (such as 8x16 or 16x8) +// Slightly faster than variance_final_256_pel_sse2() +static INLINE void variance_final_128_pel_sse2(__m128i vsse, __m128i vsum, + unsigned int *const sse, + int *const sum) { + *sse = add32x4_sse2(vsse); - // sum - __m128i vsum = _mm_add_epi16(diff0, diff1); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); *sum = (int16_t)_mm_extract_epi16(vsum, 0); - - // sse - vsum = - _mm_add_epi32(_mm_madd_epi16(diff0, diff0), _mm_madd_epi16(diff1, diff1)); - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); - *sse = _mm_cvtsi128_si32(vsum); } -void vpx_get8x8var_sse2(const uint8_t *src, int src_stride, const uint8_t *ref, - int ref_stride, unsigned int *sse, int *sum) { - const __m128i zero = _mm_setzero_si128(); - __m128i vsum = _mm_setzero_si128(); - __m128i vsse = _mm_setzero_si128(); - int i; +// Can handle 256 pixels' diff sum (such as 16x16) +static INLINE void
variance_final_256_pel_sse2(__m128i vsse, __m128i vsum, + unsigned int *const sse, + int *const sum) { + *sse = add32x4_sse2(vsse); - for (i = 0; i < 8; i += 2) { - const __m128i src0 = _mm_unpacklo_epi8( - _mm_loadl_epi64((const __m128i *)(src + i * src_stride)), zero); - const __m128i ref0 = _mm_unpacklo_epi8( - _mm_loadl_epi64((const __m128i *)(ref + i * ref_stride)), zero); - const __m128i diff0 = _mm_sub_epi16(src0, ref0); - - const __m128i src1 = _mm_unpacklo_epi8( - _mm_loadl_epi64((const __m128i *)(src + (i + 1) * src_stride)), zero); - const __m128i ref1 = _mm_unpacklo_epi8( - _mm_loadl_epi64((const __m128i *)(ref + (i + 1) * ref_stride)), zero); - const __m128i diff1 = _mm_sub_epi16(src1, ref1); - - vsum = _mm_add_epi16(vsum, diff0); - vsum = _mm_add_epi16(vsum, diff1); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); - } - - // sum vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); *sum = (int16_t)_mm_extract_epi16(vsum, 0); - - // sse - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); - *sse = _mm_cvtsi128_si32(vsse); + *sum += (int16_t)_mm_extract_epi16(vsum, 1); } -void vpx_get16x16var_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, unsigned int *sse, - int *sum) { - const __m128i zero = _mm_setzero_si128(); - __m128i vsum = _mm_setzero_si128(); - __m128i vsse = _mm_setzero_si128(); +// Can handle 512 pixels' diff sum (such as 16x32 or 32x16) +static INLINE void variance_final_512_pel_sse2(__m128i vsse, __m128i vsum, + unsigned int *const sse, + int *const sum) { + *sse = add32x4_sse2(vsse); + + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_unpacklo_epi16(vsum, vsum); + vsum = _mm_srai_epi32(vsum, 16); + *sum = add32x4_sse2(vsum); +} + +static INLINE __m128i sum_to_32bit_sse2(const __m128i sum) { + const __m128i sum_lo = _mm_srai_epi32(_mm_unpacklo_epi16(sum, sum), 16); + const __m128i sum_hi = _mm_srai_epi32(_mm_unpackhi_epi16(sum, sum), 16); + return _mm_add_epi32(sum_lo, sum_hi); +} + +// Can handle 1024 pixels' diff sum (such as 32x32) +static INLINE int sum_final_sse2(const __m128i sum) { + const __m128i t = sum_to_32bit_sse2(sum); + return add32x4_sse2(t); +} + +static INLINE void variance4_sse2(const uint8_t *src_ptr, const int src_stride, + const uint8_t *ref_ptr, const int ref_stride, + const int h, __m128i *const sse, + __m128i *const sum) { int i; - for (i = 0; i < 16; ++i) { - const __m128i s = _mm_loadu_si128((const __m128i *)src); - const __m128i r = _mm_loadu_si128((const __m128i *)ref); + assert(h <= 256); // May overflow for larger height. 
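+  // Overflow check for the assert above: each loop iteration below adds one + // diff in [-255, 255] to every 16-bit lane of *sum, so a lane accumulates + // h / 2 terms; at h == 256 that is at most 128 * 255 = 32640, which still + // fits in int16_t.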
+ *sse = _mm_setzero_si128(); + *sum = _mm_setzero_si128(); - const __m128i src0 = _mm_unpacklo_epi8(s, zero); - const __m128i ref0 = _mm_unpacklo_epi8(r, zero); - const __m128i diff0 = _mm_sub_epi16(src0, ref0); + for (i = 0; i < h; i += 2) { + const __m128i s = load4x2_sse2(src_ptr, src_stride); + const __m128i r = load4x2_sse2(ref_ptr, ref_stride); - const __m128i src1 = _mm_unpackhi_epi8(s, zero); - const __m128i ref1 = _mm_unpackhi_epi8(r, zero); - const __m128i diff1 = _mm_sub_epi16(src1, ref1); - - vsum = _mm_add_epi16(vsum, diff0); - vsum = _mm_add_epi16(vsum, diff1); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); - - src += src_stride; - ref += ref_stride; - } - - // sum - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); - *sum = - (int16_t)_mm_extract_epi16(vsum, 0) + (int16_t)_mm_extract_epi16(vsum, 1); - - // sse - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); - *sse = _mm_cvtsi128_si32(vsse); -} - -static void variance_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, int w, - int h, unsigned int *sse, int *sum, - getNxMvar_fn_t var_fn, int block_size) { - int i, j; - - *sse = 0; - *sum = 0; - - for (i = 0; i < h; i += block_size) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(src + src_stride * i + j, src_stride, ref + ref_stride * i + j, - ref_stride, &sse0, &sum0); - *sse += sse0; - *sum += sum0; - } + variance_kernel_sse2(s, r, sse, sum); + src_ptr += 2 * src_stride; + ref_ptr += 2 * ref_stride; } } -unsigned int vpx_variance4x4_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, +static INLINE void variance8_sse2(const uint8_t *src_ptr, const int src_stride, + const uint8_t *ref_ptr, const int ref_stride, + const int h, __m128i *const sse, + __m128i *const sum) { + const __m128i zero = _mm_setzero_si128(); + int i; + + assert(h <= 128); // May overflow for larger height. + *sse = _mm_setzero_si128(); + *sum = _mm_setzero_si128(); + + for (i = 0; i < h; i++) { + const __m128i s = + _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)src_ptr), zero); + const __m128i r = + _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)ref_ptr), zero); + + variance_kernel_sse2(s, r, sse, sum); + src_ptr += src_stride; + ref_ptr += ref_stride; + } +} + +static INLINE void variance16_kernel_sse2(const uint8_t *const src_ptr, + const uint8_t *const ref_ptr, + __m128i *const sse, + __m128i *const sum) { + const __m128i zero = _mm_setzero_si128(); + const __m128i s = _mm_loadu_si128((const __m128i *)src_ptr); + const __m128i r = _mm_loadu_si128((const __m128i *)ref_ptr); + const __m128i src0 = _mm_unpacklo_epi8(s, zero); + const __m128i ref0 = _mm_unpacklo_epi8(r, zero); + const __m128i src1 = _mm_unpackhi_epi8(s, zero); + const __m128i ref1 = _mm_unpackhi_epi8(r, zero); + + variance_kernel_sse2(src0, ref0, sse, sum); + variance_kernel_sse2(src1, ref1, sse, sum); +} + +static INLINE void variance16_sse2(const uint8_t *src_ptr, const int src_stride, + const uint8_t *ref_ptr, const int ref_stride, + const int h, __m128i *const sse, + __m128i *const sum) { + int i; + + assert(h <= 64); // May overflow for larger height. 
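+  // Overflow check for the assert above: variance16_kernel_sse2() adds two + // diffs in [-255, 255] to every 16-bit lane of *sum per row, so a lane + // accumulates 2 * h terms; at h == 64 that is at most 128 * 255 = 32640, + // still within the int16_t range.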
+ *sse = _mm_setzero_si128(); + *sum = _mm_setzero_si128(); + + for (i = 0; i < h; ++i) { + variance16_kernel_sse2(src_ptr, ref_ptr, sse, sum); + src_ptr += src_stride; + ref_ptr += ref_stride; + } +} + +static INLINE void variance32_sse2(const uint8_t *src_ptr, const int src_stride, + const uint8_t *ref_ptr, const int ref_stride, + const int h, __m128i *const sse, + __m128i *const sum) { + int i; + + assert(h <= 32); // May overflow for larger height. + // Don't initialize sse here since it's an accumulation. + *sum = _mm_setzero_si128(); + + for (i = 0; i < h; ++i) { + variance16_kernel_sse2(src_ptr + 0, ref_ptr + 0, sse, sum); + variance16_kernel_sse2(src_ptr + 16, ref_ptr + 16, sse, sum); + src_ptr += src_stride; + ref_ptr += ref_stride; + } +} + +static INLINE void variance64_sse2(const uint8_t *src_ptr, const int src_stride, + const uint8_t *ref_ptr, const int ref_stride, + const int h, __m128i *const sse, + __m128i *const sum) { + int i; + + assert(h <= 16); // May overflow for larger height. + // Don't initialize sse here since it's an accumulation. + *sum = _mm_setzero_si128(); + + for (i = 0; i < h; ++i) { + variance16_kernel_sse2(src_ptr + 0, ref_ptr + 0, sse, sum); + variance16_kernel_sse2(src_ptr + 16, ref_ptr + 16, sse, sum); + variance16_kernel_sse2(src_ptr + 32, ref_ptr + 32, sse, sum); + variance16_kernel_sse2(src_ptr + 48, ref_ptr + 48, sse, sum); + src_ptr += src_stride; + ref_ptr += ref_stride; + } +} + +void vpx_get8x8var_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + __m128i vsse, vsum; + variance8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, sum); +} + +void vpx_get16x16var_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + __m128i vsse, vsum; + variance16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); + variance_final_256_pel_sse2(vsse, vsum, sse, sum); +} + +unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + variance4_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 4, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 4); } -unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - variance_sse2(src, src_stride, ref, ref_stride, 8, 4, sse, &sum, - get4x4var_sse2, 4); + variance4_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 5); } -unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - variance_sse2(src, src_stride, ref, ref_stride, 4, 8, sse, &sum, - get4x4var_sse2, 4); + variance8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 4, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 5); } -unsigned int vpx_variance8x8_sse2(const 
unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, +unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - vpx_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + variance8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 6); } -unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, +unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - variance_sse2(src, src_stride, ref, ref_stride, 16, 8, sse, &sum, - vpx_get8x8var_sse2, 8); + variance8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 7); } -unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, +unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - variance_sse2(src, src_stride, ref, ref_stride, 8, 16, sse, &sum, - vpx_get8x8var_sse2, 8); + variance16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 7); } -unsigned int vpx_variance16x16_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, +unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - vpx_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + variance16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); + variance_final_256_pel_sse2(vsse, vsum, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 8); } -unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - variance_sse2(src, src_stride, ref, ref_stride, 32, 32, sse, &sum, - vpx_get16x16var_sse2, 16); + variance16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum); + variance_final_512_pel_sse2(vsse, vsum, sse, &sum); + return *sse - (unsigned int)(((int64_t)sum * sum) >> 9); +} + +unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse) { + __m128i vsse = _mm_setzero_si128(); + __m128i vsum; + int sum; + variance32_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); + variance_final_512_pel_sse2(vsse, vsum, sse, &sum); + return *sse - (unsigned int)(((int64_t)sum * sum) >> 9); +} + +unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse) { + __m128i vsse = _mm_setzero_si128(); + __m128i vsum; + int sum; + variance32_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum); + *sse = add32x4_sse2(vsse); + sum = sum_final_sse2(vsum); return *sse - (unsigned int)(((int64_t)sum * sum) >> 10); } -unsigned int 
vpx_variance32x16_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { + __m128i vsse = _mm_setzero_si128(); + __m128i vsum = _mm_setzero_si128(); int sum; - variance_sse2(src, src_stride, ref, ref_stride, 32, 16, sse, &sum, - vpx_get16x16var_sse2, 16); - return *sse - (unsigned int)(((int64_t)sum * sum) >> 9); + int i = 0; + + for (i = 0; i < 2; i++) { + __m128i vsum16; + variance32_sse2(src_ptr + 32 * i * src_stride, src_stride, + ref_ptr + 32 * i * ref_stride, ref_stride, 32, &vsse, + &vsum16); + vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16)); + } + *sse = add32x4_sse2(vsse); + sum = add32x4_sse2(vsum); + return *sse - (unsigned int)(((int64_t)sum * sum) >> 11); } -unsigned int vpx_variance16x32_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { + __m128i vsse = _mm_setzero_si128(); + __m128i vsum = _mm_setzero_si128(); int sum; - variance_sse2(src, src_stride, ref, ref_stride, 16, 32, sse, &sum, - vpx_get16x16var_sse2, 16); - return *sse - (unsigned int)(((int64_t)sum * sum) >> 9); + int i = 0; + + for (i = 0; i < 2; i++) { + __m128i vsum16; + variance64_sse2(src_ptr + 16 * i * src_stride, src_stride, + ref_ptr + 16 * i * ref_stride, ref_stride, 16, &vsse, + &vsum16); + vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16)); + } + *sse = add32x4_sse2(vsse); + sum = add32x4_sse2(vsum); + return *sse - (unsigned int)(((int64_t)sum * sum) >> 11); } -unsigned int vpx_variance64x64_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { + __m128i vsse = _mm_setzero_si128(); + __m128i vsum = _mm_setzero_si128(); int sum; - variance_sse2(src, src_stride, ref, ref_stride, 64, 64, sse, &sum, - vpx_get16x16var_sse2, 16); + int i = 0; + + for (i = 0; i < 4; i++) { + __m128i vsum16; + variance64_sse2(src_ptr + 16 * i * src_stride, src_stride, + ref_ptr + 16 * i * ref_stride, ref_stride, 16, &vsse, + &vsum16); + vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16)); + } + *sse = add32x4_sse2(vsse); + sum = add32x4_sse2(vsum); return *sse - (unsigned int)(((int64_t)sum * sum) >> 12); } -unsigned int vpx_variance64x32_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 64, 32, sse, &sum, - vpx_get16x16var_sse2, 16); - return *sse - (unsigned int)(((int64_t)sum * sum) >> 11); -} - -unsigned int vpx_variance32x64_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { - int sum; - variance_sse2(src, src_stride, ref, ref_stride, 32, 64, sse, &sum, - vpx_get16x16var_sse2, 16); - return *sse - (unsigned int)(((int64_t)sum * sum) >> 11); -} - -unsigned int vpx_mse8x8_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { - vpx_variance8x8_sse2(src, src_stride, ref, ref_stride, sse); + vpx_variance8x8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, sse); return *sse; } -unsigned int vpx_mse8x16_sse2(const 
uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { - vpx_variance8x16_sse2(src, src_stride, ref, ref_stride, sse); + vpx_variance8x16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, sse); return *sse; } -unsigned int vpx_mse16x8_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { - vpx_variance16x8_sse2(src, src_stride, ref, ref_stride, sse); + vpx_variance16x8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, sse); return *sse; } -unsigned int vpx_mse16x16_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, +unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { - vpx_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); + vpx_variance16x16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, sse); return *sse; } // The 2 unused parameters are place holders for PIC enabled build. // These definitions are for functions defined in subpel_variance.asm -#define DECL(w, opt) \ - int vpx_sub_pixel_variance##w##xh_##opt( \ - const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \ - const uint8_t *dst, ptrdiff_t dst_stride, int height, unsigned int *sse, \ - void *unused0, void *unused) +#define DECL(w, opt) \ + int vpx_sub_pixel_variance##w##xh_##opt( \ + const uint8_t *src_ptr, ptrdiff_t src_stride, int x_offset, \ + int y_offset, const uint8_t *ref_ptr, ptrdiff_t ref_stride, int height, \ + unsigned int *sse, void *unused0, void *unused) #define DECLS(opt1, opt2) \ DECL(4, opt1); \ DECL(8, opt1); \ @@ -324,36 +438,37 @@ DECLS(ssse3, ssse3); #undef DECLS #undef DECL -#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \ - unsigned int vpx_sub_pixel_variance##w##x##h##_##opt( \ - const uint8_t *src, int src_stride, int x_offset, int y_offset, \ - const uint8_t *dst, int dst_stride, unsigned int *sse_ptr) { \ - unsigned int sse; \ - int se = vpx_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \ - y_offset, dst, dst_stride, \ - h, &sse, NULL, NULL); \ - if (w > wf) { \ - unsigned int sse2; \ - int se2 = vpx_sub_pixel_variance##wf##xh_##opt( \ - src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, h, \ - &sse2, NULL, NULL); \ - se += se2; \ - sse += sse2; \ - if (w > wf * 2) { \ - se2 = vpx_sub_pixel_variance##wf##xh_##opt( \ - src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, h, \ - &sse2, NULL, NULL); \ - se += se2; \ - sse += sse2; \ - se2 = vpx_sub_pixel_variance##wf##xh_##opt( \ - src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, h, \ - &sse2, NULL, NULL); \ - se += se2; \ - sse += sse2; \ - } \ - } \ - *sse_ptr = sse; \ - return sse - (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \ +#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \ + unsigned int vpx_sub_pixel_variance##w##x##h##_##opt( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { \ + unsigned int sse_tmp; \ + int se = vpx_sub_pixel_variance##wf##xh_##opt( \ + src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, h, \ + &sse_tmp, NULL, NULL); \ + if (w > wf) { \ + unsigned int sse2; \ + int se2 = vpx_sub_pixel_variance##wf##xh_##opt( \ + src_ptr 
+ 16, src_stride, x_offset, y_offset, ref_ptr + 16, \ + ref_stride, h, &sse2, NULL, NULL); \ + se += se2; \ + sse_tmp += sse2; \ + if (w > wf * 2) { \ + se2 = vpx_sub_pixel_variance##wf##xh_##opt( \ + src_ptr + 32, src_stride, x_offset, y_offset, ref_ptr + 32, \ + ref_stride, h, &sse2, NULL, NULL); \ + se += se2; \ + sse_tmp += sse2; \ + se2 = vpx_sub_pixel_variance##wf##xh_##opt( \ + src_ptr + 48, src_stride, x_offset, y_offset, ref_ptr + 48, \ + ref_stride, h, &sse2, NULL, NULL); \ + se += se2; \ + sse_tmp += sse2; \ + } \ + } \ + *sse = sse_tmp; \ + return sse_tmp - \ + (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \ } #define FNS(opt1, opt2) \ @@ -378,12 +493,12 @@ FNS(ssse3, ssse3); #undef FN // The 2 unused parameters are place holders for PIC enabled build. -#define DECL(w, opt) \ - int vpx_sub_pixel_avg_variance##w##xh_##opt( \ - const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \ - const uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *sec, \ - ptrdiff_t sec_stride, int height, unsigned int *sse, void *unused0, \ - void *unused) +#define DECL(w, opt) \ + int vpx_sub_pixel_avg_variance##w##xh_##opt( \ + const uint8_t *src_ptr, ptrdiff_t src_stride, int x_offset, \ + int y_offset, const uint8_t *ref_ptr, ptrdiff_t ref_stride, \ + const uint8_t *second_pred, ptrdiff_t second_stride, int height, \ + unsigned int *sse, void *unused0, void *unused) #define DECLS(opt1, opt2) \ DECL(4, opt1); \ DECL(8, opt1); \ @@ -394,37 +509,38 @@ DECLS(ssse3, ssse3); #undef DECL #undef DECLS -#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \ - unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt( \ - const uint8_t *src, int src_stride, int x_offset, int y_offset, \ - const uint8_t *dst, int dst_stride, unsigned int *sseptr, \ - const uint8_t *sec) { \ - unsigned int sse; \ - int se = vpx_sub_pixel_avg_variance##wf##xh_##opt( \ - src, src_stride, x_offset, y_offset, dst, dst_stride, sec, w, h, &sse, \ - NULL, NULL); \ - if (w > wf) { \ - unsigned int sse2; \ - int se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \ - src + 16, src_stride, x_offset, y_offset, dst + 16, dst_stride, \ - sec + 16, w, h, &sse2, NULL, NULL); \ - se += se2; \ - sse += sse2; \ - if (w > wf * 2) { \ - se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \ - src + 32, src_stride, x_offset, y_offset, dst + 32, dst_stride, \ - sec + 32, w, h, &sse2, NULL, NULL); \ - se += se2; \ - sse += sse2; \ - se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \ - src + 48, src_stride, x_offset, y_offset, dst + 48, dst_stride, \ - sec + 48, w, h, &sse2, NULL, NULL); \ - se += se2; \ - sse += sse2; \ - } \ - } \ - *sseptr = sse; \ - return sse - (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \ +#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \ + unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, \ + const uint8_t *second_pred) { \ + unsigned int sse_tmp; \ + int se = vpx_sub_pixel_avg_variance##wf##xh_##opt( \ + src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, \ + second_pred, w, h, &sse_tmp, NULL, NULL); \ + if (w > wf) { \ + unsigned int sse2; \ + int se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \ + src_ptr + 16, src_stride, x_offset, y_offset, ref_ptr + 16, \ + ref_stride, second_pred + 16, w, h, &sse2, NULL, NULL); \ + se += se2; \ + sse_tmp += sse2; \ + if (w > wf * 2) { \ + se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \ + 
src_ptr + 32, src_stride, x_offset, y_offset, ref_ptr + 32, \ + ref_stride, second_pred + 32, w, h, &sse2, NULL, NULL); \ + se += se2; \ + sse_tmp += sse2; \ + se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \ + src_ptr + 48, src_stride, x_offset, y_offset, ref_ptr + 48, \ + ref_stride, second_pred + 48, w, h, &sse2, NULL, NULL); \ + se += se2; \ + sse_tmp += sse2; \ + } \ + } \ + *sse = sse_tmp; \ + return sse_tmp - \ + (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \ } #define FNS(opt1, opt2) \ diff --git a/media/libvpx/libvpx/vpx_dsp/x86/vpx_asm_stubs.c b/media/libvpx/libvpx/vpx_dsp/x86/vpx_asm_stubs.c deleted file mode 100644 index 4f164afeb4e0..000000000000 --- a/media/libvpx/libvpx/vpx_dsp/x86/vpx_asm_stubs.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/x86/convolve.h" - -#if HAVE_SSE2 -filter8_1dfunction vpx_filter_block1d16_v8_sse2; -filter8_1dfunction vpx_filter_block1d16_h8_sse2; -filter8_1dfunction vpx_filter_block1d8_v8_sse2; -filter8_1dfunction vpx_filter_block1d8_h8_sse2; -filter8_1dfunction vpx_filter_block1d4_v8_sse2; -filter8_1dfunction vpx_filter_block1d4_h8_sse2; -filter8_1dfunction vpx_filter_block1d16_v8_avg_sse2; -filter8_1dfunction vpx_filter_block1d16_h8_avg_sse2; -filter8_1dfunction vpx_filter_block1d8_v8_avg_sse2; -filter8_1dfunction vpx_filter_block1d8_h8_avg_sse2; -filter8_1dfunction vpx_filter_block1d4_v8_avg_sse2; -filter8_1dfunction vpx_filter_block1d4_h8_avg_sse2; - -filter8_1dfunction vpx_filter_block1d16_v2_sse2; -filter8_1dfunction vpx_filter_block1d16_h2_sse2; -filter8_1dfunction vpx_filter_block1d8_v2_sse2; -filter8_1dfunction vpx_filter_block1d8_h2_sse2; -filter8_1dfunction vpx_filter_block1d4_v2_sse2; -filter8_1dfunction vpx_filter_block1d4_h2_sse2; -filter8_1dfunction vpx_filter_block1d16_v2_avg_sse2; -filter8_1dfunction vpx_filter_block1d16_h2_avg_sse2; -filter8_1dfunction vpx_filter_block1d8_v2_avg_sse2; -filter8_1dfunction vpx_filter_block1d8_h2_avg_sse2; -filter8_1dfunction vpx_filter_block1d4_v2_avg_sse2; -filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2; - -// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const InterpKernel *filter, int x0_q4, -// int32_t x_step_q4, int y0_q4, int y_step_q4, -// int w, int h); -// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const InterpKernel *filter, int x0_q4, -// int32_t x_step_q4, int y0_q4, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const InterpKernel *filter, int x0_q4, -// int32_t x_step_q4, int y0_q4, -// int y_step_q4, int w, int h); -// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const InterpKernel *filter, int x0_q4, -// int32_t x_step_q4, int y0_q4, int y_step_q4, -// int w, int h); -FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2); -FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride 
* 3, , sse2); -FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2); -FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_, sse2); - -// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const InterpKernel *filter, int x0_q4, -// int32_t x_step_q4, int y0_q4, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const InterpKernel *filter, int x0_q4, -// int32_t x_step_q4, int y0_q4, int y_step_q4, -// int w, int h); -FUN_CONV_2D(, sse2); -FUN_CONV_2D(avg_, sse2); - -#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2; - -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2; - -// void vpx_highbd_convolve8_horiz_sse2(const uint8_t *src, -// ptrdiff_t src_stride, -// uint8_t *dst, -// ptrdiff_t dst_stride, -// const int16_t *filter_x, -// int x_step_q4, -// const int16_t *filter_y, -// int y_step_q4, -// int w, int h, int bd); -// void vpx_highbd_convolve8_vert_sse2(const uint8_t *src, -// ptrdiff_t src_stride, -// uint8_t *dst, -// ptrdiff_t dst_stride, -// const int16_t *filter_x, -// int x_step_q4, -// const int16_t *filter_y, -// int y_step_q4, -// int w, int h, int bd); -// void vpx_highbd_convolve8_avg_horiz_sse2(const uint8_t *src, -// ptrdiff_t src_stride, -// uint8_t *dst, -// ptrdiff_t dst_stride, -// const int16_t *filter_x, -// int x_step_q4, -// const int16_t *filter_y, -// int y_step_q4, -// int w, int h, int bd); -// void vpx_highbd_convolve8_avg_vert_sse2(const uint8_t *src, -// ptrdiff_t src_stride, -// uint8_t *dst, -// ptrdiff_t dst_stride, -// const int16_t *filter_x, -// int x_step_q4, -// const int16_t *filter_y, -// int y_step_q4, -// int w, int h, int bd); -HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2); -HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , sse2); -HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2); -HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - 
src_stride * 3, avg_, - sse2); - -// void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const InterpKernel *filter, int x0_q4, -// int32_t x_step_q4, int y0_q4, int y_step_q4, -// int w, int h, int bd); -// void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const InterpKernel *filter, int x0_q4, -// int32_t x_step_q4, int y0_q4, -// int y_step_q4, int w, int h, int bd); -HIGH_FUN_CONV_2D(, sse2); -HIGH_FUN_CONV_2D(avg_, sse2); -#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 -#endif // HAVE_SSE2 diff --git a/media/libvpx/libvpx/vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm b/media/libvpx/libvpx/vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm index d83507dc9954..c57149657ab3 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm +++ b/media/libvpx/libvpx/vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm @@ -45,7 +45,7 @@ ;Compute max and min values of a pixel mov rdx, 0x00010001 - movsxd rcx, DWORD PTR arg(6) ;bps + movsxd rcx, DWORD PTR arg(6) ;bd movq xmm0, rdx movq xmm1, rcx pshufd xmm0, xmm0, 0b @@ -121,7 +121,7 @@ ;Compute max and min values of a pixel mov rdx, 0x00010001 - movsxd rcx, DWORD PTR arg(6) ;bps + movsxd rcx, DWORD PTR arg(6) ;bd movq xmm0, rdx movq xmm1, rcx pshufd xmm0, xmm0, 0b @@ -199,7 +199,7 @@ SECTION .text -;void vpx_filter_block1d4_v8_sse2 +;void vpx_highbd_filter_block1d4_v8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pitch, @@ -269,7 +269,7 @@ sym(vpx_highbd_filter_block1d4_v8_sse2): pop rbp ret -;void vpx_filter_block1d8_v8_sse2 +;void vpx_highbd_filter_block1d8_v8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pitch, @@ -328,7 +328,7 @@ sym(vpx_highbd_filter_block1d8_v8_sse2): pop rbp ret -;void vpx_filter_block1d16_v8_sse2 +;void vpx_highbd_filter_block1d16_v8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pitch, @@ -554,7 +554,7 @@ sym(vpx_highbd_filter_block1d16_v8_avg_sse2): pop rbp ret -;void vpx_filter_block1d4_h8_sse2 +;void vpx_highbd_filter_block1d4_h8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, @@ -629,7 +629,7 @@ sym(vpx_highbd_filter_block1d4_h8_sse2): pop rbp ret -;void vpx_filter_block1d8_h8_sse2 +;void vpx_highbd_filter_block1d8_h8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, @@ -695,7 +695,7 @@ sym(vpx_highbd_filter_block1d8_h8_sse2): pop rbp ret -;void vpx_filter_block1d16_h8_sse2 +;void vpx_highbd_filter_block1d16_h8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, diff --git a/media/libvpx/libvpx/vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm b/media/libvpx/libvpx/vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm index 9bffe504b1f1..ec18d370e215 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm +++ b/media/libvpx/libvpx/vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm @@ -26,7 +26,7 @@ pshufd xmm3, xmm3, 0 mov rdx, 0x00010001 - movsxd rcx, DWORD PTR arg(6) ;bps + movsxd rcx, DWORD PTR arg(6) ;bd movq xmm5, rdx movq xmm2, rcx pshufd xmm5, xmm5, 0b @@ -64,7 +64,7 @@ dec rcx %endm -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 %macro HIGH_GET_PARAM 0 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr @@ -82,7 +82,7 @@ pshufd xmm4, xmm4, 0 mov rdx, 0x00010001 - movsxd rcx, DWORD PTR arg(6) ;bps + movsxd rcx, DWORD PTR arg(6) ;bd movq xmm8, rdx movq xmm5, rcx pshufd xmm8, xmm8, 0b @@ -197,7 +197,7 @@ sym(vpx_highbd_filter_block1d4_v2_sse2): pop rbp ret -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 global 
sym(vpx_highbd_filter_block1d8_v2_sse2) PRIVATE sym(vpx_highbd_filter_block1d8_v2_sse2): push rbp @@ -277,7 +277,7 @@ sym(vpx_highbd_filter_block1d4_v2_avg_sse2): pop rbp ret -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 global sym(vpx_highbd_filter_block1d8_v2_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d8_v2_avg_sse2): push rbp @@ -358,7 +358,7 @@ sym(vpx_highbd_filter_block1d4_h2_sse2): pop rbp ret -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 global sym(vpx_highbd_filter_block1d8_h2_sse2) PRIVATE sym(vpx_highbd_filter_block1d8_h2_sse2): push rbp @@ -439,7 +439,7 @@ sym(vpx_highbd_filter_block1d4_h2_avg_sse2): pop rbp ret -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 global sym(vpx_highbd_filter_block1d8_h2_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d8_h2_avg_sse2): push rbp diff --git a/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_4t_intrin_sse2.c b/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_4t_intrin_sse2.c new file mode 100644 index 000000000000..239179028485 --- /dev/null +++ b/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_4t_intrin_sse2.c @@ -0,0 +1,1161 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <emmintrin.h> + +#include "./vpx_dsp_rtcd.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/x86/convolve.h" +#include "vpx_dsp/x86/convolve_sse2.h" +#include "vpx_ports/mem.h" + +#define CONV8_ROUNDING_BITS (7) +#define CONV8_ROUNDING_NUM (1 << (CONV8_ROUNDING_BITS - 1)) + +static void vpx_filter_block1d16_h4_sse2(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + int h; + + __m128i src_reg, src_reg_shift_1, src_reg_shift_2, src_reg_shift_3; + __m128i dst_first, dst_second; + __m128i even, odd; + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); + kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); + + for (h = height; h > 0; --h) { + // We will load multiple shifted versions of the row and shuffle them into + // 16-bit words of the form + // ... s[2] s[1] s[0] s[-1] + // ... s[4] s[3] s[2] s[1] + // Then we call multiply and add to get partial results + // s[2]k[3]+s[1]k[2] s[0]k[3]+s[-1]k[2] + // s[4]k[5]+s[3]k[4] s[2]k[5]+s[1]k[4] + // The two results are then added together for the first half of even + // output.
+    // Repeat multiple times to get the whole output + src_reg = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_shift_1 = _mm_srli_si128(src_reg, 1); + src_reg_shift_2 = _mm_srli_si128(src_reg, 2); + src_reg_shift_3 = _mm_srli_si128(src_reg, 3); + + // Output 6 4 2 0 + even = mm_madd_add_epi8_sse2(&src_reg, &src_reg_shift_2, &kernel_reg_23, + &kernel_reg_45); + + // Output 7 5 3 1 + odd = mm_madd_add_epi8_sse2(&src_reg_shift_1, &src_reg_shift_3, + &kernel_reg_23, &kernel_reg_45); + + // Combine to get the first half of the dst + dst_first = mm_zip_epi32_sse2(&even, &odd); + + // Do again to get the second half of dst + src_reg = _mm_loadu_si128((const __m128i *)(src_ptr + 8)); + src_reg_shift_1 = _mm_srli_si128(src_reg, 1); + src_reg_shift_2 = _mm_srli_si128(src_reg, 2); + src_reg_shift_3 = _mm_srli_si128(src_reg, 3); + + // Output 14 12 10 8 + even = mm_madd_add_epi8_sse2(&src_reg, &src_reg_shift_2, &kernel_reg_23, + &kernel_reg_45); + + // Output 15 13 11 9 + odd = mm_madd_add_epi8_sse2(&src_reg_shift_1, &src_reg_shift_3, + &kernel_reg_23, &kernel_reg_45); + + // Combine to get the second half of the dst + dst_second = mm_zip_epi32_sse2(&even, &odd); + + // Round each result + dst_first = mm_round_epi16_sse2(&dst_first, &reg_32, 6); + dst_second = mm_round_epi16_sse2(&dst_second, &reg_32, 6); + + // Finally combine to get the final dst + dst_first = _mm_packus_epi16(dst_first, dst_second); + _mm_store_si128((__m128i *)dst_ptr, dst_first); + + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +/* The macro used to generate functions shifts the src_ptr up by 3 rows already + * */ + +static void vpx_filter_block1d16_v4_sse2(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // Register for source s[-1:3, :] + __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source. lo is first half, hi second + __m128i src_reg_m10_lo, src_reg_m10_hi, src_reg_01_lo, src_reg_01_hi; + __m128i src_reg_12_lo, src_reg_12_hi, src_reg_23_lo, src_reg_23_hi; + // Half of half of the interleaved rows + __m128i src_reg_m10_lo_1, src_reg_m10_lo_2, src_reg_m10_hi_1, + src_reg_m10_hi_2; + __m128i src_reg_01_lo_1, src_reg_01_lo_2, src_reg_01_hi_1, src_reg_01_hi_2; + __m128i src_reg_12_lo_1, src_reg_12_lo_2, src_reg_12_hi_1, src_reg_12_hi_2; + __m128i src_reg_23_lo_1, src_reg_23_lo_2, src_reg_23_hi_1, src_reg_23_hi_2; + + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + + // Result after multiply and add + __m128i res_reg_m10_lo, res_reg_01_lo, res_reg_12_lo, res_reg_23_lo; + __m128i res_reg_m10_hi, res_reg_01_hi, res_reg_12_hi, res_reg_23_hi; + __m128i res_reg_m1012, res_reg_0123; + __m128i res_reg_m1012_lo, res_reg_0123_lo, res_reg_m1012_hi, res_reg_0123_hi; + + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + + // We will compute the result two rows at a time + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); + kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); + + // We will load two rows of pixels as 8-bit words, rearrange them as 16-bit + // words, + // shuffle the data into the form + // ... s[0,1] s[-1,1] s[0,0] s[-1,0] + // ...
s[0,7] s[-1,7] s[0,6] s[-1,6] + // ... s[0,9] s[-1,9] s[0,8] s[-1,8] + // ... s[0,13] s[-1,13] s[0,12] s[-1,12] + // so that we can call multiply and add with the kernel to get 32-bit words of + // the form + // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] + // Finally, we can add multiple rows together to get the desired output. + + // First shuffle the data + src_reg_m1 = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_0 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride)); + src_reg_m10_lo = _mm_unpacklo_epi8(src_reg_m1, src_reg_0); + src_reg_m10_hi = _mm_unpackhi_epi8(src_reg_m1, src_reg_0); + src_reg_m10_lo_1 = _mm_unpacklo_epi8(src_reg_m10_lo, _mm_setzero_si128()); + src_reg_m10_lo_2 = _mm_unpackhi_epi8(src_reg_m10_lo, _mm_setzero_si128()); + src_reg_m10_hi_1 = _mm_unpacklo_epi8(src_reg_m10_hi, _mm_setzero_si128()); + src_reg_m10_hi_2 = _mm_unpackhi_epi8(src_reg_m10_hi, _mm_setzero_si128()); + + // More shuffling + src_reg_1 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2)); + src_reg_01_lo = _mm_unpacklo_epi8(src_reg_0, src_reg_1); + src_reg_01_hi = _mm_unpackhi_epi8(src_reg_0, src_reg_1); + src_reg_01_lo_1 = _mm_unpacklo_epi8(src_reg_01_lo, _mm_setzero_si128()); + src_reg_01_lo_2 = _mm_unpackhi_epi8(src_reg_01_lo, _mm_setzero_si128()); + src_reg_01_hi_1 = _mm_unpacklo_epi8(src_reg_01_hi, _mm_setzero_si128()); + src_reg_01_hi_2 = _mm_unpackhi_epi8(src_reg_01_hi, _mm_setzero_si128()); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3)); + + src_reg_12_lo = _mm_unpacklo_epi8(src_reg_1, src_reg_2); + src_reg_12_hi = _mm_unpackhi_epi8(src_reg_1, src_reg_2); + + src_reg_3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4)); + + src_reg_23_lo = _mm_unpacklo_epi8(src_reg_2, src_reg_3); + src_reg_23_hi = _mm_unpackhi_epi8(src_reg_2, src_reg_3); + + // Partial output from first half + res_reg_m10_lo = mm_madd_packs_epi16_sse2( + &src_reg_m10_lo_1, &src_reg_m10_lo_2, &kernel_reg_23); + + res_reg_01_lo = mm_madd_packs_epi16_sse2(&src_reg_01_lo_1, &src_reg_01_lo_2, + &kernel_reg_23); + + src_reg_12_lo_1 = _mm_unpacklo_epi8(src_reg_12_lo, _mm_setzero_si128()); + src_reg_12_lo_2 = _mm_unpackhi_epi8(src_reg_12_lo, _mm_setzero_si128()); + res_reg_12_lo = mm_madd_packs_epi16_sse2(&src_reg_12_lo_1, &src_reg_12_lo_2, + &kernel_reg_45); + + src_reg_23_lo_1 = _mm_unpacklo_epi8(src_reg_23_lo, _mm_setzero_si128()); + src_reg_23_lo_2 = _mm_unpackhi_epi8(src_reg_23_lo, _mm_setzero_si128()); + res_reg_23_lo = mm_madd_packs_epi16_sse2(&src_reg_23_lo_1, &src_reg_23_lo_2, + &kernel_reg_45); + + // Add to get first half of the results + res_reg_m1012_lo = _mm_adds_epi16(res_reg_m10_lo, res_reg_12_lo); + res_reg_0123_lo = _mm_adds_epi16(res_reg_01_lo, res_reg_23_lo); + + // Now repeat everything again for the second half + // Partial output for second half + res_reg_m10_hi = mm_madd_packs_epi16_sse2( + &src_reg_m10_hi_1, &src_reg_m10_hi_2, &kernel_reg_23); + + res_reg_01_hi = mm_madd_packs_epi16_sse2(&src_reg_01_hi_1, &src_reg_01_hi_2, + &kernel_reg_23); + + src_reg_12_hi_1 = _mm_unpacklo_epi8(src_reg_12_hi, _mm_setzero_si128()); + src_reg_12_hi_2 = _mm_unpackhi_epi8(src_reg_12_hi, _mm_setzero_si128()); + res_reg_12_hi = mm_madd_packs_epi16_sse2(&src_reg_12_hi_1, &src_reg_12_hi_2, + &kernel_reg_45); + + src_reg_23_hi_1 = _mm_unpacklo_epi8(src_reg_23_hi, _mm_setzero_si128()); + src_reg_23_hi_2 = _mm_unpackhi_epi8(src_reg_23_hi, _mm_setzero_si128()); + res_reg_23_hi = mm_madd_packs_epi16_sse2(&src_reg_23_hi_1, 
&src_reg_23_hi_2, + &kernel_reg_45); + + // Second half of the results + res_reg_m1012_hi = _mm_adds_epi16(res_reg_m10_hi, res_reg_12_hi); + res_reg_0123_hi = _mm_adds_epi16(res_reg_01_hi, res_reg_23_hi); + + // Round the words + res_reg_m1012_lo = mm_round_epi16_sse2(&res_reg_m1012_lo, &reg_32, 6); + res_reg_0123_lo = mm_round_epi16_sse2(&res_reg_0123_lo, &reg_32, 6); + res_reg_m1012_hi = mm_round_epi16_sse2(&res_reg_m1012_hi, &reg_32, 6); + res_reg_0123_hi = mm_round_epi16_sse2(&res_reg_0123_hi, &reg_32, 6); + + // Combine to get the result + res_reg_m1012 = _mm_packus_epi16(res_reg_m1012_lo, res_reg_m1012_hi); + res_reg_0123 = _mm_packus_epi16(res_reg_0123_lo, res_reg_0123_hi); + + _mm_store_si128((__m128i *)dst_ptr, res_reg_m1012); + _mm_store_si128((__m128i *)(dst_ptr + dst_stride), res_reg_0123); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + + src_reg_m10_lo_1 = src_reg_12_lo_1; + src_reg_m10_lo_2 = src_reg_12_lo_2; + src_reg_m10_hi_1 = src_reg_12_hi_1; + src_reg_m10_hi_2 = src_reg_12_hi_2; + src_reg_01_lo_1 = src_reg_23_lo_1; + src_reg_01_lo_2 = src_reg_23_lo_2; + src_reg_01_hi_1 = src_reg_23_hi_1; + src_reg_01_hi_2 = src_reg_23_hi_2; + src_reg_1 = src_reg_3; + } +} + +static void vpx_filter_block1d8_h4_sse2(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + int h; + + __m128i src_reg, src_reg_shift_1, src_reg_shift_2, src_reg_shift_3; + __m128i dst_first; + __m128i even, odd; + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); + kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); + + for (h = height; h > 0; --h) { + // We will load multiple shifted versions of the row and shuffle them into + // 16-bit words of the form + // ... s[2] s[1] s[0] s[-1] + // ...
s[4] s[3] s[2] s[1] + // Then we call multiply and add to get partial results + // s[2]k[3]+s[1]k[2] s[0]k[3]+s[-1]k[2] + // s[4]k[5]+s[3]k[4] s[2]k[5]+s[1]k[4] + // The two results are then added together to get the even output + src_reg = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_shift_1 = _mm_srli_si128(src_reg, 1); + src_reg_shift_2 = _mm_srli_si128(src_reg, 2); + src_reg_shift_3 = _mm_srli_si128(src_reg, 3); + + // Output 6 4 2 0 + even = mm_madd_add_epi8_sse2(&src_reg, &src_reg_shift_2, &kernel_reg_23, + &kernel_reg_45); + + // Output 7 5 3 1 + odd = mm_madd_add_epi8_sse2(&src_reg_shift_1, &src_reg_shift_3, + &kernel_reg_23, &kernel_reg_45); + + // Combine to get the first half of the dst + dst_first = mm_zip_epi32_sse2(&even, &odd); + dst_first = mm_round_epi16_sse2(&dst_first, &reg_32, 6); + + // Saturate and convert to 8-bit words + dst_first = _mm_packus_epi16(dst_first, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i *)dst_ptr, dst_first); + + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +static void vpx_filter_block1d8_v4_sse2(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // Register for source s[-1:3, :] + __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source. lo is first half, hi second + __m128i src_reg_m10_lo, src_reg_01_lo; + __m128i src_reg_12_lo, src_reg_23_lo; + // Half of half of the interleaved rows + __m128i src_reg_m10_lo_1, src_reg_m10_lo_2; + __m128i src_reg_01_lo_1, src_reg_01_lo_2; + __m128i src_reg_12_lo_1, src_reg_12_lo_2; + __m128i src_reg_23_lo_1, src_reg_23_lo_2; + + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + + // Result after multiply and add + __m128i res_reg_m10_lo, res_reg_01_lo, res_reg_12_lo, res_reg_23_lo; + __m128i res_reg_m1012, res_reg_0123; + __m128i res_reg_m1012_lo, res_reg_0123_lo; + + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + + // We will compute the result two rows at a time + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); + kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); + + // We will load two rows of pixels as 8-bit words, rearrange them as 16-bit + // words, + // shuffle the data into the form + // ... s[0,1] s[-1,1] s[0,0] s[-1,0] + // ... s[0,7] s[-1,7] s[0,6] s[-1,6] + // ... s[0,9] s[-1,9] s[0,8] s[-1,8] + // ... s[0,13] s[-1,13] s[0,12] s[-1,12] + // so that we can call multiply and add with the kernel to get 32-bit words of + // the form + // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] + // Finally, we can add multiple rows together to get the desired output.
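+  // As a rough scalar sketch of the vector loop below (up to the saturating + // adds), each output pixel is + // out[r][c] = ROUND_POWER_OF_TWO((kernel[2] >> 1) * src[r - 1][c] + + // (kernel[3] >> 1) * src[r + 0][c] + + // (kernel[4] >> 1) * src[r + 1][c] + + // (kernel[5] >> 1) * src[r + 2][c], 6) + // where ROUND_POWER_OF_TWO(x, 6) == (x + 32) >> 6; only taps 2..5 of the + // 8-tap kernel are non-zero on this 4-tap path, which is why loading + // kernel_reg_23 and kernel_reg_45 is sufficient.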
+ + // First shuffle the data + src_reg_m1 = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_0 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride)); + src_reg_m10_lo = _mm_unpacklo_epi8(src_reg_m1, src_reg_0); + src_reg_m10_lo_1 = _mm_unpacklo_epi8(src_reg_m10_lo, _mm_setzero_si128()); + src_reg_m10_lo_2 = _mm_unpackhi_epi8(src_reg_m10_lo, _mm_setzero_si128()); + + // More shuffling + src_reg_1 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2)); + src_reg_01_lo = _mm_unpacklo_epi8(src_reg_0, src_reg_1); + src_reg_01_lo_1 = _mm_unpacklo_epi8(src_reg_01_lo, _mm_setzero_si128()); + src_reg_01_lo_2 = _mm_unpackhi_epi8(src_reg_01_lo, _mm_setzero_si128()); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3)); + + src_reg_12_lo = _mm_unpacklo_epi8(src_reg_1, src_reg_2); + + src_reg_3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4)); + + src_reg_23_lo = _mm_unpacklo_epi8(src_reg_2, src_reg_3); + + // Partial output + res_reg_m10_lo = mm_madd_packs_epi16_sse2( + &src_reg_m10_lo_1, &src_reg_m10_lo_2, &kernel_reg_23); + + res_reg_01_lo = mm_madd_packs_epi16_sse2(&src_reg_01_lo_1, &src_reg_01_lo_2, + &kernel_reg_23); + + src_reg_12_lo_1 = _mm_unpacklo_epi8(src_reg_12_lo, _mm_setzero_si128()); + src_reg_12_lo_2 = _mm_unpackhi_epi8(src_reg_12_lo, _mm_setzero_si128()); + res_reg_12_lo = mm_madd_packs_epi16_sse2(&src_reg_12_lo_1, &src_reg_12_lo_2, + &kernel_reg_45); + + src_reg_23_lo_1 = _mm_unpacklo_epi8(src_reg_23_lo, _mm_setzero_si128()); + src_reg_23_lo_2 = _mm_unpackhi_epi8(src_reg_23_lo, _mm_setzero_si128()); + res_reg_23_lo = mm_madd_packs_epi16_sse2(&src_reg_23_lo_1, &src_reg_23_lo_2, + &kernel_reg_45); + + // Add to get results + res_reg_m1012_lo = _mm_adds_epi16(res_reg_m10_lo, res_reg_12_lo); + res_reg_0123_lo = _mm_adds_epi16(res_reg_01_lo, res_reg_23_lo); + + // Round the words + res_reg_m1012_lo = mm_round_epi16_sse2(&res_reg_m1012_lo, &reg_32, 6); + res_reg_0123_lo = mm_round_epi16_sse2(&res_reg_0123_lo, &reg_32, 6); + + // Convert to 8-bit words + res_reg_m1012 = _mm_packus_epi16(res_reg_m1012_lo, _mm_setzero_si128()); + res_reg_0123 = _mm_packus_epi16(res_reg_0123_lo, _mm_setzero_si128()); + + // Save only half of the register (8 words) + _mm_storel_epi64((__m128i *)dst_ptr, res_reg_m1012); + _mm_storel_epi64((__m128i *)(dst_ptr + dst_stride), res_reg_0123); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + + src_reg_m10_lo_1 = src_reg_12_lo_1; + src_reg_m10_lo_2 = src_reg_12_lo_2; + src_reg_01_lo_1 = src_reg_23_lo_1; + src_reg_01_lo_2 = src_reg_23_lo_2; + src_reg_1 = src_reg_3; + } +} + +static void vpx_filter_block1d4_h4_sse2(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + int h; + + __m128i src_reg, src_reg_shift_1, src_reg_shift_2, src_reg_shift_3; + __m128i dst_first; + __m128i tmp_0, tmp_1; + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); + kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); + + for (h = height; h > 0; --h) { + // We will load multiple
shifted versions of the row and shuffle them into + // 16-bit words of the form + // ... s[1] s[0] s[0] s[-1] + // ... s[3] s[2] s[2] s[1] + // Then we call multiply and add to get partial results + // s[1]k[3]+s[0]k[2] s[0]k[3]+s[-1]k[2] + // s[3]k[5]+s[2]k[4] s[2]k[5]+s[1]k[4] + // The two results are then added together to get the output + src_reg = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_shift_1 = _mm_srli_si128(src_reg, 1); + src_reg_shift_2 = _mm_srli_si128(src_reg, 2); + src_reg_shift_3 = _mm_srli_si128(src_reg, 3); + + // Convert to 16-bit words + src_reg = _mm_unpacklo_epi8(src_reg, _mm_setzero_si128()); + src_reg_shift_1 = _mm_unpacklo_epi8(src_reg_shift_1, _mm_setzero_si128()); + src_reg_shift_2 = _mm_unpacklo_epi8(src_reg_shift_2, _mm_setzero_si128()); + src_reg_shift_3 = _mm_unpacklo_epi8(src_reg_shift_3, _mm_setzero_si128()); + + // Shuffle into the right format + tmp_0 = _mm_unpacklo_epi32(src_reg, src_reg_shift_1); + tmp_1 = _mm_unpacklo_epi32(src_reg_shift_2, src_reg_shift_3); + + // Partial output + tmp_0 = _mm_madd_epi16(tmp_0, kernel_reg_23); + tmp_1 = _mm_madd_epi16(tmp_1, kernel_reg_45); + + // Output + dst_first = _mm_add_epi32(tmp_0, tmp_1); + dst_first = _mm_packs_epi32(dst_first, _mm_setzero_si128()); + + dst_first = mm_round_epi16_sse2(&dst_first, &reg_32, 6); + + // Saturate and convert to 8-bit words + dst_first = _mm_packus_epi16(dst_first, _mm_setzero_si128()); + + *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(dst_first); + + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +static void vpx_filter_block1d4_v4_sse2(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // Register for source s[-1:3, :] + __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source. lo is first half, hi second + __m128i src_reg_m10_lo, src_reg_01_lo; + __m128i src_reg_12_lo, src_reg_23_lo; + // Half of half of the interleaved rows + __m128i src_reg_m10_lo_1; + __m128i src_reg_01_lo_1; + __m128i src_reg_12_lo_1; + __m128i src_reg_23_lo_1; + + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + + // Result after multiply and add + __m128i res_reg_m10_lo, res_reg_01_lo, res_reg_12_lo, res_reg_23_lo; + __m128i res_reg_m1012, res_reg_0123; + __m128i res_reg_m1012_lo, res_reg_0123_lo; + + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + const __m128i reg_zero = _mm_setzero_si128(); + + // We will compute the result two rows at a time + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); + kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); + + // We will load two rows of pixels as 8-bit words, rearrange them as 16-bit + // words, + // shuffle the data into the form + // ... s[0,1] s[-1,1] s[0,0] s[-1,0] + // ... s[0,7] s[-1,7] s[0,6] s[-1,6] + // ... s[0,9] s[-1,9] s[0,8] s[-1,8] + // ... s[0,13] s[-1,13] s[0,12] s[-1,12] + // so that we can call multiply and add with the kernel to get 32-bit words of + // the form + // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] + // Finally, we can add multiple rows together to get the desired output.
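+  // (Editor's note, not part of the upstream patch: a hypothetical outline of
+  // the two-rows-per-iteration loop structure used below, with illustrative
+  // names only. Each iteration emits two output rows but loads just two new
+  // source rows, because the interleaved row pairs are carried across
+  // iterations:
+  //   for (h = height; h > 1; h -= 2) {
+  //     out_row[0] = round(madd(pair_m10, k23) + madd(pair_12, k45));
+  //     out_row[1] = round(madd(pair_01, k23) + madd(pair_23, k45));
+  //     pair_m10 = pair_12;  // slide the 4-row window down by two rows
+  //     pair_01 = pair_23;
+  //   }
+  // which is exactly the register reuse performed at the bottom of the loop.)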
+ + // First shuffle the data + src_reg_m1 = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_0 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride)); + src_reg_m10_lo = _mm_unpacklo_epi8(src_reg_m1, src_reg_0); + src_reg_m10_lo_1 = _mm_unpacklo_epi8(src_reg_m10_lo, _mm_setzero_si128()); + + // More shuffling + src_reg_1 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2)); + src_reg_01_lo = _mm_unpacklo_epi8(src_reg_0, src_reg_1); + src_reg_01_lo_1 = _mm_unpacklo_epi8(src_reg_01_lo, _mm_setzero_si128()); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3)); + + src_reg_12_lo = _mm_unpacklo_epi8(src_reg_1, src_reg_2); + + src_reg_3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4)); + + src_reg_23_lo = _mm_unpacklo_epi8(src_reg_2, src_reg_3); + + // Partial output + res_reg_m10_lo = + mm_madd_packs_epi16_sse2(&src_reg_m10_lo_1, &reg_zero, &kernel_reg_23); + + res_reg_01_lo = + mm_madd_packs_epi16_sse2(&src_reg_01_lo_1, &reg_zero, &kernel_reg_23); + + src_reg_12_lo_1 = _mm_unpacklo_epi8(src_reg_12_lo, _mm_setzero_si128()); + res_reg_12_lo = + mm_madd_packs_epi16_sse2(&src_reg_12_lo_1, &reg_zero, &kernel_reg_45); + + src_reg_23_lo_1 = _mm_unpacklo_epi8(src_reg_23_lo, _mm_setzero_si128()); + res_reg_23_lo = + mm_madd_packs_epi16_sse2(&src_reg_23_lo_1, &reg_zero, &kernel_reg_45); + + // Add to get results + res_reg_m1012_lo = _mm_adds_epi16(res_reg_m10_lo, res_reg_12_lo); + res_reg_0123_lo = _mm_adds_epi16(res_reg_01_lo, res_reg_23_lo); + + // Round the words + res_reg_m1012_lo = mm_round_epi16_sse2(&res_reg_m1012_lo, &reg_32, 6); + res_reg_0123_lo = mm_round_epi16_sse2(&res_reg_0123_lo, &reg_32, 6); + + // Convert to 8-bit words + res_reg_m1012 = _mm_packus_epi16(res_reg_m1012_lo, reg_zero); + res_reg_0123 = _mm_packus_epi16(res_reg_0123_lo, reg_zero); + + // Save only one quarter of the register (4 words) + *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(res_reg_m1012); + *((uint32_t *)(dst_ptr + dst_stride)) = _mm_cvtsi128_si32(res_reg_0123); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + + src_reg_m10_lo_1 = src_reg_12_lo_1; + src_reg_01_lo_1 = src_reg_23_lo_1; + src_reg_1 = src_reg_3; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64 +static void vpx_highbd_filter_block1d4_h4_sse2( + const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { + // We will load multiple shifted versions of the row and shuffle them into + // 16-bit words of the form + // ... s[2] s[1] s[0] s[-1] + // ...
s[4] s[3] s[2] s[1] + // Then we call multiply and add to get partial results + // s[2]k[3]+s[1]k[2] s[0]k[3]+s[-1]k[2] + // s[4]k[5]+s[3]k[4] s[2]k[5]+s[1]k[4] + // The two results are then added together to get the even output + + __m128i src_reg, src_reg_shift_1, src_reg_shift_2, src_reg_shift_3; + __m128i res_reg; + __m128i even, odd; + + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + const __m128i reg_round = + _mm_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding + const __m128i reg_max = _mm_set1_epi16((1 << bd) - 1); + const __m128i reg_zero = _mm_setzero_si128(); + int h; + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); + kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); + + for (h = height; h > 0; --h) { + src_reg = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_shift_1 = _mm_srli_si128(src_reg, 2); + src_reg_shift_2 = _mm_srli_si128(src_reg, 4); + src_reg_shift_3 = _mm_srli_si128(src_reg, 6); + + // Output 2 0 + even = mm_madd_add_epi16_sse2(&src_reg, &src_reg_shift_2, &kernel_reg_23, + &kernel_reg_45); + + // Output 3 1 + odd = mm_madd_add_epi16_sse2(&src_reg_shift_1, &src_reg_shift_3, + &kernel_reg_23, &kernel_reg_45); + + // Combine to get the first half of the dst + res_reg = _mm_unpacklo_epi32(even, odd); + res_reg = mm_round_epi32_sse2(&res_reg, &reg_round, CONV8_ROUNDING_BITS); + res_reg = _mm_packs_epi32(res_reg, reg_zero); + + // Saturate the result and save + res_reg = _mm_min_epi16(res_reg, reg_max); + res_reg = _mm_max_epi16(res_reg, reg_zero); + _mm_storel_epi64((__m128i *)dst_ptr, res_reg); + + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +static void vpx_highbd_filter_block1d4_v4_sse2( + const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { + // We will load two rows of pixels as 16-bit words, and shuffle them into the + // form + // ... s[0,1] s[-1,1] s[0,0] s[-1,0] + // ... s[0,7] s[-1,7] s[0,6] s[-1,6] + // ... s[0,9] s[-1,9] s[0,8] s[-1,8] + // ... s[0,13] s[-1,13] s[0,12] s[-1,12] + // so that we can call multiply and add with the kernel to get 32-bit words of + // the form + // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] + // Finally, we can add multiple rows together to get the desired output. + + // Register for source s[-1:3, :] + __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source.
lo is first half, hi second + __m128i src_reg_m10, src_reg_01; + __m128i src_reg_12, src_reg_23; + + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + + // Result after multiply and add + __m128i res_reg_m10, res_reg_01, res_reg_12, res_reg_23; + __m128i res_reg_m1012, res_reg_0123; + + const __m128i reg_round = + _mm_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding + const __m128i reg_max = _mm_set1_epi16((1 << bd) - 1); + const __m128i reg_zero = _mm_setzero_si128(); + + // We will compute the result two rows at a time + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); + kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); + + // First shuffle the data + src_reg_m1 = _mm_loadl_epi64((const __m128i *)src_ptr); + src_reg_0 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride)); + src_reg_m10 = _mm_unpacklo_epi16(src_reg_m1, src_reg_0); + + // More shuffling + src_reg_1 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 2)); + src_reg_01 = _mm_unpacklo_epi16(src_reg_0, src_reg_1); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 3)); + + src_reg_12 = _mm_unpacklo_epi16(src_reg_1, src_reg_2); + + src_reg_3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 4)); + + src_reg_23 = _mm_unpacklo_epi16(src_reg_2, src_reg_3); + + // Partial output + res_reg_m10 = _mm_madd_epi16(src_reg_m10, kernel_reg_23); + res_reg_01 = _mm_madd_epi16(src_reg_01, kernel_reg_23); + res_reg_12 = _mm_madd_epi16(src_reg_12, kernel_reg_45); + res_reg_23 = _mm_madd_epi16(src_reg_23, kernel_reg_45); + + // Add to get results + res_reg_m1012 = _mm_add_epi32(res_reg_m10, res_reg_12); + res_reg_0123 = _mm_add_epi32(res_reg_01, res_reg_23); + + // Round the words + res_reg_m1012 = + mm_round_epi32_sse2(&res_reg_m1012, &reg_round, CONV8_ROUNDING_BITS); + res_reg_0123 = + mm_round_epi32_sse2(&res_reg_0123, &reg_round, CONV8_ROUNDING_BITS); + + res_reg_m1012 = _mm_packs_epi32(res_reg_m1012, reg_zero); + res_reg_0123 = _mm_packs_epi32(res_reg_0123, reg_zero); + + // Saturate according to bit depth + res_reg_m1012 = _mm_min_epi16(res_reg_m1012, reg_max); + res_reg_0123 = _mm_min_epi16(res_reg_0123, reg_max); + res_reg_m1012 = _mm_max_epi16(res_reg_m1012, reg_zero); + res_reg_0123 = _mm_max_epi16(res_reg_0123, reg_zero); + + // Save only half of the register (4 words) + _mm_storel_epi64((__m128i *)dst_ptr, res_reg_m1012); + _mm_storel_epi64((__m128i *)(dst_ptr + dst_stride), res_reg_0123); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + + src_reg_m10 = src_reg_12; + src_reg_01 = src_reg_23; + src_reg_1 = src_reg_3; + } +} + +static void vpx_highbd_filter_block1d8_h4_sse2( + const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { + // We will load multiple shifted versions of the row and shuffle them into + // 16-bit words of the form + // ... s[2] s[1] s[0] s[-1] + // ... s[4] s[3] s[2] s[1] + // Then we call multiply and add to get partial results + // s[2]k[3]+s[1]k[2] s[0]k[3]+s[-1]k[2] + // s[4]k[5]+s[3]k[4] s[2]k[5]+s[1]k[4] + // The two results are then added together for the first half of even + // output.
+ // Repeat multiple times to get the whole output + + __m128i src_reg, src_reg_next, src_reg_shift_1, src_reg_shift_2, + src_reg_shift_3; + __m128i res_reg; + __m128i even, odd; + __m128i tmp_0, tmp_1; + + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + const __m128i reg_round = + _mm_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding + const __m128i reg_max = _mm_set1_epi16((1 << bd) - 1); + const __m128i reg_zero = _mm_setzero_si128(); + int h; + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); + kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); + + for (h = height; h > 0; --h) { + // We will put first half in the first half of the reg, and second half in + // second half + src_reg = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_next = _mm_loadu_si128((const __m128i *)(src_ptr + 5)); + + // Output 6 4 2 0 + tmp_0 = _mm_srli_si128(src_reg, 4); + tmp_1 = _mm_srli_si128(src_reg_next, 2); + src_reg_shift_2 = _mm_unpacklo_epi64(tmp_0, tmp_1); + even = mm_madd_add_epi16_sse2(&src_reg, &src_reg_shift_2, &kernel_reg_23, + &kernel_reg_45); + + // Output 7 5 3 1 + tmp_0 = _mm_srli_si128(src_reg, 2); + tmp_1 = src_reg_next; + src_reg_shift_1 = _mm_unpacklo_epi64(tmp_0, tmp_1); + + tmp_0 = _mm_srli_si128(src_reg, 6); + tmp_1 = _mm_srli_si128(src_reg_next, 4); + src_reg_shift_3 = _mm_unpacklo_epi64(tmp_0, tmp_1); + + odd = mm_madd_add_epi16_sse2(&src_reg_shift_1, &src_reg_shift_3, + &kernel_reg_23, &kernel_reg_45); + + // Combine to get the first half of the dst + even = mm_round_epi32_sse2(&even, &reg_round, CONV8_ROUNDING_BITS); + odd = mm_round_epi32_sse2(&odd, &reg_round, CONV8_ROUNDING_BITS); + res_reg = mm_zip_epi32_sse2(&even, &odd); + + // Saturate the result and save + res_reg = _mm_min_epi16(res_reg, reg_max); + res_reg = _mm_max_epi16(res_reg, reg_zero); + + _mm_store_si128((__m128i *)dst_ptr, res_reg); + + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +static void vpx_highbd_filter_block1d8_v4_sse2( + const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { + // We will load two rows of pixels as 16-bit words, and shuffle them into the + // form + // ... s[0,1] s[-1,1] s[0,0] s[-1,0] + // ... s[0,7] s[-1,7] s[0,6] s[-1,6] + // ... s[0,9] s[-1,9] s[0,8] s[-1,8] + // ... s[0,13] s[-1,13] s[0,12] s[-1,12] + // so that we can call multiply and add with the kernel to get 32-bit words of + // the form + // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] + // Finally, we can add multiple rows together to get the desired output. + + // Register for source s[-1:3, :] + __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source.
lo is first half, hi second + __m128i src_reg_m10_lo, src_reg_01_lo, src_reg_m10_hi, src_reg_01_hi; + __m128i src_reg_12_lo, src_reg_23_lo, src_reg_12_hi, src_reg_23_hi; + + // Result after multiply and add + __m128i res_reg_m10_lo, res_reg_01_lo, res_reg_12_lo, res_reg_23_lo; + __m128i res_reg_m10_hi, res_reg_01_hi, res_reg_12_hi, res_reg_23_hi; + __m128i res_reg_m1012, res_reg_0123; + __m128i res_reg_m1012_lo, res_reg_0123_lo; + __m128i res_reg_m1012_hi, res_reg_0123_hi; + + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + + const __m128i reg_round = + _mm_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding + const __m128i reg_max = _mm_set1_epi16((1 << bd) - 1); + const __m128i reg_zero = _mm_setzero_si128(); + + // We will compute the result two rows at a time + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); + kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); + + // First shuffle the data + src_reg_m1 = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_0 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride)); + src_reg_m10_lo = _mm_unpacklo_epi16(src_reg_m1, src_reg_0); + src_reg_m10_hi = _mm_unpackhi_epi16(src_reg_m1, src_reg_0); + + // More shuffling + src_reg_1 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2)); + src_reg_01_lo = _mm_unpacklo_epi16(src_reg_0, src_reg_1); + src_reg_01_hi = _mm_unpackhi_epi16(src_reg_0, src_reg_1); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3)); + + src_reg_12_lo = _mm_unpacklo_epi16(src_reg_1, src_reg_2); + src_reg_12_hi = _mm_unpackhi_epi16(src_reg_1, src_reg_2); + + src_reg_3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4)); + + src_reg_23_lo = _mm_unpacklo_epi16(src_reg_2, src_reg_3); + src_reg_23_hi = _mm_unpackhi_epi16(src_reg_2, src_reg_3); + + // Partial output for first half + res_reg_m10_lo = _mm_madd_epi16(src_reg_m10_lo, kernel_reg_23); + res_reg_01_lo = _mm_madd_epi16(src_reg_01_lo, kernel_reg_23); + res_reg_12_lo = _mm_madd_epi16(src_reg_12_lo, kernel_reg_45); + res_reg_23_lo = _mm_madd_epi16(src_reg_23_lo, kernel_reg_45); + + // Add to get results + res_reg_m1012_lo = _mm_add_epi32(res_reg_m10_lo, res_reg_12_lo); + res_reg_0123_lo = _mm_add_epi32(res_reg_01_lo, res_reg_23_lo); + + // Round the words + res_reg_m1012_lo = + mm_round_epi32_sse2(&res_reg_m1012_lo, &reg_round, CONV8_ROUNDING_BITS); + res_reg_0123_lo = + mm_round_epi32_sse2(&res_reg_0123_lo, &reg_round, CONV8_ROUNDING_BITS); + + // Partial output for second half + res_reg_m10_hi = _mm_madd_epi16(src_reg_m10_hi, kernel_reg_23); + res_reg_01_hi = _mm_madd_epi16(src_reg_01_hi, kernel_reg_23); + res_reg_12_hi = _mm_madd_epi16(src_reg_12_hi, kernel_reg_45); + res_reg_23_hi = _mm_madd_epi16(src_reg_23_hi, kernel_reg_45); + + // Add to get results + res_reg_m1012_hi = _mm_add_epi32(res_reg_m10_hi, res_reg_12_hi); + res_reg_0123_hi = _mm_add_epi32(res_reg_01_hi, res_reg_23_hi); + + // Round the words + res_reg_m1012_hi = + mm_round_epi32_sse2(&res_reg_m1012_hi, &reg_round, CONV8_ROUNDING_BITS); + res_reg_0123_hi = + mm_round_epi32_sse2(&res_reg_0123_hi, &reg_round, CONV8_ROUNDING_BITS); + + // Combine the two halves + res_reg_m1012 = _mm_packs_epi32(res_reg_m1012_lo, res_reg_m1012_hi); + res_reg_0123 =
_mm_packs_epi32(res_reg_0123_lo, res_reg_0123_hi); + + // Saturate according to bit depth + res_reg_m1012 = _mm_min_epi16(res_reg_m1012, reg_max); + res_reg_0123 = _mm_min_epi16(res_reg_0123, reg_max); + res_reg_m1012 = _mm_max_epi16(res_reg_m1012, reg_zero); + res_reg_0123 = _mm_max_epi16(res_reg_0123, reg_zero); + + // Save the whole register (8 words) + _mm_store_si128((__m128i *)dst_ptr, res_reg_m1012); + _mm_store_si128((__m128i *)(dst_ptr + dst_stride), res_reg_0123); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + + src_reg_m10_lo = src_reg_12_lo; + src_reg_m10_hi = src_reg_12_hi; + src_reg_01_lo = src_reg_23_lo; + src_reg_01_hi = src_reg_23_hi; + src_reg_1 = src_reg_3; + } +} + +static void vpx_highbd_filter_block1d16_h4_sse2( + const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { + vpx_highbd_filter_block1d8_h4_sse2(src_ptr, src_stride, dst_ptr, dst_stride, + height, kernel, bd); + vpx_highbd_filter_block1d8_h4_sse2(src_ptr + 8, src_stride, dst_ptr + 8, + dst_stride, height, kernel, bd); +} + +static void vpx_highbd_filter_block1d16_v4_sse2( + const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { + vpx_highbd_filter_block1d8_v4_sse2(src_ptr, src_stride, dst_ptr, dst_stride, + height, kernel, bd); + vpx_highbd_filter_block1d8_v4_sse2(src_ptr + 8, src_stride, dst_ptr + 8, + dst_stride, height, kernel, bd); +} +#endif // CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64 + +// From vpx_subpixel_8t_sse2.asm. +filter8_1dfunction vpx_filter_block1d16_v8_sse2; +filter8_1dfunction vpx_filter_block1d16_h8_sse2; +filter8_1dfunction vpx_filter_block1d8_v8_sse2; +filter8_1dfunction vpx_filter_block1d8_h8_sse2; +filter8_1dfunction vpx_filter_block1d4_v8_sse2; +filter8_1dfunction vpx_filter_block1d4_h8_sse2; +filter8_1dfunction vpx_filter_block1d16_v8_avg_sse2; +filter8_1dfunction vpx_filter_block1d16_h8_avg_sse2; +filter8_1dfunction vpx_filter_block1d8_v8_avg_sse2; +filter8_1dfunction vpx_filter_block1d8_h8_avg_sse2; +filter8_1dfunction vpx_filter_block1d4_v8_avg_sse2; +filter8_1dfunction vpx_filter_block1d4_h8_avg_sse2; + +// Use the [vh]8 version because there is no [vh]4 implementation. +#define vpx_filter_block1d16_v4_avg_sse2 vpx_filter_block1d16_v8_avg_sse2 +#define vpx_filter_block1d16_h4_avg_sse2 vpx_filter_block1d16_h8_avg_sse2 +#define vpx_filter_block1d8_v4_avg_sse2 vpx_filter_block1d8_v8_avg_sse2 +#define vpx_filter_block1d8_h4_avg_sse2 vpx_filter_block1d8_h8_avg_sse2 +#define vpx_filter_block1d4_v4_avg_sse2 vpx_filter_block1d4_v8_avg_sse2 +#define vpx_filter_block1d4_h4_avg_sse2 vpx_filter_block1d4_h8_avg_sse2 + +// From vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm.
+filter8_1dfunction vpx_filter_block1d16_v2_sse2; +filter8_1dfunction vpx_filter_block1d16_h2_sse2; +filter8_1dfunction vpx_filter_block1d8_v2_sse2; +filter8_1dfunction vpx_filter_block1d8_h2_sse2; +filter8_1dfunction vpx_filter_block1d4_v2_sse2; +filter8_1dfunction vpx_filter_block1d4_h2_sse2; +filter8_1dfunction vpx_filter_block1d16_v2_avg_sse2; +filter8_1dfunction vpx_filter_block1d16_h2_avg_sse2; +filter8_1dfunction vpx_filter_block1d8_v2_avg_sse2; +filter8_1dfunction vpx_filter_block1d8_h2_avg_sse2; +filter8_1dfunction vpx_filter_block1d4_v2_avg_sse2; +filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2; + +// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const InterpKernel *filter, int x0_q4, +// int32_t x_step_q4, int y0_q4, int y_step_q4, +// int w, int h); +// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const InterpKernel *filter, int x0_q4, +// int32_t x_step_q4, int y0_q4, int y_step_q4, +// int w, int h); +// void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const InterpKernel *filter, int x0_q4, +// int32_t x_step_q4, int y0_q4, +// int y_step_q4, int w, int h); +// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const InterpKernel *filter, int x0_q4, +// int32_t x_step_q4, int y0_q4, int y_step_q4, +// int w, int h); +FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2, 0); +FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - (num_taps / 2 - 1) * src_stride, , + sse2, 0); +FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2, 1); +FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, + src - (num_taps / 2 - 1) * src_stride, avg_, sse2, 1); + +// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const InterpKernel *filter, int x0_q4, +// int32_t x_step_q4, int y0_q4, int y_step_q4, +// int w, int h); +// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const InterpKernel *filter, int x0_q4, +// int32_t x_step_q4, int y0_q4, int y_step_q4, +// int w, int h); +FUN_CONV_2D(, sse2, 0); +FUN_CONV_2D(avg_, sse2, 1); + +#if CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64 +// From vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm. +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2; + +// Use the [vh]8 version because there is no [vh]4 implementation. 
+#define vpx_highbd_filter_block1d16_v4_avg_sse2 \ + vpx_highbd_filter_block1d16_v8_avg_sse2 +#define vpx_highbd_filter_block1d16_h4_avg_sse2 \ + vpx_highbd_filter_block1d16_h8_avg_sse2 +#define vpx_highbd_filter_block1d8_v4_avg_sse2 \ + vpx_highbd_filter_block1d8_v8_avg_sse2 +#define vpx_highbd_filter_block1d8_h4_avg_sse2 \ + vpx_highbd_filter_block1d8_h8_avg_sse2 +#define vpx_highbd_filter_block1d4_v4_avg_sse2 \ + vpx_highbd_filter_block1d4_v8_avg_sse2 +#define vpx_highbd_filter_block1d4_h4_avg_sse2 \ + vpx_highbd_filter_block1d4_h8_avg_sse2 + +// From vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm. +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2; +highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2; + +// void vpx_highbd_convolve8_horiz_sse2(const uint8_t *src, +// ptrdiff_t src_stride, +// uint8_t *dst, +// ptrdiff_t dst_stride, +// const int16_t *filter_x, +// int x_step_q4, +// const int16_t *filter_y, +// int y_step_q4, +// int w, int h, int bd); +// void vpx_highbd_convolve8_vert_sse2(const uint8_t *src, +// ptrdiff_t src_stride, +// uint8_t *dst, +// ptrdiff_t dst_stride, +// const int16_t *filter_x, +// int x_step_q4, +// const int16_t *filter_y, +// int y_step_q4, +// int w, int h, int bd); +// void vpx_highbd_convolve8_avg_horiz_sse2(const uint8_t *src, +// ptrdiff_t src_stride, +// uint8_t *dst, +// ptrdiff_t dst_stride, +// const int16_t *filter_x, +// int x_step_q4, +// const int16_t *filter_y, +// int y_step_q4, +// int w, int h, int bd); +// void vpx_highbd_convolve8_avg_vert_sse2(const uint8_t *src, +// ptrdiff_t src_stride, +// uint8_t *dst, +// ptrdiff_t dst_stride, +// const int16_t *filter_x, +// int x_step_q4, +// const int16_t *filter_y, +// int y_step_q4, +// int w, int h, int bd); +HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2, 0); +HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v, + src - src_stride * (num_taps / 2 - 1), , sse2, 0); +HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2, 1); +HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, + src - src_stride * (num_taps / 2 - 1), avg_, sse2, 1); + +// void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const InterpKernel *filter, int x0_q4, +// int32_t x_step_q4, int y0_q4, int y_step_q4, +// int w, int h, int bd); +// void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, +// uint8_t *dst, ptrdiff_t dst_stride, +// const InterpKernel *filter, int x0_q4, +// int32_t x_step_q4, int y0_q4, +// int y_step_q4, int w, int h, int bd); +HIGH_FUN_CONV_2D(, sse2, 0); +HIGH_FUN_CONV_2D(avg_, sse2, 1); +#endif // CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64 diff --git a/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c b/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c index d0919695ce96..1eaa19bfc537 100644 --- 
a/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c @@ -9,22 +9,24 @@ */ #include <immintrin.h> +#include <stdio.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/x86/convolve.h" #include "vpx_dsp/x86/convolve_avx2.h" +#include "vpx_dsp/x86/convolve_sse2.h" #include "vpx_ports/mem.h" // filters for 16_h8 -DECLARE_ALIGNED(32, static const uint8_t, filt1_global_avx2[32]) = { - 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, - 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 -}; +DECLARE_ALIGNED(32, static const uint8_t, + filt1_global_avx2[32]) = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, + 6, 6, 7, 7, 8, 0, 1, 1, 2, 2, 3, + 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; -DECLARE_ALIGNED(32, static const uint8_t, filt2_global_avx2[32]) = { - 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, - 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 -}; +DECLARE_ALIGNED(32, static const uint8_t, + filt2_global_avx2[32]) = { 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 2, 3, 3, 4, 4, 5, + 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 }; DECLARE_ALIGNED(32, static const uint8_t, filt3_global_avx2[32]) = { 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, @@ -326,23 +328,587 @@ static void vpx_filter_block1d16_v8_avg_avx2( height, filter, 1); } +static void vpx_filter_block1d16_h4_avx2(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // We will cast the kernel from 16-bit words to 8-bit words, and then extract + // the middle four elements of the kernel into two registers in the form + // ... k[3] k[2] k[3] k[2] + // ... k[5] k[4] k[5] k[4] + // Then we shuffle the source into + // ... s[1] s[0] s[0] s[-1] + // ... s[3] s[2] s[2] s[1] + // Calling multiply and add gives us half of the sum. Calling add gives us + // first half of the output. Repeat again to get the second half of the + // output. Finally we shuffle again to combine the two outputs. + // Since avx2 allows us to use 256-bit buffer, we can do this two rows at a + // time.
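+  // (Editor's note, not part of the upstream patch: _mm256_maddubs_epi16
+  // treats its first operand as unsigned 8-bit pixels and its second as
+  // signed 8-bit taps, summing each adjacent product pair into a saturating
+  // 16-bit lane. That is why the kernel is halved with a 1-bit arithmetic
+  // shift when it is loaded below: the halved taps keep the pair sums inside
+  // int16 range, and rounding with (x + 32) >> 6 afterwards matches the usual
+  // (x + 64) >> 7 of the full-precision 7-bit filter.)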
+ + __m128i kernel_reg; // Kernel + __m256i kernel_reg_256, kernel_reg_23, + kernel_reg_45; // Segments of the kernel used + const __m256i reg_32 = _mm256_set1_epi16(32); // Used for rounding + const ptrdiff_t unrolled_src_stride = src_stride << 1; + const ptrdiff_t unrolled_dst_stride = dst_stride << 1; + int h; + + __m256i src_reg, src_reg_shift_0, src_reg_shift_2; + __m256i dst_first, dst_second; + __m256i tmp_0, tmp_1; + __m256i idx_shift_0 = + _mm256_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 0, 1, 1, + 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8); + __m256i idx_shift_2 = + _mm256_setr_epi8(2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10); + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg); + kernel_reg_256 = _mm256_broadcastsi128_si256(kernel_reg); + kernel_reg_23 = + _mm256_shuffle_epi8(kernel_reg_256, _mm256_set1_epi16(0x0302u)); + kernel_reg_45 = + _mm256_shuffle_epi8(kernel_reg_256, _mm256_set1_epi16(0x0504u)); + + for (h = height; h >= 2; h -= 2) { + // Load the source + src_reg = mm256_loadu2_si128(src_ptr, src_ptr + src_stride); + src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); + src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); + + // Partial result for first half + tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23); + tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45); + dst_first = _mm256_adds_epi16(tmp_0, tmp_1); + + // Do again to get the second half of dst + // Load the source + src_reg = mm256_loadu2_si128(src_ptr + 8, src_ptr + src_stride + 8); + src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); + src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); + + // Partial result for second half + tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23); + tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45); + dst_second = _mm256_adds_epi16(tmp_0, tmp_1); + + // Round each result + dst_first = mm256_round_epi16(&dst_first, &reg_32, 6); + dst_second = mm256_round_epi16(&dst_second, &reg_32, 6); + + // Finally combine to get the final dst + dst_first = _mm256_packus_epi16(dst_first, dst_second); + mm256_store2_si128((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), + &dst_first); + + src_ptr += unrolled_src_stride; + dst_ptr += unrolled_dst_stride; + } + + // Repeat for the last row if needed + if (h > 0) { + src_reg = _mm256_loadu_si256((const __m256i *)src_ptr); + // Reorder into 2 1 1 0 + src_reg = _mm256_permute4x64_epi64(src_reg, 0x94); + + src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); + src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); + + tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23); + tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45); + dst_first = _mm256_adds_epi16(tmp_0, tmp_1); + + dst_first = mm256_round_epi16(&dst_first, &reg_32, 6); + + dst_first = _mm256_packus_epi16(dst_first, dst_first); + dst_first = _mm256_permute4x64_epi64(dst_first, 0x8); + + _mm_store_si128((__m128i *)dst_ptr, _mm256_castsi256_si128(dst_first)); + } +} + +static void vpx_filter_block1d16_v4_avx2(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // We will load two rows of pixels as 8-bit words, rearrange them
into the + // form + // ... s[1,0] s[0,0] s[0,0] s[-1,0] + // so that we can call multiply and add with the kernel to get partial output. + // Then we can call add with another row to get the output. + + // Register for source s[-1:3, :] + __m256i src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source. lo is first half, hi second + __m256i src_reg_m10, src_reg_01, src_reg_12, src_reg_23; + __m256i src_reg_m1001_lo, src_reg_m1001_hi, src_reg_1223_lo, src_reg_1223_hi; + + __m128i kernel_reg; // Kernel + __m256i kernel_reg_256, kernel_reg_23, + kernel_reg_45; // Segments of the kernel used + + // Result after multiply and add + __m256i res_reg_m1001_lo, res_reg_1223_lo, res_reg_m1001_hi, res_reg_1223_hi; + __m256i res_reg, res_reg_lo, res_reg_hi; + + const __m256i reg_32 = _mm256_set1_epi16(32); // Used for rounding + + // We will compute the result two rows at a time + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg); + kernel_reg_256 = _mm256_broadcastsi128_si256(kernel_reg); + kernel_reg_23 = + _mm256_shuffle_epi8(kernel_reg_256, _mm256_set1_epi16(0x0302u)); + kernel_reg_45 = + _mm256_shuffle_epi8(kernel_reg_256, _mm256_set1_epi16(0x0504u)); + + // Row -1 to row 0 + src_reg_m10 = mm256_loadu2_si128((const __m128i *)src_ptr, + (const __m128i *)(src_ptr + src_stride)); + + // Row 0 to row 1 + src_reg_1 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2))); + src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); + + // First three rows + src_reg_m1001_lo = _mm256_unpacklo_epi8(src_reg_m10, src_reg_01); + src_reg_m1001_hi = _mm256_unpackhi_epi8(src_reg_m10, src_reg_01); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3))); + + src_reg_12 = _mm256_inserti128_si256(src_reg_1, + _mm256_castsi256_si128(src_reg_2), 1); + + src_reg_3 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4))); + + src_reg_23 = _mm256_inserti128_si256(src_reg_2, + _mm256_castsi256_si128(src_reg_3), 1); + + // Last three rows + src_reg_1223_lo = _mm256_unpacklo_epi8(src_reg_12, src_reg_23); + src_reg_1223_hi = _mm256_unpackhi_epi8(src_reg_12, src_reg_23); + + // Output from first half + res_reg_m1001_lo = _mm256_maddubs_epi16(src_reg_m1001_lo, kernel_reg_23); + res_reg_1223_lo = _mm256_maddubs_epi16(src_reg_1223_lo, kernel_reg_45); + res_reg_lo = _mm256_adds_epi16(res_reg_m1001_lo, res_reg_1223_lo); + + // Output from second half + res_reg_m1001_hi = _mm256_maddubs_epi16(src_reg_m1001_hi, kernel_reg_23); + res_reg_1223_hi = _mm256_maddubs_epi16(src_reg_1223_hi, kernel_reg_45); + res_reg_hi = _mm256_adds_epi16(res_reg_m1001_hi, res_reg_1223_hi); + + // Round the words + res_reg_lo = mm256_round_epi16(&res_reg_lo, &reg_32, 6); + res_reg_hi = mm256_round_epi16(&res_reg_hi, &reg_32, 6); + + // Combine to get the result + res_reg = _mm256_packus_epi16(res_reg_lo, res_reg_hi); + + // Save the result + mm256_store2_si128((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), + &res_reg); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + + src_reg_m1001_lo = src_reg_1223_lo; + src_reg_m1001_hi = src_reg_1223_hi; + src_reg_1 = src_reg_3; + } +} + +static void
vpx_filter_block1d8_h4_avx2(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // We will cast the kernel from 16-bit words to 8-bit words, and then extract + // the middle four elements of the kernel into two registers in the form + // ... k[3] k[2] k[3] k[2] + // ... k[5] k[4] k[5] k[4] + // Then we shuffle the source into + // ... s[1] s[0] s[0] s[-1] + // ... s[3] s[2] s[2] s[1] + // Calling multiply and add gives us half of the sum. Calling add gives us + // first half of the output. Repeat again to get the second half of the + // output. Finally we shuffle again to combine the two outputs. + // Since avx2 allows us to use 256-bit buffer, we can do this two rows at a + // time. + + __m128i kernel_reg_128; // Kernel + __m256i kernel_reg, kernel_reg_23, + kernel_reg_45; // Segments of the kernel used + const __m256i reg_32 = _mm256_set1_epi16(32); // Used for rounding + const ptrdiff_t unrolled_src_stride = src_stride << 1; + const ptrdiff_t unrolled_dst_stride = dst_stride << 1; + int h; + + __m256i src_reg, src_reg_shift_0, src_reg_shift_2; + __m256i dst_reg; + __m256i tmp_0, tmp_1; + __m256i idx_shift_0 = + _mm256_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 0, 1, 1, + 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8); + __m256i idx_shift_2 = + _mm256_setr_epi8(2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10); + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg_128 = _mm_srai_epi16(kernel_reg_128, 1); + kernel_reg_128 = _mm_packs_epi16(kernel_reg_128, kernel_reg_128); + kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); + kernel_reg_23 = _mm256_shuffle_epi8(kernel_reg, _mm256_set1_epi16(0x0302u)); + kernel_reg_45 = _mm256_shuffle_epi8(kernel_reg, _mm256_set1_epi16(0x0504u)); + + for (h = height; h >= 2; h -= 2) { + // Load the source + src_reg = mm256_loadu2_si128(src_ptr, src_ptr + src_stride); + src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); + src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); + + // Get the output + tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23); + tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45); + dst_reg = _mm256_adds_epi16(tmp_0, tmp_1); + + // Round the result + dst_reg = mm256_round_epi16(&dst_reg, &reg_32, 6); + + // Finally combine to get the final dst + dst_reg = _mm256_packus_epi16(dst_reg, dst_reg); + mm256_storeu2_epi64((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), + &dst_reg); + + src_ptr += unrolled_src_stride; + dst_ptr += unrolled_dst_stride; + } + + // Repeat for the last row if needed + if (h > 0) { + __m128i src_reg = _mm_loadu_si128((const __m128i *)src_ptr); + __m128i dst_reg; + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + __m128i tmp_0, tmp_1; + + __m128i src_reg_shift_0 = + _mm_shuffle_epi8(src_reg, _mm256_castsi256_si128(idx_shift_0)); + __m128i src_reg_shift_2 = + _mm_shuffle_epi8(src_reg, _mm256_castsi256_si128(idx_shift_2)); + + tmp_0 = _mm_maddubs_epi16(src_reg_shift_0, + _mm256_castsi256_si128(kernel_reg_23)); + tmp_1 = _mm_maddubs_epi16(src_reg_shift_2, + _mm256_castsi256_si128(kernel_reg_45)); + dst_reg = _mm_adds_epi16(tmp_0, tmp_1); + + dst_reg = mm_round_epi16_sse2(&dst_reg, &reg_32, 6); + + dst_reg = _mm_packus_epi16(dst_reg, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i
*)dst_ptr, dst_reg); + } +} + +static void vpx_filter_block1d8_v4_avx2(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // We will load two rows of pixels as 8-bit words, rearrange them into the + // form + // ... s[1,0] s[0,0] s[0,0] s[-1,0] + // so that we can call multiply and add with the kernel to get partial output. + // Then we can call add with another row to get the output. + + // Register for source s[-1:3, :] + __m256i src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source. lo is first half, hi second + __m256i src_reg_m10, src_reg_01, src_reg_12, src_reg_23; + __m256i src_reg_m1001, src_reg_1223; + + __m128i kernel_reg_128; // Kernel + __m256i kernel_reg, kernel_reg_23, + kernel_reg_45; // Segments of the kernel used + + // Result after multiply and add + __m256i res_reg_m1001, res_reg_1223; + __m256i res_reg; + + const __m256i reg_32 = _mm256_set1_epi16(32); // Used for rounding + + // We will compute the result two rows at a time + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg_128 = _mm_srai_epi16(kernel_reg_128, 1); + kernel_reg_128 = _mm_packs_epi16(kernel_reg_128, kernel_reg_128); + kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); + kernel_reg_23 = _mm256_shuffle_epi8(kernel_reg, _mm256_set1_epi16(0x0302u)); + kernel_reg_45 = _mm256_shuffle_epi8(kernel_reg, _mm256_set1_epi16(0x0504u)); + + // Row -1 to row 0 + src_reg_m10 = mm256_loadu2_epi64((const __m128i *)src_ptr, + (const __m128i *)(src_ptr + src_stride)); + + // Row 0 to row 1 + src_reg_1 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2))); + src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); + + // First three rows + src_reg_m1001 = _mm256_unpacklo_epi8(src_reg_m10, src_reg_01); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm256_castsi128_si256( + _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 3))); + + src_reg_12 = _mm256_inserti128_si256(src_reg_1, + _mm256_castsi256_si128(src_reg_2), 1); + + src_reg_3 = _mm256_castsi128_si256( + _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 4))); + + src_reg_23 = _mm256_inserti128_si256(src_reg_2, + _mm256_castsi256_si128(src_reg_3), 1); + + // Last three rows + src_reg_1223 = _mm256_unpacklo_epi8(src_reg_12, src_reg_23); + + // Output + res_reg_m1001 = _mm256_maddubs_epi16(src_reg_m1001, kernel_reg_23); + res_reg_1223 = _mm256_maddubs_epi16(src_reg_1223, kernel_reg_45); + res_reg = _mm256_adds_epi16(res_reg_m1001, res_reg_1223); + + // Round the words + res_reg = mm256_round_epi16(&res_reg, &reg_32, 6); + + // Combine to get the result + res_reg = _mm256_packus_epi16(res_reg, res_reg); + + // Save the result + mm256_storeu2_epi64((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), + &res_reg); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + + src_reg_m1001 = src_reg_1223; + src_reg_1 = src_reg_3; + } +} + +static void vpx_filter_block1d4_h4_avx2(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // We will cast the kernel from 16-bit words to 8-bit words, and then extract + // the middle four elements of the kernel into a single register in the form + // k[5:2] k[5:2] k[5:2] k[5:2] + //
Then we shuffle the source into + // s[5:2] s[4:1] s[3:0] s[2:-1] + // Calling multiply and add gives us half of the sum next to each other. + // Calling horizontal add then gives us the output. + // Since avx2 has 256-bit registers, we can do 2 rows at a time. + + __m128i kernel_reg_128; // Kernel + __m256i kernel_reg; + const __m256i reg_32 = _mm256_set1_epi16(32); // Used for rounding + int h; + const ptrdiff_t unrolled_src_stride = src_stride << 1; + const ptrdiff_t unrolled_dst_stride = dst_stride << 1; + + __m256i src_reg, src_reg_shuf; + __m256i dst; + __m256i shuf_idx = + _mm256_setr_epi8(0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 0, 1, 2, + 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6); + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg_128 = _mm_srai_epi16(kernel_reg_128, 1); + kernel_reg_128 = _mm_packs_epi16(kernel_reg_128, kernel_reg_128); + kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); + kernel_reg = _mm256_shuffle_epi8(kernel_reg, _mm256_set1_epi32(0x05040302u)); + + for (h = height; h > 1; h -= 2) { + // Load the source + src_reg = mm256_loadu2_epi64((const __m128i *)src_ptr, + (const __m128i *)(src_ptr + src_stride)); + src_reg_shuf = _mm256_shuffle_epi8(src_reg, shuf_idx); + + // Get the result + dst = _mm256_maddubs_epi16(src_reg_shuf, kernel_reg); + dst = _mm256_hadds_epi16(dst, _mm256_setzero_si256()); + + // Round result + dst = mm256_round_epi16(&dst, &reg_32, 6); + + // Pack to 8-bits + dst = _mm256_packus_epi16(dst, _mm256_setzero_si256()); + + // Save + mm256_storeu2_epi32((__m128i *const)dst_ptr, + (__m128i *const)(dst_ptr + dst_stride), &dst); + + src_ptr += unrolled_src_stride; + dst_ptr += unrolled_dst_stride; + } + + if (h > 0) { + // Load the source + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + __m128i src_reg = _mm_loadl_epi64((const __m128i *)src_ptr); + __m128i src_reg_shuf = + _mm_shuffle_epi8(src_reg, _mm256_castsi256_si128(shuf_idx)); + + // Get the result + __m128i dst = + _mm_maddubs_epi16(src_reg_shuf, _mm256_castsi256_si128(kernel_reg)); + dst = _mm_hadds_epi16(dst, _mm_setzero_si128()); + + // Round result + dst = mm_round_epi16_sse2(&dst, &reg_32, 6); + + // Pack to 8-bits + dst = _mm_packus_epi16(dst, _mm_setzero_si128()); + *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(dst); + } +} + +static void vpx_filter_block1d4_v4_avx2(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // We will load two rows of pixels as 8-bit words, rearrange them into the + // form + // ... s[3,0] s[2,0] s[1,0] s[0,0] s[2,0] s[1,0] s[0,0] s[-1,0] + // so that we can call multiply and add with the kernel to get partial output. + // Calling horizontal add then gives us the complete output. + + // Register for source s[-1:3, :] + __m256i src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source.
lo is first half, hi second + __m256i src_reg_m10, src_reg_01, src_reg_12, src_reg_23; + __m256i src_reg_m1001, src_reg_1223, src_reg_m1012_1023; + + __m128i kernel_reg_128; // Kernel + __m256i kernel_reg; + + // Result after multiply and add + __m256i res_reg; + + const __m256i reg_32 = _mm256_set1_epi16(32); // Used for rounding + + // We will compute the result two rows at a time + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg_128 = _mm_srai_epi16(kernel_reg_128, 1); + kernel_reg_128 = _mm_packs_epi16(kernel_reg_128, kernel_reg_128); + kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); + kernel_reg = _mm256_shuffle_epi8(kernel_reg, _mm256_set1_epi32(0x05040302u)); + + // Row -1 to row 0 + src_reg_m10 = mm256_loadu2_si128((const __m128i *)src_ptr, + (const __m128i *)(src_ptr + src_stride)); + + // Row 0 to row 1 + src_reg_1 = _mm256_castsi128_si256( + _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2))); + src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); + + // First three rows + src_reg_m1001 = _mm256_unpacklo_epi8(src_reg_m10, src_reg_01); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm256_castsi128_si256( + _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 3))); + + src_reg_12 = _mm256_inserti128_si256(src_reg_1, + _mm256_castsi256_si128(src_reg_2), 1); + + src_reg_3 = _mm256_castsi128_si256( + _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 4))); + + src_reg_23 = _mm256_inserti128_si256(src_reg_2, + _mm256_castsi256_si128(src_reg_3), 1); + + // Last three rows + src_reg_1223 = _mm256_unpacklo_epi8(src_reg_12, src_reg_23); + + // Combine all the rows + src_reg_m1012_1023 = _mm256_unpacklo_epi16(src_reg_m1001, src_reg_1223); + + // Output + res_reg = _mm256_maddubs_epi16(src_reg_m1012_1023, kernel_reg); + res_reg = _mm256_hadds_epi16(res_reg, _mm256_setzero_si256()); + + // Round the words + res_reg = mm256_round_epi16(&res_reg, &reg_32, 6); + + // Combine to get the result + res_reg = _mm256_packus_epi16(res_reg, res_reg); + + // Save the result + mm256_storeu2_epi32((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), + &res_reg); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + + src_reg_m1001 = src_reg_1223; + src_reg_1 = src_reg_3; + } +} + #if HAVE_AVX2 && HAVE_SSSE3 filter8_1dfunction vpx_filter_block1d4_v8_ssse3; -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3; filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3; filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3; #define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_intrin_ssse3 #define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_intrin_ssse3 #define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_intrin_ssse3 -#else // ARCH_X86 +#else // VPX_ARCH_X86 filter8_1dfunction vpx_filter_block1d8_v8_ssse3; filter8_1dfunction vpx_filter_block1d8_h8_ssse3; filter8_1dfunction vpx_filter_block1d4_h8_ssse3; #define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_ssse3 #define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_ssse3 #define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_ssse3 -#endif // ARCH_X86_64 +#endif // VPX_ARCH_X86_64 filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3; filter8_1dfunction vpx_filter_block1d8_h8_avg_ssse3; filter8_1dfunction
vpx_filter_block1d4_v8_avg_ssse3; @@ -376,6 +942,13 @@ filter8_1dfunction vpx_filter_block1d4_h2_avg_ssse3; #define vpx_filter_block1d8_h2_avg_avx2 vpx_filter_block1d8_h2_avg_ssse3 #define vpx_filter_block1d4_v2_avg_avx2 vpx_filter_block1d4_v2_avg_ssse3 #define vpx_filter_block1d4_h2_avg_avx2 vpx_filter_block1d4_h2_avg_ssse3 + +#define vpx_filter_block1d16_v4_avg_avx2 vpx_filter_block1d16_v8_avg_avx2 +#define vpx_filter_block1d16_h4_avg_avx2 vpx_filter_block1d16_h8_avg_avx2 +#define vpx_filter_block1d8_v4_avg_avx2 vpx_filter_block1d8_v8_avg_avx2 +#define vpx_filter_block1d8_h4_avg_avx2 vpx_filter_block1d8_h8_avg_avx2 +#define vpx_filter_block1d4_v4_avg_avx2 vpx_filter_block1d4_v8_avg_avx2 +#define vpx_filter_block1d4_h4_avg_avx2 vpx_filter_block1d4_h8_avg_avx2 // void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, @@ -396,10 +969,12 @@ filter8_1dfunction vpx_filter_block1d4_h2_avg_ssse3; // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, // int y_step_q4, int w, int h); -FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , avx2); -FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , avx2); -FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, avx2); -FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_, avx2); +FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , avx2, 0); +FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * (num_taps / 2 - 1), , + avx2, 0); +FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, avx2, 1); +FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, + src - src_stride * (num_taps / 2 - 1), avg_, avx2, 1); // void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, @@ -411,6 +986,6 @@ FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_, avx2); // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h); -FUN_CONV_2D(, avx2); -FUN_CONV_2D(avg_, avx2); +FUN_CONV_2D(, avx2, 0); +FUN_CONV_2D(avg_, avx2, 1); #endif // HAVE_AVX2 && HAVE_SSSE3 diff --git a/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c index e4f992780ff4..77355a2085b8 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c +++ b/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c @@ -12,20 +12,17 @@ #include <tmmintrin.h> +#include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_filter.h" #include "vpx_dsp/x86/convolve.h" +#include "vpx_dsp/x86/convolve_sse2.h" #include "vpx_dsp/x86/convolve_ssse3.h" #include "vpx_dsp/x86/mem_sse2.h" #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" -// These are reused by the avx2 intrinsics. -// vpx_filter_block1d8_v8_intrin_ssse3() -// vpx_filter_block1d8_h8_intrin_ssse3() -// vpx_filter_block1d4_h8_intrin_ssse3() - static INLINE __m128i shuffle_filter_convolve8_8_ssse3( const __m128i *const s, const int16_t *const filter) { __m128i f[4]; @@ -33,6 +30,23 @@ static INLINE __m128i shuffle_filter_convolve8_8_ssse3( return convolve8_8_ssse3(s, f); } +// Used by the avx2 implementation.
+#if VPX_ARCH_X86_64 +// Use the intrinsics below +filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3; +filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3; +filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3; +#define vpx_filter_block1d4_h8_ssse3 vpx_filter_block1d4_h8_intrin_ssse3 +#define vpx_filter_block1d8_h8_ssse3 vpx_filter_block1d8_h8_intrin_ssse3 +#define vpx_filter_block1d8_v8_ssse3 vpx_filter_block1d8_v8_intrin_ssse3 +#else // VPX_ARCH_X86 +// Use the assembly in vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm. +filter8_1dfunction vpx_filter_block1d4_h8_ssse3; +filter8_1dfunction vpx_filter_block1d8_h8_ssse3; +filter8_1dfunction vpx_filter_block1d8_v8_ssse3; +#endif + +#if VPX_ARCH_X86_64 void vpx_filter_block1d4_h8_intrin_ssse3( const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr, ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) { @@ -184,13 +198,490 @@ void vpx_filter_block1d8_v8_intrin_ssse3( output_ptr += out_pitch; } } +#endif // VPX_ARCH_X86_64 +static void vpx_filter_block1d16_h4_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_stride, + uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // We will cast the kernel from 16-bit words to 8-bit words, and then extract + // the middle four elements of the kernel into two registers in the form + // ... k[3] k[2] k[3] k[2] + // ... k[5] k[4] k[5] k[4] + // Then we shuffle the source into + // ... s[1] s[0] s[0] s[-1] + // ... s[3] s[2] s[2] s[1] + // Calling multiply and add gives us half of the sum. Calling add gives us + // first half of the output. Repeat again to get the second half of the + // output. Finally we shuffle again to combine the two outputs. + + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + int h; + + __m128i src_reg, src_reg_shift_0, src_reg_shift_2; + __m128i dst_first, dst_second; + __m128i tmp_0, tmp_1; + __m128i idx_shift_0 = + _mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8); + __m128i idx_shift_2 = + _mm_setr_epi8(2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10); + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg); + kernel_reg_23 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0302u)); + kernel_reg_45 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0504u)); + + for (h = height; h > 0; --h) { + // Load the source + src_reg = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_shift_0 = _mm_shuffle_epi8(src_reg, idx_shift_0); + src_reg_shift_2 = _mm_shuffle_epi8(src_reg, idx_shift_2); + + // Partial result for first half + tmp_0 = _mm_maddubs_epi16(src_reg_shift_0, kernel_reg_23); + tmp_1 = _mm_maddubs_epi16(src_reg_shift_2, kernel_reg_45); + dst_first = _mm_adds_epi16(tmp_0, tmp_1); + + // Do again to get the second half of dst + // Load the source + src_reg = _mm_loadu_si128((const __m128i *)(src_ptr + 8)); + src_reg_shift_0 = _mm_shuffle_epi8(src_reg, idx_shift_0); + src_reg_shift_2 = _mm_shuffle_epi8(src_reg, idx_shift_2); + + // Partial result for second half + tmp_0 = _mm_maddubs_epi16(src_reg_shift_0, kernel_reg_23); + tmp_1 = _mm_maddubs_epi16(src_reg_shift_2, kernel_reg_45); + dst_second = _mm_adds_epi16(tmp_0, tmp_1); + + // Round each result + dst_first =
mm_round_epi16_sse2(&dst_first, &reg_32, 6); + dst_second = mm_round_epi16_sse2(&dst_second, &reg_32, 6); + + // Finally combine to get the final dst + dst_first = _mm_packus_epi16(dst_first, dst_second); + _mm_store_si128((__m128i *)dst_ptr, dst_first); + + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +static void vpx_filter_block1d16_v4_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_stride, + uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // We will load two rows of pixels as 8-bit words, rearrange them into the + // form + // ... s[0,1] s[-1,1] s[0,0] s[-1,0] + // ... s[0,9] s[-1,9] s[0,8] s[-1,8] + // so that we can call multiply and add with the kernel to get 16-bit words of + // the form + // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] + // Finally, we can add multiple rows together to get the desired output. + + // Register for source s[-1:3, :] + __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source. lo is first half, hi second + __m128i src_reg_m10_lo, src_reg_m10_hi, src_reg_01_lo, src_reg_01_hi; + __m128i src_reg_12_lo, src_reg_12_hi, src_reg_23_lo, src_reg_23_hi; + + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + + // Result after multiply and add + __m128i res_reg_m10_lo, res_reg_01_lo, res_reg_12_lo, res_reg_23_lo; + __m128i res_reg_m10_hi, res_reg_01_hi, res_reg_12_hi, res_reg_23_hi; + __m128i res_reg_m1012, res_reg_0123; + __m128i res_reg_m1012_lo, res_reg_0123_lo, res_reg_m1012_hi, res_reg_0123_hi; + + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + + // We will compute the result two rows at a time + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg); + kernel_reg_23 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0302u)); + kernel_reg_45 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0504u)); + + // First shuffle the data + src_reg_m1 = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_0 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride)); + src_reg_m10_lo = _mm_unpacklo_epi8(src_reg_m1, src_reg_0); + src_reg_m10_hi = _mm_unpackhi_epi8(src_reg_m1, src_reg_0); + + // More shuffling + src_reg_1 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2)); + src_reg_01_lo = _mm_unpacklo_epi8(src_reg_0, src_reg_1); + src_reg_01_hi = _mm_unpackhi_epi8(src_reg_0, src_reg_1); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3)); + + src_reg_12_lo = _mm_unpacklo_epi8(src_reg_1, src_reg_2); + src_reg_12_hi = _mm_unpackhi_epi8(src_reg_1, src_reg_2); + + src_reg_3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4)); + + src_reg_23_lo = _mm_unpacklo_epi8(src_reg_2, src_reg_3); + src_reg_23_hi = _mm_unpackhi_epi8(src_reg_2, src_reg_3); + + // Partial output from first half + res_reg_m10_lo = _mm_maddubs_epi16(src_reg_m10_lo, kernel_reg_23); + res_reg_01_lo = _mm_maddubs_epi16(src_reg_01_lo, kernel_reg_23); + + res_reg_12_lo = _mm_maddubs_epi16(src_reg_12_lo, kernel_reg_45); + res_reg_23_lo = _mm_maddubs_epi16(src_reg_23_lo, kernel_reg_45); + + // Add to get first half of the results + res_reg_m1012_lo = _mm_adds_epi16(res_reg_m10_lo, res_reg_12_lo); + res_reg_0123_lo =
_mm_adds_epi16(res_reg_01_lo, res_reg_23_lo); + + // Partial output for second half + res_reg_m10_hi = _mm_maddubs_epi16(src_reg_m10_hi, kernel_reg_23); + res_reg_01_hi = _mm_maddubs_epi16(src_reg_01_hi, kernel_reg_23); + + res_reg_12_hi = _mm_maddubs_epi16(src_reg_12_hi, kernel_reg_45); + res_reg_23_hi = _mm_maddubs_epi16(src_reg_23_hi, kernel_reg_45); + + // Second half of the results + res_reg_m1012_hi = _mm_adds_epi16(res_reg_m10_hi, res_reg_12_hi); + res_reg_0123_hi = _mm_adds_epi16(res_reg_01_hi, res_reg_23_hi); + + // Round the words + res_reg_m1012_lo = mm_round_epi16_sse2(&res_reg_m1012_lo, &reg_32, 6); + res_reg_0123_lo = mm_round_epi16_sse2(&res_reg_0123_lo, &reg_32, 6); + res_reg_m1012_hi = mm_round_epi16_sse2(&res_reg_m1012_hi, &reg_32, 6); + res_reg_0123_hi = mm_round_epi16_sse2(&res_reg_0123_hi, &reg_32, 6); + + // Combine to get the result + res_reg_m1012 = _mm_packus_epi16(res_reg_m1012_lo, res_reg_m1012_hi); + res_reg_0123 = _mm_packus_epi16(res_reg_0123_lo, res_reg_0123_hi); + + _mm_store_si128((__m128i *)dst_ptr, res_reg_m1012); + _mm_store_si128((__m128i *)(dst_ptr + dst_stride), res_reg_0123); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + + src_reg_m10_lo = src_reg_12_lo; + src_reg_m10_hi = src_reg_12_hi; + src_reg_01_lo = src_reg_23_lo; + src_reg_01_hi = src_reg_23_hi; + src_reg_1 = src_reg_3; + } +} + +static void vpx_filter_block1d8_h4_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // We will cast the kernel from 16-bit words to 8-bit words, and then extract + // the middle four elements of the kernel into two registers in the form + // ... k[3] k[2] k[3] k[2] + // ... k[5] k[4] k[5] k[4] + // Then we shuffle the source into + // ... s[1] s[0] s[0] s[-1] + // ... s[3] s[2] s[2] s[1] + // Calling multiply and add gives us half of the sum. Calling add gives us + // first half of the output. Repeat again to get the second half of the + // output. Finally we shuffle again to combine the two outputs.
+ + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + int h; + + __m128i src_reg, src_reg_shift_0, src_reg_shift_2; + __m128i dst_first; + __m128i tmp_0, tmp_1; + __m128i idx_shift_0 = + _mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8); + __m128i idx_shift_2 = + _mm_setr_epi8(2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10); + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg); + kernel_reg_23 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0302u)); + kernel_reg_45 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0504u)); + + for (h = height; h > 0; --h) { + // Load the source + src_reg = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_shift_0 = _mm_shuffle_epi8(src_reg, idx_shift_0); + src_reg_shift_2 = _mm_shuffle_epi8(src_reg, idx_shift_2); + + // Get the result + tmp_0 = _mm_maddubs_epi16(src_reg_shift_0, kernel_reg_23); + tmp_1 = _mm_maddubs_epi16(src_reg_shift_2, kernel_reg_45); + dst_first = _mm_adds_epi16(tmp_0, tmp_1); + + // Round the result + dst_first = mm_round_epi16_sse2(&dst_first, &reg_32, 6); + + // Pack to 8-bits + dst_first = _mm_packus_epi16(dst_first, _mm_setzero_si128()); + _mm_storel_epi64((__m128i *)dst_ptr, dst_first); + + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +static void vpx_filter_block1d8_v4_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // We will load two rows of pixels as 8-bit words, rearrange them into the + // form + // ... s[0,1] s[-1,1] s[0,0] s[-1,0] + // so that we can call multiply and add with the kernel to get 16-bit words of + // the form + // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] + // Finally, we can add multiple rows together to get the desired output. + + // Register for source s[-1:3, :] + __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source.
lo is first half, hi second + __m128i src_reg_m10, src_reg_01; + __m128i src_reg_12, src_reg_23; + + __m128i kernel_reg; // Kernel + __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used + + // Result after multiply and add + __m128i res_reg_m10, res_reg_01, res_reg_12, res_reg_23; + __m128i res_reg_m1012, res_reg_0123; + + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + + // We will compute the result two rows at a time + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg); + kernel_reg_23 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0302u)); + kernel_reg_45 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0504u)); + + // First shuffle the data + src_reg_m1 = _mm_loadl_epi64((const __m128i *)src_ptr); + src_reg_0 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride)); + src_reg_m10 = _mm_unpacklo_epi8(src_reg_m1, src_reg_0); + + // More shuffling + src_reg_1 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 2)); + src_reg_01 = _mm_unpacklo_epi8(src_reg_0, src_reg_1); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 3)); + + src_reg_12 = _mm_unpacklo_epi8(src_reg_1, src_reg_2); + + src_reg_3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 4)); + + src_reg_23 = _mm_unpacklo_epi8(src_reg_2, src_reg_3); + + // Partial output + res_reg_m10 = _mm_maddubs_epi16(src_reg_m10, kernel_reg_23); + res_reg_01 = _mm_maddubs_epi16(src_reg_01, kernel_reg_23); + + res_reg_12 = _mm_maddubs_epi16(src_reg_12, kernel_reg_45); + res_reg_23 = _mm_maddubs_epi16(src_reg_23, kernel_reg_45); + + // Add to get entire output + res_reg_m1012 = _mm_adds_epi16(res_reg_m10, res_reg_12); + res_reg_0123 = _mm_adds_epi16(res_reg_01, res_reg_23); + + // Round the words + res_reg_m1012 = mm_round_epi16_sse2(&res_reg_m1012, &reg_32, 6); + res_reg_0123 = mm_round_epi16_sse2(&res_reg_0123, &reg_32, 6); + + // Pack from 16-bit to 8-bit + res_reg_m1012 = _mm_packus_epi16(res_reg_m1012, _mm_setzero_si128()); + res_reg_0123 = _mm_packus_epi16(res_reg_0123, _mm_setzero_si128()); + + _mm_storel_epi64((__m128i *)dst_ptr, res_reg_m1012); + _mm_storel_epi64((__m128i *)(dst_ptr + dst_stride), res_reg_0123); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + + src_reg_m10 = src_reg_12; + src_reg_01 = src_reg_23; + src_reg_1 = src_reg_3; + } +} + +static void vpx_filter_block1d4_h4_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // We will cast the kernel from 16-bit words to 8-bit words, and then extract + // the middle four elements of the kernel into a single register in the form + // k[5:2] k[5:2] k[5:2] k[5:2] + // Then we shuffle the source into + // s[5:2] s[4:1] s[3:0] s[2:-1] + // Calling multiply and add gives us half of the sum next to each other. + // Calling horizontal add then gives us the output.
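// [Editor's note, not part of the patch] A worked example of the two steps
// just described, for the first output pixel. After the shuffle, one 32-bit
// lane holds the bytes s[-1] s[0] s[1] s[2] and kernel_reg repeats
// k[2] k[3] k[4] k[5] in every lane, so:
//   _mm_maddubs_epi16: s[-1]*k[2]+s[0]*k[3] | s[1]*k[4]+s[2]*k[5]  (two lanes)
//   _mm_hadds_epi16:   (s[-1]*k[2]+s[0]*k[3]) + (s[1]*k[4]+s[2]*k[5])
// i.e. the full 4-tap sum lands in one 16-bit lane per output pixel. Because
// the kernel was shifted right by 1 when it was packed to 8 bits, these
// saturating 16-bit sums cannot overflow, and the round-by-32 / shift-by-6
// used below divides by the remaining 2^6 of the 2^7 filter weight.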
+ + __m128i kernel_reg; // Kernel + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + int h; + + __m128i src_reg, src_reg_shuf; + __m128i dst_first; + __m128i shuf_idx = + _mm_setr_epi8(0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6); + + // Start one pixel before as we need tap/2 - 1 = 1 sample from the past + src_ptr -= 1; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg); + kernel_reg = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi32(0x05040302u)); + + for (h = height; h > 0; --h) { + // Load the source + src_reg = _mm_loadu_si128((const __m128i *)src_ptr); + src_reg_shuf = _mm_shuffle_epi8(src_reg, shuf_idx); + + // Get the result + dst_first = _mm_maddubs_epi16(src_reg_shuf, kernel_reg); + dst_first = _mm_hadds_epi16(dst_first, _mm_setzero_si128()); + + // Round result + dst_first = mm_round_epi16_sse2(&dst_first, &reg_32, 6); + + // Pack to 8-bits + dst_first = _mm_packus_epi16(dst_first, _mm_setzero_si128()); + *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(dst_first); + + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +static void vpx_filter_block1d4_v4_ssse3(const uint8_t *src_ptr, + ptrdiff_t src_stride, uint8_t *dst_ptr, + ptrdiff_t dst_stride, uint32_t height, + const int16_t *kernel) { + // We will load two rows of pixels as 8-bit words, rearrange them into the + // form + // ... s[2,0] s[1,0] s[0,0] s[-1,0] + // so that we can call multiply and add with the kernel to get the partial + // output. Then we can call horizontal add to get the output. + // Finally, we can add multiple rows together to get the desired output. + // This is done two rows at a time + + // Register for source s[-1:3, :] + __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; + // Interleaved rows of the source.
+ __m128i src_reg_m10, src_reg_01; + __m128i src_reg_12, src_reg_23; + __m128i src_reg_m1001, src_reg_1223; + __m128i src_reg_m1012_1023_lo, src_reg_m1012_1023_hi; + + __m128i kernel_reg; // Kernel + + // Result after multiply and add + __m128i reg_0, reg_1; + + const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding + + // We will compute the result two rows at a time + const ptrdiff_t src_stride_unrolled = src_stride << 1; + const ptrdiff_t dst_stride_unrolled = dst_stride << 1; + int h; + + // Load Kernel + kernel_reg = _mm_loadu_si128((const __m128i *)kernel); + kernel_reg = _mm_srai_epi16(kernel_reg, 1); + kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg); + kernel_reg = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi32(0x05040302u)); + + // First shuffle the data + src_reg_m1 = _mm_loadl_epi64((const __m128i *)src_ptr); + src_reg_0 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride)); + src_reg_m10 = _mm_unpacklo_epi32(src_reg_m1, src_reg_0); + + // More shuffling + src_reg_1 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 2)); + src_reg_01 = _mm_unpacklo_epi32(src_reg_0, src_reg_1); + + // Put three rows next to each other + src_reg_m1001 = _mm_unpacklo_epi8(src_reg_m10, src_reg_01); + + for (h = height; h > 1; h -= 2) { + src_reg_2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 3)); + src_reg_12 = _mm_unpacklo_epi32(src_reg_1, src_reg_2); + + src_reg_3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 4)); + src_reg_23 = _mm_unpacklo_epi32(src_reg_2, src_reg_3); + + // Put three rows next to each other + src_reg_1223 = _mm_unpacklo_epi8(src_reg_12, src_reg_23); + + // Put all four rows next to each other + src_reg_m1012_1023_lo = _mm_unpacklo_epi16(src_reg_m1001, src_reg_1223); + src_reg_m1012_1023_hi = _mm_unpackhi_epi16(src_reg_m1001, src_reg_1223); + + // Get the results + reg_0 = _mm_maddubs_epi16(src_reg_m1012_1023_lo, kernel_reg); + reg_1 = _mm_maddubs_epi16(src_reg_m1012_1023_hi, kernel_reg); + reg_0 = _mm_hadds_epi16(reg_0, _mm_setzero_si128()); + reg_1 = _mm_hadds_epi16(reg_1, _mm_setzero_si128()); + + // Round the words + reg_0 = mm_round_epi16_sse2(&reg_0, &reg_32, 6); + reg_1 = mm_round_epi16_sse2(&reg_1, &reg_32, 6); + + // Pack from 16-bit to 8-bit and put them in the right order + reg_0 = _mm_packus_epi16(reg_0, reg_0); + reg_1 = _mm_packus_epi16(reg_1, reg_1); + + // Save the result + *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(reg_0); + *((uint32_t *)(dst_ptr + dst_stride)) = _mm_cvtsi128_si32(reg_1); + + // Update the source by two rows + src_ptr += src_stride_unrolled; + dst_ptr += dst_stride_unrolled; + + src_reg_m1001 = src_reg_1223; + src_reg_1 = src_reg_3; + } +} + +// From vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm filter8_1dfunction vpx_filter_block1d16_v8_ssse3; filter8_1dfunction vpx_filter_block1d16_h8_ssse3; -filter8_1dfunction vpx_filter_block1d8_v8_ssse3; -filter8_1dfunction vpx_filter_block1d8_h8_ssse3; filter8_1dfunction vpx_filter_block1d4_v8_ssse3; -filter8_1dfunction vpx_filter_block1d4_h8_ssse3; filter8_1dfunction vpx_filter_block1d16_v8_avg_ssse3; filter8_1dfunction vpx_filter_block1d16_h8_avg_ssse3; filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3; @@ -198,6 +689,15 @@ filter8_1dfunction vpx_filter_block1d8_h8_avg_ssse3; filter8_1dfunction vpx_filter_block1d4_v8_avg_ssse3; filter8_1dfunction vpx_filter_block1d4_h8_avg_ssse3; +// Use the [vh]8 version because there is no [vh]4 implementation.
+#define vpx_filter_block1d16_v4_avg_ssse3 vpx_filter_block1d16_v8_avg_ssse3 +#define vpx_filter_block1d16_h4_avg_ssse3 vpx_filter_block1d16_h8_avg_ssse3 +#define vpx_filter_block1d8_v4_avg_ssse3 vpx_filter_block1d8_v8_avg_ssse3 +#define vpx_filter_block1d8_h4_avg_ssse3 vpx_filter_block1d8_h8_avg_ssse3 +#define vpx_filter_block1d4_v4_avg_ssse3 vpx_filter_block1d4_v8_avg_ssse3 +#define vpx_filter_block1d4_h4_avg_ssse3 vpx_filter_block1d4_h8_avg_ssse3 + +// From vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm filter8_1dfunction vpx_filter_block1d16_v2_ssse3; filter8_1dfunction vpx_filter_block1d16_h2_ssse3; filter8_1dfunction vpx_filter_block1d8_v2_ssse3; @@ -231,10 +731,12 @@ filter8_1dfunction vpx_filter_block1d4_h2_avg_ssse3; // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, // int y_step_q4, int w, int h); -FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , ssse3); -FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , ssse3); -FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, ssse3); -FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_, ssse3); +FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , ssse3, 0); +FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * (num_taps / 2 - 1), , + ssse3, 0); +FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, ssse3, 1); +FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, + src - src_stride * (num_taps / 2 - 1), avg_, ssse3, 1); static void filter_horiz_w8_ssse3(const uint8_t *const src, const ptrdiff_t src_stride, @@ -571,7 +1073,7 @@ void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, } } -// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, +// void vpx_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, @@ -581,5 +1083,5 @@ void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h); -FUN_CONV_2D(, ssse3); -FUN_CONV_2D(avg_, ssse3); +FUN_CONV_2D(, ssse3, 0); +FUN_CONV_2D(avg_, ssse3, 1); diff --git a/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm b/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm index 952d9307d1a4..fe617f120744 100644 --- a/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm +++ b/media/libvpx/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm @@ -26,7 +26,7 @@ SECTION .text %define LOCAL_VARS_SIZE 16*6 %macro SETUP_LOCAL_VARS 0 - ; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 + + ; TODO(slavarnway): using xmm registers for these on VPX_ARCH_X86_64 + ; pmaddubsw has a higher latency on some platforms, this might be eased by ; interleaving the instructions. 
%define k0k1 [rsp + 16*0] @@ -48,7 +48,7 @@ SECTION .text mova k2k3, m1 mova k4k5, m2 mova k6k7, m3 -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 %define krd m12 %define tmp0 [rsp + 16*4] %define tmp1 [rsp + 16*5] @@ -68,7 +68,7 @@ SECTION .text %endm ;------------------------------------------------------------------------------- -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 %define LOCAL_VARS_SIZE_H4 0 %else %define LOCAL_VARS_SIZE_H4 16*4 @@ -79,7 +79,7 @@ cglobal filter_block1d4_%1, 6, 6, 11, LOCAL_VARS_SIZE_H4, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] packsswb m4, m4 -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 %define k0k1k4k5 m8 %define k2k3k6k7 m9 %define krd m10 @@ -339,7 +339,7 @@ SUBPIX_HFILTER4 h8_avg ; vpx_filter_block1d4_h8_avg_ssse3 ; TODO(Linfeng): Detect cpu type and choose the code with better performance. %define X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON 1 -%if ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON +%if VPX_ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON %define NUM_GENERAL_REG_USED 9 %else %define NUM_GENERAL_REG_USED 6 @@ -359,9 +359,9 @@ cglobal filter_block1d%2_%1, 6, NUM_GENERAL_REG_USED, 15, LOCAL_VARS_SIZE, \ dec heightd -%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON +%if VPX_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 %define src1q r7 %define sstride6q r8 %define dst_stride dstrideq @@ -467,7 +467,7 @@ cglobal filter_block1d%2_%1, 6, NUM_GENERAL_REG_USED, 15, LOCAL_VARS_SIZE, \ movx [dstq], m0 %else - ; ARCH_X86_64 + ; VPX_ARCH_X86_64 movx m0, [srcq ] ;A movx m1, [srcq + sstrideq ] ;B @@ -567,7 +567,7 @@ cglobal filter_block1d%2_%1, 6, NUM_GENERAL_REG_USED, 15, LOCAL_VARS_SIZE, \ %endif movx [dstq], m0 -%endif ; ARCH_X86_64 +%endif ; VPX_ARCH_X86_64 .done: REP_RET @@ -581,9 +581,9 @@ cglobal filter_block1d16_%1, 6, NUM_GENERAL_REG_USED, 16, LOCAL_VARS_SIZE, \ mova m4, [filterq] SETUP_LOCAL_VARS -%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON +%if VPX_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON -%if ARCH_X86_64 +%if VPX_ARCH_X86_64 %define src1q r7 %define sstride6q r8 %define dst_stride dstrideq @@ -654,7 +654,7 @@ cglobal filter_block1d16_%1, 6, NUM_GENERAL_REG_USED, 16, LOCAL_VARS_SIZE, \ REP_RET %else - ; ARCH_X86_64 + ; VPX_ARCH_X86_64 dec heightd movu m1, [srcq ] ;A @@ -790,7 +790,7 @@ cglobal filter_block1d16_%1, 6, NUM_GENERAL_REG_USED, 16, LOCAL_VARS_SIZE, \ .done: REP_RET -%endif ; ARCH_X86_64 +%endif ; VPX_ARCH_X86_64 %endm diff --git a/media/libvpx/libvpx/vpx_mem/include/vpx_mem_intrnl.h b/media/libvpx/libvpx/vpx_mem/include/vpx_mem_intrnl.h index 2c259d322e6a..5631130243b3 100644 --- a/media/libvpx/libvpx/vpx_mem/include/vpx_mem_intrnl.h +++ b/media/libvpx/libvpx/vpx_mem/include/vpx_mem_intrnl.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ -#define VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ +#ifndef VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ +#define VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ #include "./vpx_config.h" #define ADDRESS_STORAGE_SIZE sizeof(size_t) @@ -28,4 +28,4 @@ #define align_addr(addr, align) \ (void *)(((size_t)(addr) + ((align)-1)) & ~(size_t)((align)-1)) -#endif // VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ +#endif // VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ diff --git a/media/libvpx/libvpx/vpx_mem/vpx_mem.c b/media/libvpx/libvpx/vpx_mem/vpx_mem.c index eeba34c373bb..18abf1158b5b 100644 --- a/media/libvpx/libvpx/vpx_mem/vpx_mem.c +++ b/media/libvpx/libvpx/vpx_mem/vpx_mem.c @@ -16,12 +16,14 @@ #include "include/vpx_mem_intrnl.h" #include "vpx/vpx_integer.h" +#if !defined(VPX_MAX_ALLOCABLE_MEMORY) #if SIZE_MAX > (1ULL << 40) #define VPX_MAX_ALLOCABLE_MEMORY (1ULL << 40) #else // For 32-bit targets keep this below INT_MAX to avoid valgrind warnings. #define VPX_MAX_ALLOCABLE_MEMORY ((1ULL << 31) - (1 << 16)) #endif +#endif // Returns 0 in case of overflow of nmemb * size. static int check_size_argument_overflow(uint64_t nmemb, uint64_t size) { diff --git a/media/libvpx/libvpx/vpx_mem/vpx_mem.h b/media/libvpx/libvpx/vpx_mem/vpx_mem.h index a4274b8856cc..7689a05e6ea1 100644 --- a/media/libvpx/libvpx/vpx_mem/vpx_mem.h +++ b/media/libvpx/libvpx/vpx_mem/vpx_mem.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_MEM_VPX_MEM_H_ -#define VPX_MEM_VPX_MEM_H_ +#ifndef VPX_VPX_MEM_VPX_MEM_H_ +#define VPX_VPX_MEM_VPX_MEM_H_ #include "vpx_config.h" #if defined(__uClinux__) @@ -49,4 +49,4 @@ static INLINE void *vpx_memset16(void *dest, int val, size_t length) { } #endif -#endif // VPX_MEM_VPX_MEM_H_ +#endif // VPX_VPX_MEM_VPX_MEM_H_ diff --git a/media/libvpx/libvpx/vpx_ports/arm.h b/media/libvpx/libvpx/vpx_ports/arm.h index 7be6104a4f50..6458a2c5b001 100644 --- a/media/libvpx/libvpx/vpx_ports/arm.h +++ b/media/libvpx/libvpx/vpx_ports/arm.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_PORTS_ARM_H_ -#define VPX_PORTS_ARM_H_ +#ifndef VPX_VPX_PORTS_ARM_H_ +#define VPX_VPX_PORTS_ARM_H_ #include <stdlib.h> #include "vpx_config.h" @@ -36,4 +36,4 @@ int arm_cpu_caps(void); } // extern "C" #endif -#endif // VPX_PORTS_ARM_H_ +#endif // VPX_VPX_PORTS_ARM_H_ diff --git a/media/libvpx/libvpx/vpx_ports/asmdefs_mmi.h b/media/libvpx/libvpx/vpx_ports/asmdefs_mmi.h index a9a49745afd6..28355bf9fbfb 100644 --- a/media/libvpx/libvpx/vpx_ports/asmdefs_mmi.h +++ b/media/libvpx/libvpx/vpx_ports/asmdefs_mmi.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_PORTS_ASMDEFS_MMI_H_ -#define VPX_PORTS_ASMDEFS_MMI_H_ +#ifndef VPX_VPX_PORTS_ASMDEFS_MMI_H_ +#define VPX_VPX_PORTS_ASMDEFS_MMI_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" @@ -78,4 +78,4 @@ #endif /* HAVE_MMI */ -#endif /* VPX_PORTS_ASMDEFS_MMI_H_ */ +#endif // VPX_VPX_PORTS_ASMDEFS_MMI_H_ diff --git a/media/libvpx/libvpx/vpx_ports/bitops.h b/media/libvpx/libvpx/vpx_ports/bitops.h index 0ed7189ff655..5b2f31cd11e4 100644 --- a/media/libvpx/libvpx/vpx_ports/bitops.h +++ b/media/libvpx/libvpx/vpx_ports/bitops.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree.
*/ -#ifndef VPX_PORTS_BITOPS_H_ -#define VPX_PORTS_BITOPS_H_ +#ifndef VPX_VPX_PORTS_BITOPS_H_ +#define VPX_VPX_PORTS_BITOPS_H_ #include @@ -72,4 +72,4 @@ static INLINE int get_msb(unsigned int n) { } // extern "C" #endif -#endif // VPX_PORTS_BITOPS_H_ +#endif // VPX_VPX_PORTS_BITOPS_H_ diff --git a/media/libvpx/libvpx/vpx_ports/emmintrin_compat.h b/media/libvpx/libvpx/vpx_ports/emmintrin_compat.h index 903534e0c0f3..d6cc68ee4d2a 100644 --- a/media/libvpx/libvpx/vpx_ports/emmintrin_compat.h +++ b/media/libvpx/libvpx/vpx_ports/emmintrin_compat.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_PORTS_EMMINTRIN_COMPAT_H_ -#define VPX_PORTS_EMMINTRIN_COMPAT_H_ +#ifndef VPX_VPX_PORTS_EMMINTRIN_COMPAT_H_ +#define VPX_VPX_PORTS_EMMINTRIN_COMPAT_H_ #if defined(__GNUC__) && __GNUC__ < 4 /* From emmintrin.h (gcc 4.5.3) */ @@ -52,4 +52,4 @@ extern __inline __m128d } #endif -#endif // VPX_PORTS_EMMINTRIN_COMPAT_H_ +#endif // VPX_VPX_PORTS_EMMINTRIN_COMPAT_H_ diff --git a/media/libvpx/libvpx/vpx_ports/emms_mmx.asm b/media/libvpx/libvpx/vpx_ports/emms_mmx.asm new file mode 100644 index 000000000000..9f33590a285a --- /dev/null +++ b/media/libvpx/libvpx/vpx_ports/emms_mmx.asm @@ -0,0 +1,18 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +section .text +global sym(vpx_clear_system_state) PRIVATE +sym(vpx_clear_system_state): + emms + ret diff --git a/media/libvpx/libvpx/vpx_ports/config.h b/media/libvpx/libvpx/vpx_ports/emms_mmx.c similarity index 66% rename from media/libvpx/libvpx/vpx_ports/config.h rename to media/libvpx/libvpx/vpx_ports/emms_mmx.c index 3c1ab99f4aa3..f1036b98edb8 100644 --- a/media/libvpx/libvpx/vpx_ports/config.h +++ b/media/libvpx/libvpx/vpx_ports/emms_mmx.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,9 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_PORTS_CONFIG_H_ -#define VPX_PORTS_CONFIG_H_ +#include <mmintrin.h> -#include "vpx_config.h" +#include "vpx_ports/system_state.h" -#endif // VPX_PORTS_CONFIG_H_ +void vpx_clear_system_state() { _mm_empty(); } diff --git a/media/libvpx/libvpx/vpx_ports/emms.asm b/media/libvpx/libvpx/vpx_ports/float_control_word.asm similarity index 90% rename from media/libvpx/libvpx/vpx_ports/emms.asm rename to media/libvpx/libvpx/vpx_ports/float_control_word.asm index db8da2873752..256dae084479 100644 --- a/media/libvpx/libvpx/vpx_ports/emms.asm +++ b/media/libvpx/libvpx/vpx_ports/float_control_word.asm @@ -12,11 +12,6 @@ %include "vpx_ports/x86_abi_support.asm" section .text -global sym(vpx_reset_mmx_state) PRIVATE -sym(vpx_reset_mmx_state): - emms - ret - %if LIBVPX_YASM_WIN64 global sym(vpx_winx64_fldcw) PRIVATE diff --git a/media/libvpx/libvpx/vpx_ports/mem.h b/media/libvpx/libvpx/vpx_ports/mem.h index bfef783b1332..4e904130481f 100644 --- a/media/libvpx/libvpx/vpx_ports/mem.h +++ b/media/libvpx/libvpx/vpx_ports/mem.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_PORTS_MEM_H_ -#define VPX_PORTS_MEM_H_ +#ifndef VPX_VPX_PORTS_MEM_H_ +#define VPX_VPX_PORTS_MEM_H_ #include "vpx_config.h" #include "vpx/vpx_integer.h" @@ -51,4 +51,24 @@ #define VPX_WITH_ASAN 0 #endif // __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) -#endif // VPX_PORTS_MEM_H_ +#if !defined(__has_attribute) +#define __has_attribute(x) 0 +#endif // !defined(__has_attribute) + +#if __has_attribute(uninitialized) +// Attribute "uninitialized" disables -ftrivial-auto-var-init=pattern for +// the specified variable. +// +// -ftrivial-auto-var-init is a security risk mitigation feature, so the +// attribute should not be used "just in case", but only to fix real +// performance bottlenecks when other approaches do not work. In general the +// compiler is quite effective at eliminating unneeded initializations +// introduced by the flag, e.g. when they are followed by actual +// initialization by a program. However, if compiler optimization fails and +// code refactoring is hard, the attribute can be used as a workaround. +#define VPX_UNINITIALIZED __attribute__((uninitialized)) +#else +#define VPX_UNINITIALIZED +#endif // __has_attribute(uninitialized) + +#endif // VPX_VPX_PORTS_MEM_H_ diff --git a/media/libvpx/libvpx/vpx_ports/mem_ops.h b/media/libvpx/libvpx/vpx_ports/mem_ops.h index 343f27577c00..b17015e7ecf0 100644 --- a/media/libvpx/libvpx/vpx_ports/mem_ops.h +++ b/media/libvpx/libvpx/vpx_ports/mem_ops.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_PORTS_MEM_OPS_H_ -#define VPX_PORTS_MEM_OPS_H_ +#ifndef VPX_VPX_PORTS_MEM_OPS_H_ +#define VPX_VPX_PORTS_MEM_OPS_H_ /* \file * \brief Provides portable memory access primitives @@ -224,5 +224,4 @@ static VPX_INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val) { mem[3] = (MAU_T)((val >> 24) & 0xff); } /* clang-format on */ - -#endif // VPX_PORTS_MEM_OPS_H_ +#endif // VPX_VPX_PORTS_MEM_OPS_H_ diff --git a/media/libvpx/libvpx/vpx_ports/mem_ops_aligned.h b/media/libvpx/libvpx/vpx_ports/mem_ops_aligned.h index ccac391ba007..8649b87623b5 100644 --- a/media/libvpx/libvpx/vpx_ports/mem_ops_aligned.h +++ b/media/libvpx/libvpx/vpx_ports/mem_ops_aligned.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree.
*/ -#ifndef VPX_PORTS_MEM_OPS_ALIGNED_H_ -#define VPX_PORTS_MEM_OPS_ALIGNED_H_ +#ifndef VPX_VPX_PORTS_MEM_OPS_ALIGNED_H_ +#define VPX_VPX_PORTS_MEM_OPS_ALIGNED_H_ #include "vpx/vpx_integer.h" @@ -168,4 +168,4 @@ mem_put_le_aligned_generic(32) #undef swap_endian_32_se /* clang-format on */ -#endif // VPX_PORTS_MEM_OPS_ALIGNED_H_ +#endif // VPX_VPX_PORTS_MEM_OPS_ALIGNED_H_ diff --git a/media/libvpx/libvpx/vpx_ports/msvc.h b/media/libvpx/libvpx/vpx_ports/msvc.h index 3ff71474b3b4..d58de3535a07 100644 --- a/media/libvpx/libvpx/vpx_ports/msvc.h +++ b/media/libvpx/libvpx/vpx_ports/msvc.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_PORTS_MSVC_H_ -#define VPX_PORTS_MSVC_H_ +#ifndef VPX_VPX_PORTS_MSVC_H_ +#define VPX_VPX_PORTS_MSVC_H_ #ifdef _MSC_VER #include "./vpx_config.h" @@ -29,4 +29,4 @@ static INLINE double round(double x) { #endif // _MSC_VER < 1800 #endif // _MSC_VER -#endif // VPX_PORTS_MSVC_H_ +#endif // VPX_VPX_PORTS_MSVC_H_ diff --git a/media/libvpx/libvpx/vpx_ports/ppc.h b/media/libvpx/libvpx/vpx_ports/ppc.h index ed29ef25b45d..a11f4e8732ef 100644 --- a/media/libvpx/libvpx/vpx_ports/ppc.h +++ b/media/libvpx/libvpx/vpx_ports/ppc.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_PORTS_PPC_H_ -#define VPX_PORTS_PPC_H_ +#ifndef VPX_VPX_PORTS_PPC_H_ +#define VPX_VPX_PORTS_PPC_H_ #include <stdlib.h> #include "./vpx_config.h" @@ -26,4 +26,4 @@ int ppc_simd_caps(void); } // extern "C" #endif -#endif // VPX_PORTS_PPC_H_ +#endif // VPX_VPX_PORTS_PPC_H_ diff --git a/media/libvpx/libvpx/vpx_ports/system_state.h b/media/libvpx/libvpx/vpx_ports/system_state.h index 086c64681f58..32ebd0ed8cc7 100644 --- a/media/libvpx/libvpx/vpx_ports/system_state.h +++ b/media/libvpx/libvpx/vpx_ports/system_state.h @@ -8,15 +8,23 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_PORTS_SYSTEM_STATE_H_ -#define VPX_PORTS_SYSTEM_STATE_H_ +#ifndef VPX_VPX_PORTS_SYSTEM_STATE_H_ +#define VPX_VPX_PORTS_SYSTEM_STATE_H_ #include "./vpx_config.h" -#if ARCH_X86 || ARCH_X86_64 -void vpx_reset_mmx_state(void); -#define vpx_clear_system_state() vpx_reset_mmx_state() +#ifdef __cplusplus +extern "C" { +#endif + +#if (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX +extern void vpx_clear_system_state(void); #else #define vpx_clear_system_state() -#endif // ARCH_X86 || ARCH_X86_64 -#endif // VPX_PORTS_SYSTEM_STATE_H_ +#endif // (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VPX_PORTS_SYSTEM_STATE_H_ diff --git a/media/libvpx/libvpx/vpx_ports/vpx_once.h b/media/libvpx/libvpx/vpx_ports/vpx_once.h index 7d9fc3b4063b..4eb592b87e57 100644 --- a/media/libvpx/libvpx/vpx_ports/vpx_once.h +++ b/media/libvpx/libvpx/vpx_ports/vpx_once.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree.
*/ -#ifndef VPX_PORTS_VPX_ONCE_H_ -#define VPX_PORTS_VPX_ONCE_H_ +#ifndef VPX_VPX_PORTS_VPX_ONCE_H_ +#define VPX_VPX_PORTS_VPX_ONCE_H_ #include "vpx_config.h" @@ -137,4 +137,4 @@ static void once(void (*func)(void)) { } #endif -#endif // VPX_PORTS_VPX_ONCE_H_ +#endif // VPX_VPX_PORTS_VPX_ONCE_H_ diff --git a/media/libvpx/libvpx/vpx_ports/vpx_ports.mk b/media/libvpx/libvpx/vpx_ports/vpx_ports.mk index e17145e6cb87..8bc897dd87c1 100644 --- a/media/libvpx/libvpx/vpx_ports/vpx_ports.mk +++ b/media/libvpx/libvpx/vpx_ports/vpx_ports.mk @@ -17,18 +17,29 @@ PORTS_SRCS-yes += msvc.h PORTS_SRCS-yes += system_state.h PORTS_SRCS-yes += vpx_timer.h -ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) -PORTS_SRCS-yes += emms.asm +ifeq ($(VPX_ARCH_X86),yes) +PORTS_SRCS-$(HAVE_MMX) += emms_mmx.c +endif +ifeq ($(VPX_ARCH_X86_64),yes) +# Visual Studio x64 does not support the _mm_empty() intrinsic. +PORTS_SRCS-$(HAVE_MMX) += emms_mmx.asm +endif + +ifeq ($(VPX_ARCH_X86_64),yes) +PORTS_SRCS-yes += float_control_word.asm +endif + +ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes) PORTS_SRCS-yes += x86.h PORTS_SRCS-yes += x86_abi_support.asm endif -PORTS_SRCS-$(ARCH_ARM) += arm_cpudetect.c -PORTS_SRCS-$(ARCH_ARM) += arm.h +PORTS_SRCS-$(VPX_ARCH_ARM) += arm_cpudetect.c +PORTS_SRCS-$(VPX_ARCH_ARM) += arm.h -PORTS_SRCS-$(ARCH_PPC) += ppc_cpudetect.c -PORTS_SRCS-$(ARCH_PPC) += ppc.h +PORTS_SRCS-$(VPX_ARCH_PPC) += ppc_cpudetect.c +PORTS_SRCS-$(VPX_ARCH_PPC) += ppc.h -ifeq ($(ARCH_MIPS), yes) +ifeq ($(VPX_ARCH_MIPS), yes) PORTS_SRCS-yes += asmdefs_mmi.h endif diff --git a/media/libvpx/libvpx/vpx_ports/vpx_timer.h b/media/libvpx/libvpx/vpx_ports/vpx_timer.h index 2083b4ece448..4934d5296a03 100644 --- a/media/libvpx/libvpx/vpx_ports/vpx_timer.h +++ b/media/libvpx/libvpx/vpx_ports/vpx_timer.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_PORTS_VPX_TIMER_H_ -#define VPX_PORTS_VPX_TIMER_H_ +#ifndef VPX_VPX_PORTS_VPX_TIMER_H_ +#define VPX_VPX_PORTS_VPX_TIMER_H_ #include "./vpx_config.h" @@ -106,4 +106,4 @@ static INLINE int vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { return 0; } #endif /* CONFIG_OS_SUPPORT */ -#endif // VPX_PORTS_VPX_TIMER_H_ +#endif // VPX_VPX_PORTS_VPX_TIMER_H_ diff --git a/media/libvpx/libvpx/vpx_ports/x86.h b/media/libvpx/libvpx/vpx_ports/x86.h index ced65ac058fa..ed26b16715a7 100644 --- a/media/libvpx/libvpx/vpx_ports/x86.h +++ b/media/libvpx/libvpx/vpx_ports/x86.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_PORTS_X86_H_ -#define VPX_PORTS_X86_H_ +#ifndef VPX_VPX_PORTS_X86_H_ +#define VPX_VPX_PORTS_X86_H_ #include <stdlib.h> #if defined(_MSC_VER) @@ -43,7 +43,7 @@ typedef enum { } vpx_cpu_t; #if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__) -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 #define cpuid(func, func2, ax, bx, cx, dx) \ __asm__ __volatile__("cpuid \n\t" \ : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \ @@ -59,7 +59,7 @@ typedef enum { #endif #elif defined(__SUNPRO_C) || \ defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/ -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 #define cpuid(func, func2, ax, bx, cx, dx) \ asm volatile( \ "xchg %rsi, %rbx \n\t" \ @@ -79,7 +79,7 @@ typedef enum { : "a"(func), "c"(func2)); #endif #else /* end __SUNPRO__ */ -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 #if defined(_MSC_VER) && _MSC_VER > 1500 #define cpuid(func, func2, a, b, c, d) \ do { \ @@ -161,7 +161,7 @@ static INLINE uint64_t xgetbv(void) { #define HAS_AVX2 0x080 #define HAS_AVX512 0x100 #ifndef BIT -#define BIT(n) (1u << n) +#define BIT(n) (1u << (n)) #endif static INLINE int x86_simd_caps(void) { @@ -202,6 +202,7 @@ static INLINE int x86_simd_caps(void) { // bits 27 (OSXSAVE) & 28 (256-bit AVX) if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) { + // Check for OS-support of YMM state. Necessary for AVX and AVX2. if ((xgetbv() & 0x6) == 0x6) { flags |= HAS_AVX; @@ -214,8 +215,10 @@ static INLINE int x86_simd_caps(void) { // bits 16 (AVX-512F) & 17 (AVX-512DQ) & 28 (AVX-512CD) & // 30 (AVX-512BW) & 32 (AVX-512VL) if ((reg_ebx & (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) == - (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) - flags |= HAS_AVX512; + (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) { + // Check for OS-support of ZMM and YMM state. Necessary for AVX-512. + if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512; + } } } } @@ -223,11 +226,26 @@ static INLINE int x86_simd_caps(void) { return flags & mask; } -// Note: -// 32-bit CPU cycle counter is light-weighted for most function performance -// measurement. For large function (CPU time > a couple of seconds), 64-bit -// counter should be used. -// 32-bit CPU cycle counter +// Fine-Grain Measurement Functions +// +// If you are timing a small region of code, access the timestamp counter +// (TSC) via: +// +// unsigned int start = x86_tsc_start(); +// ... +// unsigned int end = x86_tsc_end(); +// unsigned int diff = end - start; +// +// The start/end functions introduce a few more instructions than using +// x86_readtsc directly, but prevent the CPU's out-of-order execution from +// affecting the measurement (by having earlier/later instructions be evaluated +// in the time interval). See the white paper, "How to Benchmark Code +// Execution Times on Intel® IA-32 and IA-64 Instruction Set Architectures" by +// Gabriele Paoloni for more information. +// +// If you are timing a large function (CPU time > a couple of seconds), use +// x86_readtsc64 to read the timestamp counter in a 64-bit integer. The +// out-of-order leakage that can occur is minimal compared to total runtime.
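[Editor's note] A minimal usage sketch of the start/end pair documented above; the harness itself (iteration count, min-of-N policy, function pointer) is the editor's illustration, not a libvpx API:

#include <limits.h>

static unsigned int time_min_cycles(void (*fn)(void)) {
  unsigned int best = UINT_MAX;
  for (int i = 0; i < 16; ++i) {  // repeat and keep the minimum to damp noise
    const unsigned int start = x86_tsc_start();  // cpuid, then rdtsc
    fn();                                        // region being measured
    const unsigned int end = x86_tsc_end();      // rdtscp, then cpuid
    if (end - start < best) best = end - start;  // unsigned wrap is safe
  }
  return best;  // 32-bit cycle count; use x86_readtsc64 for long regions
}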
static INLINE unsigned int x86_readtsc(void) { #if defined(__GNUC__) && __GNUC__ unsigned int tsc; @@ -238,7 +256,7 @@ static INLINE unsigned int x86_readtsc(void) { asm volatile("rdtsc\n\t" : "=a"(tsc) :); return tsc; #else -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 return (unsigned int)__rdtsc(); #else __asm rdtsc; @@ -256,7 +274,7 @@ static INLINE uint64_t x86_readtsc64(void) { asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi)); return ((uint64_t)hi << 32) | lo; #else -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 return (uint64_t)__rdtsc(); #else __asm rdtsc; @@ -264,12 +282,47 @@ static INLINE uint64_t x86_readtsc64(void) { #endif } +// 32-bit CPU cycle counter with a partial fence against out-of-order execution. +static INLINE unsigned int x86_readtscp(void) { +#if defined(__GNUC__) && __GNUC__ + unsigned int tscp; + __asm__ __volatile__("rdtscp\n\t" : "=a"(tscp) :); + return tscp; +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) + unsigned int tscp; + asm volatile("rdtscp\n\t" : "=a"(tscp) :); + return tscp; +#elif defined(_MSC_VER) + unsigned int ui; + return (unsigned int)__rdtscp(&ui); +#else +#if VPX_ARCH_X86_64 + return (unsigned int)__rdtscp(); +#else + __asm rdtscp; +#endif +#endif +} + +static INLINE unsigned int x86_tsc_start(void) { + unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; + cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); + return x86_readtsc(); +} + +static INLINE unsigned int x86_tsc_end(void) { + uint32_t v = x86_readtscp(); + unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; + cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); + return v; +} + #if defined(__GNUC__) && __GNUC__ #define x86_pause_hint() __asm__ __volatile__("pause \n\t") #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) #define x86_pause_hint() asm volatile("pause \n\t") #else -#if ARCH_X86_64 +#if VPX_ARCH_X86_64 #define x86_pause_hint() _mm_pause(); #else #define x86_pause_hint() __asm pause @@ -294,7 +347,7 @@ static unsigned short x87_get_control_word(void) { asm volatile("fstcw %0\n\t" : "=m"(*&mode) :); return mode; } -#elif ARCH_X86_64 +#elif VPX_ARCH_X86_64 /* No fldcw intrinsics on Windows x64, punt to external asm */ extern void vpx_winx64_fldcw(unsigned short mode); extern unsigned short vpx_winx64_fstcw(void); @@ -313,14 +366,23 @@ static unsigned short x87_get_control_word(void) { static INLINE unsigned int x87_set_double_precision(void) { unsigned int mode = x87_get_control_word(); + // Intel 64 and IA-32 Architectures Developer's Manual: Vol. 1 + // https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf + // 8.1.5.2 Precision Control Field + // Bits 8 and 9 (0x300) of the x87 FPU Control Word ("Precision Control") + // determine the number of bits used in floating point calculations. To match + // later SSE instructions restrict x87 operations to Double Precision (0x200). 
+ // Precision PC Field + // Single Precision (24-Bits) 00B + // Reserved 01B + // Double Precision (53-Bits) 10B + // Extended Precision (64-Bits) 11B x87_set_control_word((mode & ~0x300) | 0x200); return mode; } -extern void vpx_reset_mmx_state(void); - #ifdef __cplusplus } // extern "C" #endif -#endif // VPX_PORTS_X86_H_ +#endif // VPX_VPX_PORTS_X86_H_ diff --git a/media/libvpx/libvpx/vpx_scale/generic/gen_scalers.c b/media/libvpx/libvpx/vpx_scale/generic/gen_scalers.c index b554a56e832e..d8db4b354758 100644 --- a/media/libvpx/libvpx/vpx_scale/generic/gen_scalers.c +++ b/media/libvpx/libvpx/vpx_scale/generic/gen_scalers.c @@ -12,8 +12,8 @@ #include "vpx_scale/vpx_scale.h" #include "vpx_mem/vpx_mem.h" /**************************************************************************** -* Imports -****************************************************************************/ + * Imports + ****************************************************************************/ /**************************************************************************** * diff --git a/media/libvpx/libvpx/vpx_scale/generic/vpx_scale.c b/media/libvpx/libvpx/vpx_scale/generic/vpx_scale.c index 20e1ff90fd57..958bb320fc41 100644 --- a/media/libvpx/libvpx/vpx_scale/generic/vpx_scale.c +++ b/media/libvpx/libvpx/vpx_scale/generic/vpx_scale.c @@ -17,8 +17,8 @@ ***************************************************************************/ /**************************************************************************** -* Header Files -****************************************************************************/ + * Header Files + ****************************************************************************/ #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" #include "vpx_scale/vpx_scale.h" diff --git a/media/libvpx/libvpx/vpx_scale/generic/yv12config.c b/media/libvpx/libvpx/vpx_scale/generic/yv12config.c index 9c7ca42c78b4..eee291c30d8b 100644 --- a/media/libvpx/libvpx/vpx_scale/generic/yv12config.c +++ b/media/libvpx/libvpx/vpx_scale/generic/yv12config.c @@ -15,9 +15,12 @@ #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" +#if defined(VPX_MAX_ALLOCABLE_MEMORY) +#include "vp9/common/vp9_onyxc_int.h" +#endif // VPX_MAX_ALLOCABLE_MEMORY /**************************************************************************** -* Exports -****************************************************************************/ + * Exports + ****************************************************************************/ /**************************************************************************** * @@ -54,13 +57,21 @@ int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int uv_width = aligned_width >> 1; int uv_height = aligned_height >> 1; /** There is currently a bunch of code which assumes - * uv_stride == y_stride/2, so enforce this here. */ + * uv_stride == y_stride/2, so enforce this here. */ int uv_stride = y_stride >> 1; int uvplane_size = (uv_height + border) * uv_stride; - const int frame_size = yplane_size + 2 * uvplane_size; + const size_t frame_size = yplane_size + 2 * uvplane_size; if (!ybf->buffer_alloc) { ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size); +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) + // This memset is needed for fixing the issue of using uninitialized + // value in msan test. It will cause a perf loss, so only do this for + // msan test. 
+ memset(ybf->buffer_alloc, 0, frame_size); +#endif +#endif ybf->buffer_alloc_sz = frame_size; } @@ -142,6 +153,17 @@ int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border, int byte_alignment, vpx_codec_frame_buffer_t *fb, vpx_get_frame_buffer_cb_fn_t cb, void *cb_priv) { +#if CONFIG_SIZE_LIMIT + if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) return -1; +#endif + + /* Only support allocating buffers that have a border that's a multiple + * of 32. The border restriction is required to get 16-byte alignment of + * the start of the chroma rows without introducing an arbitrary gap + * between planes, which would break the semantics of things like + * vpx_img_set_rect(). */ + if (border & 0x1f) return -3; + if (ybf) { const int vp9_byte_align = (byte_alignment == 0) ? 1 : byte_alignment; const int aligned_width = (width + 7) & ~7; @@ -166,9 +188,16 @@ int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, uint8_t *buf = NULL; - // frame_size is stored in buffer_alloc_sz, which is an int. If it won't +#if defined(VPX_MAX_ALLOCABLE_MEMORY) + // The decoder may allocate REF_FRAMES frame buffers in the frame buffer + // pool. Bound the total amount of allocated memory as if these REF_FRAMES + // frame buffers were allocated in a single allocation. + if (frame_size > VPX_MAX_ALLOCABLE_MEMORY / REF_FRAMES) return -1; +#endif // VPX_MAX_ALLOCABLE_MEMORY + + // frame_size is stored in buffer_alloc_sz, which is a size_t. If it won't // fit, fail early. - if (frame_size > INT_MAX) { + if (frame_size > SIZE_MAX) { return -1; } @@ -192,18 +221,19 @@ int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, // This memset is needed for fixing the issue of using uninitialized // value in msan test. It will cause a perf loss, so only do this for // msan test. - memset(ybf->buffer_alloc, 0, (int)frame_size); + memset(ybf->buffer_alloc, 0, (size_t)frame_size); #endif #endif - } else if (frame_size > (size_t)ybf->buffer_alloc_sz) { + } else if (frame_size > ybf->buffer_alloc_sz) { // Allocation to hold larger frame, or first allocation. vpx_free(ybf->buffer_alloc); ybf->buffer_alloc = NULL; + ybf->buffer_alloc_sz = 0; ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, (size_t)frame_size); if (!ybf->buffer_alloc) return -1; - ybf->buffer_alloc_sz = (int)frame_size; + ybf->buffer_alloc_sz = (size_t)frame_size; // This memset is needed for fixing valgrind error from C loop filter // due to access uninitialized memory in frame border. It could be @@ -211,13 +241,6 @@ int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz); } - /* Only support allocating buffers that have a border that's a multiple - * of 32. The border restriction is required to get 16-byte alignment of - * the start of the chroma rows without introducing an arbitrary gap - * between planes, which would break the semantics of things like - * vpx_img_set_rect(). 
*/ - if (border & 0x1f) return -3; - ybf->y_crop_width = width; ybf->y_crop_height = height; ybf->y_width = aligned_width; @@ -231,7 +254,7 @@ int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, ybf->uv_stride = uv_stride; ybf->border = border; - ybf->frame_size = (int)frame_size; + ybf->frame_size = (size_t)frame_size; ybf->subsampling_x = ss_x; ybf->subsampling_y = ss_y; diff --git a/media/libvpx/libvpx/vpx_scale/vpx_scale.h b/media/libvpx/libvpx/vpx_scale/vpx_scale.h index 478a4834610c..fd5ba7ccdc16 100644 --- a/media/libvpx/libvpx/vpx_scale/vpx_scale.h +++ b/media/libvpx/libvpx/vpx_scale/vpx_scale.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_SCALE_VPX_SCALE_H_ -#define VPX_SCALE_VPX_SCALE_H_ +#ifndef VPX_VPX_SCALE_VPX_SCALE_H_ +#define VPX_VPX_SCALE_VPX_SCALE_H_ #include "vpx_scale/yv12config.h" @@ -19,4 +19,4 @@ extern void vpx_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, unsigned int vscale, unsigned int vratio, unsigned int interlaced); -#endif // VPX_SCALE_VPX_SCALE_H_ +#endif // VPX_VPX_SCALE_VPX_SCALE_H_ diff --git a/media/libvpx/libvpx/vpx_scale/yv12config.h b/media/libvpx/libvpx/vpx_scale/yv12config.h index b9b33621449b..2cf18217f601 100644 --- a/media/libvpx/libvpx/vpx_scale/yv12config.h +++ b/media/libvpx/libvpx/vpx_scale/yv12config.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_SCALE_YV12CONFIG_H_ -#define VPX_SCALE_YV12CONFIG_H_ +#ifndef VPX_VPX_SCALE_YV12CONFIG_H_ +#define VPX_VPX_SCALE_YV12CONFIG_H_ #ifdef __cplusplus extern "C" { @@ -49,9 +49,9 @@ typedef struct yv12_buffer_config { uint8_t *alpha_buffer; uint8_t *buffer_alloc; - int buffer_alloc_sz; + size_t buffer_alloc_sz; int border; - int frame_size; + size_t frame_size; int subsampling_x; int subsampling_y; unsigned int bit_depth; @@ -100,4 +100,4 @@ int vpx_free_frame_buffer(YV12_BUFFER_CONFIG *ybf); } #endif -#endif // VPX_SCALE_YV12CONFIG_H_ +#endif // VPX_VPX_SCALE_YV12CONFIG_H_ diff --git a/media/libvpx/libvpx/vpx_util/endian_inl.h b/media/libvpx/libvpx/vpx_util/endian_inl.h index dc387740958e..1b6ef56c6956 100644 --- a/media/libvpx/libvpx/vpx_util/endian_inl.h +++ b/media/libvpx/libvpx/vpx_util/endian_inl.h @@ -9,8 +9,8 @@ // // Endian related functions. -#ifndef VPX_UTIL_ENDIAN_INL_H_ -#define VPX_UTIL_ENDIAN_INL_H_ +#ifndef VPX_VPX_UTIL_ENDIAN_INL_H_ +#define VPX_VPX_UTIL_ENDIAN_INL_H_ #include <stdlib.h> #include "./vpx_config.h" @@ -115,4 +115,4 @@ static INLINE uint64_t BSwap64(uint64_t x) { #endif // HAVE_BUILTIN_BSWAP64 } -#endif // VPX_UTIL_ENDIAN_INL_H_ +#endif // VPX_VPX_UTIL_ENDIAN_INL_H_ diff --git a/media/libvpx/libvpx/vpx_util/vpx_atomics.h b/media/libvpx/libvpx/vpx_util/vpx_atomics.h index b8cf80daeb52..23ad56685131 100644 --- a/media/libvpx/libvpx/vpx_util/vpx_atomics.h +++ b/media/libvpx/libvpx/vpx_util/vpx_atomics.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPX_UTIL_VPX_ATOMICS_H_ -#define VPX_UTIL_VPX_ATOMICS_H_ +#ifndef VPX_VPX_UTIL_VPX_ATOMICS_H_ +#define VPX_VPX_UTIL_VPX_ATOMICS_H_ #include "./vpx_config.h" @@ -51,16 +51,16 @@ extern "C" { do { \ } while (0) #else -#if ARCH_X86 || ARCH_X86_64 +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 // Use a compiler barrier on x86, no runtime penalty.
#define vpx_atomic_memory_barrier() __asm__ __volatile__("" ::: "memory") -#elif ARCH_ARM +#elif VPX_ARCH_ARM #define vpx_atomic_memory_barrier() __asm__ __volatile__("dmb ish" ::: "memory") -#elif ARCH_MIPS +#elif VPX_ARCH_MIPS #define vpx_atomic_memory_barrier() __asm__ __volatile__("sync" ::: "memory") #else #error Unsupported architecture! -#endif // ARCH_X86 || ARCH_X86_64 +#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64 #endif // defined(_MSC_VER) #endif // atomic builtin availability check @@ -68,7 +68,9 @@ extern "C" { // on any platform (to discourage programmer errors by setting values directly). // This primitive MUST be initialized using vpx_atomic_init or VPX_ATOMIC_INIT // (NOT memset) and accessed through vpx_atomic_ functions. -typedef struct vpx_atomic_int { volatile int value; } vpx_atomic_int; +typedef struct vpx_atomic_int { + volatile int value; +} vpx_atomic_int; #define VPX_ATOMIC_INIT(num) \ { num } @@ -106,4 +108,4 @@ static INLINE int vpx_atomic_load_acquire(const vpx_atomic_int *atomic) { } // extern "C" #endif // __cplusplus -#endif // VPX_UTIL_VPX_ATOMICS_H_ +#endif // VPX_VPX_UTIL_VPX_ATOMICS_H_ diff --git a/media/libvpx/libvpx/vpx_util/vpx_debug_util.c b/media/libvpx/libvpx/vpx_util/vpx_debug_util.c new file mode 100644 index 000000000000..3ce4065ba504 --- /dev/null +++ b/media/libvpx/libvpx/vpx_util/vpx_debug_util.c @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include "vpx_util/vpx_debug_util.h" + +#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG +static int frame_idx_w = 0; +static int frame_idx_r = 0; + +void bitstream_queue_set_frame_write(int frame_idx) { frame_idx_w = frame_idx; } + +int bitstream_queue_get_frame_write(void) { return frame_idx_w; } + +void bitstream_queue_set_frame_read(int frame_idx) { frame_idx_r = frame_idx; } + +int bitstream_queue_get_frame_read(void) { return frame_idx_r; } +#endif + +#if CONFIG_BITSTREAM_DEBUG +#define QUEUE_MAX_SIZE 2000000 +static int result_queue[QUEUE_MAX_SIZE]; +static int prob_queue[QUEUE_MAX_SIZE]; + +static int queue_r = 0; +static int queue_w = 0; +static int queue_prev_w = -1; +static int skip_r = 0; +static int skip_w = 0; +void bitstream_queue_set_skip_write(int skip) { skip_w = skip; } + +void bitstream_queue_set_skip_read(int skip) { skip_r = skip; } + +void bitstream_queue_record_write(void) { queue_prev_w = queue_w; } + +void bitstream_queue_reset_write(void) { queue_w = queue_prev_w; } + +int bitstream_queue_get_write(void) { return queue_w; } + +int bitstream_queue_get_read(void) { return queue_r; } + +void bitstream_queue_pop(int *result, int *prob) { + if (!skip_r) { + if (queue_w == queue_r) { + printf("buffer underflow queue_w %d queue_r %d\n", queue_w, queue_r); + assert(0); + } + *result = result_queue[queue_r]; + *prob = prob_queue[queue_r]; + queue_r = (queue_r + 1) % QUEUE_MAX_SIZE; + } +} + +void bitstream_queue_push(int result, const int prob) { + if (!skip_w) { + result_queue[queue_w] = result; + prob_queue[queue_w] = prob; + queue_w = (queue_w + 1) % QUEUE_MAX_SIZE; + if (queue_w == queue_r) { + printf("buffer overflow queue_w %d queue_r %d\n", queue_w, queue_r); + assert(0); + } + } +} +#endif // CONFIG_BITSTREAM_DEBUG + +#if CONFIG_MISMATCH_DEBUG +static int frame_buf_idx_r = 0; +static int frame_buf_idx_w = 0; +#define MAX_FRAME_BUF_NUM 20 +#define MAX_FRAME_STRIDE 1920 +#define MAX_FRAME_HEIGHT 1080 +static uint16_t + frame_pre[MAX_FRAME_BUF_NUM][3] + [MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT]; // prediction only +static uint16_t + frame_tx[MAX_FRAME_BUF_NUM][3] + [MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT]; // prediction + txfm +static int frame_stride = MAX_FRAME_STRIDE; +static int frame_height = MAX_FRAME_HEIGHT; +static int frame_size = MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT; +void mismatch_move_frame_idx_w(void) { + frame_buf_idx_w = (frame_buf_idx_w + 1) % MAX_FRAME_BUF_NUM; + if (frame_buf_idx_w == frame_buf_idx_r) { + printf("frame_buf overflow\n"); + assert(0); + } +} + +void mismatch_reset_frame(int num_planes) { + int plane; + for (plane = 0; plane < num_planes; ++plane) { + memset(frame_pre[frame_buf_idx_w][plane], 0, + sizeof(frame_pre[frame_buf_idx_w][plane][0]) * frame_size); + memset(frame_tx[frame_buf_idx_w][plane], 0, + sizeof(frame_tx[frame_buf_idx_w][plane][0]) * frame_size); + } +} + +void mismatch_move_frame_idx_r(void) { + if (frame_buf_idx_w == frame_buf_idx_r) { + printf("frame_buf underflow\n"); + assert(0); + } + frame_buf_idx_r = (frame_buf_idx_r + 1) % MAX_FRAME_BUF_NUM; +} + +void mismatch_record_block_pre(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd) { + const uint16_t *src16 = highbd ?
CONVERT_TO_SHORTPTR(src) : NULL; + int r, c; + + if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) { + printf("frame_buf undersized\n"); + assert(0); + } + + for (r = 0; r < blk_h; ++r) { + for (c = 0; c < blk_w; ++c) { + frame_pre[frame_buf_idx_w][plane] + [(r + pixel_r) * frame_stride + c + pixel_c] = + src16 ? src16[r * src_stride + c] : src[r * src_stride + c]; + } + } +#if 0 + { + int ref_frame_idx = 3; + int ref_plane = 1; + int ref_pixel_c = 162; + int ref_pixel_r = 16; + if (frame_idx_w == ref_frame_idx && plane == ref_plane && + ref_pixel_c >= pixel_c && ref_pixel_c < pixel_c + blk_w && + ref_pixel_r >= pixel_r && ref_pixel_r < pixel_r + blk_h) { + printf( + "\nrecord_block_pre frame_idx %d plane %d pixel_c %d pixel_r %d blk_w" + " %d blk_h %d\n", + frame_idx_w, plane, pixel_c, pixel_r, blk_w, blk_h); + } + } +#endif +} +void mismatch_record_block_tx(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd) { + const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL; + int r, c; + if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) { + printf("frame_buf undersized\n"); + assert(0); + } + + for (r = 0; r < blk_h; ++r) { + for (c = 0; c < blk_w; ++c) { + frame_tx[frame_buf_idx_w][plane] + [(r + pixel_r) * frame_stride + c + pixel_c] = + src16 ? src16[r * src_stride + c] : src[r * src_stride + c]; + } + } +#if 0 + { + int ref_frame_idx = 3; + int ref_plane = 1; + int ref_pixel_c = 162; + int ref_pixel_r = 16; + if (frame_idx_w == ref_frame_idx && plane == ref_plane && + ref_pixel_c >= pixel_c && ref_pixel_c < pixel_c + blk_w && + ref_pixel_r >= pixel_r && ref_pixel_r < pixel_r + blk_h) { + printf( + "\nrecord_block_tx frame_idx %d plane %d pixel_c %d pixel_r %d blk_w " + "%d blk_h %d\n", + frame_idx_w, plane, pixel_c, pixel_r, blk_w, blk_h); + } + } +#endif +} +void mismatch_check_block_pre(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd) { + const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL; + int mismatch = 0; + int r, c; + if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) { + printf("frame_buf undersized\n"); + assert(0); + } + + for (r = 0; r < blk_h; ++r) { + for (c = 0; c < blk_w; ++c) { + if (frame_pre[frame_buf_idx_r][plane] + [(r + pixel_r) * frame_stride + c + pixel_c] != + (uint16_t)(src16 ? src16[r * src_stride + c] + : src[r * src_stride + c])) { + mismatch = 1; + } + } + } + if (mismatch) { + int rr, cc; + printf( + "\ncheck_block_pre failed frame_idx %d plane %d " + "pixel_c %d pixel_r " + "%d blk_w %d blk_h %d\n", + frame_idx_r, plane, pixel_c, pixel_r, blk_w, blk_h); + printf("enc\n"); + for (rr = 0; rr < blk_h; ++rr) { + for (cc = 0; cc < blk_w; ++cc) { + printf("%d ", frame_pre[frame_buf_idx_r][plane] + [(rr + pixel_r) * frame_stride + cc + pixel_c]); + } + printf("\n"); + } + + printf("dec\n"); + for (rr = 0; rr < blk_h; ++rr) { + for (cc = 0; cc < blk_w; ++cc) { + printf("%d ", + src16 ? src16[rr * src_stride + cc] : src[rr * src_stride + cc]); + } + printf("\n"); + } + assert(0); + } +} +void mismatch_check_block_tx(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd) { + const uint16_t *src16 = highbd ? 
CONVERT_TO_SHORTPTR(src) : NULL; + int mismatch = 0; + int r, c; + if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) { + printf("frame_buf undersized\n"); + assert(0); + } + + for (r = 0; r < blk_h; ++r) { + for (c = 0; c < blk_w; ++c) { + if (frame_tx[frame_buf_idx_r][plane] + [(r + pixel_r) * frame_stride + c + pixel_c] != + (uint16_t)(src16 ? src16[r * src_stride + c] + : src[r * src_stride + c])) { + mismatch = 1; + } + } + } + if (mismatch) { + int rr, cc; + printf( + "\ncheck_block_tx failed frame_idx %d plane %d pixel_c " + "%d pixel_r " + "%d blk_w %d blk_h %d\n", + frame_idx_r, plane, pixel_c, pixel_r, blk_w, blk_h); + printf("enc\n"); + for (rr = 0; rr < blk_h; ++rr) { + for (cc = 0; cc < blk_w; ++cc) { + printf("%d ", frame_tx[frame_buf_idx_r][plane] + [(rr + pixel_r) * frame_stride + cc + pixel_c]); + } + printf("\n"); + } + + printf("dec\n"); + for (rr = 0; rr < blk_h; ++rr) { + for (cc = 0; cc < blk_w; ++cc) { + printf("%d ", + src16 ? src16[rr * src_stride + cc] : src[rr * src_stride + cc]); + } + printf("\n"); + } + assert(0); + } +} +#endif // CONFIG_MISMATCH_DEBUG diff --git a/media/libvpx/libvpx/vpx_util/vpx_debug_util.h b/media/libvpx/libvpx/vpx_util/vpx_debug_util.h new file mode 100644 index 000000000000..df1a1aab2cba --- /dev/null +++ b/media/libvpx/libvpx/vpx_util/vpx_debug_util.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_UTIL_VPX_DEBUG_UTIL_H_ +#define VPX_VPX_UTIL_VPX_DEBUG_UTIL_H_ + +#include "./vpx_config.h" + +#include "vpx_dsp/prob.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG +void bitstream_queue_set_frame_write(int frame_idx); +int bitstream_queue_get_frame_write(void); +void bitstream_queue_set_frame_read(int frame_idx); +int bitstream_queue_get_frame_read(void); +#endif + +#if CONFIG_BITSTREAM_DEBUG +/* This is a debug tool used to detect bitstream error. On encoder side, it + * pushes each bit and probability into a queue before the bit is written into + * the Arithmetic coder. On decoder side, whenever a bit is read out from the + * Arithmetic coder, it pops out the reference bit and probability from the + * queue as well. If the two results do not match, this debug tool will report + * an error. This tool can be used to pin down the bitstream error precisely. + * By combining gdb's backtrace method, we can detect which module causes the + * bitstream error. 
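 *
 * A minimal usage sketch (the call sites and variable names here are
 * hypothetical; in libvpx the real hooks live in the VP9 bit writer and
 * reader, and the queue only exists when CONFIG_BITSTREAM_DEBUG is on):
 *
 *   int bit = 1, prob = 128;            // encoder: bit about to be coded
 *   bitstream_queue_set_frame_write(0); // tag pushes with frame index 0
 *   bitstream_queue_push(bit, prob);    // record (bit, prob) before writing
 *
 *   int ref_bit, ref_prob;              // decoder: after reading one bit
 *   bitstream_queue_set_frame_read(0);
 *   bitstream_queue_pop(&ref_bit, &ref_prob);
 *   assert(ref_bit == bit && ref_prob == prob); // mismatch pins the bug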
*/ +int bitstream_queue_get_write(void); +int bitstream_queue_get_read(void); +void bitstream_queue_record_write(void); +void bitstream_queue_reset_write(void); +void bitstream_queue_pop(int *result, int *prob); +void bitstream_queue_push(int result, const int prob); +void bitstream_queue_set_skip_write(int skip); +void bitstream_queue_set_skip_read(int skip); +#endif // CONFIG_BITSTREAM_DEBUG + +#if CONFIG_MISMATCH_DEBUG +void mismatch_move_frame_idx_w(void); +void mismatch_move_frame_idx_r(void); +void mismatch_reset_frame(int num_planes); +void mismatch_record_block_pre(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd); +void mismatch_record_block_tx(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd); +void mismatch_check_block_pre(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd); +void mismatch_check_block_tx(const uint8_t *src, int src_stride, int plane, + int pixel_c, int pixel_r, int blk_w, int blk_h, + int highbd); +#endif // CONFIG_MISMATCH_DEBUG + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VPX_UTIL_VPX_DEBUG_UTIL_H_ diff --git a/media/libvpx/libvpx/vpx_util/vpx_thread.h b/media/libvpx/libvpx/vpx_util/vpx_thread.h index 53a5f4966a43..6d308e949b11 100644 --- a/media/libvpx/libvpx/vpx_util/vpx_thread.h +++ b/media/libvpx/libvpx/vpx_util/vpx_thread.h @@ -12,8 +12,8 @@ // Original source: // https://chromium.googlesource.com/webm/libwebp -#ifndef VPX_THREAD_H_ -#define VPX_THREAD_H_ +#ifndef VPX_VPX_UTIL_VPX_THREAD_H_ +#define VPX_VPX_UTIL_VPX_THREAD_H_ #include "./vpx_config.h" @@ -159,6 +159,23 @@ static INLINE int pthread_cond_init(pthread_cond_t *const condition, return 0; } +static INLINE int pthread_cond_broadcast(pthread_cond_t *const condition) { + int ok = 1; +#ifdef USE_WINDOWS_CONDITION_VARIABLE + WakeAllConditionVariable(condition); +#else + while (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) { + // a thread is waiting in pthread_cond_wait: allow it to be notified + ok &= SetEvent(condition->signal_event_); + // wait until the event is consumed so the signaler cannot consume + // the event via its own pthread_cond_wait. + ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) != + WAIT_OBJECT_0); + } +#endif + return !ok; +} + static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { int ok = 1; #ifdef USE_WINDOWS_CONDITION_VARIABLE @@ -194,6 +211,7 @@ static INLINE int pthread_cond_wait(pthread_cond_t *const condition, #endif return !ok; } + #elif defined(__OS2__) #define INCL_DOS #include // NOLINT @@ -202,6 +220,11 @@ static INLINE int pthread_cond_wait(pthread_cond_t *const condition, #include // NOLINT #include // NOLINT +#if defined(__STRICT_ANSI__) +// _beginthread() is not declared on __STRICT_ANSI__ mode. Declare here. +int _beginthread(void (*)(void *), void *, unsigned, void *); +#endif + #define pthread_t TID #define pthread_mutex_t HMTX @@ -412,4 +435,4 @@ const VPxWorkerInterface *vpx_get_worker_interface(void); } // extern "C" #endif -#endif // VPX_THREAD_H_ +#endif // VPX_VPX_UTIL_VPX_THREAD_H_ diff --git a/media/libvpx/libvpx/vpx_util/vpx_timestamp.h b/media/libvpx/libvpx/vpx_util/vpx_timestamp.h new file mode 100644 index 000000000000..c210de5e537f --- /dev/null +++ b/media/libvpx/libvpx/vpx_util/vpx_timestamp.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_UTIL_VPX_TIMESTAMP_H_ +#define VPX_VPX_UTIL_VPX_TIMESTAMP_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// Rational Number with an int64 numerator +typedef struct vpx_rational64 { + int64_t num; // fraction numerator + int den; // fraction denominator +} vpx_rational64_t; // alias for struct vpx_rational64_t + +static INLINE int gcd(int64_t a, int b) { + int r; // remainder + while (b > 0) { + r = (int)(a % b); + a = b; + b = r; + } + + return (int)a; +} + +static INLINE void reduce_ratio(vpx_rational64_t *ratio) { + const int denom = gcd(ratio->num, ratio->den); + ratio->num /= denom; + ratio->den /= denom; +} + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // VPX_VPX_UTIL_VPX_TIMESTAMP_H_ diff --git a/media/libvpx/libvpx/vpx_util/vpx_util.mk b/media/libvpx/libvpx/vpx_util/vpx_util.mk index 86d3ece3c899..116271495699 100644 --- a/media/libvpx/libvpx/vpx_util/vpx_util.mk +++ b/media/libvpx/libvpx/vpx_util/vpx_util.mk @@ -15,3 +15,6 @@ UTIL_SRCS-yes += vpx_thread.h UTIL_SRCS-yes += endian_inl.h UTIL_SRCS-yes += vpx_write_yuv_frame.h UTIL_SRCS-yes += vpx_write_yuv_frame.c +UTIL_SRCS-yes += vpx_timestamp.h +UTIL_SRCS-$(or $(CONFIG_BITSTREAM_DEBUG),$(CONFIG_MISMATCH_DEBUG)) += vpx_debug_util.h +UTIL_SRCS-$(or $(CONFIG_BITSTREAM_DEBUG),$(CONFIG_MISMATCH_DEBUG)) += vpx_debug_util.c diff --git a/media/libvpx/libvpx/vpx_util/vpx_write_yuv_frame.c b/media/libvpx/libvpx/vpx_util/vpx_write_yuv_frame.c index ab685581157c..4ef57a2fee0a 100644 --- a/media/libvpx/libvpx/vpx_util/vpx_write_yuv_frame.c +++ b/media/libvpx/libvpx/vpx_util/vpx_write_yuv_frame.c @@ -13,7 +13,7 @@ void vpx_write_yuv_frame(FILE *yuv_file, YV12_BUFFER_CONFIG *s) { #if defined(OUTPUT_YUV_SRC) || defined(OUTPUT_YUV_DENOISED) || \ - defined(OUTPUT_YUV_SKINMAP) + defined(OUTPUT_YUV_SKINMAP) || defined(OUTPUT_YUV_SVC_SRC) unsigned char *src = s->y_buffer; int h = s->y_crop_height; diff --git a/media/libvpx/libvpx/vpx_util/vpx_write_yuv_frame.h b/media/libvpx/libvpx/vpx_util/vpx_write_yuv_frame.h index 1cb7029817e6..ce1102458edf 100644 --- a/media/libvpx/libvpx/vpx_util/vpx_write_yuv_frame.h +++ b/media/libvpx/libvpx/vpx_util/vpx_write_yuv_frame.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef VPX_UTIL_VPX_WRITE_YUV_FRAME_H_ -#define VPX_UTIL_VPX_WRITE_YUV_FRAME_H_ +#ifndef VPX_VPX_UTIL_VPX_WRITE_YUV_FRAME_H_ +#define VPX_VPX_UTIL_VPX_WRITE_YUV_FRAME_H_ #include #include "vpx_scale/yv12config.h" @@ -24,4 +24,4 @@ void vpx_write_yuv_frame(FILE *yuv_file, YV12_BUFFER_CONFIG *s); } // extern "C" #endif -#endif // VPX_UTIL_VPX_WRITE_YUV_FRAME_H_ +#endif // VPX_VPX_UTIL_VPX_WRITE_YUV_FRAME_H_ diff --git a/media/libvpx/libvpx/vpxdec.c b/media/libvpx/libvpx/vpxdec.c index ff20e6a3c9bb..ad368a230b8d 100644 --- a/media/libvpx/libvpx/vpxdec.c +++ b/media/libvpx/libvpx/vpxdec.c @@ -98,20 +98,41 @@ static const arg_def_t svcdecodingarg = ARG_DEF( NULL, "svc-decode-layer", 1, "Decode SVC stream up to given spatial layer"); static const arg_def_t framestatsarg = ARG_DEF(NULL, "framestats", 1, "Output per-frame stats (.csv format)"); +static const arg_def_t rowmtarg = + ARG_DEF(NULL, "row-mt", 1, "Enable multi-threading to run row-wise in VP9"); +static const arg_def_t lpfoptarg = + ARG_DEF(NULL, "lpf-opt", 1, + "Do loopfilter without waiting for all threads to sync."); -static const arg_def_t *all_args[] = { - &help, &codecarg, &use_yv12, - &use_i420, &flipuvarg, &rawvideo, - &noblitarg, &progressarg, &limitarg, - &skiparg, &postprocarg, &summaryarg, - &outputfile, &threadsarg, &frameparallelarg, - &verbosearg, &scalearg, &fb_arg, - &md5arg, &error_concealment, &continuearg, +static const arg_def_t *all_args[] = { &help, + &codecarg, + &use_yv12, + &use_i420, + &flipuvarg, + &rawvideo, + &noblitarg, + &progressarg, + &limitarg, + &skiparg, + &postprocarg, + &summaryarg, + &outputfile, + &threadsarg, + &frameparallelarg, + &verbosearg, + &scalearg, + &fb_arg, + &md5arg, + &error_concealment, + &continuearg, #if CONFIG_VP9_HIGHBITDEPTH - &outbitdeptharg, + &outbitdeptharg, #endif - &svcdecodingarg, &framestatsarg, NULL -}; + &svcdecodingarg, + &framestatsarg, + &rowmtarg, + &lpfoptarg, + NULL }; #if CONFIG_VP8_DECODER static const arg_def_t addnoise_level = @@ -154,7 +175,7 @@ static INLINE int libyuv_scale(vpx_image_t *src, vpx_image_t *dst, dst->d_h, mode); } #endif -void show_help(FILE *fout, int shorthelp) { +static void show_help(FILE *fout, int shorthelp) { int i; fprintf(fout, "Usage: %s filename\n\n", exec_name); @@ -238,13 +259,14 @@ static int raw_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, return 1; } *bytes_read = frame_size; + return 0; } - return 0; + return 1; } -static int read_frame(struct VpxDecInputContext *input, uint8_t **buf, - size_t *bytes_in_buffer, size_t *buffer_size) { +static int dec_read_frame(struct VpxDecInputContext *input, uint8_t **buf, + size_t *bytes_in_buffer, size_t *buffer_size) { switch (input->vpx_input_ctx->file_type) { #if CONFIG_WEBM_IO case FILE_TYPE_WEBM: @@ -506,6 +528,8 @@ static int main_loop(int argc, const char **argv_) { int arg_skip = 0; int ec_enabled = 0; int keep_going = 0; + int enable_row_mt = 0; + int enable_lpf_opt = 0; const VpxInterface *interface = NULL; const VpxInterface *fourcc_interface = NULL; uint64_t dx_time = 0; @@ -628,6 +652,10 @@ static int main_loop(int argc, const char **argv_) { die("Error: Could not open --framestats file (%s) for writing.\n", arg.val); } + } else if (arg_match(&arg, &rowmtarg, argi)) { + enable_row_mt = arg_parse_uint(&arg); + } else if (arg_match(&arg, &lpfoptarg, argi)) { + enable_lpf_opt = arg_parse_uint(&arg); } #if CONFIG_VP8_DECODER else if (arg_match(&arg, &addnoise_level, argi)) { @@ -694,6 +722,7 @@ static int main_loop(int argc, const char **argv_) { #if 
!CONFIG_WEBM_IO fprintf(stderr, "vpxdec was built without WebM container support.\n"); #endif + free(argv); return EXIT_FAILURE; } @@ -753,6 +782,18 @@ static int main_loop(int argc, const char **argv_) { goto fail; } } + if (interface->fourcc == VP9_FOURCC && + vpx_codec_control(&decoder, VP9D_SET_ROW_MT, enable_row_mt)) { + fprintf(stderr, "Failed to set decoder in row multi-thread mode: %s\n", + vpx_codec_error(&decoder)); + goto fail; + } + if (interface->fourcc == VP9_FOURCC && + vpx_codec_control(&decoder, VP9D_SET_LOOP_FILTER_OPT, enable_lpf_opt)) { + fprintf(stderr, "Failed to set decoder in optimized loopfilter mode: %s\n", + vpx_codec_error(&decoder)); + goto fail; + } if (!quiet) fprintf(stderr, "%s\n", decoder.name); #if CONFIG_VP8_DECODER @@ -766,7 +807,7 @@ static int main_loop(int argc, const char **argv_) { if (arg_skip) fprintf(stderr, "Skipping first %d frames.\n", arg_skip); while (arg_skip) { - if (read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) break; + if (dec_read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) break; arg_skip--; } @@ -797,7 +838,7 @@ static int main_loop(int argc, const char **argv_) { frame_avail = 0; if (!stop_after || frame_in < stop_after) { - if (!read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) { + if (!dec_read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) { frame_avail = 1; frame_in++; diff --git a/media/libvpx/libvpx/vpxenc.c b/media/libvpx/libvpx/vpxenc.c index 4db7eccc351f..50c36bedd541 100644 --- a/media/libvpx/libvpx/vpxenc.c +++ b/media/libvpx/libvpx/vpxenc.c @@ -50,12 +50,6 @@ #endif #include "./y4minput.h" -/* Swallow warnings about unused results of fread/fwrite */ -static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { - return fread(ptr, size, nmemb, stream); -} -#define fread wrap_fread - static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) { return fwrite(ptr, size, nmemb, stream); @@ -95,34 +89,6 @@ static void warn_or_exit_on_error(vpx_codec_ctx_t *ctx, int fatal, va_end(ap); } -static int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) { - FILE *f = input_ctx->file; - y4m_input *y4m = &input_ctx->y4m; - int shortread = 0; - - if (input_ctx->file_type == FILE_TYPE_Y4M) { - if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0; - } else { - shortread = read_yuv_frame(input_ctx, img); - } - - return !shortread; -} - -static int file_is_y4m(const char detect[4]) { - if (memcmp(detect, "YUV4", 4) == 0) { - return 1; - } - return 0; -} - -static int fourcc_is_ivf(const char detect[4]) { - if (memcmp(detect, "DKIF", 4) == 0) { - return 1; - } - return 0; -} - static const arg_def_t help = ARG_DEF(NULL, "help", 0, "Show usage options and exit"); static const arg_def_t debugmode = @@ -326,9 +292,9 @@ static const arg_def_t maxsection_pct = ARG_DEF(NULL, "maxsection-pct", 1, "GOP max bitrate (% of target)"); static const arg_def_t corpus_complexity = ARG_DEF(NULL, "corpus-complexity", 1, "corpus vbr complexity midpoint"); -static const arg_def_t *rc_twopass_args[] = { - &bias_pct, &minsection_pct, &maxsection_pct, &corpus_complexity, NULL -}; +static const arg_def_t *rc_twopass_args[] = { &bias_pct, &minsection_pct, + &maxsection_pct, + &corpus_complexity, NULL }; static const arg_def_t kf_min_dist = ARG_DEF(NULL, "kf-min-dist", 1, "Minimum keyframe interval (frames)"); @@ -342,19 +308,19 @@ static const arg_def_t *kf_args[] = { &kf_min_dist, &kf_max_dist, &kf_disabled, static const arg_def_t noise_sens = ARG_DEF(NULL, 
"noise-sensitivity", 1, "Noise sensitivity (frames to blur)"); static const arg_def_t sharpness = - ARG_DEF(NULL, "sharpness", 1, "Loop filter sharpness (0..7)"); + ARG_DEF(NULL, "sharpness", 1, + "Increase sharpness at the expense of lower PSNR. (0..7)"); static const arg_def_t static_thresh = ARG_DEF(NULL, "static-thresh", 1, "Motion detection threshold"); -static const arg_def_t auto_altref = - ARG_DEF(NULL, "auto-alt-ref", 1, "Enable automatic alt reference frames"); static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1, "AltRef max frames (0..15)"); static const arg_def_t arnr_strength = ARG_DEF(NULL, "arnr-strength", 1, "AltRef filter strength (0..6)"); -static const arg_def_t arnr_type = ARG_DEF(NULL, "arnr-type", 1, "AltRef type"); -static const struct arg_enum_list tuning_enum[] = { - { "psnr", VP8_TUNE_PSNR }, { "ssim", VP8_TUNE_SSIM }, { NULL, 0 } -}; +static const arg_def_t arnr_type = + ARG_DEF(NULL, "arnr-type", 1, "AltRef filter type (1..3)"); +static const struct arg_enum_list tuning_enum[] = { { "psnr", VP8_TUNE_PSNR }, + { "ssim", VP8_TUNE_SSIM }, + { NULL, 0 } }; static const arg_def_t tune_ssim = ARG_DEF_ENUM(NULL, "tune", 1, "Material to favor", tuning_enum); static const arg_def_t cq_level = @@ -367,12 +333,14 @@ static const arg_def_t gf_cbr_boost_pct = ARG_DEF( #if CONFIG_VP8_ENCODER static const arg_def_t cpu_used_vp8 = ARG_DEF(NULL, "cpu-used", 1, "CPU Used (-16..16)"); +static const arg_def_t auto_altref_vp8 = ARG_DEF( + NULL, "auto-alt-ref", 1, "Enable automatic alt reference frames. (0..1)"); static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1, "Number of token partitions to use, log2"); static const arg_def_t screen_content_mode = ARG_DEF(NULL, "screen-content-mode", 1, "Screen content mode"); static const arg_def_t *vp8_args[] = { &cpu_used_vp8, - &auto_altref, + &auto_altref_vp8, &noise_sens, &sharpness, &static_thresh, @@ -405,12 +373,19 @@ static const int vp8_arg_ctrl_map[] = { VP8E_SET_CPUUSED, #if CONFIG_VP9_ENCODER static const arg_def_t cpu_used_vp9 = - ARG_DEF(NULL, "cpu-used", 1, "CPU Used (-8..8)"); + ARG_DEF(NULL, "cpu-used", 1, "CPU Used (-9..9)"); +static const arg_def_t auto_altref_vp9 = ARG_DEF( + NULL, "auto-alt-ref", 1, + "Enable automatic alt reference frames, 2+ enables multi-layer. 
(0..6)"); static const arg_def_t tile_cols = ARG_DEF(NULL, "tile-columns", 1, "Number of tile columns to use, log2"); static const arg_def_t tile_rows = ARG_DEF(NULL, "tile-rows", 1, "Number of tile rows to use, log2 (set to 0 while threads > 1)"); + +static const arg_def_t enable_tpl_model = + ARG_DEF(NULL, "enable-tpl", 1, "Enable temporal dependency model"); + static const arg_def_t lossless = ARG_DEF(NULL, "lossless", 1, "Lossless mode (0: false (default), 1: true)"); static const arg_def_t frame_parallel_decoding = ARG_DEF( @@ -491,11 +466,12 @@ static const arg_def_t row_mt = #if CONFIG_VP9_ENCODER static const arg_def_t *vp9_args[] = { &cpu_used_vp9, - &auto_altref, + &auto_altref_vp9, &sharpness, &static_thresh, &tile_cols, &tile_rows, + &enable_tpl_model, &arnr_maxframes, &arnr_strength, &arnr_type, @@ -527,6 +503,7 @@ static const int vp9_arg_ctrl_map[] = { VP8E_SET_CPUUSED, VP8E_SET_STATIC_THRESHOLD, VP9E_SET_TILE_COLUMNS, VP9E_SET_TILE_ROWS, + VP9E_SET_TPL, VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE, @@ -552,7 +529,7 @@ static const int vp9_arg_ctrl_map[] = { VP8E_SET_CPUUSED, static const arg_def_t *no_args[] = { NULL }; -void show_help(FILE *fout, int shorthelp) { +static void show_help(FILE *fout, int shorthelp) { int i; const int num_encoder = get_vpx_encoder_count(); @@ -603,230 +580,6 @@ void usage_exit(void) { exit(EXIT_FAILURE); } -#define mmin(a, b) ((a) < (b) ? (a) : (b)) - -#if CONFIG_VP9_HIGHBITDEPTH -static void find_mismatch_high(const vpx_image_t *const img1, - const vpx_image_t *const img2, int yloc[4], - int uloc[4], int vloc[4]) { - uint16_t *plane1, *plane2; - uint32_t stride1, stride2; - const uint32_t bsize = 64; - const uint32_t bsizey = bsize >> img1->y_chroma_shift; - const uint32_t bsizex = bsize >> img1->x_chroma_shift; - const uint32_t c_w = - (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; - const uint32_t c_h = - (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; - int match = 1; - uint32_t i, j; - yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; - plane1 = (uint16_t *)img1->planes[VPX_PLANE_Y]; - plane2 = (uint16_t *)img2->planes[VPX_PLANE_Y]; - stride1 = img1->stride[VPX_PLANE_Y] / 2; - stride2 = img2->stride[VPX_PLANE_Y] / 2; - for (i = 0, match = 1; match && i < img1->d_h; i += bsize) { - for (j = 0; match && j < img1->d_w; j += bsize) { - int k, l; - const int si = mmin(i + bsize, img1->d_h) - i; - const int sj = mmin(j + bsize, img1->d_w) - j; - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(plane1 + (i + k) * stride1 + j + l) != - *(plane2 + (i + k) * stride2 + j + l)) { - yloc[0] = i + k; - yloc[1] = j + l; - yloc[2] = *(plane1 + (i + k) * stride1 + j + l); - yloc[3] = *(plane2 + (i + k) * stride2 + j + l); - match = 0; - break; - } - } - } - } - } - - uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; - plane1 = (uint16_t *)img1->planes[VPX_PLANE_U]; - plane2 = (uint16_t *)img2->planes[VPX_PLANE_U]; - stride1 = img1->stride[VPX_PLANE_U] / 2; - stride2 = img2->stride[VPX_PLANE_U] / 2; - for (i = 0, match = 1; match && i < c_h; i += bsizey) { - for (j = 0; match && j < c_w; j += bsizex) { - int k, l; - const int si = mmin(i + bsizey, c_h - i); - const int sj = mmin(j + bsizex, c_w - j); - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(plane1 + (i + k) * stride1 + j + l) != - *(plane2 + (i + k) * stride2 + j + l)) { - uloc[0] = i + k; - uloc[1] = j + l; - uloc[2] = *(plane1 + (i + k) * stride1 + j + l); - uloc[3] = *(plane2 + (i + k) * 
stride2 + j + l); - match = 0; - break; - } - } - } - } - } - - vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; - plane1 = (uint16_t *)img1->planes[VPX_PLANE_V]; - plane2 = (uint16_t *)img2->planes[VPX_PLANE_V]; - stride1 = img1->stride[VPX_PLANE_V] / 2; - stride2 = img2->stride[VPX_PLANE_V] / 2; - for (i = 0, match = 1; match && i < c_h; i += bsizey) { - for (j = 0; match && j < c_w; j += bsizex) { - int k, l; - const int si = mmin(i + bsizey, c_h - i); - const int sj = mmin(j + bsizex, c_w - j); - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(plane1 + (i + k) * stride1 + j + l) != - *(plane2 + (i + k) * stride2 + j + l)) { - vloc[0] = i + k; - vloc[1] = j + l; - vloc[2] = *(plane1 + (i + k) * stride1 + j + l); - vloc[3] = *(plane2 + (i + k) * stride2 + j + l); - match = 0; - break; - } - } - } - } - } -} -#endif - -static void find_mismatch(const vpx_image_t *const img1, - const vpx_image_t *const img2, int yloc[4], - int uloc[4], int vloc[4]) { - const uint32_t bsize = 64; - const uint32_t bsizey = bsize >> img1->y_chroma_shift; - const uint32_t bsizex = bsize >> img1->x_chroma_shift; - const uint32_t c_w = - (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; - const uint32_t c_h = - (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; - int match = 1; - uint32_t i, j; - yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; - for (i = 0, match = 1; match && i < img1->d_h; i += bsize) { - for (j = 0; match && j < img1->d_w; j += bsize) { - int k, l; - const int si = mmin(i + bsize, img1->d_h) - i; - const int sj = mmin(j + bsize, img1->d_w) - j; - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(img1->planes[VPX_PLANE_Y] + - (i + k) * img1->stride[VPX_PLANE_Y] + j + l) != - *(img2->planes[VPX_PLANE_Y] + - (i + k) * img2->stride[VPX_PLANE_Y] + j + l)) { - yloc[0] = i + k; - yloc[1] = j + l; - yloc[2] = *(img1->planes[VPX_PLANE_Y] + - (i + k) * img1->stride[VPX_PLANE_Y] + j + l); - yloc[3] = *(img2->planes[VPX_PLANE_Y] + - (i + k) * img2->stride[VPX_PLANE_Y] + j + l); - match = 0; - break; - } - } - } - } - } - - uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; - for (i = 0, match = 1; match && i < c_h; i += bsizey) { - for (j = 0; match && j < c_w; j += bsizex) { - int k, l; - const int si = mmin(i + bsizey, c_h - i); - const int sj = mmin(j + bsizex, c_w - j); - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(img1->planes[VPX_PLANE_U] + - (i + k) * img1->stride[VPX_PLANE_U] + j + l) != - *(img2->planes[VPX_PLANE_U] + - (i + k) * img2->stride[VPX_PLANE_U] + j + l)) { - uloc[0] = i + k; - uloc[1] = j + l; - uloc[2] = *(img1->planes[VPX_PLANE_U] + - (i + k) * img1->stride[VPX_PLANE_U] + j + l); - uloc[3] = *(img2->planes[VPX_PLANE_U] + - (i + k) * img2->stride[VPX_PLANE_U] + j + l); - match = 0; - break; - } - } - } - } - } - vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; - for (i = 0, match = 1; match && i < c_h; i += bsizey) { - for (j = 0; match && j < c_w; j += bsizex) { - int k, l; - const int si = mmin(i + bsizey, c_h - i); - const int sj = mmin(j + bsizex, c_w - j); - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(img1->planes[VPX_PLANE_V] + - (i + k) * img1->stride[VPX_PLANE_V] + j + l) != - *(img2->planes[VPX_PLANE_V] + - (i + k) * img2->stride[VPX_PLANE_V] + j + l)) { - vloc[0] = i + k; - vloc[1] = j + l; - vloc[2] = *(img1->planes[VPX_PLANE_V] + - (i + k) * img1->stride[VPX_PLANE_V] + j + l); - vloc[3] = *(img2->planes[VPX_PLANE_V] + - (i + k) * 
img2->stride[VPX_PLANE_V] + j + l); - match = 0; - break; - } - } - } - } - } -} - -static int compare_img(const vpx_image_t *const img1, - const vpx_image_t *const img2) { - uint32_t l_w = img1->d_w; - uint32_t c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; - const uint32_t c_h = - (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; - uint32_t i; - int match = 1; - - match &= (img1->fmt == img2->fmt); - match &= (img1->d_w == img2->d_w); - match &= (img1->d_h == img2->d_h); -#if CONFIG_VP9_HIGHBITDEPTH - if (img1->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { - l_w *= 2; - c_w *= 2; - } -#endif - - for (i = 0; i < img1->d_h; ++i) - match &= (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], - img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], - l_w) == 0); - - for (i = 0; i < c_h; ++i) - match &= (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], - img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], - c_w) == 0); - - for (i = 0; i < c_h; ++i) - match &= (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], - img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], - c_w) == 0); - - return match; -} - #define NELEMENTS(x) (sizeof(x) / sizeof(x[0])) #if CONFIG_VP9_ENCODER #define ARG_CTRL_CNT_MAX NELEMENTS(vp9_arg_ctrl_map) @@ -1012,57 +765,6 @@ static void parse_global_config(struct VpxEncoderConfig *global, char **argv) { } } -static void open_input_file(struct VpxInputContext *input) { - /* Parse certain options from the input file, if possible */ - input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb") - : set_binary_mode(stdin); - - if (!input->file) fatal("Failed to open input file"); - - if (!fseeko(input->file, 0, SEEK_END)) { - /* Input file is seekable. Figure out how long it is, so we can get - * progress info. - */ - input->length = ftello(input->file); - rewind(input->file); - } - - /* Default to 1:1 pixel aspect ratio. */ - input->pixel_aspect_ratio.numerator = 1; - input->pixel_aspect_ratio.denominator = 1; - - /* For RAW input sources, these bytes will applied on the first frame - * in read_frame(). - */ - input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file); - input->detect.position = 0; - - if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) { - if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, - input->only_i420) >= 0) { - input->file_type = FILE_TYPE_Y4M; - input->width = input->y4m.pic_w; - input->height = input->y4m.pic_h; - input->pixel_aspect_ratio.numerator = input->y4m.par_n; - input->pixel_aspect_ratio.denominator = input->y4m.par_d; - input->framerate.numerator = input->y4m.fps_n; - input->framerate.denominator = input->y4m.fps_d; - input->fmt = input->y4m.vpx_fmt; - input->bit_depth = input->y4m.bit_depth; - } else - fatal("Unsupported Y4M stream."); - } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) { - fatal("IVF is not supported as input."); - } else { - input->file_type = FILE_TYPE_RAW; - } -} - -static void close_input_file(struct VpxInputContext *input) { - fclose(input->file); - if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m); -} - static struct stream_state *new_stream(struct VpxEncoderConfig *global, struct stream_state *prev) { struct stream_state *stream; @@ -1278,8 +980,8 @@ static int parse_stream_params(struct VpxEncoderConfig *global, match = 1; /* Point either to the next free element or the first - * instance of this control. 
- */ + * instance of this control. + */ for (j = 0; j < config->arg_ctrl_cnt; j++) if (ctrl_args_map != NULL && config->arg_ctrls[j][0] == ctrl_args_map[i]) @@ -1614,14 +1316,14 @@ static void encode_frame(struct stream_state *stream, vpx_img_alloc(NULL, VPX_IMG_FMT_I42016, cfg->g_w, cfg->g_h, 16); } I420Scale_16( - (uint16 *)img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y] / 2, - (uint16 *)img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U] / 2, - (uint16 *)img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V] / 2, - img->d_w, img->d_h, (uint16 *)stream->img->planes[VPX_PLANE_Y], + (uint16_t *)img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y] / 2, + (uint16_t *)img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U] / 2, + (uint16_t *)img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V] / 2, + img->d_w, img->d_h, (uint16_t *)stream->img->planes[VPX_PLANE_Y], stream->img->stride[VPX_PLANE_Y] / 2, - (uint16 *)stream->img->planes[VPX_PLANE_U], + (uint16_t *)stream->img->planes[VPX_PLANE_U], stream->img->stride[VPX_PLANE_U] / 2, - (uint16 *)stream->img->planes[VPX_PLANE_V], + (uint16_t *)stream->img->planes[VPX_PLANE_V], stream->img->stride[VPX_PLANE_V] / 2, stream->img->d_w, stream->img->d_h, kFilterBox); img = stream->img; @@ -2215,9 +1917,9 @@ int main(int argc, const char **argv_) { if (!global.quiet) { FOREACH_STREAM(fprintf( - stderr, "\rPass %d/%d frame %4d/%-4d %7" PRId64 "B %7" PRId64 - "b/f %7" PRId64 "b/s" - " %7" PRId64 " %s (%.2f fps)\033[K\n", + stderr, + "\rPass %d/%d frame %4d/%-4d %7" PRId64 "B %7" PRId64 "b/f %7" PRId64 + "b/s %7" PRId64 " %s (%.2f fps)\033[K\n", pass + 1, global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes, seen_frames ? (int64_t)(stream->nbytes * 8 / seen_frames) : 0, diff --git a/media/libvpx/libvpx/vpxenc.h b/media/libvpx/libvpx/vpxenc.h index d867e9d9548a..b780aedca699 100644 --- a/media/libvpx/libvpx/vpxenc.h +++ b/media/libvpx/libvpx/vpxenc.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPXENC_H_ -#define VPXENC_H_ +#ifndef VPX_VPXENC_H_ +#define VPX_VPXENC_H_ #include "vpx/vpx_encoder.h" @@ -61,4 +61,4 @@ struct VpxEncoderConfig { } // extern "C" #endif -#endif // VPXENC_H_ +#endif // VPX_VPXENC_H_ diff --git a/media/libvpx/libvpx/vpxstats.h b/media/libvpx/libvpx/vpxstats.h index 5c9ea34f71a3..3625ee3291f9 100644 --- a/media/libvpx/libvpx/vpxstats.h +++ b/media/libvpx/libvpx/vpxstats.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VPXSTATS_H_ -#define VPXSTATS_H_ +#ifndef VPX_VPXSTATS_H_ +#define VPX_VPXSTATS_H_ #include @@ -40,4 +40,4 @@ vpx_fixed_buf_t stats_get(stats_io_t *stats); } // extern "C" #endif -#endif // VPXSTATS_H_ +#endif // VPX_VPXSTATS_H_ diff --git a/media/libvpx/libvpx/warnings.h b/media/libvpx/libvpx/warnings.h index 6b8ae6796f37..15558c64374f 100644 --- a/media/libvpx/libvpx/warnings.h +++ b/media/libvpx/libvpx/warnings.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef WARNINGS_H_ -#define WARNINGS_H_ +#ifndef VPX_WARNINGS_H_ +#define VPX_WARNINGS_H_ #ifdef __cplusplus extern "C" { @@ -30,4 +30,4 @@ void check_encoder_config(int disable_prompt, } // extern "C" #endif -#endif // WARNINGS_H_ +#endif // VPX_WARNINGS_H_ diff --git a/media/libvpx/libvpx/webmdec.h b/media/libvpx/libvpx/webmdec.h index 7dcb170caf3c..d8618b07d675 100644 --- a/media/libvpx/libvpx/webmdec.h +++ b/media/libvpx/libvpx/webmdec.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef WEBMDEC_H_ -#define WEBMDEC_H_ +#ifndef VPX_WEBMDEC_H_ +#define VPX_WEBMDEC_H_ #include "./tools_common.h" @@ -66,4 +66,4 @@ void webm_free(struct WebmInputContext *webm_ctx); } // extern "C" #endif -#endif // WEBMDEC_H_ +#endif // VPX_WEBMDEC_H_ diff --git a/media/libvpx/libvpx/webmenc.h b/media/libvpx/libvpx/webmenc.h index b4a9e357bb83..4176e820814e 100644 --- a/media/libvpx/libvpx/webmenc.h +++ b/media/libvpx/libvpx/webmenc.h @@ -7,8 +7,8 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#ifndef WEBMENC_H_ -#define WEBMENC_H_ +#ifndef VPX_WEBMENC_H_ +#define VPX_WEBMENC_H_ #include #include @@ -52,4 +52,4 @@ void write_webm_file_footer(struct WebmOutputContext *webm_ctx); } // extern "C" #endif -#endif // WEBMENC_H_ +#endif // VPX_WEBMENC_H_ diff --git a/media/libvpx/libvpx/y4menc.c b/media/libvpx/libvpx/y4menc.c index 05018dbc4339..02b729e5bbbf 100644 --- a/media/libvpx/libvpx/y4menc.c +++ b/media/libvpx/libvpx/y4menc.c @@ -17,11 +17,9 @@ int y4m_write_file_header(char *buf, size_t len, int width, int height, const char *color; switch (bit_depth) { case 8: - color = fmt == VPX_IMG_FMT_444A - ? "C444alpha\n" - : fmt == VPX_IMG_FMT_I444 - ? "C444\n" - : fmt == VPX_IMG_FMT_I422 ? "C422\n" : "C420jpeg\n"; + color = fmt == VPX_IMG_FMT_I444 + ? "C444\n" + : fmt == VPX_IMG_FMT_I422 ? "C422\n" : "C420jpeg\n"; break; case 9: color = fmt == VPX_IMG_FMT_I44416 diff --git a/media/libvpx/libvpx/y4menc.h b/media/libvpx/libvpx/y4menc.h index 69d590413ec7..9a367e34c639 100644 --- a/media/libvpx/libvpx/y4menc.h +++ b/media/libvpx/libvpx/y4menc.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef Y4MENC_H_ -#define Y4MENC_H_ +#ifndef VPX_Y4MENC_H_ +#define VPX_Y4MENC_H_ #include "./tools_common.h" @@ -30,4 +30,4 @@ int y4m_write_frame_header(char *buf, size_t len); } // extern "C" #endif -#endif // Y4MENC_H_ +#endif // VPX_Y4MENC_H_ diff --git a/media/libvpx/libvpx/y4minput.c b/media/libvpx/libvpx/y4minput.c index 1de636cc0bc8..007bd9971b4f 100644 --- a/media/libvpx/libvpx/y4minput.c +++ b/media/libvpx/libvpx/y4minput.c @@ -130,8 +130,8 @@ static int y4m_parse_tags(y4m_input *_y4m, char *_tags) { The number of taps is intentionally kept small to reduce computational overhead and limit ringing. - The taps from these filters are scaled so that their sum is 1, and the result - is scaled by 128 and rounded to integers to create a filter whose + The taps from these filters are scaled so that their sum is 1, and the + result is scaled by 128 and rounded to integers to create a filter whose intermediate values fit inside 16 bits. Coefficients are rounded in such a way as to ensure their sum is still 128, which is usually equivalent to normal rounding. 
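As a toy illustration of the tap-scaling rule described in the comment above
(the taps below are invented for the example, not libvpx's actual filter
coefficients): scale taps that sum to 1 by 128, round to integers, then nudge
one tap so the integer sum stays exactly 128.

#include <math.h>
#include <stdio.h>

int main(void) {
  const double taps[4] = { 0.1, 0.4, 0.4, 0.1 }; /* sum to 1.0 */
  int q[4], sum = 0, imax = 0, i;
  for (i = 0; i < 4; i++) {
    q[i] = (int)lround(taps[i] * 128.0); /* scale by 128, round */
    sum += q[i];
    if (q[i] > q[imax]) imax = i;
  }
  q[imax] += 128 - sum; /* keep the quantized sum at exactly 128 */
  for (i = 0; i < 4; i++) printf("%d ", q[i]); /* prints "13 51 51 13" */
  printf("\n");
  return 0;
}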
@@ -139,7 +139,6 @@ static int y4m_parse_tags(y4m_input *_y4m, char *_tags) { Conversions which require both horizontal and vertical filtering could have these steps pipelined, for less memory consumption and better cache performance, but we do them separately for simplicity.*/ - #define OC_MINI(_a, _b) ((_a) > (_b) ? (_b) : (_a)) #define OC_MAXI(_a, _b) ((_a) < (_b) ? (_b) : (_a)) #define OC_CLAMPI(_a, _b, _c) (OC_MAXI(_a, OC_MINI(_b, _c))) @@ -976,6 +975,8 @@ int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip, _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h; _y4m->convert = y4m_convert_411_420jpeg; + fprintf(stderr, "Unsupported conversion from yuv 411\n"); + return -1; } else if (strcmp(_y4m->chroma_type, "444") == 0) { _y4m->src_c_dec_h = 1; _y4m->src_c_dec_v = 1; @@ -1030,30 +1031,6 @@ int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip, fprintf(stderr, "Unsupported conversion from 444p12 to 420jpeg\n"); return -1; } - } else if (strcmp(_y4m->chroma_type, "444alpha") == 0) { - _y4m->src_c_dec_h = 1; - _y4m->src_c_dec_v = 1; - if (only_420) { - _y4m->dst_c_dec_h = 2; - _y4m->dst_c_dec_v = 2; - _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h; - /*Chroma filter required: read into the aux buf first. - We need to make two filter passes, so we need some extra space in the - aux buffer. - The extra plane also gets read into the aux buf. - It will be discarded.*/ - _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h; - _y4m->convert = y4m_convert_444_420jpeg; - } else { - _y4m->vpx_fmt = VPX_IMG_FMT_444A; - _y4m->bps = 32; - _y4m->dst_c_dec_h = _y4m->src_c_dec_h; - _y4m->dst_c_dec_v = _y4m->src_c_dec_v; - _y4m->dst_buf_read_sz = 4 * _y4m->pic_w * _y4m->pic_h; - /*Natively supported: no conversion required.*/ - _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0; - _y4m->convert = y4m_convert_null; - } } else if (strcmp(_y4m->chroma_type, "mono") == 0) { _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0; _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2; diff --git a/media/libvpx/libvpx/y4minput.h b/media/libvpx/libvpx/y4minput.h index 9e69ceb835a8..a4a8b18dc53d 100644 --- a/media/libvpx/libvpx/y4minput.h +++ b/media/libvpx/libvpx/y4minput.h @@ -11,8 +11,8 @@ * Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors. */ -#ifndef Y4MINPUT_H_ -#define Y4MINPUT_H_ +#ifndef VPX_Y4MINPUT_H_ +#define VPX_Y4MINPUT_H_ #include #include "vpx/vpx_image.h" @@ -65,4 +65,4 @@ int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *img); } // extern "C" #endif -#endif // Y4MINPUT_H_ +#endif // VPX_Y4MINPUT_H_
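For reference, a minimal sketch of how an application might enable the two new
VP9 decoder controls that vpxdec's --row-mt and --lpf-opt flags map onto in
the vpxdec.c hunk above. The control IDs are the ones the patch uses; the
helper name is invented, and error handling plus decoder setup are elided.

#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"

/* Hypothetical helper mirroring vpxdec's --row-mt/--lpf-opt handling. */
static void enable_vp9_decode_speedups(vpx_codec_ctx_t *decoder) {
  /* Run the VP9 decode loop row-wise across threads. */
  vpx_codec_control(decoder, VP9D_SET_ROW_MT, 1);
  /* Loop-filter without waiting for all threads to sync. */
  vpx_codec_control(decoder, VP9D_SET_LOOP_FILTER_OPT, 1);
}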

z2on86IQZMK?$gPb(E?~C^hN~N^Yx9C&%We!)KaRW4a@{`IG1&3{-Ns;{^Vwi&ibwX zz-We2K@fb$IO=1_;g}D+^^4E{h5fLZ!((`Y@QJ*2zGQ}5V_GTV26%j+O*ouD)aGkb zm2-nHNAZ2cmeD*1j0(I z-2zl2Q;QZlSSXEwS?|;afY>!8bsh!M17A=XJg*51J@%{`ikL1-t&A?4bMCclxG?8~ zT$9*Z4A16TW3wdsmQ1;pm>`t_K6k2L+@@_u$0HNPqF-M}YKM)1+pC0VrCN&{%QyECiUDMCe#oF~!3A?G8xQOVh7srnWuG>vCD5)((u^oG|EO7IxXgRgo?p zT~~5#{i*IFr(Hc{b=V#ybELjlE}7|-yszaG_uEa+3Q(EcnW^oN)?ny_h2?stuAw!}*KDeYk}9jleC!-%D_A^cXYb?W2ep-x-3K)NclL4?71ZUP`(z zl;O;uAyR~VX((}F=YCYZ(yhHHR<3iK2lk#;G#3rDVIO(f&@VTSVwK_J50lqPGZ4Kb zabK1vp*tW=okvt~23!sVo^uoKII&}wnTf?EkqAy_yNb;dP`t(JSBa>H*zV}R&LYp# zaB$aB-L*wwQM$Fq_VK(o*D$J{3x zbHwQwT}42fUvBpP*gaY;HAl=eGYbe-%Nl0`5FlCr+g#C{2TneS7_Qoq)f@4)DupD2 zU=h5CH2{znvSD36BGDzJ?G!0HJ!_KyTc1LYu;<%T7#LBH^=PxR9Uy&?fjqvec6=wU zf@x+yyC9CL1lCg|r$s#nfLD$Egl$v;I=KEtjP-@bsEaK*6JqsBTdxS;X1SLLZ%ul0 z?%F|Dvr&CnoUH7)+pXshkA{P%ex*=2YqBJ5O_t@e$1h%te;6qtV|MGs1pmG`P%-RcwS#&}^d7dBxaZWe}}%@uIXgK$O3w^x7Ku=yg(zvnjJan3IhMAPBT z#2N1Gpc-r)!;@2u2fmI&gZ1z6KE0G+g(JM~?RF&kS`THuUp;#@Zf2NK$Ph4dVgS|W zGMroXR+lENSvTc$^)0*e&h%>F+0v1WZ7fjAk@Y<<%k zX5G(Ed&B_KJd0~cG_}>;ZZ~IZJ|P+ItaZXJc>0&!^6}xL|NJjM`UijTU;R%X4L<(A z$qy9n<)585zp#%lFd%tvJ@yD}xY-V{-yHG9cfY>$*I$1FyBoj$-+t!@Ox>~W+qtkl zD<$T%H*SQSzLno%Qz@qldh4~n_r?b|em}VRq1|x1lZE*Oa*H_<3%?wHUOWue3}CzQ z14}PyRRA%PZ!A*5wEH5w@+0Pt2UhH>iL+DpA&r(Wu*w~Ra1t<2J zEQdbqztH_X{0)1g%VinwbP9Rx^B$$BWcmHLQyd3TY?R#hj{$&tz^naqyK6`Bmt!=U zMx4@LOIos|t&>nb?ZKC?Xe-RzLi8;cO1js5OHBo4_6@zdhSN>DdU1U4#UJO+J|q#K zPnz&smo|{^a`Xy^3|Y~|DLIKMFIB^RwkYxF|5``#KmkvwO|$vygX zphs_zxehHpZ?g(85mJHt5GH8&Xx4glmesC)BNKEuRi;wwxOHt09wnU12y-Lqq?hL@ z4`;IWg6Y2ruw90J_0hi-Nn0?%+qvQUq6|y|DbJ9H0_ExAVNTbXIdKvMm}FsgA?Sj| z&a)%MiG@O|Qytu$c|bfh-AQy#CcJt&8jFL@47lx~1~zx_QNe@i1`r)~oz?j)$CJ>6 z)LafNp>Y~*7cWVH@nes-#R!hRJ>E>R97o2wRLd7KQ%$m;?ydpBsVB;LTLbfWh+@Y8 z!x1kFZc;ND&edGD=Vzc64Kl`%aP9dA4U6${9S0L4#_Go=2S>Cb3Tdi4&bf5GbztE* zt&T@vL9IX7^4)v}2VAqJAJ%xHBQDrdFHY=a?-Dl1I!MyMFUT3~K%6>|WdZxjS=M}_ zpnZJ|`1)^M@j_!u@mO1lv*JQvnggV+ zSCSmvZgP|K6C?B$8Iq7_!tANY>I)nD?9r?1geg2K zc2hc~k8F}LwxNgtPn)|UAfyNW=>1uAXA2C}Kv^pw<^@+4XcC|2X zGzJiljOL*?BDSm5rNbtgcg^~BV>0MCrh`$60PGFvW^H9raoChl#oKRY`R=$A==)<` z)&Q$gjvNwQSix_tjp*d!I+j|N%%nEg0mEN&*&zaIyY41ehMsZEFQ3!a!3kMe*Ea`nX0PhPyR2HUTOI~aWM{Nc-!7u%owbUDADcb-bt zZ(kn0$0r%VzHlp21^`74+glI&FFz?UI7l&y5Q=*1sU=+r7e$kGX+4yo#xBz<&DhtF3>ca=q+3}^GnEg^l<6q~&NW>Mm=IW0&_k6YT zLDyi-XB?D4u}L4S)oounhKNZXJ?>YJ`8<7^l|u$qYptICax>+99|>ZsOfXu2I{Yu} zK;B9ILZC)DQj(o^u>+pDynE^`*wWiK$7TTqpPj5)jMGCKDzsB2*alI}N;o*IHstxM=X9>7k;uxMAlrzi&PvnfmFY z&H$fq*N9+WyFqyuGR+aiD1m`(%5vQ&5z^~%a2So1N>nG$^c?ZVLQ}D@p#Ay;pEH!2 zL>*WCgRZh|U!M_@G4-abtSNF^deqTLr zb=ZErP=dYnUBayQ6rF*fN~3iIqMFa)K+7zLSCrqFI*5=i~^4oK_hhc4De0=)Z zh5V7MK#9b}x;VW4>UF>R^7y3E%m0TLKk4;%0Y^I;gPix?UR(@Y(8efo&A4IDWUdXl zl()SiVjFQB|9r0vRiEZ-d)4|M{_4;EOW{x(W}$T&NV}vUIN=C&P%p1H{k9r8-}A7w zK)>6r595|b-Rx}CejY%n}5J^#*@1Q=x_Ao5VLf$ZB+4Y{Mtch{&}TiJsGRuH__zKA;&Hd@FkOuKR25U^3Cr$4zSe@e#H7#%yuOx1Eag z@#16{&$I0wemqE2-$vZCKPl-{IL-u8*HdUb+>-l5x1IhTf6N+Sy;=_(3I{o1#{S12 zij$L$wPvX=Dz5@7qTujwLD$_f=XBIAS;;Ak^u?T*hztGQ?d=!ak1c|?H!H_R#FHbR zX%XKRhh4ekgyv5aBi4y$;r@$=;>L)@rq4>E)pS&Lu*)HHCP?WKJ1HEGk14W~+XQO> zC1DgtOc0n5g85DkcbSo@M{Wc!AhYNG(%L@MC?RV_EZnYk1lMQLk?hGoONM~xt72?vosy4G7mR@Tp_CK7m{UJB9#Co;gO`_Ha zm0)0`O(sM{Hkht!0sCPqV@LZ78wV+FrfVY$Hn?o5(7!1{=sJ*^8-W3 zUc&&uaO7klx}t(TcINPUpE6;UJRYXM<1GhYEJI-EkD;mbmMFoeThC!Th;i8X(QflC-F z7BpIEjN5et0-QuiQ2@J_&=OT=9)|`SWL=TPU1hAoHmrq>Ly@Sv($d@_rQ7pknO=1` zGRs`r(VTrEZ9?ABw8Wc60aoGw!;?+46Kxdp)N1(<;|v!=_CY zNY_ai2C7SS(nKiWu$t)BOv#4$bg!kK$v~BcYq&@A*NVapUz^H>nJeQiU1;`EpzrD| z-e_GL%#S=&`)%#Z&~iIFlnlSN+!m>8k;nKIRe?30A9ADqG 
z0o8P&npGj#l~yb)Ua7-`wVJ`Zg}c#h`h9RbD+$bll-MSEux&pu7Y(OC~G{ZYm2 z0e!-aI$mo(V6fRlz;YeKsn*AjXLWwRTjm(`=aYCZ6Pdb~UuJ{K<1bG3`{&OQePQ5Y z{p!z^(l9rT0+p`vrDA$v1M-t9JR zmJP8jv7mPyn-K`$9Dj^?qv1$1Y=dEmT7KChxc1-$Y5 z-7Vi+_KKBe)D;+h`9s(=A91o znv>gjF&~T{P7UJ2ucnxX;%J{Xr8h%5>57)r7UmKG4o1`!j5xX4cTBp+UHk2^Pa3J6 z)fUm`IV8R|ocTRQwEUZim%nl8>WvOn*8-Jye%9NmNbJBkynlreUyo`P8$9g zTq_nC2Q|sk$Yt0qUD2Z28mTwW#~gRjdX{MSc2p1MfUp=l`BK>OQe*ELOeh{Hv;t>?iOwVSxV$?gLWebM%?BH@HGt&w?Lw}4$U^_-3qZXwN4rI4n z%jw)wOx9Y_R}SO$m~c8=*Or2#c5|okRl~S>hYLphbTQoG%Z|12VcCkG;^3;|(gHiu zO#P(!vZgtlVwzhB-(8vsh|*?i$v<%%N2cYWk_c7d&V1GS;!%i5))jW(U(pM>W4pRC ztN3T00W{&p|N91L6*mppHJ*Td6>nuEu8kfv0N zGz~FxJLH{kALS2o`&D4P_w#a)K?9-RKIT8wRTi+dt5N@(?vvX3rT6}6q2UKvw^&Lod$5t1dhMEl7ox5veA$94UT9No%X_ne=V>Wxas z8ou@WfB&te#lQb={>2@4#DD2>bVaOH%(I_Ye*c}zWSD(+0@QDG{DRl06nD=KU+q=k zAcS(bozA=yc1oQPOEb=x+4Mo)q8bCS?F4^&xIo;x`Op6F|1DPU(`ew2y9dNCtw8W> zcnBocf1{xd5Ox_4es(DJ0A&!GaI>@9V1!)qW^1%~eX@Na9e>HdXg;^r0b;I% zxfs_rf@@#^$HB6Qf}>h&bFNcus69t*q2>2cerJr~KAYY&#=rlTB)6Gmojv5<)bF^nV(n|Q;2WLnh<^u7d;f&K=Mrl3aXg4HEucpZR zOL*5P0^_Ugh-72wNp6;`uCUiTJ#hDI>hOwgn-}Rf#uAf(t-=c0*q@N|GeY4k0N{*J z0Y8ZKM+EMypQL3+apVV5#Q?OZBTflzI9Q7wQ+*4#}cjd(k9RM>|mZy2O@5KEL0G&>8 z3iUadF9v2JZ?1S~0qsRxyi0p(t_UF>-zdH{Qef4e9D?`~F! zkcmfynIlT7b+2gaBpkhlpr{VrUI9psW`nY}<5a?4Lg?$RPC^DABGv#(OqhJT;JrW{Mz4%g)3)BWg%yohBCx>bfCP-aII*=>stCN-t%i6dU zX~gYI0lBJci*5GR60lw<@>!q_3^AVv#3wd>I3(@(40a&C8Fb+i7lUN;e37xJ6jcCga=69VfYWN+d^JGRZkGQPVs7sEV4qMzWie`Y*-8 zQX|~6)myQ5|GPI*_p6Ba^s5B*7eD-8d;2IDwS(sqG~Kt7N!M1bMQHP|{MzdqjlcERn*jTNeD=qG`X_(-8$Ib)(3i-3 z(Eo@|UC*!rYylJL3hd4{4AidTq=$_3=}Eb}v8eB$&VC_(&p%fXyA{|MMh1FhsA&s6 z?t*|_JOKifhD35@3@Mzt}UR3dnt*?dNdwtqJ>ItnY)7mh$ z3Vb>)Z_wzZbvS|N;qiC{CqSCrwl}fbO2nA%qg*IR+eegV!Jt2HRyzunMu3sW?d%P1 z3;bbXZ8cltJ^)+W5=6CrdBi|^6}>_EhYm}JoXZH-ballX(O4Npv~sT=1?bf&n2b@4 zvPWVcR>g45uS^v5JyibG5sb-w&f$bf%{O4xfpx?UXZ^NsI6c+ln>HokUwy?kH#l>2 zA1%Gzxq7gvq0gq5HYsl4q+s-^bt1GWJnl)1n9?nN>*JJhbp}nb$=*a=N_G>i8*@5R zyUJMEUyK_5gK941#9mnQ85yvhRY$De>`+YbT1 z*U4RGXNREb=*n1fVbkr`M?0elt6NL<;R}IXuUV((&FJ4)vRki5ZIb+X?g0JqUcoDS zO2Z?6WNqlI@t?T?ujot6Z-rGDjlc8F?~d*HPyUzxZR%e7_Ihey7Za#b-C~Ai0&PNc zt$4Ow53Tq|5{20oHyvzQJMZTJg=spxlv$b_Qta1SgC|`w6(%uG>$9zflnm<$k3Xyc z0RR9=L_t(_+`V$PeWczJh9a?k-Xs@qo*ft|rrEHGZ=!SED<05ljlVyX_3-&6_^;i2 z=S@5FpMH6EcS(Bp+uwDRtbhOHi)0(V^qv2qYo4Pkdw=?;m8UP8-}`28=RZFF%ac!~ z02&vh%@6*Y`Gt79QH}c{A?}tskqvsI=R6Q5Kp;8^sLP-HaM&IG2B zaCnKh^LgoUN|lWeI_&6BkRKVgFpmp>rdbI$r6c`m2W$a$!YL3#=d7!SU548ndjHYlokA!efjefw(H;=Qw9Rgef&WPV2kg*sQ%*kOHO}v%o$2&7z;UEN zE|Z548nsa)&g*9oyiyKXjuNx2HkUL|q7TTq!?e#p05N(xURfWtuz+ zO&Hsg7JuimIR4|V>Bz%LLFcxdhrT&h7Zw=lOL}odw*U zYr)(B6cswB`V@pX)ABDZG@gEybF^EB1!-ipZ_`19Y1)0zU-azh%n&<{uic^^=c_)6 z0GJi;;K2z4AYMnR;5}Fs=SMP?@n7tdseo_Ww#&7dh3T{qy5Y*JY5qWEh-|!);K>dI zC}UU6z&6RH^6V3iVESY4tT{pgkarZk&>M27x_|+`2vSpmMV*EgMq4jsG^)t_)wy-P z|B@ZR6$Oc;#lC6TavC7PMDz_CnX87r=%^Z(_u>f?agzZ@hk6?8}T9NtLxU%bu=&GP}Yyra8 zq8v8s)3FmFD+w>#5S%MW?xh@Z#wV1PA)y!EE262$1f_yHYgRFXZ4NY=o#HGohGVA9dV`-lvyV}GIJ*7y6Z$Ac3Kjd39`VXKNP1S0+ryy-oPH7j_aP(29+tS&*-OT zjTxq%%h$fur=MX@CZGqKvKxaRK2nn**2M?zxSKz6EwB@HI)E8xiK8N%&gqdom0rwd zhvbLq0P2##Kq*F;`*|Je$4rp+XQ&2SOJBIY%Gd%S`@No%ayce*m5b`wC^i>EQf>72 zzn1K~*H{&xrsk`opZ&p4cP1st4^VMvPzeDlNC@H-G$JambGRi?1Q2sC5<6#g0I*Uh zXhLv3Q&H=&u+u?VN7)(>Nh7s7$RP6rx#$R>e=koPr~4iuHTu;s(myhtED!vC{2}R zv;07Y;iut>rx~J~Dx0JT>SDzE+&^iFbB3J9(640>uDBA=bVA*d4wbC(h=GIS0W;0;g-B)r&vwJX{V~4dZ|C%}g z+l`BfD;#jTj%4i{Rp4`XJgPQ=*5$g#2G%ID`G(8!;4@we99?UC9Sjz18;h%>#Aw93 z)|STu0vA3~TcW?EeBt`s{>UQ)f!om}WVziATPV7WLD`GF@?ayJ+>8c!M=SoKm}7>8 z*0)j!HZ!X-5hOhqc}*vw;o$i^8I^?{;mX>0BCCVZ+^rj~FMY!?V9O*rA-^BuVHE5S 
za|Nz%r?}z$mRhh}jncXM`rP&*nCaY>eb&1^dFUA6-}7x8F#X{GDk{d?W&pq zNOTI6m%`oMzZ`V~%j93|OOXWj6=kvH$-NLI$pdz39>Clsg=4ERy zKt5SI$PB~%%VQaAAx3-!XltYLr+={<@qEMeJKw$HDhFKuDEFTSh5hKg{|0_tLAvgp z)1UvFfBI)v-d(j$zN>c7jm{0KQ<8vrqY40W?+2fMF@K#qeJm#D=>y*9Y4LEvadA#w z{K1c4`;mYQSE}TRKbX`b=;}tGS32^XB5COqk2p{H=_-4fzxn`s!m<<4Ft=FSXEj(g zKyEnY(nvox(hB})>+!_2RqTf?)oqNO(0In9bHN5VxK6B9yE)LADhUHG9j!I9sTFs% z?-S_gT(L6=KMtPVInI!mAOVIHyoa=q)RGFFmVfBkh{owC>D7qMTbCx91%Lke%a1q> zmq)#3Yb(6im**GfUcco^kZ~>MkbJV_O7&g zPk@`rscQl4#|GBHEMK`HfCfD{LcEFTaHq45QoV_DZ4oQO1*`}#BLF-=!@t{`RJCx1 zAp7IJYvFLqPJz|+Y&yS+X6+kTqYUpH{ow3#f{h^B2{3SuL&L@ORY>V)$k%E-0j4~a>XhjC~lduPwwki}( z)1&NAjz&8#T73BXTiT^E)EyEd4=+6e?%iPpqtTd5Xl2n|>4#P2=~_o0+WN^RT;jqG z#*Ib#g0kGOv+Hp4$5g|vQ+o=V5q|zh@?CP;oU4)KrUH}=_8!VRf3#3%?nuI?L1NI2);+1npGa(DlnOFpwl;tB~ zw(0kR42nL=Hq$om6xJXMkoN(|heH>REoyw0gshi?{Z;Sf@>0&Z~({j zaOdbe5aZEol4bpMaEOa|@PrxAYJ+LmQ3&MaT*S$p-1f9UYY8PGiHzXRQN87VS6y$m&ijO`^WIT?k8LQF>^9Lwd-Q-ARBLf@YoTO1u+?AMyht8%m+ zd}(-6v;K@W)D0MY_TBOLx#T&%(dCa8XLC|->TP->KFE>-EcNIVTBLeYB}EhrD!Obg z=05LBes^%b-~WXcb{VO@f4fVD{67RKmQrsz=||`0)PEfB(W2;K7cM3wd{(4z(7+AQ zhBw$q0N>j?l$#b;W5zzjm4J9V>HXD&%Tm8wcn~d!Izsi`jP&tmiEy-hChuLyj_aeX z+dedRz`4Fgp41i=+s8vy6RTb(wg8pq>VewI>F@Tjw)K2uV*=HIi=}pL!%WXT_GcF! zP|xp)T8HRv4&1`)h=M=y_g93BIU~>0Itzn0#^UpxC~`5C*4M_p0)&=S{c~b!b#Mfj za?svTU$8Hn=|*&{7sp9pbC2lxg!qBPZ?|v#p0(d`q^5Ja#TZwhs1ScOJ+wST`UGrwrY@vi ztT80^fRTP<@}Yh1Qt2c#e_c}xwINPL`eL{Y=V)@h(6Z7bbFw$dq*gOHGIU!}g)WKt zIL=xjw{JgJCb%RP+duov@J#9!gpYgq8_RdhUbmKf4KQPOB0v6ta$KLoUaQJl+%s}m z@wPmUHxj0OQD({)<(6H&LcRIb{@nbV=h~N5^!5VKU`D4iO#$z5gU=z#jw&QPw(9<^ zo>)!?%IMTHXLt|X-RaL{@-3EOVpVaI>KihNSvs=wTsdKQh z8bi*HqIvia`LI$`W7x8+G-=f;TyN> z_{z#(Je}U%4509V|J(`57ca;&w-qsh?RgI^potD<_(@B+IBBp{f5WpuOyBl2gq1*UrZF%0 zk3vZ7e9tvq*W&PkPb?cRuw?k;MD{snatVlC9;4Z*(?bC>ypszXMj`;0U{muMT@}|A zbdL!e=b??nNk8e}Pj-dyY!%%MvpaGQGuT<98Cpih%Asc~1vOC@fXE9KCWk^y099 zRIXK1Vj8A3YKBJ`+w-A)%NLkt_Tnd<0|6UFtWeKXM%iCEu6&@hjwA9Q3yQSI$LqS= z(Q)|U7TiN$lJL#6UMe=Hq@tnyygLl>W!tlg#JoA|6ajG*>8QkP9Cg}2D;!`Iah2L_2WKE>Uw4V0TgPcGEaZCr25572(A>zL_Q>^|!DV`B z!nMP`vWC8#lQ%}l>jVWatZPbHfI1YGgQP2GX2fuofON5u`zF|XVDj4hdOX@KblAXz zaPj9d9^YKGx(#F89A(1pioye=qoUeHTSr$Fn56nW0m8t@-F^HV27~?MJ`iZ9rLw7y z<&rcAxxE9*xHd=ThoQ#wX(BOar!LBEGBQ*X@_yYy?P3FlK-&*4aiM3>+QOuB8o$64 z%{6gbPFRF_$Lv~n{uL>$#1O$MkxGm!h+pEptLanHK%dp`Q zGm!9KjLN;^(lqj>>*TX>CJ5MVc*<|orYTJ&AAm!k5x*%uwy&<##$&qu;W3}ASI+i5 zq1W_By1FH(wZ7wMe>Q(t+wW7Xd5WIJ4gv1(yRV+8{gu4uMTq;YVCQJc6K*?s_CVKf zkA8g+>A5F0&M8rDs4yd)w3wu#7vZ6#hm$4#TonP!ba;VM=ai$pEe>$y0g%+n zc8fvP76ibB&pQAR!GHlT@W_0+JUR{lZXRiXZ+zpfFc8C`>uh6yEa8-41gg`(`s#;H z_m-qMmBUzrAf%($zxTaQf9Ln#<=NN&{69bW(I0GK$=%OCy=wcdW8v&dNX?p5y0JU< zEJT`e?2Y9|-otcq4L(!TcA$st?R^oV*DGl4@^Z*D(d<)f8G1AttRHbobbxXx9i(Jh zAyq|4L|I_kt2fv8V`yffyh%r;rqU*qeh|5#vKHwo96-8_UF@Fc#$CZ{8GV(Q+DW$v z6db^}^jSrw5{tH#MB+%ON#$9~Xa-$dM!98%-HYVrMei35CT-x3$erkb%=k;&*QRx^cOwcBK9bNtS5ST3-7GA6D!`%KWl;(ZF^tJ!CId{hjIOp4t0G%N9>g zcvnk3-U6fXg@~?$wFq8+F_=bYcf_^x)*!fnx){X-MQX3R12@IG%X^bp$9&IFa1I|* znCF^kK8GEnoqA>q8tJG-c72nT3!;QPY|cW-p~SnqyeoL;1po9V|BmJ=J!^O851zCKA-B%u=3g~X}YR^-j zOntUV3|b9e@=AHg-#_{%mTN2A>Xf*6S2Z1C{j7z3eZE_eCXU!Yxg7Z$|L`C2<7fZ! 
zqv!wTU;W?1tq7kiZLViFSN{QBJN<`y{}%kEmRg|}B7enuv*J?Xy$LULYCnd=KX zFCCe!3_RMs=<1Za-n&0PpN#v~cFrFn*d-hvC*Vkl3TcI>k~$b(!kTd6zDl!uVjW-z-GJ3vi)H1)_HLTU8zY{lz!fBTy-b5jpJpz+pWV7L^>%G z8l<336m99YW=nfxBhynJveT3tZxfFawG^77StN2cr?|5u8fusPIjhg0rE{||cMgeH z1HU$>odm7Q)842$owvT`SwAq@wJ^o&Q>;#{E&*pvKZOoq`&90rWtbvU)A-cho(|%( zXae(|?H^~IJUDX%GSldUYAG?WvnSt4TjNc)QrU-hS0JF*PO52it9LdBe&h7@$=<7T zRqL!7(P+wz*v^0Pzm*W@WcA1_>;ls`L7Cl2d$2!k~V|JIzApO(Bw@uV_fI9 zW-ObTm1eU@^7?3$6EYi>os(OD?P8byCV3J+@= zi*M7yp6}bS^QWp4moIo(9m=|Gt}iycQcaOJT~~8OyWuN%5S{R|bzv}7+hi;fklc%5 zy!_KIJ1da$*xg6M^K|LVzG6z}qzyyzjNLWxFX5jTXqVme=7X~8pn?aK0w^m`>m=x4 zs?>FQl#96e97bH7~;uAJHjFs7XnHJc5}BEU!E02l5cJHs*vv7P2UGaaOfE*Nlz3a3H8Q^mYPpG^~NT5^g1@W=o?Js7n%*o>FIYml(jL&VaKyH8VoU z4|1v1z;lOdxpPj;NIZ<6^RdaeGS94Zp$Dlp3I~=i1QCU_V9mZeY08TpYN*Y}Ek6!w zJ%5$)>%k5(!C;N^)gSbegmW>?^&E}d1$=#U$2L!SsbN-81=mi!Ce!G>a2t|b?8wt} z8sTNI3s`9M8u22pj3vz9s+;5Jh&U~9oxCVJlmwyc8zFYDJ#EW9yv~sulv>EzP@pSU z1J+ybYjflRwY?6q@KDV9!Hc64PnzqV43QW|D1!;W#@kwD)obgZnrMT) z=fczn3?Xsg7B60Bq!T`hjieH2;7wn2+wA?Oaly%4$jY?VyqC7#?JcD}S;te;w3~;- z)M14N^RcNAIP*rkC*V+i!|U{?vZe;~ zMwzuwg35&$;fW=PLD(B1fKF_{Z+?(@H{{OTjt9~d&ADC>!)&&DfHhHfrU2C>&g~qZ zn<1YWfcLV|6Wh_RfesG-*l3?G9=FSq%m1`cVXYp}OCTv6On>mb(CqxU%-60;JL;QD z*ND5{2RB|LWur#W@*7l%qXjMTM}G0=j>}m%D?8Vbs(p9OAbbxye0jW z+eEDI=*Q>^=@D0(Zb%Eif~u1b)W?```U z&3WQ8PYkaVaw}?2=BHiA&`LSWy{TONU;k=|36Imgxhq%RaiI%K8~@J_{)YeV;;*Ov z_I&Ku9ro1JQ$H>KxH&FJD&c)L2kKl8)}Omox2A?5Hx^3@ob56ls9Ux8m(-|I6FQDQ zICE8Lq3&t*+q}uUs2ekH!|LA9)$q2N-zw zjRsd^83l$$4*rI2OFMW|ujnV)zGGL^(7-1Bjmy}}_RB}Z-tJ5C!V$+-M%)gy=4*FD zV&!eZp5^o^1@Gu(o5FU_d$9?UY|m82Q#+I;!0{Nxo&mW*)I=5o?y4LqS$Vs&OI90i zrpcNWq@VF@KQ&Ep?zy=C^!B*zm<#vWpZN}VI!8YnKlm^QRmZ&Y?2mW%CfJukv-9J> z=vJ*uzxgw~ReO@GZ6i$WDiA4(hhfAS4%-r{KF zmC+l;&ts*-M;GMly9HMj)#u6AO6bwsX}TUIg~M^&>l=+&5aSF_#YS4#h9FN?-mw-wF->V!i%_vAde zqD@#BPwY|;oEg`ixU2d0^>V?K8L$^&x8_4jjq;PRG$cDFt~0MkZ3( z-2ywKAE$U~>LU(tnV&4zN7I9$xjzpnJz-G&^vg$U&-=a+b2AP?LNy9{gC1cmI3d}K zFrdWXiq)|$3dkt_pkjqIGtxjx{vg>L7qwb8-;*Sv@DSdf#=GqhVH1J~E4c(d;pKHU zT2=Pg*Z|j@NlQt}E}nunWK1#i6$qiOCWs!lt#Z#WR<{OjUDn(lG1m$&CM;V}s%QId zJTY8~EHYjsm^pHg^ZBTx4!u&z0T|s@e%WkketqiXobie)B0@2-KR=407}Hp`mMn06 zK*FMPue91Wkjlk&-S2Ue9fxjl-66M;Q^9&{o5fC%10_eL=kQ)CvGyw*fZS<8M*mEnjDH({rxal0O z-AVYr_IEID($=(?{JUm`hMC9}JAb6##AqTy4am*{~q- z3aqO&5ku{L{Gzh6s>s|>O#rD(WiXr4tf-cq0moT&K~)Ua_;6SI#vZ zlb_BrYbb=13sVqDK`#q~fyrzcP(8)FHGNrYo@SbT;w_sq%}-wV!1ho}Ii&-pcm>}m z6-G>q@%6e2)L*8WkF}UOQ_wwXvWyXYffkkWtm(xFl+Krt$alfh^MVh8{vr49<|}TD zNujYHsFhtdw$p~N(J`2@+gjq6%-@}FC!M$eYTs&?d!DYgzl^bNk!> z(Cexv+TFh-T*Fm;z{B&^lWNJSrT0Er2rqX!9}lK${HXly-S2b5|3S1qFObD(`o7B+ zUnmcL#5TW$f9*G}CRzjjrw>RYqDAMgWA9}ytS@fpY!UdK%WqC1@fCiOTiTU}3*|DH z_MrEMK5c?eo#wQ^rhZShynh#O+@n^5n-LYns_H=<8Ifkwc z#MuLFaQds5$bP~KVO?G@!`*4z%<|h)q^wzp;=G3n#pZ$E?o5gBJ}Lyqm>G~=Dtg0Z zD~&4?y6&kLKeyeTQ`-lZG4@5)H+}Cs_-HZ&Llk{FoBF_s_WCpeFXe%0l{xE=v6(RM zrK4wu+31~X1ABQ3|-#icJ(8yPELdqd&a-!W$PIz=<&CTv!0iB zOS11uLjKuqb&*p2%3QF?pVGZgGXePA#O!YfqVS_*LecViD7bn)iLHn4ka<(jUK%o9QrEhdRB6#XrZ_TP#@8K$t&5%ZR!^NVq9JZU% z+F;aFZc<0}rx{^2?i8F7Q2bl-bAb6f29@8S@#^af!?6`am|IGRs=D4>bV^y!PNa72 zBV3?Y@_|Z%2(_%R-eF~CJRN#SyMBjiLA1Z;8>_f9B57IULo@5cag*1Brqk}_RIld( z8dY0*fBINI*0q(dH7l=LrfKqgOG50!!7pESh`9rivQg1Dzuo`jmts)0@e0xnPZ7sF zYPjYI+@HWMj(*-$lbiIVeQ!3Mt$JPkb2)Adqo2a0obX_*2?kQ-tv!+di1yeRaiPWL^>Rh=ylpy_+u8{k{|Cu7Vz^0bNQUp zafgWER-Jv0pMW4O7!JRDjUGPmY^;@rojxQX`?P+>+%DyHeAL(~!E|nZr$l^jm)G_!v!s`KoIIIR$?n zu-&%X8LfPhFb8FHao%%2X$M)aCSn7Ay#y?DO=k&Aglb%fLzjC>-L0EJLyE& za`j9dqWp@5O?v|#aAP>^AKmY^0rhoe^2|HIVa74^8${RW=pKn2EOhE4)>>Gqzxs== z{#MlWSz;RL*e|KV;T6L_494SY!Vep)m$f%#%H>gS|D1#4)tE3t*4fvlH(wRca|*JWc_8}(Pe;Aigzr5D 
zM1R9d+D}WZInRbeS7TdVC(r;=x~vN_Ra79LIlx1&6XW{?E!AAtQTilZX9cFy@UMEH zSwOl@EMy!-^^+TWcw_N4ElTFp5;|$YO-1Hd6k(M)x1O+e6O2cjDl)6yRtCDRAKBE( z_<1~hGmo3U)^@ zyf(l7Z1Vlfh0c*Sv%%zvByUIps(z0VhxY68<3B81y17Zsl!$bJoB|i#cQV+vjd;a!vt$Y zU!%OzjnL}-eQ`#Z-HABjjwv<7x1+|b;nWa`K0O|N9Xo!WS7v|s+@82P?Ti4#8-Wiz zEIPl_&cE@#U+CbnC$&aNm3vAs88BtLCA{u*X?)Ksrn8j%#?SAh{$EM_wQGxQ>#MVa zmM|>@dHILM;1NHvN64`h2&M0z?G(N^Tw9$E^9g;Wf)*K@KB(1LmtU-PyUoI%e)?ID zAJpa#5yRb@?XkG0cvyvfps9|LC)1Wz$K4@!g_R6+ddPc{&)a9gI6KNIzv7?zQr%@4 zQfQE2>44JDJ-pC2l2;%^}%b zCbTgPmH_VhN~rjFx*lx*5+J=~kv{Y_sW=$es~s-V{qsqDr7##R`MarhL}?qd_wV?R zmxI?v_pO-Yf-?pkJniA8w?K-ZmVZ({5|axnl=e=YeI?cZuOF9wYwf1^&Ywq0(lzq> ztwmh#T}gfSw}WdF@2Y(!g4V0HxV1=17k`bsJcds11>HQzq!Q!=gwdXJefft_Jr%hI z)=#UUbMTDq?zZM>bKJZo-|R3D?DQiHxBD6AcEC!|KxVT+k(NE~x%9L1SAGPUcg6hf z0XMkv()FCm-7fmzpvc(=vT9rTGr$pzyk~+kjxCE}y5C4I*@+SO~|g5|;uJ-sCbdQa@8QW9tkWpRAx`k4+A8H=Z@ zD(hsSYKBXBf~)Pp#+k}ZGws1;8=3P?2zp^#?h=W`r+(Hl7N6&{Nk3S2flk6`15%2% zLql`~b%IuEh=|U3dR7u2*znHXCr{SJbFVv~J00F&ka(xjIJ{G0LzexD!8gIcHJN5p z@_01ae6BM%GvRn+Q`G=JGV_jbSUIfw6}nwm3{6+yw0n_ryqV4ev-+jVa&U7;^su#}Tql$w7$7cxZ%!OooO2xc|6pmL;=_wP{R%y*rU6ckJ zk77Z8s=^6N_Y7^Dh}h>@bkoh#qw9V6jymQIBwa*|3_WhUh9NyuL`&Ab%_FN&f?_?V z2IT6aF5b28IZ&%)8Q5F92pngbAd~C$6n)hbI2qm#kmrSQ>(G|J@o}+v?ez|PI-3|* zEPlU5roS(#GeQ63%*fhDk|;XCuB|Hwp-bb3#nxiYUS-Qe5b8HYCHOx%^~T5Pou#M& zPPRo`vRa*WsO~07l8^2vmpSF0Z4dph*Z=y3@OSTweeTo7Q8zw8f8F47P0j&evzun2 zHsrRw)X+YTU^k*yLBAiQCK4ZRQRi9fRrguHKri9mh4v{r-^N_G$SY)8U%;sAnN(Mzy0OJ=}u(#@zIB! zy~DUSbi=p<-g$|<3VZG1zyjb+Th%7uJK87=r~)*JKuOJJ8NQz%0SmbkajNBmNm$$T zw>jOEP>FMCXJwVpJoM|=|G_7z%HNqkcxU76(Y230Qr3y9-e-dLwdzY3Oz3#{UUH!K z^~pi4!5j~J7soW~ZMRJ?We(2DbhrluH)b(Ao*871Aj|o^eJNz?3A+}t;av093A3yS zExdaUbpZHhyhc?-CcHps4zf%@FJHJ6%cZ3Cck`qAWa4ECBnmf=Hr+R$NnLpM?|TLY ze9XS!#Wrc2Li$QtS||tA)wz?=J;7QFcN{lY;t2Nng%?iZsXk?l38vOk_mxRMJbG>F zq6BLl5tFY%k6cZZ|L2&pi7lvf*?EJZaq9bIhCfpt5#26hI1v zX@`qcVH7s^%>yC+sIp``wVn`=r=tP;$$|(~BpR$(7HidRe%>AfQ)9e5z9L!n#=VN& zdLdspOyk)Ad#)z+S`+nFnH`yFDW5?n9qDX;I>1%x85E`)7#Ou(;f18r1}j@$=Q2NA zwVw@o#*ovJ&Qm!gVby0CVYibq=bSm4?&m7%xO!Tfzxqkb!>z#%oW0)d+Frlm_%I_k zrH+4iR&)TEwW#@l3k205Wv{(WRHfsyc+pHXMcTp?*o#s zJ?;5yQ`SZ;KeKKx$xK?U&{=8oIIhKC`()}wyJik)2#Vb zX0kRct8;afr=_t!hJ#$3D}k;E5gwJ|eXyz+D7DKjljK%wN}t((HgA8}(IS9G5w_{J zl=C64>CnM1)XUYWFiZ*G&r=I&W^Y1V>!4@PAkl(vI2OS)5BJBVnqPV0j(fx3l(7p1 z=J{Tqu909DC(w5m5O0jA_NHMN@Qnji(A+Uf7hf; zWi&K0pWoRfhqOO-{;6>4Z?=tmuf|O(I_)^ta@JtLMTh=NavJ4AXbb_|g};d~5FWzhzJUMeV-S7egC6&tLuBSl~ESxfJ25 z9a1yqn8;0QbZpMRuIbWY!neN8%W9QN$Eb_Rq+y!4pEWCR7=6(=W9YFP#b3VU+j)rg zV~T)d!0m_{ZJuGN5(`Et95-*=69`MgPtP1X&GuZF8xA zPtF*ouVC-9_-a~8Uae2%;en|ivyf`skOy?fJpgC^S_^i5{>DMJwFCx@k|Ot-Glo|V zXR71PVcj_WjCcRgnqaEVdHuEBeed-(cJQ+D3OM~4fBD9og5>_*vo80Niy;6y{xrBM z=s)AhwdSh)tZXxvhQWo)nI^I2doEx+eXIm8Oh>#kmcgsPbo}w(&c*LI=yiivSqrZ@ znnH7qvK{wpH*F?-Nm&O);{}wbmiyqy1knFIFU|Yi&~%!J*%X+Xj4WXOq0woGbVu&hg~!zUP9$p2i%rPi!EO0zY;P~8 zoA!cEGp$nJ)-spe(Rb6u<7A%v&MtfV&=?}eM7FTerWK~@{NdKOMqQW6eH*4fr#63t z^?~A|jGo;OU@UA3L_f?ud3DzF_doTAcgja)<^CSzwM)A^1I+m&i_@boq$(NOH^PQ! 
zgojkgBX;W~AWRLkB6WbF47n=6$9kVyPC?QLTGCJ-p*Ne$@3%`Npv^mdE15k8U|jb@ z^`^&^{9EGns9-gD2RYG+vtM0qw$TA2M<42b%HEW-KW6;b>oh#Y7(9b(MBYryE$r)w z11dY;Rfg1&vg7RxPPqp}?$7V{XeUy~VtVNvTwd=ypiMpy@C~4B5Zhh$g3+(XuXX z*ML=GKA9yx$UlOr7Tx~82gf(2y9XnVM41PN20Fr)#d=MFh>7S$`o-Ga(}}^T;}dfq zmUZ9JRM{uHHdShAJ5@GaRO!lod^BA*S{L%fS^v~3U1a5~KgUM^R0SDeb^o`EHxPGhq0+VG#6X8KFM{N|%8dTQgOc{Q+hGC!ECPT=P8 zql=9P^!y9AyPB8e1GjZf>g=cW?iTTOJcujro8{dED@+%&<;C18=?ldU3N&$k( z$h~vUgq)r&jYFXhNVt%A!}B66hWbe!cPJ>?MP$}1{9hqc9!PGF;d^V zjfZEncolN9we*;##cVmq5@2bpp2X;;;EP#>$6Dm(ij=Zq+QHrX2TW^j;`6n~756h) zgvfE`2{3ab(RfSTWwaS&wLeFJvbLZVAeTf9a4t!nIa>_QVg{`05F?m7app`PEImKn zMA&mjrV@j-&*$QAk^837Gy4y>qyWo^hH)<4ruFkd{Q>Y>TBrO(l#sSfuwlohg!VK@ z7wmKO{D+>jU)H=8*FIt)77ON| z>p@n`=6oVCL;_KDzW>Hs$Sec8>~oM{X0v)&P*lz+(<#t$P3Rl1->NqF3AP1En3w z)Jv}XX*GIzwYV(7fUQE5i_EYkXNNAT`{&cN-g767{Z6fO=(Si#26_0P$U{W6bD{#1Rch`t=lytlc}kd@Mnyfo5Ngnp@^7 z)m})EM_QqTO%(=BW5}m)d(zz0azg6`MW`M(MLol= zPjkGxfPX;P?qhc}e)*?I{Pjp(^-nBlqjbQQ+B^UUsXf|_#J zM1~<7$Ic83o>`ecGr*rMZYK%jsax@kEF)EON^NbSVciJmiUBLv%Ai3h`C6T$W00>g zIp8W5g=)CUgTWFE_Dub84fpGK_B}B~0kPd}q6@k-u$zTC9bbjAZ80d(B0BuvZ++O` z{26{B!2Y-{do5EokCpCWU|l17@oH{m?WXhd^YJwJ2mk*6{KAFjdqnAv|NPI0j0jiBEqzV?O^KmBXkG5d*LfBIK`XFm0@c|Dlf z-gk!P#7L%BT+k0kh7r@al#gv{1#F^i(;s9M-mX2$?DGe=UZ1;uX)<)@&HZda+;^qs zam;jjEq<_+LtVdVD7b;fr(%_Fwi@$wgx&jK&#zslUswulQe3i8#uyl z8;EC{-D7vH!a(QN%8#B>lBI5Fl%cc}C8WppN#7W@hDvzN z4mLL52&vbT-EThF-qO|=k$N>e6E#hQH&D!3D;v3Pmt7z2zqtlWKWIu0mw$L-%lWha z!tmv8tj>%&LYm44Oai~nu`IHCqMPWXAfJ=Y9hKi(zwo?a(mj5#k@d=mbsg?&jUnA& z+gKB^Fps4IiSWcBnozZKq0@cTGm@ za*DYCLOevIWfI%B^2+^27jL=Zq&D;0=U(!M`~T*R0xv4Yv4Rh=th~W%-kGtY8^;sC z>I9QR3tdnJTRCAO4Be3VxN!vUnbd1?sevqvo87dC`j(F4M|1PmdIpJ28Wube=@jA} z*Nqnuxcr_V1}$X5nse7d%;jnoO84{uTE)1wDK6G~c9>*JhQde{MnU7kMIyd8+s?jUQG{O zc~Q63QvILi^Idj3l1=GhAT^d6HCmcTsoKOs?SYDzPkiVLt}l>icBk$5;)|~zWxiM4 z`M%lsqEQg)mvMYsQ-{sEx~8N~=O)cIEl|67hMkN?K3v`blhcrD$)Y9LuH30eq?N-5Rl$ z`T;kvY}ybXw}j~}98u$@>~pvymn;vNuC*WPv{S-rAhzix*6~<%yJ_)mNrQ@ruMs)M zMfJNrT_N_z?HdW>4#8t+K?9KqaZs77R*nDxXktPQnB18z=^hrG=;ckZpiNZh{Bi@` zSZ>ix6=CPyHJP9ei^f$JDWegVKJFaSc9UGRK=#6W}%P8Bpn(EwaWV zqsneiQOAt0*z3DrF~M(=h$-Gd97~Gx3~NJpl&lXcT4UC1$f0o_XE3d=+8B4+Y*i=s z?Fa6*!6DO!J!wU;57(@7XM0;FfEC!bD|Z>1N*au`D{b3$9eeA1A9f-%H6Yr<48yJk z^xmUki}WavLWk7MBdzZ&hQ_Zd+OEYCKIorKR~7wAM>>r~M#jTyt&qeuEyF><4hvmN zAId@H5Mdmc3gOCZEs$X1UQXy&&sR@3j2Pb2CE8eE8(Ub?#Up;(wIop0JY=(cLO=54nSLP_cc(F&D@mN`CVX5y-P zS~=+mq8}<%+B0UR6(K!>5nMGcj{PxIIjRm?h^QUEd;OKUU;mwdn!Nut#ap)8Pn)OL zBb<=+rHt%EK$ zo0$!EZ6VkS)ymRfqEG%jXSy<`&ekgzlz;sD|L(1~Z@GrYRha^9$r=r`iftRNHOCSV zfo}@nCAguv2oEd`8anTp(=w+r5KzpZ&iX+Cig4rzvTMxygFf`JE-npev)cXZzx=^B z@^}A(XHpV6=@nr+@sfFZ!3a z?mnKghw9~)5scci7Z#4=hSjlg{`$gI+d~cuSi!kWa%;d|lwK~uDgSGsXDvhKE_0_& z1kbXtYcLrKH`r4e%q!~xzV$*8OAgt)C)_JEU7c!{I`GpGNDaGKKbQeylilWZNQM? 
zLFv|8))6*0Y!-by$d1dQp>*|EuN#`hzZt8;_cr}(bfEnpz5gG+zX$Er*k%1oQ>IV8 z`n8D*^X`+eJWg{JPkCYA&#q^moQw^;IE>9iGn+X)>2RVr^zXj)(^0yD?Ms%K;JJTj zyU4$?HMspB{^;=+E-FM%3{77{E&fm0f(&M~Sv!zm@3Er+wuNf;o^1CG5WX$OoYu!q zN3RF=Y}vp5i@y|~aQGy$I?>mtm ze=&j1H=FW0M7>w2SfZ{NiH@P|`>sp#>PRY7s(4}1bWyOYoskL@+tOfMI65#?q<)QVdbvhP}HNcV&DthGEYWe*zaGZeX9mpzw4F|mzX zpQTpp9;yvfdRt?Npm`BO`4(F?6S3U58DRsX^0;ElYelHT)10f$t?C-O%WhuNCwaEqYm*nz;9=@L&ByPM=zPsn zUiDj#9g}6j>U(s*0y1~Bgjrj2ZdF?^^2Xi16K46IDN2B5H(Xurwz(brzTp#JyL7NA zl1wAr=u>r{&!y4}?Q%{VTKzsSB7t{2!cu$X4VYwf#2?OX!Qiv zvSEbcfT&UN&bt-du!s&Gm6rk(nkmQ&W zi|p~r>DsMGaYjItA!kNK!PY6H@pFo;F1f^s*0`%$grT}V9+jE2XkD>DeGNWN4Q>wV zQkbKOR`Hb1Q~4S;sr>pbS^sw2%^P^Y+n$R(BSa zbvNCvgiK1yX2c8quE8R;(x1U+zOVJK0Qmt?oyZvyh=O2lJjxhVzb5=lv>_Lp>xn|G zcxm7e7;)IKft~@fP}DS5eV8v!IV`=(Sn%5Cw|mYDpQ}$lJV+#64F{G;)h0pEK$Vly zV9rC7%LUCr0gf!ED~#p^ZjGw&USP>CHNolFV7***-ZeFdrhNo>27yDan$f`!)wm)ArnH$rIw>tgmZCsC5#3P~p5|F%9uv+FOE7 zeB&3ausAX>&GGx8#7qUptN<3GN6e-pd0Dm|EmtiEuo0zon3p><5vDKTSl8_3cTJGB zg077{RS?hEp7Z_A)tJe)m}uKaMPo_+E-{FiW>y(nzFN>Rhp3h+i2b5ZO;!UH%RAS) zukraGGzA_PiMNjL;ABiBalPH1V$T&W4_-cwl_QqS;LP~UZ+-HUle%#BB1Ps80mZd# z!tAJBC{+uT88L>dns&`KlRrJ(M+WxZ_-bFNug5Yu+K3;PtS+D0iM8DK7tE9W{lU21 zJN0|4jA9y!B^Qdt8)jm$3yqcy@?rh((!s_L|Jnck64xG+M!Keq00;nQ24n&78B9f$ z$6%m<5C9F+l86-%wWy&WB0QgFs%9$TEl`J+2xTvNP$hK)&`Y3wsL@S^+m~z0*f=hl z&4IDOE-L8NaDdi^(-yIwSX)OyWem)7Nu{j02AL4Gc^`~)a*II?bQd#<4msGh(Bub* zos2cC%F|NcKrux9y&Bx=Jq#HwX-IM7#_6a0NA}RP?HITZz?XlgTj_l(UithB=O5>5 zOUJG|&CQT}a_i+Oe)k@iZEgSF+rQh`fBv&w!_CIcwd(-AAg2fYqYs;7Sk=A^*Wa!V z#+3>}hLyD9eNV6RrgMeg`jhhgy>Bk$Uf6_upSG-FvZc5h;);nKX2x;!qV{|$;Eo(x9y34bR$+ZYtw(%E97gXd0@8<^_cI9 zG1fr_^g(X4Zcg(98(j^IY=&t?xt(je!f-q+HZq5K6S({1-0`Em1&j0eK0GW8+Qn^R zooK_O8OZqIqbC^;aoK!6A4JXM)8VWC%>UdkfQ}B#5Xh2KA+;kR0D~Y<)k|X#FkB?f z0}V)7Um#7QwM-W09ysX)B*G&pa9r4JC}!`MN%pt1vT_S)Lr@7Usm5uR88ZNL_fBlS=$@v`$=2OUmu zuFOf*GhHzn!Ut%;1J{o4{V>;dA1K)`xro`K$S?GdN^VxW&W^r2vM#%nEo!k)WHyGO+x;B2bju$ux+Ie4| z`3y~5YfwL(=GHtfg`Y9ZeWSbw6^6zgdQpo>fdlnwcq-3;7P;5{h*Ijo*XG;2VsCjz zlfdxE8EWU6F;lTTo}NLWimyJ4sl$O(TV7tP7>4@5T+&$0V>E3U9n0CBc67edJIxdl z@}V*8_U%{d=jP5E-PP$UllO=^i#92rrKX-mTiT235{kVf1bV%P+onn9v+$gGVX?{; zm@Yc8Vk9z5qAcRGq>-!OAa-{2IR2yEfF(J`D0r~q58Ixflei{^8+6g0#HHY{=aqEix#zrhJ~)xB?k7U; z0^Bc|JhU~-Tj49+^nNQjcWSX==XalZ7?OR0kqBqser&9dy5umyMJ6p3w~7m7TVgz} z5nsi;aHEXFpWeaBo<|N}crd6Fy$z!k*Ctc4J{#W4b*&X@fT!L)g+xQXdhGDt94E2_ zxmXSiI4W_8#m8N(U+JPpB#&n?^izNkBzH?t;c1EK1&a%UtTiE!t2OlA`V{d@4zJjxWNeaX z_oVmF+CGV^AfRICF-@H^uw}~-R;Oi)Om)u$G&6D&_L{j61i@TLUo2w@gCJqTwghS{>?LgxkN@fCYz*w)3Hp*uf@ywtVA#Fzys9)0^ zB23W$HX5~hLqmZ*SV_h;tGhNfxwu57Orgpgc7Ckxw_T$qH#QM6+FEnD5*_-JPY3UK z1mu8kPUguzR41k`Idn>!RgSXWHD7KAHWAgH$~Y`TwAUdNvk`7>Ixm7ze2qxZ8#Jr4MyD$0tV33|doq=F^w1Rd!Xu+PYR>RrmmhG^MoQT}M?8~0lU#+?Q1?@SmngV2 zm<8rz)Efn~fLm8myBoM}m7R)zceZWhFKI+;xz>u;Wr}L;cu=B>xaCySV(WVwli9#g zpy$2kZjIZ-5`L2dY!7!!;f>&J(^kcIL+$u*^8; zh2sxzs_l5#^QitbgdCOIsPV|-B_8DtW2u$;#d^>!wL6^?wZ0m69BV<wMGNj z0o6c}LqnYPn)H-YLA!vX$<`<>tH^Z>wu!bv;bFKk5kzMAxyhKIk4$4^LSq2p0ja(g zV5eU*kYuI&{Um_l#N{OXpk=dpS_ylBe%!EzKGQJuNMNT8Q388hoi@Ms!Pp&BO=WtO zWtAb}#e$F|t$1TbANrH**jc|iJ)Ju#h6T0MH)P$0Qq7nAINN~n(zkDc9vtvvhi{BS z&{ORB=$FUZX#g?st2?=&nfME=w-i=-A44ojpd@Av0rd6{ZwEE(IyT7+WV&6y^~=9M z{Quo#y@_!Tch;3E$1E_8HYZnlul~lxd|}{US!^Em&EB(Cz7b{dL80sR^>c&N!{Vi0<3|_IC{xtKa;zKw z$Ait|x3TV(*On)w`OIFSW0+=EXXyBo@Z$Jx&EbK_B8Smgxi=*z$9II?XZ+U27`(n; zI%OT?N8ijG{wx`5j*2cLXV(e!xcR~MM#B?fCyCDO=fJB!ezf@`^RZ>`lr90H`bXQx zAO805eBB8aB){m(TEt$qJkAnZtLezC&VeR@bni*$6z(}h`#=4yUz6CdVGP1hT5Cp# zX^2!{MH&n#9}D#XK>!~Cp;Z&*+5q!b4j^3sI0(2^rml}WHCi?R3kFdlNMj)80Kfu9 zKwBG3aK?iiQ7U8Fkpj-MXNQMKRa$&=4R zlepA>;20Wn7d1^nX@!SmBIa 
zTvWY9u2c3_O-BoXk+>T@I~_ytkA6;7+CO{8Wr0L^R=5y8*WvJ0$e;op36-2Dtal1` z_ZO0;jJW3X+&dXk4$$J6jy6wbBVwyFoc za>ex8Gr#}S_y05p{lMs{Xj#srX6wSjx`XLO!LT$Q&$>cSi-QFSxlQT77QD*#_s70@ zbGt#IR&mbyv|VF&s2_2eaOi162XhqYW3gVT0?f$yVW>7|Q_2-29|HHjHX-EoEPik|b0aG8I@Mw^dfZtor!3=A@doE*?e zhGlqm`;0bw0$E$t8gB$Jq1&T#)h4$h4SN0Zj3-pKyUUek>9oLhO#bOmE=8NB>2d!3 zyd72!Q$zS!r`cus<}T$DW; zdv!`0CZa>uzI~(O-SJxDsiY^&sU6BO=w?ULUgV<+2uGlAoi-!gd}D%iq@e*y4m|lr zhmcKaqdG6qFs+?tTiTezHlJtQtTY&F zNEM32m}M-r-ce(fvOnU~>lcKNhJ90&;cy)$!?4_3(-w1+r`ve@A`#158#djxOQU|s zr_@gez31+)bWzJzX<(0PM@CH661|2A824c`YkX$lA4bDSNaQJXB&)}fB z6Ym70Nb^7$%uIWW<5DqibIFN)zydY(EHeFAm@+JMBlDcT-K%2qtOg%D`>s)oLSWb! zaA6HczS%VN{KSlg{3zq|2Zu^ot>(oSjPHl6VPLX4D=^5L*Isd%h2~~96HB=6H!0b1 z*J#tFb{HN$mG=#67q!E7Kz4Ynjn4S_f>ij5sr3LhnCAwRfn==B(=GRQYGZmD_gptc zEQ4NWChB(bt=!1fGhuk#LQ#T4g$70)HSc3`gvS+@zEpJg(|yn7iYhZY>&5BXE32+Y zePr$6!`l(u5{DX}N}oG9ZFm^dz+{xFD~pk{jJw#a`K>XD9S>JExjt?-0RU8wQX@%9 zT`q}n;4h^boUBR)6&na9*(LSn4EhmcWbi@*f)LrS29zdkvx#CO974JWhM}uOLi(@? zilci`JkjOL-}^7QuoZqItxVh9Gc-aEZcxEzm+fG1VI}>@^HHy@k=u&v!PJ zce0s6?`N-HoS%64^8ev-SuxAuXa}{D9CrcE-F<^RuL(t|U;DwW;taSg+b?@3Xe3Rh zhmwb&X%es(pHAb>n#BIAJ)8o6`Q1`^Xd_ypGfx&JCz z?Vi5-F%lfP>Xlm0&Zu0-gk|ciQ6f!WGQRTKDrM?|)_E}w?tSlXzH#zbjl-@`JIsOM z>OZ^m%Jdf{z8~{67qw?SpSQ!}MVhD_(C+ph{Pd}?+fQ)ke{uSiPdzVWF_y_RszK8% z4B#SG_elz%xE^4PSx^h*oFF&^5;@=jZSR+Iv^M}Z8o=%W0JNHt)kevn(+4KY5CV;H z&J9<0z-;yOdZmIa!O}eemE1Z~PuovGyWvQ{%igK3RI!$2Xx& zNdU(=-&}a-yz528U?RlhLxZMtlHncinPD!#bcnDqP3GG_vnOJ@pFg1?_9nN2J)7&t zlaF+msYIzvRS8%6a!Ku9>?}IY<;p02ZnI)zZJj9l_{?PV@6{A_CQc@7&C~`*AvjHY zy2|Y?680+(Q<5=JaV#$@wougIp|xa9#>Q6-lSM-&LUDs4HTxUAlkJJ0y!$tnXT8^b zkzDp7z~6=LnruNicJ(Vk-Y)h&_}~=YeDoK*7B)@y92QAV%owI&a{Rm-dYa#g{?cc2 zqkOdM3zhHPs$KWi-|u3SXo}R1VKaW(sn%JD;=|*B8H)?{Fbt5-`euq24xOTFJ57)6 z{!g}vVNKD)R-b);Rvjvg`QjPW34~;=(JP0W-2EK0lbxA#HZed6qd0 zWrrtk{OG>y$(m~#HuXZpPF{|Bh>%OPn{<6+*d`dX46Ex-Y3e@xgO}RtZ?+%5`<;6b z>c5OfrvKX~`S%}YNyGXLu6dG#9>cUNFi@R_iIZ}>AL^QF#wO?V4EFg>mqKPGnTdtr zI_S0%&AvDo3E4TPedslv74?f@_lQBQS@Wq`)zjJDM;mX>4Q7wrl6j}X!K=EpDFN9< zCLzCZFBzJ#bfGoFyt)64fBe&Lz5UI*zwtDV(4!0QsKFN(8m7i{^W->m#Z;7vW}h@{ zxhh*j&qa!TMKibBLSfchfpH;Zz3|ciF%bqd|lu*6oWgSzE2LJZK9I7w$)TH4iH8 z2Hk;VmGn%uN0?%)jyv|eQee>F6xQ0tHFsg67x>5>x-KV=X(gA)PpP5WOFAESMERU< zLF}yc37vN2jkp1YrMe+wHRzqwX{Te5vH9Jd49aqHSG36tA>jCBQ>tDLwAId_onq$D zqxgtudaqjhAB6o#x5vmO7=pQxp*i3d8AqX;DfDC9yr}iMMo@)2MslK=--cx{6T5Oe ze83IeD{ez+((1E|!AdaMET&71dPNKdt%V`)cUYr)=97goa}dz{$0(|lhu2Z(L8I*m z!Yw{gg2Q%M=y@QTBFq)ZK(3M0<~9W!BO1{shVe?p^T{lgx7$$ECb-R`M`NXTq$E`}Vy~2jFIBwI6+6*t_`0&dW>xDmcA-dUBf2hb-6IF4FDo zXiRlnqLlX22c06GCZvA6bmpNA9dgZx8(btRiJ9;vu6jSfUlc-;N!aN1i5&y^g50`& ztQn0p`<1EZe!(zPc`a`@k*L-LINclwsSWr7=Gzk*(scH|~K9r=- z(eIz4vkqC}OOa(W@}Sr-a;D(fzVHc0AFYSLWhtY=Q8fJlIP%+P#`(i5o zM{k{c=P&*v+JYsZfCyqmtaPGNKzKkf%L7^?$H$ALHC{xA!SYA}pbR(wXacTaW&pt< zpQs(8WE3=hn zqQCpiTDo8Vc~rFhpFjL-JR629ANSI}XoUBr#Y_mZ&D#To+Q2`5>B5!YeQxd1yTa)> zxh72Hl1kv&J)i4E@_5(w>~wBiwzTA~tvW(iMrR^s1IN%r`yf-fA>S6`+%HvSOLpl~ zFIt*Y`Ou+K)}Q9@jt$8#jW4bL{^uNCsrr!z2nPc(+TzIilxbh@r*8k1>yK*qFPdIL zuDOiorVY%;mF;goczSMP*8i0-Up&6hpZ8wETl@V!+dtecOt-0>5Odm>twH7C&JXtA zxoaP>6D$Akzy8(LQR66MwQoJzd*g4vF=sHCrovzU)TQZwhk(`cv6*t$Ice@=Tc`ct z_=g65_rci>-r}Be6p#9ImyHf@=>G0IA66=#4XoN-t_VimU=POYudeilg-sk@Hkfv# zdSp8#KnH*Y)qc~;*c1RpnKN_1J*ZGwi!w`704V()HZ}qTfT27P zQ?xk%*bYef0IdJlfB3K7%>HDvSMyE(vV39kADJgAhWv%PCzt%jQ@k zb3LEW!;>gnPxK$)bga@xf{kjx96a5mjx9n+d_rzrkT2)e&-kOQ{mvG5-%DI0mki-F zv1B0sD{rz;CqqKIW_7fB!IRn>HJffGT>~*;>o0rdN_=K9*s+e77NzyG%_3%-7ys-5 z|1yHtCO>WJ{)nO)`JYG4(|k$p*VR^L$M@WG5ua5VKv^y~JCyqdtGjhY4b{w5eE3QB zRpIjQee3_};$<7{ANOL2a}yuE^2N`r`Tf;*AIr2CK6~e*w?7t~xt{T5Elj@l!T3k7 
z8ZLgo>o2rLc51@9@G=_e8apF)#NLTlSiD@`$l zGBp1B3%t_$U~_x2Y3yK7qEf+6)n3qWkyr4GA(XD=cg)cFp2l@f6)SU37qmjFoQYv< z=-Ih(*ilck5vkj{f85d#i_-}d%espZ{KzDHUSwLBo8|UE*2SgZri{nOb(7hK<4;nH z11((CKui-ru`B>(!1Plo+(>sVh5< zi5Fx3z4&NE@)S8@F@^1`i2zQFGo*j^E6ZOX*8&Is>D|_oJJQTqkErYvyBGtghGFn}aK^QHLs9O!QOS8q8Unc!zGDjR%o=zdx_s zht6}#8|{53KxwPdj2VS0%uYwP$r4C~%##wr$3ZCKx{tMpwifPU1)FH#mU~%Yg{s~j zHmA)#!w!_zn&_Gpt((Nbp^l8Mj7?95T*q5Hr<$rIn{4Mn1m=#$RZ=!Wqj89p9$PeD zkwyf)M>~B0la0+9rBKa9lV9{K%xlS1i`Xe8b%}2hoXgh}T|q;#5HJQ{=`2IowVJt0 z4}G$S=svP^@un-LHjC+?d9^8zhLx7#e08cz6bF8|&_vf^htJ+D^vG^U$VHHO(Rh-R zuyu>410Y${3(6U5jan+b29J?uL^ZhGdZo@o9n|!?Yxu#TZiFj(ADL#@63TDl!)Z~irXkWwP^!cXEl+G_7uLrWzzR^d zO$k$-i8h?Yq)tDqb}<80@L@rPAg%yI?dN-e^})j*jtLYR zSn2^~`QY;%W?V7u!a}XL-`IoHQs>kt6HFX|0}k34B1b}Is89}aCe~pJ9d+U>?ed%5 z#;}@;O+d^`pIVmq(%1v6@+t@q<*6JaYld>9=FC!+X|JBhGLV@xa2!57;RX;o>JHr( zIsky(62?**YwqeJ0m;m@0tpY0r4}SJ26dCvO{e0lv66I6xQV_1MxzR88`%wy-Sm;~iiOw`H%3|ui{STyoB6;!ww zUTRAg89-zR{k?6w zma+%;{ufl` zUH;Pi6tpEWOuSDd$a@n!wm<4z>KkIv`JZ%7PFfqMg^6P;?^<4(N<=f0 zkNE|0!e~(ULSbEPUCik3|Jk>p-};xY_7$a?c#GbcH?Q5<5hfcjp8#;5TSo-nv zHG4dHcjI*bezPNvwZ+))ek$-wT$v3K>V^u=BLL*AYOrP0%!p-7BQyYZkOq82vCRF- z3v*w13E%)UlK_}O2?MUFeh4E+K;Xt^K*|n~puilDf=I5~4H}BZj!SWakq2}E;kDVQ zP8pbS0N6d3x&-3%GR#fN@(izyZnh#oxYt zy7N&Dcy+X3=|?xV|L^hU?CR9P1%CRC!O!QNH~42QYduAI{-*lTcD5b2$-ate9*2Zo zqk%qa=#Aam=GlvBlCHkm)al6CgQ|}6^}=s`$fI6d8krLLoJ`CRLlJ9)*4Wf&pgIZ#>(1rgP17^}hTwR&tu;j<`MpS^CD~ z8acEPgJTR#*yL5)Ja08_#0@nIsOY#z!;R_NtLRcmZBfe1{%m)gGZ3xq(P>w(VDgxA zi`{pIPb~UNqff-}Zll6*<^CS=4%Om}KGH%R?cN9yKO1vnCeSBLXS@CK?!(@Dn?;lB z%H`n7^%qBCJu@7eeBrFIF`u^*TZhM{X(hiWL$oPqP|kKwQUP8rr!dD3fv!$^yat!# z=>*2%ze&G2IN}p-_$baHF^KKj-*cI+LoS2ANoi?LInnx2VK|g4e z%h+g+pCqP^d}XwDbo{aK;cqX$Fm>bW?@7>uy4+3dMQl9p(y#-Dp5UtGsi7eO4QAHP zW0~qX8Rhe1d-t%}*l-Df zrGQ;N=qh8h>5)f2V!UWIXFi&j;6h;g!f-6dL}UGm(Um=}0h1e>;VnbTJQ$&akg?EE zBr4+9WRQEOc-av-Y=`&H=fo|)(_(J)n;!eZ&K|$+pK6eYeiJF=WFtFlO*c)I6O+~O z8AEz|V33c}3syJ}%+PVZGd~N^-6-U24{*RrIjZm2Q<2ZK-DgnV-nI+jy;#GeoA@Ta z-c2Vq=V0>=(RV7YQDb<=ycVE)`EEVvGiC3YB&rH{2!bLu)8_8$L%my zI~eAXgiFS_bQuHIR6%xFPECRJTyt#DjO4YhmbO}2{qoapquI2ttw*)EI&dZ^S4BFS zjJtJCX;Tpc45<|wMY=YrU1fosWYnO`+U9ySzFs8}bB$yGBaudrr%ee1&%pHHq&*k; zq@Ays)wwoZHz-&TzO zcrg8@x8gt`Mup}hpNTmheQV#%Hjbag+?(RSK2fJjUogEgjNg4Y^qN2|U_}oeIio6v z(J)!?ji*n_rB45?wit8ee_oSA#E;9#1>?>12R#dt)*tyW$qX~Px8yrK*PZQCa_eZ! 
zxgX8aXc6x#n+D!GFqv0X`(sMBcQP;|*D=48X?v$j1ZMp}ZHMLYe$Jq2^V63OU2USf z;F2D~d=CsY4BW>jB4VQHvZ#V6;1}G;G|s9U7m&=<}9nLG0wsv*)fs9c{#zEg|#TB}_#6ed+0m z{odR7gnEeRk>ittFpVBhyug9ONSCRwERv}grL%MHh{;dIg`xD}Xvi9awdvTzp)i~d zV+FA|BoyLIB~&Re=;9ruRcj3mEGDam?>sFe&)fZ$Y9)b1OwB$*}XkDg% z#d`D1nGm0p|01q`6@E9rj2wQfdg2G6-zwzs>)U_z6s~|MfCeQl5hpI7J&DAGMgGM7 zHT%6CnQ2s$9~2+a*PdId8$F2@G-bIJ@m-?uPq>JmfuBN9xFoncUnNM6Tr78P0 zTKiLXZ2QD;#SL%oS!Voaf4+U??`}GpF#Hqs$>%!P@$}}K?ZR?7>Yjw#%#a0U?)?po zKJQiy52*Lo$cfqT==`T&Tn+rnyZR3x*^s3BGVb`>e0<*MX2E_XF;2PE?k%Tfypv9O zgSrwq_yzLceBrg<)QVu98dj&;;1D}M-vVO-c&r+jY~fqB&Z2B)82~_}z;*ovZeRo> z09erV#wa<32Ymowd_{93h}qF^^}1H0eU2qZ@DLCzHUtO<(JCgy%PbdcF=?~x9(qOq zn3`b8W&-&EAfmw8DTASXTf;soa`-QW1-^TZ< zQeB}YFOMB^VE<`r*kF*#5V<;A^jQ~2)w8jH_6x(Ianyg+weaKjYB-%T&o%-Lzwvqu z1Hn_iUzv^Eq%rY@Ry@H{9{=XxoCj_`?py8MCK9L@yE0r;_l=H0dsq3M{rH9UU=bqz z=4an{<~9_Ut9e`9{vB?Vf?h?%TkhT~2~8Z_KXtYmZSV6zEGXx#r`FLtwP85W($m}u z>w=xv4V~TK1WY^>(#BgqEa7vR&C@}+thF3`5Gk1j;#l=#m%cO4KOILoB5 z`|hizY+w?p9y;vq^2c{_&k>xzD)~XxHX!z}qSpWTgEu-4w!>sd0^SQ(Cb7%cYK=Tn zJ^@qm&pvvLKxlq_yaV+x4r8`w7;~eJdQDA?+M#xOQhs)3yYrY z;bv@nE^MnN3=2dj$#8AxK`zaHk-A&Fy5=hPgsBd=&gaOh^l(y_K@x4F4%MUi(oJlJ`}oxeu^s8#lj*89D!BDy=X z7dsRSJv?Hm+8%b>uPm2_yCE3=c>!b9M!yX)S&!w)6La5jj0wDrWIt|N#;PKecRH<7 z+7H5D4vlo_yoDS zw?DRM=Zus2(=%&0taS3m<#B3Lr~q+Fcb!T-$|IYfjJ2h~0HJ7(7pvzij(BM!@1~5t zWeQ7M6|PUUr5VRYUbwOo0=4w9fAJ<_>?c}#?s<|fvZeB#bw!U1<+wDWJ+!G`w{~o)9nSEsoZV|4wK4Z?UwND(0+t;jfG|%p3y!?Sin3$_)*JVoKvIK#tE2tp|uqs z?shoHnVJ-8>$9u5vU|UDG@oYZV|UZr(n8VLEK zhW`SWKgOO3UJjeDxUO3h8c_E;!!hDlPevujr#|fkT`?yVy_fA>&DZPIJqFohE*7C= ztIa3V-~PW^dF@a0vecW5&HjzaF`qoD&&(@&3S@#gy0+&2Nt<^J=7v)ToPc#h){8zR zdt{EIWvl=EIbi$i@$XrT77aIY-l9cw<>+v6;}pg|iYC5PzOeoJPo*txQ{oL`qTa-1 zFa)EJ$=fAs))+h}Xb5N0AuCYI5^e%{s|LVeqXvjm1CC8l7QV%tLKva~%-sYPj;oDO zno5YMMyz)ex1ckFi?tbJ?BulXIotBkMI7f>N|7Q0m5dq-?c0`Lfkfu%Z7kLuf3tjF zW>L$?IJOTPb!0s9081@`V60BwRYMI3BbPY8RlJoFn-pNtgdj zWosWioL=>JeEjM>@F&OP*>~T1TR9UM9h@y`|Ez@u-J^Qu8Prb!+5|s%)O2Q;hCkk$ zC&hP@vL3!dDUVMyoI;jg{>;SqFY(NL7ju36(z{P!x-_ym$Ep{$G;{Z|tb_RZ=I34<>4akIPkJXpKz|S5|`(n zSyQ>s&9P+==zQWkyLUMc9kgW6#?3_Qc=&?^?8f=GxBMPg)I=${!7u#I{|o;Y7deEr z5$xN8!HO4Hz&rpz0Whs9O#o~jzyKM;0RS=pxE&(90Z`JxB-94L+z=sqR2Tvl20k|e z>LmbdR^2~17|;u#Okn0V&iOK>Aw`f(&Ky{yfz?E)O{&|b+>^it6i~$i9FPFeavo&4 zq5<~+Oc@3MtN;MOpy%#mU^wLe^wCdgwRG_3!{gC*RCoDpNva1Q+KjCh;j}s$b(Bv4QkN+y%)MIdsUj1Kj|V9OJ$dIpyn{AQ!PVDJ%tSXLGzlZUHL|N7+%in#cb{1fE8Y)3XWXQ5Xl+KkR=w5+0t0A!B5EwQ!e$)P}SlLXN`GI^oQDC`lpt1 zVQz44P!m(Vd%eRQQ!&(t><>O%@Pz4$5p80Bv8h!yRXB@|{_xwampub`T~#?=rEhlk z^RzPZUwTnYo;1^k8`=gYcgFyCUN+9VR$`1f{XhYse7@DL-#^Q=&H;SOCby1ECZg{u z$2qHBDy!`w&Caxiwg=kb@4(0_qGgZpw>*%vmo^4q{U_y#@zrpw@f6cs_NWzoEH-8kKNn{L=TaY` z&MD5eT-37S+Nh91dVLMoBTzd}%#FM5xxuc*Z6d&hp3;k9!5Nd`ZVyp->o7dLIPj*9 z`AXCzHtKvyR0jk4i>TWDPe<#Vz>Et}w(z&y-Nk zz)U#&1{j5Ejf&twanS|&UG)B4B4nD>=(ySr3~Aewvd=OUThSqx?-Zxmf!w7&WB%g* zH^CE+cw@`K==skiPuiSC3~}UvQS?!jt>C0tQ6aNlRJ%~pe(pf`fZmU znN?0iK2ku%UMj|jN5^(%USO0y6r6TG5YjL3y`yXqap zyh7~&<4ROAs>T`$M&0oQH|#N>jDUd-A|dOlWxlA$-Ci{XH#7SZSPI-Aj9`*)eV8V; zyy!A+f&;RpoY_`Ka=PBoEGqJr%ks=P3*zov>8P)ChsP=-y_*0&lq^!2QzhUKj>Sgp z<86W@n9zQtuAcC)2rI7b&MjgLT$Xz2kKfa9e1!Tu*`4p+ZoxK}AZHW`nDYRO#s;4# z*?tuBIf?G1^tAhU^2o3g>1z8g8Cij|`9MD%HtX`ZnlM31$)^RGGOvKL9I_J4hM_Mo z69_iwmo#9u*bYw;s;sfD@}yvDRV=y_Yt`aR*BLpT{@j-V0I5CzVeK5?c?Te$fq}b#_q>Zb!P0`sJAaW96TQHcu_=@7q!$_{!8by@CqDyG-RU%>rOrFR@k^sbq;TATxO+%pJ2YV?_@Bs7PYv9{Ij z?&RzrK2bHE>^`?VHK=aoaG}j3(x3FZ*Sk+AUYe}k?V#an+z z)na9$nliAK&zhg(KCw8QpLW9AIkP{mxP(HzBH{~(7<9H@e4QLW;#p>3)pGa=fq=Us%J9WBU3S3TDhm3j63vm z^7#HL;~Ea(&**tHK&Ih})pE``G@?^xSt4m&T=(U;4 zMQMu_Os6HQapR%UrRaGC;;V7bd($Z{Xu*b^jp4zSp4LtwU8=Fkw>r~Mqxhb3cU8T5 
z?NtxG%(xG@yr=J_jh@pNE1tJsv~aD~aEwn3$(+!1_(?C*8yP)|$rU6b44~Irv&GnO zKGztmLax@U)(&1F1_r=?G!nVv6fu+d>nGsq>Abi;!%PgOSS~O_Jlf^ojqgEEc5pee4y?h?Uo$OF zqYP$5(0St^Q~1&CBb%cmfJh?BX}e@MR34vZnTe1wKgwGX+iqSyZEt&^sVv`@2aKVg z95U!l3_X2Sv>r97ya^FBWJ(`lcrQ8uF>TfCadnLq@!jp?^jR6V2TUMa*Th6_;Fw;r z2yNYRM5lar_p56ml3+HI-3hbSb*SKdlZj!=Z=keGq!gI zZDf=4=4epN2!aFFBpD%XC~Ad*SFK`c^(K^lqQKUnO`-QH%Ce)&?Z70{*3?m>zUAu8 z_U2s8gQGC*G(%E<&`Znii4=Elr9qN4VSTEfIxYtxuY|&^g27XSR+qh~jR+4cL+VUF z8g2vB{32HN#;xBywGt zu2Ov`<2^%m=oaMf&Z?2)#L$a3EFSYIp)GN0v7k8&W$_6r)9#))T%JmHCRT^ZX{DbU z2O*t;6E$eY+?kRfgOaGjzDQ*k#uea7mPy>Ko??S`F9`x;kcrH8KTZHeK)S!NQ^X_p z3Y#x0#nLr$`n|_AWH$9hWLd&b?wkbt6d@MW;dMCv&EynK$`RZ~%5vDFN`-k9RQf{K zCZWVa(NiDj11}iw4Xn;1(*m=+Q#b~6-ChDlU}y=i8fEW zKBjXkSvA@1v0*=_nZ_-9$Jr02ll5}|%i1fRLmWcfDePdHK37(1&IZ%)mgjN%k^M5L z7CM??UKI05al#I7=?>Avl9i(*Lwl=ScUCpVQ?iN? z59TM?QeK-J8HWT&V#<6wr}hePA-K@9)!aSKW6*o85IKK%0W}v(3eqt{Q`2ar1$N8BI$rm0KINo;VQYao;l#Q8pHi)_yP)K1K`HZw$o)fW?OAw52~O~7U8h3 zp8-U`=PQ(FpYzOA)Z1pmqdRR!)b^qL=;Qx>sGlXFS&KtY#NmP6Ps&w1SblVCYEa$t zJy^4_)$bS_fqC;36fOOuOD~=ahGNwQr@D64L2>KxrC+K*;S20LZ7J(-`!jfss7r^g*S3?$Nn*QpWwz+elk$YZGY88F5Xyw=g9f| zudTd;IsezUe?L@H9~=qP(+)SZaqg#3YMh#2=TmSpke;zDEMje_`|M{gVe`NAn{N0V zA|IalCA%Wj@k^|*&jEc{QAk74&|LaBXs{W&AW-|W?`L{{UYuEL8G)sds${(MnW=me z%$*8x(<7c5P0TE!4_S@crk`7yPF!~MZe{A|k1Kc~AT0DBZlaVOM!5CoR$hPYim z!2ks#7?}AQFgcGD$u3g^0|&|9b5IXDw4lV>KKag574Pih{>tuAy3V8(&BLoGfKmPl-%ktFw zx4P}%?3eKZrl#ud<&xUA=udKc6G_u2Zr(VZWzH)U{tLtA?tsdvJf7i2cMs>F+GhWd zuTb{6K(K}r)8q0MgJJFCo+*lT8Wco{hcxFviHO}*fXdTiyBaXKP8}V?`dsYew@+(O zxqp8oK-4gHyc)Ob7Z=mxO}W{1cW0s(zx>SF(|&7o)M=ItB77L~uNjHmabu2n`tV)6 z+F@8f+UwvdP#`<+4%W_O4yyafj%KktTrQgAM%ntgS~dHXEtnkxOy-&y)_U)cV^ z-t?6ySD8PUc)%xj;36dWIiEIVJ?Gpie5j-%7e?6V7p}ZM(XrRQda$so)2#<@TTtxV zzheLDyYB86E9AG^U26Vp`?eU*{kZHRkpI;j7=o(3H*u}AVE@J1!xHOr`{RwjLR!IL zeZ4-cOj*A$GJaf?-js4?DF|!Ti@tT=FFbd*9UqfG-mRUS-U7p6bU3y0>V$3Tyu&+g zqT~uspb_?i(`4b@`)8G>LkMQtG1($t7cwd3g3h%OT!PGvgg(@|99P0@p~YQ04-94S z6kBtJM^MWaxjLjzUmUsb5N2N#R?BkFkK5^bqAj8~#ohLiqq68cx)eLEiiQ7r5s?EMZww9VLsh38+zWX1&7be2D zs<-pU)vn*97rDSNj~QlCIv%#6YfjKch)#@JE;~g`g*yiVt&APd=YH}~xnLjHhC`$> zYmUmw@qX252TeudCSfUl@iO)8_oY`gb-O-*rwt+H0#eQ95g_B--0Fut&C#v1R@ucb zw-?&__l~J>Kpd%K{KOs`Lq~XC0kdJTAkPZTY|l6y)z17#MD*dAT7^u*9NbPyA{M+l z#`4e!Z(3}hbXX1r z2Mo$M8Vo$G2`wHfb0eCq2!=@me?}YV%Y(0zR=x$*Kc-H{TP02h+>D_n60^y6-e!Y9 zG_%ZK8zqi*O$K+rG4dHD*iGrWC$)$BP30<=P`tsp4qo=iy~DC40FwgcJ|$rXmmQT_ zaA=u6R7p>U493pRic>LLt5}pZYF+E?Xm-z9U5qPY3$vRCjbfBGbVO@AY@F*<`GLDs zGEPsHC02!w4P9rl+jm8RVzD>rm}-`{5qEwJ`w_V?Y6dGxZtkKBDzQnW%IFc)pcqLa znMQR(c>>s~k-bpknN7b8QriI%7{?A{eY*9{Z-4O;fcUer5@yk$s7?^N@oxlEXf7Ph5v)5iKr@reJy~Ci`=wnO|g_45xdDnRB zVB}`0ia|D^YmT+&zrOZQ|I?4PKm1O00-O(BvD;f*_Op;lI(PFr=0ofk<6z5>_M<(n z^3Ibkc5>2|CTmKn?;m_-=H)vdi_+o0t@q*Sy2bwo3r8R9A6kR|#NsF$AyqD!>sPp0 z z8(Kl}JDWKs)+W%okt^{0NjYnb9_Rle(3a9RH+Iq1Ij7ATUJP~5-<&iBG@EM?ka!Q} zjlclFX{Wa!_Qxg<+#LWfEra|31jwP#EwauS5G6oT02xTfI2EaZinl{{%}!1cPbGHR z06?X3thi@A0OY{XAa=Y8qo{1z27&+p0Kll-B`|n6Vk^|{AHTa*BJ!T#RJXP_!xCbN zEE>#p`CgzlLAsg9UaPn+b#zlC#QnUPoiCf6xx8dH9}UXTpX|a6R#o?(L8%|r3T z?Dy0wS}os!`z@N#{Pv5nkYQjJ%dkKv>Mi(+Jy9_OG;jI1n`?Kw?9jSA9IAF#N%IlN zBjIY$7bo?kPP(2pk&CcQ(qjr=kxi#)Yx`06l1uEm0^PLL?DG!xzH|75L=0oma1`-< z28U8Y+iQjTNv!axuRcd%*NSS~*nPkA0b;r^Sstm$#V+#GB9+-MWY;;vC=8TiuEUwb z(jxKfCd%untgHV*?3eA&d$fuvt!E=DWK^dG*Xe%#@aEigBO*S0mn^7)0qon*xx$_vHQ2Y06Y#ZKF8GdkdO z;ctr#b~w_^gf&-?m`SGVPiP6Cbf^4E)JMR+*QbwsqL0*p*)t`+KEuLa`|@I|2Xq7Q zSh~2l{r!WfFPK04&p-eD|9(dr1 z=3!cxhx_o#YDGo5_8@E%X@G9pmefQqz!fD=M-V)(FK@M$*gU z#SCeiwvhp&WTnZzK4e8pJj5%5o~LV@-S6GA)aPbj3mOT%N1~kfuIH}!k>HTrjkOe~ 
znnq8HK1Vw~iZ=T75KXg+%IQwy98#;Da@k<3+p&ru>u%AW`GKJU^$YY~l>Z9UynD7v zP@bF7jg$OZLuy#BXu@7R=|_EgaAT>1{G?fPD2QHdSxxSoNi(3JDObr3J%d_K=ZyZ3 zj(Z+=&_QU&?FtOlR7)7ApPvHz+)bQ?gZpvLPfiZ(X#CH^^XN)=9q*n#4)q6N`k!}Ad+pk($GZ|Ae)@XIjvtv)) zIMl^sI7CV*xM}mQBueFp^Fz7GG=`l}a?FMmx9aTl{25L2t_Tl~x$A7aDNSIu-eIa_ zn{<-4fk`&V%~U&aZJB-VJ5Na{3QVo4$bllNMnD<{3-Cyk36ubaA-IQcM&O00Ko)s0 zF3SJo*3W+a@BVRqRDyL(vMU#xY(7kkX~sd*8kJus&)wl=gA&%Q+xVkC_4jW4gXI@r z_{^v$Uo&zslh)yp1-+;UZBN7t%Y)9E)oPG_G-bf*MvQQOaT!#LYF2c^KyUuN$s z5!EKEE&DHj1A5oEQQYrq2-W$-4O9JT_c%Go4RpwLj5W@k!Q~q+Qf%bvMC>xs)O>^F)9#}Ol6_Kr(42Yw>f-9e#Qay@8~vI1MiX z4(TWWLom_-Nwok#L5%~>zA(U8nvh0dEC54a0$%FYlkWlGSzNsG%bfe2>A}M309pcy zWl|_RKKbdXZ++_oriz{~hT7lR#n(u_6;vK=4zF8YHLq6-M@gHFZdG))_7Zc+6{0je zddTPE3B$BmtTc_ZZ{JDVhOP7G9bH|lXK&z+chrDI{djHgY{6ESpk_ntjymC<^CJhK zkFxf~02c8{g}4a8%MJd+7DahF7TxDGIv?wmnUT%F;DvjeFzmSHDThKtxO=6N$f}(R z?rWmQD(P}WCgxA$4-v{w4)mL!nBuBXQ93{4N41{N0K12+CYt!>qI~k<)dFvroRzmMWg^(fbnO#MoY1R)#en zKCP*$;qO>N{8-hH4rYWJ)TdWwJTE?fa`dmi_h)-GAm;PkWpJesdeHKp4?R7Tg25m- zPx<>FE8WNBXPGSXcbj* zD5Xh}ZO?ccr#wX%Amsl~l7Bx)k|%_L?>8bcTxLXMM0)Sb`>OVCy2rbno%M!Y77r4V z5JGpHxD(Q$cRU>rI6&gTO0cl(vVL}F$NQdX-(B71ZB}~kU1Vfr`1iy6{2QN9+-mM* z2by2a4F+7~l>1>?yBLA8A-R7*Tfze@>%L{&J8$z#Th{gyW91{58RwkK%g9hTLR2D} z!ZSSgccu5}?Z(?++nHiJps#s#XpG{7#qX`82a^HoP})avYv&15XIBQMNWhk~o-m=y zOs%sol)uV0H@kiwt8t3ouUe4=B-p|T^OM69hdX0I$I^p#eK8(rr@BU*GLLe4U|uim z87MM_eV(#blMj`-nNBY0wrK4Ju-L)S95(j}Z6Hp7=1B}4QPA76oncBt zgc^}yQzDtkIZh9QqjCcY4@bj9cO*D)b3S5`dfN*_|Be;G(>e!`+T~4K*NbW>(jEhP zlo0rABZ0g0Hk^q~gIs@-=p!B$a==TMh9{a?n(6xQMi{eNqBI0ehoWKR7W}}(L;x{uIece? zUJ3i}7K(n^XnE;*7&?M@1OQsihHLG~K-(^+G#io26nx?H)U) zm`bT4=YiZf)2$+6prq-(QNChRqhr54kbKyho%bHZ>){&;s+a93-3|hZ_;4T#mNEKEO z^a|9hoXA|v2a(RQ`O6%Z!gQ$$%^Mq)$dj(`F%!?hGV`Tpp3;v=i`gle5qoOqp|6V{ zm~cOD9WXY&xfP#s60Ea|gv|XtkGn%0aM7ncwd?KpDtLQyB{1E#(3XSVCm`iEhoqHm z)LY)ZKYx*I*ML(V^lgN#VH|;|<&|&W`qsbys|EzDM#KuBwxvB1%%nwJMEdt|Friq} zJy{1RhrnIK1l(q-iVmS77qgAaqG@f!SFN+CdT=}fvoiD_-v0W7@ABHJQE6Q-d@>HV$~+tGwD!Q2 zQ?3q9;d#4d4Ib_GdNZpwODUDvC(X_W%C1zm9(J&RGwY z_zy3A$$qMLUN(efj<4K(xb^PIk8gduGQ(M4dinF-wcm)*=>fcuUC&heyQile4{8iM zk@@lMNBVX?CIS7bvausmI8<+*nDa&%DvsP|V5zRI_%9zn94VW!*}#9E6PHnrBfltl*v*cbL+5-vP;shDhZKIrMQ zG@vWChTp*KgK-I2Ae#GwYJrQmckQ>&w8`O|4YY0G&h~A?j;vaJ8-vE_iu1&F{^BC` z)zEW)x%F4vob&b%2V?5cct`#(fBg%~lfV0we=`&5Ybn=Rj5zM@_rCORfAi13_?0hz z?LQqItQ!$+)N_mw@dDIojFA#EIWtQjCDC$K;2TrZd4zgNt@X|?8pBVv8Yji#%!_9~ zUVjkSMeqxwrxfcL)7tUamGI1{n}aDOxmz_5We}b7SVdp1pMfL@qFV!nM;ce1zLL>u z;HCbZZZ|FE69cias_ET3n$nIx< z@$-PibOFFU0D}Mj)dJ%IAQ*K7+GgNx17L6V%1#sq{iC6fH8Q)e|I;k@^grMK>0$BF zq~&q`_592!|K+8}Kf#BOTOenLIog5t1M7zFwxYUYfRISwcE4#Iy{>yBacaR>yjL53 zAv9`^>jw#GG9Q3fEW>w13yLwTQt-{!|LRADkI1fg)pHp?6?Mo^H-H;&*4r+Ps+kj~ zeH&AQh1_&j>N6{i9))(cF8D+jup|o-@e2CYmSA z?DUFNokYqv1UDKyz;~}cH}|#hr5iuo|J(I9`-iRJnCVZaJV#NdL$QvU;l(q1QIQ^~ zkFiY2;xR_jNxopA9Co8z6V-rw`h*^FcgfM@&>srq5ZCgT`uUGd^BP<1?4UF;GC56f zE9Rfg^a-aZnClzOl^7v#&3(=19bPWLpUhi@w$~A^z!?EVM|!@qW6)j~Bt7clSI*qZ z-*^$R!Gl6kIbFxMxuNa=(enH01K(}=6IMl#muPF6 zsbyuR-)*un9Ueq1XEqgh*_j&cOKrsCCo#9vWKMhv9=QFVeQj9P-?(vt)r#8Qomx-V>o$C$(rPLRz&+L%Uyi~-kAt^3o&wZoOBP@oI`dOa$40MBbqUYcp`PBbe)!b!WyeC*)Yqy=iXym444mIY7mnB#cRu zHOaOu8fh!;jv}!xp)E=I!z=&`p{cy3J=5?UK`mQ!WLkHpKqV0Om(yO|^sZe$H?p^V z+At)vZc-DIRQ*`T**5Ud_&$7Q?ws#@U+8t-L%Q3lx$4fJUe7c z($`iRQl^xYfGOhlw1|wN(~u+DDw+LS)o&%Uu-*)f9S2CuWM_v=U)o(8Dkt7q0>8rz z&j@&@X2T=nRQ1H-f6m-rx9V^K#R9{UBnRaQ8`R)ChWipS-er&bHdH&gl<*%FHp+&_ zN104xThg{`D%xxTa4O&a82{ALXrl+pU0=)+%!{(&+N1{lgd9?bsRq%+YXFZ5 zJ;GzE-e{Utz2S?1yC)Ohp7~F|c=N;iZ$xIk@#$WV52^FO0mkD7(Mh*_=NzN-rr8sf zL>6yJ5^~Td!nhOekig3qiIbAjD*n^km3QC#Z++r2-%1ac5Yq!cG(-3FL%Inm<|5ss 
z3Y0TO%?xAy*bC-K$YF1KT?@7SOrLVB+QpW_=m`sHlxs~-h*bHJ1LL1nDew^?VrQZp69Ch-2FP0&;~{`^0GvogYf zTcZn_ijixmqoWtKi2eA`I33~hxp;#((?yz{tv5vHf|p~v)Z$aE%Ice9Qp5T`&JZKdwcfsRXc&A4s`V6qpdH@ zKo39aHVTL@VC$|6&rUnOxmOm#Sa*LD>1^{;@#Udi4TwB)pj4+6vo=fsRW8~rtB`$U zdhclOP;q!Gm5q^1IFdyAQ{3~;k=`K2h6k(Vvs9ch;UtYv9%vOb4$W>pDNKc{R>cV% zj`yI@^Z)XN&;03+Kf-p>^w+K;*?&AUbKy*AOhVf(1Ha>caPLi)M-8G{N%6x<4=!y?z!l>a`gvy8Slc0b^@n#e!?VYH?*>g zbx$)YL`{yXb=CyBQNdbtH^%u{V(nf2;YC|qW=EX8sqxdr5x->7Y8%rorf)V;%t{Pa z)Q$3^`=97n=HA~#wE4(srL2?Gi*)07hkD(#*OtjAeB}OJ_k#TqbvaslBn{hP0z^F8T> zTMnf3(C-Js$RGX2ul?pP{?c#$_S40?56EYJt@7bbVWtOnI!#R=2I13JrnT&?@Z6R5 z&?-f@Tb%{Vdh<9^;!}0ZPVi06J`7cnwDPcdb~@m@^Tta7~9i#r;M!gk_*A2G!b{E5LGI!T+Ol%ZP;C z1g&(>UT>~*ljKxo>~MJxwS0ISldv|i;GDXZ-hFTQuKPe%^_717uuWp|^@={Hw(?e6 zpBwmlo`ExqKstmdv2tRlH?-h|mbRC0CHBV~&v}-4k}D)R6ESF@b*}_j*7e-Ep^+VR zygjuTpvP5fV#yaV!*<14HAGvrdpFJ>cDH}=*S_?^`04F``sn-beqSQYcJFG<2`%v{ zddjA?gM=iD;jB5xnTruRFDO!a>&j`qm{tqg1;5)$`3u>9&P1Qs7Rc%KM`#$?FSbmyLcx$U;lh0eZdUL2@i7bQz-=nZ#XZAqG z*IjH7009x;YM?T5P8eC1=xZXIWh_~WBG5*7VCv~yeNd0g zO=wlYDmCj~^LRf`ayHsK-R|%9Ja#VcvYOQVc>h_5`B29q-d^&^H@R@4CWDy^X6R6z zwmVb#Y+zh(SX}`xPzYPwXu!ufZGn12>f*vhtA9kWo42kLM7hSB8_p5f!$I5|mycz_ zSF~2dYF*$Fw?p5cdDtLyVb zYiKb^2ItToFDNm%e`l=6W$z|cU8XOL`O90QdoLyd#yU!H-JcGrpUw8_gZz=tPjm-3cf6hNfRt>pes1 zlwobAuGS|>AEt~~&s-#f+6?j3w0XWfkH^i*u2zg>TRzIF#3u!Hhv_bBNYgy<7AD`< zQ!As$y-GJaA@}jU%doHq6*ilYs}3UD43r!>N{xwAF>dDJrO2HH_!=E3b(zVkPK zol^%6jniF22|)!xu@CSzaL$5k-2psSkWX7o1l}7v07%ZctZG9o*+g%LsDcp|cXyj( z;1B~n2lUOq`F_8w-+J&Pji4VFddY%m@)Gpy#f!5qext8v@P>~Vu^=$sq#bdKm{kg+ zj=~Jg-F7i5c%iv6o=V)F5ANRCz`p+jY(#Ke=Yf}+b;`9YvEZ9d22s`~7yqd2x@KX~ zdbuw3S>dRV)FF*HLRy&BY8kIF;nC%#;=R52VR&+RXA7-Dtj?7eVVGTky($*7p8-%u zu5y(Ka%$$edFf{LQF!=7i+ZEgA(uzN50#F3^Wh_#e<3m=D&t3k4vD$XaIP;}7e;6O zF;`8jb~}Cc!BG6nf(i6~{s(`aQEg-S2IP8t{a$8x^ke2MJ}qHpYBJk89PL_>1cdg( zwv`%Tg`qOMwGG52+MH?mSC&ZY4^ zSK;nS1uhL9yz@JM^A7+5KmBjF+WN-i#3|Y-Wet8tIYXN|H!IuD2(s|r>}%&+%Y%Xa z>Cn>eOIYtv~aHPfskL`rY4nX@&Sq7)idO*{4u9G=6>D2sl4mrSmiYw0WG+bI+cg z{Gf95Vaaj&-lpyuiVLgu_dDHSe|y790siS}dhfy)gA@7Hmg%T3z**VlVH>NMSyZ%x zcFjOetoxzOmnJ5p+DxaA?mqW2Q*JTY_d2T~Wqxkvjr*N9cQYY#8L7>u?5QV}*hjTT zNYC}DN$wGc4O``INnae}1|hB_SomU;Hn>T8dWJgaMadG!tmfixJiN8r-5jouSHALM z+}u-^&oYeyJ+O1W?p{5%66sp{-Trx4>lH0&f9{}nAne@UUVrx^yKMm1@IEDpg1^b( z(>+LK(_I2-Cnl%+hcnBcfBYINx6~IF&*Yv8Jq7%5GX$JAAS(k5WdIoF^HeBYhk&~W z43R$)Xh#5;WuTitRs+_fLHTE+%3@8fj$QT#M|-7R6H)n-Z@z)D)8oQXU_5K`sr5=S z3>^H7QpOO^wp|ChaXK5&XE&=Ys8f65mwsWIJvB9>2$H{7%;dARj(cl!TlzLHj(qO6f<+8O6Ba4@~vVq+=^J8r-Pv4Gf6?IccAVqj39X zKY_MNVJbc#N?wB&stOVgv;GX94AW5qr?k5Q_8iQ?(e9{1^v0S^^z9+tc?gm_O-v2H zH7r=;o$y|poeLDl)e-fSfV1#_t(I*nkv(=@YZVx?~M+ExF8w|3WB zGk%)BSz+vu+O`bstS#0j+aS}W0oez39n>0RGp6}6nk(+yVrym|v+Ib;iCSx-neYu}gn&4Iy0I{r07m_&(@+2v(Sw4spdb6&-~n(>El^Ub)fuDAg4)?Z zJl1h0?6Reaj%R33D|WzT9D==zHvRdfx+Alyp*5v72nE}X25<-5>8#?kjPm?=ib)JC zZPspX1u*2DEFUB7N-Q+WW>u|=+9H&U6O40aa?xx1PJV}e6HCn%L8j#o0#=9_T!wFi zi(GzD3>=SxQ(OfWG|Fk)X$gW0vh@ejxD~QS`EEin_(-H$t4R&q%yC|7|7LR{eEP)5 zyRZOT+-QVm!7HC@4@W;sKRqaB1$TUE}%YdkkEN=K{w5X=QD z4m6UMk3A07()10-M54*_Zqs}6t)oc zgL8w64?7k9JmF|INqv6ii&pADbh+jcq5k2+P2^CT9Zft08z)S&+}jYzsfnpJwzbr3 zjVHwHN6^^eN@}V+sGRn>offCt99+3J9@;`$s;f3#Cq{1?43E$!&M+S-lR>?Qk|w6! 
z>n3UgSd@>Mp1O4UIirTAI*zsw%|KbH!fJ*DK{&$zqANTk_fD_7C3wudh9{ zvY^xlz!3qHIs07<_e6Synrzr0E~j(>u$n=CJWECo@FKu)08v!E4Tt0b?DOOZ3sf~Y zR3AXQgO=%-eCuyMoL`>)WOt)wr6=t_M{U#Rp1*RcpI)yvPhn3UO<^rahu+CR&HeFH zmwS!gqpb}Gs=IAaX!5jh_S)SazUF=B`!x#B8|<{rt2Wgt*FU`!AbC&1IzAe+mcbo} zX&g%Naoq}!tEwB8UNal5M);D+>-GX)oS4%(#q4FdV3L0Gff)rF1|ybx_BG-t^V zOf8*sz$P#RfU<~SU;x1kyg-x%GHK~L0O#qoTC4*Gzy^c?7%+evl#-bi4hcoW%gq?6 zTxLaj{GeI<&Y%6kg!Liw+&QoI#M|%vxL&D6njB9adLI8mbyVq=b}>muVUo%a$yho# z;`k(5e6j4xERstx=Yv#sdNgPBctAB|KRq<}MyhPwvo1crN+8O!o(e`S{J8M?{R7Dj z&ngH^cmQUziPdx@=&v7{#29(O;9?Z9W4iDk&Sh_z*8A^Wd5J*4*QbDUs#JU9L^(e; z?9Z)$*5KRL>{B=D_n*MHBX%^(hgL6`tL9ITy>Ij^qD=<66>kl|Y^WpKQ^g;6BF`Ft z)sd>B3(H9Aok!bWoO>okmHYRA@9_(WoIAYBUV6;2fAjkP{Kfwo{<~9g#Hn{?v-kg& z_W$y){+mzzi$DAg)#P7*tsC|0ly7$UumA=8Ltgz#eDrQlGph~j<-YH)=j!?vu1%2V zydCa%!w}(jZKGZ6bI+*24{Z|mO7ESu)=ZzI+-E0Ay_~Jqt6iKa-%_Q(N4xFG3o}L+ zjy@eg3)ZCf<6z+Y=DqK(5_5A4k4fp~_YeEfy_;cAM1G8=T59cK+pq}KxIa-3=Cw!7 zu>YbQw#MBHPm`7~&1X?pktFAAt@;=WxWwD*KP**=4_ZqX{bcZf5YJcwQ=O6}JlV?m zON+KADj;P&Fj+^LUDc>J;rV(iYG2$dTDDSeH-_v}mp|7&>-8+<+$U34TwZi846OjP z0yJO0d;*6i3ZTx)n1C4&usM|s+wcNEIgkOf4Jsl~bpVWmH}*vfU#mTAr0cgD(6AF| zKH^%Azri(2O%fVrz)0uVgeNt6+r3Lu8W_zCk%O@{!~!4;4YaKr7t8Se+AP8IE$5kN z^;W6YwO+Fg4qbC+@F?-c=cB3ncTaY(5s{7zE)1r|T)0Mz&rG_!PT2U--(IgyL6W;S zkzWOBSGa)B5`)7$J&S8TNZp^B@V$!cTmqmX=&!uK^vrxEbS;aN7Mf#AVKeJ_GDIJB z-@)|fJF)I)GsU(v?)h2i0Awe+@y(d_-p)f>=r}U%3NjZT4$5GFd8xiRI7;0NyE&3| z%S=1icRU&hc-L-Fq^a}X?bfsU(*5vV znFvY6^G~@?H1C6It9O_*Sr<8|ZK?Qk&vqHJ_TSI_h6%U5`JW zC245oX7Q=1h)DQulhAT@?0ItHl- zy}m?X6c>kz4SZSZ99ySwscdk49Cy%V69SvX@v+rDnIB_lUs(8(+s)dY>+sJluOoLz ztHatW)L>v91Hfa1AnPtauycd<3D+5CDb+gz~Ir$LT#!F9wN?gHJAt5A~ni7PdxI4LhFu`g_OE|8c4;{++cmwa^f>O zO_JSik8KW*hanm?XhxIbilYXOA!03T9MvSDHxbiTAm~T60HE6Q!{DB<2QbQNH9@@A zLm=8lJNOoFbGOvSw8$9lnn6gA$l&LHc@h;()&0ze(u3K3Hbau|=YaC9aVJiZNmCSQ zSEF>-E`qkPTIg-`iqsq&^xqt=Ze2lXgAq3ncqFByM*@!k`xwG$WabF z#z99L1R8W8*88O2b+qNC76o6E{zyBXy8GN$F2CrU#HJQN2JOQ6s6lM2PHar}R%GX> zO1`NT{DkK*k9(}sV~leJ+u4>~V)roV^kWUj$;t*ga9Bbi21_Oz?hrTP%aG17mi8?X z8`Xu!#wQ=2KI6}v6yZF*H(@!448CvboIQrB(6OkmQs;)jii5OV)JzVob36m_3q|YM;fP%0A9*rY@#2j%T?>y#5@1+!@vIf|IW7%V`?58 zR{<`85N&PrB?muFN1pb|#ST)iNJ6Ir0P6;b869aFSzx#cw$sn`8zzs#T%=qvEhj>{ zG1zX?&f_D$F;SmGjvsdT+DDzk-q&AzeAFoQpc&t?6L5vht=@2C9Ta-5XP5H#Ugr;_ zTva~N?*7Twere<4+MT=KB)@;+9N3ROT@yynyB{Xyi;b!6vDtgd{ACsGySLUG3AkUj z4x-Lre{V3Nlf@cm9Jq-qiGUBJ71po z>H1sI&tFRq55Dm1l|NYi3>j4jE36;*wu+T;iKVzb^_*{5VyH4wjASRNsrH8(rUnk5 zqH4Hqt+-9h-dW;Q!JnDIP2VR%{_j7kKf{4+mx@f98td&X&GW>}&2nK8XfiZxa%Cn% zny{}tw)*k<^}gi4irdE%_~+Hrm+L!+Z(hIg5Fh-?%9>D`I==sxflHsCi_zictm|{LPDXk= z^OuEJqYP5>*?(c_Uq#xFDz)1`Z|v}s&&T77VX=LfCp*K3!QNoS76POab~aD{ssub&2gO!J%jw^3Tz4rPGQi8w zZr{`qDhXG_e90BEK*?4GG8_GMNxz1#+2Q8LaMhz3gPwK+sOMvLsTOv!$SPm%GaBiJsHvXS6-v*(Z?^z*rH03Lr);u;2io3&t?mw1V@7Qio7t z55SX0>g_nkqum!kVP9A}81FvF{Znf%xw$`@wUFtDyV)K9`CO++UxKuCEAH#TS=elb zO#b<-sj*mSm2evGVvPxpCX$RD9^lH0gMJe3>-A~OraA2CI_{K)ATIAU-T%wE$7%Zy zzqe=h9X{4N^^@dLlg$!3XNo^|_5zfVQc8Q4E_csip6b&0lzXC|qqya{ZEh3%(?&xF z6G5|pIl0z)=fb=m^V;6*x9D2jyo|{YKBgA~xenhMA?L!*+y>-8a%odUl2V-e1(Lio zF6L?I>5@=bF>F$#y4Uy2EVbniZoG1Y+U@O(U=;R^*c48pZH~g*hs;BpmlPd2lpp}daC00sb#6? 
zQ+#0?o9mB>5nh819crvNq5BHRT=q44dtOw0&lfpO$Z;zi)x z@xo2D8E;xnJcy%agaL9yS7v!|42SUL?AfI4o$y?|HPk^ygdCuihTLAB@>=?R+?+O- z&kvA6!P=X(f_w@y!vn-c)%bh@0hP|8YHwK-Br;bt-+ey!YZGO>v6%h-;XCl@RegH^ zL=512m>!Bkw6QJe;MV!{iCz+>MbQm8%9FDjA|3 zX}0=n325&~^*}_!8WMXN0DCWmFE|N*a}w^>HP2{ zfoiG%20SavJrV#S;2y#aWMIG&tJ)o%k{WT!fH!yatiA%r?+2UnYILuV2dh+kLo>{{ zmXP2k&(9P1`{5DD}bu63@V@{x$rTTVnj%E zoX^RCJxpUR1ul-ksMcV^TyJ=Tum^~rUMjr&9#Qf`dOH^I3MPFlV!o<$Y?}3@KWM$Y zaHA%~Gg4&bq>P-I8c5xCHw@$B;$S-%x(IdQuq2+h2ZCmMKN3^!=6li@(Z5=NFCtjkKP+f7Y!Q? zeDs;{mtDw&Nj$mifXtPC>1RFOr1>eZx6nBC;c?;g7*AccP`u;)oJi4!@KE^_ZRvdZ zbT25PceeN6s=~=u6=TnasXxm!D3bWH5IL~z3WJFsTsn!20QteteQ{>lsyIeLAPg7} z0RX^|uo-Be1=JAk1067+ffVu~|$Stw&KMSE+ci=|ExOVP=_%q0STPNI;*1)mZWcVB{ z+OZ!|pOzf=8`4rbaC6)w9zv1%CCtIUqi#~$RZg4Q2%4w1vfr3KoC(gJdFFiG#6+D> zh24*GZ+&0!o^sD)A0YFN06sv$zW_EAp*Ls#{1{)8u?MZaY zem+oHthl3>hZ9JrCCCPJ?)0<08$VV}C-%W8u&U<@cMaR>us%9iB$nlI@?8@uOH9MS z<>aEnm+`H*`yF$rSL7$xW{|Cu?*32kr_q6RTt9YCo3SZV_2>j_?7StpuSv}`)Ocq< zdv?L2fw}+uve0|jDn6hxYK6uTzHN0l%;6&>M6v#aX z_GB-wvaUi$57^MS0Td*!R>NHiIBy6IApC!GhY$XVVxKJHjSo%^cDqIR<4>o-Z@&1Y zK!|(m+pirT^$IZzGnl4O`$A^W*9%HaIbBN{=m9oTk#F z8O~{OMoRyJTJCOG{%?IVoJrQ)Jm+5(&8?_sdFEyN1ZFnoBhGWV*53y!s}p80MeTP%rL$I<=rcq!sfp!Rejrx?AVYm&%!nA2jno0kjrdFPFd z>zTqszMGHqVZZf4V`0T7`!=|{&PPsp(cIO|6A0SNcaiDF%{pwAEuIkVMW1iDCS8pO zU6bD;rcWmN;)&Q{R~qdC>{iteI!BA%^J%QMR<<3i*!&;EIjtQFA*IGO%)Wk;5>n%b4{8LOZ?W0;1YXPmOj+G}JFFUYrU97$*x?YxB#%y=A-jkeB-3rgZgYA!; zkH7HB?b}$+nOz%M+Vz1hJ|U`qVT=qAy1Aygl0tYAD+<##VlQo@0-Q}>*I-r4gv}n* z>2^0O1QI;~kHU-{Dw%U6#L|kubR)d~b_fm@s$$3MRnVa{Yd%sCu_8rasOK3WHNEen z`kl&@3zSJJ2(|Wg`i&tBGJ83#D|;+PzaLaV%NWN%HQTG=lAXkALnlPzN&&?LwSzio zf@i#zaH=f?+y`RY)IR4>Z`r0?BjgtP!aPsxUeWaW9lW%V6vi}Y0jo&DeQSs8JK7U5 zHJOn-LQ|Y|!`VtZVAxB;nXyq7`u0eW(7XrU`#_DXl&YJoYfguQ47_W9NVHjah?*sH z#Dx%q4`_!(-)FcxjlPI=t#YseouFPP+Jy^4LbO$T9d5$tpVTcxz%@ao#nN1eX&IV( zP;Dy{co0RTBtI%VYBCLouc zaIYy4+xdd;DprN`XYgyS(d!kyv>&FC<_?hNw81`Mx=*>UZBmVi+==@6Xm+DH>F*yN zNBRqlxv(=JJn&fTT6CsU!J8N{<0vY%I;*?-wk=m1v>doRAUML`WA?f*<`yJ$+%wrt zI6h6lHFHlh(GJb#X(UZ?JkK|^uB+|CDm^*aQ_Bz)hs8YVYIS=;WQ>&D*c#D1hRjII znm5|?q63CSr5}tEde&o~u80CwM@*)XEFta@s6_7X6U7CaLUDI(K&u&Tb@2DjuZ=0 zq$B1pf=}&f1=$AwI8C!}<*dQnP|+TODfjfFW6EX{naO=zD!KMn$JhfV?At55U6QZv zXj*B7I6t1w%r~Fx@!0U?kCsPTO)q!%gBq%ZI#_FJ&RA(p&D>BvT6C7(Ukx<_gtZuv zy+43m=jZWmbC5tBlq?cr&pIV!);#nNK+RF3%fP!W)BxWAfCI3VA2)!>Jj(CFc2IKJ zXZr3#06C_@v$0&{|f6U#ZR+bnoGD?v+m-{<|mVZ_*7@Xs~2KUr(RR z)JqROh)OZjy3c(3$9vsc=GGH;j6VJHWNUr5+8y8rQTbwd!9G26*Gp$d>Py)>v+jRo zicN}L9!YdVAgb$LI1DN>p!+~1u#dLiq+Yum}U9PsUy^L0A>)9V}fbc9ROwsVY8lK6RID~T3NA4lYlPOj*4wNxjNE|#2 zoavTSVxS7+NelELuX_ZtP_sbS7F&%IUwhOpYKrX21X|AzmC}Qs*=K&m;_92QPW7!! 
[base85-encoded GIT binary patch data omitted — raw binary payload of files added by this patch; not reviewable or editable as text, and truncated in this extract]